You've already forked DataMate
feat: adapt the data annotation page to the backend API and improve the labeling project creation module.
* feat: data annotation page adaptation to the backend API. * feat: Implement labeling configuration editor and enhance annotation task creation form
This commit is contained in:
@@ -99,19 +99,15 @@ class Client:
|
||||
title: str,
|
||||
description: str = "",
|
||||
label_config: Optional[str] = None,
|
||||
data_type: str = "image"
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""创建Label Studio项目"""
|
||||
try:
|
||||
logger.debug(f"Creating Label Studio project: {title}")
|
||||
|
||||
if not label_config:
|
||||
label_config = self.get_label_config_by_type(data_type)
|
||||
|
||||
project_data = {
|
||||
"title": title,
|
||||
"description": description,
|
||||
"label_config": label_config.strip()
|
||||
"label_config": label_config or "<View></View>"
|
||||
}
|
||||
|
||||
response = await self.client.post("/api/projects", json=project_data)
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from fastapi import APIRouter
|
||||
|
||||
from .about import router as about_router
|
||||
from .project import router as project_router
|
||||
from .task import router as task_router
|
||||
|
||||
@@ -8,5 +9,6 @@ router = APIRouter(
|
||||
tags = ["annotation"]
|
||||
)
|
||||
|
||||
router.include_router(about_router)
|
||||
router.include_router(project_router)
|
||||
router.include_router(task_router)
|
||||
@@ -0,0 +1,35 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from typing import List, Optional
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.module.shared.schema import StandardResponse
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
from app.core.config import settings
|
||||
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
|
||||
|
||||
from ..client import LabelStudioClient
|
||||
from ..service.sync import SyncService
|
||||
from ..service.mapping import DatasetMappingService
|
||||
from ..schema import (
|
||||
ConfigResponse
|
||||
)
|
||||
|
||||
|
||||
# Sub-router for the "about" endpoints; registered under the "/about" prefix.
router = APIRouter(prefix="/about", tags=["annotation/about"])

# Module-level logger named after this module.
logger = get_logger(__name__)
|
||||
|
||||
@router.get("", response_model=StandardResponse[ConfigResponse])
async def get_config():
    """Return configuration information.

    The payload carries the Label Studio base URL read from ``settings``,
    wrapped in the standard response envelope with code 200.
    """
    config_payload = ConfigResponse(
        label_studio_url=settings.label_studio_base_url,
    )
    return StandardResponse(code=200, message="success", data=config_payload)
|
||||
@@ -5,6 +5,7 @@ from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.db.models import LabelingProject
|
||||
from app.module.shared.schema import StandardResponse, PaginatedData
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
@@ -12,6 +13,7 @@ from app.core.config import settings
|
||||
|
||||
from ..client import LabelStudioClient
|
||||
from ..service.mapping import DatasetMappingService
|
||||
from ..service.sync import SyncService
|
||||
from ..schema import (
|
||||
DatasetMappingCreateRequest,
|
||||
DatasetMappingCreateResponse,
|
||||
@@ -25,7 +27,7 @@ router = APIRouter(
|
||||
)
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@router.post("/", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
|
||||
@router.post("", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
|
||||
async def create_mapping(
|
||||
request: DatasetMappingCreateRequest,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
@@ -42,7 +44,8 @@ async def create_mapping(
|
||||
dm_client = DatasetManagementService(db)
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
service = DatasetMappingService(db)
|
||||
mapping_service = DatasetMappingService(db)
|
||||
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
||||
|
||||
logger.info(f"Create dataset mapping request: {request.dataset_id}")
|
||||
|
||||
@@ -54,24 +57,18 @@ async def create_mapping(
|
||||
detail=f"Dataset not found in DM service: {request.dataset_id}"
|
||||
)
|
||||
|
||||
# 确定数据类型(基于数据集类型)
|
||||
data_type = "image" # 默认值
|
||||
if dataset_info.type and dataset_info.type.code:
|
||||
type_code = dataset_info.type.code.lower()
|
||||
if "audio" in type_code:
|
||||
data_type = "audio"
|
||||
elif "video" in type_code:
|
||||
data_type = "video"
|
||||
elif "text" in type_code:
|
||||
data_type = "text"
|
||||
|
||||
project_name = f"{dataset_info.name}"
|
||||
project_name = request.name or \
|
||||
dataset_info.name or \
|
||||
"A new project from DataMate"
|
||||
|
||||
project_description = request.description or \
|
||||
dataset_info.description or \
|
||||
f"Imported from DM dataset {dataset_info.name} ({dataset_info.id})"
|
||||
|
||||
# 在Label Studio中创建项目
|
||||
project_data = await ls_client.create_project(
|
||||
title=project_name,
|
||||
description=dataset_info.description or f"Imported from DM dataset {dataset_info.id}",
|
||||
data_type=data_type
|
||||
description=project_description,
|
||||
)
|
||||
|
||||
if not project_data:
|
||||
@@ -97,13 +94,18 @@ async def create_mapping(
|
||||
logger.warning(f"Failed to configure local storage for project {project_id}")
|
||||
else:
|
||||
logger.info(f"Local storage configured for project {project_id}: {local_storage_path}")
|
||||
|
||||
labeling_project = LabelingProject(
|
||||
dataset_id=request.dataset_id,
|
||||
labeling_project_id=str(project_id),
|
||||
name=project_name,
|
||||
)
|
||||
|
||||
# 创建映射关系,包含项目名称(先持久化映射以获得 mapping.id)
|
||||
mapping = await mapping_service.create_mapping(labeling_project)
|
||||
|
||||
# 创建映射关系,包含项目名称
|
||||
mapping = await service.create_mapping(
|
||||
request,
|
||||
str(project_id),
|
||||
project_name
|
||||
)
|
||||
# 进行一次同步,使用创建后的 mapping.id
|
||||
await sync_service.sync_dataset_files(mapping.id, 100)
|
||||
|
||||
response_data = DatasetMappingCreateResponse(
|
||||
id=mapping.id,
|
||||
@@ -123,7 +125,7 @@ async def create_mapping(
|
||||
logger.error(f"Error while creating dataset mapping: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@router.get("/", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
||||
@router.get("", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
|
||||
async def list_mappings(
|
||||
page: int = Query(1, ge=1, description="页码(从1开始)"),
|
||||
page_size: int = Query(20, ge=1, le=100, description="每页记录数"),
|
||||
@@ -260,7 +262,7 @@ async def get_mappings_by_source(
|
||||
logger.error(f"Error getting mappings: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
|
||||
@router.delete("/", response_model=StandardResponse[DeleteDatasetResponse])
|
||||
@router.delete("", response_model=StandardResponse[DeleteDatasetResponse])
|
||||
async def delete_mapping(
|
||||
m: Optional[str] = Query(None, description="映射UUID"),
|
||||
proj: Optional[str] = Query(None, description="Label Studio项目ID"),
|
||||
@@ -279,8 +281,11 @@ async def delete_mapping(
|
||||
2. 软删除数据库中的映射记录
|
||||
"""
|
||||
try:
|
||||
# Log incoming request parameters for debugging
|
||||
logger.debug(f"Delete mapping request received: m={m!r}, proj={proj!r}")
|
||||
# 至少需要提供一个参数
|
||||
if not m and not proj:
|
||||
logger.debug("Missing both 'm' and 'proj' in delete request")
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail="Either 'm' (mapping UUID) or 'proj' (project ID) must be provided"
|
||||
@@ -300,6 +305,8 @@ async def delete_mapping(
|
||||
mapping = await service.get_mapping_by_labeling_project_id(proj)
|
||||
else:
|
||||
mapping = None
|
||||
|
||||
logger.debug(f"Mapping lookup result: {mapping}")
|
||||
|
||||
if not mapping:
|
||||
raise HTTPException(
|
||||
@@ -309,12 +316,12 @@ async def delete_mapping(
|
||||
|
||||
id = mapping.id
|
||||
labeling_project_id = mapping.labeling_project_id
|
||||
labeling_project_name = mapping.name
|
||||
|
||||
logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}")
|
||||
|
||||
# 1. 删除 Label Studio 项目
|
||||
try:
|
||||
logger.debug(f"Deleting Label Studio project: {labeling_project_id}")
|
||||
delete_success = await ls_client.delete_project(int(labeling_project_id))
|
||||
if delete_success:
|
||||
logger.debug(f"Successfully deleted Label Studio project: {labeling_project_id}")
|
||||
@@ -326,6 +333,7 @@ async def delete_mapping(
|
||||
|
||||
# 2. 软删除映射记录
|
||||
soft_delete_success = await service.soft_delete_mapping(id)
|
||||
logger.debug(f"Soft delete result for mapping {id}: {soft_delete_success}")
|
||||
|
||||
if not soft_delete_success:
|
||||
raise HTTPException(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from .config import ConfigResponse
|
||||
|
||||
from .mapping import (
|
||||
_DatasetMappingBase,
|
||||
DatasetMappingCreateRequest,
|
||||
DatasetMappingCreateResponse,
|
||||
DatasetMappingUpdateRequest,
|
||||
@@ -13,7 +14,7 @@ from .sync import (
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"_DatasetMappingBase",
|
||||
"ConfigResponse",
|
||||
"DatasetMappingCreateRequest",
|
||||
"DatasetMappingCreateResponse",
|
||||
"DatasetMappingUpdateRequest",
|
||||
|
||||
@@ -5,7 +5,4 @@ from app.module.shared.schema import StandardResponse
|
||||
|
||||
class ConfigResponse(BaseResponseModel):
|
||||
"""配置信息响应模型"""
|
||||
app_name: str = Field(..., description="应用名称")
|
||||
version: str = Field(..., description="应用版本")
|
||||
label_studio_url: str = Field(..., description="Label Studio基础URL")
|
||||
debug: bool = Field(..., description="调试模式状态")
|
||||
label_studio_url: str = Field(..., description="Label Studio基础URL")
|
||||
@@ -1,17 +1,27 @@
|
||||
from pydantic import Field
|
||||
from pydantic import Field, BaseModel
|
||||
from typing import Optional
|
||||
from datetime import datetime
|
||||
|
||||
from app.module.shared.schema import BaseResponseModel
|
||||
from app.module.shared.schema import StandardResponse
|
||||
|
||||
class _DatasetMappingBase(BaseResponseModel):
|
||||
"""数据集映射 基础模型"""
|
||||
dataset_id: str = Field(..., description="源数据集ID")
|
||||
|
||||
class DatasetMappingCreateRequest(_DatasetMappingBase):
|
||||
"""数据集映射 创建 请求模型"""
|
||||
pass
|
||||
class DatasetMappingCreateRequest(BaseModel):
    """Request model for creating a dataset mapping.

    The frontend sends ``datasetId``, ``name`` and ``description`` in its
    JSON body. ``dataset_id`` therefore declares the camelCase alias
    ``datasetId``; the other two fields declare aliases identical to their
    own names. The nested ``Config`` is intended to additionally allow the
    model to be populated by attribute name (``dataset_id``) when it is
    constructed programmatically.
    """

    # Source dataset ID (required).
    dataset_id: str = Field(
        ...,
        description="源数据集ID",
        alias="datasetId",
    )
    # Labeling project name (optional).
    name: Optional[str] = Field(
        None,
        description="标注项目名称",
        alias="name",
    )
    # Labeling project description (optional).
    description: Optional[str] = Field(
        None,
        description="标注项目描述",
        alias="description",
    )

    class Config:
        # Allow population by field name as well as by alias.
        # NOTE(review): this is the pydantic v1 option name; pydantic v2
        # calls it `populate_by_name` — confirm which version is installed.
        allow_population_by_field_name = True
|
||||
|
||||
class DatasetMappingCreateResponse(BaseResponseModel):
|
||||
"""数据集映射 创建 响应模型"""
|
||||
@@ -23,7 +33,8 @@ class DatasetMappingUpdateRequest(BaseResponseModel):
|
||||
"""数据集映射 更新 请求模型"""
|
||||
dataset_id: Optional[str] = Field(None, description="源数据集ID")
|
||||
|
||||
class DatasetMappingResponse(_DatasetMappingBase):
|
||||
class DatasetMappingResponse(BaseModel):
|
||||
dataset_id: str = Field(..., description="源数据集ID")
|
||||
"""数据集映射 查询 响应模型"""
|
||||
id: str = Field(..., description="映射UUID")
|
||||
labeling_project_id: str = Field(..., description="标注项目ID")
|
||||
|
||||
@@ -23,18 +23,16 @@ class DatasetMappingService:
|
||||
|
||||
async def create_mapping(
|
||||
self,
|
||||
mapping_data: DatasetMappingCreateRequest,
|
||||
labeling_project_id: str,
|
||||
labeling_project_name: str
|
||||
labeling_project: LabelingProject
|
||||
) -> DatasetMappingResponse:
|
||||
"""创建数据集映射"""
|
||||
logger.info(f"Create dataset mapping: {mapping_data.dataset_id} -> {labeling_project_id}")
|
||||
logger.info(f"Create dataset mapping: {labeling_project.dataset_id} -> {labeling_project.labeling_project_id}")
|
||||
|
||||
db_mapping = LabelingProject(
|
||||
id=str(uuid.uuid4()),
|
||||
dataset_id=mapping_data.dataset_id,
|
||||
labeling_project_id=labeling_project_id,
|
||||
name=labeling_project_name
|
||||
dataset_id=labeling_project.dataset_id,
|
||||
labeling_project_id=labeling_project.labeling_project_id,
|
||||
name=labeling_project.name
|
||||
)
|
||||
|
||||
self.db.add(db_mapping)
|
||||
|
||||
@@ -94,22 +94,22 @@ class SyncService:
|
||||
|
||||
async def sync_dataset_files(
|
||||
self,
|
||||
id: str,
|
||||
mapping_id: str,
|
||||
batch_size: int = 50
|
||||
) -> SyncDatasetResponse:
|
||||
"""同步数据集文件到Label Studio"""
|
||||
logger.info(f"Start syncing dataset by mapping: {id}")
|
||||
logger.info(f"Start syncing dataset by mapping: {mapping_id}")
|
||||
|
||||
# 获取映射关系
|
||||
mapping = await self.mapping_service.get_mapping_by_uuid(id)
|
||||
mapping = await self.mapping_service.get_mapping_by_uuid(mapping_id)
|
||||
if not mapping:
|
||||
logger.error(f"Dataset mapping not found: {id}")
|
||||
logger.error(f"Dataset mapping not found: {mapping_id}")
|
||||
return SyncDatasetResponse(
|
||||
id="",
|
||||
status="error",
|
||||
synced_files=0,
|
||||
total_files=0,
|
||||
message=f"Dataset mapping not found: {id}"
|
||||
message=f"Dataset mapping not found: {mapping_id}"
|
||||
)
|
||||
|
||||
try:
|
||||
|
||||
@@ -3,7 +3,7 @@ from typing import Dict, Any
|
||||
from app.core.config import settings
|
||||
from app.module.shared.schema import StandardResponse
|
||||
|
||||
from ..schema import ConfigResponse, HealthResponse
|
||||
from ..schema import HealthResponse
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@@ -19,18 +19,4 @@ async def health_check():
|
||||
service="Label Studio Adapter",
|
||||
version=settings.app_version
|
||||
)
|
||||
)
|
||||
|
||||
@router.get("/config", response_model=StandardResponse[ConfigResponse])
|
||||
async def get_config():
|
||||
"""获取配置信息"""
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data=ConfigResponse(
|
||||
app_name=settings.app_name,
|
||||
version=settings.app_version,
|
||||
label_studio_url=settings.label_studio_base_url,
|
||||
debug=settings.debug
|
||||
)
|
||||
)
|
||||
@@ -1,4 +1,3 @@
|
||||
from .config import ConfigResponse
|
||||
from .health import HealthResponse
|
||||
|
||||
__all__ = ["ConfigResponse", "HealthResponse"]
|
||||
__all__ = ["HealthResponse"]
|
||||
Reference in New Issue
Block a user