You've already forked DataMate
- 移除 LabelStudioClient 和 SyncService 的导入及使用 - 删除与 Label Studio 项目的创建、删除和同步相关代码 - 修改创建数据集映射功能,改为创建 DataMate 标注项目 - 更新删除映射接口,仅进行软删除不再删除 Label Studio 项目 - 修改同步接口为兼容性保留,实际操作为空操作 - 移除 Label Studio 连接诊断功能 - 更新文档说明以反映内嵌编辑器模式的变化
236 lines
8.2 KiB
Python
236 lines
8.2 KiB
Python
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from typing import List, Optional, Dict, Any
|
|
from datetime import datetime
|
|
from pydantic import BaseModel, Field, ConfigDict
|
|
|
|
from app.db.session import get_db
|
|
from app.module.shared.schema import StandardResponse
|
|
from app.module.dataset import DatasetManagementService
|
|
from app.core.logging import get_logger
|
|
from app.core.config import settings
|
|
|
|
from ..service.mapping import DatasetMappingService
|
|
from ..schema import (
|
|
SyncDatasetRequest,
|
|
SyncDatasetResponse,
|
|
SyncAnnotationsRequest,
|
|
SyncAnnotationsResponse,
|
|
UpdateFileTagsRequest,
|
|
UpdateFileTagsResponse,
|
|
UpdateFileTagsRequest,
|
|
UpdateFileTagsResponse
|
|
)
|
|
|
|
|
|
# Router for the annotation task endpoints; routes declared on it get the "/task" prefix.
router = APIRouter(prefix="/task", tags=["annotation/task"])

# Logger obtained for this module's name.
logger = get_logger(__name__)
|
|
|
|
@router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
async def sync_dataset_content(
    request: SyncDatasetRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Sync dataset content (files and annotations) -- a no-op in embedded-editor mode.

    In embedded-editor mode the task list reads the DataMate dataset files
    directly, so nothing is synchronised with a Label Studio server. The
    endpoint only checks that the mapping exists and reports the dataset's
    file count.

    Args:
        request: Sync request; ``request.id`` is the mapping UUID to look up.
        db: Async database session injected by FastAPI.

    Returns:
        A ``StandardResponse`` wrapping a ``SyncDatasetResponse`` with
        ``synced_files=0`` and ``total_files`` taken from the dataset's
        ``fileCount`` attribute (0 when the dataset or the attribute is missing).

    Raises:
        HTTPException: 404 when no mapping matches ``request.id``; 500 for
            any other failure.
    """
    try:
        mappings = DatasetMappingService(db)

        logger.debug(f"Sync dataset content request: mapping_id={request.id}, sync_annotations={request.sync_annotations}")

        # Reject unknown mapping ids before touching the dataset.
        found = await mappings.get_mapping_by_uuid(request.id)
        if not found:
            raise HTTPException(status_code=404, detail=f"Mapping not found: {request.id}")

        dataset = await DatasetManagementService(db).get_dataset(found.dataset_id)

        # Falls back to 0 when the dataset is missing or fileCount is absent/None.
        file_total = 0
        if dataset:
            file_total = int(getattr(dataset, "fileCount", 0) or 0)

        payload = SyncDatasetResponse(
            id=found.id,
            status="success",
            synced_files=0,
            total_files=file_total,
            message="内嵌编辑器模式:任务列表直接读取数据集文件,无需同步(已忽略 syncAnnotations 等参数)",
        )

        logger.info(f"Embedded editor mode: sync is a no-op, mapping={found.id}, total_files={file_total}")

        return StandardResponse(code=200, message="success", data=payload)

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 404 above) pass through untouched.
        raise
    except Exception as exc:
        logger.error(f"Error syncing dataset content: {exc}")
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
|
@router.post("/annotation/sync", response_model=StandardResponse[SyncAnnotationsResponse])
async def sync_annotations(
    request: SyncAnnotationsRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Sync annotations only -- kept for API compatibility, currently a no-op.

    In embedded-editor mode annotation results are stored by DataMate
    directly, so there is nothing to synchronise with Label Studio. The
    endpoint still validates that the mapping exists and then returns a
    ``SyncAnnotationsResponse`` whose counters are all zero. The
    ``direction`` / ``overwrite`` / ``overwrite_labeling_project`` request
    fields are logged but otherwise ignored.

    Args:
        request: Sync request; ``request.id`` is the mapping UUID to look up.
        db: Async database session injected by FastAPI.

    Returns:
        A ``StandardResponse`` wrapping the zeroed ``SyncAnnotationsResponse``.

    Raises:
        HTTPException: 404 when no mapping matches ``request.id``; 500 for
            any other failure.
    """
    try:
        mapping_service = DatasetMappingService(db)

        logger.info(f"Sync annotations request: mapping_id={request.id}, direction={request.direction}, overwrite={request.overwrite}, overwrite_ls={request.overwrite_labeling_project}")

        # Verify that the mapping exists.
        mapping = await mapping_service.get_mapping_by_uuid(request.id)
        if not mapping:
            raise HTTPException(
                status_code=404,
                detail=f"Mapping not found: {request.id}",
            )

        result = SyncAnnotationsResponse(
            id=mapping.id,
            status="success",
            synced_to_dm=0,
            synced_to_ls=0,
            skipped=0,
            failed=0,
            conflicts_resolved=0,
            message="内嵌编辑器模式:标注结果由 DataMate 直接存储,无需与 Label Studio 同步(该接口为兼容保留,当前为 no-op)",
        )

        logger.info(f"Embedded editor mode: annotation sync is a no-op, mapping={mapping.id}")

        return StandardResponse(
            code=200,
            message="success",
            data=result,
        )

    except HTTPException:
        # Let deliberate HTTP errors (e.g. the 404 above) pass through untouched.
        raise
    except Exception as e:
        logger.error(f"Error syncing annotations: {e}")
        # Keep the client-facing detail generic, matching the /sync endpoint;
        # the exception text is already in the server log above.
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
|
|
@router.get("/check-ls-connection")
async def check_label_studio_connection():
    """
    Report that the Label Studio connection diagnostic is disabled.

    In embedded-editor mode no Label Studio server is required, so this
    endpoint performs no connection check. It returns a static payload with
    ``status="disabled"`` and an explanatory message.

    Returns:
        A ``StandardResponse`` with code 200 and the static "disabled" payload.

    Raises:
        HTTPException: 500 if building the response fails.
    """
    try:
        return StandardResponse(
            code=200,
            message="success",
            data={
                "status": "disabled",
                "message": "当前为内嵌编辑器模式:不需要 Label Studio Server,该诊断接口已停用",
            },
        )
    except Exception as e:
        logger.error(f"Error checking Label Studio connection: {e}")
        # Keep the client-facing detail generic, matching the /sync endpoint;
        # the exception text is already in the server log above.
        raise HTTPException(status_code=500, detail="Internal server error")
|
|
|
|
@router.put(
    "/{file_id}",
    response_model=StandardResponse[UpdateFileTagsResponse],
)
async def update_file_tags(
    request: UpdateFileTagsRequest,
    file_id: str = Path(..., description="文件ID"),
    db: AsyncSession = Depends(get_db),
):
    """
    Update file tags (partial update with automatic format conversion).

    Accepts a partial tag update and merges it into the given file: only the
    submitted tags are modified, the rest stay unchanged, and
    ``tags_updated_at`` is refreshed.

    Two tag formats are supported:

    1. Simplified format (submitted by external users):
       [{"from_name": "label", "to_name": "image", "values": ["cat", "dog"]}]

    2. Full format (internal storage):
       [{"id": "...", "from_name": "label", "to_name": "image", "type": "choices",
         "value": {"choices": ["cat", "dog"]}}]

    When the file's dataset has an associated template, the template id is
    passed to the service so the simplified format can be converted to the
    full format; without a template the tags must already be in full format.

    Args:
        request: Request body; ``request.tags`` holds the tags to merge.
        file_id: ID of the file whose tags are updated (path parameter).
        db: Async database session injected by FastAPI.

    Returns:
        A ``StandardResponse`` wrapping an ``UpdateFileTagsResponse`` with the
        file's tag list after the update and the update timestamp.

    Raises:
        HTTPException: 404 when the file does not exist (before or after the
            update) or the service reports a "not found" error; 500 when the
            service reports any other failure.
    """
    # Imported locally, as in the original implementation.
    from sqlalchemy.future import select
    from app.db.models import DatasetFiles

    service = DatasetManagementService(db)

    # The same lookup is issued before and after the update.
    file_query = select(DatasetFiles).where(DatasetFiles.id == file_id)

    # First find the file so we know which dataset it belongs to.
    result = await db.execute(file_query)
    file_record = result.scalar_one_or_none()

    if not file_record:
        raise HTTPException(status_code=404, detail=f"File not found: {file_id}")

    dataset_id = str(file_record.dataset_id)  # type: ignore - Convert Column to str

    # Look up the template associated with the dataset. DatasetMappingService
    # is already imported at module level, so no local re-import is needed.
    mapping_service = DatasetMappingService(db)
    template_id = await mapping_service.get_template_id_by_dataset_id(dataset_id)

    if template_id:
        logger.info(f"Found template {template_id} for dataset {dataset_id}, will auto-convert tag format")
    else:
        logger.warning(f"No template found for dataset {dataset_id}, tags must be in full format")

    # Update the tags; passing the template id enables automatic conversion.
    success, error_msg, updated_at = await service.update_file_tags_partial(
        file_id=file_id,
        new_tags=request.tags,
        template_id=template_id,
    )

    if not success:
        # Map "not found" style service errors to 404, everything else to 500.
        if "not found" in (error_msg or "").lower():
            raise HTTPException(status_code=404, detail=error_msg)
        raise HTTPException(status_code=500, detail=error_msg or "更新标签失败")

    # Re-read the file record to return the complete tag list after the update.
    result = await db.execute(file_query)
    file_record = result.scalar_one_or_none()

    if not file_record:
        raise HTTPException(status_code=404, detail=f"File not found: {file_id}")

    response_data = UpdateFileTagsResponse(
        fileId=file_id,
        tags=file_record.tags or [],  # type: ignore
        # Fall back to the current time when the service returned no timestamp.
        tagsUpdatedAt=updated_at or datetime.now(),
    )

    return StandardResponse(
        code=200,
        message="标签更新成功",
        data=response_data,
    )
|