Files
Jerry Yan 3aa7f6e3a1 refactor(annotation): 移除对 Label Studio Server 的依赖并切换到内嵌编辑器模式
- 移除 LabelStudioClient 和 SyncService 的导入及使用
- 删除与 Label Studio 项目的创建、删除和同步相关代码
- 修改创建数据集映射功能,改为创建 DataMate 标注项目
- 更新删除映射接口,仅进行软删除不再删除 Label Studio 项目
- 修改同步接口为兼容性保留,实际操作为空操作
- 移除 Label Studio 连接诊断功能
- 更新文档说明以反映内嵌编辑器模式的变化
2026-01-09 12:01:20 +08:00

236 lines
8.2 KiB
Python

from fastapi import APIRouter, Depends, HTTPException, Query, Path
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional, Dict, Any
from datetime import datetime
from pydantic import BaseModel, Field, ConfigDict
from app.db.session import get_db
from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from app.core.config import settings
from ..service.mapping import DatasetMappingService
from ..schema import (
SyncDatasetRequest,
SyncDatasetResponse,
SyncAnnotationsRequest,
SyncAnnotationsResponse,
UpdateFileTagsRequest,
UpdateFileTagsResponse,
UpdateFileTagsRequest,
UpdateFileTagsResponse
)
# Module-level logger shared by every handler in this file.
logger = get_logger(__name__)

# Router for the annotation task endpoints; each route path is prefixed with "/task".
router = APIRouter(prefix="/task", tags=["annotation/task"])
@router.post("/sync", response_model=StandardResponse[SyncDatasetResponse])
async def sync_dataset_content(
    request: SyncDatasetRequest,
    db: AsyncSession = Depends(get_db),
):
    """
    Sync dataset content (files and annotations) -- compatibility endpoint.

    In embedded-editor mode the task list is read directly from the DataMate
    dataset files, so nothing is synchronised with a Label Studio server.
    The handler only verifies that the mapping exists and reports the
    dataset's file count; `synced_files` is always 0.

    Raises:
        HTTPException: 404 when no mapping matches `request.id`; 500 (with a
            generic message) on any other failure.
    """
    try:
        mappings = DatasetMappingService(db)
        logger.debug(f"Sync dataset content request: mapping_id={request.id}, sync_annotations={request.sync_annotations}")

        # The mapping must exist before the dataset is looked up.
        mapping = await mappings.get_mapping_by_uuid(request.id)
        if not mapping:
            raise HTTPException(status_code=404, detail=f"Mapping not found: {request.id}")

        datasets = DatasetManagementService(db)
        dataset = await datasets.get_dataset(mapping.dataset_id)

        # A missing dataset, or a missing/empty fileCount, is reported as zero files.
        if dataset:
            file_total = int(getattr(dataset, "fileCount", 0) or 0)
        else:
            file_total = 0

        payload = SyncDatasetResponse(
            id=mapping.id,
            status="success",
            synced_files=0,
            total_files=file_total,
            message="内嵌编辑器模式:任务列表直接读取数据集文件,无需同步(已忽略 syncAnnotations 等参数)",
        )
        logger.info(f"Embedded editor mode: sync is a no-op, mapping={mapping.id}, total_files={file_total}")
        return StandardResponse(code=200, message="success", data=payload)
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) pass through unchanged.
        raise
    except Exception as e:
        logger.error(f"Error syncing dataset content: {e}")
        raise HTTPException(status_code=500, detail="Internal server error")
@router.post("/annotation/sync", response_model=StandardResponse[SyncAnnotationsResponse])
async def sync_annotations(
    request: SyncAnnotationsRequest,
    db: AsyncSession = Depends(get_db)
):
    """
    Sync annotations only -- retained for backward compatibility (no-op).

    In embedded-editor mode annotation results are stored directly by
    DataMate, so there is nothing to exchange with Label Studio. The handler
    verifies that the mapping exists and returns a response whose counters
    are all zero. The `direction` / `overwrite` /
    `overwrite_labeling_project` request options are logged but otherwise
    ignored.

    Request and response are defined by the Pydantic models
    `SyncAnnotationsRequest` / `SyncAnnotationsResponse`.

    Raises:
        HTTPException: 404 when no mapping matches `request.id`; 500 with a
            generic message on any other failure.
    """
    try:
        mapping_service = DatasetMappingService(db)
        logger.info(f"Sync annotations request: mapping_id={request.id}, direction={request.direction}, overwrite={request.overwrite}, overwrite_ls={request.overwrite_labeling_project}")

        # Verify that the mapping exists.
        mapping = await mapping_service.get_mapping_by_uuid(request.id)
        if not mapping:
            raise HTTPException(
                status_code=404,
                detail=f"Mapping not found: {request.id}"
            )

        result = SyncAnnotationsResponse(
            id=mapping.id,
            status="success",
            synced_to_dm=0,
            synced_to_ls=0,
            skipped=0,
            failed=0,
            conflicts_resolved=0,
            message="内嵌编辑器模式:标注结果由 DataMate 直接存储,无需与 Label Studio 同步(该接口为兼容保留,当前为 no-op)",
        )
        logger.info(f"Embedded editor mode: annotation sync is a no-op, mapping={mapping.id}")
        return StandardResponse(
            code=200,
            message="success",
            data=result
        )
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) pass through unchanged.
        raise
    except Exception as e:
        # Keep the traceback in the server log, but do not echo internal
        # exception text to the client (matches the /sync handler).
        logger.exception(f"Error syncing annotations: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
@router.get("/check-ls-connection")
async def check_label_studio_connection():
    """
    Report that the Label Studio connection diagnostic is disabled.

    In embedded-editor mode no Label Studio server is required, so this
    endpoint performs no connection check. It always returns a
    `StandardResponse` whose data has `status` set to "disabled" plus an
    explanatory message.

    Raises:
        HTTPException: 500 with a generic message if building the response
            fails unexpectedly.
    """
    try:
        return StandardResponse(
            code=200,
            message="success",
            data={
                "status": "disabled",
                "message": "当前为内嵌编辑器模式:不需要 Label Studio Server,该诊断接口已停用",
            },
        )
    except Exception as e:
        # Keep the traceback in the server log, but do not echo internal
        # exception text to the client.
        logger.exception(f"Error checking Label Studio connection: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
@router.put(
    "/{file_id}",
    response_model=StandardResponse[UpdateFileTagsResponse],
)
async def update_file_tags(
    request: UpdateFileTagsRequest,
    file_id: str = Path(..., description="文件ID"),
    db: AsyncSession = Depends(get_db),
):
    """
    Update file tags (partial update with automatic format conversion).

    Accepts a partial tag update and merges it into the given file: only the
    submitted tags are modified, the others are kept, and `tags_updated_at`
    is refreshed.

    Two tag formats are supported:
    1. Simplified format (submitted by external users):
       [{"from_name": "label", "to_name": "image", "values": ["cat", "dog"]}]
    2. Full format (internal storage):
       [{"id": "...", "from_name": "label", "to_name": "image", "type": "choices",
         "value": {"choices": ["cat", "dog"]}}]

    The simplified format is converted to the full format automatically,
    based on the template associated with the dataset.

    Request and response use the Pydantic models `UpdateFileTagsRequest` /
    `UpdateFileTagsResponse`.

    Raises:
        HTTPException: 404 when the file does not exist (before or after the
            update) or the service reports a "not found" error; 500 for any
            other update failure.
    """
    from sqlalchemy.future import select
    from app.db.models import DatasetFiles
    from ..service.mapping import DatasetMappingService

    dataset_service = DatasetManagementService(db)

    # The same lookup is executed before and after the update, so build it once.
    file_lookup = select(DatasetFiles).where(DatasetFiles.id == file_id)

    # First resolve the dataset that owns the file.
    existing = (await db.execute(file_lookup)).scalar_one_or_none()
    if not existing:
        raise HTTPException(status_code=404, detail=f"File not found: {file_id}")
    dataset_id = str(existing.dataset_id)  # type: ignore

    # Look up the template ID associated with the dataset.
    template_id = await DatasetMappingService(db).get_template_id_by_dataset_id(dataset_id)
    if not template_id:
        logger.warning(f"No template found for dataset {dataset_id}, tags must be in full format")
    else:
        logger.info(f"Found template {template_id} for dataset {dataset_id}, will auto-convert tag format")

    # Apply the update; the template ID is passed along to enable auto-conversion.
    success, error_msg, updated_at = await dataset_service.update_file_tags_partial(
        file_id=file_id,
        new_tags=request.tags,
        template_id=template_id,
    )
    if not success:
        # A "not found" message from the service maps to 404; anything else is a 500.
        if "not found" not in (error_msg or "").lower():
            raise HTTPException(status_code=500, detail=error_msg or "更新标签失败")
        raise HTTPException(status_code=404, detail=error_msg)

    # Re-read the file record to return the complete tag list after the update.
    refreshed = (await db.execute(file_lookup)).scalar_one_or_none()
    if not refreshed:
        raise HTTPException(status_code=404, detail=f"File not found: {file_id}")

    payload = UpdateFileTagsResponse(
        fileId=file_id,
        tags=refreshed.tags or [],  # type: ignore
        tagsUpdatedAt=updated_at or datetime.now(),
    )
    return StandardResponse(code=200, message="标签更新成功", data=payload)