From 3aa7f6e3a187995f5849489d2c916ff93489ad46 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Fri, 9 Jan 2026 12:01:20 +0800 Subject: [PATCH] =?UTF-8?q?refactor(annotation):=20=E7=A7=BB=E9=99=A4?= =?UTF-8?q?=E5=AF=B9=20Label=20Studio=20Server=20=E7=9A=84=E4=BE=9D?= =?UTF-8?q?=E8=B5=96=E5=B9=B6=E5=88=87=E6=8D=A2=E5=88=B0=E5=86=85=E5=B5=8C?= =?UTF-8?q?=E7=BC=96=E8=BE=91=E5=99=A8=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除 LabelStudioClient 和 SyncService 的导入及使用 - 删除与 Label Studio 项目的创建、删除和同步相关代码 - 修改创建数据集映射功能,改为创建 DataMate 标注项目 - 更新删除映射接口,仅进行软删除不再删除 Label Studio 项目 - 修改同步接口为兼容性保留,实际操作为空操作 - 移除 Label Studio 连接诊断功能 - 更新文档说明以反映内嵌编辑器模式的变化 --- .../module/annotation/interface/project.py | 114 +++---------- .../app/module/annotation/interface/task.py | 159 ++++-------------- 2 files changed, 57 insertions(+), 216 deletions(-) diff --git a/runtime/datamate-python/app/module/annotation/interface/project.py b/runtime/datamate-python/app/module/annotation/interface/project.py index 4da3727..139bfff 100644 --- a/runtime/datamate-python/app/module/annotation/interface/project.py +++ b/runtime/datamate-python/app/module/annotation/interface/project.py @@ -2,7 +2,7 @@ from typing import Optional import math import uuid -from fastapi import APIRouter, Depends, HTTPException, Query, Path, Response +from fastapi import APIRouter, Depends, HTTPException, Query, Path from sqlalchemy.ext.asyncio import AsyncSession from app.db.session import get_db @@ -10,11 +10,8 @@ from app.db.models import LabelingProject from app.module.shared.schema import StandardResponse, PaginatedData from app.module.dataset import DatasetManagementService from app.core.logging import get_logger -from app.core.config import settings -from ..client import LabelStudioClient from ..service.mapping import DatasetMappingService -from ..service.sync import SyncService from ..service.template import AnnotationTemplateService from ..schema import ( DatasetMappingCreateRequest, @@ -30,37 +27,11 @@ router = APIRouter( logger = get_logger(__name__) @router.get("/{mapping_id}/login") -async def list_mappings( +async def login_label_studio( + mapping_id: str, db: AsyncSession = Depends(get_db) ): - try: - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) - target_response = await ls_client.login_label_studio() - headers = dict(target_response.headers) - set_cookies = target_response.headers.get_list("set-cookie") - - # 删除合并的 Set-Cookie - if "set-cookie" in headers: - del headers["set-cookie"] - - # 创建新响应,添加多个 Set-Cookie - response = Response( - content=target_response.content, - status_code=target_response.status_code, - headers=headers - ) - - # 分别添加每个 Set-Cookie - for cookie in set_cookies: - response.headers.append("set-cookie", cookie) - - return response - except HTTPException: - raise - except Exception as e: - logger.error(f"Error while logining in LabelStudio: {e}", e) - raise HTTPException(status_code=500, detail="Internal server error") + raise HTTPException(status_code=410, detail="当前为内嵌编辑器模式,不再支持 Label Studio 登录代理接口") @router.post("", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201) async def create_mapping( @@ -70,8 +41,7 @@ async def create_mapping( """ 创建数据集映射 - 根据指定的DM程序中的数据集,创建Label Studio中的数据集, - 在数据库中记录这一关联关系,返回Label Studio数据集的ID + 在 DataMate 中创建标注项目(t_dm_labeling_projects),用于内嵌 Label Studio 编辑器。 注意:一个数据集可以创建多个标注项目 @@ -79,10 +49,7 @@ async def create_mapping( """ try: dm_client = DatasetManagementService(db) - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) mapping_service = DatasetMappingService(db) - sync_service = SyncService(dm_client, ls_client, mapping_service) template_service = AnnotationTemplateService() logger.info(f"Create dataset mapping request: {request.dataset_id}") @@ -116,51 +83,28 @@ async def create_mapping( label_config = template.label_config logger.debug(f"Template label config loaded for template: {template.name}") - # 在Label Studio中创建项目 - project_data = await ls_client.create_project( - title=project_name, - description=project_description, - label_config=label_config # 传递模板配置 - ) + # DataMate-only:不再创建/依赖 Label Studio Server 项目。 + # 为兼容既有 schema 字段(labeling_project_id 长度 8),生成一个 8 位数字 ID。 + labeling_project_id = str(uuid.uuid4().int % 10**8).zfill(8) - if not project_data: - raise HTTPException( - status_code=500, - detail="Fail to create Label Studio project." - ) - - project_id = project_data["id"] - - # 配置本地存储:dataset/ - local_storage_path = f"{settings.label_studio_local_document_root}/{request.dataset_id}" - storage_result = await ls_client.create_local_storage( - project_id=project_id, - path=local_storage_path, - title="Dataset_BLOB", - use_blob_urls=True, - description=f"Local storage for dataset {dataset_info.name}" - ) - - if not storage_result: - # 本地存储配置失败,记录警告但不中断流程 - logger.warning(f"Failed to configure local storage for project {project_id}") - else: - logger.info(f"Local storage configured for project {project_id}: {local_storage_path}") + project_configuration = {} + if label_config: + project_configuration["label_config"] = label_config + if project_description: + project_configuration["description"] = project_description labeling_project = LabelingProject( id=str(uuid.uuid4()), # Generate UUID here dataset_id=request.dataset_id, - labeling_project_id=str(project_id), + labeling_project_id=labeling_project_id, name=project_name, template_id=request.template_id, # Save template_id to database + configuration=project_configuration or None, ) # 创建映射关系,包含项目名称(先持久化映射以获得 mapping.id) mapping = await mapping_service.create_mapping(labeling_project) - # 进行一次同步,使用创建后的 mapping.id - await sync_service.sync_dataset_files(mapping.id, 100) - response_data = DatasetMappingCreateResponse( id=mapping.id, labeling_project_id=str(mapping.labeling_project_id), @@ -347,19 +291,15 @@ async def delete_mapping( db: AsyncSession = Depends(get_db) ): """ - 删除映射关系和对应的 Label Studio 项目 + 删除映射关系(软删除) 通过 path 参数 `project_id` 指定要删除的映射(映射的 UUID)。 - 此操作会: - 1. 删除 Label Studio 中的项目 - 2. 软删除数据库中的映射记录 + 内嵌编辑器模式下仅软删除 DataMate 标注项目记录,不再删除/依赖 Label Studio Server 项目。 """ try: logger.debug(f"Delete mapping request received: project_id={project_id!r}") - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) service = DatasetMappingService(db) # 使用 mapping UUID 查询映射记录 @@ -375,23 +315,9 @@ async def delete_mapping( ) id = mapping.id - labeling_project_id = mapping.labeling_project_id + logger.debug(f"Found mapping: {id}") - logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}") - - # 1. 删除 Label Studio 项目 - try: - logger.debug(f"Deleting Label Studio project: {labeling_project_id}") - delete_success = await ls_client.delete_project(int(labeling_project_id)) - if delete_success: - logger.debug(f"Successfully deleted Label Studio project: {labeling_project_id}") - else: - logger.warning(f"Failed to delete Label Studio project or project not found: {labeling_project_id}") - except Exception as e: - logger.error(f"Error deleting Label Studio project: {e}") - # 继续执行,即使 Label Studio 项目删除失败也要删除映射记录 - - # 2. 软删除映射记录 + # 软删除映射记录 soft_delete_success = await service.soft_delete_mapping(id) logger.debug(f"Soft delete result for mapping {id}: {soft_delete_success}") @@ -401,7 +327,7 @@ async def delete_mapping( detail="Failed to delete mapping record" ) - logger.info(f"Successfully deleted mapping: {id}, Label Studio project: {labeling_project_id}") + logger.info(f"Successfully deleted mapping: {id}") return StandardResponse( code=200, diff --git a/runtime/datamate-python/app/module/annotation/interface/task.py b/runtime/datamate-python/app/module/annotation/interface/task.py index 13b6674..da19c5b 100644 --- a/runtime/datamate-python/app/module/annotation/interface/task.py +++ b/runtime/datamate-python/app/module/annotation/interface/task.py @@ -9,10 +9,7 @@ from app.module.shared.schema import StandardResponse from app.module.dataset import DatasetManagementService from app.core.logging import get_logger from app.core.config import settings -from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError -from ..client import LabelStudioClient -from ..service.sync import SyncService from ..service.mapping import DatasetMappingService from ..schema import ( SyncDatasetRequest, @@ -40,15 +37,10 @@ async def sync_dataset_content( """ Sync Dataset Content (Files and Annotations) - 根据指定的mapping ID,同步DM程序数据集中的内容到Label Studio数据集中。 - 默认同时同步文件和标注数据。 + 内嵌编辑器模式:任务列表直接读取 DataMate 数据集文件,无需与 Label Studio Server 同步。 """ try: - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) - dm_client = DatasetManagementService(db) mapping_service = DatasetMappingService(db) - sync_service = SyncService(dm_client, ls_client, mapping_service) logger.debug(f"Sync dataset content request: mapping_id={request.id}, sync_annotations={request.sync_annotations}") @@ -59,37 +51,21 @@ async def sync_dataset_content( status_code=404, detail=f"Mapping not found: {request.id}" ) - - # Sync dataset files - result = await sync_service.sync_dataset_files(request.id, request.batch_size) - - # Sync annotations if requested - if request.sync_annotations: - logger.info(f"Syncing annotations: direction={request.annotation_direction}") - - # 根据方向执行标注同步 - if request.annotation_direction == "ls_to_dm": - await sync_service.sync_annotations_from_ls_to_dm( - mapping, - request.batch_size, - request.overwrite - ) - elif request.annotation_direction == "dm_to_ls": - await sync_service.sync_annotations_from_dm_to_ls( - mapping, - request.batch_size, - request.overwrite_labeling_project - ) - elif request.annotation_direction == "bidirectional": - await sync_service.sync_annotations_bidirectional( - mapping, - request.batch_size, - request.overwrite, - request.overwrite_labeling_project - ) - - logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files") - + + dm_client = DatasetManagementService(db) + dataset_info = await dm_client.get_dataset(mapping.dataset_id) + total_files = int(getattr(dataset_info, "fileCount", 0) or 0) if dataset_info else 0 + + result = SyncDatasetResponse( + id=mapping.id, + status="success", + synced_files=0, + total_files=total_files, + message="内嵌编辑器模式:任务列表直接读取数据集文件,无需同步(已忽略 syncAnnotations 等参数)", + ) + + logger.info(f"Embedded editor mode: sync is a no-op, mapping={mapping.id}, total_files={total_files}") + return StandardResponse( code=200, message="success", @@ -98,12 +74,6 @@ async def sync_dataset_content( except HTTPException: raise - except NoDatasetInfoFoundError as e: - logger.error(f"Failed to get dataset info: {e}") - raise HTTPException(status_code=404, detail=str(e)) - except DatasetMappingNotFoundError as e: - logger.error(f"Mapping not found: {e}") - raise HTTPException(status_code=404, detail=str(e)) except Exception as e: logger.error(f"Error syncing dataset content: {e}") raise HTTPException(status_code=500, detail="Internal server error") @@ -121,11 +91,7 @@ async def sync_annotations( 请求与响应由 Pydantic 模型 `SyncAnnotationsRequest` / `SyncAnnotationsResponse` 定义。 """ try: - ls_client = LabelStudioClient(base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token) - dm_client = DatasetManagementService(db) mapping_service = DatasetMappingService(db) - sync_service = SyncService(dm_client, ls_client, mapping_service) logger.info(f"Sync annotations request: mapping_id={request.id}, direction={request.direction}, overwrite={request.overwrite}, overwrite_ls={request.overwrite_labeling_project}") @@ -136,35 +102,20 @@ async def sync_annotations( status_code=404, detail=f"Mapping not found: {request.id}" ) - - # 根据方向执行同步 - if request.direction == "ls_to_dm": - result = await sync_service.sync_annotations_from_ls_to_dm( - mapping, - request.batch_size, - request.overwrite - ) - elif request.direction == "dm_to_ls": - result = await sync_service.sync_annotations_from_dm_to_ls( - mapping, - request.batch_size, - request.overwrite_labeling_project - ) - elif request.direction == "bidirectional": - result = await sync_service.sync_annotations_bidirectional( - mapping, - request.batch_size, - request.overwrite, - request.overwrite_labeling_project - ) - else: - raise HTTPException( - status_code=400, - detail=f"Invalid direction: {request.direction}" - ) - - logger.info(f"Annotation sync completed: synced_to_dm={result.synced_to_dm}, synced_to_ls={result.synced_to_ls}, conflicts_resolved={result.conflicts_resolved}") - + + result = SyncAnnotationsResponse( + id=mapping.id, + status="success", + synced_to_dm=0, + synced_to_ls=0, + skipped=0, + failed=0, + conflicts_resolved=0, + message="内嵌编辑器模式:标注结果由 DataMate 直接存储,无需与 Label Studio 同步(该接口为兼容保留,当前为 no-op)", + ) + + logger.info(f"Embedded editor mode: annotation sync is a no-op, mapping={mapping.id}") + return StandardResponse( code=200, message="success", @@ -186,50 +137,14 @@ async def check_label_studio_connection(): 诊断 Label Studio 连接并返回简要连接信息(状态、base URL、token 摘要、项目统计)。 """ try: - ls_client = LabelStudioClient( - base_url=settings.label_studio_base_url, - token=settings.label_studio_user_token + return StandardResponse( + code=200, + message="success", + data={ + "status": "disabled", + "message": "当前为内嵌编辑器模式:不需要 Label Studio Server,该诊断接口已停用", + }, ) - - # 尝试获取项目列表来测试连接 - try: - response = await ls_client.client.get("/api/projects") - response.raise_for_status() - projects = response.json() - - token_display = settings.label_studio_user_token[:10] + "..." if settings.label_studio_user_token else "None" - - return StandardResponse( - code=200, - message="success", - data={ - "status": "connected", - "base_url": settings.label_studio_base_url, - "token": token_display, - "projects_count": len(projects.get("results", [])) if isinstance(projects, dict) else len(projects), - "message": "Successfully connected to Label Studio" - } - ) - except Exception as e: - token_display = settings.label_studio_user_token[:10] + "..." if settings.label_studio_user_token else "None" - - return StandardResponse( - code=500, - message="error", - data={ - "status": "disconnected", - "base_url": settings.label_studio_base_url, - "token": token_display, - "error": str(e), - "message": f"Failed to connect to Label Studio: {str(e)}", - "troubleshooting": [ - "1. Check if Label Studio is running: docker ps | grep label-studio", - "2. Verify LABEL_STUDIO_BASE_URL in .env file", - "3. Verify LABEL_STUDIO_USER_TOKEN is valid", - "4. Check network connectivity between services" - ] - } - ) except Exception as e: logger.error(f"Error checking Label Studio connection: {e}") raise HTTPException(status_code=500, detail=str(e))