You've already forked DataMate
refactor(annotation): 移除对 Label Studio Server 的依赖并切换到内嵌编辑器模式
- 移除 LabelStudioClient 和 SyncService 的导入及使用
- 删除与 Label Studio 项目的创建、删除和同步相关代码
- 修改创建数据集映射功能，改为创建 DataMate 标注项目
- 更新删除映射接口，仅进行软删除，不再删除 Label Studio 项目
- 修改同步接口为兼容性保留，实际操作为空操作
- 移除 Label Studio 连接诊断功能
- 更新文档说明以反映内嵌编辑器模式的变化
This commit is contained in:
@@ -2,7 +2,7 @@ from typing import Optional
|
||||
import math
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path, Response
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.db.session import get_db
|
||||
@@ -10,11 +10,8 @@ from app.db.models import LabelingProject
|
||||
from app.module.shared.schema import StandardResponse, PaginatedData
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
from app.core.config import settings
|
||||
|
||||
from ..client import LabelStudioClient
|
||||
from ..service.mapping import DatasetMappingService
|
||||
from ..service.sync import SyncService
|
||||
from ..service.template import AnnotationTemplateService
|
||||
from ..schema import (
|
||||
DatasetMappingCreateRequest,
|
||||
@@ -30,37 +27,11 @@ router = APIRouter(
|
||||
logger = get_logger(__name__)
|
||||
|
||||
@router.get("/{mapping_id}/login")
|
||||
async def list_mappings(
|
||||
async def login_label_studio(
|
||||
mapping_id: str,
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
try:
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
target_response = await ls_client.login_label_studio()
|
||||
headers = dict(target_response.headers)
|
||||
set_cookies = target_response.headers.get_list("set-cookie")
|
||||
|
||||
# 删除合并的 Set-Cookie
|
||||
if "set-cookie" in headers:
|
||||
del headers["set-cookie"]
|
||||
|
||||
# 创建新响应,添加多个 Set-Cookie
|
||||
response = Response(
|
||||
content=target_response.content,
|
||||
status_code=target_response.status_code,
|
||||
headers=headers
|
||||
)
|
||||
|
||||
# 分别添加每个 Set-Cookie
|
||||
for cookie in set_cookies:
|
||||
response.headers.append("set-cookie", cookie)
|
||||
|
||||
return response
|
||||
except HTTPException:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error while logining in LabelStudio: {e}", e)
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
raise HTTPException(status_code=410, detail="当前为内嵌编辑器模式,不再支持 Label Studio 登录代理接口")
|
||||
|
||||
@router.post("", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
|
||||
async def create_mapping(
|
||||
@@ -70,8 +41,7 @@ async def create_mapping(
|
||||
"""
|
||||
创建数据集映射
|
||||
|
||||
根据指定的DM程序中的数据集,创建Label Studio中的数据集,
|
||||
在数据库中记录这一关联关系,返回Label Studio数据集的ID
|
||||
在 DataMate 中创建标注项目(t_dm_labeling_projects),用于内嵌 Label Studio 编辑器。
|
||||
|
||||
注意:一个数据集可以创建多个标注项目
|
||||
|
||||
@@ -79,10 +49,7 @@ async def create_mapping(
|
||||
"""
|
||||
try:
|
||||
dm_client = DatasetManagementService(db)
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
mapping_service = DatasetMappingService(db)
|
||||
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
||||
template_service = AnnotationTemplateService()
|
||||
|
||||
logger.info(f"Create dataset mapping request: {request.dataset_id}")
|
||||
@@ -116,51 +83,28 @@ async def create_mapping(
|
||||
label_config = template.label_config
|
||||
logger.debug(f"Template label config loaded for template: {template.name}")
|
||||
|
||||
# 在Label Studio中创建项目
|
||||
project_data = await ls_client.create_project(
|
||||
title=project_name,
|
||||
description=project_description,
|
||||
label_config=label_config # 传递模板配置
|
||||
)
|
||||
# DataMate-only:不再创建/依赖 Label Studio Server 项目。
|
||||
# 为兼容既有 schema 字段(labeling_project_id 长度 8),生成一个 8 位数字 ID。
|
||||
labeling_project_id = str(uuid.uuid4().int % 10**8).zfill(8)
|
||||
|
||||
if not project_data:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Fail to create Label Studio project."
|
||||
)
|
||||
|
||||
project_id = project_data["id"]
|
||||
|
||||
# 配置本地存储:dataset/<id>
|
||||
local_storage_path = f"{settings.label_studio_local_document_root}/{request.dataset_id}"
|
||||
storage_result = await ls_client.create_local_storage(
|
||||
project_id=project_id,
|
||||
path=local_storage_path,
|
||||
title="Dataset_BLOB",
|
||||
use_blob_urls=True,
|
||||
description=f"Local storage for dataset {dataset_info.name}"
|
||||
)
|
||||
|
||||
if not storage_result:
|
||||
# 本地存储配置失败,记录警告但不中断流程
|
||||
logger.warning(f"Failed to configure local storage for project {project_id}")
|
||||
else:
|
||||
logger.info(f"Local storage configured for project {project_id}: {local_storage_path}")
|
||||
project_configuration = {}
|
||||
if label_config:
|
||||
project_configuration["label_config"] = label_config
|
||||
if project_description:
|
||||
project_configuration["description"] = project_description
|
||||
|
||||
labeling_project = LabelingProject(
|
||||
id=str(uuid.uuid4()), # Generate UUID here
|
||||
dataset_id=request.dataset_id,
|
||||
labeling_project_id=str(project_id),
|
||||
labeling_project_id=labeling_project_id,
|
||||
name=project_name,
|
||||
template_id=request.template_id, # Save template_id to database
|
||||
configuration=project_configuration or None,
|
||||
)
|
||||
|
||||
# 创建映射关系,包含项目名称(先持久化映射以获得 mapping.id)
|
||||
mapping = await mapping_service.create_mapping(labeling_project)
|
||||
|
||||
# 进行一次同步,使用创建后的 mapping.id
|
||||
await sync_service.sync_dataset_files(mapping.id, 100)
|
||||
|
||||
response_data = DatasetMappingCreateResponse(
|
||||
id=mapping.id,
|
||||
labeling_project_id=str(mapping.labeling_project_id),
|
||||
@@ -347,19 +291,15 @@ async def delete_mapping(
|
||||
db: AsyncSession = Depends(get_db)
|
||||
):
|
||||
"""
|
||||
删除映射关系和对应的 Label Studio 项目
|
||||
删除映射关系(软删除)
|
||||
|
||||
通过 path 参数 `project_id` 指定要删除的映射(映射的 UUID)。
|
||||
|
||||
此操作会:
|
||||
1. 删除 Label Studio 中的项目
|
||||
2. 软删除数据库中的映射记录
|
||||
内嵌编辑器模式下仅软删除 DataMate 标注项目记录,不再删除/依赖 Label Studio Server 项目。
|
||||
"""
|
||||
try:
|
||||
logger.debug(f"Delete mapping request received: project_id={project_id!r}")
|
||||
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
service = DatasetMappingService(db)
|
||||
|
||||
# 使用 mapping UUID 查询映射记录
|
||||
@@ -375,23 +315,9 @@ async def delete_mapping(
|
||||
)
|
||||
|
||||
id = mapping.id
|
||||
labeling_project_id = mapping.labeling_project_id
|
||||
logger.debug(f"Found mapping: {id}")
|
||||
|
||||
logger.debug(f"Found mapping: {id}, Label Studio project ID: {labeling_project_id}")
|
||||
|
||||
# 1. 删除 Label Studio 项目
|
||||
try:
|
||||
logger.debug(f"Deleting Label Studio project: {labeling_project_id}")
|
||||
delete_success = await ls_client.delete_project(int(labeling_project_id))
|
||||
if delete_success:
|
||||
logger.debug(f"Successfully deleted Label Studio project: {labeling_project_id}")
|
||||
else:
|
||||
logger.warning(f"Failed to delete Label Studio project or project not found: {labeling_project_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error deleting Label Studio project: {e}")
|
||||
# 继续执行,即使 Label Studio 项目删除失败也要删除映射记录
|
||||
|
||||
# 2. 软删除映射记录
|
||||
# 软删除映射记录
|
||||
soft_delete_success = await service.soft_delete_mapping(id)
|
||||
logger.debug(f"Soft delete result for mapping {id}: {soft_delete_success}")
|
||||
|
||||
@@ -401,7 +327,7 @@ async def delete_mapping(
|
||||
detail="Failed to delete mapping record"
|
||||
)
|
||||
|
||||
logger.info(f"Successfully deleted mapping: {id}, Label Studio project: {labeling_project_id}")
|
||||
logger.info(f"Successfully deleted mapping: {id}")
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
|
||||
@@ -9,10 +9,7 @@ from app.module.shared.schema import StandardResponse
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
from app.core.config import settings
|
||||
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
|
||||
|
||||
from ..client import LabelStudioClient
|
||||
from ..service.sync import SyncService
|
||||
from ..service.mapping import DatasetMappingService
|
||||
from ..schema import (
|
||||
SyncDatasetRequest,
|
||||
@@ -40,15 +37,10 @@ async def sync_dataset_content(
|
||||
"""
|
||||
Sync Dataset Content (Files and Annotations)
|
||||
|
||||
根据指定的mapping ID,同步DM程序数据集中的内容到Label Studio数据集中。
|
||||
默认同时同步文件和标注数据。
|
||||
内嵌编辑器模式:任务列表直接读取 DataMate 数据集文件,无需与 Label Studio Server 同步。
|
||||
"""
|
||||
try:
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
dm_client = DatasetManagementService(db)
|
||||
mapping_service = DatasetMappingService(db)
|
||||
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
||||
|
||||
logger.debug(f"Sync dataset content request: mapping_id={request.id}, sync_annotations={request.sync_annotations}")
|
||||
|
||||
@@ -59,37 +51,21 @@ async def sync_dataset_content(
|
||||
status_code=404,
|
||||
detail=f"Mapping not found: {request.id}"
|
||||
)
|
||||
|
||||
# Sync dataset files
|
||||
result = await sync_service.sync_dataset_files(request.id, request.batch_size)
|
||||
|
||||
# Sync annotations if requested
|
||||
if request.sync_annotations:
|
||||
logger.info(f"Syncing annotations: direction={request.annotation_direction}")
|
||||
|
||||
# 根据方向执行标注同步
|
||||
if request.annotation_direction == "ls_to_dm":
|
||||
await sync_service.sync_annotations_from_ls_to_dm(
|
||||
mapping,
|
||||
request.batch_size,
|
||||
request.overwrite
|
||||
)
|
||||
elif request.annotation_direction == "dm_to_ls":
|
||||
await sync_service.sync_annotations_from_dm_to_ls(
|
||||
mapping,
|
||||
request.batch_size,
|
||||
request.overwrite_labeling_project
|
||||
)
|
||||
elif request.annotation_direction == "bidirectional":
|
||||
await sync_service.sync_annotations_bidirectional(
|
||||
mapping,
|
||||
request.batch_size,
|
||||
request.overwrite,
|
||||
request.overwrite_labeling_project
|
||||
)
|
||||
|
||||
logger.info(f"Sync completed: {result.synced_files}/{result.total_files} files")
|
||||
|
||||
|
||||
dm_client = DatasetManagementService(db)
|
||||
dataset_info = await dm_client.get_dataset(mapping.dataset_id)
|
||||
total_files = int(getattr(dataset_info, "fileCount", 0) or 0) if dataset_info else 0
|
||||
|
||||
result = SyncDatasetResponse(
|
||||
id=mapping.id,
|
||||
status="success",
|
||||
synced_files=0,
|
||||
total_files=total_files,
|
||||
message="内嵌编辑器模式:任务列表直接读取数据集文件,无需同步(已忽略 syncAnnotations 等参数)",
|
||||
)
|
||||
|
||||
logger.info(f"Embedded editor mode: sync is a no-op, mapping={mapping.id}, total_files={total_files}")
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
@@ -98,12 +74,6 @@ async def sync_dataset_content(
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
except NoDatasetInfoFoundError as e:
|
||||
logger.error(f"Failed to get dataset info: {e}")
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
except DatasetMappingNotFoundError as e:
|
||||
logger.error(f"Mapping not found: {e}")
|
||||
raise HTTPException(status_code=404, detail=str(e))
|
||||
except Exception as e:
|
||||
logger.error(f"Error syncing dataset content: {e}")
|
||||
raise HTTPException(status_code=500, detail="Internal server error")
|
||||
@@ -121,11 +91,7 @@ async def sync_annotations(
|
||||
请求与响应由 Pydantic 模型 `SyncAnnotationsRequest` / `SyncAnnotationsResponse` 定义。
|
||||
"""
|
||||
try:
|
||||
ls_client = LabelStudioClient(base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token)
|
||||
dm_client = DatasetManagementService(db)
|
||||
mapping_service = DatasetMappingService(db)
|
||||
sync_service = SyncService(dm_client, ls_client, mapping_service)
|
||||
|
||||
logger.info(f"Sync annotations request: mapping_id={request.id}, direction={request.direction}, overwrite={request.overwrite}, overwrite_ls={request.overwrite_labeling_project}")
|
||||
|
||||
@@ -136,35 +102,20 @@ async def sync_annotations(
|
||||
status_code=404,
|
||||
detail=f"Mapping not found: {request.id}"
|
||||
)
|
||||
|
||||
# 根据方向执行同步
|
||||
if request.direction == "ls_to_dm":
|
||||
result = await sync_service.sync_annotations_from_ls_to_dm(
|
||||
mapping,
|
||||
request.batch_size,
|
||||
request.overwrite
|
||||
)
|
||||
elif request.direction == "dm_to_ls":
|
||||
result = await sync_service.sync_annotations_from_dm_to_ls(
|
||||
mapping,
|
||||
request.batch_size,
|
||||
request.overwrite_labeling_project
|
||||
)
|
||||
elif request.direction == "bidirectional":
|
||||
result = await sync_service.sync_annotations_bidirectional(
|
||||
mapping,
|
||||
request.batch_size,
|
||||
request.overwrite,
|
||||
request.overwrite_labeling_project
|
||||
)
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid direction: {request.direction}"
|
||||
)
|
||||
|
||||
logger.info(f"Annotation sync completed: synced_to_dm={result.synced_to_dm}, synced_to_ls={result.synced_to_ls}, conflicts_resolved={result.conflicts_resolved}")
|
||||
|
||||
|
||||
result = SyncAnnotationsResponse(
|
||||
id=mapping.id,
|
||||
status="success",
|
||||
synced_to_dm=0,
|
||||
synced_to_ls=0,
|
||||
skipped=0,
|
||||
failed=0,
|
||||
conflicts_resolved=0,
|
||||
message="内嵌编辑器模式:标注结果由 DataMate 直接存储,无需与 Label Studio 同步(该接口为兼容保留,当前为 no-op)",
|
||||
)
|
||||
|
||||
logger.info(f"Embedded editor mode: annotation sync is a no-op, mapping={mapping.id}")
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
@@ -186,50 +137,14 @@ async def check_label_studio_connection():
|
||||
诊断 Label Studio 连接并返回简要连接信息(状态、base URL、token 摘要、项目统计)。
|
||||
"""
|
||||
try:
|
||||
ls_client = LabelStudioClient(
|
||||
base_url=settings.label_studio_base_url,
|
||||
token=settings.label_studio_user_token
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data={
|
||||
"status": "disabled",
|
||||
"message": "当前为内嵌编辑器模式:不需要 Label Studio Server,该诊断接口已停用",
|
||||
},
|
||||
)
|
||||
|
||||
# 尝试获取项目列表来测试连接
|
||||
try:
|
||||
response = await ls_client.client.get("/api/projects")
|
||||
response.raise_for_status()
|
||||
projects = response.json()
|
||||
|
||||
token_display = settings.label_studio_user_token[:10] + "..." if settings.label_studio_user_token else "None"
|
||||
|
||||
return StandardResponse(
|
||||
code=200,
|
||||
message="success",
|
||||
data={
|
||||
"status": "connected",
|
||||
"base_url": settings.label_studio_base_url,
|
||||
"token": token_display,
|
||||
"projects_count": len(projects.get("results", [])) if isinstance(projects, dict) else len(projects),
|
||||
"message": "Successfully connected to Label Studio"
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
token_display = settings.label_studio_user_token[:10] + "..." if settings.label_studio_user_token else "None"
|
||||
|
||||
return StandardResponse(
|
||||
code=500,
|
||||
message="error",
|
||||
data={
|
||||
"status": "disconnected",
|
||||
"base_url": settings.label_studio_base_url,
|
||||
"token": token_display,
|
||||
"error": str(e),
|
||||
"message": f"Failed to connect to Label Studio: {str(e)}",
|
||||
"troubleshooting": [
|
||||
"1. Check if Label Studio is running: docker ps | grep label-studio",
|
||||
"2. Verify LABEL_STUDIO_BASE_URL in .env file",
|
||||
"3. Verify LABEL_STUDIO_USER_TOKEN is valid",
|
||||
"4. Check network connectivity between services"
|
||||
]
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Error checking Label Studio connection: {e}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
Reference in New Issue
Block a user