You've already forked DataMate
- 新增 LabelingProjectFile 模型用于存储标注项目的文件快照 - 在创建标注项目时记录关联的文件快照数据 - 更新查询逻辑以基于项目快照过滤文件列表 - 优化导出统计功能使用快照数据进行计算 - 添加数据库表结构支持项目文件快照关系
374 lines
13 KiB
Python
374 lines
13 KiB
Python
from typing import Optional
|
|
import math
|
|
import uuid
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, Query, Path
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from app.db.session import get_db
|
|
from app.db.models import LabelingProject, DatasetFiles
|
|
from app.module.shared.schema import StandardResponse, PaginatedData
|
|
from app.module.dataset import DatasetManagementService
|
|
from app.core.logging import get_logger
|
|
|
|
from app.module.annotation.service.editor import AnnotationEditorService
|
|
from ..service.mapping import DatasetMappingService
|
|
from ..service.template import AnnotationTemplateService
|
|
from ..schema import (
|
|
DatasetMappingCreateRequest,
|
|
DatasetMappingCreateResponse,
|
|
DeleteDatasetResponse,
|
|
DatasetMappingResponse,
|
|
)
|
|
|
|
# All routes declared in this module are registered under the "/project" prefix
# and grouped under the "annotation/project" tag.
router = APIRouter(prefix="/project", tags=["annotation/project"])

# Module-level logger, named after this module.
logger = get_logger(__name__)

# Dataset type value that enables the text-only handling in create_mapping
# (segmentation flag, source-document exclusion in the file snapshot).
TEXT_DATASET_TYPE = "TEXT"
|
|
|
|
@router.get("/{mapping_id}/login")
async def login_label_studio(
    mapping_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Retired Label Studio login proxy endpoint.

    The handler unconditionally answers with HTTP 410; ``mapping_id`` and
    ``db`` are accepted but never used.

    Raises:
        HTTPException: always, with status code 410.
    """
    gone_detail = "当前为内嵌编辑器模式,不再支持 Label Studio 登录代理接口"
    raise HTTPException(status_code=410, detail=gone_detail)
|
|
|
|
def _build_project_configuration(label_config, description, dataset_type, segmentation_enabled):
    """Assemble the ``configuration`` dict stored on a new labeling project.

    Only truthy ``label_config`` / ``description`` values are included. The
    ``segmentation_enabled`` flag is recorded only for TEXT datasets and only
    when the caller supplied a value (i.e. it is not ``None``).
    """
    configuration = {}
    if label_config:
        configuration["label_config"] = label_config
    if description:
        configuration["description"] = description
    if dataset_type == TEXT_DATASET_TYPE and segmentation_enabled is not None:
        configuration["segmentation_enabled"] = bool(segmentation_enabled)
    return configuration


async def _resolve_label_config(request, template_service, db):
    """Pick the label config for a new project from the template and/or the request.

    The template's config is loaded first when ``request.template_id`` is set;
    a truthy ``request.label_config`` (custom or modified) then overrides it.

    Raises:
        HTTPException: 404 when ``request.template_id`` is set but no template is found.
    """
    label_config = None

    # If a template ID was provided, load the template configuration.
    if request.template_id:
        logger.info(f"Using template: {request.template_id}")
        template = await template_service.get_template(db, request.template_id)
        if not template:
            raise HTTPException(
                status_code=404,
                detail=f"Template not found: {request.template_id}"
            )
        label_config = template.label_config
        logger.debug(f"Template label config loaded for template: {template.name}")

    # A label_config supplied directly in the request overrides the template's.
    if request.label_config:
        label_config = request.label_config
        logger.debug("Using custom label config from request")

    return label_config


@router.post("", response_model=StandardResponse[DatasetMappingCreateResponse], status_code=201)
async def create_mapping(
    request: DatasetMappingCreateRequest,
    db: AsyncSession = Depends(get_db)
):
    """
    Create a dataset mapping.

    Creates a labeling project (t_dm_labeling_projects) in DataMate for the
    embedded Label Studio editor, together with a snapshot of the dataset's
    file IDs taken at creation time.

    Note: one dataset may have multiple labeling projects.

    A labeling template can be selected via template_id; when a template ID is
    provided, the template's configuration is used (unless the request also
    carries its own label_config, which takes precedence).

    Raises:
        HTTPException: 404 when the dataset or the requested template is not
            found; 500 for any other failure.
    """
    try:
        dm_client = DatasetManagementService(db)
        mapping_service = DatasetMappingService(db)
        template_service = AnnotationTemplateService()

        logger.info(f"Create dataset mapping request: {request.dataset_id}")

        # Fetch the dataset information from the DM service.
        dataset_info = await dm_client.get_dataset(request.dataset_id)
        if not dataset_info:
            raise HTTPException(
                status_code=404,
                detail=f"Dataset not found in DM service: {request.dataset_id}"
            )

        # The dataset type may be exposed under either attribute spelling.
        dataset_type = (
            getattr(dataset_info, "datasetType", None)
            or getattr(dataset_info, "dataset_type", None)
            or ""
        ).upper()

        # Name/description fall back from the request to the dataset to a default.
        project_name = (
            request.name
            or dataset_info.name
            or "A new project from DataMate"
        )
        project_description = (
            request.description
            or dataset_info.description
            or f"Imported from DM dataset {dataset_info.name} ({dataset_info.id})"
        )

        label_config = await _resolve_label_config(request, template_service, db)

        # DataMate-only: a Label Studio Server project is no longer created or required.
        # To stay compatible with the existing schema field (labeling_project_id,
        # length 8), generate an 8-digit numeric ID.
        # NOTE(review): the ID is random and no uniqueness check is visible in this handler.
        labeling_project_id = str(uuid.uuid4().int % 10**8).zfill(8)

        project_configuration = _build_project_configuration(
            label_config,
            project_description,
            dataset_type,
            request.segmentation_enabled,
        )

        labeling_project = LabelingProject(
            id=str(uuid.uuid4()),  # Generate UUID here
            dataset_id=request.dataset_id,
            labeling_project_id=labeling_project_id,
            name=project_name,
            template_id=request.template_id,  # Save template_id to database
            configuration=project_configuration or None,
        )

        # Collect the IDs of the dataset's files to store as the project snapshot.
        file_query = select(DatasetFiles.id).where(
            DatasetFiles.dataset_id == request.dataset_id
        )
        if dataset_type == TEXT_DATASET_TYPE:
            # For TEXT datasets, exclude rows matched by the editor service's
            # source-document filter (the filter itself is defined elsewhere).
            file_query = file_query.where(
                ~AnnotationEditorService._build_source_document_filter()
            )
        file_result = await db.execute(file_query)
        snapshot_file_ids = [str(fid) for fid in file_result.scalars().all()]

        # Create the mapping and write the file snapshot.
        mapping = await mapping_service.create_mapping_with_snapshot(
            labeling_project, snapshot_file_ids
        )

        response_data = DatasetMappingCreateResponse(
            id=mapping.id,
            labeling_project_id=str(mapping.labeling_project_id),
            labeling_project_name=mapping.name or project_name
        )

        return StandardResponse(
            code=201,
            message="success",
            data=response_data
        )

    except HTTPException:
        # Deliberate HTTP errors (404s above) pass through unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback; the client only sees a generic 500.
        logger.exception(f"Error while creating dataset mapping: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
|
|
@router.get("", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def list_mappings(
    page: int = Query(1, ge=1, description="页码(从1开始)"),
    size: int = Query(20, ge=1, le=100, description="每页记录数"),
    include_template: bool = Query(False, description="是否包含模板详情", alias="includeTemplate"),
    db: AsyncSession = Depends(get_db)
):
    """
    List all mappings (paginated).

    Returns the dataset mappings that have not been soft-deleted, with
    pagination. Full labeling-template details can optionally be included
    (excluded by default to keep the list query fast).

    Parameters:
    - page: page number (starting from 1)
    - size: number of records per page
    - includeTemplate: whether to include template details (default false)

    Raises:
        HTTPException: 500 for unexpected failures; HTTPExceptions raised
            while serving the request are passed through unchanged.
    """
    try:
        service = DatasetMappingService(db)

        # Convert the 1-based page number into a row offset.
        skip = (page - 1) * size

        logger.info(f"List mappings: page={page}, size={size}, include_template={include_template}")

        # Fetch the requested page together with the total row count.
        mappings, total = await service.get_all_mappings_with_count(
            skip=skip,
            limit=size,
            include_deleted=False,
            include_template=include_template
        )

        # Total number of pages; an empty result reports zero pages.
        total_pages = math.ceil(total / size) if total > 0 else 0

        # Build the paginated response payload.
        paginated_data = PaginatedData(
            page=page,
            size=size,
            total_elements=total,
            total_pages=total_pages,
            content=mappings
        )

        logger.info(f"List mappings: page={page}, returned {len(mappings)}/{total}, templates_included: {include_template}")

        return StandardResponse(
            code=200,
            message="success",
            data=paginated_data
        )

    except HTTPException:
        # Keep deliberate HTTP errors intact, matching the other handlers in
        # this module, instead of rewriting them into a generic 500.
        raise
    except Exception as e:
        # logger.exception records the traceback; the client only sees a generic 500.
        logger.exception(f"Error listing mappings: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
|
|
@router.get("/{mapping_id}", response_model=StandardResponse[DatasetMappingResponse])
async def get_mapping(
    mapping_id: str,
    db: AsyncSession = Depends(get_db)
):
    """
    Get a single mapping by UUID (including the associated labeling template details).

    Returns the dataset mapping together with the full associated labeling
    template information, including:
    - basic mapping information
    - dataset information
    - Label Studio project information
    - the full labeling template configuration (if present)

    Raises:
        HTTPException: 404 when no mapping matches ``mapping_id``; 500 for
            any other failure.
    """
    try:
        service = DatasetMappingService(db)

        logger.info(f"Get mapping with template details: {mapping_id}")

        # Load the mapping and ask the service to include the template.
        mapping = await service.get_mapping_by_uuid(mapping_id, include_template=True)

        if not mapping:
            raise HTTPException(
                status_code=404,
                detail=f"Mapping not found: {mapping_id}"
            )

        logger.info(f"Found mapping: {mapping.id}, template_included: {mapping.template is not None}")

        return StandardResponse(
            code=200,
            message="success",
            data=mapping
        )

    except HTTPException:
        # Let the 404 above (and any other deliberate HTTP error) through unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback; the client only sees a generic 500.
        logger.exception(f"Error getting mapping: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
|
|
@router.get("/by-source/{dataset_id}", response_model=StandardResponse[PaginatedData[DatasetMappingResponse]])
async def get_mappings_by_source(
    dataset_id: str,
    page: int = Query(1, ge=1, description="页码(从1开始)"),
    size: int = Query(20, ge=1, le=100, description="每页记录数"),
    include_template: bool = Query(True, description="是否包含模板详情", alias="includeTemplate"),
    db: AsyncSession = Depends(get_db)
):
    """
    List all mappings for a source dataset ID (paginated, with template details).

    Returns the labeling projects created from the given dataset, with
    pagination. Deleted projects are meant to be excluded; that filtering is
    delegated to the mapping service. Full labeling-template details are
    included by default.

    Parameters:
    - dataset_id: dataset ID
    - page: page number (starting from 1)
    - size: number of records per page
    - includeTemplate: whether to include template details (default true)

    Raises:
        HTTPException: 500 for unexpected failures; HTTPExceptions raised
            while serving the request are passed through unchanged.
    """
    try:
        service = DatasetMappingService(db)

        # Convert the 1-based page number into a row offset.
        skip = (page - 1) * size

        logger.info(f"Get mappings by source dataset id: {dataset_id}, page={page}, size={size}, include_template={include_template}")

        # Fetch the requested page together with the total row count.
        mappings, total = await service.get_mappings_by_source_with_count(
            dataset_id=dataset_id,
            skip=skip,
            limit=size,
            include_template=include_template
        )

        # Total number of pages; an empty result reports zero pages.
        total_pages = math.ceil(total / size) if total > 0 else 0

        # Build the paginated response payload.
        paginated_data = PaginatedData(
            page=page,
            size=size,
            total_elements=total,
            total_pages=total_pages,
            content=mappings
        )

        logger.info(f"Found {len(mappings)} mappings on page {page}, total: {total}, templates_included: {include_template}")

        return StandardResponse(
            code=200,
            message="success",
            data=paginated_data
        )

    except HTTPException:
        # Deliberate HTTP errors pass through unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback; the client only sees a generic 500.
        logger.exception(f"Error getting mappings: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
|
|
|
|
@router.delete("/{project_id}", response_model=StandardResponse[DeleteDatasetResponse])
async def delete_mapping(
    project_id: str = Path(..., description="映射UUID(path param)"),
    db: AsyncSession = Depends(get_db)
):
    """
    Delete a mapping (soft delete).

    The mapping to delete is identified by the path parameter `project_id`
    (the mapping's UUID).

    In embedded-editor mode only the DataMate labeling project record is
    soft-deleted; a Label Studio Server project is no longer deleted or
    required.

    Raises:
        HTTPException: 404 when the mapping is not found; 500 when the soft
            delete reports failure or on any other error.
    """
    try:
        logger.debug(f"Delete mapping request received: project_id={project_id!r}")

        service = DatasetMappingService(db)

        # Look the mapping record up by its UUID.
        logger.debug(f"Deleting by mapping UUID: {project_id}")
        mapping = await service.get_mapping_by_uuid(project_id)

        logger.debug(f"Mapping lookup result: {mapping}")

        if not mapping:
            raise HTTPException(
                status_code=404,
                detail="Mapping either not found or not specified."
            )

        # Use the ID from the loaded record (named so as not to shadow builtin `id`).
        mapping_id = mapping.id
        logger.debug(f"Found mapping: {mapping_id}")

        # Soft-delete the mapping record.
        soft_delete_success = await service.soft_delete_mapping(mapping_id)
        logger.debug(f"Soft delete result for mapping {mapping_id}: {soft_delete_success}")

        if not soft_delete_success:
            raise HTTPException(
                status_code=500,
                detail="Failed to delete mapping record"
            )

        logger.info(f"Successfully deleted mapping: {mapping_id}")

        return StandardResponse(
            code=200,
            message="success",
            data=DeleteDatasetResponse(
                id=mapping_id,
                status="success"
            )
        )

    except HTTPException:
        # The 404/500 raised above pass through unchanged.
        raise
    except Exception as e:
        # logger.exception records the traceback; the client only sees a generic 500.
        logger.exception(f"Error deleting mapping: {e}")
        raise HTTPException(status_code=500, detail="Internal server error") from e
|