From 36b410ba7b319468109122bc390178677a1c4105 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Sat, 7 Feb 2026 14:13:11 +0800 Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E6=B7=BB=E5=8A=A0=E5=AF=BC?= =?UTF-8?q?=E5=87=BA=E6=A0=BC=E5=BC=8F=E4=B8=8E=E6=95=B0=E6=8D=AE=E9=9B=86?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B=E7=9A=84=E5=85=BC=E5=AE=B9=E6=80=A7=E6=A3=80?= =?UTF-8?q?=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 实现 COCO 格式导出前的数据集类型验证 - COCO 格式仅适用于图像类和目标检测类数据集 - 文本类数据集尝试导出 COCO 格式时返回 HTTP 400 错误 - 添加清晰的错误提示信息,建议使用其他格式 新增功能: - 数据集类型常量定义(TEXT、IMAGE、OBJECT_DETECTION) - COCO 兼容类型集合 - 类型值标准化方法 - 数据集类型查询方法 - 模板标注类型解析方法 - 导出格式兼容性验证方法 相关文件: - runtime/datamate-python/app/module/annotation/service/export.py (+94, -7) Reviewed-by: Codex AI --- .../app/module/annotation/service/export.py | 101 ++++++++++++++++-- 1 file changed, 94 insertions(+), 7 deletions(-) diff --git a/runtime/datamate-python/app/module/annotation/service/export.py b/runtime/datamate-python/app/module/annotation/service/export.py index 0b502b0..c345eec 100644 --- a/runtime/datamate-python/app/module/annotation/service/export.py +++ b/runtime/datamate-python/app/module/annotation/service/export.py @@ -28,6 +28,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from app.core.logging import get_logger from app.db.models import ( AnnotationResult, + AnnotationTemplate, Dataset, DatasetFiles, LabelingProject, @@ -78,6 +79,15 @@ SEGMENTS_KEY = "segments" SEGMENT_RESULT_KEY = "result" SEGMENT_INDEX_KEY = "segmentIndex" SEGMENT_INDEX_FALLBACK_KEY = "segment_index" +DATASET_TYPE_TEXT = "TEXT" +DATASET_TYPE_IMAGE = "IMAGE" +DATASET_TYPE_OBJECT_DETECTION = "OBJECT_DETECTION" +LABELING_TYPE_CONFIG_KEY = "labeling_type" +LABELING_TYPE_OBJECT_DETECTION = "OBJECT_DETECTION" +COCO_COMPATIBLE_DATASET_TYPES = { + DATASET_TYPE_IMAGE, + DATASET_TYPE_OBJECT_DETECTION, +} class AnnotationExportService: @@ -137,6 +147,19 @@ class AnnotationExportService: """ project = await self._get_project_or_404(project_id) + # 根据格式导出 + format_type = ( + ExportFormat(request.format) + if isinstance(request.format, str) + else request.format + ) + + # 兼容性检查 + await self._validate_export_format_compatibility( + project=project, + format_type=format_type, + ) + # 获取标注数据 items = await self._fetch_annotation_data( project_id=project_id, @@ -145,13 +168,6 @@ class AnnotationExportService: include_data=request.include_data, ) - # 根据格式导出 - format_type = ( - ExportFormat(request.format) - if isinstance(request.format, str) - else request.format - ) - if format_type == ExportFormat.JSON: return self._export_json(items, project.name) elif format_type == ExportFormat.JSONL: @@ -180,6 +196,77 @@ class AnnotationExportService: raise HTTPException(status_code=404, detail=f"标注项目不存在: {project_id}") return project + @staticmethod + def _normalize_type_value(value: Optional[str]) -> str: + if not value: + return "" + return str(value).strip().upper().replace("-", "_") + + async def _get_dataset_type(self, dataset_id: str) -> Optional[str]: + result = await self.db.execute( + select(Dataset.dataset_type).where(Dataset.id == dataset_id) + ) + return result.scalar_one_or_none() + + async def _get_template_labeling_type( + self, template_id: Optional[str] + ) -> Optional[str]: + if not template_id: + return None + result = await self.db.execute( + select(AnnotationTemplate.labeling_type).where( + AnnotationTemplate.id == template_id, + AnnotationTemplate.deleted_at.is_(None), + ) + ) + return result.scalar_one_or_none() + + async def _resolve_project_labeling_type( + self, project: LabelingProject + ) -> Optional[str]: + configuration = getattr(project, "configuration", None) + if isinstance(configuration, dict): + labeling_type = configuration.get(LABELING_TYPE_CONFIG_KEY) + if isinstance(labeling_type, str) and labeling_type.strip(): + return labeling_type + return await self._get_template_labeling_type(project.template_id) + + async def _validate_export_format_compatibility( + self, + project: LabelingProject, + format_type: ExportFormat, + ) -> None: + if format_type != ExportFormat.COCO: + return + + dataset_type = self._normalize_type_value( + await self._get_dataset_type(project.dataset_id) + ) + labeling_type = self._normalize_type_value( + await self._resolve_project_labeling_type(project) + ) + + if dataset_type == DATASET_TYPE_TEXT: + raise HTTPException( + status_code=400, + detail="导出格式 COCO 不支持文本类数据集(TEXT),请改用 JSON/JSONL/CSV 格式", + ) + + if ( + dataset_type in COCO_COMPATIBLE_DATASET_TYPES + or labeling_type == LABELING_TYPE_OBJECT_DETECTION + ): + return + + raise HTTPException( + status_code=400, + detail=( + "导出格式 COCO 仅适用于图像类或目标检测类数据集," + f"当前数据集类型: {dataset_type or 'UNKNOWN'}," + f"标注类型: {labeling_type or 'UNKNOWN'}" + ), + ) + async def _fetch_annotation_data( self, project_id: str,