Compare commits

..

2 Commits

Author SHA1 Message Date
3dd4035005 feat: 完善数据标注导出格式兼容性验证
- 后端:添加 YOLO 格式对 TEXT 数据集的限制验证
- 后端:统一 COCO/YOLO 兼容性校验规则(仅允许图像类或目标检测类数据集)
- 后端:修复 datasetType 字段传递,在任务列表响应中补充 dataset_type
- 前端:在导出对话框中禁用 TEXT 数据集的 COCO/YOLO 选项
- 前端:添加 datasetType 和 labelingType 字段传递
- 前端:对齐前后端 COCO/YOLO 兼容性规则
- 前端:优化提示文案,明确说明格式适用范围

修改文件:
- runtime/datamate-python/app/module/annotation/service/export.py
- runtime/datamate-python/app/module/annotation/service/mapping.py
- runtime/datamate-python/app/module/annotation/schema/mapping.py
- frontend/src/pages/DataAnnotation/Home/ExportAnnotationDialog.tsx
- frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
- frontend/src/pages/DataAnnotation/annotation.const.tsx
2026-02-07 16:05:57 +08:00
36b410ba7b feat(annotation): 添加导出格式与数据集类型的兼容性检查
- 实现 COCO 格式导出前的数据集类型验证
- COCO 格式仅适用于图像类和目标检测类数据集
- 文本类数据集尝试导出 COCO 格式时返回 HTTP 400 错误
- 添加清晰的错误提示信息,建议使用其他格式

新增功能:
- 数据集类型常量定义(TEXT、IMAGE、OBJECT_DETECTION)
- COCO 兼容类型集合
- 类型值标准化方法
- 数据集类型查询方法
- 模板标注类型解析方法
- 导出格式兼容性验证方法

相关文件:
- runtime/datamate-python/app/module/annotation/service/export.py (+94, -7)

Reviewed-by: Codex AI
2026-02-07 16:05:57 +08:00
6 changed files with 162 additions and 21 deletions

View File

@@ -378,6 +378,8 @@ export default function DataAnnotation() {
open={!!exportTask}
projectId={exportTask?.id || ""}
projectName={exportTask?.name || ""}
datasetType={exportTask?.datasetType}
labelingType={exportTask?.labelingType}
onClose={() => setExportTask(null)}
/>
</div>

View File

@@ -11,7 +11,7 @@ import {
message,
Alert,
} from "antd";
import { FileTextOutlined, CheckCircleOutlined } from "@ant-design/icons";
import { FileTextOutlined, CheckCircleOutlined, InfoCircleOutlined } from "@ant-design/icons";
import {
getExportStatsUsingGet,
downloadAnnotationsUsingGet,
@@ -22,6 +22,8 @@ interface ExportAnnotationDialogProps {
open: boolean;
projectId: string;
projectName: string;
datasetType?: string;
labelingType?: string;
onClose: () => void;
}
@@ -57,6 +59,8 @@ export default function ExportAnnotationDialog({
open,
projectId,
projectName,
datasetType,
labelingType,
onClose,
}: ExportAnnotationDialogProps) {
const [form] = Form.useForm();
@@ -67,6 +71,15 @@ export default function ExportAnnotationDialog({
annotatedFiles: number;
} | null>(null);
const normalizedDatasetType = datasetType?.toUpperCase().replace(/-/g, "_") || "";
const normalizedLabelingType = labelingType?.toUpperCase().replace(/-/g, "_") || "";
const hasDatasetType = Boolean(normalizedDatasetType);
const isTextDataset = normalizedDatasetType === "TEXT";
const isDetectionCompatible =
normalizedDatasetType === "IMAGE" ||
normalizedDatasetType === "OBJECT_DETECTION" ||
normalizedLabelingType === "OBJECT_DETECTION";
// 加载导出统计信息
useEffect(() => {
if (open && projectId) {
@@ -176,20 +189,46 @@ export default function ExportAnnotationDialog({
rules={[{ required: true, message: "请选择导出格式" }]}
>
<Select
options={FORMAT_OPTIONS.map((opt) => ({
label: (
<div className="py-1">
<div className="font-medium">{opt.label}</div>
<div className="text-xs text-gray-400">{opt.description}</div>
</div>
),
value: opt.value,
simpleLabel: opt.label,
}))}
options={FORMAT_OPTIONS.map((opt) => {
const isDetectionFormat = opt.value === "coco" || opt.value === "yolo";
const isDisabledForText = isDetectionFormat && isTextDataset;
const isDisabledForIncompatibleType =
isDetectionFormat && hasDatasetType && !isTextDataset && !isDetectionCompatible;
const isDisabled = isDisabledForText || isDisabledForIncompatibleType;
return {
label: (
<div className="py-1 relative">
<div className={`font-medium ${isDisabled ? "text-gray-400" : ""}`}>{opt.label}</div>
<div className={`text-xs ${isDisabled ? "text-gray-300" : "text-gray-400"}`}>
{opt.description}
{isDisabled && (
<span className="block mt-1 text-orange-500">
<InfoCircleOutlined className="mr-1" />
</span>
)}
</div>
</div>
),
value: opt.value,
simpleLabel: opt.label,
disabled: isDisabled,
};
})}
optionLabelProp="simpleLabel"
/>
</Form.Item>
{(isTextDataset || (hasDatasetType && !isDetectionCompatible)) && (
<Alert
type="info"
message="导出格式兼容性提示"
description="COCO 和 YOLO 格式仅适用于图像类或目标检测类数据集,当前项目建议使用 JSON、JSON Lines 或 CSV 格式导出。"
showIcon
className="mt-4"
/>
)}
<Form.Item name="onlyAnnotated" valuePropName="checked">
<Checkbox></Checkbox>
</Form.Item>

View File

@@ -23,6 +23,8 @@ type AnnotationTaskPayload = {
datasetId?: string;
datasetName?: string;
dataset_name?: string;
datasetType?: string;
dataset_type?: string;
labelingType?: string;
labeling_type?: string;
template?: {
@@ -54,6 +56,7 @@ export type AnnotationTaskListItem = {
description?: string;
datasetId?: string;
datasetName?: string;
datasetType?: string;
labelingType?: string;
totalCount?: number;
annotatedCount?: number;
@@ -97,6 +100,7 @@ export function mapAnnotationTask(task: AnnotationTaskPayload): AnnotationTaskLi
const labelingProjId = task?.labelingProjId || task?.labelingProjectId || task?.projId || task?.labeling_project_id || "";
const segmentationEnabled = task?.segmentationEnabled ?? task?.segmentation_enabled ?? false;
const inProgressCount = task?.inProgressCount ?? task?.in_progress_count ?? 0;
const datasetType = task?.datasetType || task?.dataset_type;
const labelingType =
task?.labelingType ||
task?.labeling_type ||
@@ -119,6 +123,7 @@ export function mapAnnotationTask(task: AnnotationTaskPayload): AnnotationTaskLi
projId: labelingProjId,
segmentationEnabled,
inProgressCount,
datasetType,
labelingType,
name: task.name,
description: task.description || "",

View File

@@ -61,6 +61,7 @@ class DatasetMappingResponse(BaseModel):
id: str = Field(..., description="映射UUID")
dataset_id: str = Field(..., alias="datasetId", description="源数据集ID")
dataset_name: Optional[str] = Field(None, alias="datasetName", description="数据集名称")
dataset_type: Optional[str] = Field(None, alias="datasetType", description="数据集类型")
labeling_project_id: str = Field(..., alias="labelingProjectId", description="标注项目ID")
name: Optional[str] = Field(None, description="标注项目名称")
description: Optional[str] = Field(None, description="标注项目描述")

View File

@@ -28,6 +28,7 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models import (
AnnotationResult,
AnnotationTemplate,
Dataset,
DatasetFiles,
LabelingProject,
@@ -78,6 +79,15 @@ SEGMENTS_KEY = "segments"
SEGMENT_RESULT_KEY = "result"
SEGMENT_INDEX_KEY = "segmentIndex"
SEGMENT_INDEX_FALLBACK_KEY = "segment_index"
# Dataset type identifiers, in the upper-case / underscore form that
# `_normalize_type_value` produces before values are compared against them.
DATASET_TYPE_TEXT = "TEXT"
DATASET_TYPE_IMAGE = "IMAGE"
DATASET_TYPE_OBJECT_DETECTION = "OBJECT_DETECTION"
# Key looked up in a project's `configuration` dict to find its labeling type.
LABELING_TYPE_CONFIG_KEY = "labeling_type"
# Labeling type that makes a project eligible for COCO/YOLO export.
LABELING_TYPE_OBJECT_DETECTION = "OBJECT_DETECTION"
# Dataset types for which COCO/YOLO export is allowed.
DETECTION_COMPATIBLE_DATASET_TYPES = {
DATASET_TYPE_IMAGE,
DATASET_TYPE_OBJECT_DETECTION,
}
class AnnotationExportService:
@@ -137,6 +147,19 @@ class AnnotationExportService:
"""
project = await self._get_project_or_404(project_id)
# 根据格式导出
format_type = (
ExportFormat(request.format)
if isinstance(request.format, str)
else request.format
)
# 兼容性检查
await self._validate_export_format_compatibility(
project=project,
format_type=format_type,
)
# 获取标注数据
items = await self._fetch_annotation_data(
project_id=project_id,
@@ -145,13 +168,6 @@ class AnnotationExportService:
include_data=request.include_data,
)
# 根据格式导出
format_type = (
ExportFormat(request.format)
if isinstance(request.format, str)
else request.format
)
if format_type == ExportFormat.JSON:
return self._export_json(items, project.name)
elif format_type == ExportFormat.JSONL:
@@ -180,6 +196,77 @@ class AnnotationExportService:
raise HTTPException(status_code=404, detail=f"标注项目不存在: {project_id}")
return project
@staticmethod
def _normalize_type_value(value: Optional[str]) -> str:
if not value:
return ""
return str(value).strip().upper().replace("-", "_")
async def _get_dataset_type(self, dataset_id: str) -> Optional[str]:
    """Read the ``dataset_type`` column of the dataset with the given id.

    Args:
        dataset_id: Primary key of the dataset to look up.

    Returns:
        The stored ``dataset_type`` value, or ``None`` when the query
        yields no value.
    """
    statement = select(Dataset.dataset_type).where(Dataset.id == dataset_id)
    query_result = await self.db.execute(statement)
    return query_result.scalar_one_or_none()
async def _get_template_labeling_type(
    self, template_id: Optional[str]
) -> Optional[str]:
    """Read the ``labeling_type`` of a non-deleted annotation template.

    Args:
        template_id: Id of the template; a falsy value skips the query.

    Returns:
        The template's ``labeling_type``, or ``None`` when no id was given
        or the query yields no value. Templates whose ``deleted_at`` is
        set are excluded by the filter.
    """
    if template_id:
        statement = select(AnnotationTemplate.labeling_type).where(
            AnnotationTemplate.id == template_id,
            AnnotationTemplate.deleted_at.is_(None),
        )
        query_result = await self.db.execute(statement)
        return query_result.scalar_one_or_none()
    return None
async def _resolve_project_labeling_type(
    self, project: LabelingProject
) -> Optional[str]:
    """Determine the labeling type of a project.

    The project's own ``configuration`` dict wins when it carries a
    non-blank string under ``LABELING_TYPE_CONFIG_KEY``; otherwise the
    labeling type of the project's template is returned.

    Args:
        project: The labeling project being inspected.

    Returns:
        The raw (un-normalized) labeling type, or ``None`` if neither
        source provides one.
    """
    project_config = getattr(project, "configuration", None)
    configured_type = (
        project_config.get(LABELING_TYPE_CONFIG_KEY)
        if isinstance(project_config, dict)
        else None
    )
    # Only a non-blank string counts as an explicit project-level setting.
    if isinstance(configured_type, str) and configured_type.strip():
        return configured_type
    return await self._get_template_labeling_type(project.template_id)
async def _validate_export_format_compatibility(
    self,
    project: LabelingProject,
    format_type: ExportFormat,
) -> None:
    """Reject COCO/YOLO exports for projects they cannot represent.

    Formats other than COCO and YOLO are always accepted. For COCO/YOLO:

    * a TEXT dataset is always rejected, regardless of labeling type;
    * a dataset whose type is in ``DETECTION_COMPATIBLE_DATASET_TYPES``
      is accepted;
    * otherwise the project is accepted only when its labeling type is
      ``LABELING_TYPE_OBJECT_DETECTION``.

    Args:
        project: The labeling project being exported.
        format_type: The requested export format.

    Raises:
        HTTPException: Status 400 when the format is incompatible with
            the project's dataset/labeling type.
    """
    if format_type not in (ExportFormat.COCO, ExportFormat.YOLO):
        return

    format_label = format_type.value.upper()
    dataset_type = self._normalize_type_value(
        await self._get_dataset_type(project.dataset_id)
    )

    if dataset_type == DATASET_TYPE_TEXT:
        raise HTTPException(
            status_code=400,
            detail=f"导出格式 {format_label} 不支持文本类数据集(TEXT),请改用 JSON/JSONL/CSV 格式",
        )

    if dataset_type in DETECTION_COMPATIBLE_DATASET_TYPES:
        return

    # The labeling type only matters once the dataset type alone has not
    # settled the question, so it is resolved lazily here.
    labeling_type = self._normalize_type_value(
        await self._resolve_project_labeling_type(project)
    )
    if labeling_type == LABELING_TYPE_OBJECT_DETECTION:
        return

    raise HTTPException(
        status_code=400,
        detail=(
            f"导出格式 {format_label} 仅适用于图像类或目标检测类数据集,"
            # Separator added: the two fields previously ran together.
            f"当前数据集类型: {dataset_type or 'UNKNOWN'},"
            f"标注类型: {labeling_type or 'UNKNOWN'}"
        ),
    )
async def _fetch_annotation_data(
self,
project_id: str,

View File

@@ -32,7 +32,8 @@ class DatasetMappingService:
"""Build base query with dataset name joined"""
return select(
LabelingProject,
Dataset.name.label('dataset_name')
Dataset.name.label('dataset_name'),
Dataset.dataset_type.label('dataset_type'),
).outerjoin(
Dataset,
LabelingProject.dataset_id == Dataset.id
@@ -98,6 +99,7 @@ class DatasetMappingService:
"""
mapping = row[0] # LabelingProject object
dataset_name = row[1] # dataset_name from join
dataset_type = row[2] # dataset_type from join
# Get template_id from mapping
template_id = getattr(mapping, 'template_id', None)
@@ -134,6 +136,7 @@ class DatasetMappingService:
"id": mapping.id,
"dataset_id": mapping.dataset_id,
"dataset_name": dataset_name,
"dataset_type": dataset_type,
"labeling_project_id": mapping.labeling_project_id,
"name": mapping.name,
"description": description,
@@ -166,12 +169,15 @@ class DatasetMappingService:
"""
# Fetch dataset name
dataset_name = None
dataset_type = None
dataset_id = getattr(mapping, 'dataset_id', None)
if dataset_id:
dataset_result = await self.db.execute(
select(Dataset.name).where(Dataset.id == dataset_id)
select(Dataset.name, Dataset.dataset_type).where(Dataset.id == dataset_id)
)
dataset_name = dataset_result.scalar_one_or_none()
dataset_row = dataset_result.one_or_none()
if dataset_row:
dataset_name, dataset_type = dataset_row
# Get template_id from mapping
template_id = getattr(mapping, 'template_id', None)
@@ -211,6 +217,7 @@ class DatasetMappingService:
"id": mapping.id,
"dataset_id": dataset_id,
"dataset_name": dataset_name,
"dataset_type": dataset_type,
"labeling_project_id": mapping.labeling_project_id,
"name": mapping.name,
"description": description,