feat(annotation): 自动标注任务支持非图像类型数据集(TEXT/AUDIO/VIDEO)

移除自动标注任务创建流程中的 IMAGE-only 限制,使 TEXT、AUDIO、VIDEO
类型数据集均可用于自动标注任务。

- 新增数据库迁移:t_dm_auto_annotation_tasks 表添加 dataset_type 列
- 后端 schema/API/service 全链路传递 dataset_type
- Worker 动态构建 sample key(image/text/audio/video)和输出目录
- 前端移除数据集类型校验,下拉框显示数据集类型标识
- 输出数据集继承源数据集类型,不再硬编码为 IMAGE
- 保持向后兼容:默认值为 IMAGE,worker 有元数据回退和目录 fallback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-09 23:23:05 +08:00
parent 807c2289e2
commit 8ffa131fad
7 changed files with 1161 additions and 1082 deletions

View File

@@ -1,4 +1,4 @@
"""Schemas for Auto Annotation tasks"""
"""Schemas for Auto Annotation tasks"""
from __future__ import annotations
import json
@@ -7,24 +7,24 @@ from typing import List, Optional, Dict, Any
from datetime import datetime
from pydantic import BaseModel, Field, ConfigDict, model_validator
class AutoAnnotationConfig(BaseModel):
"""自动标注任务配置(与前端 payload 对齐)"""
model_size: str = Field(alias="modelSize", description="模型规模: n/s/m/l/x")
conf_threshold: float = Field(alias="confThreshold", description="置信度阈值 0-1")
target_classes: List[int] = Field(
default_factory=list,
alias="targetClasses",
description="目标类别ID列表,空表示全部类别",
)
output_dataset_name: Optional[str] = Field(
default=None,
alias="outputDatasetName",
description="自动标注结果要写入的新数据集名称(可选)",
)
"""自动标注任务配置(与前端 payload 对齐)"""
model_size: str = Field(alias="modelSize", description="模型规模: n/s/m/l/x")
conf_threshold: float = Field(alias="confThreshold", description="置信度阈值 0-1")
target_classes: List[int] = Field(
default_factory=list,
alias="targetClasses",
description="目标类别ID列表,空表示全部类别",
)
output_dataset_name: Optional[str] = Field(
default=None,
alias="outputDatasetName",
description="自动标注结果要写入的新数据集名称(可选)",
)
model_config = ConfigDict(populate_by_name=True)
@@ -68,13 +68,18 @@ class OperatorPipelineStep(BaseModel):
return normalized
model_config = ConfigDict(populate_by_name=True)
class CreateAutoAnnotationTaskRequest(BaseModel):
"""创建自动标注任务的请求体,对齐前端 CreateAutoAnnotationDialog 发送的结构"""
name: str = Field(..., min_length=1, max_length=255, description="任务名称")
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
dataset_type: Optional[str] = Field(
default=None,
alias="datasetType",
description="数据集类型: IMAGE/TEXT/AUDIO/VIDEO(不传时由后端自动获取)",
)
config: Optional[AutoAnnotationConfig] = Field(
default=None,
description="兼容旧版 YOLO 任务配置",
@@ -111,15 +116,16 @@ class CreateAutoAnnotationTaskRequest(BaseModel):
return self
model_config = ConfigDict(populate_by_name=True)
class AutoAnnotationTaskResponse(BaseModel):
"""自动标注任务响应模型(列表/详情均可复用)"""
id: str = Field(..., description="任务ID")
name: str = Field(..., description="任务名称")
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
"""自动标注任务响应模型(列表/详情均可复用)"""
id: str = Field(..., description="任务ID")
name: str = Field(..., description="任务名称")
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
dataset_name: Optional[str] = Field(None, alias="datasetName", description="数据集名称")
dataset_type: Optional[str] = Field(None, alias="datasetType", description="数据集类型: IMAGE/TEXT/AUDIO/VIDEO")
task_mode: Optional[str] = Field(None, alias="taskMode", description="任务模式")
executor_type: Optional[str] = Field(None, alias="executorType", description="执行器类型")
pipeline: Optional[List[Dict[str, Any]]] = Field(None, description="算子编排定义")
@@ -128,11 +134,11 @@ class AutoAnnotationTaskResponse(BaseModel):
alias="sourceDatasets",
description="本任务实际处理涉及到的所有数据集名称列表",
)
config: Dict[str, Any] = Field(..., description="任务配置")
status: str = Field(..., description="任务状态")
progress: int = Field(..., description="任务进度 0-100")
total_images: int = Field(..., alias="totalImages", description="总图片数")
processed_images: int = Field(..., alias="processedImages", description="已处理图片数")
config: Dict[str, Any] = Field(..., description="任务配置")
status: str = Field(..., description="任务状态")
progress: int = Field(..., description="任务进度 0-100")
total_images: int = Field(..., alias="totalImages", description="总图片数")
processed_images: int = Field(..., alias="processedImages", description="已处理图片数")
detected_objects: int = Field(..., alias="detectedObjects", description="检测到的对象总数")
output_path: Optional[str] = Field(None, alias="outputPath", description="输出路径")
output_dataset_id: Optional[str] = Field(
@@ -152,14 +158,14 @@ class AutoAnnotationTaskResponse(BaseModel):
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间")
completed_at: Optional[datetime] = Field(None, alias="completedAt", description="完成时间")
model_config = ConfigDict(populate_by_name=True, from_attributes=True)
class AutoAnnotationTaskListResponse(BaseModel):
"""自动标注任务列表响应,目前前端直接使用数组,这里预留分页结构"""
content: List[AutoAnnotationTaskResponse] = Field(..., description="任务列表")
total: int = Field(..., description="总数")
model_config = ConfigDict(populate_by_name=True)
model_config = ConfigDict(populate_by_name=True, from_attributes=True)
class AutoAnnotationTaskListResponse(BaseModel):
"""自动标注任务列表响应,目前前端直接使用数组,这里预留分页结构"""
content: List[AutoAnnotationTaskResponse] = Field(..., description="任务列表")
total: int = Field(..., description="总数")
model_config = ConfigDict(populate_by_name=True)