You've already forked DataMate
移除自动标注任务创建流程中的 IMAGE-only 限制,使 TEXT、AUDIO、VIDEO 类型数据集均可用于自动标注任务。 - 新增数据库迁移:t_dm_auto_annotation_tasks 表添加 dataset_type 列 - 后端 schema/API/service 全链路传递 dataset_type - Worker 动态构建 sample key(image/text/audio/video)和输出目录 - 前端移除数据集类型校验,下拉框显示数据集类型标识 - 输出数据集继承源数据集类型,不再硬编码为 IMAGE - 保持向后兼容:默认值为 IMAGE,worker 有元数据回退和目录 fallback Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
172 lines
6.8 KiB
Python
172 lines
6.8 KiB
Python
"""Schemas for Auto Annotation tasks"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
|
|
from typing import List, Optional, Dict, Any
|
|
from datetime import datetime
|
|
|
|
from pydantic import BaseModel, Field, ConfigDict, model_validator
|
|
|
|
|
|
class AutoAnnotationConfig(BaseModel):
    """Auto-annotation task configuration, aligned with the frontend payload.

    Values are read from the camelCase aliases; because ``populate_by_name``
    is enabled the snake_case field names are accepted as well.

    Raises:
        pydantic.ValidationError: if a required field is missing, a value has
            the wrong type, or ``conf_threshold`` lies outside ``[0, 1]``.
    """

    # Model scale selector; the description lists n/s/m/l/x.
    model_size: str = Field(alias="modelSize", description="模型规模: n/s/m/l/x")

    # The documented range is 0-1, so enforce it at the API boundary instead
    # of letting an out-of-range (or NaN) threshold reach the executor.
    conf_threshold: float = Field(
        alias="confThreshold",
        ge=0.0,
        le=1.0,
        description="置信度阈值 0-1",
    )

    # Class IDs to keep; an empty list means "all classes".
    target_classes: List[int] = Field(
        default_factory=list,
        alias="targetClasses",
        description="目标类别ID列表,空表示全部类别",
    )

    # Optional name of a new dataset that receives the annotation results.
    output_dataset_name: Optional[str] = Field(
        default=None,
        alias="outputDatasetName",
        description="自动标注结果要写入的新数据集名称(可选)",
    )

    # ``protected_namespaces=()``: the field ``model_size`` starts with
    # "model_", which Pydantic v2 releases that still protect the whole
    # "model_" prefix report with a warning at import time. The name is part
    # of the payload contract, so the check is disabled for this model only.
    model_config = ConfigDict(populate_by_name=True, protected_namespaces=())
|
|
|
|
|
|
class OperatorPipelineStep(BaseModel):
    """Definition of a single operator node in a generic operator pipeline.

    Before field validation, dict payloads are normalised so that alternative
    key spellings are accepted:

    * ``operator_id`` / ``id`` is copied to ``operatorId`` when that key is
      absent and the alternative holds a truthy value;
    * ``settingsOverride`` / ``settings_override`` (a dict, or a JSON string
      that decodes to a dict) is copied to ``overrides`` when that key is
      absent.
    """

    operator_id: str = Field(description="算子ID(raw_id)", alias="operatorId")
    overrides: Dict[str, Any] = Field(
        alias="overrides",
        description="算子参数覆盖(对应 settings override)",
        default_factory=dict,
    )

    @model_validator(mode="before")
    @classmethod
    def normalize_compatible_fields(cls, value: Any):
        """Map alternative key spellings onto the canonical aliases.

        Args:
            value: Raw input handed to the model before field validation.

        Returns:
            ``value`` itself when it is not a dict; otherwise a shallow copy
            with ``operatorId`` / ``overrides`` filled in where possible. The
            caller's dict is never mutated.
        """
        if not isinstance(value, dict):
            return value

        payload = dict(value)

        # Operator id: first truthy value among the alternative spellings.
        if "operatorId" not in payload:
            fallback_id = payload.get("operator_id") or payload.get("id")
            if fallback_id:
                payload["operatorId"] = fallback_id

        # Overrides: first alternative key that yields a dict wins.
        if "overrides" not in payload:
            for legacy_key in ("settingsOverride", "settings_override"):
                raw = payload.get(legacy_key)
                if isinstance(raw, str):
                    try:
                        raw = json.loads(raw)
                    except Exception:
                        # Best effort: an undecodable string is skipped and
                        # the next alternative key is tried.
                        continue
                if isinstance(raw, dict):
                    payload["overrides"] = raw
                    break

        return payload

    model_config = ConfigDict(populate_by_name=True)
|
|
|
|
|
|
class CreateAutoAnnotationTaskRequest(BaseModel):
    """Request body for creating an auto-annotation task.

    Mirrors the structure sent by the frontend ``CreateAutoAnnotationDialog``.
    At least one of ``config`` or a non-empty ``pipeline`` must be supplied.

    Raises:
        pydantic.ValidationError: if field validation fails, or if ``config``
            is missing while ``pipeline`` is missing or empty.
    """

    name: str = Field(..., min_length=1, max_length=255, description="任务名称")
    dataset_id: str = Field(..., alias="datasetId", description="数据集ID")

    # Optional; per the description the backend resolves it when omitted.
    dataset_type: Optional[str] = Field(
        default=None,
        alias="datasetType",
        description="数据集类型: IMAGE/TEXT/AUDIO/VIDEO(不传时由后端自动获取)",
    )

    # Legacy YOLO configuration, kept for older clients.
    config: Optional[AutoAnnotationConfig] = Field(
        default=None,
        description="兼容旧版 YOLO 任务配置",
    )

    # Generic operator pipeline definition.
    pipeline: Optional[List[OperatorPipelineStep]] = Field(
        default=None,
        description="通用算子编排定义",
    )

    task_mode: str = Field(
        default="legacy_yolo",
        alias="taskMode",
        description="任务模式: legacy_yolo/pipeline",
    )
    executor_type: str = Field(
        default="annotation_local",
        alias="executorType",
        description="执行器类型",
    )

    # Per the description this takes priority over config.outputDatasetName.
    output_dataset_name: Optional[str] = Field(
        default=None,
        alias="outputDatasetName",
        description="输出数据集名称(优先级高于 config.outputDatasetName)",
    )

    # The description previously said "all images"; the request also carries
    # TEXT/AUDIO/VIDEO dataset types, so the published API text now says
    # "all files".
    file_ids: Optional[List[str]] = Field(
        None,
        alias="fileIds",
        description="要处理的文件ID列表,为空则处理数据集中所有文件",
    )

    @model_validator(mode="after")
    def validate_config_or_pipeline(self):
        """Reject requests that carry neither ``config`` nor any pipeline step.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: if ``config`` is ``None`` and ``pipeline`` is ``None``
                or empty.
        """
        if self.config is None and not self.pipeline:
            raise ValueError("Either config or pipeline must be provided")
        return self

    model_config = ConfigDict(populate_by_name=True)
|
|
|
|
|
|
class AutoAnnotationTaskResponse(BaseModel):
    """Response model for an auto-annotation task (reusable for list and detail).

    Fields are exposed under camelCase aliases. ``from_attributes`` is enabled,
    so the model can also be validated from an object exposing matching
    attributes.
    """

    # --- identity and source dataset ---
    id: str = Field(..., description="任务ID")
    name: str = Field(..., description="任务名称")
    dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
    dataset_name: Optional[str] = Field(
        default=None,
        alias="datasetName",
        description="数据集名称",
    )
    dataset_type: Optional[str] = Field(
        default=None,
        alias="datasetType",
        description="数据集类型: IMAGE/TEXT/AUDIO/VIDEO",
    )

    # --- execution setup ---
    task_mode: Optional[str] = Field(
        default=None,
        alias="taskMode",
        description="任务模式",
    )
    executor_type: Optional[str] = Field(
        default=None,
        alias="executorType",
        description="执行器类型",
    )
    pipeline: Optional[List[Dict[str, Any]]] = Field(
        default=None,
        description="算子编排定义",
    )
    source_datasets: Optional[List[str]] = Field(
        default=None,
        alias="sourceDatasets",
        description="本任务实际处理涉及到的所有数据集名称列表",
    )
    config: Dict[str, Any] = Field(..., description="任务配置")

    # --- state and counters ---
    status: str = Field(..., description="任务状态")
    progress: int = Field(..., description="任务进度 0-100")
    total_images: int = Field(
        ...,
        alias="totalImages",
        description="总图片数",
    )
    processed_images: int = Field(
        ...,
        alias="processedImages",
        description="已处理图片数",
    )
    detected_objects: int = Field(
        ...,
        alias="detectedObjects",
        description="检测到的对象总数",
    )

    # --- output and control ---
    output_path: Optional[str] = Field(
        default=None,
        alias="outputPath",
        description="输出路径",
    )
    output_dataset_id: Optional[str] = Field(
        default=None,
        alias="outputDatasetId",
        description="输出数据集ID",
    )
    stop_requested: Optional[bool] = Field(
        default=None,
        alias="stopRequested",
        description="是否请求停止",
    )
    error_message: Optional[str] = Field(
        default=None,
        alias="errorMessage",
        description="错误信息",
    )
    created_by: Optional[str] = Field(
        default=None,
        alias="createdBy",
        description="创建人",
    )

    # --- timestamps (only created_at is required) ---
    started_at: Optional[datetime] = Field(
        default=None,
        alias="startedAt",
        description="启动时间",
    )
    heartbeat_at: Optional[datetime] = Field(
        default=None,
        alias="heartbeatAt",
        description="心跳时间",
    )
    created_at: datetime = Field(..., alias="createdAt", description="创建时间")
    updated_at: Optional[datetime] = Field(
        default=None,
        alias="updatedAt",
        description="更新时间",
    )
    completed_at: Optional[datetime] = Field(
        default=None,
        alias="completedAt",
        description="完成时间",
    )

    model_config = ConfigDict(populate_by_name=True, from_attributes=True)
|
|
|
|
|
|
class AutoAnnotationTaskListResponse(BaseModel):
    """List response for auto-annotation tasks.

    Per the original author's note, the frontend currently consumes a plain
    array; this wrapper reserves a paginated structure for later use.
    """

    # Tasks contained in this response.
    content: List[AutoAnnotationTaskResponse] = Field(
        ...,
        description="任务列表",
    )
    # Total count reported alongside the list.
    total: int = Field(
        ...,
        description="总数",
    )

    model_config = ConfigDict(populate_by_name=True)
|