You've already forked DataMate
feat(annotation): 自动标注任务支持非图像类型数据集(TEXT/AUDIO/VIDEO)
移除自动标注任务创建流程中的 IMAGE-only 限制,使 TEXT、AUDIO、VIDEO 类型数据集均可用于自动标注任务。

- 新增数据库迁移:t_dm_auto_annotation_tasks 表添加 dataset_type 列
- 后端 schema/API/service 全链路传递 dataset_type
- Worker 动态构建 sample key(image/text/audio/video)和输出目录
- 前端移除数据集类型校验,下拉框显示数据集类型标识
- 输出数据集继承源数据集类型,不再硬编码为 IMAGE
- 保持向后兼容:默认值为 IMAGE,worker 有元数据回退和目录 fallback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -6,7 +6,7 @@ import { ArrowLeft } from "lucide-react";
|
|||||||
import { Link, useNavigate } from "react-router";
|
import { Link, useNavigate } from "react-router";
|
||||||
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
|
import { queryDatasetsUsingGet } from "@/pages/DataManagement/dataset.api";
|
||||||
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
||||||
import { Dataset, DatasetType } from "@/pages/DataManagement/dataset.model";
|
import { Dataset } from "@/pages/DataManagement/dataset.model";
|
||||||
import { createAnnotationOperatorTaskUsingPost } from "../annotation.api";
|
import { createAnnotationOperatorTaskUsingPost } from "../annotation.api";
|
||||||
import { useCreateStepTwo } from "./hooks/useCreateStepTwo";
|
import { useCreateStepTwo } from "./hooks/useCreateStepTwo";
|
||||||
import PipelinePreview from "./components/PipelinePreview";
|
import PipelinePreview from "./components/PipelinePreview";
|
||||||
@@ -85,11 +85,6 @@ export default function AnnotationOperatorTaskCreate() {
|
|||||||
try {
|
try {
|
||||||
if (currentStep === 1) {
|
if (currentStep === 1) {
|
||||||
await form.validateFields();
|
await form.validateFields();
|
||||||
|
|
||||||
if (selectedDataset?.datasetType !== DatasetType.IMAGE) {
|
|
||||||
message.error("自动标注算子编排当前仅支持图片数据集");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
setCurrentStep((prev) => Math.min(prev + 1, 2));
|
setCurrentStep((prev) => Math.min(prev + 1, 2));
|
||||||
} catch {
|
} catch {
|
||||||
@@ -109,11 +104,6 @@ export default function AnnotationOperatorTaskCreate() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (selectedDataset?.datasetType !== DatasetType.IMAGE) {
|
|
||||||
message.error("自动标注算子编排当前仅支持图片数据集");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const outputDatasetName = values.outputDatasetName?.trim();
|
const outputDatasetName = values.outputDatasetName?.trim();
|
||||||
const pipeline = selectedOperators.map((operator, index) => {
|
const pipeline = selectedOperators.map((operator, index) => {
|
||||||
const overrides = {
|
const overrides = {
|
||||||
@@ -200,10 +190,10 @@ export default function AnnotationOperatorTaskCreate() {
|
|||||||
label="选择数据集"
|
label="选择数据集"
|
||||||
name="datasetId"
|
name="datasetId"
|
||||||
rules={[{ required: true, message: "请选择数据集" }]}
|
rules={[{ required: true, message: "请选择数据集" }]}
|
||||||
extra="自动标注算子编排当前仅支持图片数据集"
|
extra="请选择用于自动标注的数据集"
|
||||||
>
|
>
|
||||||
<Select
|
<Select
|
||||||
placeholder="请选择图片数据集"
|
placeholder="请选择数据集"
|
||||||
optionFilterProp="label"
|
optionFilterProp="label"
|
||||||
options={datasets.map((dataset) => ({
|
options={datasets.map((dataset) => ({
|
||||||
label: (
|
label: (
|
||||||
@@ -215,12 +205,11 @@ export default function AnnotationOperatorTaskCreate() {
|
|||||||
{dataset.name}
|
{dataset.name}
|
||||||
</div>
|
</div>
|
||||||
<div className="text-xs text-gray-500">
|
<div className="text-xs text-gray-500">
|
||||||
{dataset?.fileCount} 文件 • {dataset.size}
|
{dataset.datasetType} • {dataset?.fileCount} 文件 • {dataset.size}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
),
|
),
|
||||||
value: dataset.id,
|
value: dataset.id,
|
||||||
disabled: dataset.datasetType !== DatasetType.IMAGE,
|
|
||||||
}))}
|
}))}
|
||||||
/>
|
/>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
|||||||
@@ -210,6 +210,10 @@ class AutoAnnotationTask(Base):
|
|||||||
dataset_name = Column(
|
dataset_name = Column(
|
||||||
String(255), nullable=True, comment="数据集名称(冗余字段,方便查询)"
|
String(255), nullable=True, comment="数据集名称(冗余字段,方便查询)"
|
||||||
)
|
)
|
||||||
|
dataset_type = Column(
|
||||||
|
String(50), nullable=False, default="IMAGE",
|
||||||
|
comment="数据集类型: IMAGE/TEXT/AUDIO/VIDEO",
|
||||||
|
)
|
||||||
created_by = Column(String(255), nullable=True, comment="任务创建人")
|
created_by = Column(String(255), nullable=True, comment="任务创建人")
|
||||||
config = Column(JSON, nullable=False, comment="任务配置(模型规模、置信度等)")
|
config = Column(JSON, nullable=False, comment="任务配置(模型规模、置信度等)")
|
||||||
file_ids = Column(
|
file_ids = Column(
|
||||||
|
|||||||
@@ -85,23 +85,28 @@ async def _create_task_internal(
|
|||||||
await assert_dataset_access(db, normalized_request.dataset_id, user_context)
|
await assert_dataset_access(db, normalized_request.dataset_id, user_context)
|
||||||
# 尝试获取数据集名称和总量用于冗余字段
|
# 尝试获取数据集名称和总量用于冗余字段
|
||||||
dataset_name = None
|
dataset_name = None
|
||||||
|
dataset_type = "IMAGE"
|
||||||
total_images = len(normalized_request.file_ids) if normalized_request.file_ids else 0
|
total_images = len(normalized_request.file_ids) if normalized_request.file_ids else 0
|
||||||
try:
|
try:
|
||||||
dm_client = DatasetManagementService(db)
|
dm_client = DatasetManagementService(db)
|
||||||
dataset = await dm_client.get_dataset(normalized_request.dataset_id)
|
dataset = await dm_client.get_dataset(normalized_request.dataset_id)
|
||||||
if dataset is not None:
|
if dataset is not None:
|
||||||
dataset_name = dataset.name
|
dataset_name = dataset.name
|
||||||
|
dataset_type = getattr(dataset, "datasetType", None) or "IMAGE"
|
||||||
if not normalized_request.file_ids:
|
if not normalized_request.file_ids:
|
||||||
total_images = getattr(dataset, "fileCount", 0) or 0
|
total_images = getattr(dataset, "fileCount", 0) or 0
|
||||||
except Exception as e: # pragma: no cover - 容错
|
except Exception as e: # pragma: no cover - 容错
|
||||||
logger.warning("Failed to fetch dataset summary for annotation task: %s", e)
|
logger.warning("Failed to fetch dataset summary for annotation task: %s", e)
|
||||||
|
|
||||||
|
resolved_dataset_type = normalized_request.dataset_type or dataset_type
|
||||||
|
|
||||||
return await service.create_task(
|
return await service.create_task(
|
||||||
db,
|
db,
|
||||||
normalized_request,
|
normalized_request,
|
||||||
user_context=user_context,
|
user_context=user_context,
|
||||||
dataset_name=dataset_name,
|
dataset_name=dataset_name,
|
||||||
total_images=total_images,
|
total_images=total_images,
|
||||||
|
dataset_type=resolved_dataset_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -75,6 +75,11 @@ class CreateAutoAnnotationTaskRequest(BaseModel):
|
|||||||
|
|
||||||
name: str = Field(..., min_length=1, max_length=255, description="任务名称")
|
name: str = Field(..., min_length=1, max_length=255, description="任务名称")
|
||||||
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
|
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
|
||||||
|
dataset_type: Optional[str] = Field(
|
||||||
|
default=None,
|
||||||
|
alias="datasetType",
|
||||||
|
description="数据集类型: IMAGE/TEXT/AUDIO/VIDEO(不传时由后端自动获取)",
|
||||||
|
)
|
||||||
config: Optional[AutoAnnotationConfig] = Field(
|
config: Optional[AutoAnnotationConfig] = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="兼容旧版 YOLO 任务配置",
|
description="兼容旧版 YOLO 任务配置",
|
||||||
@@ -120,6 +125,7 @@ class AutoAnnotationTaskResponse(BaseModel):
|
|||||||
name: str = Field(..., description="任务名称")
|
name: str = Field(..., description="任务名称")
|
||||||
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
|
dataset_id: str = Field(..., alias="datasetId", description="数据集ID")
|
||||||
dataset_name: Optional[str] = Field(None, alias="datasetName", description="数据集名称")
|
dataset_name: Optional[str] = Field(None, alias="datasetName", description="数据集名称")
|
||||||
|
dataset_type: Optional[str] = Field(None, alias="datasetType", description="数据集类型: IMAGE/TEXT/AUDIO/VIDEO")
|
||||||
task_mode: Optional[str] = Field(None, alias="taskMode", description="任务模式")
|
task_mode: Optional[str] = Field(None, alias="taskMode", description="任务模式")
|
||||||
executor_type: Optional[str] = Field(None, alias="executorType", description="执行器类型")
|
executor_type: Optional[str] = Field(None, alias="executorType", description="执行器类型")
|
||||||
pipeline: Optional[List[Dict[str, Any]]] = Field(None, description="算子编排定义")
|
pipeline: Optional[List[Dict[str, Any]]] = Field(None, description="算子编排定义")
|
||||||
|
|||||||
@@ -141,6 +141,7 @@ class AutoAnnotationTaskService:
|
|||||||
user_context: RequestUserContext,
|
user_context: RequestUserContext,
|
||||||
dataset_name: Optional[str] = None,
|
dataset_name: Optional[str] = None,
|
||||||
total_images: int = 0,
|
total_images: int = 0,
|
||||||
|
dataset_type: str = "IMAGE",
|
||||||
) -> AutoAnnotationTaskResponse:
|
) -> AutoAnnotationTaskResponse:
|
||||||
"""创建自动标注任务,初始状态为 pending。
|
"""创建自动标注任务,初始状态为 pending。
|
||||||
|
|
||||||
@@ -170,6 +171,7 @@ class AutoAnnotationTaskService:
|
|||||||
name=request.name,
|
name=request.name,
|
||||||
dataset_id=request.dataset_id,
|
dataset_id=request.dataset_id,
|
||||||
dataset_name=dataset_name,
|
dataset_name=dataset_name,
|
||||||
|
dataset_type=dataset_type,
|
||||||
created_by=user_context.user_id,
|
created_by=user_context.user_id,
|
||||||
config=normalized_config,
|
config=normalized_config,
|
||||||
task_mode=request.task_mode,
|
task_mode=request.task_mode,
|
||||||
|
|||||||
@@ -175,7 +175,7 @@ def _fetch_pending_task() -> Optional[Dict[str, Any]]:
|
|||||||
|
|
||||||
query_sql = text(
|
query_sql = text(
|
||||||
"""
|
"""
|
||||||
SELECT id, name, dataset_id, dataset_name, created_by,
|
SELECT id, name, dataset_id, dataset_name, dataset_type, created_by,
|
||||||
config, file_ids, pipeline,
|
config, file_ids, pipeline,
|
||||||
task_mode, executor_type,
|
task_mode, executor_type,
|
||||||
status, stop_requested, run_token,
|
status, stop_requested, run_token,
|
||||||
@@ -521,6 +521,35 @@ def _count_detections(sample: Dict[str, Any]) -> int:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Dataset type -> sample key / output sub-directory mappings
# ---------------------------------------------------------------------------

# Key under which a file's path is stored in the per-file sample dict,
# indexed by upper-case dataset type.
DATASET_TYPE_SAMPLE_KEY: Dict[str, str] = {
    "IMAGE": "image",
    "TEXT": "text",
    "AUDIO": "audio",
    "VIDEO": "video",
}

# Name of the sub-directory (under the task output directory) that holds the
# data files, indexed by upper-case dataset type.
DATASET_TYPE_DATA_DIR: Dict[str, str] = {
    "IMAGE": "images",
    "TEXT": "data",
    "AUDIO": "data",
    "VIDEO": "data",
}


def _normalize_dataset_type(dataset_type: Optional[str]) -> str:
    """Return *dataset_type* as a trimmed, upper-case lookup key.

    ``None`` and other falsy values become the empty string, which is not a
    key of either mapping table, so callers end up on their IMAGE fallback
    instead of raising ``AttributeError`` on ``None.upper()``.
    """
    return str(dataset_type or "").strip().upper()


def _get_sample_key(dataset_type: str) -> str:
    """Return the sample-dict key that holds the primary data for a dataset type.

    Args:
        dataset_type: Dataset type name such as ``"IMAGE"`` or ``"text"``;
            matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        The key from ``DATASET_TYPE_SAMPLE_KEY``, or ``"image"`` when the type
        is missing, empty, or unknown.
    """
    return DATASET_TYPE_SAMPLE_KEY.get(_normalize_dataset_type(dataset_type), "image")


def _get_data_dir_name(dataset_type: str) -> str:
    """Return the output sub-directory name used for a dataset type.

    Args:
        dataset_type: Dataset type name such as ``"IMAGE"`` or ``"text"``;
            matching is case-insensitive and ignores surrounding whitespace.

    Returns:
        The name from ``DATASET_TYPE_DATA_DIR``, or ``"images"`` when the type
        is missing, empty, or unknown.
    """
    return DATASET_TYPE_DATA_DIR.get(_normalize_dataset_type(dataset_type), "images")
|
||||||
|
|
||||||
|
|
||||||
def _get_operator_whitelist() -> Optional[set[str]]:
|
def _get_operator_whitelist() -> Optional[set[str]]:
|
||||||
"""获取灰度白名单;返回 None 表示放开全部。"""
|
"""获取灰度白名单;返回 None 表示放开全部。"""
|
||||||
|
|
||||||
@@ -584,7 +613,7 @@ def _load_dataset_meta(dataset_id: str) -> Optional[Dict[str, Any]]:
|
|||||||
|
|
||||||
sql = text(
|
sql = text(
|
||||||
"""
|
"""
|
||||||
SELECT id, name, parent_dataset_id, path
|
SELECT id, name, parent_dataset_id, path, dataset_type
|
||||||
FROM t_dm_datasets
|
FROM t_dm_datasets
|
||||||
WHERE id = :dataset_id
|
WHERE id = :dataset_id
|
||||||
"""
|
"""
|
||||||
@@ -638,11 +667,12 @@ def _load_files_by_ids(file_ids: List[str]) -> List[Tuple[str, str, str]]:
|
|||||||
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
|
return [(str(r[0]), str(r[1]), str(r[2])) for r in rows]
|
||||||
|
|
||||||
|
|
||||||
def _ensure_output_dir(output_dir: str) -> str:
|
def _ensure_output_dir(output_dir: str, dataset_type: str = "IMAGE") -> str:
|
||||||
"""确保输出目录及其 images/、annotations/ 子目录存在。"""
|
"""确保输出目录及其数据/annotations 子目录存在。"""
|
||||||
|
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
os.makedirs(os.path.join(output_dir, "images"), exist_ok=True)
|
data_dir_name = _get_data_dir_name(dataset_type)
|
||||||
|
os.makedirs(os.path.join(output_dir, data_dir_name), exist_ok=True)
|
||||||
os.makedirs(os.path.join(output_dir, "annotations"), exist_ok=True)
|
os.makedirs(os.path.join(output_dir, "annotations"), exist_ok=True)
|
||||||
return output_dir
|
return output_dir
|
||||||
|
|
||||||
@@ -651,6 +681,7 @@ def _create_output_dataset(
|
|||||||
source_dataset_id: str,
|
source_dataset_id: str,
|
||||||
source_dataset_name: str,
|
source_dataset_name: str,
|
||||||
output_dataset_name: str,
|
output_dataset_name: str,
|
||||||
|
dataset_type: str = "IMAGE",
|
||||||
) -> Tuple[str, str]:
|
) -> Tuple[str, str]:
|
||||||
"""为自动标注结果创建一个新的数据集并返回 (dataset_id, path)。"""
|
"""为自动标注结果创建一个新的数据集并返回 (dataset_id, path)。"""
|
||||||
|
|
||||||
@@ -673,7 +704,7 @@ def _create_output_dataset(
|
|||||||
"parent_dataset_id": parent_dataset_id,
|
"parent_dataset_id": parent_dataset_id,
|
||||||
"name": output_dataset_name,
|
"name": output_dataset_name,
|
||||||
"description": description,
|
"description": description,
|
||||||
"dataset_type": "IMAGE",
|
"dataset_type": dataset_type,
|
||||||
"path": output_dir,
|
"path": output_dir,
|
||||||
"status": "ACTIVE",
|
"status": "ACTIVE",
|
||||||
}
|
}
|
||||||
@@ -690,31 +721,38 @@ def _register_output_dataset(
|
|||||||
output_dir: str,
|
output_dir: str,
|
||||||
output_dataset_name: str,
|
output_dataset_name: str,
|
||||||
total_images: int,
|
total_images: int,
|
||||||
|
dataset_type: str = "IMAGE",
|
||||||
) -> None:
|
) -> None:
|
||||||
"""将自动标注结果注册到新建的数据集。"""
|
"""将自动标注结果注册到新建的数据集。"""
|
||||||
|
|
||||||
images_dir = os.path.join(output_dir, "images")
|
data_dir_name = _get_data_dir_name(dataset_type)
|
||||||
if not os.path.isdir(images_dir):
|
data_dir = os.path.join(output_dir, data_dir_name)
|
||||||
|
# 兼容旧任务和 IMAGE 算子(它们写入 images/ 目录)
|
||||||
|
if not os.path.isdir(data_dir):
|
||||||
|
fallback_dir = os.path.join(output_dir, "images")
|
||||||
|
if os.path.isdir(fallback_dir):
|
||||||
|
data_dir = fallback_dir
|
||||||
|
else:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Auto-annotation images directory not found for task {}: {}",
|
"Auto-annotation data directory not found for task {}: {}",
|
||||||
task_id,
|
task_id,
|
||||||
images_dir,
|
data_dir,
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
image_files: List[Tuple[str, str, int]] = []
|
data_files: List[Tuple[str, str, int]] = []
|
||||||
annotation_files: List[Tuple[str, str, int]] = []
|
annotation_files: List[Tuple[str, str, int]] = []
|
||||||
total_size = 0
|
total_size = 0
|
||||||
|
|
||||||
for file_name in sorted(os.listdir(images_dir)):
|
for file_name in sorted(os.listdir(data_dir)):
|
||||||
file_path = os.path.join(images_dir, file_name)
|
file_path = os.path.join(data_dir, file_name)
|
||||||
if not os.path.isfile(file_path):
|
if not os.path.isfile(file_path):
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
file_size = os.path.getsize(file_path)
|
file_size = os.path.getsize(file_path)
|
||||||
except OSError:
|
except OSError:
|
||||||
file_size = 0
|
file_size = 0
|
||||||
image_files.append((file_name, file_path, int(file_size)))
|
data_files.append((file_name, file_path, int(file_size)))
|
||||||
total_size += int(file_size)
|
total_size += int(file_size)
|
||||||
|
|
||||||
annotations_dir = os.path.join(output_dir, "annotations")
|
annotations_dir = os.path.join(output_dir, "annotations")
|
||||||
@@ -730,11 +768,11 @@ def _register_output_dataset(
|
|||||||
annotation_files.append((file_name, file_path, int(file_size)))
|
annotation_files.append((file_name, file_path, int(file_size)))
|
||||||
total_size += int(file_size)
|
total_size += int(file_size)
|
||||||
|
|
||||||
if not image_files:
|
if not data_files:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"No image files found in auto-annotation output for task {}: {}",
|
"No data files found in auto-annotation output for task {}: {}",
|
||||||
task_id,
|
task_id,
|
||||||
images_dir,
|
data_dir,
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -759,7 +797,7 @@ def _register_output_dataset(
|
|||||||
with SQLManager.create_connect() as conn:
|
with SQLManager.create_connect() as conn:
|
||||||
added_count = 0
|
added_count = 0
|
||||||
|
|
||||||
for file_name, file_path, file_size in image_files:
|
for file_name, file_path, file_size in data_files:
|
||||||
ext = os.path.splitext(file_name)[1].lstrip(".").upper() or None
|
ext = os.path.splitext(file_name)[1].lstrip(".").upper() or None
|
||||||
logical_path = os.path.relpath(file_path, output_dir).replace("\\", "/")
|
logical_path = os.path.relpath(file_path, output_dir).replace("\\", "/")
|
||||||
conn.execute(
|
conn.execute(
|
||||||
@@ -811,7 +849,7 @@ def _register_output_dataset(
|
|||||||
"Registered auto-annotation output into dataset: dataset_id={}, name={}, added_files={}, added_size_bytes={}, task_id={}, output_dir={}",
|
"Registered auto-annotation output into dataset: dataset_id={}, name={}, added_files={}, added_size_bytes={}, task_id={}, output_dir={}",
|
||||||
output_dataset_id,
|
output_dataset_id,
|
||||||
output_dataset_name,
|
output_dataset_name,
|
||||||
len(image_files) + len(annotation_files),
|
len(data_files) + len(annotation_files),
|
||||||
total_size,
|
total_size,
|
||||||
task_id,
|
task_id,
|
||||||
output_dir,
|
output_dir,
|
||||||
@@ -832,6 +870,13 @@ def _process_single_task(task: Dict[str, Any]) -> None:
|
|||||||
pipeline_raw = task.get("pipeline")
|
pipeline_raw = task.get("pipeline")
|
||||||
selected_file_ids: Optional[List[str]] = task.get("file_ids") or None
|
selected_file_ids: Optional[List[str]] = task.get("file_ids") or None
|
||||||
|
|
||||||
|
# 解析数据集类型,兜底从数据集元数据获取
|
||||||
|
dataset_type = str(task.get("dataset_type") or "").upper() or "IMAGE"
|
||||||
|
if dataset_type == "IMAGE" and not task.get("dataset_type"):
|
||||||
|
source_meta = _load_dataset_meta(dataset_id)
|
||||||
|
if source_meta and source_meta.get("dataset_type"):
|
||||||
|
dataset_type = str(source_meta["dataset_type"]).upper()
|
||||||
|
|
||||||
output_dataset_name = _get_output_dataset_name(
|
output_dataset_name = _get_output_dataset_name(
|
||||||
task_id=task_id,
|
task_id=task_id,
|
||||||
dataset_id=dataset_id,
|
dataset_id=dataset_id,
|
||||||
@@ -892,8 +937,9 @@ def _process_single_task(task: Dict[str, Any]) -> None:
|
|||||||
source_dataset_id=dataset_id,
|
source_dataset_id=dataset_id,
|
||||||
source_dataset_name=source_dataset_name,
|
source_dataset_name=source_dataset_name,
|
||||||
output_dataset_name=output_dataset_name,
|
output_dataset_name=output_dataset_name,
|
||||||
|
dataset_type=dataset_type,
|
||||||
)
|
)
|
||||||
output_dir = _ensure_output_dir(output_dir)
|
output_dir = _ensure_output_dir(output_dir, dataset_type=dataset_type)
|
||||||
|
|
||||||
_update_task_status(
|
_update_task_status(
|
||||||
task_id,
|
task_id,
|
||||||
@@ -959,8 +1005,9 @@ def _process_single_task(task: Dict[str, Any]) -> None:
|
|||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
sample_key = _get_sample_key(dataset_type)
|
||||||
sample = {
|
sample = {
|
||||||
"image": file_path,
|
sample_key: file_path,
|
||||||
"filename": file_name,
|
"filename": file_name,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -983,7 +1030,7 @@ def _process_single_task(task: Dict[str, Any]) -> None:
|
|||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(
|
logger.error(
|
||||||
"Failed to process image for task {}: file_path={}, error={}",
|
"Failed to process file for task {}: file_path={}, error={}",
|
||||||
task_id,
|
task_id,
|
||||||
file_path,
|
file_path,
|
||||||
e,
|
e,
|
||||||
@@ -1021,6 +1068,7 @@ def _process_single_task(task: Dict[str, Any]) -> None:
|
|||||||
output_dir=output_dir,
|
output_dir=output_dir,
|
||||||
output_dataset_name=output_dataset_name,
|
output_dataset_name=output_dataset_name,
|
||||||
total_images=total_images,
|
total_images=total_images,
|
||||||
|
dataset_type=dataset_type,
|
||||||
)
|
)
|
||||||
except Exception as e: # pragma: no cover - 防御性日志
|
except Exception as e: # pragma: no cover - 防御性日志
|
||||||
logger.error(
|
logger.error(
|
||||||
|
|||||||
25
scripts/db/data-annotation-multitype-migration.sql
Normal file
25
scripts/db/data-annotation-multitype-migration.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
-- =============================================
|
||||||
|
-- 自动标注任务支持多数据集类型迁移
|
||||||
|
-- 为 t_dm_auto_annotation_tasks 表添加 dataset_type 列
|
||||||
|
-- =============================================
|
||||||
|
|
||||||
|
USE datamate;
|
||||||
|
SET @db_name = DATABASE();
|
||||||
|
|
||||||
|
-- 添加 dataset_type 列(IMAGE/TEXT/AUDIO/VIDEO),已有记录默认为 IMAGE
|
||||||
|
SET @ddl = (
|
||||||
|
SELECT IF(
|
||||||
|
EXISTS(
|
||||||
|
SELECT 1
|
||||||
|
FROM information_schema.COLUMNS
|
||||||
|
WHERE TABLE_SCHEMA = @db_name
|
||||||
|
AND TABLE_NAME = 't_dm_auto_annotation_tasks'
|
||||||
|
AND COLUMN_NAME = 'dataset_type'
|
||||||
|
),
|
||||||
|
'SELECT ''skip: column dataset_type already exists''',
|
||||||
|
'ALTER TABLE t_dm_auto_annotation_tasks ADD COLUMN dataset_type VARCHAR(50) NOT NULL DEFAULT ''IMAGE'' COMMENT ''数据集类型: IMAGE/TEXT/AUDIO/VIDEO'' AFTER dataset_name'
|
||||||
|
)
|
||||||
|
);
|
||||||
|
PREPARE stmt FROM @ddl;
|
||||||
|
EXECUTE stmt;
|
||||||
|
DEALLOCATE PREPARE stmt;
|
||||||
Reference in New Issue
Block a user