diff --git a/runtime/datamate-python/app/db/models/annotation_management.py b/runtime/datamate-python/app/db/models/annotation_management.py index 4003dbb..ea60afb 100644 --- a/runtime/datamate-python/app/db/models/annotation_management.py +++ b/runtime/datamate-python/app/db/models/annotation_management.py @@ -8,17 +8,18 @@ from app.db.session import Base class AnnotationTemplate(Base): """标注配置模板模型""" - + __tablename__ = "t_dm_annotation_templates" - + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") name = Column(String(100), nullable=False, comment="模板名称") description = Column(String(500), nullable=True, comment="模板描述") - data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries") - labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc") + data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table") + labeling_type = Column(String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等") configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)") + label_config = Column(Text, nullable=True, comment="Label Studio XML配置(内置模板预定义,自定义模板自动生成)") style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical") - category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system") + category = Column(String(50), default='custom', comment="模板分类: audio-speech/chat/computer-vision/nlp/等") built_in = Column(Boolean, default=False, comment="是否系统内置模板") version = Column(String(20), default='1.0', comment="模板版本") created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") @@ -33,10 +34,10 @@ class AnnotationTemplate(Base): """检查是否已被软删除""" return self.deleted_at is not None -class LabelingProject(Base): - """标注项目模型""" - - __tablename__ = "t_dm_labeling_projects" +class LabelingProject(Base): + """标注项目模型""" + + __tablename__ = "t_dm_labeling_projects" id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") dataset_id = Column(String(36), nullable=False, comment="数据集ID") @@ -53,29 +54,29 @@ class LabelingProject(Base): return f"" @property - def is_deleted(self) -> bool: - """检查是否已被软删除""" - return self.deleted_at is not None - - -class AnnotationResult(Base): - """标注结果模型(单人单份最终标签,Label Studio annotation 原始 JSON)""" - - __tablename__ = "t_dm_annotation_results" - - id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") - project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)") - file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)") - annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)") - created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") - updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") - - def __repr__(self): - return f"" - - -class AutoAnnotationTask(Base): - """自动标注任务模型,对应表 t_dm_auto_annotation_tasks""" + def is_deleted(self) -> bool: + """检查是否已被软删除""" + return self.deleted_at is not None + + +class AnnotationResult(Base): + """标注结果模型(单人单份最终标签,Label Studio annotation 原始 JSON)""" + + __tablename__ = "t_dm_annotation_results" + + id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID") + project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)") + file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)") + annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)") + created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间") + updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间") + + def __repr__(self): + return f"" + + +class AutoAnnotationTask(Base): + """自动标注任务模型,对应表 t_dm_auto_annotation_tasks""" __tablename__ = "t_dm_auto_annotation_tasks" @@ -108,4 +109,4 @@ class AutoAnnotationTask(Base): @property def is_deleted(self) -> bool: """检查是否已被软删除""" - return self.deleted_at is not None + return self.deleted_at is not None diff --git a/runtime/datamate-python/app/module/annotation/service/template.py b/runtime/datamate-python/app/module/annotation/service/template.py index ed7dce8..478bd4f 100644 --- a/runtime/datamate-python/app/module/annotation/service/template.py +++ b/runtime/datamate-python/app/module/annotation/service/template.py @@ -333,10 +333,10 @@ class AnnotationTemplateService: def _to_response(self, template: AnnotationTemplate) -> AnnotationTemplateResponse: """ 转换为响应对象 - + Args: template: 数据库模型对象 - + Returns: 模板响应对象 """ @@ -344,13 +344,16 @@ class AnnotationTemplateService: from typing import cast, Dict, Any config_dict = cast(Dict[str, Any], template.configuration) config = TemplateConfiguration(**config_dict) - - # 动态生成Label Studio XML配置 - label_config = self.generate_label_studio_config(config) - + + # 优先使用预定义的 label_config,否则动态生成 + if template.label_config: + label_config = template.label_config + else: + label_config = self.generate_label_studio_config(config) + # 使用model_validate从ORM对象创建响应对象 response = AnnotationTemplateResponse.model_validate(template) response.configuration = config response.label_config = label_config # type: ignore - + return response diff --git a/scripts/db/data-annotation-init.sql b/scripts/db/data-annotation-init.sql index 55be49c..667a84c 100644 --- a/scripts/db/data-annotation-init.sql +++ b/scripts/db/data-annotation-init.sql @@ -1,25 +1,36 @@ -use datamate; - -CREATE TABLE t_dm_annotation_templates ( - id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', - name VARCHAR(100) NOT NULL COMMENT '模板名称', - description VARCHAR(500) COMMENT '模板描述', - data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries', - labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc', - configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)', - style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical', - category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system', - built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板', - version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本', - created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', - deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)', - INDEX idx_data_type (data_type), - INDEX idx_labeling_type (labeling_type), - INDEX idx_category (category), - INDEX idx_built_in (built_in) -) COMMENT='标注配置模板表'; - +-- 数据标注服务数据库初始化脚本 +-- 适用于 datamate 数据库 +-- 基于 Label Studio 模板体系设计 + +USE datamate; + +-- ===================================== +-- DDL语句 - 数据库表结构定义 +-- ===================================== + +-- 标注配置模板表 +CREATE TABLE t_dm_annotation_templates ( + id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', + name VARCHAR(100) NOT NULL COMMENT '模板名称', + description VARCHAR(500) COMMENT '模板描述', + data_type VARCHAR(50) NOT NULL COMMENT '数据类型: text/image/audio/video/pdf/timeseries/chat/html/table', + labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型', + configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)', + label_config TEXT COMMENT 'Label Studio XML配置(内置模板预定义)', + style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical', + category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类', + built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板', + version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本', + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)', + INDEX idx_data_type (data_type), + INDEX idx_labeling_type (labeling_type), + INDEX idx_category (category), + INDEX idx_built_in (built_in) +) COMMENT='标注配置模板表'; + +-- 标注项目表 CREATE TABLE t_dm_labeling_projects ( id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID', @@ -37,479 +48,1464 @@ CREATE TABLE t_dm_labeling_projects ( INDEX idx_labeling_project_id (labeling_project_id) ) COMMENT='标注项目表'; --- 标注结果表(单人单份最终标签,存储 Label Studio annotation 原始 JSON) --- 设计说明: --- 1) project_id 复用 t_dm_labeling_projects.id(DataMate 内部标注项目ID) --- 2) file_id 复用 t_dm_dataset_files.id(数据集文件ID) --- 3) annotation 字段存 Label Studio annotation 对象(包含 result 等),不做降维转换 -CREATE TABLE IF NOT EXISTS t_dm_annotation_results ( +-- 标注结果表 +CREATE TABLE t_dm_annotation_results ( id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', - project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID(t_dm_labeling_projects.id)', - file_id VARCHAR(36) NOT NULL COMMENT '文件ID(t_dm_dataset_files.id)', - annotation JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON(单人单份最终结果)', + project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID', + file_id VARCHAR(36) NOT NULL COMMENT '文件ID', + annotation JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON', created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', UNIQUE KEY uk_project_file (project_id, file_id), INDEX idx_project_id (project_id), INDEX idx_file_id (file_id), INDEX idx_updated_at (updated_at) -) COMMENT='标注结果表(Label Studio兼容,单人单份)'; +) COMMENT='标注结果表'; -- 自动标注任务表 CREATE TABLE t_dm_auto_annotation_tasks ( id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', name VARCHAR(255) NOT NULL COMMENT '任务名称', - dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID', - dataset_name VARCHAR(255) COMMENT '数据集名称(冗余字段,方便查询)', - config JSON NOT NULL COMMENT '任务配置(模型规模、置信度等)', - file_ids JSON COMMENT '要处理的文件ID列表,为空则处理数据集所有图像', - status VARCHAR(50) NOT NULL DEFAULT 'pending' COMMENT '任务状态: pending/running/completed/failed', - progress INT DEFAULT 0 COMMENT '任务进度 0-100', - total_images INT DEFAULT 0 COMMENT '总图片数', - processed_images INT DEFAULT 0 COMMENT '已处理图片数', - detected_objects INT DEFAULT 0 COMMENT '检测到的对象总数', - output_path VARCHAR(500) COMMENT '输出路径', - error_message TEXT COMMENT '错误信息', - created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', - completed_at TIMESTAMP NULL COMMENT '完成时间', - deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)', - INDEX idx_dataset_id (dataset_id), - INDEX idx_status (status), - INDEX idx_created_at (created_at) -) COMMENT='自动标注任务表'; - - --- 内置标注模板初始化数据 --- 这些模板将在系统首次启动时自动创建 --- 使用 INSERT ... ON DUPLICATE KEY UPDATE 来覆盖已存在的记录 - --- 1. 图像分类模板 -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-image-classification-001', - '图像分类', - '简单的多标签图像分类模板', - '图像', - '分类', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'choice', - 'toName', 'image', - 'type', 'Choices', - 'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'), - 'required', true, - 'description', '选择最符合图像内容的标签' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'image', - 'type', 'Image', - 'value', '$image' - ) - ) - ), - 'horizontal', - '计算机视觉', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 2. 目标检测模板(矩形框) -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-object-detection-001', - '目标检测(边界框)', - '使用矩形边界框进行目标检测', - '图像', - '目标检测', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'label', - 'toName', 'image', - 'type', 'RectangleLabels', - 'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'), - 'required', false, - 'description', '在图像中框出目标并标注类别' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'image', - 'type', 'Image', - 'value', '$image' - ) - ) - ), - 'horizontal', - '计算机视觉', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 3. 图像分割模板(多边形) -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-image-segmentation-001', - '图像分割(多边形)', - '使用多边形标注进行语义分割', - '图像', - '分割', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'label', - 'toName', 'image', - 'type', 'PolygonLabels', - 'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'), - 'required', false, - 'description', '使用多边形框选需要分割的区域' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'image', - 'type', 'Image', - 'value', '$image' - ) - ) - ), - 'horizontal', - '计算机视觉', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 4. 文本分类模板 -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-text-classification-001', - '文本情感分类', - '将文本中表达的情感划分到预定义的类别', - '文本', - '分类', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'choice', - 'toName', 'text', - 'type', 'Choices', - 'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'), - 'required', true, - 'description', '对文本的情感或类别进行选择' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'text', - 'type', 'Text', - 'value', '$text' - ) - ) - ), - 'vertical', - '自然语言处理', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 5. 命名实体识别(NER) -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-ner-001', - '命名实体识别', - '从文本中抽取并标注命名实体', - '文本', - '实体识别', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'label', - 'toName', 'text', - 'type', 'Labels', - 'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'), - 'required', false, - 'description', '在文本中标注人物、地点等实体' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'text', - 'type', 'Text', - 'value', '$text' - ) - ) - ), - 'vertical', - '自然语言处理', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 6. 音频分类模板 -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-audio-classification-001', - '音频分类', - '将音频片段划分到不同类别', - '音频', - '分类', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'choice', - 'toName', 'audio', - 'type', 'Choices', - 'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'), - 'required', true, - 'description', '选择音频片段对应的类别' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'audio', - 'type', 'Audio', - 'value', '$audio' - ) - ) - ), - 'horizontal', - '音频', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 7. 文本多标签分类模板 -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-text-multilabel-001', - '文本多标签分类', - '可为文本选择多个标签,适用于主题、内容类别等多标签任务', - '文本', - '分类', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'labels', - 'toName', 'text', - 'type', 'Choices', - 'options', JSON_ARRAY('Sports','Politics','Tech','Entertainment'), - 'required', true, - 'choice', 'multiple', - 'description', '可选择多个标签' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'text', - 'type', 'Text', - 'value', '$text' - ) - ) - ), - 'vertical', - '自然语言处理', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - - --- 8. 文本摘要模板 -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-text-summarization-001', - '文本摘要', - '根据原文撰写简要摘要', - '文本', - '摘要', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'summary', - 'toName', 'text', - 'type', 'TextArea', - 'required', true, - 'description', '在此填写摘要内容' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'text', - 'type', 'Text', - 'value', '$text' - ) - ) - ), - 'vertical', - '自然语言处理', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); - --- 9. 关键词抽取模板 -INSERT INTO t_dm_annotation_templates ( - id, name, description, data_type, labeling_type, - configuration, style, category, built_in, version, created_at -) VALUES ( - 'tpl-keyword-extract-001', - '关键词抽取', - '从文本中选出关键词或关键短语', - '文本', - '实体识别', - JSON_OBJECT( - 'labels', JSON_ARRAY( - JSON_OBJECT( - 'fromName', 'kw', - 'toName', 'text', - 'type', 'Labels', - 'labels', JSON_ARRAY('Keyword'), - 'required', false, - 'description', '高亮文本并标注关键词' - ) - ), - 'objects', JSON_ARRAY( - JSON_OBJECT( - 'name', 'text', - 'type', 'Text', - 'value', '$text' - ) - ) - ), - 'vertical', - '自然语言处理', - 1, - '1.0.0', - NOW() -) -ON DUPLICATE KEY UPDATE - name = VALUES(name), - description = VALUES(description), - data_type = VALUES(data_type), - labeling_type = VALUES(labeling_type), - configuration = VALUES(configuration), - style = VALUES(style), - category = VALUES(category), - built_in = VALUES(built_in), - version = VALUES(version), - updated_at = NOW(); + dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID', + dataset_name VARCHAR(255) COMMENT '数据集名称', + config JSON NOT NULL COMMENT '任务配置', + file_ids JSON COMMENT '文件ID列表', + status VARCHAR(50) NOT NULL DEFAULT 'pending' COMMENT '任务状态', + progress INT DEFAULT 0 COMMENT '任务进度', + total_images INT DEFAULT 0 COMMENT '总图片数', + processed_images INT DEFAULT 0 COMMENT '已处理图片数', + detected_objects INT DEFAULT 0 COMMENT '检测到的对象数', + output_path VARCHAR(500) COMMENT '输出路径', + error_message TEXT COMMENT '错误信息', + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + completed_at TIMESTAMP NULL COMMENT '完成时间', + deleted_at TIMESTAMP NULL COMMENT '删除时间', + INDEX idx_dataset_id (dataset_id), + INDEX idx_status (status), + INDEX idx_created_at (created_at) +) COMMENT='自动标注任务表'; + +-- ===================================== +-- DML语句 - 内置标注模板数据 +-- ===================================== + +-- 清空现有内置模板(保留自定义模板) +DELETE FROM t_dm_annotation_templates WHERE built_in = 1; + +-- ============================================= +-- 1. 音频/语音处理 (Audio/Speech Processing) +-- ============================================= + +-- 1.1 自动语音识别(分段) +INSERT INTO t_dm_annotation_templates ( + id, name, description, data_type, labeling_type, + configuration, label_config, style, category, built_in, version +) VALUES ( + 'tpl-asr-segments-001', + '语音识别(分段)', + '对音频进行语音活动分段并转录文本,适用于呼叫中心转录、会议记录、播客转录、法庭记录等场景。关联模型:Whisper、Wav2Vec2、DeepSpeech', + 'audio', + 'asr-segments', + JSON_OBJECT( + 'labels', JSON_ARRAY( + JSON_OBJECT('fromName', 'labels', 'toName', 'audio', 'type', 'Labels', 'labels', JSON_ARRAY('Speech', 'Noise')), + JSON_OBJECT('fromName', 'transcription', 'toName', 'audio', 'type', 'TextArea', 'required', true) + ), + 'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')) + ), + ' + + +