feat(annotation): 扩展标注模板功能支持多模态数据类型

- 扩展数据类型支持包括pdf/chat/html/table等多种格式
- 新增标注类型涵盖asr/ner/object-detection等专业领域
- 添加label_config字段用于Label Studio XML配置存储
- 更新模板分类体系为audio-speech/chat/computer-vision/nlp等
- 实现预定义label_config优先使用的配置加载逻辑
- 完善数据库初始化脚本包含多模态标注模板数据
This commit is contained in:
2026-01-18 20:35:34 +08:00
parent 0e19178a5e
commit b992b08b2c
3 changed files with 1526 additions and 526 deletions

View File

@@ -8,17 +8,18 @@ from app.db.session import Base
class AnnotationTemplate(Base):
"""标注配置模板模型"""
__tablename__ = "t_dm_annotation_templates"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
name = Column(String(100), nullable=False, comment="模板名称")
description = Column(String(500), nullable=True, comment="模板描述")
data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries")
labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc")
data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table")
labeling_type = Column(String(50), nullable=False, comment="标注类型: asr/ner/object-detection/")
configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)")
label_config = Column(Text, nullable=True, comment="Label Studio XML配置(内置模板预定义,自定义模板自动生成)")
style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system")
category = Column(String(50), default='custom', comment="模板分类: audio-speech/chat/computer-vision/nlp/等")
built_in = Column(Boolean, default=False, comment="是否系统内置模板")
version = Column(String(20), default='1.0', comment="模板版本")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
@@ -33,10 +34,10 @@ class AnnotationTemplate(Base):
"""检查是否已被软删除"""
return self.deleted_at is not None
class LabelingProject(Base):
"""标注项目模型"""
__tablename__ = "t_dm_labeling_projects"
class LabelingProject(Base):
"""标注项目模型"""
__tablename__ = "t_dm_labeling_projects"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
@@ -53,29 +54,29 @@ class LabelingProject(Base):
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
class AnnotationResult(Base):
"""标注结果模型(单人单份最终标签,Label Studio annotation 原始 JSON)"""
__tablename__ = "t_dm_annotation_results"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
class AutoAnnotationTask(Base):
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
class AnnotationResult(Base):
"""标注结果模型(单人单份最终标签,Label Studio annotation 原始 JSON)"""
__tablename__ = "t_dm_annotation_results"
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
def __repr__(self):
return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
class AutoAnnotationTask(Base):
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
__tablename__ = "t_dm_auto_annotation_tasks"
@@ -108,4 +109,4 @@ class AutoAnnotationTask(Base):
@property
def is_deleted(self) -> bool:
"""检查是否已被软删除"""
return self.deleted_at is not None
return self.deleted_at is not None

View File

@@ -333,10 +333,10 @@ class AnnotationTemplateService:
def _to_response(self, template: AnnotationTemplate) -> AnnotationTemplateResponse:
"""
转换为响应对象
Args:
template: 数据库模型对象
Returns:
模板响应对象
"""
@@ -344,13 +344,16 @@ class AnnotationTemplateService:
from typing import cast, Dict, Any
config_dict = cast(Dict[str, Any], template.configuration)
config = TemplateConfiguration(**config_dict)
# 动态生成Label Studio XML配置
label_config = self.generate_label_studio_config(config)
# 优先使用预定义的 label_config,否则动态生成
if template.label_config:
label_config = template.label_config
else:
label_config = self.generate_label_studio_config(config)
# 使用model_validate从ORM对象创建响应对象
response = AnnotationTemplateResponse.model_validate(template)
response.configuration = config
response.label_config = label_config # type: ignore
return response

File diff suppressed because it is too large Load Diff