You've already forked DataMate
feat(annotation): 扩展标注模板功能支持多模态数据类型
- 扩展数据类型支持,包括 pdf/chat/html/table 等多种格式
- 新增标注类型,涵盖 asr/ner/object-detection 等专业领域
- 添加 label_config 字段,用于存储 Label Studio XML 配置
- 更新模板分类体系为 audio-speech/chat/computer-vision/nlp 等
- 实现优先使用预定义 label_config 的配置加载逻辑
- 完善数据库初始化脚本,包含多模态标注模板数据
This commit is contained in:
@@ -8,17 +8,18 @@ from app.db.session import Base
|
||||
|
||||
class AnnotationTemplate(Base):
|
||||
"""标注配置模板模型"""
|
||||
|
||||
|
||||
__tablename__ = "t_dm_annotation_templates"
|
||||
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
name = Column(String(100), nullable=False, comment="模板名称")
|
||||
description = Column(String(500), nullable=True, comment="模板描述")
|
||||
data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries")
|
||||
labeling_type = Column(String(50), nullable=False, comment="标注类型: classification/detection/segmentation/ner/relation/etc")
|
||||
data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table")
|
||||
labeling_type = Column(String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等")
|
||||
configuration = Column(JSON, nullable=False, comment="标注配置(包含labels定义等)")
|
||||
label_config = Column(Text, nullable=True, comment="Label Studio XML配置(内置模板预定义,自定义模板自动生成)")
|
||||
style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
|
||||
category = Column(String(50), default='custom', comment="模板分类: medical/general/custom/system")
|
||||
category = Column(String(50), default='custom', comment="模板分类: audio-speech/chat/computer-vision/nlp/等")
|
||||
built_in = Column(Boolean, default=False, comment="是否系统内置模板")
|
||||
version = Column(String(20), default='1.0', comment="模板版本")
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
@@ -33,10 +34,10 @@ class AnnotationTemplate(Base):
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
|
||||
class LabelingProject(Base):
|
||||
"""标注项目模型"""
|
||||
|
||||
__tablename__ = "t_dm_labeling_projects"
|
||||
class LabelingProject(Base):
|
||||
"""标注项目模型"""
|
||||
|
||||
__tablename__ = "t_dm_labeling_projects"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
|
||||
@@ -53,29 +54,29 @@ class LabelingProject(Base):
|
||||
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
|
||||
|
||||
@property
|
||||
def is_deleted(self) -> bool:
|
||||
"""Return True when this record has been soft-deleted, i.e. ``deleted_at`` is not None."""
|
||||
return self.deleted_at is not None
|
||||
|
||||
|
||||
class AnnotationResult(Base):
|
||||
"""Annotation result model (a single final label set from a single annotator; raw Label Studio annotation JSON)."""
|
||||
|
||||
__tablename__ = "t_dm_annotation_results"
|
||||
|
||||
# Primary key: a UUID4 string produced by the Python-side column default.
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
# Labeling project ID (t_dm_labeling_projects.id per the column comment); no ForeignKey is declared here.
project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
|
||||
# File ID (t_dm_dataset_files.id per the column comment); no ForeignKey is declared here.
file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
|
||||
# Raw Label Studio annotation JSON; required (nullable=False).
annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
|
||||
# Creation time; the server default is the current timestamp.
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
# Update time; server default is the current timestamp, and onupdate sets it to the current timestamp again.
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
|
||||
# Debug representation built from id, project_id and file_id.
def __repr__(self):
|
||||
return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
|
||||
|
||||
|
||||
class AutoAnnotationTask(Base):
|
||||
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
|
||||
def is_deleted(self) -> bool:
|
||||
"""Return True when this record has been soft-deleted, i.e. ``deleted_at`` is not None."""
|
||||
return self.deleted_at is not None
|
||||
|
||||
|
||||
class AnnotationResult(Base):
|
||||
"""Annotation result model (a single final label set from a single annotator; raw Label Studio annotation JSON)."""
|
||||
|
||||
__tablename__ = "t_dm_annotation_results"
|
||||
|
||||
# Primary key: a UUID4 string produced by the Python-side column default.
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
# Labeling project ID (t_dm_labeling_projects.id per the column comment); no ForeignKey is declared here.
project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
|
||||
# File ID (t_dm_dataset_files.id per the column comment); no ForeignKey is declared here.
file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
|
||||
# Raw Label Studio annotation JSON; required (nullable=False).
annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
|
||||
# Creation time; the server default is the current timestamp.
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
# Update time; server default is the current timestamp, and onupdate sets it to the current timestamp again.
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
|
||||
# Debug representation built from id, project_id and file_id.
def __repr__(self):
|
||||
return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
|
||||
|
||||
|
||||
class AutoAnnotationTask(Base):
|
||||
"""自动标注任务模型,对应表 t_dm_auto_annotation_tasks"""
|
||||
|
||||
__tablename__ = "t_dm_auto_annotation_tasks"
|
||||
|
||||
@@ -108,4 +109,4 @@ class AutoAnnotationTask(Base):
|
||||
@property
|
||||
def is_deleted(self) -> bool:
|
||||
"""Return True when this record has been soft-deleted, i.e. ``deleted_at`` is not None."""
|
||||
return self.deleted_at is not None
|
||||
# NOTE(review): the row below repeats the row above; presumably the pre/post images of one diff row — confirm against the real file.
return self.deleted_at is not None
|
||||
|
||||
@@ -333,10 +333,10 @@ class AnnotationTemplateService:
|
||||
def _to_response(self, template: AnnotationTemplate) -> AnnotationTemplateResponse:
|
||||
"""
|
||||
Convert a template ORM object into its response object.
|
||||
|
||||
|
||||
Args:
|
||||
template: The database model object.
|
||||
|
||||
|
||||
Returns:
|
||||
The template response object.
|
||||
"""
|
||||
@@ -344,13 +344,16 @@ class AnnotationTemplateService:
|
||||
from typing import cast, Dict, Any
|
||||
config_dict = cast(Dict[str, Any], template.configuration)
|
||||
config = TemplateConfiguration(**config_dict)
|
||||
|
||||
# Dynamically generate the Label Studio XML config
|
||||
label_config = self.generate_label_studio_config(config)
|
||||
|
||||
|
||||
# Prefer the predefined label_config; otherwise generate it dynamically
|
||||
if template.label_config:
|
||||
label_config = template.label_config
|
||||
else:
|
||||
label_config = self.generate_label_studio_config(config)
|
||||
|
||||
# Use model_validate to build the response object from the ORM object
|
||||
response = AnnotationTemplateResponse.model_validate(template)
|
||||
response.configuration = config
|
||||
response.label_config = label_config # type: ignore
|
||||
|
||||
|
||||
return response
|
||||
|
||||
Reference in New Issue
Block a user