Files
DataMate/runtime/datamate-python/app/module/annotation/service/template.py
Jason Wang 45743f39f5 feat: add labeling template. refactor: switch to Poetry, build and deploy of backend Python (#79)
* feat: Enhance annotation module with template management and validation

- Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
- Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates.
- Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation.
- Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats.
- Updated database schema for annotation templates and labeling projects to include new fields and constraints.
- Seeded initial annotation templates for various use cases including image classification, object detection, and text classification.

* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support

* feat: Update docker-compose.yml to mark datamate dataset volume and network as external

* feat: Add tag configuration management and related components

- Introduced new components for tag selection and browsing in the frontend.
- Added API endpoint to fetch tag configuration from the backend.
- Implemented tag configuration management in the backend, including loading from YAML.
- Enhanced template service to support dynamic tag rendering based on configuration.
- Updated validation utilities to incorporate tag configuration checks.
- Refactored existing code to utilize the new tag configuration structure.

* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation

* feat: Update Makefile to include backend-python-docker-build in the build process

* feat: Migrate to poetry for better deps management

* Add pyyaml dependency and update Dockerfile to use Poetry for dependency management

- Added pyyaml (>=6.0.3,<7.0.0) to pyproject.toml dependencies.
- Updated Dockerfile to install Poetry and manage dependencies using it.
- Improved layer caching by copying only dependency files before the application code.
- Removed unnecessary installation of build dependencies to keep the final image size small.

* feat: Remove duplicated backend-python-docker-build target from Makefile

* fix: airflow is not ready for adding yet

* feat: update Python version to 3.12 and remove project installation step in Dockerfile
2025-11-13 15:32:30 +08:00

337 lines
11 KiB
Python

"""
Annotation Template Service
"""
from typing import Optional, List
from datetime import datetime
from sqlalchemy import select, func
from sqlalchemy.ext.asyncio import AsyncSession
from uuid import uuid4
from fastapi import HTTPException
from app.db.models.annotation_management import AnnotationTemplate
from app.module.annotation.schema.template import (
CreateAnnotationTemplateRequest,
UpdateAnnotationTemplateRequest,
AnnotationTemplateResponse,
AnnotationTemplateListResponse,
TemplateConfiguration
)
from app.module.annotation.utils.config_validator import LabelStudioConfigValidator
from app.module.annotation.config import LabelStudioTagConfig
class AnnotationTemplateService:
"""标注模板服务"""
@staticmethod
def generate_label_studio_config(config: TemplateConfiguration) -> str:
"""
从配置JSON生成Label Studio XML配置
Args:
config: 模板配置对象
Returns:
Label Studio XML字符串
"""
tag_config = LabelStudioTagConfig()
xml_parts = ['<View>']
# 生成对象定义
for obj in config.objects:
obj_attrs = [
f'name="{obj.name}"',
f'value="{obj.value}"'
]
xml_parts.append(f' <{obj.type} {" ".join(obj_attrs)}/>')
# 生成标签定义
for label in config.labels:
label_attrs = [
f'name="{label.from_name}"',
f'toName="{label.to_name}"'
]
# 添加可选属性
if label.required:
label_attrs.append('required="true"')
tag_type = label.type.capitalize() if label.type else "Choices"
# 检查是否需要子元素
if label.options or label.labels:
choices = label.options or label.labels or []
xml_parts.append(f' <{tag_type} {" ".join(label_attrs)}>')
# 从配置获取子元素标签名
child_tag = tag_config.get_child_tag(tag_type)
if not child_tag:
# 默认使用 Label
child_tag = "Label"
for choice in choices:
xml_parts.append(f' <{child_tag} value="{choice}"/>')
xml_parts.append(f' </{tag_type}>')
else:
# 处理简单标签类型(不需要子元素)
xml_parts.append(f' <{tag_type} {" ".join(label_attrs)}/>')
xml_parts.append('</View>')
return '\n'.join(xml_parts)
async def create_template(
self,
db: AsyncSession,
request: CreateAnnotationTemplateRequest
) -> AnnotationTemplateResponse:
"""
创建标注模板
Args:
db: 数据库会话
request: 创建请求
Returns:
创建的模板响应
"""
# 验证配置JSON
config_dict = request.configuration.model_dump(mode='json', by_alias=False)
valid, error = LabelStudioConfigValidator.validate_configuration_json(config_dict)
if not valid:
raise HTTPException(status_code=400, detail=f"Invalid configuration: {error}")
# 生成Label Studio XML配置(用于验证,但不存储)
label_config = self.generate_label_studio_config(request.configuration)
# 验证生成的XML
valid, error = LabelStudioConfigValidator.validate_xml(label_config)
if not valid:
raise HTTPException(status_code=400, detail=f"Generated XML is invalid: {error}")
# 创建模板对象(不包含label_config字段)
template = AnnotationTemplate(
id=str(uuid4()),
name=request.name,
description=request.description,
data_type=request.data_type,
labeling_type=request.labeling_type,
configuration=config_dict,
style=request.style,
category=request.category,
built_in=False,
version="1.0.0",
created_at=datetime.now()
)
db.add(template)
await db.commit()
await db.refresh(template)
return self._to_response(template)
async def get_template(
self,
db: AsyncSession,
template_id: str
) -> Optional[AnnotationTemplateResponse]:
"""
获取单个模板
Args:
db: 数据库会话
template_id: 模板ID
Returns:
模板响应或None
"""
result = await db.execute(
select(AnnotationTemplate)
.where(
AnnotationTemplate.id == template_id,
AnnotationTemplate.deleted_at.is_(None)
)
)
template = result.scalar_one_or_none()
if template:
return self._to_response(template)
return None
async def list_templates(
self,
db: AsyncSession,
page: int = 1,
size: int = 10,
category: Optional[str] = None,
data_type: Optional[str] = None,
labeling_type: Optional[str] = None,
built_in: Optional[bool] = None
) -> AnnotationTemplateListResponse:
"""
获取模板列表
Args:
db: 数据库会话
page: 页码
size: 每页大小
category: 分类筛选
data_type: 数据类型筛选
labeling_type: 标注类型筛选
built_in: 是否内置模板筛选
Returns:
模板列表响应
"""
# 构建查询条件
conditions: List = [AnnotationTemplate.deleted_at.is_(None)]
if category:
conditions.append(AnnotationTemplate.category == category) # type: ignore
if data_type:
conditions.append(AnnotationTemplate.data_type == data_type) # type: ignore
if labeling_type:
conditions.append(AnnotationTemplate.labeling_type == labeling_type) # type: ignore
if built_in is not None:
conditions.append(AnnotationTemplate.built_in == built_in) # type: ignore
# 查询总数
count_result = await db.execute(
select(func.count()).select_from(AnnotationTemplate).where(*conditions)
)
total = count_result.scalar() or 0
# 分页查询
result = await db.execute(
select(AnnotationTemplate)
.where(*conditions)
.order_by(AnnotationTemplate.created_at.desc())
.limit(size)
.offset((page - 1) * size)
)
templates = result.scalars().all()
return AnnotationTemplateListResponse(
content=[self._to_response(t) for t in templates],
total=total,
page=page,
size=size,
totalPages=(total + size - 1) // size
)
async def update_template(
self,
db: AsyncSession,
template_id: str,
request: UpdateAnnotationTemplateRequest
) -> Optional[AnnotationTemplateResponse]:
"""
更新模板
Args:
db: 数据库会话
template_id: 模板ID
request: 更新请求
Returns:
更新后的模板响应或None
"""
result = await db.execute(
select(AnnotationTemplate)
.where(
AnnotationTemplate.id == template_id,
AnnotationTemplate.deleted_at.is_(None)
)
)
template = result.scalar_one_or_none()
if not template:
return None
# 更新字段
update_data = request.model_dump(exclude_unset=True, by_alias=False)
for field, value in update_data.items():
if field == 'configuration' and value is not None:
# 验证配置JSON
config_dict = value.model_dump(mode='json', by_alias=False)
valid, error = LabelStudioConfigValidator.validate_configuration_json(config_dict)
if not valid:
raise HTTPException(status_code=400, detail=f"Invalid configuration: {error}")
# 重新生成Label Studio XML配置(用于验证)
label_config = self.generate_label_studio_config(value)
# 验证生成的XML
valid, error = LabelStudioConfigValidator.validate_xml(label_config)
if not valid:
raise HTTPException(status_code=400, detail=f"Generated XML is invalid: {error}")
# 只更新configuration字段,不存储label_config
setattr(template, field, config_dict)
else:
setattr(template, field, value)
template.updated_at = datetime.now() # type: ignore
await db.commit()
await db.refresh(template)
return self._to_response(template)
async def delete_template(
self,
db: AsyncSession,
template_id: str
) -> bool:
"""
删除模板(软删除)
Args:
db: 数据库会话
template_id: 模板ID
Returns:
是否删除成功
"""
result = await db.execute(
select(AnnotationTemplate)
.where(
AnnotationTemplate.id == template_id,
AnnotationTemplate.deleted_at.is_(None)
)
)
template = result.scalar_one_or_none()
if not template:
return False
template.deleted_at = datetime.now() # type: ignore
await db.commit()
return True
def _to_response(self, template: AnnotationTemplate) -> AnnotationTemplateResponse:
"""
转换为响应对象
Args:
template: 数据库模型对象
Returns:
模板响应对象
"""
# 将配置JSON转换为TemplateConfiguration对象
from typing import cast, Dict, Any
config_dict = cast(Dict[str, Any], template.configuration)
config = TemplateConfiguration(**config_dict)
# 动态生成Label Studio XML配置
label_config = self.generate_label_studio_config(config)
# 使用model_validate从ORM对象创建响应对象
response = AnnotationTemplateResponse.model_validate(template)
response.configuration = config
response.label_config = label_config # type: ignore
return response