feat: add labeling template. refactor: switch to Poetry, build and deploy of backend Python (#79)

* feat: Enhance annotation module with template management and validation

- Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
- Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates.
- Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation.
- Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats.
- Updated database schema for annotation templates and labeling projects to include new fields and constraints.
- Seeded initial annotation templates for various use cases including image classification, object detection, and text classification.

* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support

* feat: Update docker-compose.yml to mark datamate dataset volume and network as external

* feat: Add tag configuration management and related components

- Introduced new components for tag selection and browsing in the frontend.
- Added API endpoint to fetch tag configuration from the backend.
- Implemented tag configuration management in the backend, including loading from YAML.
- Enhanced template service to support dynamic tag rendering based on configuration.
- Updated validation utilities to incorporate tag configuration checks.
- Refactored existing code to utilize the new tag configuration structure.

* feat: Refactor LabelStudioTagConfig for improved configuration loading and validation

* feat: Update Makefile to include backend-python-docker-build in the build process

* feat: Migrate to poetry for better deps management

* Add pyyaml dependency and update Dockerfile to use Poetry for dependency management

- Added pyyaml (>=6.0.3,<7.0.0) to pyproject.toml dependencies.
- Updated Dockerfile to install Poetry and manage dependencies using it.
- Improved layer caching by copying only dependency files before the application code.
- Removed unnecessary installation of build dependencies to keep the final image size small.

* feat: Remove duplicated backend-python-docker-build target from Makefile

* fix: airflow is not ready for adding yet

* feat: update Python version to 3.12 and remove project installation step in Dockerfile
This commit is contained in:
Jason Wang
2025-11-13 15:32:30 +08:00
committed by GitHub
parent 2660845b74
commit 45743f39f5
40 changed files with 3223 additions and 262 deletions

View File

@@ -3,29 +3,16 @@ Label Studio Configuration Validation Utilities
"""
from typing import Dict, List, Tuple, Optional
import xml.etree.ElementTree as ET
from app.module.annotation.config import LabelStudioTagConfig
class LabelStudioConfigValidator:
"""验证Label Studio配置的工具类"""
# 支持的控件类型
CONTROL_TYPES = {
'Choices', 'RectangleLabels', 'PolygonLabels', 'Labels',
'TextArea', 'Rating', 'KeyPointLabels', 'BrushLabels',
'EllipseLabels', 'VideoRectangle', 'AudioPlus'
}
# 支持的对象类型
OBJECT_TYPES = {
'Image', 'Text', 'Audio', 'Video', 'HyperText',
'AudioPlus', 'Paragraphs', 'Table'
}
# 需要子标签的控件类型
LABEL_BASED_CONTROLS = {
'Choices', 'RectangleLabels', 'PolygonLabels', 'Labels',
'KeyPointLabels', 'BrushLabels', 'EllipseLabels'
}
@staticmethod
def _get_config() -> LabelStudioTagConfig:
"""获取标签配置实例"""
return LabelStudioTagConfig()
@staticmethod
def validate_xml(xml_string: str) -> Tuple[bool, Optional[str]]:
@@ -39,6 +26,7 @@ class LabelStudioConfigValidator:
(是否有效, 错误信息)
"""
try:
config = LabelStudioConfigValidator._get_config()
root = ET.fromstring(xml_string)
# 检查根元素
@@ -46,12 +34,14 @@ class LabelStudioConfigValidator:
return False, "Root element must be <View>"
# 检查是否有对象定义
objects = [child for child in root if child.tag in LabelStudioConfigValidator.OBJECT_TYPES]
object_types = config.get_object_types()
objects = [child for child in root if child.tag in object_types]
if not objects:
return False, "No data objects (Image, Text, etc.) found"
# 检查是否有控件定义
controls = [child for child in root if child.tag in LabelStudioConfigValidator.CONTROL_TYPES]
control_types = config.get_control_types()
controls = [child for child in root if child.tag in control_types]
if not controls:
return False, "No annotation controls found"
@@ -79,6 +69,8 @@ class LabelStudioConfigValidator:
Returns:
(是否有效, 错误信息)
"""
config = LabelStudioConfigValidator._get_config()
# 检查必需属性
if 'name' not in control.attrib:
return False, "Missing 'name' attribute"
@@ -86,16 +78,20 @@ class LabelStudioConfigValidator:
if 'toName' not in control.attrib:
return False, "Missing 'toName' attribute"
# 检查标签型控件是否有子标签
if control.tag in LabelStudioConfigValidator.LABEL_BASED_CONTROLS:
labels = control.findall('Label')
if not labels:
return False, f"{control.tag} must have at least one <Label> child"
# 检查控件是否需要子元素
if config.requires_children(control.tag):
child_tag = config.get_child_tag(control.tag)
if not child_tag:
return False, f"Configuration error: no child_tag defined for {control.tag}"
# 检查每个标签是否有value
for label in labels:
if 'value' not in label.attrib:
return False, "Label missing 'value' attribute"
children = control.findall(child_tag)
if not children:
return False, f"{control.tag} must have at least one <{child_tag}> child"
# 检查每个子元素是否有value
for child in children:
if 'value' not in child.attrib:
return False, f"{child_tag} missing 'value' attribute"
return True, None
@@ -111,16 +107,24 @@ class LabelStudioConfigValidator:
字典,键为控件名称,值为标签值列表
"""
result = {}
config = LabelStudioConfigValidator._get_config()
try:
root = ET.fromstring(xml_string)
controls = [child for child in root if child.tag in LabelStudioConfigValidator.LABEL_BASED_CONTROLS]
control_types = config.get_control_types()
controls = [child for child in root if child.tag in control_types]
for control in controls:
if not config.requires_children(control.tag):
continue
control_name = control.get('name', 'unknown')
labels = control.findall('Label')
label_values = [label.get('value', '') for label in labels]
result[control_name] = label_values
child_tag = config.get_child_tag(control.tag)
if child_tag:
children = control.findall(child_tag)
label_values = [child.get('value', '') for child in children]
result[control_name] = label_values
except Exception:
pass
@@ -182,6 +186,9 @@ class LabelStudioConfigValidator:
@staticmethod
def _validate_label_definition(label: Dict) -> Tuple[bool, Optional[str]]:
"""验证标签定义"""
config = LabelStudioConfigValidator._get_config()
control_types = config.get_control_types()
# Support both camelCase and snake_case
from_name = label.get('fromName') or label.get('from_name')
to_name = label.get('toName') or label.get('to_name')
@@ -195,11 +202,11 @@ class LabelStudioConfigValidator:
return False, "Missing required field 'type'"
# 检查类型是否支持
if label_type not in LabelStudioConfigValidator.CONTROL_TYPES:
if label_type not in control_types:
return False, f"Unsupported control type '{label_type}'"
# 检查标签型控件是否有选项或标签
if label_type in LabelStudioConfigValidator.LABEL_BASED_CONTROLS:
# 检查是否需要子元素(options 或 labels)
if config.requires_children(label_type):
if 'options' not in label and 'labels' not in label:
return False, f"{label_type} must have 'options' or 'labels' field"
@@ -208,6 +215,9 @@ class LabelStudioConfigValidator:
@staticmethod
def _validate_object_definition(obj: Dict) -> Tuple[bool, Optional[str]]:
"""验证对象定义"""
config = LabelStudioConfigValidator._get_config()
object_types = config.get_object_types()
required_fields = ['name', 'type', 'value']
for field in required_fields:
@@ -215,7 +225,7 @@ class LabelStudioConfigValidator:
return False, f"Missing required field '{field}'"
# 检查类型是否支持
if obj['type'] not in LabelStudioConfigValidator.OBJECT_TYPES:
if obj['type'] not in object_types:
return False, f"Unsupported object type '{obj['type']}'"
# 检查value格式