You've already forked DataMate
- 移除 TemplateConfigurationForm 组件并引入 TemplateConfigurationTreeEditor - 使用 useTagConfig Hook 获取标签配置 - 将自定义XML状态 customXml 替换为 labelConfig - 删除模板编辑标签页和选择模板状态管理 - 更新XML解析逻辑支持更多对象和标注控件类型 - 添加配置验证功能确保至少包含数据对象和标注控件 - 在模板详情页面使用树形编辑器显示配置详情 - 更新任务创建页面集成新的树形配置编辑器 - 调整预览数据生成功能适配新的XML解析方式
331 lines
12 KiB
Python
331 lines
12 KiB
Python
"""
|
|
Label Studio Configuration Validation Utilities
|
|
"""
|
|
from typing import Dict, List, Tuple, Optional, Set
|
|
import xml.etree.ElementTree as ET
|
|
from app.module.annotation.config import LabelStudioTagConfig
|
|
|
|
|
|
class LabelStudioConfigValidator:
    """Utility class for validating Label Studio labeling configurations.

    Two representations are handled:

    * the raw Label Studio XML (``validate_xml``, ``extract_label_values``);
    * a JSON-style dictionary with ``labels`` and ``objects`` arrays
      (``validate_configuration_json``).

    Tag metadata (known object/control types, required attributes, child
    tags) is looked up through ``LabelStudioTagConfig``. Malformed input is
    reported as ``(False, message)`` instead of raising.
    """

    @staticmethod
    def _get_config() -> "LabelStudioTagConfig":
        """Return a tag-configuration instance used to look up tag metadata."""
        return LabelStudioTagConfig()

    @staticmethod
    def _get_required_attrs(tag_type: str, is_control: bool) -> List[str]:
        """Return the attribute names that ``tag_type`` must define.

        Args:
            tag_type: XML tag name, e.g. ``"Image"`` or ``"Choices"``.
            is_control: Look the tag up among control tags when True,
                among object tags otherwise.

        Returns:
            The configured ``required_attrs`` list, or an empty list when the
            tag is unknown or the configured value is not a list.
        """
        config = LabelStudioConfigValidator._get_config()
        lookup = config.get_control_config if is_control else config.get_object_config
        tag_config = lookup(tag_type)
        if not tag_config:
            return []
        required_attrs = tag_config.get("required_attrs", [])
        return required_attrs if isinstance(required_attrs, list) else []

    @staticmethod
    def _get_control_category(tag_type: str) -> Optional[str]:
        """Return the configured ``category`` of a control tag, or None."""
        config = LabelStudioConfigValidator._get_config()
        tag_config = config.get_control_config(tag_type) or {}
        return tag_config.get("category")

    @staticmethod
    def validate_xml(xml_string: str) -> Tuple[bool, Optional[str]]:
        """Validate a Label Studio XML configuration.

        The configuration must parse, have ``<View>`` as its root, contain at
        least one data object and at least one control whose category is
        ``"labeling"``. Objects must carry their required attributes and a
        ``value`` starting with ``$``. Labeling controls are checked with
        ``_validate_control``; controls of any other category never cause a
        failure.

        Args:
            xml_string: Label Studio XML configuration string.

        Returns:
            ``(is_valid, error_message)``; the message is None when valid.
        """
        validator = LabelStudioConfigValidator
        try:
            # NOTE(review): xml.etree.ElementTree is not hardened against
            # maliciously constructed XML; confirm whether configurations can
            # come from untrusted users.
            # Parse and check the root before loading the tag configuration,
            # so syntactic problems are reported without that dependency.
            root = ET.fromstring(xml_string)
            if root.tag != 'View':
                return False, "Root element must be <View>"

            config = validator._get_config()
            object_types = config.get_object_types()
            control_types = config.get_control_types()

            # Single pass over the tree; a tag listed in both type tables is
            # collected in both lists.
            objects = []
            controls = []
            for elem in root.iter():
                if elem.tag in object_types:
                    objects.append(elem)
                if elem.tag in control_types:
                    controls.append(elem)

            if not objects:
                return False, "No data objects (Image, Text, etc.) found"

            # Resolve each control's category once and reuse it below.
            strict_flags = [
                validator._get_control_category(control.tag) == "labeling"
                for control in controls
            ]
            if not any(strict_flags):
                return False, "No labeling controls found"

            object_names = {
                obj.get("name") for obj in objects if obj.get("name")
            }

            # Required attributes and value format of data objects.
            for obj in objects:
                required_attrs = validator._get_required_attrs(
                    obj.tag, is_control=False
                )
                for attr in required_attrs:
                    if not obj.attrib.get(attr):
                        return False, f"Object {obj.tag} missing '{attr}' attribute"
                value = obj.attrib.get("value")
                if value and not value.startswith("$"):
                    return False, "Object value must start with '$' (e.g., '$image')"

            # Controls: only labeling controls can fail the validation.
            for control, strict in zip(controls, strict_flags):
                valid, error = validator._validate_control(
                    control, object_names, strict
                )
                if not valid and strict:
                    return False, f"Control {control.tag}: {error}"

            return True, None

        except ET.ParseError as e:
            return False, f"XML parse error: {str(e)}"
        except Exception as e:
            # Validation boundary: any unexpected failure is reported to the
            # caller as an invalid configuration rather than propagated.
            return False, f"Validation error: {str(e)}"

    @staticmethod
    def _validate_control(
        control: ET.Element,
        object_names: Set[str],
        strict: bool
    ) -> Tuple[bool, Optional[str]]:
        """Validate a single control element.

        Args:
            control: Control XML element.
            object_names: Names of the data objects declared in the
                configuration; ``toName`` must reference only these.
            strict: When False the control is never reported as invalid.

        Returns:
            ``(is_valid, error_message)``; the message is None when valid.
        """
        if not strict:
            # Every failure path is downgraded to success in non-strict mode,
            # so there is nothing to inspect.
            return True, None

        error = LabelStudioConfigValidator._find_control_error(
            control, object_names
        )
        return error is None, error

    @staticmethod
    def _find_control_error(
        control: ET.Element,
        object_names: Set[str]
    ) -> Optional[str]:
        """Return the first problem found on a control element, or None."""
        config = LabelStudioConfigValidator._get_config()

        # Required attributes.
        required_attrs = LabelStudioConfigValidator._get_required_attrs(
            control.tag, is_control=True
        )
        for attr in required_attrs:
            if not control.attrib.get(attr):
                return f"Missing '{attr}' attribute"

        # toName may list several comma-separated object names; each one must
        # refer to a declared object.
        to_names = [
            name.strip()
            for name in control.attrib.get("toName", "").split(",")
            if name.strip()
        ]
        invalid = [name for name in to_names if name not in object_names]
        if invalid:
            return f"toName references unknown object(s): {', '.join(invalid)}"

        # Child elements, for controls that need them.
        if not config.requires_children(control.tag):
            return None

        child_tag = config.get_child_tag(control.tag)
        if not child_tag:
            return f"Configuration error: no child_tag defined for {control.tag}"

        children = control.findall(child_tag)
        if not children:
            return f"{control.tag} must have at least one <{child_tag}> child"

        if any(not child.attrib.get("value") for child in children):
            return f"{child_tag} missing 'value' attribute"

        return None

    @staticmethod
    def extract_label_values(xml_string: str) -> Dict[str, List[str]]:
        """Extract the label values declared by every control in the XML.

        Controls are searched in the whole tree (matching ``validate_xml``),
        so controls nested inside layout elements are included.

        Args:
            xml_string: Label Studio XML configuration string.

        Returns:
            Mapping from control name (``'unknown'`` when the control has no
            ``name``) to the list of its children's ``value`` attributes.
            Empty when the XML cannot be parsed.
        """
        result: Dict[str, List[str]] = {}

        try:
            root = ET.fromstring(xml_string)
        except (ET.ParseError, TypeError, ValueError):
            # Unparseable input simply has no labels to extract.
            return result

        config = LabelStudioConfigValidator._get_config()

        try:
            control_types = config.get_control_types()

            for control in root.iter():
                if control.tag not in control_types:
                    continue
                if not config.requires_children(control.tag):
                    continue

                child_tag = config.get_child_tag(control.tag)
                if not child_tag:
                    continue

                control_name = control.get('name', 'unknown')
                result[control_name] = [
                    child.get('value', '')
                    for child in control.findall(child_tag)
                ]
        except Exception:
            # Deliberately best-effort: extraction never raises, the caller
            # receives whatever was collected before the failure.
            pass

        return result

    @staticmethod
    def validate_configuration_json(config: Dict) -> Tuple[bool, Optional[str]]:
        """Validate the JSON (dictionary) form of a configuration.

        Args:
            config: Configuration dictionary with non-empty ``labels`` and
                ``objects`` arrays.

        Returns:
            ``(is_valid, error_message)``; the message is None when valid.
        """
        validator = LabelStudioConfigValidator
        top_level_fields = ('labels', 'objects')

        if not isinstance(config, dict):
            return False, "Configuration must be an object"

        # Presence, type and non-emptiness of the top-level arrays.
        for field in top_level_fields:
            if field not in config:
                return False, f"Missing '{field}' field"
        for field in top_level_fields:
            if not isinstance(config[field], list):
                return False, f"'{field}' must be an array"
        for field in top_level_fields:
            if not config[field]:
                return False, f"'{field}' array cannot be empty"

        # Each label definition.
        for idx, label in enumerate(config['labels']):
            valid, error = validator._validate_label_definition(label)
            if not valid:
                return False, f"Label {idx}: {error}"

        # Each object definition.
        for idx, obj in enumerate(config['objects']):
            valid, error = validator._validate_object_definition(obj)
            if not valid:
                return False, f"Object {idx}: {error}"

        # Every label must point at a declared object.
        object_names = {obj['name'] for obj in config['objects']}
        for label in config['labels']:
            to_name = label.get('toName') or label.get('to_name')
            from_name = label.get('fromName') or label.get('from_name')
            if to_name not in object_names:
                return False, f"Label '{from_name}' references unknown object '{to_name}'"

        return True, None

    @staticmethod
    def _validate_label_definition(label: Dict) -> Tuple[bool, Optional[str]]:
        """Validate one entry of the ``labels`` array.

        Both camelCase (``fromName``/``toName``) and snake_case
        (``from_name``/``to_name``) keys are accepted.
        """
        if not isinstance(label, dict):
            return False, "Label definition must be an object"

        from_name = label.get('fromName') or label.get('from_name')
        to_name = label.get('toName') or label.get('to_name')
        label_type = label.get('type')

        if not from_name:
            return False, "Missing required field 'fromName'"
        if not to_name:
            return False, "Missing required field 'toName'"
        if not label_type:
            return False, "Missing required field 'type'"

        # toName is later looked up in a set of object names, so it must be
        # a plain (hashable) string.
        if not isinstance(to_name, str):
            return False, "Field 'toName' must be a string"

        # The tag configuration is only needed from here on.
        config = LabelStudioConfigValidator._get_config()
        control_types = config.get_control_types()

        # Supported control type?
        if not isinstance(label_type, str) or label_type not in control_types:
            return False, f"Unsupported control type '{label_type}'"

        # Controls that need children must carry 'options' or 'labels'.
        if config.requires_children(label_type):
            if 'options' not in label and 'labels' not in label:
                return False, f"{label_type} must have 'options' or 'labels' field"

        return True, None

    @staticmethod
    def _validate_object_definition(obj: Dict) -> Tuple[bool, Optional[str]]:
        """Validate one entry of the ``objects`` array."""
        if not isinstance(obj, dict):
            return False, "Object definition must be an object"

        for field in ('name', 'type', 'value'):
            if field not in obj:
                return False, f"Missing required field '{field}'"

        # Names are collected into a set and compared with toName strings.
        if not isinstance(obj['name'], str):
            return False, "Field 'name' must be a string"

        config = LabelStudioConfigValidator._get_config()
        object_types = config.get_object_types()

        # Supported object type?
        obj_type = obj['type']
        if not isinstance(obj_type, str) or obj_type not in object_types:
            return False, f"Unsupported object type '{obj_type}'"

        # The value must be a '$variable' reference.
        value = obj['value']
        if not isinstance(value, str) or not value.startswith('$'):
            return False, "Object value must start with '$' (e.g., '$image')"

        return True, None
|
|
|
|
|
|
# Usage example
if __name__ == "__main__":
    # Validate an XML configuration. Label Studio's <Choices> control takes
    # <Choice> children (<Label> children belong to <Labels>).
    xml = "\n".join([
        '<View>',
        '<Image name="image" value="$image"/>',
        '<Choices name="choice" toName="image" required="true">',
        '<Choice value="Cat"/>',
        '<Choice value="Dog"/>',
        '</Choices>',
        '</View>',
    ])

    valid, error = LabelStudioConfigValidator.validate_xml(xml)
    print(f"XML Valid: {valid}, Error: {error}")

    # Validate the JSON form of the same configuration.
    config = {
        "labels": [
            {
                "fromName": "choice",
                "toName": "image",
                "type": "Choices",
                "options": ["Cat", "Dog"],
                "required": True,
            },
        ],
        "objects": [
            {
                "name": "image",
                "type": "Image",
                "value": "$image",
            },
        ],
    }

    valid, error = LabelStudioConfigValidator.validate_configuration_json(config)
    print(f"Config Valid: {valid}, Error: {error}")
|