feat: Add labeling template (#72)

* feat: Enhance annotation module with template management and validation

- Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support.
- Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates.
- Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation.
- Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats.
- Updated database schema for annotation templates and labeling projects to include new fields and constraints.
- Seeded initial annotation templates for various use cases including image classification, object detection, and text classification.

* feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support

* feat: Update docker-compose.yml to mark datamate dataset volume and network as external
This commit is contained in:
Jason Wang
2025-11-11 09:14:14 +08:00
committed by GitHub
parent 451d3c8207
commit c5ccc56cca
24 changed files with 2794 additions and 253 deletions

View File

@@ -1,21 +1,379 @@
USE datamate;

-- Annotation configuration templates.
-- Each row is a reusable labeling configuration identified by a UUID-sized
-- string id. Rows are soft-deleted through deleted_at rather than removed.
--
-- The previous text of this statement carried two definitions of id, name,
-- description, configuration and deleted_at plus a stray ");" in the middle,
-- which is not valid SQL; only the extended definition is kept here.
--
-- NOTE(review): the seed rows later in this file use category values
-- ('computer-vision', 'nlp', 'audio') and a labeling_type ('object-detection')
-- that are not in the lists given in the column comments below. No constraint
-- enforces those lists, so the inserts succeed -- confirm which side is
-- authoritative.
CREATE TABLE t_dm_annotation_templates (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    name VARCHAR(100) NOT NULL COMMENT '模板名称',
    description VARCHAR(500) COMMENT '模板描述',
    data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries',
    labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc',
    configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)',
    style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',
    category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system',
    built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',
    version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    -- Refreshed automatically by the server on every row update.
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    -- NULL means the template has not been soft-deleted.
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
    INDEX idx_data_type (data_type),
    INDEX idx_labeling_type (labeling_type),
    INDEX idx_category (category),
    INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';
-- Labeling projects.
-- Each row links a dataset (dataset_id) to a Label Studio project
-- (labeling_project_id) and, optionally, to the annotation template it was
-- created from (template_id). Rows are soft-deleted through deleted_at.
--
-- The previous text of this statement carried two definitions of id, name,
-- configuration, progress and deleted_at plus a stray ");" in the middle,
-- which is not valid SQL; only the extended definition is kept here.
CREATE TABLE t_dm_labeling_projects (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
    name VARCHAR(100) NOT NULL COMMENT '项目名称',
    labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
    -- Nullable: a project does not have to be based on a template.
    template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
    configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
    progress JSON COMMENT '项目进度信息',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    -- Refreshed automatically by the server on every row update.
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    -- NULL means the project has not been soft-deleted.
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',
    -- Hard-deleting a template row clears template_id on the projects that
    -- referenced it instead of deleting or blocking on those projects.
    FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_template_id (template_id),
    INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';
-- Seed data for the built-in annotation templates.
-- These templates are meant to be created automatically when the system
-- first starts.
-- INSERT ... ON DUPLICATE KEY UPDATE is used so that records which already
-- exist are overwritten.

-- 1. Image classification template
-- One "Choices" control named "choice" attached to a single "Image" object.
-- On a primary-key collision every seeded column except id and created_at is
-- overwritten; deleted_at is not touched.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-image-classification-001',
    'Image Classification',
    'Simple image classification with multiple choice labels',
    'image',
    'classification',
    JSON_OBJECT(
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'image',
            'type', 'Choices',
            'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'),
            'required', TRUE,
            'description', 'Select the category that best describes the image'
        )),
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
-- 2. Object detection template (bounding boxes)
-- One "RectangleLabels" control named "label" attached to a single "Image"
-- object. On a primary-key collision every seeded column except id and
-- created_at is overwritten; deleted_at is not touched.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-object-detection-001',
    'Object Detection (Bounding Box)',
    'Object detection using rectangular bounding boxes',
    'image',
    'object-detection',
    JSON_OBJECT(
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'image',
            'type', 'RectangleLabels',
            'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'),
            'required', FALSE,
            'description', 'Draw bounding boxes around objects'
        )),
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
-- 3. Image segmentation template (polygons)
-- One "PolygonLabels" control named "label" attached to a single "Image"
-- object. On a primary-key collision every seeded column except id and
-- created_at is overwritten; deleted_at is not touched.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-image-segmentation-001',
    'Image Segmentation (Polygon)',
    'Semantic segmentation using polygon annotations',
    'image',
    'segmentation',
    JSON_OBJECT(
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'image',
            'type', 'PolygonLabels',
            'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'),
            'required', FALSE,
            'description', 'Draw polygons to segment regions'
        )),
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
-- 4. Text classification template
-- One "Choices" control named "choice" attached to a single "Text" object.
-- On a primary-key collision every seeded column except id and created_at is
-- overwritten; deleted_at is not touched.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-text-classification-001',
    'Text Classification',
    'Classify text into predefined categories',
    'text',
    'classification',
    JSON_OBJECT(
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'text',
            'type', 'Choices',
            'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'),
            'required', TRUE,
            'description', 'Sentiment classification'
        )),
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'text',
            'type', 'Text',
            'value', '$text'
        ))
    ),
    'vertical',
    'nlp',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
-- 5. Named entity recognition (NER) template
-- One "Labels" control named "label" attached to a single "Text" object.
-- On a primary-key collision every seeded column except id and created_at is
-- overwritten; deleted_at is not touched.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-ner-001',
    'Named Entity Recognition',
    'Extract and label named entities in text',
    'text',
    'ner',
    JSON_OBJECT(
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'text',
            'type', 'Labels',
            'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'),
            'required', FALSE,
            'description', 'Highlight and classify named entities'
        )),
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'text',
            'type', 'Text',
            'value', '$text'
        ))
    ),
    'vertical',
    'nlp',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
-- 6. Audio classification template
-- One "Choices" control named "choice" attached to a single "Audio" object.
-- On a primary-key collision every seeded column except id and created_at is
-- overwritten; deleted_at is not touched.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-audio-classification-001',
    'Audio Classification',
    'Classify audio clips into categories',
    'audio',
    'classification',
    JSON_OBJECT(
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'audio',
            'type', 'Choices',
            'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'),
            'required', TRUE,
            'description', 'Audio content classification'
        )),
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'audio',
            'type', 'Audio',
            'value', '$audio'
        ))
    ),
    'horizontal',
    'audio',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;