You've already forked DataMate
* feat: Enhance annotation module with template management and validation - Added DatasetMappingCreateRequest and DatasetMappingUpdateRequest schemas to handle dataset mapping requests with camelCase and snake_case support. - Introduced Annotation Template schemas including CreateAnnotationTemplateRequest, UpdateAnnotationTemplateRequest, and AnnotationTemplateResponse for managing annotation templates. - Implemented AnnotationTemplateService for creating, updating, retrieving, and deleting annotation templates, including validation of configurations and XML generation. - Added utility class LabelStudioConfigValidator for validating Label Studio configurations and XML formats. - Updated database schema for annotation templates and labeling projects to include new fields and constraints. - Seeded initial annotation templates for various use cases including image classification, object detection, and text classification. * feat: Enhance TemplateForm with improved validation and dynamic field rendering; update LabelStudio config validation for camelCase support * feat: Update docker-compose.yml to mark datamate dataset volume and network as external
380 lines
10 KiB
SQL
380 lines
10 KiB
SQL
-- Select the datamate schema as the default database for the statements that follow.
USE datamate;
|
|
|
|
-- Annotation configuration templates.
--
-- IF NOT EXISTS keeps this script re-runnable, in line with the
-- INSERT ... ON DUPLICATE KEY UPDATE seed statements in this file: a second
-- run no longer aborts here with "table already exists".
-- NOTE: IF NOT EXISTS never alters an existing table. A table created from an
-- older revision of this schema has to be migrated separately.
--
-- NOTE(review): the column comments and the seed rows in this file disagree
-- in a few places -- confirm which side the application treats as canonical:
--   * id is commented as 'UUID', but seed ids look like 'tpl-ner-001'.
--   * category lists medical/general/custom/system, but seed rows store
--     'computer-vision', 'nlp' and 'audio'.
--   * labeling_type lists 'detection', but a seed row stores 'object-detection'.
CREATE TABLE IF NOT EXISTS t_dm_annotation_templates (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',                                                                          -- primary key
    name VARCHAR(100) NOT NULL COMMENT '模板名称',                                                                       -- template name
    description VARCHAR(500) COMMENT '模板描述',                                                                         -- template description (optional)
    data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries',                                -- data type
    labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc',       -- labeling type
    configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)',                                                     -- annotation configuration (label definitions etc.)
    style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',                                                  -- layout style
    category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: medical/general/custom/system',                             -- template category
    built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',                                                           -- whether this is a system built-in template
    version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',                                                                -- template version
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',                                          -- creation time
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',                       -- last update time
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',                                                                -- deletion time (soft delete); NULL when not deleted
    INDEX idx_data_type (data_type),
    INDEX idx_labeling_type (labeling_type),
    INDEX idx_category (category),
    INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';  -- table comment: "annotation configuration template table"
|
|
|
|
-- Labeling projects: one row per project, linked to a dataset and, optionally,
-- to the annotation template it was created from.
--
-- IF NOT EXISTS keeps this script re-runnable, in line with the
-- INSERT ... ON DUPLICATE KEY UPDATE seed statements in this file: a second
-- run no longer aborts here with "table already exists".
-- NOTE: IF NOT EXISTS never alters an existing table. A table created from an
-- older revision of this schema has to be migrated separately.
CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',                                                      -- primary key
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',                                              -- dataset ID (no foreign key is declared for it here)
    name VARCHAR(100) NOT NULL COMMENT '项目名称',                                                   -- project name
    -- NOTE(review): only 8 characters wide -- confirm this fits every Label Studio project ID.
    labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',                            -- Label Studio project ID
    template_id VARCHAR(36) NULL COMMENT '使用的模板ID',                                             -- ID of the template used; nullable so ON DELETE SET NULL can apply
    configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',                                -- project configuration (may hold custom changes to the template)
    progress JSON COMMENT '项目进度信息',                                                            -- project progress information
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',                      -- creation time
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',   -- last update time
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',                                            -- deletion time (soft delete); NULL when not deleted
    -- Physically deleting a template keeps its projects and clears their template_id.
    FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_template_id (template_id),
    INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';  -- table comment: "labeling project table"
|
|
|
|
|
|
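-- Seed data for the built-in annotation templates.
-- These templates are created automatically the first time the system starts.
-- INSERT ... ON DUPLICATE KEY UPDATE is used so that existing rows are overwritten.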
|
|
|
|
-- 1. Image classification template.
-- Upsert on the primary key: a new id is inserted; an existing id has every
-- column listed after ON DUPLICATE KEY UPDATE overwritten. created_at is not
-- in that list, so the original creation time is kept.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-image-classification-001',
    'Image Classification',
    'Simple image classification with multiple choice labels',
    'image',
    'classification',
    JSON_OBJECT(
        -- Control tags: a single required choice applied to the image.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'image',
            'type', 'Choices',
            'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'),
            'required', TRUE,
            'description', 'Select the category that best describes the image'
        )),
        -- Object tags: the image being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
|
|
|
|
-- 2. Object detection template (rectangular bounding boxes).
-- Upsert on the primary key: a new id is inserted; an existing id has every
-- column listed after ON DUPLICATE KEY UPDATE overwritten. created_at is not
-- in that list, so the original creation time is kept.
-- NOTE(review): labeling_type is stored as 'object-detection', while the
-- column comment on t_dm_annotation_templates lists 'detection' -- confirm
-- which value the application expects.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-object-detection-001',
    'Object Detection (Bounding Box)',
    'Object detection using rectangular bounding boxes',
    'image',
    'object-detection',
    JSON_OBJECT(
        -- Control tags: optional rectangle labels drawn on the image.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'image',
            'type', 'RectangleLabels',
            'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'),
            'required', FALSE,
            'description', 'Draw bounding boxes around objects'
        )),
        -- Object tags: the image being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
|
|
|
|
-- 3. Image segmentation template (polygons).
-- Upsert on the primary key: a new id is inserted; an existing id has every
-- column listed after ON DUPLICATE KEY UPDATE overwritten. created_at is not
-- in that list, so the original creation time is kept.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-image-segmentation-001',
    'Image Segmentation (Polygon)',
    'Semantic segmentation using polygon annotations',
    'image',
    'segmentation',
    JSON_OBJECT(
        -- Control tags: optional polygon labels drawn on the image.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'image',
            'type', 'PolygonLabels',
            'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'),
            'required', FALSE,
            'description', 'Draw polygons to segment regions'
        )),
        -- Object tags: the image being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
|
|
|
|
-- 4. Text classification template.
-- Upsert on the primary key: a new id is inserted; an existing id has every
-- column listed after ON DUPLICATE KEY UPDATE overwritten. created_at is not
-- in that list, so the original creation time is kept.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-text-classification-001',
    'Text Classification',
    'Classify text into predefined categories',
    'text',
    'classification',
    JSON_OBJECT(
        -- Control tags: a single required choice applied to the text.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'text',
            'type', 'Choices',
            'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'),
            'required', TRUE,
            'description', 'Sentiment classification'
        )),
        -- Object tags: the text being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'text',
            'type', 'Text',
            'value', '$text'
        ))
    ),
    'vertical',
    'nlp',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
|
|
|
|
-- 5. Named entity recognition (NER) template.
-- Upsert on the primary key: a new id is inserted; an existing id has every
-- column listed after ON DUPLICATE KEY UPDATE overwritten. created_at is not
-- in that list, so the original creation time is kept.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-ner-001',
    'Named Entity Recognition',
    'Extract and label named entities in text',
    'text',
    'ner',
    JSON_OBJECT(
        -- Control tags: optional span labels applied to the text.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'text',
            'type', 'Labels',
            'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'),
            'required', FALSE,
            'description', 'Highlight and classify named entities'
        )),
        -- Object tags: the text being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'text',
            'type', 'Text',
            'value', '$text'
        ))
    ),
    'vertical',
    'nlp',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
|
|
|
|
-- 6. Audio classification template.
-- Upsert on the primary key: a new id is inserted; an existing id has every
-- column listed after ON DUPLICATE KEY UPDATE overwritten. created_at is not
-- in that list, so the original creation time is kept.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-audio-classification-001',
    'Audio Classification',
    'Classify audio clips into categories',
    'audio',
    'classification',
    JSON_OBJECT(
        -- Control tags: a single required choice applied to the audio clip.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'audio',
            'type', 'Choices',
            'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'),
            'required', TRUE,
            'description', 'Audio content classification'
        )),
        -- Object tags: the audio clip being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'audio',
            'type', 'Audio',
            'value', '$audio'
        ))
    ),
    'horizontal',
    'audio',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name          = VALUES(name),
    description   = VALUES(description),
    data_type     = VALUES(data_type),
    labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style         = VALUES(style),
    category      = VALUES(category),
    built_in      = VALUES(built_in),
    version       = VALUES(version),
    updated_at    = CURRENT_TIMESTAMP;
|