-- DataMate/scripts/db/data-annotation-init.sql

-- Select the schema that all following statements in this script operate on.
USE datamate;

-- Annotation configuration templates.
-- One row per template; rows are soft-deleted through deleted_at.
-- IF NOT EXISTS keeps the script re-runnable: without it a second run stops
-- here with "table already exists" and the seed upserts never execute.
CREATE TABLE IF NOT EXISTS t_dm_annotation_templates (
    -- Identity and descriptive fields.
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    name VARCHAR(100) NOT NULL COMMENT '模板名称',
    description VARCHAR(500) COMMENT '模板描述',
    -- Classification of the template (what data it applies to, what task it supports).
    data_type VARCHAR(50) NOT NULL COMMENT '数据类型: image/text/audio/video/timeseries',
    -- NOTE(review): the seed rows in this script also store 'object-detection' here.
    labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型: classification/detection/segmentation/ner/relation/etc',
    -- JSON document holding the label definitions for the template.
    configuration JSON NOT NULL COMMENT '标注配置（包含labels定义等）',
    style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',
    -- The seed rows in this script use computer-vision / nlp / audio, so the
    -- column comment lists them alongside the previously documented values.
    category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类: computer-vision/nlp/audio/medical/general/custom/system',
    built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',
    version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',
    -- Audit timestamps; updated_at is refreshed automatically on every UPDATE.
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    -- NULL while the row is live; set to mark the row as soft-deleted.
    deleted_at TIMESTAMP NULL COMMENT '删除时间（软删除）',
    -- Secondary indexes on the columns used to classify templates.
    INDEX idx_data_type (data_type),
    INDEX idx_labeling_type (labeling_type),
    INDEX idx_category (category),
    INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';

-- Labeling projects.
-- Each row ties a dataset to a Label Studio project and, optionally, to the
-- annotation template it was created from. Rows are soft-deleted via deleted_at.
-- IF NOT EXISTS keeps the script re-runnable: without it a second run fails
-- here with "table already exists" before the seed data is refreshed.
-- Requires t_dm_annotation_templates to exist (foreign key target).
CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',
    name VARCHAR(100) NOT NULL COMMENT '项目名称',
    -- Identifier of the project on the Label Studio side (stored as text, max 8 chars).
    labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',
    -- Nullable: a project may exist without a template, and the FK below
    -- resets this to NULL when the referenced template row is deleted.
    template_id VARCHAR(36) NULL COMMENT '使用的模板ID',
    configuration JSON COMMENT '项目配置（可能包含对模板的自定义修改）',
    progress JSON COMMENT '项目进度信息',
    -- Audit timestamps; updated_at is refreshed automatically on every UPDATE.
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    deleted_at TIMESTAMP NULL COMMENT '删除时间（软删除）',
    -- Hard-deleting a template detaches its projects instead of deleting them.
    FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_template_id (template_id),
    INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';


-- 内置标注模板初始化数据
-- 这些模板将在系统首次启动时自动创建
-- 使用 INSERT ... ON DUPLICATE KEY UPDATE 来覆盖已存在的记录

-- 1. Built-in template: image classification.
-- Upsert keyed on id: inserts the row, or, when that id already exists,
-- overwrites every column except id and created_at and bumps updated_at.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-image-classification-001',
    'Image Classification',
    'Simple image classification with multiple choice labels',
    'image',
    'classification',
    JSON_OBJECT(
        -- One required choice control attached to the "image" object.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'image',
            'type', 'Choices',
            'options', JSON_ARRAY('Cat', 'Dog', 'Bird', 'Other'),
            'required', TRUE,
            'description', 'Select the category that best describes the image'
        )),
        -- The data object being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name), description = VALUES(description),
    data_type = VALUES(data_type), labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style), category = VALUES(category),
    built_in = VALUES(built_in), version = VALUES(version),
    updated_at = CURRENT_TIMESTAMP;

-- 2. Built-in template: object detection with rectangular bounding boxes.
-- Upsert keyed on id: inserts the row, or, when that id already exists,
-- overwrites every column except id and created_at and bumps updated_at.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-object-detection-001',
    'Object Detection (Bounding Box)',
    'Object detection using rectangular bounding boxes',
    'image',
    'object-detection',
    JSON_OBJECT(
        -- One optional rectangle-label control attached to the "image" object.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'image',
            'type', 'RectangleLabels',
            'labels', JSON_ARRAY('Person', 'Vehicle', 'Animal', 'Object'),
            'required', FALSE,
            'description', 'Draw bounding boxes around objects'
        )),
        -- The data object being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name), description = VALUES(description),
    data_type = VALUES(data_type), labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style), category = VALUES(category),
    built_in = VALUES(built_in), version = VALUES(version),
    updated_at = CURRENT_TIMESTAMP;

-- 3. Built-in template: image segmentation with polygons.
-- Upsert keyed on id: inserts the row, or, when that id already exists,
-- overwrites every column except id and created_at and bumps updated_at.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-image-segmentation-001',
    'Image Segmentation (Polygon)',
    'Semantic segmentation using polygon annotations',
    'image',
    'segmentation',
    JSON_OBJECT(
        -- One optional polygon-label control attached to the "image" object.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'image',
            'type', 'PolygonLabels',
            'labels', JSON_ARRAY('Background', 'Foreground', 'Person', 'Car'),
            'required', FALSE,
            'description', 'Draw polygons to segment regions'
        )),
        -- The data object being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'image',
            'type', 'Image',
            'value', '$image'
        ))
    ),
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name), description = VALUES(description),
    data_type = VALUES(data_type), labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style), category = VALUES(category),
    built_in = VALUES(built_in), version = VALUES(version),
    updated_at = CURRENT_TIMESTAMP;

-- 4. Built-in template: text classification.
-- Upsert keyed on id: inserts the row, or, when that id already exists,
-- overwrites every column except id and created_at and bumps updated_at.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-text-classification-001',
    'Text Classification',
    'Classify text into predefined categories',
    'text',
    'classification',
    JSON_OBJECT(
        -- One required choice control attached to the "text" object.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'text',
            'type', 'Choices',
            'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'),
            'required', TRUE,
            'description', 'Sentiment classification'
        )),
        -- The data object being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'text',
            'type', 'Text',
            'value', '$text'
        ))
    ),
    'vertical',
    'nlp',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name), description = VALUES(description),
    data_type = VALUES(data_type), labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style), category = VALUES(category),
    built_in = VALUES(built_in), version = VALUES(version),
    updated_at = CURRENT_TIMESTAMP;

-- 5. Built-in template: named entity recognition (NER).
-- Upsert keyed on id: inserts the row, or, when that id already exists,
-- overwrites every column except id and created_at and bumps updated_at.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-ner-001',
    'Named Entity Recognition',
    'Extract and label named entities in text',
    'text',
    'ner',
    JSON_OBJECT(
        -- One optional span-label control attached to the "text" object.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'label',
            'toName', 'text',
            'type', 'Labels',
            'labels', JSON_ARRAY('PERSON', 'ORG', 'LOC', 'DATE', 'MISC'),
            'required', FALSE,
            'description', 'Highlight and classify named entities'
        )),
        -- The data object being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'text',
            'type', 'Text',
            'value', '$text'
        ))
    ),
    'vertical',
    'nlp',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name), description = VALUES(description),
    data_type = VALUES(data_type), labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style), category = VALUES(category),
    built_in = VALUES(built_in), version = VALUES(version),
    updated_at = CURRENT_TIMESTAMP;

-- 6. Built-in template: audio classification.
-- Upsert keyed on id: inserts the row, or, when that id already exists,
-- overwrites every column except id and created_at and bumps updated_at.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, style, category, built_in, version, created_at)
VALUES (
    'tpl-audio-classification-001',
    'Audio Classification',
    'Classify audio clips into categories',
    'audio',
    'classification',
    JSON_OBJECT(
        -- One required choice control attached to the "audio" object.
        'labels', JSON_ARRAY(JSON_OBJECT(
            'fromName', 'choice',
            'toName', 'audio',
            'type', 'Choices',
            'options', JSON_ARRAY('Speech', 'Music', 'Noise', 'Silence'),
            'required', TRUE,
            'description', 'Audio content classification'
        )),
        -- The data object being annotated.
        'objects', JSON_ARRAY(JSON_OBJECT(
            'name', 'audio',
            'type', 'Audio',
            'value', '$audio'
        ))
    ),
    'horizontal',
    'audio',
    TRUE,
    '1.0.0',
    CURRENT_TIMESTAMP
)
ON DUPLICATE KEY UPDATE
    name = VALUES(name), description = VALUES(description),
    data_type = VALUES(data_type), labeling_type = VALUES(labeling_type),
    configuration = VALUES(configuration),
    style = VALUES(style), category = VALUES(category),
    built_in = VALUES(built_in), version = VALUES(version),
    updated_at = CURRENT_TIMESTAMP;