Files
DataMate/scripts/db/data-annotation-init.sql
Jerry Yan f4fc574687 feat(annotation): 添加标注状态管理功能
- 引入 AnnotationResultStatus 枚举类型区分已标注和无标注状态
- 在前端组件中实现空标注检测和确认对话框逻辑
- 添加数据库表字段 annotation_status 存储标注状态
- 扩展后端服务验证和处理标注状态逻辑
- 更新 API 接口支持标注状态参数传递
- 改进任务列表显示逻辑以反映不同标注状态
- 实现分段模式下的标注结果检查机制
2026-01-31 13:23:38 +08:00

1474 lines
51 KiB
SQL

-- Initialization script for the data-annotation service schema.
-- Targets the `datamate` database.
-- The template model follows the Label Studio template system.
USE `datamate`;
-- =====================================
-- DDL - table definitions
-- =====================================
-- Annotation template catalogue: one row per labeling template, either seeded
-- by this script (built_in = 1) or user-defined. label_config holds the main
-- Label Studio XML; configuration is kept as a compatibility field.
CREATE TABLE IF NOT EXISTS t_dm_annotation_templates (
    id            VARCHAR(64)  NOT NULL COMMENT '模板ID(UUID或自定义ID)',
    name          VARCHAR(100) NOT NULL COMMENT '模板名称',
    description   VARCHAR(500) COMMENT '模板描述',
    data_type     VARCHAR(50)  NOT NULL COMMENT '数据类型: text/image/audio/video/pdf/timeseries/chat/html/table',
    labeling_type VARCHAR(50)  NOT NULL COMMENT '标注类型',
    configuration JSON NULL COMMENT '标注配置(兼容字段,主配置为label_config)',
    label_config  TEXT COMMENT 'Label Studio XML配置(模板主配置)',
    style         VARCHAR(32)  NOT NULL COMMENT '样式配置: horizontal/vertical',
    category      VARCHAR(50)  DEFAULT 'custom' COMMENT '模板分类',
    built_in      BOOLEAN      DEFAULT FALSE COMMENT '是否系统内置模板',
    version       VARCHAR(20)  DEFAULT '1.0' COMMENT '模板版本',
    created_at    TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    -- Soft-delete marker (per the column comment); NULL while the row is live.
    deleted_at    TIMESTAMP NULL COMMENT '删除时间(软删除)',
    PRIMARY KEY (id),
    -- Secondary indexes on the columns used to classify templates.
    KEY idx_data_type (data_type),
    KEY idx_labeling_type (labeling_type),
    KEY idx_category (category),
    KEY idx_built_in (built_in)
) COMMENT = '标注配置模板表';
-- Labeling projects: binds a dataset to a Label Studio project and, optionally,
-- to the template it was created from.
CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
    id                  VARCHAR(36)  NOT NULL COMMENT 'UUID',
    dataset_id          VARCHAR(36)  NOT NULL COMMENT '数据集ID',
    name                VARCHAR(100) NOT NULL COMMENT '项目名称',
    labeling_project_id VARCHAR(8)   NOT NULL COMMENT 'Label Studio项目ID',
    template_id         VARCHAR(64)  NULL COMMENT '使用的模板ID',
    configuration       JSON COMMENT '项目配置(可能包含对模板的自定义修改)',
    progress            JSON COMMENT '项目进度信息',
    created_at          TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at          TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    deleted_at          TIMESTAMP NULL COMMENT '删除时间(软删除)',
    PRIMARY KEY (id),
    -- Deleting a template keeps the project row and only clears template_id.
    FOREIGN KEY (template_id)
        REFERENCES t_dm_annotation_templates (id)
        ON DELETE SET NULL,
    KEY idx_dataset_id (dataset_id),
    KEY idx_template_id (template_id),
    KEY idx_labeling_project_id (labeling_project_id)
) COMMENT = '标注项目表';
-- Snapshot of the files that belong to a labeling project: one row per
-- (project_id, file_id) pair, enforced by uk_project_file.
CREATE TABLE IF NOT EXISTS t_dm_labeling_project_files (
    id         VARCHAR(36) NOT NULL COMMENT 'UUID',
    project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID',
    file_id    VARCHAR(36) NOT NULL COMMENT '文件ID',
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    PRIMARY KEY (id),
    -- Also serves lookups by project_id alone (leftmost prefix of the key), so
    -- no separate single-column index on project_id is declared.
    UNIQUE KEY uk_project_file (project_id, file_id),
    -- file_id is the second key part above, so it needs its own index.
    KEY idx_file_id (file_id)
) COMMENT = '标注项目文件快照表';
-- Annotation results: at most one row per (project_id, file_id), holding the
-- raw Label Studio annotation JSON and its annotation status.
-- NOTE(review): because of IF NOT EXISTS, a table created by an earlier version
-- of this script is left as-is and will not gain annotation_status from this
-- statement — confirm a separate migration covers existing databases.
CREATE TABLE IF NOT EXISTS t_dm_annotation_results (
    id                VARCHAR(36) NOT NULL COMMENT 'UUID',
    project_id        VARCHAR(36) NOT NULL COMMENT '标注项目ID',
    file_id           VARCHAR(36) NOT NULL COMMENT '文件ID',
    annotation        JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON',
    -- Values listed in the column comment: ANNOTATED / NO_ANNOTATION.
    annotation_status VARCHAR(32) NOT NULL DEFAULT 'ANNOTATED' COMMENT '标注状态: ANNOTATED/NO_ANNOTATION',
    created_at        TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    PRIMARY KEY (id),
    -- Also serves lookups by project_id alone (leftmost prefix of the key), so
    -- no separate single-column index on project_id is declared.
    UNIQUE KEY uk_project_file (project_id, file_id),
    KEY idx_file_id (file_id),
    KEY idx_updated_at (updated_at)
) COMMENT = '标注结果表';
-- Auto-annotation tasks: one row per task run against a dataset, with its
-- configuration, progress counters, output location and error text.
CREATE TABLE IF NOT EXISTS t_dm_auto_annotation_tasks (
    id               VARCHAR(36)  NOT NULL COMMENT 'UUID',
    name             VARCHAR(255) NOT NULL COMMENT '任务名称',
    dataset_id       VARCHAR(36)  NOT NULL COMMENT '数据集ID',
    dataset_name     VARCHAR(255) COMMENT '数据集名称',
    config           JSON NOT NULL COMMENT '任务配置',
    file_ids         JSON COMMENT '文件ID列表',
    status           VARCHAR(50)  NOT NULL DEFAULT 'pending' COMMENT '任务状态',
    -- Progress counters all start at 0.
    progress         INT DEFAULT 0 COMMENT '任务进度',
    total_images     INT DEFAULT 0 COMMENT '总图片数',
    processed_images INT DEFAULT 0 COMMENT '已处理图片数',
    detected_objects INT DEFAULT 0 COMMENT '检测到的对象数',
    output_path      VARCHAR(500) COMMENT '输出路径',
    error_message    TEXT COMMENT '错误信息',
    created_at       TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    updated_at       TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
    completed_at     TIMESTAMP NULL COMMENT '完成时间',
    deleted_at       TIMESTAMP NULL COMMENT '删除时间',
    PRIMARY KEY (id),
    KEY idx_dataset_id (dataset_id),
    KEY idx_status (status),
    KEY idx_created_at (created_at)
) COMMENT = '自动标注任务表';
-- =====================================
-- DML - built-in annotation template seed data
-- =====================================
-- Remove the previously seeded built-in templates so the INSERTs that follow
-- can recreate them; rows whose built_in is not 1 (custom templates) are kept.
-- NOTE(review): t_dm_labeling_projects.template_id references this table with
-- ON DELETE SET NULL, so re-running this script clears template_id on every
-- project that points at a built-in template, even though the same ids are
-- inserted again afterwards — confirm this is acceptable or move to an upsert.
DELETE FROM t_dm_annotation_templates
WHERE built_in = TRUE;
-- =============================================
-- 1. Audio / speech processing (Audio/Speech Processing)
-- Templates in this section are enabled by default.
-- =============================================
-- 1.1 Automatic speech recognition with segmentation: Speech/Noise region
-- labels on the audio plus a required per-region transcription.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-asr-segments-001',
    '语音识别(分段)',
    '对音频进行语音活动分段并转录文本,适用于呼叫中心转录、会议记录、播客转录、法庭记录等场景。关联模型:Whisper、Wav2Vec2、DeepSpeech',
    'audio',
    'asr-segments',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'labels',
                'toName', 'audio',
                'type', 'Labels',
                'labels', JSON_ARRAY('Speech', 'Noise')
            ),
            JSON_OBJECT(
                'fromName', 'transcription',
                'toName', 'audio',
                'type', 'TextArea',
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')
        )
    ),
    -- label_config: the literal stays flush-left because any leading
    -- whitespace inside the quotes would become part of the stored XML.
'<View>
<Labels name="labels" toName="audio">
<Label value="Speech" />
<Label value="Noise" />
</Labels>
<Audio name="audio" value="$audio"/>
<TextArea name="transcription" toName="audio"
rows="2" editable="true"
perRegion="true" required="true" />
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
-- 1.2 Automatic speech recognition: a single transcription text area for the
-- whole audio clip.
-- NOTE(review): configuration marks "transcription" as required, but the
-- <TextArea> in label_config carries no required attribute — confirm which
-- of the two is intended.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-asr-001',
    '语音识别',
    '转录音频内容,适用于播客转录、会议记录、客服通话、字幕生成等场景。关联模型:Whisper、Wav2Vec、DeepSpeech',
    'audio',
    'asr',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'transcription',
                'toName', 'audio',
                'type', 'TextArea',
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Audio name="audio" value="$audio" zoom="true" hotkey="ctrl+enter" />
<Header value="转录音频内容" />
<TextArea name="transcription" toName="audio"
rows="4" editable="true" maxSubmissions="1" />
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
-- 1.3 Conversation analysis: an audio player synced with a dialogue
-- transcript ($text); paragraphs are labeled Positive or Negative.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-conversation-analysis-001',
    '对话分析',
    '分析对话语句并标注事实和情感方面,适用于呼叫中心质检、客服分析、会议分析等场景',
    'audio',
    'conversation-analysis',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'text',
                'type', 'ParagraphLabels',
                'labels', JSON_ARRAY('Positive', 'Negative')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'),
            JSON_OBJECT('name', 'text', 'type', 'Paragraphs', 'value', '$text')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Audio name="audio" value="$audio" hotkey="space" sync="text"/>
<Header value="对话记录"/>
<Paragraphs audioUrl="$audio" sync="audio" name="text" value="$text"
layout="dialogue" textKey="text" nameKey="author"
granularity="paragraph" contextscroll="true" />
<View style="position: sticky">
<Header value="情感标签"/>
<ParagraphLabels name="label" toName="text">
<Label value="Positive" background="#00ff00"/>
<Label value="Negative" background="#ff0000"/>
</ParagraphLabels>
</View>
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
-- 1.4 Intent classification: mark audio segments and pick one required
-- intent choice per region.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-intent-classification-001',
    '意图分类',
    '进行语音活动分段并选择语音意图,适用于语音助手、智能音箱、IVR系统等场景',
    'audio',
    'intent-classification',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'labels',
                'toName', 'audio',
                'type', 'Labels',
                'labels', JSON_ARRAY('Segment')
            ),
            JSON_OBJECT(
                'fromName', 'intent',
                'toName', 'audio',
                'type', 'Choices',
                'options', JSON_ARRAY('Question', 'Request', 'Satisfied', 'Interested', 'Unsatisfied'),
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Labels name="labels" toName="audio">
<Label value="Segment" />
</Labels>
<Audio name="audio" value="$audio"/>
<Choices name="intent" toName="audio" perRegion="true" required="true">
<Choice value="Question" />
<Choice value="Request" />
<Choice value="Satisfied" />
<Choice value="Interested" />
<Choice value="Unsatisfied" />
</Choices>
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
-- 1.5 Signal quality: rate an audio clip on a star scale (maxRating="10").
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-signal-quality-001',
    '信号质量检测',
    '评估音频信号质量,适用于电信、呼叫中心质检、音频制作、VoIP质量评估等场景',
    'audio',
    'signal-quality',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'rating',
                'toName', 'audio',
                'type', 'Rating'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Rating name="rating" toName="audio" maxRating="10" icon="star" size="medium" />
<Audio name="audio" value="$audio"/>
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
-- 1.6 Sound event detection: select audio regions and label them with one of
-- the placeholder classes "Event A" / "Event B".
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-sound-event-001',
    '声音事件检测',
    '选择音频片段并分类声音事件,适用于安防监控、智慧城市、环境监测、工业监测等场景',
    'audio',
    'sound-event-detection',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'audio',
                'type', 'Labels',
                'labels', JSON_ARRAY('Event A', 'Event B')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Labels name="label" toName="audio" zoom="true" hotkey="ctrl+enter">
<Label value="Event A" background="red"/>
<Label value="Event B" background="green"/>
</Labels>
<Audio name="audio" value="$audio"/>
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
-- 1.7 Speaker segmentation (diarization): label audio regions as
-- "Speaker one" or "Speaker two".
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-speaker-segmentation-001',
    '说话人分割',
    '执行说话人分割/话者分离任务,适用于会议转录、播客制作、呼叫中心分析、法庭记录等场景',
    'audio',
    'speaker-segmentation',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'audio',
                'type', 'Labels',
                'labels', JSON_ARRAY('Speaker one', 'Speaker two')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Labels name="label" toName="audio" zoom="true" hotkey="ctrl+enter">
<Label value="Speaker one" background="#00FF00"/>
<Label value="Speaker two" background="#12ad59"/>
</Labels>
<Audio name="audio" value="$audio" />
</View>',
    'horizontal',
    'audio-speech',
    TRUE,
    '1.0.0'
);
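-- =============================================
-- 2. Chat evaluation (Chat)
-- [Disabled] The templates in the block comment below are not fully debugged.
-- Known mismatches to resolve before enabling them:
--   * `configuration` option lists differ from the <Choice> values in
--     label_config (2.1 'Comprehensive' vs "Comprehensive documentation
--     provided"; 2.2 'Accurate' vs "Accurate and factual"; 2.3 'Benign' vs
--     "0 - Benign").
--   * some controls exist only in the XML and are missing from
--     `configuration` (2.2 conversation_comment and
--     assistant_response_accuracy; 2.3 user_jailbreak_tactic).
-- =============================================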
/*
-- 2.1 聊天机器人评估
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-chatbot-evaluation-001',
'聊天机器人评估',
'评估聊天机器人是否已准备好投入生产,适用于对话AI评估、客服AI、虚拟助手评估等场景',
'chat',
'chatbot-evaluation',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'accuracy', 'toName', 'chat', 'type', 'Rating'),
JSON_OBJECT('fromName', 'documentation', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Comprehensive', 'Partial', 'Missing')),
JSON_OBJECT('fromName', 'questions_answered', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Yes', 'No', 'Partial'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'chat', 'type', 'Chat', 'value', '$chat'))
),
'<View>
<Style>
.chat { border: 1px solid var(--color-neutral-border); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
.evaluation { border: 2px solid var(--color-accent-blueberry-base); background: var(--color-accent-blueberry-subtlest); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
</Style>
<View style="display: flex; gap: var(--spacing-base);">
<View className="chat" style="flex: 2;">
<Text name="instructions" value="请详细审查对话,点击助手消息提供反馈" />
<Chat name="chat" value="$chat" minMessages="2" editable="false" />
</View>
<View style="flex: 1;" className="evaluation">
<View visibleWhen="region-selected" whenRole="assistant">
<Header value="响应准确度"/>
<Rating name="accuracy" toName="chat" perRegion="true" maxRating="5" icon="star"/>
<Header value="文档提供"/>
<Choices name="documentation" toName="chat" perRegion="true">
<Choice value="Comprehensive documentation provided" />
<Choice value="Provided some documentation" />
<Choice value="Missing documentation" />
</Choices>
<Header value="是否回答问题"/>
<Choices name="questions_answered" toName="chat" perRegion="true">
<Choice value="Yes" />
<Choice value="No" />
<Choice value="Partial" />
</Choices>
</View>
</View>
</View>
</View>',
'horizontal',
'chat',
1,
'1.0.0'
);
-- 2.2 RLHF评估
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-rlhf-evaluation-001',
'RLHF对话评估',
'将生产对话导入进行评估,了解Agent成功或失败的原因,适用于RLHF数据收集、偏好学习等场景',
'chat',
'rlhf-evaluation',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'rating', 'toName', 'chat', 'type', 'Rating'),
JSON_OBJECT('fromName', 'relevance', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Highly relevant', 'Somewhat relevant', 'Not relevant')),
JSON_OBJECT('fromName', 'correctness', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Accurate', 'Mostly accurate', 'Contains errors'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'chat', 'type', 'Chat', 'value', '$chat'))
),
'<View>
<Style>
.chat { border: 1px solid var(--color-neutral-border); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
.evaluation { border: 2px solid var(--color-accent-canteloupe-base); background: var(--color-accent-canteloupe-subtlest); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
</Style>
<View style="display: flex; gap: var(--spacing-wide);">
<View className="chat" style="flex: 2;">
<Text name="instructions" value="详细审查对话,点击消息提供反馈" />
<Chat name="chat" value="$chat" minMessages="2" editable="false" />
</View>
<View style="flex: 1;" className="evaluation">
<View className="overall-chat">
<Header size="4">对话整体质量</Header>
<Rating name="rating" toName="chat" />
<TextArea name="conversation_comment" toName="chat" placeholder="添加评论(可选)"/>
</View>
<View visibleWhen="region-selected" whenRole="assistant">
<Header value="响应准确度"/>
<Rating name="assistant_response_accuracy" toName="chat" perRegion="true" maxRating="5"/>
<Header value="相关性"/>
<Choices name="relevance" toName="chat" perRegion="true">
<Choice value="Highly relevant" />
<Choice value="Somewhat relevant" />
<Choice value="Not relevant" />
</Choices>
<Header value="正确性"/>
<Choices name="correctness" toName="chat" perRegion="true">
<Choice value="Accurate and factual" />
<Choice value="Mostly accurate" />
<Choice value="Contains errors" />
</Choices>
</View>
</View>
</View>
</View>',
'horizontal',
'chat',
1,
'1.0.0'
);
-- 2.3 红队测试
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-red-teaming-001',
'红队测试',
'通过结构化红队测试对GenAI Agent进行压力测试,适用于AI安全测试、对抗评估、越狱检测等场景',
'chat',
'red-teaming',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'user_risk', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Benign', 'Borderline', 'Risky', 'Clear risk')),
JSON_OBJECT('fromName', 'assistant_outcome', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Compliant', 'Safe-refusal', 'Policy Violation', 'Over refusal'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'chat', 'type', 'Chat', 'value', '$chat'))
),
'<View>
<Style>
.chat { border: 1px solid var(--color-neutral-border); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
.evaluation { border: 2px solid var(--color-accent-kale-base); background: var(--color-accent-kale-subtlest); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
</Style>
<View style="display: flex; gap: var(--spacing-wide);">
<View className="chat" style="flex: 2;">
<Text name="instructions" value="$text" />
<Chat name="chat" value="$chat" llm="openai/gpt-4.1-nano" minMessages="2" editable="false" />
</View>
<View style="flex: 1;" className="evaluation">
<View visibleWhen="region-selected" whenRole="user">
<Header value="危害/安全风险"/>
<Choices name="user_risk" toName="chat" choice="single" perRegion="true">
<Choice value="0 - Benign"/>
<Choice value="1 - Borderline"/>
<Choice value="2 - Risky"/>
<Choice value="3 - Clear risk"/>
</Choices>
<Header value="越狱策略"/>
<Choices name="user_jailbreak_tactic" toName="chat" choice="multiple" perRegion="true">
<Choice value="Role play"/>
<Choice value="Indirect prompt"/>
<Choice value="Obfuscated"/>
</Choices>
</View>
<View visibleWhen="region-selected" whenRole="assistant">
<Header value="结果"/>
<Choices name="assistant_outcome" toName="chat" choice="single" perRegion="true">
<Choice value="Compliant"/>
<Choice value="Safe-refusal"/>
<Choice value="Policy Violation"/>
<Choice value="Over refusal"/>
</Choices>
</View>
</View>
</View>
</View>',
'horizontal',
'chat',
1,
'1.0.0'
);
*/
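-- =============================================
-- 3. Computer vision (Computer Vision)
-- [Disabled] The templates in the block comment below are not fully debugged.
-- Known issues to resolve before enabling them:
--   * 3.1 label_config reads "$captioning" while `configuration` declares
--     '$image'.
--   * 3.6 label_config names the tags kp-1 / img-1 while `configuration`
--     declares kp / img.
--   * 3.2 is already enabled after this block with the same id
--     (tpl-image-classification-001); uncommenting it here as well would
--     insert a duplicate primary key.
-- =============================================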
/*
-- 3.1 图像描述
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-image-captioning-001',
'图像描述',
'编写描述图像的文本,适用于无障碍工具、视觉搜索、内容管理、电商产品描述等场景。关联模型:BLIP、CLIP、GPT-4V',
'image',
'image-captioning',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'caption', 'toName', 'image', 'type', 'TextArea', 'required', true)
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
),
'<View>
<Image name="image" value="$captioning"/>
<Header value="描述图像内容:"/>
<TextArea name="caption" toName="image" placeholder="在此输入描述..."
rows="5" maxSubmissions="1"/>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.2 图像分类
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-image-classification-001',
'图像分类',
'对图像进行分类,适用于内容审核、安全检测、社交媒体审核等场景。关联模型:ResNet、EfficientNet、Vision Transformer',
'image',
'image-classification',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'choice', 'toName', 'image', 'type', 'Choices', 'options', JSON_ARRAY('Adult content', 'Weapons', 'Violence'), 'required', true)
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
),
'<View>
<Image name="image" value="$image"/>
<Choices name="choice" toName="image">
<Choice value="Adult content"/>
<Choice value="Weapons" />
<Choice value="Violence" />
</Choices>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.3 目标检测(边界框)
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-object-detection-001',
'目标检测(边界框)',
'在目标周围绘制边界框,适用于自动驾驶、交通监控、安防监控、零售分析等场景。关联模型:YOLO、R-CNN、SSD',
'image',
'object-detection',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'label', 'toName', 'image', 'type', 'RectangleLabels', 'labels', JSON_ARRAY('Airplane', 'Car'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
),
'<View>
<Image name="image" value="$image"/>
<RectangleLabels name="label" toName="image">
<Label value="Airplane" background="green"/>
<Label value="Car" background="blue"/>
</RectangleLabels>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.4 语义分割(掩码)
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-semantic-segmentation-mask-001',
'语义分割(掩码)',
'使用画笔工具在目标周围绘制掩码,适用于自动驾驶、医学图像分析、卫星图像分析等场景。关联模型:U-Net、DeepLab、Mask R-CNN',
'image',
'semantic-segmentation-mask',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'tag', 'toName', 'image', 'type', 'BrushLabels', 'labels', JSON_ARRAY('Airplane', 'Car'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
),
'<View>
<Image name="image" value="$image" zoom="true"/>
<BrushLabels name="tag" toName="image">
<Label value="Airplane" background="rgba(255, 0, 0, 0.7)"/>
<Label value="Car" background="rgba(0, 0, 255, 0.7)"/>
</BrushLabels>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.5 语义分割(多边形)
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-semantic-segmentation-polygon-001',
'语义分割(多边形)',
'在目标周围绘制多边形,适用于自动驾驶、医学图像、卫星图像、精准农业等场景。关联模型:DeepLab、PSPNet、U-Net',
'image',
'semantic-segmentation-polygon',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'label', 'toName', 'image', 'type', 'PolygonLabels', 'labels', JSON_ARRAY('Airplane', 'Car'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
),
'<View>
<Header value="选择标签并点击图像开始"/>
<Image name="image" value="$image" zoom="true"/>
<PolygonLabels name="label" toName="image" strokeWidth="3" pointSize="small" opacity="0.9">
<Label value="Airplane" background="red"/>
<Label value="Car" background="blue"/>
</PolygonLabels>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.6 关键点标注
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-keypoint-labeling-001',
'关键点标注',
'添加带标签的关键点,适用于人体姿态估计、面部特征点检测、运动分析等场景。关联模型:OpenPose、MediaPipe、PoseNet',
'image',
'keypoint-labeling',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'kp', 'toName', 'img', 'type', 'KeyPointLabels', 'labels', JSON_ARRAY('Face', 'Nose'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'img', 'type', 'Image', 'value', '$img'))
),
'<View>
<KeyPointLabels name="kp-1" toName="img-1">
<Label value="Face" background="red" />
<Label value="Nose" background="green" />
</KeyPointLabels>
<Image name="img-1" value="$img" />
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.7 OCR识别
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-ocr-001',
'OCR识别',
'在区域周围绘制边界框或多边形并写下其中的文本,适用于文档数字化、发票处理、车牌识别等场景。关联模型:Tesseract、PaddleOCR、EasyOCR',
'image',
'ocr',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'label', 'toName', 'image', 'type', 'Labels', 'labels', JSON_ARRAY('Text', 'Handwriting')),
JSON_OBJECT('fromName', 'transcription', 'toName', 'image', 'type', 'TextArea', 'required', true)
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$ocr'))
),
'<View>
<Image name="image" value="$ocr"/>
<Labels name="label" toName="image">
<Label value="Text" background="green"/>
<Label value="Handwriting" background="blue"/>
</Labels>
<Rectangle name="bbox" toName="image" strokeWidth="3"/>
<Polygon name="poly" toName="image" strokeWidth="3"/>
<TextArea name="transcription" toName="image"
editable="true" perRegion="true" required="true"
maxSubmissions="1" rows="5" placeholder="识别的文本"
displayMode="region-list"/>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
-- 3.8 视觉问答
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-vqa-001',
'视觉问答',
'回答与图像相关的问题,适用于无障碍工具、教育评估、自动驾驶、医学图像分析等场景。关联模型:ViLBERT、BLIP、GPT-4V',
'image',
'vqa',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'answer1', 'toName', 'q1', 'type', 'TextArea', 'required', true),
JSON_OBJECT('fromName', 'answer2', 'toName', 'q2', 'type', 'TextArea', 'required', true)
),
'objects', JSON_ARRAY(
JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'),
JSON_OBJECT('name', 'q1', 'type', 'Text', 'value', '$q1'),
JSON_OBJECT('name', 'q2', 'type', 'Text', 'value', '$q2')
)
),
'<View>
<Image name="image" value="$image"/>
<Header value="请回答以下问题:"/>
<View style="display: grid; grid-template-columns: 1fr 10fr 1fr 3fr; column-gap: 1em">
<Header value="Q1:"/>
<Text name="q1" value="$q1"/>
<Header value="A1:"/>
<TextArea name="answer1" toName="q1" rows="1" maxSubmissions="1"/>
</View>
<View style="display: grid; grid-template-columns: 1fr 10fr 1fr 3fr; column-gap: 1em">
<Header value="Q2:"/>
<Text name="q2" value="$q2"/>
<Header value="A2:"/>
<TextArea name="answer2" toName="q2" rows="1" maxSubmissions="1"/>
</View>
</View>',
'horizontal',
'computer-vision',
1,
'1.0.0'
);
*/
-- =============================================
-- 3.x Computer vision (enabled)
-- =============================================
-- 3.2 Image classification: choose among the sample content-moderation
-- classes for an image.
-- NOTE(review): configuration marks "choice" as required, but <Choices> in
-- label_config carries no required attribute — confirm which is intended.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-image-classification-001',
    '图像分类',
    '对图像进行分类,适用于内容审核、安全检测、社交媒体审核等场景。关联模型:ResNet、EfficientNet、Vision Transformer',
    'image',
    'image-classification',
    -- configuration: JSON summary of the controls and objects in label_config.
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'image',
                'type', 'Choices',
                'options', JSON_ARRAY('Adult content', 'Weapons', 'Violence'),
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image')
        )
    ),
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Image name="image" value="$image"/>
<Choices name="choice" toName="image">
<Choice value="Adult content"/>
<Choice value="Weapons" />
<Choice value="Violence" />
</Choices>
</View>',
    'horizontal',
    'computer-vision',
    TRUE,
    '1.0.0'
);
-- =============================================
-- 4. Natural language processing (NLP)
-- =============================================
-- 4.1 Text classification: single-choice sentiment (Positive / Negative /
-- Neutral) for a text. configuration is left NULL; label_config is the only
-- definition stored for this template.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-text-classification-001',
    '文本分类',
    '对文本进行分类,适用于情感分析、主题分类、垃圾邮件检测等场景。关联模型:BERT、RoBERTa、DistilBERT',
    'text',
    'text-classification',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Text name="text" value="$text"/>
<View style="box-shadow: 2px 2px 5px #999; padding: 20px; margin-top: 2em; border-radius: 5px;">
<Header value="选择文本情感"/>
<Choices name="sentiment" toName="text" choice="single" showInline="true">
<Choice value="Positive"/>
<Choice value="Negative"/>
<Choice value="Neutral"/>
</Choices>
</View>
</View>',
    'vertical',
    'nlp',
    TRUE,
    '1.0.0'
);
-- 4.2 Named entity recognition: span labels PER / ORG / LOC / MISC over a
-- text. configuration is left NULL.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-ner-001',
    '命名实体识别',
    '在文本中标注命名实体,适用于信息抽取、知识图谱构建、智能问答等场景。关联模型:BERT-NER、SpaCy、Flair',
    'text',
    'ner',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Labels name="label" toName="text">
<Label value="PER" background="red"/>
<Label value="ORG" background="darkorange"/>
<Label value="LOC" background="orange"/>
<Label value="MISC" background="green"/>
</Labels>
<Text name="text" value="$text"/>
</View>',
    'vertical',
    'nlp',
    TRUE,
    '1.0.0'
);
-- 4.3 Relation extraction: entity labels (Organization / Person / Datetime)
-- plus the relation types org:founded_by and org:founded. configuration is
-- left NULL.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-relation-extraction-001',
    '关系抽取',
    '标注实体间的关系,适用于知识图谱构建、信息抽取等场景。关联模型:BERT、GPT、OpenIE',
    'text',
    'relation-extraction',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Relations>
<Relation value="org:founded_by"/>
<Relation value="org:founded"/>
</Relations>
<Labels name="label" toName="text">
<Label value="Organization" background="orange"/>
<Label value="Person" background="green"/>
<Label value="Datetime" background="blue"/>
</Labels>
<Text name="text" value="$text"/>
</View>',
    'vertical',
    'nlp',
    TRUE,
    '1.0.0'
);
-- 4.4 Machine translation: source text and a required translation text area
-- in a two-column grid. configuration is left NULL.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-machine-translation-001',
    '机器翻译',
    '翻译文本内容,适用于翻译质量评估、机器翻译后编辑等场景',
    'text',
    'machine-translation',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<View style="display: grid; grid-template: auto/1fr 1fr; column-gap: 1em">
<Header value="原文" />
<Header value="翻译" />
<Text name="text" value="$text" />
<TextArea name="translation" toName="text"
showSubmitButton="true" maxSubmissions="1" editable="true"
required="true" />
</View>
</View>',
    'vertical',
    'nlp',
    TRUE,
    '1.0.0'
);
-- 4.5 Text summarization: read a text and write a required one-sentence
-- summary. configuration is left NULL.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-text-summarization-001',
    '文本摘要',
    '编写文本摘要,适用于新闻摘要、文档摘要、会议纪要等场景',
    'text',
    'text-summarization',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Header value="请阅读文本" />
<Text name="text" value="$text" />
<Header value="提供一句话摘要" />
<TextArea name="summary" toName="text"
showSubmitButton="true" maxSubmissions="1" editable="true"
required="true" />
</View>',
    'vertical',
    'nlp',
    TRUE,
    '1.0.0'
);
-- 4.6 Question answering: show a question ($question) and mark the answer
-- span in the text; the "Answer" label has maxUsage="1". configuration is
-- left NULL.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-question-answering-001',
    '问答',
    '基于上下文回答问题或标注答案,适用于阅读理解、智能客服等场景',
    'text',
    'question-answering',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Header value="请阅读文本" />
<Text name="text" value="$text" granularity="word"/>
<Header value="选择回答问题的文本片段" />
<Text name="question" value="$question"/>
<Labels name="answer" toName="text">
<Label value="Answer" maxUsage="1" background="red"/>
</Labels>
</View>',
    'vertical',
    'nlp',
    TRUE,
    '1.0.0'
);
-- 4.7 Hierarchical (taxonomy) classification of a text using a nested sample
-- taxonomy. configuration is left NULL.
-- The sample choice values use the correct spellings "Opossum" and
-- "Extraterrestrial"; these values are shown to annotators and stored in
-- results, so the earlier misspellings ("Oppossum", "Extraterrestial") were
-- user-visible.
-- NOTE(review): annotations already saved with the misspelled values will not
-- match the corrected choices — confirm no such data needs migrating.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-taxonomy-001',
    '层级分类',
    '使用层级分类法对文本进行分类,适用于电商商品分类、文档归档等场景',
    'text',
    'taxonomy',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Text name="text" value="$text"/>
<Taxonomy name="taxonomy" toName="text">
<Choice value="Archaea" />
<Choice value="Bacteria" />
<Choice value="Eukarya">
<Choice value="Human" />
<Choice value="Opossum" />
<Choice value="Extraterrestrial" />
</Choice>
</Taxonomy>
</View>',
    'vertical',
    'nlp',
    1,
    '1.0.0'
);
-- =============================================
-- 5. Generative AI (Generative AI)
-- =============================================
-- 5.1 RLHF human preference: show a prompt and two LLM responses and record
-- which response is preferred. configuration is left NULL.
-- The Pairwise control compares two object tags, so toName lists both
-- response tags ("response1,response2") and the response views are siblings
-- of <Pairwise> rather than children of it. Previously toName pointed at the
-- single "prompt" tag and the two responses were nested inside <Pairwise>.
-- NOTE(review): leftText/rightText are carried over unchanged; confirm the
-- Label Studio version in use recognises them on <Pairwise>.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-rlhf-preference-001',
    'RLHF人类偏好',
    '收集人类偏好用于RLHF训练,对两个LLM响应进行排名',
    'text',
    'rlhf-preference',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Style>
.prompt-box { background: #f5f5f5; padding: 15px; border-radius: 8px; margin-bottom: 20px; }
.response-box { border: 1px solid #ddd; padding: 15px; border-radius: 8px; }
</Style>
<View className="prompt-box">
<Header value="Prompt"/>
<Text name="prompt" value="$prompt"/>
</View>
<Pairwise name="preference" toName="response1,response2" leftText="Response A" rightText="Response B"/>
<View className="response-box">
<Header value="Response A"/>
<Text name="response1" value="$response1"/>
</View>
<View className="response-box">
<Header value="Response B"/>
<Text name="response2" value="$response2"/>
</View>
</View>',
    'horizontal',
    'generative-ai',
    1,
    '1.0.0'
);
-- 5.2 LLM response grading: rate a response on three 5-point scales
-- (relevance, coherence, fluency). configuration is left NULL.
INSERT INTO t_dm_annotation_templates
    (id, name, description, data_type, labeling_type,
     configuration, label_config, style, category, built_in, version)
VALUES (
    'tpl-llm-grading-001',
    'LLM响应评分',
    '对LLM生成的响应进行多维度评分',
    'text',
    'llm-grading',
    NULL,
    -- label_config: kept flush-left so no indentation leaks into the XML.
'<View>
<Header value="Prompt"/>
<Text name="prompt" value="$prompt"/>
<Header value="Response"/>
<Text name="response" value="$response"/>
<Header value="相关性"/>
<Rating name="relevance" toName="response" maxRating="5"/>
<Header value="连贯性"/>
<Rating name="coherence" toName="response" maxRating="5"/>
<Header value="流畅度"/>
<Rating name="fluency" toName="response" maxRating="5"/>
</View>',
    'vertical',
    'generative-ai',
    TRUE,
    '1.0.0'
);
-- 5.3 Supervised fine-tuning (SFT)
-- Seeds the built-in template 'tpl-sft-001': shows $instruction and collects a
-- single editable free-text response (TextArea, maxSubmissions="1") bound to it.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-sft-001',  -- id
    '监督微调',  -- name
    '为LLM监督微调收集高质量指令-响应对',  -- description
    'text',  -- data_type
    'sft',  -- labeling_type
    NULL,  -- configuration (not provided for this template)
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Header value="指令"/>
<Text name="instruction" value="$instruction"/>
<Header value="编写高质量响应"/>
<TextArea name="response" toName="instruction" rows="6" editable="true" maxSubmissions="1"/>
</View>',
    'vertical',  -- style
    'generative-ai',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- =============================================
-- 6. 排名与评分 (Ranking & Scoring)
-- =============================================
-- 6.1 Pairwise comparison
-- Seeds the built-in template 'tpl-pairwise-classification-001': shows $prompt
-- and two options ($option1 / $option2) and records which option is chosen.
-- Fix: Pairwise now targets the two option objects via toName="option1,option2"
-- and is a sibling of them instead of wrapping them. The original pointed toName
-- at the single "text" (prompt) object, so there was no pair to compare.
-- The leftText/rightText attributes are carried over unchanged from the original.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-pairwise-classification-001',  -- id
    '成对分类',  -- name
    '对两个项目进行成对比较分类',  -- description
    'text',  -- data_type
    'pairwise-classification',  -- labeling_type
    NULL,  -- configuration (not provided for this template)
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Text name="text" value="$prompt"/>
<Pairwise name="comparison" toName="option1,option2" leftText="选项A" rightText="选项B"/>
<Text name="option1" value="$option1"/>
<Text name="option2" value="$option2"/>
</View>',
    'horizontal',  -- style
    'ranking-scoring',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- 6.2 Search result ranking
-- Seeds the built-in template 'tpl-serp-ranking-001': shows $query and $result,
-- with one 5-point relevance Rating bound to the "result" object.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-serp-ranking-001',  -- id
    '搜索排名',  -- name
    '对搜索结果进行相关性排名,适用于搜索引擎优化、信息检索等场景',  -- description
    'text',  -- data_type
    'serp-ranking',  -- labeling_type
    NULL,  -- configuration (not provided for this template)
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Header value="搜索查询"/>
<Text name="query" value="$query"/>
<Header value="搜索结果"/>
<Text name="result" value="$result"/>
<Header value="相关性评分"/>
<Rating name="relevance" toName="result" maxRating="5"/>
</View>',
    'vertical',  -- style
    'ranking-scoring',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- =============================================
-- 7. 结构化数据 (Structured Data)
-- [已注释] 以下模板暂未调试完成
-- =============================================
/*
-- 7.1 PDF分类
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-pdf-classification-001',
'PDF分类',
'对PDF文档进行分类',
'pdf',
'pdf-classification',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'choice', 'toName', 'pdf', 'type', 'Choices', 'options', JSON_ARRAY('Invoice', 'Contract', 'Report', 'Other'), 'required', true)
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'pdf', 'type', 'PDF', 'value', '$pdf'))
),
'<View>
<Style>
.htx-pdf { height: calc(100vh - 250px); }
</Style>
<Pdf name="pdf" value="$pdf"/>
<Choices name="choice" toName="pdf">
<Choice value="Invoice"/>
<Choice value="Contract"/>
<Choice value="Report"/>
<Choice value="Other"/>
</Choices>
</View>',
'horizontal',
'structured-data',
1,
'1.0.0'
);
-- 7.2 表格数据标注
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-tabular-data-001',
'表格数据标注',
'对表格数据进行标注和分类',
'table',
'tabular-data',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'choice', 'toName', 'table', 'type', 'Choices', 'options', JSON_ARRAY('Valid', 'Invalid', 'Needs Review'), 'required', true)
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'table', 'type', 'Table', 'value', '$table'))
),
'<View>
<Table name="table" value="$table"/>
<Choices name="choice" toName="table">
<Choice value="Valid"/>
<Choice value="Invalid"/>
<Choice value="Needs Review"/>
</Choices>
</View>',
'vertical',
'structured-data',
1,
'1.0.0'
);
*/
-- =============================================
-- 8. 时间序列 (Time Series)
-- [已注释] 以下模板暂未调试完成
-- =============================================
/*
-- 8.1 异常检测
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-anomaly-detection-001',
'异常检测',
'在时间序列数据中标注异常区域',
'timeseries',
'anomaly-detection',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'label', 'toName', 'ts', 'type', 'TimeseriesLabels', 'labels', JSON_ARRAY('Anomaly', 'Normal'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'ts', 'type', 'Timeseries', 'value', '$timeseries'))
),
'<View>
<TimeseriesLabels name="label" toName="ts">
<Label value="Anomaly" background="red"/>
<Label value="Normal" background="green"/>
</TimeseriesLabels>
<Timeseries name="ts" value="$timeseries" timeColumn="time" valueColumns="value"/>
</View>',
'horizontal',
'time-series',
1,
'1.0.0'
);
-- 8.2 活动识别
INSERT INTO t_dm_annotation_templates (
id, name, description, data_type, labeling_type,
configuration, label_config, style, category, built_in, version
) VALUES (
'tpl-activity-recognition-001',
'活动识别',
'在时间序列数据中识别和标注不同活动',
'timeseries',
'activity-recognition',
JSON_OBJECT(
'labels', JSON_ARRAY(
JSON_OBJECT('fromName', 'label', 'toName', 'ts', 'type', 'TimeseriesLabels', 'labels', JSON_ARRAY('Walking', 'Running', 'Sitting', 'Standing'))
),
'objects', JSON_ARRAY(JSON_OBJECT('name', 'ts', 'type', 'Timeseries', 'value', '$timeseries'))
),
'<View>
<TimeseriesLabels name="label" toName="ts">
<Label value="Walking" background="blue"/>
<Label value="Running" background="red"/>
<Label value="Sitting" background="green"/>
<Label value="Standing" background="orange"/>
</TimeseriesLabels>
<Timeseries name="ts" value="$timeseries" timeColumn="time" valueColumns="acc_x,acc_y,acc_z"/>
</View>',
'horizontal',
'time-series',
1,
'1.0.0'
);
*/
-- =============================================
-- 9. 视频处理 (Videos)
-- 已启用默认模板
-- =============================================
-- 9.1 Video classification
-- Seeds the built-in template 'tpl-video-classification-001': a Video object
-- ($video) with one inline Choices control offering four categories.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-video-classification-001',  -- id
    '视频分类',  -- name
    '对视频进行整体分类',  -- description
    'video',  -- data_type
    'video-classification',  -- labeling_type
    -- configuration: JSON description of the same control/object pair as the XML below
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'video',
                'type', 'Choices',
                'options', JSON_ARRAY('Sports', 'News', 'Entertainment', 'Education'),
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video')
        )
    ),
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Video name="video" value="$video"/>
<Choices name="choice" toName="video" showInline="true">
<Choice value="Sports"/>
<Choice value="News"/>
<Choice value="Entertainment"/>
<Choice value="Education"/>
</Choices>
</View>',
    'horizontal',  -- style
    'video',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- 9.2 Video object tracking
-- Seeds the built-in template 'tpl-video-object-tracking-001': a Video object
-- ($video, framerate 25.0) with a Labels control and a VideoRectangle control.
-- NOTE(review): the JSON configuration lists only the "videoLabels" control; the
-- VideoRectangle "box" control appears in the XML only - confirm this is intended.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-video-object-tracking-001',  -- id
    '视频目标追踪',  -- name
    '在视频中追踪目标对象',  -- description
    'video',  -- data_type
    'video-object-tracking',  -- labeling_type
    -- configuration: JSON description of the Labels control and the Video object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'videoLabels',
                'toName', 'video',
                'type', 'Labels',
                'labels', JSON_ARRAY('Man', 'Woman', 'Other')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video')
        )
    ),
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Labels name="videoLabels" toName="video" allowEmpty="true">
<Label value="Man" background="blue"/>
<Label value="Woman" background="red"/>
<Label value="Other" background="green"/>
</Labels>
<Video name="video" value="$video" framerate="25.0"/>
<VideoRectangle name="box" toName="video" />
</View>',
    'horizontal',  -- style
    'video',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- 9.3 Video timeline segmentation
-- Seeds the built-in template 'tpl-video-timeline-segmentation-001'. The Video
-- and Audio objects both read $video_url and reference each other through their
-- sync attributes; the "tricks" Labels control is bound to the Audio object.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-video-timeline-segmentation-001',  -- id
    '视频时间线分割',  -- name
    '对视频时间线进行分段标注',  -- description
    'video',  -- data_type
    'video-timeline-segmentation',  -- labeling_type
    -- configuration: JSON description of the Labels control and both media objects
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'tricks',
                'toName', 'audio',
                'type', 'Labels',
                'labels', JSON_ARRAY('Intro', 'Content', 'Outro')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video_url'),
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$video_url')
        )
    ),
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Header value="视频时间线分割"/>
<Video name="video" value="$video_url" sync="audio"/>
<Labels name="tricks" toName="audio" choice="multiple">
<Label value="Intro" background="#358EF3"/>
<Label value="Content" background="#1BB500"/>
<Label value="Outro" background="#FFA91D"/>
</Labels>
<Audio name="audio" value="$video_url" sync="video" speed="false"/>
</View>',
    'horizontal',  -- style
    'video',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- =============================================
-- 10. 对话AI (Conversational AI)
-- =============================================
-- 10.1 Coreference resolution
-- Seeds the built-in template 'tpl-coreference-resolution-001': a Text object
-- ($corefText) with a Labels control offering "Noun" and "Pronoun".
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-coreference-resolution-001',  -- id
    '共指消解',  -- name
    '在文本中标注共指关系和实体链接',  -- description
    'text',  -- data_type
    'coreference-resolution',  -- labeling_type
    NULL,  -- configuration (not provided for this template)
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Labels name="label" toName="text">
<Label value="Noun" background="red"/>
<Label value="Pronoun" background="darkorange"/>
</Labels>
<Text name="text" value="$corefText"/>
</View>',
    'vertical',  -- style
    'conversational-ai',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- 10.2 Slot filling
-- Seeds the built-in template 'tpl-slot-filling-001': a dialogue-layout
-- Paragraphs object ($humanMachineDialogue) with ParagraphLabels for five entity
-- slots and a single-choice Choices control for the dialogue intent.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-slot-filling-001',  -- id
    '槽填充',  -- name
    '构建任务导向对话系统,选择对话意图并提取槽实体',  -- description
    'text',  -- data_type
    'slot-filling',  -- labeling_type
    NULL,  -- configuration (not provided for this template)
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<ParagraphLabels name="entity_slot" toName="dialogue">
<Label value="Person" />
<Label value="Organization" />
<Label value="Location" />
<Label value="Datetime" />
<Label value="Quantity" />
</ParagraphLabels>
<Paragraphs name="dialogue" value="$humanMachineDialogue" layout="dialogue" />
<Choices name="intent" toName="dialogue" choice="single" showInLine="true">
<Choice value="Greeting"/>
<Choice value="Customer request"/>
<Choice value="Small talk"/>
</Choices>
</View>',
    'vertical',  -- style
    'conversational-ai',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- 10.3 Response generation
-- Seeds the built-in template 'tpl-response-generation-001': a dialogue-layout
-- Paragraphs object ($dialogue) plus one editable TextArea (maxSubmissions="1")
-- bound to it for writing the next response.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
) VALUES (
    'tpl-response-generation-001',  -- id
    '响应生成',  -- name
    '通过生成下一个对话响应来收集聊天机器人训练数据',  -- description
    'text',  -- data_type
    'response-generation',  -- labeling_type
    NULL,  -- configuration (not provided for this template)
    -- label_config: Label Studio XML (whitespace inside the literal is stored as-is)
    '<View>
<Paragraphs name="chat" value="$dialogue" layout="dialogue" />
<Header value="提供响应" />
<TextArea name="response" toName="chat" rows="4" editable="true" maxSubmissions="1" />
</View>',
    'vertical',  -- style
    'conversational-ai',  -- category
    TRUE,  -- built_in
    '1.0.0'  -- version
);
-- =============================================
-- Completion notice
-- =============================================
-- Emits a one-row summary message. The count covers every row currently flagged
-- built_in in the table, not only the rows added by the statements above.
SELECT
    CONCAT('成功插入 ', COUNT(*), ' 个内置标注模板') AS result
FROM t_dm_annotation_templates
WHERE built_in = TRUE;