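-- Source: DataMate repository (forked copy); file size: 1512 lines, 54 KiB, SQL.
-- Change summary: added IF NOT EXISTS to the CREATE TABLE statements of the
-- annotation template, labeling project, annotation result and auto-annotation
-- task tables, so that re-running this script no longer fails with
-- "table already exists" and the initialization script is more robust.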
-- Database initialization script for the data annotation service.
-- Targets the datamate database.
-- Designed around the Label Studio template system.

-- Select the datamate schema for the statements that follow.
USE datamate;

-- =====================================
-- DDL statements - table structure definitions
-- =====================================

-- Annotation configuration template table.
-- One row per labeling template. IF NOT EXISTS makes the statement safe to
-- re-run; it does not modify a table that already exists.
CREATE TABLE IF NOT EXISTS t_dm_annotation_templates (
    id VARCHAR(64) PRIMARY KEY COMMENT '模板ID(UUID或自定义ID)',  -- template id (UUID or custom id)
    name VARCHAR(100) NOT NULL COMMENT '模板名称',  -- template name
    description VARCHAR(500) COMMENT '模板描述',  -- template description
    data_type VARCHAR(50) NOT NULL COMMENT '数据类型: text/image/audio/video/pdf/timeseries/chat/html/table',  -- data type
    labeling_type VARCHAR(50) NOT NULL COMMENT '标注类型',  -- labeling type
    configuration JSON NOT NULL COMMENT '标注配置(包含labels定义等)',  -- annotation configuration (label definitions etc.)
    label_config TEXT COMMENT 'Label Studio XML配置(内置模板预定义)',  -- Label Studio XML config (predefined for built-in templates)
    style VARCHAR(32) NOT NULL COMMENT '样式配置: horizontal/vertical',  -- layout style
    category VARCHAR(50) DEFAULT 'custom' COMMENT '模板分类',  -- template category
    built_in BOOLEAN DEFAULT FALSE COMMENT '是否系统内置模板',  -- whether this is a system built-in template
    version VARCHAR(20) DEFAULT '1.0' COMMENT '模板版本',  -- template version
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',  -- creation time
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',  -- last update time
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',  -- deletion time (soft delete)
    INDEX idx_data_type (data_type),
    INDEX idx_labeling_type (labeling_type),
    INDEX idx_category (category),
    INDEX idx_built_in (built_in)
) COMMENT='标注配置模板表';

-- Labeling project table.
-- Each project may point at a row of t_dm_annotation_templates through
-- template_id; deleting that template sets template_id to NULL
-- (ON DELETE SET NULL) instead of removing the project.
CREATE TABLE IF NOT EXISTS t_dm_labeling_projects (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',  -- dataset id
    name VARCHAR(100) NOT NULL COMMENT '项目名称',  -- project name
    labeling_project_id VARCHAR(8) NOT NULL COMMENT 'Label Studio项目ID',  -- Label Studio project id
    template_id VARCHAR(64) NULL COMMENT '使用的模板ID',  -- id of the template in use
    configuration JSON COMMENT '项目配置(可能包含对模板的自定义修改)',  -- project config (may hold custom changes to the template)
    progress JSON COMMENT '项目进度信息',  -- project progress information
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',  -- creation time
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',  -- last update time
    deleted_at TIMESTAMP NULL COMMENT '删除时间(软删除)',  -- deletion time (soft delete)
    FOREIGN KEY (template_id) REFERENCES t_dm_annotation_templates(id) ON DELETE SET NULL,
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_template_id (template_id),
    INDEX idx_labeling_project_id (labeling_project_id)
) COMMENT='标注项目表';

-- Annotation result table.
-- Stores the raw Label Studio annotation JSON; uk_project_file allows at most
-- one row per (project_id, file_id) pair.
-- NOTE(review): idx_project_id duplicates the leftmost prefix of
-- uk_project_file and looks redundant; it is kept here so the schema stays
-- identical to existing deployments — confirm before dropping it.
CREATE TABLE IF NOT EXISTS t_dm_annotation_results (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    project_id VARCHAR(36) NOT NULL COMMENT '标注项目ID',  -- labeling project id
    file_id VARCHAR(36) NOT NULL COMMENT '文件ID',  -- file id
    annotation JSON NOT NULL COMMENT 'Label Studio annotation 原始JSON',  -- raw Label Studio annotation JSON
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',  -- creation time
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',  -- last update time
    UNIQUE KEY uk_project_file (project_id, file_id),
    INDEX idx_project_id (project_id),
    INDEX idx_file_id (file_id),
    INDEX idx_updated_at (updated_at)
) COMMENT='标注结果表';

-- Auto-annotation task table.
-- One row per automatic annotation task, with its configuration, status,
-- progress counters and output location.
CREATE TABLE IF NOT EXISTS t_dm_auto_annotation_tasks (
    id VARCHAR(36) PRIMARY KEY COMMENT 'UUID',
    name VARCHAR(255) NOT NULL COMMENT '任务名称',  -- task name
    dataset_id VARCHAR(36) NOT NULL COMMENT '数据集ID',  -- dataset id
    dataset_name VARCHAR(255) COMMENT '数据集名称',  -- dataset name
    config JSON NOT NULL COMMENT '任务配置',  -- task configuration
    file_ids JSON COMMENT '文件ID列表',  -- list of file ids
    status VARCHAR(50) NOT NULL DEFAULT 'pending' COMMENT '任务状态',  -- task status
    progress INT DEFAULT 0 COMMENT '任务进度',  -- task progress
    total_images INT DEFAULT 0 COMMENT '总图片数',  -- total number of images
    processed_images INT DEFAULT 0 COMMENT '已处理图片数',  -- number of processed images
    detected_objects INT DEFAULT 0 COMMENT '检测到的对象数',  -- number of detected objects
    output_path VARCHAR(500) COMMENT '输出路径',  -- output path
    error_message TEXT COMMENT '错误信息',  -- error message
    created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',  -- creation time
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',  -- last update time
    completed_at TIMESTAMP NULL COMMENT '完成时间',  -- completion time
    deleted_at TIMESTAMP NULL COMMENT '删除时间',  -- deletion time
    INDEX idx_dataset_id (dataset_id),
    INDEX idx_status (status),
    INDEX idx_created_at (created_at)
) COMMENT='自动标注任务表';

-- =====================================
-- DML statements - built-in annotation template data
-- =====================================

-- Clear the existing built-in templates (custom templates are kept) so the
-- INSERT statements below can re-create them.
-- t_dm_labeling_projects.template_id references this table with
-- ON DELETE SET NULL, so a plain DELETE would detach every project from its
-- built-in template each time this script is re-run. Foreign key checks are
-- suspended for this single statement so that referential action does not
-- fire; the rows are re-inserted below under the same ids. The previous
-- session setting is saved and restored rather than forced to 1.
-- NOTE(review): a built-in template id that is later dropped from this script
-- would leave dangling template_id values — confirm that is acceptable.
SET @dm_saved_foreign_key_checks = @@FOREIGN_KEY_CHECKS;
SET FOREIGN_KEY_CHECKS = 0;
DELETE FROM t_dm_annotation_templates WHERE built_in = 1;
SET FOREIGN_KEY_CHECKS = @dm_saved_foreign_key_checks;

-- =============================================
-- 1. Audio/Speech Processing
-- =============================================

-- 1.1 Automatic speech recognition (segmented).
-- Seeds built-in template 'tpl-asr-segments-001': Speech/Noise region labels on
-- an Audio object plus a per-region transcription TextArea.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-asr-segments-001',
    '语音识别(分段)',
    '对音频进行语音活动分段并转录文本,适用于呼叫中心转录、会议记录、播客转录、法庭记录等场景。关联模型:Whisper、Wav2Vec2、DeepSpeech',
    'audio',
    'asr-segments',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'labels', 'toName', 'audio', 'type', 'Labels', 'labels', JSON_ARRAY('Speech', 'Noise')),
            JSON_OBJECT('fromName', 'transcription', 'toName', 'audio', 'type', 'TextArea', 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'))
    ),
    '<View>
<Labels name="labels" toName="audio">
<Label value="Speech" />
<Label value="Noise" />
</Labels>
<Audio name="audio" value="$audio"/>
<TextArea name="transcription" toName="audio"
rows="2" editable="true"
perRegion="true" required="true" />
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- 1.2 Automatic speech recognition.
-- Seeds built-in template 'tpl-asr-001': an Audio object with a single
-- transcription TextArea.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-asr-001',
    '语音识别',
    '转录音频内容,适用于播客转录、会议记录、客服通话、字幕生成等场景。关联模型:Whisper、Wav2Vec、DeepSpeech',
    'audio',
    'asr',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'transcription', 'toName', 'audio', 'type', 'TextArea', 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'))
    ),
    '<View>
<Audio name="audio" value="$audio" zoom="true" hotkey="ctrl+enter" />
<Header value="转录音频内容" />
<TextArea name="transcription" toName="audio"
rows="4" editable="true" maxSubmissions="1" />
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- 1.3 Conversation analysis.
-- Seeds built-in template 'tpl-conversation-analysis-001': an Audio object
-- synced with a Paragraphs transcript, labeled Positive/Negative per paragraph.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-conversation-analysis-001',
    '对话分析',
    '分析对话语句并标注事实和情感方面,适用于呼叫中心质检、客服分析、会议分析等场景',
    'audio',
    'conversation-analysis',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'text', 'type', 'ParagraphLabels', 'labels', JSON_ARRAY('Positive', 'Negative'))
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'),
            JSON_OBJECT('name', 'text', 'type', 'Paragraphs', 'value', '$text')
        )
    ),
    '<View>
<Audio name="audio" value="$audio" hotkey="space" sync="text"/>
<Header value="对话记录"/>
<Paragraphs audioUrl="$audio" sync="audio" name="text" value="$text"
layout="dialogue" textKey="text" nameKey="author"
granularity="paragraph" contextscroll="true" />
<View style="position: sticky">
<Header value="情感标签"/>
<ParagraphLabels name="label" toName="text">
<Label value="Positive" background="#00ff00"/>
<Label value="Negative" background="#ff0000"/>
</ParagraphLabels>
</View>
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- 1.4 Intent classification.
-- Seeds built-in template 'tpl-intent-classification-001': Segment region
-- labels on an Audio object plus a required per-region intent choice.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-intent-classification-001',
    '意图分类',
    '进行语音活动分段并选择语音意图,适用于语音助手、智能音箱、IVR系统等场景',
    'audio',
    'intent-classification',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'labels', 'toName', 'audio', 'type', 'Labels', 'labels', JSON_ARRAY('Segment')),
            JSON_OBJECT('fromName', 'intent', 'toName', 'audio', 'type', 'Choices', 'options', JSON_ARRAY('Question', 'Request', 'Satisfied', 'Interested', 'Unsatisfied'), 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'))
    ),
    '<View>
<Labels name="labels" toName="audio">
<Label value="Segment" />
</Labels>
<Audio name="audio" value="$audio"/>
<Choices name="intent" toName="audio" perRegion="true" required="true">
<Choice value="Question" />
<Choice value="Request" />
<Choice value="Satisfied" />
<Choice value="Interested" />
<Choice value="Unsatisfied" />
</Choices>
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- 1.5 Signal quality detection.
-- Seeds built-in template 'tpl-signal-quality-001': a Rating control
-- (maxRating 10) attached to an Audio object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-signal-quality-001',
    '信号质量检测',
    '评估音频信号质量,适用于电信、呼叫中心质检、音频制作、VoIP质量评估等场景',
    'audio',
    'signal-quality',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'rating', 'toName', 'audio', 'type', 'Rating')
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'))
    ),
    '<View>
<Rating name="rating" toName="audio" maxRating="10" icon="star" size="medium" />
<Audio name="audio" value="$audio"/>
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- 1.6 Sound event detection.
-- Seeds built-in template 'tpl-sound-event-001': 'Event A'/'Event B' region
-- labels on an Audio object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-sound-event-001',
    '声音事件检测',
    '选择音频片段并分类声音事件,适用于安防监控、智慧城市、环境监测、工业监测等场景',
    'audio',
    'sound-event-detection',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'audio', 'type', 'Labels', 'labels', JSON_ARRAY('Event A', 'Event B'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'))
    ),
    '<View>
<Labels name="label" toName="audio" zoom="true" hotkey="ctrl+enter">
<Label value="Event A" background="red"/>
<Label value="Event B" background="green"/>
</Labels>
<Audio name="audio" value="$audio"/>
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- 1.7 Speaker segmentation.
-- Seeds built-in template 'tpl-speaker-segmentation-001': 'Speaker one' /
-- 'Speaker two' region labels on an Audio object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-speaker-segmentation-001',
    '说话人分割',
    '执行说话人分割/话者分离任务,适用于会议转录、播客制作、呼叫中心分析、法庭记录等场景',
    'audio',
    'speaker-segmentation',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'audio', 'type', 'Labels', 'labels', JSON_ARRAY('Speaker one', 'Speaker two'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$audio'))
    ),
    '<View>
<Labels name="label" toName="audio" zoom="true" hotkey="ctrl+enter">
<Label value="Speaker one" background="#00FF00"/>
<Label value="Speaker two" background="#12ad59"/>
</Labels>
<Audio name="audio" value="$audio" />
</View>',
    'horizontal',
    'audio-speech',
    1,
    '1.0.0'
);

-- =============================================
-- 2. Chat evaluation (Chat)
-- =============================================

-- 2.1 Chatbot evaluation.
-- Seeds built-in template 'tpl-chatbot-evaluation-001': per-message accuracy
-- rating plus documentation / questions-answered choices on a Chat object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-chatbot-evaluation-001',
    '聊天机器人评估',
    '评估聊天机器人是否已准备好投入生产,适用于对话AI评估、客服AI、虚拟助手评估等场景',
    'chat',
    'chatbot-evaluation',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'accuracy', 'toName', 'chat', 'type', 'Rating'),
            -- Options must equal the <Choice value="..."> strings in label_config below.
            JSON_OBJECT('fromName', 'documentation', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Comprehensive documentation provided', 'Provided some documentation', 'Missing documentation')),
            JSON_OBJECT('fromName', 'questions_answered', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Yes', 'No', 'Partial'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'chat', 'type', 'Chat', 'value', '$chat'))
    ),
    '<View>
<Style>
.chat { border: 1px solid var(--color-neutral-border); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
.evaluation { border: 2px solid var(--color-accent-blueberry-base); background: var(--color-accent-blueberry-subtlest); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
</Style>
<View style="display: flex; gap: var(--spacing-base);">
<View className="chat" style="flex: 2;">
<Text name="instructions" value="请详细审查对话,点击助手消息提供反馈" />
<Chat name="chat" value="$chat" minMessages="2" editable="false" />
</View>
<View style="flex: 1;" className="evaluation">
<View visibleWhen="region-selected" whenRole="assistant">
<Header value="响应准确度"/>
<Rating name="accuracy" toName="chat" perRegion="true" maxRating="5" icon="star"/>
<Header value="文档提供"/>
<Choices name="documentation" toName="chat" perRegion="true">
<Choice value="Comprehensive documentation provided" />
<Choice value="Provided some documentation" />
<Choice value="Missing documentation" />
</Choices>
<Header value="是否回答问题"/>
<Choices name="questions_answered" toName="chat" perRegion="true">
<Choice value="Yes" />
<Choice value="No" />
<Choice value="Partial" />
</Choices>
</View>
</View>
</View>
</View>',
    'horizontal',
    'chat',
    1,
    '1.0.0'
);

-- 2.2 RLHF evaluation.
-- Seeds built-in template 'tpl-rlhf-evaluation-001': an overall conversation
-- rating plus per-message relevance / correctness choices on a Chat object.
-- NOTE(review): label_config also declares the controls 'conversation_comment'
-- (TextArea) and 'assistant_response_accuracy' (Rating), which are not listed
-- in configuration.labels — confirm whether that omission is intended.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-rlhf-evaluation-001',
    'RLHF对话评估',
    '将生产对话导入进行评估,了解Agent成功或失败的原因,适用于RLHF数据收集、偏好学习等场景',
    'chat',
    'rlhf-evaluation',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'rating', 'toName', 'chat', 'type', 'Rating'),
            JSON_OBJECT('fromName', 'relevance', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Highly relevant', 'Somewhat relevant', 'Not relevant')),
            -- Options must equal the <Choice value="..."> strings in label_config below.
            JSON_OBJECT('fromName', 'correctness', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Accurate and factual', 'Mostly accurate', 'Contains errors'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'chat', 'type', 'Chat', 'value', '$chat'))
    ),
    '<View>
<Style>
.chat { border: 1px solid var(--color-neutral-border); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
.evaluation { border: 2px solid var(--color-accent-canteloupe-base); background: var(--color-accent-canteloupe-subtlest); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
</Style>
<View style="display: flex; gap: var(--spacing-wide);">
<View className="chat" style="flex: 2;">
<Text name="instructions" value="详细审查对话,点击消息提供反馈" />
<Chat name="chat" value="$chat" minMessages="2" editable="false" />
</View>
<View style="flex: 1;" className="evaluation">
<View className="overall-chat">
<Header size="4">对话整体质量</Header>
<Rating name="rating" toName="chat" />
<TextArea name="conversation_comment" toName="chat" placeholder="添加评论(可选)"/>
</View>
<View visibleWhen="region-selected" whenRole="assistant">
<Header value="响应准确度"/>
<Rating name="assistant_response_accuracy" toName="chat" perRegion="true" maxRating="5"/>
<Header value="相关性"/>
<Choices name="relevance" toName="chat" perRegion="true">
<Choice value="Highly relevant" />
<Choice value="Somewhat relevant" />
<Choice value="Not relevant" />
</Choices>
<Header value="正确性"/>
<Choices name="correctness" toName="chat" perRegion="true">
<Choice value="Accurate and factual" />
<Choice value="Mostly accurate" />
<Choice value="Contains errors" />
</Choices>
</View>
</View>
</View>
</View>',
    'horizontal',
    'chat',
    1,
    '1.0.0'
);

-- 2.3 Red teaming.
-- Seeds built-in template 'tpl-red-teaming-001': risk choices for user messages
-- and outcome choices for assistant messages on a Chat object.
-- NOTE(review): label_config also reads a '$text' task field (instructions)
-- and declares the control 'user_jailbreak_tactic'; neither appears in the
-- configuration JSON — confirm whether that omission is intended.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-red-teaming-001',
    '红队测试',
    '通过结构化红队测试对GenAI Agent进行压力测试,适用于AI安全测试、对抗评估、越狱检测等场景',
    'chat',
    'red-teaming',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            -- Options must equal the <Choice value="..."> strings in label_config
            -- below, which carry a numeric "N - " prefix.
            JSON_OBJECT('fromName', 'user_risk', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('0 - Benign', '1 - Borderline', '2 - Risky', '3 - Clear risk')),
            JSON_OBJECT('fromName', 'assistant_outcome', 'toName', 'chat', 'type', 'Choices', 'options', JSON_ARRAY('Compliant', 'Safe-refusal', 'Policy Violation', 'Over refusal'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'chat', 'type', 'Chat', 'value', '$chat'))
    ),
    '<View>
<Style>
.chat { border: 1px solid var(--color-neutral-border); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
.evaluation { border: 2px solid var(--color-accent-kale-base); background: var(--color-accent-kale-subtlest); padding: var(--spacing-tight); border-radius: var(--corner-radius-medium); }
</Style>
<View style="display: flex; gap: var(--spacing-wide);">
<View className="chat" style="flex: 2;">
<Text name="instructions" value="$text" />
<Chat name="chat" value="$chat" llm="openai/gpt-4.1-nano" minMessages="2" editable="false" />
</View>
<View style="flex: 1;" className="evaluation">
<View visibleWhen="region-selected" whenRole="user">
<Header value="危害/安全风险"/>
<Choices name="user_risk" toName="chat" choice="single" perRegion="true">
<Choice value="0 - Benign"/>
<Choice value="1 - Borderline"/>
<Choice value="2 - Risky"/>
<Choice value="3 - Clear risk"/>
</Choices>
<Header value="越狱策略"/>
<Choices name="user_jailbreak_tactic" toName="chat" choice="multiple" perRegion="true">
<Choice value="Role play"/>
<Choice value="Indirect prompt"/>
<Choice value="Obfuscated"/>
</Choices>
</View>
<View visibleWhen="region-selected" whenRole="assistant">
<Header value="结果"/>
<Choices name="assistant_outcome" toName="chat" choice="single" perRegion="true">
<Choice value="Compliant"/>
<Choice value="Safe-refusal"/>
<Choice value="Policy Violation"/>
<Choice value="Over refusal"/>
</Choices>
</View>
</View>
</View>
</View>',
    'horizontal',
    'chat',
    1,
    '1.0.0'
);

-- =============================================
-- 3. Computer Vision
-- =============================================

-- 3.1 Image captioning.
-- Seeds built-in template 'tpl-image-captioning-001': an Image object with a
-- caption TextArea.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-image-captioning-001',
    '图像描述',
    '编写描述图像的文本,适用于无障碍工具、视觉搜索、内容管理、电商产品描述等场景。关联模型:BLIP、CLIP、GPT-4V',
    'image',
    'image-captioning',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'caption', 'toName', 'image', 'type', 'TextArea', 'required', true)
        ),
        -- The object value must name the same task field as the <Image> tag in
        -- label_config below, which reads $captioning.
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$captioning'))
    ),
    '<View>
<Image name="image" value="$captioning"/>
<Header value="描述图像内容:"/>
<TextArea name="caption" toName="image" placeholder="在此输入描述..."
rows="5" maxSubmissions="1"/>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.2 Image classification.
-- Seeds built-in template 'tpl-image-classification-001': a Choices control
-- (Adult content / Weapons / Violence) on an Image object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-image-classification-001',
    '图像分类',
    '对图像进行分类,适用于内容审核、安全检测、社交媒体审核等场景。关联模型:ResNet、EfficientNet、Vision Transformer',
    'image',
    'image-classification',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'choice', 'toName', 'image', 'type', 'Choices', 'options', JSON_ARRAY('Adult content', 'Weapons', 'Violence'), 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
    ),
    '<View>
<Image name="image" value="$image"/>
<Choices name="choice" toName="image">
<Choice value="Adult content"/>
<Choice value="Weapons" />
<Choice value="Violence" />
</Choices>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.3 Object detection (bounding boxes).
-- Seeds built-in template 'tpl-object-detection-001': RectangleLabels
-- (Airplane / Car) on an Image object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-object-detection-001',
    '目标检测(边界框)',
    '在目标周围绘制边界框,适用于自动驾驶、交通监控、安防监控、零售分析等场景。关联模型:YOLO、R-CNN、SSD',
    'image',
    'object-detection',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'image', 'type', 'RectangleLabels', 'labels', JSON_ARRAY('Airplane', 'Car'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
    ),
    '<View>
<Image name="image" value="$image"/>
<RectangleLabels name="label" toName="image">
<Label value="Airplane" background="green"/>
<Label value="Car" background="blue"/>
</RectangleLabels>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.4 Semantic segmentation (masks).
-- Seeds built-in template 'tpl-semantic-segmentation-mask-001': BrushLabels
-- (Airplane / Car) on a zoomable Image object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-semantic-segmentation-mask-001',
    '语义分割(掩码)',
    '使用画笔工具在目标周围绘制掩码,适用于自动驾驶、医学图像分析、卫星图像分析等场景。关联模型:U-Net、DeepLab、Mask R-CNN',
    'image',
    'semantic-segmentation-mask',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'tag', 'toName', 'image', 'type', 'BrushLabels', 'labels', JSON_ARRAY('Airplane', 'Car'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
    ),
    '<View>
<Image name="image" value="$image" zoom="true"/>
<BrushLabels name="tag" toName="image">
<Label value="Airplane" background="rgba(255, 0, 0, 0.7)"/>
<Label value="Car" background="rgba(0, 0, 255, 0.7)"/>
</BrushLabels>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.5 Semantic segmentation (polygons).
-- Seeds built-in template 'tpl-semantic-segmentation-polygon-001':
-- PolygonLabels (Airplane / Car) on a zoomable Image object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-semantic-segmentation-polygon-001',
    '语义分割(多边形)',
    '在目标周围绘制多边形,适用于自动驾驶、医学图像、卫星图像、精准农业等场景。关联模型:DeepLab、PSPNet、U-Net',
    'image',
    'semantic-segmentation-polygon',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'image', 'type', 'PolygonLabels', 'labels', JSON_ARRAY('Airplane', 'Car'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'))
    ),
    '<View>
<Header value="选择标签并点击图像开始"/>
<Image name="image" value="$image" zoom="true"/>
<PolygonLabels name="label" toName="image" strokeWidth="3" pointSize="small" opacity="0.9">
<Label value="Airplane" background="red"/>
<Label value="Car" background="blue"/>
</PolygonLabels>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.6 Keypoint labeling.
-- Seeds built-in template 'tpl-keypoint-labeling-001': KeyPointLabels
-- (Face / Nose) on an Image object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-keypoint-labeling-001',
    '关键点标注',
    '添加带标签的关键点,适用于人体姿态估计、面部特征点检测、运动分析等场景。关联模型:OpenPose、MediaPipe、PoseNet',
    'image',
    'keypoint-labeling',
    JSON_OBJECT(
        -- fromName / toName / object name must equal the tag names used in
        -- label_config below ('kp-1' and 'img-1').
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'kp-1', 'toName', 'img-1', 'type', 'KeyPointLabels', 'labels', JSON_ARRAY('Face', 'Nose'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'img-1', 'type', 'Image', 'value', '$img'))
    ),
    '<View>
<KeyPointLabels name="kp-1" toName="img-1">
<Label value="Face" background="red" />
<Label value="Nose" background="green" />
</KeyPointLabels>
<Image name="img-1" value="$img" />
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.7 OCR.
-- Seeds built-in template 'tpl-ocr-001': Text/Handwriting labels, rectangle
-- and polygon region tools, and a per-region transcription TextArea on an
-- Image object read from the $ocr task field.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-ocr-001',
    'OCR识别',
    '在区域周围绘制边界框或多边形并写下其中的文本,适用于文档数字化、发票处理、车牌识别等场景。关联模型:Tesseract、PaddleOCR、EasyOCR',
    'image',
    'ocr',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'image', 'type', 'Labels', 'labels', JSON_ARRAY('Text', 'Handwriting')),
            JSON_OBJECT('fromName', 'transcription', 'toName', 'image', 'type', 'TextArea', 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$ocr'))
    ),
    '<View>
<Image name="image" value="$ocr"/>
<Labels name="label" toName="image">
<Label value="Text" background="green"/>
<Label value="Handwriting" background="blue"/>
</Labels>
<Rectangle name="bbox" toName="image" strokeWidth="3"/>
<Polygon name="poly" toName="image" strokeWidth="3"/>
<TextArea name="transcription" toName="image"
editable="true" perRegion="true" required="true"
maxSubmissions="1" rows="5" placeholder="识别的文本"
displayMode="region-list"/>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- 3.8 Visual question answering.
-- Seeds built-in template 'tpl-vqa-001': an Image object plus two question
-- Text objects ($q1, $q2), each with its own answer TextArea.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-vqa-001',
    '视觉问答',
    '回答与图像相关的问题,适用于无障碍工具、教育评估、自动驾驶、医学图像分析等场景。关联模型:ViLBERT、BLIP、GPT-4V',
    'image',
    'vqa',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'answer1', 'toName', 'q1', 'type', 'TextArea', 'required', true),
            JSON_OBJECT('fromName', 'answer2', 'toName', 'q2', 'type', 'TextArea', 'required', true)
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'image', 'type', 'Image', 'value', '$image'),
            JSON_OBJECT('name', 'q1', 'type', 'Text', 'value', '$q1'),
            JSON_OBJECT('name', 'q2', 'type', 'Text', 'value', '$q2')
        )
    ),
    '<View>
<Image name="image" value="$image"/>
<Header value="请回答以下问题:"/>
<View style="display: grid; grid-template-columns: 1fr 10fr 1fr 3fr; column-gap: 1em">
<Header value="Q1:"/>
<Text name="q1" value="$q1"/>
<Header value="A1:"/>
<TextArea name="answer1" toName="q1" rows="1" maxSubmissions="1"/>
</View>
<View style="display: grid; grid-template-columns: 1fr 10fr 1fr 3fr; column-gap: 1em">
<Header value="Q2:"/>
<Text name="q2" value="$q2"/>
<Header value="A2:"/>
<TextArea name="answer2" toName="q2" rows="1" maxSubmissions="1"/>
</View>
</View>',
    'horizontal',
    'computer-vision',
    1,
    '1.0.0'
);

-- =============================================
-- 4. Natural Language Processing (NLP)
-- =============================================

-- 4.1 Text classification.
-- Seeds built-in template 'tpl-text-classification-001': a single-choice
-- sentiment control (Positive / Negative / Neutral) on a Text object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-text-classification-001',
    '文本分类',
    '对文本进行分类,适用于情感分析、主题分类、垃圾邮件检测等场景。关联模型:BERT、RoBERTa、DistilBERT',
    'text',
    'text-classification',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'sentiment', 'toName', 'text', 'type', 'Choices', 'options', JSON_ARRAY('Positive', 'Negative', 'Neutral'), 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$text'))
    ),
    '<View>
<Text name="text" value="$text"/>
<Choices name="sentiment" toName="text" choice="single-radio" showInLine="true">
<Choice value="Positive"/>
<Choice value="Negative"/>
<Choice value="Neutral"/>
</Choices>
</View>',
    'vertical',
    'nlp',
    1,
    '1.0.0'
);

-- 4.2 Named entity recognition.
-- Seeds built-in template 'tpl-ner-001': span Labels (PER / ORG / LOC / MISC)
-- on a Text object.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-ner-001',
    '命名实体识别',
    '在文本中标注命名实体,适用于信息抽取、知识图谱构建、智能问答等场景。关联模型:BERT-NER、SpaCy、Flair',
    'text',
    'ner',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'text', 'type', 'Labels', 'labels', JSON_ARRAY('PER', 'ORG', 'LOC', 'MISC'))
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$text'))
    ),
    '<View>
<Labels name="label" toName="text">
<Label value="PER" background="red"/>
<Label value="ORG" background="darkorange"/>
<Label value="LOC" background="orange"/>
<Label value="MISC" background="green"/>
</Labels>
<Text name="text" value="$text"/>
</View>',
    'vertical',
    'nlp',
    1,
    '1.0.0'
);

-- 4.3 Relation extraction.
-- Seeds built-in template 'tpl-relation-extraction-001': span Labels
-- (PER / ORG / LOC) on a Text object plus a Relations block
-- (works_for / lives_in / located_in).
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-relation-extraction-001',
    '关系抽取',
    '标注实体间的关系,适用于知识图谱构建、信息抽取等场景。关联模型:BERT、GPT、OpenIE',
    'text',
    'relation-extraction',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'label', 'toName', 'text', 'type', 'Labels', 'labels', JSON_ARRAY('PER', 'ORG', 'LOC')),
            JSON_OBJECT('fromName', 'relation', 'toName', 'text', 'type', 'Relations')
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$text'))
    ),
    '<View>
<Relations>
<Relation value="works_for"/>
<Relation value="lives_in"/>
<Relation value="located_in"/>
</Relations>
<Labels name="label" toName="text">
<Label value="PER" background="red"/>
<Label value="ORG" background="darkorange"/>
<Label value="LOC" background="orange"/>
</Labels>
<Text name="text" value="$text"/>
</View>',
    'vertical',
    'nlp',
    1,
    '1.0.0'
);

-- 4.4 Machine translation.
-- Seeds built-in template 'tpl-machine-translation-001': a two-column layout
-- with the source Text on one side and a translation TextArea on the other.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-machine-translation-001',
    '机器翻译',
    '翻译文本内容,适用于翻译质量评估、机器翻译后编辑等场景',
    'text',
    'machine-translation',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'translation', 'toName', 'text', 'type', 'TextArea', 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$text'))
    ),
    '<View>
<View style="display: grid; grid-template-columns: 1fr 1fr; grid-column-gap: 1em">
<View>
<Header value="原文"/>
<Text name="text" value="$text"/>
</View>
<View>
<Header value="翻译"/>
<TextArea name="translation" toName="text" rows="5" editable="true" maxSubmissions="1"/>
</View>
</View>
</View>',
    'vertical',
    'nlp',
    1,
    '1.0.0'
);

-- 4.5 Text summarization.
-- Seeds built-in template 'tpl-text-summarization-001': a Text object with a
-- summary TextArea.
INSERT INTO t_dm_annotation_templates (
    id, name, description, data_type, labeling_type,
    configuration, label_config, style, category, built_in, version
) VALUES (
    'tpl-text-summarization-001',
    '文本摘要',
    '编写文本摘要,适用于新闻摘要、文档摘要、会议纪要等场景',
    'text',
    'text-summarization',
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT('fromName', 'summary', 'toName', 'text', 'type', 'TextArea', 'required', true)
        ),
        'objects', JSON_ARRAY(JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$text'))
    ),
    '<View>
<Text name="text" value="$text"/>
<Header value="编写摘要"/>
<TextArea name="summary" toName="text" rows="4" editable="true" maxSubmissions="1"/>
</View>',
    'vertical',
    'nlp',
    1,
    '1.0.0'
);

-- 4.6 Question answering: built-in NLP template that displays a question
-- ($question) and lets the annotator mark an "Answer" span in the text ($text).
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-question-answering-001',
    '问答',
    '基于上下文回答问题或标注答案,适用于阅读理解、智能客服等场景',
    'text',
    'question-answering',
    -- configuration: one Labels control over "text"; two Text data objects
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'answer',
                'toName', 'text',
                'type', 'Labels',
                'labels', JSON_ARRAY('Answer')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'
            ),
            JSON_OBJECT(
                'name', 'question',
                'type', 'Text',
                'value', '$question'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Text name="question" value="$question"/>
<Labels name="answer" toName="text">
<Label value="Answer" background="green"/>
</Labels>
<Text name="text" value="$text"/>
</View>',
    'vertical',
    'nlp',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- 4.7 Taxonomy: built-in NLP template that classifies the text ($text) with a
-- nested Choice hierarchy (sample tree: Archaea / Bacteria / Eukarya).
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-taxonomy-001',
    '层级分类',
    '使用层级分类法对文本进行分类,适用于电商商品分类、文档归档等场景',
    'text',
    'taxonomy',
    -- configuration: controls ("labels") and the data objects they point at
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'taxonomy',
                'toName', 'text',
                'type', 'Taxonomy'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'text',
                'type', 'Text',
                'value', '$text'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Text name="text" value="$text"/>
<Taxonomy name="taxonomy" toName="text">
<Choice value="Archaea">
<Choice value="Euryarchaeota"/>
<Choice value="Crenarchaeota"/>
</Choice>
<Choice value="Bacteria">
<Choice value="Actinobacteria"/>
<Choice value="Proteobacteria"/>
</Choice>
<Choice value="Eukarya">
<Choice value="Animalia">
<Choice value="Chordata">
<Choice value="Mammalia"/>
<Choice value="Aves"/>
</Choice>
</Choice>
<Choice value="Plantae"/>
</Choice>
</Taxonomy>
</View>',
    'vertical',
    'nlp',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 5. 生成式AI (Generative AI)
|
|
-- =============================================
|
|
|
|
-- 5.1 RLHF human preference: built-in generative-AI template that shows a
-- prompt and two candidate responses and records which response is preferred.
--
-- Fix: the Pairwise control previously wrapped the two response views and
-- targeted the single object "prompt". Label Studio's Pairwise tag is a
-- standalone control whose toName is the comma-separated pair of objects being
-- compared, so it now targets "response1,response2" and the response views
-- are siblings. The configuration JSON is updated to describe the same binding.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-rlhf-preference-001',
    'RLHF人类偏好',
    '收集人类偏好用于RLHF训练,对两个LLM响应进行排名',
    'text',
    'rlhf-preference',
    -- configuration: the Pairwise control compares the two response objects
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'preference',
                'toName', 'response1,response2',
                'type', 'Pairwise'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'prompt', 'type', 'Text', 'value', '$prompt'),
            JSON_OBJECT('name', 'response1', 'type', 'Text', 'value', '$response1'),
            JSON_OBJECT('name', 'response2', 'type', 'Text', 'value', '$response2')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Style>
.prompt-box { background: #f5f5f5; padding: 15px; border-radius: 8px; margin-bottom: 20px; }
.response-box { border: 1px solid #ddd; padding: 15px; border-radius: 8px; }
</Style>
<View className="prompt-box">
<Header value="Prompt"/>
<Text name="prompt" value="$prompt"/>
</View>
<Pairwise name="preference" toName="response1,response2"/>
<View className="response-box">
<Header value="Response A"/>
<Text name="response1" value="$response1"/>
</View>
<View className="response-box">
<Header value="Response B"/>
<Text name="response2" value="$response2"/>
</View>
</View>',
    'horizontal',
    'generative-ai',
    1,
    '1.0.0'
);
|
|
|
|
-- 5.2 LLM response grading: built-in generative-AI template that rates one
-- response on three 5-point scales (relevance, coherence, fluency).
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-llm-grading-001',
    'LLM响应评分',
    '对LLM生成的响应进行多维度评分',
    'text',
    'llm-grading',
    -- configuration: three Rating controls, all bound to the "response" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'relevance',
                'toName', 'response',
                'type', 'Rating'
            ),
            JSON_OBJECT(
                'fromName', 'coherence',
                'toName', 'response',
                'type', 'Rating'
            ),
            JSON_OBJECT(
                'fromName', 'fluency',
                'toName', 'response',
                'type', 'Rating'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'prompt',
                'type', 'Text',
                'value', '$prompt'
            ),
            JSON_OBJECT(
                'name', 'response',
                'type', 'Text',
                'value', '$response'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Header value="Prompt"/>
<Text name="prompt" value="$prompt"/>
<Header value="Response"/>
<Text name="response" value="$response"/>
<Header value="相关性"/>
<Rating name="relevance" toName="response" maxRating="5"/>
<Header value="连贯性"/>
<Rating name="coherence" toName="response" maxRating="5"/>
<Header value="流畅度"/>
<Rating name="fluency" toName="response" maxRating="5"/>
</View>',
    'vertical',
    'generative-ai',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- 5.3 Supervised fine-tuning: built-in generative-AI template that shows an
-- instruction ($instruction) and collects a written response in a TextArea.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-sft-001',
    '监督微调',
    '为LLM监督微调收集高质量指令-响应对',
    'text',
    'sft',
    -- configuration: controls ("labels") and the data objects they point at
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'response',
                'toName', 'instruction',
                'type', 'TextArea',
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'instruction',
                'type', 'Text',
                'value', '$instruction'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Header value="指令"/>
<Text name="instruction" value="$instruction"/>
<Header value="编写高质量响应"/>
<TextArea name="response" toName="instruction" rows="6" editable="true" maxSubmissions="1"/>
</View>',
    'vertical',
    'generative-ai',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 6. 排名与评分 (Ranking & Scoring)
|
|
-- =============================================
|
|
|
|
-- 6.1 Pairwise classification: built-in ranking template that shows a prompt
-- and two options and records which option the annotator selects.
--
-- Fix: the Pairwise control previously wrapped the two option Text tags and
-- targeted the single object "text". Label Studio's Pairwise tag is a
-- standalone control whose toName is the comma-separated pair of objects being
-- compared, so it now targets "option1,option2" and the options are siblings.
-- The configuration JSON is updated to describe the same binding.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-pairwise-classification-001',
    '成对分类',
    '对两个项目进行成对比较分类',
    'text',
    'pairwise-classification',
    -- configuration: the Pairwise control compares the two option objects
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'comparison',
                'toName', 'option1,option2',
                'type', 'Pairwise'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$prompt'),
            JSON_OBJECT('name', 'option1', 'type', 'Text', 'value', '$option1'),
            JSON_OBJECT('name', 'option2', 'type', 'Text', 'value', '$option2')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Text name="text" value="$prompt"/>
<Pairwise name="comparison" toName="option1,option2"/>
<Header value="选项A"/>
<Text name="option1" value="$option1"/>
<Header value="选项B"/>
<Text name="option2" value="$option2"/>
</View>',
    'horizontal',
    'ranking-scoring',
    1,
    '1.0.0'
);
|
|
|
|
-- 6.2 Search result ranking: built-in ranking template that shows a query
-- ($query) and one result ($result) and rates the result on a 5-point scale.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-serp-ranking-001',
    '搜索排名',
    '对搜索结果进行相关性排名,适用于搜索引擎优化、信息检索等场景',
    'text',
    'serp-ranking',
    -- configuration: one Rating control bound to the "result" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'relevance',
                'toName', 'result',
                'type', 'Rating'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'query',
                'type', 'Text',
                'value', '$query'
            ),
            JSON_OBJECT(
                'name', 'result',
                'type', 'Text',
                'value', '$result'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Header value="搜索查询"/>
<Text name="query" value="$query"/>
<Header value="搜索结果"/>
<Text name="result" value="$result"/>
<Header value="相关性评分"/>
<Rating name="relevance" toName="result" maxRating="5"/>
</View>',
    'vertical',
    'ranking-scoring',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 7. 结构化数据 (Structured Data)
|
|
-- =============================================
|
|
|
|
-- 7.1 PDF classification: built-in structured-data template that displays a
-- PDF ($pdf) and classifies it as Invoice / Contract / Report / Other.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-pdf-classification-001',
    'PDF分类',
    '对PDF文档进行分类',
    'pdf',
    'pdf-classification',
    -- configuration: one Choices control bound to the "pdf" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'pdf',
                'type', 'Choices',
                'options', JSON_ARRAY('Invoice', 'Contract', 'Report', 'Other'),
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'pdf',
                'type', 'PDF',
                'value', '$pdf'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Style>
.htx-pdf { height: calc(100vh - 250px); }
</Style>
<Pdf name="pdf" value="$pdf"/>
<Choices name="choice" toName="pdf">
<Choice value="Invoice"/>
<Choice value="Contract"/>
<Choice value="Report"/>
<Choice value="Other"/>
</Choices>
</View>',
    'horizontal',
    'structured-data',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- 7.2 Tabular data: built-in structured-data template that displays a table
-- ($table) and classifies it as Valid / Invalid / Needs Review.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-tabular-data-001',
    '表格数据标注',
    '对表格数据进行标注和分类',
    'table',
    'tabular-data',
    -- configuration: one Choices control bound to the "table" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'table',
                'type', 'Choices',
                'options', JSON_ARRAY('Valid', 'Invalid', 'Needs Review'),
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'table',
                'type', 'Table',
                'value', '$table'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Table name="table" value="$table"/>
<Choices name="choice" toName="table">
<Choice value="Valid"/>
<Choice value="Invalid"/>
<Choice value="Needs Review"/>
</Choices>
</View>',
    'vertical',
    'structured-data',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 8. 时间序列 (Time Series)
|
|
-- =============================================
|
|
|
|
-- 8.1 Anomaly detection: built-in time-series template that labels regions of
-- a series ($timeseries) as Anomaly or Normal.
--
-- Fix: the time-series tag declared no <Channel> children. Label Studio's
-- TimeSeries tag plots one channel per <Channel column="..."/> child and does
-- not document a valueColumns attribute, so a Channel for the "value" column
-- is added. The original valueColumns attribute is kept in case another
-- consumer of this XML reads it (NOTE(review): confirm whether it is needed).
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-anomaly-detection-001',
    '异常检测',
    '在时间序列数据中标注异常区域',
    'timeseries',
    'anomaly-detection',
    -- configuration: one TimeseriesLabels control bound to the "ts" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'ts',
                'type', 'TimeseriesLabels',
                'labels', JSON_ARRAY('Anomaly', 'Normal')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'ts', 'type', 'Timeseries', 'value', '$timeseries')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<TimeseriesLabels name="label" toName="ts">
<Label value="Anomaly" background="red"/>
<Label value="Normal" background="green"/>
</TimeseriesLabels>
<Timeseries name="ts" value="$timeseries" timeColumn="time" valueColumns="value">
<Channel column="value"/>
</Timeseries>
</View>',
    'horizontal',
    'time-series',
    1,
    '1.0.0'
);
|
|
|
|
-- 8.2 Activity recognition: built-in time-series template that labels regions
-- of a series ($timeseries) as Walking / Running / Sitting / Standing.
--
-- Fix: the time-series tag declared no <Channel> children. Label Studio's
-- TimeSeries tag plots one channel per <Channel column="..."/> child and does
-- not document a valueColumns attribute, so one Channel is added for each of
-- acc_x, acc_y and acc_z. The original valueColumns attribute is kept in case
-- another consumer of this XML reads it (NOTE(review): confirm whether it is
-- needed).
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-activity-recognition-001',
    '活动识别',
    '在时间序列数据中识别和标注不同活动',
    'timeseries',
    'activity-recognition',
    -- configuration: one TimeseriesLabels control bound to the "ts" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'ts',
                'type', 'TimeseriesLabels',
                'labels', JSON_ARRAY('Walking', 'Running', 'Sitting', 'Standing')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'ts', 'type', 'Timeseries', 'value', '$timeseries')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<TimeseriesLabels name="label" toName="ts">
<Label value="Walking" background="blue"/>
<Label value="Running" background="red"/>
<Label value="Sitting" background="green"/>
<Label value="Standing" background="orange"/>
</TimeseriesLabels>
<Timeseries name="ts" value="$timeseries" timeColumn="time" valueColumns="acc_x,acc_y,acc_z">
<Channel column="acc_x"/>
<Channel column="acc_y"/>
<Channel column="acc_z"/>
</Timeseries>
</View>',
    'horizontal',
    'time-series',
    1,
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 9. 视频处理 (Videos)
|
|
-- =============================================
|
|
|
|
-- 9.1 Video classification: built-in video template that plays a video
-- ($video) and classifies it as Sports / News / Entertainment / Education.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-video-classification-001',
    '视频分类',
    '对视频进行整体分类',
    'video',
    'video-classification',
    -- configuration: one Choices control bound to the "video" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'choice',
                'toName', 'video',
                'type', 'Choices',
                'options', JSON_ARRAY('Sports', 'News', 'Entertainment', 'Education'),
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'video',
                'type', 'Video',
                'value', '$video'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Video name="video" value="$video"/>
<Choices name="choice" toName="video">
<Choice value="Sports"/>
<Choice value="News"/>
<Choice value="Entertainment"/>
<Choice value="Education"/>
</Choices>
</View>',
    'horizontal',
    'video',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- 9.2 Video object tracking: built-in video template that draws rectangles on
-- a video ($video) and labels them Person / Car / Object.
--
-- Fix: the configuration JSON described a single control named "label" of
-- type VideoRectangle, but the XML defines two controls: Labels name="label"
-- (which carries the label values) and VideoRectangle name="box". The JSON
-- now lists both controls with the names and types used in label_config.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-video-object-tracking-001',
    '视频目标追踪',
    '在视频中追踪目标对象',
    'video',
    'video-object-tracking',
    -- configuration: mirrors the two controls declared in label_config
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'video',
                'type', 'Labels',
                'labels', JSON_ARRAY('Person', 'Car', 'Object')
            ),
            JSON_OBJECT(
                'fromName', 'box',
                'toName', 'video',
                'type', 'VideoRectangle'
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Labels name="label" toName="video">
<Label value="Person" background="red"/>
<Label value="Car" background="blue"/>
<Label value="Object" background="green"/>
</Labels>
<VideoRectangle name="box" toName="video"/>
<Video name="video" value="$video"/>
</View>',
    'horizontal',
    'video',
    1,
    '1.0.0'
);
|
|
|
|
-- 9.3 Video timeline segmentation: built-in video template with a Labels
-- control (Intro / Content / Outro) bound to the video ($video).
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-video-timeline-segmentation-001',
    '视频时间线分割',
    '对视频时间线进行分段标注',
    'video',
    'video-timeline-segmentation',
    -- configuration: one Labels control bound to the "video" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'video',
                'type', 'Labels',
                'labels', JSON_ARRAY('Intro', 'Content', 'Outro')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'video',
                'type', 'Video',
                'value', '$video'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Labels name="label" toName="video">
<Label value="Intro" background="blue"/>
<Label value="Content" background="green"/>
<Label value="Outro" background="orange"/>
</Labels>
<Video name="video" value="$video"/>
</View>',
    'horizontal',
    'video',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 10. 对话AI (Conversational AI)
|
|
-- =============================================
|
|
|
|
-- 10.1 Coreference resolution: built-in conversational-AI template that marks
-- Noun / Pronoun spans in a text.
--
-- Fix: label_config bound the Text object to $corefText while the
-- configuration JSON declares the same object with value $text. The XML now
-- uses $text so both columns reference the same task field.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-coreference-resolution-001',
    '共指消解',
    '在文本中标注共指关系和实体链接',
    'text',
    'coreference-resolution',
    -- configuration: one Labels control bound to the "text" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'label',
                'toName', 'text',
                'type', 'Labels',
                'labels', JSON_ARRAY('Noun', 'Pronoun')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'text', 'type', 'Text', 'value', '$text')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Labels name="label" toName="text">
<Label value="Noun" background="red"/>
<Label value="Pronoun" background="darkorange"/>
</Labels>
<Text name="text" value="$text"/>
</View>',
    'vertical',
    'conversational-ai',
    1,
    '1.0.0'
);
|
|
|
|
-- 10.2 Slot filling: built-in conversational-AI template that tags entity
-- slots in a dialogue and picks a single intent for it.
--
-- Fix: label_config bound the Paragraphs object to $humanMachineDialogue while
-- the configuration JSON declares the same object with value $dialogue. The
-- XML now uses $dialogue so both columns reference the same task field.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-slot-filling-001',
    '槽填充',
    '构建任务导向对话系统,选择对话意图并提取槽实体',
    'text',
    'slot-filling',
    -- configuration: entity-slot labels plus an intent choice, both on "dialogue"
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'entity_slot',
                'toName', 'dialogue',
                'type', 'ParagraphLabels',
                'labels', JSON_ARRAY('Person', 'Organization', 'Location', 'Datetime', 'Quantity')
            ),
            JSON_OBJECT(
                'fromName', 'intent',
                'toName', 'dialogue',
                'type', 'Choices',
                'options', JSON_ARRAY('Greeting', 'Customer request', 'Small talk')
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT('name', 'dialogue', 'type', 'Paragraphs', 'value', '$dialogue')
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<ParagraphLabels name="entity_slot" toName="dialogue">
<Label value="Person" />
<Label value="Organization" />
<Label value="Location" />
<Label value="Datetime" />
<Label value="Quantity" />
</ParagraphLabels>
<Paragraphs name="dialogue" value="$dialogue" layout="dialogue" />
<Choices name="intent" toName="dialogue" choice="single" showInLine="true">
<Choice value="Greeting"/>
<Choice value="Customer request"/>
<Choice value="Small talk"/>
</Choices>
</View>',
    'vertical',
    'conversational-ai',
    1,
    '1.0.0'
);
|
|
|
|
-- 10.3 Response generation: built-in conversational-AI template that shows a
-- dialogue ($dialogue) and collects the next response in a TextArea.
INSERT INTO t_dm_annotation_templates (
    id,
    name,
    description,
    data_type,
    labeling_type,
    configuration,
    label_config,
    style,
    category,
    built_in,
    version
)
VALUES (
    'tpl-response-generation-001',
    '响应生成',
    '通过生成下一个对话响应来收集聊天机器人训练数据',
    'text',
    'response-generation',
    -- configuration: one TextArea control bound to the "chat" object
    JSON_OBJECT(
        'labels', JSON_ARRAY(
            JSON_OBJECT(
                'fromName', 'response',
                'toName', 'chat',
                'type', 'TextArea',
                'required', TRUE
            )
        ),
        'objects', JSON_ARRAY(
            JSON_OBJECT(
                'name', 'chat',
                'type', 'Paragraphs',
                'value', '$dialogue'
            )
        )
    ),
    -- label_config: kept flush-left because every line is part of the literal
    '<View>
<Paragraphs name="chat" value="$dialogue" layout="dialogue" />
<Header value="提供响应" />
<TextArea name="response" toName="chat" rows="4" editable="true" maxSubmissions="1" />
</View>',
    'vertical',
    'conversational-ai',
    TRUE,  -- built_in
    '1.0.0'
);
|
|
|
|
-- =============================================
|
|
-- 完成提示
|
|
-- =============================================
|
|
-- Report how many built-in templates the table holds after the inserts above.
-- The count covers every row with built_in = 1, not only rows added by this run.
SELECT
    CONCAT('成功插入 ', COUNT(*), ' 个内置标注模板') AS result
FROM t_dm_annotation_templates
WHERE built_in = 1;
|