You've already forked DataMate
feat(annotation): 支持音频和视频数据类型的标注任务
- 添加了音频和视频数据类型常量定义
- 实现了音频和视频标注模板的内置配置
- 扩展前端组件以支持按数据类型过滤标注模板
- 重构后端编辑器服务以处理音频和视频任务构建
- 更新数据库初始化脚本包含音频和视频标注模板
- 添加音频和视频数据类型的预览URL映射逻辑
This commit is contained in:
@@ -12,7 +12,7 @@ import {
|
|||||||
createAnnotationTaskUsingPost,
|
createAnnotationTaskUsingPost,
|
||||||
queryAnnotationTemplatesUsingGet,
|
queryAnnotationTemplatesUsingGet,
|
||||||
} from "../annotation.api";
|
} from "../annotation.api";
|
||||||
import type { AnnotationTemplate } from "../annotation.model";
|
import { DataType, type AnnotationTemplate } from "../annotation.model";
|
||||||
import TemplateConfigurationTreeEditor from "../components/TemplateConfigurationTreeEditor";
|
import TemplateConfigurationTreeEditor from "../components/TemplateConfigurationTreeEditor";
|
||||||
|
|
||||||
const DEFAULT_SEGMENTATION_ENABLED = true;
|
const DEFAULT_SEGMENTATION_ENABLED = true;
|
||||||
@@ -20,6 +20,22 @@ const SEGMENTATION_OPTIONS = [
|
|||||||
{ label: "需要切片段", value: true },
|
{ label: "需要切片段", value: true },
|
||||||
{ label: "不需要切片段", value: false },
|
{ label: "不需要切片段", value: false },
|
||||||
];
|
];
|
||||||
|
const resolveTemplateDataType = (datasetType?: DatasetType) => {
|
||||||
|
switch (datasetType) {
|
||||||
|
case DatasetType.TEXT:
|
||||||
|
return DataType.TEXT;
|
||||||
|
case DatasetType.IMAGE:
|
||||||
|
return DataType.IMAGE;
|
||||||
|
case DatasetType.AUDIO:
|
||||||
|
return DataType.AUDIO;
|
||||||
|
case DatasetType.VIDEO:
|
||||||
|
return DataType.VIDEO;
|
||||||
|
default:
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const resolveDefaultTemplate = (items: AnnotationTemplate[]) =>
|
||||||
|
items.find((template) => template.builtIn) || items[0];
|
||||||
|
|
||||||
export default function AnnotationTaskCreate() {
|
export default function AnnotationTaskCreate() {
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
@@ -48,9 +64,17 @@ export default function AnnotationTaskCreate() {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const fetchTemplates = async () => {
|
const fetchTemplates = async (dataType?: string) => {
|
||||||
|
if (!dataType) {
|
||||||
|
setTemplates([]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
const response = await queryAnnotationTemplatesUsingGet({ page: 1, size: 200 });
|
const response = await queryAnnotationTemplatesUsingGet({
|
||||||
|
page: 1,
|
||||||
|
size: 200,
|
||||||
|
dataType,
|
||||||
|
});
|
||||||
if (response.code === 200 && response.data) {
|
if (response.code === 200 && response.data) {
|
||||||
setTemplates(response.data.content || []);
|
setTemplates(response.data.content || []);
|
||||||
} else {
|
} else {
|
||||||
@@ -64,9 +88,39 @@ export default function AnnotationTaskCreate() {
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
fetchDatasets();
|
fetchDatasets();
|
||||||
fetchTemplates();
|
|
||||||
}, []);
|
}, []);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!selectedDataset) {
|
||||||
|
setTemplates([]);
|
||||||
|
form.setFieldsValue({ templateId: undefined });
|
||||||
|
setLabelConfig("");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const dataType = resolveTemplateDataType(selectedDataset.datasetType);
|
||||||
|
fetchTemplates(dataType);
|
||||||
|
}, [form, selectedDataset]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (configMode !== "template" || !selectedDataset) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (templates.length === 0) {
|
||||||
|
form.setFieldsValue({ templateId: undefined });
|
||||||
|
setLabelConfig("");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const currentTemplateId = form.getFieldValue("templateId");
|
||||||
|
const currentTemplate = templates.find((template) => template.id === currentTemplateId);
|
||||||
|
if (currentTemplate) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const defaultTemplate = resolveDefaultTemplate(templates);
|
||||||
|
if (defaultTemplate) {
|
||||||
|
form.setFieldsValue({ templateId: defaultTemplate.id });
|
||||||
|
setLabelConfig(defaultTemplate.labelConfig || "");
|
||||||
|
}
|
||||||
|
}, [configMode, form, selectedDataset, templates]);
|
||||||
const handleTemplateSelect = (value?: string) => {
|
const handleTemplateSelect = (value?: string) => {
|
||||||
if (!value) {
|
if (!value) {
|
||||||
setLabelConfig("");
|
setLabelConfig("");
|
||||||
@@ -171,6 +225,8 @@ export default function AnnotationTaskCreate() {
|
|||||||
}))}
|
}))}
|
||||||
onChange={(value) => {
|
onChange={(value) => {
|
||||||
setSelectedDatasetId(value);
|
setSelectedDatasetId(value);
|
||||||
|
form.setFieldsValue({ templateId: undefined });
|
||||||
|
setLabelConfig("");
|
||||||
const dataset = datasets.find((item) => item.id === value);
|
const dataset = datasets.find((item) => item.id === value);
|
||||||
if (dataset?.datasetType === DatasetType.TEXT) {
|
if (dataset?.datasetType === DatasetType.TEXT) {
|
||||||
const currentValue = form.getFieldValue("segmentationEnabled");
|
const currentValue = form.getFieldValue("segmentationEnabled");
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import {
|
|||||||
queryAnnotationTemplatesUsingGet,
|
queryAnnotationTemplatesUsingGet,
|
||||||
} from "../../annotation.api";
|
} from "../../annotation.api";
|
||||||
import { DatasetType, type Dataset } from "@/pages/DataManagement/dataset.model";
|
import { DatasetType, type Dataset } from "@/pages/DataManagement/dataset.model";
|
||||||
import type { AnnotationTemplate, AnnotationTask } from "../../annotation.model";
|
import { DataType, type AnnotationTemplate, type AnnotationTask } from "../../annotation.model";
|
||||||
import LabelStudioEmbed from "@/components/business/LabelStudioEmbed";
|
import LabelStudioEmbed from "@/components/business/LabelStudioEmbed";
|
||||||
import TemplateConfigurationTreeEditor from "../../components/TemplateConfigurationTreeEditor";
|
import TemplateConfigurationTreeEditor from "../../components/TemplateConfigurationTreeEditor";
|
||||||
import { useTagConfig } from "@/hooks/useTagConfig";
|
import { useTagConfig } from "@/hooks/useTagConfig";
|
||||||
@@ -57,6 +57,22 @@ const SEGMENTATION_OPTIONS = [
|
|||||||
{ label: "需要切片段", value: true },
|
{ label: "需要切片段", value: true },
|
||||||
{ label: "不需要切片段", value: false },
|
{ label: "不需要切片段", value: false },
|
||||||
];
|
];
|
||||||
|
const resolveTemplateDataType = (datasetType?: DatasetType) => {
|
||||||
|
switch (datasetType) {
|
||||||
|
case DatasetType.TEXT:
|
||||||
|
return DataType.TEXT;
|
||||||
|
case DatasetType.IMAGE:
|
||||||
|
return DataType.IMAGE;
|
||||||
|
case DatasetType.AUDIO:
|
||||||
|
return DataType.AUDIO;
|
||||||
|
case DatasetType.VIDEO:
|
||||||
|
return DataType.VIDEO;
|
||||||
|
default:
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const resolveDefaultTemplate = (items: AnnotationTemplate[]) =>
|
||||||
|
items.find((template) => template.builtIn) || items[0];
|
||||||
|
|
||||||
export default function CreateAnnotationTask({
|
export default function CreateAnnotationTask({
|
||||||
open,
|
open,
|
||||||
@@ -112,10 +128,24 @@ export default function CreateAnnotationTask({
|
|||||||
});
|
});
|
||||||
setDatasets(datasetData.content.map(mapDataset) || []);
|
setDatasets(datasetData.content.map(mapDataset) || []);
|
||||||
|
|
||||||
// Fetch templates
|
} catch (error) {
|
||||||
|
console.error("Error fetching data:", error);
|
||||||
|
setTemplates([]);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
fetchData();
|
||||||
|
}, [open]);
|
||||||
|
|
||||||
|
const fetchTemplates = async (dataType?: string) => {
|
||||||
|
if (!dataType) {
|
||||||
|
setTemplates([]);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try {
|
||||||
const templateResponse = await queryAnnotationTemplatesUsingGet({
|
const templateResponse = await queryAnnotationTemplatesUsingGet({
|
||||||
page: 1,
|
page: 1,
|
||||||
size: 100,
|
size: 200,
|
||||||
|
dataType,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (templateResponse.code === 200 && templateResponse.data) {
|
if (templateResponse.code === 200 && templateResponse.data) {
|
||||||
@@ -126,12 +156,45 @@ export default function CreateAnnotationTask({
|
|||||||
setTemplates([]);
|
setTemplates([]);
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error fetching data:", error);
|
console.error("Error fetching templates:", error);
|
||||||
setTemplates([]);
|
setTemplates([]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
fetchData();
|
|
||||||
}, [open]);
|
useEffect(() => {
|
||||||
|
if (!open || isEditMode) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!selectedDataset) {
|
||||||
|
setTemplates([]);
|
||||||
|
manualForm.setFieldsValue({ templateId: undefined });
|
||||||
|
setLabelConfig("");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const dataType = resolveTemplateDataType(selectedDataset.datasetType);
|
||||||
|
fetchTemplates(dataType);
|
||||||
|
}, [isEditMode, manualForm, open, selectedDataset]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!open || isEditMode || configMode !== "template" || !selectedDataset) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (templates.length === 0) {
|
||||||
|
manualForm.setFieldsValue({ templateId: undefined });
|
||||||
|
setLabelConfig("");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const currentTemplateId = manualForm.getFieldValue("templateId");
|
||||||
|
const currentTemplate = templates.find((template) => template.id === currentTemplateId);
|
||||||
|
if (currentTemplate) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const defaultTemplate = resolveDefaultTemplate(templates);
|
||||||
|
if (defaultTemplate) {
|
||||||
|
manualForm.setFieldsValue({ templateId: defaultTemplate.id });
|
||||||
|
setLabelConfig(defaultTemplate.labelConfig || "");
|
||||||
|
}
|
||||||
|
}, [configMode, isEditMode, manualForm, open, selectedDataset, templates]);
|
||||||
|
|
||||||
// Reset form and manual-edit flag when modal opens, or load task data in edit mode
|
// Reset form and manual-edit flag when modal opens, or load task data in edit mode
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
@@ -587,6 +650,10 @@ export default function CreateAnnotationTask({
|
|||||||
})}
|
})}
|
||||||
onChange={(value) => {
|
onChange={(value) => {
|
||||||
setSelectedDatasetId(value);
|
setSelectedDatasetId(value);
|
||||||
|
if (!isEditMode) {
|
||||||
|
manualForm.setFieldsValue({ templateId: undefined });
|
||||||
|
setLabelConfig("");
|
||||||
|
}
|
||||||
const dataset = datasets.find((item) => item.id === value);
|
const dataset = datasets.find((item) => item.id === value);
|
||||||
if (dataset?.datasetType === DatasetType.TEXT) {
|
if (dataset?.datasetType === DatasetType.TEXT) {
|
||||||
const currentValue = manualForm.getFieldValue("segmentationEnabled");
|
const currentValue = manualForm.getFieldValue("segmentationEnabled");
|
||||||
|
|||||||
@@ -13,7 +13,11 @@ from app.module.annotation.utils.config_validator import LabelStudioConfigValida
|
|||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
DATA_TYPE_IMAGE = "image"
|
DATA_TYPE_IMAGE = "image"
|
||||||
|
DATA_TYPE_AUDIO = "audio"
|
||||||
|
DATA_TYPE_VIDEO = "video"
|
||||||
CATEGORY_COMPUTER_VISION = "computer-vision"
|
CATEGORY_COMPUTER_VISION = "computer-vision"
|
||||||
|
CATEGORY_AUDIO_SPEECH = "audio-speech"
|
||||||
|
CATEGORY_VIDEO = "video"
|
||||||
STYLE_HORIZONTAL = "horizontal"
|
STYLE_HORIZONTAL = "horizontal"
|
||||||
VERSION_DEFAULT = "1.0.0"
|
VERSION_DEFAULT = "1.0.0"
|
||||||
|
|
||||||
@@ -51,6 +55,105 @@ SEMANTIC_SEGMENTATION_POLYGON_LABEL_CONFIG = """<View>
|
|||||||
</PolygonLabels>
|
</PolygonLabels>
|
||||||
</View>"""
|
</View>"""
|
||||||
|
|
||||||
|
ASR_SEGMENTS_LABEL_CONFIG = """<View>
|
||||||
|
<Labels name=\"labels\" toName=\"audio\">
|
||||||
|
<Label value=\"Speech\" />
|
||||||
|
<Label value=\"Noise\" />
|
||||||
|
</Labels>
|
||||||
|
<Audio name=\"audio\" value=\"$audio\"/>
|
||||||
|
<TextArea name=\"transcription\" toName=\"audio\"
|
||||||
|
rows=\"2\" editable=\"true\"
|
||||||
|
perRegion=\"true\" required=\"true\" />
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
ASR_LABEL_CONFIG = """<View>
|
||||||
|
<Audio name=\"audio\" value=\"$audio\" zoom=\"true\" hotkey=\"ctrl+enter\" />
|
||||||
|
<Header value=\"转录音频内容\" />
|
||||||
|
<TextArea name=\"transcription\" toName=\"audio\"
|
||||||
|
rows=\"4\" editable=\"true\" maxSubmissions=\"1\" />
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
CONVERSATION_ANALYSIS_LABEL_CONFIG = """<View>
|
||||||
|
<Audio name=\"audio\" value=\"$audio\" hotkey=\"space\" sync=\"text\"/>
|
||||||
|
<Header value=\"对话记录\"/>
|
||||||
|
<Paragraphs audioUrl=\"$audio\" sync=\"audio\" name=\"text\" value=\"$text\"
|
||||||
|
layout=\"dialogue\" textKey=\"text\" nameKey=\"author\"
|
||||||
|
granularity=\"paragraph\" contextscroll=\"true\" />
|
||||||
|
<View style=\"position: sticky\">
|
||||||
|
<Header value=\"情感标签\"/>
|
||||||
|
<ParagraphLabels name=\"label\" toName=\"text\">
|
||||||
|
<Label value=\"Positive\" background=\"#00ff00\"/>
|
||||||
|
<Label value=\"Negative\" background=\"#ff0000\"/>
|
||||||
|
</ParagraphLabels>
|
||||||
|
</View>
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
INTENT_CLASSIFICATION_LABEL_CONFIG = """<View>
|
||||||
|
<Labels name=\"labels\" toName=\"audio\">
|
||||||
|
<Label value=\"Segment\" />
|
||||||
|
</Labels>
|
||||||
|
<Audio name=\"audio\" value=\"$audio\"/>
|
||||||
|
<Choices name=\"intent\" toName=\"audio\" perRegion=\"true\" required=\"true\">
|
||||||
|
<Choice value=\"Question\" />
|
||||||
|
<Choice value=\"Request\" />
|
||||||
|
<Choice value=\"Satisfied\" />
|
||||||
|
<Choice value=\"Interested\" />
|
||||||
|
<Choice value=\"Unsatisfied\" />
|
||||||
|
</Choices>
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
SIGNAL_QUALITY_LABEL_CONFIG = """<View>
|
||||||
|
<Rating name=\"rating\" toName=\"audio\" maxRating=\"10\" icon=\"star\" size=\"medium\" />
|
||||||
|
<Audio name=\"audio\" value=\"$audio\"/>
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
SOUND_EVENT_DETECTION_LABEL_CONFIG = """<View>
|
||||||
|
<Labels name=\"label\" toName=\"audio\" zoom=\"true\" hotkey=\"ctrl+enter\">
|
||||||
|
<Label value=\"Event A\" background=\"red\"/>
|
||||||
|
<Label value=\"Event B\" background=\"green\"/>
|
||||||
|
</Labels>
|
||||||
|
<Audio name=\"audio\" value=\"$audio\"/>
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
SPEAKER_SEGMENTATION_LABEL_CONFIG = """<View>
|
||||||
|
<Labels name=\"label\" toName=\"audio\" zoom=\"true\" hotkey=\"ctrl+enter\">
|
||||||
|
<Label value=\"Speaker one\" background=\"#00FF00\"/>
|
||||||
|
<Label value=\"Speaker two\" background=\"#12ad59\"/>
|
||||||
|
</Labels>
|
||||||
|
<Audio name=\"audio\" value=\"$audio\" />
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
VIDEO_CLASSIFICATION_LABEL_CONFIG = """<View>
|
||||||
|
<Video name=\"video\" value=\"$video\"/>
|
||||||
|
<Choices name=\"choice\" toName=\"video\" showInline=\"true\">
|
||||||
|
<Choice value=\"Sports\" />
|
||||||
|
<Choice value=\"News\" />
|
||||||
|
<Choice value=\"Entertainment\" />
|
||||||
|
<Choice value=\"Education\" />
|
||||||
|
</Choices>
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
VIDEO_OBJECT_TRACKING_LABEL_CONFIG = """<View>
|
||||||
|
<Labels name=\"videoLabels\" toName=\"video\" allowEmpty=\"true\">
|
||||||
|
<Label value=\"Man\" background=\"blue\"/>
|
||||||
|
<Label value=\"Woman\" background=\"red\"/>
|
||||||
|
<Label value=\"Other\" background=\"green\"/>
|
||||||
|
</Labels>
|
||||||
|
<Video name=\"video\" value=\"$video\" framerate=\"25.0\"/>
|
||||||
|
<VideoRectangle name=\"box\" toName=\"video\" />
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
VIDEO_TIMELINE_SEGMENTATION_LABEL_CONFIG = """<View>
|
||||||
|
<Header value=\"视频时间线分割\"/>
|
||||||
|
<Video name=\"video\" value=\"$video_url\" sync=\"audio\"/>
|
||||||
|
<Labels name=\"tricks\" toName=\"audio\" choice=\"multiple\">
|
||||||
|
<Label value=\"Intro\" background=\"#358EF3\"/>
|
||||||
|
<Label value=\"Content\" background=\"#1BB500\"/>
|
||||||
|
<Label value=\"Outro\" background=\"#FFA91D\"/>
|
||||||
|
</Labels>
|
||||||
|
<Audio name=\"audio\" value=\"$video_url\" sync=\"video\" speed=\"false\"/>
|
||||||
|
</View>"""
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class BuiltInTemplateDefinition:
|
class BuiltInTemplateDefinition:
|
||||||
@@ -122,6 +225,122 @@ BUILT_IN_TEMPLATES: List[BuiltInTemplateDefinition] = [
|
|||||||
category=CATEGORY_COMPUTER_VISION,
|
category=CATEGORY_COMPUTER_VISION,
|
||||||
version=VERSION_DEFAULT,
|
version=VERSION_DEFAULT,
|
||||||
),
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-asr-segments-001",
|
||||||
|
name="语音识别(分段)",
|
||||||
|
description=(
|
||||||
|
"对音频进行语音活动分段并转录文本,适用于呼叫中心转录、会议记录、播客转录等场景。"
|
||||||
|
"关联模型:Whisper、Wav2Vec2、DeepSpeech"
|
||||||
|
),
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="asr-segments",
|
||||||
|
label_config=ASR_SEGMENTS_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-asr-001",
|
||||||
|
name="语音识别",
|
||||||
|
description=(
|
||||||
|
"转录音频内容,适用于播客转录、会议记录、客服通话、字幕生成等场景。"
|
||||||
|
"关联模型:Whisper、Wav2Vec、DeepSpeech"
|
||||||
|
),
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="asr",
|
||||||
|
label_config=ASR_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-conversation-analysis-001",
|
||||||
|
name="对话分析",
|
||||||
|
description="分析对话语句并标注事实和情感方面,适用于呼叫中心质检、客服分析、会议分析等场景",
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="conversation-analysis",
|
||||||
|
label_config=CONVERSATION_ANALYSIS_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-intent-classification-001",
|
||||||
|
name="意图分类",
|
||||||
|
description="进行语音活动分段并选择语音意图,适用于语音助手、智能音箱、IVR系统等场景",
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="intent-classification",
|
||||||
|
label_config=INTENT_CLASSIFICATION_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-signal-quality-001",
|
||||||
|
name="信号质量检测",
|
||||||
|
description="评估音频信号质量,适用于电信、呼叫中心质检、音频制作、VoIP质量评估等场景",
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="signal-quality",
|
||||||
|
label_config=SIGNAL_QUALITY_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-sound-event-001",
|
||||||
|
name="声音事件检测",
|
||||||
|
description="选择音频片段并分类声音事件,适用于安防监控、智慧城市、环境监测、工业监测等场景",
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="sound-event-detection",
|
||||||
|
label_config=SOUND_EVENT_DETECTION_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-speaker-segmentation-001",
|
||||||
|
name="说话人分割",
|
||||||
|
description="执行说话人分割/话者分离任务,适用于会议转录、播客制作、呼叫中心分析等场景",
|
||||||
|
data_type=DATA_TYPE_AUDIO,
|
||||||
|
labeling_type="speaker-segmentation",
|
||||||
|
label_config=SPEAKER_SEGMENTATION_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_AUDIO_SPEECH,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-video-classification-001",
|
||||||
|
name="视频分类",
|
||||||
|
description="对视频进行整体分类,适用于内容审核、媒体分析、质检等场景",
|
||||||
|
data_type=DATA_TYPE_VIDEO,
|
||||||
|
labeling_type="video-classification",
|
||||||
|
label_config=VIDEO_CLASSIFICATION_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_VIDEO,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-video-object-tracking-001",
|
||||||
|
name="视频目标追踪",
|
||||||
|
description="在视频中追踪目标对象,适用于安防监控、交通分析、行为分析等场景",
|
||||||
|
data_type=DATA_TYPE_VIDEO,
|
||||||
|
labeling_type="video-object-tracking",
|
||||||
|
label_config=VIDEO_OBJECT_TRACKING_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_VIDEO,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
|
BuiltInTemplateDefinition(
|
||||||
|
id="tpl-video-timeline-segmentation-001",
|
||||||
|
name="视频时间线分割",
|
||||||
|
description="对视频时间线进行分段标注,适用于视频剪辑、内容索引等场景",
|
||||||
|
data_type=DATA_TYPE_VIDEO,
|
||||||
|
labeling_type="video-timeline-segmentation",
|
||||||
|
label_config=VIDEO_TIMELINE_SEGMENTATION_LABEL_CONFIG,
|
||||||
|
style=STYLE_HORIZONTAL,
|
||||||
|
category=CATEGORY_VIDEO,
|
||||||
|
version=VERSION_DEFAULT,
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
assert len({template.id for template in BUILT_IN_TEMPLATES}) == len(BUILT_IN_TEMPLATES), (
|
assert len({template.id for template in BUILT_IN_TEMPLATES}) == len(BUILT_IN_TEMPLATES), (
|
||||||
|
|||||||
@@ -43,6 +43,8 @@ logger = get_logger(__name__)
|
|||||||
|
|
||||||
TEXT_DATA_KEY = "text"
|
TEXT_DATA_KEY = "text"
|
||||||
IMAGE_DATA_KEY = "image"
|
IMAGE_DATA_KEY = "image"
|
||||||
|
AUDIO_DATA_KEY = "audio"
|
||||||
|
VIDEO_DATA_KEY = "video"
|
||||||
DATASET_ID_KEY = "dataset_id"
|
DATASET_ID_KEY = "dataset_id"
|
||||||
FILE_ID_KEY = "file_id"
|
FILE_ID_KEY = "file_id"
|
||||||
FILE_NAME_KEY = "file_name"
|
FILE_NAME_KEY = "file_name"
|
||||||
@@ -53,9 +55,19 @@ SEGMENT_INDEX_KEY = "segment_index"
|
|||||||
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
|
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
|
||||||
JSONL_EXTENSION = ".jsonl"
|
JSONL_EXTENSION = ".jsonl"
|
||||||
TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
|
TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
|
||||||
MEDIA_OBJECT_CATEGORIES = {"image"}
|
IMAGE_OBJECT_CATEGORIES = {"image"}
|
||||||
|
MEDIA_OBJECT_CATEGORIES = {"media"}
|
||||||
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
|
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
|
||||||
SUPPORTED_EDITOR_DATASET_TYPES = ("TEXT", "IMAGE")
|
DATASET_TYPE_TEXT = "TEXT"
|
||||||
|
DATASET_TYPE_IMAGE = "IMAGE"
|
||||||
|
DATASET_TYPE_AUDIO = "AUDIO"
|
||||||
|
DATASET_TYPE_VIDEO = "VIDEO"
|
||||||
|
SUPPORTED_EDITOR_DATASET_TYPES = (
|
||||||
|
DATASET_TYPE_TEXT,
|
||||||
|
DATASET_TYPE_IMAGE,
|
||||||
|
DATASET_TYPE_AUDIO,
|
||||||
|
DATASET_TYPE_VIDEO,
|
||||||
|
)
|
||||||
SEGMENTATION_ENABLED_KEY = "segmentation_enabled"
|
SEGMENTATION_ENABLED_KEY = "segmentation_enabled"
|
||||||
|
|
||||||
|
|
||||||
@@ -174,21 +186,19 @@ class AnnotationEditorService:
|
|||||||
return keys[0]
|
return keys[0]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _resolve_primary_media_key(
|
def _resolve_media_value_keys(
|
||||||
cls,
|
cls,
|
||||||
label_config: Optional[str],
|
label_config: Optional[str],
|
||||||
default_key: str,
|
default_key: str,
|
||||||
categories: Optional[set[str]] = None,
|
categories: Optional[set[str]] = None,
|
||||||
) -> str:
|
) -> List[str]:
|
||||||
if not label_config:
|
if not label_config:
|
||||||
return default_key
|
return [default_key]
|
||||||
target_categories = categories or set()
|
target_categories = categories or set()
|
||||||
keys = cls._extract_object_value_keys_by_category(label_config, target_categories)
|
keys = cls._extract_object_value_keys_by_category(label_config, target_categories)
|
||||||
if not keys:
|
if not keys:
|
||||||
return default_key
|
return [default_key]
|
||||||
if default_key in keys:
|
return keys
|
||||||
return default_key
|
|
||||||
return keys[0]
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _try_parse_json_payload(text_content: str) -> Optional[Dict[str, Any]]:
|
def _try_parse_json_payload(text_content: str) -> Optional[Dict[str, Any]]:
|
||||||
@@ -467,7 +477,10 @@ class AnnotationEditorService:
|
|||||||
|
|
||||||
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
|
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
|
||||||
if dataset_type not in SUPPORTED_EDITOR_DATASET_TYPES:
|
if dataset_type not in SUPPORTED_EDITOR_DATASET_TYPES:
|
||||||
raise HTTPException(status_code=400, detail="当前仅支持 TEXT/IMAGE 项目的内嵌编辑器")
|
raise HTTPException(
|
||||||
|
status_code=400,
|
||||||
|
detail="当前仅支持 TEXT/IMAGE/AUDIO/VIDEO 项目的内嵌编辑器",
|
||||||
|
)
|
||||||
|
|
||||||
file_result = await self.db.execute(
|
file_result = await self.db.execute(
|
||||||
select(DatasetFiles).where(
|
select(DatasetFiles).where(
|
||||||
@@ -479,9 +492,15 @@ class AnnotationEditorService:
|
|||||||
if not file_record:
|
if not file_record:
|
||||||
raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
|
raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
|
||||||
|
|
||||||
if dataset_type == "IMAGE":
|
if dataset_type == DATASET_TYPE_IMAGE:
|
||||||
return await self._build_image_task(project, file_record, file_id)
|
return await self._build_image_task(project, file_record, file_id)
|
||||||
|
|
||||||
|
if dataset_type == DATASET_TYPE_AUDIO:
|
||||||
|
return await self._build_audio_task(project, file_record, file_id)
|
||||||
|
|
||||||
|
if dataset_type == DATASET_TYPE_VIDEO:
|
||||||
|
return await self._build_video_task(project, file_record, file_id)
|
||||||
|
|
||||||
return await self._build_text_task(project, file_record, file_id, segment_index)
|
return await self._build_text_task(project, file_record, file_id, segment_index)
|
||||||
|
|
||||||
async def _build_text_task(
|
async def _build_text_task(
|
||||||
@@ -668,23 +687,20 @@ class AnnotationEditorService:
|
|||||||
currentSegmentIndex=current_segment_index,
|
currentSegmentIndex=current_segment_index,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def _build_image_task(
|
async def _build_media_task(
|
||||||
self,
|
self,
|
||||||
project: LabelingProject,
|
project: LabelingProject,
|
||||||
file_record: DatasetFiles,
|
file_record: DatasetFiles,
|
||||||
file_id: str,
|
file_id: str,
|
||||||
|
default_key: str,
|
||||||
|
categories: set[str],
|
||||||
) -> EditorTaskResponse:
|
) -> EditorTaskResponse:
|
||||||
label_config = await self._resolve_project_label_config(project)
|
label_config = await self._resolve_project_label_config(project)
|
||||||
image_key = self._resolve_primary_media_key(
|
media_keys = self._resolve_media_value_keys(label_config, default_key, categories)
|
||||||
label_config,
|
|
||||||
IMAGE_DATA_KEY,
|
|
||||||
MEDIA_OBJECT_CATEGORIES,
|
|
||||||
)
|
|
||||||
preview_url = self._build_file_preview_url(project.dataset_id, file_id)
|
preview_url = self._build_file_preview_url(project.dataset_id, file_id)
|
||||||
file_name = str(getattr(file_record, "file_name", ""))
|
file_name = str(getattr(file_record, "file_name", ""))
|
||||||
|
|
||||||
task_data: Dict[str, Any] = {
|
task_data: Dict[str, Any] = {
|
||||||
image_key: preview_url,
|
|
||||||
FILE_ID_KEY: file_id,
|
FILE_ID_KEY: file_id,
|
||||||
FILE_ID_CAMEL_KEY: file_id,
|
FILE_ID_CAMEL_KEY: file_id,
|
||||||
DATASET_ID_KEY: project.dataset_id,
|
DATASET_ID_KEY: project.dataset_id,
|
||||||
@@ -692,6 +708,9 @@ class AnnotationEditorService:
|
|||||||
FILE_NAME_KEY: file_name,
|
FILE_NAME_KEY: file_name,
|
||||||
FILE_NAME_CAMEL_KEY: file_name,
|
FILE_NAME_CAMEL_KEY: file_name,
|
||||||
}
|
}
|
||||||
|
for key in media_keys:
|
||||||
|
task_data[key] = preview_url
|
||||||
|
self._apply_text_placeholders(task_data, label_config)
|
||||||
|
|
||||||
# 获取现有标注
|
# 获取现有标注
|
||||||
ann_result = await self.db.execute(
|
ann_result = await self.db.execute(
|
||||||
@@ -738,6 +757,48 @@ class AnnotationEditorService:
|
|||||||
currentSegmentIndex=0,
|
currentSegmentIndex=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
async def _build_image_task(
|
||||||
|
self,
|
||||||
|
project: LabelingProject,
|
||||||
|
file_record: DatasetFiles,
|
||||||
|
file_id: str,
|
||||||
|
) -> EditorTaskResponse:
|
||||||
|
return await self._build_media_task(
|
||||||
|
project=project,
|
||||||
|
file_record=file_record,
|
||||||
|
file_id=file_id,
|
||||||
|
default_key=IMAGE_DATA_KEY,
|
||||||
|
categories=IMAGE_OBJECT_CATEGORIES,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _build_audio_task(
|
||||||
|
self,
|
||||||
|
project: LabelingProject,
|
||||||
|
file_record: DatasetFiles,
|
||||||
|
file_id: str,
|
||||||
|
) -> EditorTaskResponse:
|
||||||
|
return await self._build_media_task(
|
||||||
|
project=project,
|
||||||
|
file_record=file_record,
|
||||||
|
file_id=file_id,
|
||||||
|
default_key=AUDIO_DATA_KEY,
|
||||||
|
categories=MEDIA_OBJECT_CATEGORIES,
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _build_video_task(
|
||||||
|
self,
|
||||||
|
project: LabelingProject,
|
||||||
|
file_record: DatasetFiles,
|
||||||
|
file_id: str,
|
||||||
|
) -> EditorTaskResponse:
|
||||||
|
return await self._build_media_task(
|
||||||
|
project=project,
|
||||||
|
file_record=file_record,
|
||||||
|
file_id=file_id,
|
||||||
|
default_key=VIDEO_DATA_KEY,
|
||||||
|
categories=MEDIA_OBJECT_CATEGORIES,
|
||||||
|
)
|
||||||
|
|
||||||
async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
|
async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
|
||||||
project = await self._get_project_or_404(project_id)
|
project = await self._get_project_or_404(project_id)
|
||||||
|
|
||||||
|
|||||||
@@ -95,10 +95,8 @@ DELETE FROM t_dm_annotation_templates WHERE built_in = 1;
|
|||||||
|
|
||||||
-- =============================================
|
-- =============================================
|
||||||
-- 1. 音频/语音处理 (Audio/Speech Processing)
|
-- 1. 音频/语音处理 (Audio/Speech Processing)
|
||||||
-- [已注释] 以下模板暂未调试完成
|
-- 已启用默认模板
|
||||||
-- =============================================
|
-- =============================================
|
||||||
|
|
||||||
/*
|
|
||||||
-- 1.1 自动语音识别(分段)
|
-- 1.1 自动语音识别(分段)
|
||||||
INSERT INTO t_dm_annotation_templates (
|
INSERT INTO t_dm_annotation_templates (
|
||||||
id, name, description, data_type, labeling_type,
|
id, name, description, data_type, labeling_type,
|
||||||
@@ -318,7 +316,6 @@ INSERT INTO t_dm_annotation_templates (
|
|||||||
1,
|
1,
|
||||||
'1.0.0'
|
'1.0.0'
|
||||||
);
|
);
|
||||||
*/
|
|
||||||
|
|
||||||
-- =============================================
|
-- =============================================
|
||||||
-- 2. 聊天评估 (Chat)
|
-- 2. 聊天评估 (Chat)
|
||||||
@@ -1276,10 +1273,8 @@ INSERT INTO t_dm_annotation_templates (
|
|||||||
|
|
||||||
-- =============================================
|
-- =============================================
|
||||||
-- 9. 视频处理 (Videos)
|
-- 9. 视频处理 (Videos)
|
||||||
-- [已注释] 以下模板暂未调试完成
|
-- 已启用默认模板
|
||||||
-- =============================================
|
-- =============================================
|
||||||
|
|
||||||
/*
|
|
||||||
-- 9.1 视频分类
|
-- 9.1 视频分类
|
||||||
INSERT INTO t_dm_annotation_templates (
|
INSERT INTO t_dm_annotation_templates (
|
||||||
id, name, description, data_type, labeling_type,
|
id, name, description, data_type, labeling_type,
|
||||||
@@ -1298,7 +1293,7 @@ INSERT INTO t_dm_annotation_templates (
|
|||||||
),
|
),
|
||||||
'<View>
|
'<View>
|
||||||
<Video name="video" value="$video"/>
|
<Video name="video" value="$video"/>
|
||||||
<Choices name="choice" toName="video">
|
<Choices name="choice" toName="video" showInline="true">
|
||||||
<Choice value="Sports"/>
|
<Choice value="Sports"/>
|
||||||
<Choice value="News"/>
|
<Choice value="News"/>
|
||||||
<Choice value="Entertainment"/>
|
<Choice value="Entertainment"/>
|
||||||
@@ -1323,18 +1318,18 @@ INSERT INTO t_dm_annotation_templates (
|
|||||||
'video-object-tracking',
|
'video-object-tracking',
|
||||||
JSON_OBJECT(
|
JSON_OBJECT(
|
||||||
'labels', JSON_ARRAY(
|
'labels', JSON_ARRAY(
|
||||||
JSON_OBJECT('fromName', 'label', 'toName', 'video', 'type', 'VideoRectangle', 'labels', JSON_ARRAY('Person', 'Car', 'Object'))
|
JSON_OBJECT('fromName', 'videoLabels', 'toName', 'video', 'type', 'Labels', 'labels', JSON_ARRAY('Man', 'Woman', 'Other'))
|
||||||
),
|
),
|
||||||
'objects', JSON_ARRAY(JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video'))
|
'objects', JSON_ARRAY(JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video'))
|
||||||
),
|
),
|
||||||
'<View>
|
'<View>
|
||||||
<Labels name="label" toName="video">
|
<Labels name="videoLabels" toName="video" allowEmpty="true">
|
||||||
<Label value="Person" background="red"/>
|
<Label value="Man" background="blue"/>
|
||||||
<Label value="Car" background="blue"/>
|
<Label value="Woman" background="red"/>
|
||||||
<Label value="Object" background="green"/>
|
<Label value="Other" background="green"/>
|
||||||
</Labels>
|
</Labels>
|
||||||
<VideoRectangle name="box" toName="video"/>
|
<Video name="video" value="$video" framerate="25.0"/>
|
||||||
<Video name="video" value="$video"/>
|
<VideoRectangle name="box" toName="video" />
|
||||||
</View>',
|
</View>',
|
||||||
'horizontal',
|
'horizontal',
|
||||||
'video',
|
'video',
|
||||||
@@ -1354,24 +1349,28 @@ INSERT INTO t_dm_annotation_templates (
|
|||||||
'video-timeline-segmentation',
|
'video-timeline-segmentation',
|
||||||
JSON_OBJECT(
|
JSON_OBJECT(
|
||||||
'labels', JSON_ARRAY(
|
'labels', JSON_ARRAY(
|
||||||
JSON_OBJECT('fromName', 'label', 'toName', 'video', 'type', 'Labels', 'labels', JSON_ARRAY('Intro', 'Content', 'Outro'))
|
JSON_OBJECT('fromName', 'tricks', 'toName', 'audio', 'type', 'Labels', 'labels', JSON_ARRAY('Intro', 'Content', 'Outro'))
|
||||||
),
|
),
|
||||||
'objects', JSON_ARRAY(JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video'))
|
'objects', JSON_ARRAY(
|
||||||
|
JSON_OBJECT('name', 'video', 'type', 'Video', 'value', '$video_url'),
|
||||||
|
JSON_OBJECT('name', 'audio', 'type', 'Audio', 'value', '$video_url')
|
||||||
|
)
|
||||||
),
|
),
|
||||||
'<View>
|
'<View>
|
||||||
<Labels name="label" toName="video">
|
<Header value="视频时间线分割"/>
|
||||||
<Label value="Intro" background="blue"/>
|
<Video name="video" value="$video_url" sync="audio"/>
|
||||||
<Label value="Content" background="green"/>
|
<Labels name="tricks" toName="audio" choice="multiple">
|
||||||
<Label value="Outro" background="orange"/>
|
<Label value="Intro" background="#358EF3"/>
|
||||||
|
<Label value="Content" background="#1BB500"/>
|
||||||
|
<Label value="Outro" background="#FFA91D"/>
|
||||||
</Labels>
|
</Labels>
|
||||||
<Video name="video" value="$video"/>
|
<Audio name="audio" value="$video_url" sync="video" speed="false"/>
|
||||||
</View>',
|
</View>',
|
||||||
'horizontal',
|
'horizontal',
|
||||||
'video',
|
'video',
|
||||||
1,
|
1,
|
||||||
'1.0.0'
|
'1.0.0'
|
||||||
);
|
);
|
||||||
*/
|
|
||||||
|
|
||||||
-- =============================================
|
-- =============================================
|
||||||
-- 10. 对话AI (Conversational AI)
|
-- 10. 对话AI (Conversational AI)
|
||||||
|
|||||||
Reference in New Issue
Block a user