You've already forked DataMate
feat(annotation): 添加文本数据集段落切片功能
- 在前端组件中新增 segmentationEnabled 字段控制切片开关
- 为文本数据集添加段落切片配置选项,默认启用切片功能
- 在后端接口中新增 segmentation_enabled 参数传递给标注项目
- 实现切片逻辑控制,支持文本数据的自动段落分割
- 添加数据集类型判断,仅文本数据集支持切片配置
- 更新标注任务创建和编辑表单中的切片相关字段处理
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
import { queryDatasetsUsingGet, queryDatasetFilesUsingGet } from "@/pages/DataManagement/dataset.api";
|
||||
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
||||
import { App, Button, Form, Input, Modal, Select, Radio, Table } from "antd";
|
||||
import type { RadioChangeEvent } from "antd";
|
||||
import TextArea from "antd/es/input/TextArea";
|
||||
import { useEffect, useState } from "react";
|
||||
import { useEffect, useMemo, useState } from "react";
|
||||
import type { ReactNode } from "react";
|
||||
import { Eye } from "lucide-react";
|
||||
import {
|
||||
createAnnotationTaskUsingPost,
|
||||
@@ -10,7 +12,7 @@ import {
|
||||
updateAnnotationTaskByIdUsingPut,
|
||||
queryAnnotationTemplatesUsingGet,
|
||||
} from "../../annotation.api";
|
||||
import { type Dataset } from "@/pages/DataManagement/dataset.model";
|
||||
import { DatasetType, type Dataset } from "@/pages/DataManagement/dataset.model";
|
||||
import type { AnnotationTemplate, AnnotationTask } from "../../annotation.model";
|
||||
import LabelStudioEmbed from "@/components/business/LabelStudioEmbed";
|
||||
import TemplateConfigurationTreeEditor from "../../components/TemplateConfigurationTreeEditor";
|
||||
@@ -24,6 +26,38 @@ interface AnnotationTaskDialogProps {
|
||||
editTask?: AnnotationTask | null;
|
||||
}
|
||||
|
||||
/**
 * A dataset entry as stored in this dialog's dataset list: the plain
 * `Dataset` fields plus an optional icon node rendered next to the dataset
 * name in the selector label.
 */
type DatasetOption = Dataset & {
  icon?: ReactNode;
};
|
||||
|
||||
/**
 * Shape of a single file row shown in the dataset preview table.
 *
 * Only `id` is required; the remaining fields are optional because the
 * preview payload is cast to this type rather than validated.
 */
type DatasetPreviewFile = {
  /** Identifier of the file. */
  id: string;
  /** File name; read (lower-cased) when deciding how to preview the file. */
  fileName?: string;
  /** File size — NOTE(review): unit is not visible here, presumably bytes; confirm. */
  fileSize?: number;
};
|
||||
|
||||
/**
 * Subset of the annotation-task detail payload that the edit form reads.
 *
 * Every field is optional: the form falls back to defaults when a value is
 * missing (for example, a non-boolean `segmentationEnabled` is replaced by
 * the default segmentation setting).
 */
type AnnotationTaskDetail = {
  /** Task name shown in the form. */
  name?: string;
  /** Free-text task description. */
  description?: string;
  /** Id of the dataset the task is bound to. */
  datasetId?: string;
  /** The task's own label configuration XML; preferred over the template's. */
  labelConfig?: string;
  /** Template the task references; its `labelConfig` is the fallback XML. */
  template?: { labelConfig?: string };
  /** Whether paragraph segmentation is enabled for the task. */
  segmentationEnabled?: boolean;
};
|
||||
|
||||
/**
 * Generic envelope for API responses consumed by this dialog.
 *
 * All fields are optional, so callers must check `code` and the presence of
 * `data` before using the payload.
 *
 * @typeParam T - Type of the payload carried in `data`.
 */
type ApiResponse<T> = {
  /** Status code reported by the backend. */
  code?: number;
  /** Human-readable message accompanying the response. */
  message?: string;
  /** Response payload, when present. */
  data?: T;
};
|
||||
|
||||
/**
 * Type guard: reports whether `value` is a non-null, non-array object, so its
 * properties can be read as `unknown` without a type assertion.
 *
 * @param value - Any value of unknown shape.
 * @returns `true` for plain objects and other non-array object instances;
 *          `false` for `null`, `undefined`, primitives, functions and arrays.
 */
const isRecord = (value: unknown): value is Record<string, unknown> =>
  // `typeof null` is "object", so null has to be excluded explicitly.
  typeof value === "object" && value !== null && !Array.isArray(value);
|
||||
|
||||
/** Segmentation setting applied when the form or the loaded task provides none. */
const DEFAULT_SEGMENTATION_ENABLED = true;

/**
 * Choices offered by the segmentation radio group: one entry enabling
 * paragraph segmentation and one disabling it. The labels are user-facing
 * strings and the values are the booleans stored in the form field.
 */
const SEGMENTATION_OPTIONS = [
  { label: "需要切片段", value: true },
  { label: "不需要切片段", value: false },
];
|
||||
|
||||
export default function CreateAnnotationTask({
|
||||
open,
|
||||
onClose,
|
||||
@@ -33,19 +67,19 @@ export default function CreateAnnotationTask({
|
||||
const isEditMode = !!editTask;
|
||||
const { message } = App.useApp();
|
||||
const [manualForm] = Form.useForm();
|
||||
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
||||
const [datasets, setDatasets] = useState<DatasetOption[]>([]);
|
||||
const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
|
||||
const [submitting, setSubmitting] = useState(false);
|
||||
const [nameManuallyEdited, setNameManuallyEdited] = useState(false);
|
||||
|
||||
const [labelConfig, setLabelConfig] = useState("");
|
||||
const [showPreview, setShowPreview] = useState(false);
|
||||
const [previewTaskData, setPreviewTaskData] = useState<Record<string, any>>({});
|
||||
const [previewTaskData, setPreviewTaskData] = useState<Record<string, unknown>>({});
|
||||
const [configMode, setConfigMode] = useState<"template" | "custom">("template");
|
||||
|
||||
// 数据集预览相关状态
|
||||
const [datasetPreviewVisible, setDatasetPreviewVisible] = useState(false);
|
||||
const [datasetPreviewData, setDatasetPreviewData] = useState<any[]>([]);
|
||||
const [datasetPreviewData, setDatasetPreviewData] = useState<DatasetPreviewFile[]>([]);
|
||||
const [datasetPreviewLoading, setDatasetPreviewLoading] = useState(false);
|
||||
const [selectedDatasetId, setSelectedDatasetId] = useState<string | null>(null);
|
||||
|
||||
@@ -61,6 +95,12 @@ export default function CreateAnnotationTask({
|
||||
const [taskDetailLoading, setTaskDetailLoading] = useState(false);
|
||||
const { config: tagConfig } = useTagConfig(false);
|
||||
|
||||
const selectedDataset = useMemo(
|
||||
() => datasets.find((dataset) => dataset.id === selectedDatasetId),
|
||||
[datasets, selectedDatasetId]
|
||||
);
|
||||
const isTextDataset = selectedDataset?.datasetType === DatasetType.TEXT;
|
||||
|
||||
useEffect(() => {
|
||||
if (!open) return;
|
||||
const fetchData = async () => {
|
||||
@@ -107,7 +147,7 @@ export default function CreateAnnotationTask({
|
||||
// 编辑模式:加载任务详情
|
||||
setTaskDetailLoading(true);
|
||||
getAnnotationTaskByIdUsingGet(editTask.id)
|
||||
.then((res: any) => {
|
||||
.then((res: ApiResponse<AnnotationTaskDetail>) => {
|
||||
if (res.code === 200 && res.data) {
|
||||
const taskDetail = res.data;
|
||||
// 填充基本信息
|
||||
@@ -115,8 +155,13 @@ export default function CreateAnnotationTask({
|
||||
name: taskDetail.name,
|
||||
description: taskDetail.description,
|
||||
datasetId: taskDetail.datasetId,
|
||||
segmentationEnabled: typeof taskDetail.segmentationEnabled === "boolean"
|
||||
? taskDetail.segmentationEnabled
|
||||
: DEFAULT_SEGMENTATION_ENABLED,
|
||||
});
|
||||
setSelectedDatasetId(taskDetail.datasetId);
|
||||
if (taskDetail.datasetId) {
|
||||
setSelectedDatasetId(taskDetail.datasetId);
|
||||
}
|
||||
|
||||
// 获取实际的 labelConfig(优先使用任务自身的配置,回退到模板配置)
|
||||
const configXml = taskDetail.labelConfig || taskDetail.template?.labelConfig;
|
||||
@@ -140,6 +185,9 @@ export default function CreateAnnotationTask({
|
||||
// 创建模式:重置为默认状态
|
||||
setConfigMode("template");
|
||||
setSelectedDatasetId(null);
|
||||
manualForm.setFieldsValue({
|
||||
segmentationEnabled: DEFAULT_SEGMENTATION_ENABLED,
|
||||
});
|
||||
}
|
||||
}
|
||||
}, [open, manualForm, isEditMode, editTask, message]);
|
||||
@@ -154,7 +202,7 @@ export default function CreateAnnotationTask({
|
||||
try {
|
||||
const res = await queryDatasetFilesUsingGet(selectedDatasetId, { page: 0, size: 10 });
|
||||
if (res.code === '0' && res.data) {
|
||||
setDatasetPreviewData(res.data.content || []);
|
||||
setDatasetPreviewData((res.data.content || []) as DatasetPreviewFile[]);
|
||||
setDatasetPreviewVisible(true);
|
||||
} else {
|
||||
message.error("获取数据集预览失败");
|
||||
@@ -168,7 +216,7 @@ export default function CreateAnnotationTask({
|
||||
};
|
||||
|
||||
// 预览文件内容
|
||||
const handlePreviewFileContent = async (file: any) => {
|
||||
const handlePreviewFileContent = async (file: DatasetPreviewFile) => {
|
||||
const fileName = file.fileName?.toLowerCase() || '';
|
||||
|
||||
// 文件类型扩展名映射
|
||||
@@ -318,7 +366,7 @@ export default function CreateAnnotationTask({
|
||||
};
|
||||
|
||||
const generatePreviewTaskDataFromLabelConfig = (xml: string) => {
|
||||
const exampleDataByType: Record<string, any> = {
|
||||
const exampleDataByType: Record<string, unknown> = {
|
||||
Image: "https://labelstud.io/images/opa-header.png",
|
||||
Audio: "https://labelstud.io/files/sample.wav",
|
||||
AudioPlus: "https://labelstud.io/files/sample.wav",
|
||||
@@ -350,7 +398,7 @@ export default function CreateAnnotationTask({
|
||||
};
|
||||
}
|
||||
|
||||
const data: Record<string, any> = {};
|
||||
const data: Record<string, unknown> = {};
|
||||
objects.forEach((obj) => {
|
||||
const name = obj.name || "";
|
||||
const value = obj.value || "";
|
||||
@@ -380,14 +428,16 @@ export default function CreateAnnotationTask({
|
||||
};
|
||||
|
||||
// 当选择模板时,加载 XML 配置到树编辑器(仅快速填充)
|
||||
const handleTemplateSelect = (value: string, option: any) => {
|
||||
const handleTemplateSelect = (value: string, option: unknown) => {
|
||||
if (!value) {
|
||||
setLabelConfig("");
|
||||
return;
|
||||
}
|
||||
|
||||
const selectedTemplate = templates.find((template) => template.id === value);
|
||||
const configXml = selectedTemplate?.labelConfig || option?.config || "";
|
||||
const configXml = selectedTemplate?.labelConfig
|
||||
|| (isRecord(option) && typeof option.config === "string" ? option.config : "")
|
||||
|| "";
|
||||
setLabelConfig(configXml);
|
||||
};
|
||||
|
||||
@@ -437,6 +487,10 @@ export default function CreateAnnotationTask({
|
||||
templateId: configMode === "template" ? values.templateId : undefined,
|
||||
labelConfig: labelConfig.trim(),
|
||||
};
|
||||
if (!isEditMode && isTextDataset) {
|
||||
requestData.segmentationEnabled =
|
||||
values.segmentationEnabled ?? DEFAULT_SEGMENTATION_ENABLED;
|
||||
}
|
||||
|
||||
if (isEditMode && editTask) {
|
||||
// 编辑模式:调用更新接口
|
||||
@@ -449,16 +503,17 @@ export default function CreateAnnotationTask({
|
||||
}
|
||||
onClose();
|
||||
onRefresh();
|
||||
} catch (err: any) {
|
||||
} catch (err: unknown) {
|
||||
console.error(isEditMode ? "Update annotation task failed" : "Create annotation task failed", err);
|
||||
const msg = err?.message || err?.data?.message || (isEditMode ? "更新失败,请稍后重试" : "创建失败,请稍后重试");
|
||||
const error = err as { message?: string; data?: { message?: string } };
|
||||
const msg = error?.message || error?.data?.message || (isEditMode ? "更新失败,请稍后重试" : "创建失败,请稍后重试");
|
||||
message.error(msg);
|
||||
} finally {
|
||||
setSubmitting(false);
|
||||
}
|
||||
};
|
||||
|
||||
const handleConfigModeChange = (e: any) => {
|
||||
const handleConfigModeChange = (e: RadioChangeEvent) => {
|
||||
const mode = e.target.value;
|
||||
setConfigMode(mode);
|
||||
if (mode === "custom") {
|
||||
@@ -521,7 +576,7 @@ export default function CreateAnnotationTask({
|
||||
label: (
|
||||
<div className="flex items-center justify-between gap-3 py-2">
|
||||
<div className="flex items-center font-sm text-gray-900">
|
||||
<span className="mr-2">{(dataset as any).icon}</span>
|
||||
<span className="mr-2">{dataset.icon}</span>
|
||||
<span>{dataset.name}</span>
|
||||
</div>
|
||||
<div className="text-xs text-gray-500">{dataset.size}</div>
|
||||
@@ -532,6 +587,17 @@ export default function CreateAnnotationTask({
|
||||
})}
|
||||
onChange={(value) => {
|
||||
setSelectedDatasetId(value);
|
||||
const dataset = datasets.find((item) => item.id === value);
|
||||
if (dataset?.datasetType === DatasetType.TEXT) {
|
||||
const currentValue = manualForm.getFieldValue("segmentationEnabled");
|
||||
if (currentValue === undefined) {
|
||||
manualForm.setFieldsValue({
|
||||
segmentationEnabled: DEFAULT_SEGMENTATION_ENABLED,
|
||||
});
|
||||
}
|
||||
} else if (dataset) {
|
||||
manualForm.setFieldsValue({ segmentationEnabled: false });
|
||||
}
|
||||
// 如果用户未手动修改名称,则用数据集名称作为默认任务名
|
||||
if (!nameManuallyEdited) {
|
||||
const ds = datasets.find((d) => d.id === value);
|
||||
@@ -578,6 +644,28 @@ export default function CreateAnnotationTask({
|
||||
<TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={2} />
|
||||
</Form.Item>
|
||||
|
||||
<Form.Item
|
||||
label="段落切片"
|
||||
name="segmentationEnabled"
|
||||
initialValue={DEFAULT_SEGMENTATION_ENABLED}
|
||||
extra={
|
||||
isEditMode
|
||||
? "编辑模式暂不支持修改"
|
||||
: !selectedDatasetId
|
||||
? "请选择数据集后配置"
|
||||
: isTextDataset
|
||||
? "仅文本数据集可配置该项"
|
||||
: "非文本数据集不支持切片段"
|
||||
}
|
||||
>
|
||||
<Radio.Group
|
||||
options={SEGMENTATION_OPTIONS}
|
||||
optionType="button"
|
||||
buttonStyle="solid"
|
||||
disabled={!isTextDataset || isEditMode}
|
||||
/>
|
||||
</Form.Item>
|
||||
|
||||
{/* 标注模板选择 */}
|
||||
<div className="flex items-center justify-between mb-2">
|
||||
<span className="text-sm font-medium text-gray-700 after:content-['*'] after:text-red-500 after:ml-1">标注配置</span>
|
||||
@@ -721,7 +809,7 @@ export default function CreateAnnotationTask({
|
||||
dataIndex: "fileName",
|
||||
key: "fileName",
|
||||
ellipsis: true,
|
||||
render: (text: string, record: any) => (
|
||||
render: (text: string, record: DatasetPreviewFile) => (
|
||||
<Button
|
||||
type="link"
|
||||
size="small"
|
||||
|
||||
Reference in New Issue
Block a user