You've already forked DataMate
feat(annotation): 添加文本数据集段落切片功能
- 在前端组件中新增 segmentationEnabled 字段控制切片开关
- 为文本数据集添加段落切片配置选项,默认启用切片功能
- 在后端接口中新增 segmentation_enabled 参数传递给标注项目
- 实现切片逻辑控制,支持文本数据的自动段落分割
- 添加数据集类型判断,仅文本数据集支持切片配置
- 更新标注任务创建和编辑表单中的切片相关字段处理
This commit is contained in:
@@ -824,7 +824,7 @@ export default function LabelStudioTextEditor() {
|
|||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
{segmented && (
|
{segmented && (
|
||||||
<div className="border-t border-gray-200 bg-white flex flex-col min-h-0">
|
<div className="flex-1 border-t border-gray-200 bg-white flex flex-col min-h-0">
|
||||||
<div className="px-3 py-2 border-b border-gray-200 bg-gray-50 font-medium text-sm flex items-center justify-between">
|
<div className="px-3 py-2 border-b border-gray-200 bg-gray-50 font-medium text-sm flex items-center justify-between">
|
||||||
<span>段落/分段</span>
|
<span>段落/分段</span>
|
||||||
<Tag color="blue" style={{ margin: 0 }}>
|
<Tag color="blue" style={{ margin: 0 }}>
|
||||||
|
|||||||
@@ -1,12 +1,13 @@
|
|||||||
import { useEffect, useState } from "react";
|
import { useEffect, useMemo, useState } from "react";
|
||||||
import { Button, Input, Select, Form, message, Radio } from "antd";
|
import { Button, Input, Select, Form, message, Radio } from "antd";
|
||||||
|
import type { RadioChangeEvent } from "antd";
|
||||||
import TextArea from "antd/es/input/TextArea";
|
import TextArea from "antd/es/input/TextArea";
|
||||||
import { DatabaseOutlined } from "@ant-design/icons";
|
import { DatabaseOutlined } from "@ant-design/icons";
|
||||||
import { Link, useNavigate } from "react-router";
|
import { Link, useNavigate } from "react-router";
|
||||||
import { ArrowLeft } from "lucide-react";
|
import { ArrowLeft } from "lucide-react";
|
||||||
import { queryDatasetsUsingGet } from "../../DataManagement/dataset.api";
|
import { queryDatasetsUsingGet } from "../../DataManagement/dataset.api";
|
||||||
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
||||||
import type { Dataset } from "@/pages/DataManagement/dataset.model";
|
import { DatasetType, type Dataset } from "@/pages/DataManagement/dataset.model";
|
||||||
import {
|
import {
|
||||||
createAnnotationTaskUsingPost,
|
createAnnotationTaskUsingPost,
|
||||||
queryAnnotationTemplatesUsingGet,
|
queryAnnotationTemplatesUsingGet,
|
||||||
@@ -14,20 +15,33 @@ import {
|
|||||||
import type { AnnotationTemplate } from "../annotation.model";
|
import type { AnnotationTemplate } from "../annotation.model";
|
||||||
import TemplateConfigurationTreeEditor from "../components/TemplateConfigurationTreeEditor";
|
import TemplateConfigurationTreeEditor from "../components/TemplateConfigurationTreeEditor";
|
||||||
|
|
||||||
|
/**
 * Default value of the `segmentationEnabled` form field, and the fallback
 * used on submit when the form reports no value for it.
 */
const DEFAULT_SEGMENTATION_ENABLED = true;

/**
 * Choices offered by the segmentation radio group.
 * `true` turns paragraph segmentation on, `false` turns it off.
 * The labels are user-facing text and are kept verbatim.
 */
const SEGMENTATION_OPTIONS = [
  { label: "需要切片段", value: true },
  { label: "不需要切片段", value: false },
];
|
||||||
|
|
||||||
export default function AnnotationTaskCreate() {
|
export default function AnnotationTaskCreate() {
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
const [form] = Form.useForm();
|
const [form] = Form.useForm();
|
||||||
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
||||||
const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
|
const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
|
||||||
|
const [selectedDatasetId, setSelectedDatasetId] = useState<string | null>(null);
|
||||||
const [labelConfig, setLabelConfig] = useState("");
|
const [labelConfig, setLabelConfig] = useState("");
|
||||||
const [configMode, setConfigMode] = useState<"template" | "custom">("template");
|
const [configMode, setConfigMode] = useState<"template" | "custom">("template");
|
||||||
const [submitting, setSubmitting] = useState(false);
|
const [submitting, setSubmitting] = useState(false);
|
||||||
|
|
||||||
|
const selectedDataset = useMemo(
|
||||||
|
() => datasets.find((dataset) => dataset.id === selectedDatasetId),
|
||||||
|
[datasets, selectedDatasetId]
|
||||||
|
);
|
||||||
|
const isTextDataset = selectedDataset?.datasetType === DatasetType.TEXT;
|
||||||
|
|
||||||
const fetchDatasets = async () => {
|
const fetchDatasets = async () => {
|
||||||
try {
|
try {
|
||||||
const { data } = await queryDatasetsUsingGet({ page: 0, pageSize: 1000 });
|
const { data } = await queryDatasetsUsingGet({ page: 0, pageSize: 1000 });
|
||||||
const list = data?.content || [];
|
const list = data?.content || [];
|
||||||
setDatasets(list.map((item: any) => mapDataset(item)) || []);
|
setDatasets(list.map((item) => mapDataset(item)) || []);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("加载数据集失败:", error);
|
console.error("加载数据集失败:", error);
|
||||||
message.error("加载数据集失败");
|
message.error("加载数据集失败");
|
||||||
@@ -62,7 +76,7 @@ export default function AnnotationTaskCreate() {
|
|||||||
setLabelConfig(selectedTemplate?.labelConfig || "");
|
setLabelConfig(selectedTemplate?.labelConfig || "");
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleConfigModeChange = (e: any) => {
|
const handleConfigModeChange = (e: RadioChangeEvent) => {
|
||||||
const mode = e.target.value;
|
const mode = e.target.value;
|
||||||
setConfigMode(mode);
|
setConfigMode(mode);
|
||||||
if (mode === "custom") {
|
if (mode === "custom") {
|
||||||
@@ -79,20 +93,26 @@ export default function AnnotationTaskCreate() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
setSubmitting(true);
|
setSubmitting(true);
|
||||||
await createAnnotationTaskUsingPost({
|
const requestData: Record<string, unknown> = {
|
||||||
name: values.name,
|
name: values.name,
|
||||||
description: values.description,
|
description: values.description,
|
||||||
datasetId: values.datasetId,
|
datasetId: values.datasetId,
|
||||||
templateId: configMode === "template" ? values.templateId : undefined,
|
templateId: configMode === "template" ? values.templateId : undefined,
|
||||||
labelConfig: labelConfig.trim(),
|
labelConfig: labelConfig.trim(),
|
||||||
});
|
};
|
||||||
|
if (isTextDataset) {
|
||||||
|
requestData.segmentationEnabled =
|
||||||
|
values.segmentationEnabled ?? DEFAULT_SEGMENTATION_ENABLED;
|
||||||
|
}
|
||||||
|
await createAnnotationTaskUsingPost(requestData);
|
||||||
message.success("标注任务创建成功");
|
message.success("标注任务创建成功");
|
||||||
navigate("/data/annotation");
|
navigate("/data/annotation");
|
||||||
} catch (error: any) {
|
} catch (error: unknown) {
|
||||||
if (error?.errorFields) {
|
const err = error as { errorFields?: unknown; message?: string; data?: { message?: string } };
|
||||||
|
if (err?.errorFields) {
|
||||||
message.error("请完善必填信息");
|
message.error("请完善必填信息");
|
||||||
} else {
|
} else {
|
||||||
const msg = error?.message || error?.data?.message || "创建失败,请稍后重试";
|
const msg = err?.message || err?.data?.message || "创建失败,请稍后重试";
|
||||||
message.error(msg);
|
message.error(msg);
|
||||||
console.error(error);
|
console.error(error);
|
||||||
}
|
}
|
||||||
@@ -149,6 +169,40 @@ export default function AnnotationTaskCreate() {
|
|||||||
),
|
),
|
||||||
value: dataset.id,
|
value: dataset.id,
|
||||||
}))}
|
}))}
|
||||||
|
onChange={(value) => {
|
||||||
|
setSelectedDatasetId(value);
|
||||||
|
const dataset = datasets.find((item) => item.id === value);
|
||||||
|
if (dataset?.datasetType === DatasetType.TEXT) {
|
||||||
|
const currentValue = form.getFieldValue("segmentationEnabled");
|
||||||
|
if (currentValue === undefined) {
|
||||||
|
form.setFieldsValue({
|
||||||
|
segmentationEnabled: DEFAULT_SEGMENTATION_ENABLED,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (dataset) {
|
||||||
|
form.setFieldsValue({ segmentationEnabled: false });
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</Form.Item>
|
||||||
|
|
||||||
|
<Form.Item
|
||||||
|
label="段落切片"
|
||||||
|
name="segmentationEnabled"
|
||||||
|
initialValue={DEFAULT_SEGMENTATION_ENABLED}
|
||||||
|
extra={
|
||||||
|
!selectedDatasetId
|
||||||
|
? "请选择数据集后配置"
|
||||||
|
: isTextDataset
|
||||||
|
? "仅文本数据集可配置该项"
|
||||||
|
: "非文本数据集不支持切片段"
|
||||||
|
}
|
||||||
|
>
|
||||||
|
<Radio.Group
|
||||||
|
options={SEGMENTATION_OPTIONS}
|
||||||
|
optionType="button"
|
||||||
|
buttonStyle="solid"
|
||||||
|
disabled={!isTextDataset}
|
||||||
/>
|
/>
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
import { queryDatasetsUsingGet, queryDatasetFilesUsingGet } from "@/pages/DataManagement/dataset.api";
|
import { queryDatasetsUsingGet, queryDatasetFilesUsingGet } from "@/pages/DataManagement/dataset.api";
|
||||||
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
import { mapDataset } from "@/pages/DataManagement/dataset.const";
|
||||||
import { App, Button, Form, Input, Modal, Select, Radio, Table } from "antd";
|
import { App, Button, Form, Input, Modal, Select, Radio, Table } from "antd";
|
||||||
|
import type { RadioChangeEvent } from "antd";
|
||||||
import TextArea from "antd/es/input/TextArea";
|
import TextArea from "antd/es/input/TextArea";
|
||||||
import { useEffect, useState } from "react";
|
import { useEffect, useMemo, useState } from "react";
|
||||||
|
import type { ReactNode } from "react";
|
||||||
import { Eye } from "lucide-react";
|
import { Eye } from "lucide-react";
|
||||||
import {
|
import {
|
||||||
createAnnotationTaskUsingPost,
|
createAnnotationTaskUsingPost,
|
||||||
@@ -10,7 +12,7 @@ import {
|
|||||||
updateAnnotationTaskByIdUsingPut,
|
updateAnnotationTaskByIdUsingPut,
|
||||||
queryAnnotationTemplatesUsingGet,
|
queryAnnotationTemplatesUsingGet,
|
||||||
} from "../../annotation.api";
|
} from "../../annotation.api";
|
||||||
import { type Dataset } from "@/pages/DataManagement/dataset.model";
|
import { DatasetType, type Dataset } from "@/pages/DataManagement/dataset.model";
|
||||||
import type { AnnotationTemplate, AnnotationTask } from "../../annotation.model";
|
import type { AnnotationTemplate, AnnotationTask } from "../../annotation.model";
|
||||||
import LabelStudioEmbed from "@/components/business/LabelStudioEmbed";
|
import LabelStudioEmbed from "@/components/business/LabelStudioEmbed";
|
||||||
import TemplateConfigurationTreeEditor from "../../components/TemplateConfigurationTreeEditor";
|
import TemplateConfigurationTreeEditor from "../../components/TemplateConfigurationTreeEditor";
|
||||||
@@ -24,6 +26,38 @@ interface AnnotationTaskDialogProps {
|
|||||||
editTask?: AnnotationTask | null;
|
editTask?: AnnotationTask | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** A dataset as held in this dialog's state; `icon` is an optional node rendered in front of the dataset name in the select label. */
type DatasetOption = Dataset & { icon?: ReactNode };
|
||||||
|
|
||||||
|
/**
 * One row of the dataset preview table.
 * Only `id` is required; the name and size may be absent in the response,
 * so readers must handle `undefined`.
 */
type DatasetPreviewFile = {
  id: string;
  fileName?: string;
  fileSize?: number;
};
|
||||||
|
|
||||||
|
/**
 * Fields read from the task-detail response when the dialog opens in edit mode.
 * Every field is optional, so callers fall back explicitly: for example the
 * task's own `labelConfig` first, then `template.labelConfig`.
 */
type AnnotationTaskDetail = {
  name?: string;
  description?: string;
  datasetId?: string;
  /** Label Studio XML stored on the task itself. */
  labelConfig?: string;
  /** Template the task was created from; only its XML is read here. */
  template?: { labelConfig?: string };
  /** Segmentation flag; may be missing, in which case the caller applies its default. */
  segmentationEnabled?: boolean;
};
|
||||||
|
|
||||||
|
/**
 * Generic response envelope: a status `code`, an optional `message`,
 * and the payload in `data`. All three are optional, so check `code`
 * and `data` before using the payload.
 */
type ApiResponse<T> = {
  code?: number;
  message?: string;
  data?: T;
};
|
||||||
|
|
||||||
|
/**
 * Type guard for plain object-like values.
 *
 * @param value - Any value of unknown shape.
 * @returns `true` only for non-null values whose `typeof` is `"object"` and
 *   that are not arrays; primitives, `null`, `undefined`, functions and
 *   arrays all yield `false`.
 */
const isRecord = (value: unknown): value is Record<string, unknown> =>
  typeof value === "object" && value !== null && !Array.isArray(value);
|
||||||
|
|
||||||
|
/**
 * Default value of the `segmentationEnabled` form field. Also used as the
 * fallback when a loaded task has no boolean flag and when the form reports
 * no value on submit.
 */
const DEFAULT_SEGMENTATION_ENABLED = true;

/**
 * Choices offered by the segmentation radio group.
 * `true` turns paragraph segmentation on, `false` turns it off.
 * The labels are user-facing text and are kept verbatim.
 */
const SEGMENTATION_OPTIONS = [
  { label: "需要切片段", value: true },
  { label: "不需要切片段", value: false },
];
|
||||||
|
|
||||||
export default function CreateAnnotationTask({
|
export default function CreateAnnotationTask({
|
||||||
open,
|
open,
|
||||||
onClose,
|
onClose,
|
||||||
@@ -33,19 +67,19 @@ export default function CreateAnnotationTask({
|
|||||||
const isEditMode = !!editTask;
|
const isEditMode = !!editTask;
|
||||||
const { message } = App.useApp();
|
const { message } = App.useApp();
|
||||||
const [manualForm] = Form.useForm();
|
const [manualForm] = Form.useForm();
|
||||||
const [datasets, setDatasets] = useState<Dataset[]>([]);
|
const [datasets, setDatasets] = useState<DatasetOption[]>([]);
|
||||||
const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
|
const [templates, setTemplates] = useState<AnnotationTemplate[]>([]);
|
||||||
const [submitting, setSubmitting] = useState(false);
|
const [submitting, setSubmitting] = useState(false);
|
||||||
const [nameManuallyEdited, setNameManuallyEdited] = useState(false);
|
const [nameManuallyEdited, setNameManuallyEdited] = useState(false);
|
||||||
|
|
||||||
const [labelConfig, setLabelConfig] = useState("");
|
const [labelConfig, setLabelConfig] = useState("");
|
||||||
const [showPreview, setShowPreview] = useState(false);
|
const [showPreview, setShowPreview] = useState(false);
|
||||||
const [previewTaskData, setPreviewTaskData] = useState<Record<string, any>>({});
|
const [previewTaskData, setPreviewTaskData] = useState<Record<string, unknown>>({});
|
||||||
const [configMode, setConfigMode] = useState<"template" | "custom">("template");
|
const [configMode, setConfigMode] = useState<"template" | "custom">("template");
|
||||||
|
|
||||||
// 数据集预览相关状态
|
// 数据集预览相关状态
|
||||||
const [datasetPreviewVisible, setDatasetPreviewVisible] = useState(false);
|
const [datasetPreviewVisible, setDatasetPreviewVisible] = useState(false);
|
||||||
const [datasetPreviewData, setDatasetPreviewData] = useState<any[]>([]);
|
const [datasetPreviewData, setDatasetPreviewData] = useState<DatasetPreviewFile[]>([]);
|
||||||
const [datasetPreviewLoading, setDatasetPreviewLoading] = useState(false);
|
const [datasetPreviewLoading, setDatasetPreviewLoading] = useState(false);
|
||||||
const [selectedDatasetId, setSelectedDatasetId] = useState<string | null>(null);
|
const [selectedDatasetId, setSelectedDatasetId] = useState<string | null>(null);
|
||||||
|
|
||||||
@@ -61,6 +95,12 @@ export default function CreateAnnotationTask({
|
|||||||
const [taskDetailLoading, setTaskDetailLoading] = useState(false);
|
const [taskDetailLoading, setTaskDetailLoading] = useState(false);
|
||||||
const { config: tagConfig } = useTagConfig(false);
|
const { config: tagConfig } = useTagConfig(false);
|
||||||
|
|
||||||
|
const selectedDataset = useMemo(
|
||||||
|
() => datasets.find((dataset) => dataset.id === selectedDatasetId),
|
||||||
|
[datasets, selectedDatasetId]
|
||||||
|
);
|
||||||
|
const isTextDataset = selectedDataset?.datasetType === DatasetType.TEXT;
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!open) return;
|
if (!open) return;
|
||||||
const fetchData = async () => {
|
const fetchData = async () => {
|
||||||
@@ -107,7 +147,7 @@ export default function CreateAnnotationTask({
|
|||||||
// 编辑模式:加载任务详情
|
// 编辑模式:加载任务详情
|
||||||
setTaskDetailLoading(true);
|
setTaskDetailLoading(true);
|
||||||
getAnnotationTaskByIdUsingGet(editTask.id)
|
getAnnotationTaskByIdUsingGet(editTask.id)
|
||||||
.then((res: any) => {
|
.then((res: ApiResponse<AnnotationTaskDetail>) => {
|
||||||
if (res.code === 200 && res.data) {
|
if (res.code === 200 && res.data) {
|
||||||
const taskDetail = res.data;
|
const taskDetail = res.data;
|
||||||
// 填充基本信息
|
// 填充基本信息
|
||||||
@@ -115,8 +155,13 @@ export default function CreateAnnotationTask({
|
|||||||
name: taskDetail.name,
|
name: taskDetail.name,
|
||||||
description: taskDetail.description,
|
description: taskDetail.description,
|
||||||
datasetId: taskDetail.datasetId,
|
datasetId: taskDetail.datasetId,
|
||||||
|
segmentationEnabled: typeof taskDetail.segmentationEnabled === "boolean"
|
||||||
|
? taskDetail.segmentationEnabled
|
||||||
|
: DEFAULT_SEGMENTATION_ENABLED,
|
||||||
});
|
});
|
||||||
setSelectedDatasetId(taskDetail.datasetId);
|
if (taskDetail.datasetId) {
|
||||||
|
setSelectedDatasetId(taskDetail.datasetId);
|
||||||
|
}
|
||||||
|
|
||||||
// 获取实际的 labelConfig(优先使用任务自身的配置,回退到模板配置)
|
// 获取实际的 labelConfig(优先使用任务自身的配置,回退到模板配置)
|
||||||
const configXml = taskDetail.labelConfig || taskDetail.template?.labelConfig;
|
const configXml = taskDetail.labelConfig || taskDetail.template?.labelConfig;
|
||||||
@@ -140,6 +185,9 @@ export default function CreateAnnotationTask({
|
|||||||
// 创建模式:重置为默认状态
|
// 创建模式:重置为默认状态
|
||||||
setConfigMode("template");
|
setConfigMode("template");
|
||||||
setSelectedDatasetId(null);
|
setSelectedDatasetId(null);
|
||||||
|
manualForm.setFieldsValue({
|
||||||
|
segmentationEnabled: DEFAULT_SEGMENTATION_ENABLED,
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, [open, manualForm, isEditMode, editTask, message]);
|
}, [open, manualForm, isEditMode, editTask, message]);
|
||||||
@@ -154,7 +202,7 @@ export default function CreateAnnotationTask({
|
|||||||
try {
|
try {
|
||||||
const res = await queryDatasetFilesUsingGet(selectedDatasetId, { page: 0, size: 10 });
|
const res = await queryDatasetFilesUsingGet(selectedDatasetId, { page: 0, size: 10 });
|
||||||
if (res.code === '0' && res.data) {
|
if (res.code === '0' && res.data) {
|
||||||
setDatasetPreviewData(res.data.content || []);
|
setDatasetPreviewData((res.data.content || []) as DatasetPreviewFile[]);
|
||||||
setDatasetPreviewVisible(true);
|
setDatasetPreviewVisible(true);
|
||||||
} else {
|
} else {
|
||||||
message.error("获取数据集预览失败");
|
message.error("获取数据集预览失败");
|
||||||
@@ -168,7 +216,7 @@ export default function CreateAnnotationTask({
|
|||||||
};
|
};
|
||||||
|
|
||||||
// 预览文件内容
|
// 预览文件内容
|
||||||
const handlePreviewFileContent = async (file: any) => {
|
const handlePreviewFileContent = async (file: DatasetPreviewFile) => {
|
||||||
const fileName = file.fileName?.toLowerCase() || '';
|
const fileName = file.fileName?.toLowerCase() || '';
|
||||||
|
|
||||||
// 文件类型扩展名映射
|
// 文件类型扩展名映射
|
||||||
@@ -318,7 +366,7 @@ export default function CreateAnnotationTask({
|
|||||||
};
|
};
|
||||||
|
|
||||||
const generatePreviewTaskDataFromLabelConfig = (xml: string) => {
|
const generatePreviewTaskDataFromLabelConfig = (xml: string) => {
|
||||||
const exampleDataByType: Record<string, any> = {
|
const exampleDataByType: Record<string, unknown> = {
|
||||||
Image: "https://labelstud.io/images/opa-header.png",
|
Image: "https://labelstud.io/images/opa-header.png",
|
||||||
Audio: "https://labelstud.io/files/sample.wav",
|
Audio: "https://labelstud.io/files/sample.wav",
|
||||||
AudioPlus: "https://labelstud.io/files/sample.wav",
|
AudioPlus: "https://labelstud.io/files/sample.wav",
|
||||||
@@ -350,7 +398,7 @@ export default function CreateAnnotationTask({
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const data: Record<string, any> = {};
|
const data: Record<string, unknown> = {};
|
||||||
objects.forEach((obj) => {
|
objects.forEach((obj) => {
|
||||||
const name = obj.name || "";
|
const name = obj.name || "";
|
||||||
const value = obj.value || "";
|
const value = obj.value || "";
|
||||||
@@ -380,14 +428,16 @@ export default function CreateAnnotationTask({
|
|||||||
};
|
};
|
||||||
|
|
||||||
// 当选择模板时,加载 XML 配置到树编辑器(仅快速填充)
|
// 当选择模板时,加载 XML 配置到树编辑器(仅快速填充)
|
||||||
const handleTemplateSelect = (value: string, option: any) => {
|
const handleTemplateSelect = (value: string, option: unknown) => {
|
||||||
if (!value) {
|
if (!value) {
|
||||||
setLabelConfig("");
|
setLabelConfig("");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const selectedTemplate = templates.find((template) => template.id === value);
|
const selectedTemplate = templates.find((template) => template.id === value);
|
||||||
const configXml = selectedTemplate?.labelConfig || option?.config || "";
|
const configXml = selectedTemplate?.labelConfig
|
||||||
|
|| (isRecord(option) && typeof option.config === "string" ? option.config : "")
|
||||||
|
|| "";
|
||||||
setLabelConfig(configXml);
|
setLabelConfig(configXml);
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -437,6 +487,10 @@ export default function CreateAnnotationTask({
|
|||||||
templateId: configMode === "template" ? values.templateId : undefined,
|
templateId: configMode === "template" ? values.templateId : undefined,
|
||||||
labelConfig: labelConfig.trim(),
|
labelConfig: labelConfig.trim(),
|
||||||
};
|
};
|
||||||
|
if (!isEditMode && isTextDataset) {
|
||||||
|
requestData.segmentationEnabled =
|
||||||
|
values.segmentationEnabled ?? DEFAULT_SEGMENTATION_ENABLED;
|
||||||
|
}
|
||||||
|
|
||||||
if (isEditMode && editTask) {
|
if (isEditMode && editTask) {
|
||||||
// 编辑模式:调用更新接口
|
// 编辑模式:调用更新接口
|
||||||
@@ -449,16 +503,17 @@ export default function CreateAnnotationTask({
|
|||||||
}
|
}
|
||||||
onClose();
|
onClose();
|
||||||
onRefresh();
|
onRefresh();
|
||||||
} catch (err: any) {
|
} catch (err: unknown) {
|
||||||
console.error(isEditMode ? "Update annotation task failed" : "Create annotation task failed", err);
|
console.error(isEditMode ? "Update annotation task failed" : "Create annotation task failed", err);
|
||||||
const msg = err?.message || err?.data?.message || (isEditMode ? "更新失败,请稍后重试" : "创建失败,请稍后重试");
|
const error = err as { message?: string; data?: { message?: string } };
|
||||||
|
const msg = error?.message || error?.data?.message || (isEditMode ? "更新失败,请稍后重试" : "创建失败,请稍后重试");
|
||||||
message.error(msg);
|
message.error(msg);
|
||||||
} finally {
|
} finally {
|
||||||
setSubmitting(false);
|
setSubmitting(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleConfigModeChange = (e: any) => {
|
const handleConfigModeChange = (e: RadioChangeEvent) => {
|
||||||
const mode = e.target.value;
|
const mode = e.target.value;
|
||||||
setConfigMode(mode);
|
setConfigMode(mode);
|
||||||
if (mode === "custom") {
|
if (mode === "custom") {
|
||||||
@@ -521,7 +576,7 @@ export default function CreateAnnotationTask({
|
|||||||
label: (
|
label: (
|
||||||
<div className="flex items-center justify-between gap-3 py-2">
|
<div className="flex items-center justify-between gap-3 py-2">
|
||||||
<div className="flex items-center font-sm text-gray-900">
|
<div className="flex items-center font-sm text-gray-900">
|
||||||
<span className="mr-2">{(dataset as any).icon}</span>
|
<span className="mr-2">{dataset.icon}</span>
|
||||||
<span>{dataset.name}</span>
|
<span>{dataset.name}</span>
|
||||||
</div>
|
</div>
|
||||||
<div className="text-xs text-gray-500">{dataset.size}</div>
|
<div className="text-xs text-gray-500">{dataset.size}</div>
|
||||||
@@ -532,6 +587,17 @@ export default function CreateAnnotationTask({
|
|||||||
})}
|
})}
|
||||||
onChange={(value) => {
|
onChange={(value) => {
|
||||||
setSelectedDatasetId(value);
|
setSelectedDatasetId(value);
|
||||||
|
const dataset = datasets.find((item) => item.id === value);
|
||||||
|
if (dataset?.datasetType === DatasetType.TEXT) {
|
||||||
|
const currentValue = manualForm.getFieldValue("segmentationEnabled");
|
||||||
|
if (currentValue === undefined) {
|
||||||
|
manualForm.setFieldsValue({
|
||||||
|
segmentationEnabled: DEFAULT_SEGMENTATION_ENABLED,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
} else if (dataset) {
|
||||||
|
manualForm.setFieldsValue({ segmentationEnabled: false });
|
||||||
|
}
|
||||||
// 如果用户未手动修改名称,则用数据集名称作为默认任务名
|
// 如果用户未手动修改名称,则用数据集名称作为默认任务名
|
||||||
if (!nameManuallyEdited) {
|
if (!nameManuallyEdited) {
|
||||||
const ds = datasets.find((d) => d.id === value);
|
const ds = datasets.find((d) => d.id === value);
|
||||||
@@ -578,6 +644,28 @@ export default function CreateAnnotationTask({
|
|||||||
<TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={2} />
|
<TextArea placeholder="(可选)详细描述标注任务的要求和目标" rows={2} />
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
|
|
||||||
|
<Form.Item
|
||||||
|
label="段落切片"
|
||||||
|
name="segmentationEnabled"
|
||||||
|
initialValue={DEFAULT_SEGMENTATION_ENABLED}
|
||||||
|
extra={
|
||||||
|
isEditMode
|
||||||
|
? "编辑模式暂不支持修改"
|
||||||
|
: !selectedDatasetId
|
||||||
|
? "请选择数据集后配置"
|
||||||
|
: isTextDataset
|
||||||
|
? "仅文本数据集可配置该项"
|
||||||
|
: "非文本数据集不支持切片段"
|
||||||
|
}
|
||||||
|
>
|
||||||
|
<Radio.Group
|
||||||
|
options={SEGMENTATION_OPTIONS}
|
||||||
|
optionType="button"
|
||||||
|
buttonStyle="solid"
|
||||||
|
disabled={!isTextDataset || isEditMode}
|
||||||
|
/>
|
||||||
|
</Form.Item>
|
||||||
|
|
||||||
{/* 标注模板选择 */}
|
{/* 标注模板选择 */}
|
||||||
<div className="flex items-center justify-between mb-2">
|
<div className="flex items-center justify-between mb-2">
|
||||||
<span className="text-sm font-medium text-gray-700 after:content-['*'] after:text-red-500 after:ml-1">标注配置</span>
|
<span className="text-sm font-medium text-gray-700 after:content-['*'] after:text-red-500 after:ml-1">标注配置</span>
|
||||||
@@ -721,7 +809,7 @@ export default function CreateAnnotationTask({
|
|||||||
dataIndex: "fileName",
|
dataIndex: "fileName",
|
||||||
key: "fileName",
|
key: "fileName",
|
||||||
ellipsis: true,
|
ellipsis: true,
|
||||||
render: (text: string, record: any) => (
|
render: (text: string, record: DatasetPreviewFile) => (
|
||||||
<Button
|
<Button
|
||||||
type="link"
|
type="link"
|
||||||
size="small"
|
size="small"
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ router = APIRouter(
|
|||||||
tags=["annotation/project"]
|
tags=["annotation/project"]
|
||||||
)
|
)
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
TEXT_DATASET_TYPE = "TEXT"
|
||||||
|
|
||||||
@router.get("/{mapping_id}/login")
|
@router.get("/{mapping_id}/login")
|
||||||
async def login_label_studio(
|
async def login_label_studio(
|
||||||
@@ -62,6 +63,12 @@ async def create_mapping(
|
|||||||
detail=f"Dataset not found in DM service: {request.dataset_id}"
|
detail=f"Dataset not found in DM service: {request.dataset_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
dataset_type = (
|
||||||
|
getattr(dataset_info, "datasetType", None)
|
||||||
|
or getattr(dataset_info, "dataset_type", None)
|
||||||
|
or ""
|
||||||
|
).upper()
|
||||||
|
|
||||||
project_name = request.name or \
|
project_name = request.name or \
|
||||||
dataset_info.name or \
|
dataset_info.name or \
|
||||||
"A new project from DataMate"
|
"A new project from DataMate"
|
||||||
@@ -97,6 +104,8 @@ async def create_mapping(
|
|||||||
project_configuration["label_config"] = label_config
|
project_configuration["label_config"] = label_config
|
||||||
if project_description:
|
if project_description:
|
||||||
project_configuration["description"] = project_description
|
project_configuration["description"] = project_description
|
||||||
|
if dataset_type == TEXT_DATASET_TYPE and request.segmentation_enabled is not None:
|
||||||
|
project_configuration["segmentation_enabled"] = bool(request.segmentation_enabled)
|
||||||
|
|
||||||
labeling_project = LabelingProject(
|
labeling_project = LabelingProject(
|
||||||
id=str(uuid.uuid4()), # Generate UUID here
|
id=str(uuid.uuid4()), # Generate UUID here
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ class DatasetMappingCreateRequest(BaseModel):
|
|||||||
description: Optional[str] = Field(None, alias="description", description="标注项目描述")
|
description: Optional[str] = Field(None, alias="description", description="标注项目描述")
|
||||||
template_id: Optional[str] = Field(None, alias="templateId", description="标注模板ID")
|
template_id: Optional[str] = Field(None, alias="templateId", description="标注模板ID")
|
||||||
label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML配置")
|
label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML配置")
|
||||||
|
segmentation_enabled: Optional[bool] = Field(
|
||||||
|
None,
|
||||||
|
alias="segmentationEnabled",
|
||||||
|
description="是否启用文本分段",
|
||||||
|
)
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
# allow population by field name when constructing model programmatically
|
# allow population by field name when constructing model programmatically
|
||||||
@@ -49,6 +54,11 @@ class DatasetMappingResponse(BaseModel):
|
|||||||
template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID")
|
template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID")
|
||||||
template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情")
|
template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情")
|
||||||
label_config: Optional[str] = Field(None, alias="labelConfig", description="实际使用的 Label Studio XML 配置")
|
label_config: Optional[str] = Field(None, alias="labelConfig", description="实际使用的 Label Studio XML 配置")
|
||||||
|
segmentation_enabled: Optional[bool] = Field(
|
||||||
|
None,
|
||||||
|
alias="segmentationEnabled",
|
||||||
|
description="是否启用文本分段",
|
||||||
|
)
|
||||||
total_count: int = Field(0, alias="totalCount", description="数据集总数据量")
|
total_count: int = Field(0, alias="totalCount", description="数据集总数据量")
|
||||||
annotated_count: int = Field(0, alias="annotatedCount", description="已标注数据量")
|
annotated_count: int = Field(0, alias="annotatedCount", description="已标注数据量")
|
||||||
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
|
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
|
||||||
@@ -62,4 +72,4 @@ class DatasetMappingResponse(BaseModel):
|
|||||||
class DeleteDatasetResponse(BaseResponseModel):
|
class DeleteDatasetResponse(BaseResponseModel):
|
||||||
"""删除数据集响应模型"""
|
"""删除数据集响应模型"""
|
||||||
id: str = Field(..., description="映射UUID")
|
id: str = Field(..., description="映射UUID")
|
||||||
status: str = Field(..., description="删除状态")
|
status: str = Field(..., description="删除状态")
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
|
|||||||
MEDIA_OBJECT_CATEGORIES = {"image"}
|
MEDIA_OBJECT_CATEGORIES = {"image"}
|
||||||
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
|
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
|
||||||
SUPPORTED_EDITOR_DATASET_TYPES = ("TEXT", "IMAGE")
|
SUPPORTED_EDITOR_DATASET_TYPES = ("TEXT", "IMAGE")
|
||||||
|
SEGMENTATION_ENABLED_KEY = "segmentation_enabled"
|
||||||
|
|
||||||
|
|
||||||
class AnnotationEditorService:
|
class AnnotationEditorService:
|
||||||
@@ -149,6 +150,18 @@ class AnnotationEditorService:
|
|||||||
label_config = self._decorate_label_config_for_editor(label_config)
|
label_config = self._decorate_label_config_for_editor(label_config)
|
||||||
return label_config
|
return label_config
|
||||||
|
|
||||||
|
@staticmethod
def _resolve_segmentation_enabled(project: LabelingProject) -> bool:
    """Read the segmentation flag from a project's configuration.

    Args:
        project: Labeling project whose ``configuration`` is inspected.

    Returns:
        ``True`` when the configuration is not a dict, or when the flag is
        missing or ``None`` (segmentation defaults to on). Otherwise the
        stored value coerced with ``bool()``.
    """
    config = project.configuration
    if not isinstance(config, dict):
        # No usable configuration: fall back to the default (enabled).
        return True

    value = config.get(SEGMENTATION_ENABLED_KEY)
    # None means the flag was never set, so keep the default. bool() leaves
    # real booleans unchanged, so they need no separate branch.
    return True if value is None else bool(value)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _resolve_primary_text_key(cls, label_config: Optional[str]) -> Optional[str]:
|
def _resolve_primary_text_key(cls, label_config: Optional[str]) -> Optional[str]:
|
||||||
if not label_config:
|
if not label_config:
|
||||||
@@ -513,13 +526,19 @@ class AnnotationEditorService:
|
|||||||
ls_task_id = self._make_ls_task_id(project.id, file_id)
|
ls_task_id = self._make_ls_task_id(project.id, file_id)
|
||||||
|
|
||||||
# 判断是否需要分段(JSONL 多行或主文本超过阈值)
|
# 判断是否需要分段(JSONL 多行或主文本超过阈值)
|
||||||
needs_segmentation = len(records) > 1 or any(
|
segmentation_enabled = self._resolve_segmentation_enabled(project)
|
||||||
len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts
|
if not segmentation_enabled:
|
||||||
|
segment_index = None
|
||||||
|
needs_segmentation = segmentation_enabled and (
|
||||||
|
len(records) > 1 or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
|
||||||
)
|
)
|
||||||
segments: Optional[List[SegmentInfo]] = None
|
segments: Optional[List[SegmentInfo]] = None
|
||||||
current_segment_index = 0
|
current_segment_index = 0
|
||||||
display_text = record_texts[0] if record_texts else text_content
|
display_text = record_texts[0] if record_texts else text_content
|
||||||
selected_payload = records[0][0] if records else None
|
selected_payload = records[0][0] if records else None
|
||||||
|
if not segmentation_enabled and len(records) > 1:
|
||||||
|
selected_payload = None
|
||||||
|
display_text = "\n".join(record_texts) if record_texts else text_content
|
||||||
|
|
||||||
segment_annotations: Dict[str, Any] = {}
|
segment_annotations: Dict[str, Any] = {}
|
||||||
if ann and ann.annotation and ann.annotation.get("segmented"):
|
if ann and ann.annotation and ann.annotation.get("segmented"):
|
||||||
|
|||||||
@@ -90,9 +90,11 @@ class DatasetMappingService:
|
|||||||
configuration = getattr(mapping, 'configuration', None) or {}
|
configuration = getattr(mapping, 'configuration', None) or {}
|
||||||
label_config = None
|
label_config = None
|
||||||
description = None
|
description = None
|
||||||
|
segmentation_enabled = None
|
||||||
if isinstance(configuration, dict):
|
if isinstance(configuration, dict):
|
||||||
label_config = configuration.get('label_config')
|
label_config = configuration.get('label_config')
|
||||||
description = configuration.get('description')
|
description = configuration.get('description')
|
||||||
|
segmentation_enabled = configuration.get('segmentation_enabled')
|
||||||
|
|
||||||
# Optionally fetch full template details
|
# Optionally fetch full template details
|
||||||
template_response = None
|
template_response = None
|
||||||
@@ -117,6 +119,7 @@ class DatasetMappingService:
|
|||||||
"template_id": template_id,
|
"template_id": template_id,
|
||||||
"template": template_response,
|
"template": template_response,
|
||||||
"label_config": label_config,
|
"label_config": label_config,
|
||||||
|
"segmentation_enabled": segmentation_enabled,
|
||||||
"total_count": total_count,
|
"total_count": total_count,
|
||||||
"annotated_count": annotated_count,
|
"annotated_count": annotated_count,
|
||||||
"created_at": mapping.created_at,
|
"created_at": mapping.created_at,
|
||||||
@@ -154,9 +157,11 @@ class DatasetMappingService:
|
|||||||
configuration = getattr(mapping, 'configuration', None) or {}
|
configuration = getattr(mapping, 'configuration', None) or {}
|
||||||
label_config = None
|
label_config = None
|
||||||
description = None
|
description = None
|
||||||
|
segmentation_enabled = None
|
||||||
if isinstance(configuration, dict):
|
if isinstance(configuration, dict):
|
||||||
label_config = configuration.get('label_config')
|
label_config = configuration.get('label_config')
|
||||||
description = configuration.get('description')
|
description = configuration.get('description')
|
||||||
|
segmentation_enabled = configuration.get('segmentation_enabled')
|
||||||
|
|
||||||
# Optionally fetch full template details
|
# Optionally fetch full template details
|
||||||
template_response = None
|
template_response = None
|
||||||
@@ -184,6 +189,7 @@ class DatasetMappingService:
|
|||||||
"template_id": template_id,
|
"template_id": template_id,
|
||||||
"template": template_response,
|
"template": template_response,
|
||||||
"label_config": label_config,
|
"label_config": label_config,
|
||||||
|
"segmentation_enabled": segmentation_enabled,
|
||||||
"total_count": total_count,
|
"total_count": total_count,
|
||||||
"annotated_count": annotated_count,
|
"annotated_count": annotated_count,
|
||||||
"created_at": mapping.created_at,
|
"created_at": mapping.created_at,
|
||||||
@@ -526,4 +532,4 @@ class DatasetMappingService:
|
|||||||
for row in rows:
|
for row in rows:
|
||||||
response = await self._to_response_from_row(row, include_template=include_template)
|
response = await self._to_response_from_row(row, include_template=include_template)
|
||||||
responses.append(response)
|
responses.append(response)
|
||||||
return responses, total
|
return responses, total
|
||||||
|
|||||||
Reference in New Issue
Block a user