You've already forked DataMate
feat(annotation): 添加分段索引支持和优化标注编辑器
- 在前端 lsf.html 中添加 segmentIndex 字段解析逻辑
- 在 LabelStudioTextEditor 中添加分段索引相关类型定义和处理函数
- 使用 useCallback 优化组件中的异步函数性能
- 添加对驼峰命名和下划线命名的数据字段兼容处理
- 实现分段模式下的标注状态更新功能
- 添加任务 ID 验证防止过期保存请求
- 在后端 editor.py 中添加分段索引字段支持
- 统一前后端数据传输格式确保字段一致性
This commit is contained in:
@@ -200,6 +200,18 @@
|
|||||||
// 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储)
|
// 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储)
|
||||||
const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null;
|
const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null;
|
||||||
const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null;
|
const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null;
|
||||||
|
const segmentIndexValue =
|
||||||
|
currentTask?.data?.segment_index ??
|
||||||
|
currentTask?.data?.segmentIndex ??
|
||||||
|
currentTask?.data?.dm_segment_index ??
|
||||||
|
currentTask?.data?.dmSegmentIndex ??
|
||||||
|
null;
|
||||||
|
const segmentIndex =
|
||||||
|
segmentIndexValue === null || segmentIndexValue === undefined
|
||||||
|
? null
|
||||||
|
: Number.isFinite(Number(segmentIndexValue))
|
||||||
|
? Number(segmentIndexValue)
|
||||||
|
: null;
|
||||||
|
|
||||||
annotationPayload.id = typeof annotationPayload.id === "number" ? annotationPayload.id : taskId || 1;
|
annotationPayload.id = typeof annotationPayload.id === "number" ? annotationPayload.id : taskId || 1;
|
||||||
annotationPayload.task = taskId;
|
annotationPayload.task = taskId;
|
||||||
@@ -209,6 +221,7 @@
|
|||||||
return {
|
return {
|
||||||
taskId,
|
taskId,
|
||||||
fileId,
|
fileId,
|
||||||
|
segmentIndex,
|
||||||
annotation: annotationPayload,
|
annotation: annotationPayload,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { useEffect, useMemo, useRef, useState } from "react";
|
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||||
import { App, Button, Card, List, Spin, Typography, Tag } from "antd";
|
import { App, Button, Card, List, Spin, Typography, Tag } from "antd";
|
||||||
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons";
|
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons";
|
||||||
import { useNavigate, useParams } from "react-router";
|
import { useNavigate, useParams } from "react-router";
|
||||||
@@ -29,7 +29,7 @@ type EditorTaskListItem = {
|
|||||||
|
|
||||||
type LsfMessage = {
|
type LsfMessage = {
|
||||||
type?: string;
|
type?: string;
|
||||||
payload?: any;
|
payload?: unknown;
|
||||||
};
|
};
|
||||||
|
|
||||||
type SegmentInfo = {
|
type SegmentInfo = {
|
||||||
@@ -40,8 +40,57 @@ type SegmentInfo = {
|
|||||||
hasAnnotation: boolean;
|
hasAnnotation: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Generic envelope the editor API helpers are cast to before their result is
 * read. Every field is optional, so callers must null-check `data`.
 *
 * @typeParam T - Shape of the business payload carried in `data`.
 */
type ApiResponse<T> = {
  /** Status code reported by the backend (exact semantics not visible here — confirm against the API). */
  code?: number;
  /** Human-readable status or error text, when the backend supplies one. */
  message?: string;
  /** The business payload; absent when the request failed or returned nothing. */
  data?: T;
};
|
||||||
|
|
||||||
|
/**
 * A Label Studio style task as returned by the editor task endpoint and
 * forwarded to the embedded editor iframe.
 */
type EditorTaskPayload = {
  /** Task identifier; may arrive as a number or a numeric string. */
  id?: number | string;
  /** Free-form task data (text plus metadata such as file and segment identifiers). */
  data?: Record<string, unknown>;
  /** Existing annotations attached to the task; kept opaque on this side. */
  annotations?: unknown[];
};
|
||||||
|
|
||||||
|
/**
 * Payload of the "get editor task" endpoint: the task to render plus optional
 * segmentation information for long documents.
 */
type EditorTaskResponse = {
  /** The task to load into the editor; the editor cannot be initialised without it. */
  task?: EditorTaskPayload;
  /** True when the file is served segment by segment. */
  segmented?: boolean;
  /** Descriptors of the available segments (presumably only set when `segmented` — confirm against the backend). */
  segments?: SegmentInfo[];
  /** Index of the segment that `task` corresponds to. */
  currentSegmentIndex?: number;
};
|
||||||
|
|
||||||
|
/**
 * Payload of the "list editor tasks" endpoint. Only the `content` array is
 * consumed by this component.
 */
type EditorTaskListResponse = {
  /** The task/file entries of the requested page. */
  content?: EditorTaskListItem[];
};
|
||||||
|
|
||||||
|
/**
 * Message payload posted by the editor iframe (e.g. with `LS_EXPORT_RESULT`
 * or `LS_SUBMIT`). The data crosses a `postMessage` boundary, so every field
 * is optional and loosely typed; consumers must validate before use.
 */
type ExportPayload = {
  /** Id of the task the iframe was showing; used to discard stale messages. */
  taskId?: number | string | null;
  /** Id of the file the annotation belongs to. */
  fileId?: string | null;
  /** Segment the annotation belongs to, when the file is segmented. */
  segmentIndex?: number | string | null;
  /** The exported annotation object, stored as-is by the backend. */
  annotation?: Record<string, unknown>;
};
|
||||||
|
|
||||||
const LSF_IFRAME_SRC = "/lsf/lsf.html";
|
const LSF_IFRAME_SRC = "/lsf/lsf.html";
|
||||||
|
|
||||||
|
/**
 * Converts a loosely-typed segment index into a finite number.
 *
 * Only numbers and non-blank numeric strings are accepted. Everything else
 * (null, undefined, blank strings, booleans, arrays, objects) yields
 * `undefined`, so callers can fall back to their own default with `??`.
 *
 * @param value - Raw value taken from a response or an iframe message.
 * @returns The parsed finite number, or `undefined` when the value is not a usable index.
 */
const resolveSegmentIndex = (value: unknown): number | undefined => {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value === "string") {
    // Number("") and Number("   ") evaluate to 0, which would silently
    // select the first segment; treat blank strings as "no index".
    if (value.trim() === "") return undefined;
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  // Booleans, arrays and other objects coerce to misleading numbers
  // (Number(true) === 1, Number([]) === 0); reject them outright.
  return undefined;
};
|
||||||
|
|
||||||
|
/**
 * Narrows an untrusted iframe message payload to `ExportPayload`.
 *
 * This is a shape gate only: it verifies the payload is a plain, non-null,
 * non-array object. Individual fields are NOT validated and must still be
 * checked by the consumer.
 *
 * @param payload - Raw `payload` value taken from a `postMessage` event.
 * @returns The same object typed as `ExportPayload`, or `undefined` when the
 *          payload is not an object (primitives, null, arrays).
 */
const normalizePayload = (payload: unknown): ExportPayload | undefined => {
  // Arrays satisfy `typeof === "object"` but can never carry the named
  // fields of an export payload, so reject them with the other non-objects.
  if (typeof payload !== "object" || payload === null || Array.isArray(payload)) {
    return undefined;
  }
  return payload as ExportPayload;
};
|
||||||
|
|
||||||
|
/**
 * Extracts a string `message` field from an untrusted iframe payload.
 *
 * @param payload - Raw `payload` value taken from a `postMessage` event.
 * @returns The `message` property when the payload is an object and the
 *          property is a string (an empty string is returned as-is);
 *          otherwise `undefined`.
 */
const resolvePayloadMessage = (payload: unknown): string | undefined => {
  if (typeof payload !== "object" || payload === null) return undefined;
  // A single narrowed read replaces the former `in` check plus two casts:
  // a missing property reads as undefined and fails the string test anyway.
  const text = (payload as { message?: unknown }).message;
  return typeof text === "string" ? text : undefined;
};
|
||||||
|
|
||||||
export default function LabelStudioTextEditor() {
|
export default function LabelStudioTextEditor() {
|
||||||
const { projectId = "" } = useParams();
|
const { projectId = "" } = useParams();
|
||||||
const navigate = useNavigate();
|
const navigate = useNavigate();
|
||||||
@@ -69,17 +118,17 @@ export default function LabelStudioTextEditor() {
|
|||||||
const [segments, setSegments] = useState<SegmentInfo[]>([]);
|
const [segments, setSegments] = useState<SegmentInfo[]>([]);
|
||||||
const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0);
|
const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0);
|
||||||
|
|
||||||
const postToIframe = (type: string, payload?: any) => {
|
const postToIframe = useCallback((type: string, payload?: unknown) => {
|
||||||
const win = iframeRef.current?.contentWindow;
|
const win = iframeRef.current?.contentWindow;
|
||||||
if (!win) return;
|
if (!win) return;
|
||||||
win.postMessage({ type, payload }, origin);
|
win.postMessage({ type, payload }, origin);
|
||||||
};
|
}, [origin]);
|
||||||
|
|
||||||
const loadProject = async () => {
|
const loadProject = useCallback(async () => {
|
||||||
setLoadingProject(true);
|
setLoadingProject(true);
|
||||||
try {
|
try {
|
||||||
const resp = (await getEditorProjectInfoUsingGet(projectId)) as any;
|
const resp = (await getEditorProjectInfoUsingGet(projectId)) as ApiResponse<EditorProjectInfo>;
|
||||||
const data = resp?.data as EditorProjectInfo | undefined;
|
const data = resp?.data;
|
||||||
if (!data?.projectId) {
|
if (!data?.projectId) {
|
||||||
message.error("获取标注项目信息失败");
|
message.error("获取标注项目信息失败");
|
||||||
setProject(null);
|
setProject(null);
|
||||||
@@ -93,18 +142,21 @@ export default function LabelStudioTextEditor() {
|
|||||||
} finally {
|
} finally {
|
||||||
setLoadingProject(false);
|
setLoadingProject(false);
|
||||||
}
|
}
|
||||||
};
|
}, [message, projectId]);
|
||||||
|
|
||||||
const loadTasks = async (silent = false) => {
|
const loadTasks = useCallback(async (silent = false) => {
|
||||||
if (!projectId) return;
|
if (!projectId) return;
|
||||||
if (!silent) setLoadingTasks(true);
|
if (!silent) setLoadingTasks(true);
|
||||||
try {
|
try {
|
||||||
const resp = (await listEditorTasksUsingGet(projectId, { page: 0, size: 200 })) as any;
|
const resp = (await listEditorTasksUsingGet(projectId, {
|
||||||
const content = (resp?.data?.content || []) as EditorTaskListItem[];
|
page: 0,
|
||||||
|
size: 200,
|
||||||
|
})) as ApiResponse<EditorTaskListResponse>;
|
||||||
|
const content = resp?.data?.content || [];
|
||||||
const items = Array.isArray(content) ? content : [];
|
const items = Array.isArray(content) ? content : [];
|
||||||
setTasks(items);
|
setTasks(items);
|
||||||
if (!selectedFileId && items.length > 0) {
|
if (items.length > 0) {
|
||||||
setSelectedFileId(items[0].fileId);
|
setSelectedFileId((prev) => prev || (items[0]?.fileId ?? ""));
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error(e);
|
console.error(e);
|
||||||
@@ -113,9 +165,9 @@ export default function LabelStudioTextEditor() {
|
|||||||
} finally {
|
} finally {
|
||||||
if (!silent) setLoadingTasks(false);
|
if (!silent) setLoadingTasks(false);
|
||||||
}
|
}
|
||||||
};
|
}, [message, projectId]);
|
||||||
|
|
||||||
const initEditorForFile = async (fileId: string, segmentIdx?: number) => {
|
const initEditorForFile = useCallback(async (fileId: string, segmentIdx?: number) => {
|
||||||
if (!project?.supported) return;
|
if (!project?.supported) return;
|
||||||
if (!project?.labelConfig) {
|
if (!project?.labelConfig) {
|
||||||
message.error("该项目未绑定标注模板,无法加载编辑器");
|
message.error("该项目未绑定标注模板,无法加载编辑器");
|
||||||
@@ -131,7 +183,7 @@ export default function LabelStudioTextEditor() {
|
|||||||
try {
|
try {
|
||||||
const resp = (await getEditorTaskUsingGet(projectId, fileId, {
|
const resp = (await getEditorTaskUsingGet(projectId, fileId, {
|
||||||
segmentIndex: segmentIdx,
|
segmentIndex: segmentIdx,
|
||||||
})) as any;
|
})) as ApiResponse<EditorTaskResponse>;
|
||||||
const data = resp?.data;
|
const data = resp?.data;
|
||||||
const task = data?.task;
|
const task = data?.task;
|
||||||
if (!task) {
|
if (!task) {
|
||||||
@@ -151,10 +203,25 @@ export default function LabelStudioTextEditor() {
|
|||||||
setCurrentSegmentIndex(0);
|
setCurrentSegmentIndex(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedTaskIdRef.current = Number(task?.id) || null;
|
const taskData = {
|
||||||
|
...(task?.data || {}),
|
||||||
|
file_id: fileId,
|
||||||
|
fileId: fileId,
|
||||||
|
};
|
||||||
|
if (data?.segmented) {
|
||||||
|
const segmentIndex = resolveSegmentIndex(data.currentSegmentIndex) ?? 0;
|
||||||
|
taskData.segment_index = segmentIndex;
|
||||||
|
taskData.segmentIndex = segmentIndex;
|
||||||
|
}
|
||||||
|
const taskForIframe = {
|
||||||
|
...task,
|
||||||
|
data: taskData,
|
||||||
|
};
|
||||||
|
|
||||||
|
expectedTaskIdRef.current = Number(taskForIframe?.id) || null;
|
||||||
postToIframe("LS_INIT", {
|
postToIframe("LS_INIT", {
|
||||||
labelConfig: project.labelConfig,
|
labelConfig: project.labelConfig,
|
||||||
task,
|
task: taskForIframe,
|
||||||
user: { id: "datamate" },
|
user: { id: "datamate" },
|
||||||
// 完整的 Label Studio 原生界面配置
|
// 完整的 Label Studio 原生界面配置
|
||||||
interfaces: [
|
interfaces: [
|
||||||
@@ -188,30 +255,44 @@ export default function LabelStudioTextEditor() {
|
|||||||
} finally {
|
} finally {
|
||||||
if (seq === initSeqRef.current) setLoadingTaskDetail(false);
|
if (seq === initSeqRef.current) setLoadingTaskDetail(false);
|
||||||
}
|
}
|
||||||
};
|
}, [iframeReady, message, postToIframe, project, projectId]);
|
||||||
|
|
||||||
const saveFromExport = async (payload: any) => {
|
const saveFromExport = useCallback(async (payload?: ExportPayload | null) => {
|
||||||
const fileId = payload?.fileId;
|
const payloadTaskId = payload?.taskId;
|
||||||
|
if (expectedTaskIdRef.current && payloadTaskId) {
|
||||||
|
if (Number(payloadTaskId) !== expectedTaskIdRef.current) {
|
||||||
|
message.warning("已忽略过期的保存请求");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
const fileId = payload?.fileId || selectedFileId;
|
||||||
const annotation = payload?.annotation;
|
const annotation = payload?.annotation;
|
||||||
if (!fileId || !annotation) {
|
if (!fileId || !annotation || typeof annotation !== "object") {
|
||||||
message.error("导出标注失败:缺少 fileId/annotation");
|
message.error("导出标注失败:缺少 fileId/annotation");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
const payloadSegmentIndex = resolveSegmentIndex(payload?.segmentIndex);
|
||||||
|
const segmentIndex =
|
||||||
|
payloadSegmentIndex !== undefined
|
||||||
|
? payloadSegmentIndex
|
||||||
|
: segmented
|
||||||
|
? currentSegmentIndex
|
||||||
|
: undefined;
|
||||||
|
|
||||||
setSaving(true);
|
setSaving(true);
|
||||||
try {
|
try {
|
||||||
await upsertEditorAnnotationUsingPut(projectId, String(fileId), {
|
await upsertEditorAnnotationUsingPut(projectId, String(fileId), {
|
||||||
annotation,
|
annotation,
|
||||||
segmentIndex: segmented ? currentSegmentIndex : undefined,
|
segmentIndex,
|
||||||
});
|
});
|
||||||
message.success("标注已保存");
|
message.success("标注已保存");
|
||||||
await loadTasks(true);
|
await loadTasks(true);
|
||||||
|
|
||||||
// 分段模式下更新当前段落的标注状态
|
// 分段模式下更新当前段落的标注状态
|
||||||
if (segmented) {
|
if (segmented && segmentIndex !== undefined) {
|
||||||
setSegments((prev) =>
|
setSegments((prev) =>
|
||||||
prev.map((seg) =>
|
prev.map((seg) =>
|
||||||
seg.idx === currentSegmentIndex
|
seg.idx === segmentIndex
|
||||||
? { ...seg, hasAnnotation: true }
|
? { ...seg, hasAnnotation: true }
|
||||||
: seg
|
: seg
|
||||||
)
|
)
|
||||||
@@ -223,7 +304,14 @@ export default function LabelStudioTextEditor() {
|
|||||||
} finally {
|
} finally {
|
||||||
setSaving(false);
|
setSaving(false);
|
||||||
}
|
}
|
||||||
};
|
}, [
|
||||||
|
currentSegmentIndex,
|
||||||
|
loadTasks,
|
||||||
|
message,
|
||||||
|
projectId,
|
||||||
|
segmented,
|
||||||
|
selectedFileId,
|
||||||
|
]);
|
||||||
|
|
||||||
const requestExport = () => {
|
const requestExport = () => {
|
||||||
if (!selectedFileId) {
|
if (!selectedFileId) {
|
||||||
@@ -254,20 +342,17 @@ export default function LabelStudioTextEditor() {
|
|||||||
setCurrentSegmentIndex(0);
|
setCurrentSegmentIndex(0);
|
||||||
|
|
||||||
if (projectId) loadProject();
|
if (projectId) loadProject();
|
||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
}, [projectId, loadProject]);
|
||||||
}, [projectId]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!project?.supported) return;
|
if (!project?.supported) return;
|
||||||
loadTasks();
|
loadTasks();
|
||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
}, [project?.supported, loadTasks]);
|
||||||
}, [project?.projectId, project?.supported]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (!selectedFileId) return;
|
if (!selectedFileId) return;
|
||||||
initEditorForFile(selectedFileId);
|
initEditorForFile(selectedFileId);
|
||||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
}, [selectedFileId, iframeReady, initEditorForFile]);
|
||||||
}, [selectedFileId, iframeReady]);
|
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
const handler = (event: MessageEvent<LsfMessage>) => {
|
const handler = (event: MessageEvent<LsfMessage>) => {
|
||||||
@@ -280,8 +365,10 @@ export default function LabelStudioTextEditor() {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const payload = normalizePayload(msg.payload);
|
||||||
|
|
||||||
if (msg.type === "LS_READY") {
|
if (msg.type === "LS_READY") {
|
||||||
const readyTaskId = msg.payload?.taskId;
|
const readyTaskId = payload?.taskId;
|
||||||
if (expectedTaskIdRef.current && readyTaskId) {
|
if (expectedTaskIdRef.current && readyTaskId) {
|
||||||
if (Number(readyTaskId) !== expectedTaskIdRef.current) return;
|
if (Number(readyTaskId) !== expectedTaskIdRef.current) return;
|
||||||
}
|
}
|
||||||
@@ -290,25 +377,26 @@ export default function LabelStudioTextEditor() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (msg.type === "LS_EXPORT_RESULT") {
|
if (msg.type === "LS_EXPORT_RESULT") {
|
||||||
saveFromExport(msg.payload);
|
saveFromExport(payload);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 兼容 iframe 内部在 submit 时直接上报(若启用)
|
// 兼容 iframe 内部在 submit 时直接上报(若启用)
|
||||||
if (msg.type === "LS_SUBMIT") {
|
if (msg.type === "LS_SUBMIT") {
|
||||||
saveFromExport(msg.payload);
|
saveFromExport(payload);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (msg.type === "LS_ERROR") {
|
if (msg.type === "LS_ERROR") {
|
||||||
message.error(msg.payload?.message || "编辑器发生错误");
|
const payloadMessage = resolvePayloadMessage(msg.payload);
|
||||||
|
message.error(payloadMessage || "编辑器发生错误");
|
||||||
setLsReady(false);
|
setLsReady(false);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
window.addEventListener("message", handler);
|
window.addEventListener("message", handler);
|
||||||
return () => window.removeEventListener("message", handler);
|
return () => window.removeEventListener("message", handler);
|
||||||
}, [message, origin]);
|
}, [message, origin, saveFromExport]);
|
||||||
|
|
||||||
if (loadingProject) {
|
if (loadingProject) {
|
||||||
return (
|
return (
|
||||||
|
|||||||
@@ -43,6 +43,11 @@ TEXT_DATA_KEY = "text"
|
|||||||
DATASET_ID_KEY = "dataset_id"
|
DATASET_ID_KEY = "dataset_id"
|
||||||
FILE_ID_KEY = "file_id"
|
FILE_ID_KEY = "file_id"
|
||||||
FILE_NAME_KEY = "file_name"
|
FILE_NAME_KEY = "file_name"
|
||||||
|
DATASET_ID_CAMEL_KEY = "datasetId"
|
||||||
|
FILE_ID_CAMEL_KEY = "fileId"
|
||||||
|
FILE_NAME_CAMEL_KEY = "fileName"
|
||||||
|
SEGMENT_INDEX_KEY = "segment_index"
|
||||||
|
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
|
||||||
TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
|
TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
|
||||||
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
|
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
|
||||||
|
|
||||||
@@ -252,9 +257,13 @@ class AnnotationEditorService:
|
|||||||
if self._needs_placeholder(data.get(TEXT_DATA_KEY)):
|
if self._needs_placeholder(data.get(TEXT_DATA_KEY)):
|
||||||
data[TEXT_DATA_KEY] = display_text
|
data[TEXT_DATA_KEY] = display_text
|
||||||
|
|
||||||
data.setdefault(FILE_ID_KEY, file_id)
|
file_name = str(getattr(file_record, "file_name", ""))
|
||||||
data.setdefault(DATASET_ID_KEY, dataset_id)
|
data[FILE_ID_KEY] = file_id
|
||||||
data.setdefault(FILE_NAME_KEY, getattr(file_record, "file_name", ""))
|
data[FILE_ID_CAMEL_KEY] = file_id
|
||||||
|
data[DATASET_ID_KEY] = dataset_id
|
||||||
|
data[DATASET_ID_CAMEL_KEY] = dataset_id
|
||||||
|
data[FILE_NAME_KEY] = file_name
|
||||||
|
data[FILE_NAME_CAMEL_KEY] = file_name
|
||||||
|
|
||||||
self._apply_text_placeholders(data, label_config)
|
self._apply_text_placeholders(data, label_config)
|
||||||
return data
|
return data
|
||||||
@@ -418,6 +427,10 @@ class AnnotationEditorService:
|
|||||||
dataset_id=project.dataset_id,
|
dataset_id=project.dataset_id,
|
||||||
file_id=file_id,
|
file_id=file_id,
|
||||||
)
|
)
|
||||||
|
if needs_segmentation:
|
||||||
|
task_data[SEGMENT_INDEX_KEY] = current_segment_index
|
||||||
|
task_data[SEGMENT_INDEX_CAMEL_KEY] = current_segment_index
|
||||||
|
|
||||||
task: Dict[str, Any] = {
|
task: Dict[str, Any] = {
|
||||||
"id": ls_task_id,
|
"id": ls_task_id,
|
||||||
"data": task_data,
|
"data": task_data,
|
||||||
|
|||||||
Reference in New Issue
Block a user