From 1eee1e248ed54d0a348590cbaabd9987c3ed5c47 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Thu, 22 Jan 2026 17:14:37 +0800 Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E6=B7=BB=E5=8A=A0=E5=88=86?= =?UTF-8?q?=E6=AE=B5=E7=B4=A2=E5=BC=95=E6=94=AF=E6=8C=81=E5=92=8C=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E6=A0=87=E6=B3=A8=E7=BC=96=E8=BE=91=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在前端 lsf.html 中添加 segmentIndex 字段解析逻辑 - 在 LabelStudioTextEditor 中添加分段索引相关类型定义和处理函数 - 使用 useCallback 优化组件中的异步函数性能 - 添加对驼峰命名和下划线命名的数据字段兼容处理 - 实现分段模式下的标注状态更新功能 - 添加任务 ID 验证防止过期保存请求 - 在后端 editor.py 中添加分段索引字段支持 - 统一前后端数据传输格式确保字段一致性 --- frontend/public/lsf/lsf.html | 13 ++ .../Annotate/LabelStudioTextEditor.tsx | 162 ++++++++++++++---- .../app/module/annotation/service/editor.py | 19 +- 3 files changed, 154 insertions(+), 40 deletions(-) diff --git a/frontend/public/lsf/lsf.html b/frontend/public/lsf/lsf.html index 80c90bd..91a6f7f 100644 --- a/frontend/public/lsf/lsf.html +++ b/frontend/public/lsf/lsf.html @@ -200,6 +200,18 @@ // 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储) const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null; const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null; + const segmentIndexValue = + currentTask?.data?.segment_index ?? + currentTask?.data?.segmentIndex ?? + currentTask?.data?.dm_segment_index ?? + currentTask?.data?.dmSegmentIndex ?? + null; + const segmentIndex = + segmentIndexValue === null || segmentIndexValue === undefined + ? null + : Number.isFinite(Number(segmentIndexValue)) + ? Number(segmentIndexValue) + : null; annotationPayload.id = typeof annotationPayload.id === "number" ? annotationPayload.id : taskId || 1; annotationPayload.task = taskId; @@ -209,6 +221,7 @@ return { taskId, fileId, + segmentIndex, annotation: annotationPayload, }; } diff --git a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx index 0caf5bd..b754d19 100644 --- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx +++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx @@ -1,4 +1,4 @@ -import { useEffect, useMemo, useRef, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { App, Button, Card, List, Spin, Typography, Tag } from "antd"; import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons"; import { useNavigate, useParams } from "react-router"; @@ -29,7 +29,7 @@ type EditorTaskListItem = { type LsfMessage = { type?: string; - payload?: any; + payload?: unknown; }; type SegmentInfo = { @@ -40,8 +40,57 @@ type SegmentInfo = { hasAnnotation: boolean; }; +type ApiResponse = { + code?: number; + message?: string; + data?: T; +}; + +type EditorTaskPayload = { + id?: number | string; + data?: Record; + annotations?: unknown[]; +}; + +type EditorTaskResponse = { + task?: EditorTaskPayload; + segmented?: boolean; + segments?: SegmentInfo[]; + currentSegmentIndex?: number; +}; + +type EditorTaskListResponse = { + content?: EditorTaskListItem[]; +}; + +type ExportPayload = { + taskId?: number | string | null; + fileId?: string | null; + segmentIndex?: number | string | null; + annotation?: Record; +}; + const LSF_IFRAME_SRC = "/lsf/lsf.html"; +const resolveSegmentIndex = (value: unknown) => { + if (value === null || value === undefined) return undefined; + const parsed = Number(value); + return Number.isFinite(parsed) ? parsed : undefined; +}; + +const normalizePayload = (payload: unknown): ExportPayload | undefined => { + if (!payload || typeof payload !== "object") return undefined; + return payload as ExportPayload; +}; + +const resolvePayloadMessage = (payload: unknown) => { + if (!payload || typeof payload !== "object") return undefined; + if ("message" in payload && typeof (payload as { message?: unknown }).message === "string") { + return (payload as { message?: string }).message; + } + return undefined; +}; + export default function LabelStudioTextEditor() { const { projectId = "" } = useParams(); const navigate = useNavigate(); @@ -69,17 +118,17 @@ export default function LabelStudioTextEditor() { const [segments, setSegments] = useState([]); const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0); - const postToIframe = (type: string, payload?: any) => { + const postToIframe = useCallback((type: string, payload?: unknown) => { const win = iframeRef.current?.contentWindow; if (!win) return; win.postMessage({ type, payload }, origin); - }; + }, [origin]); - const loadProject = async () => { + const loadProject = useCallback(async () => { setLoadingProject(true); try { - const resp = (await getEditorProjectInfoUsingGet(projectId)) as any; - const data = resp?.data as EditorProjectInfo | undefined; + const resp = (await getEditorProjectInfoUsingGet(projectId)) as ApiResponse; + const data = resp?.data; if (!data?.projectId) { message.error("获取标注项目信息失败"); setProject(null); @@ -93,18 +142,21 @@ export default function LabelStudioTextEditor() { } finally { setLoadingProject(false); } - }; + }, [message, projectId]); - const loadTasks = async (silent = false) => { + const loadTasks = useCallback(async (silent = false) => { if (!projectId) return; if (!silent) setLoadingTasks(true); try { - const resp = (await listEditorTasksUsingGet(projectId, { page: 0, size: 200 })) as any; - const content = (resp?.data?.content || []) as EditorTaskListItem[]; + const resp = (await listEditorTasksUsingGet(projectId, { + page: 0, + size: 200, + })) as ApiResponse; + const content = resp?.data?.content || []; const items = Array.isArray(content) ? content : []; setTasks(items); - if (!selectedFileId && items.length > 0) { - setSelectedFileId(items[0].fileId); + if (items.length > 0) { + setSelectedFileId((prev) => prev || (items[0]?.fileId ?? "")); } } catch (e) { console.error(e); @@ -113,9 +165,9 @@ export default function LabelStudioTextEditor() { } finally { if (!silent) setLoadingTasks(false); } - }; + }, [message, projectId]); - const initEditorForFile = async (fileId: string, segmentIdx?: number) => { + const initEditorForFile = useCallback(async (fileId: string, segmentIdx?: number) => { if (!project?.supported) return; if (!project?.labelConfig) { message.error("该项目未绑定标注模板,无法加载编辑器"); @@ -131,7 +183,7 @@ export default function LabelStudioTextEditor() { try { const resp = (await getEditorTaskUsingGet(projectId, fileId, { segmentIndex: segmentIdx, - })) as any; + })) as ApiResponse; const data = resp?.data; const task = data?.task; if (!task) { @@ -151,10 +203,25 @@ export default function LabelStudioTextEditor() { setCurrentSegmentIndex(0); } - expectedTaskIdRef.current = Number(task?.id) || null; + const taskData = { + ...(task?.data || {}), + file_id: fileId, + fileId: fileId, + }; + if (data?.segmented) { + const segmentIndex = resolveSegmentIndex(data.currentSegmentIndex) ?? 0; + taskData.segment_index = segmentIndex; + taskData.segmentIndex = segmentIndex; + } + const taskForIframe = { + ...task, + data: taskData, + }; + + expectedTaskIdRef.current = Number(taskForIframe?.id) || null; postToIframe("LS_INIT", { labelConfig: project.labelConfig, - task, + task: taskForIframe, user: { id: "datamate" }, // 完整的 Label Studio 原生界面配置 interfaces: [ @@ -188,30 +255,44 @@ export default function LabelStudioTextEditor() { } finally { if (seq === initSeqRef.current) setLoadingTaskDetail(false); } - }; + }, [iframeReady, message, postToIframe, project, projectId]); - const saveFromExport = async (payload: any) => { - const fileId = payload?.fileId; + const saveFromExport = useCallback(async (payload?: ExportPayload | null) => { + const payloadTaskId = payload?.taskId; + if (expectedTaskIdRef.current && payloadTaskId) { + if (Number(payloadTaskId) !== expectedTaskIdRef.current) { + message.warning("已忽略过期的保存请求"); + return; + } + } + const fileId = payload?.fileId || selectedFileId; const annotation = payload?.annotation; - if (!fileId || !annotation) { + if (!fileId || !annotation || typeof annotation !== "object") { message.error("导出标注失败:缺少 fileId/annotation"); return; } + const payloadSegmentIndex = resolveSegmentIndex(payload?.segmentIndex); + const segmentIndex = + payloadSegmentIndex !== undefined + ? payloadSegmentIndex + : segmented + ? currentSegmentIndex + : undefined; setSaving(true); try { await upsertEditorAnnotationUsingPut(projectId, String(fileId), { annotation, - segmentIndex: segmented ? currentSegmentIndex : undefined, + segmentIndex, }); message.success("标注已保存"); await loadTasks(true); // 分段模式下更新当前段落的标注状态 - if (segmented) { + if (segmented && segmentIndex !== undefined) { setSegments((prev) => prev.map((seg) => - seg.idx === currentSegmentIndex + seg.idx === segmentIndex ? { ...seg, hasAnnotation: true } : seg ) @@ -223,7 +304,14 @@ export default function LabelStudioTextEditor() { } finally { setSaving(false); } - }; + }, [ + currentSegmentIndex, + loadTasks, + message, + projectId, + segmented, + selectedFileId, + ]); const requestExport = () => { if (!selectedFileId) { @@ -254,20 +342,17 @@ export default function LabelStudioTextEditor() { setCurrentSegmentIndex(0); if (projectId) loadProject(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [projectId]); + }, [projectId, loadProject]); useEffect(() => { if (!project?.supported) return; loadTasks(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [project?.projectId, project?.supported]); + }, [project?.supported, loadTasks]); useEffect(() => { if (!selectedFileId) return; initEditorForFile(selectedFileId); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, [selectedFileId, iframeReady]); + }, [selectedFileId, iframeReady, initEditorForFile]); useEffect(() => { const handler = (event: MessageEvent) => { @@ -280,8 +365,10 @@ export default function LabelStudioTextEditor() { return; } + const payload = normalizePayload(msg.payload); + if (msg.type === "LS_READY") { - const readyTaskId = msg.payload?.taskId; + const readyTaskId = payload?.taskId; if (expectedTaskIdRef.current && readyTaskId) { if (Number(readyTaskId) !== expectedTaskIdRef.current) return; } @@ -290,25 +377,26 @@ export default function LabelStudioTextEditor() { } if (msg.type === "LS_EXPORT_RESULT") { - saveFromExport(msg.payload); + saveFromExport(payload); return; } // 兼容 iframe 内部在 submit 时直接上报(若启用) if (msg.type === "LS_SUBMIT") { - saveFromExport(msg.payload); + saveFromExport(payload); return; } if (msg.type === "LS_ERROR") { - message.error(msg.payload?.message || "编辑器发生错误"); + const payloadMessage = resolvePayloadMessage(msg.payload); + message.error(payloadMessage || "编辑器发生错误"); setLsReady(false); } }; window.addEventListener("message", handler); return () => window.removeEventListener("message", handler); - }, [message, origin]); + }, [message, origin, saveFromExport]); if (loadingProject) { return ( diff --git a/runtime/datamate-python/app/module/annotation/service/editor.py b/runtime/datamate-python/app/module/annotation/service/editor.py index 30c8098..f8b6632 100644 --- a/runtime/datamate-python/app/module/annotation/service/editor.py +++ b/runtime/datamate-python/app/module/annotation/service/editor.py @@ -43,6 +43,11 @@ TEXT_DATA_KEY = "text" DATASET_ID_KEY = "dataset_id" FILE_ID_KEY = "file_id" FILE_NAME_KEY = "file_name" +DATASET_ID_CAMEL_KEY = "datasetId" +FILE_ID_CAMEL_KEY = "fileId" +FILE_NAME_CAMEL_KEY = "fileName" +SEGMENT_INDEX_KEY = "segment_index" +SEGMENT_INDEX_CAMEL_KEY = "segmentIndex" TEXTUAL_OBJECT_CATEGORIES = {"text", "document"} OBJECT_NAME_HEADER_PREFIX = "dm_object_header_" @@ -252,9 +257,13 @@ class AnnotationEditorService: if self._needs_placeholder(data.get(TEXT_DATA_KEY)): data[TEXT_DATA_KEY] = display_text - data.setdefault(FILE_ID_KEY, file_id) - data.setdefault(DATASET_ID_KEY, dataset_id) - data.setdefault(FILE_NAME_KEY, getattr(file_record, "file_name", "")) + file_name = str(getattr(file_record, "file_name", "")) + data[FILE_ID_KEY] = file_id + data[FILE_ID_CAMEL_KEY] = file_id + data[DATASET_ID_KEY] = dataset_id + data[DATASET_ID_CAMEL_KEY] = dataset_id + data[FILE_NAME_KEY] = file_name + data[FILE_NAME_CAMEL_KEY] = file_name self._apply_text_placeholders(data, label_config) return data @@ -418,6 +427,10 @@ class AnnotationEditorService: dataset_id=project.dataset_id, file_id=file_id, ) + if needs_segmentation: + task_data[SEGMENT_INDEX_KEY] = current_segment_index + task_data[SEGMENT_INDEX_CAMEL_KEY] = current_segment_index + task: Dict[str, Any] = { "id": ls_task_id, "data": task_data,