From 71c4a8d8a69bb3e5812c18443b921b0b2e26be0c Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Mon, 19 Jan 2026 18:18:19 +0800 Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E6=B7=BB=E5=8A=A0=E6=96=87?= =?UTF-8?q?=E6=9C=AC=E5=88=86=E6=AE=B5=E6=A0=87=E6=B3=A8=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 引入文本分割器实现长文本按200字符自动分段 - 增加分段状态管理和段落导航界面 - 支持按段落保存和加载标注数据 - 实现分段模式下的标注状态跟踪 - 扩展API接口支持段落索引参数 - 添加分段相关的数据模型定义 --- .../Annotate/LabelStudioTextEditor.tsx | 129 ++++++++++++--- .../pages/DataAnnotation/annotation.api.ts | 14 +- .../app/module/annotation/interface/editor.py | 5 +- .../app/module/annotation/schema/editor.py | 24 +++ .../service/annotation_text_splitter.py | 113 +++++++++++++ .../app/module/annotation/service/editor.py | 151 ++++++++++++++++-- 6 files changed, 395 insertions(+), 41 deletions(-) create mode 100644 runtime/datamate-python/app/module/annotation/service/annotation_text_splitter.py diff --git a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx index 15296e1..0caf5bd 100644 --- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx +++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx @@ -1,6 +1,6 @@ import { useEffect, useMemo, useRef, useState } from "react"; -import { App, Button, Card, List, Spin, Typography } from "antd"; -import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined } from "@ant-design/icons"; +import { App, Button, Card, List, Spin, Typography, Tag } from "antd"; +import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons"; import { useNavigate, useParams } from "react-router"; import { @@ -32,6 +32,14 @@ type LsfMessage = { payload?: any; }; +type SegmentInfo = { + idx: number; + text: string; + start: number; + end: number; + hasAnnotation: boolean; +}; + const LSF_IFRAME_SRC = "/lsf/lsf.html"; export default function LabelStudioTextEditor() { @@ -56,6 +64,11 @@ export default function LabelStudioTextEditor() { const [selectedFileId, setSelectedFileId] = useState(""); const [sidebarCollapsed, setSidebarCollapsed] = useState(false); + // 分段相关状态 + const [segmented, setSegmented] = useState(false); + const [segments, setSegments] = useState([]); + const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0); + const postToIframe = (type: string, payload?: any) => { const win = iframeRef.current?.contentWindow; if (!win) return; @@ -102,7 +115,7 @@ export default function LabelStudioTextEditor() { } }; - const initEditorForFile = async (fileId: string) => { + const initEditorForFile = async (fileId: string, segmentIdx?: number) => { if (!project?.supported) return; if (!project?.labelConfig) { message.error("该项目未绑定标注模板,无法加载编辑器"); @@ -116,14 +129,28 @@ export default function LabelStudioTextEditor() { expectedTaskIdRef.current = null; try { - const resp = (await getEditorTaskUsingGet(projectId, fileId)) as any; - const task = resp?.data?.task; + const resp = (await getEditorTaskUsingGet(projectId, fileId, { + segmentIndex: segmentIdx, + })) as any; + const data = resp?.data; + const task = data?.task; if (!task) { message.error("获取任务详情失败"); return; } if (seq !== initSeqRef.current) return; + // 更新分段状态 + if (data?.segmented) { + setSegmented(true); + setSegments(data.segments || []); + setCurrentSegmentIndex(data.currentSegmentIndex || 0); + } else { + setSegmented(false); + setSegments([]); + setCurrentSegmentIndex(0); + } + expectedTaskIdRef.current = Number(task?.id) || null; postToIframe("LS_INIT", { labelConfig: project.labelConfig, @@ -173,9 +200,23 @@ export default function LabelStudioTextEditor() { setSaving(true); try { - await upsertEditorAnnotationUsingPut(projectId, String(fileId), { annotation }); + await upsertEditorAnnotationUsingPut(projectId, String(fileId), { + annotation, + segmentIndex: segmented ? currentSegmentIndex : undefined, + }); message.success("标注已保存"); await loadTasks(true); + + // 分段模式下更新当前段落的标注状态 + if (segmented) { + setSegments((prev) => + prev.map((seg) => + seg.idx === currentSegmentIndex + ? { ...seg, hasAnnotation: true } + : seg + ) + ); + } } catch (e) { console.error(e); message.error("保存失败"); @@ -192,6 +233,13 @@ export default function LabelStudioTextEditor() { postToIframe("LS_EXPORT", {}); }; + // 段落切换处理 + const handleSegmentChange = async (newIndex: number) => { + if (newIndex === currentSegmentIndex) return; + setCurrentSegmentIndex(newIndex); + await initEditorForFile(selectedFileId, newIndex); + }; + useEffect(() => { setIframeReady(false); setProject(null); @@ -200,6 +248,10 @@ export default function LabelStudioTextEditor() { initSeqRef.current = 0; setLsReady(false); expectedTaskIdRef.current = null; + // 重置分段状态 + setSegmented(false); + setSegments([]); + setCurrentSegmentIndex(0); if (projectId) loadProject(); // eslint-disable-next-line react-hooks/exhaustive-deps @@ -379,26 +431,55 @@ export default function LabelStudioTextEditor() { {/* 右侧编辑器 - Label Studio iframe */} -
- {(!iframeReady || loadingTaskDetail || (selectedFileId && !lsReady)) && ( -
- +
+ {/* 段落导航栏 */} + {segmented && segments.length > 0 && ( +
+ 段落: +
+ {segments.map((seg) => ( + + ))} +
+ + {currentSegmentIndex + 1} / {segments.length} +
)} -