feat(annotation): 添加分段索引支持和优化标注编辑器

- 在前端 lsf.html 中添加 segmentIndex 字段解析逻辑
- 在 LabelStudioTextEditor 中添加分段索引相关类型定义和处理函数
- 使用 useCallback 稳定组件中异步函数的引用,使 useEffect 依赖完整并移除 exhaustive-deps 的 eslint-disable 注释
- 添加对驼峰命名和下划线命名的数据字段兼容处理
- 实现分段模式下的标注状态更新功能
- 添加任务 ID 验证防止过期保存请求
- 在后端 editor.py 中添加分段索引字段支持
- 统一前后端数据传输格式确保字段一致性
This commit is contained in:
2026-01-22 17:14:37 +08:00
parent c638182c72
commit 1eee1e248e
3 changed files with 154 additions and 40 deletions

View File

@@ -200,6 +200,18 @@
// 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储) // 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储)
const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null; const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null;
const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null; const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null;
const segmentIndexValue =
currentTask?.data?.segment_index ??
currentTask?.data?.segmentIndex ??
currentTask?.data?.dm_segment_index ??
currentTask?.data?.dmSegmentIndex ??
null;
const segmentIndex =
segmentIndexValue === null || segmentIndexValue === undefined
? null
: Number.isFinite(Number(segmentIndexValue))
? Number(segmentIndexValue)
: null;
annotationPayload.id = typeof annotationPayload.id === "number" ? annotationPayload.id : taskId || 1; annotationPayload.id = typeof annotationPayload.id === "number" ? annotationPayload.id : taskId || 1;
annotationPayload.task = taskId; annotationPayload.task = taskId;
@@ -209,6 +221,7 @@
return { return {
taskId, taskId,
fileId, fileId,
segmentIndex,
annotation: annotationPayload, annotation: annotationPayload,
}; };
} }

View File

@@ -1,4 +1,4 @@
import { useEffect, useMemo, useRef, useState } from "react"; import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { App, Button, Card, List, Spin, Typography, Tag } from "antd"; import { App, Button, Card, List, Spin, Typography, Tag } from "antd";
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons"; import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons";
import { useNavigate, useParams } from "react-router"; import { useNavigate, useParams } from "react-router";
@@ -29,7 +29,7 @@ type EditorTaskListItem = {
type LsfMessage = { type LsfMessage = {
type?: string; type?: string;
payload?: any; payload?: unknown;
}; };
type SegmentInfo = { type SegmentInfo = {
@@ -40,8 +40,57 @@ type SegmentInfo = {
hasAnnotation: boolean; hasAnnotation: boolean;
}; };
/**
 * Envelope used in this file to type the results of the editor API calls
 * (project info, task list, task detail) in place of `any`.
 * Every field is optional, so callers must null-check `data` before use.
 */
type ApiResponse<T> = {
// NOTE(review): presumably a backend status code; not read in this file — confirm.
code?: number;
// NOTE(review): presumably a human-readable status text; not read in this file — confirm.
message?: string;
// The typed response body that callers unwrap via `resp?.data`.
data?: T;
};
/**
 * Task object carried in `EditorTaskResponse.task` and forwarded to the
 * Label Studio iframe in the `LS_INIT` message.
 */
type EditorTaskPayload = {
// May arrive as a number or a string; the component coerces it with `Number()`.
id?: number | string;
// Free-form task data; the component spreads it and adds file/segment keys.
data?: Record<string, unknown>;
// NOTE(review): presumably existing annotations for the task; shape is not inspected here — confirm.
annotations?: unknown[];
};
/**
 * Body of the task-detail response consumed by `initEditorForFile`.
 */
type EditorTaskResponse = {
// The task to load; the component treats a missing task as a failure case.
task?: EditorTaskPayload;
// When truthy, the component injects segment index keys into the task data.
segmented?: boolean;
// Per-segment metadata kept in component state.
segments?: SegmentInfo[];
// Index of the segment represented by `task`; falls back to 0 when unusable.
currentSegmentIndex?: number;
};
/**
 * Body of the task-list response; `loadTasks` reads `content` and
 * falls back to an empty list when it is missing.
 */
type EditorTaskListResponse = {
content?: EditorTaskListItem[];
};
/**
 * Shape assumed for payloads of messages posted by the Label Studio iframe
 * (`LS_READY`, `LS_EXPORT_RESULT`, `LS_SUBMIT`). The data crosses a
 * postMessage boundary, so every field is optional and loosely typed.
 */
type ExportPayload = {
// Compared against the expected task id to drop stale messages.
taskId?: number | string | null;
// File the annotation belongs to; the save path falls back to the selected file.
fileId?: string | null;
// Segment the annotation belongs to; normalized with `resolveSegmentIndex`.
segmentIndex?: number | string | null;
// Annotation object passed through to the upsert API call.
annotation?: Record<string, unknown>;
};
const LSF_IFRAME_SRC = "/lsf/lsf.html"; const LSF_IFRAME_SRC = "/lsf/lsf.html";
/**
 * Normalizes a loosely-typed segment index into a finite number.
 *
 * Only real numbers and non-blank numeric strings are accepted. Blind
 * `Number()` coercion would map `""`, whitespace, `false` and `[]` to `0`
 * (and `true` to `1`), so a malformed payload would silently target the
 * first segment instead of falling back to the caller's default.
 *
 * @param value - Raw value taken from task data or an iframe message payload.
 * @returns The parsed finite number, or `undefined` when the value is absent
 *          or not a usable number.
 */
const resolveSegmentIndex = (value: unknown): number | undefined => {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : undefined;
  }
  if (typeof value === "string") {
    const trimmed = value.trim();
    // Number("") is 0, so a blank string must be rejected explicitly.
    if (trimmed === "") return undefined;
    const parsed = Number(trimmed);
    return Number.isFinite(parsed) ? parsed : undefined;
  }
  // null, undefined, booleans, objects and arrays carry no index.
  return undefined;
};
/**
 * Views an untyped message payload as an `ExportPayload`.
 *
 * This is a cast, not a validation: any non-null object is accepted and its
 * fields are not checked. Everything else yields `undefined`.
 *
 * @param payload - Raw `payload` field of a message received from the iframe.
 * @returns The same object typed as `ExportPayload`, or `undefined`.
 */
const normalizePayload = (payload: unknown): ExportPayload | undefined => {
  const isObjectLike = typeof payload === "object" && payload !== null;
  return isObjectLike ? (payload as ExportPayload) : undefined;
};
/**
 * Extracts a string `message` property from an untyped payload.
 *
 * @param payload - Raw `payload` field of a message received from the iframe.
 * @returns The `message` value when the payload is an object whose `message`
 *          is a string (an empty string included); otherwise `undefined`.
 */
const resolvePayloadMessage = (payload: unknown) => {
  if (typeof payload !== "object" || payload === null) return undefined;
  const candidate = (payload as { message?: unknown }).message;
  return typeof candidate === "string" ? candidate : undefined;
};
export default function LabelStudioTextEditor() { export default function LabelStudioTextEditor() {
const { projectId = "" } = useParams(); const { projectId = "" } = useParams();
const navigate = useNavigate(); const navigate = useNavigate();
@@ -69,17 +118,17 @@ export default function LabelStudioTextEditor() {
const [segments, setSegments] = useState<SegmentInfo[]>([]); const [segments, setSegments] = useState<SegmentInfo[]>([]);
const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0); const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0);
const postToIframe = (type: string, payload?: any) => { const postToIframe = useCallback((type: string, payload?: unknown) => {
const win = iframeRef.current?.contentWindow; const win = iframeRef.current?.contentWindow;
if (!win) return; if (!win) return;
win.postMessage({ type, payload }, origin); win.postMessage({ type, payload }, origin);
}; }, [origin]);
const loadProject = async () => { const loadProject = useCallback(async () => {
setLoadingProject(true); setLoadingProject(true);
try { try {
const resp = (await getEditorProjectInfoUsingGet(projectId)) as any; const resp = (await getEditorProjectInfoUsingGet(projectId)) as ApiResponse<EditorProjectInfo>;
const data = resp?.data as EditorProjectInfo | undefined; const data = resp?.data;
if (!data?.projectId) { if (!data?.projectId) {
message.error("获取标注项目信息失败"); message.error("获取标注项目信息失败");
setProject(null); setProject(null);
@@ -93,18 +142,21 @@ export default function LabelStudioTextEditor() {
} finally { } finally {
setLoadingProject(false); setLoadingProject(false);
} }
}; }, [message, projectId]);
const loadTasks = async (silent = false) => { const loadTasks = useCallback(async (silent = false) => {
if (!projectId) return; if (!projectId) return;
if (!silent) setLoadingTasks(true); if (!silent) setLoadingTasks(true);
try { try {
const resp = (await listEditorTasksUsingGet(projectId, { page: 0, size: 200 })) as any; const resp = (await listEditorTasksUsingGet(projectId, {
const content = (resp?.data?.content || []) as EditorTaskListItem[]; page: 0,
size: 200,
})) as ApiResponse<EditorTaskListResponse>;
const content = resp?.data?.content || [];
const items = Array.isArray(content) ? content : []; const items = Array.isArray(content) ? content : [];
setTasks(items); setTasks(items);
if (!selectedFileId && items.length > 0) { if (items.length > 0) {
setSelectedFileId(items[0].fileId); setSelectedFileId((prev) => prev || (items[0]?.fileId ?? ""));
} }
} catch (e) { } catch (e) {
console.error(e); console.error(e);
@@ -113,9 +165,9 @@ export default function LabelStudioTextEditor() {
} finally { } finally {
if (!silent) setLoadingTasks(false); if (!silent) setLoadingTasks(false);
} }
}; }, [message, projectId]);
const initEditorForFile = async (fileId: string, segmentIdx?: number) => { const initEditorForFile = useCallback(async (fileId: string, segmentIdx?: number) => {
if (!project?.supported) return; if (!project?.supported) return;
if (!project?.labelConfig) { if (!project?.labelConfig) {
message.error("该项目未绑定标注模板,无法加载编辑器"); message.error("该项目未绑定标注模板,无法加载编辑器");
@@ -131,7 +183,7 @@ export default function LabelStudioTextEditor() {
try { try {
const resp = (await getEditorTaskUsingGet(projectId, fileId, { const resp = (await getEditorTaskUsingGet(projectId, fileId, {
segmentIndex: segmentIdx, segmentIndex: segmentIdx,
})) as any; })) as ApiResponse<EditorTaskResponse>;
const data = resp?.data; const data = resp?.data;
const task = data?.task; const task = data?.task;
if (!task) { if (!task) {
@@ -151,10 +203,25 @@ export default function LabelStudioTextEditor() {
setCurrentSegmentIndex(0); setCurrentSegmentIndex(0);
} }
expectedTaskIdRef.current = Number(task?.id) || null; const taskData = {
...(task?.data || {}),
file_id: fileId,
fileId: fileId,
};
if (data?.segmented) {
const segmentIndex = resolveSegmentIndex(data.currentSegmentIndex) ?? 0;
taskData.segment_index = segmentIndex;
taskData.segmentIndex = segmentIndex;
}
const taskForIframe = {
...task,
data: taskData,
};
expectedTaskIdRef.current = Number(taskForIframe?.id) || null;
postToIframe("LS_INIT", { postToIframe("LS_INIT", {
labelConfig: project.labelConfig, labelConfig: project.labelConfig,
task, task: taskForIframe,
user: { id: "datamate" }, user: { id: "datamate" },
// 完整的 Label Studio 原生界面配置 // 完整的 Label Studio 原生界面配置
interfaces: [ interfaces: [
@@ -188,30 +255,44 @@ export default function LabelStudioTextEditor() {
} finally { } finally {
if (seq === initSeqRef.current) setLoadingTaskDetail(false); if (seq === initSeqRef.current) setLoadingTaskDetail(false);
} }
}; }, [iframeReady, message, postToIframe, project, projectId]);
const saveFromExport = async (payload: any) => { const saveFromExport = useCallback(async (payload?: ExportPayload | null) => {
const fileId = payload?.fileId; const payloadTaskId = payload?.taskId;
if (expectedTaskIdRef.current && payloadTaskId) {
if (Number(payloadTaskId) !== expectedTaskIdRef.current) {
message.warning("已忽略过期的保存请求");
return;
}
}
const fileId = payload?.fileId || selectedFileId;
const annotation = payload?.annotation; const annotation = payload?.annotation;
if (!fileId || !annotation) { if (!fileId || !annotation || typeof annotation !== "object") {
message.error("导出标注失败:缺少 fileId/annotation"); message.error("导出标注失败:缺少 fileId/annotation");
return; return;
} }
const payloadSegmentIndex = resolveSegmentIndex(payload?.segmentIndex);
const segmentIndex =
payloadSegmentIndex !== undefined
? payloadSegmentIndex
: segmented
? currentSegmentIndex
: undefined;
setSaving(true); setSaving(true);
try { try {
await upsertEditorAnnotationUsingPut(projectId, String(fileId), { await upsertEditorAnnotationUsingPut(projectId, String(fileId), {
annotation, annotation,
segmentIndex: segmented ? currentSegmentIndex : undefined, segmentIndex,
}); });
message.success("标注已保存"); message.success("标注已保存");
await loadTasks(true); await loadTasks(true);
// 分段模式下更新当前段落的标注状态 // 分段模式下更新当前段落的标注状态
if (segmented) { if (segmented && segmentIndex !== undefined) {
setSegments((prev) => setSegments((prev) =>
prev.map((seg) => prev.map((seg) =>
seg.idx === currentSegmentIndex seg.idx === segmentIndex
? { ...seg, hasAnnotation: true } ? { ...seg, hasAnnotation: true }
: seg : seg
) )
@@ -223,7 +304,14 @@ export default function LabelStudioTextEditor() {
} finally { } finally {
setSaving(false); setSaving(false);
} }
}; }, [
currentSegmentIndex,
loadTasks,
message,
projectId,
segmented,
selectedFileId,
]);
const requestExport = () => { const requestExport = () => {
if (!selectedFileId) { if (!selectedFileId) {
@@ -254,20 +342,17 @@ export default function LabelStudioTextEditor() {
setCurrentSegmentIndex(0); setCurrentSegmentIndex(0);
if (projectId) loadProject(); if (projectId) loadProject();
// eslint-disable-next-line react-hooks/exhaustive-deps }, [projectId, loadProject]);
}, [projectId]);
useEffect(() => { useEffect(() => {
if (!project?.supported) return; if (!project?.supported) return;
loadTasks(); loadTasks();
// eslint-disable-next-line react-hooks/exhaustive-deps }, [project?.supported, loadTasks]);
}, [project?.projectId, project?.supported]);
useEffect(() => { useEffect(() => {
if (!selectedFileId) return; if (!selectedFileId) return;
initEditorForFile(selectedFileId); initEditorForFile(selectedFileId);
// eslint-disable-next-line react-hooks/exhaustive-deps }, [selectedFileId, iframeReady, initEditorForFile]);
}, [selectedFileId, iframeReady]);
useEffect(() => { useEffect(() => {
const handler = (event: MessageEvent<LsfMessage>) => { const handler = (event: MessageEvent<LsfMessage>) => {
@@ -280,8 +365,10 @@ export default function LabelStudioTextEditor() {
return; return;
} }
const payload = normalizePayload(msg.payload);
if (msg.type === "LS_READY") { if (msg.type === "LS_READY") {
const readyTaskId = msg.payload?.taskId; const readyTaskId = payload?.taskId;
if (expectedTaskIdRef.current && readyTaskId) { if (expectedTaskIdRef.current && readyTaskId) {
if (Number(readyTaskId) !== expectedTaskIdRef.current) return; if (Number(readyTaskId) !== expectedTaskIdRef.current) return;
} }
@@ -290,25 +377,26 @@ export default function LabelStudioTextEditor() {
} }
if (msg.type === "LS_EXPORT_RESULT") { if (msg.type === "LS_EXPORT_RESULT") {
saveFromExport(msg.payload); saveFromExport(payload);
return; return;
} }
// 兼容 iframe 内部在 submit 时直接上报(若启用) // 兼容 iframe 内部在 submit 时直接上报(若启用)
if (msg.type === "LS_SUBMIT") { if (msg.type === "LS_SUBMIT") {
saveFromExport(msg.payload); saveFromExport(payload);
return; return;
} }
if (msg.type === "LS_ERROR") { if (msg.type === "LS_ERROR") {
message.error(msg.payload?.message || "编辑器发生错误"); const payloadMessage = resolvePayloadMessage(msg.payload);
message.error(payloadMessage || "编辑器发生错误");
setLsReady(false); setLsReady(false);
} }
}; };
window.addEventListener("message", handler); window.addEventListener("message", handler);
return () => window.removeEventListener("message", handler); return () => window.removeEventListener("message", handler);
}, [message, origin]); }, [message, origin, saveFromExport]);
if (loadingProject) { if (loadingProject) {
return ( return (

View File

@@ -43,6 +43,11 @@ TEXT_DATA_KEY = "text"
DATASET_ID_KEY = "dataset_id" DATASET_ID_KEY = "dataset_id"
FILE_ID_KEY = "file_id" FILE_ID_KEY = "file_id"
FILE_NAME_KEY = "file_name" FILE_NAME_KEY = "file_name"
DATASET_ID_CAMEL_KEY = "datasetId"
FILE_ID_CAMEL_KEY = "fileId"
FILE_NAME_CAMEL_KEY = "fileName"
SEGMENT_INDEX_KEY = "segment_index"
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
TEXTUAL_OBJECT_CATEGORIES = {"text", "document"} TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_" OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
@@ -252,9 +257,13 @@ class AnnotationEditorService:
if self._needs_placeholder(data.get(TEXT_DATA_KEY)): if self._needs_placeholder(data.get(TEXT_DATA_KEY)):
data[TEXT_DATA_KEY] = display_text data[TEXT_DATA_KEY] = display_text
data.setdefault(FILE_ID_KEY, file_id) file_name = str(getattr(file_record, "file_name", ""))
data.setdefault(DATASET_ID_KEY, dataset_id) data[FILE_ID_KEY] = file_id
data.setdefault(FILE_NAME_KEY, getattr(file_record, "file_name", "")) data[FILE_ID_CAMEL_KEY] = file_id
data[DATASET_ID_KEY] = dataset_id
data[DATASET_ID_CAMEL_KEY] = dataset_id
data[FILE_NAME_KEY] = file_name
data[FILE_NAME_CAMEL_KEY] = file_name
self._apply_text_placeholders(data, label_config) self._apply_text_placeholders(data, label_config)
return data return data
@@ -418,6 +427,10 @@ class AnnotationEditorService:
dataset_id=project.dataset_id, dataset_id=project.dataset_id,
file_id=file_id, file_id=file_id,
) )
if needs_segmentation:
task_data[SEGMENT_INDEX_KEY] = current_segment_index
task_data[SEGMENT_INDEX_CAMEL_KEY] = current_segment_index
task: Dict[str, Any] = { task: Dict[str, Any] = {
"id": ls_task_id, "id": ls_task_id,
"data": task_data, "data": task_data,