feat(annotation): 添加分段索引支持和优化标注编辑器

- 在前端 lsf.html 中添加 segmentIndex 字段解析逻辑
- 在 LabelStudioTextEditor 中添加分段索引相关类型定义和处理函数
- 使用 useCallback 稳定组件内函数的引用,使其可以作为 useEffect 依赖并移除 exhaustive-deps 的 eslint 忽略注释
- 添加对驼峰命名和下划线命名的数据字段兼容处理
- 实现分段模式下的标注状态更新功能
- 添加任务 ID 验证防止过期保存请求
- 在后端 editor.py 中添加分段索引字段支持
- 统一前后端数据传输格式确保字段一致性
This commit is contained in:
2026-01-22 17:14:37 +08:00
parent c638182c72
commit 1eee1e248e
3 changed files with 154 additions and 40 deletions

View File

@@ -200,6 +200,18 @@
// 最小化对齐 Label Studio Server 的字段(DataMate 侧会原样存储)
const taskId = typeof currentTask?.id === "number" ? currentTask.id : Number(currentTask?.id) || null;
const fileId = currentTask?.data?.file_id || currentTask?.data?.fileId || null;
const segmentIndexValue =
currentTask?.data?.segment_index ??
currentTask?.data?.segmentIndex ??
currentTask?.data?.dm_segment_index ??
currentTask?.data?.dmSegmentIndex ??
null;
const segmentIndex =
segmentIndexValue === null || segmentIndexValue === undefined
? null
: Number.isFinite(Number(segmentIndexValue))
? Number(segmentIndexValue)
: null;
annotationPayload.id = typeof annotationPayload.id === "number" ? annotationPayload.id : taskId || 1;
annotationPayload.task = taskId;
@@ -209,6 +221,7 @@
return {
taskId,
fileId,
segmentIndex,
annotation: annotationPayload,
};
}

View File

@@ -1,4 +1,4 @@
import { useEffect, useMemo, useRef, useState } from "react";
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
import { App, Button, Card, List, Spin, Typography, Tag } from "antd";
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons";
import { useNavigate, useParams } from "react-router";
@@ -29,7 +29,7 @@ type EditorTaskListItem = {
type LsfMessage = {
type?: string;
payload?: any;
payload?: unknown;
};
type SegmentInfo = {
@@ -40,8 +40,57 @@ type SegmentInfo = {
hasAnnotation: boolean;
};
/**
 * Envelope shape that the editor API call results are cast to in this file.
 *
 * Every field is optional, so callers read the body defensively
 * (e.g. `resp?.data`) instead of assuming a well-formed response.
 *
 * @typeParam T - Type of the `data` body carried by the response.
 */
type ApiResponse<T> = {
code?: number;
message?: string;
data?: T;
};
/**
 * Task object carried in `EditorTaskResponse.task`.
 *
 * `id` may arrive as a number or a string; the component coerces it with
 * `Number()` before storing it as the expected task id. `data` is spread
 * into the task data that is posted to the editor iframe.
 */
type EditorTaskPayload = {
id?: number | string;
data?: Record<string, unknown>;
// NOTE(review): presumably existing annotations for the task; not read in the visible code — confirm.
annotations?: unknown[];
};
/**
 * Body (`data`) of the response that `getEditorTaskUsingGet` is cast to.
 *
 * When `segmented` is truthy the component resolves `currentSegmentIndex`
 * (defaulting to 0) and writes it into the task data sent to the iframe.
 */
type EditorTaskResponse = {
task?: EditorTaskPayload;
segmented?: boolean;
// NOTE(review): presumably the per-segment list shown in segmented mode — confirm against the API.
segments?: SegmentInfo[];
currentSegmentIndex?: number;
};
/**
 * Body (`data`) of the response that `listEditorTasksUsingGet` is cast to.
 * `content` is read with a fallback to an empty list when it is missing.
 */
type EditorTaskListResponse = {
content?: EditorTaskListItem[];
};
/**
 * Shape that an iframe message payload is cast to by `normalizePayload`
 * before it is handed to `saveFromExport`.
 *
 * All fields are optional because the payload originates from an untyped
 * `postMessage` event and is not validated field by field.
 */
type ExportPayload = {
// Compared (after `Number()` coercion) with the expected task id to drop stale messages.
taskId?: number | string | null;
// When missing, the save path falls back to the currently selected file id.
fileId?: string | null;
// Parsed with `resolveSegmentIndex`; may be a number or a numeric string.
segmentIndex?: number | string | null;
// Must be an object for the save to proceed.
annotation?: Record<string, unknown>;
};
const LSF_IFRAME_SRC = "/lsf/lsf.html";
/**
 * Parses a segment index that arrives from an untyped source (an API
 * response field or an iframe message payload).
 *
 * Only numbers and non-blank strings are considered. Plain `Number(value)`
 * coerces `""`, whitespace-only strings, `false` and `[]` to `0` (and `true`
 * to `1`), which would silently turn a missing or mistyped value into a
 * real segment index and suppress the caller's own fallback.
 *
 * @param value - Raw value to interpret as a segment index.
 * @returns The finite numeric index, or `undefined` when `value` is absent,
 *   blank, of an unsupported type, or not a finite number.
 */
const resolveSegmentIndex = (value: unknown): number | undefined => {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : undefined;
  }
  // Anything that is not a string (null, undefined, booleans, objects, ...)
  // has no meaningful index representation.
  if (typeof value !== "string" || value.trim() === "") return undefined;
  const parsed = Number(value);
  return Number.isFinite(parsed) ? parsed : undefined;
};
/**
 * Narrows an untyped message payload to `ExportPayload`.
 *
 * This is a structural cast only: any non-null value whose `typeof` is
 * `"object"` is passed through unchanged, and individual fields are not
 * validated here.
 *
 * @param payload - Raw payload taken from an iframe message.
 * @returns The same object typed as `ExportPayload`, or `undefined` for
 *   null, undefined and primitive values.
 */
const normalizePayload = (payload: unknown): ExportPayload | undefined => {
  const isObjectLike = typeof payload === "object" && payload !== null;
  return isObjectLike ? (payload as ExportPayload) : undefined;
};
/**
 * Extracts a human-readable `message` string from an untyped payload.
 *
 * @param payload - Raw payload taken from an iframe message.
 * @returns The `message` property when the payload is a non-null object and
 *   that property is a string (an empty string is returned as is);
 *   otherwise `undefined`.
 */
const resolvePayloadMessage = (payload: unknown) => {
  if (typeof payload !== "object" || payload === null) return undefined;
  const candidate = (payload as { message?: unknown }).message;
  return typeof candidate === "string" ? candidate : undefined;
};
export default function LabelStudioTextEditor() {
const { projectId = "" } = useParams();
const navigate = useNavigate();
@@ -69,17 +118,17 @@ export default function LabelStudioTextEditor() {
const [segments, setSegments] = useState<SegmentInfo[]>([]);
const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0);
const postToIframe = (type: string, payload?: any) => {
const postToIframe = useCallback((type: string, payload?: unknown) => {
const win = iframeRef.current?.contentWindow;
if (!win) return;
win.postMessage({ type, payload }, origin);
};
}, [origin]);
const loadProject = async () => {
const loadProject = useCallback(async () => {
setLoadingProject(true);
try {
const resp = (await getEditorProjectInfoUsingGet(projectId)) as any;
const data = resp?.data as EditorProjectInfo | undefined;
const resp = (await getEditorProjectInfoUsingGet(projectId)) as ApiResponse<EditorProjectInfo>;
const data = resp?.data;
if (!data?.projectId) {
message.error("获取标注项目信息失败");
setProject(null);
@@ -93,18 +142,21 @@ export default function LabelStudioTextEditor() {
} finally {
setLoadingProject(false);
}
};
}, [message, projectId]);
const loadTasks = async (silent = false) => {
const loadTasks = useCallback(async (silent = false) => {
if (!projectId) return;
if (!silent) setLoadingTasks(true);
try {
const resp = (await listEditorTasksUsingGet(projectId, { page: 0, size: 200 })) as any;
const content = (resp?.data?.content || []) as EditorTaskListItem[];
const resp = (await listEditorTasksUsingGet(projectId, {
page: 0,
size: 200,
})) as ApiResponse<EditorTaskListResponse>;
const content = resp?.data?.content || [];
const items = Array.isArray(content) ? content : [];
setTasks(items);
if (!selectedFileId && items.length > 0) {
setSelectedFileId(items[0].fileId);
if (items.length > 0) {
setSelectedFileId((prev) => prev || (items[0]?.fileId ?? ""));
}
} catch (e) {
console.error(e);
@@ -113,9 +165,9 @@ export default function LabelStudioTextEditor() {
} finally {
if (!silent) setLoadingTasks(false);
}
};
}, [message, projectId]);
const initEditorForFile = async (fileId: string, segmentIdx?: number) => {
const initEditorForFile = useCallback(async (fileId: string, segmentIdx?: number) => {
if (!project?.supported) return;
if (!project?.labelConfig) {
message.error("该项目未绑定标注模板,无法加载编辑器");
@@ -131,7 +183,7 @@ export default function LabelStudioTextEditor() {
try {
const resp = (await getEditorTaskUsingGet(projectId, fileId, {
segmentIndex: segmentIdx,
})) as any;
})) as ApiResponse<EditorTaskResponse>;
const data = resp?.data;
const task = data?.task;
if (!task) {
@@ -151,10 +203,25 @@ export default function LabelStudioTextEditor() {
setCurrentSegmentIndex(0);
}
expectedTaskIdRef.current = Number(task?.id) || null;
const taskData = {
...(task?.data || {}),
file_id: fileId,
fileId: fileId,
};
if (data?.segmented) {
const segmentIndex = resolveSegmentIndex(data.currentSegmentIndex) ?? 0;
taskData.segment_index = segmentIndex;
taskData.segmentIndex = segmentIndex;
}
const taskForIframe = {
...task,
data: taskData,
};
expectedTaskIdRef.current = Number(taskForIframe?.id) || null;
postToIframe("LS_INIT", {
labelConfig: project.labelConfig,
task,
task: taskForIframe,
user: { id: "datamate" },
// 完整的 Label Studio 原生界面配置
interfaces: [
@@ -188,30 +255,44 @@ export default function LabelStudioTextEditor() {
} finally {
if (seq === initSeqRef.current) setLoadingTaskDetail(false);
}
};
}, [iframeReady, message, postToIframe, project, projectId]);
const saveFromExport = async (payload: any) => {
const fileId = payload?.fileId;
const saveFromExport = useCallback(async (payload?: ExportPayload | null) => {
const payloadTaskId = payload?.taskId;
if (expectedTaskIdRef.current && payloadTaskId) {
if (Number(payloadTaskId) !== expectedTaskIdRef.current) {
message.warning("已忽略过期的保存请求");
return;
}
}
const fileId = payload?.fileId || selectedFileId;
const annotation = payload?.annotation;
if (!fileId || !annotation) {
if (!fileId || !annotation || typeof annotation !== "object") {
message.error("导出标注失败:缺少 fileId/annotation");
return;
}
const payloadSegmentIndex = resolveSegmentIndex(payload?.segmentIndex);
const segmentIndex =
payloadSegmentIndex !== undefined
? payloadSegmentIndex
: segmented
? currentSegmentIndex
: undefined;
setSaving(true);
try {
await upsertEditorAnnotationUsingPut(projectId, String(fileId), {
annotation,
segmentIndex: segmented ? currentSegmentIndex : undefined,
segmentIndex,
});
message.success("标注已保存");
await loadTasks(true);
// 分段模式下更新当前段落的标注状态
if (segmented) {
if (segmented && segmentIndex !== undefined) {
setSegments((prev) =>
prev.map((seg) =>
seg.idx === currentSegmentIndex
seg.idx === segmentIndex
? { ...seg, hasAnnotation: true }
: seg
)
@@ -223,7 +304,14 @@ export default function LabelStudioTextEditor() {
} finally {
setSaving(false);
}
};
}, [
currentSegmentIndex,
loadTasks,
message,
projectId,
segmented,
selectedFileId,
]);
const requestExport = () => {
if (!selectedFileId) {
@@ -254,20 +342,17 @@ export default function LabelStudioTextEditor() {
setCurrentSegmentIndex(0);
if (projectId) loadProject();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [projectId]);
}, [projectId, loadProject]);
useEffect(() => {
if (!project?.supported) return;
loadTasks();
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [project?.projectId, project?.supported]);
}, [project?.supported, loadTasks]);
useEffect(() => {
if (!selectedFileId) return;
initEditorForFile(selectedFileId);
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [selectedFileId, iframeReady]);
}, [selectedFileId, iframeReady, initEditorForFile]);
useEffect(() => {
const handler = (event: MessageEvent<LsfMessage>) => {
@@ -280,8 +365,10 @@ export default function LabelStudioTextEditor() {
return;
}
const payload = normalizePayload(msg.payload);
if (msg.type === "LS_READY") {
const readyTaskId = msg.payload?.taskId;
const readyTaskId = payload?.taskId;
if (expectedTaskIdRef.current && readyTaskId) {
if (Number(readyTaskId) !== expectedTaskIdRef.current) return;
}
@@ -290,25 +377,26 @@ export default function LabelStudioTextEditor() {
}
if (msg.type === "LS_EXPORT_RESULT") {
saveFromExport(msg.payload);
saveFromExport(payload);
return;
}
// 兼容 iframe 内部在 submit 时直接上报(若启用)
if (msg.type === "LS_SUBMIT") {
saveFromExport(msg.payload);
saveFromExport(payload);
return;
}
if (msg.type === "LS_ERROR") {
message.error(msg.payload?.message || "编辑器发生错误");
const payloadMessage = resolvePayloadMessage(msg.payload);
message.error(payloadMessage || "编辑器发生错误");
setLsReady(false);
}
};
window.addEventListener("message", handler);
return () => window.removeEventListener("message", handler);
}, [message, origin]);
}, [message, origin, saveFromExport]);
if (loadingProject) {
return (

View File

@@ -43,6 +43,11 @@ TEXT_DATA_KEY = "text"
DATASET_ID_KEY = "dataset_id"
FILE_ID_KEY = "file_id"
FILE_NAME_KEY = "file_name"
DATASET_ID_CAMEL_KEY = "datasetId"
FILE_ID_CAMEL_KEY = "fileId"
FILE_NAME_CAMEL_KEY = "fileName"
SEGMENT_INDEX_KEY = "segment_index"
SEGMENT_INDEX_CAMEL_KEY = "segmentIndex"
TEXTUAL_OBJECT_CATEGORIES = {"text", "document"}
OBJECT_NAME_HEADER_PREFIX = "dm_object_header_"
@@ -252,9 +257,13 @@ class AnnotationEditorService:
if self._needs_placeholder(data.get(TEXT_DATA_KEY)):
data[TEXT_DATA_KEY] = display_text
data.setdefault(FILE_ID_KEY, file_id)
data.setdefault(DATASET_ID_KEY, dataset_id)
data.setdefault(FILE_NAME_KEY, getattr(file_record, "file_name", ""))
file_name = str(getattr(file_record, "file_name", ""))
data[FILE_ID_KEY] = file_id
data[FILE_ID_CAMEL_KEY] = file_id
data[DATASET_ID_KEY] = dataset_id
data[DATASET_ID_CAMEL_KEY] = dataset_id
data[FILE_NAME_KEY] = file_name
data[FILE_NAME_CAMEL_KEY] = file_name
self._apply_text_placeholders(data, label_config)
return data
@@ -418,6 +427,10 @@ class AnnotationEditorService:
dataset_id=project.dataset_id,
file_id=file_id,
)
if needs_segmentation:
task_data[SEGMENT_INDEX_KEY] = current_segment_index
task_data[SEGMENT_INDEX_CAMEL_KEY] = current_segment_index
task: Dict[str, Any] = {
"id": ls_task_id,
"data": task_data,