From fa9e9d9f6884fe10fecf22e31d54ecb322c1052c Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Wed, 4 Feb 2026 18:08:14 +0800 Subject: [PATCH] =?UTF-8?q?refactor(annotation):=20=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E6=96=87=E6=9C=AC=E6=A0=87=E6=B3=A8=E7=BC=96=E8=BE=91=E5=99=A8?= =?UTF-8?q?=E7=9A=84=E6=AE=B5=E8=90=BD=E7=AE=A1=E7=90=86=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除段落统计相关的数据结构和缓存逻辑 - 删除段落切换确认对话框和自动保存选项 - 简化段落加载和状态管理流程 - 将段落列表视图替换为简单的进度显示 - 更新API接口以支持单段内容获取 - 重构后端服务实现单段内容查询功能 --- .../Annotate/LabelStudioTextEditor.tsx | 425 +----------------- .../pages/DataAnnotation/annotation.api.ts | 8 +- .../app/module/annotation/interface/editor.py | 9 +- .../app/module/annotation/schema/editor.py | 19 +- .../app/module/annotation/service/editor.py | 58 ++- 5 files changed, 98 insertions(+), 421 deletions(-) diff --git a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx index b7edc80..e60c351 100644 --- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx +++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx @@ -1,12 +1,11 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; -import { App, Button, Card, List, Spin, Typography, Tag, Switch, Tree, Empty } from "antd"; -import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined, CheckOutlined } from "@ant-design/icons"; +import { App, Button, Card, List, Spin, Typography, Tag, Empty } from "antd"; +import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined } from "@ant-design/icons"; import { useNavigate, useParams } from "react-router"; import { getEditorProjectInfoUsingGet, getEditorTaskUsingGet, - getEditorTaskSegmentsUsingGet, listEditorTasksUsingGet, upsertEditorAnnotationUsingPut, } from "../annotation.api"; @@ -29,7 +28,6 @@ type EditorTaskListItem = { hasAnnotation: boolean; annotationUpdatedAt?: string | null; annotationStatus?: AnnotationResultStatus | null; - segmentStats?: SegmentStats; }; type LsfMessage = { @@ -37,18 +35,6 @@ type LsfMessage = { payload?: unknown; }; -type SegmentInfo = { - idx: number; - hasAnnotation: boolean; - lineIndex: number; - chunkIndex: number; -}; - -type SegmentStats = { - done: number; - total: number; -}; - type ApiResponse = { code?: number; message?: string; @@ -68,11 +54,6 @@ type EditorTaskResponse = { currentSegmentIndex?: number; }; -type EditorTaskSegmentsResponse = { - segmented?: boolean; - segments?: SegmentInfo[]; - totalSegments?: number; -}; type EditorTaskListResponse = { content?: EditorTaskListItem[]; @@ -95,8 +76,6 @@ type ExportPayload = { requestId?: string | null; }; -type SwitchDecision = "save" | "discard" | "cancel"; - const LSF_IFRAME_SRC = "/lsf/lsf.html"; const TASK_PAGE_START = 0; const TASK_PAGE_SIZE = 200; @@ -158,16 +137,6 @@ const isAnnotationResultEmpty = (annotation?: Record) => { }; const resolveTaskStatusMeta = (item: EditorTaskListItem) => { - const segmentSummary = resolveSegmentSummary(item); - if (segmentSummary) { - if (segmentSummary.done >= segmentSummary.total) { - return { text: "已标注", type: "success" as const }; - } - if (segmentSummary.done > 0) { - return { text: "标注中", type: "warning" as const }; - } - return { text: "未标注", type: "secondary" as const }; - } if (!item.hasAnnotation) { return { text: "未标注", type: "secondary" as const }; } @@ -220,25 +189,6 @@ const buildAnnotationSnapshot = (annotation?: Record) => { const buildSnapshotKey = (fileId: string, segmentIndex?: number) => `${fileId}::${segmentIndex ?? "full"}`; -const buildSegmentStats = (segmentList?: SegmentInfo[] | null): SegmentStats | null => { - if (!Array.isArray(segmentList) || segmentList.length === 0) return null; - const total = segmentList.length; - const done = segmentList.reduce((count, seg) => count + (seg.hasAnnotation ? 1 : 0), 0); - return { done, total }; -}; - -const normalizeSegmentStats = (stats?: SegmentStats | null): SegmentStats | null => { - if (!stats) return null; - const total = Number(stats.total); - const done = Number(stats.done); - if (!Number.isFinite(total) || total <= 0) return null; - const safeDone = Math.min(Math.max(done, 0), total); - return { done: safeDone, total }; -}; - -const resolveSegmentSummary = (item: EditorTaskListItem) => - normalizeSegmentStats(item.segmentStats); - const mergeTaskItems = (base: EditorTaskListItem[], next: EditorTaskListItem[]) => { if (next.length === 0) return base; const seen = new Set(base.map((item) => item.fileId)); @@ -286,19 +236,13 @@ export default function LabelStudioTextEditor() { resolve: (payload?: ExportPayload) => void; timer?: number; } | null>(null); - const exportCheckSeqRef = useRef(0); const savedSnapshotsRef = useRef>({}); const pendingAutoAdvanceRef = useRef(false); - const segmentStatsCacheRef = useRef>({}); - const segmentStatsSeqRef = useRef(0); - const segmentStatsLoadingRef = useRef>(new Set()); - const segmentSummaryFileRef = useRef(""); const [loadingProject, setLoadingProject] = useState(true); const [loadingTasks, setLoadingTasks] = useState(false); const [loadingTaskDetail, setLoadingTaskDetail] = useState(false); const [saving, setSaving] = useState(false); - const [segmentSwitching, setSegmentSwitching] = useState(false); const [iframeReady, setIframeReady] = useState(false); const [lsReady, setLsReady] = useState(false); @@ -311,12 +255,11 @@ export default function LabelStudioTextEditor() { const [prefetching, setPrefetching] = useState(false); const [selectedFileId, setSelectedFileId] = useState(""); const [sidebarCollapsed, setSidebarCollapsed] = useState(false); - const [autoSaveOnSwitch, setAutoSaveOnSwitch] = useState(false); // 分段相关状态 const [segmented, setSegmented] = useState(false); - const [segments, setSegments] = useState([]); const [currentSegmentIndex, setCurrentSegmentIndex] = useState(0); + const [segmentTotal, setSegmentTotal] = useState(0); const isTextProject = useMemo( () => (project?.datasetType || "").toUpperCase() === "TEXT", [project?.datasetType], @@ -335,68 +278,6 @@ export default function LabelStudioTextEditor() { win.postMessage({ type, payload }, origin); }, [origin]); - const applySegmentStats = useCallback((fileId: string, stats: SegmentStats | null) => { - if (!fileId) return; - const normalized = normalizeSegmentStats(stats); - setTasks((prev) => - prev.map((item) => - item.fileId === fileId - ? { ...item, segmentStats: normalized || undefined } - : item - ) - ); - }, []); - - const updateSegmentStatsCache = useCallback((fileId: string, stats: SegmentStats | null) => { - if (!fileId) return; - const normalized = normalizeSegmentStats(stats); - if (normalized) { - segmentStatsCacheRef.current[fileId] = normalized; - } else { - delete segmentStatsCacheRef.current[fileId]; - } - applySegmentStats(fileId, normalized); - }, [applySegmentStats]); - - const fetchSegmentStatsForFile = useCallback(async (fileId: string, seq: number) => { - if (!projectId || !fileId) return; - if (segmentStatsCacheRef.current[fileId] || segmentStatsLoadingRef.current.has(fileId)) return; - segmentStatsLoadingRef.current.add(fileId); - try { - const resp = (await getEditorTaskSegmentsUsingGet(projectId, fileId)) as ApiResponse; - if (segmentStatsSeqRef.current !== seq) return; - const data = resp?.data; - if (!data?.segmented) return; - const stats = buildSegmentStats(data.segments); - if (!stats) return; - segmentStatsCacheRef.current[fileId] = stats; - applySegmentStats(fileId, stats); - } catch (e) { - console.error(e); - } finally { - segmentStatsLoadingRef.current.delete(fileId); - } - }, [applySegmentStats, projectId]); - - const prefetchSegmentStats = useCallback((items: EditorTaskListItem[]) => { - if (!projectId) return; - const fileIds = items - .map((item) => item.fileId) - .filter((fileId) => fileId && !segmentStatsCacheRef.current[fileId]); - if (fileIds.length === 0) return; - const seq = segmentStatsSeqRef.current; - let cursor = 0; - const workerCount = Math.min(3, fileIds.length); - const runWorker = async () => { - while (cursor < fileIds.length && segmentStatsSeqRef.current === seq) { - const fileId = fileIds[cursor]; - cursor += 1; - await fetchSegmentStatsForFile(fileId, seq); - } - }; - void Promise.all(Array.from({ length: workerCount }, () => runWorker())); - }, [fetchSegmentStatsForFile, projectId]); - const confirmEmptyAnnotationStatus = useCallback(() => { return new Promise((resolve) => { let resolved = false; @@ -599,33 +480,14 @@ export default function LabelStudioTextEditor() { ? resolveSegmentIndex(data.currentSegmentIndex) ?? 0 : undefined; if (isSegmented) { - let nextSegments: SegmentInfo[] = []; - if (segmentSummaryFileRef.current === fileId && segments.length > 0) { - nextSegments = segments; - } else { - try { - const segmentResp = (await getEditorTaskSegmentsUsingGet(projectId, fileId)) as ApiResponse; - if (seq !== initSeqRef.current) return; - const segmentData = segmentResp?.data; - if (segmentData?.segmented) { - nextSegments = Array.isArray(segmentData.segments) ? segmentData.segments : []; - } - } catch (e) { - console.error(e); - } - } - const stats = buildSegmentStats(nextSegments); setSegmented(true); - setSegments(nextSegments); setCurrentSegmentIndex(segmentIndex ?? 0); - updateSegmentStatsCache(fileId, stats); - segmentSummaryFileRef.current = fileId; + const totalSegments = Number(data?.totalSegments ?? 0); + setSegmentTotal(Number.isFinite(totalSegments) && totalSegments > 0 ? totalSegments : 0); } else { setSegmented(false); - setSegments([]); setCurrentSegmentIndex(0); - updateSegmentStatsCache(fileId, null); - segmentSummaryFileRef.current = fileId; + setSegmentTotal(0); } const taskData = { @@ -685,19 +547,14 @@ export default function LabelStudioTextEditor() { } finally { if (seq === initSeqRef.current) setLoadingTaskDetail(false); } - }, [iframeReady, message, postToIframe, project, projectId, segments, updateSegmentStatsCache]); + }, [iframeReady, message, postToIframe, project, projectId]); const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => { if (!fileId) return; - if (segmented && segments.length > 0) { - const sortedSegmentIndices = segments - .map((seg) => seg.idx) - .sort((a, b) => a - b); - const baseIndex = segmentIndex ?? currentSegmentIndex; - const currentPos = sortedSegmentIndices.indexOf(baseIndex); - const nextSegmentIndex = - currentPos >= 0 ? sortedSegmentIndices[currentPos + 1] : sortedSegmentIndices[0]; - if (nextSegmentIndex !== undefined) { + if (segmented && segmentTotal > 0) { + const baseIndex = Math.max(segmentIndex ?? currentSegmentIndex, 0); + const nextSegmentIndex = baseIndex + 1; + if (nextSegmentIndex < segmentTotal) { await initEditorForFile(fileId, nextSegmentIndex); return; } @@ -719,7 +576,7 @@ export default function LabelStudioTextEditor() { initEditorForFile, message, segmented, - segments, + segmentTotal, tasks, ]); @@ -793,16 +650,6 @@ export default function LabelStudioTextEditor() { const snapshot = buildAnnotationSnapshot(isRecord(annotation) ? annotation : undefined); savedSnapshotsRef.current[snapshotKey] = snapshot; - // 分段模式下更新当前段落的标注状态 - if (segmented && segmentIndex !== undefined) { - const nextSegments = segments.map((seg) => - seg.idx === segmentIndex - ? { ...seg, hasAnnotation: true } - : seg - ); - setSegments(nextSegments); - updateSegmentStatsCache(String(fileId), buildSegmentStats(nextSegments)); - } if (options?.autoAdvance) { await advanceAfterSave(String(fileId), segmentIndex); } @@ -821,69 +668,10 @@ export default function LabelStudioTextEditor() { message, projectId, segmented, - segments, selectedFileId, tasks, - updateSegmentStatsCache, ]); - const requestExportForCheck = useCallback(() => { - if (!iframeReady || !lsReady) return Promise.resolve(undefined); - if (exportCheckRef.current) { - if (exportCheckRef.current.timer) { - window.clearTimeout(exportCheckRef.current.timer); - } - exportCheckRef.current.resolve(undefined); - exportCheckRef.current = null; - } - const requestId = `check_${Date.now()}_${++exportCheckSeqRef.current}`; - return new Promise((resolve) => { - const timer = window.setTimeout(() => { - if (exportCheckRef.current?.requestId === requestId) { - exportCheckRef.current = null; - } - resolve(undefined); - }, 3000); - exportCheckRef.current = { - requestId, - resolve, - timer, - }; - postToIframe("LS_EXPORT_CHECK", { requestId }); - }); - }, [iframeReady, lsReady, postToIframe]); - - const confirmSaveBeforeSwitch = useCallback(() => { - return new Promise((resolve) => { - let resolved = false; - let modalInstance: { destroy: () => void } | null = null; - const settle = (decision: SwitchDecision) => { - if (resolved) return; - resolved = true; - resolve(decision); - }; - const handleDiscard = () => { - if (modalInstance) modalInstance.destroy(); - settle("discard"); - }; - modalInstance = modal.confirm({ - title: "当前段落有未保存标注", - content: ( -
- 切换段落前请先保存当前标注。 - -
- ), - okText: "保存并切换", - cancelText: "取消", - onOk: () => settle("save"), - onCancel: () => settle("cancel"), - }); - }); - }, [modal]); - const requestExport = useCallback((autoAdvance: boolean) => { if (!selectedFileId) { message.warning("请先选择文件"); @@ -896,7 +684,7 @@ export default function LabelStudioTextEditor() { useEffect(() => { const handleSaveShortcut = (event: KeyboardEvent) => { if (!isSaveShortcut(event) || event.repeat) return; - if (saving || loadingTaskDetail || segmentSwitching) return; + if (saving || loadingTaskDetail) return; if (!iframeReady || !lsReady) return; event.preventDefault(); event.stopPropagation(); @@ -904,83 +692,7 @@ export default function LabelStudioTextEditor() { }; window.addEventListener("keydown", handleSaveShortcut); return () => window.removeEventListener("keydown", handleSaveShortcut); - }, [iframeReady, loadingTaskDetail, lsReady, requestExport, saving, segmentSwitching]); - - // 段落切换处理 - const handleSegmentChange = useCallback(async (newIndex: number) => { - if (newIndex === currentSegmentIndex) return; - if (segmentSwitching || saving || loadingTaskDetail) return; - if (!iframeReady || !lsReady) { - message.warning("编辑器未就绪,无法切换段落"); - return; - } - - setSegmentSwitching(true); - try { - const payload = await requestExportForCheck(); - if (!payload) { - message.warning("无法读取当前标注,已取消切换"); - return; - } - - const payloadTaskId = payload.taskId; - if (expectedTaskIdRef.current && payloadTaskId) { - if (Number(payloadTaskId) !== expectedTaskIdRef.current) { - message.warning("已忽略过期的标注数据"); - return; - } - } - - const payloadFileId = payload.fileId || selectedFileId; - const payloadSegmentIndex = resolveSegmentIndex(payload.segmentIndex); - const resolvedSegmentIndex = - payloadSegmentIndex !== undefined - ? payloadSegmentIndex - : segmented - ? currentSegmentIndex - : undefined; - const annotation = isRecord(payload.annotation) ? payload.annotation : undefined; - const snapshotKey = payloadFileId - ? buildSnapshotKey(String(payloadFileId), resolvedSegmentIndex) - : undefined; - const latestSnapshot = buildAnnotationSnapshot(annotation); - const lastSnapshot = snapshotKey ? savedSnapshotsRef.current[snapshotKey] : undefined; - const hasUnsavedChange = snapshotKey !== undefined && lastSnapshot !== undefined && latestSnapshot !== lastSnapshot; - - if (hasUnsavedChange) { - if (autoSaveOnSwitch) { - const saved = await saveFromExport(payload); - if (!saved) return; - } else { - const decision = await confirmSaveBeforeSwitch(); - if (decision === "cancel") return; - if (decision === "save") { - const saved = await saveFromExport(payload); - if (!saved) return; - } - } - } - - await initEditorForFile(selectedFileId, newIndex); - } finally { - setSegmentSwitching(false); - } - }, [ - autoSaveOnSwitch, - confirmSaveBeforeSwitch, - currentSegmentIndex, - iframeReady, - initEditorForFile, - loadingTaskDetail, - lsReady, - message, - requestExportForCheck, - saveFromExport, - segmented, - selectedFileId, - segmentSwitching, - saving, - ]); + }, [iframeReady, loadingTaskDetail, lsReady, requestExport, saving]); useEffect(() => { setIframeReady(false); @@ -998,13 +710,9 @@ export default function LabelStudioTextEditor() { expectedTaskIdRef.current = null; // 重置分段状态 setSegmented(false); - setSegments([]); setCurrentSegmentIndex(0); - segmentSummaryFileRef.current = ""; + setSegmentTotal(0); savedSnapshotsRef.current = {}; - segmentStatsSeqRef.current += 1; - segmentStatsCacheRef.current = {}; - segmentStatsLoadingRef.current = new Set(); if (exportCheckRef.current?.timer) { window.clearTimeout(exportCheckRef.current.timer); } @@ -1018,12 +726,6 @@ export default function LabelStudioTextEditor() { loadTasks({ mode: "reset" }); }, [project?.supported, loadTasks]); - useEffect(() => { - if (!segmented) return; - if (tasks.length === 0) return; - prefetchSegmentStats(tasks); - }, [prefetchSegmentStats, segmented, tasks]); - useEffect(() => { if (!selectedFileId) return; initEditorForFile(selectedFileId); @@ -1048,60 +750,6 @@ export default function LabelStudioTextEditor() { return () => window.removeEventListener("focus", handleWindowFocus); }, [focusIframe, lsReady]); - const segmentTreeData = useMemo(() => { - if (!segmented || segments.length === 0) return []; - const lineMap = new Map(); - segments.forEach((seg) => { - const list = lineMap.get(seg.lineIndex) || []; - list.push(seg); - lineMap.set(seg.lineIndex, list); - }); - return Array.from(lineMap.entries()) - .sort((a, b) => a[0] - b[0]) - .map(([lineIndex, lineSegments]) => ({ - key: `line-${lineIndex}`, - title: `第${lineIndex + 1}行`, - selectable: false, - children: lineSegments - .sort((a, b) => a.chunkIndex - b.chunkIndex) - .map((seg) => ({ - key: `seg-${seg.idx}`, - title: ( - - {`片${seg.chunkIndex + 1}`} - {seg.hasAnnotation && ( - - )} - - ), - })), - })); - }, [segmented, segments]); - - const segmentLineKeys = useMemo( - () => segmentTreeData.map((item) => String(item.key)), - [segmentTreeData] - ); - - const inProgressSegmentedCount = useMemo(() => { - if (tasks.length === 0) return 0; - return tasks.reduce((count, item) => { - const summary = resolveSegmentSummary(item); - if (!summary) return count; - return summary.done < summary.total ? count + 1 : count; - }, 0); - }, [tasks]); - - const handleSegmentSelect = useCallback((keys: Array) => { - const [first] = keys; - if (first === undefined || first === null) return; - const key = String(first); - if (!key.startsWith("seg-")) return; - const nextIndex = Number(key.replace("seg-", "")); - if (!Number.isFinite(nextIndex)) return; - handleSegmentChange(nextIndex); - }, [handleSegmentChange]); - useEffect(() => { const handler = (event: MessageEvent) => { if (event.origin !== origin) return; @@ -1170,7 +818,7 @@ export default function LabelStudioTextEditor() { const canLoadMore = taskTotalPages > 0 && taskPage + 1 < taskTotalPages; const saveDisabled = - !iframeReady || !selectedFileId || saving || segmentSwitching || loadingTaskDetail; + !iframeReady || !selectedFileId || saving || loadingTaskDetail; const loadMoreNode = canLoadMore ? (
diff --git a/frontend/src/pages/DataAnnotation/annotation.api.ts b/frontend/src/pages/DataAnnotation/annotation.api.ts index 9a437ca..d285fa7 100644 --- a/frontend/src/pages/DataAnnotation/annotation.api.ts +++ b/frontend/src/pages/DataAnnotation/annotation.api.ts @@ -80,8 +80,12 @@ export function getEditorTaskUsingGet( return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}`, params); } -export function getEditorTaskSegmentsUsingGet(projectId: string, fileId: string) { - return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/segments`); +export function getEditorTaskSegmentUsingGet( + projectId: string, + fileId: string, + params: { segmentIndex: number } +) { + return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/segments`, params); } export function upsertEditorAnnotationUsingPut( diff --git a/runtime/datamate-python/app/module/annotation/interface/editor.py b/runtime/datamate-python/app/module/annotation/interface/editor.py index d748cb2..eb328fb 100644 --- a/runtime/datamate-python/app/module/annotation/interface/editor.py +++ b/runtime/datamate-python/app/module/annotation/interface/editor.py @@ -19,8 +19,8 @@ from app.db.session import get_db from app.module.annotation.schema.editor import ( EditorProjectInfo, EditorTaskListResponse, + EditorTaskSegmentResponse, EditorTaskResponse, - EditorTaskSegmentsResponse, UpsertAnnotationRequest, UpsertAnnotationResponse, ) @@ -90,15 +90,16 @@ async def get_editor_task( @router.get( "/projects/{project_id}/tasks/{file_id}/segments", - response_model=StandardResponse[EditorTaskSegmentsResponse], + response_model=StandardResponse[EditorTaskSegmentResponse], ) -async def list_editor_task_segments( +async def get_editor_task_segment( project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"), file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"), + segment_index: int = Query(..., ge=0, alias="segmentIndex", description="段落索引(从0开始)"), db: AsyncSession = Depends(get_db), ): service = AnnotationEditorService(db) - result = await service.get_task_segments(project_id, file_id) + result = await service.get_task_segment(project_id, file_id, segment_index) return StandardResponse(code=200, message="success", data=result) diff --git a/runtime/datamate-python/app/module/annotation/schema/editor.py b/runtime/datamate-python/app/module/annotation/schema/editor.py index d9b776b..2af3382 100644 --- a/runtime/datamate-python/app/module/annotation/schema/editor.py +++ b/runtime/datamate-python/app/module/annotation/schema/editor.py @@ -103,12 +103,25 @@ class EditorTaskResponse(BaseModel): model_config = ConfigDict(populate_by_name=True) -class EditorTaskSegmentsResponse(BaseModel): - """编辑器段落摘要响应""" +class SegmentDetail(BaseModel): + """段落内容""" + + idx: int = Field(..., description="段落索引") + text: str = Field(..., description="段落文本") + has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注") + line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)") + chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)") + + model_config = ConfigDict(populate_by_name=True) + + +class EditorTaskSegmentResponse(BaseModel): + """编辑器单段内容响应""" segmented: bool = Field(False, description="是否启用分段模式") - segments: List[SegmentInfo] = Field(default_factory=list, description="段落摘要列表") + segment: Optional[SegmentDetail] = Field(None, description="段落内容") total_segments: int = Field(0, alias="totalSegments", description="总段落数") + current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引") model_config = ConfigDict(populate_by_name=True) diff --git a/runtime/datamate-python/app/module/annotation/service/editor.py b/runtime/datamate-python/app/module/annotation/service/editor.py index 1c6fda4..f62fc5d 100644 --- a/runtime/datamate-python/app/module/annotation/service/editor.py +++ b/runtime/datamate-python/app/module/annotation/service/editor.py @@ -36,8 +36,9 @@ from app.module.annotation.schema.editor import ( EditorProjectInfo, EditorTaskListItem, EditorTaskListResponse, + EditorTaskSegmentResponse, EditorTaskResponse, - EditorTaskSegmentsResponse, + SegmentDetail, SegmentInfo, UpsertAnnotationRequest, UpsertAnnotationResponse, @@ -713,18 +714,19 @@ class AnnotationEditorService: return await self._build_text_task(project, file_record, file_id, segment_index) - async def get_task_segments( + async def get_task_segment( self, project_id: str, file_id: str, - ) -> EditorTaskSegmentsResponse: + segment_index: int, + ) -> EditorTaskSegmentResponse: project = await self._get_project_or_404(project_id) dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id)) if dataset_type != DATASET_TYPE_TEXT: raise HTTPException( status_code=400, - detail="当前仅支持 TEXT 项目的段落摘要", + detail="当前仅支持 TEXT 项目的段落内容", ) file_result = await self.db.execute( @@ -738,7 +740,12 @@ class AnnotationEditorService: raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}") if not self._resolve_segmentation_enabled(project): - return EditorTaskSegmentsResponse(segmented=False, segments=[], totalSegments=0) + return EditorTaskSegmentResponse( + segmented=False, + segment=None, + totalSegments=0, + currentSegmentIndex=0, + ) text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id) assert isinstance(text_content, str) @@ -768,7 +775,12 @@ class AnnotationEditorService: len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts ) if not needs_segmentation: - return EditorTaskSegmentsResponse(segmented=False, segments=[], totalSegments=0) + return EditorTaskSegmentResponse( + segmented=False, + segment=None, + totalSegments=0, + currentSegmentIndex=0, + ) ann_result = await self.db.execute( select(AnnotationResult).where( @@ -782,16 +794,42 @@ class AnnotationEditorService: segment_annotations = self._extract_segment_annotations(ann.annotation) segment_annotation_keys = set(segment_annotations.keys()) - segments, _ = self._build_segment_contexts( + segments, segment_contexts = self._build_segment_contexts( records, record_texts, segment_annotation_keys, ) - return EditorTaskSegmentsResponse( + total_segments = len(segment_contexts) + if total_segments == 0: + return EditorTaskSegmentResponse( + segmented=False, + segment=None, + totalSegments=0, + currentSegmentIndex=0, + ) + + if segment_index < 0 or segment_index >= total_segments: + raise HTTPException( + status_code=400, + detail=f"segmentIndex 超出范围: {segment_index}", + ) + + segment_info = segments[segment_index] + _, _, segment_text, line_index, chunk_index = segment_contexts[segment_index] + segment_detail = SegmentDetail( + idx=segment_info.idx, + text=segment_text, + hasAnnotation=segment_info.has_annotation, + lineIndex=line_index, + chunkIndex=chunk_index, + ) + + return EditorTaskSegmentResponse( segmented=True, - segments=segments, - totalSegments=len(segments), + segment=segment_detail, + totalSegments=total_segments, + currentSegmentIndex=segment_index, ) async def _build_text_task(