From 807c2289e2dd98cb1834c7508f2ff2f0c3e8b231 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Mon, 9 Feb 2026 19:42:59 +0800 Subject: [PATCH] =?UTF-8?q?feat(annotation):=20=E6=96=87=E4=BB=B6=E7=89=88?= =?UTF-8?q?=E6=9C=AC=E6=9B=B4=E6=96=B0=E6=97=B6=E6=94=AF=E6=8C=81=E4=BF=9D?= =?UTF-8?q?=E7=95=99=E6=A0=87=E6=B3=A8=E8=AE=B0=E5=BD=95=EF=BC=88=E4=BD=8D?= =?UTF-8?q?=E7=BD=AE=E5=81=8F=E7=A7=BB+=E6=96=87=E5=AD=97=E5=8C=B9?= =?UTF-8?q?=E9=85=8D=E8=BF=81=E7=A7=BB=EF=BC=89?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新增 AnnotationMigrator 迁移算法,在 TEXT 类型数据集的文件版本更新时, 可选通过 difflib 位置偏移映射和文字二次匹配将旧版本标注迁移到新版本上。 前端版本切换对话框增加"保留标注"复选框(仅 TEXT 类型显示),后端 API 增加 preserveAnnotations 参数,完全向后兼容。 Co-Authored-By: Claude Opus 4.6 --- .../Annotate/LabelStudioTextEditor.tsx | 36 ++- .../pages/DataAnnotation/annotation.api.ts | 151 ++++++------ .../app/module/annotation/interface/editor.py | 7 +- .../app/module/annotation/schema/editor.py | 18 ++ .../annotation/service/annotation_migrator.py | 215 ++++++++++++++++++ .../app/module/annotation/service/editor.py | 154 ++++++++++++- 6 files changed, 499 insertions(+), 82 deletions(-) create mode 100644 runtime/datamate-python/app/module/annotation/service/annotation_migrator.py diff --git a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx index 9fdc324..2d3f326 100644 --- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx +++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx @@ -1,5 +1,5 @@ import { useCallback, useEffect, useMemo, useRef, useState } from "react"; -import { App, Button, Card, List, Spin, Typography, Tag, Empty } from "antd"; +import { App, Button, Card, Checkbox, List, Spin, Typography, Tag, Empty } from "antd"; import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined } from "@ant-design/icons"; import { useNavigate, useParams } from "react-router"; @@ -11,6 +11,7 @@ import { checkFileVersionUsingGet, applyNewVersionUsingPost, type FileVersionCheckResponse, + type UseNewVersionResponse, } from "../annotation.api"; import { AnnotationResultStatus } from "../annotation.model"; @@ -242,6 +243,7 @@ export default function LabelStudioTextEditor() { } | null>(null); const savedSnapshotsRef = useRef>({}); const pendingAutoAdvanceRef = useRef(false); + const preserveAnnotationsRef = useRef(true); const [loadingProject, setLoadingProject] = useState(true); const [loadingTasks, setLoadingTasks] = useState(false); @@ -594,18 +596,31 @@ export default function LabelStudioTextEditor() { const handleUseNewVersion = useCallback(async () => { if (!selectedFileId) return; + // Reset ref to default before opening dialog + preserveAnnotationsRef.current = true; + modal.confirm({ title: "确认使用新版本", content: (
- 确认使用新版本?这将清空当前标注并使用最新版本的文件内容。 + 确认使用新版本?这将使用最新版本的文件内容。 {fileVersionInfo && ( 当前标注版本: {fileVersionInfo.annotationFileVersion},最新文件版本: {fileVersionInfo.currentFileVersion} )} + {isTextProject && ( + { + preserveAnnotationsRef.current = e.target.checked; + }} + > + 尝试保留已有标注(根据文字匹配迁移) + + )}
), okText: "确认", @@ -615,8 +630,19 @@ export default function LabelStudioTextEditor() { if (!projectId || !selectedFileId) return; setUsingNewVersion(true); try { - await applyNewVersionUsingPost(projectId, selectedFileId); - message.success("已使用新版本并清空标注"); + const resp = (await applyNewVersionUsingPost( + projectId, + selectedFileId, + preserveAnnotationsRef.current, + )) as ApiResponse; + const data = resp?.data; + if (data?.migratedCount != null) { + message.success( + `已切换到新版本,${data.migratedCount} 条标注已迁移,${data.failedCount ?? 0} 条无法迁移`, + ); + } else { + message.success("已使用新版本并清空标注"); + } setFileVersionInfo(null); await loadTasks({ mode: "reset" }); await initEditorForFile(selectedFileId); @@ -628,7 +654,7 @@ export default function LabelStudioTextEditor() { } }, }); - }, [modal, message, projectId, selectedFileId, fileVersionInfo, loadTasks, initEditorForFile]); + }, [modal, message, projectId, selectedFileId, fileVersionInfo, isTextProject, loadTasks, initEditorForFile]); const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => { if (!fileId) return; diff --git a/frontend/src/pages/DataAnnotation/annotation.api.ts b/frontend/src/pages/DataAnnotation/annotation.api.ts index 2be7fd7..5696ba2 100644 --- a/frontend/src/pages/DataAnnotation/annotation.api.ts +++ b/frontend/src/pages/DataAnnotation/annotation.api.ts @@ -1,5 +1,5 @@ -import { get, post, put, del, download } from "@/utils/request"; - +import { get, post, put, del, download } from "@/utils/request"; + // 导出格式类型 export type ExportFormat = "json" | "jsonl" | "csv" | "coco" | "yolo"; @@ -42,26 +42,26 @@ export function stopAnnotationOperatorTaskByIdUsingPost(taskId: string) { export function downloadAnnotationOperatorTaskResultUsingGet(taskId: string, filename?: string) { return download(`/api/annotation/operator-tasks/${taskId}/download`, null, filename); } - -export function deleteAnnotationTaskByIdUsingDelete(mappingId: string) { - // Backend expects mapping UUID as path parameter - return del(`/api/annotation/project/${mappingId}`); -} - -export function getAnnotationTaskByIdUsingGet(taskId: string) { - return get(`/api/annotation/project/${taskId}`); -} - + +export function deleteAnnotationTaskByIdUsingDelete(mappingId: string) { + // Backend expects mapping UUID as path parameter + return del(`/api/annotation/project/${mappingId}`); +} + +export function getAnnotationTaskByIdUsingGet(taskId: string) { + return get(`/api/annotation/project/${taskId}`); +} + export function updateAnnotationTaskByIdUsingPut(taskId: string, data: RequestPayload) { return put(`/api/annotation/project/${taskId}`, data); } - -// 标签配置管理 -export function getTagConfigUsingGet() { - return get("/api/annotation/tags/config"); -} - -// 标注模板管理 + +// 标签配置管理 +export function getTagConfigUsingGet() { + return get("/api/annotation/tags/config"); +} + +// 标注模板管理 export function queryAnnotationTemplatesUsingGet(params?: RequestParams) { return get("/api/annotation/template", params); } @@ -69,33 +69,33 @@ export function queryAnnotationTemplatesUsingGet(params?: RequestParams) { export function createAnnotationTemplateUsingPost(data: RequestPayload) { return post("/api/annotation/template", data); } - + export function updateAnnotationTemplateByIdUsingPut( templateId: string | number, data: RequestPayload ) { return put(`/api/annotation/template/${templateId}`, data); } - -export function deleteAnnotationTemplateByIdUsingDelete( - templateId: string | number -) { - return del(`/api/annotation/template/${templateId}`); -} - - -// ===================== -// Label Studio Editor(内嵌版) -// ===================== - -export function getEditorProjectInfoUsingGet(projectId: string) { - return get(`/api/annotation/editor/projects/${projectId}`); -} - + +export function deleteAnnotationTemplateByIdUsingDelete( + templateId: string | number +) { + return del(`/api/annotation/template/${templateId}`); +} + + +// ===================== +// Label Studio Editor(内嵌版) +// ===================== + +export function getEditorProjectInfoUsingGet(projectId: string) { + return get(`/api/annotation/editor/projects/${projectId}`); +} + export function listEditorTasksUsingGet(projectId: string, params?: RequestParams) { return get(`/api/annotation/editor/projects/${projectId}/tasks`, params); } - + export function getEditorTaskUsingGet( projectId: string, fileId: string, @@ -111,7 +111,7 @@ export function getEditorTaskSegmentUsingGet( ) { return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/segments`, params); } - + export function upsertEditorAnnotationUsingPut( projectId: string, fileId: string, @@ -141,40 +141,49 @@ export interface UseNewVersionResponse { previousFileVersion: number | null; currentFileVersion: number; message: string; + migratedCount?: number; + failedCount?: number; } -export function applyNewVersionUsingPost(projectId: string, fileId: string) { - return post(`/api/annotation/editor/projects/${projectId}/files/${fileId}/use-new-version`, {}); +export function applyNewVersionUsingPost( + projectId: string, + fileId: string, + preserveAnnotations: boolean = false, +) { + return post( + `/api/annotation/editor/projects/${projectId}/files/${fileId}/use-new-version`, + { preserveAnnotations }, + ); } - -// ===================== -// 标注数据导出 -// ===================== - -export interface ExportStatsResponse { - projectId: string; - projectName: string; - totalFiles: number; - annotatedFiles: number; - exportFormat: string; -} - -export function getExportStatsUsingGet(projectId: string) { - return get(`/api/annotation/export/projects/${projectId}/stats`); -} - -export function downloadAnnotationsUsingGet( - projectId: string, - format: ExportFormat = "json", - onlyAnnotated: boolean = true, - includeData: boolean = false, - filename?: string -) { - const params = new URLSearchParams({ - format, - only_annotated: String(onlyAnnotated), - include_data: String(includeData), - }); - return download(`/api/annotation/export/projects/${projectId}/download?${params.toString()}`, null, filename); -} + +// ===================== +// 标注数据导出 +// ===================== + +export interface ExportStatsResponse { + projectId: string; + projectName: string; + totalFiles: number; + annotatedFiles: number; + exportFormat: string; +} + +export function getExportStatsUsingGet(projectId: string) { + return get(`/api/annotation/export/projects/${projectId}/stats`); +} + +export function downloadAnnotationsUsingGet( + projectId: string, + format: ExportFormat = "json", + onlyAnnotated: boolean = true, + includeData: boolean = false, + filename?: string +) { + const params = new URLSearchParams({ + format, + only_annotated: String(onlyAnnotated), + include_data: String(includeData), + }); + return download(`/api/annotation/export/projects/${projectId}/download?${params.toString()}`, null, filename); +} diff --git a/runtime/datamate-python/app/module/annotation/interface/editor.py b/runtime/datamate-python/app/module/annotation/interface/editor.py index ff93922..21a1119 100644 --- a/runtime/datamate-python/app/module/annotation/interface/editor.py +++ b/runtime/datamate-python/app/module/annotation/interface/editor.py @@ -22,6 +22,7 @@ from app.module.annotation.schema.editor import ( EditorTaskSegmentResponse, EditorTaskResponse, FileVersionCheckResponse, + UseNewVersionRequest, UseNewVersionResponse, UpsertAnnotationRequest, UpsertAnnotationResponse, @@ -158,12 +159,14 @@ async def check_file_version( async def use_new_version( project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"), file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"), + request: Optional[UseNewVersionRequest] = None, db: AsyncSession = Depends(get_db), user_context: RequestUserContext = Depends(get_request_user_context), ): """ - 使用文件新版本并清空标注 + 使用文件新版本(可选保留标注) """ + preserve = request.preserve_annotations if request else False service = AnnotationEditorService(db, user_context) - result = await service.use_new_version(project_id, file_id) + result = await service.use_new_version(project_id, file_id, preserve_annotations=preserve) return StandardResponse(code=200, message="success", data=result) diff --git a/runtime/datamate-python/app/module/annotation/schema/editor.py b/runtime/datamate-python/app/module/annotation/schema/editor.py index 6a3819f..4038fde 100644 --- a/runtime/datamate-python/app/module/annotation/schema/editor.py +++ b/runtime/datamate-python/app/module/annotation/schema/editor.py @@ -220,6 +220,18 @@ class FileVersionCheckResponse(BaseModel): model_config = ConfigDict(populate_by_name=True) +class UseNewVersionRequest(BaseModel): + """使用新版本请求""" + + preserve_annotations: bool = Field( + False, + alias="preserveAnnotations", + description="是否尝试保留标注(基于文字匹配迁移)", + ) + + model_config = ConfigDict(populate_by_name=True) + + class UseNewVersionResponse(BaseModel): """使用新版本响应""" @@ -231,5 +243,11 @@ class UseNewVersionResponse(BaseModel): ..., alias="currentFileVersion", description="当前文件版本" ) message: str = Field(..., description="操作结果消息") + migrated_count: Optional[int] = Field( + None, alias="migratedCount", description="成功迁移的标注数量" + ) + failed_count: Optional[int] = Field( + None, alias="failedCount", description="无法迁移的标注数量" + ) model_config = ConfigDict(populate_by_name=True) diff --git a/runtime/datamate-python/app/module/annotation/service/annotation_migrator.py b/runtime/datamate-python/app/module/annotation/service/annotation_migrator.py new file mode 100644 index 0000000..7d33888 --- /dev/null +++ b/runtime/datamate-python/app/module/annotation/service/annotation_migrator.py @@ -0,0 +1,215 @@ +""" +标注迁移器 + +在文件版本更新时,将旧版本的标注结果迁移到新版本文本上。 +仅适用于 TEXT 类型数据集(标注含有 start/end 字符位置和 text 文本片段)。 + +迁移算法: +1. 对没有 value.start/value.end 的标注项(如 choices),直接保留不变 +2. 对有位置信息的标注项: + a. 用 SequenceMatcher 计算旧位置 -> 新位置的偏移映射 + b. 验证映射后的文本是否匹配 + c. 若不匹配,全文搜索最近的匹配位置 + d. 若仍找不到,记入失败列表 +""" + +from __future__ import annotations + +import difflib +from dataclasses import dataclass, field +from typing import Any, Callable, Dict, List, Optional + + +@dataclass +class MigrationResult: + """标注迁移结果""" + + migrated: List[Dict[str, Any]] = field(default_factory=list) + failed: List[Dict[str, Any]] = field(default_factory=list) + total: int = 0 + migrated_count: int = 0 + failed_count: int = 0 + + +class AnnotationMigrator: + """标注迁移核心算法""" + + @staticmethod + def migrate_annotation_results( + old_text: str, + new_text: str, + results: List[Dict[str, Any]], + ) -> MigrationResult: + """ + 迁移标注结果列表中包含位置信息的标注项。 + + Args: + old_text: 旧版本文本 + new_text: 新版本文本 + results: Label Studio annotation result 数组 + + Returns: + MigrationResult 包含成功/失败的标注项 + """ + if not results: + return MigrationResult() + + offset_map = AnnotationMigrator._build_offset_map(old_text, new_text) + + migrated: List[Dict[str, Any]] = [] + failed: List[Dict[str, Any]] = [] + + for item in results: + value = item.get("value") if isinstance(item, dict) else None + if not isinstance(value, dict): + # 无 value 结构,直接保留 + migrated.append(item) + continue + + old_start = value.get("start") + old_end = value.get("end") + + if old_start is None or old_end is None: + # 无位置信息(如 choices 类型),直接保留 + migrated.append(item) + continue + + if not isinstance(old_start, (int, float)) or not isinstance( + old_end, (int, float) + ): + migrated.append(item) + continue + + old_start = int(old_start) + old_end = int(old_end) + target_text = value.get("text", "") + + # 尝试通过偏移映射迁移 + new_start = offset_map(old_start) + new_end = offset_map(old_end) + + if new_start is not None and new_end is not None: + new_start = int(new_start) + new_end = int(new_end) + if ( + 0 <= new_start <= new_end <= len(new_text) + and new_text[new_start:new_end] == target_text + ): + # 偏移映射成功且文本匹配 + new_item = _deep_copy_item(item) + new_item["value"] = dict(value) + new_item["value"]["start"] = new_start + new_item["value"]["end"] = new_end + migrated.append(new_item) + continue + + # 偏移映射失败或文本不匹配,尝试全文搜索 + if target_text: + hint_pos = new_start if new_start is not None else old_start + found_pos = AnnotationMigrator._find_nearest_occurrence( + new_text, target_text, hint_pos + ) + if found_pos is not None: + new_item = _deep_copy_item(item) + new_item["value"] = dict(value) + new_item["value"]["start"] = found_pos + new_item["value"]["end"] = found_pos + len(target_text) + migrated.append(new_item) + continue + + # 无法迁移 + failed.append(item) + + total = len(results) + return MigrationResult( + migrated=migrated, + failed=failed, + total=total, + migrated_count=len(migrated), + failed_count=len(failed), + ) + + @staticmethod + def _build_offset_map( + old_text: str, new_text: str + ) -> Callable[[int], Optional[int]]: + """ + 用 difflib.SequenceMatcher 构建旧位置 -> 新位置映射函数。 + + 对于旧文本中的每个字符位置,通过匹配块计算其在新文本中的对应位置。 + """ + matcher = difflib.SequenceMatcher(None, old_text, new_text, autojunk=False) + matching_blocks = matcher.get_matching_blocks() + + # 构建映射表:对每个匹配块,旧位置 i 映射到新位置 j + (i - a) + # matching_blocks 中每个元素为 (a, b, size),表示 + # old_text[a:a+size] == new_text[b:b+size] + blocks = [ + (a, b, size) for a, b, size in matching_blocks if size > 0 + ] + + def map_position(old_pos: int) -> Optional[int]: + for a, b, size in blocks: + if a <= old_pos < a + size: + return b + (old_pos - a) + # 位置不在任何匹配块中,尝试找最近的块进行推算 + if not blocks: + return None + + # 找到最近的匹配块 + best_block = None + best_distance = float("inf") + for a, b, size in blocks: + # 到块起始位置的距离 + dist_start = abs(old_pos - a) + dist_end = abs(old_pos - (a + size)) + dist = min(dist_start, dist_end) + if dist < best_distance: + best_distance = dist + best_block = (a, b, size) + + if best_block is None: + return None + + a, b, size = best_block + # 推算偏移 + offset = old_pos - a + new_pos = b + offset + if 0 <= new_pos <= len(new_text): + return new_pos + return None + + return map_position + + @staticmethod + def _find_nearest_occurrence( + text: str, target: str, hint_pos: int + ) -> Optional[int]: + """ + 在 text 中查找 target,优先返回距离 hint_pos 最近的位置。 + """ + if not target: + return None + + positions: List[int] = [] + start = 0 + while True: + idx = text.find(target, start) + if idx < 0: + break + positions.append(idx) + start = idx + 1 + + if not positions: + return None + + # 返回距离 hint_pos 最近的位置 + return min(positions, key=lambda pos: abs(pos - hint_pos)) + + +def _deep_copy_item(item: Dict[str, Any]) -> Dict[str, Any]: + """浅拷贝标注项,深拷贝 value 字段""" + new_item = dict(item) + if "value" in new_item and isinstance(new_item["value"], dict): + new_item["value"] = dict(new_item["value"]) + return new_item diff --git a/runtime/datamate-python/app/module/annotation/service/editor.py b/runtime/datamate-python/app/module/annotation/service/editor.py index f6dd581..cf3a888 100644 --- a/runtime/datamate-python/app/module/annotation/service/editor.py +++ b/runtime/datamate-python/app/module/annotation/service/editor.py @@ -61,6 +61,7 @@ from app.module.annotation.security import ( from app.module.annotation.service.text_fetcher import ( fetch_text_content_via_download_api, ) +from app.module.annotation.service.annotation_migrator import AnnotationMigrator logger = get_logger(__name__) @@ -1734,16 +1735,21 @@ class AnnotationEditorService: "latestFileId": latest_file.id if latest_file else file_id, } - async def use_new_version(self, project_id: str, file_id: str) -> Dict[str, Any]: + async def use_new_version( + self, project_id: str, file_id: str, + preserve_annotations: bool = False, + ) -> Dict[str, Any]: """ - 使用文件新版本并清空标注 + 使用文件新版本 如果文件有多个版本(通过 logical_path 关联),将标注切换到最新版本。 - 如果存在标注记录,会清空标注内容。 + 当 preserve_annotations=True 且数据集类型为 TEXT 时,尝试通过位置偏移 + + 文字匹配将旧版本的标注迁移到新版本上;否则清空标注内容。 Args: project_id: 标注项目ID file_id: 文件ID(当前关联的文件ID) + preserve_annotations: 是否尝试保留标注 Returns: 操作结果 @@ -1819,9 +1825,61 @@ class AnnotationEditorService: now = datetime.utcnow() if annotation: - # 存在标注记录:清空标注并更新文件版本 previous_file_version = annotation.file_version + # 判断是否可以尝试迁移标注 + dataset_type = self._normalize_dataset_type( + await self._get_dataset_type(project.dataset_id) + ) + can_migrate = ( + preserve_annotations + and dataset_type == DATASET_TYPE_TEXT + and self._has_annotation_result(annotation.annotation) + ) + + if can_migrate: + migrated_payload, migrated_count, failed_count = ( + await self._migrate_annotations_to_new_version( + project=project, + annotation=annotation, + old_file_id=file_id, + new_file_id=str(latest_file.id), + ) + ) + + has_result = self._has_annotation_result(migrated_payload) + final_status = ( + ANNOTATION_STATUS_ANNOTATED + if has_result + else ANNOTATION_STATUS_NO_ANNOTATION + ) + + annotation.file_id = str(latest_file.id) + annotation.annotation = migrated_payload + annotation.annotation_status = final_status + annotation.file_version = latest_file.version + annotation.updated_at = now + + await self.db.commit() + await self.db.refresh(annotation) + + await self._sync_annotation_to_knowledge( + project, + latest_file, + migrated_payload, + annotation.updated_at or now, + ) + + return { + "fileId": str(latest_file.id), + "previousFileVersion": previous_file_version, + "currentFileVersion": latest_file.version, + "message": f"已切换到新版本,{migrated_count} 条标注已迁移,{failed_count} 条无法迁移", + "migratedCount": migrated_count, + "failedCount": failed_count, + } + + # 不迁移:清空标注 cleared_payload: Dict[str, Any] = {} if isinstance(annotation.annotation, dict) and self._is_segmented_annotation( annotation.annotation @@ -1879,3 +1937,91 @@ class AnnotationEditorService: "currentFileVersion": latest_file.version, "message": "已切换到新版本", } + + async def _migrate_annotations_to_new_version( + self, + project: LabelingProject, + annotation: AnnotationResult, + old_file_id: str, + new_file_id: str, + ) -> tuple: + """ + 迁移标注到新版本文件。 + + Returns: + (migrated_payload, migrated_count, failed_count) + """ + old_text = await self._fetch_text_content_via_download_api( + project.dataset_id, old_file_id + ) + new_text = await self._fetch_text_content_via_download_api( + project.dataset_id, new_file_id + ) + + ann_data = annotation.annotation + if not isinstance(ann_data, dict): + return {}, 0, 0 + + total_migrated = 0 + total_failed = 0 + + if self._is_segmented_annotation(ann_data): + # 分段标注:逐段迁移 + segments = self._extract_segment_annotations(ann_data) + migrated_segments: Dict[str, Dict[str, Any]] = {} + + for seg_key, seg_data in segments.items(): + if not isinstance(seg_data, dict): + continue + seg_results = seg_data.get(SEGMENT_RESULT_KEY, []) + if not isinstance(seg_results, list) or not seg_results: + # 空标注段落,保留结构 + migrated_segments[seg_key] = dict(seg_data) + migrated_segments[seg_key][SEGMENT_RESULT_KEY] = [] + continue + + migration = AnnotationMigrator.migrate_annotation_results( + old_text, new_text, seg_results + ) + total_migrated += migration.migrated_count + total_failed += migration.failed_count + + new_seg = dict(seg_data) + new_seg[SEGMENT_RESULT_KEY] = migration.migrated + migrated_segments[seg_key] = new_seg + + seg_total = self._resolve_segment_total(ann_data) + if seg_total is None: + seg_total = len(migrated_segments) + + migrated_payload: Dict[str, Any] = { + SEGMENTED_KEY: True, + "version": ann_data.get("version", 1), + SEGMENTS_KEY: migrated_segments, + SEGMENT_TOTAL_KEY: seg_total, + } + else: + # 非分段标注:直接迁移 result + results = ann_data.get(SEGMENT_RESULT_KEY, []) + if not isinstance(results, list): + results = [] + + migration = AnnotationMigrator.migrate_annotation_results( + old_text, new_text, results + ) + total_migrated = migration.migrated_count + total_failed = migration.failed_count + + migrated_payload = dict(ann_data) + migrated_payload[SEGMENT_RESULT_KEY] = migration.migrated + + logger.info( + "标注迁移完成:project_id=%s old_file=%s new_file=%s migrated=%d failed=%d", + project.id, + old_file_id, + new_file_id, + total_migrated, + total_failed, + ) + + return migrated_payload, total_migrated, total_failed