You've already forked DataMate
feat(annotation): 文件版本更新时支持保留标注记录(位置偏移+文字匹配迁移)
新增 AnnotationMigrator 迁移算法,在 TEXT 类型数据集的文件版本更新时, 可选通过 difflib 位置偏移映射和文字二次匹配将旧版本标注迁移到新版本上。 前端版本切换对话框增加"保留标注"复选框(仅 TEXT 类型显示),后端 API 增加 preserveAnnotations 参数,完全向后兼容。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||||
import { App, Button, Card, List, Spin, Typography, Tag, Empty } from "antd";
|
import { App, Button, Card, Checkbox, List, Spin, Typography, Tag, Empty } from "antd";
|
||||||
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined } from "@ant-design/icons";
|
import { LeftOutlined, ReloadOutlined, SaveOutlined, MenuFoldOutlined, MenuUnfoldOutlined } from "@ant-design/icons";
|
||||||
import { useNavigate, useParams } from "react-router";
|
import { useNavigate, useParams } from "react-router";
|
||||||
|
|
||||||
@@ -11,6 +11,7 @@ import {
|
|||||||
checkFileVersionUsingGet,
|
checkFileVersionUsingGet,
|
||||||
applyNewVersionUsingPost,
|
applyNewVersionUsingPost,
|
||||||
type FileVersionCheckResponse,
|
type FileVersionCheckResponse,
|
||||||
|
type UseNewVersionResponse,
|
||||||
} from "../annotation.api";
|
} from "../annotation.api";
|
||||||
import { AnnotationResultStatus } from "../annotation.model";
|
import { AnnotationResultStatus } from "../annotation.model";
|
||||||
|
|
||||||
@@ -242,6 +243,7 @@ export default function LabelStudioTextEditor() {
|
|||||||
} | null>(null);
|
} | null>(null);
|
||||||
const savedSnapshotsRef = useRef<Record<string, string>>({});
|
const savedSnapshotsRef = useRef<Record<string, string>>({});
|
||||||
const pendingAutoAdvanceRef = useRef(false);
|
const pendingAutoAdvanceRef = useRef(false);
|
||||||
|
const preserveAnnotationsRef = useRef(true);
|
||||||
|
|
||||||
const [loadingProject, setLoadingProject] = useState(true);
|
const [loadingProject, setLoadingProject] = useState(true);
|
||||||
const [loadingTasks, setLoadingTasks] = useState(false);
|
const [loadingTasks, setLoadingTasks] = useState(false);
|
||||||
@@ -594,18 +596,31 @@ export default function LabelStudioTextEditor() {
|
|||||||
const handleUseNewVersion = useCallback(async () => {
|
const handleUseNewVersion = useCallback(async () => {
|
||||||
if (!selectedFileId) return;
|
if (!selectedFileId) return;
|
||||||
|
|
||||||
|
// Reset ref to default before opening dialog
|
||||||
|
preserveAnnotationsRef.current = true;
|
||||||
|
|
||||||
modal.confirm({
|
modal.confirm({
|
||||||
title: "确认使用新版本",
|
title: "确认使用新版本",
|
||||||
content: (
|
content: (
|
||||||
<div className="flex flex-col gap-2">
|
<div className="flex flex-col gap-2">
|
||||||
<Typography.Text>
|
<Typography.Text>
|
||||||
确认使用新版本?这将清空当前标注并使用最新版本的文件内容。
|
确认使用新版本?这将使用最新版本的文件内容。
|
||||||
</Typography.Text>
|
</Typography.Text>
|
||||||
{fileVersionInfo && (
|
{fileVersionInfo && (
|
||||||
<Typography.Text type="secondary">
|
<Typography.Text type="secondary">
|
||||||
当前标注版本: {fileVersionInfo.annotationFileVersion},最新文件版本: {fileVersionInfo.currentFileVersion}
|
当前标注版本: {fileVersionInfo.annotationFileVersion},最新文件版本: {fileVersionInfo.currentFileVersion}
|
||||||
</Typography.Text>
|
</Typography.Text>
|
||||||
)}
|
)}
|
||||||
|
{isTextProject && (
|
||||||
|
<Checkbox
|
||||||
|
defaultChecked={true}
|
||||||
|
onChange={(e) => {
|
||||||
|
preserveAnnotationsRef.current = e.target.checked;
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
尝试保留已有标注(根据文字匹配迁移)
|
||||||
|
</Checkbox>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
),
|
),
|
||||||
okText: "确认",
|
okText: "确认",
|
||||||
@@ -615,8 +630,19 @@ export default function LabelStudioTextEditor() {
|
|||||||
if (!projectId || !selectedFileId) return;
|
if (!projectId || !selectedFileId) return;
|
||||||
setUsingNewVersion(true);
|
setUsingNewVersion(true);
|
||||||
try {
|
try {
|
||||||
await applyNewVersionUsingPost(projectId, selectedFileId);
|
const resp = (await applyNewVersionUsingPost(
|
||||||
|
projectId,
|
||||||
|
selectedFileId,
|
||||||
|
preserveAnnotationsRef.current,
|
||||||
|
)) as ApiResponse<UseNewVersionResponse>;
|
||||||
|
const data = resp?.data;
|
||||||
|
if (data?.migratedCount != null) {
|
||||||
|
message.success(
|
||||||
|
`已切换到新版本,${data.migratedCount} 条标注已迁移,${data.failedCount ?? 0} 条无法迁移`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
message.success("已使用新版本并清空标注");
|
message.success("已使用新版本并清空标注");
|
||||||
|
}
|
||||||
setFileVersionInfo(null);
|
setFileVersionInfo(null);
|
||||||
await loadTasks({ mode: "reset" });
|
await loadTasks({ mode: "reset" });
|
||||||
await initEditorForFile(selectedFileId);
|
await initEditorForFile(selectedFileId);
|
||||||
@@ -628,7 +654,7 @@ export default function LabelStudioTextEditor() {
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
}, [modal, message, projectId, selectedFileId, fileVersionInfo, loadTasks, initEditorForFile]);
|
}, [modal, message, projectId, selectedFileId, fileVersionInfo, isTextProject, loadTasks, initEditorForFile]);
|
||||||
|
|
||||||
const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
|
const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
|
||||||
if (!fileId) return;
|
if (!fileId) return;
|
||||||
|
|||||||
@@ -141,10 +141,19 @@ export interface UseNewVersionResponse {
|
|||||||
previousFileVersion: number | null;
|
previousFileVersion: number | null;
|
||||||
currentFileVersion: number;
|
currentFileVersion: number;
|
||||||
message: string;
|
message: string;
|
||||||
|
migratedCount?: number;
|
||||||
|
failedCount?: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function applyNewVersionUsingPost(projectId: string, fileId: string) {
|
export function applyNewVersionUsingPost(
|
||||||
return post(`/api/annotation/editor/projects/${projectId}/files/${fileId}/use-new-version`, {});
|
projectId: string,
|
||||||
|
fileId: string,
|
||||||
|
preserveAnnotations: boolean = false,
|
||||||
|
) {
|
||||||
|
return post(
|
||||||
|
`/api/annotation/editor/projects/${projectId}/files/${fileId}/use-new-version`,
|
||||||
|
{ preserveAnnotations },
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ from app.module.annotation.schema.editor import (
|
|||||||
EditorTaskSegmentResponse,
|
EditorTaskSegmentResponse,
|
||||||
EditorTaskResponse,
|
EditorTaskResponse,
|
||||||
FileVersionCheckResponse,
|
FileVersionCheckResponse,
|
||||||
|
UseNewVersionRequest,
|
||||||
UseNewVersionResponse,
|
UseNewVersionResponse,
|
||||||
UpsertAnnotationRequest,
|
UpsertAnnotationRequest,
|
||||||
UpsertAnnotationResponse,
|
UpsertAnnotationResponse,
|
||||||
@@ -158,12 +159,14 @@ async def check_file_version(
|
|||||||
async def use_new_version(
|
async def use_new_version(
|
||||||
project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
|
project_id: str = Path(..., description="标注项目ID(t_dm_labeling_projects.id)"),
|
||||||
file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
|
file_id: str = Path(..., description="文件ID(t_dm_dataset_files.id)"),
|
||||||
|
request: Optional[UseNewVersionRequest] = None,
|
||||||
db: AsyncSession = Depends(get_db),
|
db: AsyncSession = Depends(get_db),
|
||||||
user_context: RequestUserContext = Depends(get_request_user_context),
|
user_context: RequestUserContext = Depends(get_request_user_context),
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
使用文件新版本并清空标注
|
使用文件新版本(可选保留标注)
|
||||||
"""
|
"""
|
||||||
|
preserve = request.preserve_annotations if request else False
|
||||||
service = AnnotationEditorService(db, user_context)
|
service = AnnotationEditorService(db, user_context)
|
||||||
result = await service.use_new_version(project_id, file_id)
|
result = await service.use_new_version(project_id, file_id, preserve_annotations=preserve)
|
||||||
return StandardResponse(code=200, message="success", data=result)
|
return StandardResponse(code=200, message="success", data=result)
|
||||||
|
|||||||
@@ -220,6 +220,18 @@ class FileVersionCheckResponse(BaseModel):
|
|||||||
model_config = ConfigDict(populate_by_name=True)
|
model_config = ConfigDict(populate_by_name=True)
|
||||||
|
|
||||||
|
|
||||||
|
class UseNewVersionRequest(BaseModel):
    """Request body for switching an annotation to the newest file version."""

    # Accept both the snake_case field name and the camelCase alias on input.
    model_config = ConfigDict(populate_by_name=True)

    # Opt-in flag; when omitted the field defaults to False.
    preserve_annotations: bool = Field(
        default=False,
        alias="preserveAnnotations",
        description="是否尝试保留标注(基于文字匹配迁移)",
    )
|
||||||
|
|
||||||
|
|
||||||
class UseNewVersionResponse(BaseModel):
|
class UseNewVersionResponse(BaseModel):
|
||||||
"""使用新版本响应"""
|
"""使用新版本响应"""
|
||||||
|
|
||||||
@@ -231,5 +243,11 @@ class UseNewVersionResponse(BaseModel):
|
|||||||
..., alias="currentFileVersion", description="当前文件版本"
|
..., alias="currentFileVersion", description="当前文件版本"
|
||||||
)
|
)
|
||||||
message: str = Field(..., description="操作结果消息")
|
message: str = Field(..., description="操作结果消息")
|
||||||
|
migrated_count: Optional[int] = Field(
|
||||||
|
None, alias="migratedCount", description="成功迁移的标注数量"
|
||||||
|
)
|
||||||
|
failed_count: Optional[int] = Field(
|
||||||
|
None, alias="failedCount", description="无法迁移的标注数量"
|
||||||
|
)
|
||||||
|
|
||||||
model_config = ConfigDict(populate_by_name=True)
|
model_config = ConfigDict(populate_by_name=True)
|
||||||
|
|||||||
@@ -0,0 +1,215 @@
|
|||||||
|
"""
|
||||||
|
标注迁移器
|
||||||
|
|
||||||
|
在文件版本更新时,将旧版本的标注结果迁移到新版本文本上。
|
||||||
|
仅适用于 TEXT 类型数据集(标注含有 start/end 字符位置和 text 文本片段)。
|
||||||
|
|
||||||
|
迁移算法:
|
||||||
|
1. 对没有 value.start/value.end 的标注项(如 choices),直接保留不变
|
||||||
|
2. 对有位置信息的标注项:
|
||||||
|
a. 用 SequenceMatcher 计算旧位置 -> 新位置的偏移映射
|
||||||
|
b. 验证映射后的文本是否匹配
|
||||||
|
c. 若不匹配,全文搜索最近的匹配位置
|
||||||
|
d. 若仍找不到,记入失败列表
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import difflib
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Any, Callable, Dict, List, Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MigrationResult:
    """Outcome of migrating one list of annotation result items."""

    # Items kept for the new text: relocated span items plus items that were
    # passed through untouched because they carry no usable position.
    migrated: List[Dict[str, Any]] = field(default_factory=list)
    # Span items whose text could not be located in the new text.
    failed: List[Dict[str, Any]] = field(default_factory=list)
    # Number of input items examined.
    total: int = 0
    # Sizes of ``migrated`` and ``failed`` as filled in by the migrator.
    migrated_count: int = 0
    failed_count: int = 0


class AnnotationMigrator:
    """Core algorithm for carrying span annotations over to a new text version."""

    @staticmethod
    def migrate_annotation_results(
        old_text: str,
        new_text: str,
        results: List[Dict[str, Any]],
    ) -> MigrationResult:
        """
        Migrate the position-bearing items of an annotation result list.

        Items that are not dicts, have no dict ``value``, or whose
        ``value.start`` / ``value.end`` are missing or non-numeric are passed
        through unchanged. Every other item is relocated in two steps:

        1. map ``start`` / ``end`` through the diff-based offset map and accept
           the result when the mapped slice of ``new_text`` equals ``value.text``;
        2. otherwise search ``new_text`` for ``value.text`` and take the
           occurrence closest to the mapped start (or to the old start when
           the start could not be mapped).

        Items that survive neither step are reported in ``failed``. Input items
        are never mutated; relocated items are copies.

        Args:
            old_text: text of the previous file version.
            new_text: text of the new file version.
            results: annotation result array (items with a ``value`` dict).

        Returns:
            MigrationResult with the migrated and failed items and their counts.
        """
        if not results:
            return MigrationResult()

        offset_map = AnnotationMigrator._build_offset_map(old_text, new_text)
        new_length = len(new_text)

        migrated: List[Dict[str, Any]] = []
        failed: List[Dict[str, Any]] = []

        for item in results:
            value = item.get("value") if isinstance(item, dict) else None
            if not isinstance(value, dict):
                # No value structure: keep as is.
                migrated.append(item)
                continue

            old_start = value.get("start")
            old_end = value.get("end")
            if old_start is None or old_end is None:
                # No position information (e.g. a choices item): keep as is.
                migrated.append(item)
                continue
            if not isinstance(old_start, (int, float)) or not isinstance(
                old_end, (int, float)
            ):
                # Positions are not numeric offsets: keep as is.
                migrated.append(item)
                continue

            old_start = int(old_start)
            old_end = int(old_end)
            target_text = value.get("text", "")

            # Step 1: diff-based offset mapping, verified against the text.
            new_start = offset_map(old_start)
            new_end = offset_map(old_end)
            if (
                new_start is not None
                and new_end is not None
                and 0 <= new_start <= new_end <= new_length
                and new_text[new_start:new_end] == target_text
            ):
                migrated.append(
                    AnnotationMigrator._with_span(item, value, new_start, new_end)
                )
                continue

            # Step 2: full-text search. Only a non-empty *string* can be
            # searched for; any other ``text`` payload (a list, for instance)
            # would make str.find raise TypeError, so such items are reported
            # as failed instead of aborting the whole migration.
            if isinstance(target_text, str) and target_text:
                hint_pos = new_start if new_start is not None else old_start
                found_pos = AnnotationMigrator._find_nearest_occurrence(
                    new_text, target_text, hint_pos
                )
                if found_pos is not None:
                    migrated.append(
                        AnnotationMigrator._with_span(
                            item, value, found_pos, found_pos + len(target_text)
                        )
                    )
                    continue

            # Could not be migrated.
            failed.append(item)

        return MigrationResult(
            migrated=migrated,
            failed=failed,
            total=len(results),
            migrated_count=len(migrated),
            failed_count=len(failed),
        )

    @staticmethod
    def _with_span(
        item: Dict[str, Any], value: Dict[str, Any], start: int, end: int
    ) -> Dict[str, Any]:
        """Return a shallow copy of ``item`` whose copied ``value`` has the given span."""
        moved = dict(item)
        moved["value"] = {**value, "start": start, "end": end}
        return moved

    @staticmethod
    def _build_offset_map(
        old_text: str, new_text: str
    ) -> Callable[[int], Optional[int]]:
        """
        Build an old-position -> new-position mapping with difflib.SequenceMatcher.

        A position inside a matching block ``(a, b, size)`` maps to
        ``b + (pos - a)``. A position outside every block is extrapolated with
        the same formula from the nearest block (the earlier block wins a tie),
        and ``None`` is returned when that lands outside ``[0, len(new_text)]``
        or when the two texts share no matching block at all.
        """
        from bisect import bisect_right

        matcher = difflib.SequenceMatcher(None, old_text, new_text, autojunk=False)
        # Each block (a, b, size) states old_text[a:a+size] == new_text[b:b+size].
        # Blocks are monotonically increasing in a and b; the zero-size
        # sentinel that terminates the list is dropped.
        blocks = [
            (a, b, size) for a, b, size in matcher.get_matching_blocks() if size > 0
        ]
        block_starts = [a for a, _, _ in blocks]
        new_length = len(new_text)

        def map_position(old_pos: int) -> Optional[int]:
            if not blocks:
                return None

            # Index of the last block starting at or before old_pos (-1 if none).
            idx = bisect_right(block_starts, old_pos) - 1
            if idx >= 0:
                a, b, size = blocks[idx]
                if old_pos < a + size:
                    return b + (old_pos - a)

            # Outside every block: since blocks are sorted and disjoint, the
            # nearest one is either the block ending before old_pos or the
            # block starting after it.
            anchor = None
            anchor_distance = None
            if idx >= 0:
                a, b, size = blocks[idx]
                anchor = (a, b)
                anchor_distance = old_pos - (a + size)
            if idx + 1 < len(blocks):
                a, b, _ = blocks[idx + 1]
                if anchor is None or a - old_pos < anchor_distance:
                    anchor = (a, b)

            a, b = anchor
            new_pos = b + (old_pos - a)
            if 0 <= new_pos <= new_length:
                return new_pos
            return None

        return map_position

    @staticmethod
    def _find_nearest_occurrence(
        text: str, target: str, hint_pos: int
    ) -> Optional[int]:
        """
        Return the start of the occurrence of ``target`` in ``text`` closest to ``hint_pos``.

        Overlapping occurrences are considered. When two occurrences are
        equally close the earlier one is returned. Returns ``None`` when
        ``target`` is empty or does not occur.
        """
        if not target:
            return None

        # Clamp the hint into the text so slice bounds are never negative
        # (a negative bound would be interpreted relative to the end).
        pivot = min(max(hint_pos, 0), len(text))

        # First occurrence starting at or after the hint.
        after = text.find(target, pivot)
        # Last occurrence starting strictly before the hint.
        before = text.rfind(target, 0, pivot + len(target) - 1) if pivot > 0 else -1

        if before < 0:
            return after if after >= 0 else None
        if after < 0:
            return before
        # Both exist, so the hint was not clamped; prefer the earlier one on a tie.
        return before if hint_pos - before <= after - hint_pos else after
|
||||||
|
|
||||||
|
|
||||||
|
def _deep_copy_item(item: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""浅拷贝标注项,深拷贝 value 字段"""
|
||||||
|
new_item = dict(item)
|
||||||
|
if "value" in new_item and isinstance(new_item["value"], dict):
|
||||||
|
new_item["value"] = dict(new_item["value"])
|
||||||
|
return new_item
|
||||||
@@ -61,6 +61,7 @@ from app.module.annotation.security import (
|
|||||||
from app.module.annotation.service.text_fetcher import (
|
from app.module.annotation.service.text_fetcher import (
|
||||||
fetch_text_content_via_download_api,
|
fetch_text_content_via_download_api,
|
||||||
)
|
)
|
||||||
|
from app.module.annotation.service.annotation_migrator import AnnotationMigrator
|
||||||
|
|
||||||
logger = get_logger(__name__)
|
logger = get_logger(__name__)
|
||||||
|
|
||||||
@@ -1734,16 +1735,21 @@ class AnnotationEditorService:
|
|||||||
"latestFileId": latest_file.id if latest_file else file_id,
|
"latestFileId": latest_file.id if latest_file else file_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
async def use_new_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
|
async def use_new_version(
|
||||||
|
self, project_id: str, file_id: str,
|
||||||
|
preserve_annotations: bool = False,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
使用文件新版本并清空标注
|
使用文件新版本
|
||||||
|
|
||||||
如果文件有多个版本(通过 logical_path 关联),将标注切换到最新版本。
|
如果文件有多个版本(通过 logical_path 关联),将标注切换到最新版本。
|
||||||
如果存在标注记录,会清空标注内容。
|
当 preserve_annotations=True 且数据集类型为 TEXT 时,尝试通过位置偏移 +
|
||||||
|
文字匹配将旧版本的标注迁移到新版本上;否则清空标注内容。
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
project_id: 标注项目ID
|
project_id: 标注项目ID
|
||||||
file_id: 文件ID(当前关联的文件ID)
|
file_id: 文件ID(当前关联的文件ID)
|
||||||
|
preserve_annotations: 是否尝试保留标注
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
操作结果
|
操作结果
|
||||||
@@ -1819,9 +1825,61 @@ class AnnotationEditorService:
|
|||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
|
|
||||||
if annotation:
|
if annotation:
|
||||||
# 存在标注记录:清空标注并更新文件版本
|
|
||||||
previous_file_version = annotation.file_version
|
previous_file_version = annotation.file_version
|
||||||
|
|
||||||
|
# 判断是否可以尝试迁移标注
|
||||||
|
dataset_type = self._normalize_dataset_type(
|
||||||
|
await self._get_dataset_type(project.dataset_id)
|
||||||
|
)
|
||||||
|
can_migrate = (
|
||||||
|
preserve_annotations
|
||||||
|
and dataset_type == DATASET_TYPE_TEXT
|
||||||
|
and self._has_annotation_result(annotation.annotation)
|
||||||
|
)
|
||||||
|
|
||||||
|
if can_migrate:
|
||||||
|
migrated_payload, migrated_count, failed_count = (
|
||||||
|
await self._migrate_annotations_to_new_version(
|
||||||
|
project=project,
|
||||||
|
annotation=annotation,
|
||||||
|
old_file_id=file_id,
|
||||||
|
new_file_id=str(latest_file.id),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
has_result = self._has_annotation_result(migrated_payload)
|
||||||
|
final_status = (
|
||||||
|
ANNOTATION_STATUS_ANNOTATED
|
||||||
|
if has_result
|
||||||
|
else ANNOTATION_STATUS_NO_ANNOTATION
|
||||||
|
)
|
||||||
|
|
||||||
|
annotation.file_id = str(latest_file.id)
|
||||||
|
annotation.annotation = migrated_payload
|
||||||
|
annotation.annotation_status = final_status
|
||||||
|
annotation.file_version = latest_file.version
|
||||||
|
annotation.updated_at = now
|
||||||
|
|
||||||
|
await self.db.commit()
|
||||||
|
await self.db.refresh(annotation)
|
||||||
|
|
||||||
|
await self._sync_annotation_to_knowledge(
|
||||||
|
project,
|
||||||
|
latest_file,
|
||||||
|
migrated_payload,
|
||||||
|
annotation.updated_at or now,
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"fileId": str(latest_file.id),
|
||||||
|
"previousFileVersion": previous_file_version,
|
||||||
|
"currentFileVersion": latest_file.version,
|
||||||
|
"message": f"已切换到新版本,{migrated_count} 条标注已迁移,{failed_count} 条无法迁移",
|
||||||
|
"migratedCount": migrated_count,
|
||||||
|
"failedCount": failed_count,
|
||||||
|
}
|
||||||
|
|
||||||
|
# 不迁移:清空标注
|
||||||
cleared_payload: Dict[str, Any] = {}
|
cleared_payload: Dict[str, Any] = {}
|
||||||
if isinstance(annotation.annotation, dict) and self._is_segmented_annotation(
|
if isinstance(annotation.annotation, dict) and self._is_segmented_annotation(
|
||||||
annotation.annotation
|
annotation.annotation
|
||||||
@@ -1879,3 +1937,91 @@ class AnnotationEditorService:
|
|||||||
"currentFileVersion": latest_file.version,
|
"currentFileVersion": latest_file.version,
|
||||||
"message": "已切换到新版本",
|
"message": "已切换到新版本",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async def _migrate_annotations_to_new_version(
    self,
    project: LabelingProject,
    annotation: AnnotationResult,
    old_file_id: str,
    new_file_id: str,
) -> tuple:
    """
    Migrate an annotation payload onto the new version of its file.

    The texts of both file versions are fetched and every result list in the
    payload is run through AnnotationMigrator.migrate_annotation_results.
    Segmented payloads are migrated segment by segment and rebuilt with only
    the segmented flag, version, segments and segment total; non-segmented
    payloads keep all their keys and only have their result list replaced.

    Args:
        project: labeling project; its dataset_id is used to fetch the texts.
        annotation: stored annotation record whose payload is migrated.
        old_file_id: id of the file version the annotation currently refers to.
        new_file_id: id of the file version to migrate to.

    Returns:
        (migrated_payload, migrated_count, failed_count); ``({}, 0, 0)`` when
        the stored payload is not a dict.
    """
    ann_data = annotation.annotation
    if not isinstance(ann_data, dict):
        # Nothing to migrate: return before paying for the two text downloads.
        return {}, 0, 0

    old_text = await self._fetch_text_content_via_download_api(
        project.dataset_id, old_file_id
    )
    new_text = await self._fetch_text_content_via_download_api(
        project.dataset_id, new_file_id
    )

    total_migrated = 0
    total_failed = 0

    if self._is_segmented_annotation(ann_data):
        # Segmented annotation: migrate each segment's results separately.
        # NOTE(review): every segment is matched against the whole-file texts;
        # if segment offsets are relative to the segment text rather than the
        # file, the offset mapping will be off and only the text search can
        # succeed - confirm against the segmentation logic.
        segments = self._extract_segment_annotations(ann_data)
        migrated_segments: Dict[str, Dict[str, Any]] = {}

        for seg_key, seg_data in segments.items():
            if not isinstance(seg_data, dict):
                # Malformed segment entries are left out of the new payload.
                continue
            seg_results = seg_data.get(SEGMENT_RESULT_KEY, [])
            if not isinstance(seg_results, list) or not seg_results:
                # Segment without results: keep its structure with an empty list.
                migrated_segments[seg_key] = dict(seg_data)
                migrated_segments[seg_key][SEGMENT_RESULT_KEY] = []
                continue

            migration = AnnotationMigrator.migrate_annotation_results(
                old_text, new_text, seg_results
            )
            total_migrated += migration.migrated_count
            total_failed += migration.failed_count

            new_seg = dict(seg_data)
            new_seg[SEGMENT_RESULT_KEY] = migration.migrated
            migrated_segments[seg_key] = new_seg

        seg_total = self._resolve_segment_total(ann_data)
        if seg_total is None:
            # Fall back to the number of segments that were carried over.
            seg_total = len(migrated_segments)

        migrated_payload: Dict[str, Any] = {
            SEGMENTED_KEY: True,
            "version": ann_data.get("version", 1),
            SEGMENTS_KEY: migrated_segments,
            SEGMENT_TOTAL_KEY: seg_total,
        }
    else:
        # Non-segmented annotation: migrate the top-level result list.
        results = ann_data.get(SEGMENT_RESULT_KEY, [])
        if not isinstance(results, list):
            results = []

        migration = AnnotationMigrator.migrate_annotation_results(
            old_text, new_text, results
        )
        total_migrated = migration.migrated_count
        total_failed = migration.failed_count

        migrated_payload = dict(ann_data)
        migrated_payload[SEGMENT_RESULT_KEY] = migration.migrated

    logger.info(
        "标注迁移完成:project_id=%s old_file=%s new_file=%s migrated=%d failed=%d",
        project.id,
        old_file_id,
        new_file_id,
        total_migrated,
        total_failed,
    )

    return migrated_payload, total_migrated, total_failed
|
||||||
|
|||||||
Reference in New Issue
Block a user