feat(annotation): implement file version management for annotation feature

Add support for detecting new file versions and switching to them:

Backend Changes:
- Add file_version column to AnnotationResult model
- Create Alembic migration for database schema update
- Implement check_file_version() method to compare annotation and file versions
- Implement use_new_version() method to clear annotations and update version
- Update upsert_annotation() to record file version when saving
- Add new API endpoints: GET /version and POST /use-new-version
- Add FileVersionCheckResponse and UseNewVersionResponse schemas

Frontend Changes:
- Add checkFileVersionUsingGet and useNewVersionUsingPost API calls
- Add version warning banner showing current vs latest file version
- Add 'Use New Version' button with confirmation dialog
- Clear version info state when switching files to avoid stale warnings

Bug Fixes:
- Fix previousFileVersion returning updated value (save before update)
- Handle null file_version for historical data compatibility
- Fix segmented annotation clearing (preserve structure, clear results)
- Fix files without annotations incorrectly showing new version warnings
- Preserve total_segments when clearing segmented annotations

Files Modified:
- frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
- frontend/src/pages/DataAnnotation/annotation.api.ts
- runtime/datamate-python/app/db/models/annotation_management.py
- runtime/datamate-python/app/module/annotation/interface/editor.py
- runtime/datamate-python/app/module/annotation/schema/editor.py
- runtime/datamate-python/app/module/annotation/service/editor.py

New Files:
- runtime/datamate-python/alembic.ini
- runtime/datamate-python/alembic/env.py
- runtime/datamate-python/alembic/script.py.mako
- runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
This commit is contained in:
2026-02-05 20:12:07 +08:00
parent 4143bc75f9
commit f5cb265667
10 changed files with 915 additions and 171 deletions

View File

@@ -23,7 +23,13 @@ from sqlalchemy.ext.asyncio import AsyncSession
from app.core.config import settings
from app.core.logging import get_logger
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
from app.db.models import (
AnnotationResult,
Dataset,
DatasetFiles,
LabelingProject,
LabelingProjectFile,
)
from app.db.models.annotation_management import (
ANNOTATION_STATUS_ANNOTATED,
ANNOTATION_STATUS_IN_PROGRESS,
@@ -45,8 +51,12 @@ from app.module.annotation.schema.editor import (
)
from app.module.annotation.service.template import AnnotationTemplateService
from app.module.annotation.service.knowledge_sync import KnowledgeSyncService
from app.module.annotation.service.annotation_text_splitter import AnnotationTextSplitter
from app.module.annotation.service.text_fetcher import fetch_text_content_via_download_api
from app.module.annotation.service.annotation_text_splitter import (
AnnotationTextSplitter,
)
from app.module.annotation.service.text_fetcher import (
fetch_text_content_via_download_api,
)
logger = get_logger(__name__)
@@ -169,7 +179,9 @@ class AnnotationEditorService:
template = await self.template_service.get_template(self.db, template_id)
return getattr(template, "label_config", None) if template else None
async def _resolve_project_label_config(self, project: LabelingProject) -> Optional[str]:
async def _resolve_project_label_config(
self, project: LabelingProject
) -> Optional[str]:
label_config = None
if project.configuration and isinstance(project.configuration, dict):
label_config = project.configuration.get("label_config")
@@ -210,7 +222,9 @@ class AnnotationEditorService:
if not label_config:
return [default_key]
target_categories = categories or set()
keys = cls._extract_object_value_keys_by_category(label_config, target_categories)
keys = cls._extract_object_value_keys_by_category(
label_config, target_categories
)
if not keys:
return [default_key]
return keys
@@ -231,7 +245,9 @@ class AnnotationEditorService:
return parsed if isinstance(parsed, dict) else None
@classmethod
def _parse_jsonl_records(cls, text_content: str) -> List[Tuple[Optional[Dict[str, Any]], str]]:
def _parse_jsonl_records(
cls, text_content: str
) -> List[Tuple[Optional[Dict[str, Any]], str]]:
lines = [line for line in text_content.splitlines() if line.strip()]
records: List[Tuple[Optional[Dict[str, Any]], str]] = []
for line in lines:
@@ -277,7 +293,9 @@ class AnnotationEditorService:
@classmethod
def _extract_textual_value_keys(cls, label_config: str) -> List[str]:
return cls._extract_object_value_keys_by_category(label_config, TEXTUAL_OBJECT_CATEGORIES)
return cls._extract_object_value_keys_by_category(
label_config, TEXTUAL_OBJECT_CATEGORIES
)
@staticmethod
def _needs_placeholder(value: Any) -> bool:
@@ -287,7 +305,9 @@ class AnnotationEditorService:
return True
return False
def _apply_text_placeholders(self, data: Dict[str, Any], label_config: Optional[str]) -> None:
def _apply_text_placeholders(
self, data: Dict[str, Any], label_config: Optional[str]
) -> None:
if not label_config:
return
for key in self._extract_textual_value_keys(label_config):
@@ -346,7 +366,9 @@ class AnnotationEditorService:
if i > 0:
prev = children[i - 1]
if prev.tag == "Header" and self._header_already_present(prev, obj_name):
if prev.tag == "Header" and self._header_already_present(
prev, obj_name
):
i += 1
continue
@@ -362,7 +384,9 @@ class AnnotationEditorService:
return ET.tostring(root, encoding="unicode")
@staticmethod
def _extract_segment_annotations(payload: Optional[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
def _extract_segment_annotations(
payload: Optional[Dict[str, Any]],
) -> Dict[str, Dict[str, Any]]:
if not payload or not isinstance(payload, dict):
return {}
segments = payload.get(SEGMENTS_KEY)
@@ -440,13 +464,17 @@ class AnnotationEditorService:
file_record: DatasetFiles,
file_id: str,
) -> Optional[int]:
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
dataset_type = self._normalize_dataset_type(
await self._get_dataset_type(project.dataset_id)
)
if dataset_type != DATASET_TYPE_TEXT:
return None
if not self._resolve_segmentation_enabled(project):
return None
text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
text_content = await self._fetch_text_content_via_download_api(
project.dataset_id, file_id
)
if not isinstance(text_content, str):
return None
@@ -495,7 +523,9 @@ class AnnotationEditorService:
file_type_lower = func.lower(DatasetFiles.file_type)
file_name_lower = func.lower(DatasetFiles.file_name)
type_condition = file_type_lower.in_(SOURCE_DOCUMENT_TYPES)
name_conditions = [file_name_lower.like(f"%{ext}") for ext in SOURCE_DOCUMENT_EXTENSIONS]
name_conditions = [
file_name_lower.like(f"%{ext}") for ext in SOURCE_DOCUMENT_EXTENSIONS
]
return or_(type_condition, *name_conditions)
def _build_task_data(
@@ -545,13 +575,17 @@ class AnnotationEditorService:
records: List[Tuple[Optional[Dict[str, Any]], str]],
record_texts: List[str],
segment_annotation_keys: set[str],
) -> Tuple[List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]]:
) -> Tuple[
List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]
]:
splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
segments: List[SegmentInfo] = []
segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
segment_cursor = 0
for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
for record_index, ((payload, raw_text), record_text) in enumerate(
zip(records, record_texts)
):
normalized_text = record_text or ""
if len(normalized_text) > self.SEGMENT_THRESHOLD:
raw_segments = splitter.split(normalized_text)
@@ -559,12 +593,15 @@ class AnnotationEditorService:
segments.append(
SegmentInfo(
idx=segment_cursor,
hasAnnotation=str(segment_cursor) in segment_annotation_keys,
hasAnnotation=str(segment_cursor)
in segment_annotation_keys,
lineIndex=record_index,
chunkIndex=chunk_index,
)
)
segment_contexts.append((payload, raw_text, seg["text"], record_index, chunk_index))
segment_contexts.append(
(payload, raw_text, seg["text"], record_index, chunk_index)
)
segment_cursor += 1
else:
segments.append(
@@ -575,11 +612,15 @@ class AnnotationEditorService:
chunkIndex=0,
)
)
segment_contexts.append((payload, raw_text, normalized_text, record_index, 0))
segment_contexts.append(
(payload, raw_text, normalized_text, record_index, 0)
)
segment_cursor += 1
if not segments:
segments = [SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)]
segments = [
SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)
]
segment_contexts = [(None, "", "", 0, 0)]
return segments, segment_contexts
@@ -587,7 +628,9 @@ class AnnotationEditorService:
async def get_project_info(self, project_id: str) -> EditorProjectInfo:
project = await self._get_project_or_404(project_id)
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
dataset_type = self._normalize_dataset_type(
await self._get_dataset_type(project.dataset_id)
)
supported = dataset_type in SUPPORTED_EDITOR_DATASET_TYPES
unsupported_reason = None
if not supported:
@@ -653,7 +696,12 @@ class AnnotationEditorService:
rows = files_result.all()
items: List[EditorTaskListItem] = []
for file_record, annotation_id, annotation_updated_at, annotation_status in rows:
for (
file_record,
annotation_id,
annotation_updated_at,
annotation_status,
) in rows:
fid = str(file_record.id) # type: ignore[arg-type]
items.append(
EditorTaskListItem(
@@ -675,7 +723,9 @@ class AnnotationEditorService:
size=size,
)
async def _fetch_text_content_via_download_api(self, dataset_id: str, file_id: str) -> str:
async def _fetch_text_content_via_download_api(
self, dataset_id: str, file_id: str
) -> str:
return await fetch_text_content_via_download_api(dataset_id, file_id)
async def get_task(
@@ -686,7 +736,9 @@ class AnnotationEditorService:
) -> EditorTaskResponse:
project = await self._get_project_or_404(project_id)
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
dataset_type = self._normalize_dataset_type(
await self._get_dataset_type(project.dataset_id)
)
if dataset_type not in SUPPORTED_EDITOR_DATASET_TYPES:
raise HTTPException(
status_code=400,
@@ -701,7 +753,9 @@ class AnnotationEditorService:
)
file_record = file_result.scalar_one_or_none()
if not file_record:
raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
raise HTTPException(
status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
)
if dataset_type == DATASET_TYPE_IMAGE:
return await self._build_image_task(project, file_record, file_id)
@@ -722,7 +776,9 @@ class AnnotationEditorService:
) -> EditorTaskSegmentResponse:
project = await self._get_project_or_404(project_id)
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
dataset_type = self._normalize_dataset_type(
await self._get_dataset_type(project.dataset_id)
)
if dataset_type != DATASET_TYPE_TEXT:
raise HTTPException(
status_code=400,
@@ -737,7 +793,9 @@ class AnnotationEditorService:
)
file_record = file_result.scalar_one_or_none()
if not file_record:
raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
raise HTTPException(
status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
)
if not self._resolve_segmentation_enabled(project):
return EditorTaskSegmentResponse(
@@ -747,7 +805,9 @@ class AnnotationEditorService:
currentSegmentIndex=0,
)
text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
text_content = await self._fetch_text_content_via_download_api(
project.dataset_id, file_id
)
assert isinstance(text_content, str)
label_config = await self._resolve_project_label_config(project)
primary_text_key = self._resolve_primary_text_key(label_config)
@@ -839,7 +899,9 @@ class AnnotationEditorService:
file_id: str,
segment_index: Optional[int],
) -> EditorTaskResponse:
text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
text_content = await self._fetch_text_content_via_download_api(
project.dataset_id, file_id
)
assert isinstance(text_content, str)
label_config = await self._resolve_project_label_config(project)
primary_text_key = self._resolve_primary_text_key(label_config)
@@ -885,7 +947,8 @@ class AnnotationEditorService:
if not segmentation_enabled:
segment_index = None
needs_segmentation = segmentation_enabled and (
len(records) > 1 or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
len(records) > 1
or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
)
segments: List[SegmentInfo] = []
segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
@@ -903,10 +966,14 @@ class AnnotationEditorService:
segment_annotation_keys,
)
current_segment_index = segment_index if segment_index is not None else 0
if current_segment_index < 0 or current_segment_index >= len(segment_contexts):
if current_segment_index < 0 or current_segment_index >= len(
segment_contexts
):
current_segment_index = 0
selected_payload, _, display_text, _, _ = segment_contexts[current_segment_index]
selected_payload, _, display_text, _, _ = segment_contexts[
current_segment_index
]
# 构造 task 对象
task_data = self._build_task_data(
@@ -936,11 +1003,16 @@ class AnnotationEditorService:
# 分段模式:获取当前段落的标注
seg_ann = segment_annotations.get(str(current_segment_index), {})
stored = {
"id": self._make_ls_annotation_id(project.id, file_id) + current_segment_index,
"id": self._make_ls_annotation_id(project.id, file_id)
+ current_segment_index,
"task": ls_task_id,
"result": seg_ann.get(SEGMENT_RESULT_KEY, []),
"created_at": seg_ann.get(SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
"updated_at": seg_ann.get(SEGMENT_UPDATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
"created_at": seg_ann.get(
SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
),
"updated_at": seg_ann.get(
SEGMENT_UPDATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
),
}
task["annotations"] = [stored]
elif not needs_segmentation and not has_segmented_annotation:
@@ -952,7 +1024,10 @@ class AnnotationEditorService:
task["annotations"] = [stored]
else:
# 首次从非分段切换到分段:提供空标注
empty_ann_id = self._make_ls_annotation_id(project.id, file_id) + current_segment_index
empty_ann_id = (
self._make_ls_annotation_id(project.id, file_id)
+ current_segment_index
)
task["annotations"] = [
{
"id": empty_ann_id,
@@ -994,7 +1069,9 @@ class AnnotationEditorService:
categories: set[str],
) -> EditorTaskResponse:
label_config = await self._resolve_project_label_config(project)
media_keys = self._resolve_media_value_keys(label_config, default_key, categories)
media_keys = self._resolve_media_value_keys(
label_config, default_key, categories
)
preview_url = self._build_file_preview_url(project.dataset_id, file_id)
file_name = str(getattr(file_record, "file_name", ""))
@@ -1097,7 +1174,9 @@ class AnnotationEditorService:
categories=MEDIA_OBJECT_CATEGORIES,
)
async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
async def upsert_annotation(
self, project_id: str, file_id: str, request: UpsertAnnotationRequest
) -> UpsertAnnotationResponse:
project = await self._get_project_or_404(project_id)
# 校验文件归属
@@ -1112,7 +1191,26 @@ class AnnotationEditorService:
)
file_record = file_result.scalar_one_or_none()
if not file_record:
raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
raise HTTPException(
status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
)
# 检查文件版本是否变化
current_file_version = file_record.version
existing_result = await self.db.execute(
select(AnnotationResult).where(
AnnotationResult.project_id == project_id,
AnnotationResult.file_id == file_id,
)
)
existing_annotation = existing_result.scalar_one_or_none()
if existing_annotation and existing_annotation.file_version is not None:
if existing_annotation.file_version != current_file_version:
raise HTTPException(
status_code=409,
detail=f"文件已更新到新版本(当前版本: {current_file_version}, 标注版本: {existing_annotation.file_version}),请使用新版本",
)
annotation_payload = dict(request.annotation or {})
result = annotation_payload.get("result")
@@ -1127,7 +1225,9 @@ class AnnotationEditorService:
if request.segment_index is not None:
segment_total_hint = self._resolve_segment_total(annotation_payload)
if segment_total_hint is None:
segment_total_hint = await self._compute_segment_total(project, file_record, file_id)
segment_total_hint = await self._compute_segment_total(
project, file_record, file_id
)
existing_result = await self.db.execute(
select(AnnotationResult)
@@ -1161,11 +1261,16 @@ class AnnotationEditorService:
# 非分段模式:直接使用传入的 annotation
annotation_payload["task"] = ls_task_id
if not isinstance(annotation_payload.get("id"), int):
annotation_payload["id"] = self._make_ls_annotation_id(project_id, file_id)
annotation_payload["id"] = self._make_ls_annotation_id(
project_id, file_id
)
final_payload = annotation_payload
requested_status = request.annotation_status
if requested_status is not None and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES:
if (
requested_status is not None
and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES
):
raise HTTPException(status_code=400, detail="annotationStatus 不合法")
segment_total = None
@@ -1194,7 +1299,10 @@ class AnnotationEditorService:
elif requested_status == ANNOTATION_STATUS_NOT_APPLICABLE:
final_status = ANNOTATION_STATUS_NOT_APPLICABLE
else:
raise HTTPException(status_code=400, detail="未发现标注内容,请确认无标注/不适用后再保存")
raise HTTPException(
status_code=400,
detail="未发现标注内容,请确认无标注/不适用后再保存",
)
if request.segment_index is not None:
segment_entries = self._extract_segment_annotations(final_payload)
@@ -1210,11 +1318,16 @@ class AnnotationEditorService:
if existing:
if request.expected_updated_at and existing.updated_at:
if existing.updated_at != request.expected_updated_at.replace(tzinfo=None):
raise HTTPException(status_code=409, detail="标注已被更新,请刷新后重试")
if existing.updated_at != request.expected_updated_at.replace(
tzinfo=None
):
raise HTTPException(
status_code=409, detail="标注已被更新,请刷新后重试"
)
existing.annotation = final_payload # type: ignore[assignment]
existing.annotation_status = final_status # type: ignore[assignment]
existing.file_version = current_file_version # type: ignore[assignment]
existing.updated_at = now # type: ignore[assignment]
await self.db.commit()
await self.db.refresh(existing)
@@ -1223,7 +1336,9 @@ class AnnotationEditorService:
annotationId=existing.id,
updatedAt=existing.updated_at or now,
)
await self._sync_annotation_to_knowledge(project, file_record, final_payload, existing.updated_at)
await self._sync_annotation_to_knowledge(
project, file_record, final_payload, existing.updated_at
)
return response
new_id = str(uuid.uuid4())
@@ -1233,6 +1348,7 @@ class AnnotationEditorService:
file_id=file_id,
annotation=final_payload,
annotation_status=final_status,
file_version=current_file_version,
created_at=now,
updated_at=now,
)
@@ -1244,7 +1360,9 @@ class AnnotationEditorService:
annotationId=record.id,
updatedAt=record.updated_at or now,
)
await self._sync_annotation_to_knowledge(project, file_record, final_payload, record.updated_at)
await self._sync_annotation_to_knowledge(
project, file_record, final_payload, record.updated_at
)
return response
def _merge_segment_annotation(
@@ -1292,7 +1410,9 @@ class AnnotationEditorService:
# 更新指定段落的标注
segments[str(segment_index)] = {
SEGMENT_RESULT_KEY: new_annotation.get(SEGMENT_RESULT_KEY, []),
SEGMENT_CREATED_AT_KEY: new_annotation.get(SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
SEGMENT_CREATED_AT_KEY: new_annotation.get(
SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
),
SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
}
@@ -1317,9 +1437,7 @@ class AnnotationEditorService:
logger.warning("标注同步知识管理失败:%s", exc)
async def precompute_segmentation_for_project(
self,
project_id: str,
max_retries: int = 3
self, project_id: str, max_retries: int = 3
) -> Dict[str, Any]:
"""
为指定项目的所有文本文件预计算切片结构并持久化到数据库
@@ -1332,7 +1450,9 @@ class AnnotationEditorService:
统计信息:{total_files, succeeded, failed}
"""
project = await self._get_project_or_404(project_id)
dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
dataset_type = self._normalize_dataset_type(
await self._get_dataset_type(project.dataset_id)
)
# 只处理文本数据集
if dataset_type != DATASET_TYPE_TEXT:
@@ -1364,9 +1484,8 @@ class AnnotationEditorService:
for file_record in file_records:
file_type = str(getattr(file_record, "file_type", "") or "").lower()
file_name = str(getattr(file_record, "file_name", "")).lower()
is_source_document = (
file_type in SOURCE_DOCUMENT_TYPES or
any(file_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS)
is_source_document = file_type in SOURCE_DOCUMENT_TYPES or any(
file_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS
)
if not is_source_document:
valid_files.append(file_record)
@@ -1385,7 +1504,9 @@ class AnnotationEditorService:
for retry in range(max_retries):
try:
# 读取文本内容
text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
text_content = await self._fetch_text_content_via_download_api(
project.dataset_id, file_id
)
if not isinstance(text_content, str):
logger.warning(f"文件 {file_id} 内容不是字符串,跳过切片")
failed += 1
@@ -1404,7 +1525,9 @@ class AnnotationEditorService:
records = [(None, text_content)]
record_texts = [
self._resolve_primary_text_value(payload, raw_text, primary_text_key)
self._resolve_primary_text_value(
payload, raw_text, primary_text_key
)
for payload, raw_text in records
]
if not record_texts:
@@ -1412,7 +1535,8 @@ class AnnotationEditorService:
# 判断是否需要分段
needs_segmentation = len(records) > 1 or any(
len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts
len(text or "") > self.SEGMENT_THRESHOLD
for text in record_texts
)
if not needs_segmentation:
@@ -1425,7 +1549,9 @@ class AnnotationEditorService:
segment_cursor = 0
segments = {}
for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
for record_index, ((payload, raw_text), record_text) in enumerate(
zip(records, record_texts)
):
normalized_text = record_text or ""
if len(normalized_text) > self.SEGMENT_THRESHOLD:
@@ -1433,15 +1559,19 @@ class AnnotationEditorService:
for chunk_index, seg in enumerate(raw_segments):
segments[str(segment_cursor)] = {
SEGMENT_RESULT_KEY: [],
SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat()
+ "Z",
SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat()
+ "Z",
}
segment_cursor += 1
else:
segments[str(segment_cursor)] = {
SEGMENT_RESULT_KEY: [],
SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat()
+ "Z",
SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat()
+ "Z",
}
segment_cursor += 1
@@ -1508,3 +1638,145 @@ class AnnotationEditorService:
"failed": failed,
}
async def check_file_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
    """
    Report whether the file has a newer version than the one its annotation
    was recorded against.

    Args:
        project_id: ID of the labeling project.
        file_id: ID of the dataset file.

    Returns:
        A dict with the keys ``fileId``, ``currentFileVersion``,
        ``annotationFileVersion`` and ``hasNewVersion``.

    Raises:
        HTTPException: 404 when the file does not exist or is not part of
            the project.
    """
    project = await self._get_project_or_404(project_id)

    # The file must be linked to this project and live in its dataset.
    file_stmt = (
        select(DatasetFiles)
        .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
        .where(
            LabelingProjectFile.project_id == project.id,
            DatasetFiles.id == file_id,
            DatasetFiles.dataset_id == project.dataset_id,
        )
    )
    file_record = (await self.db.execute(file_stmt)).scalar_one_or_none()
    if not file_record:
        raise HTTPException(
            status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
        )

    # Look up the annotation stored for this (project, file) pair, if any.
    annotation_stmt = select(AnnotationResult).where(
        AnnotationResult.project_id == project_id,
        AnnotationResult.file_id == file_id,
    )
    annotation = (await self.db.execute(annotation_stmt)).scalar_one_or_none()

    latest_version = file_record.version
    recorded_version = annotation.file_version if annotation else None

    if annotation is None:
        # Nothing has been annotated yet, so there is nothing to warn about.
        newer_available = False
    else:
        # An annotation without a recorded version is reported as outdated;
        # otherwise the two version numbers are compared.
        newer_available = (
            recorded_version is None or latest_version > recorded_version
        )

    return {
        "fileId": file_id,
        "currentFileVersion": latest_version,
        "annotationFileVersion": recorded_version,
        "hasNewVersion": newer_available,
    }
async def use_new_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
    """
    Switch the stored annotation to the file's current version and clear
    its annotation results.

    For segmented annotations the segment structure (segment keys,
    timestamps, ``version`` and ``total_segments``) is kept and only each
    segment's result list is emptied. Any other annotation payload is
    replaced with an empty dict.

    Args:
        project_id: ID of the labeling project.
        file_id: ID of the dataset file.

    Returns:
        A dict with the keys ``fileId``, ``previousFileVersion``,
        ``currentFileVersion`` and ``message``.

    Raises:
        HTTPException: 404 when the file does not exist or is not part of
            the project, or when no annotation exists for it; 400 when the
            file version is not newer than the annotation's version.
    """
    project = await self._get_project_or_404(project_id)

    # The file must be linked to this project and live in its dataset.
    file_result = await self.db.execute(
        select(DatasetFiles)
        .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
        .where(
            LabelingProjectFile.project_id == project.id,
            DatasetFiles.id == file_id,
            DatasetFiles.dataset_id == project.dataset_id,
        )
    )
    file_record = file_result.scalar_one_or_none()
    if not file_record:
        raise HTTPException(
            status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
        )

    # Select the annotation row FOR UPDATE before rewriting it.
    annotation_result = await self.db.execute(
        select(AnnotationResult)
        .where(
            AnnotationResult.project_id == project_id,
            AnnotationResult.file_id == file_id,
        )
        .with_for_update()
    )
    annotation = annotation_result.scalar_one_or_none()

    current_file_version = file_record.version

    if not annotation:
        raise HTTPException(status_code=404, detail=f"标注不存在: {file_id}")

    # Capture the old version before it is overwritten below so the
    # response reports the value that was actually replaced.
    previous_file_version = annotation.file_version

    # An annotation without a recorded version skips the check and is
    # always allowed to move to the current file version.
    if previous_file_version is not None:
        if current_file_version <= previous_file_version:
            raise HTTPException(
                status_code=400,
                detail=f"文件版本({current_file_version})未更新或低于标注版本({annotation.file_version})",
            )

    now = datetime.utcnow()
    payload = annotation.annotation

    if isinstance(payload, dict) and payload.get(SEGMENTED_KEY):
        raw_segments = payload.get(SEGMENTS_KEY, {})
        if not isinstance(raw_segments, dict):
            # Malformed or missing segment map: treat as having no segments.
            raw_segments = {}
        # Build fresh dicts rather than editing the loaded value in place.
        # NOTE(review): the column type is not visible here; with a plain
        # (non-Mutable) JSON column, in-place edits followed by assigning
        # an equal dict can leave SQLAlchemy seeing no change — confirm.
        cleared_segments = {
            segment_id: (
                {**segment_data, SEGMENT_RESULT_KEY: []}
                if isinstance(segment_data, dict)
                else segment_data
            )
            for segment_id, segment_data in raw_segments.items()
        }
        annotation.annotation = {
            SEGMENTED_KEY: True,
            "version": payload.get("version", 1),
            SEGMENTS_KEY: cleared_segments,
            "total_segments": payload.get(
                "total_segments", len(cleared_segments)
            ),
        }
    else:
        # Non-segmented (or non-dict) payloads are simply reset.
        annotation.annotation = {}

    annotation.annotation_status = ANNOTATION_STATUS_NO_ANNOTATION
    annotation.file_version = current_file_version
    annotation.updated_at = now

    await self.db.commit()
    await self.db.refresh(annotation)

    return {
        "fileId": file_id,
        "previousFileVersion": previous_file_version,
        "currentFileVersion": current_file_version,
        "message": "已使用新版本并清空标注",
    }