feat(annotation): implement file version management for annotation feature

Add support for detecting new file versions and switching to them:

Backend Changes:
- Add file_version column to AnnotationResult model
- Create Alembic migration for database schema update
- Implement check_file_version() method to compare annotation and file versions
- Implement use_new_version() method to clear annotations and update version
- Update upsert_annotation() to record file version when saving
- Add new API endpoints: GET /version and POST /use-new-version
- Add FileVersionCheckResponse and UseNewVersionResponse schemas

Frontend Changes:
- Add checkFileVersionUsingGet and useNewVersionUsingPost API calls
- Add version warning banner showing current vs latest file version
- Add 'Use New Version' button with confirmation dialog
- Clear version info state when switching files to avoid stale warnings

Bug Fixes:
- Fix previousFileVersion returning the already-updated value (capture the previous version before updating it)
- Handle null file_version for historical data compatibility
- Fix segmented annotation clearing (preserve structure, clear results)
- Fix new-version warnings being shown incorrectly for files that have no annotations
- Preserve total_segments when clearing segmented annotations

Files Modified:
- frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
- frontend/src/pages/DataAnnotation/annotation.api.ts
- runtime/datamate-python/app/db/models/annotation_management.py
- runtime/datamate-python/app/module/annotation/interface/editor.py
- runtime/datamate-python/app/module/annotation/schema/editor.py
- runtime/datamate-python/app/module/annotation/service/editor.py

New Files:
- runtime/datamate-python/alembic.ini
- runtime/datamate-python/alembic/env.py
- runtime/datamate-python/alembic/script.py.mako
- runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
This commit is contained in:
2026-02-05 20:12:07 +08:00
parent 4143bc75f9
commit f5cb265667
10 changed files with 915 additions and 171 deletions

View File

@@ -34,17 +34,29 @@ class AnnotationStatus(str, Enum):
class EditorProjectInfo(BaseModel):
"""编辑器项目元信息"""
project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID(t_dm_labeling_projects.id)")
dataset_id: str = Field(..., alias="datasetId", description="数据集ID(t_dm_datasets.id)")
project_id: str = Field(
...,
alias="projectId",
description="DataMate 标注项目ID(t_dm_labeling_projects.id)",
)
dataset_id: str = Field(
..., alias="datasetId", description="数据集ID(t_dm_datasets.id)"
)
dataset_type: Optional[str] = Field(
None,
alias="datasetType",
description="数据集类型(TEXT/IMAGE/AUDIO/VIDEO 等)",
)
template_id: Optional[str] = Field(None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)")
label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置")
template_id: Optional[str] = Field(
None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)"
)
label_config: Optional[str] = Field(
None, alias="labelConfig", description="Label Studio XML 配置"
)
supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器")
unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因(当 supported=false)")
unsupported_reason: Optional[str] = Field(
None, alias="unsupportedReason", description="不支持原因(当 supported=false)"
)
model_config = ConfigDict(populate_by_name=True)
@@ -55,8 +67,12 @@ class EditorTaskListItem(BaseModel):
file_id: str = Field(..., alias="fileId", description="文件ID")
file_name: str = Field(..., alias="fileName", description="文件名")
file_type: Optional[str] = Field(None, alias="fileType", description="文件类型")
has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注")
annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
has_annotation: bool = Field(
..., alias="hasAnnotation", description="是否已有最终标注"
)
annotation_updated_at: Optional[datetime] = Field(
None, alias="annotationUpdatedAt", description="标注更新时间"
)
annotation_status: Optional[AnnotationStatus] = Field(
None,
alias="annotationStatus",
@@ -82,9 +98,13 @@ class SegmentInfo(BaseModel):
"""段落摘要(用于文本分段标注)"""
idx: int = Field(..., description="段落索引")
has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
has_annotation: bool = Field(
False, alias="hasAnnotation", description="该段落是否已有标注"
)
line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)")
chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)")
chunk_index: int = Field(
0, alias="chunkIndex", description="行内分片索引(从0开始)"
)
model_config = ConfigDict(populate_by_name=True)
@@ -93,12 +113,16 @@ class EditorTaskResponse(BaseModel):
"""编辑器任务详情(可直接喂给 Label Studio Editor 的 task 对象)"""
task: Dict[str, Any] = Field(..., description="Label Studio task 对象")
annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
annotation_updated_at: Optional[datetime] = Field(
None, alias="annotationUpdatedAt", description="标注更新时间"
)
# 分段相关字段
segmented: bool = Field(False, description="是否启用分段模式")
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
current_segment_index: int = Field(
0, alias="currentSegmentIndex", description="当前段落索引"
)
model_config = ConfigDict(populate_by_name=True)
@@ -108,9 +132,13 @@ class SegmentDetail(BaseModel):
idx: int = Field(..., description="段落索引")
text: str = Field(..., description="段落文本")
has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
has_annotation: bool = Field(
False, alias="hasAnnotation", description="该段落是否已有标注"
)
line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)")
chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)")
chunk_index: int = Field(
0, alias="chunkIndex", description="行内分片索引(从0开始)"
)
model_config = ConfigDict(populate_by_name=True)
@@ -121,7 +149,9 @@ class EditorTaskSegmentResponse(BaseModel):
segmented: bool = Field(False, description="是否启用分段模式")
segment: Optional[SegmentDetail] = Field(None, description="段落内容")
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
current_segment_index: int = Field(
0, alias="currentSegmentIndex", description="当前段落索引"
)
model_config = ConfigDict(populate_by_name=True)
@@ -129,7 +159,9 @@ class EditorTaskSegmentResponse(BaseModel):
class UpsertAnnotationRequest(BaseModel):
"""保存/覆盖最终标注(Label Studio annotation 原始对象)"""
annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象(包含 result 等)")
annotation: Dict[str, Any] = Field(
..., description="Label Studio annotation 对象(包含 result 等)"
)
annotation_status: Optional[AnnotationStatus] = Field(
None,
alias="annotationStatus",
@@ -153,8 +185,43 @@ class UpsertAnnotationRequest(BaseModel):
class UpsertAnnotationResponse(BaseModel):
"""保存/覆盖最终标注响应"""
annotation_id: str = Field(..., alias="annotationId", description="标注结果ID(t_dm_annotation_results.id)")
annotation_id: str = Field(
...,
alias="annotationId",
description="标注结果ID(t_dm_annotation_results.id)",
)
updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间")
model_config = ConfigDict(populate_by_name=True)
class FileVersionCheckResponse(BaseModel):
    """文件版本检查响应"""

    # Response body for the file-version check.
    # NOTE: the class docstring and every `description` string are left
    # verbatim because Pydantic publishes them in the generated JSON schema.

    # Identifier of the file; serialized under the camelCase alias "fileId".
    file_id: str = Field(
        ...,
        alias="fileId",
        description="文件ID",
    )

    # Current version of the file (required).
    current_file_version: int = Field(
        ...,
        alias="currentFileVersion",
        description="当前文件版本",
    )

    # File version recorded when the annotation was made; optional and
    # defaults to None.
    annotation_file_version: Optional[int] = Field(
        None,
        alias="annotationFileVersion",
        description="标注时的文件版本",
    )

    # Whether a newer file version exists (required).
    has_new_version: bool = Field(
        ...,
        alias="hasNewVersion",
        description="是否有新版本",
    )

    # Accept the snake_case field names as well as the camelCase aliases
    # when populating the model.
    model_config = ConfigDict(populate_by_name=True)
class UseNewVersionResponse(BaseModel):
    """使用新版本响应"""

    # Response body for the "use new version" operation.
    # NOTE: the class docstring and every `description` string are left
    # verbatim because Pydantic publishes them in the generated JSON schema.

    # Identifier of the file; serialized under the camelCase alias "fileId".
    file_id: str = Field(
        ...,
        alias="fileId",
        description="文件ID",
    )

    # File version the annotation was previously made against; optional and
    # defaults to None.
    previous_file_version: Optional[int] = Field(
        None,
        alias="previousFileVersion",
        description="之前标注的文件版本",
    )

    # Current version of the file (required).
    current_file_version: int = Field(
        ...,
        alias="currentFileVersion",
        description="当前文件版本",
    )

    # Message describing the result of the operation (required, no alias).
    message: str = Field(
        ...,
        description="操作结果消息",
    )

    # Accept the snake_case field names as well as the camelCase aliases
    # when populating the model.
    model_config = ConfigDict(populate_by_name=True)