You've already forked DataMate
feat(annotation): implement file version management for annotation feature
Add support for detecting new file versions and switching to them.

Backend Changes:
- Add file_version column to AnnotationResult model
- Create Alembic migration for database schema update
- Implement check_file_version() method to compare annotation and file versions
- Implement use_new_version() method to clear annotations and update version
- Update upsert_annotation() to record file version when saving
- Add new API endpoints: GET /version and POST /use-new-version
- Add FileVersionCheckResponse and UseNewVersionResponse schemas

Frontend Changes:
- Add checkFileVersionUsingGet and useNewVersionUsingPost API calls
- Add version warning banner showing current vs latest file version
- Add 'Use New Version' button with confirmation dialog
- Clear version info state when switching files to avoid stale warnings

Bug Fixes:
- Fix previousFileVersion returning updated value (save before update)
- Handle null file_version for historical data compatibility
- Fix segmented annotation clearing (preserve structure, clear results)
- Fix files without annotations incorrectly showing new version warnings
- Preserve total_segments when clearing segmented annotations

Files Modified:
- frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
- frontend/src/pages/DataAnnotation/annotation.api.ts
- runtime/datamate-python/app/db/models/annotation_management.py
- runtime/datamate-python/app/module/annotation/interface/editor.py
- runtime/datamate-python/app/module/annotation/schema/editor.py
- runtime/datamate-python/app/module/annotation/service/editor.py

New Files:
- runtime/datamate-python/alembic.ini
- runtime/datamate-python/alembic/env.py
- runtime/datamate-python/alembic/script.py.mako
- runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
This commit is contained in:
@@ -34,17 +34,29 @@ class AnnotationStatus(str, Enum):
|
||||
class EditorProjectInfo(BaseModel):
|
||||
"""编辑器项目元信息"""
|
||||
|
||||
project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID(t_dm_labeling_projects.id)")
|
||||
dataset_id: str = Field(..., alias="datasetId", description="数据集ID(t_dm_datasets.id)")
|
||||
project_id: str = Field(
|
||||
...,
|
||||
alias="projectId",
|
||||
description="DataMate 标注项目ID(t_dm_labeling_projects.id)",
|
||||
)
|
||||
dataset_id: str = Field(
|
||||
..., alias="datasetId", description="数据集ID(t_dm_datasets.id)"
|
||||
)
|
||||
dataset_type: Optional[str] = Field(
|
||||
None,
|
||||
alias="datasetType",
|
||||
description="数据集类型(TEXT/IMAGE/AUDIO/VIDEO 等)",
|
||||
)
|
||||
template_id: Optional[str] = Field(None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)")
|
||||
label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置")
|
||||
template_id: Optional[str] = Field(
|
||||
None, alias="templateId", description="模板ID(t_dm_annotation_templates.id)"
|
||||
)
|
||||
label_config: Optional[str] = Field(
|
||||
None, alias="labelConfig", description="Label Studio XML 配置"
|
||||
)
|
||||
supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器")
|
||||
unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因(当 supported=false)")
|
||||
unsupported_reason: Optional[str] = Field(
|
||||
None, alias="unsupportedReason", description="不支持原因(当 supported=false)"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
@@ -55,8 +67,12 @@ class EditorTaskListItem(BaseModel):
|
||||
file_id: str = Field(..., alias="fileId", description="文件ID")
|
||||
file_name: str = Field(..., alias="fileName", description="文件名")
|
||||
file_type: Optional[str] = Field(None, alias="fileType", description="文件类型")
|
||||
has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注")
|
||||
annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
|
||||
has_annotation: bool = Field(
|
||||
..., alias="hasAnnotation", description="是否已有最终标注"
|
||||
)
|
||||
annotation_updated_at: Optional[datetime] = Field(
|
||||
None, alias="annotationUpdatedAt", description="标注更新时间"
|
||||
)
|
||||
annotation_status: Optional[AnnotationStatus] = Field(
|
||||
None,
|
||||
alias="annotationStatus",
|
||||
@@ -82,9 +98,13 @@ class SegmentInfo(BaseModel):
|
||||
"""段落摘要(用于文本分段标注)"""
|
||||
|
||||
idx: int = Field(..., description="段落索引")
|
||||
has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
|
||||
has_annotation: bool = Field(
|
||||
False, alias="hasAnnotation", description="该段落是否已有标注"
|
||||
)
|
||||
line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)")
|
||||
chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)")
|
||||
chunk_index: int = Field(
|
||||
0, alias="chunkIndex", description="行内分片索引(从0开始)"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
@@ -93,12 +113,16 @@ class EditorTaskResponse(BaseModel):
|
||||
"""编辑器任务详情(可直接喂给 Label Studio Editor 的 task 对象)"""
|
||||
|
||||
task: Dict[str, Any] = Field(..., description="Label Studio task 对象")
|
||||
annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
|
||||
annotation_updated_at: Optional[datetime] = Field(
|
||||
None, alias="annotationUpdatedAt", description="标注更新时间"
|
||||
)
|
||||
|
||||
# 分段相关字段
|
||||
segmented: bool = Field(False, description="是否启用分段模式")
|
||||
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
|
||||
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
|
||||
current_segment_index: int = Field(
|
||||
0, alias="currentSegmentIndex", description="当前段落索引"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
@@ -108,9 +132,13 @@ class SegmentDetail(BaseModel):
|
||||
|
||||
idx: int = Field(..., description="段落索引")
|
||||
text: str = Field(..., description="段落文本")
|
||||
has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
|
||||
has_annotation: bool = Field(
|
||||
False, alias="hasAnnotation", description="该段落是否已有标注"
|
||||
)
|
||||
line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引(从0开始)")
|
||||
chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引(从0开始)")
|
||||
chunk_index: int = Field(
|
||||
0, alias="chunkIndex", description="行内分片索引(从0开始)"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
@@ -121,7 +149,9 @@ class EditorTaskSegmentResponse(BaseModel):
|
||||
segmented: bool = Field(False, description="是否启用分段模式")
|
||||
segment: Optional[SegmentDetail] = Field(None, description="段落内容")
|
||||
total_segments: int = Field(0, alias="totalSegments", description="总段落数")
|
||||
current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
|
||||
current_segment_index: int = Field(
|
||||
0, alias="currentSegmentIndex", description="当前段落索引"
|
||||
)
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
@@ -129,7 +159,9 @@ class EditorTaskSegmentResponse(BaseModel):
|
||||
class UpsertAnnotationRequest(BaseModel):
|
||||
"""保存/覆盖最终标注(Label Studio annotation 原始对象)"""
|
||||
|
||||
annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象(包含 result 等)")
|
||||
annotation: Dict[str, Any] = Field(
|
||||
..., description="Label Studio annotation 对象(包含 result 等)"
|
||||
)
|
||||
annotation_status: Optional[AnnotationStatus] = Field(
|
||||
None,
|
||||
alias="annotationStatus",
|
||||
@@ -153,8 +185,43 @@ class UpsertAnnotationRequest(BaseModel):
|
||||
class UpsertAnnotationResponse(BaseModel):
|
||||
"""保存/覆盖最终标注响应"""
|
||||
|
||||
annotation_id: str = Field(..., alias="annotationId", description="标注结果ID(t_dm_annotation_results.id)")
|
||||
annotation_id: str = Field(
|
||||
...,
|
||||
alias="annotationId",
|
||||
description="标注结果ID(t_dm_annotation_results.id)",
|
||||
)
|
||||
updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间")
|
||||
|
||||
model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class FileVersionCheckResponse(BaseModel):
    """文件版本检查响应"""

    # Response schema for a file-version check.
    # NOTE(review): the class docstring and every `description=` string are
    # kept verbatim because pydantic may surface them in the generated
    # schema — confirm before translating them.

    # Required: ID of the file that was checked.
    file_id: str = Field(
        default=...,
        alias="fileId",
        description="文件ID",
    )

    # Required: the file's current version number.
    current_file_version: int = Field(
        default=...,
        alias="currentFileVersion",
        description="当前文件版本",
    )

    # Optional (defaults to None): file version recorded at annotation time.
    annotation_file_version: Optional[int] = Field(
        default=None,
        alias="annotationFileVersion",
        description="标注时的文件版本",
    )

    # Required: whether a newer file version exists.
    has_new_version: bool = Field(
        default=...,
        alias="hasNewVersion",
        description="是否有新版本",
    )

    # Let callers populate fields by snake_case name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
|
||||
class UseNewVersionResponse(BaseModel):
    """使用新版本响应"""

    # Response schema for the "use new version" operation.
    # NOTE(review): the class docstring and every `description=` string are
    # kept verbatim because pydantic may surface them in the generated
    # schema — confirm before translating them.

    # Required: ID of the file that was switched.
    file_id: str = Field(
        default=...,
        alias="fileId",
        description="文件ID",
    )

    # Optional (defaults to None): file version the annotation used before.
    previous_file_version: Optional[int] = Field(
        default=None,
        alias="previousFileVersion",
        description="之前标注的文件版本",
    )

    # Required: the file's current version number.
    current_file_version: int = Field(
        default=...,
        alias="currentFileVersion",
        description="当前文件版本",
    )

    # Required: operation result message.
    message: str = Field(
        default=...,
        description="操作结果消息",
    )

    # Let callers populate fields by snake_case name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)
|
||||
|
||||
Reference in New Issue
Block a user