You've already forked DataMate
feat(annotation): implement file version management for annotation feature
Add support for detecting new file versions and switching to them:

Backend Changes:
- Add file_version column to AnnotationResult model
- Create Alembic migration for database schema update
- Implement check_file_version() method to compare annotation and file versions
- Implement use_new_version() method to clear annotations and update version
- Update upsert_annotation() to record file version when saving
- Add new API endpoints: GET /version and POST /use-new-version
- Add FileVersionCheckResponse and UseNewVersionResponse schemas

Frontend Changes:
- Add checkFileVersionUsingGet and useNewVersionUsingPost API calls
- Add version warning banner showing current vs latest file version
- Add 'Use New Version' button with confirmation dialog
- Clear version info state when switching files to avoid stale warnings

Bug Fixes:
- Fix previousFileVersion returning updated value (save before update)
- Handle null file_version for historical data compatibility
- Fix segmented annotation clearing (preserve structure, clear results)
- Fix files without annotations incorrectly showing new version warnings
- Preserve total_segments when clearing segmented annotations

Files Modified:
- frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
- frontend/src/pages/DataAnnotation/annotation.api.ts
- runtime/datamate-python/app/db/models/annotation_management.py
- runtime/datamate-python/app/module/annotation/interface/editor.py
- runtime/datamate-python/app/module/annotation/schema/editor.py
- runtime/datamate-python/app/module/annotation/service/editor.py

New Files:
- runtime/datamate-python/alembic.ini
- runtime/datamate-python/alembic/env.py
- runtime/datamate-python/alembic/script.py.mako
- runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
This commit is contained in:
@@ -1,98 +1,160 @@
|
||||
"""Tables of Annotation Management Module"""
|
||||
|
||||
import uuid
|
||||
from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey, UniqueConstraint, Index
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.db.session import Base
|
||||
|
||||
ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
|
||||
ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
|
||||
ANNOTATION_STATUS_IN_PROGRESS = "IN_PROGRESS"
|
||||
ANNOTATION_STATUS_VALUES = {
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
ANNOTATION_STATUS_IN_PROGRESS,
|
||||
}
|
||||
ANNOTATION_STATUS_CLIENT_VALUES = {
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
}
|
||||
|
||||
class AnnotationTemplate(Base):
|
||||
import uuid
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
String,
|
||||
Boolean,
|
||||
TIMESTAMP,
|
||||
Text,
|
||||
Integer,
|
||||
JSON,
|
||||
ForeignKey,
|
||||
UniqueConstraint,
|
||||
Index,
|
||||
BigInteger,
|
||||
)
|
||||
from sqlalchemy.sql import func
|
||||
|
||||
from app.db.session import Base
|
||||
|
||||
ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
|
||||
ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
|
||||
ANNOTATION_STATUS_IN_PROGRESS = "IN_PROGRESS"
|
||||
ANNOTATION_STATUS_VALUES = {
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
ANNOTATION_STATUS_IN_PROGRESS,
|
||||
}
|
||||
ANNOTATION_STATUS_CLIENT_VALUES = {
|
||||
ANNOTATION_STATUS_ANNOTATED,
|
||||
ANNOTATION_STATUS_NO_ANNOTATION,
|
||||
ANNOTATION_STATUS_NOT_APPLICABLE,
|
||||
}
|
||||
|
||||
|
||||
class AnnotationTemplate(Base):
|
||||
"""标注配置模板模型"""
|
||||
|
||||
__tablename__ = "t_dm_annotation_templates"
|
||||
|
||||
id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="模板ID(UUID或自定义ID)")
|
||||
id = Column(
|
||||
String(64),
|
||||
primary_key=True,
|
||||
default=lambda: str(uuid.uuid4()),
|
||||
comment="模板ID(UUID或自定义ID)",
|
||||
)
|
||||
name = Column(String(100), nullable=False, comment="模板名称")
|
||||
description = Column(String(500), nullable=True, comment="模板描述")
|
||||
data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table")
|
||||
labeling_type = Column(String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等")
|
||||
configuration = Column(JSON, nullable=True, comment="标注配置(兼容字段,主配置为 label_config)")
|
||||
label_config = Column(Text, nullable=True, comment="Label Studio XML配置(模板主配置)")
|
||||
data_type = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table",
|
||||
)
|
||||
labeling_type = Column(
|
||||
String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等"
|
||||
)
|
||||
configuration = Column(
|
||||
JSON, nullable=True, comment="标注配置(兼容字段,主配置为 label_config)"
|
||||
)
|
||||
label_config = Column(
|
||||
Text, nullable=True, comment="Label Studio XML配置(模板主配置)"
|
||||
)
|
||||
style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
|
||||
category = Column(String(50), default='custom', comment="模板分类: audio-speech/chat/computer-vision/nlp/等")
|
||||
category = Column(
|
||||
String(50),
|
||||
default="custom",
|
||||
comment="模板分类: audio-speech/chat/computer-vision/nlp/等",
|
||||
)
|
||||
built_in = Column(Boolean, default=False, comment="是否系统内置模板")
|
||||
version = Column(String(20), default='1.0', comment="模板版本")
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
version = Column(String(20), default="1.0", comment="模板版本")
|
||||
created_at = Column(
|
||||
TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
|
||||
)
|
||||
updated_at = Column(
|
||||
TIMESTAMP,
|
||||
server_default=func.current_timestamp(),
|
||||
onupdate=func.current_timestamp(),
|
||||
comment="更新时间",
|
||||
)
|
||||
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>"
|
||||
|
||||
|
||||
@property
|
||||
def is_deleted(self) -> bool:
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
|
||||
class LabelingProject(Base):
|
||||
|
||||
|
||||
class LabelingProject(Base):
|
||||
"""标注项目模型"""
|
||||
|
||||
|
||||
__tablename__ = "t_dm_labeling_projects"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
|
||||
id = Column(
|
||||
String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
|
||||
)
|
||||
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
|
||||
name = Column(String(100), nullable=False, comment="项目名称")
|
||||
labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
|
||||
template_id = Column(String(64), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
|
||||
configuration = Column(JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)")
|
||||
labeling_project_id = Column(
|
||||
String(8), nullable=False, comment="Label Studio项目ID"
|
||||
)
|
||||
template_id = Column(
|
||||
String(64),
|
||||
ForeignKey("t_dm_annotation_templates.id", ondelete="SET NULL"),
|
||||
nullable=True,
|
||||
comment="使用的模板ID",
|
||||
)
|
||||
configuration = Column(
|
||||
JSON, nullable=True, comment="项目配置(可能包含对模板的自定义修改)"
|
||||
)
|
||||
progress = Column(JSON, nullable=True, comment="项目进度信息")
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
created_at = Column(
|
||||
TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
|
||||
)
|
||||
updated_at = Column(
|
||||
TIMESTAMP,
|
||||
server_default=func.current_timestamp(),
|
||||
onupdate=func.current_timestamp(),
|
||||
comment="更新时间",
|
||||
)
|
||||
deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间(软删除)")
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
|
||||
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
|
||||
|
||||
@property
|
||||
def is_deleted(self) -> bool:
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
|
||||
|
||||
class LabelingProjectFile(Base):
|
||||
"""标注项目文件快照模型"""
|
||||
|
||||
__tablename__ = "t_dm_labeling_project_files"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
project_id = Column(String(36), nullable=False, comment="标注项目ID")
|
||||
file_id = Column(String(36), nullable=False, comment="文件ID")
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("project_id", "file_id", name="uk_project_file"),
|
||||
Index("idx_project_id", "project_id"),
|
||||
Index("idx_file_id", "file_id"),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LabelingProjectFile(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
|
||||
def is_deleted(self) -> bool:
|
||||
"""检查是否已被软删除"""
|
||||
return self.deleted_at is not None
|
||||
|
||||
|
||||
class LabelingProjectFile(Base):
|
||||
"""标注项目文件快照模型"""
|
||||
|
||||
__tablename__ = "t_dm_labeling_project_files"
|
||||
|
||||
id = Column(
|
||||
String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
|
||||
)
|
||||
project_id = Column(String(36), nullable=False, comment="标注项目ID")
|
||||
file_id = Column(String(36), nullable=False, comment="文件ID")
|
||||
created_at = Column(
|
||||
TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
|
||||
)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("project_id", "file_id", name="uk_project_file"),
|
||||
Index("idx_project_id", "project_id"),
|
||||
Index("idx_file_id", "file_id"),
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<LabelingProjectFile(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
|
||||
|
||||
|
||||
class AnnotationResult(Base):
|
||||
@@ -100,18 +162,36 @@ class AnnotationResult(Base):
|
||||
|
||||
__tablename__ = "t_dm_annotation_results"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
project_id = Column(String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)")
|
||||
file_id = Column(String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)")
|
||||
annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON(单人单份最终结果)")
|
||||
annotation_status = Column(
|
||||
String(32),
|
||||
nullable=False,
|
||||
default=ANNOTATION_STATUS_ANNOTATED,
|
||||
comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
|
||||
)
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
|
||||
id = Column(
|
||||
String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
|
||||
)
|
||||
project_id = Column(
|
||||
String(36), nullable=False, comment="标注项目ID(t_dm_labeling_projects.id)"
|
||||
)
|
||||
file_id = Column(
|
||||
String(36), nullable=False, comment="文件ID(t_dm_dataset_files.id)"
|
||||
)
|
||||
annotation = Column(
|
||||
JSON,
|
||||
nullable=False,
|
||||
comment="Label Studio annotation 原始JSON(单人单份最终结果)",
|
||||
)
|
||||
annotation_status = Column(
|
||||
String(32),
|
||||
nullable=False,
|
||||
default=ANNOTATION_STATUS_ANNOTATED,
|
||||
comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
|
||||
)
|
||||
file_version = Column(BigInteger, nullable=True, comment="标注时的文件版本号")
|
||||
created_at = Column(
|
||||
TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
|
||||
)
|
||||
updated_at = Column(
|
||||
TIMESTAMP,
|
||||
server_default=func.current_timestamp(),
|
||||
onupdate=func.current_timestamp(),
|
||||
comment="更新时间",
|
||||
)
|
||||
|
||||
def __repr__(self):
|
||||
return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
|
||||
@@ -122,20 +202,33 @@ class AutoAnnotationTask(Base):
|
||||
|
||||
__tablename__ = "t_dm_auto_annotation_tasks"
|
||||
|
||||
id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
|
||||
id = Column(
|
||||
String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
|
||||
)
|
||||
name = Column(String(255), nullable=False, comment="任务名称")
|
||||
dataset_id = Column(String(36), nullable=False, comment="数据集ID")
|
||||
dataset_name = Column(String(255), nullable=True, comment="数据集名称(冗余字段,方便查询)")
|
||||
dataset_name = Column(
|
||||
String(255), nullable=True, comment="数据集名称(冗余字段,方便查询)"
|
||||
)
|
||||
config = Column(JSON, nullable=False, comment="任务配置(模型规模、置信度等)")
|
||||
file_ids = Column(JSON, nullable=True, comment="要处理的文件ID列表,为空则处理数据集所有图像")
|
||||
status = Column(String(50), nullable=False, default="pending", comment="任务状态: pending/running/completed/failed")
|
||||
file_ids = Column(
|
||||
JSON, nullable=True, comment="要处理的文件ID列表,为空则处理数据集所有图像"
|
||||
)
|
||||
status = Column(
|
||||
String(50),
|
||||
nullable=False,
|
||||
default="pending",
|
||||
comment="任务状态: pending/running/completed/failed",
|
||||
)
|
||||
progress = Column(Integer, default=0, comment="任务进度 0-100")
|
||||
total_images = Column(Integer, default=0, comment="总图片数")
|
||||
processed_images = Column(Integer, default=0, comment="已处理图片数")
|
||||
detected_objects = Column(Integer, default=0, comment="检测到的对象总数")
|
||||
output_path = Column(String(500), nullable=True, comment="输出路径")
|
||||
error_message = Column(Text, nullable=True, comment="错误信息")
|
||||
created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
|
||||
created_at = Column(
|
||||
TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
|
||||
)
|
||||
updated_at = Column(
|
||||
TIMESTAMP,
|
||||
server_default=func.current_timestamp(),
|
||||
|
||||
Reference in New Issue
Block a user