feat(annotation): implement file version management for annotation feature

Add support for detecting new file versions and switching to them: Backend Changes: - Add file_version column to AnnotationResult model - Create Alembic migration for database schema update - Implement check_file_version() method to compare annotation and file versions - Implement use_new_version() method to clear annotations and update version - Update upsert_annotation() to record file version when saving - Add new API endpoints: GET /version and POST /use-new-version - Add FileVersionCheckResponse and UseNewVersionResponse schemas Frontend Changes: - Add checkFileVersionUsingGet and useNewVersionUsingPost API calls - Add version warning banner showing current vs latest file version - Add 'Use New Version' button with confirmation dialog - Clear version info state when switching files to avoid stale warnings Bug Fixes: - Fix previousFileVersion returning updated value (save before update) - Handle null file_version for historical data compatibility - Fix segmented annotation clearing (preserve structure, clear results) - Fix files without annotations incorrectly showing new version warnings - Preserve total_segments when clearing segmented annotations Files Modified: - frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx - frontend/src/pages/DataAnnotation/annotation.api.ts - runtime/datamate-python/app/db/models/annotation_management.py - runtime/datamate-python/app/module/annotation/interface/editor.py - runtime/datamate-python/app/module/annotation/schema/editor.py - runtime/datamate-python/app/module/annotation/service/editor.py New Files: - runtime/datamate-python/alembic.ini - runtime/datamate-python/alembic/env.py - runtime/datamate-python/alembic/script.py.mako - runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
2026-02-05 20:12:07 +08:00
parent 4143bc75f9
commit f5cb265667
10 changed files with 915 additions and 171 deletions
--- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
+++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
@@ -8,6 +8,9 @@ import {
  getEditorTaskUsingGet,
  listEditorTasksUsingGet,
  upsertEditorAnnotationUsingPut,
+  checkFileVersionUsingGet,
+  useNewVersionUsingPost,
+  type FileVersionCheckResponse,
 } from "../annotation.api";
 import { AnnotationResultStatus } from "../annotation.model";

@@ -269,6 +272,11 @@ export default function LabelStudioTextEditor() {
    return Array.from({ length: segmentTotal }, (_, index) => index);
  }, [segmentTotal]);

+  // File-version state: latest check result plus in-flight flags for the check and the "use new version" request
+  const [fileVersionInfo, setFileVersionInfo] = useState<FileVersionCheckResponse | null>(null);
+  const [checkingFileVersion, setCheckingFileVersion] = useState(false);
+  const [usingNewVersion, setUsingNewVersion] = useState(false);
+
  const focusIframe = useCallback(() => {
    const iframe = iframeRef.current;
    if (!iframe) return;
@@ -548,6 +556,77 @@ export default function LabelStudioTextEditor() {
    }
  }, [iframeReady, message, postToIframe, project, projectId]);

+  /**
+   * Asks the backend whether `fileId` has a newer file version than the one
+   * recorded on its annotation, stores the response in `fileVersionInfo`, and
+   * shows a warning modal when `hasNewVersion` is set.
+   *
+   * Does nothing when `projectId` or `fileId` is empty. Failures are logged to
+   * the console and otherwise ignored.
+   *
+   * NOTE(review): a response that arrives after the user has switched to another
+   * file still overwrites `fileVersionInfo`; consider discarding stale responses.
+   */
+  const checkFileVersion = useCallback(async (fileId: string) => {
+    if (!projectId || !fileId) return;
+    setCheckingFileVersion(true);
+    try {
+      const resp = (await checkFileVersionUsingGet(projectId, fileId)) as ApiResponse<FileVersionCheckResponse>;
+      const data = resp?.data;
+      if (data) {
+        setFileVersionInfo(data);
+        if (data.hasNewVersion) {
+          modal.warning({
+            title: "文件有新版本",
+            content: (
+              <div className="flex flex-col gap-2">
+                <Typography.Text>
+                  文件已更新到新版本（当前版本: {data.currentFileVersion}，标注版本: {data.annotationFileVersion ?? "未知"}）。
+                </Typography.Text>
+                <Typography.Text type="secondary">
+                  点击"使用新版本"可清空当前标注并使用最新版本的文件内容。
+                </Typography.Text>
+              </div>
+            ),
+            okText: "我知道了",
+          });
+        }
+      }
+    } catch (e) {
+      console.error("检查文件版本失败", e);
+    } finally {
+      setCheckingFileVersion(false);
+    }
+    // `message` is not used in this callback, so it is not a dependency.
+  }, [modal, projectId]);
+
+  /**
+   * Click handler for the "使用新版本" button.
+   *
+   * Opens a confirmation dialog. Once confirmed it calls the use-new-version
+   * endpoint for the selected file, clears the cached version info, reloads the
+   * task list in "reset" mode and re-initialises the editor for that file.
+   * A failure is logged and reported through `message.error`.
+   *
+   * The handler itself is synchronous: nothing is awaited outside `onOk`.
+   */
+  const handleUseNewVersion = useCallback(() => {
+    if (!selectedFileId) return;
+
+    modal.confirm({
+      title: "确认使用新版本",
+      content: (
+        <div className="flex flex-col gap-2">
+          <Typography.Text>
+            确认使用新版本？这将清空当前标注并使用最新版本的文件内容。
+          </Typography.Text>
+          {fileVersionInfo && (
+            <Typography.Text type="secondary">
+              当前标注版本: {fileVersionInfo.annotationFileVersion ?? "未知"}，最新文件版本: {fileVersionInfo.currentFileVersion}
+            </Typography.Text>
+          )}
+        </div>
+      ),
+      okText: "确认",
+      okType: "danger",
+      cancelText: "取消",
+      onOk: async () => {
+        if (!projectId || !selectedFileId) return;
+        setUsingNewVersion(true);
+        try {
+          await useNewVersionUsingPost(projectId, selectedFileId);
+          message.success("已使用新版本并清空标注");
+          setFileVersionInfo(null);
+          await loadTasks({ mode: "reset" });
+          await initEditorForFile(selectedFileId);
+        } catch (e) {
+          console.error("使用新版本失败", e);
+          message.error("使用新版本失败");
+        } finally {
+          setUsingNewVersion(false);
+        }
+      },
+    });
+  }, [modal, message, projectId, selectedFileId, fileVersionInfo, loadTasks, initEditorForFile]);
+
  const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
    if (!fileId) return;
    if (segmented && segmentTotal > 0) {
@@ -815,6 +894,13 @@ export default function LabelStudioTextEditor() {
    return () => window.removeEventListener("message", handler);
  }, [message, origin, saveFromExport]);

+  // Re-check the file version whenever the selected file or editor support changes.
+  useEffect(() => {
+    // Always drop the previous result first so a stale banner never outlives
+    // its file, even when no eligible file is selected any more.
+    setFileVersionInfo(null);
+    if (!selectedFileId || !project?.supported) return;
+    // checkFileVersion handles its own errors; `void` marks the promise as intentionally unawaited.
+    void checkFileVersion(selectedFileId);
+  }, [selectedFileId, project?.supported, checkFileVersion]);
+
  const canLoadMore = taskTotalPages > 0 && taskPage + 1 < taskTotalPages;
  const saveDisabled =
    !iframeReady || !selectedFileId || saving || loadingTaskDetail;
@@ -896,6 +982,22 @@ export default function LabelStudioTextEditor() {
          </Typography.Title>
        </div>
        <div className="flex items-center justify-center">
+          {/* New-version banner: rendered only while the last version check reported hasNewVersion. */}
+          {fileVersionInfo?.hasNewVersion && (
+            <div className="flex items-center gap-2 mr-4">
+              <Typography.Text type="warning" className="text-xs">
+                ⚠ 文件有新版本（{fileVersionInfo.currentFileVersion} &gt; {fileVersionInfo.annotationFileVersion ?? "未知"}）
+              </Typography.Text>
+              <Button
+                size="small"
+                type="primary"
+                danger
+                loading={usingNewVersion}
+                onClick={handleUseNewVersion}
+              >
+                使用新版本
+              </Button>
+            </div>
+          )}
          <Button
            type="primary"
            icon={<SaveOutlined />}
--- a/frontend/src/pages/DataAnnotation/annotation.api.ts
+++ b/frontend/src/pages/DataAnnotation/annotation.api.ts
@@ -88,7 +88,7 @@ export function getEditorTaskSegmentUsingGet(
  return get(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/segments`, params);
 }

-export function upsertEditorAnnotationUsingPut(
+export function upsertEditorAnnotationUsingPut(
  projectId: string,
  fileId: string,
  data: {
@@ -97,8 +97,31 @@ export function upsertEditorAnnotationUsingPut(
    segmentIndex?: number;
  }
 ) {
-  return put(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`, data);
-}
+  return put(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`, data);
+}
+
+/** Payload of the file-version check endpoint (`GET .../files/{fileId}/version`). */
+export interface FileVersionCheckResponse {
+  /** ID of the checked file. */
+  fileId: string;
+  /** Current version of the file. */
+  currentFileVersion: number;
+  /** File version recorded when the annotation was saved; `null` when none is recorded. */
+  annotationFileVersion: number | null;
+  /** Whether the backend considers the file to have a new version. */
+  hasNewVersion: boolean;
+}
+
+/**
+ * Requests the version-check result for one file of an annotation project.
+ * The editor treats the resolved value as `ApiResponse<FileVersionCheckResponse>`.
+ */
+export function checkFileVersionUsingGet(projectId: string, fileId: string) {
+  const versionUrl = `/api/annotation/editor/projects/${projectId}/files/${fileId}/version`;
+  return get(versionUrl);
+}
+
+/**
+ * Payload of the use-new-version endpoint (`POST .../files/{fileId}/use-new-version`).
+ * NOTE(review): field meanings below are inferred from the names; confirm against the backend schema.
+ */
+export interface UseNewVersionResponse {
+  /** ID of the file that was switched. */
+  fileId: string;
+  /** Presumably the file version the annotation had before the switch; `null` when none was recorded. */
+  previousFileVersion: number | null;
+  /** Presumably the file version now in use. */
+  currentFileVersion: number;
+  /** Message text returned by the backend. */
+  message: string;
+}
+
+/**
+ * Posts to the use-new-version endpoint for one file of an annotation project.
+ * The request carries an empty JSON object as its body.
+ */
+export function useNewVersionUsingPost(projectId: string, fileId: string) {
+  const switchUrl = `/api/annotation/editor/projects/${projectId}/files/${fileId}/use-new-version`;
+  return post(switchUrl, {});
+}
+

 // =====================
 // 标注数据导出
--- a/runtime/datamate-python/alembic.ini
+++ b/runtime/datamate-python/alembic.ini
@@ -0,0 +1,40 @@
+# Alembic configuration for the datamate-python runtime.
+# NOTE(review): no `sqlalchemy.url` option is defined in this section, yet
+# alembic/env.py reads it (config.get_main_option / engine_from_config).
+# Confirm how the database URL is meant to be supplied before running migrations.
+[alembic]
+# Directory that holds env.py, script.py.mako and versions/.
+script_location = alembic
+# Naming pattern for generated revision files; `%%` is an escaped `%`.
+# Produces names of the form YYYYMMDD_HHMM-<rev>_<slug>.
+file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
+prepend_sys_path = .
+
+[post_write_hooks]
+
+# The sections below are standard Python logging configuration; env.py loads
+# them through logging.config.fileConfig().
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
--- a/runtime/datamate-python/alembic/env.py
+++ b/runtime/datamate-python/alembic/env.py
@@ -0,0 +1,54 @@
+"""Alembic environment configuration"""
+
+from logging.config import fileConfig
+from sqlalchemy import engine_from_config, pool
+from alembic import context
+import sys
+import os
+
+# Put the project root (the parent of this alembic/ directory) on sys.path so `app` is importable
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+
+from app.db.session import Base
+# Presumably imported for its side effect of registering every model on Base.metadata - verify
+from app.db.models import *
+
+# Alembic Config object for the ini file in use
+config = context.config
+# Apply the logging sections of that ini file, when one was given
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# Metadata handed to context.configure() in both migration modes
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode."""
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode."""
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(connection=connection, target_metadata=target_metadata)
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
--- a/runtime/datamate-python/alembic/script.py.mako
+++ b/runtime/datamate-python/alembic/script.py.mako
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+
+"""
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+# revision identifiers, used by Alembic.
+revision = ${repr(up_revision)}
+down_revision = ${repr(down_revision)}
+branch_labels = ${repr(branch_labels)}
+depends_on = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
--- a/runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
+++ b/runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
@@ -0,0 +1,30 @@
+"""add file_version to annotation_results
+
+Revision ID: 20250205_0001
+Revises:
+Create Date: 2025-02-05 00:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = "20250205_0001"
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    op.add_column(
+        "t_dm_annotation_results",
+        sa.Column(
+            "file_version", sa.BigInteger(), nullable=True, comment="标注时的文件版本号"
+        ),
+    )
+
+
+def downgrade() -> None:
+    op.drop_column("t_dm_annotation_results", "file_version")
--- a/runtime/datamate-python/app/db/models/annotation_management.py
+++ b/runtime/datamate-python/app/db/models/annotation_management.py
@@ -1,98 +1,160 @@
 """Tables of Annotation Management Module"""

-import uuid
-from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey, UniqueConstraint, Index
-from sqlalchemy.sql import func
-
-from app.db.session import Base
-
-ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
-ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
-ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
-ANNOTATION_STATUS_IN_PROGRESS = "IN_PROGRESS"
-ANNOTATION_STATUS_VALUES = {
-    ANNOTATION_STATUS_ANNOTATED,
-    ANNOTATION_STATUS_NO_ANNOTATION,
-    ANNOTATION_STATUS_NOT_APPLICABLE,
-    ANNOTATION_STATUS_IN_PROGRESS,
-}
-ANNOTATION_STATUS_CLIENT_VALUES = {
-    ANNOTATION_STATUS_ANNOTATED,
-    ANNOTATION_STATUS_NO_ANNOTATION,
-    ANNOTATION_STATUS_NOT_APPLICABLE,
-}
-
-class AnnotationTemplate(Base):
+import uuid
+from sqlalchemy import (
+    Column,
+    String,
+    Boolean,
+    TIMESTAMP,
+    Text,
+    Integer,
+    JSON,
+    ForeignKey,
+    UniqueConstraint,
+    Index,
+    BigInteger,
+)
+from sqlalchemy.sql import func
+
+from app.db.session import Base
+
+ANNOTATION_STATUS_ANNOTATED = "ANNOTATED"
+ANNOTATION_STATUS_NO_ANNOTATION = "NO_ANNOTATION"
+ANNOTATION_STATUS_NOT_APPLICABLE = "NOT_APPLICABLE"
+ANNOTATION_STATUS_IN_PROGRESS = "IN_PROGRESS"
+ANNOTATION_STATUS_VALUES = {
+    ANNOTATION_STATUS_ANNOTATED,
+    ANNOTATION_STATUS_NO_ANNOTATION,
+    ANNOTATION_STATUS_NOT_APPLICABLE,
+    ANNOTATION_STATUS_IN_PROGRESS,
+}
+ANNOTATION_STATUS_CLIENT_VALUES = {
+    ANNOTATION_STATUS_ANNOTATED,
+    ANNOTATION_STATUS_NO_ANNOTATION,
+    ANNOTATION_STATUS_NOT_APPLICABLE,
+}
+
+
+class AnnotationTemplate(Base):
    """标注配置模板模型"""

    __tablename__ = "t_dm_annotation_templates"

-    id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="模板ID（UUID或自定义ID）")
+    id = Column(
+        String(64),
+        primary_key=True,
+        default=lambda: str(uuid.uuid4()),
+        comment="模板ID（UUID或自定义ID）",
+    )
    name = Column(String(100), nullable=False, comment="模板名称")
    description = Column(String(500), nullable=True, comment="模板描述")
-    data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table")
-    labeling_type = Column(String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等")
-    configuration = Column(JSON, nullable=True, comment="标注配置（兼容字段，主配置为 label_config）")
-    label_config = Column(Text, nullable=True, comment="Label Studio XML配置（模板主配置）")
+    data_type = Column(
+        String(50),
+        nullable=False,
+        comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table",
+    )
+    labeling_type = Column(
+        String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等"
+    )
+    configuration = Column(
+        JSON, nullable=True, comment="标注配置（兼容字段，主配置为 label_config）"
+    )
+    label_config = Column(
+        Text, nullable=True, comment="Label Studio XML配置（模板主配置）"
+    )
    style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
-    category = Column(String(50), default='custom', comment="模板分类: audio-speech/chat/computer-vision/nlp/等")
+    category = Column(
+        String(50),
+        default="custom",
+        comment="模板分类: audio-speech/chat/computer-vision/nlp/等",
+    )
    built_in = Column(Boolean, default=False, comment="是否系统内置模板")
-    version = Column(String(20), default='1.0', comment="模板版本")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
-    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+    version = Column(String(20), default="1.0", comment="模板版本")
+    created_at = Column(
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
+    )
+    updated_at = Column(
+        TIMESTAMP,
+        server_default=func.current_timestamp(),
+        onupdate=func.current_timestamp(),
+        comment="更新时间",
+    )
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间（软删除）")
-    
+
    def __repr__(self):
        return f"<AnnotationTemplate(id={self.id}, name={self.name}, data_type={self.data_type})>"
-    
+
    @property
    def is_deleted(self) -> bool:
        """检查是否已被软删除"""
        return self.deleted_at is not None
-    
-class LabelingProject(Base):
+
+
+class LabelingProject(Base):
    """标注项目模型"""
-    
+
    __tablename__ = "t_dm_labeling_projects"
-    
-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+
+    id = Column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
+    )
    dataset_id = Column(String(36), nullable=False, comment="数据集ID")
    name = Column(String(100), nullable=False, comment="项目名称")
-    labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
-    template_id = Column(String(64), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
-    configuration = Column(JSON, nullable=True, comment="项目配置（可能包含对模板的自定义修改）")
+    labeling_project_id = Column(
+        String(8), nullable=False, comment="Label Studio项目ID"
+    )
+    template_id = Column(
+        String(64),
+        ForeignKey("t_dm_annotation_templates.id", ondelete="SET NULL"),
+        nullable=True,
+        comment="使用的模板ID",
+    )
+    configuration = Column(
+        JSON, nullable=True, comment="项目配置（可能包含对模板的自定义修改）"
+    )
    progress = Column(JSON, nullable=True, comment="项目进度信息")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
-    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+    created_at = Column(
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
+    )
+    updated_at = Column(
+        TIMESTAMP,
+        server_default=func.current_timestamp(),
+        onupdate=func.current_timestamp(),
+        comment="更新时间",
+    )
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间（软删除）")
-    
-    def __repr__(self):
-        return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
-    
+
+    def __repr__(self):
+        return f"<LabelingProject(id={self.id}, name={self.name}, dataset_id={self.dataset_id})>"
+
    @property
-    def is_deleted(self) -> bool:
-        """检查是否已被软删除"""
-        return self.deleted_at is not None
-
-
-class LabelingProjectFile(Base):
-    """标注项目文件快照模型"""
-
-    __tablename__ = "t_dm_labeling_project_files"
-
-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
-    project_id = Column(String(36), nullable=False, comment="标注项目ID")
-    file_id = Column(String(36), nullable=False, comment="文件ID")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
-
-    __table_args__ = (
-        UniqueConstraint("project_id", "file_id", name="uk_project_file"),
-        Index("idx_project_id", "project_id"),
-        Index("idx_file_id", "file_id"),
-    )
-
-    def __repr__(self):
-        return f"<LabelingProjectFile(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
+    def is_deleted(self) -> bool:
+        """检查是否已被软删除"""
+        return self.deleted_at is not None
+
+
+class LabelingProjectFile(Base):
+    """标注项目文件快照模型"""
+
+    __tablename__ = "t_dm_labeling_project_files"
+
+    id = Column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
+    )
+    project_id = Column(String(36), nullable=False, comment="标注项目ID")
+    file_id = Column(String(36), nullable=False, comment="文件ID")
+    created_at = Column(
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
+    )
+
+    __table_args__ = (
+        UniqueConstraint("project_id", "file_id", name="uk_project_file"),
+        Index("idx_project_id", "project_id"),
+        Index("idx_file_id", "file_id"),
+    )
+
+    def __repr__(self):
+        return f"<LabelingProjectFile(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"


 class AnnotationResult(Base):
@@ -100,18 +162,36 @@ class AnnotationResult(Base):

    __tablename__ = "t_dm_annotation_results"

-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
-    project_id = Column(String(36), nullable=False, comment="标注项目ID（t_dm_labeling_projects.id）")
-    file_id = Column(String(36), nullable=False, comment="文件ID（t_dm_dataset_files.id）")
-    annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON（单人单份最终结果）")
-    annotation_status = Column(
-        String(32),
-        nullable=False,
-        default=ANNOTATION_STATUS_ANNOTATED,
-        comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
-    )
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
-    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+    id = Column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
+    )
+    project_id = Column(
+        String(36), nullable=False, comment="标注项目ID（t_dm_labeling_projects.id）"
+    )
+    file_id = Column(
+        String(36), nullable=False, comment="文件ID（t_dm_dataset_files.id）"
+    )
+    annotation = Column(
+        JSON,
+        nullable=False,
+        comment="Label Studio annotation 原始JSON（单人单份最终结果）",
+    )
+    annotation_status = Column(
+        String(32),
+        nullable=False,
+        default=ANNOTATION_STATUS_ANNOTATED,
+        comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
+    )
+    file_version = Column(BigInteger, nullable=True, comment="标注时的文件版本号")
+    created_at = Column(
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
+    )
+    updated_at = Column(
+        TIMESTAMP,
+        server_default=func.current_timestamp(),
+        onupdate=func.current_timestamp(),
+        comment="更新时间",
+    )

    def __repr__(self):
        return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
@@ -122,20 +202,33 @@ class AutoAnnotationTask(Base):

    __tablename__ = "t_dm_auto_annotation_tasks"

-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+    id = Column(
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
+    )
    name = Column(String(255), nullable=False, comment="任务名称")
    dataset_id = Column(String(36), nullable=False, comment="数据集ID")
-    dataset_name = Column(String(255), nullable=True, comment="数据集名称（冗余字段，方便查询）")
+    dataset_name = Column(
+        String(255), nullable=True, comment="数据集名称（冗余字段，方便查询）"
+    )
    config = Column(JSON, nullable=False, comment="任务配置（模型规模、置信度等）")
-    file_ids = Column(JSON, nullable=True, comment="要处理的文件ID列表，为空则处理数据集所有图像")
-    status = Column(String(50), nullable=False, default="pending", comment="任务状态: pending/running/completed/failed")
+    file_ids = Column(
+        JSON, nullable=True, comment="要处理的文件ID列表，为空则处理数据集所有图像"
+    )
+    status = Column(
+        String(50),
+        nullable=False,
+        default="pending",
+        comment="任务状态: pending/running/completed/failed",
+    )
    progress = Column(Integer, default=0, comment="任务进度 0-100")
    total_images = Column(Integer, default=0, comment="总图片数")
    processed_images = Column(Integer, default=0, comment="已处理图片数")
    detected_objects = Column(Integer, default=0, comment="检测到的对象总数")
    output_path = Column(String(500), nullable=True, comment="输出路径")
    error_message = Column(Text, nullable=True, comment="错误信息")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+    created_at = Column(
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
+    )
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
--- a/runtime/datamate-python/app/module/annotation/interface/editor.py
+++ b/runtime/datamate-python/app/module/annotation/interface/editor.py
@@ -21,6 +21,8 @@ from app.module.annotation.schema.editor import (
    EditorTaskListResponse,
    EditorTaskSegmentResponse,
    EditorTaskResponse,
+    FileVersionCheckResponse,
+    UseNewVersionResponse,
    UpsertAnnotationRequest,
    UpsertAnnotationResponse,
 )
@@ -80,7 +82,9 @@ async def list_editor_tasks(
 async def get_editor_task(
    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
-    segment_index: Optional[int] = Query(None, alias="segmentIndex", description="段落索引（分段模式下使用）"),
+    segment_index: Optional[int] = Query(
+        None, alias="segmentIndex", description="段落索引（分段模式下使用）"
+    ),
    db: AsyncSession = Depends(get_db),
 ):
    service = AnnotationEditorService(db)
@@ -95,7 +99,9 @@ async def get_editor_task(
 async def get_editor_task_segment(
    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
-    segment_index: int = Query(..., ge=0, alias="segmentIndex", description="段落索引（从0开始）"),
+    segment_index: int = Query(
+        ..., ge=0, alias="segmentIndex", description="段落索引（从0开始）"
+    ),
    db: AsyncSession = Depends(get_db),
 ):
    service = AnnotationEditorService(db)
@@ -117,3 +123,36 @@ async def upsert_editor_annotation(
    result = await service.upsert_annotation(project_id, file_id, request)
    return StandardResponse(code=200, message="success", data=result)

+
+@router.get(
+    "/projects/{project_id}/files/{file_id}/version",
+    response_model=StandardResponse[FileVersionCheckResponse],
+)
+async def check_file_version(
+    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
+    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
+    db: AsyncSession = Depends(get_db),
+):
+    """
+    检查文件是否有新版本
+    """
+    service = AnnotationEditorService(db)
+    result = await service.check_file_version(project_id, file_id)
+    return StandardResponse(code=200, message="success", data=result)
+
+
+@router.post(
+    "/projects/{project_id}/files/{file_id}/use-new-version",
+    response_model=StandardResponse[UseNewVersionResponse],
+)
+async def use_new_version(
+    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
+    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
+    db: AsyncSession = Depends(get_db),
+):
+    """
+    使用文件新版本并清空标注
+    """
+    service = AnnotationEditorService(db)
+    result = await service.use_new_version(project_id, file_id)
+    return StandardResponse(code=200, message="success", data=result)
--- a/runtime/datamate-python/app/module/annotation/schema/editor.py
+++ b/runtime/datamate-python/app/module/annotation/schema/editor.py
@@ -34,17 +34,29 @@ class AnnotationStatus(str, Enum):
 class EditorProjectInfo(BaseModel):
    """编辑器项目元信息"""

-    project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID（t_dm_labeling_projects.id）")
-    dataset_id: str = Field(..., alias="datasetId", description="数据集ID（t_dm_datasets.id）")
+    project_id: str = Field(
+        ...,
+        alias="projectId",
+        description="DataMate 标注项目ID（t_dm_labeling_projects.id）",
+    )
+    dataset_id: str = Field(
+        ..., alias="datasetId", description="数据集ID（t_dm_datasets.id）"
+    )
    dataset_type: Optional[str] = Field(
        None,
        alias="datasetType",
        description="数据集类型（TEXT/IMAGE/AUDIO/VIDEO 等）",
    )
-    template_id: Optional[str] = Field(None, alias="templateId", description="模板ID（t_dm_annotation_templates.id）")
-    label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置")
+    template_id: Optional[str] = Field(
+        None, alias="templateId", description="模板ID（t_dm_annotation_templates.id）"
+    )
+    label_config: Optional[str] = Field(
+        None, alias="labelConfig", description="Label Studio XML 配置"
+    )
    supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器")
-    unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因（当 supported=false）")
+    unsupported_reason: Optional[str] = Field(
+        None, alias="unsupportedReason", description="不支持原因（当 supported=false）"
+    )

    model_config = ConfigDict(populate_by_name=True)

@@ -55,8 +67,12 @@ class EditorTaskListItem(BaseModel):
    file_id: str = Field(..., alias="fileId", description="文件ID")
    file_name: str = Field(..., alias="fileName", description="文件名")
    file_type: Optional[str] = Field(None, alias="fileType", description="文件类型")
-    has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注")
-    annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
+    has_annotation: bool = Field(
+        ..., alias="hasAnnotation", description="是否已有最终标注"
+    )
+    annotation_updated_at: Optional[datetime] = Field(
+        None, alias="annotationUpdatedAt", description="标注更新时间"
+    )
    annotation_status: Optional[AnnotationStatus] = Field(
        None,
        alias="annotationStatus",
@@ -82,9 +98,13 @@ class SegmentInfo(BaseModel):
    """段落摘要（用于文本分段标注）"""

    idx: int = Field(..., description="段落索引")
-    has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
+    has_annotation: bool = Field(
+        False, alias="hasAnnotation", description="该段落是否已有标注"
+    )
    line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引（从0开始）")
-    chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引（从0开始）")
+    chunk_index: int = Field(
+        0, alias="chunkIndex", description="行内分片索引（从0开始）"
+    )

    model_config = ConfigDict(populate_by_name=True)

@@ -93,12 +113,16 @@ class EditorTaskResponse(BaseModel):
    """编辑器任务详情（可直接喂给 Label Studio Editor 的 task 对象）"""

    task: Dict[str, Any] = Field(..., description="Label Studio task 对象")
-    annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
+    annotation_updated_at: Optional[datetime] = Field(
+        None, alias="annotationUpdatedAt", description="标注更新时间"
+    )

    # 分段相关字段
    segmented: bool = Field(False, description="是否启用分段模式")
    total_segments: int = Field(0, alias="totalSegments", description="总段落数")
-    current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
+    current_segment_index: int = Field(
+        0, alias="currentSegmentIndex", description="当前段落索引"
+    )

    model_config = ConfigDict(populate_by_name=True)

@@ -108,9 +132,13 @@ class SegmentDetail(BaseModel):

    idx: int = Field(..., description="段落索引")
    text: str = Field(..., description="段落文本")
-    has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
+    has_annotation: bool = Field(
+        False, alias="hasAnnotation", description="该段落是否已有标注"
+    )
    line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引（从0开始）")
-    chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引（从0开始）")
+    chunk_index: int = Field(
+        0, alias="chunkIndex", description="行内分片索引（从0开始）"
+    )

    model_config = ConfigDict(populate_by_name=True)

@@ -121,7 +149,9 @@ class EditorTaskSegmentResponse(BaseModel):
    segmented: bool = Field(False, description="是否启用分段模式")
    segment: Optional[SegmentDetail] = Field(None, description="段落内容")
    total_segments: int = Field(0, alias="totalSegments", description="总段落数")
-    current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
+    current_segment_index: int = Field(
+        0, alias="currentSegmentIndex", description="当前段落索引"
+    )

    model_config = ConfigDict(populate_by_name=True)

@@ -129,7 +159,9 @@ class EditorTaskSegmentResponse(BaseModel):
 class UpsertAnnotationRequest(BaseModel):
    """保存/覆盖最终标注（Label Studio annotation 原始对象）"""

-    annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象（包含 result 等）")
+    annotation: Dict[str, Any] = Field(
+        ..., description="Label Studio annotation 对象（包含 result 等）"
+    )
    annotation_status: Optional[AnnotationStatus] = Field(
        None,
        alias="annotationStatus",
@@ -153,8 +185,43 @@ class UpsertAnnotationRequest(BaseModel):
 class UpsertAnnotationResponse(BaseModel):
    """保存/覆盖最终标注响应"""

-    annotation_id: str = Field(..., alias="annotationId", description="标注结果ID（t_dm_annotation_results.id）")
+    annotation_id: str = Field(
+        ...,
+        alias="annotationId",
+        description="标注结果ID（t_dm_annotation_results.id）",
+    )
    updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间")

    model_config = ConfigDict(populate_by_name=True)

+
+class FileVersionCheckResponse(BaseModel):
+    """Response of a file version check: the file's current version vs. the version stored with its annotation."""
+
+    file_id: str = Field(..., alias="fileId", description="文件ID")  # required: file ID
+    current_file_version: int = Field(
+        ..., alias="currentFileVersion", description="当前文件版本"
+    )  # required: current version of the file
+    annotation_file_version: Optional[int] = Field(
+        None, alias="annotationFileVersion", description="标注时的文件版本"
+    )  # optional (defaults to None): file version at annotation time
+    has_new_version: bool = Field(
+        ..., alias="hasNewVersion", description="是否有新版本"
+    )  # required: whether a new version exists
+
+    model_config = ConfigDict(populate_by_name=True)  # accept field names as well as the camelCase aliases
+
+
+class UseNewVersionResponse(BaseModel):
+    """Response of the "use new version" operation: previous and current file versions plus a result message."""
+
+    file_id: str = Field(..., alias="fileId", description="文件ID")  # required: file ID
+    previous_file_version: Optional[int] = Field(
+        None, alias="previousFileVersion", description="之前标注的文件版本"
+    )  # optional (defaults to None): file version of the previous annotation
+    current_file_version: int = Field(
+        ..., alias="currentFileVersion", description="当前文件版本"
+    )  # required: current version of the file
+    message: str = Field(..., description="操作结果消息")  # required: operation result message
+
+    model_config = ConfigDict(populate_by_name=True)  # accept field names as well as the camelCase aliases
--- a/runtime/datamate-python/app/module/annotation/service/editor.py
+++ b/runtime/datamate-python/app/module/annotation/service/editor.py
@@ -23,7 +23,13 @@ from sqlalchemy.ext.asyncio import AsyncSession

 from app.core.config import settings
 from app.core.logging import get_logger
-from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
+from app.db.models import (
+    AnnotationResult,
+    Dataset,
+    DatasetFiles,
+    LabelingProject,
+    LabelingProjectFile,
+)
 from app.db.models.annotation_management import (
    ANNOTATION_STATUS_ANNOTATED,
    ANNOTATION_STATUS_IN_PROGRESS,
@@ -45,8 +51,12 @@ from app.module.annotation.schema.editor import (
 )
 from app.module.annotation.service.template import AnnotationTemplateService
 from app.module.annotation.service.knowledge_sync import KnowledgeSyncService
-from app.module.annotation.service.annotation_text_splitter import AnnotationTextSplitter
-from app.module.annotation.service.text_fetcher import fetch_text_content_via_download_api
+from app.module.annotation.service.annotation_text_splitter import (
+    AnnotationTextSplitter,
+)
+from app.module.annotation.service.text_fetcher import (
+    fetch_text_content_via_download_api,
+)

 logger = get_logger(__name__)

@@ -169,7 +179,9 @@ class AnnotationEditorService:
        template = await self.template_service.get_template(self.db, template_id)
        return getattr(template, "label_config", None) if template else None

-    async def _resolve_project_label_config(self, project: LabelingProject) -> Optional[str]:
+    async def _resolve_project_label_config(
+        self, project: LabelingProject
+    ) -> Optional[str]:
        label_config = None
        if project.configuration and isinstance(project.configuration, dict):
            label_config = project.configuration.get("label_config")
@@ -210,7 +222,9 @@ class AnnotationEditorService:
        if not label_config:
            return [default_key]
        target_categories = categories or set()
-        keys = cls._extract_object_value_keys_by_category(label_config, target_categories)
+        keys = cls._extract_object_value_keys_by_category(
+            label_config, target_categories
+        )
        if not keys:
            return [default_key]
        return keys
@@ -231,7 +245,9 @@ class AnnotationEditorService:
        return parsed if isinstance(parsed, dict) else None

    @classmethod
-    def _parse_jsonl_records(cls, text_content: str) -> List[Tuple[Optional[Dict[str, Any]], str]]:
+    def _parse_jsonl_records(
+        cls, text_content: str
+    ) -> List[Tuple[Optional[Dict[str, Any]], str]]:
        lines = [line for line in text_content.splitlines() if line.strip()]
        records: List[Tuple[Optional[Dict[str, Any]], str]] = []
        for line in lines:
@@ -277,7 +293,9 @@ class AnnotationEditorService:

    @classmethod
    def _extract_textual_value_keys(cls, label_config: str) -> List[str]:
-        return cls._extract_object_value_keys_by_category(label_config, TEXTUAL_OBJECT_CATEGORIES)
+        return cls._extract_object_value_keys_by_category(
+            label_config, TEXTUAL_OBJECT_CATEGORIES
+        )

    @staticmethod
    def _needs_placeholder(value: Any) -> bool:
@@ -287,7 +305,9 @@ class AnnotationEditorService:
            return True
        return False

-    def _apply_text_placeholders(self, data: Dict[str, Any], label_config: Optional[str]) -> None:
+    def _apply_text_placeholders(
+        self, data: Dict[str, Any], label_config: Optional[str]
+    ) -> None:
        if not label_config:
            return
        for key in self._extract_textual_value_keys(label_config):
@@ -346,7 +366,9 @@ class AnnotationEditorService:

                if i > 0:
                    prev = children[i - 1]
-                    if prev.tag == "Header" and self._header_already_present(prev, obj_name):
+                    if prev.tag == "Header" and self._header_already_present(
+                        prev, obj_name
+                    ):
                        i += 1
                        continue

@@ -362,7 +384,9 @@ class AnnotationEditorService:
        return ET.tostring(root, encoding="unicode")

    @staticmethod
-    def _extract_segment_annotations(payload: Optional[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+    def _extract_segment_annotations(
+        payload: Optional[Dict[str, Any]],
+    ) -> Dict[str, Dict[str, Any]]:
        if not payload or not isinstance(payload, dict):
            return {}
        segments = payload.get(SEGMENTS_KEY)
@@ -440,13 +464,17 @@ class AnnotationEditorService:
        file_record: DatasetFiles,
        file_id: str,
    ) -> Optional[int]:
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
+            await self._get_dataset_type(project.dataset_id)
+        )
        if dataset_type != DATASET_TYPE_TEXT:
            return None
        if not self._resolve_segmentation_enabled(project):
            return None

-        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+        text_content = await self._fetch_text_content_via_download_api(
+            project.dataset_id, file_id
+        )
        if not isinstance(text_content, str):
            return None

@@ -495,7 +523,9 @@ class AnnotationEditorService:
        file_type_lower = func.lower(DatasetFiles.file_type)
        file_name_lower = func.lower(DatasetFiles.file_name)
        type_condition = file_type_lower.in_(SOURCE_DOCUMENT_TYPES)
-        name_conditions = [file_name_lower.like(f"%{ext}") for ext in SOURCE_DOCUMENT_EXTENSIONS]
+        name_conditions = [
+            file_name_lower.like(f"%{ext}") for ext in SOURCE_DOCUMENT_EXTENSIONS
+        ]
        return or_(type_condition, *name_conditions)

    def _build_task_data(
@@ -545,13 +575,17 @@ class AnnotationEditorService:
        records: List[Tuple[Optional[Dict[str, Any]], str]],
        record_texts: List[str],
        segment_annotation_keys: set[str],
-    ) -> Tuple[List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]]:
+    ) -> Tuple[
+        List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]
+    ]:
        splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
        segments: List[SegmentInfo] = []
        segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
        segment_cursor = 0

-        for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
+        for record_index, ((payload, raw_text), record_text) in enumerate(
+            zip(records, record_texts)
+        ):
            normalized_text = record_text or ""
            if len(normalized_text) > self.SEGMENT_THRESHOLD:
                raw_segments = splitter.split(normalized_text)
@@ -559,12 +593,15 @@ class AnnotationEditorService:
                    segments.append(
                        SegmentInfo(
                            idx=segment_cursor,
-                            hasAnnotation=str(segment_cursor) in segment_annotation_keys,
+                            hasAnnotation=str(segment_cursor)
+                            in segment_annotation_keys,
                            lineIndex=record_index,
                            chunkIndex=chunk_index,
                        )
                    )
-                    segment_contexts.append((payload, raw_text, seg["text"], record_index, chunk_index))
+                    segment_contexts.append(
+                        (payload, raw_text, seg["text"], record_index, chunk_index)
+                    )
                    segment_cursor += 1
            else:
                segments.append(
@@ -575,11 +612,15 @@ class AnnotationEditorService:
                        chunkIndex=0,
                    )
                )
-                segment_contexts.append((payload, raw_text, normalized_text, record_index, 0))
+                segment_contexts.append(
+                    (payload, raw_text, normalized_text, record_index, 0)
+                )
                segment_cursor += 1

        if not segments:
-            segments = [SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)]
+            segments = [
+                SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)
+            ]
            segment_contexts = [(None, "", "", 0, 0)]

        return segments, segment_contexts
@@ -587,7 +628,9 @@ class AnnotationEditorService:
    async def get_project_info(self, project_id: str) -> EditorProjectInfo:
        project = await self._get_project_or_404(project_id)

-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
+            await self._get_dataset_type(project.dataset_id)
+        )
        supported = dataset_type in SUPPORTED_EDITOR_DATASET_TYPES
        unsupported_reason = None
        if not supported:
@@ -653,7 +696,12 @@ class AnnotationEditorService:
        rows = files_result.all()

        items: List[EditorTaskListItem] = []
-        for file_record, annotation_id, annotation_updated_at, annotation_status in rows:
+        for (
+            file_record,
+            annotation_id,
+            annotation_updated_at,
+            annotation_status,
+        ) in rows:
            fid = str(file_record.id)  # type: ignore[arg-type]
            items.append(
                EditorTaskListItem(
@@ -675,7 +723,9 @@ class AnnotationEditorService:
            size=size,
        )

-    async def _fetch_text_content_via_download_api(self, dataset_id: str, file_id: str) -> str:
+    async def _fetch_text_content_via_download_api(
+        self, dataset_id: str, file_id: str
+    ) -> str:
        return await fetch_text_content_via_download_api(dataset_id, file_id)

    async def get_task(
@@ -686,7 +736,9 @@ class AnnotationEditorService:
    ) -> EditorTaskResponse:
        project = await self._get_project_or_404(project_id)

-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
+            await self._get_dataset_type(project.dataset_id)
+        )
        if dataset_type not in SUPPORTED_EDITOR_DATASET_TYPES:
            raise HTTPException(
                status_code=400,
@@ -701,7 +753,9 @@ class AnnotationEditorService:
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
-            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+            raise HTTPException(
+                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
+            )

        if dataset_type == DATASET_TYPE_IMAGE:
            return await self._build_image_task(project, file_record, file_id)
@@ -722,7 +776,9 @@ class AnnotationEditorService:
    ) -> EditorTaskSegmentResponse:
        project = await self._get_project_or_404(project_id)

-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
+            await self._get_dataset_type(project.dataset_id)
+        )
        if dataset_type != DATASET_TYPE_TEXT:
            raise HTTPException(
                status_code=400,
@@ -737,7 +793,9 @@ class AnnotationEditorService:
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
-            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+            raise HTTPException(
+                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
+            )

        if not self._resolve_segmentation_enabled(project):
            return EditorTaskSegmentResponse(
@@ -747,7 +805,9 @@ class AnnotationEditorService:
                currentSegmentIndex=0,
            )

-        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+        text_content = await self._fetch_text_content_via_download_api(
+            project.dataset_id, file_id
+        )
        assert isinstance(text_content, str)
        label_config = await self._resolve_project_label_config(project)
        primary_text_key = self._resolve_primary_text_key(label_config)
@@ -839,7 +899,9 @@ class AnnotationEditorService:
        file_id: str,
        segment_index: Optional[int],
    ) -> EditorTaskResponse:
-        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+        text_content = await self._fetch_text_content_via_download_api(
+            project.dataset_id, file_id
+        )
        assert isinstance(text_content, str)
        label_config = await self._resolve_project_label_config(project)
        primary_text_key = self._resolve_primary_text_key(label_config)
@@ -885,7 +947,8 @@ class AnnotationEditorService:
        if not segmentation_enabled:
            segment_index = None
        needs_segmentation = segmentation_enabled and (
-            len(records) > 1 or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
+            len(records) > 1
+            or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
        )
        segments: List[SegmentInfo] = []
        segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
@@ -903,10 +966,14 @@ class AnnotationEditorService:
                segment_annotation_keys,
            )
            current_segment_index = segment_index if segment_index is not None else 0
-            if current_segment_index < 0 or current_segment_index >= len(segment_contexts):
+            if current_segment_index < 0 or current_segment_index >= len(
+                segment_contexts
+            ):
                current_segment_index = 0

-            selected_payload, _, display_text, _, _ = segment_contexts[current_segment_index]
+            selected_payload, _, display_text, _, _ = segment_contexts[
+                current_segment_index
+            ]

        # 构造 task 对象
        task_data = self._build_task_data(
@@ -936,11 +1003,16 @@ class AnnotationEditorService:
                # 分段模式：获取当前段落的标注
                seg_ann = segment_annotations.get(str(current_segment_index), {})
                stored = {
-                    "id": self._make_ls_annotation_id(project.id, file_id) + current_segment_index,
+                    "id": self._make_ls_annotation_id(project.id, file_id)
+                    + current_segment_index,
                    "task": ls_task_id,
                    "result": seg_ann.get(SEGMENT_RESULT_KEY, []),
-                    "created_at": seg_ann.get(SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
-                    "updated_at": seg_ann.get(SEGMENT_UPDATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
+                    "created_at": seg_ann.get(
+                        SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
+                    ),
+                    "updated_at": seg_ann.get(
+                        SEGMENT_UPDATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
+                    ),
                }
                task["annotations"] = [stored]
            elif not needs_segmentation and not has_segmented_annotation:
@@ -952,7 +1024,10 @@ class AnnotationEditorService:
                task["annotations"] = [stored]
            else:
                # 首次从非分段切换到分段：提供空标注
-                empty_ann_id = self._make_ls_annotation_id(project.id, file_id) + current_segment_index
+                empty_ann_id = (
+                    self._make_ls_annotation_id(project.id, file_id)
+                    + current_segment_index
+                )
                task["annotations"] = [
                    {
                        "id": empty_ann_id,
@@ -994,7 +1069,9 @@ class AnnotationEditorService:
        categories: set[str],
    ) -> EditorTaskResponse:
        label_config = await self._resolve_project_label_config(project)
-        media_keys = self._resolve_media_value_keys(label_config, default_key, categories)
+        media_keys = self._resolve_media_value_keys(
+            label_config, default_key, categories
+        )
        preview_url = self._build_file_preview_url(project.dataset_id, file_id)
        file_name = str(getattr(file_record, "file_name", ""))

@@ -1097,7 +1174,9 @@ class AnnotationEditorService:
            categories=MEDIA_OBJECT_CATEGORIES,
        )

-    async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
+    async def upsert_annotation(
+        self, project_id: str, file_id: str, request: UpsertAnnotationRequest
+    ) -> UpsertAnnotationResponse:
        project = await self._get_project_or_404(project_id)

        # 校验文件归属
@@ -1112,7 +1191,26 @@ class AnnotationEditorService:
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
-            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+            raise HTTPException(
+                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
+            )
+
+        # 检查文件版本是否变化
+        current_file_version = file_record.version
+        existing_result = await self.db.execute(
+            select(AnnotationResult).where(
+                AnnotationResult.project_id == project_id,
+                AnnotationResult.file_id == file_id,
+            )
+        )
+        existing_annotation = existing_result.scalar_one_or_none()
+
+        if existing_annotation and existing_annotation.file_version is not None:
+            if existing_annotation.file_version != current_file_version:
+                raise HTTPException(
+                    status_code=409,
+                    detail=f"文件已更新到新版本（当前版本: {current_file_version}, 标注版本: {existing_annotation.file_version}），请使用新版本",
+                )

        annotation_payload = dict(request.annotation or {})
        result = annotation_payload.get("result")
@@ -1127,7 +1225,9 @@ class AnnotationEditorService:
        if request.segment_index is not None:
            segment_total_hint = self._resolve_segment_total(annotation_payload)
            if segment_total_hint is None:
-                segment_total_hint = await self._compute_segment_total(project, file_record, file_id)
+                segment_total_hint = await self._compute_segment_total(
+                    project, file_record, file_id
+                )

        existing_result = await self.db.execute(
            select(AnnotationResult)
@@ -1161,11 +1261,16 @@ class AnnotationEditorService:
            # 非分段模式：直接使用传入的 annotation
            annotation_payload["task"] = ls_task_id
            if not isinstance(annotation_payload.get("id"), int):
-                annotation_payload["id"] = self._make_ls_annotation_id(project_id, file_id)
+                annotation_payload["id"] = self._make_ls_annotation_id(
+                    project_id, file_id
+                )
            final_payload = annotation_payload

        requested_status = request.annotation_status
-        if requested_status is not None and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES:
+        if (
+            requested_status is not None
+            and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES
+        ):
            raise HTTPException(status_code=400, detail="annotationStatus 不合法")

        segment_total = None
@@ -1194,7 +1299,10 @@ class AnnotationEditorService:
                elif requested_status == ANNOTATION_STATUS_NOT_APPLICABLE:
                    final_status = ANNOTATION_STATUS_NOT_APPLICABLE
                else:
-                    raise HTTPException(status_code=400, detail="未发现标注内容，请确认无标注/不适用后再保存")
+                    raise HTTPException(
+                        status_code=400,
+                        detail="未发现标注内容，请确认无标注/不适用后再保存",
+                    )

        if request.segment_index is not None:
            segment_entries = self._extract_segment_annotations(final_payload)
@@ -1210,11 +1318,16 @@ class AnnotationEditorService:

        if existing:
            if request.expected_updated_at and existing.updated_at:
-                if existing.updated_at != request.expected_updated_at.replace(tzinfo=None):
-                    raise HTTPException(status_code=409, detail="标注已被更新，请刷新后重试")
+                if existing.updated_at != request.expected_updated_at.replace(
+                    tzinfo=None
+                ):
+                    raise HTTPException(
+                        status_code=409, detail="标注已被更新，请刷新后重试"
+                    )

            existing.annotation = final_payload  # type: ignore[assignment]
            existing.annotation_status = final_status  # type: ignore[assignment]
+            existing.file_version = current_file_version  # type: ignore[assignment]
            existing.updated_at = now  # type: ignore[assignment]
            await self.db.commit()
            await self.db.refresh(existing)
@@ -1223,7 +1336,9 @@ class AnnotationEditorService:
                annotationId=existing.id,
                updatedAt=existing.updated_at or now,
            )
-            await self._sync_annotation_to_knowledge(project, file_record, final_payload, existing.updated_at)
+            await self._sync_annotation_to_knowledge(
+                project, file_record, final_payload, existing.updated_at
+            )
            return response

        new_id = str(uuid.uuid4())
@@ -1233,6 +1348,7 @@ class AnnotationEditorService:
            file_id=file_id,
            annotation=final_payload,
            annotation_status=final_status,
+            file_version=current_file_version,
            created_at=now,
            updated_at=now,
        )
@@ -1244,7 +1360,9 @@ class AnnotationEditorService:
            annotationId=record.id,
            updatedAt=record.updated_at or now,
        )
-        await self._sync_annotation_to_knowledge(project, file_record, final_payload, record.updated_at)
+        await self._sync_annotation_to_knowledge(
+            project, file_record, final_payload, record.updated_at
+        )
        return response

    def _merge_segment_annotation(
@@ -1292,7 +1410,9 @@ class AnnotationEditorService:
        # 更新指定段落的标注
        segments[str(segment_index)] = {
            SEGMENT_RESULT_KEY: new_annotation.get(SEGMENT_RESULT_KEY, []),
-            SEGMENT_CREATED_AT_KEY: new_annotation.get(SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
+            SEGMENT_CREATED_AT_KEY: new_annotation.get(
+                SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
+            ),
            SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
        }

@@ -1317,9 +1437,7 @@ class AnnotationEditorService:
            logger.warning("标注同步知识管理失败：%s", exc)

    async def precompute_segmentation_for_project(
-        self,
-        project_id: str,
-        max_retries: int = 3
+        self, project_id: str, max_retries: int = 3
    ) -> Dict[str, Any]:
        """
        为指定项目的所有文本文件预计算切片结构并持久化到数据库
@@ -1332,7 +1450,9 @@ class AnnotationEditorService:
            统计信息：{total_files, succeeded, failed}
        """
        project = await self._get_project_or_404(project_id)
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
+            await self._get_dataset_type(project.dataset_id)
+        )

        # 只处理文本数据集
        if dataset_type != DATASET_TYPE_TEXT:
@@ -1364,9 +1484,8 @@ class AnnotationEditorService:
        for file_record in file_records:
            file_type = str(getattr(file_record, "file_type", "") or "").lower()
            file_name = str(getattr(file_record, "file_name", "")).lower()
-            is_source_document = (
-                file_type in SOURCE_DOCUMENT_TYPES or
-                any(file_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS)
+            is_source_document = file_type in SOURCE_DOCUMENT_TYPES or any(
+                file_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS
            )
            if not is_source_document:
                valid_files.append(file_record)
@@ -1385,7 +1504,9 @@ class AnnotationEditorService:
            for retry in range(max_retries):
                try:
                    # 读取文本内容
-                    text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+                    text_content = await self._fetch_text_content_via_download_api(
+                        project.dataset_id, file_id
+                    )
                    if not isinstance(text_content, str):
                        logger.warning(f"文件 {file_id} 内容不是字符串，跳过切片")
                        failed += 1
@@ -1404,7 +1525,9 @@ class AnnotationEditorService:
                        records = [(None, text_content)]

                    record_texts = [
-                        self._resolve_primary_text_value(payload, raw_text, primary_text_key)
+                        self._resolve_primary_text_value(
+                            payload, raw_text, primary_text_key
+                        )
                        for payload, raw_text in records
                    ]
                    if not record_texts:
@@ -1412,7 +1535,8 @@ class AnnotationEditorService:

                    # 判断是否需要分段
                    needs_segmentation = len(records) > 1 or any(
-                        len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts
+                        len(text or "") > self.SEGMENT_THRESHOLD
+                        for text in record_texts
                    )

                    if not needs_segmentation:
@@ -1425,7 +1549,9 @@ class AnnotationEditorService:
                    segment_cursor = 0
                    segments = {}

-                    for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
+                    for record_index, ((payload, raw_text), record_text) in enumerate(
+                        zip(records, record_texts)
+                    ):
                        normalized_text = record_text or ""

                        if len(normalized_text) > self.SEGMENT_THRESHOLD:
@@ -1433,15 +1559,19 @@ class AnnotationEditorService:
                            for chunk_index, seg in enumerate(raw_segments):
                                segments[str(segment_cursor)] = {
                                    SEGMENT_RESULT_KEY: [],
-                                    SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
-                                    SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
+                                    SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat()
+                                    + "Z",
+                                    SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat()
+                                    + "Z",
                                }
                                segment_cursor += 1
                        else:
                            segments[str(segment_cursor)] = {
                                SEGMENT_RESULT_KEY: [],
-                                SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
-                                SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
+                                SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat()
+                                + "Z",
+                                SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat()
+                                + "Z",
                            }
                            segment_cursor += 1

@@ -1508,3 +1638,145 @@ class AnnotationEditorService:
            "failed": failed,
        }

+    async def check_file_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
+        """
+        Check whether a file is newer than the version its annotation was saved against.
+
+        Args:
+            project_id: ID of the labeling project.
+            file_id: ID of the file to inspect.
+
+        Returns:
+            Dict with fileId, currentFileVersion, annotationFileVersion, hasNewVersion.
+
+        Raises:
+            HTTPException: 404 when the file is not found for this project and dataset.
+        """
+        project = await self._get_project_or_404(project_id)
+
+        # The file must be linked to this project and live in the project's dataset.
+        file_stmt = (
+            select(DatasetFiles)
+            .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
+            .where(
+                LabelingProjectFile.project_id == project.id,
+                DatasetFiles.id == file_id,
+                DatasetFiles.dataset_id == project.dataset_id,
+            )
+        )
+        file_record = (await self.db.execute(file_stmt)).scalar_one_or_none()
+        if not file_record:
+            raise HTTPException(
+                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
+            )
+
+        # Fetch the stored annotation for this project/file pair, if there is one.
+        annotation_stmt = select(AnnotationResult).where(
+            AnnotationResult.project_id == project_id,
+            AnnotationResult.file_id == file_id,
+        )
+        annotation = (await self.db.execute(annotation_stmt)).scalar_one_or_none()
+
+        latest_version = file_record.version
+        recorded_version = annotation.file_version if annotation else None
+
+        # Only an existing annotation can be outdated: unrecorded or older version.
+        if annotation is None or recorded_version is None:
+            is_outdated = annotation is not None
+        else:
+            is_outdated = latest_version > recorded_version
+
+        return {
+            "fileId": file_id,
+            "currentFileVersion": latest_version,
+            "annotationFileVersion": recorded_version,
+            "hasNewVersion": is_outdated,
+        }
+
+    async def use_new_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
+        """
+        Move an annotation to the file's current version and clear its results.
+
+        Args:
+            project_id: ID of the labeling project.
+            file_id: ID of the file whose annotation is reset.
+
+        Returns:
+            Dict with fileId, previousFileVersion, currentFileVersion and message.
+
+        Raises:
+            HTTPException: 404 if the file or its annotation is missing; 400 if the
+                file version is not greater than the annotation's recorded version.
+        """
+        project = await self._get_project_or_404(project_id)
+
+        # The file must be linked to this project and belong to the project's dataset.
+        file_result = await self.db.execute(
+            select(DatasetFiles)
+            .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
+            .where(
+                LabelingProjectFile.project_id == project.id,
+                DatasetFiles.id == file_id,
+                DatasetFiles.dataset_id == project.dataset_id,
+            )
+        )
+        file_record = file_result.scalar_one_or_none()
+        if not file_record:
+            raise HTTPException(
+                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
+            )
+
+        # Select the annotation row FOR UPDATE while it is being reset.
+        annotation_result = await self.db.execute(
+            select(AnnotationResult)
+            .where(
+                AnnotationResult.project_id == project_id,
+                AnnotationResult.file_id == file_id,
+            )
+            .with_for_update()
+        )
+        annotation = annotation_result.scalar_one_or_none()
+        if not annotation:
+            raise HTTPException(status_code=404, detail=f"标注不存在: {file_id}")
+
+        current_file_version = file_record.version
+        previous_file_version = annotation.file_version  # read before it is overwritten
+
+        # Rows with no recorded version skip this check and may always switch.
+        if annotation.file_version is not None:
+            if current_file_version <= annotation.file_version:
+                raise HTTPException(
+                    status_code=400,
+                    detail=f"文件版本（{current_file_version}）未更新或低于标注版本（{annotation.file_version}）",
+                )
+
+        # Reset the payload; segmented ones keep their segments with emptied results.
+        # New dicts are built because mutating the loaded JSON in place can leave
+        # old == new, so the ORM may see no change and skip writing the column.
+        stored = annotation.annotation
+        if isinstance(stored, dict) and stored.get(SEGMENTED_KEY):
+            cleared_segments = {
+                seg_id: {**seg, SEGMENT_RESULT_KEY: []} if isinstance(seg, dict) else seg
+                for seg_id, seg in (stored.get(SEGMENTS_KEY) or {}).items()
+            }
+            annotation.annotation = {
+                SEGMENTED_KEY: True,
+                "version": stored.get("version", 1),
+                SEGMENTS_KEY: cleared_segments,
+                "total_segments": stored.get("total_segments", len(cleared_segments)),
+            }
+        else:
+            annotation.annotation = {}
+        annotation.annotation_status = ANNOTATION_STATUS_NO_ANNOTATION
+        annotation.file_version = current_file_version
+        annotation.updated_at = datetime.utcnow()
+
+        await self.db.commit()
+        await self.db.refresh(annotation)
+
+        return {
+            "fileId": file_id,
+            "previousFileVersion": previous_file_version,
+            "currentFileVersion": current_file_version,
+            "message": "已使用新版本并清空标注",
+        }