feat(annotation): implement file version management for annotation feature

Add support for detecting new file versions and switching to them:

Backend Changes:
- Add file_version column to AnnotationResult model
- Create Alembic migration for database schema update
- Implement check_file_version() method to compare annotation and file versions
- Implement use_new_version() method to clear annotations and update version
- Update upsert_annotation() to record file version when saving
- Add new API endpoints: GET /version and POST /use-new-version
- Add FileVersionCheckResponse and UseNewVersionResponse schemas

Frontend Changes:
- Add checkFileVersionUsingGet and useNewVersionUsingPost API calls
- Add version warning banner showing current vs latest file version
- Add 'Use New Version' button with confirmation dialog
- Clear version info state when switching files to avoid stale warnings

Bug Fixes:
- Fix previousFileVersion returning updated value (save before update)
- Handle null file_version for historical data compatibility
- Fix segmented annotation clearing (preserve structure, clear results)
- Fix files without annotations incorrectly showing new version warnings
- Preserve total_segments when clearing segmented annotations

Files Modified:
- frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
- frontend/src/pages/DataAnnotation/annotation.api.ts
- runtime/datamate-python/app/db/models/annotation_management.py
- runtime/datamate-python/app/module/annotation/interface/editor.py
- runtime/datamate-python/app/module/annotation/schema/editor.py
- runtime/datamate-python/app/module/annotation/service/editor.py

New Files:
- runtime/datamate-python/alembic.ini
- runtime/datamate-python/alembic/env.py
- runtime/datamate-python/alembic/script.py.mako
- runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
2026-02-05 20:12:07 +08:00
parent 4143bc75f9
commit f5cb265667
10 changed files with 915 additions and 171 deletions
--- a/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
+++ b/frontend/src/pages/DataAnnotation/Annotate/LabelStudioTextEditor.tsx
@@ -8,6 +8,9 @@ import {
  getEditorTaskUsingGet,
  listEditorTasksUsingGet,
  upsertEditorAnnotationUsingPut,
  checkFileVersionUsingGet,
  useNewVersionUsingPost,
  type FileVersionCheckResponse,
 } from "../annotation.api";
 import { AnnotationResultStatus } from "../annotation.model";
@@ -269,6 +272,11 @@ export default function LabelStudioTextEditor() {
    return Array.from({ length: segmentTotal }, (_, index) => index);
  }, [segmentTotal]);
  // File-version state: the latest version-check result for the selected file,
  // a flag that is true while a version check request is in flight, and a flag
  // that is true while the "use new version" request is in flight.
  const [fileVersionInfo, setFileVersionInfo] = useState<FileVersionCheckResponse | null>(null);
  const [checkingFileVersion, setCheckingFileVersion] = useState(false);
  const [usingNewVersion, setUsingNewVersion] = useState(false);
  const focusIframe = useCallback(() => {
    const iframe = iframeRef.current;
    if (!iframe) return;
@@ -548,6 +556,77 @@ export default function LabelStudioTextEditor() {
    }
  }, [iframeReady, message, postToIframe, project, projectId]);
  /**
   * Asks the backend whether `fileId` has a newer file version than the one
   * recorded on its annotation, stores the response in `fileVersionInfo`, and
   * shows a warning modal when the response reports a newer version.
   *
   * Does nothing when there is no project or file id. Request failures are
   * only logged to the console; the loading flag is always reset.
   *
   * NOTE(review): a slow response for a previously selected file can still
   * overwrite the state of the currently selected file. Guarding against that
   * needs a "latest request" marker that lives outside this callback.
   *
   * @param fileId - ID of the file whose version should be checked.
   */
  const checkFileVersion = useCallback(async (fileId: string) => {
    if (!projectId || !fileId) return;
    setCheckingFileVersion(true);
    try {
      const resp = (await checkFileVersionUsingGet(projectId, fileId)) as ApiResponse<FileVersionCheckResponse>;
      const data = resp?.data;
      if (!data) return;
      setFileVersionInfo(data);
      if (!data.hasNewVersion) return;
      modal.warning({
        title: "文件有新版本",
        content: (
          <div className="flex flex-col gap-2">
            <Typography.Text>
              {/* annotationFileVersion may be null; show a placeholder instead of an empty gap. */}
              文件已更新到新版本（当前版本: {data.currentFileVersion}，标注版本: {data.annotationFileVersion ?? "未知"}）。
            </Typography.Text>
            <Typography.Text type="secondary">
              点击"使用新版本"可清空当前标注并使用最新版本的文件内容。
            </Typography.Text>
          </div>
        ),
        okText: "我知道了",
      });
    } catch (e) {
      console.error("检查文件版本失败", e);
    } finally {
      setCheckingFileVersion(false);
    }
    // Only values actually read inside the callback are listed; an unused
    // dependency would re-create the callback and re-trigger the effect that
    // depends on it.
  }, [modal, projectId]);
  /**
   * Opens a confirmation dialog for switching the selected file to its latest
   * version. On confirm it calls the "use new version" endpoint, clears the
   * stored version info, reloads the task list and re-initialises the editor
   * for the selected file.
   *
   * The handler itself is synchronous: only the dialog's `onOk` performs async
   * work, and its promise is returned to the modal.
   */
  const handleUseNewVersion = useCallback(() => {
    if (!selectedFileId) return;
    modal.confirm({
      title: "确认使用新版本",
      content: (
        <div className="flex flex-col gap-2">
          <Typography.Text>
            确认使用新版本？这将清空当前标注并使用最新版本的文件内容。
          </Typography.Text>
          {fileVersionInfo && (
            <Typography.Text type="secondary">
              {/* annotationFileVersion may be null; show a placeholder instead of an empty gap. */}
              当前标注版本: {fileVersionInfo.annotationFileVersion ?? "未知"}，最新文件版本: {fileVersionInfo.currentFileVersion}
            </Typography.Text>
          )}
        </div>
      ),
      okText: "确认",
      okType: "danger",
      cancelText: "取消",
      onOk: async () => {
        if (!projectId || !selectedFileId) return;
        setUsingNewVersion(true);
        try {
          await useNewVersionUsingPost(projectId, selectedFileId);
          message.success("已使用新版本并清空标注");
          setFileVersionInfo(null);
          // Refresh the list and the editor so they reflect the cleared annotation.
          await loadTasks({ mode: "reset" });
          await initEditorForFile(selectedFileId);
        } catch (e) {
          console.error("使用新版本失败", e);
          message.error("使用新版本失败");
        } finally {
          setUsingNewVersion(false);
        }
      },
    });
  }, [modal, message, projectId, selectedFileId, fileVersionInfo, loadTasks, initEditorForFile]);
  const advanceAfterSave = useCallback(async (fileId: string, segmentIndex?: number) => {
    if (!fileId) return;
    if (segmented && segmentTotal > 0) {
@@ -815,6 +894,13 @@ export default function LabelStudioTextEditor() {
    return () => window.removeEventListener("message", handler);
  }, [message, origin, saveFromExport]);
  // Re-check the file version whenever the selected file, the project's
  // "supported" flag, or the check callback changes.
  useEffect(() => {
    // Always drop the previous result first, so a "new version" banner cannot
    // outlive its file when the selection is cleared or the project is
    // not supported.
    setFileVersionInfo(null);
    if (!selectedFileId || !project?.supported) return;
    // checkFileVersion handles its own errors; `void` marks the promise as
    // intentionally not awaited.
    void checkFileVersion(selectedFileId);
  }, [selectedFileId, project?.supported, checkFileVersion]);
  const canLoadMore = taskTotalPages > 0 && taskPage + 1 < taskTotalPages;
  const saveDisabled =
    !iframeReady || !selectedFileId || saving || loadingTaskDetail;
@@ -896,6 +982,22 @@ export default function LabelStudioTextEditor() {
          </Typography.Title>
        </div>
        <div className="flex items-center justify-center">
          {/* New-version banner: shown only when the last version check reported a newer file version. */}
          {fileVersionInfo?.hasNewVersion && (
            <div className="flex items-center gap-2 mr-4">
              <Typography.Text type="warning" className="text-xs">
                {/* ">" must be escaped in JSX text; annotationFileVersion may be null, so fall back to a placeholder. */}
                ⚠ 文件有新版本（{fileVersionInfo.currentFileVersion} &gt; {fileVersionInfo.annotationFileVersion ?? "未知"}）
              </Typography.Text>
              <Button
                size="small"
                type="primary"
                danger
                loading={usingNewVersion}
                onClick={handleUseNewVersion}
              >
                使用新版本
              </Button>
            </div>
          )}
          <Button
            type="primary"
            icon={<SaveOutlined />}
--- a/frontend/src/pages/DataAnnotation/annotation.api.ts
+++ b/frontend/src/pages/DataAnnotation/annotation.api.ts
@@ -100,6 +100,29 @@ export function upsertEditorAnnotationUsingPut(
  return put(`/api/annotation/editor/projects/${projectId}/tasks/${fileId}/annotation`, data);
 }
 /** Response of the file version check endpoint (`GET .../files/{fileId}/version`). */
 export interface FileVersionCheckResponse {
  /** ID of the file that was checked. */
  fileId: string;
  /** Current version of the file. */
  currentFileVersion: number;
  /** File version recorded when the annotation was made; may be null. */
  annotationFileVersion: number | null;
  /** Whether the backend reports a newer file version. */
  hasNewVersion: boolean;
 }
 /**
  * Requests the version-check result for one file of an annotation project.
  *
  * @param projectId - Annotation project ID, interpolated into the URL as-is.
  * @param fileId - File ID, interpolated into the URL as-is.
  * @returns Whatever the shared `get` helper returns for this URL.
  */
 export function checkFileVersionUsingGet(projectId: string, fileId: string) {
  const versionUrl = `/api/annotation/editor/projects/${projectId}/files/${fileId}/version`;
  return get(versionUrl);
 }
 /** Response of the "use new version" endpoint (`POST .../files/{fileId}/use-new-version`). */
 export interface UseNewVersionResponse {
  /** ID of the file that was switched. */
  fileId: string;
  /** File version the annotation was previously recorded against; may be null. */
  previousFileVersion: number | null;
  /** Current version of the file. */
  currentFileVersion: number;
  /** Operation result message from the backend. */
  message: string;
 }
 /**
  * Posts the "use new version" request for one file of an annotation project.
  * The request body is an empty object.
  *
  * @param projectId - Annotation project ID, interpolated into the URL as-is.
  * @param fileId - File ID, interpolated into the URL as-is.
  * @returns Whatever the shared `post` helper returns for this URL.
  */
 export function useNewVersionUsingPost(projectId: string, fileId: string) {
  const useNewVersionUrl = `/api/annotation/editor/projects/${projectId}/files/${fileId}/use-new-version`;
  const emptyBody = {};
  return post(useNewVersionUrl, emptyBody);
 }
 // =====================
 // 标注数据导出
 // =====================
--- a/runtime/datamate-python/alembic.ini
+++ b/runtime/datamate-python/alembic.ini
@@ -0,0 +1,40 @@
 [alembic]
 script_location = alembic
 file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d-%%(rev)s_%%(slug)s
 prepend_sys_path = .
 [post_write_hooks]
 [loggers]
 keys = root,sqlalchemy,alembic
 [handlers]
 keys = console
 [formatters]
 keys = generic
 [logger_root]
 level = WARN
 handlers = console
 qualname =
 [logger_sqlalchemy]
 level = WARN
 handlers =
 qualname = sqlalchemy.engine
 [logger_alembic]
 level = INFO
 handlers =
 qualname = alembic
 [handler_console]
 class = StreamHandler
 args = (sys.stderr,)
 level = NOTSET
 formatter = generic
 [formatter_generic]
 format = %(levelname)-5.5s [%(name)s] %(message)s
 datefmt = %H:%M:%S
--- a/runtime/datamate-python/alembic/env.py
+++ b/runtime/datamate-python/alembic/env.py
@@ -0,0 +1,54 @@
 """Alembic environment configuration"""
 from logging.config import fileConfig
 from sqlalchemy import engine_from_config, pool
 from alembic import context
 import sys
 import os
 # Put the parent of this file's directory at the front of sys.path so the
 # `app` package imports below resolve.
 sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
 from app.db.session import Base
 # NOTE(review): presumably imported for its side effect of registering every
 # model on Base.metadata — confirm app.db.models exports all model classes.
 from app.db.models import *
 # Alembic Config object for the .ini file in use.
 config = context.config
 # Set up Python logging from the .ini file when one was provided.
 if config.config_file_name is not None:
    fileConfig(config.config_file_name)
 # Metadata handed to Alembic via context.configure() below.
 target_metadata = Base.metadata
 def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Configures the Alembic context from the ``sqlalchemy.url`` main option
    only (no Engine or connection is created here), with literal bind
    rendering and the "named" paramstyle, then runs the migrations inside
    ``context.begin_transaction()``.
    """
    # NOTE(review): the alembic.ini added in the same change does not appear to
    # define sqlalchemy.url — confirm the URL is supplied some other way.
    url = config.get_main_option("sqlalchemy.url")
    context.configure(
        url=url,
        target_metadata=target_metadata,
        literal_binds=True,
        dialect_opts={"paramstyle": "named"},
    )
    with context.begin_transaction():
        context.run_migrations()
 def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed keys of the ini
    section named by ``config.config_ini_section`` (using ``NullPool``),
    opens a connection, binds it to the Alembic context, and runs the
    migrations inside ``context.begin_transaction()``.
    """
    # NOTE(review): the alembic.ini added in the same change does not appear to
    # define sqlalchemy.url — confirm the URL is supplied some other way.
    connectable = engine_from_config(
        config.get_section(config.config_ini_section, {}),
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )
    with connectable.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
 # Pick the migration runner that matches how Alembic was invoked.
 if not context.is_offline_mode():
    run_migrations_online()
 else:
    run_migrations_offline()
--- a/runtime/datamate-python/alembic/script.py.mako
+++ b/runtime/datamate-python/alembic/script.py.mako
@@ -0,0 +1,24 @@
 """${message}
 Revision ID: ${up_revision}
 Revises: ${down_revision | comma,n}
 Create Date: ${create_date}
 """
 from alembic import op
 import sqlalchemy as sa
 ${imports if imports else ""}
 # revision identifiers, used by Alembic.
 revision = ${repr(up_revision)}
 down_revision = ${repr(down_revision)}
 branch_labels = ${repr(branch_labels)}
 depends_on = ${repr(depends_on)}
 def upgrade() -> None:
    ${upgrades if upgrades else "pass"}
 def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
--- a/runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
+++ b/runtime/datamate-python/alembic/versions/20250205_0001_add_file_version.py
@@ -0,0 +1,30 @@
 """add file_version to annotation_results
 Revision ID: 20250205_0001
 Revises:
 Create Date: 2025-02-05 00:00:00.000000
 """
 from alembic import op
 import sqlalchemy as sa
 # revision identifiers, used by Alembic.
 revision = "20250205_0001"
 down_revision = None
 branch_labels = None
 depends_on = None
 def upgrade() -> None:
    """Add the nullable BigInteger ``file_version`` column to ``t_dm_annotation_results``."""
    file_version_column = sa.Column(
        "file_version",
        sa.BigInteger(),
        nullable=True,
        comment="标注时的文件版本号",
    )
    op.add_column("t_dm_annotation_results", file_version_column)
 def downgrade() -> None:
    """Drop the ``file_version`` column from ``t_dm_annotation_results``."""
    op.drop_column(
        table_name="t_dm_annotation_results",
        column_name="file_version",
    )
--- a/runtime/datamate-python/app/db/models/annotation_management.py
+++ b/runtime/datamate-python/app/db/models/annotation_management.py
@@ -1,7 +1,19 @@
 """Tables of Annotation Management Module"""
 import uuid
-from sqlalchemy import Column, String, Boolean, TIMESTAMP, Text, Integer, JSON, ForeignKey, UniqueConstraint, Index
+from sqlalchemy import (
    Column,
    String,
    Boolean,
    TIMESTAMP,
    Text,
    Integer,
    JSON,
    ForeignKey,
    UniqueConstraint,
    Index,
    BigInteger,
 )
 from sqlalchemy.sql import func
 from app.db.session import Base
@@ -22,24 +34,51 @@ ANNOTATION_STATUS_CLIENT_VALUES = {
    ANNOTATION_STATUS_NOT_APPLICABLE,
 }
 class AnnotationTemplate(Base):
    """标注配置模板模型"""
    __tablename__ = "t_dm_annotation_templates"
-    id = Column(String(64), primary_key=True, default=lambda: str(uuid.uuid4()), comment="模板ID（UUID或自定义ID）")
+    id = Column(
        String(64),
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
        comment="模板ID（UUID或自定义ID）",
    )
    name = Column(String(100), nullable=False, comment="模板名称")
    description = Column(String(500), nullable=True, comment="模板描述")
-    data_type = Column(String(50), nullable=False, comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table")
+    data_type = Column(
-    labeling_type = Column(String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等")
+        String(50),
-    configuration = Column(JSON, nullable=True, comment="标注配置（兼容字段，主配置为 label_config）")
+        nullable=False,
-    label_config = Column(Text, nullable=True, comment="Label Studio XML配置（模板主配置）")
+        comment="数据类型: image/text/audio/video/timeseries/pdf/chat/html/table",
    )
    labeling_type = Column(
        String(50), nullable=False, comment="标注类型: asr/ner/object-detection/等"
    )
    configuration = Column(
        JSON, nullable=True, comment="标注配置（兼容字段，主配置为 label_config）"
    )
    label_config = Column(
        Text, nullable=True, comment="Label Studio XML配置（模板主配置）"
    )
    style = Column(String(32), nullable=False, comment="样式配置: horizontal/vertical")
-    category = Column(String(50), default='custom', comment="模板分类: audio-speech/chat/computer-vision/nlp/等")
+    category = Column(
        String(50),
        default="custom",
        comment="模板分类: audio-speech/chat/computer-vision/nlp/等",
    )
    built_in = Column(Boolean, default=False, comment="是否系统内置模板")
-    version = Column(String(20), default='1.0', comment="模板版本")
+    version = Column(String(20), default="1.0", comment="模板版本")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+    created_at = Column(
-    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
    )
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间（软删除）")
    def __repr__(self):
@@ -50,20 +89,39 @@ class AnnotationTemplate(Base):
        """检查是否已被软删除"""
        return self.deleted_at is not None
 class LabelingProject(Base):
    """标注项目模型"""
    __tablename__ = "t_dm_labeling_projects"
-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+    id = Column(
        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
    )
    dataset_id = Column(String(36), nullable=False, comment="数据集ID")
    name = Column(String(100), nullable=False, comment="项目名称")
-    labeling_project_id = Column(String(8), nullable=False, comment="Label Studio项目ID")
+    labeling_project_id = Column(
-    template_id = Column(String(64), ForeignKey('t_dm_annotation_templates.id', ondelete='SET NULL'), nullable=True, comment="使用的模板ID")
+        String(8), nullable=False, comment="Label Studio项目ID"
-    configuration = Column(JSON, nullable=True, comment="项目配置（可能包含对模板的自定义修改）")
+    )
    template_id = Column(
        String(64),
        ForeignKey("t_dm_annotation_templates.id", ondelete="SET NULL"),
        nullable=True,
        comment="使用的模板ID",
    )
    configuration = Column(
        JSON, nullable=True, comment="项目配置（可能包含对模板的自定义修改）"
    )
    progress = Column(JSON, nullable=True, comment="项目进度信息")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+    created_at = Column(
-    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
    )
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    deleted_at = Column(TIMESTAMP, nullable=True, comment="删除时间（软删除）")
    def __repr__(self):
@@ -80,10 +138,14 @@ class LabelingProjectFile(Base):
    __tablename__ = "t_dm_labeling_project_files"
-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+    id = Column(
        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
    )
    project_id = Column(String(36), nullable=False, comment="标注项目ID")
    file_id = Column(String(36), nullable=False, comment="文件ID")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+    created_at = Column(
        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
    )
    __table_args__ = (
        UniqueConstraint("project_id", "file_id", name="uk_project_file"),
@@ -100,18 +162,36 @@ class AnnotationResult(Base):
    __tablename__ = "t_dm_annotation_results"
-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+    id = Column(
-    project_id = Column(String(36), nullable=False, comment="标注项目ID（t_dm_labeling_projects.id）")
+        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
-    file_id = Column(String(36), nullable=False, comment="文件ID（t_dm_dataset_files.id）")
+    )
-    annotation = Column(JSON, nullable=False, comment="Label Studio annotation 原始JSON（单人单份最终结果）")
+    project_id = Column(
        String(36), nullable=False, comment="标注项目ID（t_dm_labeling_projects.id）"
    )
    file_id = Column(
        String(36), nullable=False, comment="文件ID（t_dm_dataset_files.id）"
    )
    annotation = Column(
        JSON,
        nullable=False,
        comment="Label Studio annotation 原始JSON（单人单份最终结果）",
    )
    annotation_status = Column(
        String(32),
        nullable=False,
        default=ANNOTATION_STATUS_ANNOTATED,
        comment="标注状态: ANNOTATED/NO_ANNOTATION/NOT_APPLICABLE/IN_PROGRESS",
    )
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+    file_version = Column(BigInteger, nullable=True, comment="标注时的文件版本号")
-    updated_at = Column(TIMESTAMP, server_default=func.current_timestamp(), onupdate=func.current_timestamp(), comment="更新时间")
+    created_at = Column(
        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
    )
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
        onupdate=func.current_timestamp(),
        comment="更新时间",
    )
    def __repr__(self):
        return f"<AnnotationResult(id={self.id}, project_id={self.project_id}, file_id={self.file_id})>"
@@ -122,20 +202,33 @@ class AutoAnnotationTask(Base):
    __tablename__ = "t_dm_auto_annotation_tasks"
-    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID")
+    id = Column(
        String(36), primary_key=True, default=lambda: str(uuid.uuid4()), comment="UUID"
    )
    name = Column(String(255), nullable=False, comment="任务名称")
    dataset_id = Column(String(36), nullable=False, comment="数据集ID")
-    dataset_name = Column(String(255), nullable=True, comment="数据集名称（冗余字段，方便查询）")
+    dataset_name = Column(
        String(255), nullable=True, comment="数据集名称（冗余字段，方便查询）"
    )
    config = Column(JSON, nullable=False, comment="任务配置（模型规模、置信度等）")
-    file_ids = Column(JSON, nullable=True, comment="要处理的文件ID列表，为空则处理数据集所有图像")
+    file_ids = Column(
-    status = Column(String(50), nullable=False, default="pending", comment="任务状态: pending/running/completed/failed")
+        JSON, nullable=True, comment="要处理的文件ID列表，为空则处理数据集所有图像"
    )
    status = Column(
        String(50),
        nullable=False,
        default="pending",
        comment="任务状态: pending/running/completed/failed",
    )
    progress = Column(Integer, default=0, comment="任务进度 0-100")
    total_images = Column(Integer, default=0, comment="总图片数")
    processed_images = Column(Integer, default=0, comment="已处理图片数")
    detected_objects = Column(Integer, default=0, comment="检测到的对象总数")
    output_path = Column(String(500), nullable=True, comment="输出路径")
    error_message = Column(Text, nullable=True, comment="错误信息")
-    created_at = Column(TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间")
+    created_at = Column(
        TIMESTAMP, server_default=func.current_timestamp(), comment="创建时间"
    )
    updated_at = Column(
        TIMESTAMP,
        server_default=func.current_timestamp(),
--- a/runtime/datamate-python/app/module/annotation/interface/editor.py
+++ b/runtime/datamate-python/app/module/annotation/interface/editor.py
@@ -21,6 +21,8 @@ from app.module.annotation.schema.editor import (
    EditorTaskListResponse,
    EditorTaskSegmentResponse,
    EditorTaskResponse,
    FileVersionCheckResponse,
    UseNewVersionResponse,
    UpsertAnnotationRequest,
    UpsertAnnotationResponse,
 )
@@ -80,7 +82,9 @@ async def list_editor_tasks(
 async def get_editor_task(
    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
-    segment_index: Optional[int] = Query(None, alias="segmentIndex", description="段落索引（分段模式下使用）"),
+    segment_index: Optional[int] = Query(
        None, alias="segmentIndex", description="段落索引（分段模式下使用）"
    ),
    db: AsyncSession = Depends(get_db),
 ):
    service = AnnotationEditorService(db)
@@ -95,7 +99,9 @@ async def get_editor_task(
 async def get_editor_task_segment(
    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
-    segment_index: int = Query(..., ge=0, alias="segmentIndex", description="段落索引（从0开始）"),
+    segment_index: int = Query(
        ..., ge=0, alias="segmentIndex", description="段落索引（从0开始）"
    ),
    db: AsyncSession = Depends(get_db),
 ):
    service = AnnotationEditorService(db)
@@ -117,3 +123,36 @@ async def upsert_editor_annotation(
    result = await service.upsert_annotation(project_id, file_id, request)
    return StandardResponse(code=200, message="success", data=result)
@router.get(
    "/projects/{project_id}/files/{file_id}/version",
    response_model=StandardResponse[FileVersionCheckResponse],
)
async def check_file_version(
    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
    db: AsyncSession = Depends(get_db),
):
    """
    Check whether the file has a new version.

    Delegates to ``AnnotationEditorService.check_file_version`` and wraps the
    result in a ``StandardResponse`` with code 200.
    """
    editor_service = AnnotationEditorService(db)
    version_info = await editor_service.check_file_version(project_id, file_id)
    return StandardResponse(code=200, message="success", data=version_info)
@router.post(
    "/projects/{project_id}/files/{file_id}/use-new-version",
    response_model=StandardResponse[UseNewVersionResponse],
)
async def use_new_version(
    project_id: str = Path(..., description="标注项目ID（t_dm_labeling_projects.id）"),
    file_id: str = Path(..., description="文件ID（t_dm_dataset_files.id）"),
    db: AsyncSession = Depends(get_db),
):
    """
    Switch to the file's new version and clear its annotations.

    Delegates to ``AnnotationEditorService.use_new_version`` and wraps the
    result in a ``StandardResponse`` with code 200.
    """
    editor_service = AnnotationEditorService(db)
    switch_result = await editor_service.use_new_version(project_id, file_id)
    return StandardResponse(code=200, message="success", data=switch_result)
--- a/runtime/datamate-python/app/module/annotation/schema/editor.py
+++ b/runtime/datamate-python/app/module/annotation/schema/editor.py
@@ -34,17 +34,29 @@ class AnnotationStatus(str, Enum):
 class EditorProjectInfo(BaseModel):
    """编辑器项目元信息"""
-    project_id: str = Field(..., alias="projectId", description="DataMate 标注项目ID（t_dm_labeling_projects.id）")
+    project_id: str = Field(
-    dataset_id: str = Field(..., alias="datasetId", description="数据集ID（t_dm_datasets.id）")
+        ...,
        alias="projectId",
        description="DataMate 标注项目ID（t_dm_labeling_projects.id）",
    )
    dataset_id: str = Field(
        ..., alias="datasetId", description="数据集ID（t_dm_datasets.id）"
    )
    dataset_type: Optional[str] = Field(
        None,
        alias="datasetType",
        description="数据集类型（TEXT/IMAGE/AUDIO/VIDEO 等）",
    )
-    template_id: Optional[str] = Field(None, alias="templateId", description="模板ID（t_dm_annotation_templates.id）")
+    template_id: Optional[str] = Field(
-    label_config: Optional[str] = Field(None, alias="labelConfig", description="Label Studio XML 配置")
+        None, alias="templateId", description="模板ID（t_dm_annotation_templates.id）"
    )
    label_config: Optional[str] = Field(
        None, alias="labelConfig", description="Label Studio XML 配置"
    )
    supported: bool = Field(..., description="当前数据类型是否支持内嵌编辑器")
-    unsupported_reason: Optional[str] = Field(None, alias="unsupportedReason", description="不支持原因（当 supported=false）")
+    unsupported_reason: Optional[str] = Field(
        None, alias="unsupportedReason", description="不支持原因（当 supported=false）"
    )
    model_config = ConfigDict(populate_by_name=True)
@@ -55,8 +67,12 @@ class EditorTaskListItem(BaseModel):
    file_id: str = Field(..., alias="fileId", description="文件ID")
    file_name: str = Field(..., alias="fileName", description="文件名")
    file_type: Optional[str] = Field(None, alias="fileType", description="文件类型")
-    has_annotation: bool = Field(..., alias="hasAnnotation", description="是否已有最终标注")
+    has_annotation: bool = Field(
-    annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
+        ..., alias="hasAnnotation", description="是否已有最终标注"
    )
    annotation_updated_at: Optional[datetime] = Field(
        None, alias="annotationUpdatedAt", description="标注更新时间"
    )
    annotation_status: Optional[AnnotationStatus] = Field(
        None,
        alias="annotationStatus",
@@ -82,9 +98,13 @@ class SegmentInfo(BaseModel):
    """段落摘要（用于文本分段标注）"""
    idx: int = Field(..., description="段落索引")
-    has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
+    has_annotation: bool = Field(
        False, alias="hasAnnotation", description="该段落是否已有标注"
    )
    line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引（从0开始）")
-    chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引（从0开始）")
+    chunk_index: int = Field(
        0, alias="chunkIndex", description="行内分片索引（从0开始）"
    )
    model_config = ConfigDict(populate_by_name=True)
@@ -93,12 +113,16 @@ class EditorTaskResponse(BaseModel):
    """编辑器任务详情（可直接喂给 Label Studio Editor 的 task 对象）"""
    task: Dict[str, Any] = Field(..., description="Label Studio task 对象")
-    annotation_updated_at: Optional[datetime] = Field(None, alias="annotationUpdatedAt", description="标注更新时间")
+    annotation_updated_at: Optional[datetime] = Field(
        None, alias="annotationUpdatedAt", description="标注更新时间"
    )
    # 分段相关字段
    segmented: bool = Field(False, description="是否启用分段模式")
    total_segments: int = Field(0, alias="totalSegments", description="总段落数")
-    current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
+    current_segment_index: int = Field(
        0, alias="currentSegmentIndex", description="当前段落索引"
    )
    model_config = ConfigDict(populate_by_name=True)
@@ -108,9 +132,13 @@ class SegmentDetail(BaseModel):
    idx: int = Field(..., description="段落索引")
    text: str = Field(..., description="段落文本")
-    has_annotation: bool = Field(False, alias="hasAnnotation", description="该段落是否已有标注")
+    has_annotation: bool = Field(
        False, alias="hasAnnotation", description="该段落是否已有标注"
    )
    line_index: int = Field(0, alias="lineIndex", description="JSONL 行索引（从0开始）")
-    chunk_index: int = Field(0, alias="chunkIndex", description="行内分片索引（从0开始）")
+    chunk_index: int = Field(
        0, alias="chunkIndex", description="行内分片索引（从0开始）"
    )
    model_config = ConfigDict(populate_by_name=True)
@@ -121,7 +149,9 @@ class EditorTaskSegmentResponse(BaseModel):
    segmented: bool = Field(False, description="是否启用分段模式")
    segment: Optional[SegmentDetail] = Field(None, description="段落内容")
    total_segments: int = Field(0, alias="totalSegments", description="总段落数")
-    current_segment_index: int = Field(0, alias="currentSegmentIndex", description="当前段落索引")
+    current_segment_index: int = Field(
        0, alias="currentSegmentIndex", description="当前段落索引"
    )
    model_config = ConfigDict(populate_by_name=True)
@@ -129,7 +159,9 @@ class EditorTaskSegmentResponse(BaseModel):
 class UpsertAnnotationRequest(BaseModel):
    """保存/覆盖最终标注（Label Studio annotation 原始对象）"""
-    annotation: Dict[str, Any] = Field(..., description="Label Studio annotation 对象（包含 result 等）")
+    annotation: Dict[str, Any] = Field(
        ..., description="Label Studio annotation 对象（包含 result 等）"
    )
    annotation_status: Optional[AnnotationStatus] = Field(
        None,
        alias="annotationStatus",
@@ -153,8 +185,43 @@ class UpsertAnnotationRequest(BaseModel):
 class UpsertAnnotationResponse(BaseModel):
    """保存/覆盖最终标注响应"""
-    annotation_id: str = Field(..., alias="annotationId", description="标注结果ID（t_dm_annotation_results.id）")
+    annotation_id: str = Field(
        ...,
        alias="annotationId",
        description="标注结果ID（t_dm_annotation_results.id）",
    )
    updated_at: datetime = Field(..., alias="updatedAt", description="标注更新时间")
    model_config = ConfigDict(populate_by_name=True)
 class FileVersionCheckResponse(BaseModel):
    """Response of the file version check."""
    # Each field is exposed under its camelCase alias.
    file_id: str = Field(..., alias="fileId", description="文件ID")
    current_file_version: int = Field(
        ..., alias="currentFileVersion", description="当前文件版本"
    )
    # Optional: defaults to None when no annotation file version is available.
    annotation_file_version: Optional[int] = Field(
        None, alias="annotationFileVersion", description="标注时的文件版本"
    )
    has_new_version: bool = Field(
        ..., alias="hasNewVersion", description="是否有新版本"
    )
    # Allow population by field name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)
 class UseNewVersionResponse(BaseModel):
    """Response of the "use new version" operation."""
    # Each aliased field is exposed under its camelCase alias.
    file_id: str = Field(..., alias="fileId", description="文件ID")
    # Optional: defaults to None when no previous annotation file version exists.
    previous_file_version: Optional[int] = Field(
        None, alias="previousFileVersion", description="之前标注的文件版本"
    )
    current_file_version: int = Field(
        ..., alias="currentFileVersion", description="当前文件版本"
    )
    message: str = Field(..., description="操作结果消息")
    # Allow population by field name as well as by alias.
    model_config = ConfigDict(populate_by_name=True)
--- a/runtime/datamate-python/app/module/annotation/service/editor.py
+++ b/runtime/datamate-python/app/module/annotation/service/editor.py
@@ -23,7 +23,13 @@ from sqlalchemy.ext.asyncio import AsyncSession
 from app.core.config import settings
 from app.core.logging import get_logger
-from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
+from app.db.models import (
    AnnotationResult,
    Dataset,
    DatasetFiles,
    LabelingProject,
    LabelingProjectFile,
 )
 from app.db.models.annotation_management import (
    ANNOTATION_STATUS_ANNOTATED,
    ANNOTATION_STATUS_IN_PROGRESS,
@@ -45,8 +51,12 @@ from app.module.annotation.schema.editor import (
 )
 from app.module.annotation.service.template import AnnotationTemplateService
 from app.module.annotation.service.knowledge_sync import KnowledgeSyncService
-from app.module.annotation.service.annotation_text_splitter import AnnotationTextSplitter
+from app.module.annotation.service.annotation_text_splitter import (
-from app.module.annotation.service.text_fetcher import fetch_text_content_via_download_api
+    AnnotationTextSplitter,
 )
 from app.module.annotation.service.text_fetcher import (
    fetch_text_content_via_download_api,
 )
 logger = get_logger(__name__)
@@ -169,7 +179,9 @@ class AnnotationEditorService:
        template = await self.template_service.get_template(self.db, template_id)
        return getattr(template, "label_config", None) if template else None
-    async def _resolve_project_label_config(self, project: LabelingProject) -> Optional[str]:
+    async def _resolve_project_label_config(
        self, project: LabelingProject
    ) -> Optional[str]:
        label_config = None
        if project.configuration and isinstance(project.configuration, dict):
            label_config = project.configuration.get("label_config")
@@ -210,7 +222,9 @@ class AnnotationEditorService:
        if not label_config:
            return [default_key]
        target_categories = categories or set()
-        keys = cls._extract_object_value_keys_by_category(label_config, target_categories)
+        keys = cls._extract_object_value_keys_by_category(
            label_config, target_categories
        )
        if not keys:
            return [default_key]
        return keys
@@ -231,7 +245,9 @@ class AnnotationEditorService:
        return parsed if isinstance(parsed, dict) else None
    @classmethod
-    def _parse_jsonl_records(cls, text_content: str) -> List[Tuple[Optional[Dict[str, Any]], str]]:
+    def _parse_jsonl_records(
        cls, text_content: str
    ) -> List[Tuple[Optional[Dict[str, Any]], str]]:
        lines = [line for line in text_content.splitlines() if line.strip()]
        records: List[Tuple[Optional[Dict[str, Any]], str]] = []
        for line in lines:
@@ -277,7 +293,9 @@ class AnnotationEditorService:
    @classmethod
    def _extract_textual_value_keys(cls, label_config: str) -> List[str]:
-        return cls._extract_object_value_keys_by_category(label_config, TEXTUAL_OBJECT_CATEGORIES)
+        return cls._extract_object_value_keys_by_category(
            label_config, TEXTUAL_OBJECT_CATEGORIES
        )
    @staticmethod
    def _needs_placeholder(value: Any) -> bool:
@@ -287,7 +305,9 @@ class AnnotationEditorService:
            return True
        return False
-    def _apply_text_placeholders(self, data: Dict[str, Any], label_config: Optional[str]) -> None:
+    def _apply_text_placeholders(
        self, data: Dict[str, Any], label_config: Optional[str]
    ) -> None:
        if not label_config:
            return
        for key in self._extract_textual_value_keys(label_config):
@@ -346,7 +366,9 @@ class AnnotationEditorService:
                if i > 0:
                    prev = children[i - 1]
-                    if prev.tag == "Header" and self._header_already_present(prev, obj_name):
+                    if prev.tag == "Header" and self._header_already_present(
                        prev, obj_name
                    ):
                        i += 1
                        continue
@@ -362,7 +384,9 @@ class AnnotationEditorService:
        return ET.tostring(root, encoding="unicode")
    @staticmethod
-    def _extract_segment_annotations(payload: Optional[Dict[str, Any]]) -> Dict[str, Dict[str, Any]]:
+    def _extract_segment_annotations(
        payload: Optional[Dict[str, Any]],
    ) -> Dict[str, Dict[str, Any]]:
        if not payload or not isinstance(payload, dict):
            return {}
        segments = payload.get(SEGMENTS_KEY)
@@ -440,13 +464,17 @@ class AnnotationEditorService:
        file_record: DatasetFiles,
        file_id: str,
    ) -> Optional[int]:
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
            await self._get_dataset_type(project.dataset_id)
        )
        if dataset_type != DATASET_TYPE_TEXT:
            return None
        if not self._resolve_segmentation_enabled(project):
            return None
-        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+        text_content = await self._fetch_text_content_via_download_api(
            project.dataset_id, file_id
        )
        if not isinstance(text_content, str):
            return None
@@ -495,7 +523,9 @@ class AnnotationEditorService:
        file_type_lower = func.lower(DatasetFiles.file_type)
        file_name_lower = func.lower(DatasetFiles.file_name)
        type_condition = file_type_lower.in_(SOURCE_DOCUMENT_TYPES)
-        name_conditions = [file_name_lower.like(f"%{ext}") for ext in SOURCE_DOCUMENT_EXTENSIONS]
+        name_conditions = [
            file_name_lower.like(f"%{ext}") for ext in SOURCE_DOCUMENT_EXTENSIONS
        ]
        return or_(type_condition, *name_conditions)
    def _build_task_data(
@@ -545,13 +575,17 @@ class AnnotationEditorService:
        records: List[Tuple[Optional[Dict[str, Any]], str]],
        record_texts: List[str],
        segment_annotation_keys: set[str],
-    ) -> Tuple[List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]]:
+    ) -> Tuple[
        List[SegmentInfo], List[Tuple[Optional[Dict[str, Any]], str, str, int, int]]
    ]:
        splitter = AnnotationTextSplitter(max_chars=self.SEGMENT_THRESHOLD)
        segments: List[SegmentInfo] = []
        segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
        segment_cursor = 0
-        for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
+        for record_index, ((payload, raw_text), record_text) in enumerate(
            zip(records, record_texts)
        ):
            normalized_text = record_text or ""
            if len(normalized_text) > self.SEGMENT_THRESHOLD:
                raw_segments = splitter.split(normalized_text)
@@ -559,12 +593,15 @@ class AnnotationEditorService:
                    segments.append(
                        SegmentInfo(
                            idx=segment_cursor,
-                            hasAnnotation=str(segment_cursor) in segment_annotation_keys,
+                            hasAnnotation=str(segment_cursor)
                            in segment_annotation_keys,
                            lineIndex=record_index,
                            chunkIndex=chunk_index,
                        )
                    )
-                    segment_contexts.append((payload, raw_text, seg["text"], record_index, chunk_index))
+                    segment_contexts.append(
                        (payload, raw_text, seg["text"], record_index, chunk_index)
                    )
                    segment_cursor += 1
            else:
                segments.append(
@@ -575,11 +612,15 @@ class AnnotationEditorService:
                        chunkIndex=0,
                    )
                )
-                segment_contexts.append((payload, raw_text, normalized_text, record_index, 0))
+                segment_contexts.append(
                    (payload, raw_text, normalized_text, record_index, 0)
                )
                segment_cursor += 1
        if not segments:
-            segments = [SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)]
+            segments = [
                SegmentInfo(idx=0, hasAnnotation=False, lineIndex=0, chunkIndex=0)
            ]
            segment_contexts = [(None, "", "", 0, 0)]
        return segments, segment_contexts
@@ -587,7 +628,9 @@ class AnnotationEditorService:
    async def get_project_info(self, project_id: str) -> EditorProjectInfo:
        project = await self._get_project_or_404(project_id)
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
            await self._get_dataset_type(project.dataset_id)
        )
        supported = dataset_type in SUPPORTED_EDITOR_DATASET_TYPES
        unsupported_reason = None
        if not supported:
@@ -653,7 +696,12 @@ class AnnotationEditorService:
        rows = files_result.all()
        items: List[EditorTaskListItem] = []
-        for file_record, annotation_id, annotation_updated_at, annotation_status in rows:
+        for (
            file_record,
            annotation_id,
            annotation_updated_at,
            annotation_status,
        ) in rows:
            fid = str(file_record.id)  # type: ignore[arg-type]
            items.append(
                EditorTaskListItem(
@@ -675,7 +723,9 @@ class AnnotationEditorService:
            size=size,
        )
-    async def _fetch_text_content_via_download_api(self, dataset_id: str, file_id: str) -> str:
+    async def _fetch_text_content_via_download_api(
        self, dataset_id: str, file_id: str
    ) -> str:
        return await fetch_text_content_via_download_api(dataset_id, file_id)
    async def get_task(
@@ -686,7 +736,9 @@ class AnnotationEditorService:
    ) -> EditorTaskResponse:
        project = await self._get_project_or_404(project_id)
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
            await self._get_dataset_type(project.dataset_id)
        )
        if dataset_type not in SUPPORTED_EDITOR_DATASET_TYPES:
            raise HTTPException(
                status_code=400,
@@ -701,7 +753,9 @@ class AnnotationEditorService:
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
-            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+            raise HTTPException(
                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
            )
        if dataset_type == DATASET_TYPE_IMAGE:
            return await self._build_image_task(project, file_record, file_id)
@@ -722,7 +776,9 @@ class AnnotationEditorService:
    ) -> EditorTaskSegmentResponse:
        project = await self._get_project_or_404(project_id)
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
            await self._get_dataset_type(project.dataset_id)
        )
        if dataset_type != DATASET_TYPE_TEXT:
            raise HTTPException(
                status_code=400,
@@ -737,7 +793,9 @@ class AnnotationEditorService:
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
-            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+            raise HTTPException(
                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
            )
        if not self._resolve_segmentation_enabled(project):
            return EditorTaskSegmentResponse(
@@ -747,7 +805,9 @@ class AnnotationEditorService:
                currentSegmentIndex=0,
            )
-        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+        text_content = await self._fetch_text_content_via_download_api(
            project.dataset_id, file_id
        )
        assert isinstance(text_content, str)
        label_config = await self._resolve_project_label_config(project)
        primary_text_key = self._resolve_primary_text_key(label_config)
@@ -839,7 +899,9 @@ class AnnotationEditorService:
        file_id: str,
        segment_index: Optional[int],
    ) -> EditorTaskResponse:
-        text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+        text_content = await self._fetch_text_content_via_download_api(
            project.dataset_id, file_id
        )
        assert isinstance(text_content, str)
        label_config = await self._resolve_project_label_config(project)
        primary_text_key = self._resolve_primary_text_key(label_config)
@@ -885,7 +947,8 @@ class AnnotationEditorService:
        if not segmentation_enabled:
            segment_index = None
        needs_segmentation = segmentation_enabled and (
-            len(records) > 1 or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
+            len(records) > 1
            or any(len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts)
        )
        segments: List[SegmentInfo] = []
        segment_contexts: List[Tuple[Optional[Dict[str, Any]], str, str, int, int]] = []
@@ -903,10 +966,14 @@ class AnnotationEditorService:
                segment_annotation_keys,
            )
            current_segment_index = segment_index if segment_index is not None else 0
-            if current_segment_index < 0 or current_segment_index >= len(segment_contexts):
+            if current_segment_index < 0 or current_segment_index >= len(
                segment_contexts
            ):
                current_segment_index = 0
-            selected_payload, _, display_text, _, _ = segment_contexts[current_segment_index]
+            selected_payload, _, display_text, _, _ = segment_contexts[
                current_segment_index
            ]
        # 构造 task 对象
        task_data = self._build_task_data(
@@ -936,11 +1003,16 @@ class AnnotationEditorService:
                # 分段模式：获取当前段落的标注
                seg_ann = segment_annotations.get(str(current_segment_index), {})
                stored = {
-                    "id": self._make_ls_annotation_id(project.id, file_id) + current_segment_index,
+                    "id": self._make_ls_annotation_id(project.id, file_id)
                    + current_segment_index,
                    "task": ls_task_id,
                    "result": seg_ann.get(SEGMENT_RESULT_KEY, []),
-                    "created_at": seg_ann.get(SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
+                    "created_at": seg_ann.get(
-                    "updated_at": seg_ann.get(SEGMENT_UPDATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
+                        SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
                    ),
                    "updated_at": seg_ann.get(
                        SEGMENT_UPDATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
                    ),
                }
                task["annotations"] = [stored]
            elif not needs_segmentation and not has_segmented_annotation:
@@ -952,7 +1024,10 @@ class AnnotationEditorService:
                task["annotations"] = [stored]
            else:
                # 首次从非分段切换到分段：提供空标注
-                empty_ann_id = self._make_ls_annotation_id(project.id, file_id) + current_segment_index
+                empty_ann_id = (
                    self._make_ls_annotation_id(project.id, file_id)
                    + current_segment_index
                )
                task["annotations"] = [
                    {
                        "id": empty_ann_id,
@@ -994,7 +1069,9 @@ class AnnotationEditorService:
        categories: set[str],
    ) -> EditorTaskResponse:
        label_config = await self._resolve_project_label_config(project)
-        media_keys = self._resolve_media_value_keys(label_config, default_key, categories)
+        media_keys = self._resolve_media_value_keys(
            label_config, default_key, categories
        )
        preview_url = self._build_file_preview_url(project.dataset_id, file_id)
        file_name = str(getattr(file_record, "file_name", ""))
@@ -1097,7 +1174,9 @@ class AnnotationEditorService:
            categories=MEDIA_OBJECT_CATEGORIES,
        )
-    async def upsert_annotation(self, project_id: str, file_id: str, request: UpsertAnnotationRequest) -> UpsertAnnotationResponse:
+    async def upsert_annotation(
        self, project_id: str, file_id: str, request: UpsertAnnotationRequest
    ) -> UpsertAnnotationResponse:
        project = await self._get_project_or_404(project_id)
        # 校验文件归属
@@ -1112,7 +1191,26 @@ class AnnotationEditorService:
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
-            raise HTTPException(status_code=404, detail=f"文件不存在或不属于该项目: {file_id}")
+            raise HTTPException(
                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
            )
        # 检查文件版本是否变化
        current_file_version = file_record.version
        existing_result = await self.db.execute(
            select(AnnotationResult).where(
                AnnotationResult.project_id == project_id,
                AnnotationResult.file_id == file_id,
            )
        )
        existing_annotation = existing_result.scalar_one_or_none()
        if existing_annotation and existing_annotation.file_version is not None:
            if existing_annotation.file_version != current_file_version:
                raise HTTPException(
                    status_code=409,
                    detail=f"文件已更新到新版本（当前版本: {current_file_version}, 标注版本: {existing_annotation.file_version}），请使用新版本",
                )
        annotation_payload = dict(request.annotation or {})
        result = annotation_payload.get("result")
@@ -1127,7 +1225,9 @@ class AnnotationEditorService:
        if request.segment_index is not None:
            segment_total_hint = self._resolve_segment_total(annotation_payload)
            if segment_total_hint is None:
-                segment_total_hint = await self._compute_segment_total(project, file_record, file_id)
+                segment_total_hint = await self._compute_segment_total(
                    project, file_record, file_id
                )
        existing_result = await self.db.execute(
            select(AnnotationResult)
@@ -1161,11 +1261,16 @@ class AnnotationEditorService:
            # 非分段模式：直接使用传入的 annotation
            annotation_payload["task"] = ls_task_id
            if not isinstance(annotation_payload.get("id"), int):
-                annotation_payload["id"] = self._make_ls_annotation_id(project_id, file_id)
+                annotation_payload["id"] = self._make_ls_annotation_id(
                    project_id, file_id
                )
            final_payload = annotation_payload
        requested_status = request.annotation_status
-        if requested_status is not None and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES:
+        if (
            requested_status is not None
            and requested_status not in ANNOTATION_STATUS_CLIENT_VALUES
        ):
            raise HTTPException(status_code=400, detail="annotationStatus 不合法")
        segment_total = None
@@ -1194,7 +1299,10 @@ class AnnotationEditorService:
                elif requested_status == ANNOTATION_STATUS_NOT_APPLICABLE:
                    final_status = ANNOTATION_STATUS_NOT_APPLICABLE
                else:
-                    raise HTTPException(status_code=400, detail="未发现标注内容，请确认无标注/不适用后再保存")
+                    raise HTTPException(
                        status_code=400,
                        detail="未发现标注内容，请确认无标注/不适用后再保存",
                    )
        if request.segment_index is not None:
            segment_entries = self._extract_segment_annotations(final_payload)
@@ -1210,11 +1318,16 @@ class AnnotationEditorService:
        if existing:
            if request.expected_updated_at and existing.updated_at:
-                if existing.updated_at != request.expected_updated_at.replace(tzinfo=None):
+                if existing.updated_at != request.expected_updated_at.replace(
-                    raise HTTPException(status_code=409, detail="标注已被更新，请刷新后重试")
+                    tzinfo=None
                ):
                    raise HTTPException(
                        status_code=409, detail="标注已被更新，请刷新后重试"
                    )
            existing.annotation = final_payload  # type: ignore[assignment]
            existing.annotation_status = final_status  # type: ignore[assignment]
            existing.file_version = current_file_version  # type: ignore[assignment]
            existing.updated_at = now  # type: ignore[assignment]
            await self.db.commit()
            await self.db.refresh(existing)
@@ -1223,7 +1336,9 @@ class AnnotationEditorService:
                annotationId=existing.id,
                updatedAt=existing.updated_at or now,
            )
-            await self._sync_annotation_to_knowledge(project, file_record, final_payload, existing.updated_at)
+            await self._sync_annotation_to_knowledge(
                project, file_record, final_payload, existing.updated_at
            )
            return response
        new_id = str(uuid.uuid4())
@@ -1233,6 +1348,7 @@ class AnnotationEditorService:
            file_id=file_id,
            annotation=final_payload,
            annotation_status=final_status,
            file_version=current_file_version,
            created_at=now,
            updated_at=now,
        )
@@ -1244,7 +1360,9 @@ class AnnotationEditorService:
            annotationId=record.id,
            updatedAt=record.updated_at or now,
        )
-        await self._sync_annotation_to_knowledge(project, file_record, final_payload, record.updated_at)
+        await self._sync_annotation_to_knowledge(
            project, file_record, final_payload, record.updated_at
        )
        return response
    def _merge_segment_annotation(
@@ -1292,7 +1410,9 @@ class AnnotationEditorService:
        # 更新指定段落的标注
        segments[str(segment_index)] = {
            SEGMENT_RESULT_KEY: new_annotation.get(SEGMENT_RESULT_KEY, []),
-            SEGMENT_CREATED_AT_KEY: new_annotation.get(SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"),
+            SEGMENT_CREATED_AT_KEY: new_annotation.get(
                SEGMENT_CREATED_AT_KEY, datetime.utcnow().isoformat() + "Z"
            ),
            SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
        }
@@ -1317,9 +1437,7 @@ class AnnotationEditorService:
            logger.warning("标注同步知识管理失败：%s", exc)
    async def precompute_segmentation_for_project(
-        self,
+        self, project_id: str, max_retries: int = 3
        project_id: str,
        max_retries: int = 3
    ) -> Dict[str, Any]:
        """
        为指定项目的所有文本文件预计算切片结构并持久化到数据库
@@ -1332,7 +1450,9 @@ class AnnotationEditorService:
            统计信息：{total_files, succeeded, failed}
        """
        project = await self._get_project_or_404(project_id)
-        dataset_type = self._normalize_dataset_type(await self._get_dataset_type(project.dataset_id))
+        dataset_type = self._normalize_dataset_type(
            await self._get_dataset_type(project.dataset_id)
        )
        # 只处理文本数据集
        if dataset_type != DATASET_TYPE_TEXT:
@@ -1364,9 +1484,8 @@ class AnnotationEditorService:
        for file_record in file_records:
            file_type = str(getattr(file_record, "file_type", "") or "").lower()
            file_name = str(getattr(file_record, "file_name", "")).lower()
-            is_source_document = (
+            is_source_document = file_type in SOURCE_DOCUMENT_TYPES or any(
-                file_type in SOURCE_DOCUMENT_TYPES or
+                file_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS
                any(file_name.endswith(ext) for ext in SOURCE_DOCUMENT_EXTENSIONS)
            )
            if not is_source_document:
                valid_files.append(file_record)
@@ -1385,7 +1504,9 @@ class AnnotationEditorService:
            for retry in range(max_retries):
                try:
                    # 读取文本内容
-                    text_content = await self._fetch_text_content_via_download_api(project.dataset_id, file_id)
+                    text_content = await self._fetch_text_content_via_download_api(
                        project.dataset_id, file_id
                    )
                    if not isinstance(text_content, str):
                        logger.warning(f"文件 {file_id} 内容不是字符串，跳过切片")
                        failed += 1
@@ -1404,7 +1525,9 @@ class AnnotationEditorService:
                        records = [(None, text_content)]
                    record_texts = [
-                        self._resolve_primary_text_value(payload, raw_text, primary_text_key)
+                        self._resolve_primary_text_value(
                            payload, raw_text, primary_text_key
                        )
                        for payload, raw_text in records
                    ]
                    if not record_texts:
@@ -1412,7 +1535,8 @@ class AnnotationEditorService:
                    # 判断是否需要分段
                    needs_segmentation = len(records) > 1 or any(
-                        len(text or "") > self.SEGMENT_THRESHOLD for text in record_texts
+                        len(text or "") > self.SEGMENT_THRESHOLD
                        for text in record_texts
                    )
                    if not needs_segmentation:
@@ -1425,7 +1549,9 @@ class AnnotationEditorService:
                    segment_cursor = 0
                    segments = {}
-                    for record_index, ((payload, raw_text), record_text) in enumerate(zip(records, record_texts)):
+                    for record_index, ((payload, raw_text), record_text) in enumerate(
                        zip(records, record_texts)
                    ):
                        normalized_text = record_text or ""
                        if len(normalized_text) > self.SEGMENT_THRESHOLD:
@@ -1433,15 +1559,19 @@ class AnnotationEditorService:
                            for chunk_index, seg in enumerate(raw_segments):
                                segments[str(segment_cursor)] = {
                                    SEGMENT_RESULT_KEY: [],
-                                    SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
+                                    SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat()
-                                    SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
+                                    + "Z",
                                    SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat()
                                    + "Z",
                                }
                                segment_cursor += 1
                        else:
                            segments[str(segment_cursor)] = {
                                SEGMENT_RESULT_KEY: [],
-                                SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
+                                SEGMENT_CREATED_AT_KEY: datetime.utcnow().isoformat()
-                                SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat() + "Z",
+                                + "Z",
                                SEGMENT_UPDATED_AT_KEY: datetime.utcnow().isoformat()
                                + "Z",
                            }
                            segment_cursor += 1
@@ -1508,3 +1638,145 @@ class AnnotationEditorService:
            "failed": failed,
        }
    async def check_file_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
        """
        Report whether a file has moved past the version its annotation was saved on.

        Args:
            project_id: ID of the labeling project.
            file_id: ID of the file to inspect.

        Returns:
            A dict with the keys ``fileId``, ``currentFileVersion``,
            ``annotationFileVersion`` and ``hasNewVersion``.

        Raises:
            HTTPException: 404 when the file does not exist or is not linked
                to the project.
        """
        project = await self._get_project_or_404(project_id)

        # The file must be linked to this project and belong to its dataset.
        file_query = (
            select(DatasetFiles)
            .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
            .where(
                LabelingProjectFile.project_id == project.id,
                DatasetFiles.id == file_id,
                DatasetFiles.dataset_id == project.dataset_id,
            )
        )
        file_record = (await self.db.execute(file_query)).scalar_one_or_none()
        if not file_record:
            raise HTTPException(
                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
            )

        # Look up the annotation stored for this project/file pair, if any.
        annotation_query = select(AnnotationResult).where(
            AnnotationResult.project_id == project_id,
            AnnotationResult.file_id == file_id,
        )
        annotation = (await self.db.execute(annotation_query)).scalar_one_or_none()

        latest_version = file_record.version
        annotated_version = annotation.file_version if annotation else None

        if annotation is None:
            # Nothing has been annotated yet, so there is nothing to warn about.
            newer_available = False
        else:
            # An annotation without a recorded version is reported as outdated;
            # otherwise only a strictly newer file version counts.
            newer_available = (
                annotated_version is None or latest_version > annotated_version
            )

        return {
            "fileId": file_id,
            "currentFileVersion": latest_version,
            "annotationFileVersion": annotated_version,
            "hasNewVersion": newer_available,
        }
    async def use_new_version(self, project_id: str, file_id: str) -> Dict[str, Any]:
        """
        Bind an existing annotation to the file's current version and clear its results.

        For a segmented annotation the segment structure (segment keys, per-segment
        metadata, ``version`` and ``total_segments``) is kept and only each
        segment's result list is emptied. Any other annotation payload is replaced
        by an empty dict. The annotation status is reset to
        ``ANNOTATION_STATUS_NO_ANNOTATION``.

        Args:
            project_id: ID of the labeling project.
            file_id: ID of the file whose annotation should be reset.

        Returns:
            A dict with the keys ``fileId``, ``previousFileVersion`` (the version
            stored on the annotation before this call, possibly ``None``),
            ``currentFileVersion`` and ``message``.

        Raises:
            HTTPException: 404 when the file is not part of the project or no
                annotation exists for it; 400 when the file version is not
                greater than the version recorded on the annotation.
        """
        project = await self._get_project_or_404(project_id)

        # The file must be linked to this project and belong to its dataset.
        file_result = await self.db.execute(
            select(DatasetFiles)
            .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
            .where(
                LabelingProjectFile.project_id == project.id,
                DatasetFiles.id == file_id,
                DatasetFiles.dataset_id == project.dataset_id,
            )
        )
        file_record = file_result.scalar_one_or_none()
        if not file_record:
            raise HTTPException(
                status_code=404, detail=f"文件不存在或不属于该项目: {file_id}"
            )

        # Select the annotation row FOR UPDATE so the reset below is applied to
        # a row this transaction holds.
        annotation_result = await self.db.execute(
            select(AnnotationResult)
            .where(
                AnnotationResult.project_id == project_id,
                AnnotationResult.file_id == file_id,
            )
            .with_for_update()
        )
        annotation = annotation_result.scalar_one_or_none()
        current_file_version = file_record.version

        if not annotation:
            raise HTTPException(status_code=404, detail=f"标注不存在: {file_id}")

        # Capture the old version before it is overwritten so it can be returned.
        previous_file_version = annotation.file_version

        # An annotation without a recorded version is always allowed to switch.
        if (
            previous_file_version is not None
            and current_file_version <= previous_file_version
        ):
            raise HTTPException(
                status_code=400,
                detail=f"文件版本（{current_file_version}）未更新或低于标注版本（{previous_file_version}）",
            )

        now = datetime.utcnow()
        stored_payload = annotation.annotation

        if isinstance(stored_payload, dict) and stored_payload.get(SEGMENTED_KEY):
            raw_segments = stored_payload.get(SEGMENTS_KEY)
            if not isinstance(raw_segments, dict):
                # A missing or malformed segment container is treated as empty
                # instead of failing on ``.items()``.
                raw_segments = {}

            # Build fresh dicts rather than mutating the loaded structure.
            # NOTE(review): assuming a plain JSON column, the ORM compares the
            # assigned value with the originally loaded one at flush time; an
            # in-place edit of the shared nested dicts can make the two compare
            # equal so the column is not written - confirm against the model.
            cleared_segments = {
                segment_id: (
                    {**segment_data, SEGMENT_RESULT_KEY: []}
                    if isinstance(segment_data, dict)
                    else segment_data
                )
                for segment_id, segment_data in raw_segments.items()
            }
            cleared_payload: Dict[str, Any] = {
                SEGMENTED_KEY: True,
                "version": stored_payload.get("version", 1),
                SEGMENTS_KEY: cleared_segments,
                "total_segments": stored_payload.get(
                    "total_segments", len(cleared_segments)
                ),
            }
        else:
            # Non-segmented (or non-dict) payloads are simply emptied.
            cleared_payload = {}

        annotation.annotation = cleared_payload
        annotation.annotation_status = ANNOTATION_STATUS_NO_ANNOTATION
        annotation.file_version = current_file_version
        annotation.updated_at = now

        await self.db.commit()
        await self.db.refresh(annotation)

        return {
            "fileId": file_id,
            "previousFileVersion": previous_file_version,
            "currentFileVersion": current_file_version,
            "message": "已使用新版本并清空标注",
        }