feat(annotation): 添加标注项目文件快照功能

- 新增 LabelingProjectFile 模型用于存储标注项目的文件快照
- 在创建标注项目时记录关联的文件快照数据
- 更新查询逻辑以基于项目快照过滤文件列表
- 优化导出统计功能使用快照数据进行计算
- 添加数据库表结构支持项目文件快照关系
2026-01-30 18:10:13 +08:00
parent 3c3ca130b3
commit 8b2a19f09a
7 changed files with 145 additions and 33 deletions
--- a/runtime/datamate-python/app/module/annotation/service/export.py
+++ b/runtime/datamate-python/app/module/annotation/service/export.py
@@ -25,7 +25,7 @@ from sqlalchemy import func, select
 from sqlalchemy.ext.asyncio import AsyncSession

 from app.core.logging import get_logger
-from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
+from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile


 async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
@@ -75,15 +75,18 @@ class AnnotationExportService:
        project = await self._get_project_or_404(project_id)
        logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}")

-        # 获取总文件数（只统计 ACTIVE 状态的文件）
+        # 获取总文件数（标注项目快照内的文件）
        total_result = await self.db.execute(
-            select(func.count()).select_from(DatasetFiles).where(
+            select(func.count())
+            .select_from(LabelingProjectFile)
+            .join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
+            .where(
+                LabelingProjectFile.project_id == project_id,
                DatasetFiles.dataset_id == project.dataset_id,
-                DatasetFiles.status == "ACTIVE",
            )
        )
        total_files = int(total_result.scalar() or 0)
-        logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}")
+        logger.info(f"Total files (snapshot): {total_files} for project_id={project_id}")

        # 获取已标注文件数（统计不同的 file_id 数量）
        annotated_result = await self.db.execute(
@@ -165,8 +168,13 @@ class AnnotationExportService:
            # 只获取已标注的数据
            result = await self.db.execute(
                select(AnnotationResult, DatasetFiles)
+                .join(LabelingProjectFile, LabelingProjectFile.file_id == AnnotationResult.file_id)
                .join(DatasetFiles, AnnotationResult.file_id == DatasetFiles.id)
-                .where(AnnotationResult.project_id == project_id)
+                .where(
+                    AnnotationResult.project_id == project_id,
+                    LabelingProjectFile.project_id == project_id,
+                    DatasetFiles.dataset_id == dataset_id,
+                )
                .order_by(AnnotationResult.updated_at.desc())
            )
            rows = result.all()
@@ -190,11 +198,13 @@ class AnnotationExportService:
                    )
                )
        else:
-            # 获取所有文件，包括未标注的（只获取 ACTIVE 状态的文件）
+            # 获取所有文件（基于标注项目快照）
            files_result = await self.db.execute(
-                select(DatasetFiles).where(
+                select(DatasetFiles)
+                .join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
+                .where(
+                    LabelingProjectFile.project_id == project_id,
                    DatasetFiles.dataset_id == dataset_id,
-                    DatasetFiles.status == "ACTIVE",
                )
            )
            files = files_result.scalars().all()