fix(annotation): 修复导出统计功能中的文件状态过滤问题

- 在获取总文件数时添加 ACTIVE 状态过滤条件
- 修改已标注文件数统计逻辑，使用 distinct(file_id) 进行计数
- 在导出功能中为所有文件查询添加 ACTIVE 状态过滤
- 增加日志记录以跟踪导出统计过程
- 修正……（此条说明在原文中被截断）
2026-01-18 17:35:40 +08:00
parent c48d2fdeb8
commit 0c97648a9e
1 changed files with 13 additions and 6 deletions
--- a/runtime/datamate-python/app/module/annotation/service/export.py
+++ b/runtime/datamate-python/app/module/annotation/service/export.py
@@ -47,22 +47,26 @@ class AnnotationExportService:
    async def get_export_stats(self, project_id: str) -> ExportAnnotationsResponse:
        """获取导出统计信息"""
        project = await self._get_project_or_404(project_id)
+        logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}")

-        # 获取总文件数
+        # 获取总文件数（只统计 ACTIVE 状态的文件）
        total_result = await self.db.execute(
            select(func.count()).select_from(DatasetFiles).where(
-                DatasetFiles.dataset_id == project.dataset_id
+                DatasetFiles.dataset_id == project.dataset_id,
+                DatasetFiles.status == "ACTIVE",
            )
        )
        total_files = int(total_result.scalar() or 0)
+        logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}")

-        # 获取已标注文件数
+        # 获取已标注文件数（统计不同的 file_id 数量）
        annotated_result = await self.db.execute(
-            select(func.count(AnnotationResult.id.distinct())).where(
+            select(func.count(func.distinct(AnnotationResult.file_id))).where(
                AnnotationResult.project_id == project_id
            )
        )
        annotated_files = int(annotated_result.scalar() or 0)
+        logger.info(f"Annotated files: {annotated_files} for project_id={project_id}")

        return ExportAnnotationsResponse(
            project_id=project_id,
@@ -154,9 +158,12 @@ class AnnotationExportService:
                    )
                )
        else:
-            # 获取所有文件，包括未标注的
+            # 获取所有文件，包括未标注的（只获取 ACTIVE 状态的文件）
            files_result = await self.db.execute(
-                select(DatasetFiles).where(DatasetFiles.dataset_id == dataset_id)
+                select(DatasetFiles).where(
+                    DatasetFiles.dataset_id == dataset_id,
+                    DatasetFiles.status == "ACTIVE",
+                )
            )
            files = files_result.scalars().all()