diff --git a/runtime/datamate-python/app/module/annotation/service/export.py b/runtime/datamate-python/app/module/annotation/service/export.py index 8fa268b..ac27716 100644 --- a/runtime/datamate-python/app/module/annotation/service/export.py +++ b/runtime/datamate-python/app/module/annotation/service/export.py @@ -47,22 +47,26 @@ class AnnotationExportService: async def get_export_stats(self, project_id: str) -> ExportAnnotationsResponse: """获取导出统计信息""" project = await self._get_project_or_404(project_id) + logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}") - # 获取总文件数 + # 获取总文件数(只统计 ACTIVE 状态的文件) total_result = await self.db.execute( select(func.count()).select_from(DatasetFiles).where( - DatasetFiles.dataset_id == project.dataset_id + DatasetFiles.dataset_id == project.dataset_id, + DatasetFiles.status == "ACTIVE", ) ) total_files = int(total_result.scalar() or 0) + logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}") - # 获取已标注文件数 + # 获取已标注文件数(统计不同的 file_id 数量) annotated_result = await self.db.execute( - select(func.count(AnnotationResult.id.distinct())).where( + select(func.count(func.distinct(AnnotationResult.file_id))).where( AnnotationResult.project_id == project_id ) ) annotated_files = int(annotated_result.scalar() or 0) + logger.info(f"Annotated files: {annotated_files} for project_id={project_id}") return ExportAnnotationsResponse( project_id=project_id, @@ -154,9 +158,12 @@ class AnnotationExportService: ) ) else: - # 获取所有文件,包括未标注的 + # 获取所有文件,包括未标注的(只获取 ACTIVE 状态的文件) files_result = await self.db.execute( - select(DatasetFiles).where(DatasetFiles.dataset_id == dataset_id) + select(DatasetFiles).where( + DatasetFiles.dataset_id == dataset_id, + DatasetFiles.status == "ACTIVE", + ) ) files = files_result.scalars().all()