feat(annotation): 添加标注任务的数据量统计功能

- 在前端表格中新增数据量和已标注列显示
- 添加标注完成百分比计算和提示功能
- 在后端schema中增加totalCount和annotatedCount字段
- 实现项目统计数据查询服务方法
- 集成前后端数据映射和接口响应更新
2026-01-19 22:43:41 +08:00
parent 649ab2f6bb
commit cc0a977349
4 changed files with 82 additions and 4 deletions
--- a/runtime/datamate-python/app/module/annotation/service/mapping.py
+++ b/runtime/datamate-python/app/module/annotation/service/mapping.py
@@ -7,11 +7,11 @@ from datetime import datetime
 import uuid

 from app.core.logging import get_logger
-from app.db.models import LabelingProject, AnnotationTemplate
-from app.db.models.dataset_management import Dataset
+from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult
+from app.db.models.dataset_management import Dataset, DatasetFiles
 from app.module.annotation.schema import (
-    DatasetMappingCreateRequest, 
-    DatasetMappingUpdateRequest, 
+    DatasetMappingCreateRequest,
+    DatasetMappingUpdateRequest,
    DatasetMappingResponse,
    AnnotationTemplateResponse
 )
@@ -33,6 +33,40 @@ class DatasetMappingService:
            Dataset,
            LabelingProject.dataset_id == Dataset.id
        )
+
+    async def _get_project_stats(
+        self,
+        project_id: str,
+        dataset_id: str
+    ) -> Tuple[int, int]:
+        """
+        Fetch the data-volume statistics for a labeling project.
+
+        Args:
+            project_id: ID of the labeling project
+            dataset_id: ID of the dataset
+
+        Returns:
+            A (total_count, annotated_count) tuple
+        """
+        # Total data volume of the dataset (only files whose status is ACTIVE are counted)
+        total_result = await self.db.execute(
+            select(func.count()).select_from(DatasetFiles).where(
+                DatasetFiles.dataset_id == dataset_id,
+                DatasetFiles.status == "ACTIVE",
+            )
+        )
+        total_count = int(total_result.scalar() or 0)
+
+        # Annotated data volume (distinct file_id count for the project; no file-status filter here)
+        annotated_result = await self.db.execute(
+            select(func.count(func.distinct(AnnotationResult.file_id))).where(
+                AnnotationResult.project_id == project_id
+            )
+        )
+        annotated_count = int(annotated_result.scalar() or 0)
+
+        return total_count, annotated_count
    
    async def _to_response_from_row(
        self,
@@ -68,6 +102,11 @@ class DatasetMappingService:
            template_response = await template_service.get_template(self.db, template_id)
            logger.debug(f"Included template details for template_id: {template_id}")

+        # 获取统计数据
+        total_count, annotated_count = await self._get_project_stats(
+            mapping.id, mapping.dataset_id
+        )
+
        response_data = {
            "id": mapping.id,
            "dataset_id": mapping.dataset_id,
@@ -78,6 +117,8 @@ class DatasetMappingService:
            "template_id": template_id,
            "template": template_response,
            "label_config": label_config,
+            "total_count": total_count,
+            "annotated_count": annotated_count,
            "created_at": mapping.created_at,
            "updated_at": mapping.updated_at,
            "deleted_at": mapping.deleted_at,
@@ -125,6 +166,13 @@ class DatasetMappingService:
            template_response = await template_service.get_template(self.db, template_id)
            logger.debug(f"Included template details for template_id: {template_id}")

+        # 获取统计数据
+        total_count, annotated_count = 0, 0
+        if dataset_id:
+            total_count, annotated_count = await self._get_project_stats(
+                mapping.id, dataset_id
+            )
+
        # Create response dict with all fields
        response_data = {
            "id": mapping.id,
@@ -136,6 +184,8 @@ class DatasetMappingService:
            "template_id": template_id,
            "template": template_response,
            "label_config": label_config,
+            "total_count": total_count,
+            "annotated_count": annotated_count,
            "created_at": mapping.created_at,
            "updated_at": mapping.updated_at,
            "deleted_at": mapping.deleted_at,