feat(annotation): 添加标注任务的数据量统计功能

- 在前端表格中新增数据量和已标注列显示
- 添加标注完成百分比计算和提示功能
- 在后端schema中增加totalCount和annotatedCount字段
- 实现项目统计数据查询服务方法
- 集成前后端数据映射和接口响应更新
This commit is contained in:
2026-01-19 22:43:41 +08:00
parent 649ab2f6bb
commit cc0a977349
4 changed files with 82 additions and 4 deletions

View File

@@ -7,11 +7,11 @@ from datetime import datetime
import uuid
from app.core.logging import get_logger
from app.db.models import LabelingProject, AnnotationTemplate
from app.db.models.dataset_management import Dataset
from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult
from app.db.models.dataset_management import Dataset, DatasetFiles
from app.module.annotation.schema import (
DatasetMappingCreateRequest,
DatasetMappingUpdateRequest,
DatasetMappingCreateRequest,
DatasetMappingUpdateRequest,
DatasetMappingResponse,
AnnotationTemplateResponse
)
@@ -33,6 +33,40 @@ class DatasetMappingService:
Dataset,
LabelingProject.dataset_id == Dataset.id
)
async def _get_project_stats(
    self,
    project_id: str,
    dataset_id: str
) -> Tuple[int, int]:
    """
    Fetch the size and annotation progress numbers for a labeling project.

    Two COUNT queries are issued through ``self.db``, one after the other:

    1. the number of ``DatasetFiles`` rows that belong to ``dataset_id``
       and whose ``status`` equals ``"ACTIVE"``;
    2. the number of distinct ``AnnotationResult.file_id`` values recorded
       for ``project_id``.

    Args:
        project_id: ID of the labeling project.
        dataset_id: ID of the dataset.

    Returns:
        A ``(total_count, annotated_count)`` tuple. A ``None``/falsy scalar
        from either query is reported as ``0``.
    """
    # NOTE(review): the annotated query is filtered by project only, not by
    # dataset or file status, so annotated_count is not bounded by
    # total_count here -- confirm callers tolerate annotated > total.
    active_files_stmt = (
        select(func.count())
        .select_from(DatasetFiles)
        .where(
            DatasetFiles.dataset_id == dataset_id,
            DatasetFiles.status == "ACTIVE",
        )
    )
    file_rows = await self.db.execute(active_files_stmt)
    file_total = int(file_rows.scalar() or 0)

    # DISTINCT so that several results for the same file count only once.
    distinct_file_ids = func.distinct(AnnotationResult.file_id)
    annotated_files_stmt = select(func.count(distinct_file_ids)).where(
        AnnotationResult.project_id == project_id
    )
    annotated_rows = await self.db.execute(annotated_files_stmt)
    annotated_total = int(annotated_rows.scalar() or 0)

    return file_total, annotated_total
async def _to_response_from_row(
self,
@@ -68,6 +102,11 @@ class DatasetMappingService:
template_response = await template_service.get_template(self.db, template_id)
logger.debug(f"Included template details for template_id: {template_id}")
# 获取统计数据
total_count, annotated_count = await self._get_project_stats(
mapping.id, mapping.dataset_id
)
response_data = {
"id": mapping.id,
"dataset_id": mapping.dataset_id,
@@ -78,6 +117,8 @@ class DatasetMappingService:
"template_id": template_id,
"template": template_response,
"label_config": label_config,
"total_count": total_count,
"annotated_count": annotated_count,
"created_at": mapping.created_at,
"updated_at": mapping.updated_at,
"deleted_at": mapping.deleted_at,
@@ -125,6 +166,13 @@ class DatasetMappingService:
template_response = await template_service.get_template(self.db, template_id)
logger.debug(f"Included template details for template_id: {template_id}")
# 获取统计数据
total_count, annotated_count = 0, 0
if dataset_id:
total_count, annotated_count = await self._get_project_stats(
mapping.id, dataset_id
)
# Create response dict with all fields
response_data = {
"id": mapping.id,
@@ -136,6 +184,8 @@ class DatasetMappingService:
"template_id": template_id,
"template": template_response,
"label_config": label_config,
"total_count": total_count,
"annotated_count": annotated_count,
"created_at": mapping.created_at,
"updated_at": mapping.updated_at,
"deleted_at": mapping.deleted_at,