diff --git a/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx b/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx index 9331708..a82e888 100644 --- a/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx +++ b/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx @@ -160,6 +160,30 @@ export default function DataAnnotation() { key: "datasetName", width: 180, }, + { + title: "数据量", + dataIndex: "totalCount", + key: "totalCount", + width: 100, + align: "center" as const, + }, + { + title: "已标注", + dataIndex: "annotatedCount", + key: "annotatedCount", + width: 100, + align: "center" as const, + render: (value: number, record: any) => { + const total = record.totalCount || 0; + const annotated = value || 0; + const percent = total > 0 ? Math.round((annotated / total) * 100) : 0; + return ( + + {annotated} + + ); + }, + }, { title: "创建时间", dataIndex: "createdAt", diff --git a/frontend/src/pages/DataAnnotation/annotation.const.tsx b/frontend/src/pages/DataAnnotation/annotation.const.tsx index 9cbb7d2..7310547 100644 --- a/frontend/src/pages/DataAnnotation/annotation.const.tsx +++ b/frontend/src/pages/DataAnnotation/annotation.const.tsx @@ -48,6 +48,8 @@ export function mapAnnotationTask(task: any) { name: task.name, description: task.description || "", datasetName: task.datasetName || task.dataset_name || "-", + totalCount: task.totalCount ?? task.total_count ?? 0, + annotatedCount: task.annotatedCount ?? task.annotated_count ?? 0, createdAt: task.createdAt || task.created_at || "-", updatedAt: task.updatedAt || task.updated_at || "-", icon: , diff --git a/runtime/datamate-python/app/module/annotation/schema/mapping.py b/runtime/datamate-python/app/module/annotation/schema/mapping.py index 87e3a9b..8a221f5 100644 --- a/runtime/datamate-python/app/module/annotation/schema/mapping.py +++ b/runtime/datamate-python/app/module/annotation/schema/mapping.py @@ -49,6 +49,8 @@ class DatasetMappingResponse(BaseModel): template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID") template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情") label_config: Optional[str] = Field(None, alias="labelConfig", description="实际使用的 Label Studio XML 配置") + total_count: int = Field(0, alias="totalCount", description="数据集总数据量") + annotated_count: int = Field(0, alias="annotatedCount", description="已标注数据量") created_at: datetime = Field(..., alias="createdAt", description="创建时间") updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间") deleted_at: Optional[datetime] = Field(None, alias="deletedAt", description="删除时间") diff --git a/runtime/datamate-python/app/module/annotation/service/mapping.py b/runtime/datamate-python/app/module/annotation/service/mapping.py index a1aae34..5884ad6 100644 --- a/runtime/datamate-python/app/module/annotation/service/mapping.py +++ b/runtime/datamate-python/app/module/annotation/service/mapping.py @@ -7,11 +7,11 @@ from datetime import datetime import uuid from app.core.logging import get_logger -from app.db.models import LabelingProject, AnnotationTemplate -from app.db.models.dataset_management import Dataset +from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult +from app.db.models.dataset_management import Dataset, DatasetFiles from app.module.annotation.schema import ( - DatasetMappingCreateRequest, - DatasetMappingUpdateRequest, + DatasetMappingCreateRequest, + DatasetMappingUpdateRequest, DatasetMappingResponse, AnnotationTemplateResponse ) @@ -33,6 +33,40 @@ class DatasetMappingService: Dataset, LabelingProject.dataset_id == Dataset.id ) + + async def _get_project_stats( + self, + project_id: str, + dataset_id: str + ) -> Tuple[int, int]: + """ + 获取项目的统计数据 + + Args: + project_id: 标注项目ID + dataset_id: 数据集ID + + Returns: + (total_count, annotated_count) 元组 + """ + # 获取数据集总数据量(只统计 ACTIVE 状态的文件) + total_result = await self.db.execute( + select(func.count()).select_from(DatasetFiles).where( + DatasetFiles.dataset_id == dataset_id, + DatasetFiles.status == "ACTIVE", + ) + ) + total_count = int(total_result.scalar() or 0) + + # 获取已标注数据量(统计不同的 file_id 数量) + annotated_result = await self.db.execute( + select(func.count(func.distinct(AnnotationResult.file_id))).where( + AnnotationResult.project_id == project_id + ) + ) + annotated_count = int(annotated_result.scalar() or 0) + + return total_count, annotated_count async def _to_response_from_row( self, @@ -68,6 +102,11 @@ class DatasetMappingService: template_response = await template_service.get_template(self.db, template_id) logger.debug(f"Included template details for template_id: {template_id}") + # 获取统计数据 + total_count, annotated_count = await self._get_project_stats( + mapping.id, mapping.dataset_id + ) + response_data = { "id": mapping.id, "dataset_id": mapping.dataset_id, @@ -78,6 +117,8 @@ class DatasetMappingService: "template_id": template_id, "template": template_response, "label_config": label_config, + "total_count": total_count, + "annotated_count": annotated_count, "created_at": mapping.created_at, "updated_at": mapping.updated_at, "deleted_at": mapping.deleted_at, @@ -125,6 +166,13 @@ class DatasetMappingService: template_response = await template_service.get_template(self.db, template_id) logger.debug(f"Included template details for template_id: {template_id}") + # 获取统计数据 + total_count, annotated_count = 0, 0 + if dataset_id: + total_count, annotated_count = await self._get_project_stats( + mapping.id, dataset_id + ) + # Create response dict with all fields response_data = { "id": mapping.id, @@ -136,6 +184,8 @@ class DatasetMappingService: "template_id": template_id, "template": template_response, "label_config": label_config, + "total_count": total_count, + "annotated_count": annotated_count, "created_at": mapping.created_at, "updated_at": mapping.updated_at, "deleted_at": mapping.deleted_at,