You've already forked DataMate
feat(annotation): 添加标注任务的数据量统计功能
- 在前端表格中新增数据量和已标注列显示
- 添加标注完成百分比计算和提示功能
- 在后端schema中增加totalCount和annotatedCount字段
- 实现项目统计数据查询服务方法
- 集成前后端数据映射和接口响应更新
This commit is contained in:
@@ -160,6 +160,30 @@ export default function DataAnnotation() {
|
||||
key: "datasetName",
|
||||
width: 180,
|
||||
},
|
||||
{
|
||||
title: "数据量",
|
||||
dataIndex: "totalCount",
|
||||
key: "totalCount",
|
||||
width: 100,
|
||||
align: "center" as const,
|
||||
},
|
||||
{
|
||||
title: "已标注",
|
||||
dataIndex: "annotatedCount",
|
||||
key: "annotatedCount",
|
||||
width: 100,
|
||||
align: "center" as const,
|
||||
render: (value: number, record: any) => {
  // Missing or falsy counts are shown as zero.
  const annotatedCount = value || 0;
  const totalCount = record.totalCount || 0;

  // Completion rate as a whole-number percentage; stays at 0 when the
  // total is not positive so we never divide by zero.
  let completion = 0;
  if (totalCount > 0) {
    completion = Math.round((annotatedCount / totalCount) * 100);
  }

  // The cell shows only the annotated count; the full ratio and the
  // percentage are exposed through the native hover tooltip.
  const tooltip = `${annotatedCount}/${totalCount} (${completion}%)`;
  return <span title={tooltip}>{annotatedCount}</span>;
},
|
||||
},
|
||||
{
|
||||
title: "创建时间",
|
||||
dataIndex: "createdAt",
|
||||
|
||||
@@ -48,6 +48,8 @@ export function mapAnnotationTask(task: any) {
|
||||
name: task.name,
|
||||
description: task.description || "",
|
||||
datasetName: task.datasetName || task.dataset_name || "-",
|
||||
totalCount: task.totalCount ?? task.total_count ?? 0,
|
||||
annotatedCount: task.annotatedCount ?? task.annotated_count ?? 0,
|
||||
createdAt: task.createdAt || task.created_at || "-",
|
||||
updatedAt: task.updatedAt || task.updated_at || "-",
|
||||
icon: <StickyNote />,
|
||||
|
||||
@@ -49,6 +49,8 @@ class DatasetMappingResponse(BaseModel):
|
||||
template_id: Optional[str] = Field(None, alias="templateId", description="关联的模板ID")
|
||||
template: Optional['AnnotationTemplateResponse'] = Field(None, description="关联的标注模板详情")
|
||||
label_config: Optional[str] = Field(None, alias="labelConfig", description="实际使用的 Label Studio XML 配置")
|
||||
total_count: int = Field(0, alias="totalCount", description="数据集总数据量")
|
||||
annotated_count: int = Field(0, alias="annotatedCount", description="已标注数据量")
|
||||
created_at: datetime = Field(..., alias="createdAt", description="创建时间")
|
||||
updated_at: Optional[datetime] = Field(None, alias="updatedAt", description="更新时间")
|
||||
deleted_at: Optional[datetime] = Field(None, alias="deletedAt", description="删除时间")
|
||||
|
||||
@@ -7,11 +7,11 @@ from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import LabelingProject, AnnotationTemplate
|
||||
from app.db.models.dataset_management import Dataset
|
||||
from app.db.models import LabelingProject, AnnotationTemplate, AnnotationResult
|
||||
from app.db.models.dataset_management import Dataset, DatasetFiles
|
||||
from app.module.annotation.schema import (
|
||||
DatasetMappingCreateRequest,
|
||||
DatasetMappingUpdateRequest,
|
||||
DatasetMappingCreateRequest,
|
||||
DatasetMappingUpdateRequest,
|
||||
DatasetMappingResponse,
|
||||
AnnotationTemplateResponse
|
||||
)
|
||||
@@ -33,6 +33,40 @@ class DatasetMappingService:
|
||||
Dataset,
|
||||
LabelingProject.dataset_id == Dataset.id
|
||||
)
|
||||
|
||||
async def _get_project_stats(
    self,
    project_id: str,
    dataset_id: str
) -> Tuple[int, int]:
    """
    Fetch the data-volume statistics for one labeling project.

    Args:
        project_id: ID of the labeling project.
        dataset_id: ID of the dataset.

    Returns:
        A (total_count, annotated_count) tuple.
    """
    # Total volume: DatasetFiles rows of this dataset whose status is "ACTIVE".
    active_files_stmt = (
        select(func.count())
        .select_from(DatasetFiles)
        .where(
            DatasetFiles.dataset_id == dataset_id,
            DatasetFiles.status == "ACTIVE",
        )
    )
    # Annotated volume: number of distinct file_id values among the
    # AnnotationResult rows recorded for this project.
    annotated_files_stmt = select(
        func.count(func.distinct(AnnotationResult.file_id))
    ).where(AnnotationResult.project_id == project_id)

    # scalar() may yield None, so each count falls back to 0.
    active_rows = await self.db.execute(active_files_stmt)
    total_count = int(active_rows.scalar() or 0)

    annotated_rows = await self.db.execute(annotated_files_stmt)
    annotated_count = int(annotated_rows.scalar() or 0)

    return total_count, annotated_count
|
||||
|
||||
async def _to_response_from_row(
|
||||
self,
|
||||
@@ -68,6 +102,11 @@ class DatasetMappingService:
|
||||
template_response = await template_service.get_template(self.db, template_id)
|
||||
logger.debug(f"Included template details for template_id: {template_id}")
|
||||
|
||||
# 获取统计数据
|
||||
total_count, annotated_count = await self._get_project_stats(
|
||||
mapping.id, mapping.dataset_id
|
||||
)
|
||||
|
||||
response_data = {
|
||||
"id": mapping.id,
|
||||
"dataset_id": mapping.dataset_id,
|
||||
@@ -78,6 +117,8 @@ class DatasetMappingService:
|
||||
"template_id": template_id,
|
||||
"template": template_response,
|
||||
"label_config": label_config,
|
||||
"total_count": total_count,
|
||||
"annotated_count": annotated_count,
|
||||
"created_at": mapping.created_at,
|
||||
"updated_at": mapping.updated_at,
|
||||
"deleted_at": mapping.deleted_at,
|
||||
@@ -125,6 +166,13 @@ class DatasetMappingService:
|
||||
template_response = await template_service.get_template(self.db, template_id)
|
||||
logger.debug(f"Included template details for template_id: {template_id}")
|
||||
|
||||
# 获取统计数据
|
||||
total_count, annotated_count = 0, 0
|
||||
if dataset_id:
|
||||
total_count, annotated_count = await self._get_project_stats(
|
||||
mapping.id, dataset_id
|
||||
)
|
||||
|
||||
# Create response dict with all fields
|
||||
response_data = {
|
||||
"id": mapping.id,
|
||||
@@ -136,6 +184,8 @@ class DatasetMappingService:
|
||||
"template_id": template_id,
|
||||
"template": template_response,
|
||||
"label_config": label_config,
|
||||
"total_count": total_count,
|
||||
"annotated_count": annotated_count,
|
||||
"created_at": mapping.created_at,
|
||||
"updated_at": mapping.updated_at,
|
||||
"deleted_at": mapping.deleted_at,
|
||||
|
||||
Reference in New Issue
Block a user