You've already forked DataMate
feat(annotation): 添加标注项目文件快照功能
- 新增 LabelingProjectFile 模型，用于存储标注项目的文件快照
- 在创建标注项目时记录关联的文件快照数据
- 更新查询逻辑，基于项目快照过滤文件列表
- 优化导出统计功能，改为使用快照数据进行计算
- 添加数据库表结构，以支持项目与文件的快照关系
This commit is contained in:
@@ -25,7 +25,7 @@ from sqlalchemy import func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from app.core.logging import get_logger
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
|
||||
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
|
||||
|
||||
|
||||
async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
|
||||
@@ -75,15 +75,18 @@ class AnnotationExportService:
|
||||
project = await self._get_project_or_404(project_id)
|
||||
logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}")
|
||||
|
||||
# 获取总文件数(只统计 ACTIVE 状态的文件)
|
||||
# 获取总文件数(标注项目快照内的文件)
|
||||
total_result = await self.db.execute(
|
||||
select(func.count()).select_from(DatasetFiles).where(
|
||||
select(func.count())
|
||||
.select_from(LabelingProjectFile)
|
||||
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == project.dataset_id,
|
||||
DatasetFiles.status == "ACTIVE",
|
||||
)
|
||||
)
|
||||
total_files = int(total_result.scalar() or 0)
|
||||
logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}")
|
||||
logger.info(f"Total files (snapshot): {total_files} for project_id={project_id}")
|
||||
|
||||
# 获取已标注文件数(统计不同的 file_id 数量)
|
||||
annotated_result = await self.db.execute(
|
||||
@@ -165,8 +168,13 @@ class AnnotationExportService:
|
||||
# 只获取已标注的数据
|
||||
result = await self.db.execute(
|
||||
select(AnnotationResult, DatasetFiles)
|
||||
.join(LabelingProjectFile, LabelingProjectFile.file_id == AnnotationResult.file_id)
|
||||
.join(DatasetFiles, AnnotationResult.file_id == DatasetFiles.id)
|
||||
.where(AnnotationResult.project_id == project_id)
|
||||
.where(
|
||||
AnnotationResult.project_id == project_id,
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == dataset_id,
|
||||
)
|
||||
.order_by(AnnotationResult.updated_at.desc())
|
||||
)
|
||||
rows = result.all()
|
||||
@@ -190,11 +198,13 @@ class AnnotationExportService:
|
||||
)
|
||||
)
|
||||
else:
|
||||
# 获取所有文件,包括未标注的(只获取 ACTIVE 状态的文件)
|
||||
# 获取所有文件(基于标注项目快照)
|
||||
files_result = await self.db.execute(
|
||||
select(DatasetFiles).where(
|
||||
select(DatasetFiles)
|
||||
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
|
||||
.where(
|
||||
LabelingProjectFile.project_id == project_id,
|
||||
DatasetFiles.dataset_id == dataset_id,
|
||||
DatasetFiles.status == "ACTIVE",
|
||||
)
|
||||
)
|
||||
files = files_result.scalars().all()
|
||||
|
||||
Reference in New Issue
Block a user