feat(annotation): 添加标注项目文件快照功能

- 新增 LabelingProjectFile 模型用于存储标注项目的文件快照
- 在创建标注项目时记录关联的文件快照数据
- 更新查询逻辑以基于项目快照过滤文件列表
- 优化导出统计功能,改为使用快照数据进行计算
- 添加数据库表结构支持项目文件快照关系
This commit is contained in:
2026-01-30 18:10:13 +08:00
parent 3c3ca130b3
commit 8b2a19f09a
7 changed files with 145 additions and 33 deletions

View File

@@ -25,7 +25,7 @@ from sqlalchemy import func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.core.logging import get_logger
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject
from app.db.models import AnnotationResult, Dataset, DatasetFiles, LabelingProject, LabelingProjectFile
async def _read_file_content(file_path: str, max_size: int = 10 * 1024 * 1024) -> Optional[str]:
@@ -75,15 +75,18 @@ class AnnotationExportService:
project = await self._get_project_or_404(project_id)
logger.info(f"Export stats for project: id={project_id}, dataset_id={project.dataset_id}, name={project.name}")
# 获取总文件数(只统计 ACTIVE 状态的文件)
# 获取总文件数(标注项目快照内的文件)
total_result = await self.db.execute(
select(func.count()).select_from(DatasetFiles).where(
select(func.count())
.select_from(LabelingProjectFile)
.join(DatasetFiles, LabelingProjectFile.file_id == DatasetFiles.id)
.where(
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == project.dataset_id,
DatasetFiles.status == "ACTIVE",
)
)
total_files = int(total_result.scalar() or 0)
logger.info(f"Total files (ACTIVE): {total_files} for dataset_id={project.dataset_id}")
logger.info(f"Total files (snapshot): {total_files} for project_id={project_id}")
# 获取已标注文件数(统计不同的 file_id 数量)
annotated_result = await self.db.execute(
@@ -165,8 +168,13 @@ class AnnotationExportService:
# 只获取已标注的数据
result = await self.db.execute(
select(AnnotationResult, DatasetFiles)
.join(LabelingProjectFile, LabelingProjectFile.file_id == AnnotationResult.file_id)
.join(DatasetFiles, AnnotationResult.file_id == DatasetFiles.id)
.where(AnnotationResult.project_id == project_id)
.where(
AnnotationResult.project_id == project_id,
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == dataset_id,
)
.order_by(AnnotationResult.updated_at.desc())
)
rows = result.all()
@@ -190,11 +198,13 @@ class AnnotationExportService:
)
)
else:
# 获取所有文件,包括未标注的(只获取 ACTIVE 状态的文件)
# 获取所有文件(基于标注项目快照)
files_result = await self.db.execute(
select(DatasetFiles).where(
select(DatasetFiles)
.join(LabelingProjectFile, LabelingProjectFile.file_id == DatasetFiles.id)
.where(
LabelingProjectFile.project_id == project_id,
DatasetFiles.dataset_id == dataset_id,
DatasetFiles.status == "ACTIVE",
)
)
files = files_result.scalars().all()