You've already forked DataMate
refactor(data-import): 优化数据源文件扫描和复制逻辑
- 修改数据源文件扫描方法，直接在主流程中获取任务详情和路径
- 移除独立的 `getFilePaths` 方法，将路径扫描逻辑整合到 `scanFilePaths` 方法中
- 新增 `copyFilesToDatasetDirWithSourceRoot` 方法，支持保留相对路径的文件复制
- 更新数据集文件应用服务中的文件复制逻辑，支持相对路径处理
- 修改 Python 后端项目接口中的文件查询逻辑，移除注释掉的编辑器服务引用
- 调整文件过滤逻辑，基于元数据中的派生源 ID 进行文件筛选
- 移除编辑器服务中已废弃的源文档过滤条件
This commit is contained in:
@@ -12,7 +12,6 @@ from app.module.shared.schema import StandardResponse, PaginatedData
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
|
||||
from app.module.annotation.service.editor import AnnotationEditorService
|
||||
from ..service.mapping import DatasetMappingService
|
||||
from ..service.template import AnnotationTemplateService
|
||||
from ..schema import (
|
||||
@@ -118,15 +117,30 @@ async def create_mapping(
|
||||
configuration=project_configuration or None,
|
||||
)
|
||||
|
||||
file_query = select(DatasetFiles.id).where(
|
||||
DatasetFiles.dataset_id == request.dataset_id
|
||||
file_result = await db.execute(
|
||||
select(DatasetFiles).where(DatasetFiles.dataset_id == request.dataset_id)
|
||||
)
|
||||
file_records = file_result.scalars().all()
|
||||
snapshot_file_ids: list[str] = []
|
||||
if dataset_type == TEXT_DATASET_TYPE:
|
||||
file_query = file_query.where(
|
||||
~AnnotationEditorService._build_source_document_filter()
|
||||
)
|
||||
file_result = await db.execute(file_query)
|
||||
snapshot_file_ids = [str(fid) for fid in file_result.scalars().all()]
|
||||
derived_source_ids = set()
|
||||
for file_record in file_records:
|
||||
metadata = getattr(file_record, "dataset_filemetadata", None)
|
||||
if isinstance(metadata, dict):
|
||||
source_id = metadata.get("derived_from_file_id")
|
||||
if source_id:
|
||||
derived_source_ids.add(str(source_id))
|
||||
snapshot_file_ids = [
|
||||
str(file_record.id)
|
||||
for file_record in file_records
|
||||
if file_record.id and str(file_record.id) not in derived_source_ids
|
||||
]
|
||||
else:
|
||||
snapshot_file_ids = [
|
||||
str(file_record.id)
|
||||
for file_record in file_records
|
||||
if file_record.id
|
||||
]
|
||||
|
||||
# 创建映射关系并写入快照
|
||||
mapping = await mapping_service.create_mapping_with_snapshot(
|
||||
|
||||
Reference in New Issue
Block a user