diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java index 869a174..aa4004f 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java @@ -1,79 +1,80 @@ -package com.datamate.datamanagement.application; - -import com.baomidou.mybatisplus.core.metadata.IPage; -import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import com.datamate.common.domain.model.ChunkUploadPreRequest; -import com.datamate.common.domain.model.FileUploadResult; -import com.datamate.common.domain.service.FileService; -import com.datamate.common.domain.utils.AnalyzerUtils; -import com.datamate.common.domain.utils.ArchiveAnalyzer; -import com.datamate.common.infrastructure.exception.BusinessAssert; -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.CommonErrorCode; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.common.interfaces.PagedResponse; -import com.datamate.common.interfaces.PagingQuery; -import com.datamate.datamanagement.common.enums.DuplicateMethod; -import com.datamate.datamanagement.common.enums.DatasetType; -import com.datamate.datamanagement.domain.contants.DatasetConstant; -import com.datamate.datamanagement.domain.model.dataset.Dataset; -import com.datamate.datamanagement.domain.model.dataset.DatasetFile; -import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo; -import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; -import 
com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; -import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; -import com.datamate.datamanagement.interfaces.converter.DatasetConverter; -import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; -import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; -import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import jakarta.servlet.http.HttpServletResponse; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; -import org.apache.commons.io.IOUtils; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.core.io.Resource; -import org.springframework.core.io.UrlResource; -import org.springframework.http.HttpHeaders; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; -import org.springframework.transaction.support.TransactionSynchronization; -import org.springframework.transaction.support.TransactionSynchronizationManager; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.BasicFileAttributes; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.*; +package 
com.datamate.datamanagement.application; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.datamate.common.domain.model.ChunkUploadPreRequest; +import com.datamate.common.domain.model.FileUploadResult; +import com.datamate.common.domain.service.FileService; +import com.datamate.common.domain.utils.AnalyzerUtils; +import com.datamate.common.domain.utils.ArchiveAnalyzer; +import com.datamate.common.infrastructure.exception.BusinessAssert; +import com.datamate.common.infrastructure.exception.BusinessException; +import com.datamate.common.infrastructure.exception.CommonErrorCode; +import com.datamate.common.infrastructure.exception.SystemErrorCode; +import com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.interfaces.PagingQuery; +import com.datamate.datamanagement.common.enums.DuplicateMethod; +import com.datamate.datamanagement.common.enums.DatasetType; +import com.datamate.datamanagement.domain.contants.DatasetConstant; +import com.datamate.datamanagement.domain.model.dataset.Dataset; +import com.datamate.datamanagement.domain.model.dataset.DatasetFile; +import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo; +import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; +import com.datamate.datamanagement.interfaces.converter.DatasetConverter; +import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; +import 
com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import jakarta.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.Resource; +import org.springframework.core.io.UrlResource; +import org.springframework.http.HttpHeaders; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; +import org.springframework.transaction.support.TransactionSynchronization; +import org.springframework.transaction.support.TransactionSynchronizationManager; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.time.LocalDateTime; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.*; import java.util.function.Function; import java.util.stream.Collectors; import java.util.stream.Stream; - -/** - * 数据集文件应用服务 - */ -@Slf4j -@Service -@Transactional -public class DatasetFileApplicationService { - private static final String PDF_FILE_TYPE = "pdf"; - private static final String DOC_FILE_TYPE = "doc"; - private static final String DOCX_FILE_TYPE = "docx"; - private static final String XLS_FILE_TYPE = "xls"; - private static final String XLSX_FILE_TYPE = "xlsx"; + +/** + * 数据集文件应用服务 + */ +@Slf4j +@Service +@Transactional +public class DatasetFileApplicationService { + private static 
final String PDF_FILE_TYPE = "pdf"; + private static final String DOC_FILE_TYPE = "doc"; + private static final String DOCX_FILE_TYPE = "docx"; + private static final String XLS_FILE_TYPE = "xls"; + private static final String XLSX_FILE_TYPE = "xlsx"; private static final Set DOCUMENT_TEXT_FILE_TYPES = Set.of( PDF_FILE_TYPE, DOC_FILE_TYPE, @@ -87,20 +88,20 @@ public class DatasetFileApplicationService { private static final String INTERNAL_DIR_NAME = ".datamate"; private static final String INTERNAL_UPLOAD_DIR_NAME = "uploading"; private static final String INTERNAL_VERSIONS_DIR_NAME = "versions"; - + private final DatasetFileRepository datasetFileRepository; private final DatasetRepository datasetRepository; private final FileService fileService; private final PdfTextExtractAsyncService pdfTextExtractAsyncService; private final DatasetFilePreviewService datasetFilePreviewService; - - @Value("${datamate.data-management.base-path:/dataset}") - private String datasetBasePath; - + + @Value("${datamate.data-management.base-path:/dataset}") + private String datasetBasePath; + @Value("${datamate.data-management.file.duplicate:VERSION}") private DuplicateMethod duplicateMethod; - - @Autowired + + @Autowired public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository, DatasetRepository datasetRepository, FileService fileService, @@ -112,53 +113,53 @@ public class DatasetFileApplicationService { this.pdfTextExtractAsyncService = pdfTextExtractAsyncService; this.datasetFilePreviewService = datasetFilePreviewService; } - - /** - * 获取数据集文件列表 - */ - @Transactional(readOnly = true) - public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, - Boolean hasAnnotation, PagingQuery pagingQuery) { - return getDatasetFiles(datasetId, fileType, status, name, hasAnnotation, false, pagingQuery); - } - - /** - * 获取数据集文件列表,支持排除已被转换为TXT的源文档文件 - * - * @param datasetId 数据集ID - * @param fileType 文件类型过滤 - * @param status 状态过滤 - * 
@param name 文件名模糊查询 - * @param hasAnnotation 是否有标注 - * @param excludeSourceDocuments 是否排除源文档(PDF/DOC/DOCX/XLS/XLSX) - * @param pagingQuery 分页参数 - * @return 分页文件列表 - */ - @Transactional(readOnly = true) - public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, - Boolean hasAnnotation, boolean excludeSourceDocuments, PagingQuery pagingQuery) { - IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); - IPage files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page); - - if (excludeSourceDocuments) { - // 过滤掉源文档文件(PDF/DOC/DOCX/XLS/XLSX),用于标注场景只展示派生文件 - List filteredRecords = files.getRecords().stream() - .filter(file -> !isSourceDocument(file)) - .collect(Collectors.toList()); - - // 重新构建分页结果 - Page filteredPage = new Page<>(files.getCurrent(), files.getSize(), files.getTotal()); - filteredPage.setRecords(filteredRecords); - return PagedResponse.of(filteredPage); - } - - return PagedResponse.of(files); - } - - /** - * 获取数据集文件列表 - */ - @Transactional(readOnly = true) + + /** + * 获取数据集文件列表 + */ + @Transactional(readOnly = true) + public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, + Boolean hasAnnotation, PagingQuery pagingQuery) { + return getDatasetFiles(datasetId, fileType, status, name, hasAnnotation, false, pagingQuery); + } + + /** + * 获取数据集文件列表,支持排除已被转换为TXT的源文档文件 + * + * @param datasetId 数据集ID + * @param fileType 文件类型过滤 + * @param status 状态过滤 + * @param name 文件名模糊查询 + * @param hasAnnotation 是否有标注 + * @param excludeSourceDocuments 是否排除源文档(PDF/DOC/DOCX/XLS/XLSX) + * @param pagingQuery 分页参数 + * @return 分页文件列表 + */ + @Transactional(readOnly = true) + public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, + Boolean hasAnnotation, boolean excludeSourceDocuments, PagingQuery pagingQuery) { + IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); + IPage files = 
datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page); + + if (excludeSourceDocuments) { + // 过滤掉源文档文件(PDF/DOC/DOCX/XLS/XLSX),用于标注场景只展示派生文件 + List filteredRecords = files.getRecords().stream() + .filter(file -> !isSourceDocument(file)) + .collect(Collectors.toList()); + + // 重新构建分页结果 + Page filteredPage = new Page<>(files.getCurrent(), files.getSize(), files.getTotal()); + filteredPage.setRecords(filteredRecords); + return PagedResponse.of(filteredPage); + } + + return PagedResponse.of(files); + } + + /** + * 获取数据集文件列表 + */ + @Transactional(readOnly = true) public PagedResponse getDatasetFilesWithDirectory(String datasetId, String prefix, boolean excludeDerivedFiles, PagingQuery pagingQuery) { Dataset dataset = datasetRepository.getById(datasetId); int page = Math.max(pagingQuery.getPage(), 1); @@ -209,55 +210,55 @@ public class DatasetFileApplicationService { .comparing((Path path) -> !Files.isDirectory(path)) .thenComparing(path -> path.getFileName().toString())) .collect(Collectors.toList()); - - // 计算分页 - int total = allFiles.size(); - int totalPages = (int) Math.ceil((double) total / size); - - // 获取当前页数据 - int fromIndex = (page - 1) * size; - fromIndex = Math.max(fromIndex, 0); - int toIndex = Math.min(fromIndex + size, total); - - List pageData = new ArrayList<>(); - if (fromIndex < total) { - pageData = allFiles.subList(fromIndex, toIndex); - } + + // 计算分页 + int total = allFiles.size(); + int totalPages = (int) Math.ceil((double) total / size); + + // 获取当前页数据 + int fromIndex = (page - 1) * size; + fromIndex = Math.max(fromIndex, 0); + int toIndex = Math.min(fromIndex + size, total); + + List pageData = new ArrayList<>(); + if (fromIndex < total) { + pageData = allFiles.subList(fromIndex, toIndex); + } List datasetFiles = pageData.stream() .map(path -> getDatasetFile(path, datasetFilesMap, excludeDerivedFiles, derivedFilePaths)) .toList(); - - return new PagedResponse<>(page, size, total, totalPages, datasetFiles); - } 
catch (IOException e) { - log.error("list dataset path error", e); - return PagedResponse.of(new Page<>(page, size)); - } - } - + + return new PagedResponse<>(page, size, total, totalPages, datasetFiles); + } catch (IOException e) { + log.error("list dataset path error", e); + return PagedResponse.of(new Page<>(page, size)); + } + } + private DatasetFile getDatasetFile(Path path, Map datasetFilesMap, boolean excludeDerivedFiles, Set derivedFilePaths) { DatasetFile datasetFile = new DatasetFile(); LocalDateTime localDateTime = LocalDateTime.now(); - try { - localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - } catch (IOException e) { - log.error("get last modified time error", e); - } - datasetFile.setFileName(path.getFileName().toString()); - datasetFile.setUploadTime(localDateTime); - - // 目录与普通文件区分处理 - if (Files.isDirectory(path)) { - datasetFile.setId("directory-" + datasetFile.getFileName()); - datasetFile.setDirectory(true); - - // 统计目录下文件数量和总大小 - try { - long fileCount; - long totalSize; - + try { + localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); + } catch (IOException e) { + log.error("get last modified time error", e); + } + datasetFile.setFileName(path.getFileName().toString()); + datasetFile.setUploadTime(localDateTime); + + // 目录与普通文件区分处理 + if (Files.isDirectory(path)) { + datasetFile.setId("directory-" + datasetFile.getFileName()); + datasetFile.setDirectory(true); + + // 统计目录下文件数量和总大小 + try { + long fileCount; + long totalSize; + try (Stream walk = Files.walk(path)) { Stream fileStream = walk.filter(Files::isRegularFile); if (excludeDerivedFiles && !derivedFilePaths.isEmpty()) { @@ -284,21 +285,21 @@ public class DatasetFileApplicationService { }) .sum(); } - - datasetFile.setFileCount(fileCount); - datasetFile.setFileSize(totalSize); - } catch (IOException e) { - log.error("stat directory info error", e); - } - } else { + + 
datasetFile.setFileCount(fileCount); + datasetFile.setFileSize(totalSize); + } catch (IOException e) { + log.error("stat directory info error", e); + } + } else { DatasetFile exist = datasetFilesMap.get(normalizeFilePath(path.toString())); if (exist == null) { datasetFile.setId("file-" + datasetFile.getFileName()); datasetFile.setFileSize(path.toFile().length()); } else { - datasetFile = exist; - } - } + datasetFile = exist; + } + } return datasetFile; } @@ -397,25 +398,25 @@ public class DatasetFileApplicationService { if (datasetFile == null) { return false; } - String fileType = datasetFile.getFileType(); - if (fileType == null || fileType.isBlank()) { - return false; - } - return DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT)); - } - + String fileType = datasetFile.getFileType(); + if (fileType == null || fileType.isBlank()) { + return false; + } + return DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT)); + } + private boolean isDerivedFile(DatasetFile datasetFile) { if (datasetFile == null) { return false; } - String metadata = datasetFile.getMetadata(); - if (metadata == null || metadata.isBlank()) { - return false; - } - try { - ObjectMapper mapper = new ObjectMapper(); - Map metadataMap = mapper.readValue(metadata, new TypeReference>() {}); - return metadataMap.get(DERIVED_METADATA_KEY) != null; + String metadata = datasetFile.getMetadata(); + if (metadata == null || metadata.isBlank()) { + return false; + } + try { + ObjectMapper mapper = new ObjectMapper(); + Map metadataMap = mapper.readValue(metadata, new TypeReference>() {}); + return metadataMap.get(DERIVED_METADATA_KEY) != null; } catch (Exception e) { log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e); return false; @@ -564,65 +565,140 @@ public class DatasetFileApplicationService { * 获取文件详情 */ @Transactional(readOnly = true) - public DatasetFile getDatasetFile(String datasetId, String fileId) { - DatasetFile 
file = datasetFileRepository.getById(fileId); - if (file == null) { - throw new IllegalArgumentException("File not found: " + fileId); - } - if (!file.getDatasetId().equals(datasetId)) { - throw new IllegalArgumentException("File does not belong to the specified dataset"); - } - return file; - } - - /** - * 删除文件 - */ + public DatasetFile getDatasetFile(String datasetId, String fileId) { + DatasetFile file = datasetFileRepository.getById(fileId); + if (file == null) { + throw new IllegalArgumentException("File not found: " + fileId); + } + if (!file.getDatasetId().equals(datasetId)) { + throw new IllegalArgumentException("File does not belong to the specified dataset"); + } + return file; + } + + /** + * 删除文件 + */ @Transactional public void deleteDatasetFile(String datasetId, String fileId) { DatasetFile file = getDatasetFile(datasetId, fileId); - Dataset dataset = datasetRepository.getById(datasetId); - datasetFileRepository.removeById(fileId); - if (!isArchivedStatus(file)) { - dataset.setFiles(new ArrayList<>(Collections.singleton(file))); - dataset.removeFile(file); - datasetRepository.updateById(dataset); + if (file == null) { + log.warn("File not found: datasetId={}, fileId={}", datasetId, fileId); + return; } - datasetFilePreviewService.deletePreviewFileQuietly(datasetId, fileId); - // 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 - if (file.getFilePath().startsWith(dataset.getPath())) { + String logicalPath = file.getLogicalPath(); + + // 如果 logicalPath 为 null、空字符串或纯空白字符,直接删除当前文件(兼容旧数据) + if (StringUtils.isBlank(logicalPath)) { + deleteDatasetFileInternal(datasetId, file); + return; + } + + List allVersions = datasetFileRepository.findAllByDatasetIdAndLogicalPath(datasetId, logicalPath); + + for (DatasetFile versionFile : allVersions) { + deleteDatasetFileInternal(datasetId, versionFile); + } + } + + private void deleteDatasetFileInternal(String datasetId, DatasetFile file) { + Dataset dataset = datasetRepository.getById(datasetId); + if (file == 
null || dataset == null) { + return; + } + + // 先删除数据库记录,确保数据库操作成功后再清理派生文件 + try { + datasetFileRepository.removeById(file.getId()); + } catch (Exception e) { + log.error("Failed to delete file record from database: fileId={}", file.getId(), e); + // 数据库删除失败时,跳过后续清理以避免数据不一致 + return; + } + + // 数据库删除成功后,再删除派生文件 + if (isSourceDocument(file)) { + deleteDerivedTextFileQuietly(datasetId, file.getId()); + } + + if (!isArchivedStatus(file)) { + try { + dataset.setFiles(new ArrayList<>(Collections.singleton(file))); + dataset.removeFile(file); + datasetRepository.updateById(dataset); + } catch (Exception e) { + log.error("Failed to update dataset: datasetId={}", datasetId, e); + } + } + + datasetFilePreviewService.deletePreviewFileQuietly(datasetId, file.getId()); + + if (file.getFilePath() != null && file.getFilePath().startsWith(dataset.getPath())) { try { Path filePath = Paths.get(file.getFilePath()); Files.deleteIfExists(filePath); - } catch (IOException ex) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - } - - /** - * 下载文件 - */ - @Transactional(readOnly = true) - public Resource downloadFile(String datasetId, String fileId) { - DatasetFile file = getDatasetFile(datasetId, fileId); - try { - Path filePath = Paths.get(file.getFilePath()).normalize(); - Resource resource = new UrlResource(filePath.toUri()); - if (resource.exists()) { - return resource; - } else { - throw new RuntimeException("File not found: " + file.getFileName()); - } - } catch (MalformedURLException ex) { - throw new RuntimeException("File not found: " + file.getFileName(), ex); - } - } - - /** - * 下载文件 - */ - @Transactional(readOnly = true) + } catch (IOException ex) { + log.error("Failed to delete physical file: filePath={}", file.getFilePath(), ex); + } + } + } + + private void deleteDerivedTextFileQuietly(String datasetId, String sourceFileId) { + if (sourceFileId == null || sourceFileId.isBlank()) { + return; + } + + try { + List derivedFiles = 
datasetFileRepository.findAllByDatasetId(datasetId).stream() + .filter(f -> isDerivedFileFromSource(f, sourceFileId)) + .toList(); + + for (DatasetFile derivedFile : derivedFiles) { + deleteDatasetFileInternal(datasetId, derivedFile); + } + } catch (Exception e) { + log.error("Failed to delete derived text files for sourceFileId: {}", sourceFileId, e); + } + } + + private boolean isDerivedFileFromSource(DatasetFile file, String sourceFileId) { + if (file == null || file.getMetadata() == null || file.getMetadata().isBlank()) { + return false; + } + try { + ObjectMapper mapper = new ObjectMapper(); + Map metadataMap = mapper.readValue(file.getMetadata(), new TypeReference>() {}); + Object derivedFromFileId = metadataMap.get(DERIVED_METADATA_KEY); + return derivedFromFileId != null && sourceFileId.equals(String.valueOf(derivedFromFileId)); + } catch (Exception e) { + log.debug("Failed to parse metadata for derived detection: fileId={}", file.getId(), e); + return false; + } + } + + /** + * 下载文件 + */ + @Transactional(readOnly = true) + public Resource downloadFile(String datasetId, String fileId) { + DatasetFile file = getDatasetFile(datasetId, fileId); + try { + Path filePath = Paths.get(file.getFilePath()).normalize(); + Resource resource = new UrlResource(filePath.toUri()); + if (resource.exists()) { + return resource; + } else { + throw new RuntimeException("File not found: " + file.getFileName()); + } + } catch (MalformedURLException ex) { + throw new RuntimeException("File not found: " + file.getFileName(), ex); + } + } + + /** + * 下载文件 + */ + @Transactional(readOnly = true) public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) { Dataset dataset = datasetRepository.getById(datasetId); if (Objects.isNull(dataset)) { @@ -654,49 +730,49 @@ public class DatasetFileApplicationService { } } } catch (IOException e) { - log.error("Failed to download files in batches.", e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } 
- } - - private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { - String entryName = basePath.relativize(path) - .toString() - .replace(File.separator, "/"); - - // 处理目录 - if (Files.isDirectory(path)) { - if (!entryName.isEmpty()) { - entryName += "/"; - ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); - zos.putArchiveEntry(dirEntry); - zos.closeArchiveEntry(); - } - } else { - // 处理文件 - ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); - - // 设置更多属性 - BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); - fileEntry.setSize(attrs.size()); - fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); - - zos.putArchiveEntry(fileEntry); - - try (InputStream is = Files.newInputStream(path)) { - IOUtils.copy(is, zos); - } - zos.closeArchiveEntry(); - } - } - - /** - * 预上传 - * - * @param chunkUploadRequest 上传请求 - * @param datasetId 数据集id - * @return 请求id - */ + log.error("Failed to download files in batches.", e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { + String entryName = basePath.relativize(path) + .toString() + .replace(File.separator, "/"); + + // 处理目录 + if (Files.isDirectory(path)) { + if (!entryName.isEmpty()) { + entryName += "/"; + ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); + zos.putArchiveEntry(dirEntry); + zos.closeArchiveEntry(); + } + } else { + // 处理文件 + ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); + + // 设置更多属性 + BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); + fileEntry.setSize(attrs.size()); + fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); + + zos.putArchiveEntry(fileEntry); + + try (InputStream is = Files.newInputStream(path)) { + IOUtils.copy(is, zos); + } + zos.closeArchiveEntry(); + } + } + + /** + * 预上传 + * + * @param 
chunkUploadRequest 上传请求 + * @param datasetId 数据集id + * @return 请求id + */ @Transactional public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) { Dataset dataset = datasetRepository.getById(datasetId); @@ -736,16 +812,16 @@ public class DatasetFileApplicationService { String checkInfoJson = objectMapper.writeValueAsString(checkInfo); request.setCheckInfo(checkInfoJson); } catch (JsonProcessingException e) { - log.warn("Failed to serialize checkInfo to JSON", e); - } - return fileService.preUpload(request); - } - - /** - * 切片上传 - * - * @param uploadFileRequest 上传请求 - */ + log.warn("Failed to serialize checkInfo to JSON", e); + } + return fileService.preUpload(request); + } + + /** + * 切片上传 + * + * @param uploadFileRequest 上传请求 + */ @Transactional public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) { FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest)); @@ -759,7 +835,7 @@ public class DatasetFileApplicationService { public void cancelUpload(String reqId) { fileService.cancelUpload(reqId); } - + private void saveFileInfoToDb(FileUploadResult fileUploadResult, String datasetId) { if (Objects.isNull(fileUploadResult.getSavedFile())) { // 文件切片上传没有完成 @@ -1028,17 +1104,17 @@ public class DatasetFileApplicationService { log.warn("Failed to archive orphan target file, logicalPath={}, targetPath={}", logicalPath, targetFilePath, e); } } - - /** - * 在数据集下创建子目录 - */ - @Transactional - public void createDirectory(String datasetId, CreateDirectoryRequest req) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - String datasetPath = dataset.getPath(); + + /** + * 在数据集下创建子目录 + */ + @Transactional + public void createDirectory(String datasetId, CreateDirectoryRequest req) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == 
null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + String datasetPath = dataset.getPath(); String parentPrefix = Optional.ofNullable(req.getParentPrefix()).orElse("").trim(); parentPrefix = parentPrefix.replace("\\", "/"); while (parentPrefix.startsWith("/")) { @@ -1057,41 +1133,41 @@ public class DatasetFileApplicationService { if (directoryName.contains("..") || directoryName.contains("/") || directoryName.contains("\\")) { throw BusinessException.of(CommonErrorCode.PARAM_ERROR); } - - Path basePath = Paths.get(datasetPath); - Path targetPath = parentPrefix.isEmpty() - ? basePath.resolve(directoryName) - : basePath.resolve(parentPrefix).resolve(directoryName); - - Path normalized = targetPath.normalize(); - if (!normalized.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - try { - Files.createDirectories(normalized); - } catch (IOException e) { - log.error("Failed to create directory {} for dataset {}", normalized, datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - /** - * 下载目录为 ZIP 文件 - */ - @Transactional(readOnly = true) - public void downloadDirectory(String datasetId, String prefix, HttpServletResponse response) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - String datasetPath = dataset.getPath(); - prefix = Optional.ofNullable(prefix).orElse("").trim(); - prefix = prefix.replace("\\", "/"); - while (prefix.startsWith("/")) { - prefix = prefix.substring(1); - } + + Path basePath = Paths.get(datasetPath); + Path targetPath = parentPrefix.isEmpty() + ? 
basePath.resolve(directoryName) + : basePath.resolve(parentPrefix).resolve(directoryName); + + Path normalized = targetPath.normalize(); + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + try { + Files.createDirectories(normalized); + } catch (IOException e) { + log.error("Failed to create directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + /** + * 下载目录为 ZIP 文件 + */ + @Transactional(readOnly = true) + public void downloadDirectory(String datasetId, String prefix, HttpServletResponse response) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + String datasetPath = dataset.getPath(); + prefix = Optional.ofNullable(prefix).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } while (prefix.endsWith("/")) { prefix = prefix.substring(0, prefix.length() - 1); } @@ -1102,35 +1178,35 @@ public class DatasetFileApplicationService { Path basePath = Paths.get(datasetPath); Path targetPath = prefix.isEmpty() ? basePath : basePath.resolve(prefix); Path normalized = targetPath.normalize(); - - if (!normalized.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - - String zipFileName = prefix.isEmpty() ? 
dataset.getName() : prefix.replace("/", "_"); - zipFileName = zipFileName + "_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + ".zip"; - - try { - response.setContentType("application/zip"); - response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipFileName + "\""); - - try (ZipArchiveOutputStream zipOut = new ZipArchiveOutputStream(response.getOutputStream())) { - zipDirectory(normalized, normalized, zipOut); - zipOut.finish(); - } - } catch (IOException e) { - log.error("Failed to download directory {} for dataset {}", normalized, datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - /** - * 递归压缩目录 - */ + + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + String zipFileName = prefix.isEmpty() ? 
dataset.getName() : prefix.replace("/", "_"); + zipFileName = zipFileName + "_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + ".zip"; + + try { + response.setContentType("application/zip"); + response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipFileName + "\""); + + try (ZipArchiveOutputStream zipOut = new ZipArchiveOutputStream(response.getOutputStream())) { + zipDirectory(normalized, normalized, zipOut); + zipOut.finish(); + } + } catch (IOException e) { + log.error("Failed to download directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + /** + * 递归压缩目录 + */ private void zipDirectory(Path sourceDir, Path basePath, ZipArchiveOutputStream zipOut) throws IOException { try (Stream paths = Files.walk(sourceDir)) { paths.filter(path -> !Files.isDirectory(path)) @@ -1140,36 +1216,36 @@ public class DatasetFileApplicationService { Path relativePath = basePath.relativize(path); ZipArchiveEntry zipEntry = new ZipArchiveEntry(relativePath.toString()); zipOut.putArchiveEntry(zipEntry); - try (InputStream fis = Files.newInputStream(path)) { - IOUtils.copy(fis, zipOut); - } - zipOut.closeArchiveEntry(); - } catch (IOException e) { - log.error("Failed to add file to zip: {}", path, e); - } - }); - } - } - - /** - * 删除目录及其所有内容 - */ - @Transactional - public void deleteDirectory(String datasetId, String prefix) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - prefix = Optional.ofNullable(prefix).orElse("").trim(); - prefix = prefix.replace("\\", "/"); - while (prefix.startsWith("/")) { - prefix = prefix.substring(1); - } - while (prefix.endsWith("/")) { - prefix = prefix.substring(0, prefix.length() - 1); - } - + try (InputStream fis = Files.newInputStream(path)) { + IOUtils.copy(fis, zipOut); + } + 
zipOut.closeArchiveEntry(); + } catch (IOException e) { + log.error("Failed to add file to zip: {}", path, e); + } + }); + } + } + + /** + * 删除目录及其所有内容 + */ + @Transactional + public void deleteDirectory(String datasetId, String prefix) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + prefix = Optional.ofNullable(prefix).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + while (prefix.endsWith("/")) { + prefix = prefix.substring(0, prefix.length() - 1); + } + if (prefix.isEmpty()) { throw BusinessException.of(CommonErrorCode.PARAM_ERROR); } @@ -1180,80 +1256,80 @@ public class DatasetFileApplicationService { String datasetPath = dataset.getPath(); Path basePath = Paths.get(datasetPath); Path targetPath = basePath.resolve(prefix); - Path normalized = targetPath.normalize(); - - if (!normalized.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - - // 删除数据库中该目录下的所有文件记录(基于数据集内相对路径判断) - String datasetPathNorm = datasetPath.replace("\\", "/"); - String logicalPrefix = prefix; // 已经去掉首尾斜杠 - List filesToDelete = datasetFileRepository.findAllByDatasetId(datasetId).stream() - .filter(file -> { - if (file.getFilePath() == null) { - return false; - } - String filePath = file.getFilePath().replace("\\", "/"); - if (!filePath.startsWith(datasetPathNorm)) { - return false; - } - String relative = filePath.substring(datasetPathNorm.length()); - while (relative.startsWith("/")) { - relative = relative.substring(1); - } - return relative.equals(logicalPrefix) || relative.startsWith(logicalPrefix + "/"); - }) - .collect(Collectors.toList()); - + Path normalized = targetPath.normalize(); + + if 
(!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + // 删除数据库中该目录下的所有文件记录(基于数据集内相对路径判断) + String datasetPathNorm = datasetPath.replace("\\", "/"); + String logicalPrefix = prefix; // 已经去掉首尾斜杠 + List filesToDelete = datasetFileRepository.findAllByDatasetId(datasetId).stream() + .filter(file -> { + if (file.getFilePath() == null) { + return false; + } + String filePath = file.getFilePath().replace("\\", "/"); + if (!filePath.startsWith(datasetPathNorm)) { + return false; + } + String relative = filePath.substring(datasetPathNorm.length()); + while (relative.startsWith("/")) { + relative = relative.substring(1); + } + return relative.equals(logicalPrefix) || relative.startsWith(logicalPrefix + "/"); + }) + .collect(Collectors.toList()); + for (DatasetFile file : filesToDelete) { datasetFileRepository.removeById(file.getId()); datasetFilePreviewService.deletePreviewFileQuietly(datasetId, file.getId()); } - - // 删除文件系统中的目录 - try { - deleteDirectoryRecursively(normalized); - } catch (IOException e) { - log.error("Failed to delete directory {} for dataset {}", normalized, datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - - // 更新数据集 - dataset.setFiles(filesToDelete); - for (DatasetFile file : filesToDelete) { - dataset.removeFile(file); - } - datasetRepository.updateById(dataset); - } - - /** - * 递归删除目录 - */ - private void deleteDirectoryRecursively(Path directory) throws IOException { - try (Stream paths = Files.walk(directory)) { - paths.sorted(Comparator.reverseOrder()) - .forEach(path -> { - try { - Files.delete(path); - } catch (IOException e) { - log.error("Failed to delete: {}", path, e); - } - }); - } - } - - /** - * 复制文件到数据集目录 - * - * @param datasetId 数据集id - * @param req 复制文件请求 - * @return 复制的文件列表 - */ + + // 删除文件系统中的目录 + try { + 
deleteDirectoryRecursively(normalized); + } catch (IOException e) { + log.error("Failed to delete directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + + // 更新数据集 + dataset.setFiles(filesToDelete); + for (DatasetFile file : filesToDelete) { + dataset.removeFile(file); + } + datasetRepository.updateById(dataset); + } + + /** + * 递归删除目录 + */ + private void deleteDirectoryRecursively(Path directory) throws IOException { + try (Stream paths = Files.walk(directory)) { + paths.sorted(Comparator.reverseOrder()) + .forEach(path -> { + try { + Files.delete(path); + } catch (IOException e) { + log.error("Failed to delete: {}", path, e); + } + }); + } + } + + /** + * 复制文件到数据集目录 + * + * @param datasetId 数据集id + * @param req 复制文件请求 + * @return 复制的文件列表 + */ @Transactional public List copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) { Dataset dataset = datasetRepository.getById(datasetId); @@ -1275,15 +1351,15 @@ public class DatasetFileApplicationService { datasetRepository.updateById(dataset); return copiedFiles; } - - /** - * 复制文件到数据集目录(保留相对路径,适用于数据源导入) - * - * @param datasetId 数据集id - * @param sourceRoot 数据源根目录 - * @param sourcePaths 源文件路径列表 - * @return 复制的文件列表 - */ + + /** + * 复制文件到数据集目录(保留相对路径,适用于数据源导入) + * + * @param datasetId 数据集id + * @param sourceRoot 数据源根目录 + * @param sourcePaths 源文件路径列表 + * @return 复制的文件列表 + */ @Transactional public List copyFilesToDatasetDirWithSourceRoot(String datasetId, Path sourceRoot, List sourcePaths) { Dataset dataset = datasetRepository.getById(datasetId); @@ -1315,14 +1391,14 @@ public class DatasetFileApplicationService { datasetRepository.updateById(dataset); return copiedFiles; } - - /** - * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) - * - * @param datasetId 数据集id - * @param req 添加文件请求 - * @return 添加的文件列表 - */ + + /** + * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) + * + * @param datasetId 数据集id + * @param req 添加文件请求 + * @return 添加的文件列表 + */ @Transactional public List 
addFilesToDataset(String datasetId, AddFilesRequest req) { Dataset dataset = datasetRepository.getById(datasetId); @@ -1334,11 +1410,11 @@ public class DatasetFileApplicationService { String metadata; try { Map metadataMap = Map.of("softAdd", softAdd); - ObjectMapper objectMapper = new ObjectMapper(); - metadata = objectMapper.writeValueAsString(metadataMap); - } catch (JsonProcessingException e) { - log.error("Failed to serialize metadataMap", e); - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); + ObjectMapper objectMapper = new ObjectMapper(); + metadata = objectMapper.writeValueAsString(metadataMap); + } catch (JsonProcessingException e) { + log.error("Failed to serialize metadataMap", e); + throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); } for (String sourceFilePath : req.sourcePaths()) { @@ -1406,34 +1482,34 @@ public class DatasetFileApplicationService { datasetRepository.updateById(dataset); // Note: addFilesToDataset only creates DB records, no file system operations // If file copy is needed, use copyFilesToDatasetDir endpoint instead - return addedFiles; - } - - private void triggerPdfTextExtraction(Dataset dataset, DatasetFile datasetFile) { - if (dataset == null || datasetFile == null) { - return; - } - if (dataset.getDatasetType() != DatasetType.TEXT) { - return; - } - String fileType = datasetFile.getFileType(); - if (fileType == null || !DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT))) { - return; - } - String datasetId = dataset.getId(); - String fileId = datasetFile.getId(); - if (datasetId == null || fileId == null) { - return; - } - if (TransactionSynchronizationManager.isSynchronizationActive()) { - TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() { - @Override - public void afterCommit() { - pdfTextExtractAsyncService.extractPdfText(datasetId, fileId); - } - }); - return; - } - pdfTextExtractAsyncService.extractPdfText(datasetId, fileId); - } -} + return 
addedFiles; + } + + private void triggerPdfTextExtraction(Dataset dataset, DatasetFile datasetFile) { + if (dataset == null || datasetFile == null) { + return; + } + if (dataset.getDatasetType() != DatasetType.TEXT) { + return; + } + String fileType = datasetFile.getFileType(); + if (fileType == null || !DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT))) { + return; + } + String datasetId = dataset.getId(); + String fileId = datasetFile.getId(); + if (datasetId == null || fileId == null) { + return; + } + if (TransactionSynchronizationManager.isSynchronizationActive()) { + TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() { + @Override + public void afterCommit() { + pdfTextExtractAsyncService.extractPdfText(datasetId, fileId); + } + }); + return; + } + pdfTextExtractAsyncService.extractPdfText(datasetId, fileId); + } +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/persistence/mapper/DatasetFileMapper.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/persistence/mapper/DatasetFileMapper.java index d55c6cc..62a6b95 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/persistence/mapper/DatasetFileMapper.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/persistence/mapper/DatasetFileMapper.java @@ -46,4 +46,13 @@ public interface DatasetFileMapper extends BaseMapper { * @return 文件数统计列表 */ List countNonDerivedByDatasetIds(@Param("datasetIds") List datasetIds); + + /** + * 查询指定逻辑路径的所有文件(包括所有状态) + * + * @param datasetId 数据集ID + * @param logicalPath 逻辑路径 + * @return 文件列表 + */ + List findAllByDatasetIdAndLogicalPath(@Param("datasetId") String datasetId, @Param("logicalPath") String logicalPath); } diff --git 
    /**
     * 查询指定逻辑路径的所有文件(包括所有状态)— delegates to the mapper, which
     * orders results by version (desc) then upload time (desc).
     *
     * @param datasetId   数据集ID
     * @param logicalPath 逻辑路径
     * @return 文件列表 (all statuses, newest version first)
     */
    @Override
    public List<DatasetFile> findAllByDatasetIdAndLogicalPath(String datasetId, String logicalPath) {
        return datasetFileMapper.findAllByDatasetIdAndLogicalPath(datasetId, logicalPath);
    }