diff --git a/backend/openapi/specs/data-management.yaml b/backend/openapi/specs/data-management.yaml
index 1aac250..b9d26fd 100644
--- a/backend/openapi/specs/data-management.yaml
+++ b/backend/openapi/specs/data-management.yaml
@@ -330,6 +330,35 @@ paths:
                   type: string
                   format: binary
 
+  /data-management/datasets/{datasetId}/files/upload/add:
+    post:
+      tags: [ DatasetFile ]
+      operationId: addFilesToDataset
+      summary: Add files to a dataset (creates database records only)
+      description: Adds the given list of source file paths to the dataset. Only database records are created; no physical file-system operations are performed.
+      parameters:
+        - name: datasetId
+          in: path
+          required: true
+          schema:
+            type: string
+          description: Dataset ID
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AddFilesRequest'
+      responses:
+        '200':
+          description: Added successfully; returns the list of created file records
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/DatasetFileResponse'
+
   /data-management/datasets/{datasetId}/files/upload/pre-upload:
     post:
       tags: [ DatasetFile ]
@@ -805,3 +834,19 @@ components:
         path:
           type: string
          description: Request path
+
+    AddFilesRequest:
+      type: object
+      description: Request for adding source file paths to a dataset
+      properties:
+        sourcePaths:
+          type: array
+          items:
+            type: string
+          description: List of source file paths (relative or absolute); each element is a file or directory path to add
+        softAdd:
+          type: boolean
+          description: If true, only create database records (defaults to false)
+          default: false
+      required:
+        - sourcePaths
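
For orientation, here is a minimal sketch of how a client might call the new `addFilesToDataset` operation. The base URL and dataset ID are placeholders, and Spring's `RestClient` is just one possible client; only the request shape is dictated by the `AddFilesRequest` schema above.

```java
import org.springframework.http.MediaType;
import org.springframework.web.client.RestClient;

import java.util.List;
import java.util.Map;

public class AddFilesExample {
    public static void main(String[] args) {
        // Hypothetical gateway address and dataset ID; adjust to the deployment.
        RestClient client = RestClient.create("http://localhost:8080");

        // Mirrors AddFilesRequest: sourcePaths is required; softAdd defaults to false.
        Map<String, Object> body = Map.of(
                "sourcePaths", List.of("/mnt/data/raw/a.txt", "/mnt/data/raw/images"),
                "softAdd", true);

        String created = client.post()
                .uri("/data-management/datasets/{datasetId}/files/upload/add", "ds-123")
                .contentType(MediaType.APPLICATION_JSON)
                .body(body)
                .retrieve()
                .body(String.class); // JSON array of DatasetFileResponse records
        System.out.println(created);
    }
}
```
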
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
index cceb516..02aaf54 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
@@ -1,6 +1,7 @@
 package com.datamate.datamanagement.application;
 
 import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
 import com.datamate.common.domain.model.ChunkUploadPreRequest;
 import com.datamate.common.domain.model.FileUploadResult;
 import com.datamate.common.domain.service.FileService;
@@ -29,6 +30,9 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import jakarta.servlet.http.HttpServletResponse;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+import org.apache.commons.io.IOUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.Resource;
@@ -37,7 +41,6 @@ import org.springframework.http.HttpHeaders;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 
-import java.io.BufferedInputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
@@ -45,14 +48,15 @@ import java.net.MalformedURLException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
 import java.time.LocalDateTime;
+import java.time.ZoneId;
 import java.time.format.DateTimeFormatter;
 import java.util.*;
 import java.util.concurrent.CompletableFuture;
 import java.util.function.Function;
 import java.util.stream.Collectors;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
+import java.util.stream.Stream;
 
 /**
  * Dataset file application service
  */
@@ -85,11 +89,77 @@ public class DatasetFileApplicationService {
      */
    @Transactional(readOnly = true)
    public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) {
-        IPage<DatasetFile> page = new com.baomidou.mybatisplus.extension.plugins.pagination.Page<>(pagingQuery.getPage(), pagingQuery.getSize());
+        IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
         IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page);
         return PagedResponse.of(files);
     }
 
+    /**
+     * Get the dataset file list, including directories under the given prefix
+     */
+    @Transactional(readOnly = true)
+    public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) {
+        Dataset dataset = datasetRepository.getById(datasetId);
+        int page = Math.max(pagingQuery.getPage(), 1);
+        int size = pagingQuery.getSize() == null || pagingQuery.getSize() <= 0 ? 20 : pagingQuery.getSize();
+        if (dataset == null) {
+            return PagedResponse.of(new Page<>(page, size));
+        }
+        String datasetPath = dataset.getPath();
+        Path queryPath = Path.of(datasetPath + File.separator + Objects.requireNonNullElse(prefix, ""));
+        Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
+                .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
+        try (Stream<Path> pathStream = Files.list(queryPath)) {
+            List<Path> allFiles = pathStream
+                    .filter(path -> path.toString().startsWith(datasetPath))
+                    .sorted(Comparator
+                            .comparing((Path path) -> !Files.isDirectory(path))
+                            .thenComparing(path -> path.getFileName().toString()))
+                    .collect(Collectors.toList());
+
+            // Compute pagination
+            int total = allFiles.size();
+            int totalPages = (int) Math.ceil((double) total / size);
+
+            // Slice out the current page
+            int fromIndex = (page - 1) * size;
+            fromIndex = Math.max(fromIndex, 0);
+            int toIndex = Math.min(fromIndex + size, total);
+
+            List<Path> pageData = new ArrayList<>();
+            if (fromIndex < total) {
+                pageData = allFiles.subList(fromIndex, toIndex);
+            }
+            List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList();
+
+            return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
+        } catch (IOException e) {
+            log.error("list dataset path error", e);
+            return PagedResponse.of(new Page<>(page, size));
+        }
+    }
+
+    private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) {
+        DatasetFile datasetFile = new DatasetFile();
+        LocalDateTime localDateTime = LocalDateTime.now();
+        try {
+            localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
+        } catch (IOException e) {
+            log.error("get last modified time error", e);
+        }
+        datasetFile.setFileName(path.getFileName().toString());
+        datasetFile.setUploadTime(localDateTime);
+        if (Files.isDirectory(path)) {
+            datasetFile.setId("directory-" + datasetFile.getFileName());
+        } else if (Objects.isNull(datasetFilesMap.get(path.toString()))) {
+            datasetFile.setId("file-" + datasetFile.getFileName());
+            datasetFile.setFileSize(path.toFile().length());
+        } else {
+            datasetFile = datasetFilesMap.get(path.toString());
+        }
+        return datasetFile;
+    }
+
     /**
      * Get file details
      */
@@ -151,15 +221,26 @@ public class DatasetFileApplicationService {
      */
     @Transactional(readOnly = true)
     public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) {
+        Dataset dataset = datasetRepository.getById(datasetId);
+        if (Objects.isNull(dataset)) {
+            throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND);
+        }
         List<DatasetFile> allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId);
-        fileRename(allByDatasetId);
+        Set<String> filePaths = allByDatasetId.stream().map(DatasetFile::getFilePath).collect(Collectors.toSet());
+        String datasetPath = dataset.getPath();
+        Path downloadPath = Path.of(datasetPath);
         response.setContentType("application/zip");
         String zipName = String.format("dataset_%s.zip", LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")));
         response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName);
-        try (ZipOutputStream zos = new ZipOutputStream(response.getOutputStream())) {
-            for (DatasetFile file : allByDatasetId) {
-                addToZipFile(file, zos);
+        try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) {
+            try (Stream<Path> pathStream = Files.walk(downloadPath)) {
+                List<Path> allPaths = pathStream.filter(path -> path.toString().startsWith(datasetPath))
+                        .filter(path -> filePaths.stream().anyMatch(filePath -> filePath.startsWith(path.toString())))
+                        .toList();
+                for (Path path : allPaths) {
+                    addToZipFile(path, downloadPath, zos);
+                }
             }
         } catch (IOException e) {
             log.error("Failed to download files in batches.", e);
@@ -167,42 +248,34 @@ public class DatasetFileApplicationService {
         }
     }
 
-    private void fileRename(List<DatasetFile> files) {
-        Set<String> uniqueFilenames = new HashSet<>();
-        for (DatasetFile file : files) {
-            String originalFilename = file.getFileName();
-            if (!uniqueFilenames.add(originalFilename)) {
-                String newFilename;
-                int counter = 1;
-                do {
-                    newFilename = generateNewFilename(originalFilename, counter);
-                    counter++;
-                } while (!uniqueFilenames.add(newFilename));
-                file.setFileName(newFilename);
-            }
-        }
-    }
+    private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException {
+        String entryName = basePath.relativize(path)
+                .toString()
+                .replace(File.separator, "/");
 
-    private String generateNewFilename(String oldFilename, int counter) {
-        int dotIndex = oldFilename.lastIndexOf(".");
-        return oldFilename.substring(0, dotIndex) + "-(" + counter + ")" + oldFilename.substring(dotIndex);
-    }
-
-    private void addToZipFile(DatasetFile file, ZipOutputStream zos) throws IOException {
-        if (file.getFilePath() == null || !Files.exists(Paths.get(file.getFilePath()))) {
-            log.warn("The file hasn't been found on filesystem, id: {}", file.getId());
-            return;
-        }
-        try (InputStream fis = Files.newInputStream(Paths.get(file.getFilePath()));
-             BufferedInputStream bis = new BufferedInputStream(fis)) {
-            ZipEntry zipEntry = new ZipEntry(file.getFileName());
-            zos.putNextEntry(zipEntry);
-            byte[] buffer = new byte[8192];
-            int length;
-            while ((length = bis.read(buffer)) >= 0) {
-                zos.write(buffer, 0, length);
+        // Handle directories: emit an entry with a trailing '/' and no data
+        if (Files.isDirectory(path)) {
+            if (!entryName.isEmpty()) {
+                entryName += "/";
+                ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName);
+                zos.putArchiveEntry(dirEntry);
+                zos.closeArchiveEntry();
             }
-            zos.closeEntry();
+        } else {
+            // Handle regular files
+            ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName);
+
+            // Carry size and modification time over to the entry
+            BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class);
+            fileEntry.setSize(attrs.size());
+            fileEntry.setLastModifiedTime(attrs.lastModifiedTime());
+
+            zos.putArchiveEntry(fileEntry);
+
+            try (InputStream is = Files.newInputStream(path)) {
+                IOUtils.copy(is, zos);
+            }
+            zos.closeArchiveEntry();
         }
     }
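
Unlike `getDatasetFiles`, which pages in the database, `getDatasetFilesWithDirectory` materializes the directory listing with `Files.list` and slices it in memory using 1-based page numbers. A self-contained sketch of that slice arithmetic; the helper name is illustrative, not part of the codebase:

```java
import java.util.List;

public class PageSliceDemo {
    /** Returns the 1-based page window [fromIndex, toIndex) over a fully materialized list. */
    static <T> List<T> slice(List<T> all, int page, int size) {
        int total = all.size();
        int fromIndex = Math.max((page - 1) * size, 0);
        int toIndex = Math.min(fromIndex + size, total);
        // Past-the-end pages yield an empty window instead of throwing.
        return fromIndex < total ? all.subList(fromIndex, toIndex) : List.of();
    }

    public static void main(String[] args) {
        List<Integer> all = List.of(1, 2, 3, 4, 5, 6, 7);
        System.out.println(slice(all, 1, 3)); // [1, 2, 3]
        System.out.println(slice(all, 3, 3)); // [7]
        System.out.println(slice(all, 4, 3)); // []
    }
}
```
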
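The download path moves from `java.util.zip` to commons-compress and now mirrors the on-disk tree inside the archive instead of flattening it and renaming duplicate file names. A standalone sketch of the same walk-and-relativize pattern, with hypothetical paths and without the servlet plumbing:

```java
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.io.IOUtils;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.stream.Stream;

public class ZipTreeDemo {
    public static void main(String[] args) throws IOException {
        Path root = Path.of("/tmp/dataset-root");   // hypothetical dataset directory
        Path target = Path.of("/tmp/dataset.zip");

        try (OutputStream out = Files.newOutputStream(target);
             ZipArchiveOutputStream zos = new ZipArchiveOutputStream(out);
             Stream<Path> walk = Files.walk(root)) {
            for (Path path : walk.toList()) {
                // Entry names are '/'-separated regardless of platform.
                String name = root.relativize(path).toString().replace(File.separator, "/");
                if (name.isEmpty()) {
                    continue; // skip the root itself
                }
                if (Files.isDirectory(path)) {
                    zos.putArchiveEntry(new ZipArchiveEntry(name + "/"));
                    zos.closeArchiveEntry();
                } else {
                    zos.putArchiveEntry(new ZipArchiveEntry(path.toFile(), name));
                    try (InputStream in = Files.newInputStream(path)) {
                        IOUtils.copy(in, zos);
                    }
                    zos.closeArchiveEntry();
                }
            }
        }
    }
}
```
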
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java
index 2a676c0..1d1a8bf 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetFileController.java
@@ -46,12 +46,10 @@ public class DatasetFileController {
             @PathVariable("datasetId") String datasetId,
             @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
             @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
-            @RequestParam(value = "fileType", required = false) String fileType,
-            @RequestParam(value = "status", required = false) String status,
-            @RequestParam(value = "name", required = false) String name) {
+            @RequestParam(value = "prefix", required = false) String prefix) {
         PagingQuery pagingQuery = new PagingQuery(page, size);
-        PagedResponse<DatasetFile> filesPage = datasetFileApplicationService.getDatasetFiles(
-                datasetId, fileType, status, name, pagingQuery);
+        PagedResponse<DatasetFile> filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(
+                datasetId, prefix, pagingQuery);
         return Response.ok(filesPage);
     }
 
diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/domain/utils/ArchiveAnalyzer.java b/backend/shared/domain-common/src/main/java/com/datamate/common/domain/utils/ArchiveAnalyzer.java
index d42c3f2..d3b6e07 100644
--- a/backend/shared/domain-common/src/main/java/com/datamate/common/domain/utils/ArchiveAnalyzer.java
+++ b/backend/shared/domain-common/src/main/java/com/datamate/common/domain/utils/ArchiveAnalyzer.java
@@ -12,6 +12,7 @@ import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.io.FileUtils;
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
@@ -145,7 +146,7 @@ public class ArchiveAnalyzer {
         Path path = Paths.get(archivePath.getParent().toString(), archiveEntry.getName());
         File file = path.toFile();
         long fileSize = 0L;
-        String extension = AnalyzerUtils.getExtension(archiveEntry.getName());
+        FileUtils.createParentDirectories(file);
         long supportFileSize = 1024*1024*1024; // upload size capped at 1 GB for now
 
         try (OutputStream outputStream = new BufferedOutputStream(Files.newOutputStream(file.toPath()))) {
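
Note that the listing endpoint's contract changes here: the `fileType`, `status`, and `name` filters are dropped in favor of a single `prefix` that addresses a sub-directory, and rows are distinguished by synthetic IDs (`directory-*` for folders, `file-*` for files not yet registered in the database). A hedged sketch of the new call shape, assuming the listing route follows the spec's `/data-management/datasets/{datasetId}/files` convention and using placeholder values:

```java
import org.springframework.web.client.RestClient;

public class ListFilesExample {
    public static void main(String[] args) {
        RestClient client = RestClient.create("http://localhost:8080"); // hypothetical base URL

        // Page numbers are now effectively 1-based; the service clamps page to >= 1.
        String json = client.get()
                .uri(uriBuilder -> uriBuilder
                        .path("/data-management/datasets/{datasetId}/files")
                        .queryParam("page", 1)
                        .queryParam("size", 20)
                        .queryParam("prefix", "images/train")
                        .build("ds-123"))
                .retrieve()
                .body(String.class);
        System.out.println(json);
    }
}
```
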
diff --git a/frontend/src/pages/DataManagement/Detail/components/Overview.tsx b/frontend/src/pages/DataManagement/Detail/components/Overview.tsx
index 85900ac..5d76312 100644
--- a/frontend/src/pages/DataManagement/Detail/components/Overview.tsx
+++ b/frontend/src/pages/DataManagement/Detail/components/Overview.tsx
@@ -1,6 +1,6 @@
 import { Button, Descriptions, DescriptionsProps, Modal, Table } from "antd";
 import { formatBytes, formatDateTime } from "@/utils/unit";
-import { Download, Trash2 } from "lucide-react";
+import { Download, Trash2, Folder, File } from "lucide-react";
 import { datasetTypeMap } from "../../dataset.const";
 
 export default function Overview({ dataset, filesOperation, fetchDataset }) {
@@ -102,13 +147,58 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
       dataIndex: "fileName",
       key: "fileName",
       fixed: "left",
+      render: (text: string, record: any) => {
+        const isDirectory = record.id.startsWith('directory-');
+        const iconSize = 16;
+
+        const content = (
+          <div style={{ display: "flex", alignItems: "center", gap: 8 }}>
+            {isDirectory ? (
+              <Folder size={iconSize} />
+            ) : (
+              <File size={iconSize} />
+            )}
+            {text}
+          </div>
+        );
+
+        if (isDirectory) {
+          return (
+            <a onClick={() => filesOperation.fetchFiles(`${filesOperation.pagination.prefix || ''}${text}/`)}>{content}</a>
+          );
+        }
+
+        return (
+          <span>{content}</span>
+        );
+      },
     },
     {
       title: "大小",
       dataIndex: "fileSize",
       key: "fileSize",
       width: 150,
-      render: (text) => formatBytes(text),
+      render: (text: number, record: any) => {
+        const isDirectory = record.id.startsWith('directory-');
+        if (isDirectory) {
+          return "-";
+        }
+        return formatBytes(text);
+      },
     },
     {
       title: "上传时间",
@@ -122,7 +167,12 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
       key: "action",
       width: 180,
       fixed: "right",
-      render: (_, record) => (
+      render: (_, record) => {
+        const isDirectory = record.id.startsWith('directory-');
+        if (isDirectory) {
+          return <span />;
+        }
+        return (
         <div>
           <Button type="link" icon={<Download size={16} />} onClick={() => filesOperation.handleDownloadFile(record)} />
           <Button type="link" danger icon={<Trash2 size={16} />} onClick={() => filesOperation.handleDeleteFile(record)} />
         </div>
-      ),
+        );
+      },
     },
   ];
+
   return (
     <>
@@ -182,6 +233,43 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
         </div>
       )}
+      <div style={{ display: "flex", alignItems: "center", gap: 8, marginBottom: 8 }}>
+        {(filesOperation.pagination.prefix || '') !== '' && (
+          <Button
+            size="small"
+            onClick={() => {
+              const parts = (filesOperation.pagination.prefix || '').split('/').filter(Boolean);
+              parts.pop();
+              filesOperation.fetchFiles(parts.length ? `${parts.join('/')}/` : '');
+            }}
+          >
+            返回上级
+          </Button>
+        )}
+        {filesOperation.pagination.prefix && (
+          <span>当前路径: {filesOperation.pagination.prefix}</span>
+        )}
+      </div>
       <Table
         columns={columns}
         dataSource={filesOperation.fileList}
         pagination={{
           current: filesOperation.pagination.current,
           pageSize: filesOperation.pagination.pageSize,
           total: filesOperation.pagination.total,
           showTotal: (total) => `共 ${total} 条`,
+          onChange: (page, pageSize) => {
+            filesOperation.setPagination(prev => ({
+              ...prev,
+              current: page,
+              pageSize: pageSize
+            }));
+            filesOperation.fetchFiles(filesOperation.pagination.prefix, page, pageSize);
+          }
         }}
       />
diff --git a/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts b/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts
index 5d6e2a1..40d7726 100644
--- a/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts
+++ b/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts
@@ -23,19 +23,35 @@ export function useFilesOperation(dataset: Dataset) {
     current: number;
     pageSize: number;
     total: number;
-  }>({ current: 1, pageSize: 10, total: 0 });
+    prefix?: string;
+  }>({ current: 1, pageSize: 10, total: 0, prefix: '' });
 
   // File-preview state
   const [previewVisible, setPreviewVisible] = useState(false);
   const [previewContent, setPreviewContent] = useState("");
   const [previewFileName, setPreviewFileName] = useState("");
 
-  const fetchFiles = async () => {
-    const { data } = await queryDatasetFilesUsingGet(id!, {
-      page: pagination.current - 1,
-      size: pagination.pageSize,
-    });
+  const fetchFiles = async (prefix?: string, current?: number, pageSize?: number) => {
+    const params: any = {
+      page: current ? current : pagination.current,
+      size: pageSize ? pageSize : pagination.pageSize,
+    };
+
+    if (prefix !== undefined) {
+      params.prefix = prefix;
+    } else if (pagination.prefix) {
+      params.prefix = pagination.prefix;
+    }
+
+    const { data } = await queryDatasetFilesUsingGet(id!, params);
     setFileList(data.content || []);
+
+    // Update pagination with the current prefix
+    setPagination(prev => ({
+      ...prev,
+      prefix: prefix !== undefined ? prefix : prev.prefix,
+      total: data.totalElements || 0,
+    }));
   };
 
   const handleBatchDeleteFiles = () => {
@@ -113,6 +129,7 @@ export function useFilesOperation(dataset: Dataset) {
     fileList,
     selectedFiles,
     setSelectedFiles,
+    pagination,
     setPagination,
     previewVisible,
     setPreviewVisible,
diff --git a/runtime/datamate-python/app/module/evaluation/service/evaluation.py b/runtime/datamate-python/app/module/evaluation/service/evaluation.py
index 7af279f..acfd786 100644
--- a/runtime/datamate-python/app/module/evaluation/service/evaluation.py
+++ b/runtime/datamate-python/app/module/evaluation/service/evaluation.py
@@ -113,6 +113,7 @@ class DatasetEvaluationExecutor(EvaluationExecutor):
                 file_id=dataset_file.id,
                 item_id=item.get("id") if item.get("id") else str(uuid.uuid4()),
                 eval_content=json.dumps(item, ensure_ascii=False),
+                eval_result="{}",
                 status=TaskStatus.PENDING.value,
                 created_by=self.task.created_by,
                 updated_by=self.task.updated_by,
@@ -152,6 +153,7 @@ class SynthesisEvaluationExecutor(EvaluationExecutor):
                 file_id=synthesis_file.id,
                 item_id=synthesis_data.id,
                 eval_content=json.dumps(synthesis_data.data),
+                eval_result="{}",
                 status=TaskStatus.PENDING.value,
                 created_by=self.task.created_by,
                 updated_by=self.task.updated_by,