feature: data management supports nested folders (#150)

* fix: mount the storage that the backend-python service needs in k8s deployments

* fix: add an API definition for adding dataset files without copying them

* fix: initialize evaluation results to an empty value so the API does not error before evaluation finishes

* feature: data management supports nested folders (listings mirror the filesystem; batch downloads keep relative paths)

* fix: remove redundant file-renaming logic

* refactor: remove unused imports
hefanli
2025-12-10 16:42:45 +08:00
committed by GitHub
parent fea7133dee
commit f87060490c
7 changed files with 290 additions and 58 deletions

View File

@@ -330,6 +330,35 @@ paths:
            type: string
            format: binary
+  /data-management/datasets/{datasetId}/files/upload/add:
+    post:
+      tags: [ DatasetFile ]
+      operationId: addFilesToDataset
+      summary: Add files to a dataset (create database records only)
+      description: Adds the given list of source file paths to the dataset, creating database records only, without performing any physical filesystem operations.
+      parameters:
+        - name: datasetId
+          in: path
+          required: true
+          schema:
+            type: string
+          description: Dataset ID
+      requestBody:
+        required: true
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/AddFilesRequest'
+      responses:
+        '200':
+          description: Added successfully; returns the list of created file records
+          content:
+            application/json:
+              schema:
+                type: array
+                items:
+                  $ref: '#/components/schemas/DatasetFileResponse'
  /data-management/datasets/{datasetId}/files/upload/pre-upload:
    post:
      tags: [ DatasetFile ]
@@ -805,3 +834,19 @@ components:
        path:
          type: string
          description: Request path
+    AddFilesRequest:
+      type: object
+      description: Request to add source file paths to a dataset
+      properties:
+        sourcePaths:
+          type: array
+          items:
+            type: string
+          description: List of source file paths (relative or absolute); each element is a file or directory path to add
+        softAdd:
+          type: boolean
+          description: If true, only create database records (default false)
+          default: false
+      required:
+        - sourcePaths
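For illustration, a minimal sketch of calling the new endpoint with Java's built-in HTTP client; the base URL, dataset id, and source paths are hypothetical:

    import java.net.URI;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;

    public class AddFilesExample {
        public static void main(String[] args) throws Exception {
            // Hypothetical deployment and dataset id; adjust to your environment.
            String url = "http://localhost:8080/data-management/datasets/ds-123/files/upload/add";
            // softAdd=true: only database records are created, no files are copied.
            String body = """
                    {"sourcePaths": ["/mnt/shared/raw/images", "/mnt/shared/raw/readme.txt"],
                     "softAdd": true}""";
            HttpRequest request = HttpRequest.newBuilder(URI.create(url))
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body))
                    .build();
            HttpResponse<String> response = HttpClient.newHttpClient()
                    .send(request, HttpResponse.BodyHandlers.ofString());
            // Expect 200 with a JSON array of created file records.
            System.out.println(response.statusCode() + " " + response.body());
        }
    }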

View File

@@ -1,6 +1,7 @@
package com.datamate.datamanagement.application;

import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.common.domain.model.ChunkUploadPreRequest;
import com.datamate.common.domain.model.FileUploadResult;
import com.datamate.common.domain.service.FileService;
@@ -29,6 +30,9 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
+import org.apache.commons.io.IOUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource;
@@ -37,7 +41,6 @@ import org.springframework.http.HttpHeaders;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

-import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
@@ -45,14 +48,15 @@ import java.net.MalformedURLException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
import java.time.LocalDateTime;
+import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.stream.Collectors;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
+import java.util.stream.Stream;

/**
 * Dataset file application service
@@ -85,11 +89,77 @@ public class DatasetFileApplicationService {
     */
    @Transactional(readOnly = true)
    public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) {
-       IPage<DatasetFile> page = new com.baomidou.mybatisplus.extension.plugins.pagination.Page<>(pagingQuery.getPage(), pagingQuery.getSize());
+       IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
        IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page);
        return PagedResponse.of(files);
    }

+   /**
+    * List dataset files and directories under a path prefix
+    */
+   @Transactional(readOnly = true)
+   public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) {
+       Dataset dataset = datasetRepository.getById(datasetId);
+       int page = Math.max(pagingQuery.getPage(), 1);
+       int size = pagingQuery.getSize() == null || pagingQuery.getSize() <= 0 ? 20 : pagingQuery.getSize();
+       if (dataset == null) {
+           return PagedResponse.of(new Page<>(page, size));
+       }
+       String datasetPath = dataset.getPath();
+       // A null or empty prefix lists the dataset root
+       Path queryPath = Path.of(datasetPath, Objects.toString(prefix, ""));
+       Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
+           .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
+       try (Stream<Path> pathStream = Files.list(queryPath)) {
+           // Directories first, then files, each sorted by name
+           List<Path> allFiles = pathStream
+               .filter(path -> path.toString().startsWith(datasetPath))
+               .sorted(Comparator
+                   .comparing((Path path) -> !Files.isDirectory(path))
+                   .thenComparing(path -> path.getFileName().toString()))
+               .collect(Collectors.toList());
+           // Compute pagination
+           int total = allFiles.size();
+           int totalPages = (int) Math.ceil((double) total / size);
+           // Slice out the current page
+           int fromIndex = (page - 1) * size;
+           fromIndex = Math.max(fromIndex, 0);
+           int toIndex = Math.min(fromIndex + size, total);
+           List<Path> pageData = new ArrayList<>();
+           if (fromIndex < total) {
+               pageData = allFiles.subList(fromIndex, toIndex);
+           }
+           List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList();
+           return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
+       } catch (IOException e) {
+           log.error("list dataset path error", e);
+           return PagedResponse.of(new Page<>(page, size));
+       }
+   }

+   private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) {
+       DatasetFile datasetFile = new DatasetFile();
+       LocalDateTime localDateTime = LocalDateTime.now();
+       try {
+           localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
+       } catch (IOException e) {
+           log.error("get last modified time error", e);
+       }
+       datasetFile.setFileName(path.getFileName().toString());
+       datasetFile.setUploadTime(localDateTime);
+       if (Files.isDirectory(path)) {
+           // Synthetic id marks directory rows for the frontend
+           datasetFile.setId("directory-" + datasetFile.getFileName());
+       } else if (Objects.isNull(datasetFilesMap.get(path.toString()))) {
+           // File exists on disk but has no database record yet
+           datasetFile.setId("file-" + datasetFile.getFileName());
+           datasetFile.setFileSize(path.toFile().length());
+       } else {
+           datasetFile = datasetFilesMap.get(path.toString());
+       }
+       return datasetFile;
+   }
    /**
     * Get file details
     */
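getDatasetFilesWithDirectory above pages the filesystem listing in memory rather than in SQL: it lists a single directory level, sorts directories ahead of files, then slices the sorted list. A condensed, self-contained sketch of that slicing, with hypothetical names and paths:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.Comparator;
    import java.util.List;
    import java.util.stream.Stream;

    public class DirectoryPageExample {
        // Returns one page of entries under dir: directories first, then files, each sorted by name.
        static List<Path> page(Path dir, int page, int size) throws IOException {
            try (Stream<Path> entries = Files.list(dir)) {
                List<Path> sorted = entries
                        .sorted(Comparator.comparing((Path p) -> !Files.isDirectory(p))
                                .thenComparing(p -> p.getFileName().toString()))
                        .toList();
                int from = Math.min(Math.max((page - 1) * size, 0), sorted.size());
                int to = Math.min(from + size, sorted.size());
                return sorted.subList(from, to);
            }
        }

        public static void main(String[] args) throws IOException {
            // Hypothetical dataset root; fetch the first page of 20 entries.
            for (Path p : page(Path.of("/data/datasets/ds-123"), 1, 20)) {
                System.out.println((Files.isDirectory(p) ? "dir  " : "file ") + p.getFileName());
            }
        }
    }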
@@ -151,15 +221,26 @@ public class DatasetFileApplicationService {
     */
    @Transactional(readOnly = true)
    public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) {
+       Dataset dataset = datasetRepository.getById(datasetId);
+       if (Objects.isNull(dataset)) {
+           throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND);
+       }
        List<DatasetFile> allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId);
-       fileRename(allByDatasetId);
+       Set<String> filePaths = allByDatasetId.stream().map(DatasetFile::getFilePath).collect(Collectors.toSet());
+       String datasetPath = dataset.getPath();
+       Path downloadPath = Path.of(datasetPath);
        response.setContentType("application/zip");
        String zipName = String.format("dataset_%s.zip",
            LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")));
        response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=" + zipName);
-       try (ZipOutputStream zos = new ZipOutputStream(response.getOutputStream())) {
-           for (DatasetFile file : allByDatasetId) {
-               addToZipFile(file, zos);
-           }
+       try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) {
+           try (Stream<Path> pathStream = Files.walk(downloadPath)) {
+               // Keep only paths inside the dataset that lead to at least one registered file
+               List<Path> allPaths = pathStream.filter(path -> path.toString().startsWith(datasetPath))
+                   .filter(path -> filePaths.stream().anyMatch(filePath -> filePath.startsWith(path.toString())))
+                   .toList();
+               for (Path path : allPaths) {
+                   addToZipFile(path, downloadPath, zos);
+               }
+           }
        } catch (IOException e) {
            log.error("Failed to download files in batches.", e);
@@ -167,42 +248,34 @@ public class DatasetFileApplicationService {
        }
    }

-   private void fileRename(List<DatasetFile> files) {
-       Set<String> uniqueFilenames = new HashSet<>();
-       for (DatasetFile file : files) {
-           String originalFilename = file.getFileName();
-           if (!uniqueFilenames.add(originalFilename)) {
-               String newFilename;
-               int counter = 1;
-               do {
-                   newFilename = generateNewFilename(originalFilename, counter);
-                   counter++;
-               } while (!uniqueFilenames.add(newFilename));
-               file.setFileName(newFilename);
-           }
-       }
-   }
-
-   private String generateNewFilename(String oldFilename, int counter) {
-       int dotIndex = oldFilename.lastIndexOf(".");
-       return oldFilename.substring(0, dotIndex) + "-(" + counter + ")" + oldFilename.substring(dotIndex);
-   }
-
-   private void addToZipFile(DatasetFile file, ZipOutputStream zos) throws IOException {
-       if (file.getFilePath() == null || !Files.exists(Paths.get(file.getFilePath()))) {
-           log.warn("The file hasn't been found on filesystem, id: {}", file.getId());
-           return;
-       }
-       try (InputStream fis = Files.newInputStream(Paths.get(file.getFilePath()));
-            BufferedInputStream bis = new BufferedInputStream(fis)) {
-           ZipEntry zipEntry = new ZipEntry(file.getFileName());
-           zos.putNextEntry(zipEntry);
-           byte[] buffer = new byte[8192];
-           int length;
-           while ((length = bis.read(buffer)) >= 0) {
-               zos.write(buffer, 0, length);
-           }
-           zos.closeEntry();
-       }
-   }
+   private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException {
+       // Entry names are relative to the dataset root, with "/" separators
+       String entryName = basePath.relativize(path)
+           .toString()
+           .replace(File.separator, "/");
+       // Handle directories
+       if (Files.isDirectory(path)) {
+           if (!entryName.isEmpty()) {
+               entryName += "/";
+               ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName);
+               zos.putArchiveEntry(dirEntry);
+               zos.closeArchiveEntry();
+           }
+       } else {
+           // Handle files
+           ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName);
+           // Carry over size and modification time
+           BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class);
+           fileEntry.setSize(attrs.size());
+           fileEntry.setLastModifiedTime(attrs.lastModifiedTime());
+           zos.putArchiveEntry(fileEntry);
+           try (InputStream is = Files.newInputStream(path)) {
+               IOUtils.copy(is, zos);
+           }
+           zos.closeArchiveEntry();
+       }
+   }
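The download now streams the whole directory tree instead of a flat, renamed file list, which is why entry names come from relativize against the dataset root. The same commons-compress pattern reduced to a standalone sketch, with hypothetical paths:

    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.util.stream.Stream;

    import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
    import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
    import org.apache.commons.io.IOUtils;

    public class ZipTreeExample {
        // Zips everything under root into out, keeping paths relative to root.
        static void zipTree(Path root, OutputStream out) throws IOException {
            try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(out);
                 Stream<Path> paths = Files.walk(root)) {
                for (Path p : paths.toList()) {
                    String name = root.relativize(p).toString().replace('\\', '/');
                    if (name.isEmpty()) {
                        continue; // skip the root itself
                    }
                    if (Files.isDirectory(p)) {
                        // A trailing "/" marks the entry as a directory
                        zos.putArchiveEntry(new ZipArchiveEntry(name + "/"));
                        zos.closeArchiveEntry();
                    } else {
                        zos.putArchiveEntry(new ZipArchiveEntry(p.toFile(), name));
                        try (InputStream is = Files.newInputStream(p)) {
                            IOUtils.copy(is, zos);
                        }
                        zos.closeArchiveEntry();
                    }
                }
            }
        }

        public static void main(String[] args) throws IOException {
            // Hypothetical dataset directory and output archive.
            zipTree(Path.of("/data/datasets/ds-123"),
                    Files.newOutputStream(Path.of("/tmp/ds-123.zip")));
        }
    }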

View File

@@ -46,12 +46,10 @@ public class DatasetFileController {
        @PathVariable("datasetId") String datasetId,
        @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
        @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
-       @RequestParam(value = "fileType", required = false) String fileType,
-       @RequestParam(value = "status", required = false) String status,
-       @RequestParam(value = "name", required = false) String name) {
+       @RequestParam(value = "prefix", required = false) String prefix) {
        PagingQuery pagingQuery = new PagingQuery(page, size);
-       PagedResponse<DatasetFile> filesPage = datasetFileApplicationService.getDatasetFiles(
-           datasetId, fileType, status, name, pagingQuery);
+       PagedResponse<DatasetFile> filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(
+           datasetId, prefix, pagingQuery);
        return Response.ok(filesPage);
    }
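A sketch of how a client might call the reworked listing endpoint with the new prefix parameter; the URL assumes a files mapping under /data-management/datasets/{datasetId} (the controller's request mapping is not shown in this hunk) and a hypothetical deployment:

    import java.net.URI;
    import java.net.URLEncoder;
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.nio.charset.StandardCharsets;

    public class ListFilesExample {
        public static void main(String[] args) throws Exception {
            // "prefix" selects the folder to list; an empty prefix lists the dataset root.
            String prefix = URLEncoder.encode("images/train/", StandardCharsets.UTF_8);
            String url = "http://localhost:8080/data-management/datasets/ds-123/files"
                    + "?page=1&size=20&prefix=" + prefix;
            HttpResponse<String> response = HttpClient.newHttpClient().send(
                    HttpRequest.newBuilder(URI.create(url)).GET().build(),
                    HttpResponse.BodyHandlers.ofString());
            // Paged response: directories first, then files.
            System.out.println(response.body());
        }
    }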

View File

@@ -12,6 +12,7 @@ import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
+import org.apache.commons.io.FileUtils;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
@@ -145,7 +146,7 @@ public class ArchiveAnalyzer {
        Path path = Paths.get(archivePath.getParent().toString(), archiveEntry.getName());
        File file = path.toFile();
        long fileSize = 0L;
-       String extension = AnalyzerUtils.getExtension(archiveEntry.getName());
+       FileUtils.createParentDirectories(file);
        long supportFileSize = 1024*1024*1024; // upload size capped at 1 GB for now
        try (OutputStream outputStream = new BufferedOutputStream(Files.newOutputStream(file.toPath()))) {
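Nested entries such as a/b/c.txt previously failed here because Files.newOutputStream does not create missing parent directories; commons-io's FileUtils.createParentDirectories (available since commons-io 2.9) fills that gap. A minimal sketch with hypothetical paths:

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;

    import org.apache.commons.io.FileUtils;

    public class ParentDirsExample {
        public static void main(String[] args) throws IOException {
            // An archive entry "a/b/c.txt" extracted under a hypothetical target directory.
            File target = new File("/tmp/extract-demo/a/b/c.txt");
            // Creates /tmp/extract-demo/a/b if missing; a no-op when it already exists.
            FileUtils.createParentDirectories(target);
            Files.writeString(target.toPath(), "hello");
        }
    }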

View File

@@ -1,6 +1,6 @@
import { Button, Descriptions, DescriptionsProps, Modal, Table } from "antd";
import { formatBytes, formatDateTime } from "@/utils/unit";
-import { Download, Trash2 } from "lucide-react";
+import { Download, Trash2, Folder, File } from "lucide-react";
import { datasetTypeMap } from "../../dataset.const";

export default function Overview({ dataset, filesOperation, fetchDataset }) {
@@ -102,13 +102,58 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
dataIndex: "fileName", dataIndex: "fileName",
key: "fileName", key: "fileName",
fixed: "left", fixed: "left",
render: (text: string, record: any) => {
const isDirectory = record.id.startsWith('directory-');
const iconSize = 16;
const content = (
<div className="flex items-center">
{isDirectory ? (
<Folder className="mr-2 text-blue-500" size={iconSize} />
) : (
<File className="mr-2 text-black" size={iconSize} />
)}
<span className="truncate text-black">{text}</span>
</div>
);
if (isDirectory) {
return (
<Button
type="link"
onClick={(e) => {
const currentPath = filesOperation.pagination.prefix || '';
const newPath = `${currentPath}${record.fileName}`;
filesOperation.fetchFiles(newPath);
}}
>
{content}
</Button>
);
}
return (
<Button
type="link"
onClick={(e) => {}}
>
{content}
</Button>
);
},
}, },
{ {
title: "大小", title: "大小",
dataIndex: "fileSize", dataIndex: "fileSize",
key: "fileSize", key: "fileSize",
width: 150, width: 150,
render: (text) => formatBytes(text), render: (text: number, record: any) => {
const isDirectory = record.id.startsWith('directory-');
if (isDirectory) {
return "-";
}
return formatBytes(text)
},
}, },
{ {
title: "上传时间", title: "上传时间",
@@ -122,7 +167,12 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
key: "action", key: "action",
width: 180, width: 180,
fixed: "right", fixed: "right",
render: (_, record) => ( render: (_, record) => {
const isDirectory = record.id.startsWith('directory-');
if (isDirectory) {
return <div className="flex"/>;
}
return (
<div className="flex"> <div className="flex">
<Button <Button
size="small" size="small"
@@ -143,9 +193,10 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
          </Button>
        </div>
-     ),
+     )},
    },
  ];

  return (
    <>
      <div className=" flex flex-col gap-4">
@@ -182,6 +233,43 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
        </div>
      )}
      <div className="overflow-x-auto">
+       <div className="mb-2">
+         {(filesOperation.pagination.prefix || '') !== '' && (
+           <Button
+             type="link"
+             onClick={() => {
+               // Navigate to the parent directory
+               const currentPath = filesOperation.pagination.prefix || '';
+               const pathParts = currentPath.split('/').filter(Boolean);
+               pathParts.pop(); // drop the last segment
+               const parentPath = pathParts.length > 0 ? `${pathParts.join('/')}/` : '';
+               filesOperation.fetchFiles(parentPath);
+             }}
+             className="p-0"
+           >
+             <span className="flex items-center text-blue-500">
+               <svg
+                 className="w-4 h-4 mr-1"
+                 fill="none"
+                 stroke="currentColor"
+                 viewBox="0 0 24 24"
+                 xmlns="http://www.w3.org/2000/svg"
+               >
+                 <path
+                   strokeLinecap="round"
+                   strokeLinejoin="round"
+                   strokeWidth={2}
+                   d="M10 19l-7-7m0 0l7-7m-7 7h18"
+                 />
+               </svg>
+             </span>
+           </Button>
+         )}
+         {filesOperation.pagination.prefix && (
+           <span className="ml-2 text-gray-600">: {filesOperation.pagination.prefix}</span>
+         )}
+       </div>
        <Table
          size="middle"
          rowKey="id"
@@ -192,6 +280,14 @@ export default function Overview({ dataset, filesOperation, fetchDataset }) {
          pagination={{
            ...pagination,
            showTotal: (total) => `${total}`,
+           onChange: (page, pageSize) => {
+             filesOperation.setPagination(prev => ({
+               ...prev,
+               current: page,
+               pageSize: pageSize
+             }));
+             filesOperation.fetchFiles(pagination.prefix, page, pageSize);
+           }
          }}
        />
      </div>

View File

@@ -23,19 +23,35 @@ export function useFilesOperation(dataset: Dataset) {
    current: number;
    pageSize: number;
    total: number;
-  }>({ current: 1, pageSize: 10, total: 0 });
+    prefix?: string;
+  }>({ current: 1, pageSize: 10, total: 0, prefix: '' });

  // File preview state
  const [previewVisible, setPreviewVisible] = useState(false);
  const [previewContent, setPreviewContent] = useState("");
  const [previewFileName, setPreviewFileName] = useState("");

-  const fetchFiles = async () => {
-    const { data } = await queryDatasetFilesUsingGet(id!, {
-      page: pagination.current - 1,
-      size: pagination.pageSize,
-    });
+  const fetchFiles = async (prefix?: string, current?: number, pageSize?: number) => {
+    const params: any = {
+      page: current ? current : pagination.current,
+      size: pageSize ? pageSize : pagination.pageSize,
+    };
+    // An explicit prefix (including '') wins; otherwise reuse the stored one
+    if (prefix !== undefined) {
+      params.prefix = prefix;
+    } else if (pagination.prefix) {
+      params.prefix = pagination.prefix;
+    }
+    const { data } = await queryDatasetFilesUsingGet(id!, params);
    setFileList(data.content || []);
+    // Update pagination with the current prefix and total
+    setPagination(prev => ({
+      ...prev,
+      prefix: prefix !== undefined ? prefix : prev.prefix,
+      total: data.totalElements || 0,
+    }));
  };

  const handleBatchDeleteFiles = () => {
@@ -113,6 +129,7 @@ export function useFilesOperation(dataset: Dataset) {
    fileList,
    selectedFiles,
    setSelectedFiles,
+   pagination,
    setPagination,
    previewVisible,
    setPreviewVisible,

View File

@@ -113,6 +113,7 @@ class DatasetEvaluationExecutor(EvaluationExecutor):
        file_id=dataset_file.id,
        item_id=item.get("id") if item.get("id") else str(uuid.uuid4()),
        eval_content=json.dumps(item, ensure_ascii=False),
+       eval_result="{}",
        status=TaskStatus.PENDING.value,
        created_by=self.task.created_by,
        updated_by=self.task.updated_by,
@@ -152,6 +153,7 @@ class SynthesisEvaluationExecutor(EvaluationExecutor):
        file_id=synthesis_file.id,
        item_id=synthesis_data.id,
        eval_content=json.dumps(synthesis_data.data),
+       eval_result="{}",
        status=TaskStatus.PENDING.value,
        created_by=self.task.created_by,
        updated_by=self.task.updated_by,