feat: new endpoint that only adds file paths to dataset records, without any FS operations (#119)

* feat: Implement adding only files' paths to a dataset

* refactor: Rename variable for clarity in metadata serialization
This commit is contained in:
Jason Wang
2025-12-01 20:31:06 +08:00
committed by GitHub
parent f730bd5b0c
commit d692f5fdae
3 changed files with 89 additions and 0 deletions

View File

@@ -19,6 +19,7 @@ import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorC
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
@@ -344,4 +345,59 @@ public class DatasetFileApplicationService {
} }
} }
} }
/**
 * Adds files to a dataset by creating database records only; the files themselves
 * are neither copied nor moved.
 *
 * <p>One {@link DatasetFile} is built per entry of {@code req.sourcePaths()}, with
 * {@code filePath} set to the source path exactly as supplied. Every record carries the
 * same metadata JSON, a single-entry object keyed {@code "softAdd"}.
 *
 * @param datasetId id of the dataset
 * @param req       add-files request (source paths and softAdd flag)
 * @return the {@link DatasetFile} objects built for this request, in request order
 * @throws IllegalArgumentException if a source path is null, blank, or has no file-name
 *                                  element (for example a file-system root)
 */
@Transactional
public List<DatasetFile> addFilesToDataset(String datasetId, AddFilesRequest req) {
    Dataset dataset = datasetRepository.getById(datasetId);
    BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
    List<DatasetFile> addedFiles = new ArrayList<>();
    List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
    dataset.setFiles(existDatasetFiles);

    // softAdd is declared @NotNull on AddFilesRequest; the unboxing below relies on the
    // request having been validated before it reaches this method.
    boolean softAdd = req.softAdd();
    String metadata;
    try {
        Map<String, Boolean> metadataMap = Map.of("softAdd", softAdd);
        ObjectMapper objectMapper = new ObjectMapper();
        metadata = objectMapper.writeValueAsString(metadataMap);
    } catch (JsonProcessingException e) {
        log.error("Failed to serialize metadataMap", e);
        throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR);
    }

    for (String sourceFilePath : req.sourcePaths()) {
        // Reject unusable paths with a descriptive error instead of a bare NPE:
        // Paths.get(null) throws, and Path.getFileName() returns null for a path
        // that has no name elements (such as "/").
        if (sourceFilePath == null || sourceFilePath.isBlank()) {
            throw new IllegalArgumentException("Invalid source file path: " + sourceFilePath);
        }
        Path sourcePath = Paths.get(sourceFilePath);
        Path fileNamePath = sourcePath.getFileName();
        if (fileNamePath == null) {
            throw new IllegalArgumentException("Invalid source file path: " + sourceFilePath);
        }
        String fileName = fileNamePath.toString();
        File sourceFile = sourcePath.toFile();
        LocalDateTime currentTime = LocalDateTime.now();
        DatasetFile datasetFile = DatasetFile.builder()
                .id(UUID.randomUUID().toString())
                .datasetId(datasetId)
                .fileName(fileName)
                .fileType(AnalyzerUtils.getExtension(fileName))
                // Read-only size lookup; File.length() yields 0 when the file does not exist.
                .fileSize(sourceFile.length())
                .filePath(sourceFilePath)
                .uploadTime(currentTime)
                .lastAccessTime(currentTime)
                .metadata(metadata)
                .build();
        // NOTE(review): setDatasetFileId is defined elsewhere in this class; presumably it
        // reconciles the generated id with an existing record in the dataset - confirm.
        setDatasetFileId(datasetFile, dataset);
        dataset.addFile(datasetFile);
        addedFiles.add(datasetFile);
    }

    datasetFileRepository.saveOrUpdateBatch(addedFiles, 100);
    dataset.active();
    datasetRepository.updateById(dataset);
    // Note: addFilesToDataset only creates DB records, no file copy is performed.
    // If file copy is needed, use copyFilesToDatasetDir endpoint instead.
    return addedFiles;
}
} }

View File

@@ -0,0 +1,18 @@
package com.datamate.datamanagement.interfaces.dto;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.NotNull;
import java.util.List;
/**
 * Request DTO for adding files to a dataset (creates DB records only; files are not copied).
 *
 * @param sourcePaths source file paths to register; the list must contain at least one
 *                    entry, and no entry may be null or empty
 * @param softAdd     softAdd flag for the added files; must not be null
 * @author datamate
 * @since 2025-11-29
 */
public record AddFilesRequest(
        // The element-level constraint rejects null/empty paths at the validation boundary,
        // in addition to the non-empty check on the list itself.
        @NotEmpty List<@NotEmpty String> sourcePaths,
        @NotNull Boolean softAdd
) {
}

View File

@@ -8,6 +8,7 @@ import com.datamate.common.interfaces.PagingQuery;
import com.datamate.datamanagement.application.DatasetFileApplicationService; import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile; import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse; import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
@@ -144,4 +145,18 @@ public class DatasetFileController {
List<DatasetFile> datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req); List<DatasetFile> datasetFiles = datasetFileApplicationService.copyFilesToDatasetDir(datasetId, req);
return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles); return DatasetConverter.INSTANCE.convertToResponseList(datasetFiles);
} }
/**
 * Adds files to a dataset by creating database records only, without copying the files.
 *
 * @param datasetId dataset ID
 * @param req       add-files request (source file path list and softAdd flag)
 * @return list of dataset file response DTOs
 */
@PostMapping("/upload/add")
public List<DatasetFileResponse> addFilesToDataset(
        @PathVariable("datasetId") String datasetId,
        @RequestBody @Valid AddFilesRequest req) {
    // Delegate to the application service, then map the domain objects to response DTOs.
    return DatasetConverter.INSTANCE.convertToResponseList(
            datasetFileApplicationService.addFilesToDataset(datasetId, req));
}
} }