feature: add scheduled cleanup of datasets that exceed their retention period; add an API for binding a data collection task to a dataset (#24)

* refactor: rework the data collection implementation, remove dead code, and tidy up the code structure
* feature: scan all datasets at 00:00 every day, check whether each one has exceeded its configured retention days, and call the delete API for any dataset that has
* fix: adjust dataset file deletion: files uploaded into a dataset are removed from both the database and the filesystem, while collected files only have their database records removed
* fix: add parameter validation and API definitions, remove unused endpoints
* fix: dataset statistics now default to 0
* feature: add dataset status transitions: a dataset is created in DRAFT status and becomes ACTIVE once files are uploaded or collected
* refactor: rework the paged query for collection tasks
* fix: re-execute the collection task after it is updated; add transaction control to collection task execution
* feature: creating a collection task can create a dataset in the same step, and updating a collection task can update the bound dataset
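The daily sweep below uses Spring's six-field cron syntax (second, minute, hour, day-of-month, month, day-of-week). A minimal sanity-check sketch, assuming Spring 5.3+ where CronExpression is available:

import java.time.LocalDateTime;
import org.springframework.scheduling.support.CronExpression;

class CronCheck {
    public static void main(String[] args) {
        // "0 0 0 * * ?" = second 0, minute 0, hour 0, any day of any month: once a day at midnight
        CronExpression cron = CronExpression.parse("0 0 0 * * ?");
        System.out.println(cron.next(LocalDateTime.now())); // prints the next upcoming 00:00
    }
}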
@@ -28,6 +28,8 @@ import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 import org.springframework.util.StringUtils;

 import java.nio.file.Files;
 import java.nio.file.Paths;
 import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
@@ -220,63 +222,75 @@ public class DatasetApplicationService {
     public void processDataSourceAsync(String datasetId, String dataSourceId) {
         try {
             log.info("Starting data source file scan, dataset ID: {}, data source ID: {}", datasetId, dataSourceId);

             // 1. Fetch the task detail from the data collection service
             CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
             if (taskDetail == null) {
                 log.error("Failed to fetch collection task detail, task ID: {}", dataSourceId);
                 return;
             }

             log.info("Fetched collection task detail: {}", taskDetail);

             // 2. Parse the task configuration
             LocalCollectionConfig config = parseTaskConfig(taskDetail.getConfig());
             if (config == null) {
                 log.error("Failed to parse task configuration, task ID: {}", dataSourceId);
                 return;
             }

             // 4. Get the list of file paths
-            List<String> filePaths = config.getFilePaths();
+            List<String> filePaths = getFilePaths(dataSourceId);
             if (CollectionUtils.isEmpty(filePaths)) {
                 log.warn("File path list is empty, task ID: {}", dataSourceId);
                 return;
             }

             log.info("Starting file scan, {} file paths in total", filePaths.size());

             // 5. Scan file metadata
             List<DatasetFile> datasetFiles = fileMetadataService.scanFiles(filePaths, datasetId);
             // Look up files already present in the dataset
             List<DatasetFile> existDatasetFileList = datasetFileRepository.findAllByDatasetId(datasetId);
             Map<String, DatasetFile> existDatasetFilePathMap = existDatasetFileList.stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
             Dataset dataset = datasetRepository.getById(datasetId);
             dataset.setFiles(existDatasetFileList);

-            // 6. Batch-insert into the dataset file table
-            if (CollectionUtils.isNotEmpty(datasetFiles)) {
-                for (DatasetFile datasetFile : datasetFiles) {
-                    if (existDatasetFilePathMap.containsKey(datasetFile.getFilePath())) {
-                        DatasetFile existDatasetFile = existDatasetFilePathMap.get(datasetFile.getFilePath());
-                        dataset.removeFile(existDatasetFile);
-                        existDatasetFile.setFileSize(datasetFile.getFileSize());
-                        dataset.addFile(existDatasetFile);
-                        datasetFileRepository.updateById(existDatasetFile);
-                    } else {
-                        dataset.addFile(datasetFile);
-                        datasetFileRepository.save(datasetFile);
-                    }
-                }
-                log.info("File metadata write complete, {} records written", datasetFiles.size());
-            } else {
-                log.warn("No valid files were scanned");
-            }
+            // Batch-sync the dataset file table
+            asyncDatasetFile(datasetFiles, existDatasetFilePathMap, dataset, existDatasetFileList, filePaths);
             datasetRepository.updateById(dataset);
         } catch (Exception e) {
             log.error("Data source file scan failed, dataset ID: {}, data source ID: {}", datasetId, dataSourceId, e);
         }
     }
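One detail worth flagging in the indexing step above: the two-argument Collectors.toMap throws IllegalStateException if two existing records share the same file path. A standalone sketch of the three-argument overload that tolerates duplicates (FileRow and indexByPath are hypothetical names, assuming Java 16+ for records; this is not part of the commit):

import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;

class PathIndexSketch {
    record FileRow(String filePath, long size) {}

    // Index rows by path; the merge function keeps the first row when two rows
    // share a path, instead of throwing IllegalStateException.
    static Map<String, FileRow> indexByPath(List<FileRow> rows) {
        return rows.stream().collect(Collectors.toMap(
                FileRow::filePath, Function.identity(), (first, second) -> first));
    }

    public static void main(String[] args) {
        System.out.println(indexByPath(List.of(
                new FileRow("/data/a.csv", 10), new FileRow("/data/a.csv", 20))));
    }
}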

+    private void asyncDatasetFile(List<DatasetFile> datasetFiles, Map<String, DatasetFile> existDatasetFilePathMap, Dataset dataset, List<DatasetFile> existDatasetFileList, List<String> filePaths) {
+        if (CollectionUtils.isNotEmpty(datasetFiles)) {
+            for (DatasetFile datasetFile : datasetFiles) {
+                if (existDatasetFilePathMap.containsKey(datasetFile.getFilePath())) {
+                    DatasetFile existDatasetFile = existDatasetFilePathMap.get(datasetFile.getFilePath());
+                    dataset.removeFile(existDatasetFile);
+                    existDatasetFile.setFileSize(datasetFile.getFileSize());
+                    dataset.addFile(existDatasetFile);
+                    dataset.active();
+                    datasetFileRepository.updateById(existDatasetFile);
+                } else {
+                    dataset.addFile(datasetFile);
+                    dataset.active();
+                    datasetFileRepository.save(datasetFile);
+                }
+            }
+            log.info("File metadata write complete, {} records written", datasetFiles.size());
+        } else {
+            log.warn("No valid files were scanned");
+        }
+        for (DatasetFile datasetFile : existDatasetFileList) {
+            String existFilePath = datasetFile.getFilePath();
+            for (String filePath : filePaths) {
+                if (existFilePath.equals(filePath) || existFilePath.startsWith(filePath)) {
+                    if (Files.notExists(Paths.get(existFilePath))) {
+                        dataset.removeFile(datasetFile);
+                        datasetFileRepository.removeById(datasetFile.getId());
+                    }
+                }
+            }
+        }
+    }
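The cleanup loop above uses Files.notExists rather than !Files.exists, and the two are not complements. A minimal sketch of the difference (the path is made up):

import java.nio.file.Files;
import java.nio.file.Path;

class NotExistsSketch {
    public static void main(String[] args) {
        Path p = Path.of("/tmp/surely-missing-504f");
        // When a file's status cannot be determined (for example, no permission on
        // the parent directory), exists() and notExists() BOTH return false, so
        // notExists() is true only for files verified to be absent.
        System.out.println(Files.exists(p));    // false
        System.out.println(Files.notExists(p)); // true: verified absent
    }
}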

+    private List<String> getFilePaths(String dataSourceId) {
+        CollectionTaskDetailResponse taskDetail = collectionTaskClient.getTaskDetail(dataSourceId).getData();
+        if (taskDetail == null) {
+            log.warn("Failed to fetch collection task detail, task ID: {}", dataSourceId);
+            return Collections.emptyList();
+        }
+        log.info("Fetched collection task detail: {}", taskDetail);
+        LocalCollectionConfig config = parseTaskConfig(taskDetail.getConfig());
+        if (config == null) {
+            log.warn("Failed to parse task configuration, task ID: {}", dataSourceId);
+            return Collections.emptyList();
+        }
+        return config.getFilePaths();
+    }

     /**
      * Parse the task configuration
      */

@@ -10,7 +10,6 @@ import com.datamate.datamanagement.domain.contants.DatasetConstant;
 import com.datamate.datamanagement.domain.model.dataset.Dataset;
 import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
 import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo;
 import com.datamate.datamanagement.domain.model.dataset.StatusConstants;
 import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
 import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
 import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
@@ -31,7 +30,6 @@ import org.springframework.data.domain.Pageable;
 import org.springframework.http.HttpHeaders;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 import org.springframework.web.multipart.MultipartFile;

 import java.io.BufferedInputStream;
 import java.io.File;
@@ -41,12 +39,9 @@ import java.net.MalformedURLException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.StandardCopyOption;
 import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
-import java.util.List;
-import java.util.Objects;
-import java.util.UUID;
+import java.util.*;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;
@@ -60,7 +55,6 @@ public class DatasetFileApplicationService {

     private final DatasetFileRepository datasetFileRepository;
     private final DatasetRepository datasetRepository;
-    private final Path fileStorageLocation;
     private final FileService fileService;

     @Value("${dataset.base.path:/dataset}")
@@ -68,61 +62,10 @@ public class DatasetFileApplicationService {

     @Autowired
     public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
-                                         DatasetRepository datasetRepository, FileService fileService,
-                                         @Value("${app.file.upload-dir:./dataset}") String uploadDir) {
+                                         DatasetRepository datasetRepository, FileService fileService) {
         this.datasetFileRepository = datasetFileRepository;
         this.datasetRepository = datasetRepository;
-        this.fileStorageLocation = Paths.get(uploadDir).toAbsolutePath().normalize();
         this.fileService = fileService;
-        try {
-            Files.createDirectories(this.fileStorageLocation);
-        } catch (Exception ex) {
-            throw new RuntimeException("Could not create the directory where the uploaded files will be stored.", ex);
-        }
     }

-    /**
-     * Upload a file into a dataset
-     */
-    public DatasetFile uploadFile(String datasetId, MultipartFile file) {
-        Dataset dataset = datasetRepository.getById(datasetId);
-        if (dataset == null) {
-            throw new IllegalArgumentException("Dataset not found: " + datasetId);
-        }
-
-        String originalFilename = file.getOriginalFilename();
-        String fileName = originalFilename != null ? originalFilename : "file";
-        try {
-            // Save the file to disk
-            Path targetLocation = this.fileStorageLocation.resolve(datasetId + File.separator + fileName);
-            // Make sure the target directory exists
-            Files.createDirectories(targetLocation);
-            Files.copy(file.getInputStream(), targetLocation, StandardCopyOption.REPLACE_EXISTING);
-
-            // Create the file entity (UUID primary key)
-            DatasetFile datasetFile = new DatasetFile();
-            datasetFile.setId(UUID.randomUUID().toString());
-            datasetFile.setDatasetId(datasetId);
-            datasetFile.setFileName(fileName);
-            datasetFile.setFilePath(targetLocation.toString());
-            datasetFile.setFileType(getFileExtension(originalFilename));
-            datasetFile.setFileSize(file.getSize());
-            datasetFile.setUploadTime(LocalDateTime.now());
-            datasetFile.setStatus(StatusConstants.DatasetFileStatuses.COMPLETED);
-
-            // Save to the database
-            datasetFileRepository.save(datasetFile);
-
-            // Update dataset statistics
-            dataset.addFile(datasetFile);
-            datasetRepository.updateById(dataset);
-
-            return datasetFileRepository.findByDatasetIdAndFileName(datasetId, fileName);
-
-        } catch (IOException ex) {
-            log.error("Could not store file {}", fileName, ex);
-            throw new RuntimeException("Could not store file " + fileName, ex);
-        }
-    }

     /**
@@ -155,20 +98,21 @@ public class DatasetFileApplicationService {
     /**
      * Delete a file
      */
     @Transactional
     public void deleteDatasetFile(String datasetId, String fileId) {
         DatasetFile file = getDatasetFile(datasetId, fileId);
-        try {
-            Path filePath = Paths.get(file.getFilePath());
-            Files.deleteIfExists(filePath);
-        } catch (IOException ex) {
-            // ignore
+        Dataset dataset = datasetRepository.getById(datasetId);
+        // On deletion, files uploaded into the dataset are removed from both the database and the filesystem; collected files only have their database record removed
+        if (file.getFilePath().startsWith(dataset.getPath())) {
+            try {
+                Path filePath = Paths.get(file.getFilePath());
+                Files.deleteIfExists(filePath);
+            } catch (IOException ex) {
+                throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
+            }
         }
         datasetFileRepository.removeById(fileId);

-        Dataset dataset = datasetRepository.getById(datasetId);
-        // Quick statistics refresh (an exact recount could be taken from the DB)
-        dataset.setFileCount(Math.max(0, dataset.getFileCount() - 1));
-        dataset.setSizeBytes(Math.max(0, dataset.getSizeBytes() - (file.getFileSize() != null ? file.getFileSize() : 0)));
+        dataset.removeFile(file);
         datasetRepository.updateById(dataset);
     }
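The uploaded-versus-collected decision above rests on a plain string prefix test. A small sketch of a subtlety to be aware of, assuming Java 11+ for Path.of: String.startsWith also matches sibling directories that merely share a name prefix, whereas Path.startsWith compares whole path components.

import java.nio.file.Path;

class PathPrefixSketch {
    public static void main(String[] args) {
        Path base = Path.of("/dataset/abc");
        // String prefix matching accepts the sibling directory /dataset/abc2:
        System.out.println("/dataset/abc2/file.txt".startsWith(base.toString())); // true
        // Component-wise comparison does not:
        System.out.println(Path.of("/dataset/abc2/file.txt").startsWith(base));   // false
    }
}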

@@ -197,6 +141,7 @@ public class DatasetFileApplicationService {
     @Transactional(readOnly = true)
     public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) {
         List<DatasetFile> allByDatasetId = datasetFileRepository.findAllByDatasetId(datasetId);
+        fileRename(allByDatasetId);
         response.setContentType("application/zip");
         String zipName = String.format("dataset_%s.zip",
             LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss")));
@@ -211,6 +156,27 @@ public class DatasetFileApplicationService {
         }
     }

+    private void fileRename(List<DatasetFile> files) {
+        Set<String> uniqueFilenames = new HashSet<>();
+        for (DatasetFile file : files) {
+            String originalFilename = file.getFileName();
+            if (!uniqueFilenames.add(originalFilename)) {
+                String newFilename;
+                int counter = 1;
+                do {
+                    newFilename = generateNewFilename(originalFilename, counter);
+                    counter++;
+                } while (!uniqueFilenames.add(newFilename));
+                file.setFileName(newFilename);
+            }
+        }
+    }

+    private String generateNewFilename(String oldFilename, int counter) {
+        int dotIndex = oldFilename.lastIndexOf(".");
+        return oldFilename.substring(0, dotIndex) + "-(" + counter + ")" + oldFilename.substring(dotIndex);
+    }
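generateNewFilename assumes every name contains a dot: lastIndexOf(".") returning -1 would make substring(0, -1) throw StringIndexOutOfBoundsException. A hypothetical variant (not part of this commit) that also covers extensionless names:

class FilenameSketch {
    static String generateNewFilename(String oldFilename, int counter) {
        int dotIndex = oldFilename.lastIndexOf('.');
        if (dotIndex < 0) {
            return oldFilename + "-(" + counter + ")"; // no extension: append the counter
        }
        return oldFilename.substring(0, dotIndex) + "-(" + counter + ")" + oldFilename.substring(dotIndex);
    }
}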

     private void addToZipFile(DatasetFile file, ZipOutputStream zos) throws IOException {
         if (file.getFilePath() == null || !Files.exists(Paths.get(file.getFilePath()))) {
             log.warn("The file hasn't been found on filesystem, id: {}", file.getId());
@@ -229,17 +195,6 @@ public class DatasetFileApplicationService {
         }
     }

-    private String getFileExtension(String fileName) {
-        if (fileName == null || fileName.isEmpty()) {
-            return null;
-        }
-        int lastDotIndex = fileName.lastIndexOf(".");
-        if (lastDotIndex == -1) {
-            return null;
-        }
-        return fileName.substring(lastDotIndex + 1);
-    }

     /**
      * Pre-upload
      *
@@ -275,9 +230,6 @@ public class DatasetFileApplicationService {
     public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) {
         FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest));
         saveFileInfoToDb(uploadResult, uploadFileRequest, datasetId);
-        if (uploadResult.isAllFilesUploaded()) {
-            // Parse the files; depending on future requirements, add metadata validation and semi-structured file parsing here
-        }
     }

     private void saveFileInfoToDb(FileUploadResult fileUploadResult, UploadFileRequest uploadFile, String datasetId) {
@@ -301,6 +253,7 @@ public class DatasetFileApplicationService {

         datasetFileRepository.save(datasetFile);
         dataset.addFile(datasetFile);
+        dataset.active();
         datasetRepository.updateById(dataset);
     }
 }

@@ -143,4 +143,10 @@ public class Dataset extends BaseEntity<String> {
             this.updatedAt = LocalDateTime.now();
         }
     }

+    public void active() {
+        if (this.status == DatasetStatusType.DRAFT) {
+            this.status = DatasetStatusType.ACTIVE;
+        }
+    }
 }
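The transition is one-way and idempotent, so callers can invoke it on every upload or collection without checking the current status first. A tiny standalone model of the guard, assuming DatasetStatusType carries DRAFT and ACTIVE members as used above:

class StatusSketch {
    enum Status { DRAFT, ACTIVE }

    private Status status = Status.DRAFT;

    void active() {
        if (status == Status.DRAFT) {
            status = Status.ACTIVE; // further calls are no-ops
        }
    }
}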

@@ -7,7 +7,6 @@ import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
 import com.datamate.common.domain.model.ChunkUploadRequest;
 import com.datamate.datamanagement.domain.model.dataset.Dataset;
 import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
 import com.datamate.datamanagement.interfaces.dto.*;
 import org.mapstruct.Mapper;
 import org.mapstruct.Mapping;
 import org.mapstruct.factory.Mappers;
@@ -10,11 +10,11 @@ import lombok.Setter;
 @Setter
 public class AllDatasetStatisticsResponse {
     /** Total number of datasets */
-    private Integer totalDatasets;
+    private Integer totalDatasets = 0;

     /** Total size in bytes */
-    private Long totalSize;
+    private Long totalSize = 0L;

     /** Total number of files */
-    private Long totalFiles;
+    private Long totalFiles = 0L;
 }
@@ -3,6 +3,7 @@ package com.datamate.datamanagement.interfaces.dto;
 import com.datamate.datamanagement.common.enums.DatasetType;
 import jakarta.validation.constraints.NotBlank;
 import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.Size;
 import lombok.AllArgsConstructor;
 import lombok.Getter;
 import lombok.NoArgsConstructor;
@@ -19,9 +20,11 @@ import java.util.List;
 @AllArgsConstructor
 public class CreateDatasetRequest {
     /** Dataset name */
+    @Size(min = 1, max = 100)
     @NotBlank(message = "Dataset name must not be blank")
     private String name;
     /** Dataset description */
+    @Size(max = 500)
     private String description;
     /** Dataset type */
     @NotNull(message = "Dataset type must not be null")
@@ -30,6 +33,4 @@ public class CreateDatasetRequest {
     private List<String> tags;
     /** Data source */
     private String dataSource;
     /** Target location */
     private String targetLocation;
 }
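A minimal sketch of how these constraints behave outside the web layer, assuming a Bean Validation implementation such as hibernate-validator is on the classpath (ValidationSketch is a made-up harness):

import jakarta.validation.Validation;
import jakarta.validation.Validator;

class ValidationSketch {
    public static void main(String[] args) {
        Validator validator = Validation.buildDefaultValidatorFactory().getValidator();
        CreateDatasetRequest request = new CreateDatasetRequest(); // name and type left null
        validator.validate(request).forEach(violation ->
                System.out.println(violation.getPropertyPath() + ": " + violation.getMessage()));
        // output includes the @NotBlank and @NotNull messages declared above
    }
}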

@@ -24,6 +24,8 @@ public class DatasetResponse {
     private String status;
     /** Tag list */
     private List<TagResponse> tags;
+    /** Dataset retention period in days */
+    private Integer retentionDays;
     /** Data source */
     private String dataSource;
     /** Target location */
@@ -1,6 +1,8 @@
 package com.datamate.datamanagement.interfaces.dto;

 import com.datamate.datamanagement.common.enums.DatasetStatusType;
+import jakarta.validation.constraints.NotBlank;
+import jakarta.validation.constraints.Size;
 import lombok.Getter;
 import lombok.Setter;

@@ -13,8 +15,11 @@ import java.util.List;
 @Setter
 public class UpdateDatasetRequest {
     /** Dataset name */
+    @Size(min = 1, max = 100)
+    @NotBlank(message = "Dataset name must not be blank")
     private String name;
     /** Dataset description */
+    @Size(max = 500)
     private String description;
     /** Collection task id */
     private String dataSource;
@@ -68,22 +68,6 @@ public class DatasetFileController {
         return ResponseEntity.ok(Response.ok(response));
     }

-    @PostMapping(consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
-    public ResponseEntity<Response<DatasetFileResponse>> uploadDatasetFile(
-            @PathVariable("datasetId") String datasetId,
-            @RequestPart(value = "file", required = false) MultipartFile file) {
-        try {
-            DatasetFile datasetFile = datasetFileApplicationService.uploadFile(datasetId, file);
-
-            return ResponseEntity.status(HttpStatus.CREATED).body(Response.ok(DatasetConverter.INSTANCE.convertToResponse(datasetFile)));
-        } catch (IllegalArgumentException e) {
-            return ResponseEntity.badRequest().body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
-        } catch (Exception e) {
-            log.error("upload fail", e);
-            return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body(Response.error(SystemErrorCode.UNKNOWN_ERROR, null));
-        }
-    }

     @GetMapping("/{fileId}")
     public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(
         @PathVariable("datasetId") String datasetId,
@@ -109,10 +93,9 @@ public class DatasetFileController {
     }

     @IgnoreResponseWrap
-    @GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)
-    public ResponseEntity<Resource> downloadDatasetFileById(
-            @PathVariable("datasetId") String datasetId,
-            @PathVariable("fileId") String fileId) {
+    @GetMapping(value = "/{fileId}/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8")
+    public ResponseEntity<Resource> downloadDatasetFileById(@PathVariable("datasetId") String datasetId,
+                                                            @PathVariable("fileId") String fileId) {
         try {
             DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
             Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
@@ -142,8 +125,8 @@ public class DatasetFileController {
      * @return batch upload request id
      */
     @PostMapping("/upload/pre-upload")
-    public ResponseEntity<Response<String>> preUpload(@PathVariable("datasetId") String datasetId, @RequestBody @Valid UploadFilesPreRequest request) {
+    public ResponseEntity<Response<String>> preUpload(@PathVariable("datasetId") String datasetId,
+                                                      @RequestBody @Valid UploadFilesPreRequest request) {
         return ResponseEntity.ok(Response.ok(datasetFileApplicationService.preUpload(request, datasetId)));
     }

@@ -153,7 +136,7 @@ public class DatasetFileController {
      * @param uploadFileRequest the upload file request
      */
     @PostMapping("/upload/chunk")
-    public ResponseEntity<Void> chunkUpload(@PathVariable("datasetId") String datasetId, UploadFileRequest uploadFileRequest) {
+    public ResponseEntity<Void> chunkUpload(@PathVariable("datasetId") String datasetId, @Valid UploadFileRequest uploadFileRequest) {
         log.info("file upload reqId:{}, fileNo:{}, total chunk num:{}, current chunkNo:{}",
             uploadFileRequest.getReqId(), uploadFileRequest.getFileNo(), uploadFileRequest.getTotalChunkNum(),
             uploadFileRequest.getChunkNo());
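With @Valid now on the form-bound UploadFileRequest, constraint violations are raised before the handler body runs. A hypothetical advice sketch (not part of this commit) for mapping them to a 400 response:

import org.springframework.http.ResponseEntity;
import org.springframework.validation.BindException;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.RestControllerAdvice;

@RestControllerAdvice
class ValidationErrorAdvice {
    // Binding failures on form-bound parameters such as the chunk upload request
    // surface as BindException (in recent Spring versions,
    // MethodArgumentNotValidException extends it, so this catches both).
    @ExceptionHandler(BindException.class)
    ResponseEntity<String> onBindError(BindException ex) {
        return ResponseEntity.badRequest().body(ex.getMessage());
    }
}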
@@ -0,0 +1,65 @@
 package com.datamate.datamanagement.interfaces.scheduler;

 import com.datamate.common.interfaces.PagedResponse;
 import com.datamate.datamanagement.application.DatasetApplicationService;
 import com.datamate.datamanagement.interfaces.dto.DatasetPagingQuery;
 import com.datamate.datamanagement.interfaces.dto.DatasetResponse;
 import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Component;

 import java.time.LocalDateTime;

 /**
  * Scheduled jobs for datasets
  *
  * @since 2025/10/24
  */
 @Slf4j
 @Component
 @RequiredArgsConstructor
 public class DatasetScheduler {
     private final DatasetApplicationService datasetApplicationService;

     /**
      * Scan for and delete datasets past their retention period, every day at 00:00
      */
     @Scheduled(cron = "0 0 0 * * ?")
     public void cleanupExpiredDatasets() {
         int pageNo = 1;
         int pageSize = 500;

         while (true) {
             DatasetPagingQuery datasetPagingQuery = new DatasetPagingQuery();
             datasetPagingQuery.setPage(pageNo);
             datasetPagingQuery.setSize(pageSize);
             PagedResponse<DatasetResponse> datasets = datasetApplicationService.getDatasets(datasetPagingQuery);
             if (CollectionUtils.isEmpty(datasets.getContent())) {
                 break;
             }

             datasets.getContent().forEach(dataset -> {
                 Integer retentionDays = dataset.getRetentionDays();
                 LocalDateTime createdAt = dataset.getCreatedAt();
                 if (retentionDays != null && retentionDays > 0 && createdAt != null) {
                     LocalDateTime expireAt = createdAt.plusDays(retentionDays);
                     if (expireAt.isBefore(LocalDateTime.now())) {
                         try {
                             log.info("Deleting dataset {}, expired at {} (retentionDays={})", dataset.getId(), expireAt, retentionDays);
                             datasetApplicationService.deleteDataset(dataset.getId());
                         } catch (Exception e) {
                             log.warn("Failed to delete expired dataset {}: {}", dataset.getId(), e.getMessage());
                         }
                     }
                 }
             });

             if (datasets.getPage() >= datasets.getTotalPages()) {
                 break;
             }
             pageNo++;
         }
     }
 }
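One deployment assumption behind this class: @Scheduled methods only fire if scheduling is enabled somewhere in the application, typically via @EnableScheduling on a configuration class. A minimal sketch:

import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;

@Configuration
@EnableScheduling
class SchedulingConfig {
    // no beans needed; the annotation switches on the @Scheduled infrastructure
}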