Merge remote-tracking branch 'gitea/lsf' into lsf

This commit is contained in:
2026-02-06 18:29:58 +08:00
5 changed files with 636 additions and 529 deletions

View File

@@ -36,6 +36,7 @@ import lombok.extern.slf4j.Slf4j;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.Resource; import org.springframework.core.io.Resource;
@@ -581,25 +582,100 @@ public class DatasetFileApplicationService {
@Transactional @Transactional
public void deleteDatasetFile(String datasetId, String fileId) { public void deleteDatasetFile(String datasetId, String fileId) {
DatasetFile file = getDatasetFile(datasetId, fileId); DatasetFile file = getDatasetFile(datasetId, fileId);
Dataset dataset = datasetRepository.getById(datasetId); if (file == null) {
datasetFileRepository.removeById(fileId); log.warn("File not found: datasetId={}, fileId={}", datasetId, fileId);
if (!isArchivedStatus(file)) { return;
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
dataset.removeFile(file);
datasetRepository.updateById(dataset);
} }
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, fileId); String logicalPath = file.getLogicalPath();
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
if (file.getFilePath().startsWith(dataset.getPath())) { // 如果 logicalPath 为 null、空字符串或纯空白字符,直接删除当前文件(兼容旧数据)
if (StringUtils.isBlank(logicalPath)) {
deleteDatasetFileInternal(datasetId, file);
return;
}
List<DatasetFile> allVersions = datasetFileRepository.findAllByDatasetIdAndLogicalPath(datasetId, logicalPath);
for (DatasetFile versionFile : allVersions) {
deleteDatasetFileInternal(datasetId, versionFile);
}
}
private void deleteDatasetFileInternal(String datasetId, DatasetFile file) {
Dataset dataset = datasetRepository.getById(datasetId);
if (file == null || dataset == null) {
return;
}
// 先删除数据库记录,确保数据库操作成功后再清理派生文件
try {
datasetFileRepository.removeById(file.getId());
} catch (Exception e) {
log.error("Failed to delete file record from database: fileId={}", file.getId(), e);
// 数据库删除失败时,跳过后续清理以避免数据不一致
return;
}
// 数据库删除成功后,再删除派生文件
if (isSourceDocument(file)) {
deleteDerivedTextFileQuietly(datasetId, file.getId());
}
if (!isArchivedStatus(file)) {
try {
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
dataset.removeFile(file);
datasetRepository.updateById(dataset);
} catch (Exception e) {
log.error("Failed to update dataset: datasetId={}", datasetId, e);
}
}
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, file.getId());
if (file.getFilePath() != null && file.getFilePath().startsWith(dataset.getPath())) {
try { try {
Path filePath = Paths.get(file.getFilePath()); Path filePath = Paths.get(file.getFilePath());
Files.deleteIfExists(filePath); Files.deleteIfExists(filePath);
} catch (IOException ex) { } catch (IOException ex) {
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); log.error("Failed to delete physical file: filePath={}", file.getFilePath(), ex);
} }
} }
} }
/**
 * Deletes every file in the dataset whose metadata marks it as derived from
 * the given source file. Best-effort: failures are logged, never propagated.
 */
private void deleteDerivedTextFileQuietly(String datasetId, String sourceFileId) {
    if (sourceFileId == null || sourceFileId.isBlank()) {
        return;
    }
    try {
        for (DatasetFile candidate : datasetFileRepository.findAllByDatasetId(datasetId)) {
            if (isDerivedFileFromSource(candidate, sourceFileId)) {
                deleteDatasetFileInternal(datasetId, candidate);
            }
        }
    } catch (Exception e) {
        log.error("Failed to delete derived text files for sourceFileId: {}", sourceFileId, e);
    }
}
/**
 * Returns true when the file's JSON metadata declares it derived from the
 * given source file ID. Unparseable or absent metadata counts as "not derived".
 */
private boolean isDerivedFileFromSource(DatasetFile file, String sourceFileId) {
    if (file == null || file.getMetadata() == null || file.getMetadata().isBlank()) {
        return false;
    }
    try {
        // NOTE(review): an ObjectMapper is constructed per call; consider a
        // shared static final instance if this ends up on a hot path.
        Map<String, Object> metadata = new ObjectMapper()
                .readValue(file.getMetadata(), new TypeReference<Map<String, Object>>() {});
        Object origin = metadata.get(DERIVED_METADATA_KEY);
        return origin != null && sourceFileId.equals(String.valueOf(origin));
    } catch (Exception e) {
        log.debug("Failed to parse metadata for derived detection: fileId={}", file.getId(), e);
        return false;
    }
}
/** /**
* 下载文件 * 下载文件
*/ */

View File

@@ -46,4 +46,13 @@ public interface DatasetFileMapper extends BaseMapper<DatasetFile> {
* @return 文件数统计列表 * @return 文件数统计列表
*/ */
List<DatasetFileCount> countNonDerivedByDatasetIds(@Param("datasetIds") List<String> datasetIds); List<DatasetFileCount> countNonDerivedByDatasetIds(@Param("datasetIds") List<String> datasetIds);
/**
 * Finds every file (in any status, including archived) that shares the
 * given logical path within a dataset.
 *
 * @param datasetId   dataset ID
 * @param logicalPath logical path shared by all versions of a file
 * @return matching files, ordered newest version first
 */
List<DatasetFile> findAllByDatasetIdAndLogicalPath(@Param("datasetId") String datasetId, @Param("logicalPath") String logicalPath);
} }

View File

@@ -37,6 +37,15 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
*/ */
DatasetFile findLatestByDatasetIdAndLogicalPath(String datasetId, String logicalPath); DatasetFile findLatestByDatasetIdAndLogicalPath(String datasetId, String logicalPath);
/**
 * Finds every file (in any status, including archived) that shares the
 * given logical path within a dataset.
 *
 * @param datasetId   dataset ID
 * @param logicalPath logical path
 * @return matching files
 */
List<DatasetFile> findAllByDatasetIdAndLogicalPath(String datasetId, String logicalPath);
IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name, IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
Boolean hasAnnotation, IPage<DatasetFile> page); Boolean hasAnnotation, IPage<DatasetFile> page);

View File

@@ -84,6 +84,11 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
.last("LIMIT 1")); .last("LIMIT 1"));
} }
/** Delegates directly to the MyBatis mapper; no additional filtering applied here. */
@Override
public List<DatasetFile> findAllByDatasetIdAndLogicalPath(String datasetId, String logicalPath) {
    return datasetFileMapper.findAllByDatasetIdAndLogicalPath(datasetId, logicalPath);
}
public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name, public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
Boolean hasAnnotation, IPage<DatasetFile> page) { Boolean hasAnnotation, IPage<DatasetFile> page) {
LambdaQueryWrapper<DatasetFile> wrapper = new LambdaQueryWrapper<DatasetFile>() LambdaQueryWrapper<DatasetFile> wrapper = new LambdaQueryWrapper<DatasetFile>()

View File

@@ -64,7 +64,7 @@
AND (status IS NULL OR status &lt;&gt; 'ARCHIVED') AND (status IS NULL OR status &lt;&gt; 'ARCHIVED')
</select> </select>
<select id="findByDatasetIdAndFileName" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile"> <select id="findByDatasetIdAndFileName" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/> SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId} WHERE dataset_id = #{datasetId}
@@ -74,8 +74,16 @@
LIMIT 1 LIMIT 1
</select> </select>
<!-- All versions sharing a logical path, newest version first. -->
<select id="findAllByDatasetIdAndLogicalPath" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
    SELECT <include refid="Base_Column_List"/>
    FROM t_dm_dataset_files
    WHERE dataset_id = #{datasetId}
      AND logical_path = #{logicalPath}
    ORDER BY version DESC, upload_time DESC
</select>
<select id="findAllByDatasetId" parameterType="string"
resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/> SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId} WHERE dataset_id = #{datasetId}