feat(data-management): 实现数据集文件版本管理和内部路径保护

- 将数据集文件查询方法替换为只查询可见文件的版本
- 引入文件状态管理(ACTIVE/ARCHIVED)和内部目录结构
- 实现文件重复处理策略,支持版本控制模式而非覆盖
- 添加内部数据目录保护,防止访问.datamate等系统目录
- 重构文件上传流程,引入暂存目录和事务后清理机制
- 实现文件版本归档功能,保留历史版本到专用存储位置
- 优化文件路径规范化和安全验证逻辑
- 修复文件删除逻辑,确保归档文件不会被错误移除
- 更新数据集压缩下载功能以排除内部系统文件
This commit is contained in:
2026-02-04 23:53:35 +08:00
parent 473f4e717f
commit d0972cbc9d
16 changed files with 1141 additions and 484 deletions

View File

@@ -3,7 +3,7 @@
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper">
<sql id="Base_Column_List">
id, dataset_id, file_name, file_path, file_type, file_size, check_sum, tags, metadata, status,
id, dataset_id, file_name, file_path, logical_path, version, file_type, file_size, check_sum, tags, metadata, status,
upload_time, last_access_time, created_at, updated_at
</sql>
@@ -39,13 +39,17 @@
</select>
<select id="countByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
SELECT COUNT(*)
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND (status IS NULL OR status <> 'ARCHIVED')
</select>
<select id="countNonDerivedByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*)
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND (status IS NULL OR status <> 'ARCHIVED')
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
</select>
@@ -54,13 +58,19 @@
</select>
<select id="sumSizeByDatasetId" parameterType="string" resultType="long">
SELECT COALESCE(SUM(file_size), 0) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
SELECT COALESCE(SUM(file_size), 0)
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND (status IS NULL OR status <> 'ARCHIVED')
</select>
<select id="findByDatasetIdAndFileName" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId} AND file_name = #{fileName}
WHERE dataset_id = #{datasetId}
AND file_name = #{fileName}
AND (status IS NULL OR status <> 'ARCHIVED')
ORDER BY version DESC, upload_time DESC
LIMIT 1
</select>
@@ -91,6 +101,8 @@
UPDATE t_dm_dataset_files
SET file_name = #{fileName},
file_path = #{filePath},
logical_path = #{logicalPath},
version = #{version},
file_type = #{fileType},
file_size = #{fileSize},
upload_time = #{uploadTime},
@@ -126,6 +138,7 @@
<foreach collection="datasetIds" item="datasetId" open="(" separator="," close=")">
#{datasetId}
</foreach>
AND (status IS NULL OR status <> 'ARCHIVED')
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
GROUP BY dataset_id
</select>