feat(dataset): 实现数据集文件可见性过滤功能

- 添加派生文件识别逻辑,通过元数据中的derived_from_file_id字段判断
- 实现applyVisibleFileCounts方法为数据集批量设置可见文件数量
- 修改数据集统计接口使用过滤后的可见文件进行统计计算
- 添加normalizeFilePath工具方法统一路径格式处理
- 更新文件查询逻辑支持派生文件过滤功能
- 新增DatasetFileCount DTO用于文件计数统计返回
This commit is contained in:
2026-02-01 22:55:07 +08:00
parent 9d185bb10c
commit a0239518fb
8 changed files with 226 additions and 64 deletions

View File

@@ -42,6 +42,13 @@
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
</select>
<!--
  Counts the files of a single dataset that are not derived from another file.
  A row is counted when its metadata column is NULL, or when JSON_EXTRACT
  yields NULL for the '$.derived_from_file_id' path of that metadata.
  NOTE(review): if this runs on MySQL, a key stored with an explicit JSON null
  is not SQL NULL for JSON_EXTRACT, so such a row would be left out of the
  count. Confirm that writers never store an explicit null for this key.
-->
<select id="countNonDerivedByDatasetId" parameterType="string" resultType="long">
    SELECT COUNT(*)
    FROM t_dm_dataset_files AS f
    WHERE f.dataset_id = #{datasetId}
      AND (
            f.metadata IS NULL
            OR JSON_EXTRACT(f.metadata, '$.derived_from_file_id') IS NULL
          )
</select>
<!-- Counts the files of a single dataset whose status column equals 'COMPLETED'. -->
<select id="countCompletedByDatasetId" parameterType="string" resultType="long">
    SELECT COUNT(*)
    FROM t_dm_dataset_files AS f
    WHERE f.dataset_id = #{datasetId}
      AND f.status = 'COMPLETED'
</select>
@@ -110,4 +117,16 @@
AND metadata IS NOT NULL
AND JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NOT NULL
</select>
<!--
  Batch variant of the non-derived file count: returns one
  (datasetId, fileCount) row per requested dataset, counting only rows whose
  metadata is NULL or has no '$.derived_from_file_id' value.
  A dataset with no matching file produces no row at all (GROUP BY emits
  nothing for it), so a missing entry means a count of zero.
  When datasetIds is null or empty, foreach would emit nothing and leave a bare
  "dataset_id IN" behind, which is a SQL syntax error. The choose below renders
  "1 = 0" in that case so the statement stays valid and returns no rows.
  NOTE(review): the size() check assumes datasetIds is a java.util.Collection.
  TODO confirm against the mapper interface.
-->
<select id="countNonDerivedByDatasetIds" resultType="com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount">
    SELECT dataset_id AS datasetId,
           COUNT(*) AS fileCount
    FROM t_dm_dataset_files
    WHERE
    <choose>
        <when test="datasetIds != null and datasetIds.size() > 0">
            dataset_id IN
            <foreach collection="datasetIds" item="datasetId" open="(" separator="," close=")">
                #{datasetId}
            </foreach>
        </when>
        <otherwise>
            1 = 0
        </otherwise>
    </choose>
      AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
    GROUP BY dataset_id
</select>
</mapper>