Files
DataMate/backend/services/data-management-service/src/main/resources/mappers/DatasetFileMapper.xml
Jerry Yan a0239518fb feat(dataset): 实现数据集文件可见性过滤功能
- 添加派生文件识别逻辑,通过元数据中的derived_from_file_id字段判断
- 实现applyVisibleFileCounts方法为数据集批量设置可见文件数量
- 修改数据集统计接口使用过滤后的可见文件进行统计计算
- 添加normalizeFilePath工具方法统一路径格式处理
- 更新文件查询逻辑支持派生文件过滤功能
- 新增DatasetFileCount DTO用于文件计数统计返回
2026-02-01 22:55:07 +08:00

133 lines
5.4 KiB
XML

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper">
<sql id="Base_Column_List">
id, dataset_id, file_name, file_path, file_type, file_size, check_sum, tags, metadata, status,
upload_time, last_access_time, created_at, updated_at
</sql>
<select id="findById" parameterType="string"
resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE id = #{id}
</select>
<select id="findByDatasetId" parameterType="string"
resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
ORDER BY upload_time DESC
</select>
<select id="findByDatasetIdAndStatus" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND status = #{status}
ORDER BY upload_time DESC
</select>
<select id="findByDatasetIdAndFileType" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND file_type = #{fileType}
ORDER BY upload_time DESC
</select>
<select id="countByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
</select>
<select id="countNonDerivedByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*)
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
</select>
<select id="countCompletedByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId} AND status = 'COMPLETED'
</select>
<select id="sumSizeByDatasetId" parameterType="string" resultType="long">
SELECT COALESCE(SUM(file_size), 0) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
</select>
<select id="findByDatasetIdAndFileName" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId} AND file_name = #{fileName}
LIMIT 1
</select>
<select id="findAllByDatasetId" parameterType="string"
resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
ORDER BY upload_time DESC
</select>
<select id="findByCriteria" resultType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
SELECT <include refid="Base_Column_List"/>
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
<!-- Replace invalid XML '&&' with 'and' for MyBatis OGNL -->
<if test="fileType != null and fileType != ''">
AND file_type = #{fileType}
</if>
<if test="status != null and status != ''">
AND status = #{status}
</if>
ORDER BY upload_time DESC
</select>
<update id="update" parameterType="com.datamate.datamanagement.domain.model.dataset.DatasetFile">
UPDATE t_dm_dataset_files
SET file_name = #{fileName},
file_path = #{filePath},
file_type = #{fileType},
file_size = #{fileSize},
upload_time = #{uploadTime},
last_access_time = #{lastAccessTime},
status = #{status}
WHERE id = #{id}
</update>
<delete id="deleteById" parameterType="string">
DELETE FROM t_dm_dataset_files WHERE id = #{id}
</delete>
<update id="updateFilePathPrefix">
UPDATE t_dm_dataset_files
SET file_path = CONCAT(#{newPrefix}, SUBSTRING(file_path, LENGTH(#{oldPrefix}) + 1))
WHERE dataset_id = #{datasetId}
AND file_path LIKE CONCAT(#{oldPrefix}, '%')
</update>
<select id="findSourceFileIdsWithDerivedFiles" resultType="java.lang.String">
SELECT DISTINCT JSON_UNQUOTE(JSON_EXTRACT(metadata, '$.derived_from_file_id')) AS source_file_id
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND metadata IS NOT NULL
AND JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NOT NULL
</select>
<select id="countNonDerivedByDatasetIds" resultType="com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount">
SELECT dataset_id AS datasetId,
COUNT(*) AS fileCount
FROM t_dm_dataset_files
WHERE dataset_id IN
<foreach collection="datasetIds" item="datasetId" open="(" separator="," close=")">
#{datasetId}
</foreach>
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
GROUP BY dataset_id
</select>
</mapper>