feat(dataset): 实现数据集文件可见性过滤功能

- 添加派生文件识别逻辑,通过元数据中的derived_from_file_id字段判断
- 实现applyVisibleFileCounts方法为数据集批量设置可见文件数量
- 修改数据集统计接口使用过滤后的可见文件进行统计计算
- 添加normalizeFilePath工具方法统一路径格式处理
- 更新文件查询逻辑支持派生文件过滤功能
- 新增DatasetFileCount DTO用于文件计数统计返回
This commit is contained in:
2026-02-01 22:55:07 +08:00
parent 9d185bb10c
commit a0239518fb
8 changed files with 226 additions and 64 deletions

View File

@@ -19,8 +19,11 @@ import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorC
import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper; import com.datamate.datamanagement.infrastructure.persistence.mapper.TagMapper;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.*; import com.datamate.datamanagement.interfaces.dto.*;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.collections4.CollectionUtils;
@@ -53,6 +56,7 @@ public class DatasetApplicationService {
private static final int SIMILAR_DATASET_MAX_LIMIT = 50; private static final int SIMILAR_DATASET_MAX_LIMIT = 50;
private static final int SIMILAR_DATASET_CANDIDATE_FACTOR = 5; private static final int SIMILAR_DATASET_CANDIDATE_FACTOR = 5;
private static final int SIMILAR_DATASET_CANDIDATE_MAX = 100; private static final int SIMILAR_DATASET_CANDIDATE_MAX = 100;
private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
private final DatasetRepository datasetRepository; private final DatasetRepository datasetRepository;
private final TagMapper tagMapper; private final TagMapper tagMapper;
private final DatasetFileRepository datasetFileRepository; private final DatasetFileRepository datasetFileRepository;
@@ -142,6 +146,7 @@ public class DatasetApplicationService {
BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND); BusinessAssert.notNull(dataset, DataManagementErrorCode.DATASET_NOT_FOUND);
List<DatasetFile> datasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); List<DatasetFile> datasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
dataset.setFiles(datasetFiles); dataset.setFiles(datasetFiles);
applyVisibleFileCounts(Collections.singletonList(dataset));
return dataset; return dataset;
} }
@@ -153,6 +158,7 @@ public class DatasetApplicationService {
IPage<Dataset> page = new Page<>(query.getPage(), query.getSize()); IPage<Dataset> page = new Page<>(query.getPage(), query.getSize());
page = datasetRepository.findByCriteria(page, query); page = datasetRepository.findByCriteria(page, query);
String datasetPvcName = getDatasetPvcName(); String datasetPvcName = getDatasetPvcName();
applyVisibleFileCounts(page.getRecords());
List<DatasetResponse> datasetResponses = DatasetConverter.INSTANCE.convertToResponse(page.getRecords()); List<DatasetResponse> datasetResponses = DatasetConverter.INSTANCE.convertToResponse(page.getRecords());
datasetResponses.forEach(dataset -> dataset.setPvcName(datasetPvcName)); datasetResponses.forEach(dataset -> dataset.setPvcName(datasetPvcName));
return PagedResponse.of(datasetResponses, page.getCurrent(), page.getTotal(), page.getPages()); return PagedResponse.of(datasetResponses, page.getCurrent(), page.getTotal(), page.getPages());
@@ -200,6 +206,7 @@ public class DatasetApplicationService {
}) })
.limit(safeLimit) .limit(safeLimit)
.toList(); .toList();
applyVisibleFileCounts(sorted);
List<DatasetResponse> responses = DatasetConverter.INSTANCE.convertToResponse(sorted); List<DatasetResponse> responses = DatasetConverter.INSTANCE.convertToResponse(sorted);
responses.forEach(item -> item.setPvcName(datasetPvcName)); responses.forEach(item -> item.setPvcName(datasetPvcName));
return responses; return responses;
@@ -345,6 +352,61 @@ public class DatasetApplicationService {
dataset.setPath(newPath); dataset.setPath(newPath);
} }
/**
 * Populates each dataset's file count with the number of visible files,
 * i.e. files that are not derived from another file. Datasets with no
 * matching count row (or a null count) are set to 0.
 *
 * @param datasets datasets to decorate in place; null entries and entries
 *                 without a usable id are skipped
 */
private void applyVisibleFileCounts(List<Dataset> datasets) {
    if (CollectionUtils.isEmpty(datasets)) {
        return;
    }
    // Only ids we can actually query for.
    List<String> datasetIds = datasets.stream()
            .filter(Objects::nonNull)
            .map(Dataset::getId)
            .filter(StringUtils::hasText)
            .toList();
    if (datasetIds.isEmpty()) {
        return;
    }
    // Single batched query for all datasets; on duplicate ids keep the first row seen.
    Map<String, Long> visibleCounts = new HashMap<>();
    for (DatasetFileCount row : datasetFileRepository.countNonDerivedByDatasetIds(datasetIds)) {
        if (row == null) {
            continue;
        }
        long fileCount = row.getFileCount() != null ? row.getFileCount() : 0L;
        visibleCounts.putIfAbsent(row.getDatasetId(), fileCount);
    }
    for (Dataset dataset : datasets) {
        if (dataset != null && StringUtils.hasText(dataset.getId())) {
            dataset.setFileCount(visibleCounts.getOrDefault(dataset.getId(), 0L));
        }
    }
}
/**
 * Returns only the visible files from the given list, dropping every file
 * flagged as derived by {@link #isDerivedFile(DatasetFile)}.
 *
 * @param files files to filter; may be null or empty
 * @return mutable list of non-derived files; empty list for null/empty input
 */
private List<DatasetFile> filterVisibleFiles(List<DatasetFile> files) {
    if (!CollectionUtils.isEmpty(files)) {
        return files.stream()
                .filter(candidate -> !isDerivedFile(candidate))
                .collect(Collectors.toList());
    }
    return Collections.emptyList();
}
/**
 * Determines whether a dataset file was derived from another file.
 * A file counts as derived when its JSON metadata contains a non-null
 * {@code derived_from_file_id} entry.
 *
 * NOTE(review): a fresh ObjectMapper is built on every call; if this sits in a
 * hot path, consider reusing a shared (thread-safe) instance at class level.
 *
 * @param datasetFile file to inspect; may be null
 * @return true when metadata marks the file as derived; false for a null file,
 *         blank metadata, unparseable metadata, or a missing/null marker
 */
private boolean isDerivedFile(DatasetFile datasetFile) {
    if (datasetFile == null) {
        return false;
    }
    String rawMetadata = datasetFile.getMetadata();
    if (!StringUtils.hasText(rawMetadata)) {
        return false;
    }
    try {
        Map<String, Object> parsed = new ObjectMapper()
                .readValue(rawMetadata, new TypeReference<Map<String, Object>>() {});
        return parsed.get(DERIVED_METADATA_KEY) != null;
    } catch (Exception e) {
        // Malformed metadata is treated as "not derived" rather than failing the caller.
        log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e);
        return false;
    }
}
/** /**
* 获取数据集统计信息 * 获取数据集统计信息
*/ */
@@ -357,27 +419,29 @@ public class DatasetApplicationService {
Map<String, Object> statistics = new HashMap<>(); Map<String, Object> statistics = new HashMap<>();
// 基础统计 List<DatasetFile> allFiles = datasetFileRepository.findAllByDatasetId(datasetId);
Long totalFiles = datasetFileRepository.countByDatasetId(datasetId); List<DatasetFile> visibleFiles = filterVisibleFiles(allFiles);
Long completedFiles = datasetFileRepository.countCompletedByDatasetId(datasetId); long totalFiles = visibleFiles.size();
long completedFiles = visibleFiles.stream()
.filter(file -> "COMPLETED".equalsIgnoreCase(file.getStatus()))
.count();
Long totalSize = datasetFileRepository.sumSizeByDatasetId(datasetId); Long totalSize = datasetFileRepository.sumSizeByDatasetId(datasetId);
statistics.put("totalFiles", totalFiles != null ? totalFiles.intValue() : 0); statistics.put("totalFiles", (int) totalFiles);
statistics.put("completedFiles", completedFiles != null ? completedFiles.intValue() : 0); statistics.put("completedFiles", (int) completedFiles);
statistics.put("totalSize", totalSize != null ? totalSize : 0L); statistics.put("totalSize", totalSize != null ? totalSize : 0L);
// 完成率计算 // 完成率计算
float completionRate = 0.0f; float completionRate = 0.0f;
if (totalFiles != null && totalFiles > 0) { if (totalFiles > 0) {
completionRate = (completedFiles != null ? completedFiles.floatValue() : 0.0f) / totalFiles.floatValue() * 100.0f; completionRate = ((float) completedFiles) / (float) totalFiles * 100.0f;
} }
statistics.put("completionRate", completionRate); statistics.put("completionRate", completionRate);
// 文件类型分布统计 // 文件类型分布统计
Map<String, Integer> fileTypeDistribution = new HashMap<>(); Map<String, Integer> fileTypeDistribution = new HashMap<>();
List<DatasetFile> allFiles = datasetFileRepository.findAllByDatasetId(datasetId); if (!visibleFiles.isEmpty()) {
if (allFiles != null) { for (DatasetFile file : visibleFiles) {
for (DatasetFile file : allFiles) {
String fileType = file.getFileType() != null ? file.getFileType() : "unknown"; String fileType = file.getFileType() != null ? file.getFileType() : "unknown";
fileTypeDistribution.put(fileType, fileTypeDistribution.getOrDefault(fileType, 0) + 1); fileTypeDistribution.put(fileType, fileTypeDistribution.getOrDefault(fileType, 0) + 1);
} }
@@ -386,8 +450,8 @@ public class DatasetApplicationService {
// 状态分布统计 // 状态分布统计
Map<String, Integer> statusDistribution = new HashMap<>(); Map<String, Integer> statusDistribution = new HashMap<>();
if (allFiles != null) { if (!visibleFiles.isEmpty()) {
for (DatasetFile file : allFiles) { for (DatasetFile file : visibleFiles) {
String status = file.getStatus() != null ? file.getStatus() : "unknown"; String status = file.getStatus() != null ? file.getStatus() : "unknown";
statusDistribution.put(status, statusDistribution.getOrDefault(status, 0) + 1); statusDistribution.put(status, statusDistribution.getOrDefault(status, 0) + 1);
} }

View File

@@ -165,11 +165,18 @@ public class DatasetFileApplicationService {
String datasetPath = dataset.getPath(); String datasetPath = dataset.getPath();
Path queryPath = Path.of(dataset.getPath() + File.separator + prefix); Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId) Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
.stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); .stream()
.filter(file -> file.getFilePath() != null)
.collect(Collectors.toMap(
file -> normalizeFilePath(file.getFilePath()),
Function.identity(),
(left, right) -> left
));
Set<String> derivedFilePaths = excludeDerivedFiles Set<String> derivedFilePaths = excludeDerivedFiles
? datasetFilesMap.values().stream() ? datasetFilesMap.values().stream()
.filter(this::isDerivedFile) .filter(this::isDerivedFile)
.map(DatasetFile::getFilePath) .map(DatasetFile::getFilePath)
.map(this::normalizeFilePath)
.filter(Objects::nonNull) .filter(Objects::nonNull)
.collect(Collectors.toSet()) .collect(Collectors.toSet())
: Collections.emptySet(); : Collections.emptySet();
@@ -180,7 +187,9 @@ public class DatasetFileApplicationService {
try (Stream<Path> pathStream = Files.list(queryPath)) { try (Stream<Path> pathStream = Files.list(queryPath)) {
List<Path> allFiles = pathStream List<Path> allFiles = pathStream
.filter(path -> path.toString().startsWith(datasetPath)) .filter(path -> path.toString().startsWith(datasetPath))
.filter(path -> !excludeDerivedFiles || Files.isDirectory(path) || !derivedFilePaths.contains(path.toString())) .filter(path -> !excludeDerivedFiles
|| Files.isDirectory(path)
|| !derivedFilePaths.contains(normalizeFilePath(path.toString())))
.sorted(Comparator .sorted(Comparator
.comparing((Path path) -> !Files.isDirectory(path)) .comparing((Path path) -> !Files.isDirectory(path))
.thenComparing(path -> path.getFileName().toString())) .thenComparing(path -> path.getFileName().toString()))
@@ -199,7 +208,9 @@ public class DatasetFileApplicationService {
if (fromIndex < total) { if (fromIndex < total) {
pageData = allFiles.subList(fromIndex, toIndex); pageData = allFiles.subList(fromIndex, toIndex);
} }
List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList(); List<DatasetFile> datasetFiles = pageData.stream()
.map(path -> getDatasetFile(path, datasetFilesMap, excludeDerivedFiles, derivedFilePaths))
.toList();
return new PagedResponse<>(page, size, total, totalPages, datasetFiles); return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
} catch (IOException e) { } catch (IOException e) {
@@ -208,7 +219,10 @@ public class DatasetFileApplicationService {
} }
} }
private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) { private DatasetFile getDatasetFile(Path path,
Map<String, DatasetFile> datasetFilesMap,
boolean excludeDerivedFiles,
Set<String> derivedFilePaths) {
DatasetFile datasetFile = new DatasetFile(); DatasetFile datasetFile = new DatasetFile();
LocalDateTime localDateTime = LocalDateTime.now(); LocalDateTime localDateTime = LocalDateTime.now();
try { try {
@@ -230,12 +244,21 @@ public class DatasetFileApplicationService {
long totalSize; long totalSize;
try (Stream<Path> walk = Files.walk(path)) { try (Stream<Path> walk = Files.walk(path)) {
fileCount = walk.filter(Files::isRegularFile).count(); Stream<Path> fileStream = walk.filter(Files::isRegularFile);
if (excludeDerivedFiles && !derivedFilePaths.isEmpty()) {
fileStream = fileStream.filter(filePath ->
!derivedFilePaths.contains(normalizeFilePath(filePath.toString())));
}
fileCount = fileStream.count();
} }
try (Stream<Path> walk = Files.walk(path)) { try (Stream<Path> walk = Files.walk(path)) {
totalSize = walk Stream<Path> fileStream = walk.filter(Files::isRegularFile);
.filter(Files::isRegularFile) if (excludeDerivedFiles && !derivedFilePaths.isEmpty()) {
fileStream = fileStream.filter(filePath ->
!derivedFilePaths.contains(normalizeFilePath(filePath.toString())));
}
totalSize = fileStream
.mapToLong(p -> { .mapToLong(p -> {
try { try {
return Files.size(p); return Files.size(p);
@@ -253,7 +276,7 @@ public class DatasetFileApplicationService {
log.error("stat directory info error", e); log.error("stat directory info error", e);
} }
} else { } else {
DatasetFile exist = datasetFilesMap.get(path.toString()); DatasetFile exist = datasetFilesMap.get(normalizeFilePath(path.toString()));
if (exist == null) { if (exist == null) {
datasetFile.setId("file-" + datasetFile.getFileName()); datasetFile.setId("file-" + datasetFile.getFileName());
datasetFile.setFileSize(path.toFile().length()); datasetFile.setFileSize(path.toFile().length());
@@ -264,6 +287,17 @@ public class DatasetFileApplicationService {
return datasetFile; return datasetFile;
} }
/**
 * Canonicalizes a file path string so map lookups and set membership checks
 * compare consistently (absolute + normalized). When the string is not a
 * valid path for the current file system, falls back to a best-effort
 * forward-slash form.
 *
 * @param filePath raw path string; may be null or blank
 * @return normalized absolute path, or null for null/blank input
 */
private String normalizeFilePath(String filePath) {
    if (filePath == null) {
        return null;
    }
    if (filePath.isBlank()) {
        return null;
    }
    try {
        Path canonical = Paths.get(filePath).toAbsolutePath().normalize();
        return canonical.toString();
    } catch (Exception e) {
        // Invalid path syntax (e.g. illegal characters): normalize separators only.
        return filePath.replace("\\", "/");
    }
}
private boolean isSourceDocument(DatasetFile datasetFile) { private boolean isSourceDocument(DatasetFile datasetFile) {
if (datasetFile == null) { if (datasetFile == null) {
return false; return false;

View File

@@ -2,6 +2,7 @@ package com.datamate.datamanagement.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper; import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile; import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
import org.apache.ibatis.annotations.Mapper; import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param; import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.session.RowBounds; import org.apache.ibatis.session.RowBounds;
@@ -17,6 +18,7 @@ public interface DatasetFileMapper extends BaseMapper<DatasetFile> {
Long countByDatasetId(@Param("datasetId") String datasetId); Long countByDatasetId(@Param("datasetId") String datasetId);
Long countCompletedByDatasetId(@Param("datasetId") String datasetId); Long countCompletedByDatasetId(@Param("datasetId") String datasetId);
Long sumSizeByDatasetId(@Param("datasetId") String datasetId); Long sumSizeByDatasetId(@Param("datasetId") String datasetId);
Long countNonDerivedByDatasetId(@Param("datasetId") String datasetId);
DatasetFile findByDatasetIdAndFileName(@Param("datasetId") String datasetId, @Param("fileName") String fileName); DatasetFile findByDatasetIdAndFileName(@Param("datasetId") String datasetId, @Param("fileName") String fileName);
List<DatasetFile> findAllByDatasetId(@Param("datasetId") String datasetId); List<DatasetFile> findAllByDatasetId(@Param("datasetId") String datasetId);
List<DatasetFile> findByCriteria(@Param("datasetId") String datasetId, List<DatasetFile> findByCriteria(@Param("datasetId") String datasetId,
@@ -38,4 +40,12 @@ public interface DatasetFileMapper extends BaseMapper<DatasetFile> {
* @return 源文件ID列表 * @return 源文件ID列表
*/ */
List<String> findSourceFileIdsWithDerivedFiles(@Param("datasetId") String datasetId); List<String> findSourceFileIdsWithDerivedFiles(@Param("datasetId") String datasetId);
/**
 * Batch-counts files per dataset, excluding derived files (rows whose JSON
 * metadata contains a derived_from_file_id entry).
 *
 * @param datasetIds dataset id list; callers must pass a non-empty list —
 *                   the mapper XML builds an IN (...) clause with no empty
 *                   guard, so an empty list produces invalid SQL
 * @return one count row per dataset that has at least one visible file;
 *         datasets with zero visible files yield no row
 */
List<DatasetFileCount> countNonDerivedByDatasetIds(@Param("datasetIds") List<String> datasetIds);
} }

View File

@@ -3,6 +3,7 @@ package com.datamate.datamanagement.infrastructure.persistence.repository;
import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.repository.IRepository; import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile; import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
import java.util.List; import java.util.List;
@@ -15,6 +16,8 @@ import java.util.List;
public interface DatasetFileRepository extends IRepository<DatasetFile> { public interface DatasetFileRepository extends IRepository<DatasetFile> {
Long countByDatasetId(String datasetId); Long countByDatasetId(String datasetId);
Long countNonDerivedByDatasetId(String datasetId);
Long countCompletedByDatasetId(String datasetId); Long countCompletedByDatasetId(String datasetId);
Long sumSizeByDatasetId(String datasetId); Long sumSizeByDatasetId(String datasetId);
@@ -36,4 +39,6 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
* @return 源文件ID列表 * @return 源文件ID列表
*/ */
List<String> findSourceFileIdsWithDerivedFiles(String datasetId); List<String> findSourceFileIdsWithDerivedFiles(String datasetId);
List<DatasetFileCount> countNonDerivedByDatasetIds(List<String> datasetIds);
} }

View File

@@ -0,0 +1,18 @@
package com.datamate.datamanagement.infrastructure.persistence.repository.dto;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
/**
 * Result row for dataset file-count statistics: the number of visible
 * (non-derived) files belonging to one dataset.
 *
 * Plain mutable DTO populated by MyBatis result mapping; requires the
 * no-arg constructor and the setters below.
 */
public class DatasetFileCount {

    private String datasetId;
    private Long fileCount;

    public DatasetFileCount() {
    }

    public DatasetFileCount(String datasetId, Long fileCount) {
        this.datasetId = datasetId;
        this.fileCount = fileCount;
    }

    public String getDatasetId() {
        return datasetId;
    }

    public void setDatasetId(String datasetId) {
        this.datasetId = datasetId;
    }

    public Long getFileCount() {
        return fileCount;
    }

    public void setFileCount(Long fileCount) {
        this.fileCount = fileCount;
    }
}

View File

@@ -6,6 +6,7 @@ import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile; import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper; import com.datamate.datamanagement.infrastructure.persistence.mapper.DatasetFileMapper;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.springframework.stereotype.Repository; import org.springframework.stereotype.Repository;
import org.springframework.util.StringUtils; import org.springframework.util.StringUtils;
@@ -30,6 +31,11 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
return datasetFileMapper.selectCount(new LambdaQueryWrapper<DatasetFile>().eq(DatasetFile::getDatasetId, datasetId)); return datasetFileMapper.selectCount(new LambdaQueryWrapper<DatasetFile>().eq(DatasetFile::getDatasetId, datasetId));
} }
/**
 * Counts files of one dataset excluding derived files, delegating to the
 * mapper's XML-defined query (metadata IS NULL or lacks derived_from_file_id).
 */
@Override
public Long countNonDerivedByDatasetId(String datasetId) {
    return datasetFileMapper.countNonDerivedByDatasetId(datasetId);
}
@Override @Override
public Long countCompletedByDatasetId(String datasetId) { public Long countCompletedByDatasetId(String datasetId) {
return datasetFileMapper.countCompletedByDatasetId(datasetId); return datasetFileMapper.countCompletedByDatasetId(datasetId);
@@ -71,4 +77,9 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
// 使用 MyBatis 的 @Select 注解或直接调用 mapper 方法 // 使用 MyBatis 的 @Select 注解或直接调用 mapper 方法
return datasetFileMapper.findSourceFileIdsWithDerivedFiles(datasetId); return datasetFileMapper.findSourceFileIdsWithDerivedFiles(datasetId);
} }
/**
 * Batch-counts visible (non-derived) files per dataset, delegating to the
 * mapper's XML foreach query. Callers must pass a non-empty id list — the
 * generated IN (...) clause has no empty-list guard.
 */
@Override
public List<DatasetFileCount> countNonDerivedByDatasetIds(List<String> datasetIds) {
    return datasetFileMapper.countNonDerivedByDatasetIds(datasetIds);
}
} }

View File

@@ -42,6 +42,13 @@
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId} SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId}
</select> </select>
<!-- Counts visible files for a single dataset, excluding derived files
     (rows whose metadata JSON contains a derived_from_file_id entry).
     NOTE(review): JSON_EXTRACT returns a JSON null (not SQL NULL) when the
     stored value is an explicit JSON null, and errors on malformed JSON —
     confirm metadata is always either NULL or a valid JSON object. -->
<select id="countNonDerivedByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*)
FROM t_dm_dataset_files
WHERE dataset_id = #{datasetId}
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
</select>
<select id="countCompletedByDatasetId" parameterType="string" resultType="long"> <select id="countCompletedByDatasetId" parameterType="string" resultType="long">
SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId} AND status = 'COMPLETED' SELECT COUNT(*) FROM t_dm_dataset_files WHERE dataset_id = #{datasetId} AND status = 'COMPLETED'
</select> </select>
@@ -110,4 +117,16 @@
AND metadata IS NOT NULL AND metadata IS NOT NULL
AND JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NOT NULL AND JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NOT NULL
</select> </select>
<!-- Batch version: one count row per dataset id, excluding derived files.
     Datasets with zero visible files produce no row (GROUP BY drops them);
     callers must treat a missing id as count 0.
     NOTE(review): the foreach has no empty-list guard — an empty datasetIds
     list generates "IN ()" which is invalid SQL; callers must pre-check. -->
<select id="countNonDerivedByDatasetIds" resultType="com.datamate.datamanagement.infrastructure.persistence.repository.dto.DatasetFileCount">
SELECT dataset_id AS datasetId,
COUNT(*) AS fileCount
FROM t_dm_dataset_files
WHERE dataset_id IN
<foreach collection="datasetIds" item="datasetId" open="(" separator="," close=")">
#{datasetId}
</foreach>
AND (metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL)
GROUP BY dataset_id
</select>
</mapper> </mapper>

View File

@@ -145,9 +145,10 @@
<select id="getAllDatasetStatistics" resultType="com.datamate.datamanagement.interfaces.dto.AllDatasetStatisticsResponse"> <select id="getAllDatasetStatistics" resultType="com.datamate.datamanagement.interfaces.dto.AllDatasetStatisticsResponse">
SELECT SELECT
COUNT(*) AS total_datasets, (SELECT COUNT(*) FROM t_dm_datasets) AS total_datasets,
SUM(size_bytes) AS total_size, (SELECT COALESCE(SUM(size_bytes), 0) FROM t_dm_datasets) AS total_size,
SUM(file_count) AS total_files (SELECT COUNT(*)
FROM t_dm_datasets; FROM t_dm_dataset_files
WHERE metadata IS NULL OR JSON_EXTRACT(metadata, '$.derived_from_file_id') IS NULL) AS total_files
</select> </select>
</mapper> </mapper>