feat(data-management): 添加数据集文件标注结果过滤功能

- 在 OpenAPI 规范中添加 hasAnnotation 查询参数用于过滤存在标注结果的文件
- 修改后端服务层 DatasetFileApplicationService 支持 hasAnnotation 参数
- 更新数据访问层 DatasetFileRepositoryImpl 实现基于标注结果的存在性查询
- 调整前端 DatasetFileTransfer 组件支持标注过滤功能
- 移除无用的分块选项配置并优化全选逻辑
- 修复文件查询时的参数传递和依赖追踪问题
This commit is contained in:
2026-01-27 18:11:30 +08:00
parent e56211107e
commit 3a93098b57
7 changed files with 100 additions and 84 deletions

View File

@@ -222,13 +222,18 @@ paths:
schema:
type: string
description: 文件类型过滤
- name: status
in: query
schema:
type: string
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
description: 文件状态过滤
responses:
- name: status
in: query
schema:
type: string
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
description: 文件状态过滤
- name: hasAnnotation
in: query
schema:
type: boolean
description: 是否仅返回存在标注结果的文件
responses:
'200':
description: 成功
content:

View File

@@ -89,11 +89,12 @@ public class DatasetFileApplicationService {
* 获取数据集文件列表
*/
@Transactional(readOnly = true)
public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) {
IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page);
return PagedResponse.of(files);
}
public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name,
Boolean hasAnnotation, PagingQuery pagingQuery) {
IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
return PagedResponse.of(files);
}
/**
* 获取数据集文件列表

View File

@@ -24,7 +24,7 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName);
IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
IPage<DatasetFile> page);
Boolean hasAnnotation, IPage<DatasetFile> page);
int updateFilePathPrefix(String datasetId, String oldPrefix, String newPrefix);
}

View File

@@ -22,6 +22,8 @@ import java.util.List;
@RequiredArgsConstructor
public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper, DatasetFile> implements DatasetFileRepository {
private final DatasetFileMapper datasetFileMapper;
private static final String ANNOTATION_EXISTS_SQL =
"SELECT 1 FROM t_dm_annotation_results ar WHERE ar.file_id = t_dm_dataset_files.id";
@Override
public Long countByDatasetId(String datasetId) {
@@ -49,12 +51,13 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
}
public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
IPage<DatasetFile> page) {
Boolean hasAnnotation, IPage<DatasetFile> page) {
return datasetFileMapper.selectPage(page, new LambdaQueryWrapper<DatasetFile>()
.eq(DatasetFile::getDatasetId, datasetId)
.eq(StringUtils.hasText(fileType), DatasetFile::getFileType, fileType)
.eq(StringUtils.hasText(status), DatasetFile::getStatus, status)
.like(StringUtils.hasText(name), DatasetFile::getFileName, name));
.like(StringUtils.hasText(name), DatasetFile::getFileName, name)
.exists(Boolean.TRUE.equals(hasAnnotation), ANNOTATION_EXISTS_SQL));
}
@Override

View File

@@ -43,22 +43,24 @@ public class DatasetFileController {
this.datasetFileApplicationService = datasetFileApplicationService;
}
@GetMapping
public Response<PagedResponse<DatasetFile>> getDatasetFiles(
@PathVariable("datasetId") String datasetId,
@RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
@RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
@RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
PagingQuery pagingQuery = new PagingQuery(page, size);
PagedResponse<DatasetFile> filesPage;
if (isWithDirectory) {
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
} else {
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery);
}
return Response.ok(filesPage);
}
@GetMapping
public Response<PagedResponse<DatasetFile>> getDatasetFiles(
@PathVariable("datasetId") String datasetId,
@RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
@RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
@RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
@RequestParam(value = "status", required = false) String status,
@RequestParam(value = "hasAnnotation", required = false) Boolean hasAnnotation) {
PagingQuery pagingQuery = new PagingQuery(page, size);
PagedResponse<DatasetFile> filesPage;
if (isWithDirectory) {
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
} else {
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, status, null, hasAnnotation, pagingQuery);
}
return Response.ok(filesPage);
}
@GetMapping("/{fileId}")
public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(

View File

@@ -14,14 +14,15 @@ import {
import { formatBytes } from "@/utils/unit";
import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
interface DatasetFileTransferProps
extends React.HTMLAttributes<HTMLDivElement> {
open: boolean;
selectedFilesMap: { [key: string]: DatasetFile };
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
onDatasetSelect?: (dataset: Dataset | null) => void;
datasetTypeFilter?: DatasetType;
}
interface DatasetFileTransferProps
extends React.HTMLAttributes<HTMLDivElement> {
open: boolean;
selectedFilesMap: { [key: string]: DatasetFile };
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
onDatasetSelect?: (dataset: Dataset | null) => void;
datasetTypeFilter?: DatasetType;
hasAnnotationFilter?: boolean;
}
const fileCols = [
{
@@ -46,14 +47,15 @@ const fileCols = [
];
// Customize Table Transfer
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
open,
selectedFilesMap,
onSelectedFilesChange,
onDatasetSelect,
datasetTypeFilter = DatasetType.TEXT,
...props
}) => {
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
open,
selectedFilesMap,
onSelectedFilesChange,
onDatasetSelect,
datasetTypeFilter = DatasetType.TEXT,
hasAnnotationFilter,
...props
}) => {
const [datasets, setDatasets] = React.useState<Dataset[]>([]);
const [datasetSearch, setDatasetSearch] = React.useState<string>("");
const [datasetPagination, setDatasetPagination] = React.useState<{
@@ -64,11 +66,13 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
const [files, setFiles] = React.useState<DatasetFile[]>([]);
const [filesSearch, setFilesSearch] = React.useState<string>("");
const [filesPagination, setFilesPagination] = React.useState<{
current: number;
pageSize: number;
total: number;
}>({ current: 1, pageSize: 10, total: 0 });
const [filesPagination, setFilesPagination] = React.useState<{
current: number;
pageSize: number;
total: number;
}>({ current: 1, pageSize: 10, total: 0 });
const filesPage = filesPagination.current;
const filesPageSize = filesPagination.pageSize;
const [showFiles, setShowFiles] = React.useState<boolean>(false);
const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
@@ -104,18 +108,19 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
const fetchFiles = useCallback(
async (
options?: Partial<{ page: number; pageSize: number; keyword: string }>
) => {
if (!selectedDataset) return;
const page = options?.page ?? filesPagination.current;
const pageSize = options?.pageSize ?? filesPagination.pageSize;
const keyword = options?.keyword ?? filesSearch;
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page,
size: pageSize,
keyword,
});
options?: Partial<{ page: number; pageSize: number; keyword: string }>
) => {
if (!selectedDataset) return;
const page = options?.page ?? filesPage;
const pageSize = options?.pageSize ?? filesPageSize;
const keyword = options?.keyword ?? filesSearch;
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page,
size: pageSize,
keyword,
...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
});
setFiles(
(data.content || []).map((item: DatasetFile) => ({
...item,
@@ -130,9 +135,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
pageSize,
total: data.totalElements,
}));
},
[selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
);
},
[
selectedDataset,
filesPage,
filesPageSize,
filesSearch,
hasAnnotationFilter,
]
);
useEffect(() => {
// 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求
@@ -167,10 +178,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
const allFiles: DatasetFile[] = [];
while (true) {
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page,
size: pageSize,
});
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page,
size: pageSize,
...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
});
const content: DatasetFile[] = (data.content || []).map(
(item: DatasetFile) => ({
@@ -217,7 +229,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
} finally {
setSelectingAll(false);
}
}, [selectedDataset, selectedFilesMap, onSelectedFilesChange]);
}, [selectedDataset, selectedFilesMap, onSelectedFilesChange, hasAnnotationFilter]);
const toggleSelectFile = (record: DatasetFile) => {
if (!selectedFilesMap[record.id]) {
@@ -388,10 +400,10 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
},
// 全选 - 改为全选整个数据集而不是当前页
onSelectAll: (selected, selectedRows: DatasetFile[]) => {
if (selected) {
// 点击表头“全选”时,改为一键全选当前数据集的全部文件
// 而不是只选中当前页
onSelectAll: (selected) => {
if (selected) {
// 点击表头“全选”时,改为一键全选当前数据集的全部文件
// 而不是只选中当前页
handleSelectAllInDataset();
} else {
// 取消表头“全选”时,清空当前已选文件

View File

@@ -16,14 +16,6 @@ import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
import { DescriptionsItemType } from "antd/es/descriptions";
import { DatasetFileCols } from "../knowledge-base.const";
const sliceOptions = [
{ label: "默认分块", value: "DEFAULT_CHUNK" },
{ label: "章节分块", value: "CHAPTER_CHUNK" },
{ label: "段落分块", value: "PARAGRAPH_CHUNK" },
{ label: "长度分块", value: "LENGTH_CHUNK" },
{ label: "自定义分割符分块", value: "CUSTOM_SEPARATOR_CHUNK" },
];
export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
const [open, setOpen] = useState(false);
const { message } = App.useApp();
@@ -273,6 +265,7 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
open={open}
selectedFilesMap={selectedFilesMap}
onSelectedFilesChange={setSelectedFilesMap}
hasAnnotationFilter
/>
)}