feat(data-management): 添加数据集文件标注结果过滤功能

- 在 OpenAPI 规范中添加 hasAnnotation 查询参数用于过滤存在标注结果的文件
- 修改后端服务层 DatasetFileApplicationService 支持 hasAnnotation 参数
- 更新数据访问层 DatasetFileRepositoryImpl 实现基于标注结果的存在性查询
- 调整前端 DatasetFileTransfer 组件:新增 hasAnnotationFilter 属性以支持标注过滤,并在 AddDataDialog 中启用
- 移除 AddDataDialog 中未使用的 sliceOptions 分块选项配置;去掉 onSelectAll 中未使用的 selectedRows 参数,并让“全选整个数据集”同样应用标注过滤条件
- 修复文件查询时的参数传递(控制器新增 status、hasAnnotation 请求参数并传递给服务层)和 useCallback 依赖追踪问题(依赖列表补充 hasAnnotationFilter 及分页值)
This commit is contained in:
2026-01-27 18:11:30 +08:00
parent e56211107e
commit 3a93098b57
7 changed files with 100 additions and 84 deletions

View File

@@ -222,13 +222,18 @@ paths:
schema: schema:
type: string type: string
description: 文件类型过滤 description: 文件类型过滤
- name: status - name: status
in: query in: query
schema: schema:
type: string type: string
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
description: 文件状态过滤 description: 文件状态过滤
responses: - name: hasAnnotation
in: query
schema:
type: boolean
description: 是否仅返回存在标注结果的文件
responses:
'200': '200':
description: 成功 description: 成功
content: content:

View File

@@ -89,11 +89,12 @@ public class DatasetFileApplicationService {
* 获取数据集文件列表 * 获取数据集文件列表
*/ */
@Transactional(readOnly = true) @Transactional(readOnly = true)
public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) { public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name,
IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); Boolean hasAnnotation, PagingQuery pagingQuery) {
IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page); IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
return PagedResponse.of(files); IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
} return PagedResponse.of(files);
}
/** /**
* 获取数据集文件列表 * 获取数据集文件列表

View File

@@ -24,7 +24,7 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName); DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName);
IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name, IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
IPage<DatasetFile> page); Boolean hasAnnotation, IPage<DatasetFile> page);
int updateFilePathPrefix(String datasetId, String oldPrefix, String newPrefix); int updateFilePathPrefix(String datasetId, String oldPrefix, String newPrefix);
} }

View File

@@ -22,6 +22,8 @@ import java.util.List;
@RequiredArgsConstructor @RequiredArgsConstructor
public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper, DatasetFile> implements DatasetFileRepository { public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper, DatasetFile> implements DatasetFileRepository {
private final DatasetFileMapper datasetFileMapper; private final DatasetFileMapper datasetFileMapper;
private static final String ANNOTATION_EXISTS_SQL =
"SELECT 1 FROM t_dm_annotation_results ar WHERE ar.file_id = t_dm_dataset_files.id";
@Override @Override
public Long countByDatasetId(String datasetId) { public Long countByDatasetId(String datasetId) {
@@ -49,12 +51,13 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
} }
public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name, public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
IPage<DatasetFile> page) { Boolean hasAnnotation, IPage<DatasetFile> page) {
return datasetFileMapper.selectPage(page, new LambdaQueryWrapper<DatasetFile>() return datasetFileMapper.selectPage(page, new LambdaQueryWrapper<DatasetFile>()
.eq(DatasetFile::getDatasetId, datasetId) .eq(DatasetFile::getDatasetId, datasetId)
.eq(StringUtils.hasText(fileType), DatasetFile::getFileType, fileType) .eq(StringUtils.hasText(fileType), DatasetFile::getFileType, fileType)
.eq(StringUtils.hasText(status), DatasetFile::getStatus, status) .eq(StringUtils.hasText(status), DatasetFile::getStatus, status)
.like(StringUtils.hasText(name), DatasetFile::getFileName, name)); .like(StringUtils.hasText(name), DatasetFile::getFileName, name)
.exists(Boolean.TRUE.equals(hasAnnotation), ANNOTATION_EXISTS_SQL));
} }
@Override @Override

View File

@@ -43,22 +43,24 @@ public class DatasetFileController {
this.datasetFileApplicationService = datasetFileApplicationService; this.datasetFileApplicationService = datasetFileApplicationService;
} }
@GetMapping @GetMapping
public Response<PagedResponse<DatasetFile>> getDatasetFiles( public Response<PagedResponse<DatasetFile>> getDatasetFiles(
@PathVariable("datasetId") String datasetId, @PathVariable("datasetId") String datasetId,
@RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory, @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
@RequestParam(value = "page", required = false, defaultValue = "0") Integer page, @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
@RequestParam(value = "size", required = false, defaultValue = "20") Integer size, @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) { @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
PagingQuery pagingQuery = new PagingQuery(page, size); @RequestParam(value = "status", required = false) String status,
PagedResponse<DatasetFile> filesPage; @RequestParam(value = "hasAnnotation", required = false) Boolean hasAnnotation) {
if (isWithDirectory) { PagingQuery pagingQuery = new PagingQuery(page, size);
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery); PagedResponse<DatasetFile> filesPage;
} else { if (isWithDirectory) {
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery); filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
} } else {
return Response.ok(filesPage); filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, status, null, hasAnnotation, pagingQuery);
} }
return Response.ok(filesPage);
}
@GetMapping("/{fileId}") @GetMapping("/{fileId}")
public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById( public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(

View File

@@ -14,14 +14,15 @@ import {
import { formatBytes } from "@/utils/unit"; import { formatBytes } from "@/utils/unit";
import { useDebouncedEffect } from "@/hooks/useDebouncedEffect"; import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
interface DatasetFileTransferProps interface DatasetFileTransferProps
extends React.HTMLAttributes<HTMLDivElement> { extends React.HTMLAttributes<HTMLDivElement> {
open: boolean; open: boolean;
selectedFilesMap: { [key: string]: DatasetFile }; selectedFilesMap: { [key: string]: DatasetFile };
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void; onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
onDatasetSelect?: (dataset: Dataset | null) => void; onDatasetSelect?: (dataset: Dataset | null) => void;
datasetTypeFilter?: DatasetType; datasetTypeFilter?: DatasetType;
} hasAnnotationFilter?: boolean;
}
const fileCols = [ const fileCols = [
{ {
@@ -46,14 +47,15 @@ const fileCols = [
]; ];
// Customize Table Transfer // Customize Table Transfer
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
open, open,
selectedFilesMap, selectedFilesMap,
onSelectedFilesChange, onSelectedFilesChange,
onDatasetSelect, onDatasetSelect,
datasetTypeFilter = DatasetType.TEXT, datasetTypeFilter = DatasetType.TEXT,
...props hasAnnotationFilter,
}) => { ...props
}) => {
const [datasets, setDatasets] = React.useState<Dataset[]>([]); const [datasets, setDatasets] = React.useState<Dataset[]>([]);
const [datasetSearch, setDatasetSearch] = React.useState<string>(""); const [datasetSearch, setDatasetSearch] = React.useState<string>("");
const [datasetPagination, setDatasetPagination] = React.useState<{ const [datasetPagination, setDatasetPagination] = React.useState<{
@@ -64,11 +66,13 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
const [files, setFiles] = React.useState<DatasetFile[]>([]); const [files, setFiles] = React.useState<DatasetFile[]>([]);
const [filesSearch, setFilesSearch] = React.useState<string>(""); const [filesSearch, setFilesSearch] = React.useState<string>("");
const [filesPagination, setFilesPagination] = React.useState<{ const [filesPagination, setFilesPagination] = React.useState<{
current: number; current: number;
pageSize: number; pageSize: number;
total: number; total: number;
}>({ current: 1, pageSize: 10, total: 0 }); }>({ current: 1, pageSize: 10, total: 0 });
const filesPage = filesPagination.current;
const filesPageSize = filesPagination.pageSize;
const [showFiles, setShowFiles] = React.useState<boolean>(false); const [showFiles, setShowFiles] = React.useState<boolean>(false);
const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>( const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
@@ -104,18 +108,19 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
const fetchFiles = useCallback( const fetchFiles = useCallback(
async ( async (
options?: Partial<{ page: number; pageSize: number; keyword: string }> options?: Partial<{ page: number; pageSize: number; keyword: string }>
) => { ) => {
if (!selectedDataset) return; if (!selectedDataset) return;
const page = options?.page ?? filesPagination.current; const page = options?.page ?? filesPage;
const pageSize = options?.pageSize ?? filesPagination.pageSize; const pageSize = options?.pageSize ?? filesPageSize;
const keyword = options?.keyword ?? filesSearch; const keyword = options?.keyword ?? filesSearch;
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, { const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page, page,
size: pageSize, size: pageSize,
keyword, keyword,
}); ...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
});
setFiles( setFiles(
(data.content || []).map((item: DatasetFile) => ({ (data.content || []).map((item: DatasetFile) => ({
...item, ...item,
@@ -130,9 +135,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
pageSize, pageSize,
total: data.totalElements, total: data.totalElements,
})); }));
}, },
[selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch] [
); selectedDataset,
filesPage,
filesPageSize,
filesSearch,
hasAnnotationFilter,
]
);
useEffect(() => { useEffect(() => {
// 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求 // 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求
@@ -167,10 +178,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
const allFiles: DatasetFile[] = []; const allFiles: DatasetFile[] = [];
while (true) { while (true) {
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, { const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
page, page,
size: pageSize, size: pageSize,
}); ...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
});
const content: DatasetFile[] = (data.content || []).map( const content: DatasetFile[] = (data.content || []).map(
(item: DatasetFile) => ({ (item: DatasetFile) => ({
@@ -217,7 +229,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
} finally { } finally {
setSelectingAll(false); setSelectingAll(false);
} }
}, [selectedDataset, selectedFilesMap, onSelectedFilesChange]); }, [selectedDataset, selectedFilesMap, onSelectedFilesChange, hasAnnotationFilter]);
const toggleSelectFile = (record: DatasetFile) => { const toggleSelectFile = (record: DatasetFile) => {
if (!selectedFilesMap[record.id]) { if (!selectedFilesMap[record.id]) {
@@ -388,10 +400,10 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
}, },
// 全选 - 改为全选整个数据集而不是当前页 // 全选 - 改为全选整个数据集而不是当前页
onSelectAll: (selected, selectedRows: DatasetFile[]) => { onSelectAll: (selected) => {
if (selected) { if (selected) {
// 点击表头“全选”时,改为一键全选当前数据集的全部文件 // 点击表头“全选”时,改为一键全选当前数据集的全部文件
// 而不是只选中当前页 // 而不是只选中当前页
handleSelectAllInDataset(); handleSelectAllInDataset();
} else { } else {
// 取消表头“全选”时,清空当前已选文件 // 取消表头“全选”时,清空当前已选文件

View File

@@ -16,14 +16,6 @@ import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
import { DescriptionsItemType } from "antd/es/descriptions"; import { DescriptionsItemType } from "antd/es/descriptions";
import { DatasetFileCols } from "../knowledge-base.const"; import { DatasetFileCols } from "../knowledge-base.const";
const sliceOptions = [
{ label: "默认分块", value: "DEFAULT_CHUNK" },
{ label: "章节分块", value: "CHAPTER_CHUNK" },
{ label: "段落分块", value: "PARAGRAPH_CHUNK" },
{ label: "长度分块", value: "LENGTH_CHUNK" },
{ label: "自定义分割符分块", value: "CUSTOM_SEPARATOR_CHUNK" },
];
export default function AddDataDialog({ knowledgeBase, onDataAdded }) { export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
const [open, setOpen] = useState(false); const [open, setOpen] = useState(false);
const { message } = App.useApp(); const { message } = App.useApp();
@@ -273,6 +265,7 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
open={open} open={open}
selectedFilesMap={selectedFilesMap} selectedFilesMap={selectedFilesMap}
onSelectedFilesChange={setSelectedFilesMap} onSelectedFilesChange={setSelectedFilesMap}
hasAnnotationFilter
/> />
)} )}