You've already forked DataMate
feat(data-management): 添加数据集文件标注结果过滤功能
- 在 OpenAPI 规范中添加 hasAnnotation 查询参数用于过滤存在标注结果的文件 - 修改后端服务层 DatasetFileApplicationService 支持 hasAnnotation 参数 - 更新数据访问层 DatasetFileRepositoryImpl 实现基于标注结果的存在性查询 - 调整前端 DatasetFileTransfer 组件支持标注过滤功能 - 移除无用的分块选项配置并优化全选逻辑 - 修复文件查询时的参数传递和依赖追踪问题
This commit is contained in:
@@ -222,13 +222,18 @@ paths:
|
||||
schema:
|
||||
type: string
|
||||
description: 文件类型过滤
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
|
||||
description: 文件状态过滤
|
||||
responses:
|
||||
- name: status
|
||||
in: query
|
||||
schema:
|
||||
type: string
|
||||
enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
|
||||
description: 文件状态过滤
|
||||
- name: hasAnnotation
|
||||
in: query
|
||||
schema:
|
||||
type: boolean
|
||||
description: 是否仅返回存在标注结果的文件
|
||||
responses:
|
||||
'200':
|
||||
description: 成功
|
||||
content:
|
||||
|
||||
@@ -89,11 +89,12 @@ public class DatasetFileApplicationService {
|
||||
* 获取数据集文件列表
|
||||
*/
|
||||
@Transactional(readOnly = true)
|
||||
public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) {
|
||||
IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
|
||||
IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page);
|
||||
return PagedResponse.of(files);
|
||||
}
|
||||
public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name,
|
||||
Boolean hasAnnotation, PagingQuery pagingQuery) {
|
||||
IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
|
||||
IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
|
||||
return PagedResponse.of(files);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取数据集文件列表
|
||||
|
||||
@@ -24,7 +24,7 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
|
||||
DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName);
|
||||
|
||||
IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
|
||||
IPage<DatasetFile> page);
|
||||
Boolean hasAnnotation, IPage<DatasetFile> page);
|
||||
|
||||
int updateFilePathPrefix(String datasetId, String oldPrefix, String newPrefix);
|
||||
}
|
||||
|
||||
@@ -22,6 +22,8 @@ import java.util.List;
|
||||
@RequiredArgsConstructor
|
||||
public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper, DatasetFile> implements DatasetFileRepository {
|
||||
private final DatasetFileMapper datasetFileMapper;
|
||||
private static final String ANNOTATION_EXISTS_SQL =
|
||||
"SELECT 1 FROM t_dm_annotation_results ar WHERE ar.file_id = t_dm_dataset_files.id";
|
||||
|
||||
@Override
|
||||
public Long countByDatasetId(String datasetId) {
|
||||
@@ -49,12 +51,13 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
|
||||
}
|
||||
|
||||
public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
|
||||
IPage<DatasetFile> page) {
|
||||
Boolean hasAnnotation, IPage<DatasetFile> page) {
|
||||
return datasetFileMapper.selectPage(page, new LambdaQueryWrapper<DatasetFile>()
|
||||
.eq(DatasetFile::getDatasetId, datasetId)
|
||||
.eq(StringUtils.hasText(fileType), DatasetFile::getFileType, fileType)
|
||||
.eq(StringUtils.hasText(status), DatasetFile::getStatus, status)
|
||||
.like(StringUtils.hasText(name), DatasetFile::getFileName, name));
|
||||
.like(StringUtils.hasText(name), DatasetFile::getFileName, name)
|
||||
.exists(Boolean.TRUE.equals(hasAnnotation), ANNOTATION_EXISTS_SQL));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -43,22 +43,24 @@ public class DatasetFileController {
|
||||
this.datasetFileApplicationService = datasetFileApplicationService;
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
public Response<PagedResponse<DatasetFile>> getDatasetFiles(
|
||||
@PathVariable("datasetId") String datasetId,
|
||||
@RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
|
||||
@RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
|
||||
@RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
|
||||
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
|
||||
PagingQuery pagingQuery = new PagingQuery(page, size);
|
||||
PagedResponse<DatasetFile> filesPage;
|
||||
if (isWithDirectory) {
|
||||
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
|
||||
} else {
|
||||
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery);
|
||||
}
|
||||
return Response.ok(filesPage);
|
||||
}
|
||||
@GetMapping
|
||||
public Response<PagedResponse<DatasetFile>> getDatasetFiles(
|
||||
@PathVariable("datasetId") String datasetId,
|
||||
@RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
|
||||
@RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
|
||||
@RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
|
||||
@RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
|
||||
@RequestParam(value = "status", required = false) String status,
|
||||
@RequestParam(value = "hasAnnotation", required = false) Boolean hasAnnotation) {
|
||||
PagingQuery pagingQuery = new PagingQuery(page, size);
|
||||
PagedResponse<DatasetFile> filesPage;
|
||||
if (isWithDirectory) {
|
||||
filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
|
||||
} else {
|
||||
filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, status, null, hasAnnotation, pagingQuery);
|
||||
}
|
||||
return Response.ok(filesPage);
|
||||
}
|
||||
|
||||
@GetMapping("/{fileId}")
|
||||
public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(
|
||||
|
||||
@@ -14,14 +14,15 @@ import {
|
||||
import { formatBytes } from "@/utils/unit";
|
||||
import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
|
||||
|
||||
interface DatasetFileTransferProps
|
||||
extends React.HTMLAttributes<HTMLDivElement> {
|
||||
open: boolean;
|
||||
selectedFilesMap: { [key: string]: DatasetFile };
|
||||
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
|
||||
onDatasetSelect?: (dataset: Dataset | null) => void;
|
||||
datasetTypeFilter?: DatasetType;
|
||||
}
|
||||
interface DatasetFileTransferProps
|
||||
extends React.HTMLAttributes<HTMLDivElement> {
|
||||
open: boolean;
|
||||
selectedFilesMap: { [key: string]: DatasetFile };
|
||||
onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
|
||||
onDatasetSelect?: (dataset: Dataset | null) => void;
|
||||
datasetTypeFilter?: DatasetType;
|
||||
hasAnnotationFilter?: boolean;
|
||||
}
|
||||
|
||||
const fileCols = [
|
||||
{
|
||||
@@ -46,14 +47,15 @@ const fileCols = [
|
||||
];
|
||||
|
||||
// Customize Table Transfer
|
||||
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
open,
|
||||
selectedFilesMap,
|
||||
onSelectedFilesChange,
|
||||
onDatasetSelect,
|
||||
datasetTypeFilter = DatasetType.TEXT,
|
||||
...props
|
||||
}) => {
|
||||
const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
open,
|
||||
selectedFilesMap,
|
||||
onSelectedFilesChange,
|
||||
onDatasetSelect,
|
||||
datasetTypeFilter = DatasetType.TEXT,
|
||||
hasAnnotationFilter,
|
||||
...props
|
||||
}) => {
|
||||
const [datasets, setDatasets] = React.useState<Dataset[]>([]);
|
||||
const [datasetSearch, setDatasetSearch] = React.useState<string>("");
|
||||
const [datasetPagination, setDatasetPagination] = React.useState<{
|
||||
@@ -64,11 +66,13 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
|
||||
const [files, setFiles] = React.useState<DatasetFile[]>([]);
|
||||
const [filesSearch, setFilesSearch] = React.useState<string>("");
|
||||
const [filesPagination, setFilesPagination] = React.useState<{
|
||||
current: number;
|
||||
pageSize: number;
|
||||
total: number;
|
||||
}>({ current: 1, pageSize: 10, total: 0 });
|
||||
const [filesPagination, setFilesPagination] = React.useState<{
|
||||
current: number;
|
||||
pageSize: number;
|
||||
total: number;
|
||||
}>({ current: 1, pageSize: 10, total: 0 });
|
||||
const filesPage = filesPagination.current;
|
||||
const filesPageSize = filesPagination.pageSize;
|
||||
|
||||
const [showFiles, setShowFiles] = React.useState<boolean>(false);
|
||||
const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
|
||||
@@ -104,18 +108,19 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
|
||||
const fetchFiles = useCallback(
|
||||
async (
|
||||
options?: Partial<{ page: number; pageSize: number; keyword: string }>
|
||||
) => {
|
||||
if (!selectedDataset) return;
|
||||
const page = options?.page ?? filesPagination.current;
|
||||
const pageSize = options?.pageSize ?? filesPagination.pageSize;
|
||||
const keyword = options?.keyword ?? filesSearch;
|
||||
|
||||
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
||||
page,
|
||||
size: pageSize,
|
||||
keyword,
|
||||
});
|
||||
options?: Partial<{ page: number; pageSize: number; keyword: string }>
|
||||
) => {
|
||||
if (!selectedDataset) return;
|
||||
const page = options?.page ?? filesPage;
|
||||
const pageSize = options?.pageSize ?? filesPageSize;
|
||||
const keyword = options?.keyword ?? filesSearch;
|
||||
|
||||
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
||||
page,
|
||||
size: pageSize,
|
||||
keyword,
|
||||
...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
|
||||
});
|
||||
setFiles(
|
||||
(data.content || []).map((item: DatasetFile) => ({
|
||||
...item,
|
||||
@@ -130,9 +135,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
pageSize,
|
||||
total: data.totalElements,
|
||||
}));
|
||||
},
|
||||
[selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
|
||||
);
|
||||
},
|
||||
[
|
||||
selectedDataset,
|
||||
filesPage,
|
||||
filesPageSize,
|
||||
filesSearch,
|
||||
hasAnnotationFilter,
|
||||
]
|
||||
);
|
||||
|
||||
useEffect(() => {
|
||||
// 当数据集变化时,重置文件分页并拉取第一页文件,避免额外的循环请求
|
||||
@@ -167,10 +178,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
const allFiles: DatasetFile[] = [];
|
||||
|
||||
while (true) {
|
||||
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
||||
page,
|
||||
size: pageSize,
|
||||
});
|
||||
const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
|
||||
page,
|
||||
size: pageSize,
|
||||
...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
|
||||
});
|
||||
|
||||
const content: DatasetFile[] = (data.content || []).map(
|
||||
(item: DatasetFile) => ({
|
||||
@@ -217,7 +229,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
} finally {
|
||||
setSelectingAll(false);
|
||||
}
|
||||
}, [selectedDataset, selectedFilesMap, onSelectedFilesChange]);
|
||||
}, [selectedDataset, selectedFilesMap, onSelectedFilesChange, hasAnnotationFilter]);
|
||||
|
||||
const toggleSelectFile = (record: DatasetFile) => {
|
||||
if (!selectedFilesMap[record.id]) {
|
||||
@@ -388,10 +400,10 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
|
||||
},
|
||||
|
||||
// 全选 - 改为全选整个数据集而不是当前页
|
||||
onSelectAll: (selected, selectedRows: DatasetFile[]) => {
|
||||
if (selected) {
|
||||
// 点击表头“全选”时,改为一键全选当前数据集的全部文件
|
||||
// 而不是只选中当前页
|
||||
onSelectAll: (selected) => {
|
||||
if (selected) {
|
||||
// 点击表头“全选”时,改为一键全选当前数据集的全部文件
|
||||
// 而不是只选中当前页
|
||||
handleSelectAllInDataset();
|
||||
} else {
|
||||
// 取消表头“全选”时,清空当前已选文件
|
||||
|
||||
@@ -16,14 +16,6 @@ import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
|
||||
import { DescriptionsItemType } from "antd/es/descriptions";
|
||||
import { DatasetFileCols } from "../knowledge-base.const";
|
||||
|
||||
const sliceOptions = [
|
||||
{ label: "默认分块", value: "DEFAULT_CHUNK" },
|
||||
{ label: "章节分块", value: "CHAPTER_CHUNK" },
|
||||
{ label: "段落分块", value: "PARAGRAPH_CHUNK" },
|
||||
{ label: "长度分块", value: "LENGTH_CHUNK" },
|
||||
{ label: "自定义分割符分块", value: "CUSTOM_SEPARATOR_CHUNK" },
|
||||
];
|
||||
|
||||
export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
|
||||
const [open, setOpen] = useState(false);
|
||||
const { message } = App.useApp();
|
||||
@@ -273,6 +265,7 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
|
||||
open={open}
|
||||
selectedFilesMap={selectedFilesMap}
|
||||
onSelectedFilesChange={setSelectedFilesMap}
|
||||
hasAnnotationFilter
|
||||
/>
|
||||
)}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user