feat(data-management): add filtering of dataset files by annotation results

- Add a hasAnnotation query parameter to the OpenAPI spec to filter files that have annotation results
- Extend the backend service layer DatasetFileApplicationService to accept the hasAnnotation parameter
- Update the data access layer DatasetFileRepositoryImpl with an existence query on annotation results
- Adjust the frontend DatasetFileTransfer component to support annotation filtering
- Remove the unused chunking option config and improve the select-all logic
- Fix parameter passing and dependency tracking in the file query
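For orientation before the diff: the new hasAnnotation flag travels from the HTTP layer down to a conditional EXISTS subquery against t_dm_annotation_results, so requests that omit the parameter behave exactly as before. Below is a minimal sketch of requesting only annotated files through the generated client used by the frontend component in this diff; the "@/api" import path is an assumption, while the params object and response fields are taken from the hunks further down.

// Sketch only -- not part of this commit. The "@/api" import path is hypothetical;
// queryDatasetFilesUsingGet, the params object and the response fields appear in the diff below.
import { queryDatasetFilesUsingGet } from "@/api";

export async function listAnnotatedFiles(datasetId: string) {
  const { data } = await queryDatasetFilesUsingGet(datasetId, {
    page: 0,              // controller defaults shown in the diff: page=0, size=20
    size: 20,
    hasAnnotation: true,  // new query parameter: only files with annotation results
  });
  return { files: data.content || [], total: data.totalElements };
}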
@@ -222,13 +222,18 @@ paths:
         schema:
           type: string
         description: File type filter
       - name: status
         in: query
         schema:
           type: string
           enum: [UPLOADED, PROCESSING, COMPLETED, ERROR]
         description: File status filter
+      - name: hasAnnotation
+        in: query
+        schema:
+          type: boolean
+        description: Whether to return only files that have annotation results
       responses:
         '200':
           description: Success
           content:
@@ -89,11 +89,12 @@ public class DatasetFileApplicationService {
      * Get the dataset file list
      */
     @Transactional(readOnly = true)
-    public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) {
+    public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name,
+                                                      Boolean hasAnnotation, PagingQuery pagingQuery) {
         IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
-        IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page);
+        IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
         return PagedResponse.of(files);
     }
 
     /**
      * Get the dataset file list
@@ -24,7 +24,7 @@ public interface DatasetFileRepository extends IRepository<DatasetFile> {
     DatasetFile findByDatasetIdAndFileName(String datasetId, String fileName);
 
     IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
-                                      IPage<DatasetFile> page);
+                                      Boolean hasAnnotation, IPage<DatasetFile> page);
 
     int updateFilePathPrefix(String datasetId, String oldPrefix, String newPrefix);
 }
@@ -22,6 +22,8 @@ import java.util.List;
 @RequiredArgsConstructor
 public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper, DatasetFile> implements DatasetFileRepository {
     private final DatasetFileMapper datasetFileMapper;
+    private static final String ANNOTATION_EXISTS_SQL =
+            "SELECT 1 FROM t_dm_annotation_results ar WHERE ar.file_id = t_dm_dataset_files.id";
 
     @Override
     public Long countByDatasetId(String datasetId) {
@@ -49,12 +51,13 @@ public class DatasetFileRepositoryImpl extends CrudRepository<DatasetFileMapper,
     }
 
     public IPage<DatasetFile> findByCriteria(String datasetId, String fileType, String status, String name,
-                                             IPage<DatasetFile> page) {
+                                             Boolean hasAnnotation, IPage<DatasetFile> page) {
         return datasetFileMapper.selectPage(page, new LambdaQueryWrapper<DatasetFile>()
                 .eq(DatasetFile::getDatasetId, datasetId)
                 .eq(StringUtils.hasText(fileType), DatasetFile::getFileType, fileType)
                 .eq(StringUtils.hasText(status), DatasetFile::getStatus, status)
-                .like(StringUtils.hasText(name), DatasetFile::getFileName, name));
+                .like(StringUtils.hasText(name), DatasetFile::getFileName, name)
+                .exists(Boolean.TRUE.equals(hasAnnotation), ANNOTATION_EXISTS_SQL));
     }
 
     @Override
@@ -43,22 +43,24 @@ public class DatasetFileController {
         this.datasetFileApplicationService = datasetFileApplicationService;
     }
 
     @GetMapping
     public Response<PagedResponse<DatasetFile>> getDatasetFiles(
             @PathVariable("datasetId") String datasetId,
             @RequestParam(value = "isWithDirectory", required = false) boolean isWithDirectory,
             @RequestParam(value = "page", required = false, defaultValue = "0") Integer page,
             @RequestParam(value = "size", required = false, defaultValue = "20") Integer size,
-            @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix) {
+            @RequestParam(value = "prefix", required = false, defaultValue = "") String prefix,
+            @RequestParam(value = "status", required = false) String status,
+            @RequestParam(value = "hasAnnotation", required = false) Boolean hasAnnotation) {
         PagingQuery pagingQuery = new PagingQuery(page, size);
         PagedResponse<DatasetFile> filesPage;
         if (isWithDirectory) {
             filesPage = datasetFileApplicationService.getDatasetFilesWithDirectory(datasetId, prefix, pagingQuery);
         } else {
-            filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, null, null, pagingQuery);
+            filesPage = datasetFileApplicationService.getDatasetFiles(datasetId, null, status, null, hasAnnotation, pagingQuery);
         }
         return Response.ok(filesPage);
     }
 
     @GetMapping("/{fileId}")
     public ResponseEntity<Response<DatasetFileResponse>> getDatasetFileById(
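Both new controller parameters are declared with required = false, so requests that omit them bind to null and keep the previous unfiltered behaviour. A raw-HTTP sketch of a filtered request follows; the "/api/datasets" path prefix is an assumption, since the class-level @RequestMapping is outside this hunk.

// Sketch only -- not part of this commit. The "/api/datasets" path prefix is hypothetical;
// the controller's class-level @RequestMapping is not shown in this hunk.
export async function fetchCompletedAnnotatedFiles(datasetId: string) {
  const params = new URLSearchParams({
    status: "COMPLETED",    // binds to the new optional "status" parameter
    hasAnnotation: "true",  // binds to Boolean.TRUE; omit it and the controller passes null
    page: "0",
    size: "20",
  });
  const res = await fetch(`/api/datasets/${datasetId}/files?${params}`);
  return res.json(); // Response<PagedResponse<DatasetFile>> per the controller signature
}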
@@ -14,14 +14,15 @@ import {
 import { formatBytes } from "@/utils/unit";
 import { useDebouncedEffect } from "@/hooks/useDebouncedEffect";
 
 interface DatasetFileTransferProps
   extends React.HTMLAttributes<HTMLDivElement> {
   open: boolean;
   selectedFilesMap: { [key: string]: DatasetFile };
   onSelectedFilesChange: (filesMap: { [key: string]: DatasetFile }) => void;
   onDatasetSelect?: (dataset: Dataset | null) => void;
   datasetTypeFilter?: DatasetType;
+  hasAnnotationFilter?: boolean;
 }
 
 const fileCols = [
   {
@@ -46,14 +47,15 @@ const fileCols = [
 ];
 
 // Customize Table Transfer
 const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
   open,
   selectedFilesMap,
   onSelectedFilesChange,
   onDatasetSelect,
   datasetTypeFilter = DatasetType.TEXT,
+  hasAnnotationFilter,
   ...props
 }) => {
   const [datasets, setDatasets] = React.useState<Dataset[]>([]);
   const [datasetSearch, setDatasetSearch] = React.useState<string>("");
   const [datasetPagination, setDatasetPagination] = React.useState<{
@@ -64,11 +66,13 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
 
   const [files, setFiles] = React.useState<DatasetFile[]>([]);
   const [filesSearch, setFilesSearch] = React.useState<string>("");
   const [filesPagination, setFilesPagination] = React.useState<{
     current: number;
     pageSize: number;
     total: number;
   }>({ current: 1, pageSize: 10, total: 0 });
+  const filesPage = filesPagination.current;
+  const filesPageSize = filesPagination.pageSize;
 
   const [showFiles, setShowFiles] = React.useState<boolean>(false);
   const [selectedDataset, setSelectedDataset] = React.useState<Dataset | null>(
@@ -104,18 +108,19 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
 
   const fetchFiles = useCallback(
     async (
       options?: Partial<{ page: number; pageSize: number; keyword: string }>
     ) => {
       if (!selectedDataset) return;
-      const page = options?.page ?? filesPagination.current;
-      const pageSize = options?.pageSize ?? filesPagination.pageSize;
+      const page = options?.page ?? filesPage;
+      const pageSize = options?.pageSize ?? filesPageSize;
       const keyword = options?.keyword ?? filesSearch;
 
       const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
         page,
         size: pageSize,
         keyword,
+        ...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
       });
       setFiles(
         (data.content || []).map((item: DatasetFile) => ({
           ...item,
@@ -130,9 +135,15 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
         pageSize,
         total: data.totalElements,
       }));
     },
-    [selectedDataset, filesPagination.current, filesPagination.pageSize, filesSearch]
+    [
+      selectedDataset,
+      filesPage,
+      filesPageSize,
+      filesSearch,
+      hasAnnotationFilter,
+    ]
   );
 
   useEffect(() => {
     // When the dataset changes, reset file pagination and fetch the first page, avoiding extra looping requests
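Two details of the frontend change above are worth spelling out: the conditional spread only attaches hasAnnotation=true to the request when the new hasAnnotationFilter prop is set, so existing consumers of the component send exactly the same query as before, and the pagination values are now read through the filesPage / filesPageSize locals so the useCallback dependency list tracks plain numbers and also includes the filter flag. A condensed, self-contained sketch of the spread pattern follows; buildFileQuery is a hypothetical helper, not part of the commit.

// Sketch only: the conditional-spread pattern used in fetchFiles and handleSelectAllInDataset.
// buildFileQuery is a hypothetical helper introduced for illustration.
type FileQuery = { page: number; size: number; keyword?: string; hasAnnotation?: boolean };

export function buildFileQuery(
  page: number,
  size: number,
  keyword: string,
  hasAnnotationFilter?: boolean
): FileQuery {
  return {
    page,
    size,
    keyword,
    // Attach the parameter only when filtering is requested; otherwise the
    // query object (and the resulting request) is identical to the old behaviour.
    ...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
  };
}

// buildFileQuery(1, 10, "")        -> { page: 1, size: 10, keyword: "" }
// buildFileQuery(1, 10, "", true)  -> { page: 1, size: 10, keyword: "", hasAnnotation: true }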
@@ -167,10 +178,11 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
     const allFiles: DatasetFile[] = [];
 
     while (true) {
       const { data } = await queryDatasetFilesUsingGet(selectedDataset.id, {
         page,
         size: pageSize,
+        ...(hasAnnotationFilter ? { hasAnnotation: true } : {}),
       });
 
       const content: DatasetFile[] = (data.content || []).map(
         (item: DatasetFile) => ({
@@ -217,7 +229,7 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
     } finally {
       setSelectingAll(false);
     }
-  }, [selectedDataset, selectedFilesMap, onSelectedFilesChange]);
+  }, [selectedDataset, selectedFilesMap, onSelectedFilesChange, hasAnnotationFilter]);
 
   const toggleSelectFile = (record: DatasetFile) => {
     if (!selectedFilesMap[record.id]) {
@@ -388,10 +400,10 @@ const DatasetFileTransfer: React.FC<DatasetFileTransferProps> = ({
     },
 
     // Select all - now selects the whole dataset instead of the current page
-    onSelectAll: (selected, selectedRows: DatasetFile[]) => {
+    onSelectAll: (selected) => {
       if (selected) {
         // When the header "select all" is checked, select every file in the current dataset
         // instead of only the current page
         handleSelectAllInDataset();
       } else {
         // When the header "select all" is unchecked, clear the currently selected files
@@ -16,14 +16,6 @@ import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
 import { DescriptionsItemType } from "antd/es/descriptions";
 import { DatasetFileCols } from "../knowledge-base.const";
 
-const sliceOptions = [
-  { label: "默认分块", value: "DEFAULT_CHUNK" },
-  { label: "章节分块", value: "CHAPTER_CHUNK" },
-  { label: "段落分块", value: "PARAGRAPH_CHUNK" },
-  { label: "长度分块", value: "LENGTH_CHUNK" },
-  { label: "自定义分割符分块", value: "CUSTOM_SEPARATOR_CHUNK" },
-];
-
 export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
   const [open, setOpen] = useState(false);
   const { message } = App.useApp();
@@ -273,6 +265,7 @@ export default function AddDataDialog({ knowledgeBase, onDataAdded }) {
               open={open}
               selectedFilesMap={selectedFilesMap}
               onSelectedFilesChange={setSelectedFilesMap}
+              hasAnnotationFilter
             />
           )}
 
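For other screens that want the same behaviour, a minimal usage sketch follows; DatasetFileTransfer and its props come from the interface shown earlier, the bare hasAnnotationFilter attribute is JSX shorthand for hasAnnotationFilter={true}, and the "@/types" import path plus the AnnotatedFilePicker wrapper are assumptions introduced only for illustration.

// Sketch: opting in to the annotation filter from another screen.
// AnnotatedFilePicker is hypothetical; the "@/types" import path is an assumption.
import React from "react";
import DatasetFileTransfer from "@/components/business/DatasetFileTransfer";
import { DatasetFile } from "@/types";

export function AnnotatedFilePicker() {
  const [selectedFilesMap, setSelectedFilesMap] = React.useState<{ [key: string]: DatasetFile }>({});
  return (
    <DatasetFileTransfer
      open
      selectedFilesMap={selectedFilesMap}
      onSelectedFilesChange={setSelectedFilesMap}
      hasAnnotationFilter // shorthand for hasAnnotationFilter={true}
    />
  );
}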