feat(data-management): 扩展源文档排除功能支持Excel文件类型

- 在后端服务中扩展源文档类型检查，新增对XLS和XLSX文件的支持
- 修改DatasetFileApplicationService中的过滤逻辑，统一处理所有源文档类型
- 新增isSourceDocument和isDerivedFile辅助方法进行文件类型判断
- 更新前端DatasetFileTransfer组件中的注释说明
- 在Python运行时依赖中添加openpyxl和xlrd库以支持Excel文件处理
- 修改标注项目接口中源文档类型的集合定义
- 更新文件操作钩子中的派生文件排除逻辑
This commit is contained in:
2026-01-31 11:30:55 +08:00
parent 6c7ea0c25e
commit b5d7c66240
8 changed files with 210 additions and 119 deletions

View File

@@ -1,7 +1,8 @@
import type {
Dataset,
DatasetFile,
} from "@/pages/DataManagement/dataset.model";
import type {
Dataset,
DatasetFile,
} from "@/pages/DataManagement/dataset.model";
import { DatasetType } from "@/pages/DataManagement/dataset.model";
import { App } from "antd";
import { useState } from "react";
import {
@@ -51,12 +52,14 @@ export function useFilesOperation(dataset: Dataset) {
) => {
// If prefix was passed explicitly (including an empty string), use the passed value; otherwise use the current pagination.prefix
const targetPrefix = prefix !== undefined ? prefix : (pagination.prefix || '');
const shouldExcludeDerivedFiles = dataset?.datasetType === DatasetType.TEXT;
const params: DatasetFilesQueryParams = {
page: current !== undefined ? current : pagination.current,
size: pageSize !== undefined ? pageSize : pagination.pageSize,
isWithDirectory: true,
prefix: targetPrefix,
...(shouldExcludeDerivedFiles ? { excludeDerivedFiles: true } : {}),
};
const { data } = await queryDatasetFilesUsingGet(id!, params);
@@ -245,4 +248,5 @@ interface DatasetFilesQueryParams {
size: number;
isWithDirectory: boolean;
prefix: string;
excludeDerivedFiles?: boolean;
}