You've already forked DataMate
feat(data-import): 添加文本文件类型检测和按行分割功能
- 新增 TEXT_FILE_MIME_PREFIX、TEXT_FILE_MIME_TYPES 和 TEXT_FILE_EXTENSIONS 常量用于文本文件识别 - 添加 getUploadFileName、getUploadFileType 和 isTextUploadFile 工具函数 - 在 splitFileByLines 函数中集成文本文件类型检查 - 添加 hasNonTextFile useMemo 钩子来检测是否存在非文本文件 - 当存在非文本文件时禁用按行分割功能并重置开关状态 - 更新 Tooltip 提示内容以反映文件类型限制 - 使用 useCallback 优化 fetchCollectionTasks 和 resetState 函数 - 调整 useEffect 依赖数组以确保正确的重新渲染行为
This commit is contained in:
@@ -2,18 +2,82 @@ import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch, Tooltip
|
|||||||
import { InboxOutlined, QuestionCircleOutlined } from "@ant-design/icons";
|
import { InboxOutlined, QuestionCircleOutlined } from "@ant-design/icons";
|
||||||
import { dataSourceOptions } from "../../dataset.const";
|
import { dataSourceOptions } from "../../dataset.const";
|
||||||
import { Dataset, DataSource } from "../../dataset.model";
|
import { Dataset, DataSource } from "../../dataset.model";
|
||||||
import { useEffect, useState } from "react";
|
import { useCallback, useEffect, useMemo, useState } from "react";
|
||||||
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
|
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
|
||||||
import { updateDatasetByIdUsingPut } from "../../dataset.api";
|
import { updateDatasetByIdUsingPut } from "../../dataset.api";
|
||||||
import { sliceFile } from "@/utils/file.util";
|
import { sliceFile } from "@/utils/file.util";
|
||||||
import Dragger from "antd/es/upload/Dragger";
|
import Dragger from "antd/es/upload/Dragger";
|
||||||
|
|
||||||
|
/** Prefix shared by every `text/*` media type. */
const TEXT_FILE_MIME_PREFIX = "text/";

/**
 * Media types outside the `text/*` family that are still treated as text
 * when deciding whether an uploaded file may be split line by line.
 */
const TEXT_FILE_MIME_TYPES = new Set([
  "application/json",
  "application/xml",
  "application/csv",
  "application/ndjson",
  "application/x-ndjson",
  "application/x-yaml",
  "application/yaml",
  "application/javascript",
  "application/x-javascript",
  "application/sql",
]);

/**
 * File extensions (lower-case, including the leading dot) that are treated
 * as text. Entries are compared against the lower-cased suffix of a file name.
 */
const TEXT_FILE_EXTENSIONS = new Set([
  ".txt",
  ".md",
  ".csv",
  ".tsv",
  ".json",
  ".jsonl",
  ".ndjson",
  ".log",
  ".xml",
  ".yaml",
  ".yml",
  ".sql",
]);
|
||||||
|
|
||||||
|
/**
 * Resolves the file name of an upload entry.
 *
 * The entry's own `name` wins when it is non-empty; otherwise the name of the
 * underlying `originFileObj` is used, provided it is a real `File`.
 *
 * @param file Upload entry to inspect.
 * @returns The resolved name, or an empty string when none is available.
 */
function getUploadFileName(file: UploadFile): string {
  if (file.name) return file.name;

  const originFile = file.originFileObj;
  // Only trust `name` on genuine File instances; anything else yields "".
  if (originFile instanceof File && originFile.name) {
    return originFile.name;
  }
  return "";
}
|
||||||
|
|
||||||
|
/**
 * Resolves the MIME type reported for an upload entry.
 *
 * The entry's own `type` wins when it is non-empty; otherwise the `type` of
 * the underlying `originFileObj` is used, provided it is a real `File`.
 *
 * @param file Upload entry to inspect.
 * @returns The reported MIME type, or an empty string when none is available.
 */
function getUploadFileType(file: UploadFile): string {
  if (file.type) return file.type;

  const originFile = file.originFileObj;
  // Only trust `type` on genuine File instances; anything else yields "".
  if (originFile instanceof File && typeof originFile.type === "string") {
    return originFile.type;
  }
  return "";
}
|
||||||
|
|
||||||
|
/**
 * Decides whether an upload entry should be treated as a text file.
 *
 * The MIME type is checked first: any `text/*` type, or a type listed in
 * `TEXT_FILE_MIME_TYPES`, is accepted. If the MIME type is empty or not
 * recognised, the decision falls back to the file extension, which must be
 * listed in `TEXT_FILE_EXTENSIONS`.
 *
 * @param file Upload entry to classify.
 * @returns `true` when the entry is recognised as text, otherwise `false`.
 */
function isTextUploadFile(file: UploadFile): boolean {
  // Keep only the "type/subtype" part: parameters such as "; charset=utf-8"
  // and surrounding whitespace would otherwise miss the allow-list lookup.
  const [rawEssence = ""] = getUploadFileType(file).split(";");
  const mimeType = rawEssence.trim().toLowerCase();
  if (mimeType) {
    if (mimeType.startsWith(TEXT_FILE_MIME_PREFIX)) return true;
    if (TEXT_FILE_MIME_TYPES.has(mimeType)) return true;
  }

  // Fall back to the extension: everything from the last dot onwards.
  const fileName = getUploadFileName(file);
  const dotIndex = fileName.lastIndexOf(".");
  if (dotIndex < 0) return false;

  const ext = fileName.slice(dotIndex).toLowerCase();
  return TEXT_FILE_EXTENSIONS.has(ext);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 按行分割文件
|
* 按行分割文件
|
||||||
* @param file 原始文件
|
* @param file 原始文件
|
||||||
* @returns 分割后的文件列表,每行一个文件
|
* @returns 分割后的文件列表,每行一个文件
|
||||||
*/
|
*/
|
||||||
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
|
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
|
||||||
|
if (!isTextUploadFile(file)) {
|
||||||
|
return [file];
|
||||||
|
}
|
||||||
|
|
||||||
const originFile = file.originFileObj ?? file;
|
const originFile = file.originFileObj ?? file;
|
||||||
if (!(originFile instanceof File) || typeof originFile.text !== "function") {
|
if (!(originFile instanceof File) || typeof originFile.text !== "function") {
|
||||||
return [file];
|
return [file];
|
||||||
@@ -90,6 +154,11 @@ export default function ImportConfiguration({
|
|||||||
splitByLine: false,
|
splitByLine: false,
|
||||||
});
|
});
|
||||||
const [currentPrefix, setCurrentPrefix] = useState<string>("");
|
const [currentPrefix, setCurrentPrefix] = useState<string>("");
|
||||||
|
const hasNonTextFile = useMemo(() => {
|
||||||
|
const files = importConfig.files ?? [];
|
||||||
|
if (files.length === 0) return false;
|
||||||
|
return files.some((file) => !isTextUploadFile(file));
|
||||||
|
}, [importConfig.files]);
|
||||||
|
|
||||||
// 本地上传文件相关逻辑
|
// 本地上传文件相关逻辑
|
||||||
|
|
||||||
@@ -98,7 +167,7 @@ export default function ImportConfiguration({
|
|||||||
(form.getFieldValue("files") as UploadFile[] | undefined) || [];
|
(form.getFieldValue("files") as UploadFile[] | undefined) || [];
|
||||||
|
|
||||||
// 如果启用分行分割,处理文件
|
// 如果启用分行分割,处理文件
|
||||||
if (importConfig.splitByLine) {
|
if (importConfig.splitByLine && !hasNonTextFile) {
|
||||||
const splitResults = await Promise.all(
|
const splitResults = await Promise.all(
|
||||||
filesToUpload.map((file) => splitFileByLines(file))
|
filesToUpload.map((file) => splitFileByLines(file))
|
||||||
);
|
);
|
||||||
@@ -131,7 +200,7 @@ export default function ImportConfiguration({
|
|||||||
);
|
);
|
||||||
};
|
};
|
||||||
|
|
||||||
const fetchCollectionTasks = async () => {
|
const fetchCollectionTasks = useCallback(async () => {
|
||||||
if (importConfig.source !== DataSource.COLLECTION) return;
|
if (importConfig.source !== DataSource.COLLECTION) return;
|
||||||
try {
|
try {
|
||||||
const res = await queryTasksUsingGet({ page: 0, size: 100 });
|
const res = await queryTasksUsingGet({ page: 0, size: 100 });
|
||||||
@@ -146,9 +215,9 @@ export default function ImportConfiguration({
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error("Error fetching collection tasks:", error);
|
console.error("Error fetching collection tasks:", error);
|
||||||
}
|
}
|
||||||
};
|
}, [importConfig.source]);
|
||||||
|
|
||||||
const resetState = () => {
|
const resetState = useCallback(() => {
|
||||||
console.log('[ImportConfiguration] resetState called, preserving currentPrefix:', currentPrefix);
|
console.log('[ImportConfiguration] resetState called, preserving currentPrefix:', currentPrefix);
|
||||||
form.resetFields();
|
form.resetFields();
|
||||||
form.setFieldsValue({ files: null });
|
form.setFieldsValue({ files: null });
|
||||||
@@ -158,7 +227,7 @@ export default function ImportConfiguration({
|
|||||||
splitByLine: false,
|
splitByLine: false,
|
||||||
});
|
});
|
||||||
console.log('[ImportConfiguration] resetState done, currentPrefix still:', currentPrefix);
|
console.log('[ImportConfiguration] resetState done, currentPrefix still:', currentPrefix);
|
||||||
};
|
}, [currentPrefix, form]);
|
||||||
|
|
||||||
const handleImportData = async () => {
|
const handleImportData = async () => {
|
||||||
if (!data) return;
|
if (!data) return;
|
||||||
@@ -180,14 +249,22 @@ export default function ImportConfiguration({
|
|||||||
resetState();
|
resetState();
|
||||||
fetchCollectionTasks();
|
fetchCollectionTasks();
|
||||||
}
|
}
|
||||||
}, [open]);
|
}, [fetchCollectionTasks, open, prefix, resetState]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!importConfig.files?.length) return;
|
||||||
|
if (!importConfig.splitByLine) return;
|
||||||
|
if (!hasNonTextFile) return;
|
||||||
|
form.setFieldsValue({ splitByLine: false });
|
||||||
|
setImportConfig((prev) => ({ ...prev, splitByLine: false }));
|
||||||
|
}, [form, hasNonTextFile, importConfig.files, importConfig.splitByLine]);
|
||||||
|
|
||||||
// Separate effect for fetching collection tasks when source changes
|
// Separate effect for fetching collection tasks when source changes
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (open && importConfig.source === DataSource.COLLECTION) {
|
if (open && importConfig.source === DataSource.COLLECTION) {
|
||||||
fetchCollectionTasks();
|
fetchCollectionTasks();
|
||||||
}
|
}
|
||||||
}, [importConfig.source]);
|
}, [fetchCollectionTasks, importConfig.source, open]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Modal
|
<Modal
|
||||||
@@ -290,7 +367,13 @@ export default function ImportConfiguration({
|
|||||||
label={
|
label={
|
||||||
<span>
|
<span>
|
||||||
按分行分割{" "}
|
按分行分割{" "}
|
||||||
<Tooltip title="选中后,文本文件的每一行将被分割成独立文件">
|
<Tooltip
|
||||||
|
title={
|
||||||
|
hasNonTextFile
|
||||||
|
? "已选择非文本文件,无法按行分割"
|
||||||
|
: "选中后,文本文件的每一行将被分割成独立文件"
|
||||||
|
}
|
||||||
|
>
|
||||||
<QuestionCircleOutlined style={{ color: "#999" }} />
|
<QuestionCircleOutlined style={{ color: "#999" }} />
|
||||||
</Tooltip>
|
</Tooltip>
|
||||||
</span>
|
</span>
|
||||||
@@ -298,7 +381,7 @@ export default function ImportConfiguration({
|
|||||||
name="splitByLine"
|
name="splitByLine"
|
||||||
valuePropName="checked"
|
valuePropName="checked"
|
||||||
>
|
>
|
||||||
<Switch />
|
<Switch disabled={hasNonTextFile} />
|
||||||
</Form.Item>
|
</Form.Item>
|
||||||
<Form.Item
|
<Form.Item
|
||||||
label="上传文件"
|
label="上传文件"
|
||||||
|
|||||||
Reference in New Issue
Block a user