// File: DataMate/frontend/src/pages/DataManagement/Detail/components/ImportConfiguration.tsx
// Listing metadata: 531 lines, 16 KiB, TypeScript

import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch, Tooltip } from "antd";
import { InboxOutlined, QuestionCircleOutlined } from "@ant-design/icons";
import { dataSourceOptions } from "../../dataset.const";
import { Dataset, DatasetType, DataSource } from "../../dataset.model";
import { useCallback, useEffect, useMemo, useState } from "react";
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
import { updateDatasetByIdUsingPut } from "../../dataset.api";
import { sliceFile, shouldStreamUpload } from "@/utils/file.util";
import Dragger from "antd/es/upload/Dragger";
/** Any MIME type starting with this prefix is classified as text. */
const TEXT_FILE_MIME_PREFIX = "text/";

/** MIME types outside the `text/` family that are still classified as text. */
const TEXT_FILE_MIME_TYPES = new Set<string>([
  // Structured data formats
  "application/json",
  "application/xml",
  "application/csv",
  // Newline-delimited JSON
  "application/ndjson",
  "application/x-ndjson",
  // YAML
  "application/x-yaml",
  "application/yaml",
  // Source-like formats
  "application/javascript",
  "application/x-javascript",
  "application/sql",
]);

/**
 * File extensions (lower-case, including the leading dot) classified as text
 * when the MIME type does not already identify the file as text.
 */
const TEXT_FILE_EXTENSIONS = new Set<string>([
  // Plain text and markup
  ".txt",
  ".md",
  // Tabular
  ".csv",
  ".tsv",
  // JSON family
  ".json",
  ".jsonl",
  ".ndjson",
  // Logs and configuration
  ".log",
  ".xml",
  ".yaml",
  ".yml",
  ".sql",
]);
/**
 * Resolves the file name of an upload entry.
 *
 * @param file Upload entry to inspect.
 * @returns The entry's own `name` when it is non-empty; otherwise the name of
 *   the wrapped `originFileObj` when that is a `File` with a non-empty name;
 *   otherwise an empty string.
 */
function getUploadFileName(file: UploadFile): string {
  if (file.name) {
    return file.name;
  }
  const rawFile = file.originFileObj;
  return rawFile instanceof File && rawFile.name ? rawFile.name : "";
}
/**
 * Resolves the MIME type of an upload entry.
 *
 * @param file Upload entry to inspect.
 * @returns The entry's own `type` when it is non-empty; otherwise the `type`
 *   of the wrapped `originFileObj` when that is a `File` exposing a string
 *   type; otherwise an empty string.
 */
function getUploadFileType(file: UploadFile): string {
  if (file.type) {
    return file.type;
  }
  const rawFile = file.originFileObj;
  return rawFile instanceof File && typeof rawFile.type === "string"
    ? rawFile.type
    : "";
}
/**
 * Decides whether an upload entry should be treated as a text file.
 *
 * The MIME type is checked first (case-insensitively): anything under the
 * `text/` prefix or listed in `TEXT_FILE_MIME_TYPES` counts as text. When the
 * MIME type does not settle it, the lower-cased extension of the file name
 * (from the last dot onwards) is looked up in `TEXT_FILE_EXTENSIONS`.
 *
 * @param file Upload entry to classify.
 * @returns `true` when the MIME type or the extension marks the file as text.
 */
function isTextUploadFile(file: UploadFile): boolean {
  const mime = getUploadFileType(file).toLowerCase();
  const mimeIsTextual =
    mime !== "" &&
    (mime.startsWith(TEXT_FILE_MIME_PREFIX) || TEXT_FILE_MIME_TYPES.has(mime));
  if (mimeIsTextual) {
    return true;
  }

  const name = getUploadFileName(file);
  const extensionStart = name.lastIndexOf(".");
  // A name without any dot has no extension to look up.
  return (
    extensionStart >= 0 &&
    TEXT_FILE_EXTENSIONS.has(name.slice(extensionStart).toLowerCase())
  );
}
/**
 * Splits a text upload into one upload entry per non-blank line.
 *
 * The entry is returned unchanged (as a single-element list) when it is not a
 * text file, when no readable `File` object is available, or when the file
 * content is empty. A file that contains only blank lines yields an empty
 * list.
 *
 * Generated entries are named `<base>_<index>` where `<base>` is the original
 * name without its last extension and `<index>` is 1-based and zero-padded to
 * the width of the line count. The generated names carry no extension.
 *
 * @param file Original upload entry.
 * @returns The list of per-line upload entries, or `[file]` when the entry
 *   cannot be split.
 */
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
  if (!isTextUploadFile(file)) {
    return [file];
  }
  const originFile = file.originFileObj ?? file;
  if (!(originFile instanceof File) || typeof originFile.text !== "function") {
    return [file];
  }
  const text = await originFile.text();
  if (!text) return [file];

  // Split on LF / CRLF and drop lines that are empty or whitespace-only.
  const lines = text.split(/\r?\n/).filter((line) => line.trim() !== "");
  if (lines.length === 0) return [];

  // Resolve the name the same way isTextUploadFile does, so an entry whose
  // name only lives on the wrapped File does not crash here.
  const fullName = getUploadFileName(file);
  const dotIndex = fullName.lastIndexOf(".");
  // Strip only a real extension: a leading dot (".env") is part of the name.
  const baseName = dotIndex > 0 ? fullName.slice(0, dotIndex) : fullName;
  const padLength = String(lines.length).length;

  return lines.map((line, index) => {
    const newFileName = `${baseName}_${String(index + 1).padStart(padLength, "0")}`;
    const blob = new Blob([line], { type: "text/plain" });
    const newFile = new File([blob], newFileName, { type: "text/plain" });
    return {
      uid: `${file.uid}-${index}`,
      name: newFileName,
      size: newFile.size,
      type: "text/plain",
      originFileObj: newFile as UploadFile["originFileObj"],
    } as UploadFile;
  });
}
/**
 * Label/value pair. The component keeps a list of these in `collectionOptions`
 * and passes it as the options of the collection-task Select.
 */
type SelectOption = {
label: string;
value: string;
};
/**
 * Shape the component casts each entry of the task query response to.
 * Only `id` and `name` are read, to build a SelectOption.
 */
type CollectionTask = {
id: string;
name: string;
};
/**
 * Import settings held in component state. The state is replaced with all
 * form values on every form change, so it mirrors the form fields.
 */
type ImportConfig = {
// Selected data source; decides which form section is rendered.
source: DataSource;
// Value of the archive switch; forwarded as `hasArchive` in the upload event detail.
hasArchive: boolean;
// Value of the split-by-line switch; when set, text files are split per line before upload.
splitByLine: boolean;
// Files currently selected in the upload form field.
files?: UploadFile[];
// Value of the collection-task Select (a task id).
dataSource?: string;
// Read by the target-configuration section of the form.
target?: DataSource;
// Any other form field (e.g. endpoint, bucket) ends up here as well.
[key: string]: unknown;
};
/**
 * Default import settings. Used as the initial state, as the reset target and
 * as the form's `initialValues`, so the form's reset baseline never drifts
 * away from the component state.
 */
const DEFAULT_IMPORT_CONFIG: ImportConfig = {
  source: DataSource.UPLOAD,
  hasArchive: true,
  splitByLine: false,
};

/** Pairs every upload entry with its raw blob, its slices, its name and its size. */
function buildSliceList(files: UploadFile[]) {
  return files.map((file) => {
    // Hand over the real File/Blob object rather than the upload wrapper.
    const originFile = (file.originFileObj ?? file) as Blob;
    return {
      originFile,
      slices: sliceFile(originFile),
      name: file.name,
      size: originFile.size || 0,
    };
  });
}

/**
 * Modal dialog for importing data into a dataset.
 *
 * For the local-upload source the selected files are sliced (and optionally
 * split per line) and handed over through `upload:dataset` /
 * `upload:dataset-stream` window events. For the collection source the
 * current settings are sent with `updateDatasetByIdUsingPut`.
 *
 * @param data Dataset to import into; nothing is imported while it is `null`.
 * @param open Whether the modal is visible. Opening resets the form.
 * @param onClose Called when the modal should close (cancel or successful import).
 * @param updateEvent Event name forwarded in the upload event detail.
 * @param prefix Prefix forwarded in the upload event detail; captured on open.
 */
export default function ImportConfiguration({
  data,
  open,
  onClose,
  updateEvent = "update:dataset",
  prefix,
}: {
  data: Dataset | null;
  open: boolean;
  onClose: () => void;
  updateEvent?: string;
  prefix?: string;
}) {
  const [form] = Form.useForm();
  const [collectionOptions, setCollectionOptions] = useState<SelectOption[]>([]);
  const availableSourceOptions = dataSourceOptions.filter(
    (option) => option.value !== DataSource.COLLECTION
  );
  const [importConfig, setImportConfig] = useState<ImportConfig>({
    ...DEFAULT_IMPORT_CONFIG,
  });
  const [currentPrefix, setCurrentPrefix] = useState<string>("");

  // True as soon as one selected file is not a text file; splitting is then unavailable.
  const hasNonTextFile = useMemo(() => {
    const files = importConfig.files ?? [];
    return files.some((file) => !isTextUploadFile(file));
  }, [importConfig.files]);

  const isTextDataset = data?.datasetType === DatasetType.TEXT;

  /** Dispatches one upload event carrying the shared detail payload. */
  const dispatchUploadEvent = (
    eventName: "upload:dataset" | "upload:dataset-stream",
    dataset: Dataset,
    files: readonly unknown[]
  ) => {
    window.dispatchEvent(
      new CustomEvent(eventName, {
        detail: {
          dataset,
          files,
          updateEvent,
          hasArchive: importConfig.hasArchive,
          prefix: currentPrefix,
        },
      })
    );
  };

  /** Local-upload flow: slices the selected files and dispatches the upload events. */
  const handleUpload = async (dataset: Dataset) => {
    const filesToUpload =
      (form.getFieldValue("files") as UploadFile[] | undefined) ?? [];

    // Split-by-line disabled (or not applicable): plain sliced upload.
    if (!importConfig.splitByLine || hasNonTextFile) {
      dispatchUploadEvent("upload:dataset", dataset, buildSliceList(filesToUpload));
      return;
    }

    // Split-by-line enabled: files flagged by shouldStreamUpload are split
    // while streaming, the remaining ones are split up front.
    const filesForStreamUpload: File[] = [];
    const filesForNormalUpload: UploadFile[] = [];
    for (const file of filesToUpload) {
      const originFile = file.originFileObj ?? file;
      if (originFile instanceof File && shouldStreamUpload(originFile)) {
        filesForStreamUpload.push(originFile);
      } else {
        filesForNormalUpload.push(file);
      }
    }

    if (filesForStreamUpload.length > 0) {
      dispatchUploadEvent("upload:dataset-stream", dataset, filesForStreamUpload);
    }

    if (filesForNormalUpload.length > 0) {
      const splitResults = await Promise.all(
        filesForNormalUpload.map((file) => splitFileByLines(file))
      );
      dispatchUploadEvent(
        "upload:dataset",
        dataset,
        buildSliceList(splitResults.flat())
      );
    }
  };

  /** Loads the collection tasks into the Select options; only for the collection source. */
  const fetchCollectionTasks = useCallback(async () => {
    if (importConfig.source !== DataSource.COLLECTION) return;
    try {
      const res = await queryTasksUsingGet({ page: 0, size: 100 });
      const tasks = Array.isArray(res?.data?.content)
        ? (res.data.content as CollectionTask[])
        : [];
      setCollectionOptions(
        tasks.map((task) => ({ label: task.name, value: task.id }))
      );
    } catch (error) {
      console.error("Error fetching collection tasks:", error);
    }
  }, [importConfig.source]);

  /**
   * Resets the form and the mirrored state to the defaults. The prefix is
   * intentionally left untouched. Depends only on `form`, so its identity is
   * stable and it cannot re-trigger the "on open" effect.
   */
  const resetState = useCallback(() => {
    form.resetFields();
    form.setFieldsValue({ files: null });
    setImportConfig({ ...DEFAULT_IMPORT_CONFIG });
  }, [form]);

  /** Closes the modal and discards whatever was entered. */
  const handleCancel = () => {
    onClose();
    resetState();
  };

  /** Runs the import for the selected source and closes the modal on success. */
  const handleImportData = async () => {
    if (!data) return;
    try {
      if (importConfig.source === DataSource.UPLOAD) {
        // Show the task center right away, before the potentially slow
        // file splitting, so the user sees that the upload has started.
        window.dispatchEvent(
          new CustomEvent("show:task-popover", { detail: { show: true } })
        );
        await handleUpload(data);
      } else if (importConfig.source === DataSource.COLLECTION) {
        await updateDatasetByIdUsingPut(data.id, {
          ...importConfig,
        });
      }
    } catch (error) {
      // Keep the modal open so the user can retry instead of leaving an
      // unhandled promise rejection behind.
      console.error("Error importing data:", error);
      return;
    }
    onClose();
  };

  // On open: capture the prefix and start from a clean form. This effect must
  // not depend on anything derived from `importConfig`; otherwise every form
  // change while the modal is open would reset the form again.
  useEffect(() => {
    if (!open) return;
    setCurrentPrefix(prefix || "");
    resetState();
  }, [open, prefix, resetState]);

  // Switch split-by-line off as soon as a non-text file is selected.
  useEffect(() => {
    if (!importConfig.files?.length) return;
    if (!importConfig.splitByLine) return;
    if (!hasNonTextFile) return;
    form.setFieldsValue({ splitByLine: false });
    setImportConfig((prev) => ({ ...prev, splitByLine: false }));
  }, [form, hasNonTextFile, importConfig.files, importConfig.splitByLine]);

  // Fetch the collection tasks whenever the collection source is active.
  useEffect(() => {
    if (open && importConfig.source === DataSource.COLLECTION) {
      fetchCollectionTasks();
    }
  }, [fetchCollectionTasks, importConfig.source, open]);

  return (
    <Modal
      title="导入数据"
      open={open}
      width={600}
      onCancel={handleCancel}
      maskClosable={false}
      footer={
        <>
          <Button onClick={handleCancel}>取消</Button>
          <Button
            type="primary"
            disabled={!importConfig.files?.length && !importConfig.dataSource}
            onClick={handleImportData}
          >
            确定
          </Button>
        </>
      }
    >
      <Form
        form={form}
        layout="vertical"
        initialValues={DEFAULT_IMPORT_CONFIG}
        onValuesChange={(_, allValues) => setImportConfig(allValues)}
      >
        <Form.Item
          label="数据源"
          name="source"
          rules={[{ required: true, message: "请选择数据源" }]}
        >
          <Radio.Group
            buttonStyle="solid"
            options={availableSourceOptions}
            optionType="button"
          />
        </Form.Item>

        {importConfig.source === DataSource.COLLECTION && (
          <Form.Item name="dataSource" label="归集任务" required>
            <Select placeholder="请选择归集任务" options={collectionOptions} />
          </Form.Item>
        )}

        {/* OBS import */}
        {importConfig.source === DataSource.OBS && (
          <div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg">
            <Form.Item
              name="endpoint"
              rules={[{ required: true }]}
              label="Endpoint"
            >
              <Input
                className="h-8 text-xs"
                placeholder="obs.cn-north-4.myhuaweicloud.com"
              />
            </Form.Item>
            <Form.Item
              name="bucket"
              rules={[{ required: true }]}
              label="Bucket"
            >
              <Input className="h-8 text-xs" placeholder="my-bucket" />
            </Form.Item>
            <Form.Item
              name="accessKey"
              rules={[{ required: true }]}
              label="Access Key"
            >
              <Input className="h-8 text-xs" placeholder="Access Key" />
            </Form.Item>
            <Form.Item
              name="secretKey"
              rules={[{ required: true }]}
              label="Secret Key"
            >
              <Input
                type="password"
                className="h-8 text-xs"
                placeholder="Secret Key"
              />
            </Form.Item>
          </div>
        )}

        {/* Local upload */}
        {importConfig.source === DataSource.UPLOAD && (
          <>
            <Form.Item
              label="自动解压上传的压缩包"
              name="hasArchive"
              valuePropName="checked"
            >
              <Switch />
            </Form.Item>
            {isTextDataset && (
              <Form.Item
                label={
                  <span>
                    按行分割{" "}
                    <Tooltip
                      title={
                        hasNonTextFile
                          ? "已选择非文本文件,无法按行分割"
                          : "选中后,文本文件的每一行将被分割成独立文件"
                      }
                    >
                      <QuestionCircleOutlined style={{ color: "#999" }} />
                    </Tooltip>
                  </span>
                }
                name="splitByLine"
                valuePropName="checked"
              >
                <Switch disabled={hasNonTextFile} />
              </Form.Item>
            )}
            <Form.Item
              label="上传文件"
              name="files"
              valuePropName="fileList"
              getValueFromEvent={(
                event: { fileList?: UploadFile[] } | UploadFile[]
              ) => {
                if (Array.isArray(event)) {
                  return event;
                }
                return event?.fileList;
              }}
              rules={[
                {
                  required: true,
                  message: "请上传文件",
                },
              ]}
            >
              {/* beforeUpload returns false: files are only collected here and sent on confirm. */}
              <Dragger
                className="w-full"
                beforeUpload={() => false}
                multiple
              >
                <p className="ant-upload-drag-icon">
                  <InboxOutlined />
                </p>
                <p className="ant-upload-text">点击或拖拽文件到此区域上传</p>
                <p className="ant-upload-hint">支持同时选择多个文件</p>
              </Dragger>
            </Form.Item>
          </>
        )}

        {/* Target configuration */}
        {importConfig.target && importConfig.target !== DataSource.UPLOAD && (
          <div className="space-y-3 p-4 bg-blue-50 rounded-lg">
            {importConfig.target === DataSource.DATABASE && (
              <div className="grid grid-cols-2 gap-3">
                <Form.Item
                  name="databaseType"
                  rules={[{ required: true }]}
                  label="数据库类型"
                >
                  <Select
                    className="w-full"
                    options={[
                      { label: "MySQL", value: "mysql" },
                      { label: "PostgreSQL", value: "postgresql" },
                      { label: "MongoDB", value: "mongodb" },
                    ]}
                  ></Select>
                </Form.Item>
                <Form.Item
                  name="tableName"
                  rules={[{ required: true }]}
                  label="表名"
                >
                  <Input className="h-8 text-xs" placeholder="dataset_table" />
                </Form.Item>
                <Form.Item
                  name="connectionString"
                  rules={[{ required: true }]}
                  label="连接字符串"
                >
                  <Input
                    className="h-8 text-xs col-span-2"
                    placeholder="数据库连接字符串"
                  />
                </Form.Item>
              </div>
            )}
          </div>
        )}
      </Form>
    </Modal>
  );
}