You've already forked DataMate
531 lines
16 KiB
TypeScript
531 lines
16 KiB
TypeScript
import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch, Tooltip } from "antd";
|
|
import { InboxOutlined, QuestionCircleOutlined } from "@ant-design/icons";
|
|
import { dataSourceOptions } from "../../dataset.const";
|
|
import { Dataset, DatasetType, DataSource } from "../../dataset.model";
|
|
import { useCallback, useEffect, useMemo, useState } from "react";
|
|
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
|
|
import { updateDatasetByIdUsingPut } from "../../dataset.api";
|
|
import { sliceFile, shouldStreamUpload } from "@/utils/file.util";
|
|
import Dragger from "antd/es/upload/Dragger";
|
|
|
|
/** Prefix shared by every `text/*` MIME type. */
const TEXT_FILE_MIME_PREFIX = "text/";

/** MIME types outside `text/*` that are still treated as text. */
const TEXT_FILE_MIME_TYPES = new Set<string>([
  // structured data
  "application/json", "application/xml", "application/csv",
  "application/ndjson", "application/x-ndjson",
  "application/x-yaml", "application/yaml",
  // source / query text
  "application/javascript", "application/x-javascript", "application/sql",
]);

/** File extensions (lower-case, with the leading dot) that are treated as text. */
const TEXT_FILE_EXTENSIONS = new Set<string>([
  ".txt", ".md", ".csv", ".tsv",
  ".json", ".jsonl", ".ndjson", ".log",
  ".xml", ".yaml", ".yml", ".sql",
]);
|
|
|
|
/**
 * Resolves the display name of an upload entry.
 *
 * @param file Upload entry to inspect.
 * @returns `file.name` when it is non-empty, otherwise the name of the wrapped
 *   `originFileObj` when that is a `File`, otherwise an empty string.
 */
function getUploadFileName(file: UploadFile): string {
  const { name, originFileObj } = file;
  if (name) {
    return name;
  }
  // Fall back to the wrapped native File, if there is one.
  const nestedName = originFileObj instanceof File ? originFileObj.name : "";
  return nestedName || "";
}
|
|
|
|
/**
 * Resolves the MIME type of an upload entry.
 *
 * @param file Upload entry to inspect.
 * @returns `file.type` when it is non-empty, otherwise the string `type` of the
 *   wrapped `originFileObj` when that is a `File`, otherwise an empty string.
 */
function getUploadFileType(file: UploadFile): string {
  const declaredType = file.type;
  if (declaredType) {
    return declaredType;
  }
  // Fall back to the wrapped native File, if there is one.
  const nested = file.originFileObj;
  return nested instanceof File && typeof nested.type === "string"
    ? nested.type
    : "";
}
|
|
|
|
/**
 * Decides whether an upload entry should be treated as a text file.
 *
 * The MIME type is checked first (any `text/*` type, or one of
 * `TEXT_FILE_MIME_TYPES`). If that does not match, the lower-cased extension
 * of the file name is looked up in `TEXT_FILE_EXTENSIONS`.
 *
 * @param file Upload entry to classify.
 * @returns `true` when either the MIME type or the extension marks it as text.
 */
function isTextUploadFile(file: UploadFile): boolean {
  const mime = getUploadFileType(file).toLowerCase();
  const mimeIsTextual =
    mime !== "" &&
    (mime.startsWith(TEXT_FILE_MIME_PREFIX) || TEXT_FILE_MIME_TYPES.has(mime));
  if (mimeIsTextual) {
    return true;
  }

  // MIME type missing or inconclusive: decide by extension instead.
  const name = getUploadFileName(file);
  const lastDot = name.lastIndexOf(".");
  return lastDot >= 0 && TEXT_FILE_EXTENSIONS.has(name.slice(lastDot).toLowerCase());
}
|
|
|
|
/**
 * Splits a text file into one file per non-blank line.
 *
 * Non-text entries, entries without a readable `File`, and files whose content
 * is empty are returned unchanged as a single-element list. A file that only
 * contains blank lines yields an empty list.
 *
 * Generated files are named `<base>_<index>`, where `<base>` is the original
 * name without its last extension and `<index>` is 1-based and zero-padded to
 * the width of the line count. They carry no extension and are typed
 * `text/plain`.
 *
 * @param file Original upload entry.
 * @returns The split entries, one per non-blank line.
 */
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
  if (!isTextUploadFile(file)) {
    return [file];
  }

  const originFile = file.originFileObj ?? file;
  if (!(originFile instanceof File) || typeof originFile.text !== "function") {
    return [file];
  }

  const text = await originFile.text();
  if (!text) return [file];

  // Split on LF / CRLF and drop blank lines.
  const lines = text.split(/\r?\n/).filter((line: string) => line.trim() !== "");
  if (lines.length === 0) return [];

  // Resolve the name the same way the other helpers do, so an entry whose own
  // `name` is empty still gets the name of its wrapped File.
  const fullName = getUploadFileName(file);
  const dotIndex = fullName.lastIndexOf(".");
  // Strip only a real extension: a leading dot (".txt") is kept, otherwise the
  // base name would be empty and every output would be called "_1", "_2", ...
  const baseName = dotIndex > 0 ? fullName.slice(0, dotIndex) : fullName;
  const padLength = String(lines.length).length;

  return lines.map((line: string, index: number) => {
    const newFileName = `${baseName}_${String(index + 1).padStart(padLength, "0")}`;
    const blob = new Blob([line], { type: "text/plain" });
    const newFile = new File([blob], newFileName, { type: "text/plain" });
    return {
      uid: `${file.uid}-${index}`,
      name: newFileName,
      size: newFile.size,
      type: "text/plain",
      originFileObj: newFile as UploadFile["originFileObj"],
    } as UploadFile;
  });
}
|
|
|
|
/** Label/value pair in the shape used for the collection-task `Select` options. */
type SelectOption = { label: string; value: string };

/** The fields of a collection task that this file reads. */
type CollectionTask = { id: string; name: string };

/** Form state of the import dialog. */
type ImportConfig = {
  /** Selected data source. */
  source: DataSource;
  /** Value of the "auto-extract uploaded archives" switch. */
  hasArchive: boolean;
  /** Value of the "split by line" switch. */
  splitByLine: boolean;
  /** Files picked in the local-upload dragger. */
  files?: UploadFile[];
  /** Selected collection task (value of the `dataSource` form field). */
  dataSource?: string;
  /** Import target; gates the target-configuration section of the form. */
  target?: DataSource;
  /** Any additional form fields (e.g. OBS or database settings). */
  [key: string]: unknown;
};
|
|
|
|
/**
 * Modal dialog that imports data into a dataset.
 *
 * For the local-upload source the selected files are handed off through window
 * events ("upload:dataset" / "upload:dataset-stream"); for the collection
 * source the dataset is updated through `updateDatasetByIdUsingPut`.
 *
 * @param data Dataset to import into; confirming is a no-op when it is `null`.
 * @param open Whether the modal is visible. Each time it becomes visible the
 *   form is reset and the upload prefix is re-read from `prefix`.
 * @param onClose Called when the dialog is cancelled or the import was started.
 * @param updateEvent Event name forwarded in the upload event detail.
 * @param prefix Prefix forwarded in the upload event detail.
 */
export default function ImportConfiguration({
  data,
  open,
  onClose,
  updateEvent = "update:dataset",
  prefix,
}: {
  data: Dataset | null;
  open: boolean;
  onClose: () => void;
  updateEvent?: string;
  prefix?: string;
}) {
  const [form] = Form.useForm();
  const [collectionOptions, setCollectionOptions] = useState<SelectOption[]>([]);
  const availableSourceOptions = dataSourceOptions.filter(
    (option) => option.value !== DataSource.COLLECTION
  );
  const [importConfig, setImportConfig] = useState<ImportConfig>({
    source: DataSource.UPLOAD,
    hasArchive: true,
    splitByLine: false,
  });
  const [currentPrefix, setCurrentPrefix] = useState<string>("");
  // True as soon as at least one selected file is not recognised as text.
  const hasNonTextFile = useMemo(() => {
    const files = importConfig.files ?? [];
    if (files.length === 0) return false;
    return files.some((file) => !isTextUploadFile(file));
  }, [importConfig.files]);
  const isTextDataset = data?.datasetType === DatasetType.TEXT;

  // ---- Local file upload ----

  // Builds the slice list for `files` and announces it via "upload:dataset".
  const dispatchSlicedUpload = (dataset: Dataset, files: UploadFile[]) => {
    const sliceList = files.map((file) => {
      const originFile = (file.originFileObj ?? file) as Blob;
      const slices = sliceFile(originFile);
      return {
        originFile, // the real File/Blob object
        slices,
        name: file.name,
        size: originFile.size || 0,
      };
    });

    window.dispatchEvent(
      new CustomEvent("upload:dataset", {
        detail: {
          dataset,
          files: sliceList,
          updateEvent,
          hasArchive: importConfig.hasArchive,
          prefix: currentPrefix,
        },
      })
    );
  };

  const handleUpload = async (dataset: Dataset) => {
    const filesToUpload =
      (form.getFieldValue("files") as UploadFile[] | undefined) || [];

    // Line splitting is enabled and every selected file is text.
    if (importConfig.splitByLine && !hasNonTextFile) {
      // Files accepted by `shouldStreamUpload` go to the streaming uploader;
      // everything else is split in memory.
      const filesForStreamUpload: File[] = [];
      const filesForNormalUpload: UploadFile[] = [];

      for (const file of filesToUpload) {
        const originFile = file.originFileObj ?? file;
        if (originFile instanceof File && shouldStreamUpload(originFile)) {
          filesForStreamUpload.push(originFile);
        } else {
          filesForNormalUpload.push(file);
        }
      }

      if (filesForStreamUpload.length > 0) {
        window.dispatchEvent(
          new CustomEvent("upload:dataset-stream", {
            detail: {
              dataset,
              files: filesForStreamUpload,
              updateEvent,
              hasArchive: importConfig.hasArchive,
              prefix: currentPrefix,
            },
          })
        );
      }

      if (filesForNormalUpload.length > 0) {
        const splitResults = await Promise.all(
          filesForNormalUpload.map((file) => splitFileByLines(file))
        );
        console.log("[ImportConfiguration] Uploading small files with currentPrefix:", currentPrefix);
        dispatchSlicedUpload(dataset, splitResults.flat());
      }
      return;
    }

    // Line splitting disabled: upload the files as they are.
    console.log("[ImportConfiguration] Uploading with currentPrefix:", currentPrefix);
    dispatchSlicedUpload(dataset, filesToUpload);
  };

  const fetchCollectionTasks = useCallback(async () => {
    if (importConfig.source !== DataSource.COLLECTION) return;
    try {
      const res = await queryTasksUsingGet({ page: 0, size: 100 });
      const tasks = Array.isArray(res?.data?.content)
        ? (res.data.content as CollectionTask[])
        : [];
      const options = tasks.map((task) => ({
        label: task.name,
        value: task.id,
      }));
      setCollectionOptions(options);
    } catch (error) {
      console.error("Error fetching collection tasks:", error);
    }
  }, [importConfig.source]);

  // Restores the form and the mirrored state to their defaults. It depends on
  // `form` only, so its identity does not change when other state changes and
  // it cannot re-trigger the open effect below. `currentPrefix` is
  // intentionally left untouched.
  const resetState = useCallback(() => {
    form.resetFields();
    form.setFieldsValue({ files: null });
    setImportConfig({
      source: DataSource.UPLOAD,
      hasArchive: true,
      splitByLine: false,
    });
  }, [form]);

  const handleImportData = async () => {
    if (!data) return;
    console.log('[ImportConfiguration] handleImportData called, currentPrefix:', currentPrefix);
    if (importConfig.source === DataSource.UPLOAD) {
      // Show the task center right away, before the potentially slow file
      // splitting, so the user sees that the upload has started.
      window.dispatchEvent(
        new CustomEvent("show:task-popover", { detail: { show: true } })
      );
      await handleUpload(data);
    } else if (importConfig.source === DataSource.COLLECTION) {
      await updateDatasetByIdUsingPut(data.id, {
        ...importConfig,
      });
    }
    onClose();
  };

  // Shared by the modal close icon and the footer cancel button.
  const handleCancel = () => {
    onClose();
    resetState();
  };

  // Initialise the dialog when it opens (or when `prefix` changes while open).
  // This effect must not depend on `fetchCollectionTasks`: that callback gets a
  // new identity whenever `importConfig.source` changes, which would re-run
  // this effect and reset the form every time the user picks another source.
  // Collection tasks are loaded by the dedicated effect further down.
  useEffect(() => {
    if (!open) return;
    setCurrentPrefix(prefix || "");
    console.log('[ImportConfiguration] Modal opened with prefix:', prefix);
    resetState();
  }, [open, prefix, resetState]);

  // Turn "split by line" off again as soon as a non-text file is selected.
  useEffect(() => {
    if (!importConfig.files?.length) return;
    if (!importConfig.splitByLine) return;
    if (!hasNonTextFile) return;
    form.setFieldsValue({ splitByLine: false });
    setImportConfig((prev) => ({ ...prev, splitByLine: false }));
  }, [form, hasNonTextFile, importConfig.files, importConfig.splitByLine]);

  // Load collection tasks while the dialog is open on the collection source.
  useEffect(() => {
    if (open && importConfig.source === DataSource.COLLECTION) {
      fetchCollectionTasks();
    }
  }, [fetchCollectionTasks, importConfig.source, open]);

  return (
    <Modal
      title="导入数据"
      open={open}
      width={600}
      onCancel={handleCancel}
      maskClosable={false}
      footer={
        <>
          <Button onClick={handleCancel}>取消</Button>
          <Button
            type="primary"
            disabled={!importConfig?.files?.length && !importConfig.dataSource}
            onClick={handleImportData}
          >
            确定
          </Button>
        </>
      }
    >
      <Form
        form={form}
        layout="vertical"
        initialValues={importConfig || {}}
        onValuesChange={(_, allValues) => setImportConfig(allValues)}
      >
        <Form.Item
          label="数据源"
          name="source"
          rules={[{ required: true, message: "请选择数据源" }]}
        >
          <Radio.Group
            buttonStyle="solid"
            options={availableSourceOptions}
            optionType="button"
          />
        </Form.Item>
        {importConfig?.source === DataSource.COLLECTION && (
          <Form.Item name="dataSource" label="归集任务" required>
            <Select placeholder="请选择归集任务" options={collectionOptions} />
          </Form.Item>
        )}

        {/* obs import */}
        {importConfig?.source === DataSource.OBS && (
          <div className="grid grid-cols-2 gap-3 p-4 bg-blue-50 rounded-lg">
            <Form.Item
              name="endpoint"
              rules={[{ required: true }]}
              label="Endpoint"
            >
              <Input
                className="h-8 text-xs"
                placeholder="obs.cn-north-4.myhuaweicloud.com"
              />
            </Form.Item>
            <Form.Item
              name="bucket"
              rules={[{ required: true }]}
              label="Bucket"
            >
              <Input className="h-8 text-xs" placeholder="my-bucket" />
            </Form.Item>
            <Form.Item
              name="accessKey"
              rules={[{ required: true }]}
              label="Access Key"
            >
              <Input className="h-8 text-xs" placeholder="Access Key" />
            </Form.Item>
            <Form.Item
              name="secretKey"
              rules={[{ required: true }]}
              label="Secret Key"
            >
              <Input
                type="password"
                className="h-8 text-xs"
                placeholder="Secret Key"
              />
            </Form.Item>
          </div>
        )}

        {/* Local Upload Component */}
        {importConfig?.source === DataSource.UPLOAD && (
          <>
            <Form.Item
              label="自动解压上传的压缩包"
              name="hasArchive"
              valuePropName="checked"
            >
              <Switch />
            </Form.Item>
            {isTextDataset && (
              <Form.Item
                label={
                  <span>
                    按分行分割{" "}
                    <Tooltip
                      title={
                        hasNonTextFile
                          ? "已选择非文本文件,无法按行分割"
                          : "选中后,文本文件的每一行将被分割成独立文件"
                      }
                    >
                      <QuestionCircleOutlined style={{ color: "#999" }} />
                    </Tooltip>
                  </span>
                }
                name="splitByLine"
                valuePropName="checked"
              >
                <Switch disabled={hasNonTextFile} />
              </Form.Item>
            )}
            <Form.Item
              label="上传文件"
              name="files"
              valuePropName="fileList"
              getValueFromEvent={(
                event: { fileList?: UploadFile[] } | UploadFile[]
              ) => {
                if (Array.isArray(event)) {
                  return event;
                }
                return event?.fileList;
              }}
              rules={[
                {
                  required: true,
                  message: "请上传文件",
                },
              ]}
            >
              <Dragger
                className="w-full"
                beforeUpload={() => false}
                multiple
              >
                <p className="ant-upload-drag-icon">
                  <InboxOutlined />
                </p>
                <p className="ant-upload-text">本地文件上传</p>
                <p className="ant-upload-hint">拖拽文件到此处或点击选择文件</p>
              </Dragger>
            </Form.Item>
          </>
        )}

        {/* Target Configuration */}
        {importConfig?.target && importConfig?.target !== DataSource.UPLOAD && (
          <div className="space-y-3 p-4 bg-blue-50 rounded-lg">
            {importConfig?.target === DataSource.DATABASE && (
              <div className="grid grid-cols-2 gap-3">
                <Form.Item
                  name="databaseType"
                  rules={[{ required: true }]}
                  label="数据库类型"
                >
                  <Select
                    className="w-full"
                    options={[
                      { label: "MySQL", value: "mysql" },
                      { label: "PostgreSQL", value: "postgresql" },
                      { label: "MongoDB", value: "mongodb" },
                    ]}
                  ></Select>
                </Form.Item>
                <Form.Item
                  name="tableName"
                  rules={[{ required: true }]}
                  label="表名"
                >
                  <Input className="h-8 text-xs" placeholder="dataset_table" />
                </Form.Item>
                <Form.Item
                  name="connectionString"
                  rules={[{ required: true }]}
                  label="连接字符串"
                >
                  <Input
                    className="h-8 text-xs col-span-2"
                    placeholder="数据库连接字符串"
                  />
                </Form.Item>
              </div>
            )}
          </div>
        )}
      </Form>
    </Modal>
  );
}
|