import { Select, Input, Form, Radio, Modal, Button, UploadFile, Switch, Tooltip } from "antd";
import { InboxOutlined, QuestionCircleOutlined } from "@ant-design/icons";
import { dataSourceOptions } from "../../dataset.const";
import { Dataset, DatasetType, DataSource } from "../../dataset.model";
import { useCallback, useEffect, useMemo, useState } from "react";
import { queryTasksUsingGet } from "@/pages/DataCollection/collection.apis";
import { updateDatasetByIdUsingPut } from "../../dataset.api";
import { sliceFile, shouldStreamUpload } from "@/utils/file.util";
import Dragger from "antd/es/upload/Dragger";

/** MIME prefix shared by every `text/*` content type. */
const TEXT_FILE_MIME_PREFIX = "text/";

/** Non-`text/*` MIME types that are still treated as text files. */
const TEXT_FILE_MIME_TYPES = new Set([
  "application/json",
  "application/xml",
  "application/csv",
  "application/ndjson",
  "application/x-ndjson",
  "application/x-yaml",
  "application/yaml",
  "application/javascript",
  "application/x-javascript",
  "application/sql",
]);

/** File extensions (lower case, with leading dot) that are treated as text files. */
const TEXT_FILE_EXTENSIONS = new Set([
  ".txt",
  ".md",
  ".csv",
  ".tsv",
  ".json",
  ".jsonl",
  ".ndjson",
  ".log",
  ".xml",
  ".yaml",
  ".yml",
  ".sql",
]);

/**
 * Resolves the display name of an upload entry.
 *
 * @param file upload entry to inspect
 * @returns `file.name` when set, otherwise the name of the wrapped `File`,
 *          otherwise an empty string
 */
function getUploadFileName(file: UploadFile): string {
  if (file.name) return file.name;
  const originFile = file.originFileObj;
  if (originFile instanceof File && originFile.name) {
    return originFile.name;
  }
  return "";
}

/**
 * Resolves the MIME type of an upload entry.
 *
 * @param file upload entry to inspect
 * @returns `file.type` when set, otherwise the type of the wrapped `File`,
 *          otherwise an empty string
 */
function getUploadFileType(file: UploadFile): string {
  if (file.type) return file.type;
  const originFile = file.originFileObj;
  if (originFile instanceof File && typeof originFile.type === "string") {
    return originFile.type;
  }
  return "";
}

/**
 * Decides whether an upload entry is a text file.
 *
 * The MIME type is checked first (`text/*` or one of TEXT_FILE_MIME_TYPES);
 * if that does not match, the decision falls back to the file extension.
 *
 * @param file upload entry to classify
 * @returns `true` when the MIME type or the extension identifies a text file
 */
function isTextUploadFile(file: UploadFile): boolean {
  const mimeType = getUploadFileType(file).toLowerCase();
  if (mimeType) {
    if (mimeType.startsWith(TEXT_FILE_MIME_PREFIX)) return true;
    if (TEXT_FILE_MIME_TYPES.has(mimeType)) return true;
  }

  const fileName = getUploadFileName(file);
  const dotIndex = fileName.lastIndexOf(".");
  if (dotIndex < 0) return false;
  const ext = fileName.slice(dotIndex).toLowerCase();
  return TEXT_FILE_EXTENSIONS.has(ext);
}

/**
 * Splits a text file into one upload entry per non-blank line.
 *
 * Entries that are not text files, that do not wrap a readable `File`, or
 * whose content is empty are returned unchanged as a single-element list.
 * A file that contains only blank lines yields an empty list.
 *
 * @param file original upload entry
 * @returns the generated per-line entries, named `<base>_<index>` where the
 *          index is 1-based and zero-padded to the width of the line count
 */
async function splitFileByLines(file: UploadFile): Promise<UploadFile[]> {
  if (!isTextUploadFile(file)) {
    return [file];
  }

  const originFile = file.originFileObj ?? file;
  if (!(originFile instanceof File) || typeof originFile.text !== "function") {
    return [file];
  }

  const text = await originFile.text();
  if (!text) return [file];

  // Split on LF / CRLF and drop blank lines.
  const lines = text.split(/\r?\n/).filter((line: string) => line.trim() !== "");
  if (lines.length === 0) return [];

  // Base name = original name without its last extension. The name is resolved
  // through getUploadFileName so a missing `file.name` falls back to the wrapped
  // File, and a leading-dot name such as ".log" is kept whole instead of
  // collapsing to an empty base name.
  const fullName = getUploadFileName(file);
  const dotIndex = fullName.lastIndexOf(".");
  const baseName = dotIndex > 0 ? fullName.slice(0, dotIndex) : fullName;
  const padLength = String(lines.length).length;

  return lines.map((line: string, index: number) => {
    const newFileName = `${baseName}_${String(index + 1).padStart(padLength, "0")}`;
    const blob = new Blob([line], { type: "text/plain" });
    const newFile = new File([blob], newFileName, { type: "text/plain" });
    return {
      uid: `${file.uid}-${index}`,
      name: newFileName,
      size: newFile.size,
      type: "text/plain",
      originFileObj: newFile as UploadFile["originFileObj"],
    } as UploadFile;
  });
}

/**
 * Builds the payload entries carried by the "upload:dataset" event: for each
 * upload entry, the underlying File/Blob, its slices as produced by
 * `sliceFile`, its name and its size.
 *
 * @param files upload entries to convert
 * @returns one payload entry per input file, in the same order
 */
function buildSliceList(files: UploadFile[]) {
  return files.map((file) => {
    const originFile = (file.originFileObj ?? file) as Blob;
    const slices = sliceFile(originFile);
    return {
      originFile, // the actual File/Blob object, not the antd wrapper
      slices,
      name: file.name,
      size: originFile.size || 0,
    };
  });
}

type SelectOption = {
  label: string;
  value: string;
};

type CollectionTask = {
  id: string;
  name: string;
};

type ImportConfig = {
  source: DataSource;
  hasArchive: boolean;
  splitByLine: boolean;
  files?: UploadFile[];
  dataSource?: string;
  target?: DataSource;
  [key: string]: unknown;
};

/**
 * Modal for importing data into a dataset.
 *
 * For the UPLOAD source the selected files are handed over to listeners of
 * the "upload:dataset" / "upload:dataset-stream" window events; for the
 * COLLECTION source the current configuration is sent through
 * `updateDatasetByIdUsingPut`.
 *
 * @param data dataset to import into; nothing is imported while it is `null`
 * @param open whether the modal is currently open
 * @param onClose called after an import was triggered and when the modal is cancelled
 * @param updateEvent event name forwarded in the upload event detail
 * @param prefix prefix captured when the modal opens and forwarded in the upload event detail
 */
export default function ImportConfiguration({
  data,
  open,
  onClose,
  updateEvent = "update:dataset",
  prefix,
}: {
  data: Dataset | null;
  open: boolean;
  onClose: () => void;
  updateEvent?: string;
  prefix?: string;
}) {
  const [form] = Form.useForm();
  const [collectionOptions, setCollectionOptions] = useState<SelectOption[]>([]);
  const availableSourceOptions = dataSourceOptions.filter(
    (option) => option.value !== DataSource.COLLECTION
  );
  const [importConfig, setImportConfig] = useState<ImportConfig>({
    source: DataSource.UPLOAD,
    hasArchive: true,
    splitByLine: false,
  });
  const [currentPrefix, setCurrentPrefix] = useState("");

  // True when at least one selected file is not a text file.
  const hasNonTextFile = useMemo(() => {
    const files = importConfig.files ?? [];
    if (files.length === 0) return false;
    return files.some((file) => !isTextUploadFile(file));
  }, [importConfig.files]);
  const isTextDataset = data?.datasetType === DatasetType.TEXT;

  // Dispatches one upload event carrying the shared detail fields.
  const dispatchUploadEvent = (eventName: string, dataset: Dataset, files: unknown[]) => {
    window.dispatchEvent(
      new CustomEvent(eventName, {
        detail: {
          dataset,
          files,
          updateEvent,
          hasArchive: importConfig.hasArchive,
          prefix: currentPrefix,
        },
      })
    );
  };

  // Local file upload logic.
  const handleUpload = async (dataset: Dataset) => {
    const filesToUpload =
      (form.getFieldValue("files") as UploadFile[] | undefined) || [];

    // Line splitting enabled and every file is text.
    if (importConfig.splitByLine && !hasNonTextFile) {
      // Partition: files flagged by shouldStreamUpload go through the streaming
      // event, the rest are split in memory.
      const filesForStreamUpload: File[] = [];
      const filesForNormalUpload: UploadFile[] = [];

      for (const file of filesToUpload) {
        const originFile = file.originFileObj ?? file;
        if (originFile instanceof File && shouldStreamUpload(originFile)) {
          filesForStreamUpload.push(originFile);
        } else {
          filesForNormalUpload.push(file);
        }
      }

      // Streaming split-upload for the flagged files.
      if (filesForStreamUpload.length > 0) {
        dispatchUploadEvent("upload:dataset-stream", dataset, filesForStreamUpload);
      }

      // In-memory split for the remaining files.
      if (filesForNormalUpload.length > 0) {
        const splitResults = await Promise.all(
          filesForNormalUpload.map((file) => splitFileByLines(file))
        );
        const sliceList = buildSliceList(splitResults.flat());

        console.log("[ImportConfiguration] Uploading small files with currentPrefix:", currentPrefix);

        dispatchUploadEvent("upload:dataset", dataset, sliceList);
      }
      return;
    }

    // Line splitting not applied: plain upload of the selected files.
    const sliceList = buildSliceList(filesToUpload);

    console.log("[ImportConfiguration] Uploading with currentPrefix:", currentPrefix);

    dispatchUploadEvent("upload:dataset", dataset, sliceList);
  };

  // Loads collection tasks into the select options; no-op unless the
  // COLLECTION source is selected.
  const fetchCollectionTasks = useCallback(async () => {
    if (importConfig.source !== DataSource.COLLECTION) return;
    try {
      const res = await queryTasksUsingGet({ page: 0, size: 100 });
      const tasks = Array.isArray(res?.data?.content)
        ? (res.data.content as CollectionTask[])
        : [];
      const options = tasks.map((task) => ({
        label: task.name,
        value: task.id,
      }));
      setCollectionOptions(options);
    } catch (error) {
      console.error("Error fetching collection tasks:", error);
    }
  }, [importConfig.source]);

  // Resets the form and the import configuration to their defaults; the
  // captured prefix is left untouched. Depends only on `form`, so its identity
  // does not change when the prefix or the selected source changes and the
  // open effect below is not re-triggered by those updates.
  const resetState = useCallback(() => {
    form.resetFields();
    form.setFieldsValue({ files: null });
    setImportConfig({
      source: DataSource.UPLOAD,
      hasArchive: true,
      splitByLine: false,
    });
  }, [form]);

  const handleImportData = async () => {
    if (!data) return;
    console.log('[ImportConfiguration] handleImportData called, currentPrefix:', currentPrefix);
    if (importConfig.source === DataSource.UPLOAD) {
      // Show the task center right away so the user sees the upload has started
      // before time-consuming work such as file splitting begins.
      window.dispatchEvent(
        new CustomEvent("show:task-popover", { detail: { show: true } })
      );
      await handleUpload(data);
    } else if (importConfig.source === DataSource.COLLECTION) {
      await updateDatasetByIdUsingPut(data.id, {
        ...importConfig,
      });
    }
    onClose();
  };

  // When the modal opens (or the prefix changes while it is open): capture the
  // prefix and reset the form. Fetching collection tasks is handled by the
  // source effect further down, so a source change never re-runs this reset.
  useEffect(() => {
    if (open) {
      setCurrentPrefix(prefix || "");
      console.log('[ImportConfiguration] Modal opened with prefix:', prefix);
      resetState();
    }
  }, [open, prefix, resetState]);

  // Switch line splitting off as soon as a non-text file is selected.
  useEffect(() => {
    if (!importConfig.files?.length) return;
    if (!importConfig.splitByLine) return;
    if (!hasNonTextFile) return;
    form.setFieldsValue({ splitByLine: false });
    setImportConfig((prev) => ({ ...prev, splitByLine: false }));
  }, [form, hasNonTextFile, importConfig.files, importConfig.splitByLine]);

  // Fetch collection tasks while the modal is open with the COLLECTION source.
  useEffect(() => {
    if (open && importConfig.source === DataSource.COLLECTION) {
      fetchCollectionTasks();
    }
  }, [fetchCollectionTasks, importConfig.source, open]);

  // NOTE(review): the markup below looks truncated (most JSX elements are
  // missing); it is kept exactly as found — restore it from the original file.
  return ( { onClose(); resetState(); }} maskClosable={false} footer={ <> } >
setImportConfig(allValues)} > {importConfig?.source === DataSource.COLLECTION && ( )} {/* Local Upload Component */} {importConfig?.source === DataSource.UPLOAD && ( <> {isTextDataset && ( 按分行分割{" "} } name="splitByLine" valuePropName="checked" > )} { if (Array.isArray(event)) { return event; } return event?.fileList; }} rules={[ { required: true, message: "请上传文件", }, ]} > false} multiple >

本地文件上传

拖拽文件到此处或点击选择文件

 )} {/* Target Configuration */} {importConfig?.target && importConfig?.target !== DataSource.UPLOAD && (
 {importConfig?.target === DataSource.DATABASE && (
 )}
 )}
 ); }