import { mockKnowledgeBases, sliceOperators, vectorDatabases, } from "@/mock/knowledgeBase"; import { useState } from "react"; import { Button, Card, Input, Select, Checkbox, Switch, Tabs, Divider, Upload, message, Form, } from "antd"; import { BookOpen, Database, Brain, Scissors, Split, Upload as UploadIcon, Folder, CheckCircle, File, ArrowLeft, } from "lucide-react"; import { useNavigate } from "react-router"; import type { Dataset } from "@/pages/DataManagement/dataset.model"; import DevelopmentInProgress from "@/components/DevelopmentInProgress"; const { TextArea } = Input; const { Option } = Select;
/**
 * Page component holding the "create knowledge base" form.
 *
 * Local state covers the knowledge-base list (seeded from mockKnowledgeBases),
 * the uploaded files, the dataset picker (search text, selected dataset id,
 * selected dataset files) and the selected slice operators, which default to
 * "semantic-split" and "paragraph-split". On submit, handleFinish builds a new
 * record, prepends it to the list, resets the form and selection state, shows
 * a success message and navigates to "/data/knowledge-generation".
 *
 * NOTE(review): the body starts with an unconditional `return`, so every
 * statement after it is unreachable as written. DevelopmentInProgress is
 * imported, presumably as the intended placeholder for that return; confirm.
 * NOTE(review): MockDataset and KnowledgeBase are used as types below but are
 * not among the visible imports; confirm where they should come from.
 * NOTE(review): `datasets` starts as [] and setDatasets is never called in the
 * visible code, so the dataset list presumably stays empty; confirm.
 */
const KnowledgeBaseCreatePage: React.FC = () => { return ; const navigate = useNavigate(); const [form] = Form.useForm(); const [knowledgeBases, setKnowledgeBases] = useState(mockKnowledgeBases); const [datasetSearchQuery, setDatasetSearchQuery] = useState(""); const [selectedDatasetId, setSelectedDatasetId] = useState( null ); const [uploadedFiles, setUploadedFiles] = useState([]); const [datasets, setDatasets] = useState([]); const [selectedDatasetFiles, setSelectedDatasetFiles] = useState< { datasetId: string; fileId: string; name: string; size: string; type: string; }[] >([]); const [selectedSliceOperators, setSelectedSliceOperators] = useState< string[] >(["semantic-split", "paragraph-split"]); // Form initial values const initialValues = { name: "", description: "", type: "unstructured" as "unstructured" | "structured", embeddingModel: "text-embedding-3-large", llmModel: "gpt-4o", chunkSize: 512, overlap: 50, sliceMethod: "semantic" as | "paragraph" | "length" | "delimiter" | "semantic", delimiter: "", enableQA: true, vectorDatabase: "pinecone", }; // Dataset file selection helpers /* Toggles one dataset file: removes the entry matching datasetId + file.id if present, otherwise appends { datasetId, ...file }. NOTE(review): entries are matched on f.fileId but built by spreading file; confirm the file objects carry a fileId property, otherwise a selection is never matched. */ const handleDatasetFileToggle = ( datasetId: string, file: MockDataset["files"][0] ) => { setSelectedDatasetFiles((prev) => { const isSelected = prev.some( (f) => f.datasetId === datasetId && f.fileId === file.id ); if (isSelected) { return prev.filter( (f) => !(f.datasetId === 
datasetId && f.fileId === file.id) ); } else { return [...prev, { datasetId, ...file }]; } }); }; /* Select-all toggle: when checked, adds every file of the dataset that is not already selected; when unchecked, drops all selections whose datasetId equals dataset.id. */ const handleSelectAllDatasetFiles = ( dataset: MockDataset, checked: boolean ) => { setSelectedDatasetFiles((prev) => { let newSelectedFiles = [...prev]; if (checked) { dataset.files.forEach((file) => { if ( !newSelectedFiles.some( (f) => f.datasetId === dataset.id && f.fileId === file.id ) ) { newSelectedFiles.push({ datasetId: dataset.id, ...file }); } }); } else { newSelectedFiles = newSelectedFiles.filter( (f) => f.datasetId !== dataset.id ); } return newSelectedFiles; }); }; /* True when a selection with this datasetId and fileId exists. */ const isDatasetFileSelected = (datasetId: string, fileId: string) => { return selectedDatasetFiles.some( (f) => f.datasetId === datasetId && f.fileId === fileId ); }; /* True when every file of the dataset is selected (vacuously true for a dataset with no files). */ const isAllDatasetFilesSelected = (dataset: MockDataset) => { return dataset.files.every((file) => isDatasetFileSelected(dataset.id, file.id) ); }; /* Adds operatorId to the selected slice operators, or removes it if it is already present. */ const handleSliceOperatorToggle = (operatorId: string) => { setSelectedSliceOperators((prev) => prev.includes(operatorId) ? prev.filter((id) => id !== operatorId) : [...prev, operatorId] ); }; // File upload /* Stores the originFileObj of every entry in info.fileList as uploadedFiles. */ const handleFileChange = (info: any) => { setUploadedFiles(info.fileList.map((f: any) => f.originFileObj)); }; // Submit the form /* Builds a new record (status "importing", zeroed counters, dates taken as the date part of toISOString(), i.e. the UTC calendar date) from the form values plus the uploaded and dataset-selected files, prepends it to knowledgeBases, resets form and selection state, shows a success message and navigates to the knowledge-generation route. */ const handleFinish = (values: any) => { const newKB: KnowledgeBase = { id: Date.now(), name: values.name, description: values.description, type: values.type, status: "importing", fileCount: uploadedFiles.length + selectedDatasetFiles.length, chunkCount: 0, vectorCount: 0, size: "0 MB", progress: 0, createdAt: new Date().toISOString().split("T")[0], lastUpdated: new Date().toISOString().split("T")[0], vectorDatabase: values.vectorDatabase, config: { embeddingModel: values.embeddingModel, llmModel: values.llmModel, chunkSize: values.chunkSize, overlap: values.overlap, sliceMethod: values.sliceMethod, delimiter: values.delimiter, enableQA: values.enableQA, vectorDimension: values.embeddingModel.includes("3-large") ? 
3072 : 1536, sliceOperators: selectedSliceOperators, }, files: [ ...uploadedFiles.map((file) => ({ id: Date.now() + Math.random(), /* timestamp plus a random fraction, presumably to keep ids created in the same millisecond distinct */ name: file.name, type: file.type.split("/")[1] || "unknown", size: `${(file.size / (1024 * 1024)).toFixed(2)} MB`, status: "processing" as const, chunkCount: 0, progress: 0, uploadedAt: new Date().toISOString().split("T")[0], source: "upload" as const, vectorizationStatus: "pending" as const, })), ...selectedDatasetFiles.map((file) => ({ id: Date.now() + Math.random(), name: file.name, type: file.type, size: file.size, status: "processing" as const, chunkCount: 0, progress: 0, uploadedAt: new Date().toISOString().split("T")[0], source: "dataset" as const, datasetId: file.datasetId, vectorizationStatus: "pending" as const, })), ], vectorizationHistory: [], }; setKnowledgeBases([newKB, ...knowledgeBases]); form.resetFields(); setUploadedFiles([]); setSelectedDatasetFiles([]); setSelectedSliceOperators(["semantic-split", "paragraph-split"]); setSelectedDatasetId(null); message.success("知识库创建成功!"); navigate("/data/knowledge-generation"); }; return ( {/* Header */} navigate("/data/knowledge-generation")} > 创建知识库 {/* Basic information */} 基本信息 form.setFieldValue("type", "unstructured")} type={ form.getFieldValue("type") === "unstructured" ? "primary" : "default" } className="h-auto py-4 flex flex-col items-center gap-2" > 非结构化知识库 支持文档、PDF等文件 form.setFieldValue("type", "structured")} type={ form.getFieldValue("type") === "structured" ? 
"primary" : "default" } className="h-auto py-4 flex flex-col items-center gap-2" > 结构化知识库 支持问答对、表格数据 {/* Model configuration */} 模型配置 text-embedding-3-large (推荐) text-embedding-3-small text-embedding-ada-002 prev.type !== curr.type || prev.enableQA !== curr.enableQA } noStyle > {() => form.getFieldValue("type") === "unstructured" && form.getFieldValue("enableQA") && ( GPT-4o (推荐) GPT-4o Mini GPT-3.5 Turbo ) } {vectorDatabases.map((db) => ( {db.name} {db.description} ))} {/* Slice operator configuration */} prev.type !== curr.type} noStyle > {() => form.getFieldValue("type") === "unstructured" && ( <> 切片算子配置 {sliceOperators.map((operator) => ( handleSliceOperatorToggle(operator.id)} > handleSliceOperatorToggle(operator.id) } /> {operator.icon} {operator.name} {operator.type} {operator.description} ))} {/* Document splitting configuration */} 文档分割配置 语义分割 (推荐) 段落分割 长度分割 分隔符分割 prev.sliceMethod !== curr.sliceMethod } > {() => form.getFieldValue("sliceMethod") === "delimiter" && ( ) } > ) } {/* Data source selection */} {form.getFieldValue("type") === "structured" ? "导入模板文件" : "选择数据源"} false} onChange={handleFileChange} fileList={uploadedFiles.map((file, idx) => ({ uid: String(idx), name: file.name, status: "done", originFileObj: file, }))} showUploadList={false} > {form.getFieldValue("type") === "structured" ? "拖拽或点击上传Excel/CSV模板文件" : "拖拽或点击上传文档文件"} 选择文件 {uploadedFiles.length > 0 && ( 已选择文件: {uploadedFiles.map((file, index) => ( {file.name} ))} )} ), }, { key: "dataset", label: "从数据集选择", children: ( setDatasetSearchQuery(e.target.value)} className="flex-1" /> setSelectedDatasetId(null)}> 重置选择 {datasets.length === 0 && ( 无匹配数据集 )} {datasets.map((dataset) => ( setSelectedDatasetId(dataset.id)} > {dataset.name} {dataset.files.length} 个文件 {selectedDatasetId === dataset.id && ( )} ))} {!selectedDatasetId ? ( 请选择一个数据集 ) : ( <> d.id === selectedDatasetId )! 
)} onChange={(e) => handleSelectAllDatasetFiles( datasets.find( (d) => d.id === selectedDatasetId )!, e.target.checked ) } /> 全选 ( { datasets.find( (d) => d.id === selectedDatasetId )?.files.length }{" "} 个文件) {datasets .find((d) => d.id === selectedDatasetId) ?.files.map((file) => ( handleDatasetFileToggle( selectedDatasetId!, file ) } /> {file.name} {file.size} • {file.type} ))} > )} {selectedDatasetFiles.length > 0 && ( 已选择数据集文件总数: {selectedDatasetFiles.length} )} ), }, ]} /> navigate("/data/knowledge-generation")}> 取消 创建知识库 ); }; export default KnowledgeBaseCreatePage;
非结构化知识库
支持文档、PDF等文件
结构化知识库
支持问答对、表格数据
{operator.description}
{form.getFieldValue("type") === "structured" ? "拖拽或点击上传Excel/CSV模板文件" : "拖拽或点击上传文档文件"}
已选择文件:
无匹配数据集
{dataset.name}
{dataset.files.length} 个文件
请选择一个数据集
{file.name}
{file.size} • {file.type}