/**
 * Mock document chunks for the knowledge-base UI.
 *
 * Produces 23 chunk records. The `id`, `position`, `content`, timestamps and
 * `metadata` fields are derived deterministically from the index; the
 * remaining fields are re-randomized every time the module is evaluated:
 *
 * - `tokens`: integer in [100, 299].
 * - `embedding`: 1536 numbers, each in [-0.5, 0.5).
 * - `similarity`: a *string* with three decimals ("0.700" to "1.000"),
 *   because it is the result of `toFixed(3)`.
 * - `vectorId`: `vec_<id>_<suffix>`, where the suffix is up to 9 base-36
 *   characters.
 * - `sliceOperator`: one of "semantic-split", "paragraph-split",
 *   "table-extract".
 * - `parentChunkId`: `undefined` for the first chunk, otherwise the id of a
 *   randomly chosen earlier chunk (in [1, i]).
 */
export const mockChunks = Array.from({ length: 23 }, (_, i) => ({
  id: i + 1,
  content: `这是第 ${ i + 1 } 个文档分块的内容示例。在实际应用中,这里会显示从原始文档中提取和分割的具体文本内容。用户可以在这里查看和编辑分块的内容,确保知识库的质量和准确性。这个分块包含了重要的业务信息和技术细节,需要仔细维护以确保检索的准确性。`,
  position: i + 1,
  tokens: Math.floor(Math.random() * 200) + 100,
  embedding: Array.from({ length: 1536 }, () => Math.random() - 0.5),
  similarity: (Math.random() * 0.3 + 0.7).toFixed(3),
  createdAt: "2024-01-22 10:35",
  updatedAt: "2024-01-22 10:35",
  // `slice(2, 11)` skips the leading "0." and keeps at most 9 characters. It
  // is the non-deprecated equivalent of the legacy `substr(2, 9)`.
  vectorId: `vec_${i + 1}_${Math.random().toString(36).slice(2, 11)}`,
  sliceOperator: ["semantic-split", "paragraph-split", "table-extract"][
    Math.floor(Math.random() * 3)
  ],
  parentChunkId: i > 0 ? Math.floor(Math.random() * i) + 1 : undefined,
  metadata: {
    source: "API文档.pdf",
    // Five consecutive chunks share a page; three share a section.
    page: Math.floor(i / 5) + 1,
    section: `第${Math.floor(i / 3) + 1}章`,
  },
}));

/** Static sample question/answer pairs. */
export const mockQAPairs = [
  {
    id: 1,
    question: "什么是API文档的主要用途?",
    answer:
      "API文档的主要用途是为开发者提供详细的接口说明,包括请求参数、响应格式和使用示例.",
  },
  {
    id: 2,
    question: "如何正确使用这个API?",
    answer:
      "使用API时需要先获取访问令牌,然后按照文档中的格式发送请求,注意处理错误响应.",
  },
];

/**
 * Catalogue of slicing operators. Each entry carries a stable `id`, display
 * `name` / `description` / `icon`, a `type` category ("text", "semantic",
 * "structure" or "custom" in this list), and operator-specific `params`.
 */
export const sliceOperators: SliceOperator[] = [
  {
    id: "paragraph-split",
    name: "段落分割",
    description: "按段落自然分割文档",
    type: "text",
    icon: "📄",
    params: { minLength: 50, maxLength: 1000 },
  },
  {
    id: "sentence-split",
    name: "句子分割",
    description: "按句子边界分割文档",
    type: "text",
    icon: "📝",
    params: { maxSentences: 5, overlap: 1 },
  },
  {
    id: "semantic-split",
    name: "语义分割",
    description: "基于语义相似度智能分割",
    type: "semantic",
    icon: "🧠",
    params: { threshold: 0.7, windowSize: 3 },
  },
  {
    id: "length-split",
    name: "长度分割",
    description: "按固定字符长度分割",
    type: "text",
    icon: "📏",
    params: { chunkSize: 512, overlap: 50 },
  },
  {
    id: "structure-split",
    name: "结构化分割",
    description: "按文档结构(标题、章节)分割",
    type: "structure",
    icon: "🏗️",
    params: { preserveHeaders: true, minSectionLength: 100 },
  },
  {
    id: "table-extract",
    name: "表格提取",
    description: "提取并单独处理表格内容",
    type: "structure",
    icon: "📊",
    params: { includeHeaders: true, mergeRows: false },
  },
  {
    id: "code-extract",
    name: "代码提取",
    description: "识别并提取代码块",
    type: "custom",
    icon: "💻",
    params: {
      languages: ["python", "javascript", "sql"],
      preserveIndentation: true,
    },
  },
  {
    id: "qa-extract",
    name: "问答提取",
    description: "自动识别问答格式内容",
    type: "semantic",
    icon: "❓",
    params: { confidenceThreshold: 0.8, generateAnswers: true },
  },
];

/** Selectable vector database backends (id, display name, short description). */
export const vectorDatabases = [
  {
    id: "pinecone",
    name: "Pinecone",
    description: "云端向量数据库,高性能检索",
  },
  {
    id: "weaviate",
    name: "Weaviate",
    description: "开源向量数据库,支持多模态",
  },
  { id: "qdrant", name: "Qdrant", description: "高性能向量搜索引擎" },
  { id: "chroma", name: "ChromaDB", description: "轻量级向量数据库" },
  { id: "milvus", name: "Milvus", description: "分布式向量数据库" },
  { id: "faiss", name: "FAISS", description: "Facebook AI 相似性搜索库" },
];

/**
 * Two sample knowledge bases.
 *
 * The first is an unstructured, "ready" base with two files and a
 * vectorization history holding one successful and one failed run. The
 * second is a structured base that is still "vectorizing", with one file
 * and an empty history.
 */
export const mockKnowledgeBases: KnowledgeBase[] = [
  {
    id: 1,
    name: "产品技术文档库",
    description:
      "包含所有产品相关的技术文档和API说明,支持多种格式文档的智能解析和向量化处理",
    type: "unstructured",
    status: "ready",
    fileCount: 45,
    chunkCount: 1250,
    vectorCount: 1250,
    size: "2.3 GB",
    progress: 100,
    createdAt: "2024-01-15",
    lastUpdated: "2024-01-22",
    vectorDatabase: "pinecone",
    config: {
      embeddingModel: "text-embedding-3-large",
      llmModel: "gpt-4o",
      chunkSize: 512,
      overlap: 50,
      sliceMethod: "semantic",
      enableQA: true,
      vectorDimension: 1536,
      sliceOperators: ["semantic-split", "paragraph-split", "table-extract"],
    },
    files: [
      {
        id: 1,
        name: "API文档.pdf",
        type: "pdf",
        size: "2.5 MB",
        status: "completed",
        chunkCount: 156,
        progress: 100,
        uploadedAt: "2024-01-15",
        source: "upload",
        vectorizationStatus: "completed",
      },
      {
        id: 2,
        name: "用户手册.docx",
        type: "docx",
        size: "1.8 MB",
        status: "disabled",
        chunkCount: 89,
        progress: 65,
        uploadedAt: "2024-01-22",
        source: "dataset",
        datasetId: "dataset-1",
        vectorizationStatus: "failed",
      },
    ],
    vectorizationHistory: [
      {
        id: 1,
        timestamp: "2024-01-22 14:30:00",
        operation: "create",
        fileId: 1,
        fileName: "API文档.pdf",
        chunksProcessed: 156,
        vectorsGenerated: 156,
        status: "success",
        duration: "2m 15s",
        config: {
          embeddingModel: "text-embedding-3-large",
          chunkSize: 512,
          sliceMethod: "semantic",
        },
      },
      {
        id: 2,
        timestamp: "2024-01-22 15:45:00",
        operation: "update",
        fileId: 2,
        fileName: "用户手册.docx",
        chunksProcessed: 89,
        vectorsGenerated: 0,
        status: "failed",
        duration: "0m 45s",
        config: {
          embeddingModel: "text-embedding-3-large",
          chunkSize: 512,
          sliceMethod: "semantic",
        },
        error: "向量化服务连接超时",
      },
    ],
  },
  {
    id: 2,
    name: "FAQ结构化知识库",
    description: "客服常见问题的结构化问答对,支持快速检索和智能匹配",
    type: "structured",
    status: "vectorizing",
    fileCount: 12,
    chunkCount: 890,
    vectorCount: 750,
    size: "156 MB",
    progress: 75,
    createdAt: "2024-01-20",
    lastUpdated: "2024-01-23",
    vectorDatabase: "weaviate",
    config: {
      embeddingModel: "text-embedding-ada-002",
      chunkSize: 256,
      overlap: 0,
      sliceMethod: "paragraph",
      enableQA: false,
      vectorDimension: 1536,
      sliceOperators: ["qa-extract", "paragraph-split"],
    },
    files: [
      {
        id: 3,
        name: "FAQ模板.xlsx",
        type: "xlsx",
        size: "450 KB",
        status: "vectorizing",
        chunkCount: 234,
        progress: 75,
        uploadedAt: "2024-01-20",
        source: "upload",
        vectorizationStatus: "processing",
      },
    ],
    vectorizationHistory: [],
  },
];