You've already forked DataMate
init datamate
This commit is contained in:
254
frontend/src/mock/knowledgeBase.tsx
Normal file
254
frontend/src/mock/knowledgeBase.tsx
Normal file
@@ -0,0 +1,254 @@
|
||||
/**
 * Mock document chunks.
 *
 * Produces 23 chunks whose `id` and `position` run from 1 to 23. The
 * `tokens`, `embedding`, `similarity`, `vectorId`, `sliceOperator` and
 * `parentChunkId` fields are drawn from `Math.random()` every time the module
 * is evaluated, so they differ between page loads.
 *
 * Note that `similarity` is a string (the result of `toFixed(3)`), not a
 * number.
 */
export const mockChunks = Array.from({ length: 23 }, (_, i) => {
  // 1-based ordinal shared by id, position, the content text and the vector id.
  const ordinal = i + 1;
  // Operator ids a chunk may be attributed to; one is picked at random.
  const operatorIds = ["semantic-split", "paragraph-split", "table-extract"];

  return {
    id: ordinal,
    content: `这是第 ${ordinal} 个文档分块的内容示例。在实际应用中,这里会显示从原始文档中提取和分割的具体文本内容。用户可以在这里查看和编辑分块的内容,确保知识库的质量和准确性。这个分块包含了重要的业务信息和技术细节,需要仔细维护以确保检索的准确性。`,
    position: ordinal,
    // Integer in [100, 299].
    tokens: Math.floor(Math.random() * 200) + 100,
    // 1536 values, each in [-0.5, 0.5).
    embedding: Array.from({ length: 1536 }, () => Math.random() - 0.5),
    // Value in [0.7, 1.0) formatted with three decimals.
    similarity: (Math.random() * 0.3 + 0.7).toFixed(3),
    createdAt: "2024-01-22 10:35",
    updatedAt: "2024-01-22 10:35",
    // `slice(2, 11)` takes up to nine base-36 characters after the leading
    // "0."; it replaces the deprecated `substr(2, 9)` with identical output.
    vectorId: `vec_${ordinal}_${Math.random().toString(36).slice(2, 11)}`,
    sliceOperator: operatorIds[Math.floor(Math.random() * operatorIds.length)],
    // The first chunk has no parent; later chunks point at the id of a
    // randomly chosen earlier chunk (an id in 1..i).
    parentChunkId: i > 0 ? Math.floor(Math.random() * i) + 1 : undefined,
    metadata: {
      source: "API文档.pdf",
      // Five consecutive chunks per page, three per section.
      page: Math.floor(i / 5) + 1,
      section: `第${Math.floor(i / 3) + 1}章`,
    },
  };
});
|
||||
|
||||
/**
 * Mock question/answer pairs.
 *
 * Two static entries, each with a numeric `id`, a `question` and an `answer`.
 */
export const mockQAPairs = [
  // Pair 1: purpose of the API documentation.
  {
    id: 1,
    question: "什么是API文档的主要用途?",
    answer: "API文档的主要用途是为开发者提供详细的接口说明,包括请求参数、响应格式和使用示例.",
  },
  // Pair 2: how to call the API.
  {
    id: 2,
    question: "如何正确使用这个API?",
    answer: "使用API时需要先获取访问令牌,然后按照文档中的格式发送请求,注意处理错误响应.",
  },
];
|
||||
|
||||
/**
 * Mock catalogue of slice operators.
 *
 * Every entry carries an `id`, a display `name`, a `description`, a `type`
 * ("text", "semantic", "structure" or "custom" in this data set), an emoji
 * `icon` and an operator-specific `params` object.
 */
export const sliceOperators: SliceOperator[] = [
  // --- type: "text" / "semantic" splitters ---
  {
    id: "paragraph-split",
    name: "段落分割",
    description: "按段落自然分割文档",
    type: "text",
    icon: "📄",
    params: { minLength: 50, maxLength: 1000 },
  },
  {
    id: "sentence-split",
    name: "句子分割",
    description: "按句子边界分割文档",
    type: "text",
    icon: "📝",
    params: { maxSentences: 5, overlap: 1 },
  },
  {
    id: "semantic-split",
    name: "语义分割",
    description: "基于语义相似度智能分割",
    type: "semantic",
    icon: "🧠",
    params: { threshold: 0.7, windowSize: 3 },
  },
  {
    id: "length-split",
    name: "长度分割",
    description: "按固定字符长度分割",
    type: "text",
    icon: "📏",
    params: { chunkSize: 512, overlap: 50 },
  },

  // --- type: "structure" operators ---
  {
    id: "structure-split",
    name: "结构化分割",
    description: "按文档结构(标题、章节)分割",
    type: "structure",
    icon: "🏗️",
    params: { preserveHeaders: true, minSectionLength: 100 },
  },
  {
    id: "table-extract",
    name: "表格提取",
    description: "提取并单独处理表格内容",
    type: "structure",
    icon: "📊",
    params: { includeHeaders: true, mergeRows: false },
  },

  // --- extraction operators ("custom" / "semantic") ---
  {
    id: "code-extract",
    name: "代码提取",
    description: "识别并提取代码块",
    type: "custom",
    icon: "💻",
    params: {
      languages: ["python", "javascript", "sql"],
      preserveIndentation: true,
    },
  },
  {
    id: "qa-extract",
    name: "问答提取",
    description: "自动识别问答格式内容",
    type: "semantic",
    icon: "❓",
    params: { confidenceThreshold: 0.8, generateAnswers: true },
  },
];
|
||||
|
||||
/**
 * Mock list of vector database options.
 *
 * Each option has an `id`, a display `name` and a short `description`.
 */
export const vectorDatabases = [
  {
    id: "pinecone",
    name: "Pinecone",
    description: "云端向量数据库,高性能检索",
  },
  {
    id: "weaviate",
    name: "Weaviate",
    description: "开源向量数据库,支持多模态",
  },
  {
    id: "qdrant",
    name: "Qdrant",
    description: "高性能向量搜索引擎",
  },
  {
    id: "chroma",
    name: "ChromaDB",
    description: "轻量级向量数据库",
  },
  {
    id: "milvus",
    name: "Milvus",
    description: "分布式向量数据库",
  },
  {
    id: "faiss",
    name: "FAISS",
    description: "Facebook AI 相似性搜索库",
  },
];
|
||||
|
||||
/**
 * Mock knowledge bases.
 *
 * Two static entries: an "unstructured" base in status "ready" with two files
 * and two vectorization history records, and a "structured" base in status
 * "vectorizing" with one file and an empty history.
 */
export const mockKnowledgeBases: KnowledgeBase[] = [
  // ---- Knowledge base 1: unstructured, ready ----
  {
    id: 1,
    name: "产品技术文档库",
    description: "包含所有产品相关的技术文档和API说明,支持多种格式文档的智能解析和向量化处理",
    type: "unstructured",
    status: "ready",
    fileCount: 45,
    chunkCount: 1250,
    vectorCount: 1250,
    size: "2.3 GB",
    progress: 100,
    createdAt: "2024-01-15",
    lastUpdated: "2024-01-22",
    vectorDatabase: "pinecone",
    config: {
      embeddingModel: "text-embedding-3-large",
      llmModel: "gpt-4o",
      chunkSize: 512,
      overlap: 50,
      sliceMethod: "semantic",
      enableQA: true,
      vectorDimension: 1536,
      sliceOperators: ["semantic-split", "paragraph-split", "table-extract"],
    },
    files: [
      // Uploaded file, fully vectorized.
      {
        id: 1,
        name: "API文档.pdf",
        type: "pdf",
        size: "2.5 MB",
        status: "completed",
        chunkCount: 156,
        progress: 100,
        uploadedAt: "2024-01-15",
        source: "upload",
        vectorizationStatus: "completed",
      },
      // File sourced from a dataset; vectorization failed.
      {
        id: 2,
        name: "用户手册.docx",
        type: "docx",
        size: "1.8 MB",
        status: "disabled",
        chunkCount: 89,
        progress: 65,
        uploadedAt: "2024-01-22",
        source: "dataset",
        datasetId: "dataset-1",
        vectorizationStatus: "failed",
      },
    ],
    vectorizationHistory: [
      // Successful "create" run for file 1.
      {
        id: 1,
        timestamp: "2024-01-22 14:30:00",
        operation: "create",
        fileId: 1,
        fileName: "API文档.pdf",
        chunksProcessed: 156,
        vectorsGenerated: 156,
        status: "success",
        duration: "2m 15s",
        config: {
          embeddingModel: "text-embedding-3-large",
          chunkSize: 512,
          sliceMethod: "semantic",
        },
      },
      // Failed "update" run for file 2; carries an `error` message.
      {
        id: 2,
        timestamp: "2024-01-22 15:45:00",
        operation: "update",
        fileId: 2,
        fileName: "用户手册.docx",
        chunksProcessed: 89,
        vectorsGenerated: 0,
        status: "failed",
        duration: "0m 45s",
        config: {
          embeddingModel: "text-embedding-3-large",
          chunkSize: 512,
          sliceMethod: "semantic",
        },
        error: "向量化服务连接超时",
      },
    ],
  },

  // ---- Knowledge base 2: structured, still vectorizing ----
  {
    id: 2,
    name: "FAQ结构化知识库",
    description: "客服常见问题的结构化问答对,支持快速检索和智能匹配",
    type: "structured",
    status: "vectorizing",
    fileCount: 12,
    chunkCount: 890,
    vectorCount: 750,
    size: "156 MB",
    progress: 75,
    createdAt: "2024-01-20",
    lastUpdated: "2024-01-23",
    vectorDatabase: "weaviate",
    // This config has no `llmModel` entry, unlike knowledge base 1.
    config: {
      embeddingModel: "text-embedding-ada-002",
      chunkSize: 256,
      overlap: 0,
      sliceMethod: "paragraph",
      enableQA: false,
      vectorDimension: 1536,
      sliceOperators: ["qa-extract", "paragraph-split"],
    },
    files: [
      {
        id: 3,
        name: "FAQ模板.xlsx",
        type: "xlsx",
        size: "450 KB",
        status: "vectorizing",
        chunkCount: 234,
        progress: 75,
        uploadedAt: "2024-01-20",
        source: "upload",
        vectorizationStatus: "processing",
      },
    ],
    vectorizationHistory: [],
  },
];
|
||||
Reference in New Issue
Block a user