// Files
// DataMate/frontend/src/mock/knowledgeBase.tsx
//
// 255 lines
// 7.0 KiB
// TypeScript

/**
 * Mock document chunks for the knowledge-base UI.
 *
 * Produces 23 chunks with sequential `id`/`position` values starting at 1.
 * Several fields are filled from `Math.random()` when the module is evaluated,
 * so their values differ between loads:
 * - `tokens`: integer in [100, 299]
 * - `embedding`: 1536 numbers in [-0.5, 0.5)
 * - `similarity`: a STRING with three decimals (result of `toFixed(3)`)
 * - `vectorId`: `vec_<id>_<up to 9 base-36 characters>`
 * - `sliceOperator`: one of three operator ids
 * - `parentChunkId`: `undefined` for the first chunk, otherwise the id of a
 *   randomly chosen earlier chunk (1..i)
 */
export const mockChunks = Array.from({ length: 23 }, (_, i) => {
  // 1-based ordinal shared by id, position, the content text and the vector id.
  const ordinal = i + 1;

  return {
    id: ordinal,
    content: `这是第 ${ordinal} 个文档分块的内容示例。在实际应用中,这里会显示从原始文档中提取和分割的具体文本内容。用户可以在这里查看和编辑分块的内容,确保知识库的质量和准确性。这个分块包含了重要的业务信息和技术细节,需要仔细维护以确保检索的准确性。`,
    position: ordinal,
    tokens: Math.floor(Math.random() * 200) + 100,
    embedding: Array.from({ length: 1536 }, () => Math.random() - 0.5),
    similarity: (Math.random() * 0.3 + 0.7).toFixed(3),
    createdAt: "2024-01-22 10:35",
    updatedAt: "2024-01-22 10:35",
    // slice(2, 11) drops the leading "0." and keeps at most 9 characters;
    // it replaces the deprecated substr(2, 9) with identical output.
    vectorId: `vec_${ordinal}_${Math.random().toString(36).slice(2, 11)}`,
    sliceOperator: ["semantic-split", "paragraph-split", "table-extract"][
      Math.floor(Math.random() * 3)
    ],
    // Only chunks after the first get a parent; the parent id is always
    // smaller than the chunk's own id.
    parentChunkId: i > 0 ? Math.floor(Math.random() * i) + 1 : undefined,
    metadata: {
      source: "API文档.pdf",
      // Five chunks per page, three chunks per section (section is a string).
      page: Math.floor(i / 5) + 1,
      section: `${Math.floor(i / 3) + 1}`,
    },
  };
});
// Question/answer text for the mock QA pairs, in display order.
const qaPairSeeds: ReadonlyArray<readonly [string, string]> = [
  [
    "什么是API文档的主要用途?",
    "API文档的主要用途是为开发者提供详细的接口说明,包括请求参数、响应格式和使用示例.",
  ],
  [
    "如何正确使用这个API?",
    "使用API时需要先获取访问令牌,然后按照文档中的格式发送请求,注意处理错误响应.",
  ],
];

/**
 * Mock question/answer pairs. Ids are assigned sequentially starting at 1,
 * following the order of the seed table above.
 */
export const mockQAPairs = qaPairSeeds.map(([question, answer], index) => ({
  id: index + 1,
  question,
  answer,
}));
/**
 * Catalog of slice operators available in the mock data.
 *
 * Every entry carries an `id`, a display `name` and `description`, a `type`
 * (the values used here are "text", "semantic", "structure" and "custom"),
 * an emoji `icon`, and a `params` object whose keys differ per operator.
 *
 * The ids "semantic-split", "paragraph-split", "table-extract" and
 * "qa-extract" are also referenced by other mock data in this file
 * (`mockChunks[].sliceOperator` and `mockKnowledgeBases[].config.sliceOperators`).
 */
export const sliceOperators: SliceOperator[] = [
{
id: "paragraph-split",
name: "段落分割",
description: "按段落自然分割文档",
type: "text",
icon: "📄",
params: { minLength: 50, maxLength: 1000 },
},
{
id: "sentence-split",
name: "句子分割",
description: "按句子边界分割文档",
type: "text",
icon: "📝",
params: { maxSentences: 5, overlap: 1 },
},
{
id: "semantic-split",
name: "语义分割",
description: "基于语义相似度智能分割",
type: "semantic",
icon: "🧠",
params: { threshold: 0.7, windowSize: 3 },
},
{
id: "length-split",
name: "长度分割",
description: "按固定字符长度分割",
type: "text",
icon: "📏",
params: { chunkSize: 512, overlap: 50 },
},
{
id: "structure-split",
name: "结构化分割",
description: "按文档结构(标题、章节)分割",
type: "structure",
icon: "🏗️",
params: { preserveHeaders: true, minSectionLength: 100 },
},
{
id: "table-extract",
name: "表格提取",
description: "提取并单独处理表格内容",
type: "structure",
icon: "📊",
params: { includeHeaders: true, mergeRows: false },
},
{
// The only entry of type "custom"; its params include an array value.
id: "code-extract",
name: "代码提取",
description: "识别并提取代码块",
type: "custom",
icon: "💻",
params: {
languages: ["python", "javascript", "sql"],
preserveIndentation: true,
},
},
{
id: "qa-extract",
name: "问答提取",
description: "自动识别问答格式内容",
type: "semantic",
icon: "❓",
params: { confidenceThreshold: 0.8, generateAnswers: true },
},
];
// One row per vector database option: [id, display name, description].
const vectorDatabaseRows: ReadonlyArray<readonly [string, string, string]> = [
  ["pinecone", "Pinecone", "云端向量数据库,高性能检索"],
  ["weaviate", "Weaviate", "开源向量数据库,支持多模态"],
  ["qdrant", "Qdrant", "高性能向量搜索引擎"],
  ["chroma", "ChromaDB", "轻量级向量数据库"],
  ["milvus", "Milvus", "分布式向量数据库"],
  ["faiss", "FAISS", "Facebook AI 相似性搜索库"],
];

/**
 * Vector database options, each with an `id`, a display `name` and a
 * `description`. The ids "pinecone" and "weaviate" are used as
 * `vectorDatabase` values by `mockKnowledgeBases` in this file.
 */
export const vectorDatabases = vectorDatabaseRows.map(
  ([id, name, description]) => ({ id, name, description })
);
/**
 * Mock knowledge bases.
 *
 * Contains two entries: an "unstructured" knowledge base in status "ready"
 * and a "structured" one in status "vectorizing". Each entry has summary
 * counters, a `config` object, a `files` list and a `vectorizationHistory`
 * list. The `vectorDatabase` values match ids in `vectorDatabases`, and
 * `config.sliceOperators` lists ids from `sliceOperators`.
 *
 * NOTE(review): the summary counters (e.g. `fileCount: 45`) are larger than
 * the number of entries in `files`; presumably the lists are just samples —
 * confirm before relying on them being consistent.
 */
export const mockKnowledgeBases: KnowledgeBase[] = [
{
// Knowledge base 1: unstructured, fully processed (progress 100).
id: 1,
name: "产品技术文档库",
description:
"包含所有产品相关的技术文档和API说明,支持多种格式文档的智能解析和向量化处理",
type: "unstructured",
status: "ready",
fileCount: 45,
chunkCount: 1250,
vectorCount: 1250,
size: "2.3 GB",
progress: 100,
createdAt: "2024-01-15",
lastUpdated: "2024-01-22",
vectorDatabase: "pinecone",
config: {
embeddingModel: "text-embedding-3-large",
llmModel: "gpt-4o",
chunkSize: 512,
overlap: 50,
sliceMethod: "semantic",
enableQA: true,
vectorDimension: 1536,
sliceOperators: ["semantic-split", "paragraph-split", "table-extract"],
},
files: [
{
// Uploaded file whose vectorization completed.
id: 1,
name: "API文档.pdf",
type: "pdf",
size: "2.5 MB",
status: "completed",
chunkCount: 156,
progress: 100,
uploadedAt: "2024-01-15",
source: "upload",
vectorizationStatus: "completed",
},
{
// File sourced from a dataset (carries datasetId); status is "disabled"
// and its vectorization failed at progress 65.
id: 2,
name: "用户手册.docx",
type: "docx",
size: "1.8 MB",
status: "disabled",
chunkCount: 89,
progress: 65,
uploadedAt: "2024-01-22",
source: "dataset",
datasetId: "dataset-1",
vectorizationStatus: "failed",
},
],
vectorizationHistory: [
{
// Successful "create" run for file 1: one vector per processed chunk.
id: 1,
timestamp: "2024-01-22 14:30:00",
operation: "create",
fileId: 1,
fileName: "API文档.pdf",
chunksProcessed: 156,
vectorsGenerated: 156,
status: "success",
duration: "2m 15s",
config: {
embeddingModel: "text-embedding-3-large",
chunkSize: 512,
sliceMethod: "semantic",
},
},
{
// Failed "update" run for file 2: no vectors generated, `error` is set.
id: 2,
timestamp: "2024-01-22 15:45:00",
operation: "update",
fileId: 2,
fileName: "用户手册.docx",
chunksProcessed: 89,
vectorsGenerated: 0,
status: "failed",
duration: "0m 45s",
config: {
embeddingModel: "text-embedding-3-large",
chunkSize: 512,
sliceMethod: "semantic",
},
error: "向量化服务连接超时",
},
],
},
{
// Knowledge base 2: structured, still vectorizing (progress 75,
// vectorCount below chunkCount). Its config has no llmModel and its
// vectorization history is empty.
id: 2,
name: "FAQ结构化知识库",
description: "客服常见问题的结构化问答对,支持快速检索和智能匹配",
type: "structured",
status: "vectorizing",
fileCount: 12,
chunkCount: 890,
vectorCount: 750,
size: "156 MB",
progress: 75,
createdAt: "2024-01-20",
lastUpdated: "2024-01-23",
vectorDatabase: "weaviate",
config: {
embeddingModel: "text-embedding-ada-002",
chunkSize: 256,
overlap: 0,
sliceMethod: "paragraph",
enableQA: false,
vectorDimension: 1536,
sliceOperators: ["qa-extract", "paragraph-split"],
},
files: [
{
// Uploaded file currently being vectorized.
id: 3,
name: "FAQ模板.xlsx",
type: "xlsx",
size: "450 KB",
status: "vectorizing",
chunkCount: 234,
progress: 75,
uploadedAt: "2024-01-20",
source: "upload",
vectorizationStatus: "processing",
},
],
vectorizationHistory: [],
},
];