diff --git a/.editorconfig b/.editorconfig index 65ede07..01ab809 100644 --- a/.editorconfig +++ b/.editorconfig @@ -4,7 +4,7 @@ root = true charset = utf-8 end_of_line = lf indent_style = space -indent_size = 2 +indent_size = 4 insert_final_newline = true trim_trailing_whitespace = true diff --git a/backend/services/main-application/pom.xml b/backend/services/main-application/pom.xml index 342ced3..4f180de 100644 --- a/backend/services/main-application/pom.xml +++ b/backend/services/main-application/pom.xml @@ -130,6 +130,10 @@ spring-boot-starter-test test + + org.springframework.boot + spring-boot-autoconfigure + @@ -141,6 +145,7 @@ ${maven.compiler.source} ${maven.compiler.target} + true -parameters diff --git a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java index 3356df1..3cf215c 100644 --- a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java +++ b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java @@ -16,31 +16,12 @@ import org.springframework.transaction.annotation.EnableTransactionManagement; * @version 1.0.0 */ @SpringBootApplication -@ComponentScan(basePackages = { - "com.datamate.main", - "com.datamate.datamanagement", - "com.datamate.collection", - "com.datamate.operator", - "com.datamate.cleaning", - "com.datamate.synthesis", - "com.datamate.annotation", - "com.datamate.evaluation", - "com.datamate.pipeline", - "com.datamate.execution", - "com.datamate.common" -}) -@MapperScan(basePackages = { - "com.datamate.collection.infrastructure.persistence.mapper", - "com.datamate.datamanagement.infrastructure.persistence.mapper", - "com.datamate.operator.infrastructure.persistence.mapper", - "com.datamate.cleaning.infrastructure.persistence.mapper", - "com.datamate.**.mapper" -}) +@ComponentScan(basePackages = 
{"com.datamate"}) +@MapperScan(basePackages = {"com.datamate.**.mapper"}) @EnableTransactionManagement @EnableAsync @EnableScheduling public class DataMatePlatformApplication { - public static void main(String[] args) { SpringApplication.run(DataMatePlatformApplication.class, args); } diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml index 90aea6d..454a3d9 100644 --- a/backend/services/rag-indexer-service/pom.xml +++ b/backend/services/rag-indexer-service/pom.xml @@ -16,33 +16,37 @@ RAG Indexer Service RAG文档索引服务 + + + + dev.langchain4j + langchain4j-bom + 1.8.0 + pom + import + + + + com.datamate domain-common ${project.version} + + com.datamate + data-management-service + 1.0.0-SNAPSHOT + org.springframework.boot spring-boot-starter-web - org.springframework.boot - spring-boot-starter-data-elasticsearch - - - com.mysql - mysql-connector-j - ${mysql.version} - - - org.springframework.boot - spring-boot-starter-test - test - - - org.springframework.cloud - spring-cloud-starter-openfeign + mysql + mysql-connector-java + 8.0.33 org.springdoc @@ -56,6 +60,54 @@ jakarta.validation jakarta.validation-api + + org.springframework.boot + spring-boot-starter-test + test + + + dev.langchain4j + langchain4j-open-ai + 1.8.0 + + + dev.langchain4j + langchain4j + 1.8.0 + + + dev.langchain4j + langchain4j-document-parser-apache-pdfbox + + + dev.langchain4j + langchain4j-document-parser-apache-tika + + + dev.langchain4j + langchain4j-document-parser-apache-poi + + + dev.langchain4j + langchain4j-document-parser-markdown + + + dev.langchain4j + langchain4j-document-transformer-jsoup + + + dev.langchain4j + langchain4j-milvus + + + + dev.langchain4j + langchain4j-embeddings-all-minilm-l6-v2 + + + org.testcontainers + milvus + @@ -64,31 +116,6 @@ org.springframework.boot spring-boot-maven-plugin - diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java 
b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java
new file mode 100644
index 0000000..989816d
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java
@@ -0,0 +1,174 @@
+package com.datamate.rag.indexer.application;
+
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.model.RagChunk;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.infrastructure.event.DataInsertedEvent;
+import com.datamate.common.infrastructure.exception.BusinessException;
+import com.datamate.common.infrastructure.exception.KnowledgeBaseErrorCode;
+import com.datamate.common.interfaces.PagedResponse;
+import com.datamate.common.interfaces.PagingQuery;
+import com.datamate.rag.indexer.interfaces.dto.*;
+import lombok.RequiredArgsConstructor;
+import org.springframework.beans.BeanUtils;
+import org.springframework.context.ApplicationEventPublisher;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StringUtils;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Application service for knowledge bases and the files attached to them.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Service
+@RequiredArgsConstructor
+public class KnowledgeBaseService {
+    private final KnowledgeBaseRepository knowledgeBaseRepository;
+    private final RagFileRepository ragFileRepository;
+    private final ApplicationEventPublisher eventPublisher;
+
+    /**
+     * Creates a knowledge base from the request.
+     *
+     * @param request knowledge base creation request
+     * @return ID of the new knowledge base
+     */
+    public String create(KnowledgeBaseCreateReq request) {
+        KnowledgeBase knowledgeBase = new KnowledgeBase();
+        BeanUtils.copyProperties(request, knowledgeBase);
+        knowledgeBaseRepository.save(knowledgeBase);
+        return knowledgeBase.getId();
+    }
+
+    /**
+     * Updates the name and/or description of a knowledge base; a blank value leaves that field unchanged.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         knowledge base update request
+     * @throws BusinessException if the knowledge base does not exist
+     */
+    public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) {
+        KnowledgeBase knowledgeBase = getById(knowledgeBaseId);
+        if (StringUtils.hasText(request.getName())) {
+            knowledgeBase.setName(request.getName());
+        }
+        if (StringUtils.hasText(request.getDescription())) {
+            knowledgeBase.setDescription(request.getDescription());
+        }
+        knowledgeBaseRepository.updateById(knowledgeBase);
+    }
+
+    /**
+     * Deletes a knowledge base together with its file records.
+     *
+     * <p>Both deletes share one transaction, so a failure in the second one rolls back the first.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     */
+    @Transactional(rollbackFor = Exception.class)
+    public void delete(String knowledgeBaseId) {
+        knowledgeBaseRepository.removeById(knowledgeBaseId);
+        ragFileRepository.removeByKnowledgeBaseId(knowledgeBaseId);
+        // TODO: delete every document associated with the knowledge base
+    }
+
+    /**
+     * Loads a knowledge base by ID.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @return the knowledge base
+     * @throws BusinessException if the knowledge base does not exist
+     */
+    public KnowledgeBase getById(String knowledgeBaseId) {
+        return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
+                .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
+    }
+
+    /**
+     * Returns one page of knowledge bases matching the query.
+     *
+     * @param request paging and filter parameters
+     * @return paged result
+     */
+    public PagedResponse<KnowledgeBase> list(KnowledgeBaseQueryReq request) {
+        IPage<KnowledgeBase> page = new Page<>(request.getPage(), request.getSize());
+        page = knowledgeBaseRepository.page(page, request);
+        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
+    }
+
+    /**
+     * Registers files in a knowledge base and publishes a {@link DataInsertedEvent} for them.
+     *
+     * @param request files to add plus the chunking strategy
+     * @throws BusinessException if the knowledge base does not exist
+     */
+    @Transactional(rollbackFor = Exception.class)
+    public void addFiles(AddFilesReq request) {
+        KnowledgeBase knowledgeBase = getById(request.getKnowledgeBaseId());
+        // A request body without "files" leaves the field null; treat that as an empty batch rather than an NPE.
+        List<AddFilesReq.FileInfo> files = request.getFiles();
+        if (files == null) {
+            files = List.of();
+        }
+        List<RagFile> ragFiles = files.stream().map(fileInfo -> {
+            RagFile ragFile = new RagFile();
+            ragFile.setKnowledgeBaseId(knowledgeBase.getId());
+            ragFile.setFileId(fileInfo.fileId());
+            ragFile.setFileName(fileInfo.fileName());
+            ragFile.setStatus(FileStatus.UNPROCESSED);
+            return ragFile;
+        }).toList();
+        ragFileRepository.saveBatch(ragFiles, 100);
+        eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase.getId(), request.getProcessType()));
+    }
+
+    /**
+     * Returns one page of the files that belong to the given knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         paging parameters
+     * @return paged result
+     */
+    public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
+        IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
+        // Restrict the query to this knowledge base; an unfiltered page returns files of every knowledge base.
+        page = ragFileRepository.page(page,
+                new LambdaQueryWrapper<RagFile>().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId));
+        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
+    }
+
+    /**
+     * Deletes files from a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         files to delete
+     */
+    public void deleteFile(String knowledgeBaseId, DeleteFilesReq request) {
+        // TODO: not implemented yet - the call currently does nothing
+    }
+
+    /**
+     * Returns one page of chunks of a file.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param ragFileId       file ID
+     * @param pagingQuery     paging parameters
+     * @return paged result
+     */
+    public PagedResponse<RagChunk> getChunks(String knowledgeBaseId, String ragFileId, PagingQuery pagingQuery) {
+        // TODO: no chunk query exists yet, so the page carries no records
+        IPage<RagChunk> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
+        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java
new file mode 100644
index 0000000..8f3132f
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java
@@ -0,0 +1,26 @@
+package com.datamate.rag.indexer.domain.model;
+
+/**
+ * Processing status of a knowledge base file
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+public enum FileStatus {
+    /**
+     * Not processed yet
+     */
+    UNPROCESSED,
+    /**
+     * Being processed
+     */
+    PROCESSING,
+    /**
+     * Processed
+     */
+    PROCESSED,
+    /**
+     * Processing failed
+     */
+    PROCESS_FAILED
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java
new file mode 100644
index
0000000..4a571b3
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java
@@ -0,0 +1,37 @@
+package com.datamate.rag.indexer.domain.model;
+
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.datamate.common.domain.model.base.BaseEntity;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Knowledge base entity
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Getter
+@Setter
+@TableName("t_rag_knowledge_base")
+public class KnowledgeBase extends BaseEntity {
+    /**
+     * Knowledge base name
+     */
+    private String name;
+
+    /**
+     * Knowledge base description
+     */
+    private String description;
+
+    /**
+     * Embedding model
+     */
+    private String embeddingModel;
+
+    /**
+     * Chat model
+     */
+    private String chatModel;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java
new file mode 100644
index 0000000..6a6b884
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java
@@ -0,0 +1,10 @@
+package com.datamate.rag.indexer.domain.model;
+
+/**
+ * RAG document chunk entity
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+public class RagChunk {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java
new file mode 100644
index 0000000..ec0445a
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java
@@ -0,0 +1,50 @@
+package com.datamate.rag.indexer.domain.model;
+
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.datamate.common.domain.model.base.BaseEntity;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.Map;
+
+/**
+ * RAG file entity
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Getter
+@Setter
+@TableName(value = "t_rag_file", autoResultMap = true)
+public class RagFile extends BaseEntity {
+    /**
+     * Knowledge base ID
+     */
+    private String knowledgeBaseId;
+    /**
+     * File name
+     */
+    private String fileName;
+    /**
+     * File ID
+     */
+    private String fileId;
+    /**
+     * Number of chunks
+     */
+    private Integer chunkCount;
+
+    /**
+     * Metadata; autoResultMap on the table annotation lets the type handler apply when rows are read
+     */
+    @TableField(typeHandler = JacksonTypeHandler.class)
+    private Map<String, Object> metadata;
+
+    /**
+     * Processing status
+     */
+    private FileStatus status;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java
new file mode 100644
index 0000000..273abc9
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java
@@ -0,0 +1,23 @@
+package com.datamate.rag.indexer.domain.repository;
+
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.repository.IRepository;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
+
+/**
+ * Knowledge base repository
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+public interface KnowledgeBaseRepository extends IRepository<KnowledgeBase> {
+    /**
+     * Queries one page of knowledge bases
+     *
+     * @param page    paging information
+     * @param request query request
+     * @return page of knowledge bases
+     */
+    IPage<KnowledgeBase> page(IPage<KnowledgeBase> page, KnowledgeBaseQueryReq request);
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java
new file mode 100644
index 0000000..d55b2b1
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java
@@ -0,0 +1,32 @@
+package com.datamate.rag.indexer.domain.repository;
+
+import com.baomidou.mybatisplus.extension.repository.IRepository;
+import com.datamate.rag.indexer.domain.model.RagFile;
+
+import java.util.List;
+
+/**
+ * Knowledge base file repository
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+public interface RagFileRepository extends IRepository<RagFile> {
+    /**
+     * Deletes every file record that belongs to the given knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     */
+    void removeByKnowledgeBaseId(String knowledgeBaseId);
+
+    /**
+     * Finds the files of the given knowledge base that still need processing.
+     *
+     * <p>NOTE(review): the implementation in this change returns only UNPROCESSED and PROCESS_FAILED files;
+     * consider a method name that says so.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @return matching file records
+     */
+    List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId);
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java
new file mode 100644
index 0000000..417de1e
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java
@@ -0,0 +1,12 @@
+package com.datamate.rag.indexer.infrastructure.event;
+
+import com.datamate.rag.indexer.interfaces.dto.ProcessType;
+
+/**
+ * Data inserted event
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+public record DataInsertedEvent(String knowledgeBaseId, ProcessType processType) {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
new file mode 100644
index 0000000..5c9979e
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
@@ -0,0 +1,207 @@
+package com.datamate.rag.indexer.infrastructure.event;
+
+import com.datamate.common.models.domain.entity.ModelConfig;
+import com.datamate.common.models.domain.repository.ModelConfigRepository;
+import com.datamate.common.models.infrastructure.client.ModelClient;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.interfaces.dto.ProcessType;
+import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
+import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
+import dev.langchain4j.data.document.Document;
+import dev.langchain4j.data.document.DocumentParser;
+import dev.langchain4j.data.document.DocumentSplitter;
+import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
+import dev.langchain4j.data.document.parser.TextDocumentParser;
+import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
+import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;
+import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
+import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser;
+import dev.langchain4j.data.document.splitter.*;
+import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.output.Response;
+import dev.langchain4j.store.embedding.EmbeddingStore;
+import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.jetbrains.annotations.NotNull;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.event.TransactionPhase;
+import org.springframework.transaction.event.TransactionalEventListener;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Semaphore;
+
+/**
+ * RAG ETL service: parses, chunks and embeds knowledge base files and stores the vectors in Milvus.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class RagEtlService {
+    /** Caps the number of files processed at the same time. */
+    private static final Semaphore SEMAPHORE = new Semaphore(10);
+
+    /** File types whose text is run through the HTML-to-text transformer after parsing. */
+    private static final List<String> HTML_FILE_TYPES = List.of("html", "htm");
+
+    private final RagFileRepository ragFileRepository;
+
+    private final DatasetFileRepository datasetFileRepository;
+
+    private final ModelConfigRepository modelConfigRepository;
+
+    private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
+
+    /**
+     * Processes the pending files of a knowledge base after the publishing transaction has committed.
+     *
+     * @param event event carrying the knowledge base ID and the chunking strategy
+     */
+    @Async
+    @TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
+    public void processAfterCommit(DataInsertedEvent event) {
+        List<RagFile> ragFiles = ragFileRepository.findByKnowledgeBaseId(event.knowledgeBaseId());
+        for (RagFile ragFile : ragFiles) {
+            try {
+                SEMAPHORE.acquire();
+            } catch (InterruptedException e) {
+                // Restore the flag and stop: with the flag set every further acquire() would fail at once.
+                Thread.currentThread().interrupt();
+                log.warn("Interrupted while scheduling RAG files of knowledge base {}", event.knowledgeBaseId());
+                return;
+            }
+            try {
+                executor.submit(() -> processWithStatus(ragFile, event.processType()));
+            } catch (RuntimeException e) {
+                // The task never started, so its finally block cannot return the permit.
+                SEMAPHORE.release();
+                throw e;
+            }
+        }
+    }
+
+    /**
+     * Runs the pipeline for one file, records the resulting status and always returns the permit.
+     */
+    private void processWithStatus(RagFile ragFile, ProcessType processType) {
+        try {
+            ragFile.setStatus(FileStatus.PROCESSING);
+            ragFileRepository.updateById(ragFile);
+            processRagFile(ragFile, processType);
+            ragFile.setStatus(FileStatus.PROCESSED);
+            ragFileRepository.updateById(ragFile);
+        } catch (Exception e) {
+            // Log the cause: the status column alone does not say why a file failed.
+            log.error("RAG processing failed for file {} ({})", ragFile.getFileId(), ragFile.getFileName(), e);
+            ragFile.setStatus(FileStatus.PROCESS_FAILED);
+            ragFileRepository.updateById(ragFile);
+        } finally {
+            SEMAPHORE.release();
+        }
+    }
+
+    /**
+     * Parses, chunks and embeds one file, then writes the vectors to the embedding store.
+     */
+    private void processRagFile(RagFile ragFile, ProcessType processType) {
+        DatasetFile file = datasetFileRepository.getById(ragFile.getFileId());
+        if (file == null) {
+            throw new IllegalStateException("Dataset file not found: " + ragFile.getFileId());
+        }
+        String fileType = file.getFileType() == null ? "" : file.getFileType().toLowerCase(Locale.ROOT);
+        // Parse the document from the file system
+        DocumentParser parser = documentParser(fileType);
+        Document document = FileSystemDocumentLoader.loadDocument(file.getFilePath(), parser);
+        // Strip markup from HTML documents
+        if (HTML_FILE_TYPES.contains(fileType)) {
+            document = new HtmlToTextDocumentTransformer().transform(document);
+        }
+        // Split the document into chunks
+        DocumentSplitter splitter = documentSplitter(processType);
+        List<TextSegment> split = splitter.split(document);
+
+        // Record the chunk count
+        ragFile.setChunkCount(split.size());
+        ragFileRepository.updateById(ragFile);
+        if (split.isEmpty()) {
+            // Nothing to embed or store
+            return;
+        }
+
+        // NOTE(review): model ID "1" is hard-coded; presumably the knowledge base's embeddingModel should be used.
+        ModelConfig model = modelConfigRepository.getById("1");
+        if (model == null) {
+            throw new IllegalStateException("Embedding model config not found: 1");
+        }
+        EmbeddingModel embeddingModel = ModelClient.invokeEmbeddingModel(model);
+        Response<@NotNull List<Embedding>> response = embeddingModel.embedAll(split);
+        // Store the vectors together with their segments
+        embeddingStore().addAll(response.content(), split);
+    }
+
+    /**
+     * Returns the document parser for a file type.
+     *
+     * @param fileType file type such as {@code pdf}, case-insensitive; {@code null} selects the fallback parser
+     * @return document parser
+     */
+    public DocumentParser documentParser(String fileType) {
+        String normalized = fileType == null ? "" : fileType.toLowerCase(Locale.ROOT);
+        return switch (normalized) {
+            case "txt", "html", "htm" -> new TextDocumentParser();
+            case "md" -> new MarkdownDocumentParser();
+            case "pdf" -> new ApachePdfBoxDocumentParser();
+            case "doc", "docx", "xls", "xlsx", "ppt", "pptx" -> new ApachePoiDocumentParser();
+            default -> new ApacheTikaDocumentParser();
+        };
+    }
+
+    /**
+     * Returns the splitter for a chunking strategy; every splitter is built with the arguments 1000 and 100.
+     *
+     * <p>NOTE(review): the strategy names and the splitters do not obviously line up (for example
+     * PARAGRAPH_CHUNK splits by line) - confirm the mapping is intended.
+     *
+     * @param processType chunking strategy; {@code null} is treated as {@link ProcessType#DEFAULT_CHUNK}
+     * @return document splitter
+     */
+    public DocumentSplitter documentSplitter(ProcessType processType) {
+        ProcessType effective = processType == null ? ProcessType.DEFAULT_CHUNK : processType;
+        return switch (effective) {
+            case CHAPTER_CHUNK -> new DocumentByParagraphSplitter(1000, 100);
+            case PARAGRAPH_CHUNK -> new DocumentByLineSplitter(1000, 100);
+            case LENGTH_CHUNK -> new DocumentBySentenceSplitter(1000, 100);
+            case CUSTOM_SEPARATOR_CHUNK -> new DocumentByWordSplitter(1000, 100);
+            case DEFAULT_CHUNK -> new DocumentByRegexSplitter("\\n\\n", "", 1000, 100);
+        };
+    }
+
+    /**
+     * Builds a Milvus-backed embedding store.
+     *
+     * <p>NOTE(review): URI, collection name and dimension are hard-coded, and a new store is built on every
+     * call - consider configuration properties and a shared instance.
+     *
+     * @return embedding store
+     */
+    public EmbeddingStore<TextSegment> embeddingStore() {
+        return MilvusEmbeddingStore.builder()
+                .uri("http://milvus:19530")
+                .collectionName("rag_embeddings")
+                .dimension(1536)
+                .build();
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java
new file mode 100644
index 0000000..c186bac
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java
@@ -0,0 +1,31 @@
+package com.datamate.rag.indexer.infrastructure.persistence.impl;
+
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.repository.CrudRepository;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
+import com.datamate.rag.indexer.infrastructure.persistence.mapper.KnowledgeBaseMapper;
+import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
+import org.springframework.stereotype.Repository;
+import org.springframework.util.StringUtils;
+
+/**
+ * Knowledge base repository implementation
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Repository
+public class KnowledgeBaseRepositoryImpl extends CrudRepository<KnowledgeBaseMapper, KnowledgeBase> implements KnowledgeBaseRepository {
+
+    @Override
+    public IPage<KnowledgeBase> page(IPage<KnowledgeBase> page, KnowledgeBaseQueryReq request) {
+        return this.page(page, new LambdaQueryWrapper<KnowledgeBase>()
+                .like(StringUtils.hasText(request.getName()), KnowledgeBase::getName, request.getName())
+                .like(StringUtils.hasText(request.getDescription()), KnowledgeBase::getDescription, request.getDescription())
+                .like(StringUtils.hasText(request.getCreatedBy()), KnowledgeBase::getCreatedBy, request.getCreatedBy())
+                .like(StringUtils.hasText(request.getUpdatedBy()), KnowledgeBase::getUpdatedBy, request.getUpdatedBy())
+                .orderByDesc(KnowledgeBase::getCreatedAt));
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java
new file mode 100644
index 0000000..0e0c098
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java
@@ -0,0 +1,33 @@
+package com.datamate.rag.indexer.infrastructure.persistence.impl;
+
+import com.baomidou.mybatisplus.extension.repository.CrudRepository;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Knowledge base file repository implementation
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Repository
+public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile> implements RagFileRepository {
+    @Override
+    public void removeByKnowledgeBaseId(String knowledgeBaseId) {
+        lambdaUpdate().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId).remove();
+    }
+
+    @Override
+    public List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId) {
+        // Only files that are unprocessed or whose processing failed are returned
+        return lambdaQuery()
+                .eq(RagFile::getKnowledgeBaseId, knowledgeBaseId)
+                .in(RagFile::getStatus, FileStatus.UNPROCESSED, FileStatus.PROCESS_FAILED)
+                .list();
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java
new file mode 100644
index 0000000..cf1525f
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java
@@ -0,0 +1,16 @@
+package com.datamate.rag.indexer.infrastructure.persistence.mapper;
+
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Knowledge base mapper
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Mapper
+public interface KnowledgeBaseMapper extends BaseMapper<KnowledgeBase> {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java
new file mode 100644
index 0000000..e0f233f
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java
@@ -0,0 +1,16 @@
+package com.datamate.rag.indexer.infrastructure.persistence.mapper;
+
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * RAG file mapper
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Mapper
+public interface RagFileMapper extends BaseMapper<RagFile> {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java
new file mode 100644
index 0000000..06963dc
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java
@@ -0,0 +1,9 @@
+package com.datamate.rag.indexer.interfaces;
+
+import org.springframework.web.bind.annotation.RestController;
+
+/** Placeholder controller; it declares no endpoints yet. */
+@RestController
+public class EmbeddingController {
+
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java
new file mode 100644
index 0000000..d0ed099
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java
@@ -0,0 +1,144 @@
+package com.datamate.rag.indexer.interfaces;
+
+import com.datamate.rag.indexer.application.KnowledgeBaseService;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.model.RagChunk;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.common.infrastructure.common.Response;
+import com.datamate.common.interfaces.PagedResponse;
+import com.datamate.common.interfaces.PagingQuery;
+import com.datamate.rag.indexer.interfaces.dto.*;
+import lombok.RequiredArgsConstructor;
+import org.springframework.web.bind.annotation.*;
+
+import jakarta.validation.Valid;
+
+/**
+ * Knowledge base controller
+ *
+ * @author dallas
+ * @since 2025-09-30
+ */
+@RestController
+@RequiredArgsConstructor
+@RequestMapping("/v1/knowledge-base")
+public class KnowledgeBaseController {
+    private final KnowledgeBaseService knowledgeBaseService;
+
+    // NOTE(review): looks like a leftover smoke-test endpoint - confirm it should ship
+    @GetMapping(path = "/test1")
+    public String test() {
+        return "test1";
+    }
+
+    /**
+     * Creates a knowledge base
+     *
+     * @param request knowledge base creation request
+     * @return knowledge base ID
+     */
+    @PostMapping("/create")
+    public String create(@RequestBody @Valid KnowledgeBaseCreateReq request) {
+        return knowledgeBaseService.create(request);
+    }
+
+    /**
+     * Updates a knowledge base
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         knowledge base update request
+     */
+    @PutMapping("/{knowledgeBaseId}")
+    public void update(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                       @RequestBody @Valid KnowledgeBaseUpdateReq request) {
+        knowledgeBaseService.update(knowledgeBaseId, request);
+    }
+
+    /**
+     * Deletes a knowledge base
+     *
+     * @param knowledgeBaseId knowledge base ID
+     */
+    @DeleteMapping("/{knowledgeBaseId}")
+    public void delete(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
+        knowledgeBaseService.delete(knowledgeBaseId);
+    }
+
+    /**
+     * Gets a knowledge base
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @return knowledge base
+     */
+    @GetMapping("/{knowledgeBaseId}")
+    public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
+        return knowledgeBaseService.getById(knowledgeBaseId);
+    }
+
+    /**
+     * Lists knowledge bases
+     *
+     * @param request paging and filter parameters
+     * @return page of knowledge bases
+     */
+    @PostMapping("/list")
+    public PagedResponse<KnowledgeBase> list(@RequestBody @Valid KnowledgeBaseQueryReq request) {
+        return knowledgeBaseService.list(request);
+    }
+
+    /**
+     * Adds files to a knowledge base
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         add-files request
+     */
+    @PostMapping("/{knowledgeBaseId}/files")
+    public void addFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                         @RequestBody @Valid AddFilesReq request) {
+        request.setKnowledgeBaseId(knowledgeBaseId);
+        knowledgeBaseService.addFiles(request);
+    }
+
+    /**
+     * Lists the files of a knowledge base
+     *
+     * <p>NOTE(review): this GET mapping reads its paging parameters from a request body, which many HTTP
+     * clients and proxies do not send - consider query parameters, as getChunks uses.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         paging parameters
+     * @return page of files
+     */
+    @GetMapping("/{knowledgeBaseId}/files")
+    public PagedResponse<RagFile> listFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                                            @RequestBody @Valid RagFileReq request) {
+        return knowledgeBaseService.listFiles(knowledgeBaseId, request);
+    }
+
+    /**
+     * Deletes files of a knowledge base
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         delete-files request
+     */
+    @DeleteMapping("/{knowledgeBaseId}/files")
+    public void deleteFile(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                           @RequestBody DeleteFilesReq request) {
+        knowledgeBaseService.deleteFile(knowledgeBaseId, request);
+    }
+
+    /**
+     * Lists the chunks of a knowledge base file
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param ragFileId       file ID
+     * @param pagingQuery     paging parameters
+     * @return page of chunks
+     */
+    @GetMapping("/{knowledgeBaseId}/files/{ragFileId}")
+    public PagedResponse<RagChunk> getChunks(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+                                             @PathVariable("ragFileId") String ragFileId,
+                                             PagingQuery pagingQuery) {
+        return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery);
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java
new file mode 100644
index 0000000..52568a2
--- /dev/null
+++
b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java @@ -0,0 +1,23 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import lombok.Getter; +import lombok.Setter; + +import java.util.List; + +/** + * 添加文件请求 + * + * @author dallas + * @since 2025-10-29 + */ +@Getter +@Setter +public class AddFilesReq { + private String knowledgeBaseId; + private ProcessType processType; + private List files; + + public record FileInfo(String fileId, String fileName) { + } +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java new file mode 100644 index 0000000..0837d4e --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java @@ -0,0 +1,13 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import java.util.List; + +/** + * 删除文件请求 + * + * @author dallas + * @since 2025-10-29 + */ +public class DeleteFilesReq { + private List fileIds; +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java new file mode 100644 index 0000000..c3df78c --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java @@ -0,0 +1,41 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 知识库创建请求 + * + * @author dallas + * @since 2025-10-24 + */ +@Setter +@Getter +public class KnowledgeBaseCreateReq { + /** + * 知识库名称 + */ + 
@NotEmpty(message = "知识库名称不能为空") + @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间") + @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线") + private String name; + /** + * Knowledge base description + */ + @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间") + private String description; + + /** + * Embedding model + */ + @NotEmpty(message = "嵌入模型不能为空") + private String embeddingModel; + + /** + * Chat model + */ + private String chatModel; +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java new file mode 100644 index 0000000..a2e7147 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java @@ -0,0 +1,24 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import com.datamate.common.interfaces.PagingQuery; +import lombok.Getter; +import lombok.Setter; + +import java.time.LocalDateTime; + +/** + * Knowledge base list query: paging fields from PagingQuery plus name, description and audit fields (presumably filter criteria; confirm against the service) + * + * @author dallas + * @since 2025-10-29 + */ +@Setter +@Getter +public class KnowledgeBaseQueryReq extends PagingQuery { + private String name; + private String description; + private LocalDateTime createdAt; + private LocalDateTime updatedAt; + private String createdBy; + private String updatedBy; +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java new file mode 100644 index 0000000..a115654 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java @@ -0,0 +1,30 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import jakarta.validation.constraints.NotEmpty; +import 
jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * Knowledge base update request + * + * @author dallas + * @since 2025-10-24 + */ +@Getter +@Setter +public class KnowledgeBaseUpdateReq { + /** + * Knowledge base name + */ + @NotEmpty(message = "知识库名称不能为空") + @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间") + @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线") + private String name; + /** + * Knowledge base description + */ + @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间") + private String description; +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java new file mode 100644 index 0000000..7301163 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java @@ -0,0 +1,33 @@ +package com.datamate.rag.indexer.interfaces.dto; + +/** + * Chunking process type + * + * @author dallas + * @since 2025-10-29 + */ +public enum ProcessType { + /** + * Chunk by chapter + */ + CHAPTER_CHUNK, + /** + * Chunk by paragraph + */ + PARAGRAPH_CHUNK, + + /** + * Chunk by length + */ + LENGTH_CHUNK, + + /** + * Chunk by a custom separator + */ + CUSTOM_SEPARATOR_CHUNK, + + /** + * Default chunking + */ + DEFAULT_CHUNK, +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java new file mode 100644 index 0000000..a26f1b0 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java @@ -0,0 +1,17 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import com.datamate.common.interfaces.PagingQuery; +import lombok.Getter; +import lombok.Setter; + +/** + * RAG file request; the Lombok accessors make fileName bindable from the request and readable by callers + * + * @author dallas + * @since 2025-10-29 + */ +@Getter +@Setter +public class RagFileReq extends PagingQuery { 
+ private String fileName; +} diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java index 7d87328..4a9647a 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java @@ -12,7 +12,9 @@ import java.util.List; @NoArgsConstructor @AllArgsConstructor public class PagedResponse { + // Current page number (0-based) private long page; + // Number of items per page private long size; private long totalElements; private long totalPages; diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java index 798075f..5c646dd 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java @@ -1,14 +1,8 @@ package com.datamate.common.interfaces; -import lombok.AllArgsConstructor; import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; @Getter -@Setter -@NoArgsConstructor -@AllArgsConstructor public class PagingQuery { /** * 页码,从0开始 @@ -19,4 +13,19 @@ public class PagingQuery { * 每页大小 */ private Integer size = 20; + + public void setPage(Integer page) { // null or negative input falls back to page 0 + if (page == null || page < 0) { + this.page = 0; + } else { + this.page = page; + } + } + public void setSize(Integer size) { // null, zero or negative input falls back to a size of 20 + if (size == null || size <= 0) { + this.size = 20; + } else { + this.size = size; + } + } } diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 98529fd..ba21622 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -113,3 +113,4 @@ volumes: 
networks: datamate: driver: bridge + name: datamate-network # fixed name so other compose files can join this network as external diff --git a/deployment/docker/deer-flow/docker-compose.yml b/deployment/docker/deer-flow/docker-compose.yml index 00069fa..03853e3 100644 --- a/deployment/docker/deer-flow/docker-compose.yml +++ b/deployment/docker/deer-flow/docker-compose.yml @@ -24,5 +24,5 @@ services: networks: datamate: driver: bridge - name: datamate_datamate + name: datamate-network # must match the network name set in the datamate compose file external: true diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml new file mode 100644 index 0000000..6396068 --- /dev/null +++ b/deployment/docker/milvus/docker-compose.yml @@ -0,0 +1,74 @@ +version: '3.5' + +services: + etcd: + container_name: milvus-etcd + image: quay.io/coreos/etcd:v3.5.18 + environment: + - ETCD_AUTO_COMPACTION_MODE=revision + - ETCD_AUTO_COMPACTION_RETENTION=1000 + - ETCD_QUOTA_BACKEND_BYTES=4294967296 + - ETCD_SNAPSHOT_COUNT=50000 + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd # host path falls back to ./volumes when DOCKER_VOLUME_DIRECTORY is unset or empty + command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd + networks: + - datamate + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 30s + timeout: 20s + retries: 3 + + minio: + container_name: milvus-minio + image: minio/minio:RELEASE.2024-12-18T13-15-44Z + environment: + MINIO_ACCESS_KEY: minioadmin # NOTE(review): fixed credentials are checked in and ports 9000/9001 are published; override before exposing this host + MINIO_SECRET_KEY: minioadmin + ports: + - "9001:9001" + - "9000:9000" + volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data + command: minio server /minio_data --console-address ":9001" + networks: + - datamate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + standalone: + container_name: milvus-standalone + image: milvusdb/milvus:v2.6.2 + command: ["milvus", "run", "standalone"] + security_opt: + - seccomp:unconfined + environment: + ETCD_ENDPOINTS: etcd:2379 + MINIO_ADDRESS: minio:9000 + MQ_TYPE: woodpecker 
+ volumes: + - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus + networks: + - datamate + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"] + interval: 30s + start_period: 90s + timeout: 20s + retries: 3 + ports: + - "19530:19530" + - "9091:9091" + depends_on: + - "etcd" + - "minio" + +networks: + datamate: + name: datamate-network + external: true # not created by this file; the network named datamate-network must already exist + driver: bridge diff --git a/scripts/db/rag-management-init.sql b/scripts/db/rag-management-init.sql new file mode 100644 index 0000000..0e429ae --- /dev/null +++ b/scripts/db/rag-management-init.sql @@ -0,0 +1,29 @@ +USE datamate; + +create table if not exists t_rag_knowledge_base +( + id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', + name VARCHAR(255) NOT NULL COMMENT '知识库名称', + description VARCHAR(512) NULL COMMENT '知识库描述', + embedding_model VARCHAR(255) NOT NULL COMMENT '嵌入模型', + chat_model VARCHAR(255) NULL COMMENT '聊天模型', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + created_by VARCHAR(255) COMMENT '创建者', + updated_by VARCHAR(255) COMMENT '更新者' +) comment '知识库表'; + +create table if not exists t_rag_file -- one row per file attached to a knowledge base (file_id, file_name, chunk_count) +( + id VARCHAR(36) PRIMARY KEY COMMENT 'UUID', + knowledge_base_id VARCHAR(36) NOT NULL COMMENT '知识库ID', + file_name VARCHAR(255) NOT NULL COMMENT '文件名', + file_id VARCHAR(255) NOT NULL COMMENT '文件ID', + chunk_count INT COMMENT '切片数', + metadata JSON COMMENT '元数据', + status VARCHAR(50) COMMENT '文件状态', + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间', + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间', + created_by VARCHAR(255) COMMENT '创建者', + updated_by VARCHAR(255) COMMENT '更新者' +) comment '知识库文件表';