From 145c154d1fed5f435972e76b4f458db3c8be09f6 Mon Sep 17 00:00:00 2001 From: Dallas98 <40557804+Dallas98@users.noreply.github.com> Date: Mon, 17 Nov 2025 17:36:09 +0800 Subject: [PATCH] feat: Integrate Milvus service for enhanced knowledge base management and file deletion (#88) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Refactor system parameter management with new data structure and update logic * fix: 修复知识库相关问题 --- .../application/KnowledgeBaseService.java | 38 ++++++++++++++++--- .../domain/repository/RagFileRepository.java | 8 +++- .../infrastructure/event/RagEtlService.java | 22 +++++++---- .../impl/RagFileRepositoryImpl.java | 20 +++++++++- .../interfaces/KnowledgeBaseController.java | 13 +++---- .../interfaces/dto/KnowledgeBaseResp.java | 21 ++++++++++ .../indexer/interfaces/dto/RagFileReq.java | 5 +++ .../KnowledgeBase/Home/KnowledgeBasePage.tsx | 18 +-------- .../components/CreateKnowledgeBase.tsx | 3 +- .../KnowledgeBase/knowledge-base.const.tsx | 36 +++++++++--------- .../KnowledgeBase/knowledge-base.model.ts | 4 ++ 11 files changed, 133 insertions(+), 55 deletions(-) create mode 100644 backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseResp.java diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java index 1d97a84..858aca4 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java @@ -2,6 +2,7 @@ package com.datamate.rag.indexer.application; import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.datamate.common.setting.domain.repository.ModelConfigRepository; import com.datamate.rag.indexer.domain.model.FileStatus; import com.datamate.rag.indexer.domain.model.KnowledgeBase; import com.datamate.rag.indexer.domain.model.RagChunk; @@ -15,6 +16,7 @@ import com.datamate.common.interfaces.PagedResponse; import com.datamate.common.interfaces.PagingQuery; import com.datamate.rag.indexer.interfaces.dto.*; import lombok.RequiredArgsConstructor; +import org.jetbrains.annotations.NotNull; import org.springframework.beans.BeanUtils; import org.springframework.context.ApplicationEventPublisher; import org.springframework.stereotype.Service; @@ -36,6 +38,7 @@ public class KnowledgeBaseService { private final KnowledgeBaseRepository knowledgeBaseRepository; private final RagFileRepository ragFileRepository; private final ApplicationEventPublisher eventPublisher; + private final ModelConfigRepository modelConfigRepository; /** @@ -75,15 +78,39 @@ public class KnowledgeBaseService { // TODO: 删除知识库关联的所有文档 } - public KnowledgeBase getById(String knowledgeBaseId) { - return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId)) + public KnowledgeBaseResp getById(String knowledgeBaseId) { + KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId)) .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND)); + KnowledgeBaseResp resp = getKnowledgeBaseResp(knowledgeBase); + resp.setEmbedding(modelConfigRepository.getById(knowledgeBase.getEmbeddingModel())); + resp.setChat(modelConfigRepository.getById(knowledgeBase.getChatModel())); + return resp; } - public PagedResponse list(KnowledgeBaseQueryReq request) { + @NotNull + private KnowledgeBaseResp getKnowledgeBaseResp(KnowledgeBase knowledgeBase) { + KnowledgeBaseResp resp = new KnowledgeBaseResp(); + BeanUtils.copyProperties(knowledgeBase, resp); + + // 获取该知识库的所有文件 + List files = ragFileRepository.findAllByKnowledgeBaseId(knowledgeBase.getId()); + resp.setFileCount((long) files.size()); + + // 计算分片总数 + long totalChunkCount = files.stream() + .mapToLong(file -> file.getChunkCount() != null ? file.getChunkCount() : 0) + .sum(); + resp.setChunkCount(totalChunkCount); + return resp; + } + + public PagedResponse list(KnowledgeBaseQueryReq request) { IPage page = new Page<>(request.getPage(), request.getSize()); page = knowledgeBaseRepository.page(page, request); - return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); + + // 将 KnowledgeBase 转换为 KnowledgeBaseResp,并计算 fileCount 和 chunkCount + List respList = page.getRecords().stream().map(this::getKnowledgeBaseResp).toList(); + return PagedResponse.of(respList, page.getCurrent(), page.getTotal(), page.getPages()); } @Transactional(rollbackFor = Exception.class) @@ -104,7 +131,8 @@ public class KnowledgeBaseService { public PagedResponse listFiles(String knowledgeBaseId, RagFileReq request) { IPage page = new Page<>(request.getPage(), request.getSize()); - page = ragFileRepository.page(page); + request.setKnowledgeBaseId(knowledgeBaseId); + page = ragFileRepository.page(page, request); return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); } diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java index d55b2b1..217f206 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java @@ -1,7 +1,9 @@ package com.datamate.rag.indexer.domain.repository; +import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.repository.IRepository; import com.datamate.rag.indexer.domain.model.RagFile; +import com.datamate.rag.indexer.interfaces.dto.RagFileReq; import java.util.List; @@ -14,5 +16,9 @@ import java.util.List; public interface RagFileRepository extends IRepository { void removeByKnowledgeBaseId(String knowledgeBaseId); - List findByKnowledgeBaseId(String knowledgeBaseId); + List findNotSuccessByKnowledgeBaseId(String knowledgeBaseId); + + List findAllByKnowledgeBaseId(String knowledgeBaseId); + + IPage page(IPage page, RagFileReq request); } diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java index fc1743e..bfe5d53 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java @@ -3,12 +3,13 @@ package com.datamate.rag.indexer.infrastructure.event; import com.datamate.common.setting.domain.entity.ModelConfig; import com.datamate.common.setting.domain.repository.ModelConfigRepository; import com.datamate.common.setting.infrastructure.client.ModelClient; +import com.datamate.datamanagement.domain.model.dataset.DatasetFile; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; import com.datamate.rag.indexer.domain.model.FileStatus; import com.datamate.rag.indexer.domain.model.RagFile; import com.datamate.rag.indexer.domain.repository.RagFileRepository; import com.datamate.rag.indexer.interfaces.dto.ProcessType; -import com.datamate.datamanagement.domain.model.dataset.DatasetFile; -import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; +import com.google.common.collect.Lists; import dev.langchain4j.data.document.Document; import dev.langchain4j.data.document.DocumentParser; import dev.langchain4j.data.document.DocumentSplitter; @@ -18,7 +19,10 @@ import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentPa import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser; import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser; -import dev.langchain4j.data.document.splitter.*; +import dev.langchain4j.data.document.splitter.DocumentByLineSplitter; +import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter; +import dev.langchain4j.data.document.splitter.DocumentBySentenceSplitter; +import dev.langchain4j.data.document.splitter.DocumentByWordSplitter; import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; @@ -68,7 +72,7 @@ public class RagEtlService { @TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT) public void processAfterCommit(DataInsertedEvent event) { // 执行 RAG 处理流水线 - List ragFiles = ragFileRepository.findByKnowledgeBaseId(event.knowledgeBase().getId()); + List ragFiles = ragFileRepository.findNotSuccessByKnowledgeBaseId(event.knowledgeBase().getId()); ragFiles.forEach(ragFile -> { try { @@ -108,6 +112,7 @@ public class RagEtlService { if (Arrays.asList("html", "htm").contains(file.getFileType().toLowerCase())) { document = new HtmlToTextDocumentTransformer().transform(document); } + document.metadata().put("fileId", ragFile.getFileId()); // 使用文档分块器对文档进行分块 DocumentSplitter splitter = documentSplitter(event.addFilesReq().getProcessType()); List split = splitter.split(document); @@ -120,9 +125,12 @@ public class RagEtlService { ModelConfig model = modelConfigRepository.getById(event.knowledgeBase().getEmbeddingModel()); EmbeddingModel embeddingModel = ModelClient.invokeEmbeddingModel(model); // 调用嵌入模型获取嵌入向量 - List content = embeddingModel.embedAll(split).content(); - // 存储嵌入向量到 Milvus - embeddingStore(embeddingModel, event.knowledgeBase().getName()).addAll(content, split); + + Lists.partition(split, 20).forEach(partition -> { + List content = embeddingModel.embedAll(partition).content(); + // 存储嵌入向量到 Milvus + embeddingStore(embeddingModel, event.knowledgeBase().getName()).addAll(content, partition); + }); } /** diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java index 0e0c098..a6afa9e 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java @@ -1,11 +1,14 @@ package com.datamate.rag.indexer.infrastructure.persistence.impl; +import com.baomidou.mybatisplus.core.metadata.IPage; import com.baomidou.mybatisplus.extension.repository.CrudRepository; import com.datamate.rag.indexer.domain.model.FileStatus; import com.datamate.rag.indexer.domain.model.RagFile; import com.datamate.rag.indexer.domain.repository.RagFileRepository; import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper; +import com.datamate.rag.indexer.interfaces.dto.RagFileReq; import org.springframework.stereotype.Repository; +import org.springframework.util.StringUtils; import java.util.List; @@ -23,10 +26,25 @@ public class RagFileRepositoryImpl extends CrudRepository findByKnowledgeBaseId(String knowledgeBaseId) { + public List findNotSuccessByKnowledgeBaseId(String knowledgeBaseId) { return lambdaQuery() .eq(RagFile::getKnowledgeBaseId, knowledgeBaseId) .in(RagFile::getStatus, FileStatus.UNPROCESSED, FileStatus.PROCESS_FAILED) .list(); } + + @Override + public List findAllByKnowledgeBaseId(String knowledgeBaseId) { + return lambdaQuery() + .eq(RagFile::getKnowledgeBaseId, knowledgeBaseId) + .list(); + } + + @Override + public IPage page(IPage page, RagFileReq request) { + return lambdaQuery() + .eq(RagFile::getKnowledgeBaseId, request.getKnowledgeBaseId()) + .like(StringUtils.hasText(request.getFileName()), RagFile::getFileName, request.getFileName()) + .page(page); + } } diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java index 98bbe4a..63266db 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java @@ -1,11 +1,10 @@ package com.datamate.rag.indexer.interfaces; -import com.datamate.rag.indexer.application.KnowledgeBaseService; -import com.datamate.rag.indexer.domain.model.KnowledgeBase; -import com.datamate.rag.indexer.domain.model.RagChunk; -import com.datamate.rag.indexer.domain.model.RagFile; import com.datamate.common.interfaces.PagedResponse; import com.datamate.common.interfaces.PagingQuery; +import com.datamate.rag.indexer.application.KnowledgeBaseService; +import com.datamate.rag.indexer.domain.model.RagChunk; +import com.datamate.rag.indexer.domain.model.RagFile; import com.datamate.rag.indexer.interfaces.dto.*; import jakarta.validation.Valid; import lombok.RequiredArgsConstructor; @@ -65,7 +64,7 @@ public class KnowledgeBaseController { * @return 知识库 */ @GetMapping("/{knowledgeBaseId}") - public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) { + public KnowledgeBaseResp get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) { return knowledgeBaseService.getById(knowledgeBaseId); } @@ -75,7 +74,7 @@ public class KnowledgeBaseController { * @return 知识库列表 */ @PostMapping("/list") - public PagedResponse list(@RequestBody @Valid KnowledgeBaseQueryReq request) { + public PagedResponse list(@RequestBody @Valid KnowledgeBaseQueryReq request) { return knowledgeBaseService.list(request); } @@ -129,4 +128,4 @@ public class KnowledgeBaseController { PagingQuery pagingQuery) { return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery); } -} +} \ No newline at end of file diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseResp.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseResp.java new file mode 100644 index 0000000..3776d86 --- /dev/null +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseResp.java @@ -0,0 +1,21 @@ +package com.datamate.rag.indexer.interfaces.dto; + +import com.datamate.common.setting.domain.entity.ModelConfig; +import com.datamate.rag.indexer.domain.model.KnowledgeBase; +import lombok.Getter; +import lombok.Setter; + +/** + * 知识库响应实体 + * + * @author dallas + * @since 2025-11-17 + */ +@Setter +@Getter +public class KnowledgeBaseResp extends KnowledgeBase { + private Long fileCount; + private Long chunkCount; + private ModelConfig embedding; + private ModelConfig chat; +} \ No newline at end of file diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java index a26f1b0..55d1988 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java @@ -1,6 +1,8 @@ package com.datamate.rag.indexer.interfaces.dto; import com.datamate.common.interfaces.PagingQuery; +import lombok.Getter; +import lombok.Setter; /** * RAG 文件请求 @@ -8,6 +10,9 @@ import com.datamate.common.interfaces.PagingQuery; * @author dallas * @since 2025-10-29 */ +@Setter +@Getter public class RagFileReq extends PagingQuery { private String fileName; + private String knowledgeBaseId; } diff --git a/frontend/src/pages/KnowledgeBase/Home/KnowledgeBasePage.tsx b/frontend/src/pages/KnowledgeBase/Home/KnowledgeBasePage.tsx index c372fd7..0d48062 100644 --- a/frontend/src/pages/KnowledgeBase/Home/KnowledgeBasePage.tsx +++ b/frontend/src/pages/KnowledgeBase/Home/KnowledgeBasePage.tsx @@ -28,7 +28,7 @@ export default function KnowledgeBasePage() { handleFiltersChange, } = useFetchData( queryKnowledgeBasesUsingPost, - mapKnowledgeBase + (kb) => mapKnowledgeBase(kb, false) // 在首页不显示索引模型和文本理解模型字段 ); const handleDeleteKB = async (kb: KnowledgeBaseItem) => { @@ -84,20 +84,6 @@ export default function KnowledgeBasePage() { ), }, - { - title: "向量数据库", - dataIndex: "embeddingModel", - key: "embeddingModel", - width: 150, - ellipsis: true, - }, - { - title: "大语言模型", - dataIndex: "chatModel", - key: "chatModel", - width: 150, - ellipsis: true, - }, { title: "创建时间", dataIndex: "createdAt", @@ -192,4 +178,4 @@ export default function KnowledgeBasePage() { )} ); -} +} \ No newline at end of file diff --git a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx index fb1d940..d21387c 100644 --- a/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx +++ b/frontend/src/pages/KnowledgeBase/components/CreateKnowledgeBase.tsx @@ -129,6 +129,7 @@ export default function CreateKnowledgeBase({