diff --git a/.editorconfig b/.editorconfig
index 65ede07..01ab809 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -4,7 +4,7 @@ root = true
charset = utf-8
end_of_line = lf
indent_style = space
-indent_size = 2
+indent_size = 4
insert_final_newline = true
trim_trailing_whitespace = true
diff --git a/backend/services/main-application/pom.xml b/backend/services/main-application/pom.xml
index 342ced3..4f180de 100644
--- a/backend/services/main-application/pom.xml
+++ b/backend/services/main-application/pom.xml
@@ -130,6 +130,10 @@
spring-boot-starter-test
test
+
+ org.springframework.boot
+ spring-boot-autoconfigure
+
@@ -141,6 +145,7 @@
${maven.compiler.source}
${maven.compiler.target}
+ true
-parameters
diff --git a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java
index 3356df1..3cf215c 100644
--- a/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java
+++ b/backend/services/main-application/src/main/java/com/datamate/main/DataMatePlatformApplication.java
@@ -16,31 +16,12 @@ import org.springframework.transaction.annotation.EnableTransactionManagement;
* @version 1.0.0
*/
@SpringBootApplication
-@ComponentScan(basePackages = {
- "com.datamate.main",
- "com.datamate.datamanagement",
- "com.datamate.collection",
- "com.datamate.operator",
- "com.datamate.cleaning",
- "com.datamate.synthesis",
- "com.datamate.annotation",
- "com.datamate.evaluation",
- "com.datamate.pipeline",
- "com.datamate.execution",
- "com.datamate.common"
-})
-@MapperScan(basePackages = {
- "com.datamate.collection.infrastructure.persistence.mapper",
- "com.datamate.datamanagement.infrastructure.persistence.mapper",
- "com.datamate.operator.infrastructure.persistence.mapper",
- "com.datamate.cleaning.infrastructure.persistence.mapper",
- "com.datamate.**.mapper"
-})
+@ComponentScan(basePackages = {"com.datamate"})
+@MapperScan(basePackages = {"com.datamate.**.mapper"})
@EnableTransactionManagement
@EnableAsync
@EnableScheduling
public class DataMatePlatformApplication {
-
public static void main(String[] args) {
SpringApplication.run(DataMatePlatformApplication.class, args);
}
diff --git a/backend/services/rag-indexer-service/pom.xml b/backend/services/rag-indexer-service/pom.xml
index 90aea6d..454a3d9 100644
--- a/backend/services/rag-indexer-service/pom.xml
+++ b/backend/services/rag-indexer-service/pom.xml
@@ -16,33 +16,37 @@
RAG Indexer Service
RAG文档索引服务
+
+
+
+ dev.langchain4j
+ langchain4j-bom
+ 1.8.0
+ pom
+ import
+
+
+
+
com.datamate
domain-common
${project.version}
+
+ com.datamate
+ data-management-service
+ 1.0.0-SNAPSHOT
+
org.springframework.boot
spring-boot-starter-web
- org.springframework.boot
- spring-boot-starter-data-elasticsearch
-
-
- com.mysql
- mysql-connector-j
- ${mysql.version}
-
-
- org.springframework.boot
- spring-boot-starter-test
- test
-
-
- org.springframework.cloud
- spring-cloud-starter-openfeign
+ mysql
+ mysql-connector-java
+ 8.0.33
org.springdoc
@@ -56,6 +60,54 @@
jakarta.validation
jakarta.validation-api
+
+ org.springframework.boot
+ spring-boot-starter-test
+ test
+
+
+ dev.langchain4j
+ langchain4j-open-ai
+ 1.8.0
+
+
+ dev.langchain4j
+ langchain4j
+ 1.8.0
+
+
+ dev.langchain4j
+ langchain4j-document-parser-apache-pdfbox
+
+
+ dev.langchain4j
+ langchain4j-document-parser-apache-tika
+
+
+ dev.langchain4j
+ langchain4j-document-parser-apache-poi
+
+
+ dev.langchain4j
+ langchain4j-document-parser-markdown
+
+
+ dev.langchain4j
+ langchain4j-document-transformer-jsoup
+
+
+ dev.langchain4j
+ langchain4j-milvus
+
+
+
+ dev.langchain4j
+ langchain4j-embeddings-all-minilm-l6-v2
+
+
+ org.testcontainers
+ milvus
+
@@ -64,31 +116,6 @@
org.springframework.boot
spring-boot-maven-plugin
-
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java
new file mode 100644
index 0000000..989816d
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java
@@ -0,0 +1,118 @@
+package com.datamate.rag.indexer.application;
+
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
+import com.datamate.common.infrastructure.exception.BusinessException;
+import com.datamate.common.infrastructure.exception.KnowledgeBaseErrorCode;
+import com.datamate.common.interfaces.PagedResponse;
+import com.datamate.common.interfaces.PagingQuery;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.model.RagChunk;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.infrastructure.event.DataInsertedEvent;
+import com.datamate.rag.indexer.interfaces.dto.*;
+import lombok.RequiredArgsConstructor;
+import org.springframework.beans.BeanUtils;
+import org.springframework.context.ApplicationEventPublisher;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.util.StringUtils;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Application service that manages knowledge bases and the files attached to them.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Service
+@RequiredArgsConstructor
+public class KnowledgeBaseService {
+    private final KnowledgeBaseRepository knowledgeBaseRepository;
+    private final RagFileRepository ragFileRepository;
+    private final ApplicationEventPublisher eventPublisher;
+
+    /**
+     * Creates a knowledge base.
+     *
+     * @param request creation request; its properties are copied onto the new entity
+     * @return ID of the created knowledge base
+     */
+    public String create(KnowledgeBaseCreateReq request) {
+        KnowledgeBase knowledgeBase = new KnowledgeBase();
+        BeanUtils.copyProperties(request, knowledgeBase);
+        knowledgeBaseRepository.save(knowledgeBase);
+        return knowledgeBase.getId();
+    }
+
+    /**
+     * Updates the name and/or description of a knowledge base.
+     * Fields that are null or blank in the request are left unchanged.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         update request
+     * @throws BusinessException if the knowledge base does not exist
+     */
+    public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) {
+        KnowledgeBase knowledgeBase = getById(knowledgeBaseId);
+        if (StringUtils.hasText(request.getName())) {
+            knowledgeBase.setName(request.getName());
+        }
+        if (StringUtils.hasText(request.getDescription())) {
+            knowledgeBase.setDescription(request.getDescription());
+        }
+        knowledgeBaseRepository.updateById(knowledgeBase);
+    }
+
+    /**
+     * Deletes a knowledge base together with its file records.
+     * Both deletes run in one transaction so a failure cannot leave a knowledge base
+     * without its files, or file records without their knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     */
+    @Transactional(rollbackFor = Exception.class)
+    public void delete(String knowledgeBaseId) {
+        // Remove the dependent file rows first, then the knowledge base itself.
+        ragFileRepository.removeByKnowledgeBaseId(knowledgeBaseId);
+        knowledgeBaseRepository.removeById(knowledgeBaseId);
+        // TODO: delete all documents associated with the knowledge base
+    }
+
+    /**
+     * Loads a knowledge base by ID.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @return the knowledge base
+     * @throws BusinessException if the knowledge base does not exist
+     */
+    public KnowledgeBase getById(String knowledgeBaseId) {
+        return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
+                .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
+    }
+
+    /**
+     * Returns one page of knowledge bases matching the query.
+     *
+     * @param request paging and filter parameters
+     * @return the requested page
+     */
+    public PagedResponse<KnowledgeBase> list(KnowledgeBaseQueryReq request) {
+        IPage<KnowledgeBase> page = new Page<>(request.getPage(), request.getSize());
+        page = knowledgeBaseRepository.page(page, request);
+        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
+    }
+
+    /**
+     * Attaches files to a knowledge base and publishes a {@link DataInsertedEvent}
+     * carrying the knowledge base ID and the requested process type.
+     *
+     * @param request knowledge base ID, process type and the files to attach
+     * @throws BusinessException if the knowledge base does not exist
+     */
+    @Transactional(rollbackFor = Exception.class)
+    public void addFiles(AddFilesReq request) {
+        KnowledgeBase knowledgeBase = getById(request.getKnowledgeBaseId());
+        // A request body without a "files" property must not fail with a NullPointerException.
+        List<AddFilesReq.FileInfo> files = Optional.ofNullable(request.getFiles()).orElseGet(List::of);
+        List<RagFile> ragFiles = files.stream().map(fileInfo -> {
+            RagFile ragFile = new RagFile();
+            ragFile.setKnowledgeBaseId(knowledgeBase.getId());
+            ragFile.setFileId(fileInfo.fileId());
+            ragFile.setFileName(fileInfo.fileName());
+            ragFile.setStatus(FileStatus.UNPROCESSED);
+            return ragFile;
+        }).toList();
+        ragFileRepository.saveBatch(ragFiles, 100);
+        eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase.getId(), request.getProcessType()));
+    }
+
+    /**
+     * Returns one page of the files that belong to the given knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID used to filter the files
+     * @param request         paging parameters
+     * @return the requested page of files
+     */
+    public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
+        IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
+        // Restrict the query to this knowledge base; an unfiltered page would expose every file.
+        page = ragFileRepository.page(page,
+                new LambdaQueryWrapper<RagFile>().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId));
+        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
+    }
+
+    /**
+     * Deletes files from a knowledge base.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param request         files to delete
+     */
+    public void deleteFile(String knowledgeBaseId, DeleteFilesReq request) {
+        // TODO: not implemented yet; the call currently has no effect.
+    }
+
+    /**
+     * Returns one page of chunks of a knowledge base file.
+     *
+     * @param knowledgeBaseId knowledge base ID
+     * @param ragFileId       RAG file ID
+     * @param pagingQuery     paging parameters
+     * @return the requested page
+     */
+    public PagedResponse<RagChunk> getChunks(String knowledgeBaseId, String ragFileId, PagingQuery pagingQuery) {
+        // TODO: no chunk lookup is performed yet, so the returned page never contains records.
+        IPage<RagChunk> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
+        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
+    }
+}
\ No newline at end of file
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java
new file mode 100644
index 0000000..8f3132f
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/FileStatus.java
@@ -0,0 +1,26 @@
+package com.datamate.rag.indexer.domain.model;
+
+/**
+ * Processing status of a file attached to a knowledge base.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+public enum FileStatus {
+ /**
+ * Not processed yet.
+ */
+ UNPROCESSED,
+ /**
+ * Processing in progress.
+ */
+ PROCESSING,
+ /**
+ * Processed successfully.
+ */
+ PROCESSED,
+ /**
+ * Processing failed.
+ */
+ PROCESS_FAILED
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java
new file mode 100644
index 0000000..4a571b3
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/KnowledgeBase.java
@@ -0,0 +1,37 @@
+package com.datamate.rag.indexer.domain.model;
+
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.datamate.common.domain.model.base.BaseEntity;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Knowledge base entity, mapped to table {@code t_rag_knowledge_base}.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Getter
+@Setter
+@TableName("t_rag_knowledge_base")
+public class KnowledgeBase extends BaseEntity {
+ /**
+ * Knowledge base name.
+ */
+ private String name;
+
+ /**
+ * Knowledge base description.
+ */
+ private String description;
+
+ /**
+ * Embedding model.
+ */
+ private String embeddingModel;
+
+ /**
+ * Chat model.
+ */
+ private String chatModel;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java
new file mode 100644
index 0000000..6a6b884
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagChunk.java
@@ -0,0 +1,10 @@
+package com.datamate.rag.indexer.domain.model;
+
+/**
+ * RAG document chunk entity. Currently declares no fields.
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+public class RagChunk {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java
new file mode 100644
index 0000000..ec0445a
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/model/RagFile.java
@@ -0,0 +1,47 @@
+package com.datamate.rag.indexer.domain.model;
+
+
+import com.baomidou.mybatisplus.annotation.TableField;
+import com.baomidou.mybatisplus.annotation.TableName;
+import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
+import com.datamate.common.domain.model.base.BaseEntity;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.Map;
+
+/**
+ * RAG file entity, mapped to table {@code t_rag_file}.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Getter
+@Setter
+@TableName("t_rag_file")
+public class RagFile extends BaseEntity {
+ /**
+ * Knowledge base ID.
+ */
+ private String knowledgeBaseId;
+ /**
+ * File name.
+ */
+ private String fileName;
+ /**
+ * File ID.
+ */
+ private String fileId;
+ /**
+ * Number of chunks.
+ */
+ private Integer chunkCount;
+
+ /**
+ * Metadata, converted to and from its column by {@link JacksonTypeHandler}.
+ */
+ @TableField(typeHandler = JacksonTypeHandler.class)
+ private Map metadata;
+
+ /**
+ * Processing status of the file.
+ */
+ private FileStatus status;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java
new file mode 100644
index 0000000..273abc9
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/KnowledgeBaseRepository.java
@@ -0,0 +1,23 @@
+package com.datamate.rag.indexer.domain.repository;
+
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.repository.IRepository;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
+
+/**
+ * Knowledge base repository interface.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+public interface KnowledgeBaseRepository extends IRepository {
+ /**
+ * Queries knowledge bases page by page.
+ *
+ * @param page paging information
+ * @param request query request
+ * @return page of knowledge bases
+ */
+ IPage page(IPage page, KnowledgeBaseQueryReq request);
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java
new file mode 100644
index 0000000..d55b2b1
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/domain/repository/RagFileRepository.java
@@ -0,0 +1,18 @@
+package com.datamate.rag.indexer.domain.repository;
+
+import com.baomidou.mybatisplus.extension.repository.IRepository;
+import com.datamate.rag.indexer.domain.model.RagFile;
+
+import java.util.List;
+
+/**
+ * Knowledge base file repository interface.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+public interface RagFileRepository extends IRepository {
+ /**
+ * Removes the file records that belong to the given knowledge base.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ */
+ void removeByKnowledgeBaseId(String knowledgeBaseId);
+
+ /**
+ * Finds files of the given knowledge base.
+ * Note that the implementation in RagFileRepositoryImpl only returns files whose status is
+ * UNPROCESSED or PROCESS_FAILED, not every file of the knowledge base.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @return matching files
+ */
+ List findByKnowledgeBaseId(String knowledgeBaseId);
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java
new file mode 100644
index 0000000..417de1e
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/DataInsertedEvent.java
@@ -0,0 +1,12 @@
+package com.datamate.rag.indexer.infrastructure.event;
+
+import com.datamate.rag.indexer.interfaces.dto.ProcessType;
+
+/**
+ * Event describing that data was inserted for a knowledge base.
+ *
+ * @param knowledgeBaseId ID of the knowledge base the data was inserted for
+ * @param processType process type to apply to the inserted data
+ * @author dallas
+ * @since 2025-10-29
+ */
+public record DataInsertedEvent(String knowledgeBaseId, ProcessType processType) {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
new file mode 100644
index 0000000..5c9979e
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/event/RagEtlService.java
@@ -0,0 +1,157 @@
+package com.datamate.rag.indexer.infrastructure.event;
+
+import com.datamate.common.models.domain.entity.ModelConfig;
+import com.datamate.common.models.domain.repository.ModelConfigRepository;
+import com.datamate.common.models.infrastructure.client.ModelClient;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.interfaces.dto.ProcessType;
+import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
+import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
+import dev.langchain4j.data.document.Document;
+import dev.langchain4j.data.document.DocumentParser;
+import dev.langchain4j.data.document.DocumentSplitter;
+import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
+import dev.langchain4j.data.document.parser.TextDocumentParser;
+import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
+import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;
+import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
+import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser;
+import dev.langchain4j.data.document.splitter.*;
+import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
+import dev.langchain4j.data.embedding.Embedding;
+import dev.langchain4j.data.segment.TextSegment;
+import dev.langchain4j.model.embedding.EmbeddingModel;
+import dev.langchain4j.model.output.Response;
+import dev.langchain4j.store.embedding.EmbeddingStore;
+import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
+import lombok.RequiredArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.jetbrains.annotations.NotNull;
+import org.springframework.scheduling.annotation.Async;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.event.TransactionPhase;
+import org.springframework.transaction.event.TransactionalEventListener;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Semaphore;
+
+/**
+ * RAG ETL服务
+ *
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Slf4j
+@Service
+@RequiredArgsConstructor
+public class RagEtlService {
+ private static final Semaphore SEMAPHORE = new Semaphore(10);
+
+ private final RagFileRepository ragFileRepository;
+
+ private final DatasetFileRepository datasetFileRepository;
+
+ private final ModelConfigRepository modelConfigRepository;
+
+ private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
+
+ @Async
+ @TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
+ public void processAfterCommit(DataInsertedEvent event) {
+ // 执行 RAG 处理流水线
+ List ragFiles = ragFileRepository.findByKnowledgeBaseId(event.knowledgeBaseId());
+
+ ragFiles.forEach(ragFile -> {
+ try {
+ SEMAPHORE.acquire();
+ executor.submit(() -> {
+ try {
+ // 执行 RAG 处理流水线
+ ragFile.setStatus(FileStatus.PROCESSING);
+ ragFileRepository.updateById(ragFile);
+ processRagFile(ragFile, event.processType());
+ // 更新文件状态为已处理
+ ragFile.setStatus(FileStatus.PROCESSED);
+ ragFileRepository.updateById(ragFile);
+ } catch (Exception e) {
+ // 处理异常
+ ragFile.setStatus(FileStatus.PROCESS_FAILED);
+ ragFileRepository.updateById(ragFile);
+ } finally {
+ SEMAPHORE.release();
+ }
+ });
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ }
+ }
+ );
+ }
+
+ private void processRagFile(RagFile ragFile, ProcessType processType) {
+ DatasetFile file = datasetFileRepository.getById(ragFile.getFileId());
+ // 使用文档解析器解析文档
+ DocumentParser parser = documentParser(file.getFileType());
+ // 从文件系统读取文档
+ Document document = FileSystemDocumentLoader.loadDocument(file.getFilePath(), parser);
+ // 对html文档进行转换
+ if (Arrays.asList("html", "htm").contains(file.getFileType().toLowerCase())) {
+ document= new HtmlToTextDocumentTransformer().transform(document);
+ }
+ // 使用文档分块器对文档进行分块
+ DocumentSplitter splitter = documentSplitter(processType);
+ List split = splitter.split(document);
+
+ // 更新分块数量
+ ragFile.setChunkCount(split.size());
+ ragFileRepository.updateById(ragFile);
+
+ // 调用模型客户端获取嵌入模型
+ ModelConfig model = modelConfigRepository.getById("1");
+ EmbeddingModel embeddingModel = ModelClient.invokeEmbeddingModel(model);
+ // 调用嵌入模型获取嵌入向量
+ Response<@NotNull List> response = embeddingModel.embedAll(split);
+ // 存储嵌入向量到 Milvus
+ embeddingStore().addAll(response.content(), split);
+ }
+
+ /**
+ * 根据文件类型返回对应的文档解析器
+ *
+ * @param fileType 文件类型
+ * @return 文档解析器
+ */
+ public DocumentParser documentParser(String fileType) {
+ fileType = fileType.toLowerCase();
+ return switch (fileType) {
+ case "txt", "html", "htm" -> new TextDocumentParser();
+ case "md" -> new MarkdownDocumentParser();
+ case "pdf" -> new ApachePdfBoxDocumentParser();
+ case "doc", "docx", "xls", "xlsx", "ppt", "pptx" -> new ApachePoiDocumentParser();
+ default -> new ApacheTikaDocumentParser();
+ };
+ }
+
+ public DocumentSplitter documentSplitter(ProcessType processType) {
+ return switch (processType) {
+ case CHAPTER_CHUNK -> new DocumentByParagraphSplitter(1000, 100);
+ case PARAGRAPH_CHUNK -> new DocumentByLineSplitter(1000, 100);
+ case LENGTH_CHUNK -> new DocumentBySentenceSplitter(1000, 100);
+ case CUSTOM_SEPARATOR_CHUNK -> new DocumentByWordSplitter(1000, 100);
+ case DEFAULT_CHUNK -> new DocumentByRegexSplitter("\\n\\n", "",1000, 100);
+ };
+ }
+
+ public EmbeddingStore embeddingStore() {
+ return MilvusEmbeddingStore.builder()
+ .uri("http://milvus:19530")
+ .collectionName("rag_embeddings")
+ .dimension(1536)
+ .build();
+ }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java
new file mode 100644
index 0000000..c186bac
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/KnowledgeBaseRepositoryImpl.java
@@ -0,0 +1,31 @@
+package com.datamate.rag.indexer.infrastructure.persistence.impl;
+
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
+import com.baomidou.mybatisplus.core.metadata.IPage;
+import com.baomidou.mybatisplus.extension.repository.CrudRepository;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
+import com.datamate.rag.indexer.infrastructure.persistence.mapper.KnowledgeBaseMapper;
+import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
+import org.springframework.stereotype.Repository;
+import org.springframework.util.StringUtils;
+
+/**
+ * Knowledge base repository implementation.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Repository
+public class KnowledgeBaseRepositoryImpl extends CrudRepository implements KnowledgeBaseRepository {
+
+ /**
+ * Queries knowledge bases page by page, newest first.
+ * A {@code like} condition on name, description, createdBy or updatedBy is added only when the
+ * corresponding request field contains text; results are ordered by createdAt descending.
+ *
+ * @param page paging information
+ * @param request query request
+ * @return page of knowledge bases
+ */
+ @Override
+ public IPage page(IPage page, KnowledgeBaseQueryReq request) {
+ return this.page(page, new LambdaQueryWrapper()
+ .like(StringUtils.hasText(request.getName()), KnowledgeBase::getName, request.getName())
+ .like(StringUtils.hasText(request.getDescription()), KnowledgeBase::getDescription, request.getDescription())
+ .like(StringUtils.hasText(request.getCreatedBy()), KnowledgeBase::getCreatedBy, request.getCreatedBy())
+ .like(StringUtils.hasText(request.getUpdatedBy()), KnowledgeBase::getUpdatedBy, request.getUpdatedBy())
+ .orderByDesc(KnowledgeBase::getCreatedAt));
+ }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java
new file mode 100644
index 0000000..0e0c098
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/impl/RagFileRepositoryImpl.java
@@ -0,0 +1,32 @@
+package com.datamate.rag.indexer.infrastructure.persistence.impl;
+
+import com.baomidou.mybatisplus.extension.repository.CrudRepository;
+import com.datamate.rag.indexer.domain.model.FileStatus;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.rag.indexer.domain.repository.RagFileRepository;
+import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+/**
+ * Knowledge base file repository implementation.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Repository
+public class RagFileRepositoryImpl extends CrudRepository implements RagFileRepository {
+ /**
+ * Removes every file record whose knowledge base ID equals the given one.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ */
+ @Override
+ public void removeByKnowledgeBaseId(String knowledgeBaseId) {
+ lambdaUpdate().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId).remove();
+ }
+
+ /**
+ * Lists the files of a knowledge base whose status is UNPROCESSED or PROCESS_FAILED.
+ * Files in any other status are not returned.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @return matching files
+ */
+ @Override
+ public List findByKnowledgeBaseId(String knowledgeBaseId) {
+ return lambdaQuery()
+ .eq(RagFile::getKnowledgeBaseId, knowledgeBaseId)
+ .in(RagFile::getStatus, FileStatus.UNPROCESSED, FileStatus.PROCESS_FAILED)
+ .list();
+ }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java
new file mode 100644
index 0000000..cf1525f
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/KnowledgeBaseMapper.java
@@ -0,0 +1,16 @@
+package com.datamate.rag.indexer.infrastructure.persistence.mapper;
+
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * Knowledge base mapper interface.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Mapper
+public interface KnowledgeBaseMapper extends BaseMapper {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java
new file mode 100644
index 0000000..e0f233f
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/infrastructure/persistence/mapper/RagFileMapper.java
@@ -0,0 +1,16 @@
+package com.datamate.rag.indexer.infrastructure.persistence.mapper;
+
+
+import com.baomidou.mybatisplus.core.mapper.BaseMapper;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import org.apache.ibatis.annotations.Mapper;
+
+/**
+ * RAG file mapper interface.
+ *
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Mapper
+public interface RagFileMapper extends BaseMapper {
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java
new file mode 100644
index 0000000..06963dc
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/EmbeddingController.java
@@ -0,0 +1,8 @@
+package com.datamate.rag.indexer.interfaces;
+
+import org.springframework.web.bind.annotation.RestController;
+
+/**
+ * Embedding REST controller. Currently declares no endpoints.
+ */
+@RestController
+public class EmbeddingController {
+
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java
new file mode 100644
index 0000000..d0ed099
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/KnowledgeBaseController.java
@@ -0,0 +1,137 @@
+package com.datamate.rag.indexer.interfaces;
+
+import com.datamate.rag.indexer.application.KnowledgeBaseService;
+import com.datamate.rag.indexer.domain.model.KnowledgeBase;
+import com.datamate.rag.indexer.domain.model.RagChunk;
+import com.datamate.rag.indexer.domain.model.RagFile;
+import com.datamate.common.infrastructure.common.Response;
+import com.datamate.common.interfaces.PagedResponse;
+import com.datamate.common.interfaces.PagingQuery;
+import com.datamate.rag.indexer.interfaces.dto.*;
+import lombok.RequiredArgsConstructor;
+import org.springframework.web.bind.annotation.*;
+
+import javax.validation.Valid;
+
+/**
+ * Knowledge base controller; every endpoint delegates to {@link KnowledgeBaseService}.
+ *
+ * NOTE(review): request bodies are annotated with javax.validation.Valid while the request DTOs
+ * declare jakarta.validation constraints — confirm which validation API is intended.
+ *
+ * @author dallas
+ * @since 2025-09-30
+ */
+@RestController
+@RequiredArgsConstructor
+@RequestMapping("/v1/knowledge-base")
+public class KnowledgeBaseController {
+ private final KnowledgeBaseService knowledgeBaseService;
+
+ /**
+ * Returns the fixed string "test1".
+ * NOTE(review): looks like a leftover smoke-test endpoint — confirm whether it should ship.
+ *
+ * @return the literal "test1"
+ */
+ @GetMapping(path = "/test1")
+ public String test() {
+ return "test1";
+ }
+
+ /**
+ * Creates a knowledge base.
+ *
+ * @param request knowledge base creation request
+ * @return knowledge base ID
+ */
+ @PostMapping("/create")
+ public String create(@RequestBody @Valid KnowledgeBaseCreateReq request) {
+ return knowledgeBaseService.create(request);
+ }
+
+ /**
+ * Updates a knowledge base.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @param request knowledge base update request
+ */
+ @PutMapping("/{knowledgeBaseId}")
+ public void update(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+ @RequestBody @Valid KnowledgeBaseUpdateReq request) {
+ knowledgeBaseService.update(knowledgeBaseId, request);
+ }
+
+ /**
+ * Deletes a knowledge base.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ */
+ @DeleteMapping("/{knowledgeBaseId}")
+ public void delete(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
+ knowledgeBaseService.delete(knowledgeBaseId);
+ }
+
+ /**
+ * Gets a knowledge base.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @return knowledge base
+ */
+ @GetMapping("/{knowledgeBaseId}")
+ public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
+ return knowledgeBaseService.getById(knowledgeBaseId);
+ }
+
+ /**
+ * Lists knowledge bases page by page.
+ *
+ * @param request paging and filter parameters
+ * @return page of knowledge bases
+ */
+ @PostMapping("/list")
+ public PagedResponse list(@RequestBody @Valid KnowledgeBaseQueryReq request) {
+ return knowledgeBaseService.list(request);
+ }
+
+ /**
+ * Adds files to a knowledge base.
+ * The knowledge base ID from the path overwrites any ID carried in the request body.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @param request add-files request
+ */
+ @PostMapping("/{knowledgeBaseId}/files")
+ public void addFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+ @RequestBody @Valid AddFilesReq request) {
+ request.setKnowledgeBaseId(knowledgeBaseId);
+ knowledgeBaseService.addFiles(request);
+ }
+
+ /**
+ * Lists the files of a knowledge base page by page.
+ * NOTE(review): this GET endpoint reads its parameters from the request body — confirm that
+ * all clients can send a body with GET.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @param request paging parameters
+ * @return page of knowledge base files
+ */
+ @GetMapping("/{knowledgeBaseId}/files")
+ public PagedResponse listFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+ @RequestBody @Valid RagFileReq request) {
+ return knowledgeBaseService.listFiles(knowledgeBaseId, request);
+ }
+
+ /**
+ * Deletes files from a knowledge base.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @param request delete-files request
+ */
+ @DeleteMapping("/{knowledgeBaseId}/files")
+ public void deleteFile(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+ @RequestBody DeleteFilesReq request) {
+ knowledgeBaseService.deleteFile(knowledgeBaseId, request);
+ }
+
+ /**
+ * Returns the chunks of a knowledge base file page by page.
+ *
+ * @param knowledgeBaseId knowledge base ID
+ * @param ragFileId file ID
+ * @param pagingQuery paging parameters
+ * @return page of chunks
+ */
+ @GetMapping("/{knowledgeBaseId}/files/{ragFileId}")
+ public PagedResponse getChunks(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
+ @PathVariable("ragFileId") String ragFileId,
+ PagingQuery pagingQuery) {
+ return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery);
+ }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java
new file mode 100644
index 0000000..52568a2
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/AddFilesReq.java
@@ -0,0 +1,23 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.List;
+
+/**
+ * Request for adding files to a knowledge base.
+ * The file list is typed so that each JSON element is bound to a {@link FileInfo}.
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Getter
+@Setter
+public class AddFilesReq {
+    private String knowledgeBaseId; // overwritten by the controller with the ID from the URL path
+    private ProcessType processType; // chunking type to apply to the files
+    private List<FileInfo> files;
+
+    public record FileInfo(String fileId, String fileName) {
+    }
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java
new file mode 100644
index 0000000..0837d4e
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/DeleteFilesReq.java
@@ -0,0 +1,18 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import lombok.Getter;
+import lombok.Setter;
+
+import java.util.List;
+
+/**
+ * Request for deleting files from a knowledge base.
+ * Accessors are generated so the JSON body can be bound and the IDs can be read back.
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Getter
+@Setter
+public class DeleteFilesReq {
+    private List<String> fileIds;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java
new file mode 100644
index 0000000..c3df78c
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseCreateReq.java
@@ -0,0 +1,41 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.Pattern;
+import jakarta.validation.constraints.Size;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Request for creating a knowledge base.
+ * Field constraints are declared with Bean Validation annotations.
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Setter
+@Getter
+public class KnowledgeBaseCreateReq {
+ /**
+ * Knowledge base name: required, 1 to 255 characters, letters, digits and underscores only.
+ */
+ @NotEmpty(message = "知识库名称不能为空")
+ @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间")
+ @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线")
+ private String name;
+ /**
+ * Knowledge base description: 1 to 512 characters when provided.
+ */
+ @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间")
+ private String description;
+
+ /**
+ * Embedding model: required.
+ */
+ @NotEmpty(message = "嵌入模型不能为空")
+ private String embeddingModel;
+
+ /**
+ * Chat model: no validation constraint is declared for it.
+ */
+ private String chatModel;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java
new file mode 100644
index 0000000..a2e7147
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseQueryReq.java
@@ -0,0 +1,24 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import com.datamate.common.interfaces.PagingQuery;
+import lombok.Getter;
+import lombok.Setter;
+
+import java.time.LocalDateTime;
+
+/**
+ * Request body of the knowledge base list endpoint.
+ * Adds name, description and audit fields to the page and size inherited from PagingQuery.
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Setter
+@Getter
+public class KnowledgeBaseQueryReq extends PagingQuery {
+ private String name;
+ private String description;
+ private LocalDateTime createdAt;
+ private LocalDateTime updatedAt;
+ private String createdBy;
+ private String updatedBy;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java
new file mode 100644
index 0000000..a115654
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/KnowledgeBaseUpdateReq.java
@@ -0,0 +1,30 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.Pattern;
+import jakarta.validation.constraints.Size;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Request for updating a knowledge base.
+ * Only the name and the description are carried by this request.
+ * @author dallas
+ * @since 2025-10-24
+ */
+@Getter
+@Setter
+public class KnowledgeBaseUpdateReq {
+ /**
+ * Knowledge base name: required, 1 to 255 characters, letters, digits and underscores only.
+ */
+ @NotEmpty(message = "知识库名称不能为空")
+ @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间")
+ @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线")
+ private String name;
+ /**
+ * Knowledge base description: 1 to 512 characters when provided.
+ */
+ @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间")
+ private String description;
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java
new file mode 100644
index 0000000..7301163
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/ProcessType.java
@@ -0,0 +1,33 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+/**
+ * Chunking type used when processing files.
+ * Referenced by AddFilesReq through its processType field.
+ * @author dallas
+ * @since 2025-10-29
+ */
+public enum ProcessType {
+ /**
+ * Chunk by chapter.
+ */
+ CHAPTER_CHUNK,
+ /**
+ * Chunk by paragraph.
+ */
+ PARAGRAPH_CHUNK,
+
+ /**
+ * Chunk by length.
+ */
+ LENGTH_CHUNK,
+
+ /**
+ * Chunk by a custom separator.
+ */
+ CUSTOM_SEPARATOR_CHUNK,
+
+ /**
+ * Default chunking.
+ */
+ DEFAULT_CHUNK,
+}
diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java
new file mode 100644
index 0000000..a26f1b0
--- /dev/null
+++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/interfaces/dto/RagFileReq.java
@@ -0,0 +1,17 @@
+package com.datamate.rag.indexer.interfaces.dto;
+
+import com.datamate.common.interfaces.PagingQuery;
+import lombok.Getter;
+import lombok.Setter;
+
+/**
+ * Request for listing the files of a knowledge base.
+ * Accessors are generated so that fileName can be bound from the request and read by the service.
+ * @author dallas
+ * @since 2025-10-29
+ */
+@Getter
+@Setter
+public class RagFileReq extends PagingQuery {
+    private String fileName;
+}
diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java
index 7d87328..4a9647a 100644
--- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java
+++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java
@@ -12,7 +12,9 @@ import java.util.List;
@NoArgsConstructor
@AllArgsConstructor
public class PagedResponse {
+ // 当前页码(从 0 开始)
private long page;
+ // 每页数量
private long size;
private long totalElements;
private long totalPages;
diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java
index 798075f..5c646dd 100644
--- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java
+++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagingQuery.java
@@ -1,14 +1,8 @@
package com.datamate.common.interfaces;
-import lombok.AllArgsConstructor;
import lombok.Getter;
-import lombok.NoArgsConstructor;
-import lombok.Setter;
@Getter
-@Setter
-@NoArgsConstructor
-@AllArgsConstructor
public class PagingQuery {
/**
* 页码,从0开始
@@ -19,4 +13,19 @@ public class PagingQuery {
* 每页大小
*/
private Integer size = 20;
+
+    /**
+     * Sets the page index.
+     * A null or negative value is replaced with 0.
+     */
+    public void setPage(Integer page) {
+        this.page = (page != null && page >= 0) ? page : 0;
+    }
+
+    /**
+     * Sets the page size; a null or non-positive value is replaced with 20.
+     */
+    public void setSize(Integer size) {
+        this.size = (size != null && size > 0) ? size : 20;
+    }
}
diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml
index 98529fd..ba21622 100644
--- a/deployment/docker/datamate/docker-compose.yml
+++ b/deployment/docker/datamate/docker-compose.yml
@@ -113,3 +113,4 @@ volumes:
networks:
datamate:
driver: bridge
+ name: datamate-network
diff --git a/deployment/docker/deer-flow/docker-compose.yml b/deployment/docker/deer-flow/docker-compose.yml
index 00069fa..03853e3 100644
--- a/deployment/docker/deer-flow/docker-compose.yml
+++ b/deployment/docker/deer-flow/docker-compose.yml
@@ -24,5 +24,5 @@ services:
networks:
datamate:
driver: bridge
- name: datamate_datamate
+ name: datamate-network
external: true
diff --git a/deployment/docker/milvus/docker-compose.yml b/deployment/docker/milvus/docker-compose.yml
new file mode 100644
index 0000000..6396068
--- /dev/null
+++ b/deployment/docker/milvus/docker-compose.yml
@@ -0,0 +1,74 @@
+version: '3.5'
+
+services:
+ etcd:  # etcd instance that the standalone service reaches through ETCD_ENDPOINTS
+ container_name: milvus-etcd
+ image: quay.io/coreos/etcd:v3.5.18
+ environment:
+ - ETCD_AUTO_COMPACTION_MODE=revision
+ - ETCD_AUTO_COMPACTION_RETENTION=1000
+ - ETCD_QUOTA_BACKEND_BYTES=4294967296  # 4 GiB
+ - ETCD_SNAPSHOT_COUNT=50000
+ volumes:
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/etcd:/etcd  # host side defaults to ./volumes/etcd
+ command: etcd -advertise-client-urls=http://etcd:2379 -listen-client-urls http://0.0.0.0:2379 --data-dir /etcd  # data dir is the mounted volume
+ networks:
+ - datamate
+ healthcheck:
+ test: ["CMD", "etcdctl", "endpoint", "health"]
+ interval: 30s
+ timeout: 20s
+ retries: 3
+
+ minio:  # MinIO server that the standalone service reaches through MINIO_ADDRESS
+ container_name: milvus-minio
+ image: minio/minio:RELEASE.2024-12-18T13-15-44Z
+ environment:
+ MINIO_ACCESS_KEY: minioadmin  # NOTE(review): hard-coded credentials while 9000/9001 are published on the host; confirm local use only
+ MINIO_SECRET_KEY: minioadmin
+ ports:
+ - "9001:9001"  # console, see --console-address in the command below
+ - "9000:9000"  # server port, also probed by the healthcheck
+ volumes:
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/minio:/minio_data  # host side defaults to ./volumes/minio
+ command: minio server /minio_data --console-address ":9001"
+ networks:
+ - datamate
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
+ interval: 30s
+ timeout: 20s
+ retries: 3
+
+ standalone:  # Milvus server started in standalone mode
+ container_name: milvus-standalone
+ image: milvusdb/milvus:v2.6.2
+ command: ["milvus", "run", "standalone"]
+ security_opt:
+ - seccomp:unconfined
+ environment:
+ ETCD_ENDPOINTS: etcd:2379  # the etcd service of this file, on its client port
+ MINIO_ADDRESS: minio:9000  # the minio service of this file
+ MQ_TYPE: woodpecker
+ volumes:
+ - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus  # host side defaults to ./volumes/milvus
+ networks:
+ - datamate
+ healthcheck:
+ test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
+ interval: 30s
+ start_period: 90s  # failed probes during this window do not count toward retries
+ timeout: 20s
+ retries: 3
+ ports:
+ - "19530:19530"  # presumably the Milvus client port; confirm
+ - "9091:9091"  # port probed by the healthcheck above
+ depends_on:  # short form: start order only, no health condition is declared
+ - "etcd"
+ - "minio"
+
+networks:
+  datamate:
+    name: datamate-network
+    external: true
+    # No driver here: the network is created by the datamate compose file, and an external network takes only `name`.
diff --git a/scripts/db/rag-management-init.sql b/scripts/db/rag-management-init.sql
new file mode 100644
index 0000000..0e429ae
--- /dev/null
+++ b/scripts/db/rag-management-init.sql
@@ -0,0 +1,29 @@
+USE datamate;
+
+-- Knowledge base table: one row per knowledge base.
+CREATE TABLE IF NOT EXISTS t_rag_knowledge_base (
+    id              VARCHAR(36)  PRIMARY KEY COMMENT 'UUID',
+    name            VARCHAR(255) NOT NULL COMMENT '知识库名称',
+    description     VARCHAR(512) NULL COMMENT '知识库描述',
+    embedding_model VARCHAR(255) NOT NULL COMMENT '嵌入模型',
+    chat_model      VARCHAR(255) NULL COMMENT '聊天模型',
+    created_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at      TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    created_by      VARCHAR(255) COMMENT '创建者',
+    updated_by      VARCHAR(255) COMMENT '更新者'
+) COMMENT '知识库表';
+
+CREATE TABLE IF NOT EXISTS t_rag_file (
+    id                VARCHAR(36)  PRIMARY KEY COMMENT 'UUID',
+    knowledge_base_id VARCHAR(36)  NOT NULL COMMENT '知识库ID',
+    file_name         VARCHAR(255) NOT NULL COMMENT '文件名',
+    file_id           VARCHAR(255) NOT NULL COMMENT '文件ID',
+    chunk_count       INT COMMENT '切片数',
+    metadata          JSON COMMENT '元数据',
+    status            VARCHAR(50) COMMENT '文件状态',
+    created_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
+    updated_at        TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '更新时间',
+    created_by        VARCHAR(255) COMMENT '创建者',
+    updated_by        VARCHAR(255) COMMENT '更新者',
+    INDEX idx_rag_file_knowledge_base_id (knowledge_base_id) -- files are listed per knowledge base
+) COMMENT '知识库文件表';