feature: Implement the basic knowledge generation function (#40)

This commit is contained in:
Dallas98
2025-10-30 16:50:54 +08:00
committed by GitHub
parent 5612c7cd91
commit 8d2b41ed94
32 changed files with 1063 additions and 70 deletions

View File

@@ -130,6 +130,10 @@
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-autoconfigure</artifactId>
</dependency>
</dependencies>
<build>
@@ -141,6 +145,7 @@
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<parameters>true</parameters>
<compilerArgs>
<arg>-parameters</arg>
</compilerArgs>

View File

@@ -16,31 +16,12 @@ import org.springframework.transaction.annotation.EnableTransactionManagement;
* @version 1.0.0
*/
@SpringBootApplication
@ComponentScan(basePackages = {
"com.datamate.main",
"com.datamate.datamanagement",
"com.datamate.collection",
"com.datamate.operator",
"com.datamate.cleaning",
"com.datamate.synthesis",
"com.datamate.annotation",
"com.datamate.evaluation",
"com.datamate.pipeline",
"com.datamate.execution",
"com.datamate.common"
})
@MapperScan(basePackages = {
"com.datamate.collection.infrastructure.persistence.mapper",
"com.datamate.datamanagement.infrastructure.persistence.mapper",
"com.datamate.operator.infrastructure.persistence.mapper",
"com.datamate.cleaning.infrastructure.persistence.mapper",
"com.datamate.**.mapper"
})
@ComponentScan(basePackages = {"com.datamate"})
@MapperScan(basePackages = {"com.datamate.**.mapper"})
@EnableTransactionManagement
@EnableAsync
@EnableScheduling
public class DataMatePlatformApplication {
public static void main(String[] args) {
SpringApplication.run(DataMatePlatformApplication.class, args);
}

View File

@@ -16,33 +16,37 @@
<name>RAG Indexer Service</name>
<description>RAG文档索引服务</description>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-bom</artifactId>
<version>1.8.0</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.datamate</groupId>
<artifactId>data-management-service</artifactId>
<version>1.0.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-elasticsearch</artifactId>
</dependency>
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>${mysql.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-openfeign</artifactId>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.33</version>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
@@ -56,6 +60,54 @@
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-open-ai</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
<version>1.8.0</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-apache-pdfbox</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-apache-tika</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-apache-poi</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-markdown</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-transformer-jsoup</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-milvus</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>milvus</artifactId>
</dependency>
</dependencies>
<build>
@@ -64,31 +116,6 @@
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
<!--<plugin>
<groupId>org.openapitools</groupId>
<artifactId>openapi-generator-maven-plugin</artifactId>
<version>6.6.0</version>
<executions>
<execution>
<goals>
<goal>generate</goal>
</goals>
<configuration>
<inputSpec>${project.basedir}/../../openapi/specs/rag-services.yaml</inputSpec>
<generatorName>spring</generatorName>
<output>${project.build.directory}/generated-sources/openapi</output>
<apiPackage>com.datamate.rag.indexer.interfaces.api</apiPackage>
<modelPackage>com.datamate.rag.indexer.interfaces.dto</modelPackage>
<configOptions>
<interfaceOnly>true</interfaceOnly>
<useTags>true</useTags>
<useSpringBoot3>true</useSpringBoot3>
<documentationProvider>springdoc</documentationProvider>
</configOptions>
</configuration>
</execution>
</executions>
</plugin>-->
</plugins>
</build>
</project>

View File

@@ -0,0 +1,118 @@
package com.datamate.rag.indexer.application;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.KnowledgeBaseErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery;
import com.datamate.rag.indexer.domain.model.FileStatus;
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
import com.datamate.rag.indexer.domain.model.RagChunk;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
import com.datamate.rag.indexer.infrastructure.event.DataInsertedEvent;
import com.datamate.rag.indexer.interfaces.dto.*;
import lombok.RequiredArgsConstructor;
import org.springframework.beans.BeanUtils;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.util.StringUtils;

import java.util.List;
import java.util.Optional;
/**
 * Application service for knowledge-base management.
 *
 * <p>Handles CRUD for knowledge bases, attaches uploaded files to a knowledge
 * base and queries files/chunks. Actual file processing runs asynchronously:
 * {@link #addFiles(AddFilesReq)} publishes a {@link DataInsertedEvent} that is
 * consumed after the transaction commits.</p>
 *
 * @author dallas
 * @since 2025-10-24
 */
@Service
@RequiredArgsConstructor
public class KnowledgeBaseService {
    private final KnowledgeBaseRepository knowledgeBaseRepository;
    private final RagFileRepository ragFileRepository;
    private final ApplicationEventPublisher eventPublisher;

    /**
     * Creates a knowledge base from the request payload.
     *
     * @param request creation request (name, description, model identifiers)
     * @return the generated knowledge base id
     */
    public String create(KnowledgeBaseCreateReq request) {
        KnowledgeBase knowledgeBase = new KnowledgeBase();
        BeanUtils.copyProperties(request, knowledgeBase);
        knowledgeBaseRepository.save(knowledgeBase);
        return knowledgeBase.getId();
    }

    /**
     * Partially updates a knowledge base: only non-blank request fields
     * overwrite the stored values.
     *
     * @param knowledgeBaseId id of the knowledge base to update
     * @param request         update request
     * @throws BusinessException if the knowledge base does not exist
     */
    public void update(String knowledgeBaseId, KnowledgeBaseUpdateReq request) {
        KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
            .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
        if (StringUtils.hasText(request.getName())) {
            knowledgeBase.setName(request.getName());
        }
        if (StringUtils.hasText(request.getDescription())) {
            knowledgeBase.setDescription(request.getDescription());
        }
        knowledgeBaseRepository.updateById(knowledgeBase);
    }

    /**
     * Deletes a knowledge base together with its file records.
     *
     * <p>Runs in one transaction so the knowledge-base row and its file rows
     * are removed atomically (the original issued the two deletes unguarded).</p>
     *
     * @param knowledgeBaseId id of the knowledge base to delete
     */
    @Transactional(rollbackFor = Exception.class)
    public void delete(String knowledgeBaseId) {
        knowledgeBaseRepository.removeById(knowledgeBaseId);
        ragFileRepository.removeByKnowledgeBaseId(knowledgeBaseId);
        // TODO: also remove the indexed chunks/embeddings belonging to this knowledge base
    }

    /**
     * Loads a knowledge base by id.
     *
     * @param knowledgeBaseId knowledge base id
     * @return the knowledge base
     * @throws BusinessException if it does not exist
     */
    public KnowledgeBase getById(String knowledgeBaseId) {
        return Optional.ofNullable(knowledgeBaseRepository.getById(knowledgeBaseId))
            .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
    }

    /**
     * Pages knowledge bases matching the optional filters in the request.
     *
     * @param request paging plus filter fields
     * @return one page of knowledge bases
     */
    public PagedResponse<KnowledgeBase> list(KnowledgeBaseQueryReq request) {
        IPage<KnowledgeBase> page = new Page<>(request.getPage(), request.getSize());
        page = knowledgeBaseRepository.page(page, request);
        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
    }

    /**
     * Registers uploaded files with a knowledge base and triggers asynchronous
     * processing once this transaction commits.
     *
     * @param request knowledge base id, chunking strategy and file references
     * @throws BusinessException if the knowledge base does not exist
     */
    @Transactional(rollbackFor = Exception.class)
    public void addFiles(AddFilesReq request) {
        KnowledgeBase knowledgeBase = Optional.ofNullable(knowledgeBaseRepository.getById(request.getKnowledgeBaseId()))
            .orElseThrow(() -> BusinessException.of(KnowledgeBaseErrorCode.KNOWLEDGE_BASE_NOT_FOUND));
        List<RagFile> ragFiles = request.getFiles().stream().map(fileInfo -> {
            RagFile ragFile = new RagFile();
            ragFile.setKnowledgeBaseId(knowledgeBase.getId());
            ragFile.setFileId(fileInfo.fileId());
            ragFile.setFileName(fileInfo.fileName());
            ragFile.setStatus(FileStatus.UNPROCESSED);
            return ragFile;
        }).toList();
        // Insert in batches of 100 to keep individual statements bounded.
        ragFileRepository.saveBatch(ragFiles, 100);
        // The listener runs AFTER_COMMIT, so the new rows are visible when processing starts.
        eventPublisher.publishEvent(new DataInsertedEvent(knowledgeBase.getId(), request.getProcessType()));
    }

    /**
     * Pages the files of one knowledge base.
     *
     * @param knowledgeBaseId knowledge base id
     * @param request         paging parameters
     * @return one page of file records
     */
    public PagedResponse<RagFile> listFiles(String knowledgeBaseId, RagFileReq request) {
        IPage<RagFile> page = new Page<>(request.getPage(), request.getSize());
        // Fix: restrict the page to the requested knowledge base; the original
        // ignored knowledgeBaseId and paged over every t_rag_file row.
        page = ragFileRepository.page(page, new LambdaQueryWrapper<RagFile>()
            .eq(RagFile::getKnowledgeBaseId, knowledgeBaseId));
        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
    }

    /**
     * Removes files from a knowledge base.
     *
     * <p>TODO: not implemented in this iteration — the request is ignored.</p>
     *
     * @param knowledgeBaseId knowledge base id
     * @param request         ids of the files to remove
     */
    public void deleteFile(String knowledgeBaseId, DeleteFilesReq request) {
    }

    /**
     * Pages the chunks of a processed file.
     *
     * <p>TODO: not implemented — chunks live in the vector store and no lookup
     * exists here yet, so this always returns an empty page.</p>
     *
     * @param knowledgeBaseId knowledge base id
     * @param ragFileId       file id
     * @param pagingQuery     paging parameters
     * @return an (currently empty) page of chunks
     */
    public PagedResponse<RagChunk> getChunks(String knowledgeBaseId, String ragFileId, PagingQuery pagingQuery) {
        IPage<RagChunk> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
        return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages());
    }
}

View File

@@ -0,0 +1,26 @@
package com.datamate.rag.indexer.domain.model;
/**
 * Lifecycle status of a file inside a knowledge base.
 *
 * @author dallas
 * @since 2025-10-29
 */
public enum FileStatus {
    /** Not yet processed. */
    UNPROCESSED,

    /** Currently being processed. */
    PROCESSING,

    /** Successfully processed. */
    PROCESSED,

    /** Processing failed. */
    PROCESS_FAILED
}

View File

@@ -0,0 +1,37 @@
package com.datamate.rag.indexer.domain.model;
import com.baomidou.mybatisplus.annotation.TableName;
import com.datamate.common.domain.model.base.BaseEntity;
import lombok.Getter;
import lombok.Setter;
/**
 * Knowledge base aggregate, mapped to table {@code t_rag_knowledge_base}.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Getter
@Setter
@TableName("t_rag_knowledge_base")
public class KnowledgeBase extends BaseEntity<String> {
/**
 * Knowledge base name.
 */
private String name;
/**
 * Knowledge base description.
 */
private String description;
/**
 * Embedding model identifier.
 */
private String embeddingModel;
/**
 * Chat model identifier.
 */
private String chatModel;
}

View File

@@ -0,0 +1,10 @@
package com.datamate.rag.indexer.domain.model;
/**
 * A chunk (text segment) of an indexed document.
 *
 * <p>NOTE(review): placeholder with no fields yet — the chunk-listing endpoint
 * currently returns empty pages of this type; flesh out before release.</p>
 *
 * @author dallas
 * @since 2025-10-29
 */
public class RagChunk {
}

View File

@@ -0,0 +1,47 @@
package com.datamate.rag.indexer.domain.model;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableName;
import com.baomidou.mybatisplus.extension.handlers.JacksonTypeHandler;
import com.datamate.common.domain.model.base.BaseEntity;
import lombok.Getter;
import lombok.Setter;
import java.util.Map;
/**
 * A file attached to a knowledge base, mapped to table {@code t_rag_file}.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Getter
@Setter
@TableName("t_rag_file")
public class RagFile extends BaseEntity<String> {
/**
 * Id of the owning knowledge base.
 */
private String knowledgeBaseId;
/**
 * Display file name.
 */
private String fileName;
/**
 * Id of the underlying dataset file, used to resolve the file path on disk.
 */
private String fileId;
/**
 * Number of text segments produced by splitting; set during processing.
 */
private Integer chunkCount;
/**
 * Arbitrary metadata, persisted as a JSON column.
 */
@TableField(typeHandler = JacksonTypeHandler.class)
private Map<String, Object> metadata;
/**
 * Processing status; see {@link FileStatus}.
 */
private FileStatus status;
}

View File

@@ -0,0 +1,23 @@
package com.datamate.rag.indexer.domain.repository;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
/**
 * Repository for knowledge-base aggregates ({@code t_rag_knowledge_base}).
 *
 * @author dallas
 * @since 2025-10-24
 */
public interface KnowledgeBaseRepository extends IRepository<KnowledgeBase> {
/**
 * Pages knowledge bases matching the optional, fuzzy-matched filters of the
 * request; results are ordered newest first.
 *
 * @param page    paging information
 * @param request query filters
 * @return one page of knowledge bases
 */
IPage<KnowledgeBase> page(IPage<KnowledgeBase> page, KnowledgeBaseQueryReq request);
}

View File

@@ -0,0 +1,18 @@
package com.datamate.rag.indexer.domain.repository;
import com.baomidou.mybatisplus.extension.repository.IRepository;
import com.datamate.rag.indexer.domain.model.RagFile;
import java.util.List;
/**
 * Repository for knowledge-base file records ({@code t_rag_file}).
 *
 * @author dallas
 * @since 2025-10-24
 */
public interface RagFileRepository extends IRepository<RagFile> {
/**
 * Deletes every file record belonging to the given knowledge base.
 *
 * @param knowledgeBaseId knowledge base id
 */
void removeByKnowledgeBaseId(String knowledgeBaseId);
/**
 * Returns the files of a knowledge base that still need processing.
 *
 * <p>NOTE: despite the name, the provided implementation returns only files in
 * UNPROCESSED or PROCESS_FAILED status — not every file of the knowledge base.</p>
 *
 * @param knowledgeBaseId knowledge base id
 * @return pending (new or previously failed) files
 */
List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId);
}

View File

@@ -0,0 +1,12 @@
package com.datamate.rag.indexer.infrastructure.event;
import com.datamate.rag.indexer.interfaces.dto.ProcessType;
/**
 * Event published after rag-file rows for a knowledge base were committed;
 * consumed asynchronously to start the indexing pipeline.
 *
 * @param knowledgeBaseId id of the knowledge base whose files were inserted
 * @param processType     chunking strategy to apply when processing the files
 * @author dallas
 * @since 2025-10-29
 */
public record DataInsertedEvent(String knowledgeBaseId, ProcessType processType) {
}

View File

@@ -0,0 +1,157 @@
package com.datamate.rag.indexer.infrastructure.event;
import com.datamate.common.models.domain.entity.ModelConfig;
import com.datamate.common.models.domain.repository.ModelConfigRepository;
import com.datamate.common.models.infrastructure.client.ModelClient;
import com.datamate.rag.indexer.domain.model.FileStatus;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
import com.datamate.rag.indexer.interfaces.dto.ProcessType;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.TextDocumentParser;
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
import dev.langchain4j.data.document.parser.apache.poi.ApachePoiDocumentParser;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.document.parser.markdown.MarkdownDocumentParser;
import dev.langchain4j.data.document.splitter.*;
import dev.langchain4j.data.document.transformer.jsoup.HtmlToTextDocumentTransformer;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.store.embedding.EmbeddingStore;
import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jetbrains.annotations.NotNull;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.transaction.event.TransactionPhase;
import org.springframework.transaction.event.TransactionalEventListener;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
/**
* RAG ETL服务
*
* @author dallas
* @since 2025-10-29
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class RagEtlService {
private static final Semaphore SEMAPHORE = new Semaphore(10);
private final RagFileRepository ragFileRepository;
private final DatasetFileRepository datasetFileRepository;
private final ModelConfigRepository modelConfigRepository;
private final ExecutorService executor = Executors.newVirtualThreadPerTaskExecutor();
@Async
@TransactionalEventListener(phase = TransactionPhase.AFTER_COMMIT)
public void processAfterCommit(DataInsertedEvent event) {
// 执行 RAG 处理流水线
List<RagFile> ragFiles = ragFileRepository.findByKnowledgeBaseId(event.knowledgeBaseId());
ragFiles.forEach(ragFile -> {
try {
SEMAPHORE.acquire();
executor.submit(() -> {
try {
// 执行 RAG 处理流水线
ragFile.setStatus(FileStatus.PROCESSING);
ragFileRepository.updateById(ragFile);
processRagFile(ragFile, event.processType());
// 更新文件状态为已处理
ragFile.setStatus(FileStatus.PROCESSED);
ragFileRepository.updateById(ragFile);
} catch (Exception e) {
// 处理异常
ragFile.setStatus(FileStatus.PROCESS_FAILED);
ragFileRepository.updateById(ragFile);
} finally {
SEMAPHORE.release();
}
});
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
}
);
}
private void processRagFile(RagFile ragFile, ProcessType processType) {
DatasetFile file = datasetFileRepository.getById(ragFile.getFileId());
// 使用文档解析器解析文档
DocumentParser parser = documentParser(file.getFileType());
// 从文件系统读取文档
Document document = FileSystemDocumentLoader.loadDocument(file.getFilePath(), parser);
// 对html文档进行转换
if (Arrays.asList("html", "htm").contains(file.getFileType().toLowerCase())) {
document= new HtmlToTextDocumentTransformer().transform(document);
}
// 使用文档分块器对文档进行分块
DocumentSplitter splitter = documentSplitter(processType);
List<TextSegment> split = splitter.split(document);
// 更新分块数量
ragFile.setChunkCount(split.size());
ragFileRepository.updateById(ragFile);
// 调用模型客户端获取嵌入模型
ModelConfig model = modelConfigRepository.getById("1");
EmbeddingModel embeddingModel = ModelClient.invokeEmbeddingModel(model);
// 调用嵌入模型获取嵌入向量
Response<@NotNull List<Embedding>> response = embeddingModel.embedAll(split);
// 存储嵌入向量到 Milvus
embeddingStore().addAll(response.content(), split);
}
/**
* 根据文件类型返回对应的文档解析器
*
* @param fileType 文件类型
* @return 文档解析器
*/
public DocumentParser documentParser(String fileType) {
fileType = fileType.toLowerCase();
return switch (fileType) {
case "txt", "html", "htm" -> new TextDocumentParser();
case "md" -> new MarkdownDocumentParser();
case "pdf" -> new ApachePdfBoxDocumentParser();
case "doc", "docx", "xls", "xlsx", "ppt", "pptx" -> new ApachePoiDocumentParser();
default -> new ApacheTikaDocumentParser();
};
}
public DocumentSplitter documentSplitter(ProcessType processType) {
return switch (processType) {
case CHAPTER_CHUNK -> new DocumentByParagraphSplitter(1000, 100);
case PARAGRAPH_CHUNK -> new DocumentByLineSplitter(1000, 100);
case LENGTH_CHUNK -> new DocumentBySentenceSplitter(1000, 100);
case CUSTOM_SEPARATOR_CHUNK -> new DocumentByWordSplitter(1000, 100);
case DEFAULT_CHUNK -> new DocumentByRegexSplitter("\\n\\n", "",1000, 100);
};
}
public EmbeddingStore<TextSegment> embeddingStore() {
return MilvusEmbeddingStore.builder()
.uri("http://milvus:19530")
.collectionName("rag_embeddings")
.dimension(1536)
.build();
}
}

View File

@@ -0,0 +1,31 @@
package com.datamate.rag.indexer.infrastructure.persistence.impl;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
import com.datamate.rag.indexer.domain.repository.KnowledgeBaseRepository;
import com.datamate.rag.indexer.infrastructure.persistence.mapper.KnowledgeBaseMapper;
import com.datamate.rag.indexer.interfaces.dto.KnowledgeBaseQueryReq;
import org.springframework.stereotype.Repository;
import org.springframework.util.StringUtils;
/**
 * MyBatis-Plus backed implementation of {@link KnowledgeBaseRepository}.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Repository
public class KnowledgeBaseRepositoryImpl extends CrudRepository<KnowledgeBaseMapper, KnowledgeBase>
        implements KnowledgeBaseRepository {
    @Override
    public IPage<KnowledgeBase> page(IPage<KnowledgeBase> page, KnowledgeBaseQueryReq request) {
        // Fuzzy-match each filter that was actually supplied; newest first.
        LambdaQueryWrapper<KnowledgeBase> wrapper = new LambdaQueryWrapper<>();
        wrapper.like(StringUtils.hasText(request.getName()), KnowledgeBase::getName, request.getName());
        wrapper.like(StringUtils.hasText(request.getDescription()), KnowledgeBase::getDescription, request.getDescription());
        wrapper.like(StringUtils.hasText(request.getCreatedBy()), KnowledgeBase::getCreatedBy, request.getCreatedBy());
        wrapper.like(StringUtils.hasText(request.getUpdatedBy()), KnowledgeBase::getUpdatedBy, request.getUpdatedBy());
        wrapper.orderByDesc(KnowledgeBase::getCreatedAt);
        return this.page(page, wrapper);
    }
}

View File

@@ -0,0 +1,32 @@
package com.datamate.rag.indexer.infrastructure.persistence.impl;
import com.baomidou.mybatisplus.extension.repository.CrudRepository;
import com.datamate.rag.indexer.domain.model.FileStatus;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.rag.indexer.domain.repository.RagFileRepository;
import com.datamate.rag.indexer.infrastructure.persistence.mapper.RagFileMapper;
import org.springframework.stereotype.Repository;
import java.util.List;
/**
 * MyBatis-Plus backed implementation of {@link RagFileRepository}.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Repository
public class RagFileRepositoryImpl extends CrudRepository<RagFileMapper, RagFile> implements RagFileRepository {
@Override
public void removeByKnowledgeBaseId(String knowledgeBaseId) {
// Bulk delete of every file row belonging to the knowledge base.
lambdaUpdate().eq(RagFile::getKnowledgeBaseId, knowledgeBaseId).remove();
}
@Override
public List<RagFile> findByKnowledgeBaseId(String knowledgeBaseId) {
// Deliberately restricted to files that still need work (new or failed);
// presumably so re-publishing the event retries failures without
// re-processing successes — confirm intent against callers.
return lambdaQuery()
.eq(RagFile::getKnowledgeBaseId, knowledgeBaseId)
.in(RagFile::getStatus, FileStatus.UNPROCESSED, FileStatus.PROCESS_FAILED)
.list();
}
}

View File

@@ -0,0 +1,16 @@
package com.datamate.rag.indexer.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
import org.apache.ibatis.annotations.Mapper;
/**
 * MyBatis mapper for {@link KnowledgeBase} rows.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Mapper
public interface KnowledgeBaseMapper extends BaseMapper<KnowledgeBase> {
}

View File

@@ -0,0 +1,16 @@
package com.datamate.rag.indexer.infrastructure.persistence.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.datamate.rag.indexer.domain.model.RagFile;
import org.apache.ibatis.annotations.Mapper;
/**
 * MyBatis mapper for {@link RagFile} rows.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Mapper
public interface RagFileMapper extends BaseMapper<RagFile> {
}

View File

@@ -0,0 +1,8 @@
package com.datamate.rag.indexer.interfaces;
import org.springframework.web.bind.annotation.RestController;
/**
 * Placeholder REST controller for embedding-related endpoints.
 *
 * <p>NOTE(review): currently empty and declares no request mapping — remove or
 * implement before release.</p>
 */
@RestController
public class EmbeddingController {
}

View File

@@ -0,0 +1,137 @@
package com.datamate.rag.indexer.interfaces;
import com.datamate.rag.indexer.application.KnowledgeBaseService;
import com.datamate.rag.indexer.domain.model.KnowledgeBase;
import com.datamate.rag.indexer.domain.model.RagChunk;
import com.datamate.rag.indexer.domain.model.RagFile;
import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery;
import com.datamate.rag.indexer.interfaces.dto.*;
import lombok.RequiredArgsConstructor;
import org.springframework.web.bind.annotation.*;
import javax.validation.Valid;
/**
 * REST controller for knowledge-base management.
 *
 * <p>NOTE(review): this file imports {@code javax.validation.Valid} while the
 * DTOs carry {@code jakarta.validation} constraints — on Spring Boot 3 the
 * javax annotation does not trigger bean validation; switch the import to
 * jakarta. Also {@code listFiles} binds a {@code @RequestBody} on a GET
 * mapping, which many HTTP clients and proxies do not support.</p>
 *
 * @author dallas
 * @since 2025-09-30
 */
@RestController
@RequiredArgsConstructor
@RequestMapping("/v1/knowledge-base")
public class KnowledgeBaseController {
private final KnowledgeBaseService knowledgeBaseService;
// NOTE(review): leftover smoke-test endpoint; remove before release.
@GetMapping(path = "/test1")
public String test() {
return "test1";
}
/**
 * Creates a knowledge base.
 *
 * @param request creation request
 * @return the new knowledge base id
 */
@PostMapping("/create")
public String create(@RequestBody @Valid KnowledgeBaseCreateReq request) {
return knowledgeBaseService.create(request);
}
/**
 * Partially updates a knowledge base (only non-blank fields are applied).
 *
 * @param knowledgeBaseId knowledge base id
 * @param request         update request
 */
@PutMapping("/{knowledgeBaseId}")
public void update(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
@RequestBody @Valid KnowledgeBaseUpdateReq request) {
knowledgeBaseService.update(knowledgeBaseId, request);
}
/**
 * Deletes a knowledge base and its file records.
 *
 * @param knowledgeBaseId knowledge base id
 */
@DeleteMapping("/{knowledgeBaseId}")
public void delete(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
knowledgeBaseService.delete(knowledgeBaseId);
}
/**
 * Fetches a knowledge base by id.
 *
 * @param knowledgeBaseId knowledge base id
 * @return the knowledge base
 */
@GetMapping("/{knowledgeBaseId}")
public KnowledgeBase get(@PathVariable("knowledgeBaseId") String knowledgeBaseId) {
return knowledgeBaseService.getById(knowledgeBaseId);
}
/**
 * Pages knowledge bases matching the filters in the request.
 *
 * @return one page of knowledge bases
 */
@PostMapping("/list")
public PagedResponse<KnowledgeBase> list(@RequestBody @Valid KnowledgeBaseQueryReq request) {
return knowledgeBaseService.list(request);
}
/**
 * Attaches already-uploaded files to a knowledge base; the id from the path
 * overrides any id in the body.
 *
 * @param knowledgeBaseId knowledge base id
 * @param request         file references plus chunking strategy
 */
@PostMapping("/{knowledgeBaseId}/files")
public void addFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
@RequestBody @Valid AddFilesReq request) {
request.setKnowledgeBaseId(knowledgeBaseId);
knowledgeBaseService.addFiles(request);
}
/**
 * Pages the files of a knowledge base.
 *
 * @param knowledgeBaseId knowledge base id
 * @return one page of file records
 */
@GetMapping("/{knowledgeBaseId}/files")
public PagedResponse<RagFile> listFiles(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
@RequestBody @Valid RagFileReq request) {
return knowledgeBaseService.listFiles(knowledgeBaseId, request);
}
/**
 * Removes files from a knowledge base.
 *
 * @param knowledgeBaseId knowledge base id
 * @param request         ids of the files to remove
 */
@DeleteMapping("/{knowledgeBaseId}/files")
public void deleteFile(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
@RequestBody DeleteFilesReq request) {
knowledgeBaseService.deleteFile(knowledgeBaseId, request);
}
/**
 * Pages the chunks of a processed file.
 *
 * @param knowledgeBaseId knowledge base id
 * @param ragFileId       file id
 * @return one page of chunks
 */
@GetMapping("/{knowledgeBaseId}/files/{ragFileId}")
public PagedResponse<RagChunk> getChunks(@PathVariable("knowledgeBaseId") String knowledgeBaseId,
@PathVariable("ragFileId") String ragFileId,
PagingQuery pagingQuery) {
return knowledgeBaseService.getChunks(knowledgeBaseId, ragFileId, pagingQuery);
}
}

View File

@@ -0,0 +1,23 @@
package com.datamate.rag.indexer.interfaces.dto;
import lombok.Getter;
import lombok.Setter;
import java.util.List;
/**
 * Request for attaching already-uploaded files to a knowledge base.
 *
 * @author dallas
 * @since 2025-10-29
 */
@Getter
@Setter
public class AddFilesReq {
// Filled from the path variable by the controller, not from the request body.
private String knowledgeBaseId;
// Chunking strategy applied when the files are processed.
private ProcessType processType;
// Files to attach.
private List<FileInfo> files;
/**
 * Reference to an uploaded file: dataset file id plus display name.
 */
public record FileInfo(String fileId, String fileName) {
}
}

View File

@@ -0,0 +1,13 @@
package com.datamate.rag.indexer.interfaces.dto;
import java.util.List;
/**
* 删除文件请求
*
* @author dallas
* @since 2025-10-29
*/
public class DeleteFilesReq {
private List<String> fileIds;
}

View File

@@ -0,0 +1,41 @@
package com.datamate.rag.indexer.interfaces.dto;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.Pattern;
import jakarta.validation.constraints.Size;
import lombok.Getter;
import lombok.Setter;
/**
 * Knowledge base creation request.
 *
 * @author dallas
 * @since 2025-10-24
 */
@Setter
@Getter
public class KnowledgeBaseCreateReq {
/**
 * Knowledge base name: required, 1-255 chars, letters/digits/underscore only.
 */
@NotEmpty(message = "知识库名称不能为空")
@Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间")
@Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线")
private String name;
/**
 * Optional description, 1-512 chars when present.
 */
@Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间")
private String description;
/**
 * Embedding model identifier (required).
 */
@NotEmpty(message = "嵌入模型不能为空")
private String embeddingModel;
/**
 * Chat model identifier (optional).
 */
private String chatModel;
}

View File

@@ -0,0 +1,24 @@
package com.datamate.rag.indexer.interfaces.dto;
import com.datamate.common.interfaces.PagingQuery;
import lombok.Getter;
import lombok.Setter;
import java.time.LocalDateTime;
/**
 * Paged query filters for listing knowledge bases.
 *
 * @author dallas
 * @since 2025-10-29
 */
@Setter
@Getter
public class KnowledgeBaseQueryReq extends PagingQuery {
// Fuzzy-matched filters (see KnowledgeBaseRepositoryImpl#page).
private String name;
private String description;
// NOTE(review): createdAt/updatedAt are accepted but never used by the
// repository query — confirm whether date filtering is still intended.
private LocalDateTime createdAt;
private LocalDateTime updatedAt;
private String createdBy;
private String updatedBy;
}

View File

@@ -0,0 +1,30 @@
package com.datamate.rag.indexer.interfaces.dto;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.Pattern;
import jakarta.validation.constraints.Size;
import lombok.Getter;
import lombok.Setter;
/**
 * Knowledge base update request. All fields are optional: the service applies
 * only the values that are non-blank (partial update).
 *
 * <p>Fix: the original annotated {@code name} with {@code @NotEmpty}, which
 * made the name mandatory on every update and contradicted the service's
 * "apply only when present" handling.</p>
 *
 * @author dallas
 * @since 2025-10-24
 */
@Getter
@Setter
public class KnowledgeBaseUpdateReq {
    /**
     * New knowledge base name (optional); when present: 1-255 chars,
     * letters/digits/underscore only.
     */
    @Size(min = 1, max = 255, message = "知识库名称长度必须在 1 到 255 之间")
    @Pattern(regexp = "^[a-zA-Z0-9_]+$", message = "知识库名称只能包含字母、数字和下划线")
    private String name;

    /**
     * New description (optional); when present: 1-512 chars.
     */
    @Size(min = 1, max = 512, message = "知识库描述长度必须在 1 到 512 之间")
    private String description;
}

View File

@@ -0,0 +1,33 @@
package com.datamate.rag.indexer.interfaces.dto;
/**
 * Chunking strategy applied when splitting a document into segments.
 *
 * @author dallas
 * @since 2025-10-29
 */
public enum ProcessType {
    /** Split by chapter. */
    CHAPTER_CHUNK,

    /** Split by paragraph. */
    PARAGRAPH_CHUNK,

    /** Split by fixed length. */
    LENGTH_CHUNK,

    /** Split on a custom separator. */
    CUSTOM_SEPARATOR_CHUNK,

    /** Default splitting strategy. */
    DEFAULT_CHUNK
}

View File

@@ -0,0 +1,13 @@
package com.datamate.rag.indexer.interfaces.dto;
import com.datamate.common.interfaces.PagingQuery;
/**
 * Paged query for the files inside a knowledge base.
 *
 * <p>Fix: the original declared {@code fileName} with no accessors, so it
 * could neither be bound from the request nor read by the service.</p>
 *
 * @author dallas
 * @since 2025-10-29
 */
public class RagFileReq extends PagingQuery {
    /** Optional filter on file name. */
    private String fileName;

    public String getFileName() {
        return fileName;
    }

    public void setFileName(String fileName) {
        this.fileName = fileName;
    }
}