feat(knowledge-graph): 实现知识图谱基础设施搭建

实现功能:
- Neo4j Docker Compose 配置(社区版,端口 7474/7687,数据持久化)
- Makefile 新增 Neo4j 命令(neo4j-up/down/logs/shell)
- knowledge-graph-service Spring Boot 服务(完整的 DDD 分层架构)
- kg_extraction Python 模块(基于 LangChain LLMGraphTransformer)

技术实现:
- Neo4j 配置:环境变量化密码,统一默认值 datamate123
- Java 服务:
  - Domain: GraphEntity, GraphRelation 实体模型
  - Repository: Spring Data Neo4j,支持 graphId 范围查询
  - Service: 业务逻辑,graphId 双重校验,查询限流
  - Controller: REST API,UUID 格式校验
  - Exception: 实现 ErrorCode 接口,统一异常体系
- Python 模块:
  - KnowledgeGraphExtractor 类
  - 支持异步/同步/批量抽取
  - 支持 schema-guided 模式
  - 兼容 OpenAI 及自部署模型

关键设计:
- graphId 权限边界:所有实体操作都在正确的 graphId 范围内
- 查询限流:depth 和 limit 参数受配置约束
- 异常处理:统一使用 BusinessException + ErrorCode
- 凭据管理:环境变量化,避免硬编码
- 双重防御:Controller 格式校验 + Service 业务校验

代码审查:
- 经过 3 轮 Codex 审查和 2 轮 Claude 修复
- 所有 P0 和 P1 问题已解决
- 编译通过,无阻塞性问题

文件变更:
- 新增:Neo4j 配置、knowledge-graph-service(11 个 Java 文件)、kg_extraction(3 个 Python 文件)
- 修改:Makefile、pom.xml、application.yml、pyproject.toml
This commit is contained in:
2026-02-17 20:42:55 +08:00
parent 8f21798d57
commit 5a553ddde3
22 changed files with 1007 additions and 0 deletions

View File

@@ -0,0 +1,109 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.datamate</groupId>
<artifactId>services</artifactId>
<version>1.0.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>knowledge-graph-service</artifactId>
<name>Knowledge Graph Service</name>
<description>知识图谱服务 - 基于Neo4j的实体关系管理与图谱查询</description>
<!--
Module dependencies. Only domain-common, mysql-connector-j and the compiler
plugin carry an explicit version here; the remaining versions are presumably
managed by the parent POM (TODO confirm against ../pom.xml).
-->
<dependencies>
<!-- Shared project module, pinned to the same version as this module. -->
<dependency>
<groupId>com.datamate</groupId>
<artifactId>domain-common</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Spring Data Neo4j -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-neo4j</artifactId>
</dependency>
<!-- Spring MVC, used by the REST controller of this module. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<!-- NOTE(review): no Redis usage is visible in this module's sources; confirm this starter is needed. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-redis</artifactId>
</dependency>
<!-- NOTE(review): no JDBC/MySQL access is visible in this module's sources; confirm this driver is needed. -->
<dependency>
<groupId>com.mysql</groupId>
<artifactId>mysql-connector-j</artifactId>
<version>${mysql.version}</version>
</dependency>
<!-- Test-only dependencies. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<!-- OpenAPI documentation UI for the Spring MVC endpoints. -->
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
</dependency>
<dependency>
<groupId>org.openapitools</groupId>
<artifactId>jackson-databind-nullable</artifactId>
</dependency>
<!-- Bean Validation API; the DTOs and the controller use jakarta.validation constraints. -->
<dependency>
<groupId>jakarta.validation</groupId>
<artifactId>jakarta.validation-api</artifactId>
</dependency>
</dependencies>
<!-- Build configuration: Spring Boot packaging plus annotation processing for Lombok and MapStruct. -->
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<!-- NOTE(review): "arguments" normally carries application arguments for the run goal; the value "true" looks unintended. Confirm against the sibling modules. -->
<arguments>true</arguments>
<!-- The repackaged executable jar gets the "exec" classifier; presumably this keeps the plain jar usable as a library dependency (TODO confirm). -->
<classifier>exec</classifier>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${maven.compiler.source}</source>
<target>${maven.compiler.target}</target>
<!-- Processor order: Lombok, then the Lombok/MapStruct binding, then MapStruct. -->
<annotationProcessorPaths>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>${lombok.version}</version>
</path>
<path>
<groupId>org.projectlombok</groupId>
<artifactId>lombok-mapstruct-binding</artifactId>
<version>${lombok-mapstruct-binding.version}</version>
</path>
<path>
<groupId>org.mapstruct</groupId>
<artifactId>mapstruct-processor</artifactId>
<version>${mapstruct.version}</version>
</path>
</annotationProcessorPaths>
<compilerArgs>
<!-- Keep method parameter names in the class files (path variables and request parameters are declared without explicit names). -->
<arg>-parameters</arg>
<!-- Generated MapStruct mappers default to the Spring component model. -->
<arg>-Amapstruct.defaultComponentModel=spring</arg>
</compilerArgs>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@@ -0,0 +1,11 @@
package com.datamate.knowledgegraph;
import org.springframework.context.annotation.ComponentScan;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.neo4j.repository.config.EnableNeo4jRepositories;
/**
 * Spring configuration entry point of the knowledge graph module.
 * <p>
 * Component-scans {@code com.datamate.knowledgegraph} and enables Spring Data
 * Neo4j repositories located in {@code com.datamate.knowledgegraph.domain.repository}.
 * The class itself declares no beans.
 */
@Configuration
@ComponentScan(basePackages = "com.datamate.knowledgegraph")
@EnableNeo4jRepositories(basePackages = "com.datamate.knowledgegraph.domain.repository")
public class KnowledgeGraphServiceConfiguration {
}

View File

@@ -0,0 +1,120 @@
package com.datamate.knowledgegraph.application;
import com.datamate.common.infrastructure.exception.BusinessException;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.knowledgegraph.domain.model.GraphEntity;
import com.datamate.knowledgegraph.domain.repository.GraphEntityRepository;
import com.datamate.knowledgegraph.infrastructure.exception.KnowledgeGraphErrorCode;
import com.datamate.knowledgegraph.infrastructure.neo4j.KnowledgeGraphProperties;
import com.datamate.knowledgegraph.interfaces.dto.CreateEntityRequest;
import com.datamate.knowledgegraph.interfaces.dto.UpdateEntityRequest;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.time.LocalDateTime;
import java.util.List;
import java.util.regex.Pattern;
@Service
@Slf4j
@RequiredArgsConstructor
public class GraphEntityService {
private static final Pattern UUID_PATTERN = Pattern.compile(
"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
);
private final GraphEntityRepository entityRepository;
private final KnowledgeGraphProperties properties;
@Transactional
public GraphEntity createEntity(String graphId, CreateEntityRequest request) {
validateGraphId(graphId);
GraphEntity entity = GraphEntity.builder()
.name(request.getName())
.type(request.getType())
.description(request.getDescription())
.aliases(request.getAliases())
.properties(request.getProperties())
.sourceId(request.getSourceId())
.sourceType(request.getSourceType())
.graphId(graphId)
.confidence(request.getConfidence() != null ? request.getConfidence() : 1.0)
.createdAt(LocalDateTime.now())
.updatedAt(LocalDateTime.now())
.build();
return entityRepository.save(entity);
}
public GraphEntity getEntity(String graphId, String entityId) {
validateGraphId(graphId);
return entityRepository.findByIdAndGraphId(entityId, graphId)
.orElseThrow(() -> BusinessException.of(KnowledgeGraphErrorCode.ENTITY_NOT_FOUND));
}
public List<GraphEntity> listEntities(String graphId) {
validateGraphId(graphId);
return entityRepository.findByGraphId(graphId);
}
public List<GraphEntity> searchEntities(String graphId, String name) {
validateGraphId(graphId);
return entityRepository.findByGraphIdAndNameContaining(graphId, name);
}
public List<GraphEntity> listEntitiesByType(String graphId, String type) {
validateGraphId(graphId);
return entityRepository.findByGraphIdAndType(graphId, type);
}
@Transactional
public GraphEntity updateEntity(String graphId, String entityId, UpdateEntityRequest request) {
validateGraphId(graphId);
GraphEntity entity = getEntity(graphId, entityId);
if (request.getName() != null) {
entity.setName(request.getName());
}
if (request.getDescription() != null) {
entity.setDescription(request.getDescription());
}
if (request.getAliases() != null) {
entity.setAliases(request.getAliases());
}
if (request.getProperties() != null) {
entity.setProperties(request.getProperties());
}
entity.setUpdatedAt(LocalDateTime.now());
return entityRepository.save(entity);
}
@Transactional
public void deleteEntity(String graphId, String entityId) {
validateGraphId(graphId);
GraphEntity entity = getEntity(graphId, entityId);
entityRepository.delete(entity);
}
public List<GraphEntity> getNeighbors(String graphId, String entityId, int depth, int limit) {
validateGraphId(graphId);
int clampedDepth = Math.max(1, Math.min(depth, properties.getMaxDepth()));
int clampedLimit = Math.max(1, Math.min(limit, properties.getMaxNodesPerQuery()));
return entityRepository.findNeighbors(graphId, entityId, clampedDepth, clampedLimit);
}
public long countEntities(String graphId) {
validateGraphId(graphId);
return entityRepository.countByGraphId(graphId);
}
/**
* 校验 graphId 格式(UUID)。
* 防止恶意构造的 graphId 注入 Cypher 查询。
*/
private void validateGraphId(String graphId) {
if (graphId == null || !UUID_PATTERN.matcher(graphId).matches()) {
throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER, "graphId 格式无效");
}
}
}

View File

@@ -0,0 +1,81 @@
package com.datamate.knowledgegraph.domain.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.neo4j.core.schema.DynamicLabels;
import org.springframework.data.neo4j.core.schema.GeneratedValue;
import org.springframework.data.neo4j.core.schema.Id;
import org.springframework.data.neo4j.core.schema.Node;
import org.springframework.data.neo4j.core.schema.Property;
import org.springframework.data.neo4j.core.support.UUIDStringGenerator;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* 知识图谱实体节点。
* <p>
* 在 Neo4j 中,每个实体作为一个节点存储,
* 通过 {@code type} 属性区分具体类型(Person, Organization, Concept 等),
* 并支持通过 {@code properties} 存储灵活的扩展属性。
*/
@Node("Entity")
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class GraphEntity {
@Id
@GeneratedValue(UUIDStringGenerator.class)
private String id;
@Property("name")
private String name;
@Property("type")
private String type;
@Property("description")
private String description;
@DynamicLabels
@Builder.Default
private List<String> labels = new ArrayList<>();
@Property("aliases")
@Builder.Default
private List<String> aliases = new ArrayList<>();
@Property("properties")
@Builder.Default
private Map<String, Object> properties = new HashMap<>();
/** 来源数据集/知识库的 ID */
@Property("source_id")
private String sourceId;
/** 来源类型:ANNOTATION, KNOWLEDGE_BASE, IMPORT, MANUAL */
@Property("source_type")
private String sourceType;
/** 所属图谱 ID(对应 MySQL 中的 t_dm_knowledge_graphs.id) */
@Property("graph_id")
private String graphId;
/** 自动抽取的置信度 */
@Property("confidence")
@Builder.Default
private Double confidence = 1.0;
@Property("created_at")
private LocalDateTime createdAt;
@Property("updated_at")
private LocalDateTime updatedAt;
}

View File

@@ -0,0 +1,61 @@
package com.datamate.knowledgegraph.domain.model;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.data.neo4j.core.schema.GeneratedValue;
import org.springframework.data.neo4j.core.schema.Id;
import org.springframework.data.neo4j.core.schema.Property;
import org.springframework.data.neo4j.core.schema.RelationshipProperties;
import org.springframework.data.neo4j.core.schema.TargetNode;
import org.springframework.data.neo4j.core.support.UUIDStringGenerator;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.Map;
/**
* 知识图谱关系(边)。
* <p>
* 使用 Spring Data Neo4j 的 {@code @RelationshipProperties} 表示带属性的关系。
* 关系的具体类型通过 {@code relationType} 表达(如 belongs_to, located_in)。
*/
@RelationshipProperties
@Data
@Builder
@NoArgsConstructor
@AllArgsConstructor
public class GraphRelation {
@Id
@GeneratedValue(UUIDStringGenerator.class)
private String id;
@TargetNode
private GraphEntity target;
@Property("relation_type")
private String relationType;
@Property("properties")
@Builder.Default
private Map<String, Object> properties = new HashMap<>();
@Property("weight")
@Builder.Default
private Double weight = 1.0;
@Property("source_id")
private String sourceId;
@Property("confidence")
@Builder.Default
private Double confidence = 1.0;
@Property("graph_id")
private String graphId;
@Property("created_at")
private LocalDateTime createdAt;
}

View File

@@ -0,0 +1,44 @@
package com.datamate.knowledgegraph.domain.repository;
import com.datamate.knowledgegraph.domain.model.GraphEntity;
import org.springframework.data.neo4j.repository.Neo4jRepository;
import org.springframework.data.neo4j.repository.query.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import java.util.List;
import java.util.Optional;
/**
 * Spring Data Neo4j repository for {@link GraphEntity} nodes.
 * <p>
 * Every query takes a {@code graphId} and matches only nodes whose
 * {@code graph_id} property equals it.
 */
@Repository
public interface GraphEntityRepository extends Neo4jRepository<GraphEntity, String> {

    /** Finds one entity by id inside the given graph. */
    @Query("MATCH (e:Entity {graph_id: $graphId}) WHERE e.id = $entityId RETURN e")
    Optional<GraphEntity> findByIdAndGraphId(
            @Param("entityId") String entityId,
            @Param("graphId") String graphId);

    /** Derived query: all entities of a graph. */
    List<GraphEntity> findByGraphId(String graphId);

    /** Derived query: entities of a graph with the given type. */
    List<GraphEntity> findByGraphIdAndType(String graphId, String type);

    /** Derived query: entities of a graph whose name contains the given text. */
    List<GraphEntity> findByGraphIdAndNameContaining(String graphId, String name);

    /** Finds entities of a graph by exact name and type. */
    @Query("MATCH (e:Entity {graph_id: $graphId}) " +
            "WHERE e.name = $name AND e.type = $type " +
            "RETURN e")
    List<GraphEntity> findByGraphIdAndNameAndType(
            @Param("graphId") String graphId,
            @Param("name") String name,
            @Param("type") String type);

    /**
     * Finds distinct entities within {@code depth} hops of the start entity, following
     * relationships in either direction.
     * <p>
     * Cypher does not accept a query parameter as the bound of a variable-length
     * pattern, so {@code depth} is rendered into the query text through the SpEL
     * {@code literal(...)} function. That is safe here only because the argument is an
     * {@code int}; callers are expected to clamp it to a small positive value first.
     * <p>
     * Neighbors are restricted to the same {@code graph_id} as the start node, and the
     * start node itself is excluded (it could otherwise be reached again through a cycle).
     *
     * @param depth maximum number of hops, at least 1
     * @param limit maximum number of neighbor nodes returned
     */
    @Query("MATCH (e:Entity {graph_id: $graphId, id: $entityId})" +
            "-[*1..:#{literal(#depth)}]-(neighbor:Entity {graph_id: $graphId}) " +
            "WHERE neighbor <> e " +
            "RETURN DISTINCT neighbor LIMIT $limit")
    List<GraphEntity> findNeighbors(
            @Param("graphId") String graphId,
            @Param("entityId") String entityId,
            @Param("depth") int depth,
            @Param("limit") int limit);

    /** Counts the entities of a graph. */
    @Query("MATCH (e:Entity {graph_id: $graphId}) RETURN count(e)")
    long countByGraphId(@Param("graphId") String graphId);
}

View File

@@ -0,0 +1,25 @@
package com.datamate.knowledgegraph.infrastructure.exception;
import com.datamate.common.infrastructure.exception.ErrorCode;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Error codes of the knowledge graph module.
 * <p>
 * Each constant pairs a code of the form {@code knowledge_graph.NNNN} with a
 * message; both are exposed through the Lombok-generated getters.
 */
@Getter
@AllArgsConstructor
public enum KnowledgeGraphErrorCode implements ErrorCode {
ENTITY_NOT_FOUND("knowledge_graph.0001", "实体不存在"),
RELATION_NOT_FOUND("knowledge_graph.0002", "关系不存在"),
GRAPH_NOT_FOUND("knowledge_graph.0003", "图谱不存在"),
DUPLICATE_ENTITY("knowledge_graph.0004", "实体已存在"),
INVALID_RELATION("knowledge_graph.0005", "无效的关系定义"),
IMPORT_FAILED("knowledge_graph.0006", "图谱导入失败"),
QUERY_DEPTH_EXCEEDED("knowledge_graph.0007", "查询深度超出限制"),
MAX_NODES_EXCEEDED("knowledge_graph.0008", "查询结果节点数超出限制");
/** Error code string. */
private final String code;
/** Error message text. */
private final String message;
}

View File

@@ -0,0 +1,20 @@
package com.datamate.knowledgegraph.infrastructure.neo4j;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Tunable limits of the knowledge graph module, bound from configuration keys
 * under the {@code datamate.knowledge-graph} prefix.
 */
@Data
@Component
@ConfigurationProperties(prefix = "datamate.knowledge-graph")
public class KnowledgeGraphProperties {
/** Default limit on the number of hops a query may traverse. */
private int maxDepth = 3;
/** Maximum number of nodes returned for a subgraph query. */
private int maxNodesPerQuery = 500;
/** Batch size for bulk import. */
private int importBatchSize = 100;
}

View File

@@ -0,0 +1,31 @@
package com.datamate.knowledgegraph.interfaces.dto;
import jakarta.validation.constraints.NotBlank;
import lombok.Data;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Request body for creating an entity.
 * <p>
 * {@code name} and {@code type} must be non-blank; all other fields are optional.
 */
@Data
public class CreateEntityRequest {
@NotBlank(message = "实体名称不能为空")
private String name;
@NotBlank(message = "实体类型不能为空")
private String type;
private String description;
// Initialised to an empty list when the field is absent from the request.
private List<String> aliases = new ArrayList<>();
// Initialised to an empty map when the field is absent from the request.
private Map<String, Object> properties = new HashMap<>();
private String sourceId;
private String sourceType;
// Optional; may be null.
private Double confidence;
}

View File

@@ -0,0 +1,28 @@
package com.datamate.knowledgegraph.interfaces.dto;
import jakarta.validation.constraints.NotBlank;
import lombok.Data;
import java.util.HashMap;
import java.util.Map;
/**
 * Request body for creating a relationship between two entities.
 * <p>
 * The source entity id, target entity id and relation type must be non-blank;
 * all other fields are optional.
 */
@Data
public class CreateRelationRequest {
@NotBlank(message = "源实体ID不能为空")
private String sourceEntityId;
@NotBlank(message = "目标实体ID不能为空")
private String targetEntityId;
@NotBlank(message = "关系类型不能为空")
private String relationType;
// Initialised to an empty map when the field is absent from the request.
private Map<String, Object> properties = new HashMap<>();
// Optional; may be null.
private Double weight;
private String sourceId;
// Optional; may be null.
private Double confidence;
}

View File

@@ -0,0 +1,18 @@
package com.datamate.knowledgegraph.interfaces.dto;
import lombok.Data;
import java.util.List;
import java.util.Map;
/**
 * Request body for a partial entity update.
 * <p>
 * No field carries a validation constraint and none has a default value, so any
 * field may be {@code null}.
 */
@Data
public class UpdateEntityRequest {
private String name;
private String description;
private List<String> aliases;
private Map<String, Object> properties;
}

View File

@@ -0,0 +1,80 @@
package com.datamate.knowledgegraph.interfaces.rest;
import com.datamate.knowledgegraph.application.GraphEntityService;
import com.datamate.knowledgegraph.domain.model.GraphEntity;
import com.datamate.knowledgegraph.interfaces.dto.CreateEntityRequest;
import com.datamate.knowledgegraph.interfaces.dto.UpdateEntityRequest;
import jakarta.validation.Valid;
import jakarta.validation.constraints.Pattern;
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpStatus;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoints for the entities of a single knowledge graph, rooted at
 * {@code /knowledge-graph/{graphId}/entities}.
 * <p>
 * Both {@code graphId} and {@code entityId} path variables must match the UUID
 * format; the work itself is delegated to {@link GraphEntityService}.
 */
@RestController
@RequestMapping("/knowledge-graph/{graphId}/entities")
@RequiredArgsConstructor
@Validated
public class GraphEntityController {

    private static final String UUID_REGEX =
            "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$";

    /** Validation message for a malformed graph id. */
    private static final String GRAPH_ID_INVALID = "graphId 格式无效";

    /** Validation message for a malformed entity id. */
    private static final String ENTITY_ID_INVALID = "entityId 格式无效";

    private final GraphEntityService entityService;

    /** Creates an entity in the graph; responds with 201 and the stored entity. */
    @ResponseStatus(HttpStatus.CREATED)
    @PostMapping
    public GraphEntity createEntity(
            @PathVariable @Pattern(regexp = UUID_REGEX, message = GRAPH_ID_INVALID) String graphId,
            @Valid @RequestBody CreateEntityRequest request) {
        return entityService.createEntity(graphId, request);
    }

    /** Returns a single entity of the graph. */
    @GetMapping("/{entityId}")
    public GraphEntity getEntity(
            @PathVariable @Pattern(regexp = UUID_REGEX, message = GRAPH_ID_INVALID) String graphId,
            @PathVariable @Pattern(regexp = UUID_REGEX, message = ENTITY_ID_INVALID) String entityId) {
        return entityService.getEntity(graphId, entityId);
    }

    /**
     * Lists entities of the graph. A non-blank {@code keyword} takes precedence and
     * triggers a name search; otherwise a non-blank {@code type} filters by type;
     * with neither, all entities are returned.
     */
    @GetMapping
    public List<GraphEntity> listEntities(
            @PathVariable @Pattern(regexp = UUID_REGEX, message = GRAPH_ID_INVALID) String graphId,
            @RequestParam(required = false) String type,
            @RequestParam(required = false) String keyword) {
        boolean keywordGiven = keyword != null && !keyword.isBlank();
        boolean typeGiven = type != null && !type.isBlank();

        if (keywordGiven) {
            return entityService.searchEntities(graphId, keyword);
        }
        return typeGiven
                ? entityService.listEntitiesByType(graphId, type)
                : entityService.listEntities(graphId);
    }

    /** Partially updates an entity and returns the stored result. */
    @PutMapping("/{entityId}")
    public GraphEntity updateEntity(
            @PathVariable @Pattern(regexp = UUID_REGEX, message = GRAPH_ID_INVALID) String graphId,
            @PathVariable @Pattern(regexp = UUID_REGEX, message = ENTITY_ID_INVALID) String entityId,
            @Valid @RequestBody UpdateEntityRequest request) {
        return entityService.updateEntity(graphId, entityId, request);
    }

    /** Deletes an entity; responds with 204 and no body. */
    @ResponseStatus(HttpStatus.NO_CONTENT)
    @DeleteMapping("/{entityId}")
    public void deleteEntity(
            @PathVariable @Pattern(regexp = UUID_REGEX, message = GRAPH_ID_INVALID) String graphId,
            @PathVariable @Pattern(regexp = UUID_REGEX, message = ENTITY_ID_INVALID) String entityId) {
        entityService.deleteEntity(graphId, entityId);
    }

    /**
     * Returns the neighbors of an entity. {@code depth} defaults to 2 and
     * {@code limit} to 50 when the request parameters are omitted.
     */
    @GetMapping("/{entityId}/neighbors")
    public List<GraphEntity> getNeighbors(
            @PathVariable @Pattern(regexp = UUID_REGEX, message = GRAPH_ID_INVALID) String graphId,
            @PathVariable @Pattern(regexp = UUID_REGEX, message = ENTITY_ID_INVALID) String entityId,
            @RequestParam(defaultValue = "2") int depth,
            @RequestParam(defaultValue = "50") int limit) {
        return entityService.getNeighbors(graphId, entityId, depth, limit);
    }
}

View File

@@ -0,0 +1,25 @@
# Knowledge graph service - Neo4j connection configuration
# This file is pulled in through spring.config.import of main-application
# NOTE: in production, always set the password via the NEO4J_PASSWORD environment variable; do not rely on the default value
spring:
neo4j:
uri: ${NEO4J_URI:bolt://datamate-neo4j:7687}
authentication:
username: ${NEO4J_USERNAME:neo4j}
password: ${NEO4J_PASSWORD:datamate123}
pool:
max-connection-pool-size: ${NEO4J_POOL_MAX_SIZE:50}
connection-acquisition-timeout: 30s
max-connection-lifetime: 1h
log-leaked-sessions: true
# Knowledge graph service settings
datamate:
knowledge-graph:
# Default limit on the number of hops per query
max-depth: ${KG_MAX_DEPTH:3}
# Maximum number of nodes returned for a subgraph query
max-nodes-per-query: ${KG_MAX_NODES:500}
# Batch size for bulk import
import-batch-size: ${KG_IMPORT_BATCH_SIZE:100}