getNeighbors(String graphId, String entityId, int depth, int limit) {
+ validateGraphId(graphId);
+ int clampedDepth = Math.max(1, Math.min(depth, properties.getMaxDepth()));
+ int clampedLimit = Math.max(1, Math.min(limit, properties.getMaxNodesPerQuery()));
+ return entityRepository.findNeighbors(graphId, entityId, clampedDepth, clampedLimit);
+ }
+
+ public long countEntities(String graphId) {
+ validateGraphId(graphId);
+ return entityRepository.countByGraphId(graphId);
+ }
+
+ /**
+ * 校验 graphId 格式(UUID)。
+ * 防止恶意构造的 graphId 注入 Cypher 查询。
+ */
+ private void validateGraphId(String graphId) {
+ if (graphId == null || !UUID_PATTERN.matcher(graphId).matches()) {
+ throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER, "graphId 格式无效");
+ }
+ }
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/model/GraphEntity.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/model/GraphEntity.java
new file mode 100644
index 0000000..3f41212
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/model/GraphEntity.java
@@ -0,0 +1,81 @@
+package com.datamate.knowledgegraph.domain.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import org.springframework.data.neo4j.core.schema.DynamicLabels;
+import org.springframework.data.neo4j.core.schema.GeneratedValue;
+import org.springframework.data.neo4j.core.schema.Id;
+import org.springframework.data.neo4j.core.schema.Node;
+import org.springframework.data.neo4j.core.schema.Property;
+import org.springframework.data.neo4j.core.support.UUIDStringGenerator;
+
+import java.time.LocalDateTime;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * 知识图谱实体节点。
+ *
+ * 在 Neo4j 中,每个实体作为一个节点存储,
+ * 通过 {@code type} 属性区分具体类型(Person, Organization, Concept 等),
+ * 并支持通过 {@code properties} 存储灵活的扩展属性。
+ */
+@Node("Entity")
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class GraphEntity {
+
+ @Id
+ @GeneratedValue(UUIDStringGenerator.class)
+ private String id;
+
+ @Property("name")
+ private String name;
+
+ @Property("type")
+ private String type;
+
+ @Property("description")
+ private String description;
+
+ @DynamicLabels
+ @Builder.Default
+ private List labels = new ArrayList<>();
+
+ @Property("aliases")
+ @Builder.Default
+ private List aliases = new ArrayList<>();
+
+ @Property("properties")
+ @Builder.Default
+ private Map properties = new HashMap<>();
+
+ /** 来源数据集/知识库的 ID */
+ @Property("source_id")
+ private String sourceId;
+
+ /** 来源类型:ANNOTATION, KNOWLEDGE_BASE, IMPORT, MANUAL */
+ @Property("source_type")
+ private String sourceType;
+
+ /** 所属图谱 ID(对应 MySQL 中的 t_dm_knowledge_graphs.id) */
+ @Property("graph_id")
+ private String graphId;
+
+ /** 自动抽取的置信度 */
+ @Property("confidence")
+ @Builder.Default
+ private Double confidence = 1.0;
+
+ @Property("created_at")
+ private LocalDateTime createdAt;
+
+ @Property("updated_at")
+ private LocalDateTime updatedAt;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/model/GraphRelation.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/model/GraphRelation.java
new file mode 100644
index 0000000..4138946
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/model/GraphRelation.java
@@ -0,0 +1,61 @@
+package com.datamate.knowledgegraph.domain.model;
+
+import lombok.AllArgsConstructor;
+import lombok.Builder;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+import org.springframework.data.neo4j.core.schema.GeneratedValue;
+import org.springframework.data.neo4j.core.schema.Id;
+import org.springframework.data.neo4j.core.schema.Property;
+import org.springframework.data.neo4j.core.schema.RelationshipProperties;
+import org.springframework.data.neo4j.core.schema.TargetNode;
+import org.springframework.data.neo4j.core.support.UUIDStringGenerator;
+
+import java.time.LocalDateTime;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * 知识图谱关系(边)。
+ *
+ * 使用 Spring Data Neo4j 的 {@code @RelationshipProperties} 表示带属性的关系。
+ * 关系的具体类型通过 {@code relationType} 表达(如 belongs_to, located_in)。
+ */
+@RelationshipProperties
+@Data
+@Builder
+@NoArgsConstructor
+@AllArgsConstructor
+public class GraphRelation {
+
+ @Id
+ @GeneratedValue(UUIDStringGenerator.class)
+ private String id;
+
+ @TargetNode
+ private GraphEntity target;
+
+ @Property("relation_type")
+ private String relationType;
+
+ @Property("properties")
+ @Builder.Default
+ private Map properties = new HashMap<>();
+
+ @Property("weight")
+ @Builder.Default
+ private Double weight = 1.0;
+
+ @Property("source_id")
+ private String sourceId;
+
+ @Property("confidence")
+ @Builder.Default
+ private Double confidence = 1.0;
+
+ @Property("graph_id")
+ private String graphId;
+
+ @Property("created_at")
+ private LocalDateTime createdAt;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/repository/GraphEntityRepository.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/repository/GraphEntityRepository.java
new file mode 100644
index 0000000..ae6a677
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/domain/repository/GraphEntityRepository.java
@@ -0,0 +1,44 @@
+package com.datamate.knowledgegraph.domain.repository;
+
+import com.datamate.knowledgegraph.domain.model.GraphEntity;
+import org.springframework.data.neo4j.repository.Neo4jRepository;
+import org.springframework.data.neo4j.repository.query.Query;
+import org.springframework.data.repository.query.Param;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+import java.util.Optional;
+
+@Repository
+public interface GraphEntityRepository extends Neo4jRepository {
+
+ @Query("MATCH (e:Entity {graph_id: $graphId}) WHERE e.id = $entityId RETURN e")
+ Optional findByIdAndGraphId(
+ @Param("entityId") String entityId,
+ @Param("graphId") String graphId);
+
+ List findByGraphId(String graphId);
+
+ List findByGraphIdAndType(String graphId, String type);
+
+ List findByGraphIdAndNameContaining(String graphId, String name);
+
+ @Query("MATCH (e:Entity {graph_id: $graphId}) " +
+ "WHERE e.name = $name AND e.type = $type " +
+ "RETURN e")
+ List findByGraphIdAndNameAndType(
+ @Param("graphId") String graphId,
+ @Param("name") String name,
+ @Param("type") String type);
+
+ @Query("MATCH (e:Entity {graph_id: $graphId, id: $entityId})-[r*1..$depth]-(neighbor:Entity) " +
+ "RETURN DISTINCT neighbor LIMIT $limit")
+ List findNeighbors(
+ @Param("graphId") String graphId,
+ @Param("entityId") String entityId,
+ @Param("depth") int depth,
+ @Param("limit") int limit);
+
+ @Query("MATCH (e:Entity {graph_id: $graphId}) RETURN count(e)")
+ long countByGraphId(@Param("graphId") String graphId);
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/infrastructure/exception/KnowledgeGraphErrorCode.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/infrastructure/exception/KnowledgeGraphErrorCode.java
new file mode 100644
index 0000000..5983536
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/infrastructure/exception/KnowledgeGraphErrorCode.java
@@ -0,0 +1,25 @@
+package com.datamate.knowledgegraph.infrastructure.exception;
+
+import com.datamate.common.infrastructure.exception.ErrorCode;
+import lombok.AllArgsConstructor;
+import lombok.Getter;
+
+/**
+ * 知识图谱模块错误码
+ */
+@Getter
+@AllArgsConstructor
+public enum KnowledgeGraphErrorCode implements ErrorCode {
+
+ ENTITY_NOT_FOUND("knowledge_graph.0001", "实体不存在"),
+ RELATION_NOT_FOUND("knowledge_graph.0002", "关系不存在"),
+ GRAPH_NOT_FOUND("knowledge_graph.0003", "图谱不存在"),
+ DUPLICATE_ENTITY("knowledge_graph.0004", "实体已存在"),
+ INVALID_RELATION("knowledge_graph.0005", "无效的关系定义"),
+ IMPORT_FAILED("knowledge_graph.0006", "图谱导入失败"),
+ QUERY_DEPTH_EXCEEDED("knowledge_graph.0007", "查询深度超出限制"),
+ MAX_NODES_EXCEEDED("knowledge_graph.0008", "查询结果节点数超出限制");
+
+ private final String code;
+ private final String message;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/infrastructure/neo4j/KnowledgeGraphProperties.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/infrastructure/neo4j/KnowledgeGraphProperties.java
new file mode 100644
index 0000000..65612fb
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/infrastructure/neo4j/KnowledgeGraphProperties.java
@@ -0,0 +1,20 @@
+package com.datamate.knowledgegraph.infrastructure.neo4j;
+
+import lombok.Data;
+import org.springframework.boot.context.properties.ConfigurationProperties;
+import org.springframework.stereotype.Component;
+
+@Data
+@Component
+@ConfigurationProperties(prefix = "datamate.knowledge-graph")
+public class KnowledgeGraphProperties {
+
+ /** 默认查询跳数限制 */
+ private int maxDepth = 3;
+
+ /** 子图返回最大节点数 */
+ private int maxNodesPerQuery = 500;
+
+ /** 批量导入批次大小 */
+ private int importBatchSize = 100;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/CreateEntityRequest.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/CreateEntityRequest.java
new file mode 100644
index 0000000..2e873c9
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/CreateEntityRequest.java
@@ -0,0 +1,31 @@
+package com.datamate.knowledgegraph.interfaces.dto;
+
+import jakarta.validation.constraints.NotBlank;
+import lombok.Data;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+@Data
+public class CreateEntityRequest {
+
+ @NotBlank(message = "实体名称不能为空")
+ private String name;
+
+ @NotBlank(message = "实体类型不能为空")
+ private String type;
+
+ private String description;
+
+ private List aliases = new ArrayList<>();
+
+ private Map properties = new HashMap<>();
+
+ private String sourceId;
+
+ private String sourceType;
+
+ private Double confidence;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/CreateRelationRequest.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/CreateRelationRequest.java
new file mode 100644
index 0000000..7a6c311
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/CreateRelationRequest.java
@@ -0,0 +1,28 @@
+package com.datamate.knowledgegraph.interfaces.dto;
+
+import jakarta.validation.constraints.NotBlank;
+import lombok.Data;
+
+import java.util.HashMap;
+import java.util.Map;
+
+@Data
+public class CreateRelationRequest {
+
+ @NotBlank(message = "源实体ID不能为空")
+ private String sourceEntityId;
+
+ @NotBlank(message = "目标实体ID不能为空")
+ private String targetEntityId;
+
+ @NotBlank(message = "关系类型不能为空")
+ private String relationType;
+
+ private Map properties = new HashMap<>();
+
+ private Double weight;
+
+ private String sourceId;
+
+ private Double confidence;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/UpdateEntityRequest.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/UpdateEntityRequest.java
new file mode 100644
index 0000000..936caf9
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/dto/UpdateEntityRequest.java
@@ -0,0 +1,18 @@
+package com.datamate.knowledgegraph.interfaces.dto;
+
+import lombok.Data;
+
+import java.util.List;
+import java.util.Map;
+
+@Data
+public class UpdateEntityRequest {
+
+ private String name;
+
+ private String description;
+
+ private List aliases;
+
+ private Map properties;
+}
diff --git a/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/rest/GraphEntityController.java b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/rest/GraphEntityController.java
new file mode 100644
index 0000000..30a7f4c
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/java/com/datamate/knowledgegraph/interfaces/rest/GraphEntityController.java
@@ -0,0 +1,80 @@
+package com.datamate.knowledgegraph.interfaces.rest;
+
+import com.datamate.knowledgegraph.application.GraphEntityService;
+import com.datamate.knowledgegraph.domain.model.GraphEntity;
+import com.datamate.knowledgegraph.interfaces.dto.CreateEntityRequest;
+import com.datamate.knowledgegraph.interfaces.dto.UpdateEntityRequest;
+import jakarta.validation.Valid;
+import jakarta.validation.constraints.Pattern;
+import lombok.RequiredArgsConstructor;
+import org.springframework.http.HttpStatus;
+import org.springframework.validation.annotation.Validated;
+import org.springframework.web.bind.annotation.*;
+
+import java.util.List;
+
+@RestController
+@RequestMapping("/knowledge-graph/{graphId}/entities")
+@RequiredArgsConstructor
+@Validated
+public class GraphEntityController {
+
+ private static final String UUID_REGEX =
+ "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$";
+
+ private final GraphEntityService entityService;
+
+ @PostMapping
+ @ResponseStatus(HttpStatus.CREATED)
+ public GraphEntity createEntity(
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId,
+ @Valid @RequestBody CreateEntityRequest request) {
+ return entityService.createEntity(graphId, request);
+ }
+
+ @GetMapping("/{entityId}")
+ public GraphEntity getEntity(
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId,
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "entityId 格式无效") String entityId) {
+ return entityService.getEntity(graphId, entityId);
+ }
+
+ @GetMapping
+ public List listEntities(
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId,
+ @RequestParam(required = false) String type,
+ @RequestParam(required = false) String keyword) {
+ if (keyword != null && !keyword.isBlank()) {
+ return entityService.searchEntities(graphId, keyword);
+ }
+ if (type != null && !type.isBlank()) {
+ return entityService.listEntitiesByType(graphId, type);
+ }
+ return entityService.listEntities(graphId);
+ }
+
+ @PutMapping("/{entityId}")
+ public GraphEntity updateEntity(
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId,
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "entityId 格式无效") String entityId,
+ @Valid @RequestBody UpdateEntityRequest request) {
+ return entityService.updateEntity(graphId, entityId, request);
+ }
+
+ @DeleteMapping("/{entityId}")
+ @ResponseStatus(HttpStatus.NO_CONTENT)
+ public void deleteEntity(
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId,
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "entityId 格式无效") String entityId) {
+ entityService.deleteEntity(graphId, entityId);
+ }
+
+ @GetMapping("/{entityId}/neighbors")
+ public List getNeighbors(
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "graphId 格式无效") String graphId,
+ @PathVariable @Pattern(regexp = UUID_REGEX, message = "entityId 格式无效") String entityId,
+ @RequestParam(defaultValue = "2") int depth,
+ @RequestParam(defaultValue = "50") int limit) {
+ return entityService.getNeighbors(graphId, entityId, depth, limit);
+ }
+}
diff --git a/backend/services/knowledge-graph-service/src/main/resources/application-knowledgegraph.yml b/backend/services/knowledge-graph-service/src/main/resources/application-knowledgegraph.yml
new file mode 100644
index 0000000..20dc261
--- /dev/null
+++ b/backend/services/knowledge-graph-service/src/main/resources/application-knowledgegraph.yml
@@ -0,0 +1,25 @@
+# 知识图谱服务 - Neo4j连接配置
+# 该配置在 main-application 的 spring.config.import 中引入
+# 注意:生产环境务必通过环境变量 NEO4J_PASSWORD 设置密码,不要使用默认值
+
+spring:
+ neo4j:
+ uri: ${NEO4J_URI:bolt://datamate-neo4j:7687}
+ authentication:
+ username: ${NEO4J_USERNAME:neo4j}
+ password: ${NEO4J_PASSWORD:datamate123}
+ pool:
+ max-connection-pool-size: ${NEO4J_POOL_MAX_SIZE:50}
+ connection-acquisition-timeout: 30s
+ max-connection-lifetime: 1h
+ log-leaked-sessions: true
+
+# 知识图谱服务配置
+datamate:
+ knowledge-graph:
+ # 默认查询跳数限制
+ max-depth: ${KG_MAX_DEPTH:3}
+ # 子图返回最大节点数
+ max-nodes-per-query: ${KG_MAX_NODES:500}
+ # 批量导入批次大小
+ import-batch-size: ${KG_IMPORT_BATCH_SIZE:100}
diff --git a/backend/services/main-application/pom.xml b/backend/services/main-application/pom.xml
index 22a60ff..96cd1e7 100644
--- a/backend/services/main-application/pom.xml
+++ b/backend/services/main-application/pom.xml
@@ -109,6 +109,13 @@
${project.version}
+
+
+ com.datamate
+ knowledge-graph-service
+ ${project.version}
+
+
com.mysql
diff --git a/backend/services/main-application/src/main/resources/application.yml b/backend/services/main-application/src/main/resources/application.yml
index 3f52916..227f995 100644
--- a/backend/services/main-application/src/main/resources/application.yml
+++ b/backend/services/main-application/src/main/resources/application.yml
@@ -52,6 +52,7 @@ spring:
import:
- classpath:config/application-datacollection.yml
- classpath:config/application-datamanagement.yml
+ - optional:classpath:application-knowledgegraph.yml
# Redis配置
data:
diff --git a/backend/services/pom.xml b/backend/services/pom.xml
index a3caaa4..fd53194 100644
--- a/backend/services/pom.xml
+++ b/backend/services/pom.xml
@@ -37,6 +37,9 @@
rag-indexer-service
rag-query-service
+
+ knowledge-graph-service
+
main-application
diff --git a/deployment/docker/neo4j/docker-compose.yml b/deployment/docker/neo4j/docker-compose.yml
new file mode 100644
index 0000000..3190e13
--- /dev/null
+++ b/deployment/docker/neo4j/docker-compose.yml
@@ -0,0 +1,39 @@
+services:
+ datamate-neo4j:
+ container_name: datamate-neo4j
+ image: neo4j:5-community
+ restart: on-failure
+ ports:
+ - "7474:7474" # HTTP (Neo4j Browser)
+ - "7687:7687" # Bolt protocol
+ environment:
+ NEO4J_AUTH: neo4j/${NEO4J_PASSWORD:-datamate123}
+ # Memory configuration
+ NEO4J_server_memory_heap_initial__size: 512m
+ NEO4J_server_memory_heap_max__size: 1G
+ NEO4J_server_memory_pagecache_size: 512m
+ # Enable APOC plugin
+ NEO4J_PLUGINS: '["apoc"]'
+ # Transaction timeout
+ NEO4J_db_transaction_timeout: 60s
+ volumes:
+ - neo4j_data:/data
+ - neo4j_logs:/logs
+ networks: [ datamate ]
+ healthcheck:
+ test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:7474 || exit 1"]
+ interval: 15s
+ timeout: 10s
+ retries: 5
+ start_period: 30s
+
+volumes:
+ neo4j_data:
+ name: datamate-neo4j-data-volume
+ neo4j_logs:
+ name: datamate-neo4j-logs-volume
+
+networks:
+ datamate:
+ driver: bridge
+ name: datamate-network
diff --git a/runtime/datamate-python/app/module/kg_extraction/__init__.py b/runtime/datamate-python/app/module/kg_extraction/__init__.py
new file mode 100644
index 0000000..56e8d7c
--- /dev/null
+++ b/runtime/datamate-python/app/module/kg_extraction/__init__.py
@@ -0,0 +1,17 @@
+from app.module.kg_extraction.extractor import KnowledgeGraphExtractor
+from app.module.kg_extraction.models import (
+ ExtractionRequest,
+ ExtractionResult,
+ Triple,
+ GraphNode,
+ GraphEdge,
+)
+
+__all__ = [
+ "KnowledgeGraphExtractor",
+ "ExtractionRequest",
+ "ExtractionResult",
+ "Triple",
+ "GraphNode",
+ "GraphEdge",
+]
diff --git a/runtime/datamate-python/app/module/kg_extraction/extractor.py b/runtime/datamate-python/app/module/kg_extraction/extractor.py
new file mode 100644
index 0000000..498cde8
--- /dev/null
+++ b/runtime/datamate-python/app/module/kg_extraction/extractor.py
@@ -0,0 +1,183 @@
+"""基于 LLM 的知识图谱三元组抽取器。
+
+利用 LangChain 的 LLMGraphTransformer 从非结构化文本中抽取实体和关系,
+支持 schema-guided 抽取以提升准确率。
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Sequence
+
+from langchain_core.documents import Document
+from langchain_openai import ChatOpenAI
+from langchain_experimental.graph_transformers import LLMGraphTransformer
+
+from app.module.kg_extraction.models import (
+ ExtractionRequest,
+ ExtractionResult,
+ ExtractionSchema,
+ GraphEdge,
+ GraphNode,
+ Triple,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class KnowledgeGraphExtractor:
+ """基于 LLMGraphTransformer 的三元组抽取器。
+
+ Parameters
+ ----------
+ model_name : str
+ OpenAI 兼容模型名称。
+ base_url : str | None
+ 自定义 API base URL(用于对接 vLLM/Ollama 等本地模型服务)。
+ api_key : str
+ API 密钥。
+ temperature : float
+ 生成温度,抽取任务建议使用较低值。
+ """
+
+ def __init__(
+ self,
+ model_name: str = "gpt-4o-mini",
+ base_url: str | None = None,
+ api_key: str = "EMPTY",
+ temperature: float = 0.0,
+ ) -> None:
+ self._llm = ChatOpenAI(
+ model=model_name,
+ base_url=base_url,
+ api_key=api_key,
+ temperature=temperature,
+ )
+
+ def _build_transformer(
+ self,
+ schema: ExtractionSchema | None = None,
+ ) -> LLMGraphTransformer:
+ """根据可选的 schema 约束构造 LLMGraphTransformer。"""
+ kwargs: dict = {"llm": self._llm}
+
+ if schema:
+ if schema.entity_types:
+ kwargs["allowed_nodes"] = [et.name for et in schema.entity_types]
+ if schema.relation_types:
+ kwargs["allowed_relationships"] = [rt.name for rt in schema.relation_types]
+
+ return LLMGraphTransformer(**kwargs)
+
+ async def extract(self, request: ExtractionRequest) -> ExtractionResult:
+ """从文本中抽取三元组。
+
+ Parameters
+ ----------
+ request : ExtractionRequest
+ 包含文本、schema 约束等信息的抽取请求。
+
+ Returns
+ -------
+ ExtractionResult
+ 抽取得到的节点、边和三元组。
+ """
+ transformer = self._build_transformer(request.schema)
+ documents = [Document(page_content=request.text)]
+
+ try:
+ graph_documents = await transformer.aconvert_to_graph_documents(documents)
+ except Exception:
+ logger.exception("LLM graph extraction failed for source_id=%s", request.source_id)
+ return ExtractionResult(raw_text=request.text, source_id=request.source_id)
+
+ return self._convert_result(graph_documents, request)
+
+ def extract_sync(self, request: ExtractionRequest) -> ExtractionResult:
+ """同步版本的三元组抽取。"""
+ transformer = self._build_transformer(request.schema)
+ documents = [Document(page_content=request.text)]
+
+ try:
+ graph_documents = transformer.convert_to_graph_documents(documents)
+ except Exception:
+ logger.exception("LLM graph extraction failed for source_id=%s", request.source_id)
+ return ExtractionResult(raw_text=request.text, source_id=request.source_id)
+
+ return self._convert_result(graph_documents, request)
+
+ async def extract_batch(
+ self,
+ requests: Sequence[ExtractionRequest],
+ ) -> list[ExtractionResult]:
+ """批量抽取。
+
+ 对多段文本逐一抽取并汇总结果。
+ 如需更高吞吐,可自行用 asyncio.gather 并发调用 extract。
+ """
+ results: list[ExtractionResult] = []
+ for req in requests:
+ result = await self.extract(req)
+ results.append(result)
+ return results
+
+ @staticmethod
+ def _convert_result(
+ graph_documents: list,
+ request: ExtractionRequest,
+ ) -> ExtractionResult:
+ """将 LangChain GraphDocument 转换为内部数据模型。"""
+ nodes: list[GraphNode] = []
+ edges: list[GraphEdge] = []
+ triples: list[Triple] = []
+ seen_nodes: set[str] = set()
+
+ for doc in graph_documents:
+ # 收集节点
+ for node in doc.nodes:
+ node_key = f"{node.id}:{node.type}"
+ if node_key not in seen_nodes:
+ seen_nodes.add(node_key)
+ nodes.append(
+ GraphNode(
+ name=node.id,
+ type=node.type,
+ properties=node.properties if hasattr(node, "properties") else {},
+ )
+ )
+
+ # 收集关系
+ for rel in doc.relationships:
+ source_node = GraphNode(
+ name=rel.source.id,
+ type=rel.source.type,
+ )
+ target_node = GraphNode(
+ name=rel.target.id,
+ type=rel.target.type,
+ )
+
+ edges.append(
+ GraphEdge(
+ source=rel.source.id,
+ target=rel.target.id,
+ relation_type=rel.type,
+ properties=rel.properties if hasattr(rel, "properties") else {},
+ )
+ )
+
+ triples.append(
+ Triple(
+ subject=source_node,
+ predicate=rel.type,
+ object=target_node,
+ )
+ )
+
+ return ExtractionResult(
+ nodes=nodes,
+ edges=edges,
+ triples=triples,
+ raw_text=request.text,
+ source_id=request.source_id,
+ )
diff --git a/runtime/datamate-python/app/module/kg_extraction/models.py b/runtime/datamate-python/app/module/kg_extraction/models.py
new file mode 100644
index 0000000..a6220ff
--- /dev/null
+++ b/runtime/datamate-python/app/module/kg_extraction/models.py
@@ -0,0 +1,75 @@
+"""知识图谱三元组抽取数据模型。"""
+
+from __future__ import annotations
+
+from pydantic import BaseModel, Field
+
+
+class GraphNode(BaseModel):
+ """图谱节点(实体)。"""
+
+ name: str = Field(..., description="实体名称")
+ type: str = Field(..., description="实体类型, 如 Person, Organization, Location")
+ properties: dict[str, object] = Field(default_factory=dict, description="扩展属性")
+
+
+class GraphEdge(BaseModel):
+ """图谱边(关系)。"""
+
+ source: str = Field(..., description="源实体名称")
+ target: str = Field(..., description="目标实体名称")
+ relation_type: str = Field(..., description="关系类型, 如 works_at, located_in")
+ properties: dict[str, object] = Field(default_factory=dict, description="关系属性")
+
+
+class Triple(BaseModel):
+ """知识三元组: (主体, 关系, 客体)。"""
+
+ subject: GraphNode
+ predicate: str = Field(..., description="关系类型")
+ object: GraphNode
+
+
+class EntityTypeConstraint(BaseModel):
+ """实体类型约束,用于 Schema-guided 抽取。"""
+
+ name: str = Field(..., description="类型名称")
+ description: str = Field(default="", description="类型说明")
+
+
+class RelationTypeConstraint(BaseModel):
+ """关系类型约束。"""
+
+ name: str = Field(..., description="关系类型名称")
+ source_types: list[str] = Field(default_factory=list, description="允许的源实体类型")
+ target_types: list[str] = Field(default_factory=list, description="允许的目标实体类型")
+ description: str = Field(default="", description="关系说明")
+
+
+class ExtractionSchema(BaseModel):
+ """抽取 schema 约束,约束 LLM 输出的实体和关系类型范围。"""
+
+ entity_types: list[EntityTypeConstraint] = Field(default_factory=list)
+ relation_types: list[RelationTypeConstraint] = Field(default_factory=list)
+
+
+class ExtractionRequest(BaseModel):
+ """三元组抽取请求。"""
+
+ text: str = Field(..., description="待抽取的文本")
+ graph_id: str = Field(..., description="目标图谱 ID")
+ schema: ExtractionSchema | None = Field(
+ default=None, description="可选的 schema 约束, 提供后做 schema-guided 抽取"
+ )
+ source_id: str | None = Field(default=None, description="来源 ID(数据集/知识库条目)")
+ source_type: str = Field(default="KNOWLEDGE_BASE", description="来源类型")
+
+
+class ExtractionResult(BaseModel):
+ """三元组抽取结果。"""
+
+ nodes: list[GraphNode] = Field(default_factory=list)
+ edges: list[GraphEdge] = Field(default_factory=list)
+ triples: list[Triple] = Field(default_factory=list)
+ raw_text: str = Field(default="", description="原始文本")
+ source_id: str | None = None
diff --git a/runtime/datamate-python/pyproject.toml b/runtime/datamate-python/pyproject.toml
index 14f23d5..a83c23c 100644
--- a/runtime/datamate-python/pyproject.toml
+++ b/runtime/datamate-python/pyproject.toml
@@ -31,6 +31,7 @@ dependencies = [
"openai (>=2.9.0,<3.0.0)",
"langchain-openai (>=1.1.1,<2.0.0)",
"langchain (>=1.1.3,<2.0.0)",
+ "langchain-experimental (>=0.3.0,<1.0.0)",
"pydantic (>=2.12.5,<3.0.0)",
"sqlalchemy (>=2.0.45,<3.0.0)",
"fastapi (>=0.124.0,<0.125.0)",