You've already forked DataMate
修复从全新部署到运行的完整流程中的配置和路由问题。 ## P0 修复(功能失效) ### P0-1: GraphRAG KG 服务 URL 错误 - config.py - GRAPHRAG_KG_SERVICE_URL 从 http://datamate-kg:8080 改为 http://datamate-backend:8080(容器名修正) - kg_client.py - 修复 API 路径:/knowledge-graph/... → /api/knowledge-graph/... - kb_access.py - 同类问题修复:/knowledge-base/... → /api/knowledge-base/... - test_kb_access.py - 测试断言同步更新 根因:容器名 datamate-kg 不存在,且 httpx 绝对路径会丢弃 base_url 中的 /api 路径 ### P0-2: Vite 开发代理剥离 /api 前缀 - vite.config.ts - 删除 /api/knowledge-graph 专用代理规则(剥离 /api 导致 404),统一走 ^/api 规则 ## P1 修复(功能受损) ### P1-1: Gateway 缺少 KG Python 端点路由 - ApiGatewayApplication.java - 添加 /api/kg/** 路由(指向 kg-extraction Python 服务) - ApiGatewayApplication.java - 添加 /api/graphrag/** 路由(指向 GraphRAG 服务) ### P1-2: DATA_MANAGEMENT_URL 默认值缺 /api - KnowledgeGraphProperties.java - dataManagementUrl 默认值 http://localhost:8080 → http://localhost:8080/api - KnowledgeGraphProperties.java - annotationServiceUrl 默认值 http://localhost:8081 → http://localhost:8080/api(同 JVM) - application-knowledgegraph.yml - YAML 默认值同步更新 ### P1-3: Neo4j k8s 安装链路失败 - Makefile - VALID_K8S_TARGETS 添加 neo4j - Makefile - %-k8s-install 添加 neo4j case(显式 skip,提示使用 Docker 或外部实例) - Makefile - %-k8s-uninstall 添加 neo4j case(显式 skip) 根因:install 目标无条件调用 neo4j-$(INSTALLER)-install,但 k8s 模式下 neo4j 不在 VALID_K8S_TARGETS 中,导致 "Unknown k8s target 'neo4j'" 错误 ## P2 修复(次要) ### P2-1: Neo4j 加入 Docker install 流程 - Makefile - install target 增加 neo4j-$(INSTALLER)-install,在 datamate 之前启动 - Makefile - VALID_SERVICE_TARGETS 增加 neo4j - Makefile - %-docker-install / %-docker-uninstall 增加 neo4j case ## 验证结果 - mvn test: 311 tests, 0 failures ✅ - eslint: 0 errors ✅ - tsc --noEmit: 通过 ✅ - vite build: 成功 (17.71s) ✅ - Python tests: 46 passed ✅ - make -n install INSTALLER=k8s: 不再报 unknown target ✅ - make -n neo4j-k8s-install: 正确显示 skip 消息 ✅
215 lines
7.4 KiB
Python
215 lines
7.4 KiB
Python
"""KG 服务 REST 客户端。
|
|
|
|
通过 httpx 调用 Java 侧 knowledge-graph-service 的查询 API,
|
|
包括全文检索和子图导出。
|
|
|
|
失败策略:fail-open —— KG 服务不可用时返回空结果 + 日志告警。
|
|
"""
|
|
|
|
from __future__ import annotations

from urllib.parse import quote

import httpx

from app.core.logging import get_logger
from app.module.kg_graphrag.cache import get_cache, make_cache_key
from app.module.kg_graphrag.models import EntitySummary, RelationSummary
|
|
|
|
# Module-level logger; KGServiceClient uses it to record failed KG requests
# before returning an empty (fail-open) result.
logger = get_logger(__name__)
|
|
|
|
|
|
class KGServiceClient:
    """REST client for the Java-side knowledge-graph (KG) query API.

    Wraps the full-text search and subgraph-export endpoints using a lazily
    created ``httpx.AsyncClient``.

    Failure policy is fail-open: when a request or response conversion fails,
    the public query methods log the exception and return an empty result
    instead of raising. Failed lookups are never written to the cache.
    """

    def __init__(
        self,
        *,
        base_url: str = "http://datamate-backend:8080",
        internal_token: str = "",
        timeout: float = 30.0,
    ) -> None:
        """Store connection settings; the HTTP client is created on first use.

        Args:
            base_url: Root URL of the KG service. Trailing slashes are removed.
            internal_token: Value sent as ``X-Internal-Token``; the header is
                omitted when this is empty.
            timeout: Timeout value handed to ``httpx.AsyncClient``.
        """
        self._base_url = base_url.rstrip("/")
        self._internal_token = internal_token
        self._timeout = timeout
        self._client: httpx.AsyncClient | None = None

    @classmethod
    def from_settings(cls) -> KGServiceClient:
        """Build a client from ``app.core.config.settings``."""
        # Imported at call time rather than at module import time.
        from app.core.config import settings

        return cls(
            base_url=settings.graphrag_kg_service_url,
            internal_token=settings.graphrag_kg_internal_token,
            timeout=30.0,
        )

    def _get_client(self) -> httpx.AsyncClient:
        """Return the shared ``httpx.AsyncClient``, creating it if needed."""
        if self._client is None:
            self._client = httpx.AsyncClient(
                base_url=self._base_url,
                timeout=self._timeout,
            )
        return self._client

    def _headers(self, user_id: str = "") -> dict[str, str]:
        """Build the request headers, skipping any header whose value is empty."""
        headers: dict[str, str] = {}
        if self._internal_token:
            headers["X-Internal-Token"] = self._internal_token
        if user_id:
            headers["X-User-Id"] = user_id
        return headers

    @staticmethod
    def _graph_path(graph_id: str, suffix: str) -> str:
        """Return the request path of an endpoint below one graph.

        ``graph_id`` is percent-encoded as a single path segment so that
        reserved characters such as ``/``, ``?`` or ``#`` inside it cannot
        change the path or query string of the outgoing request.
        """
        segment = quote(str(graph_id), safe="")
        return f"/api/knowledge-graph/{segment}/{suffix}"

    @staticmethod
    def _unwrap_payload(body: object) -> object:
        """Strip the optional ``{"code": ..., "data": ...}`` response envelope.

        Returns ``body["data"]`` when it is a JSON object or array, otherwise
        returns ``body`` unchanged (the service may also answer unwrapped).
        """
        if isinstance(body, dict):
            inner = body.get("data")
            if isinstance(inner, (dict, list)):
                return inner
        return body

    @staticmethod
    def _entity_fields(raw: dict) -> dict[str, object]:
        """Extract ``EntitySummary`` keyword arguments from one JSON object.

        Missing keys and JSON ``null`` values both become ``""``; the id is
        always converted to ``str``.
        """
        raw_id = raw.get("id")
        fields: dict[str, object] = {"id": "" if raw_id is None else str(raw_id)}
        for key in ("name", "type", "description"):
            value = raw.get(key)
            fields[key] = "" if value is None else value
        return fields

    @staticmethod
    def _relation_fields(raw: dict, entities_by_id: dict) -> dict[str, object]:
        """Extract ``RelationSummary`` keyword arguments from one edge object.

        Endpoints are resolved through ``entities_by_id``; when an endpoint is
        not among the exported nodes its id is used as the name and its type
        is left empty.
        """

        def endpoint(key: str) -> tuple[object, object]:
            value = raw.get(key)
            entity_id = "" if value is None else str(value)
            entity = entities_by_id.get(entity_id)
            if entity is None:
                return entity_id, ""
            return entity.name, entity.type

        # The edge's endpoints are sourceEntityId / targetEntityId; a plain
        # "sourceId" field is the data-source id, not the source entity.
        source_name, source_type = endpoint("sourceEntityId")
        target_name, target_type = endpoint("targetEntityId")
        relation_type = raw.get("relationType")
        return {
            "source_name": source_name,
            "source_type": source_type,
            "target_name": target_name,
            "target_type": target_type,
            "relation_type": "" if relation_type is None else relation_type,
        }

    async def fulltext_search(
        self,
        graph_id: str,
        query: str,
        size: int = 10,
        user_id: str = "",
    ) -> list[EntitySummary]:
        """Run a full-text search on the KG service and return matching entities.

        Fail-open: any error while querying returns an empty list.
        Successful results are stored in the KG cache (the TTL is documented
        as being controlled by ``graphrag_cache_kg_ttl``).
        """
        cache = get_cache()
        cache_key = make_cache_key("fulltext", graph_id, query, size, user_id)
        cached = cache.get_kg(cache_key)
        if cached is not None:
            return cached
        try:
            result = await self._fulltext_search_impl(graph_id, query, size, user_id)
            cache.set_kg(cache_key, result)
            return result
        except Exception:
            logger.exception(
                "KG fulltext search failed for graph_id=%s (fail-open, returning empty)",
                graph_id,
            )
            return []

    async def _fulltext_search_impl(
        self,
        graph_id: str,
        query: str,
        size: int,
        user_id: str,
    ) -> list[EntitySummary]:
        """Call the search endpoint and convert the hits; raises on any failure."""
        resp = await self._get_client().get(
            self._graph_path(graph_id, "query/search"),
            params={"q": query, "size": size},
            headers=self._headers(user_id),
        )
        resp.raise_for_status()

        # The Java side returns a PagedResponse<SearchHitVO>, either bare
        # ({"page": 0, "content": [...]}) or inside {"code": 200, "data": ...}.
        payload = self._unwrap_payload(resp.json())
        if isinstance(payload, dict):
            # PagedResponse keeps the hits under "content"; tolerate null.
            items = payload.get("content") or []
        elif isinstance(payload, list):
            items = payload
        else:
            raise ValueError(
                f"unexpected KG search payload type: {type(payload).__name__}"
            )
        return [EntitySummary(**self._entity_fields(item)) for item in items]

    async def get_subgraph(
        self,
        graph_id: str,
        entity_ids: list[str],
        depth: int = 1,
        user_id: str = "",
    ) -> tuple[list[EntitySummary], list[RelationSummary]]:
        """Fetch the N-hop subgraph around the given seed entities.

        Fail-open: any error while querying returns ``([], [])``.
        Successful results are stored in the KG cache (the TTL is documented
        as being controlled by ``graphrag_cache_kg_ttl``).
        """
        cache = get_cache()
        # Sort the ids so the same seed set maps to one key regardless of order.
        cache_key = make_cache_key("subgraph", graph_id, sorted(entity_ids), depth, user_id)
        cached = cache.get_kg(cache_key)
        if cached is not None:
            return cached
        try:
            result = await self._get_subgraph_impl(graph_id, entity_ids, depth, user_id)
            cache.set_kg(cache_key, result)
            return result
        except Exception:
            logger.exception(
                "KG subgraph export failed for graph_id=%s (fail-open, returning empty)",
                graph_id,
            )
            return [], []

    async def _get_subgraph_impl(
        self,
        graph_id: str,
        entity_ids: list[str],
        depth: int,
        user_id: str,
    ) -> tuple[list[EntitySummary], list[RelationSummary]]:
        """Call the subgraph-export endpoint and convert it; raises on any failure."""
        resp = await self._get_client().post(
            self._graph_path(graph_id, "query/subgraph/export"),
            params={"depth": depth},
            json={"entityIds": entity_ids},
            headers=self._headers(user_id),
        )
        resp.raise_for_status()

        # The Java side returns a SubgraphExportVO, either bare
        # ({"nodes": [...], "edges": [...]}) or inside {"code": 200, "data": ...}.
        payload = self._unwrap_payload(resp.json())
        if not isinstance(payload, dict):
            raise ValueError(
                f"unexpected KG subgraph payload type: {type(payload).__name__}"
            )
        nodes_raw = payload.get("nodes") or []
        edges_raw = payload.get("edges") or []

        # ExportNodeVO: id, name, type, description, properties (Map)
        entities = [EntitySummary(**self._entity_fields(node)) for node in nodes_raw]

        # id -> entity lookup used to resolve edge endpoint names and types.
        entities_by_id = {entity.id: entity for entity in entities}

        # ExportEdgeVO: sourceEntityId, targetEntityId, relationType
        relations = [
            RelationSummary(**self._relation_fields(edge, entities_by_id))
            for edge in edges_raw
        ]
        return entities, relations

    async def close(self) -> None:
        """Close the underlying HTTP client, if one was created."""
        if self._client is not None:
            await self._client.aclose()
            self._client = None
|