feat(kg): 实现 Phase 3.3 性能优化

核心功能：
- Neo4j 索引优化（entityType, graphId, properties.name）
- Redis 缓存（Java 侧，3 个缓存区，TTL 可配置）
- LRU 缓存（Python 侧，KG + Embedding，线程安全）
- 细粒度缓存清除（graphId 前缀匹配）
- 失败路径缓存清除（finally 块）

新增文件（Java 侧，8 个）：
- V2__PerformanceIndexes.java - Flyway 迁移，创建 3 个索引
- IndexHealthService.java - 索引健康监控
- RedisCacheConfig.java - Spring Cache + Redis 配置
- GraphCacheService.java - 缓存清除管理器
- CacheableIntegrationTest.java - 集成测试（10 tests）
- GraphCacheServiceTest.java - 单元测试（19 tests）
- V2__PerformanceIndexesTest.java, IndexHealthServiceTest.java

新增文件（Python 侧，2 个）：
- cache.py - 内存 TTL+LRU 缓存（cachetools）
- test_cache.py - 单元测试（20 tests）

修改文件（Java 侧，9 个）：
- GraphEntityService.java - 添加 @Cacheable，缓存清除
- GraphQueryService.java - 添加 @Cacheable（包含用户权限上下文）
- GraphRelationService.java - 添加缓存清除
- GraphSyncService.java - 添加缓存清除（finally 块，失败路径）
- KnowledgeGraphProperties.java - 添加 Cache 配置类
- application-knowledgegraph.yml - 添加 Redis 和缓存 TTL 配置
- GraphEntityServiceTest.java - 添加 verify(cacheService) 断言
- GraphRelationServiceTest.java - 添加 verify(cacheService) 断言
- GraphSyncServiceTest.java - 添加失败路径缓存清除测试

修改文件（Python 侧，5 个）：
- kg_client.py - 集成缓存（fulltext_search, get_subgraph）
- interface.py - 添加 /cache/stats 和 /cache/clear 端点
- config.py - 添加缓存配置字段
- pyproject.toml - 添加 cachetools 依赖
- test_kg_client.py - 添加 _disable_cache fixture

安全修复（3 轮迭代）：
- P0: 缓存 key 用户隔离（防止跨用户数据泄露）
- P1-1: 同步子步骤后的缓存清除（18 个方法）
- P1-2: 实体创建后的搜索缓存清除
- P1-3: 失败路径缓存清除（finally 块）
- P2-1: 细粒度缓存清除（graphId 前缀匹配，避免跨图谱冲刷）
- P2-2: 服务层测试添加 verify(cacheService) 断言

测试结果：
- Java: 280 tests pass ✅ (270 → 280, +10 new)
- Python: 154 tests pass ✅ (140 → 154, +14 new)

缓存配置：
- kg:entities - 实体缓存，TTL 1h
- kg:queries - 查询结果缓存，TTL 5min
- kg:search - 全文搜索缓存，TTL 3min
- KG cache (Python) - 256 entries, 5min TTL
- Embedding cache (Python) - 512 entries, 10min TTL
2026-02-20 18:28:33 +08:00
parent 39338df808
commit 9b6ff59a11
24 changed files with 1629 additions and 14 deletions
--- a/runtime/datamate-python/app/module/kg_graphrag/kg_client.py
+++ b/runtime/datamate-python/app/module/kg_graphrag/kg_client.py
@@ -11,6 +11,7 @@ from __future__ import annotations
 import httpx

 from app.core.logging import get_logger
+from app.module.kg_graphrag.cache import get_cache, make_cache_key
 from app.module.kg_graphrag.models import EntitySummary, RelationSummary

 logger = get_logger(__name__)
@@ -67,9 +68,17 @@ class KGServiceClient:
        """调用 KG 服务全文检索，返回匹配的实体列表。

        Fail-open: KG 服务不可用时返回空列表。
+        结果会被缓存（TTL 由 graphrag_cache_kg_ttl 控制）。
        """
+        cache = get_cache()
+        cache_key = make_cache_key("fulltext", graph_id, query, size, user_id)
+        cached = cache.get_kg(cache_key)
+        if cached is not None:
+            return cached
        try:
-            return await self._fulltext_search_impl(graph_id, query, size, user_id)
+            result = await self._fulltext_search_impl(graph_id, query, size, user_id)
+            cache.set_kg(cache_key, result)
+            return result
        except Exception:
            logger.exception(
                "KG fulltext search failed for graph_id=%s (fail-open, returning empty)",
@@ -123,9 +132,17 @@ class KGServiceClient:
        """获取种子实体的 N-hop 子图。

        Fail-open: KG 服务不可用时返回空子图。
+        结果会被缓存（TTL 由 graphrag_cache_kg_ttl 控制）。
        """
+        cache = get_cache()
+        cache_key = make_cache_key("subgraph", graph_id, sorted(entity_ids), depth, user_id)
+        cached = cache.get_kg(cache_key)
+        if cached is not None:
+            return cached
        try:
-            return await self._get_subgraph_impl(graph_id, entity_ids, depth, user_id)
+            result = await self._get_subgraph_impl(graph_id, entity_ids, depth, user_id)
+            cache.set_kg(cache_key, result)
+            return result
        except Exception:
            logger.exception(
                "KG subgraph export failed for graph_id=%s (fail-open, returning empty)",