You've already forked DataMate
核心功能:
- Neo4j 索引优化(entityType, graphId, properties.name)
- Redis 缓存(Java 侧,3 个缓存区,TTL 可配置)
- LRU 缓存(Python 侧,KG + Embedding,线程安全)
- 细粒度缓存清除(graphId 前缀匹配)
- 失败路径缓存清除(finally 块)

新增文件(Java 侧,7 个):
- V2__PerformanceIndexes.java - Flyway 迁移,创建 3 个索引
- IndexHealthService.java - 索引健康监控
- RedisCacheConfig.java - Spring Cache + Redis 配置
- GraphCacheService.java - 缓存清除管理器
- CacheableIntegrationTest.java - 集成测试(10 tests)
- GraphCacheServiceTest.java - 单元测试(19 tests)
- V2__PerformanceIndexesTest.java, IndexHealthServiceTest.java

新增文件(Python 侧,2 个):
- cache.py - 内存 TTL+LRU 缓存(cachetools)
- test_cache.py - 单元测试(20 tests)

修改文件(Java 侧,9 个):
- GraphEntityService.java - 添加 @Cacheable,缓存清除
- GraphQueryService.java - 添加 @Cacheable(包含用户权限上下文)
- GraphRelationService.java - 添加缓存清除
- GraphSyncService.java - 添加缓存清除(finally 块,失败路径)
- KnowledgeGraphProperties.java - 添加 Cache 配置类
- application-knowledgegraph.yml - 添加 Redis 和缓存 TTL 配置
- GraphEntityServiceTest.java - 添加 verify(cacheService) 断言
- GraphRelationServiceTest.java - 添加 verify(cacheService) 断言
- GraphSyncServiceTest.java - 添加失败路径缓存清除测试

修改文件(Python 侧,5 个):
- kg_client.py - 集成缓存(fulltext_search, get_subgraph)
- interface.py - 添加 /cache/stats 和 /cache/clear 端点
- config.py - 添加缓存配置字段
- pyproject.toml - 添加 cachetools 依赖
- test_kg_client.py - 添加 _disable_cache fixture

安全修复(3 轮迭代):
- P0: 缓存 key 用户隔离(防止跨用户数据泄露)
- P1-1: 同步子步骤后的缓存清除(18 个方法)
- P1-2: 实体创建后的搜索缓存清除
- P1-3: 失败路径缓存清除(finally 块)
- P2-1: 细粒度缓存清除(graphId 前缀匹配,避免跨图谱冲刷)
- P2-2: 服务层测试添加 verify(cacheService) 断言

测试结果:
- Java: 280 tests pass ✅ (270 → 280, +10 new)
- Python: 154 tests pass ✅ (140 → 154, +14 new)

缓存配置:
- kg:entities - 实体缓存,TTL 1h
- kg:queries - 查询结果缓存,TTL 5min
- kg:search - 全文搜索缓存,TTL 3min
- KG cache (Python) - 256 entries, 5min TTL
- Embedding cache (Python) - 512 entries, 10min TTL
52 lines
1.4 KiB
TOML
52 lines
1.4 KiB
TOML
# PEP 621 project metadata for the DataMate Python backend.
[project]
name = "datamate-python"
version = "0.1.0"
description = "This is the Python backend of DataMate."
authors = [
    { name = "Jason Wang", email = "jasonwong2019@outlook.com" },
]
license = { text = "MIT" }
readme = "README.md"
# Python 3.12 or newer, below 4.0.
requires-python = ">=3.12,<4.0.0"
# Runtime dependencies as PEP 508 strings. Every range carries an explicit
# upper bound, so moving past it requires a deliberate edit here.
dependencies = [
    "uvicorn[standard] (>=0.38.0,<0.39.0)",
    "aiomysql (>=0.3.2,<0.4.0)",
    "pymysql (>=1.1.2,<2.0.0)",
    "aiosqlite (>=0.21.0,<0.22.0)",
    "httpx (>=0.28.1,<0.29.0)",
    "pydantic-settings (>=2.12.0,<3.0.0)",
    "python-multipart (>=0.0.20,<0.0.21)",
    "python-dotenv (>=1.2.1,<2.0.0)",
    "python-dateutil (>=2.9.0.post0,<3.0.0)",
    "pyyaml (>=6.0.3,<7.0.0)",
    "unstructured (>=0.18.21,<0.19.0)",
    "markdown (>=3.10,<4.0)",
    "langchain-community (>=0.4.1,<0.5.0)",
    "jsonschema (>=4.25.1,<5.0.0)",
    "greenlet (>=3.3.0,<4.0.0)",
    "docx2txt (>=0.9,<0.10)",
    "openpyxl (>=3.1.5,<4.0.0)",
    "xlrd (>=2.0.1,<3.0.0)",
    "jq (>=1.10.0,<2.0.0)",
    "openai (>=2.9.0,<3.0.0)",
    "langchain-openai (>=1.1.1,<2.0.0)",
    "langchain (>=1.1.3,<2.0.0)",
    "langchain-experimental (>=0.3.0,<1.0.0)",
    "pydantic (>=2.12.5,<3.0.0)",
    "sqlalchemy (>=2.0.45,<3.0.0)",
    "fastapi (>=0.124.0,<0.125.0)",
    "Pillow (>=11.0.0,<12.0.0)",
    "pymilvus (>=2.5.0,<3.0.0)",
    # In-memory TTL + LRU cache used on the Python side.
    "cachetools (>=5.5.0,<6.0.0)",
]

# PEP 517 build configuration: the package is built with poetry-core 2.x.
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"

# Poetry-specific packaging: include the `app` package in the distribution.
[tool.poetry]
packages = [{ include = "app" }]