You've already forked DataMate
修复从全新部署到运行的完整流程中的配置和路由问题。 ## P0 修复(功能失效) ### P0-1: GraphRAG KG 服务 URL 错误 - config.py - GRAPHRAG_KG_SERVICE_URL 从 http://datamate-kg:8080 改为 http://datamate-backend:8080(容器名修正) - kg_client.py - 修复 API 路径:/knowledge-graph/... → /api/knowledge-graph/... - kb_access.py - 同类问题修复:/knowledge-base/... → /api/knowledge-base/... - test_kb_access.py - 测试断言同步更新 根因:容器名 datamate-kg 不存在,且 httpx 绝对路径会丢弃 base_url 中的 /api 路径 ### P0-2: Vite 开发代理剥离 /api 前缀 - vite.config.ts - 删除 /api/knowledge-graph 专用代理规则(剥离 /api 导致 404),统一走 ^/api 规则 ## P1 修复(功能受损) ### P1-1: Gateway 缺少 KG Python 端点路由 - ApiGatewayApplication.java - 添加 /api/kg/** 路由(指向 kg-extraction Python 服务) - ApiGatewayApplication.java - 添加 /api/graphrag/** 路由(指向 GraphRAG 服务) ### P1-2: DATA_MANAGEMENT_URL 默认值缺 /api - KnowledgeGraphProperties.java - dataManagementUrl 默认值 http://localhost:8080 → http://localhost:8080/api - KnowledgeGraphProperties.java - annotationServiceUrl 默认值 http://localhost:8081 → http://localhost:8080/api(同 JVM) - application-knowledgegraph.yml - YAML 默认值同步更新 ### P1-3: Neo4j k8s 安装链路失败 - Makefile - VALID_K8S_TARGETS 添加 neo4j - Makefile - %-k8s-install 添加 neo4j case(显式 skip,提示使用 Docker 或外部实例) - Makefile - %-k8s-uninstall 添加 neo4j case(显式 skip) 根因:install 目标无条件调用 neo4j-$(INSTALLER)-install,但 k8s 模式下 neo4j 不在 VALID_K8S_TARGETS 中,导致 "Unknown k8s target 'neo4j'" 错误 ## P2 修复(次要) ### P2-1: Neo4j 加入 Docker install 流程 - Makefile - install target 增加 neo4j-$(INSTALLER)-install,在 datamate 之前启动 - Makefile - VALID_SERVICE_TARGETS 增加 neo4j - Makefile - %-docker-install / %-docker-uninstall 增加 neo4j case ## 验证结果 - mvn test: 311 tests, 0 failures ✅ - eslint: 0 errors ✅ - tsc --noEmit: 通过 ✅ - vite build: 成功 (17.71s) ✅ - Python tests: 46 passed ✅ - make -n install INSTALLER=k8s: 不再报 unknown target ✅ - make -n neo4j-k8s-install: 正确显示 skip 消息 ✅
158 lines
5.7 KiB
Python
158 lines
5.7 KiB
Python
from pydantic_settings import BaseSettings
|
|
from pydantic import SecretStr, model_validator
|
|
from typing import Optional
|
|
import logging
|
|
import os
|
|
|
|
_logger = logging.getLogger(__name__)

# Known weak default credentials; Settings.check_default_credentials rejects
# them outside dev/test environments.
# "demoadmin" is the default shipped for Settings.label_studio_password in this
# file, so it is listed here; otherwise that default could never be flagged.
_BLOCKED_DEFAULT_PASSWORDS = {
    "password",
    "123456",
    "admin",
    "root",
    "datamate123",
    "demoadmin",
}
# Placeholder tokens that must be replaced before a production deployment.
_BLOCKED_DEFAULT_TOKENS = {"abc123abc123", "EMPTY"}
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application configuration.

    Every field below has a default and can be overridden through the
    settings sources configured in ``Config`` (a ``.env`` file, matched
    case-insensitively, with unknown keys ignored).

    Two ``after`` validators run once the fields are populated:

    * ``build_database_url`` fills in ``database_url`` when it is empty.
    * ``check_default_credentials`` warns about (dev/test) or rejects
      (any other environment) known weak default credentials.
    """

    class Config:
        env_file = ".env"
        case_sensitive = False
        extra = 'ignore'

    # Service
    app_name: str = "DataMate Python Backend"
    app_version: str = "1.0.0"
    app_description: str = "Adapter for integrating Data Management System with Label Studio"

    host: str = "0.0.0.0"
    port: int = 18000

    # CORS
    # allowed_origins: List[str] = ["*"]
    # allowed_methods: List[str] = ["*"]
    # allowed_headers: List[str] = ["*"]

    # Log
    log_level: str = "INFO"
    debug: bool = True
    log_file_dir: str = "/var/log/datamate/backend-python"

    # Database
    mysql_host: str = "datamate-database"
    mysql_port: int = 3306
    mysql_user: str = "root"
    mysql_password: str = "password"
    mysql_database: str = "datamate"

    database_url: str = ""  # Will be overridden by build_database_url() if not provided

    @model_validator(mode='after')
    def build_database_url(self):
        """Build ``database_url`` from the MySQL fields when none was provided.

        An explicitly configured ``database_url`` is left untouched. Otherwise
        a ``mysql+aiomysql://`` URL is assembled from ``mysql_host``,
        ``mysql_port`` and ``mysql_database``; the ``user:password@`` part is
        included only when both ``mysql_user`` and ``mysql_password`` are
        non-empty.

        Returns:
            This settings instance, with ``database_url`` populated.
        """
        # Function-scope import keeps this block independent of the module's
        # import header.
        from urllib.parse import quote

        if self.database_url:
            return self

        location = f"{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
        if self.mysql_password and self.mysql_user:
            # Percent-encode the credentials: characters such as '@', '/', ':'
            # or '%' in a password would otherwise be read as URL delimiters
            # and corrupt the connection URL. safe="" also encodes '/', and
            # quote() (unlike quote_plus()) renders spaces as %20.
            # NOTE(review): assumes MYSQL_PASSWORD is supplied as the raw
            # password, not already percent-encoded - confirm for existing
            # deployments.
            user = quote(self.mysql_user, safe="")
            password = quote(self.mysql_password, safe="")
            self.database_url = f"mysql+aiomysql://{user}:{password}@{location}"
        else:
            self.database_url = f"mysql+aiomysql://{location}"
        return self

    # Label Studio
    label_studio_base_url: str = "http://label-studio:8000"
    label_studio_username: Optional[str] = "admin@demo.com"
    label_studio_password: Optional[str] = "demoadmin"
    label_studio_user_token: Optional[str] = "abc123abc123"  # Legacy Token

    label_studio_local_document_root: str = "/label-studio/local"  # Label Studio local file storage path
    label_studio_file_path_prefix: str = "/data/local-files/?d="  # Label Studio local file serving URL prefix

    ls_task_page_size: int = 1000

    # DataMate
    dm_file_path_prefix: str = "/dataset"  # DM storage folder prefix

    # DataMate Backend (Java) - used to read file contents through the
    # download/preview endpoints
    datamate_backend_base_url: str = "http://datamate-backend:8080/api"

    # Knowledge Graph - LLM triple-extraction settings
    kg_llm_api_key: SecretStr = SecretStr("EMPTY")
    kg_llm_base_url: Optional[str] = None
    kg_llm_model: str = "gpt-4o-mini"
    kg_llm_temperature: float = 0.0
    kg_llm_timeout_seconds: int = 60
    kg_llm_max_retries: int = 2

    # Knowledge Graph - entity alignment settings
    kg_alignment_enabled: bool = False
    kg_alignment_embedding_model: str = "text-embedding-3-small"
    kg_alignment_vector_threshold: float = 0.92
    kg_alignment_llm_threshold: float = 0.78

    # GraphRAG fused-query settings
    graphrag_enabled: bool = False
    graphrag_milvus_uri: str = "http://milvus-standalone:19530"
    graphrag_kg_service_url: str = "http://datamate-backend:8080"
    graphrag_kg_internal_token: str = ""

    # GraphRAG - retrieval strategy defaults
    graphrag_vector_top_k: int = 5
    graphrag_graph_depth: int = 2
    graphrag_graph_max_entities: int = 20
    graphrag_vector_weight: float = 0.6
    graphrag_graph_weight: float = 0.4

    # GraphRAG - LLM (empty values fall back to the kg_llm_* settings)
    graphrag_llm_model: str = ""
    graphrag_llm_base_url: Optional[str] = None
    graphrag_llm_api_key: SecretStr = SecretStr("EMPTY")
    graphrag_llm_temperature: float = 0.1
    graphrag_llm_timeout_seconds: int = 60

    # GraphRAG - Embedding (empty falls back to the kg_alignment_embedding_* settings)
    graphrag_embedding_model: str = ""

    # GraphRAG - cache settings
    graphrag_cache_enabled: bool = True
    graphrag_cache_kg_maxsize: int = 256
    graphrag_cache_kg_ttl: int = 300
    graphrag_cache_embedding_maxsize: int = 512
    graphrag_cache_embedding_ttl: int = 600

    # Annotation editor (Label Studio Editor) related
    editor_max_text_bytes: int = 0  # <= 0 means no limit; a positive value is the maximum size in bytes

    @model_validator(mode='after')
    def check_default_credentials(self):
        """Detect weak default credentials and refuse to start outside dev/test.

        The environment is read from the ``DATAMATE_ENV`` environment variable
        (case-insensitive, defaulting to ``dev`` when unset):

        - dev / test / local / development: only a warning is logged
        - anything else (prod, staging, ...): ``ValueError`` is raised

        Checked fields: ``mysql_password``, ``label_studio_password`` and
        ``label_studio_user_token``.

        Returns:
            This settings instance, unchanged.

        Raises:
            ValueError: a weak default credential is configured and the
                environment is not one of the dev/test names above.
        """
        env = os.environ.get("DATAMATE_ENV", "dev").lower()
        is_dev = env in ("dev", "test", "local", "development")
        issues: list[str] = []

        # The credential value itself is deliberately kept out of the message
        # so that it never reaches logs or tracebacks.
        if self.mysql_password in _BLOCKED_DEFAULT_PASSWORDS:
            issues.append("mysql_password is set to a weak default")

        if self.label_studio_password and self.label_studio_password in _BLOCKED_DEFAULT_PASSWORDS:
            issues.append("label_studio_password is set to a weak default")

        if self.label_studio_user_token and self.label_studio_user_token in _BLOCKED_DEFAULT_TOKENS:
            issues.append("label_studio_user_token is set to a weak default")

        if not issues:
            return self

        msg = "SECURITY: Weak default credentials detected: " + "; ".join(issues)
        if is_dev:
            _logger.warning("%s (acceptable in dev/test, MUST change for production)", msg)
            return self

        raise ValueError(
            msg + ". Set proper credentials via environment variables "
            "before deploying to production."
        )
|
|
|
|
# Global settings instance, created once when this module is imported.
# Instantiation runs the Settings validators, so a weak default credential in a
# non-dev DATAMATE_ENV makes this line (and therefore the import) fail.
settings = Settings()
|