import logging
import os
from typing import Optional
from urllib.parse import quote

from pydantic import SecretStr, model_validator
from pydantic_settings import BaseSettings

_logger = logging.getLogger(__name__)

# Known weak default credentials; these must not be used in production.
_BLOCKED_DEFAULT_PASSWORDS = {"password", "123456", "admin", "root", "datamate123"}
_BLOCKED_DEFAULT_TOKENS = {"abc123abc123", "EMPTY"}


class Settings(BaseSettings):
    """Application configuration.

    Values are read from the process environment and from a ``.env`` file
    (see ``Config``); anything not supplied falls back to the defaults
    declared below. Two ``after`` validators run once the fields are
    populated: ``build_database_url`` derives ``database_url`` when it is
    empty, and ``check_default_credentials`` warns about (or rejects) known
    weak default credentials.
    """

    class Config:
        env_file = ".env"
        case_sensitive = False
        extra = 'ignore'

    # Service
    app_name: str = "DataMate Python Backend"
    app_version: str = "1.0.0"
    app_description: str = "Adapter for integrating Data Management System with Label Studio"
    host: str = "0.0.0.0"
    port: int = 18000

    # CORS
    # allowed_origins: List[str] = ["*"]
    # allowed_methods: List[str] = ["*"]
    # allowed_headers: List[str] = ["*"]

    # Log
    log_level: str = "INFO"
    debug: bool = True
    log_file_dir: str = "/var/log/datamate/backend-python"

    # Database
    mysql_host: str = "datamate-database"
    mysql_port: int = 3306
    mysql_user: str = "root"
    mysql_password: str = "password"
    mysql_database: str = "datamate"

    database_url: str = ""  # Will be overridden by build_database_url() if not provided

    @model_validator(mode='after')
    def build_database_url(self):
        """Build ``database_url`` from the MySQL fields when it is not provided.

        An explicitly configured ``database_url`` is left untouched. When both
        ``mysql_user`` and ``mysql_password`` are non-empty they are embedded
        in the URL; otherwise the URL is built without a credentials part.

        ``mysql_user`` and ``mysql_password`` are expected as raw (unencoded)
        values. They are percent-encoded here so that characters such as
        ``@``, ``/``, ``:`` or ``%`` cannot break the URL structure.

        Returns:
            This settings instance, with ``database_url`` populated.
        """
        if self.database_url:
            return self

        location = f"{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
        if self.mysql_password and self.mysql_user:
            # safe="" also encodes "/", which would otherwise terminate the
            # authority part of the URL early.
            user = quote(self.mysql_user, safe="")
            password = quote(self.mysql_password, safe="")
            self.database_url = f"mysql+aiomysql://{user}:{password}@{location}"
        else:
            self.database_url = f"mysql+aiomysql://{location}"
        return self

    # Label Studio
    label_studio_base_url: str = "http://label-studio:8000"
    label_studio_username: Optional[str] = "admin@demo.com"
    # NOTE(review): this default is not listed in _BLOCKED_DEFAULT_PASSWORDS,
    # so check_default_credentials never flags it - confirm that is intended.
    label_studio_password: Optional[str] = "demoadmin"
    label_studio_user_token: Optional[str] = "abc123abc123"  # Legacy Token

    label_studio_local_document_root: str = "/label-studio/local"  # Label Studio local file storage path
    label_studio_file_path_prefix: str = "/data/local-files/?d="  # Label Studio local file serving URL prefix

    ls_task_page_size: int = 1000

    # DataMate
    dm_file_path_prefix: str = "/dataset"  # DM storage folder prefix

    # DataMate Backend (Java) - used to read file content through the
    # "download/preview" endpoints
    datamate_backend_base_url: str = "http://datamate-backend:8080/api"

    # Knowledge Graph - LLM triple-extraction settings
    # NOTE(review): "EMPTY" is in _BLOCKED_DEFAULT_TOKENS, but the API key
    # fields are not inspected by check_default_credentials.
    kg_llm_api_key: SecretStr = SecretStr("EMPTY")
    kg_llm_base_url: Optional[str] = None
    kg_llm_model: str = "gpt-4o-mini"
    kg_llm_temperature: float = 0.0
    kg_llm_timeout_seconds: int = 60
    kg_llm_max_retries: int = 2

    # Knowledge Graph - entity alignment settings
    kg_alignment_enabled: bool = False
    kg_alignment_embedding_model: str = "text-embedding-3-small"
    kg_alignment_vector_threshold: float = 0.92
    kg_alignment_llm_threshold: float = 0.78

    # GraphRAG fused-query settings
    graphrag_enabled: bool = False
    graphrag_milvus_uri: str = "http://milvus-standalone:19530"
    graphrag_kg_service_url: str = "http://datamate-kg:8080"
    graphrag_kg_internal_token: str = ""

    # GraphRAG - retrieval strategy defaults
    graphrag_vector_top_k: int = 5
    graphrag_graph_depth: int = 2
    graphrag_graph_max_entities: int = 20
    graphrag_vector_weight: float = 0.6
    graphrag_graph_weight: float = 0.4

    # GraphRAG - LLM (when empty, the kg_llm_* settings are reused)
    graphrag_llm_model: str = ""
    graphrag_llm_base_url: Optional[str] = None
    graphrag_llm_api_key: SecretStr = SecretStr("EMPTY")
    graphrag_llm_temperature: float = 0.1
    graphrag_llm_timeout_seconds: int = 60

    # GraphRAG - Embedding (when empty, the kg_alignment_embedding_* settings are reused)
    graphrag_embedding_model: str = ""

    # Annotation editor (Label Studio Editor)
    editor_max_text_bytes: int = 0  # <= 0 means unlimited; a positive value is the maximum byte count

    @model_validator(mode='after')
    def check_default_credentials(self):
        """Detect weak default credentials and refuse to start outside dev.

        The environment is taken from the ``DATAMATE_ENV`` environment
        variable (case-insensitive, defaulting to ``dev`` when unset):

        - dev/test/local/development: only a warning is logged.
        - anything else (prod, staging, ...): a ``ValueError`` is raised.

        Checked fields: ``mysql_password`` and ``label_studio_password``
        against ``_BLOCKED_DEFAULT_PASSWORDS``, and
        ``label_studio_user_token`` against ``_BLOCKED_DEFAULT_TOKENS``.

        Returns:
            This settings instance, unchanged.

        Raises:
            ValueError: A weak default credential is configured and the
                environment is not one of the development environments.
        """
        env = os.environ.get("DATAMATE_ENV", "dev").lower()
        is_dev = env in ("dev", "test", "local", "development")
        issues: list[str] = []

        if self.mysql_password in _BLOCKED_DEFAULT_PASSWORDS:
            # Deliberately omit the value: credentials do not belong in logs
            # or exception messages, even known-weak ones.
            issues.append("mysql_password is set to a weak default")
        if self.label_studio_password and self.label_studio_password in _BLOCKED_DEFAULT_PASSWORDS:
            issues.append("label_studio_password is set to a weak default")
        if self.label_studio_user_token and self.label_studio_user_token in _BLOCKED_DEFAULT_TOKENS:
            issues.append("label_studio_user_token is set to a weak default")

        if issues:
            msg = "SECURITY: Weak default credentials detected: " + "; ".join(issues)
            if is_dev:
                _logger.warning(msg + " (acceptable in dev/test, MUST change for production)")
            else:
                raise ValueError(
                    msg + ". Set proper credentials via environment variables "
                    "before deploying to production."
                )
        return self


# Global settings instance
settings = Settings()