You've already forked DataMate
实现功能:
- 创建 kg_extraction/interface.py(FastAPI 路由)
- 实现 POST /api/kg/extract(单条文本抽取)
- 实现 POST /api/kg/extract/batch(批量抽取,最多 50 条)
- 集成到 FastAPI 主路由(/api/kg/ 前缀)

技术实现:
- 配置管理:从环境变量读取 LLM 配置(API Key、Base URL、Model、Temperature)
- 安全性:
  - API Key 使用 SecretStr 保护
  - 错误信息脱敏(使用 trace_id,不暴露原始异常)
  - 请求文本不写入日志(使用 SHA-256 hash)
  - 强制要求 X-User-Id 头(鉴权边界)
- 超时控制:
  - kg_llm_timeout_seconds(60秒)
  - kg_llm_max_retries(2次)
- 输入校验:
  - graph_id 和 source_id 使用 UUID pattern
  - source_type 使用 Enum(4个值)
  - allowed_nodes/relationships 元素使用正则约束(ASCII,1-50字符)
- 审计日志:记录 caller、trace_id、text_hash

代码审查:
- 经过 3 轮 Codex 审查和 2 轮 Claude 修复
- 所有问题已解决(5个 P1/P2 + 3个 P3)
- 语法检查通过

API 端点:
- POST /api/kg/extract:单条文本抽取
- POST /api/kg/extract/batch:批量抽取(最多 50 条)

配置环境变量:
- KG_LLM_API_KEY:LLM API 密钥
- KG_LLM_BASE_URL:自定义端点(可选)
- KG_LLM_MODEL:模型名称(默认 gpt-4o-mini)
- KG_LLM_TEMPERATURE:生成温度(默认 0.0)
- KG_LLM_TIMEOUT_SECONDS:超时时间(默认 60)
- KG_LLM_MAX_RETRIES:重试次数(默认 2)
81 lines
2.8 KiB
Python
81 lines
2.8 KiB
Python
from pydantic_settings import BaseSettings
|
|
from pydantic import SecretStr, model_validator
|
|
from typing import Optional
|
|
|
|
class Settings(BaseSettings):
    """Application configuration loaded from the environment and ``.env``.

    Each attribute is a setting with the default shown here. Values are read
    from the ``.env`` file / environment with case-insensitive names, and
    entries that do not correspond to a declared setting are ignored.
    """

    # pydantic v2 configuration; replaces the deprecated inner ``class Config``
    # while keeping the same three options.
    model_config = {
        "env_file": ".env",
        "case_sensitive": False,
        "extra": "ignore",
    }

    # Service
    app_name: str = "DataMate Python Backend"
    app_version: str = "1.0.0"
    app_description: str = "Adapter for integrating Data Management System with Label Studio"

    host: str = "0.0.0.0"
    port: int = 18000

    # CORS (currently disabled)
    # allowed_origins: List[str] = ["*"]
    # allowed_methods: List[str] = ["*"]
    # allowed_headers: List[str] = ["*"]

    # Log
    log_level: str = "INFO"
    debug: bool = True
    log_file_dir: str = "/var/log/datamate/backend-python"

    # Database
    # NOTE(review): the user/password defaults look like development
    # placeholders -- confirm they are always overridden in real deployments.
    mysql_host: str = "datamate-database"
    mysql_port: int = 3306
    mysql_user: str = "root"
    mysql_password: str = "password"
    mysql_database: str = "datamate"

    # Left empty by default; build_database_url() fills it in from the
    # mysql_* settings when no explicit URL is provided.
    database_url: str = ""

    @model_validator(mode="after")
    def build_database_url(self) -> "Settings":
        """Derive ``database_url`` from the MySQL settings when it is unset.

        An explicitly configured ``database_url`` is left untouched. Otherwise
        a ``mysql+aiomysql://`` URL is assembled from the ``mysql_*`` settings.
        The user name and password are percent-encoded so that reserved
        characters in credentials (``@``, ``/``, ``:``, ``%`` ...) cannot
        corrupt the URL, and a user configured without a password is still
        included as ``user@host``. When no user is configured the URL carries
        no credentials at all.

        Returns:
            This settings instance, as required of ``mode="after"`` validators.
        """
        if self.database_url:
            return self

        # Function-scope import: keeps the fix local to this block.
        from urllib.parse import quote

        credentials = ""
        if self.mysql_user:
            # safe="" also encodes "/", which quote() leaves alone by default.
            credentials = quote(self.mysql_user, safe="")
            if self.mysql_password:
                credentials += ":" + quote(self.mysql_password, safe="")
            credentials += "@"

        self.database_url = (
            f"mysql+aiomysql://{credentials}"
            f"{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
        )
        return self

    # Label Studio
    label_studio_base_url: str = "http://label-studio:8000"
    label_studio_username: Optional[str] = "admin@demo.com"
    label_studio_password: Optional[str] = "demoadmin"
    label_studio_user_token: Optional[str] = "abc123abc123"  # Legacy Token

    label_studio_local_document_root: str = "/label-studio/local"  # Label Studio local file storage path
    label_studio_file_path_prefix: str = "/data/local-files/?d="  # Label Studio local file serving URL prefix

    ls_task_page_size: int = 1000

    # DataMate
    dm_file_path_prefix: str = "/dataset"  # DataMate storage folder prefix

    # DataMate Backend (Java) - used to read file contents through its
    # download/preview endpoints
    datamate_backend_base_url: str = "http://datamate-backend:8080/api"

    # Knowledge Graph - LLM triple-extraction configuration
    # SecretStr keeps the key masked when the settings object is printed.
    # NOTE(review): "EMPTY" appears to be a placeholder default -- confirm.
    kg_llm_api_key: SecretStr = SecretStr("EMPTY")
    kg_llm_base_url: Optional[str] = None
    kg_llm_model: str = "gpt-4o-mini"
    kg_llm_temperature: float = 0.0
    kg_llm_timeout_seconds: int = 60
    kg_llm_max_retries: int = 2

    # Annotation editor (Label Studio Editor) settings
    editor_max_text_bytes: int = 0  # <= 0 means no limit; a positive value is the maximum size in bytes
|
|
|
|
# 全局设置实例
|
|
# Shared module-level settings object; it is constructed when this module is
# imported, so configuration is read (and validated) at import time.
settings = Settings()
|