You've already forked DataMate
feat: Labeling Frontend adaptations + Backend build and deploy + Logging improvement (#55)
* feat: Front-end data annotation page adaptation to the backend API. * feat: Implement labeling configuration editor and enhance annotation task creation form * feat: add python backend build and deployment; add backend configuration for Label Studio integration and improve logging setup * refactor: remove duplicate log configuration
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import model_validator
|
||||
from typing import Optional, List
|
||||
import os
|
||||
from pathlib import Path
|
||||
@@ -17,124 +18,60 @@ class Settings(BaseSettings):
|
||||
app_name: str = "Label Studio Adapter"
|
||||
app_version: str = "1.0.0"
|
||||
app_description: str = "Adapter for integrating Data Management System with Label Studio"
|
||||
|
||||
# 日志配置
|
||||
log_level: str = "INFO"
|
||||
debug: bool = True
|
||||
log_file_dir: str = "/var/log/datamate"
|
||||
|
||||
# 服务器配置
|
||||
host: str = "0.0.0.0"
|
||||
port: int = 8000
|
||||
|
||||
# CORS配置
|
||||
allowed_origins: List[str] = ["*"]
|
||||
allowed_methods: List[str] = ["*"]
|
||||
allowed_headers: List[str] = ["*"]
|
||||
# allowed_origins: List[str] = ["*"]
|
||||
# allowed_methods: List[str] = ["*"]
|
||||
# allowed_headers: List[str] = ["*"]
|
||||
|
||||
# MySQL数据库配置 (优先级1)
|
||||
mysql_host: Optional[str] = None
|
||||
mysql_host: str = "datamate-database"
|
||||
mysql_port: int = 3306
|
||||
mysql_user: Optional[str] = None
|
||||
mysql_password: Optional[str] = None
|
||||
mysql_database: Optional[str] = None
|
||||
|
||||
# PostgreSQL数据库配置 (优先级2)
|
||||
postgres_host: Optional[str] = None
|
||||
postgres_port: int = 5432
|
||||
postgres_user: Optional[str] = None
|
||||
postgres_password: Optional[str] = None
|
||||
postgres_database: Optional[str] = None
|
||||
|
||||
# SQLite数据库配置 (优先级3 - 兜底)
|
||||
sqlite_path: str = "data/labelstudio_adapter.db"
|
||||
mysql_user: str = "root"
|
||||
mysql_password: str = "password"
|
||||
mysql_database: str = "datamate"
|
||||
|
||||
# 直接数据库URL配置(如果提供,将覆盖上述配置)
|
||||
database_url: Optional[str] = None
|
||||
# 初始值为空字符串,在 model_validator 中会被设置为完整的 URL
|
||||
database_url: str = ""
|
||||
|
||||
@model_validator(mode='after')
def build_database_url(self):
    """Build ``database_url`` from the MySQL settings when none was provided.

    An explicitly configured ``database_url`` is left untouched. Otherwise a
    ``mysql+aiomysql://`` URL is assembled from ``mysql_user``,
    ``mysql_password``, ``mysql_host``, ``mysql_port`` and ``mysql_database``.

    Returns:
        The settings instance itself.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    if self.database_url:
        return self

    # Percent-encode the credentials: characters such as "@", ":" or "/" in a
    # user name or password would otherwise be read as URL delimiters.
    credentials = ""
    if self.mysql_user:
        credentials = quote(self.mysql_user, safe="")
        if self.mysql_password:
            credentials += ":" + quote(self.mysql_password, safe="")
        credentials += "@"

    self.database_url = (
        f"mysql+aiomysql://{credentials}"
        f"{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
    )
    return self
|
||||
|
||||
# 日志配置
|
||||
log_level: str = "DEBUG"
|
||||
|
||||
# =========================
|
||||
# Label Studio 服务配置
|
||||
# =========================
|
||||
label_studio_base_url: str = "http://label-studio:8080"
|
||||
label_studio_username: Optional[str] = None # Label Studio 用户名(用于登录)
|
||||
label_studio_password: Optional[str] = None # Label Studio 密码(用于登录)
|
||||
label_studio_user_token: Optional[str] = None # Legacy Token
|
||||
label_studio_base_url: str = "http://label-studio:8000"
|
||||
label_studio_username: Optional[str] = "admin@demo.com" # Label Studio 用户名(用于登录)
|
||||
label_studio_password: Optional[str] = "demoadmin" # Label Studio 密码(用于登录)
|
||||
label_studio_user_token: Optional[str] = "abc123abc123" # Legacy Token
|
||||
|
||||
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files" # Label Studio容器中的本地存储基础路径
|
||||
label_studio_local_storage_dataset_base_path: str = "/label-studio/local" # Label Studio容器中的本地存储基础路径
|
||||
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
|
||||
|
||||
ls_task_page_size: int = 1000
|
||||
|
||||
|
||||
# =========================
|
||||
# Data Management 服务配置
|
||||
# =========================
|
||||
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
|
||||
|
||||
|
||||
@property
def computed_database_url(self) -> str:
    """Return the async database URL, selected by priority.

    Priority: explicit ``database_url`` > MySQL > PostgreSQL > SQLite.
    A MySQL or PostgreSQL URL is only built when host, user, password and
    database are all set. The SQLite fallback creates the parent directory
    of ``sqlite_path`` if it does not exist yet.
    """
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import quote

    # An explicitly provided URL always wins.
    if self.database_url:
        return self.database_url

    def _escape(value) -> str:
        # Percent-encode so "@", ":" or "/" in credentials cannot be
        # mistaken for URL delimiters.
        return quote(str(value), safe="")

    # Priority 1: MySQL
    if all([self.mysql_host, self.mysql_user, self.mysql_password, self.mysql_database]):
        return (
            f"mysql+aiomysql://{_escape(self.mysql_user)}:{_escape(self.mysql_password)}"
            f"@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
        )

    # Priority 2: PostgreSQL
    if all([self.postgres_host, self.postgres_user, self.postgres_password, self.postgres_database]):
        return (
            f"postgresql+asyncpg://{_escape(self.postgres_user)}:{_escape(self.postgres_password)}"
            f"@{self.postgres_host}:{self.postgres_port}/{self.postgres_database}"
        )

    # Priority 3: SQLite (fallback)
    sqlite_full_path = Path(self.sqlite_path).absolute()
    # Make sure the directory holding the database file exists.
    sqlite_full_path.parent.mkdir(parents=True, exist_ok=True)
    return f"sqlite+aiosqlite:///{sqlite_full_path}"
|
||||
|
||||
@property
def sync_database_url(self) -> str:
    """Return a synchronous-driver URL, intended for database migrations.

    Takes ``computed_database_url`` and swaps a known async driver prefix
    for its synchronous counterpart. URLs with an unrecognised prefix are
    returned unchanged.
    """
    async_url = self.computed_database_url

    # Async driver prefix -> synchronous driver prefix.
    sync_replacements = {
        "mysql+aiomysql://": "mysql+pymysql://",
        "postgresql+asyncpg://": "postgresql+psycopg2://",
        "sqlite+aiosqlite:///": "sqlite:///",
    }

    for async_driver, sync_driver in sync_replacements.items():
        if async_url.startswith(async_driver):
            # Swap only the leading prefix; a plain str.replace would also
            # rewrite the same text if it appeared later in the URL.
            return sync_driver + async_url[len(async_driver):]

    return async_url
|
||||
|
||||
def get_database_info(self) -> dict:
    """Summarise the active database configuration.

    Returns:
        A dict with the database ``type`` (derived from the URL prefix),
        the async ``url`` and the matching ``sync_url``.
    """
    active_url = self.computed_database_url

    # URL prefix -> human-readable database name, checked in order.
    known_prefixes = (
        ("mysql", "MySQL"),
        ("postgresql", "PostgreSQL"),
        ("sqlite", "SQLite"),
    )
    db_type = next(
        (label for prefix, label in known_prefixes if active_url.startswith(prefix)),
        "Unknown",
    )

    info = {"type": db_type, "url": active_url}
    info["sync_url"] = self.sync_database_url
    return info
|
||||
|
||||
|
||||
# 全局设置实例
|
||||
settings = Settings()
|
||||
|
||||
@@ -3,52 +3,68 @@ import sys
|
||||
from pathlib import Path
|
||||
from app.core.config import settings
|
||||
|
||||
class CenteredLevelNameFormatter(logging.Formatter):
    """Formatter that centers the level name in an 8-character field."""

    def format(self, record):
        """Format ``record`` with its level name centered to 8 characters.

        The record's ``levelname`` is restored afterwards, so other handlers
        and formatters that receive the same record see the unpadded name.
        """
        original_levelname = record.levelname
        record.levelname = original_levelname.center(8)
        try:
            return super().format(record)
        finally:
            # The same LogRecord object is handed to every handler; undo the
            # temporary padding so it does not leak into their output.
            record.levelname = original_levelname
|
||||
|
||||
def setup_logging():
    """Configure application logging.

    Installs a console handler (stdout) and a UTF-8 file handler writing to
    ``<settings.log_file_dir>/python-backend.log`` on the root logger, both
    at ``settings.log_level`` and both using ``CenteredLevelNameFormatter``.
    It then re-routes the Uvicorn loggers to the console handler and limits
    SQLAlchemy, HTTPX and HTTPCore to ERROR.
    """
    log_format = "%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    level = getattr(logging, settings.log_level.upper())

    # Style setting - centered level names
    formatter = CenteredLevelNameFormatter(log_format, date_format)

    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)

    log_dir = Path(settings.log_file_dir)
    # parents=True: the configured directory may be nested below a path that
    # does not exist yet (e.g. a fresh container image).
    log_dir.mkdir(parents=True, exist_ok=True)
    file_handler = logging.FileHandler(
        log_dir / "python-backend.log",
        encoding="utf-8"
    )
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)

    # Root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.addHandler(console_handler)
    root_logger.addHandler(file_handler)

    # Uvicorn: drop its own handlers and send output through ours.
    # NOTE(review): if these loggers still propagate to the root logger,
    # their records reach console_handler twice - confirm against the
    # Uvicorn log config used at startup.
    uvicorn_levels = (
        ("uvicorn", logging.INFO),
        ("uvicorn.access", logging.DEBUG),
        ("uvicorn.error", logging.ERROR),
    )
    for logger_name, logger_level in uvicorn_levels:
        uvicorn_logger = logging.getLogger(logger_name)
        uvicorn_logger.handlers.clear()
        uvicorn_logger.addHandler(console_handler)
        uvicorn_logger.setLevel(logger_level)

    # SQLAlchemy (ERROR only). No handler is attached here: the logger
    # propagates to the root logger, which already owns console_handler, so
    # attaching it again would print every record twice.
    logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR)

    # Minimize noise from HTTPX and HTTPCore
    logging.getLogger("httpx").setLevel(logging.ERROR)
    logging.getLogger("httpcore").setLevel(logging.ERROR)
|
||||
|
||||
def get_logger(name: str) -> logging.Logger:
    """Return the logger registered under ``name``."""
    named_logger = logging.getLogger(name)
    return named_logger
|
||||
@@ -6,17 +6,11 @@ from typing import AsyncGenerator
|
||||
|
||||
logger = get_logger(__name__)
|
||||
|
||||
# 获取数据库配置信息
|
||||
db_info = settings.get_database_info()
|
||||
logger.info(f"使用数据库: {db_info['type']} || 连接URL: {db_info['url']}")
|
||||
|
||||
# 创建数据库引擎
|
||||
engine = create_async_engine(
|
||||
settings.computed_database_url,
|
||||
settings.database_url,
|
||||
echo=False, # 关闭SQL调试日志以减少输出
|
||||
future=True,
|
||||
# SQLite特殊配置
|
||||
connect_args={"check_same_thread": False} if "sqlite" in settings.computed_database_url else {}
|
||||
future=True
|
||||
)
|
||||
|
||||
# 创建会话工厂
|
||||
|
||||
@@ -19,29 +19,32 @@ from .exception import (
|
||||
general_exception_handler
|
||||
)
|
||||
|
||||
# 设置日志
|
||||
setup_logging()
|
||||
logger = get_logger(__name__)
|
||||
|
||||
def _mask_database_url(url: str) -> str:
    """Return ``url`` with any password replaced by ``***`` for safe logging."""
    # Function-scope import so the block does not depend on file-level imports.
    from urllib.parse import urlsplit, urlunsplit

    parts = urlsplit(url)
    userinfo, at_sign, host = parts.netloc.rpartition("@")
    user, colon, _password = userinfo.partition(":")
    if not at_sign or not colon:
        # No "user:password@" section - nothing to hide.
        return url
    return urlunsplit(parts._replace(netloc=f"{user}:***@{host}"))


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    On startup the database connection is checked with ``SELECT 1``; any
    failure is logged and re-raised. The configured Label Studio base URL
    is logged, control is handed to the application, and a message is
    logged on shutdown.
    """
    # @startup
    logger.info("DataMate Python Backend starting...")

    # Database connection validation
    try:
        async with AsyncSessionLocal() as session:
            await session.execute(text("SELECT 1"))
        logger.info("Database connection validated successfully.")
        # Never write the password to the log; show a masked placeholder.
        logger.info(f"Database: mysql+aiomysql://{settings.mysql_user}:***@{settings.mysql_host}:{settings.mysql_port}/{settings.mysql_database}")
    except Exception as e:
        logger.error(f"Database connection validation failed: {e}")
        # database_url embeds the credentials, so mask it before logging.
        logger.debug(f"Connection details: {_mask_database_url(settings.database_url)}")
        raise

    # Label Studio
    # TODO Add actual connectivity check if needed
    logger.info(f"Label Studio: {settings.label_studio_base_url}")

    yield

    # @shutdown
    logger.info("DataMate Python Backend shutting down ...")
|
||||
|
||||
# 创建FastAPI应用
|
||||
@@ -53,19 +56,24 @@ app = FastAPI(
|
||||
lifespan=lifespan
|
||||
)
|
||||
|
||||
# 配置CORS中间件
|
||||
app.add_middleware(
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.allowed_origins,
|
||||
allow_credentials=True,
|
||||
allow_methods=settings.allowed_methods,
|
||||
allow_headers=settings.allowed_headers,
|
||||
)
|
||||
# CORS Middleware
|
||||
# app.add_middleware(
|
||||
# CORSMiddleware,
|
||||
# allow_origins=settings.allowed_origins,
|
||||
# allow_credentials=True,
|
||||
# allow_methods=settings.allowed_methods,
|
||||
# allow_headers=settings.allowed_headers,
|
||||
# )
|
||||
|
||||
# 注册路由
|
||||
app.include_router(router)
|
||||
|
||||
logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes])
|
||||
# 输出注册的路由(每行一个)
|
||||
logger.debug("Registered routes:")
|
||||
for route in app.routes:
|
||||
route_path = getattr(route, "path", None)
|
||||
if route_path:
|
||||
logger.debug(f" {route_path}")
|
||||
|
||||
# 注册全局异常处理器
|
||||
app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore
|
||||
|
||||
@@ -1,20 +1,10 @@
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from typing import List, Optional
|
||||
from fastapi import APIRouter
|
||||
|
||||
from app.db.session import get_db
|
||||
from app.module.shared.schema import StandardResponse
|
||||
from app.module.dataset import DatasetManagementService
|
||||
from app.core.logging import get_logger
|
||||
from app.core.config import settings
|
||||
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
|
||||
|
||||
from ..client import LabelStudioClient
|
||||
from ..service.sync import SyncService
|
||||
from ..service.mapping import DatasetMappingService
|
||||
from ..schema import (
|
||||
ConfigResponse
|
||||
)
|
||||
from ..schema import ConfigResponse
|
||||
|
||||
|
||||
router = APIRouter(
|
||||
|
||||
@@ -21,7 +21,7 @@ class DatasetMappingCreateRequest(BaseModel):
|
||||
|
||||
class Config:
|
||||
# allow population by field name when constructing model programmatically
|
||||
allow_population_by_field_name = True
|
||||
validate_by_name = True
|
||||
|
||||
class DatasetMappingCreateResponse(BaseResponseModel):
|
||||
"""数据集映射 创建 响应模型"""
|
||||
|
||||
Reference in New Issue
Block a user