feat: Labeling Frontend adaptations + Backend build and deploy + Logging improvement (#55)

* feat: Front-end data annotation page adaptation to the backend API.

* feat: Implement labeling configuration editor and enhance annotation task creation form

* feat: add python backend build and deployment; add backend configuration for Label Studio integration and improve logging setup

* refactor: remove duplicate log configuration
This commit is contained in:
Jason Wang
2025-11-05 01:55:53 +08:00
committed by GitHub
parent f3958f08d9
commit b5fe787c20
13 changed files with 190 additions and 210 deletions

View File

@@ -1,4 +1,5 @@
from pydantic_settings import BaseSettings
from pydantic import model_validator
from typing import Optional, List
import os
from pathlib import Path
@@ -17,124 +18,60 @@ class Settings(BaseSettings):
app_name: str = "Label Studio Adapter"
app_version: str = "1.0.0"
app_description: str = "Adapter for integrating Data Management System with Label Studio"
# 日志配置
log_level: str = "INFO"
debug: bool = True
log_file_dir: str = "/var/log/datamate"
# 服务器配置
host: str = "0.0.0.0"
port: int = 8000
# CORS配置
allowed_origins: List[str] = ["*"]
allowed_methods: List[str] = ["*"]
allowed_headers: List[str] = ["*"]
# allowed_origins: List[str] = ["*"]
# allowed_methods: List[str] = ["*"]
# allowed_headers: List[str] = ["*"]
# MySQL数据库配置 (优先级1)
mysql_host: Optional[str] = None
mysql_host: str = "datamate-database"
mysql_port: int = 3306
mysql_user: Optional[str] = None
mysql_password: Optional[str] = None
mysql_database: Optional[str] = None
# PostgreSQL数据库配置 (优先级2)
postgres_host: Optional[str] = None
postgres_port: int = 5432
postgres_user: Optional[str] = None
postgres_password: Optional[str] = None
postgres_database: Optional[str] = None
# SQLite数据库配置 (优先级3 - 兜底)
sqlite_path: str = "data/labelstudio_adapter.db"
mysql_user: str = "root"
mysql_password: str = "password"
mysql_database: str = "datamate"
# 直接数据库URL配置(如果提供,将覆盖上述配置)
database_url: Optional[str] = None
# 初始值为空字符串,在 model_validator 中会被设置为完整的 URL
database_url: str = ""
@model_validator(mode='after')
def build_database_url(self):
    """Build ``database_url`` from the MySQL settings when none was provided.

    An explicitly configured ``database_url`` is left untouched. Otherwise an
    ``aiomysql`` URL is assembled from ``mysql_host``, ``mysql_port`` and
    ``mysql_database``; credentials are included only when both
    ``mysql_user`` and ``mysql_password`` are non-empty.

    Returns:
        The settings instance itself, as required of an ``after`` validator.
    """
    if self.database_url:
        return self

    # Local import keeps this block self-contained.
    from urllib.parse import quote_plus

    location = f"{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
    if self.mysql_password and self.mysql_user:
        # Percent-encode the credentials: characters such as '@', ':' or '/'
        # in a password would otherwise be parsed as URL delimiters.
        credentials = f"{quote_plus(self.mysql_user)}:{quote_plus(self.mysql_password)}@"
    else:
        credentials = ""
    self.database_url = f"mysql+aiomysql://{credentials}{location}"
    return self
# 日志配置
log_level: str = "DEBUG"
# =========================
# Label Studio 服务配置
# =========================
label_studio_base_url: str = "http://label-studio:8080"
label_studio_username: Optional[str] = None # Label Studio 用户名(用于登录)
label_studio_password: Optional[str] = None # Label Studio 密码(用于登录)
label_studio_user_token: Optional[str] = None # Legacy Token
label_studio_base_url: str = "http://label-studio:8000"
label_studio_username: Optional[str] = "admin@demo.com" # Label Studio 用户名(用于登录)
label_studio_password: Optional[str] = "demoadmin" # Label Studio 密码(用于登录)
label_studio_user_token: Optional[str] = "abc123abc123" # Legacy Token
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files" # Label Studio容器中的本地存储基础路径
label_studio_local_storage_dataset_base_path: str = "/label-studio/local" # Label Studio容器中的本地存储基础路径
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
ls_task_page_size: int = 1000
# =========================
# Data Management 服务配置
# =========================
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
@property
def computed_database_url(self) -> str:
"""
根据优先级自动选择数据库连接URL
优先级:MySQL > PostgreSQL > SQLite3
"""
# 如果直接提供了database_url,优先使用
if self.database_url:
return self.database_url
# 优先级1: MySQL
if all([self.mysql_host, self.mysql_user, self.mysql_password, self.mysql_database]):
return f"mysql+aiomysql://{self.mysql_user}:{self.mysql_password}@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
# 优先级2: PostgreSQL
if all([self.postgres_host, self.postgres_user, self.postgres_password, self.postgres_database]):
return f"postgresql+asyncpg://{self.postgres_user}:{self.postgres_password}@{self.postgres_host}:{self.postgres_port}/{self.postgres_database}"
# 优先级3: SQLite (兜底)
sqlite_full_path = Path(self.sqlite_path).absolute()
# 确保目录存在
sqlite_full_path.parent.mkdir(parents=True, exist_ok=True)
return f"sqlite+aiosqlite:///{sqlite_full_path}"
@property
def sync_database_url(self) -> str:
"""
用于数据库迁移的同步连接URL
将异步驱动替换为同步驱动
"""
async_url = self.computed_database_url
# 替换异步驱动为同步驱动
sync_replacements = {
"mysql+aiomysql://": "mysql+pymysql://",
"postgresql+asyncpg://": "postgresql+psycopg2://",
"sqlite+aiosqlite:///": "sqlite:///"
}
for async_driver, sync_driver in sync_replacements.items():
if async_url.startswith(async_driver):
return async_url.replace(async_driver, sync_driver)
return async_url
def get_database_info(self) -> dict:
"""获取数据库配置信息"""
url = self.computed_database_url
if url.startswith("mysql"):
db_type = "MySQL"
elif url.startswith("postgresql"):
db_type = "PostgreSQL"
elif url.startswith("sqlite"):
db_type = "SQLite"
else:
db_type = "Unknown"
return {
"type": db_type,
"url": url,
"sync_url": self.sync_database_url
}
# 全局设置实例
settings = Settings()

View File

@@ -3,52 +3,68 @@ import sys
from pathlib import Path
from app.core.config import settings
class CenteredLevelNameFormatter(logging.Formatter):
    """Formatter that centers the level name in an 8-character column."""

    def format(self, record):
        """Format ``record`` with its level name centered to 8 characters.

        The record is shared by every handler attached to the logger, so the
        original ``levelname`` is restored after formatting. Other formatters
        therefore never see the padded value.
        """
        original_levelname = record.levelname
        record.levelname = original_levelname.center(8)
        try:
            return super().format(record)
        finally:
            record.levelname = original_levelname
def setup_logging():
    """Configure application-wide logging.

    Installs a stdout handler and a UTF-8 file handler
    (``<settings.log_file_dir>/python-backend.log``) on the root logger, both
    using ``CenteredLevelNameFormatter`` and the level named by
    ``settings.log_level``. Uvicorn loggers are routed to the console only,
    and noisy third-party loggers are limited to ERROR.
    """
    log_format = "%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    level = getattr(logging, settings.log_level.upper())

    # Handlers
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)

    log_dir = Path(settings.log_file_dir)
    # parents=True: the configured directory may be nested below a path that
    # does not exist yet (e.g. a fresh container image).
    log_dir.mkdir(parents=True, exist_ok=True)
    file_handler = logging.FileHandler(
        log_dir / "python-backend.log",
        encoding="utf-8"
    )
    file_handler.setLevel(level)

    # Style setting - Centered level names
    formatter = CenteredLevelNameFormatter(log_format, date_format)
    console_handler.setFormatter(formatter)
    file_handler.setFormatter(formatter)

    # Root Logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.addHandler(console_handler)
    root_logger.addHandler(file_handler)

    # Uvicorn: console only. Propagation to the root logger is disabled so a
    # record is not written to the console twice.
    uvicorn_logger = logging.getLogger("uvicorn")
    uvicorn_logger.handlers.clear()
    uvicorn_logger.addHandler(console_handler)
    uvicorn_logger.setLevel(logging.INFO)
    uvicorn_logger.propagate = False

    uvicorn_access = logging.getLogger("uvicorn.access")
    uvicorn_access.handlers.clear()
    uvicorn_access.addHandler(console_handler)
    uvicorn_access.setLevel(logging.DEBUG)
    uvicorn_access.propagate = False

    # "uvicorn.error" has no handler of its own: its records propagate to the
    # "uvicorn" logger above, which emits them exactly once.
    uvicorn_error = logging.getLogger("uvicorn.error")
    uvicorn_error.handlers.clear()
    uvicorn_error.setLevel(logging.ERROR)
    uvicorn_error.propagate = True

    # SQLAlchemy (ERROR only). Records propagate to the root logger, which
    # already writes to both console and file, so no extra handler is added.
    sqlalchemy_logger = logging.getLogger("sqlalchemy.engine")
    sqlalchemy_logger.setLevel(logging.ERROR)

    # Minimize noise from HTTPX and HTTPCore
    logging.getLogger("httpx").setLevel(logging.ERROR)
    logging.getLogger("httpcore").setLevel(logging.ERROR)
def get_logger(name: str) -> logging.Logger:
    """Return the logger registered under ``name``."""
    named_logger = logging.getLogger(name)
    return named_logger

View File

@@ -6,17 +6,11 @@ from typing import AsyncGenerator
logger = get_logger(__name__)
# 获取数据库配置信息
db_info = settings.get_database_info()
logger.info(f"使用数据库: {db_info['type']} || 连接URL: {db_info['url']}")
# 创建数据库引擎
engine = create_async_engine(
settings.computed_database_url,
settings.database_url,
echo=False, # 关闭SQL调试日志以减少输出
future=True,
# SQLite特殊配置
connect_args={"check_same_thread": False} if "sqlite" in settings.computed_database_url else {}
future=True
)
# 创建会话工厂

View File

@@ -19,29 +19,32 @@ from .exception import (
general_exception_handler
)
# 设置日志
setup_logging()
logger = get_logger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Manage application startup and shutdown.

    On startup the database connection is validated with ``SELECT 1``; a
    failure is logged and re-raised so the application does not start with a
    broken database. Control is then yielded to the running application, and
    a shutdown message is logged once it stops.
    """
    # Local import keeps this block self-contained.
    import re

    # @startup
    logger.info("DataMate Python Backend starting...")

    # Credentials must never reach the logs: mask the password part of the
    # connection URL before it is written anywhere.
    masked_db_url = re.sub(
        r"(?<=://)([^:/@]*):.*@", r"\1:***@", settings.database_url
    )

    # Database connection validation
    try:
        async with AsyncSessionLocal() as session:
            await session.execute(text("SELECT 1"))
        logger.info("Database connection validated successfully.")
        logger.info(f"Database: {masked_db_url}")
    except Exception as e:
        logger.error(f"Database connection validation failed: {e}")
        logger.debug(f"Connection details: {masked_db_url}")
        raise

    # Label Studio
    # TODO Add actual connectivity check if needed
    logger.info(f"Label Studio: {settings.label_studio_base_url}")

    yield

    # @shutdown
    logger.info("DataMate Python Backend shutting down ...")
# 创建FastAPI应用
@@ -53,19 +56,24 @@ app = FastAPI(
lifespan=lifespan
)
# 配置CORS中间件
app.add_middleware(
CORSMiddleware,
allow_origins=settings.allowed_origins,
allow_credentials=True,
allow_methods=settings.allowed_methods,
allow_headers=settings.allowed_headers,
)
# CORS Middleware
# app.add_middleware(
# CORSMiddleware,
# allow_origins=settings.allowed_origins,
# allow_credentials=True,
# allow_methods=settings.allowed_methods,
# allow_headers=settings.allowed_headers,
# )
# 注册路由
app.include_router(router)
logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes])
# 输出注册的路由(每行一个)
logger.debug("Registered routes:")
for route in app.routes:
route_path = getattr(route, "path", None)
if route_path:
logger.debug(f" {route_path}")
# 注册全局异常处理器
app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore

View File

@@ -1,20 +1,10 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional
from fastapi import APIRouter
from app.db.session import get_db
from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger
from app.core.config import settings
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from ..client import LabelStudioClient
from ..service.sync import SyncService
from ..service.mapping import DatasetMappingService
from ..schema import (
ConfigResponse
)
from ..schema import ConfigResponse
router = APIRouter(

View File

@@ -21,7 +21,7 @@ class DatasetMappingCreateRequest(BaseModel):
class Config:
# allow population by field name when constructing model programmatically
allow_population_by_field_name = True
validate_by_name = True
class DatasetMappingCreateResponse(BaseResponseModel):
"""数据集映射 创建 响应模型"""

View File

@@ -1,57 +1,16 @@
#!/bin/bash
set -e
echo "=========================================="
echo "Label Studio Adapter Starting..."
echo "=========================================="
# Label Studio 本地存储基础路径(从环境变量获取,默认值)
LABEL_STUDIO_LOCAL_BASE="${LABEL_STUDIO_LOCAL_BASE:-/label-studio/local_files}"
echo "=========================================="
echo "Ensuring Label Studio local storage directories exist..."
echo "Base path: ${LABEL_STUDIO_LOCAL_BASE}"
echo "=========================================="
# 创建必要的目录
mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/dataset"
mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/upload"
echo "✓ Directory 'dataset' ready: ${LABEL_STUDIO_LOCAL_BASE}/dataset"
echo "✓ Directory 'upload' ready: ${LABEL_STUDIO_LOCAL_BASE}/upload"
echo "=========================================="
echo "Directory initialization completed"
echo "=========================================="
# 等待数据库就绪(如果配置了数据库)
if [ -n "$MYSQL_HOST" ] || [ -n "$POSTGRES_HOST" ]; then
echo "Waiting for database to be ready..."
sleep 5
# Report the local document root only when the directory exists and local
# file serving is enabled. `[` is a command and needs a space before its
# arguments; the variables are quoted and defaulted so unset values are safe.
if [ -d "${LOCAL_FILES_DOCUMENT_ROOT:-}" ] && [ "${LOCAL_FILES_SERVING_ENABLED:-false}" = "true" ]; then
    echo "Using local document root: $LOCAL_FILES_DOCUMENT_ROOT"
fi
# 运行数据库迁移
echo "=========================================="
echo "Running database migrations..."
echo "=========================================="
alembic upgrade head
if [ $? -eq 0 ]; then
echo "✓ Database migrations completed successfully"
else
echo "⚠️ WARNING: Database migrations failed"
echo " The application may not work correctly"
fi
echo "=========================================="
# 启动应用
echo "=========================================="
echo "Starting Label Studio Adapter..."
echo "Host: ${HOST:-0.0.0.0}"
echo "Port: ${PORT:-18000}"
echo "Debug: ${DEBUG:-false}"
echo "Label Studio URL: ${LABEL_STUDIO_BASE_URL}"
echo "=========================================="
# 转换 LOG_LEVEL 为小写(uvicorn 要求小写)
LOG_LEVEL_LOWER=$(echo "${LOG_LEVEL:-info}" | tr '[:upper:]' '[:lower:]')

View File

@@ -1,5 +1,4 @@
uvicorn app.main:app \
--host 0.0.0.0 \
--port 18000 \
--reload \
--log-level debug
--reload