diff --git a/deployment/docker/datamate/backend.conf b/deployment/docker/datamate/backend.conf
index 7dce600..6b89b20 100644
--- a/deployment/docker/datamate/backend.conf
+++ b/deployment/docker/datamate/backend.conf
@@ -7,6 +7,20 @@ server {
 
     client_max_body_size 1024M;
 
+    location /api/synthesis/ {
+        proxy_pass http://datamate-backend-python:18000/api/synthesis/;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+
+    location /api/annotation/ {
+        proxy_pass http://datamate-backend-python:18000/api/annotation/;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+    }
+
     location /api/ {
         proxy_pass http://datamate-backend:8080/api/;
         proxy_set_header Host $host;
diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml
index d3f4f3d..20de073 100644
--- a/deployment/docker/datamate/docker-compose.yml
+++ b/deployment/docker/datamate/docker-compose.yml
@@ -15,6 +15,22 @@ services:
     depends_on:
       - datamate-database
 
+  # 1) backend (Python)
+  datamate-backend-python:
+    container_name: datamate-backend-python
+    image: datamate-backend-python
+    restart: on-failure
+    privileged: true
+    environment:
+      - log_level=DEBUG
+    volumes:
+      - dataset_volume:/dataset
+      - flow_volume:/flow
+      - log_volume:/var/log/datamate
+    networks: [ datamate ]
+    depends_on:
+      - datamate-database
+
   # 2) frontend(NodePort 30000)
   datamate-frontend:
     container_name: datamate-frontend
@@ -28,6 +44,7 @@ services:
     networks: [ datamate ]
     depends_on:
       - datamate-backend
+      - datamate-backend-python
 
   # 3) database
   datamate-database:
@@ -47,6 +64,8 @@ services:
       - ../../../scripts/db:/docker-entrypoint-initdb.d
       - ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf:ro
       - database_log_volume:/var/log/datamate/database
+    ports:
+      - "3306:3306"
     networks: [ datamate ]
 
 # 3) runtime
diff --git a/deployment/docker/label-studio/docker-compose.yml b/deployment/docker/label-studio/docker-compose.yml
index 059a3e4..e6c739c 100644
--- a/deployment/docker/label-studio/docker-compose.yml
+++ b/deployment/docker/label-studio/docker-compose.yml
@@ -1,6 +1,6 @@
 services:
 
-  app:
+  label-studio:
    stdin_open: true
    tty: true
    image: heartexlabs/label-studio:latest
@@ -11,7 +11,7 @@
    ports:
      - "8000:8000"
    depends_on:
-      - db
+      - pg-db
    environment:
      - DJANGO_DB=default
      - POSTGRE_NAME=postgres
@@ -23,17 +23,19 @@
      - LOCAL_FILES_SERVING_ENABLED=true
      - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local
      - USE_USERNAME_FOR_LOGIN=true
-      - LABEL_STUDIO_USERNAME=admin@huawei.com
-      - LABEL_STUDIO_PASSWORD=admin1234
+      - LABEL_STUDIO_USERNAME=admin@demo.com
+      - LABEL_STUDIO_PASSWORD=demoadmin
      - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true
      - LABEL_STUDIO_USER_TOKEN=abc123abc123
-      - LOG_LEVEL=INFO
+      - LOG_LEVEL=DEBUG
    volumes:
      - label-studio-data:/label-studio/data:rw
      - dataset_volume:/label-studio/local:rw
+    networks:
+      - datamate
    command: label-studio-uwsgi
 
-  db:
+  pg-db:
    image: pgautoupgrade/pgautoupgrade:13-alpine
    hostname: db
    restart: unless-stopped
@@ -42,9 +44,16 @@
      - POSTGRES_USER=postgres
    volumes:
      - label-studio-db:/var/lib/postgresql/data
+    networks:
+      - datamate
 
volumes:
  label-studio-data:
  label-studio-db:
  dataset_volume:
-    name: datamate-dataset-volume
\ No newline at end of file
+    name: datamate-dataset-volume
+
+networks:
+  datamate:
+    driver: bridge
+    name: datamate-network
\ No newline at end of file
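
With these deployment changes, nginx proxies /api/annotation/ and /api/synthesis/ to the new datamate-backend-python container on port 18000, everything else under /api/ still reaches the Java backend, and the Label Studio stack joins the shared datamate-network so the adapter can reach it by service name. A minimal routing smoke test, assuming the gateway that serves backend.conf is reachable from the host (the port 30000 below is an assumption taken from the frontend comment; adjust to your deployment):

    # Routing smoke test (stdlib only). GATEWAY is an assumed host/port.
    import urllib.error
    import urllib.request

    GATEWAY = "http://localhost:30000"  # assumption: published gateway/frontend port

    for path in ("/api/annotation/", "/api/synthesis/", "/api/"):
        url = GATEWAY + path
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                print(f"{path} -> HTTP {resp.status}")
        except urllib.error.HTTPError as e:
            # A 4xx/5xx response still proves the route reached a backend.
            print(f"{path} -> HTTP {e.code}")
        except OSError as e:
            print(f"{path} -> unreachable: {e}")
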
diff --git a/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx b/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
index 345290d..3118d74 100644
--- a/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
+++ b/frontend/src/pages/DataAnnotation/Home/DataAnnotation.tsx
@@ -47,11 +47,10 @@ export default function DataAnnotation() {
     let mounted = true;
     (async () => {
       try {
-        const cfg = await getConfigUsingGet();
-        const url = cfg?.data?.labelStudioUrl || "";
-        if (mounted) setLabelStudioBase((url).replace(/\/+$/, "") || null);
+        const baseUrl = `http://${window.location.hostname}:8000`;
+        if (mounted) setLabelStudioBase(baseUrl);
       } catch (e) {
-        if (mounted) setLabelStudioBase(null);
+        if (mounted) setLabelStudioBase(null);
       }
     })();
     return () => {
diff --git a/runtime/datamate-python/app/core/config.py b/runtime/datamate-python/app/core/config.py
index 3371ccf..9b8da57 100644
--- a/runtime/datamate-python/app/core/config.py
+++ b/runtime/datamate-python/app/core/config.py
@@ -1,4 +1,5 @@
 from pydantic_settings import BaseSettings
+from pydantic import model_validator
 from typing import Optional, List
 import os
 from pathlib import Path
@@ -17,124 +18,60 @@ class Settings(BaseSettings):
     app_name: str = "Label Studio Adapter"
     app_version: str = "1.0.0"
     app_description: str = "Adapter for integrating Data Management System with Label Studio"
+
+    # Logging configuration
+    log_level: str = "INFO"
     debug: bool = True
+    log_file_dir: str = "/var/log/datamate"
 
     # Server configuration
     host: str = "0.0.0.0"
     port: int = 8000
 
     # CORS configuration
-    allowed_origins: List[str] = ["*"]
-    allowed_methods: List[str] = ["*"]
-    allowed_headers: List[str] = ["*"]
+    # allowed_origins: List[str] = ["*"]
+    # allowed_methods: List[str] = ["*"]
+    # allowed_headers: List[str] = ["*"]
 
     # MySQL database configuration (priority 1)
-    mysql_host: Optional[str] = None
+    mysql_host: str = "datamate-database"
     mysql_port: int = 3306
-    mysql_user: Optional[str] = None
-    mysql_password: Optional[str] = None
-    mysql_database: Optional[str] = None
-
-    # PostgreSQL database configuration (priority 2)
-    postgres_host: Optional[str] = None
-    postgres_port: int = 5432
-    postgres_user: Optional[str] = None
-    postgres_password: Optional[str] = None
-    postgres_database: Optional[str] = None
-
-    # SQLite database configuration (priority 3 - fallback)
-    sqlite_path: str = "data/labelstudio_adapter.db"
+    mysql_user: str = "root"
+    mysql_password: str = "password"
+    mysql_database: str = "datamate"
 
     # Direct database URL (overrides the settings above if provided)
-    database_url: Optional[str] = None
+    # Starts out as an empty string; the model_validator below builds the full URL
+    database_url: str = ""
+
+    @model_validator(mode='after')
+    def build_database_url(self):
+        """Build database_url from the MySQL settings when it is not provided explicitly"""
+        if not self.database_url:
+            if self.mysql_password and self.mysql_user:
+                self.database_url = f"mysql+aiomysql://{self.mysql_user}:{self.mysql_password}@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
+            else:
+                self.database_url = f"mysql+aiomysql://{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
+        return self
 
-    # Logging configuration
-    log_level: str = "DEBUG"
     # =========================
     # Label Studio service configuration
     # =========================
-    label_studio_base_url: str = "http://label-studio:8080"
-    label_studio_username: Optional[str] = None  # Label Studio username (for login)
-    label_studio_password: Optional[str] = None  # Label Studio password (for login)
-    label_studio_user_token: Optional[str] = None  # Legacy Token
+    label_studio_base_url: str = "http://label-studio:8000"
+    label_studio_username: Optional[str] = "admin@demo.com"  # Label Studio username (for login)
+    label_studio_password: Optional[str] = "demoadmin"  # Label Studio password (for login)
+    label_studio_user_token: Optional[str] = "abc123abc123"  # Legacy Token
 
-    label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files"  # Base path for local storage inside the Label Studio container
+    label_studio_local_storage_dataset_base_path: str = "/label-studio/local"  # Base path for local storage inside the Label Studio container
     label_studio_file_path_prefix: str = "/data/local-files/?d="  # Path prefix for Label Studio local file serving
 
     ls_task_page_size: int = 1000
-
     # =========================
     # Data Management service configuration
     # =========================
     dm_file_path_prefix: str = "/dataset"  # DM storage folder prefix
-
-    @property
-    def computed_database_url(self) -> str:
-        """
-        Select the database connection URL automatically by priority:
-        MySQL > PostgreSQL > SQLite3
-        """
-        # If database_url is provided directly, use it
-        if self.database_url:
-            return self.database_url
-
-        # Priority 1: MySQL
-        if all([self.mysql_host, self.mysql_user, self.mysql_password, self.mysql_database]):
-            return f"mysql+aiomysql://{self.mysql_user}:{self.mysql_password}@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
-
-        # Priority 2: PostgreSQL
-        if all([self.postgres_host, self.postgres_user, self.postgres_password, self.postgres_database]):
-            return f"postgresql+asyncpg://{self.postgres_user}:{self.postgres_password}@{self.postgres_host}:{self.postgres_port}/{self.postgres_database}"
-
-        # Priority 3: SQLite (fallback)
-        sqlite_full_path = Path(self.sqlite_path).absolute()
-        # Make sure the directory exists
-        sqlite_full_path.parent.mkdir(parents=True, exist_ok=True)
-        return f"sqlite+aiosqlite:///{sqlite_full_path}"
-
-    @property
-    def sync_database_url(self) -> str:
-        """
-        Synchronous connection URL for database migrations
-        (replaces the async driver with its sync counterpart)
-        """
-        async_url = self.computed_database_url
-
-        # Replace async drivers with their sync counterparts
-        sync_replacements = {
-            "mysql+aiomysql://": "mysql+pymysql://",
-            "postgresql+asyncpg://": "postgresql+psycopg2://",
-            "sqlite+aiosqlite:///": "sqlite:///"
-        }
-
-        for async_driver, sync_driver in sync_replacements.items():
-            if async_url.startswith(async_driver):
-                return async_url.replace(async_driver, sync_driver)
-
-        return async_url
-
-    def get_database_info(self) -> dict:
-        """Return database configuration info"""
-        url = self.computed_database_url
-
-        if url.startswith("mysql"):
-            db_type = "MySQL"
-        elif url.startswith("postgresql"):
-            db_type = "PostgreSQL"
-        elif url.startswith("sqlite"):
-            db_type = "SQLite"
-        else:
-            db_type = "Unknown"
-
-        return {
-            "type": db_type,
-            "url": url,
-            "sync_url": self.sync_database_url
-        }
-
-
 # Global settings instance
 settings = Settings()
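
The Settings class now targets MySQL only, and build_database_url fills in database_url after field validation, so container environment variables (MYSQL_HOST, MYSQL_PASSWORD, or a full DATABASE_URL) still override the hard-coded defaults through the usual pydantic-settings mechanism. A small sketch of the resulting behaviour, assuming it is run where app.core.config is importable; the values shown are illustrative, not real credentials:

    # Sketch: how the new Settings resolves database_url (illustrative values).
    import os

    # Environment variables override the class defaults (case-insensitive mapping).
    os.environ["MYSQL_HOST"] = "127.0.0.1"
    os.environ["MYSQL_PASSWORD"] = "s3cret"

    from app.core.config import Settings

    s = Settings()
    # -> mysql+aiomysql://root:s3cret@127.0.0.1:3306/datamate
    print(s.database_url)

    # An explicit database_url short-circuits the validator entirely.
    s2 = Settings(database_url="mysql+aiomysql://u:p@db:3306/other")
    print(s2.database_url)
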
diff --git a/runtime/datamate-python/app/core/logging.py b/runtime/datamate-python/app/core/logging.py
index 6cb13aa..99e4825 100644
--- a/runtime/datamate-python/app/core/logging.py
+++ b/runtime/datamate-python/app/core/logging.py
@@ -3,52 +3,65 @@ import sys
 from pathlib import Path
 from app.core.config import settings
 
+class CenteredLevelNameFormatter(logging.Formatter):
+    """Center the level name in the log output"""
+
+    def format(self, record):
+        # Center the level name within 8 characters
+        record.levelname = record.levelname.center(8)
+        return super().format(record)
+
 def setup_logging():
-    """Configure application logging"""
-    # Create the logs directory
-    log_dir = Path("logs")
-    log_dir.mkdir(exist_ok=True)
-
-    # Configure the log format
-    log_format = "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s"
+    log_format = "%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
     date_format = "%Y-%m-%d %H:%M:%S"
 
-    # Create handlers
     console_handler = logging.StreamHandler(sys.stdout)
     console_handler.setLevel(getattr(logging, settings.log_level.upper()))
 
+    log_dir = Path(settings.log_file_dir)
+    log_dir.mkdir(exist_ok=True)
     file_handler = logging.FileHandler(
         log_dir / "python-backend.log",
         encoding="utf-8"
     )
     file_handler.setLevel(getattr(logging, settings.log_level.upper()))
 
-    error_handler = logging.FileHandler(
-        log_dir / "error.log",
-        encoding="utf-8"
-    )
-    error_handler.setLevel(logging.ERROR)
-
-    # Set the formatter
-    formatter = logging.Formatter(log_format, date_format)
+    # Style setting - centered level names
+    formatter = CenteredLevelNameFormatter(log_format, date_format)
     console_handler.setFormatter(formatter)
     file_handler.setFormatter(formatter)
-    error_handler.setFormatter(formatter)
 
-    # Configure the root logger
+    # Root logger
     root_logger = logging.getLogger()
     root_logger.setLevel(getattr(logging, settings.log_level.upper()))
     root_logger.addHandler(console_handler)
     root_logger.addHandler(file_handler)
-    root_logger.addHandler(error_handler)
 
-    # Configure third-party library log levels (reduce noisy logs)
-    logging.getLogger("uvicorn").setLevel(logging.ERROR)
-    logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR)  # hide SQL query logs
+    # Uvicorn
+    uvicorn_logger = logging.getLogger("uvicorn")
+    uvicorn_logger.handlers.clear()
+    uvicorn_logger.addHandler(console_handler)
+    uvicorn_logger.setLevel(logging.INFO)
+
+    uvicorn_access = logging.getLogger("uvicorn.access")
+    uvicorn_access.handlers.clear()
+    uvicorn_access.addHandler(console_handler)
+    uvicorn_access.setLevel(logging.DEBUG)
+
+    uvicorn_error = logging.getLogger("uvicorn.error")
+    uvicorn_error.handlers.clear()
+    uvicorn_error.addHandler(console_handler)
+    uvicorn_error.setLevel(logging.ERROR)
+
+    # SQLAlchemy (ERROR only)
+    sqlalchemy_logger = logging.getLogger("sqlalchemy.engine")
+    sqlalchemy_logger.setLevel(logging.ERROR)
+    sqlalchemy_logger.addHandler(console_handler)
+
+    # Minimize noise from HTTPX and HTTPCore
     logging.getLogger("httpx").setLevel(logging.ERROR)
     logging.getLogger("httpcore").setLevel(logging.ERROR)
 
 def get_logger(name: str) -> logging.Logger:
-    """Return a logger with the given name"""
     return logging.getLogger(name)
\ No newline at end of file
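
The new setup sends everything to the console plus a single python-backend.log under log_file_dir, with the level name padded to a fixed width. (The duplicated setFormatter/addHandler calls from the original patch are dropped above, since adding the same file handler twice would double every line in the log file.) A standalone, stdlib-only illustration of the resulting line layout, using the same format string:

    # Illustrative check of the log line layout produced by the formatter above.
    import logging
    import sys

    class CenteredLevelNameFormatter(logging.Formatter):
        def format(self, record):
            record.levelname = record.levelname.center(8)
            return super().format(record)

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(CenteredLevelNameFormatter(
        "%(asctime)s [%(levelname)s] - %(name)s - %(message)s", "%Y-%m-%d %H:%M:%S"))

    logger = logging.getLogger("demo")
    logger.addHandler(handler)
    logger.setLevel(logging.DEBUG)
    logger.info("hello")      # e.g. 2025-01-01 12:00:00 [  INFO  ] - demo - hello
    logger.warning("careful")
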
diff --git a/runtime/datamate-python/app/db/session.py b/runtime/datamate-python/app/db/session.py
index 7195c8f..8ec2db8 100644
--- a/runtime/datamate-python/app/db/session.py
+++ b/runtime/datamate-python/app/db/session.py
@@ -6,17 +6,11 @@ from typing import AsyncGenerator
 
 logger = get_logger(__name__)
 
-# Fetch database configuration info
-db_info = settings.get_database_info()
-logger.info(f"Using database: {db_info['type']} || connection URL: {db_info['url']}")
-
 # Create the database engine
 engine = create_async_engine(
-    settings.computed_database_url,
+    settings.database_url,
     echo=False,  # Disable SQL debug logging to reduce output
-    future=True,
-    # SQLite-specific configuration
-    connect_args={"check_same_thread": False} if "sqlite" in settings.computed_database_url else {}
+    future=True
 )
 
 # Create the session factory
diff --git a/runtime/datamate-python/app/main.py b/runtime/datamate-python/app/main.py
index 374d399..bebad11 100644
--- a/runtime/datamate-python/app/main.py
+++ b/runtime/datamate-python/app/main.py
@@ -19,29 +19,32 @@ from .exception import (
     general_exception_handler
 )
 
-# Set up logging
 setup_logging()
 logger = get_logger(__name__)
 
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Application lifespan management"""
-    # Initialization at startup
+    # @startup
     logger.info("DataMate Python Backend starting...")
-    # Validate the database connection
+
+    # Database connection validation
     try:
         async with AsyncSessionLocal() as session:
             await session.execute(text("SELECT 1"))
-            logger.info("Database connection validated successfully.")
+            logger.info(f"Database: mysql+aiomysql://{settings.mysql_user}:{settings.mysql_password}@{settings.mysql_host}:{settings.mysql_port}/{settings.mysql_database}")
     except Exception as e:
         logger.error(f"Database connection validation failed: {e}")
-        logger.debug(f"Connection details: {settings.computed_database_url}")
+        logger.debug(f"Connection details: {settings.database_url}")
         raise
 
+    # Label Studio
+    # TODO Add actual connectivity check if needed
+    logger.info(f"Label Studio: {settings.label_studio_base_url}")
+
     yield
 
-    # Cleanup at shutdown
+    # @shutdown
     logger.info("DataMate Python Backend shutting down ...")
 
 # Create the FastAPI app
@@ -53,19 +56,24 @@ app = FastAPI(
     lifespan=lifespan
 )
 
-# Configure the CORS middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=settings.allowed_origins,
-    allow_credentials=True,
-    allow_methods=settings.allowed_methods,
-    allow_headers=settings.allowed_headers,
-)
+# CORS Middleware
+# app.add_middleware(
+#     CORSMiddleware,
+#     allow_origins=settings.allowed_origins,
+#     allow_credentials=True,
+#     allow_methods=settings.allowed_methods,
+#     allow_headers=settings.allowed_headers,
+# )
 
 # Register routes
 app.include_router(router)
-logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes])
+# Log the registered routes (one per line)
+logger.debug("Registered routes:")
+for route in app.routes:
+    route_path = getattr(route, "path", None)
+    if route_path:
+        logger.debug(f"  {route_path}")
 
 # Register global exception handlers
 app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler)  # type: ignore
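
Note that the startup log above now interpolates settings.mysql_password into an INFO-level message, so the database password ends up in plain text in the console and in python-backend.log. If that is a concern, a small helper can keep the useful connection details while masking the secret; this is only a suggested sketch (the helper name is mine, not part of the diff):

    # Sketch of a credential-masking helper for the startup log (not part of the diff).
    def redacted_database_url(user: str, password: str, host: str, port: int, database: str) -> str:
        """Return the connection URL with the password replaced by '***'."""
        secret = "***" if password else ""
        return f"mysql+aiomysql://{user}:{secret}@{host}:{port}/{database}"

    # Possible usage in the lifespan hook instead of logging the raw password:
    # logger.info(f"Database: {redacted_database_url(settings.mysql_user, settings.mysql_password,
    #                                                settings.mysql_host, settings.mysql_port,
    #                                                settings.mysql_database)}")
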
diff --git a/runtime/datamate-python/app/module/annotation/interface/about.py b/runtime/datamate-python/app/module/annotation/interface/about.py
index f411227..1e34feb 100644
--- a/runtime/datamate-python/app/module/annotation/interface/about.py
+++ b/runtime/datamate-python/app/module/annotation/interface/about.py
@@ -1,20 +1,10 @@
-from fastapi import APIRouter, Depends, HTTPException, Query
-from sqlalchemy.ext.asyncio import AsyncSession
-from typing import List, Optional
+from fastapi import APIRouter
 
-from app.db.session import get_db
 from app.module.shared.schema import StandardResponse
-from app.module.dataset import DatasetManagementService
 from app.core.logging import get_logger
 from app.core.config import settings
 
-from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
-from ..client import LabelStudioClient
-from ..service.sync import SyncService
-from ..service.mapping import DatasetMappingService
-from ..schema import (
-    ConfigResponse
-)
+from ..schema import ConfigResponse
 
 
 router = APIRouter(
diff --git a/runtime/datamate-python/app/module/annotation/schema/mapping.py b/runtime/datamate-python/app/module/annotation/schema/mapping.py
index 734d020..6103e38 100644
--- a/runtime/datamate-python/app/module/annotation/schema/mapping.py
+++ b/runtime/datamate-python/app/module/annotation/schema/mapping.py
@@ -21,7 +21,7 @@ class DatasetMappingCreateRequest(BaseModel):
     class Config:
         # allow population by field name when constructing model programmatically
-        allow_population_by_field_name = True
+        validate_by_name = True
 
 
 class DatasetMappingCreateResponse(BaseResponseModel):
     """Dataset mapping creation response model"""
diff --git a/runtime/datamate-python/deploy/docker-entrypoint.sh b/runtime/datamate-python/deploy/docker-entrypoint.sh
index 7487951..3193ebc 100755
--- a/runtime/datamate-python/deploy/docker-entrypoint.sh
+++ b/runtime/datamate-python/deploy/docker-entrypoint.sh
@@ -1,57 +1,16 @@
 #!/bin/bash
 set -e
 
-echo "=========================================="
-echo "Label Studio Adapter Starting..."
-echo "=========================================="
-
-# Base path for Label Studio local storage (from env var, with a default)
-LABEL_STUDIO_LOCAL_BASE="${LABEL_STUDIO_LOCAL_BASE:-/label-studio/local_files}"
-
-echo "=========================================="
-echo "Ensuring Label Studio local storage directories exist..."
-echo "Base path: ${LABEL_STUDIO_LOCAL_BASE}"
-echo "=========================================="
-
-# Create the required directories
-mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/dataset"
-mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/upload"
-
-echo "✓ Directory 'dataset' ready: ${LABEL_STUDIO_LOCAL_BASE}/dataset"
-echo "✓ Directory 'upload' ready: ${LABEL_STUDIO_LOCAL_BASE}/upload"
-
-echo "=========================================="
-echo "Directory initialization completed"
-echo "=========================================="
-
-# Wait for the database to be ready (if a database is configured)
-if [ -n "$MYSQL_HOST" ] || [ -n "$POSTGRES_HOST" ]; then
-    echo "Waiting for database to be ready..."
-    sleep 5
+if [ -d "$LOCAL_FILES_DOCUMENT_ROOT" ] && [ "$LOCAL_FILES_SERVING_ENABLED" = "true" ]; then
+    echo "Using local document root: $LOCAL_FILES_DOCUMENT_ROOT"
 fi
 
-# Run database migrations
-echo "=========================================="
-echo "Running database migrations..."
-echo "=========================================="
-alembic upgrade head
-
-if [ $? -eq 0 ]; then
-    echo "✓ Database migrations completed successfully"
-else
-    echo "⚠️ WARNING: Database migrations failed"
-    echo "   The application may not work correctly"
-fi
-
-echo "=========================================="
-
 # Start the application
+echo "=========================================="
 echo "Starting Label Studio Adapter..."
 echo "Host: ${HOST:-0.0.0.0}"
 echo "Port: ${PORT:-18000}"
 echo "Debug: ${DEBUG:-false}"
-echo "Label Studio URL: ${LABEL_STUDIO_BASE_URL}"
-echo "=========================================="
 
 # Convert LOG_LEVEL to lowercase (uvicorn expects lowercase)
 LOG_LEVEL_LOWER=$(echo "${LOG_LEVEL:-info}" | tr '[:upper:]' '[:lower:]')
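
The slimmed-down entrypoint no longer runs alembic upgrade head or bootstraps the Label Studio local directories, so the schema must already exist before the adapter starts (for example via the SQL init scripts mounted into the MySQL container in the datamate compose file). The `[ -d ... ]` test above is also written with the required spaces and a quoted comparison, since the original `[-d $VAR ]` form would never match. Mirroring the SELECT 1 check from app/main.py, a standalone probe can confirm the database is reachable before bringing the container up; a sketch assuming the same settings module is importable:

    # Standalone DB reachability probe, mirroring the lifespan check in app/main.py.
    import asyncio

    from sqlalchemy import text
    from sqlalchemy.ext.asyncio import create_async_engine

    from app.core.config import settings

    async def probe() -> None:
        engine = create_async_engine(settings.database_url, echo=False)
        try:
            async with engine.connect() as conn:
                await conn.execute(text("SELECT 1"))
            print("database reachable")
        finally:
            await engine.dispose()

    if __name__ == "__main__":
        asyncio.run(probe())
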
-echo "==========================================" - -# Label Studio 本地存储基础路径(从环境变量获取,默认值) -LABEL_STUDIO_LOCAL_BASE="${LABEL_STUDIO_LOCAL_BASE:-/label-studio/local_files}" - -echo "==========================================" -echo "Ensuring Label Studio local storage directories exist..." -echo "Base path: ${LABEL_STUDIO_LOCAL_BASE}" -echo "==========================================" - -# 创建必要的目录 -mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/dataset" -mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/upload" - -echo "✓ Directory 'dataset' ready: ${LABEL_STUDIO_LOCAL_BASE}/dataset" -echo "✓ Directory 'upload' ready: ${LABEL_STUDIO_LOCAL_BASE}/upload" - -echo "==========================================" -echo "Directory initialization completed" -echo "==========================================" - -# 等待数据库就绪(如果配置了数据库) -if [ -n "$MYSQL_HOST" ] || [ -n "$POSTGRES_HOST" ]; then - echo "Waiting for database to be ready..." - sleep 5 +if [-d $LOCAL_FILES_DOCUMENT_ROOT ] && $LOCAL_FILES_SERVING_ENABLED; then + echo "Using local document root: $LOCAL_FILES_DOCUMENT_ROOT" fi -# 运行数据库迁移 -echo "==========================================" -echo "Running database migrations..." -echo "==========================================" -alembic upgrade head - -if [ $? -eq 0 ]; then - echo "✓ Database migrations completed successfully" -else - echo "⚠️ WARNING: Database migrations failed" - echo " The application may not work correctly" -fi - -echo "==========================================" - # 启动应用 +echo "==========================================" echo "Starting Label Studio Adapter..." echo "Host: ${HOST:-0.0.0.0}" echo "Port: ${PORT:-18000}" echo "Debug: ${DEBUG:-false}" -echo "Label Studio URL: ${LABEL_STUDIO_BASE_URL}" -echo "==========================================" # 转换 LOG_LEVEL 为小写(uvicorn 要求小写) LOG_LEVEL_LOWER=$(echo "${LOG_LEVEL:-info}" | tr '[:upper:]' '[:lower:]') diff --git a/runtime/datamate-python/uvicorn_start.sh b/runtime/datamate-python/uvicorn_start.sh index 9535a34..fc8783c 100755 --- a/runtime/datamate-python/uvicorn_start.sh +++ b/runtime/datamate-python/uvicorn_start.sh @@ -1,5 +1,4 @@ uvicorn app.main:app \ --host 0.0.0.0 \ --port 18000 \ - --reload \ - --log-level debug \ No newline at end of file + --reload \ No newline at end of file diff --git a/scripts/images/datamate-python/Dockerfile b/scripts/images/datamate-python/Dockerfile new file mode 100644 index 0000000..03cdc88 --- /dev/null +++ b/scripts/images/datamate-python/Dockerfile @@ -0,0 +1,36 @@ +FROM python:3.11-slim + +# Single-stage image with build cache optimization using BuildKit cache mounts. +# Note: to use the cache mount syntax you must build with BuildKit enabled: +# DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +# Install build deps needed to compile some wheels. We'll remove them afterwards to keep the +# final image small. Use --no-install-recommends to minimize installed packages. +# RUN apt-get update \ +# && apt-get install -y --no-install-recommends \ +# build-essential \ +# gcc \ +# && apt-get purge -y --auto-remove build-essential gcc \ +# && rm -rf /var/lib/apt/lists/* + +# Copy requirements first (leverages layer caching when requirements don't change) +COPY runtime/datamate-python /app + +# Install Python deps. Use BuildKit cache mount for pip cache to speed subsequent builds. +# The --mount=type=cache requires BuildKit. This keeps downloaded wheels/cache out of the final image. 
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install --upgrade --root-user-action=ignore pip setuptools wheel \
+    && pip install --root-user-action=ignore -r /app/requirements.txt
+
+COPY runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh
+RUN chmod +x /docker-entrypoint.sh || true
+
+# Expose the application port
+EXPOSE 18000
+
+ENTRYPOINT ["/docker-entrypoint.sh"]
\ No newline at end of file
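
The image starts uvicorn through the entrypoint and exposes port 18000, matching the proxy_pass targets in backend.conf. Once the stack is up, a minimal smoke test from the host can confirm the adapter is listening; this assumes the container port is published as localhost:18000 (not the case in the compose file above, where only nginx reaches it inside the network - adjust the host/port to how you run it):

    # Minimal smoke test: confirm something is listening on the adapter port.
    # Assumes container port 18000 is reachable as localhost:18000 (an assumption).
    import socket

    try:
        with socket.create_connection(("localhost", 18000), timeout=3):
            print("datamate-backend-python is accepting connections on :18000")
    except OSError as exc:
        print(f"port 18000 not reachable: {exc}")
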