feat: Labeling Frontend adaptations + Backend build and deploy + Logging improvement (#55)

* feat: Adapt the front-end data annotation page to the backend API.

* feat: Implement labeling configuration editor and enhance annotation task creation form

* feat: Add Python backend build and deployment; add backend configuration for Label Studio integration and improve logging setup

* refactor: remove duplicate log configuration
This commit is contained in:
Jason Wang
2025-11-05 01:55:53 +08:00
committed by GitHub
parent f3958f08d9
commit b5fe787c20
13 changed files with 190 additions and 210 deletions

View File

@@ -7,6 +7,20 @@ server {
client_max_body_size 1024M; client_max_body_size 1024M;
location /api/synthesis/ {
proxy_pass http://datamate-backend-python:18000/api/synthesis/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
location /api/annotation/ {
proxy_pass http://datamate-backend-python:18000/api/annotation/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
location /api/ { location /api/ {
proxy_pass http://datamate-backend:8080/api/; proxy_pass http://datamate-backend:8080/api/;
proxy_set_header Host $host; proxy_set_header Host $host;

View File

@@ -15,6 +15,22 @@ services:
depends_on: depends_on:
- datamate-database - datamate-database
# 1) backend (Python)
datamate-backend-python:
container_name: datamate-backend-python
image: datamate-backend-python
restart: on-failure
privileged: true
environment:
- log_level=DEBUG
volumes:
- dataset_volume:/dataset
- flow_volume:/flow
- log_volume:/var/log/datamate
networks: [ datamate ]
depends_on:
- datamate-database
# 2) frontend(NodePort 30000) # 2) frontend(NodePort 30000)
datamate-frontend: datamate-frontend:
container_name: datamate-frontend container_name: datamate-frontend
@@ -28,6 +44,7 @@ services:
networks: [ datamate ] networks: [ datamate ]
depends_on: depends_on:
- datamate-backend - datamate-backend
- datamate-backend-python
# 3) database # 3) database
datamate-database: datamate-database:
@@ -47,6 +64,8 @@ services:
- ../../../scripts/db:/docker-entrypoint-initdb.d - ../../../scripts/db:/docker-entrypoint-initdb.d
- ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf:ro - ./utf8.cnf:/etc/mysql/conf.d/utf8.cnf:ro
- database_log_volume:/var/log/datamate/database - database_log_volume:/var/log/datamate/database
ports:
- "3306:3306"
networks: [ datamate ] networks: [ datamate ]
# 3) runtime # 3) runtime

View File

@@ -1,6 +1,6 @@
services: services:
app: label-studio:
stdin_open: true stdin_open: true
tty: true tty: true
image: heartexlabs/label-studio:latest image: heartexlabs/label-studio:latest
@@ -11,7 +11,7 @@ services:
ports: ports:
- "8000:8000" - "8000:8000"
depends_on: depends_on:
- db - pg-db
environment: environment:
- DJANGO_DB=default - DJANGO_DB=default
- POSTGRE_NAME=postgres - POSTGRE_NAME=postgres
@@ -23,17 +23,19 @@ services:
- LOCAL_FILES_SERVING_ENABLED=true - LOCAL_FILES_SERVING_ENABLED=true
- LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local - LOCAL_FILES_DOCUMENT_ROOT=/label-studio/local
- USE_USERNAME_FOR_LOGIN=true - USE_USERNAME_FOR_LOGIN=true
- LABEL_STUDIO_USERNAME=admin@huawei.com - LABEL_STUDIO_USERNAME=admin@demo.com
- LABEL_STUDIO_PASSWORD=admin1234 - LABEL_STUDIO_PASSWORD=demoadmin
- LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true - LABEL_STUDIO_ENABLE_LEGACY_API_TOKEN=true
- LABEL_STUDIO_USER_TOKEN=abc123abc123 - LABEL_STUDIO_USER_TOKEN=abc123abc123
- LOG_LEVEL=INFO - LOG_LEVEL=DEBUG
volumes: volumes:
- label-studio-data:/label-studio/data:rw - label-studio-data:/label-studio/data:rw
- dataset_volume:/label-studio/local:rw - dataset_volume:/label-studio/local:rw
networks:
- datamate
command: label-studio-uwsgi command: label-studio-uwsgi
db: pg-db:
image: pgautoupgrade/pgautoupgrade:13-alpine image: pgautoupgrade/pgautoupgrade:13-alpine
hostname: db hostname: db
restart: unless-stopped restart: unless-stopped
@@ -42,9 +44,16 @@ services:
- POSTGRES_USER=postgres - POSTGRES_USER=postgres
volumes: volumes:
- label-studio-db:/var/lib/postgresql/data - label-studio-db:/var/lib/postgresql/data
networks:
- datamate
volumes: volumes:
label-studio-data: label-studio-data:
label-studio-db: label-studio-db:
dataset_volume: dataset_volume:
name: datamate-dataset-volume name: datamate-dataset-volume
networks:
datamate:
driver: bridge
name: datamate-network

View File

@@ -47,11 +47,10 @@ export default function DataAnnotation() {
let mounted = true; let mounted = true;
(async () => { (async () => {
try { try {
const cfg = await getConfigUsingGet(); const baseUrl = `http://${window.location.hostname}:8000`;
const url = cfg?.data?.labelStudioUrl || ""; if (mounted) setLabelStudioBase(baseUrl);
if (mounted) setLabelStudioBase((url).replace(/\/+$/, "") || null);
} catch (e) { } catch (e) {
if (mounted) setLabelStudioBase(null); if (mounted) setLabelStudioBase(null);
} }
})(); })();
return () => { return () => {

View File

@@ -1,4 +1,5 @@
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
from pydantic import model_validator
from typing import Optional, List from typing import Optional, List
import os import os
from pathlib import Path from pathlib import Path
@@ -17,124 +18,60 @@ class Settings(BaseSettings):
app_name: str = "Label Studio Adapter" app_name: str = "Label Studio Adapter"
app_version: str = "1.0.0" app_version: str = "1.0.0"
app_description: str = "Adapter for integrating Data Management System with Label Studio" app_description: str = "Adapter for integrating Data Management System with Label Studio"
# 日志配置
log_level: str = "INFO"
debug: bool = True debug: bool = True
log_file_dir: str = "/var/log/datamate"
# 服务器配置 # 服务器配置
host: str = "0.0.0.0" host: str = "0.0.0.0"
port: int = 8000 port: int = 8000
# CORS配置 # CORS配置
allowed_origins: List[str] = ["*"] # allowed_origins: List[str] = ["*"]
allowed_methods: List[str] = ["*"] # allowed_methods: List[str] = ["*"]
allowed_headers: List[str] = ["*"] # allowed_headers: List[str] = ["*"]
# MySQL数据库配置 (优先级1) # MySQL数据库配置 (优先级1)
mysql_host: Optional[str] = None mysql_host: str = "datamate-database"
mysql_port: int = 3306 mysql_port: int = 3306
mysql_user: Optional[str] = None mysql_user: str = "root"
mysql_password: Optional[str] = None mysql_password: str = "password"
mysql_database: Optional[str] = None mysql_database: str = "datamate"
# PostgreSQL数据库配置 (优先级2)
postgres_host: Optional[str] = None
postgres_port: int = 5432
postgres_user: Optional[str] = None
postgres_password: Optional[str] = None
postgres_database: Optional[str] = None
# SQLite数据库配置 (优先级3 - 兜底)
sqlite_path: str = "data/labelstudio_adapter.db"
# 直接数据库URL配置(如果提供,将覆盖上述配置) # 直接数据库URL配置(如果提供,将覆盖上述配置)
database_url: Optional[str] = None # 初始值为空字符串,在 model_validator 中会被设置为完整的 URL
database_url: str = ""
@model_validator(mode='after')
def build_database_url(self):
"""如果没有提供 database_url,则根据 MySQL 配置构建"""
if not self.database_url:
if self.mysql_password and self.mysql_user:
self.database_url = f"mysql+aiomysql://{self.mysql_user}:{self.mysql_password}@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
else:
self.database_url = f"mysql+aiomysql://{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
return self
# 日志配置
log_level: str = "DEBUG"
# ========================= # =========================
# Label Studio 服务配置 # Label Studio 服务配置
# ========================= # =========================
label_studio_base_url: str = "http://label-studio:8080" label_studio_base_url: str = "http://label-studio:8000"
label_studio_username: Optional[str] = None # Label Studio 用户名(用于登录) label_studio_username: Optional[str] = "admin@demo.com" # Label Studio 用户名(用于登录)
label_studio_password: Optional[str] = None # Label Studio 密码(用于登录) label_studio_password: Optional[str] = "demoadmin" # Label Studio 密码(用于登录)
label_studio_user_token: Optional[str] = None # Legacy Token label_studio_user_token: Optional[str] = "abc123abc123" # Legacy Token
label_studio_local_storage_dataset_base_path: str = "/label-studio/local_files" # Label Studio容器中的本地存储基础路径 label_studio_local_storage_dataset_base_path: str = "/label-studio/local" # Label Studio容器中的本地存储基础路径
label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀 label_studio_file_path_prefix: str = "/data/local-files/?d=" # Label Studio本地文件服务路径前缀
ls_task_page_size: int = 1000 ls_task_page_size: int = 1000
# ========================= # =========================
# Data Management 服务配置 # Data Management 服务配置
# ========================= # =========================
dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀 dm_file_path_prefix: str = "/dataset" # DM存储文件夹前缀
@property
def computed_database_url(self) -> str:
"""
根据优先级自动选择数据库连接URL
优先级:MySQL > PostgreSQL > SQLite3
"""
# 如果直接提供了database_url,优先使用
if self.database_url:
return self.database_url
# 优先级1: MySQL
if all([self.mysql_host, self.mysql_user, self.mysql_password, self.mysql_database]):
return f"mysql+aiomysql://{self.mysql_user}:{self.mysql_password}@{self.mysql_host}:{self.mysql_port}/{self.mysql_database}"
# 优先级2: PostgreSQL
if all([self.postgres_host, self.postgres_user, self.postgres_password, self.postgres_database]):
return f"postgresql+asyncpg://{self.postgres_user}:{self.postgres_password}@{self.postgres_host}:{self.postgres_port}/{self.postgres_database}"
# 优先级3: SQLite (兜底)
sqlite_full_path = Path(self.sqlite_path).absolute()
# 确保目录存在
sqlite_full_path.parent.mkdir(parents=True, exist_ok=True)
return f"sqlite+aiosqlite:///{sqlite_full_path}"
@property
def sync_database_url(self) -> str:
"""
用于数据库迁移的同步连接URL
将异步驱动替换为同步驱动
"""
async_url = self.computed_database_url
# 替换异步驱动为同步驱动
sync_replacements = {
"mysql+aiomysql://": "mysql+pymysql://",
"postgresql+asyncpg://": "postgresql+psycopg2://",
"sqlite+aiosqlite:///": "sqlite:///"
}
for async_driver, sync_driver in sync_replacements.items():
if async_url.startswith(async_driver):
return async_url.replace(async_driver, sync_driver)
return async_url
def get_database_info(self) -> dict:
"""获取数据库配置信息"""
url = self.computed_database_url
if url.startswith("mysql"):
db_type = "MySQL"
elif url.startswith("postgresql"):
db_type = "PostgreSQL"
elif url.startswith("sqlite"):
db_type = "SQLite"
else:
db_type = "Unknown"
return {
"type": db_type,
"url": url,
"sync_url": self.sync_database_url
}
# 全局设置实例 # 全局设置实例
settings = Settings() settings = Settings()

View File

@@ -3,52 +3,68 @@ import sys
from pathlib import Path from pathlib import Path
from app.core.config import settings from app.core.config import settings
class CenteredLevelNameFormatter(logging.Formatter):
"""Center the level name in the log output"""
def format(self, record):
# 将 levelname 居中对齐到8个字符
record.levelname = record.levelname.center(8)
return super().format(record)
def setup_logging(): def setup_logging():
"""配置应用程序日志"""
# 创建logs目录 log_format = "%(asctime)s [%(levelname)s] - %(name)s - %(message)s"
log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)
# 配置日志格式
log_format = "%(asctime)s - %(name)s - [%(levelname)s] - %(message)s"
date_format = "%Y-%m-%d %H:%M:%S" date_format = "%Y-%m-%d %H:%M:%S"
# 创建处理器
console_handler = logging.StreamHandler(sys.stdout) console_handler = logging.StreamHandler(sys.stdout)
console_handler.setLevel(getattr(logging, settings.log_level.upper())) console_handler.setLevel(getattr(logging, settings.log_level.upper()))
log_dir = Path(settings.log_file_dir)
log_dir.mkdir(exist_ok=True)
file_handler = logging.FileHandler( file_handler = logging.FileHandler(
log_dir / "app.log", log_dir / "python-backend.log",
encoding="utf-8" encoding="utf-8"
) )
file_handler.setLevel(getattr(logging, settings.log_level.upper())) file_handler.setLevel(getattr(logging, settings.log_level.upper()))
error_handler = logging.FileHandler( # Style setting - Centered level names
log_dir / "error.log", formatter = CenteredLevelNameFormatter(log_format, date_format)
encoding="utf-8"
)
error_handler.setLevel(logging.ERROR)
# 设置格式
formatter = logging.Formatter(log_format, date_format)
console_handler.setFormatter(formatter) console_handler.setFormatter(formatter)
file_handler.setFormatter(formatter) file_handler.setFormatter(formatter)
error_handler.setFormatter(formatter) file_handler.setFormatter(formatter)
# 配置根日志器 # Root Logger
root_logger = logging.getLogger() root_logger = logging.getLogger()
root_logger.setLevel(getattr(logging, settings.log_level.upper())) root_logger.setLevel(getattr(logging, settings.log_level.upper()))
root_logger.addHandler(console_handler) root_logger.addHandler(console_handler)
root_logger.addHandler(file_handler) root_logger.addHandler(file_handler)
root_logger.addHandler(error_handler) root_logger.addHandler(file_handler)
# 配置第三方库日志级别(减少详细日志) # Uvicorn
logging.getLogger("uvicorn").setLevel(logging.ERROR) uvicorn_logger = logging.getLogger("uvicorn")
logging.getLogger("sqlalchemy.engine").setLevel(logging.ERROR) # 隐藏SQL查询日志 uvicorn_logger.handlers.clear()
uvicorn_logger.addHandler(console_handler)
uvicorn_logger.setLevel(logging.INFO)
uvicorn_access = logging.getLogger("uvicorn.access")
uvicorn_access.handlers.clear()
uvicorn_access.addHandler(console_handler)
uvicorn_access.setLevel(logging.DEBUG)
uvicorn_error = logging.getLogger("uvicorn.error")
uvicorn_error.handlers.clear()
uvicorn_error.addHandler(console_handler)
uvicorn_error.setLevel(logging.ERROR)
# SQLAlchemy (ERROR only)
sqlalchemy_logger = logging.getLogger("sqlalchemy.engine")
sqlalchemy_logger.setLevel(logging.ERROR)
sqlalchemy_logger.addHandler(console_handler)
sqlalchemy_logger.setLevel(logging.ERROR)
# Minimize noise from HTTPX and HTTPCore
logging.getLogger("httpx").setLevel(logging.ERROR) logging.getLogger("httpx").setLevel(logging.ERROR)
logging.getLogger("httpcore").setLevel(logging.ERROR) logging.getLogger("httpcore").setLevel(logging.ERROR)
def get_logger(name: str) -> logging.Logger: def get_logger(name: str) -> logging.Logger:
"""获取指定名称的日志器"""
return logging.getLogger(name) return logging.getLogger(name)

View File

@@ -6,17 +6,11 @@ from typing import AsyncGenerator
logger = get_logger(__name__) logger = get_logger(__name__)
# 获取数据库配置信息
db_info = settings.get_database_info()
logger.info(f"使用数据库: {db_info['type']} || 连接URL: {db_info['url']}")
# 创建数据库引擎 # 创建数据库引擎
engine = create_async_engine( engine = create_async_engine(
settings.computed_database_url, settings.database_url,
echo=False, # 关闭SQL调试日志以减少输出 echo=False, # 关闭SQL调试日志以减少输出
future=True, future=True
# SQLite特殊配置
connect_args={"check_same_thread": False} if "sqlite" in settings.computed_database_url else {}
) )
# 创建会话工厂 # 创建会话工厂

View File

@@ -19,29 +19,32 @@ from .exception import (
general_exception_handler general_exception_handler
) )
# 设置日志
setup_logging() setup_logging()
logger = get_logger(__name__) logger = get_logger(__name__)
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
"""应用程序生命周期管理"""
# 启动时初始化 # @startup
logger.info("DataMate Python Backend starting...") logger.info("DataMate Python Backend starting...")
# 数据库连接验证
# Database connection validation
try: try:
async with AsyncSessionLocal() as session: async with AsyncSessionLocal() as session:
await session.execute(text("SELECT 1")) await session.execute(text("SELECT 1"))
logger.info("Database connection validated successfully.") logger.info(f"Database: mysql+aiomysql://{settings.mysql_user}:{settings.mysql_password}@{settings.mysql_host}:{settings.mysql_port}/{settings.mysql_database}")
except Exception as e: except Exception as e:
logger.error(f"Database connection validation failed: {e}") logger.error(f"Database connection validation failed: {e}")
logger.debug(f"Connection details: {settings.computed_database_url}") logger.debug(f"Connection details: {settings.database_url}")
raise raise
# Label Studio
# TODO Add actual connectivity check if needed
logger.info(f"Label Studio: {settings.label_studio_base_url}")
yield yield
# 关闭时清理 # @shutdown
logger.info("DataMate Python Backend shutting down ...") logger.info("DataMate Python Backend shutting down ...")
# 创建FastAPI应用 # 创建FastAPI应用
@@ -53,19 +56,24 @@ app = FastAPI(
lifespan=lifespan lifespan=lifespan
) )
# 配置CORS中间件 # CORS Middleware
app.add_middleware( # app.add_middleware(
CORSMiddleware, # CORSMiddleware,
allow_origins=settings.allowed_origins, # allow_origins=settings.allowed_origins,
allow_credentials=True, # allow_credentials=True,
allow_methods=settings.allowed_methods, # allow_methods=settings.allowed_methods,
allow_headers=settings.allowed_headers, # allow_headers=settings.allowed_headers,
) # )
# 注册路由 # 注册路由
app.include_router(router) app.include_router(router)
logger.debug("Registered routes: %s", [getattr(r, "path", None) for r in app.routes]) # 输出注册的路由(每行一个)
logger.debug("Registered routes:")
for route in app.routes:
route_path = getattr(route, "path", None)
if route_path:
logger.debug(f" {route_path}")
# 注册全局异常处理器 # 注册全局异常处理器
app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore app.add_exception_handler(StarletteHTTPException, starlette_http_exception_handler) # type: ignore

View File

@@ -1,20 +1,10 @@
from fastapi import APIRouter, Depends, HTTPException, Query from fastapi import APIRouter
from sqlalchemy.ext.asyncio import AsyncSession
from typing import List, Optional
from app.db.session import get_db
from app.module.shared.schema import StandardResponse from app.module.shared.schema import StandardResponse
from app.module.dataset import DatasetManagementService
from app.core.logging import get_logger from app.core.logging import get_logger
from app.core.config import settings from app.core.config import settings
from app.exception import NoDatasetInfoFoundError, DatasetMappingNotFoundError
from ..client import LabelStudioClient from ..schema import ConfigResponse
from ..service.sync import SyncService
from ..service.mapping import DatasetMappingService
from ..schema import (
ConfigResponse
)
router = APIRouter( router = APIRouter(

View File

@@ -21,7 +21,7 @@ class DatasetMappingCreateRequest(BaseModel):
class Config: class Config:
# allow population by field name when constructing model programmatically # allow population by field name when constructing model programmatically
allow_population_by_field_name = True validate_by_name = True
class DatasetMappingCreateResponse(BaseResponseModel): class DatasetMappingCreateResponse(BaseResponseModel):
"""数据集映射 创建 响应模型""" """数据集映射 创建 响应模型"""

View File

@@ -1,57 +1,16 @@
#!/bin/bash #!/bin/bash
set -e set -e
echo "==========================================" if [-d $LOCAL_FILES_DOCUMENT_ROOT ] && $LOCAL_FILES_SERVING_ENABLED; then
echo "Label Studio Adapter Starting..." echo "Using local document root: $LOCAL_FILES_DOCUMENT_ROOT"
echo "=========================================="
# Label Studio 本地存储基础路径(从环境变量获取,默认值)
LABEL_STUDIO_LOCAL_BASE="${LABEL_STUDIO_LOCAL_BASE:-/label-studio/local_files}"
echo "=========================================="
echo "Ensuring Label Studio local storage directories exist..."
echo "Base path: ${LABEL_STUDIO_LOCAL_BASE}"
echo "=========================================="
# 创建必要的目录
mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/dataset"
mkdir -p "${LABEL_STUDIO_LOCAL_BASE}/upload"
echo "✓ Directory 'dataset' ready: ${LABEL_STUDIO_LOCAL_BASE}/dataset"
echo "✓ Directory 'upload' ready: ${LABEL_STUDIO_LOCAL_BASE}/upload"
echo "=========================================="
echo "Directory initialization completed"
echo "=========================================="
# 等待数据库就绪(如果配置了数据库)
if [ -n "$MYSQL_HOST" ] || [ -n "$POSTGRES_HOST" ]; then
echo "Waiting for database to be ready..."
sleep 5
fi fi
# 运行数据库迁移
echo "=========================================="
echo "Running database migrations..."
echo "=========================================="
alembic upgrade head
if [ $? -eq 0 ]; then
echo "✓ Database migrations completed successfully"
else
echo "⚠️ WARNING: Database migrations failed"
echo " The application may not work correctly"
fi
echo "=========================================="
# 启动应用 # 启动应用
echo "=========================================="
echo "Starting Label Studio Adapter..." echo "Starting Label Studio Adapter..."
echo "Host: ${HOST:-0.0.0.0}" echo "Host: ${HOST:-0.0.0.0}"
echo "Port: ${PORT:-18000}" echo "Port: ${PORT:-18000}"
echo "Debug: ${DEBUG:-false}" echo "Debug: ${DEBUG:-false}"
echo "Label Studio URL: ${LABEL_STUDIO_BASE_URL}"
echo "=========================================="
# 转换 LOG_LEVEL 为小写(uvicorn 要求小写) # 转换 LOG_LEVEL 为小写(uvicorn 要求小写)
LOG_LEVEL_LOWER=$(echo "${LOG_LEVEL:-info}" | tr '[:upper:]' '[:lower:]') LOG_LEVEL_LOWER=$(echo "${LOG_LEVEL:-info}" | tr '[:upper:]' '[:lower:]')

View File

@@ -1,5 +1,4 @@
uvicorn app.main:app \ uvicorn app.main:app \
--host 0.0.0.0 \ --host 0.0.0.0 \
--port 18000 \ --port 18000 \
--reload \ --reload
--log-level debug

View File

@@ -0,0 +1,36 @@
FROM python:3.11-slim
# Single-stage image with build cache optimization using BuildKit cache mounts.
# Note: to use the cache mount syntax you must build with BuildKit enabled:
# DOCKER_BUILDKIT=1 docker build . -f scripts/images/datamate-python/Dockerfile -t datamate-backend-python
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1
WORKDIR /app
# Install build deps needed to compile some wheels. We'll remove them afterwards to keep the
# final image small. Use --no-install-recommends to minimize installed packages.
# RUN apt-get update \
# && apt-get install -y --no-install-recommends \
# build-essential \
# gcc \
# && apt-get purge -y --auto-remove build-essential gcc \
# && rm -rf /var/lib/apt/lists/*
# Copy requirements first (leverages layer caching when requirements don't change)
COPY runtime/datamate-python /app
# Install Python deps. Use BuildKit cache mount for pip cache to speed subsequent builds.
# The --mount=type=cache requires BuildKit. This keeps downloaded wheels/cache out of the final image.
RUN --mount=type=cache,target=/root/.cache/pip \
pip install --upgrade --root-user-action=ignore pip setuptools wheel \
&& pip install --root-user-action=ignore -r /app/requirements.txt
COPY runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh || true
# Expose the application port
EXPOSE 18000
ENTRYPOINT ["/docker-entrypoint.sh"]