Files
DataMate/scripts/images/label-studio/Dockerfile
2026-01-09 10:10:58 +08:00

203 lines
8.4 KiB
Docker

ARG NODE_VERSION=22
ARG PYTHON_VERSION=3.13
ARG POETRY_VERSION=2.1.4
ARG VERSION_OVERRIDE
ARG BRANCH_OVERRIDE
ARG GIT_REPO=https://ghproxy.net/https://github.com/ModelEngine-Group/Label-Studio.git
ARG GIT_BRANCH=ls-release/1.21.0.wsi
################################ Stage: downloader (Source Code)
FROM alpine/git AS downloader
ARG GIT_REPO
ARG GIT_BRANCH
WORKDIR /label-studio
RUN git clone --branch ${GIT_BRANCH} ${GIT_REPO} .
################################ Overview
# This Dockerfile builds a Label Studio environment.
# It consists of five main stages:
# 1. "frontend-builder" - Compiles the frontend assets using Node.
# 2. "frontend-version-generator" - Generates version files for frontend sources.
# 3. "venv-builder" - Prepares the virtualenv environment.
# 4. "py-version-generator" - Generates version files for python sources.
# 5. "prod" - Creates the final production image with the Label Studio, Nginx, and other dependencies.
################################ Stage: frontend-builder (build frontend assets)
FROM --platform=${BUILDPLATFORM} node:${NODE_VERSION}-trixie AS frontend-builder
ENV BUILD_NO_SERVER=true \
BUILD_NO_HASH=true \
BUILD_NO_CHUNKS=true \
BUILD_MODULE=true \
YARN_CACHE_FOLDER=/root/web/.yarn \
NX_CACHE_DIRECTORY=/root/web/.nx \
NODE_ENV=production
WORKDIR /label-studio/web
# 配置 yarn 淘宝镜像
RUN yarn config set registry https://registry.npmmirror.com/
RUN yarn config set network-timeout 1200000 # HTTP timeout used when downloading packages, set to 20 minutes
# [修改] 从 downloader 阶段复制 package.json 和 lock 文件
COPY --from=downloader /label-studio/web/package.json .
COPY --from=downloader /label-studio/web/yarn.lock .
COPY --from=downloader /label-studio/web/tools tools
RUN --mount=type=cache,target=/root/web/.yarn,id=yarn-cache,sharing=locked \
--mount=type=cache,target=/root/web/.nx,id=nx-cache,sharing=locked \
yarn install --prefer-offline --no-progress --pure-lockfile --frozen-lockfile --ignore-engines --non-interactive --production=false
# [修改] 从 downloader 阶段复制前端源码
COPY --from=downloader /label-studio/web/ .
# [修改] 从 downloader 阶段复制 pyproject.toml
COPY --from=downloader /label-studio/pyproject.toml ../pyproject.toml
RUN --mount=type=cache,target=/root/web/.yarn,id=yarn-cache,sharing=locked \
--mount=type=cache,target=/root/web/.nx,id=nx-cache,sharing=locked \
yarn run build
################################ Stage: frontend-version-generator
FROM frontend-builder AS frontend-version-generator
# [修改] 之前是 bind mount 本地 .git,现在需要将 downloader 的 .git 目录复制进来
# 注意:WORKDIR 目前是 /label-studio/web,所以我们将 git 放在上级目录以匹配原逻辑
COPY --from=downloader /label-studio/.git ../.git
RUN --mount=type=cache,target=/root/web/.yarn,id=yarn-cache,sharing=locked \
--mount=type=cache,target=/root/web/.nx,id=nx-cache,sharing=locked \
yarn version:libs
################################ Stage: venv-builder (prepare the virtualenv)
FROM python:${PYTHON_VERSION}-slim-trixie AS venv-builder
ARG POETRY_VERSION
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=off \
PIP_DISABLE_PIP_VERSION_CHECK=on \
PIP_DEFAULT_TIMEOUT=100 \
PIP_CACHE_DIR="/.cache" \
POETRY_CACHE_DIR="/.poetry-cache" \
POETRY_HOME="/opt/poetry" \
POETRY_VIRTUALENVS_IN_PROJECT=true \
POETRY_VIRTUALENVS_PREFER_ACTIVE_PYTHON=true \
PATH="/opt/poetry/bin:$PATH"
# 配置 pip 阿里云镜像
RUN pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
pip config set global.trusted-host mirrors.aliyun.com
ADD https://install.python-poetry.org /tmp/install-poetry.py
RUN python /tmp/install-poetry.py
# 配置 apt 阿里云镜像源
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
set -eux; \
if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
elif [ -f /etc/apt/sources.list ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
fi; \
apt-get update; \
apt-get install --no-install-recommends -y \
build-essential git; \
apt-get autoremove -y
WORKDIR /label-studio
ENV VENV_PATH="/label-studio/.venv"
ENV PATH="$VENV_PATH/bin:$PATH"
## Starting from this line all packages will be installed in $VENV_PATH
COPY --from=downloader /label-studio/pyproject.toml /label-studio/poetry.lock /label-studio/README.md ./
# Set a default build argument for including dev dependencies
ARG INCLUDE_DEV=false
# Install dependencies
RUN --mount=type=cache,target=/.poetry-cache,id=poetry-cache,sharing=locked \
poetry check --lock && \
if [ "$INCLUDE_DEV" = "true" ]; then \
poetry install --no-root --extras uwsgi --with test; \
else \
poetry install --no-root --without test --extras uwsgi; \
fi
# Install LS
COPY --from=downloader /label-studio/label_studio label_studio
RUN --mount=type=cache,target=/.poetry-cache,id=poetry-cache,sharing=locked \
poetry install --only-root --extras uwsgi && \
python3 label_studio/manage.py collectstatic --no-input
################################ Stage: py-version-generator
FROM venv-builder AS py-version-generator
ARG VERSION_OVERRIDE
ARG BRANCH_OVERRIDE
COPY --from=downloader /label-studio/.git ./.git
# Create version_.py and ls-version_.py
RUN VERSION_OVERRIDE=${VERSION_OVERRIDE} BRANCH_OVERRIDE=${BRANCH_OVERRIDE} poetry run python label_studio/core/version.py
################################### Stage: prod
FROM python:${PYTHON_VERSION}-slim-trixie AS production
ENV LS_DIR=/label-studio \
HOME=/label-studio \
LABEL_STUDIO_BASE_DATA_DIR=/label-studio/data \
OPT_DIR=/opt/heartex/instance-data/etc \
PATH="/label-studio/.venv/bin:$PATH" \
DJANGO_SETTINGS_MODULE=core.settings.label_studio \
PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1
WORKDIR $LS_DIR
# 配置 apt 阿里云镜像源并安装应用依赖
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
set -eux; \
if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
elif [ -f /etc/apt/sources.list ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
fi; \
apt-get update; \
apt-get upgrade -y; \
apt-get install --no-install-recommends -y libexpat1 libgl1 libglx-mesa0 libglib2.0-0t64 \
gnupg2 curl nginx \
libopenjp2-7 libnuma1 libspeex1 libvdpau1 libgomp1; \
apt-get autoremove -y
RUN set -eux; \
mkdir -p $LS_DIR $LABEL_STUDIO_BASE_DATA_DIR $OPT_DIR && \
chown -R 1001:0 $LS_DIR $LABEL_STUDIO_BASE_DATA_DIR $OPT_DIR /var/log/nginx /etc/nginx
COPY --chown=1001:0 --from=downloader /label-studio/deploy/default.conf /etc/nginx/nginx.conf
# Copy essential files for installing Label Studio and its dependencies
COPY --chown=1001:0 --from=downloader /label-studio/pyproject.toml .
COPY --chown=1001:0 --from=downloader /label-studio/poetry.lock .
COPY --chown=1001:0 --from=downloader /label-studio/README.md .
COPY --chown=1001:0 --from=downloader /label-studio/LICENSE LICENSE
COPY --chown=1001:0 --from=downloader /label-studio/licenses licenses
COPY --chown=1001:0 --from=downloader /label-studio/deploy deploy
# Copy files from build stages
COPY --chown=1001:0 --from=venv-builder $LS_DIR $LS_DIR
COPY --chown=1001:0 --from=py-version-generator $LS_DIR/label_studio/core/version_.py $LS_DIR/label_studio/core/version_.py
COPY --chown=1001:0 --from=frontend-builder $LS_DIR/web/dist $LS_DIR/web/dist
COPY --chown=1001:0 --from=frontend-version-generator $LS_DIR/web/dist/apps/labelstudio/version.json $LS_DIR/web/dist/apps/labelstudio/version.json
COPY --chown=1001:0 --from=frontend-version-generator $LS_DIR/web/dist/libs/editor/version.json $LS_DIR/web/dist/libs/editor/version.json
COPY --chown=1001:0 --from=frontend-version-generator $LS_DIR/web/dist/libs/datamanager/version.json $LS_DIR/web/dist/libs/datamanager/version.json
USER 1001
EXPOSE 8080
ENTRYPOINT ["./deploy/docker-entrypoint.sh"]
CMD ["label-studio"]