Files
DataMate/scripts/offline/Dockerfile.backend-python.offline-v2
Jerry Yan da5b18e423 feat(scripts): 添加 APT 缓存预装功能解决离线构建问题
- 新增 APT 缓存目录和相关构建脚本 export-cache.sh
- 添加 build-base-images.sh 脚本用于构建预装 APT 包的基础镜像
- 增加 build-offline-final.sh 最终版离线构建脚本
- 更新 Makefile.offline.mk 添加新的离线构建目标
- 扩展 README.md 文档详细说明 APT 缓存问题解决方案
- 为多个服务添加使用预装基础镜像的离线 Dockerfile
- 修改打包脚本包含 APT 缓存到最终压缩包中
2026-02-03 13:16:17 +08:00

83 lines
2.8 KiB
Docker

# backend-python Dockerfile 离线版本 v2
FROM maven:3-eclipse-temurin-8 AS datax-builder
# 配置 Maven 阿里云镜像
RUN mkdir -p /root/.m2 && \
echo '<?xml version="1.0" encoding="UTF-8"?>\n\
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"\n\
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n\
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">\n\
<mirrors>\n\
<mirror>\n\
<id>aliyunmaven</id>\n\
<mirrorOf>*</mirrorOf>\n\
<name>阿里云公共仓库</name>\n\
<url>https://maven.aliyun.com/repository/public</url>\n\
</mirror>\n\
</mirrors>\n\
</settings>' > /root/.m2/settings.xml
# 离线模式: 从构建参数获取本地 DataX 路径
ARG RESOURCES_DIR=./build-cache/resources
ARG DATAX_LOCAL_PATH=${RESOURCES_DIR}/DataX
# 复制本地 DataX 源码
COPY ${DATAX_LOCAL_PATH} /DataX
COPY runtime/datax/ DataX/
RUN cd DataX && \
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
mvn -U clean package assembly:assembly -Dmaven.test.skip=true
# 使用预装 APT 包的基础镜像
FROM datamate-python-base:latest
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
POETRY_VERSION=2.2.1 \
POETRY_NO_INTERACTION=1 \
POETRY_VIRTUALENVS_CREATE=false \
POETRY_CACHE_DIR=/tmp/poetry_cache
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"
WORKDIR /app
# 配置 pip 阿里云镜像并安装 Poetry
RUN --mount=type=cache,target=/root/.cache/pip \
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
pip config set global.trusted-host mirrors.aliyun.com && \
pip install --upgrade --root-user-action=ignore pip \
&& pip install --root-user-action=ignore pipx \
&& pipx install "poetry==$POETRY_VERSION"
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/
# Copy only dependency files first
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/
# Install dependencies
RUN --mount=type=cache,target=$POETRY_CACHE_DIR \
poetry install --no-root --only main
# 离线模式: 使用本地 NLTK 数据
ARG RESOURCES_DIR=./build-cache/resources
ARG NLTK_DATA_LOCAL_PATH=${RESOURCES_DIR}/nltk_data
COPY ${NLTK_DATA_LOCAL_PATH} /usr/local/nltk_data
ENV NLTK_DATA=/usr/local/nltk_data
# Copy the rest of the application
COPY runtime/datamate-python /app
COPY runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh || true
EXPOSE 18000
ENTRYPOINT ["/docker-entrypoint.sh"]