You've already forked DataMate
feat(build): 添加离线构建支持
- 新增 build-offline.sh 脚本实现无网环境构建 - 添加离线版 Dockerfiles 使用本地资源替代网络下载 - 创建 export-cache.sh 脚本在有网环境预下载依赖 - 集成 Makefile.offline.mk 提供便捷的离线构建命令 - 添加详细的离线构建文档和故障排查指南 - 实现基础镜像、BuildKit 缓存和外部资源的一键打包
This commit is contained in:
93
scripts/offline/Dockerfile.backend-python.offline
Normal file
93
scripts/offline/Dockerfile.backend-python.offline
Normal file
@@ -0,0 +1,93 @@
|
||||
# backend-python Dockerfile 离线版本
|
||||
# 修改点: 使用本地 DataX 源码替代 git clone
|
||||
|
||||
# Stage 1 (datax-builder): Maven image on Eclipse Temurin 8, used to build DataX.
FROM maven:3-eclipse-temurin-8 AS datax-builder

# Write a Maven settings.xml that routes every repository (mirrorOf *) through
# the Aliyun public mirror.
# printf '%s\n' is used instead of echo with embedded "\n": whether echo
# expands backslash escapes depends on the shell, while printf emits exactly
# one line per argument in any POSIX shell.
RUN mkdir -p /root/.m2 && \
    printf '%s\n' \
      '<?xml version="1.0" encoding="UTF-8"?>' \
      '<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"' \
      '          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' \
      '          xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">' \
      '  <mirrors>' \
      '    <mirror>' \
      '      <id>aliyunmaven</id>' \
      '      <mirrorOf>*</mirrorOf>' \
      '      <name>阿里云公共仓库</name>' \
      '      <url>https://maven.aliyun.com/repository/public</url>' \
      '    </mirror>' \
      '  </mirrors>' \
      '</settings>' \
      > /root/.m2/settings.xml
|
||||
|
||||
# Offline mode: path (inside the build context) of the pre-downloaded DataX
# source tree. Override with --build-arg DATAX_LOCAL_PATH=<path>.
ARG DATAX_LOCAL_PATH=./build-cache/resources/DataX

# Copy the local DataX sources, downloaded ahead of time for offline builds.
COPY ${DATAX_LOCAL_PATH} /DataX

# Overlay the repository's runtime/datax/ files onto the DataX tree.
# The destination is absolute so it always lands in /DataX, independent of
# whatever WORKDIR the base image happens to define.
COPY runtime/datax/ /DataX/
|
||||
|
||||
# Run the build from the DataX source root (WORKDIR instead of `RUN cd ...`).
WORKDIR /DataX

# Replace the JDBC driver class name com.mysql.jdbc.Driver with
# com.mysql.cj.jdbc.Driver in DataBaseType.java, then build and assemble DataX
# with tests skipped. The dots in the sed pattern are escaped so only the
# literal class name matches.
RUN sed -i 's/com\.mysql\.jdbc\.Driver/com.mysql.cj.jdbc.Driver/g' \
      plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
    mvn -U clean package assembly:assembly -Dmaven.test.skip=true
|
||||
|
||||
# Final stage: Python 3.12 slim image that carries the application.
FROM python:3.12-slim

# Point apt at the Aliyun mirror (whichever of the two source-list files
# exists), install the OS packages, and drop the apt lists in the same layer.
#
# Debian installs the JRE under /usr/lib/jvm/java-21-openjdk-<arch>, while this
# file sets JAVA_HOME=/usr/lib/jvm/java-21-openjdk. The symlink created at the
# end provides that architecture-independent path so JAVA_HOME and
# $JAVA_HOME/bin resolve on every architecture.
RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
    elif [ -f /etc/apt/sources.list ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
    fi && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        glusterfs-client \
        nfs-common \
        openjdk-21-jre \
        rsync \
        vim && \
    rm -rf /var/lib/apt/lists/* && \
    if [ ! -e /usr/lib/jvm/java-21-openjdk ]; then \
        ln -s "java-21-openjdk-$(dpkg --print-architecture)" /usr/lib/jvm/java-21-openjdk; \
    fi
|
||||
|
||||
# Python and Poetry settings, plus the Java home referenced by PATH below.
# Poetry installs into the system interpreter (no virtualenv) and keeps its
# cache under /tmp/poetry_cache.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    POETRY_VERSION=2.2.1 \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_CACHE_DIR=/tmp/poetry_cache \
    JAVA_HOME=/usr/lib/jvm/java-21-openjdk

# Kept as a separate instruction: ${JAVA_HOME} only expands to a value set by
# an earlier ENV instruction, not one set in the same instruction.
# /root/.local/bin is prepended for tools installed with pipx.
ENV PATH="/root/.local/bin:${JAVA_HOME}/bin:${PATH}"
|
||||
|
||||
WORKDIR /app

# Configure pip to use the Aliyun PyPI mirror, upgrade pip, then install
# Poetry (version taken from POETRY_VERSION) through pipx.
# pip's download cache sits on a BuildKit cache mount, so it is reused across
# builds without being written into the image layer.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ \
    && pip config set global.trusted-host mirrors.aliyun.com \
    && pip install --upgrade --root-user-action=ignore pip \
    && pip install --root-user-action=ignore pipx \
    && pipx install "poetry==${POETRY_VERSION}"
|
||||
|
||||
# Bring in the DataX distribution produced by the datax-builder stage.
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax

# Copy the mysql* files from mysqlreader's libs directory into
# starrocksreader's libs directory as well.
RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* \
       /opt/datax/plugin/reader/starrocksreader/libs/
|
||||
|
||||
# Dependency manifests are copied before the application code so this layer
# and the install below are rebuilt only when the manifests change.
# The trailing * makes poetry.lock optional.
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/

# Install only the main dependency group, without installing the project
# itself; Poetry's cache directory is a BuildKit cache mount.
RUN --mount=type=cache,target=${POETRY_CACHE_DIR} \
    poetry install --no-root --only main
|
||||
|
||||
# Offline mode: NLTK data is taken from the build context rather than
# downloaded. Override the source with --build-arg NLTK_DATA_LOCAL_PATH=<path>.
ARG NLTK_DATA_LOCAL_PATH=./build-cache/resources/nltk_data
COPY ${NLTK_DATA_LOCAL_PATH} /usr/local/nltk_data

# Expose the copied data directory through the NLTK_DATA environment variable.
ENV NLTK_DATA=/usr/local/nltk_data
|
||||
|
||||
# Copy the rest of the application.
COPY runtime/datamate-python /app

# Install the entrypoint script with its executable bit set at copy time.
# This replaces a separate `RUN chmod +x ... || true`, which added a layer and
# hid any chmod failure until container start.
COPY --chmod=755 runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh
|
||||
|
||||
# Document the application's listening port (TCP is the EXPOSE default).
EXPOSE 18000/tcp

# Exec-form entrypoint: the script is started directly, without a wrapping shell.
ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
Reference in New Issue
Block a user