From da5b18e4231d5de5365ce2707ca68857347fca90 Mon Sep 17 00:00:00 2001 From: Jerry Yan <792602257@qq.com> Date: Tue, 3 Feb 2026 13:16:12 +0800 Subject: [PATCH] =?UTF-8?q?feat(scripts):=20=E6=B7=BB=E5=8A=A0=20APT=20?= =?UTF-8?q?=E7=BC=93=E5=AD=98=E9=A2=84=E8=A3=85=E5=8A=9F=E8=83=BD=E8=A7=A3?= =?UTF-8?q?=E5=86=B3=E7=A6=BB=E7=BA=BF=E6=9E=84=E5=BB=BA=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 APT 缓存目录和相关构建脚本 export-cache.sh - 添加 build-base-images.sh 脚本用于构建预装 APT 包的基础镜像 - 增加 build-offline-final.sh 最终版离线构建脚本 - 更新 Makefile.offline.mk 添加新的离线构建目标 - 扩展 README.md 文档详细说明 APT 缓存问题解决方案 - 为多个服务添加使用预装基础镜像的离线 Dockerfile - 修改打包脚本包含 APT 缓存到最终压缩包中 --- Makefile.offline.mk | 59 ++++-- .../Dockerfile.backend-python.offline-v2 | 82 ++++++++ scripts/offline/Dockerfile.backend.offline | 71 +++++++ scripts/offline/Dockerfile.base-images | 62 ++++++ scripts/offline/Dockerfile.gateway.offline | 47 +++++ scripts/offline/Dockerfile.runtime.offline-v2 | 42 ++++ scripts/offline/README.md | 150 +++++++++++++-- scripts/offline/build-base-images.sh | 87 +++++++++ scripts/offline/build-offline-final.sh | 181 ++++++++++++++++++ scripts/offline/export-cache.sh | 44 ++++- 10 files changed, 794 insertions(+), 31 deletions(-) create mode 100644 scripts/offline/Dockerfile.backend-python.offline-v2 create mode 100644 scripts/offline/Dockerfile.backend.offline create mode 100644 scripts/offline/Dockerfile.base-images create mode 100644 scripts/offline/Dockerfile.gateway.offline create mode 100644 scripts/offline/Dockerfile.runtime.offline-v2 create mode 100644 scripts/offline/build-base-images.sh create mode 100644 scripts/offline/build-offline-final.sh diff --git a/Makefile.offline.mk b/Makefile.offline.mk index 78ae264..038f7ce 100644 --- a/Makefile.offline.mk +++ b/Makefile.offline.mk @@ -246,28 +246,59 @@ offline-build-classic: offline-setup offline-diagnose: @bash scripts/offline/diagnose.sh $(CACHE_DIR) +# 构建 APT 预装基础镜像(有网环境) +.PHONY: offline-build-base-images +offline-build-base-images: + @echo "构建 APT 预装基础镜像..." + @bash scripts/offline/build-base-images.sh $(CACHE_DIR) + +# 使用预装基础镜像进行离线构建(推荐) +.PHONY: offline-build-final +offline-build-final: offline-setup + @echo "使用预装 APT 包的基础镜像进行离线构建..." + @bash scripts/offline/build-offline-final.sh $(CACHE_DIR) $(OFFLINE_VERSION) + +# 完整离线导出(包含 APT 预装基础镜像) +.PHONY: offline-export-full +offline-export-full: + @echo "======================================" + @echo "完整离线缓存导出(含 APT 预装基础镜像)" + @echo "======================================" + @$(MAKE) offline-build-base-images + @$(MAKE) offline-export + @echo "" + @echo "导出完成!传输时请包含以下文件:" + @echo " - build-cache/images/base-images-with-apt.tar" + @echo " - build-cache-YYYYMMDD.tar.gz" + # ========== 帮助 ========== .PHONY: help-offline help-offline: @echo "离线构建命令:" - @echo " make offline-export [CACHE_DIR=./build-cache] - 在有网环境导出构建缓存" + @echo "" + @echo "【有网环境】" + @echo " make offline-export [CACHE_DIR=./build-cache] - 导出构建缓存" + @echo " make offline-export-full - 导出完整缓存(含 APT 预装基础镜像)" + @echo " make offline-build-base-images - 构建 APT 预装基础镜像" + @echo "" + @echo "【无网环境】" @echo " make offline-setup [CACHE_DIR=./build-cache] - 解压并准备离线缓存" - @echo " make offline-build [CACHE_DIR=./build-cache] - 在无网环境构建所有服务(BuildKit)" - @echo " make offline-build-classic - 使用传统 docker build(更稳定)" - @echo " make -offline-build - 离线构建单个服务" - @echo " (如: make backend-offline-build)" + @echo " make offline-build-final - 使用预装基础镜像构建(推荐,解决 APT 问题)" + @echo " make offline-build-classic - 使用传统 docker build" + @echo " make offline-build - 使用 BuildKit 构建" @echo " make offline-diagnose - 诊断离线构建环境" + @echo " make -offline-build - 离线构建单个服务" @echo "" - @echo "完整工作流程:" - @echo " # 1. 有网环境导出缓存" - @echo " make offline-export" + @echo "【完整工作流程(推荐)】" + @echo " # 1. 有网环境导出完整缓存" + @echo " make offline-export-full" @echo "" - @echo " # 2. 传输缓存到无网环境" - @echo " scp build-cache-*.tar.gz user@offline-server:/path/to/project/" + @echo " # 2. 传输到无网环境(需要传输两个文件)" + @echo " scp build-cache/images/base-images-with-apt.tar user@offline-server:/path/" + @echo " scp build-cache-*.tar.gz user@offline-server:/path/" @echo "" - @echo " # 3. 无网环境构建(推荐先用传统方式)" + @echo " # 3. 无网环境构建" @echo " tar -xzf build-cache-*.tar.gz" - @echo " make offline-diagnose # 检查环境" - @echo " make offline-build-classic # 传统构建(推荐)" - @echo " # 或 make offline-build # BuildKit 构建" + @echo " docker load -i build-cache/images/base-images-with-apt.tar" + @echo " make offline-build-final" diff --git a/scripts/offline/Dockerfile.backend-python.offline-v2 b/scripts/offline/Dockerfile.backend-python.offline-v2 new file mode 100644 index 0000000..dbffa3f --- /dev/null +++ b/scripts/offline/Dockerfile.backend-python.offline-v2 @@ -0,0 +1,82 @@ +# backend-python Dockerfile 离线版本 v2 +FROM maven:3-eclipse-temurin-8 AS datax-builder + +# 配置 Maven 阿里云镜像 +RUN mkdir -p /root/.m2 && \ + echo '\n\ +\n\ + \n\ + \n\ + aliyunmaven\n\ + *\n\ + 阿里云公共仓库\n\ + https://maven.aliyun.com/repository/public\n\ + \n\ + \n\ +' > /root/.m2/settings.xml + +# 离线模式: 从构建参数获取本地 DataX 路径 +ARG RESOURCES_DIR=./build-cache/resources +ARG DATAX_LOCAL_PATH=${RESOURCES_DIR}/DataX + +# 复制本地 DataX 源码 +COPY ${DATAX_LOCAL_PATH} /DataX + +COPY runtime/datax/ DataX/ + +RUN cd DataX && \ + sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \ + plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \ + mvn -U clean package assembly:assembly -Dmaven.test.skip=true + +# 使用预装 APT 包的基础镜像 +FROM datamate-python-base:latest + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + POETRY_VERSION=2.2.1 \ + POETRY_NO_INTERACTION=1 \ + POETRY_VIRTUALENVS_CREATE=false \ + POETRY_CACHE_DIR=/tmp/poetry_cache + +ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk +ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH" + +WORKDIR /app + +# 配置 pip 阿里云镜像并安装 Poetry +RUN --mount=type=cache,target=/root/.cache/pip \ + pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \ + pip config set global.trusted-host mirrors.aliyun.com && \ + pip install --upgrade --root-user-action=ignore pip \ + && pip install --root-user-action=ignore pipx \ + && pipx install "poetry==$POETRY_VERSION" + +COPY --from=datax-builder /DataX/target/datax/datax /opt/datax +RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/ + +# Copy only dependency files first +COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/ + +# Install dependencies +RUN --mount=type=cache,target=$POETRY_CACHE_DIR \ + poetry install --no-root --only main + +# 离线模式: 使用本地 NLTK 数据 +ARG RESOURCES_DIR=./build-cache/resources +ARG NLTK_DATA_LOCAL_PATH=${RESOURCES_DIR}/nltk_data +COPY ${NLTK_DATA_LOCAL_PATH} /usr/local/nltk_data + +ENV NLTK_DATA=/usr/local/nltk_data + +# Copy the rest of the application +COPY runtime/datamate-python /app + +COPY runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh || true + +EXPOSE 18000 + +ENTRYPOINT ["/docker-entrypoint.sh"] diff --git a/scripts/offline/Dockerfile.backend.offline b/scripts/offline/Dockerfile.backend.offline new file mode 100644 index 0000000..8b06a16 --- /dev/null +++ b/scripts/offline/Dockerfile.backend.offline @@ -0,0 +1,71 @@ +# backend Dockerfile 离线版本 +# 使用预装 APT 包的基础镜像 + +FROM maven:3-eclipse-temurin-21 AS builder + +# 配置 Maven 阿里云镜像 +RUN mkdir -p /root/.m2 && \ + echo '\n\ +\n\ + \n\ + \n\ + aliyunmaven\n\ + *\n\ + 阿里云公共仓库\n\ + https://maven.aliyun.com/repository/public\n\ + \n\ + \n\ +' > /root/.m2/settings.xml + +WORKDIR /opt/backend + +# 先复制所有 pom.xml 文件 +COPY backend/pom.xml ./ +COPY backend/services/pom.xml ./services/ +COPY backend/shared/domain-common/pom.xml ./shared/domain-common/ +COPY backend/shared/security-common/pom.xml ./shared/security-common/ +COPY backend/services/data-annotation-service/pom.xml ./services/data-annotation-service/ +COPY backend/services/data-cleaning-service/pom.xml ./services/data-cleaning-service/ +COPY backend/services/data-evaluation-service/pom.xml ./services/data-evaluation-service/ +COPY backend/services/data-management-service/pom.xml ./services/data-management-service/ +COPY backend/services/data-synthesis-service/pom.xml ./services/data-synthesis-service/ +COPY backend/services/execution-engine-service/pom.xml ./services/execution-engine-service/ +COPY backend/services/main-application/pom.xml ./services/main-application/ +COPY backend/services/operator-market-service/pom.xml ./services/operator-market-service/ +COPY backend/services/pipeline-orchestration-service/pom.xml ./services/pipeline-orchestration-service/ +COPY backend/services/rag-indexer-service/pom.xml ./services/rag-indexer-service/ +COPY backend/services/rag-query-service/pom.xml ./services/rag-query-service/ + +# 使用缓存卷下载依赖 +RUN --mount=type=cache,target=/root/.m2/repository \ + cd /opt/backend/services && \ + mvn dependency:go-offline -Dmaven.test.skip=true || true + +# 复制所有源代码 +COPY backend/ /opt/backend + +# 编译打包 +RUN --mount=type=cache,target=/root/.m2/repository \ + cd /opt/backend/services && \ + mvn clean package -Dmaven.test.skip=true + +# 使用预装 APT 包的基础镜像 +FROM datamate-java-base:latest + +# 不再执行 apt-get update,因为基础镜像已经预装了所有需要的包 +# 如果需要添加额外的包,可以在这里添加,但离线环境下会失败 + +COPY --from=builder /opt/backend/services/main-application/target/datamate.jar /opt/backend/datamate.jar +COPY scripts/images/backend/start.sh /opt/backend/start.sh +COPY runtime/ops/examples/test_operator/test_operator.tar /opt/backend/test_operator.tar + +RUN dos2unix /opt/backend/start.sh \ + && chmod +x /opt/backend/start.sh \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime + +EXPOSE 8080 + +ENTRYPOINT ["/opt/backend/start.sh"] +CMD ["java", "-Duser.timezone=Asia/Shanghai", "-jar", "/opt/backend/datamate.jar"] diff --git a/scripts/offline/Dockerfile.base-images b/scripts/offline/Dockerfile.base-images new file mode 100644 index 0000000..de11870 --- /dev/null +++ b/scripts/offline/Dockerfile.base-images @@ -0,0 +1,62 @@ +# 预安装 APT 包的基础镜像 +# 在有网环境构建这些镜像,在无网环境作为基础镜像使用 + +# ==================== backend / gateway 基础镜像 ==================== +FROM eclipse-temurin:21-jdk AS datamate-java-base + +# 配置 apt 阿里云镜像源 +RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \ + elif [ -f /etc/apt/sources.list.d/ubuntu.sources ]; then \ + sed -i 's/archive.ubuntu.com/mirrors.aliyun.com/g; s/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list.d/ubuntu.sources; \ + elif [ -f /etc/apt/sources.list ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g; s/archive.ubuntu.com/mirrors.aliyun.com/g; s/security.ubuntu.com/mirrors.aliyun.com/g' /etc/apt/sources.list; \ + fi && \ + apt-get update && \ + apt-get install -y vim wget curl rsync python3 python3-pip python-is-python3 dos2unix libreoffice fonts-noto-cjk && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# ==================== backend-python 基础镜像 ==================== +FROM python:3.12-slim AS datamate-python-base + +RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \ + elif [ -f /etc/apt/sources.list ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \ + fi && \ + apt-get update && \ + apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common glusterfs-client rsync && \ + rm -rf /var/lib/apt/lists/* + +# ==================== runtime 基础镜像 ==================== +FROM ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm AS datamate-runtime-base + +RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \ + elif [ -f /etc/apt/sources.list ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \ + fi && \ + apt update && \ + apt install -y libgl1 libglib2.0-0 vim libmagic1 libreoffice dos2unix swig poppler-utils tesseract-ocr && \ + rm -rf /var/lib/apt/lists/* + +# ==================== deer-flow-backend 基础镜像 ==================== +FROM ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm AS deer-flow-backend-base + +RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \ + elif [ -f /etc/apt/sources.list ]; then \ + sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \ + fi && \ + apt-get update && apt-get install -y libpq-dev git && \ + rm -rf /var/lib/apt/lists/* + +# ==================== mineru 基础镜像 ==================== +FROM python:3.11-slim AS mineru-base + +RUN sed -i 's/deb.debian.org/mirrors.huaweicloud.com/g' /etc/apt/sources.list.d/debian.sources && \ + apt-get update && \ + apt-get install -y curl vim libgl1 libglx0 libopengl0 libglib2.0-0 procps && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* diff --git a/scripts/offline/Dockerfile.gateway.offline b/scripts/offline/Dockerfile.gateway.offline new file mode 100644 index 0000000..1353ac8 --- /dev/null +++ b/scripts/offline/Dockerfile.gateway.offline @@ -0,0 +1,47 @@ +# gateway Dockerfile 离线版本 +FROM maven:3-eclipse-temurin-21 AS builder + +# 配置 Maven 阿里云镜像 +RUN mkdir -p /root/.m2 && \ + echo '\n\ +\n\ + \n\ + \n\ + aliyunmaven\n\ + *\n\ + 阿里云公共仓库\n\ + https://maven.aliyun.com/repository/public\n\ + \n\ + \n\ +' > /root/.m2/settings.xml + +WORKDIR /opt/gateway + +COPY backend/pom.xml ./ +COPY backend/api-gateway/pom.xml ./api-gateway/ + +RUN --mount=type=cache,target=/root/.m2/repository \ + cd /opt/gateway/api-gateway && \ + mvn dependency:go-offline -Dmaven.test.skip=true || true + +COPY backend/api-gateway /opt/gateway/api-gateway + +RUN --mount=type=cache,target=/root/.m2/repository \ + cd /opt/gateway/api-gateway && \ + mvn clean package -Dmaven.test.skip=true + +FROM datamate-java-base:latest + +COPY --from=builder /opt/gateway/api-gateway/target/gateway.jar /opt/gateway/gateway.jar +COPY scripts/images/gateway/start.sh /opt/gateway/start.sh + +RUN dos2unix /opt/gateway/start.sh \ + && chmod +x /opt/gateway/start.sh \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime + +EXPOSE 8080 + +ENTRYPOINT ["/opt/gateway/start.sh"] +CMD ["java", "-Duser.timezone=Asia/Shanghai", "-jar", "/opt/gateway/gateway.jar"] diff --git a/scripts/offline/Dockerfile.runtime.offline-v2 b/scripts/offline/Dockerfile.runtime.offline-v2 new file mode 100644 index 0000000..28f1340 --- /dev/null +++ b/scripts/offline/Dockerfile.runtime.offline-v2 @@ -0,0 +1,42 @@ +# runtime Dockerfile 离线版本 v2 +# 使用预装 APT 包的基础镜像 + +FROM datamate-runtime-base:latest + +# 离线模式: 本地模型文件路径 +ARG RESOURCES_DIR=./build-cache/resources +ARG MODELS_DIR=${RESOURCES_DIR}/models + +# 复制本地 PaddleOCR 模型 +RUN mkdir -p /home/models +COPY ${MODELS_DIR}/ch_ppocr_mobile_v2.0_cls_infer.tar /home/models/ +RUN tar -xf /home/models/ch_ppocr_mobile_v2.0_cls_infer.tar -C /home/models + +COPY runtime/python-executor /opt/runtime +COPY runtime/ops /opt/runtime/datamate/ops +COPY runtime/ops/user /opt/runtime/user +COPY scripts/images/runtime/start.sh /opt/runtime/start.sh + +ENV PYTHONPATH=/opt/runtime/datamate/ +ENV UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" +ENV UV_INDEX_STRATEGY=unsafe-best-match +ENV UV_INDEX_URL="https://mirrors.aliyun.com/pypi/simple/" + +WORKDIR /opt/runtime + +# 复制本地 spaCy 模型 +COPY ${MODELS_DIR}/zh_core_web_sm-3.8.0-py3-none-any.whl /tmp/ + +RUN --mount=type=cache,target=/root/.cache/uv \ + uv pip install -e .[all] --system \ + && uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \ + && uv pip install /tmp/zh_core_web_sm-3.8.0-py3-none-any.whl --system \ + && echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth + +RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ + && chmod +x /opt/runtime/start.sh \ + && dos2unix /opt/runtime/start.sh + +EXPOSE 8081 + +ENTRYPOINT ["/opt/runtime/start.sh"] diff --git a/scripts/offline/README.md b/scripts/offline/README.md index bc0dc1c..d764a66 100644 --- a/scripts/offline/README.md +++ b/scripts/offline/README.md @@ -171,6 +171,89 @@ tar -czf build-cache-partial.tar.gz build-cache/buildkit/backend-cache make backend-offline-build ``` +## APT 缓存问题详解 + +### 问题描述 + +即使使用了 `--mount=type=cache,target=/var/cache/apt`,Dockerfile 中的 `apt-get update` 仍会尝试从网络获取包列表(list 数据),导致无网环境下构建失败: + +``` +Err:1 http://mirrors.aliyun.com/debian bookworm InRelease + Could not resolve 'mirrors.aliyun.com' +Reading package lists... +E: Failed to fetch http://mirrors.aliyun.com/debian/dists/bookworm/InRelease +``` + +### 根本原因 + +- `--mount=type=cache,target=/var/cache/apt` 只缓存下载的 `.deb` 包 +- `apt-get update` 会尝试从配置的源获取最新的包索引(InRelease/Packages 文件) +- `/var/lib/apt/lists/` 目录存储包索引,但通常不在缓存范围内 + +### 解决方案 + +#### 方案 1: 使用预装 APT 包的基础镜像(推荐) + +这是最有效的方法: + +**步骤 1**: 在有网环境构建预装所有依赖的基础镜像 + +```bash +# 构建并保存带 APT 预装包的基础镜像 +./scripts/offline/build-base-images.sh +``` + +这会创建以下预装基础镜像: +- `datamate-java-base` - 用于 backend、gateway(预装 vim、python3、libreoffice 等) +- `datamate-python-base` - 用于 backend-python(预装 openjdk、nfs-common 等) +- `datamate-runtime-base` - 用于 runtime(预装 libgl1、tesseract-ocr 等) +- `deer-flow-backend-base` - 用于 deer-flow-backend +- `mineru-base` - 用于 mineru + +**步骤 2**: 在无网环境使用这些基础镜像构建 + +```bash +# 加载包含预装基础镜像的 tar 包 +docker load -i build-cache/images/base-images-with-apt.tar + +# 使用最终版构建脚本 +./scripts/offline/build-offline-final.sh +``` + +#### 方案 2: 修改 Dockerfile 跳过 apt update + +如果确定不需要安装新包,可以修改 Dockerfile: + +```dockerfile +# 原代码 +RUN apt-get update && apt-get install -y xxx + +# 修改为(离线环境) +# RUN apt-get update && \ +RUN apt-get install -y xxx || true +``` + +#### 方案 3: 挂载 apt lists 缓存 + +在有网环境预先下载并保存 apt lists: + +```bash +# 有网环境:保存 apt lists +docker run --rm \ + -v "$(pwd)/apt-lists:/var/lib/apt/lists" \ + eclipse-temurin:21-jdk \ + apt-get update + +# 无网环境:挂载保存的 lists +docker build \ + --mount=type=bind,source=$(pwd)/apt-lists,target=/var/lib/apt/lists,ro \ + -f Dockerfile . +``` + +**注意**: BuildKit 的 `--mount=type=bind` 在 `docker build` 中不直接支持,需要在 Dockerfile 中使用。 + +--- + ## 故障排查 ### 问题 1: 构建时仍然尝试拉取镜像(最常见) @@ -329,24 +412,63 @@ docker buildx build \ ``` scripts/offline/ -├── export-cache.sh # 有网环境导出缓存脚本 -├── build-offline.sh # 基础离线构建脚本(BuildKit) -├── build-offline-v2.sh # 增强版离线构建脚本 -├── build-offline-classic.sh # 传统 docker build 脚本(推荐) -├── diagnose.sh # 环境诊断脚本 -├── Dockerfile.backend-python.offline # backend-python 离线 Dockerfile -├── Dockerfile.runtime.offline # runtime 离线 Dockerfile -├── Dockerfile.deer-flow-backend.offline # deer-flow-backend 离线 Dockerfile -├── Dockerfile.deer-flow-frontend.offline # deer-flow-frontend 离线 Dockerfile -├── Makefile.offline # 独立离线构建 Makefile -└── README.md # 本文档 +├── export-cache.sh # 有网环境导出缓存脚本 +├── build-base-images.sh # 构建 APT 预装基础镜像 +├── build-offline.sh # 基础离线构建脚本(BuildKit) +├── build-offline-v2.sh # 增强版离线构建脚本 +├── build-offline-classic.sh # 传统 docker build 脚本 +├── build-offline-final.sh # 最终版(使用预装基础镜像,推荐) +├── diagnose.sh # 环境诊断脚本 +├── Dockerfile.base-images # 预装 APT 包的基础镜像定义 +├── Dockerfile.backend.offline # backend 离线 Dockerfile(使用预装基础镜像) +├── Dockerfile.gateway.offline # gateway 离线 Dockerfile(使用预装基础镜像) +├── Dockerfile.backend-python.offline # backend-python 离线 Dockerfile +├── Dockerfile.backend-python.offline-v2 # backend-python 离线 Dockerfile v2(使用预装基础镜像) +├── Dockerfile.runtime.offline # runtime 离线 Dockerfile +├── Dockerfile.runtime.offline-v2 # runtime 离线 Dockerfile v2(使用预装基础镜像) +├── Dockerfile.deer-flow-backend.offline # deer-flow-backend 离线 Dockerfile +├── Dockerfile.deer-flow-frontend.offline # deer-flow-frontend 离线 Dockerfile +├── Makefile.offline # 独立离线构建 Makefile +└── README.md # 本文档 -Makefile.offline.mk # Makefile 扩展(追加到主 Makefile) +Makefile.offline.mk # Makefile 扩展(追加到主 Makefile) ``` -## 推荐工作流 +## 推荐工作流(解决 APT 问题版) -对于遇到镜像拉取问题的用户,推荐以下工作流: +### 工作流 A: 使用预装 APT 包的基础镜像(彻底解决 APT 问题) + +```bash +# ========== 有网环境 ========== + +# 1. 构建并保存带 APT 预装包的基础镜像 +./scripts/offline/build-base-images.sh +# 输出: build-cache/images/base-images-with-apt.tar + +# 2. 导出其他缓存(BuildKit 缓存、外部资源) +./scripts/offline/export-cache.sh + +# 3. 打包传输 +scp build-cache/images/base-images-with-apt.tar user@offline-server:/opt/datamate/build-cache/images/ +scp build-cache-*.tar.gz user@offline-server:/opt/datamate/ + +# ========== 无网环境 ========== + +cd /opt/datamate + +# 4. 解压 +tar -xzf build-cache-*.tar.gz + +# 5. 加载预装基础镜像(关键!) +docker load -i build-cache/images/base-images-with-apt.tar + +# 6. 使用最终版脚本构建 +./scripts/offline/build-offline-final.sh +``` + +### 工作流 B: 简单场景(使用传统构建) + +如果 APT 包需求简单,可以直接使用传统构建: ```bash # 有网环境 diff --git a/scripts/offline/build-base-images.sh b/scripts/offline/build-base-images.sh new file mode 100644 index 0000000..5778a08 --- /dev/null +++ b/scripts/offline/build-base-images.sh @@ -0,0 +1,87 @@ +#!/bin/bash +# 构建带有预装 APT 包的基础镜像 +# Usage: ./build-base-images.sh [output-dir] + +set -e + +OUTPUT_DIR="${1:-./build-cache}" +IMAGES_DIR="$OUTPUT_DIR/images" + +mkdir -p "$IMAGES_DIR" + +echo "======================================" +echo "构建预装 APT 包的基础镜像" +echo "======================================" + +# 构建各个基础镜像 +echo "" +echo "1. 构建 datamate-java-base (用于 backend, gateway)..." +docker build \ + -t datamate-java-base:latest \ + --target datamate-java-base \ + -f scripts/offline/Dockerfile.base-images \ + . || echo "Warning: datamate-java-base 构建失败" + +echo "" +echo "2. 构建 datamate-python-base (用于 backend-python)..." +docker build \ + -t datamate-python-base:latest \ + --target datamate-python-base \ + -f scripts/offline/Dockerfile.base-images \ + . || echo "Warning: datamate-python-base 构建失败" + +echo "" +echo "3. 构建 datamate-runtime-base (用于 runtime)..." +docker build \ + -t datamate-runtime-base:latest \ + --target datamate-runtime-base \ + -f scripts/offline/Dockerfile.base-images \ + . || echo "Warning: datamate-runtime-base 构建失败" + +echo "" +echo "4. 构建 deer-flow-backend-base (用于 deer-flow-backend)..." +docker build \ + -t deer-flow-backend-base:latest \ + --target deer-flow-backend-base \ + -f scripts/offline/Dockerfile.base-images \ + . || echo "Warning: deer-flow-backend-base 构建失败" + +echo "" +echo "5. 构建 mineru-base (用于 mineru)..." +docker build \ + -t mineru-base:latest \ + --target mineru-base \ + -f scripts/offline/Dockerfile.base-images \ + . || echo "Warning: mineru-base 构建失败" + +echo "" +echo "======================================" +echo "保存基础镜像集合" +echo "======================================" + +docker save -o "$IMAGES_DIR/base-images-with-apt.tar" \ + maven:3-eclipse-temurin-21 \ + maven:3-eclipse-temurin-8 \ + eclipse-temurin:21-jdk \ + mysql:8 \ + node:20-alpine \ + nginx:1.29 \ + ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm \ + ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm \ + ghcr.nju.edu.cn/astral-sh/uv:latest \ + python:3.12-slim \ + python:3.11-slim \ + gcr.io/distroless/nodejs20-debian12 \ + datamate-java-base:latest \ + datamate-python-base:latest \ + datamate-runtime-base:latest \ + deer-flow-backend-base:latest \ + mineru-base:latest \ + 2>/dev/null || echo "Warning: 部分镜像保存失败" + +echo "" +echo "======================================" +echo "✓ 基础镜像构建完成" +echo "======================================" +echo "镜像列表:" +docker images | grep -E "(datamate-|deer-flow-|mineru-)base" || true diff --git a/scripts/offline/build-offline-final.sh b/scripts/offline/build-offline-final.sh new file mode 100644 index 0000000..96f7dba --- /dev/null +++ b/scripts/offline/build-offline-final.sh @@ -0,0 +1,181 @@ +#!/bin/bash +# 最终版离线构建脚本 - 使用预装 APT 包的基础镜像 +# Usage: ./build-offline-final.sh [cache-dir] [version] + +set -e + +CACHE_DIR="${1:-./build-cache}" +VERSION="${2:-latest}" +IMAGES_DIR="$CACHE_DIR/images" +RESOURCES_DIR="$CACHE_DIR/resources" + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log_info() { echo -e "${GREEN}[INFO]${NC} $1"; } +log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; } +log_error() { echo -e "${RED}[ERROR]${NC} $1"; } + +# 检查缓存目录 +if [ ! -d "$CACHE_DIR" ]; then + log_error "缓存目录 $CACHE_DIR 不存在" + exit 1 +fi + +# 加载基础镜像 +load_images() { + log_info "加载基础镜像..." + + # 优先加载带 APT 预装包的镜像集合 + if [ -f "$IMAGES_DIR/base-images-with-apt.tar" ]; then + log_info "加载带 APT 预装包的基础镜像..." + docker load -i "$IMAGES_DIR/base-images-with-apt.tar" + elif [ -f "$IMAGES_DIR/base-images.tar" ]; then + log_warn "加载普通基础镜像(不含 APT 预装包)..." + docker load -i "$IMAGES_DIR/base-images.tar" + else + log_warn "基础镜像 tar 包不存在,检查本地镜像..." + fi + + log_info "✓ 镜像加载完成" +} + +# 验证镜像是否存在 +verify_image() { + docker inspect "$1" > /dev/null 2>&1 +} + +# 构建函数 +build_service() { + local service_name=$1 + local image_name=$2 + local dockerfile=$3 + local base_image=$4 # 必需的基础镜像 + + log_info "----------------------------------------" + log_info "构建 $service_name" + log_info "----------------------------------------" + + if [ ! -f "$dockerfile" ]; then + log_error "Dockerfile 不存在: $dockerfile" + return 1 + fi + + # 检查必需的基础镜像 + if [ -n "$base_image" ]; then + if verify_image "$base_image"; then + log_info "✓ 基础镜像存在: $base_image" + else + log_error "✗ 缺少基础镜像: $base_image" + log_info "请确保已加载正确的 base-images-with-apt.tar" + return 1 + fi + fi + + # 准备构建参数 + local build_args=() + + # 添加资源目录参数 + if [ -d "$RESOURCES_DIR" ]; then + build_args+=("--build-arg" "RESOURCES_DIR=$RESOURCES_DIR") + fi + + # 执行构建 + log_info "开始构建..." + if docker build \ + --pull=false \ + "${build_args[@]}" \ + -f "$dockerfile" \ + -t "$image_name:$VERSION" \ + . 2>&1; then + log_info "✓ $service_name 构建成功" + return 0 + else + log_error "✗ $service_name 构建失败" + return 1 + fi +} + +# 主流程 +main() { + log_info "======================================" + log_info "最终版离线构建 (使用 APT 预装基础镜像)" + log_info "======================================" + + # 加载基础镜像 + load_images + + # 验证关键基础镜像 + log_info "" + log_info "验证预装基础镜像..." + REQUIRED_BASE_IMAGES=( + "datamate-java-base:latest" + "datamate-python-base:latest" + "datamate-runtime-base:latest" + ) + + for img in "${REQUIRED_BASE_IMAGES[@]}"; do + if verify_image "$img"; then + log_info " ✓ $img" + else + log_warn " ✗ $img (缺失)" + fi + done + + # 定义服务配置 + declare -A SERVICES=( + ["database"]="datamate-database:scripts/images/database/Dockerfile:" + ["gateway"]="datamate-gateway:scripts/offline/Dockerfile.gateway.offline:datamate-java-base:latest" + ["backend"]="datamate-backend:scripts/offline/Dockerfile.backend.offline:datamate-java-base:latest" + ["frontend"]="datamate-frontend:scripts/images/frontend/Dockerfile:" + ["runtime"]="datamate-runtime:scripts/offline/Dockerfile.runtime.offline-v2:datamate-runtime-base:latest" + ["backend-python"]="datamate-backend-python:scripts/offline/Dockerfile.backend-python.offline-v2:datamate-python-base:latest" + ) + + log_info "" + log_info "======================================" + log_info "开始构建服务" + log_info "======================================" + + local failed=() + local succeeded=() + + for service_name in "${!SERVICES[@]}"; do + IFS=':' read -r image_name dockerfile base_image <<< "${SERVICES[$service_name]}" + if build_service "$service_name" "$image_name" "$dockerfile" "$base_image"; then + succeeded+=("$service_name") + else + failed+=("$service_name") + fi + echo "" + done + + # 汇总 + log_info "======================================" + log_info "构建结果" + log_info "======================================" + + if [ ${#succeeded[@]} -gt 0 ]; then + log_info "成功 (${#succeeded[@]}): ${succeeded[*]}" + fi + + if [ ${#failed[@]} -gt 0 ]; then + log_error "失败 (${#failed[@]}): ${failed[*]}" + + log_info "" + log_info "提示: 如果失败是因为缺少预装基础镜像,请确保:" + log_info " 1. 在有网环境执行: ./scripts/offline/build-base-images.sh" + log_info " 2. 将生成的 base-images-with-apt.tar 传输到无网环境" + log_info " 3. 在无网环境加载: docker load -i base-images-with-apt.tar" + + exit 1 + else + log_info "✓ 所有服务构建成功!" + echo "" + docker images --format "table {{.Repository}}:{{.Tag}}\t{{.Size}}" | grep -E "(datamate-|deer-flow-)" || true + fi +} + +main "$@" diff --git a/scripts/offline/export-cache.sh b/scripts/offline/export-cache.sh index a952046..70ef29d 100644 --- a/scripts/offline/export-cache.sh +++ b/scripts/offline/export-cache.sh @@ -8,6 +8,7 @@ OUTPUT_DIR="${1:-./build-cache}" BUILDKIT_CACHE_DIR="$OUTPUT_DIR/buildkit" IMAGES_DIR="$OUTPUT_DIR/images" RESOURCES_DIR="$OUTPUT_DIR/resources" +APT_CACHE_DIR="$OUTPUT_DIR/apt-cache" # 确保 buildx 构建器存在 if ! docker buildx inspect offline-builder > /dev/null 2>&1; then @@ -17,7 +18,7 @@ else docker buildx use offline-builder fi -mkdir -p "$BUILDKIT_CACHE_DIR" "$IMAGES_DIR" "$RESOURCES_DIR" +mkdir -p "$BUILDKIT_CACHE_DIR" "$IMAGES_DIR" "$RESOURCES_DIR" "$APT_CACHE_DIR" echo "======================================" echo "1. 导出基础镜像" @@ -117,11 +118,42 @@ fi echo "" echo "======================================" -echo "4. 打包缓存" +echo "4. 导出 APT 缓存" +echo "======================================" + +# 为需要 apt 的镜像预生成 apt 缓存 +echo "生成 APT list 缓存..." + +# eclipse-temurin:21-jdk 的 apt 缓存 +docker run --rm \ + -v "$APT_CACHE_DIR/eclipse-temurin:/var/cache/apt/archives" \ + -v "$APT_CACHE_DIR/eclipse-temurin-lists:/var/lib/apt/lists" \ + eclipse-temurin:21-jdk \ + bash -c "apt-get update && apt-get install -y --download-only vim wget curl rsync python3 python3-pip python-is-python3 dos2unix libreoffice fonts-noto-cjk 2>/dev/null || true" 2>/dev/null || echo " Warning: eclipse-temurin apt 缓存导出失败" + +# python:3.12-slim 的 apt 缓存 +docker run --rm \ + -v "$APT_CACHE_DIR/python312:/var/cache/apt/archives" \ + -v "$APT_CACHE_DIR/python312-lists:/var/lib/apt/lists" \ + python:3.12-slim \ + bash -c "apt-get update && apt-get install -y --download-only vim openjdk-21-jre nfs-common glusterfs-client rsync 2>/dev/null || true" 2>/dev/null || echo " Warning: python3.12 apt 缓存导出失败" + +# python:3.11-slim 的 apt 缓存 +docker run --rm \ + -v "$APT_CACHE_DIR/python311:/var/cache/apt/archives" \ + -v "$APT_CACHE_DIR/python311-lists:/var/lib/apt/lists" \ + python:3.11-slim \ + bash -c "apt-get update && apt-get install -y --download-only curl vim libgl1 libglx0 libopengl0 libglib2.0-0 procps 2>/dev/null || true" 2>/dev/null || echo " Warning: python3.11 apt 缓存导出失败" + +echo "✓ APT 缓存导出完成" + +echo "" +echo "======================================" +echo "5. 打包缓存" echo "======================================" cd "$OUTPUT_DIR" -tar -czf "build-cache-$(date +%Y%m%d).tar.gz" buildkit images resources +tar -czf "build-cache-$(date +%Y%m%d).tar.gz" buildkit images resources apt-cache cd - > /dev/null echo "" @@ -131,4 +163,10 @@ echo "======================================" echo "缓存位置: $OUTPUT_DIR" echo "传输文件: $OUTPUT_DIR/build-cache-$(date +%Y%m%d).tar.gz" echo "" +echo "包含内容:" +echo " - 基础镜像 (images/)" +echo " - BuildKit 缓存 (buildkit/)" +echo " - 外部资源 (resources/)" +echo " - APT 缓存 (apt-cache/)" +echo "" echo "请将此压缩包传输到无网环境后解压使用"