You've already forked DataMate
- 新增 APT 缓存目录和相关构建脚本 export-cache.sh - 添加 build-base-images.sh 脚本用于构建预装 APT 包的基础镜像 - 增加 build-offline-final.sh 最终版离线构建脚本 - 更新 Makefile.offline.mk 添加新的离线构建目标 - 扩展 README.md 文档详细说明 APT 缓存问题解决方案 - 为多个服务添加使用预装基础镜像的离线 Dockerfile - 修改打包脚本包含 APT 缓存到最终压缩包中
173 lines
6.3 KiB
Bash
173 lines
6.3 KiB
Bash
#!/bin/bash
# BuildKit cache export script - run it on a machine that has network access.
# Usage: ./export-cache.sh [output-dir]
#
# This first section defines the export directory layout, selects the
# "offline-builder" buildx builder and creates the output directories.

# Stop at the first unguarded command that fails.
set -e

# Root of the export: first positional argument, default ./build-cache.
OUTPUT_DIR="${1:-./build-cache}"

# One sub-directory per kind of exported artifact.
BUILDKIT_CACHE_DIR="${OUTPUT_DIR}/buildkit"
IMAGES_DIR="${OUTPUT_DIR}/images"
RESOURCES_DIR="${OUTPUT_DIR}/resources"
APT_CACHE_DIR="${OUTPUT_DIR}/apt-cache"

# Make sure the buildx builder exists: reuse it when it can be inspected,
# otherwise create it (docker-container driver) and switch to it.
if docker buildx inspect offline-builder &> /dev/null; then
  docker buildx use offline-builder
else
  echo "创建 buildx 构建器..."
  docker buildx create --name offline-builder --driver docker-container --use
fi

mkdir -p \
  "${BUILDKIT_CACHE_DIR}" \
  "${IMAGES_DIR}" \
  "${RESOURCES_DIR}" \
  "${APT_CACHE_DIR}"
|
|
|
|
echo "======================================"
echo "1. 导出基础镜像"
echo "======================================"

# Base images pulled here and bundled into a single tarball under
# $IMAGES_DIR.
BASE_IMAGES=(
  "maven:3-eclipse-temurin-21"
  "maven:3-eclipse-temurin-8"
  "eclipse-temurin:21-jdk"
  "mysql:8"
  "node:20-alpine"
  "nginx:1.29"
  "ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm"
  "ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm"
  "ghcr.nju.edu.cn/astral-sh/uv:latest"
  "python:3.12-slim"
  "python:3.11-slim"
  "gcr.nju.edu.cn/distroless/nodejs20-debian12"
)

# Pull every image. A failed pull is tolerated on purpose: the image may
# already be present locally from an earlier run.
for img in "${BASE_IMAGES[@]}"; do
  echo "拉取: $img"
  docker pull "$img" || echo "警告: $img 拉取失败,可能已存在" >&2
done

# Keep only the images that really exist locally. Handing `docker save` a
# name that is not present makes the whole save fail, which under `set -e`
# would abort the export because of a single unavailable image.
available_images=()
for img in "${BASE_IMAGES[@]}"; do
  if docker image inspect "$img" > /dev/null 2>&1; then
    available_images+=("$img")
  else
    echo "警告: 本地不存在镜像 $img,已跳过保存" >&2
  fi
done

# Nothing to save means the export is useless; fail loudly.
if [ "${#available_images[@]}" -eq 0 ]; then
  echo "错误: 没有可保存的基础镜像" >&2
  exit 1
fi

echo ""
echo "保存基础镜像到 $IMAGES_DIR/base-images.tar..."
docker save -o "$IMAGES_DIR/base-images.tar" "${available_images[@]}"
echo "✓ 基础镜像保存完成"
|
|
|
|
echo ""
echo "======================================"
echo "2. 导出 BuildKit 构建缓存"
echo "======================================"

# Service definitions, one per entry, formatted as
#   <service name>:<image name>:<Dockerfile path>
SERVICES=(
  "backend:datamate-backend:scripts/images/backend/Dockerfile"
  "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
  "database:datamate-database:scripts/images/database/Dockerfile"
  "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
  "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
  "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
  "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
  "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
  "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)

for service_config in "${SERVICES[@]}"; do
  # Split the colon-separated entry into its three fields.
  IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
  cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"

  echo ""
  echo "导出 [$service_name] 缓存到 $cache_file..."

  # Build the image with the current directory as context and write the
  # build cache to a local directory (mode=max). A failed build is reported
  # but does not stop the remaining services; the success line is printed
  # only when the build actually succeeded.
  if docker buildx build \
    --cache-to "type=local,dest=$cache_file,mode=max" \
    -f "$dockerfile" \
    -t "$image_name:cache" \
    .; then
    echo "✓ $service_name 缓存导出完成"
  else
    echo "警告: $service_name 缓存导出失败" >&2
  fi
done
|
|
|
|
echo ""
echo "======================================"
echo "3. 预下载外部资源"
echo "======================================"

#######################################
# Download one file unless a non-empty copy is already present.
# The data is written to "<dest>.part" first and only renamed to <dest>
# after wget succeeds, so a failed or interrupted download never leaves a
# truncated file that a later run would mistake for a finished one.
# Arguments: $1 - label used in messages
#            $2 - source URL
#            $3 - destination file path
# Returns:   0 also when the download fails (best effort, warning on stderr)
#######################################
_download_resource() {
  local label=$1 url=$2 dest=$3
  local partial="${dest}.part"

  # -s rather than -f: an empty file left by an earlier failed run is
  # fetched again instead of being treated as cached.
  if [ -s "$dest" ]; then
    return 0
  fi

  echo "下载 ${label}..."
  if wget -O "$partial" "$url"; then
    mv -f -- "$partial" "$dest"
  else
    rm -f -- "$partial"
    echo "警告: ${label} 下载失败" >&2
  fi
}

#######################################
# Shallow-clone a git repository unless the target directory exists.
# Arguments: $1 - label used in messages
#            $2 - repository URL
#            $3 - destination directory
# Returns:   0 also when the clone fails (best effort, warning on stderr)
#######################################
_clone_resource() {
  local label=$1 url=$2 dest=$3

  if [ -d "$dest" ]; then
    return 0
  fi

  echo "克隆 ${label}..."
  git clone --depth 1 "$url" "$dest" \
    || echo "警告: ${label} 克隆失败" >&2
}

mkdir -p "$RESOURCES_DIR/models"

# PaddleOCR model
_download_resource "PaddleOCR 模型" \
  "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
  "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar"

# spaCy model
_download_resource "spaCy 模型" \
  "https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl" \
  "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl"

# DataX sources
_clone_resource "DataX 源码" \
  "https://gitee.com/alibaba/DataX.git" \
  "$RESOURCES_DIR/DataX"

# deer-flow sources (used by the deer-flow builds)
_clone_resource "deer-flow 源码" \
  "https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git" \
  "$RESOURCES_DIR/deer-flow"
|
|
|
|
echo ""
echo "======================================"
echo "4. 导出 APT 缓存"
echo "======================================"

# Pre-populate the apt cache for the images whose builds run apt.
echo "生成 APT list 缓存..."

# Bind-mount sources are given as absolute paths: with the default
# OUTPUT_DIR (./build-cache) the path is relative, and Docker releases
# without relative-path support interpret such a -v source as a volume name.
mkdir -p "$APT_CACHE_DIR"
apt_cache_abs="$(cd "$APT_CACHE_DIR" && pwd)"

#######################################
# Run `apt-get update` and a download-only install inside an image, with
# /var/cache/apt/archives and /var/lib/apt/lists bind-mounted to the host.
# apt and docker failures now reach the warning below; previously an inner
# `|| true` plus `2>/dev/null` hid every apt error.
# Globals:   apt_cache_abs (read)
# Arguments: $1 - label used in the warning message
#            $2 - host directory name under the apt cache directory
#            $3 - image to run
#            $4... - packages to download
# Returns:   0 also on failure (best effort, warning on stderr)
#######################################
_export_apt_cache() {
  local label=$1 cache_name=$2 image=$3
  shift 3

  # Packages are passed as positional arguments to the inner shell rather
  # than spliced into the command string.
  docker run --rm \
    -v "${apt_cache_abs}/${cache_name}:/var/cache/apt/archives" \
    -v "${apt_cache_abs}/${cache_name}-lists:/var/lib/apt/lists" \
    "$image" \
    bash -c 'apt-get update && apt-get install -y --download-only "$@"' _ "$@" \
    || echo "  Warning: ${label} apt 缓存导出失败" >&2
}

# apt cache for eclipse-temurin:21-jdk
_export_apt_cache "eclipse-temurin" "eclipse-temurin" "eclipse-temurin:21-jdk" \
  vim wget curl rsync python3 python3-pip python-is-python3 dos2unix \
  libreoffice fonts-noto-cjk

# apt cache for python:3.12-slim
_export_apt_cache "python3.12" "python312" "python:3.12-slim" \
  vim openjdk-21-jre nfs-common glusterfs-client rsync

# apt cache for python:3.11-slim
_export_apt_cache "python3.11" "python311" "python:3.11-slim" \
  curl vim libgl1 libglx0 libopengl0 libglib2.0-0 procps

echo "✓ APT 缓存导出完成"
|
|
|
|
echo ""
echo "======================================"
echo "5. 打包缓存"
echo "======================================"

# Compute the archive name once so the file that is created and the path
# reported below always match, even if the date changes while tar runs.
archive_name="build-cache-$(date +%Y%m%d).tar.gz"

# Archive the four export directories from inside OUTPUT_DIR. The subshell
# keeps the directory change local, so no `cd -` is needed afterwards.
(
  cd "$OUTPUT_DIR"
  tar -czf "$archive_name" buildkit images resources apt-cache
)

echo ""
echo "======================================"
echo "✓ 缓存导出完成!"
echo "======================================"
echo "缓存位置: $OUTPUT_DIR"
echo "传输文件: $OUTPUT_DIR/$archive_name"
echo ""
echo "包含内容:"
echo "  - 基础镜像 (images/)"
echo "  - BuildKit 缓存 (buildkit/)"
echo "  - 外部资源 (resources/)"
echo "  - APT 缓存 (apt-cache/)"
echo ""
echo "请将此压缩包传输到无网环境后解压使用"
|