#!/bin/bash
# BuildKit cache export script - run this in an environment WITH network access.
# Usage: ./export-cache.sh [output-dir]
#
# Produces, under output-dir (default: ./build-cache):
#   images/     docker-save tarball of the base images
#   buildkit/   per-service BuildKit local caches
#   resources/  pre-downloaded models and source checkouts
#   apt-cache/  downloaded .deb archives and apt lists per base image
# and finally packs all four directories into build-cache-YYYYMMDD.tar.gz.
#
# Individual pulls/builds/downloads are best-effort: a failure prints a
# warning to stderr and the script continues with the remaining items.

set -euo pipefail

OUTPUT_DIR="${1:-./build-cache}"

# Resolve to an absolute path: the directories below are bind-mounted with
# `docker run -v`, which needs an absolute host path.
mkdir -p "$OUTPUT_DIR"
OUTPUT_DIR="$(cd "$OUTPUT_DIR" && pwd)"

BUILDKIT_CACHE_DIR="$OUTPUT_DIR/buildkit"
IMAGES_DIR="$OUTPUT_DIR/images"
RESOURCES_DIR="$OUTPUT_DIR/resources"
APT_CACHE_DIR="$OUTPUT_DIR/apt-cache"

# Print a diagnostic message to stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Download $1 to $2 unless $2 already exists and is non-empty.
# $3 is the human-readable label used in messages.
# The payload is written to "$2.part" first so that an interrupted or failed
# download never leaves a truncated file that a later run would mistake for a
# complete one.
download_file() {
  local url=$1 dest=$2 label=$3
  local partial="${dest}.part"

  if [[ -s "$dest" ]]; then
    return 0
  fi

  echo "下载 $label..."
  if wget -O "$partial" "$url"; then
    mv -f -- "$partial" "$dest"
  else
    rm -f -- "$partial"
    warn "警告: $label 下载失败"
  fi
}

# Shallow-clone repository $1 into directory $2 unless $2 already exists.
# $3 is the human-readable label used in messages.
clone_repo() {
  local url=$1 dest=$2 label=$3

  if [[ -d "$dest" ]]; then
    return 0
  fi

  echo "克隆 $label 源码..."
  git clone --depth 1 "$url" "$dest" || warn "警告: $label 克隆失败"
}

# Download (without installing) apt packages inside a container of image $1,
# storing archives in $APT_CACHE_DIR/$2 and apt lists in $APT_CACHE_DIR/$2-lists.
# $3 is the label used in the warning; remaining arguments are package names.
# Packages are forwarded as positional arguments rather than spliced into the
# command string. A failure of docker or apt produces a warning only.
export_apt_cache() {
  local image=$1 cache_name=$2 label=$3
  shift 3

  docker run --rm \
    -v "$APT_CACHE_DIR/${cache_name}:/var/cache/apt/archives" \
    -v "$APT_CACHE_DIR/${cache_name}-lists:/var/lib/apt/lists" \
    "$image" \
    bash -c 'apt-get update && apt-get install -y --download-only "$@"' _ "$@" \
    || warn " Warning: $label apt 缓存导出失败"
}

# Make sure the buildx builder exists and is selected.
if ! docker buildx inspect offline-builder > /dev/null 2>&1; then
  echo "创建 buildx 构建器..."
  docker buildx create --name offline-builder --driver docker-container --use
else
  docker buildx use offline-builder
fi

mkdir -p "$BUILDKIT_CACHE_DIR" "$IMAGES_DIR" "$RESOURCES_DIR" "$APT_CACHE_DIR"

echo "======================================"
echo "1. 导出基础镜像"
echo "======================================"

BASE_IMAGES=(
  "maven:3-eclipse-temurin-21"
  "maven:3-eclipse-temurin-8"
  "eclipse-temurin:21-jdk"
  "mysql:8"
  "node:20-alpine"
  "nginx:1.29"
  "ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm"
  "ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm"
  "ghcr.nju.edu.cn/astral-sh/uv:latest"
  "python:3.12-slim"
  "python:3.11-slim"
  "gcr.nju.edu.cn/distroless/nodejs20-debian12"
)

# Only images that are actually present locally are handed to `docker save`;
# otherwise a single failed pull would make the whole save (and the script) fail.
available_images=()
for img in "${BASE_IMAGES[@]}"; do
  echo "拉取: $img"
  if ! docker pull "$img"; then
    warn "警告: $img 拉取失败,可能已存在"
  fi

  if docker image inspect "$img" > /dev/null 2>&1; then
    available_images+=("$img")
  else
    warn "警告: $img 本地不存在,已跳过保存"
  fi
done

echo ""
echo "保存基础镜像到 $IMAGES_DIR/base-images.tar..."
if (( ${#available_images[@]} > 0 )); then
  docker save -o "$IMAGES_DIR/base-images.tar" "${available_images[@]}"
  echo "✓ 基础镜像保存完成"
else
  warn "警告: 没有可保存的基础镜像"
fi

echo ""
echo "======================================"
echo "2. 导出 BuildKit 构建缓存"
echo "======================================"

# Service definitions, one per entry: "service-name:image-name:dockerfile-path".
SERVICES=(
  "backend:datamate-backend:scripts/images/backend/Dockerfile"
  "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
  "database:datamate-database:scripts/images/database/Dockerfile"
  "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
  "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
  "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
  "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
  "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
  "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)

for service_config in "${SERVICES[@]}"; do
  IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
  cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"

  echo ""
  echo "导出 [$service_name] 缓存到 $cache_file..."

  # Run a regular build (context: current directory) so that the local cache
  # export is populated; report success only if the build really succeeded.
  if docker buildx build \
    --cache-to "type=local,dest=$cache_file,mode=max" \
    -f "$dockerfile" \
    -t "$image_name:cache" \
    .; then
    echo "✓ $service_name 缓存导出完成"
  else
    warn "警告: $service_name 缓存导出失败"
  fi
done

echo ""
echo "======================================"
echo "3. 预下载外部资源"
echo "======================================"

mkdir -p "$RESOURCES_DIR/models"

# PaddleOCR model
download_file \
  "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
  "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar" \
  "PaddleOCR 模型"

# spaCy model
download_file \
  "https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl" \
  "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl" \
  "spaCy 模型"

# DataX sources
clone_repo "https://gitee.com/alibaba/DataX.git" "$RESOURCES_DIR/DataX" "DataX"

# deer-flow sources (used by the deer-flow builds)
clone_repo \
  "https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git" \
  "$RESOURCES_DIR/deer-flow" \
  "deer-flow"

echo ""
echo "======================================"
echo "4. 导出 APT 缓存"
echo "======================================"

# Pre-populate apt caches for the base images whose builds use apt.
echo "生成 APT list 缓存..."

# apt cache for eclipse-temurin:21-jdk
export_apt_cache "eclipse-temurin:21-jdk" "eclipse-temurin" "eclipse-temurin" \
  vim wget curl rsync python3 python3-pip python-is-python3 dos2unix \
  libreoffice fonts-noto-cjk

# apt cache for python:3.12-slim
export_apt_cache "python:3.12-slim" "python312" "python3.12" \
  vim openjdk-21-jre nfs-common glusterfs-client rsync

# apt cache for python:3.11-slim
export_apt_cache "python:3.11-slim" "python311" "python3.11" \
  curl vim libgl1 libglx0 libopengl0 libglib2.0-0 procps

echo "✓ APT 缓存导出完成"

echo ""
echo "======================================"
echo "5. 打包缓存"
echo "======================================"

# Compute the archive name once so the file created and the name reported
# below cannot differ if the date changes while the script runs.
archive_name="build-cache-$(date +%Y%m%d).tar.gz"

# -C archives relative to OUTPUT_DIR without changing the script's own cwd.
tar -czf "$OUTPUT_DIR/$archive_name" -C "$OUTPUT_DIR" \
  buildkit images resources apt-cache

echo ""
echo "======================================"
echo "✓ 缓存导出完成!"
echo "======================================"
echo "缓存位置: $OUTPUT_DIR"
echo "传输文件: $OUTPUT_DIR/$archive_name"
echo ""
echo "包含内容:"
echo " - 基础镜像 (images/)"
echo " - BuildKit 缓存 (buildkit/)"
echo " - 外部资源 (resources/)"
echo " - APT 缓存 (apt-cache/)"
echo ""
echo "请将此压缩包传输到无网环境后解压使用"