feat(build): 添加离线构建支持

- 新增 build-offline.sh 脚本实现无网环境构建
- 添加离线版 Dockerfiles 使用本地资源替代网络下载
- 创建 export-cache.sh 脚本在有网环境预下载依赖
- 集成 Makefile.offline.mk 提供便捷的离线构建命令
- 添加详细的离线构建文档和故障排查指南
- 实现基础镜像、BuildKit 缓存和外部资源的一键打包
This commit is contained in:
2026-02-02 21:44:44 +08:00
parent b36fdd2438
commit 9da187d2c6
9 changed files with 1085 additions and 0 deletions

View File

@@ -0,0 +1,134 @@
#!/bin/bash
# BuildKit cache export script - run it on a machine with network access.
# Usage: ./export-cache.sh [output-dir]
#
# This first section defines the output directory layout, selects the
# "offline-builder" buildx builder (creating it when `docker buildx inspect`
# cannot find it) and creates the output directories.
set -e

# Everything is written below OUTPUT_DIR (first argument, default ./build-cache).
OUTPUT_DIR="${1:-./build-cache}"
BUILDKIT_CACHE_DIR="$OUTPUT_DIR/buildkit"
IMAGES_DIR="$OUTPUT_DIR/images"
RESOURCES_DIR="$OUTPUT_DIR/resources"

# Make sure the buildx builder exists and is the one in use.
if docker buildx inspect offline-builder > /dev/null 2>&1; then
  # Builder already exists: just switch to it.
  docker buildx use offline-builder
else
  echo "创建 buildx 构建器..."
  docker buildx create \
    --name offline-builder \
    --driver docker-container \
    --use
fi

mkdir -p "$BUILDKIT_CACHE_DIR" "$IMAGES_DIR" "$RESOURCES_DIR"
echo "======================================"
echo "1. 导出基础镜像"
echo "======================================"

# Images that are pulled and bundled into a single tarball.
BASE_IMAGES=(
  "maven:3-eclipse-temurin-21"
  "maven:3-eclipse-temurin-8"
  "eclipse-temurin:21-jdk"
  "mysql:8"
  "node:20-alpine"
  "nginx:1.29"
  "ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm"
  "ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm"
  "ghcr.nju.edu.cn/astral-sh/uv:latest"
  "python:3.12-slim"
  "python:3.11-slim"
  "gcr.io/distroless/nodejs20-debian12"
)

# Pull every image. A failed pull is tolerated as long as a local copy exists.
# Images that are neither pulled nor present locally are recorded and left out
# of `docker save`, because a single unknown reference makes `docker save` fail
# and (under `set -e`) would abort the whole export.
available_images=()
missing_images=()
for img in "${BASE_IMAGES[@]}"; do
  echo "拉取: $img"
  if ! docker pull "$img"; then
    echo "警告: $img 拉取失败,可能已存在"
  fi
  if docker image inspect "$img" > /dev/null 2>&1; then
    available_images+=("$img")
  else
    missing_images+=("$img")
  fi
done

echo ""
if [ "${#missing_images[@]}" -gt 0 ]; then
  echo "警告: 以下镜像本地不存在,已跳过保存:" >&2
  printf '  %s\n' "${missing_images[@]}" >&2
fi
if [ "${#available_images[@]}" -eq 0 ]; then
  # Nothing to save: an empty argument list would make `docker save` fail anyway.
  echo "错误: 没有可保存的基础镜像" >&2
  exit 1
fi

echo "保存基础镜像到 $IMAGES_DIR/base-images.tar..."
docker save -o "$IMAGES_DIR/base-images.tar" "${available_images[@]}"
echo "✓ 基础镜像保存完成"
echo ""
echo "======================================"
echo "2. 导出 BuildKit 构建缓存"
echo "======================================"

# Service definitions, one per entry: "<service name>:<image name>:<Dockerfile path>".
SERVICES=(
  "backend:datamate-backend:scripts/images/backend/Dockerfile"
  "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
  "database:datamate-database:scripts/images/database/Dockerfile"
  "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
  "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
  "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
  "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
  "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
  "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)

for service_config in "${SERVICES[@]}"; do
  # Split the entry on ':' into its three fields.
  IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
  cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"

  echo ""
  echo "导出 [$service_name] 缓存到 $cache_file..."

  # Run a regular build (context: current directory) so the cache gets
  # populated, and export it to a per-service local directory. A failed build
  # is reported but does not stop the remaining services. Exactly one of the
  # success / failure messages is printed.
  if docker buildx build \
    --cache-to "type=local,dest=$cache_file,mode=max" \
    -f "$dockerfile" \
    -t "$image_name:cache" \
    .; then
    echo "$service_name 缓存导出完成"
  else
    echo "警告: $service_name 缓存导出失败"
  fi
done
echo ""
echo "======================================"
echo "3. 预下载外部资源"
echo "======================================"

#######################################
# Download a file unless a non-empty copy already exists (best effort).
# The data goes to "<dest>.part" first and is renamed only after wget
# succeeds: `wget -O` creates the output file even when the transfer fails,
# and such a leftover would make the existence check skip the download on
# every later run.
# Arguments: $1 - label used in messages, $2 - URL, $3 - destination path
# Returns:   0 always; a failure only prints a warning to stderr
#######################################
fetch_file() {
  local label=$1 url=$2 dest=$3
  if [ -s "$dest" ]; then
    return 0
  fi
  echo "下载 ${label}..."
  if wget -O "$dest.part" "$url" && mv -f -- "$dest.part" "$dest"; then
    return 0
  fi
  rm -f -- "$dest.part" || true
  echo "警告: ${label} 下载失败" >&2
}

#######################################
# Shallow-clone a repository unless the target directory already exists
# (best effort).
# Arguments: $1 - label used in messages, $2 - repository URL, $3 - target dir
# Returns:   0 always; a failure only prints a warning to stderr
#######################################
clone_repo() {
  local label=$1 url=$2 dest=$3
  if [ -d "$dest" ]; then
    return 0
  fi
  echo "克隆 ${label}..."
  if ! git clone --depth 1 "$url" "$dest"; then
    echo "警告: ${label} 克隆失败" >&2
  fi
}

mkdir -p "$RESOURCES_DIR/models"

# PaddleOCR model
fetch_file "PaddleOCR 模型" \
  "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
  "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar"

# spaCy model
fetch_file "spaCy 模型" \
  "https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl" \
  "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl"

# DataX sources
clone_repo "DataX 源码" \
  "https://gitee.com/alibaba/DataX.git" \
  "$RESOURCES_DIR/DataX"

# deer-flow sources (used for the deer-flow build)
clone_repo "deer-flow 源码" \
  "https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git" \
  "$RESOURCES_DIR/deer-flow"
echo ""
echo "======================================"
echo "4. 打包缓存"
echo "======================================"

# Compute the archive name once so that the file created by tar and the path
# printed below always agree, even if archiving runs past midnight.
archive_name="build-cache-$(date +%Y%m%d).tar.gz"

# Archive the three sub-directories with paths relative to OUTPUT_DIR. The
# subshell keeps the directory change local, so no `cd -` is needed.
(
  cd "$OUTPUT_DIR"
  tar -czf "$archive_name" buildkit images resources
)

echo ""
echo "======================================"
echo "✓ 缓存导出完成!"
echo "======================================"
echo "缓存位置: $OUTPUT_DIR"
echo "传输文件: $OUTPUT_DIR/$archive_name"
echo ""
echo "请将此压缩包传输到无网环境后解压使用"