You've already forked DataMate
feat(build): 添加离线构建支持
- 新增 build-offline.sh 脚本实现无网环境构建 - 添加离线版 Dockerfiles 使用本地资源替代网络下载 - 创建 export-cache.sh 脚本在有网环境预下载依赖 - 集成 Makefile.offline.mk 提供便捷的离线构建命令 - 添加详细的离线构建文档和故障排查指南 - 实现基础镜像、BuildKit 缓存和外部资源的一键打包
This commit is contained in:
93
scripts/offline/Dockerfile.backend-python.offline
Normal file
93
scripts/offline/Dockerfile.backend-python.offline
Normal file
@@ -0,0 +1,93 @@
|
||||
# Offline variant of the backend-python Dockerfile.
# Difference from the online version: the DataX sources are copied from a
# local directory in the build context instead of being fetched with git clone.

FROM maven:3-eclipse-temurin-8 AS datax-builder

# Route all Maven repository traffic through the Aliyun public mirror.
# printf writes one argument per line, so the generated settings.xml does not
# depend on whether /bin/sh's `echo` interprets "\n" escape sequences.
RUN mkdir -p /root/.m2 && \
    printf '%s\n' \
      '<?xml version="1.0" encoding="UTF-8"?>' \
      '<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"' \
      '          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' \
      '          xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">' \
      '  <mirrors>' \
      '    <mirror>' \
      '      <id>aliyunmaven</id>' \
      '      <mirrorOf>*</mirrorOf>' \
      '      <name>阿里云公共仓库</name>' \
      '      <url>https://maven.aliyun.com/repository/public</url>' \
      '    </mirror>' \
      '  </mirrors>' \
      '</settings>' > /root/.m2/settings.xml

# Offline mode: path (relative to the build context) of the pre-downloaded
# DataX source tree. Override with --build-arg DATAX_LOCAL_PATH=...
ARG DATAX_LOCAL_PATH=./build-cache/resources/DataX

# Copy the pre-downloaded DataX sources, then overlay the project's own DataX
# files. Both destinations are absolute so they land in the same directory
# regardless of the base image's default WORKDIR.
COPY ${DATAX_LOCAL_PATH} /DataX
COPY runtime/datax/ /DataX/

WORKDIR /DataX

# Swap the legacy MySQL driver class name for the Connector/J 8 one, then
# build the DataX distribution (tests are neither compiled nor run).
RUN sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
        plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
    mvn -U clean package assembly:assembly -Dmaven.test.skip=true
|
||||
|
||||
FROM python:3.12-slim

# Point apt at the Aliyun mirror (whichever sources file this Debian release
# uses) and install the runtime packages.
# Debian installs the JRE under /usr/lib/jvm/java-21-openjdk-<arch>, so an
# arch-neutral symlink is created to make the JAVA_HOME set below resolve on
# every platform.
RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
    elif [ -f /etc/apt/sources.list ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
    fi && \
    apt-get update && \
    apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common glusterfs-client rsync && \
    rm -rf /var/lib/apt/lists/* && \
    if [ ! -e /usr/lib/jvm/java-21-openjdk ]; then \
        ln -s "$(dirname "$(dirname "$(readlink -f "$(command -v java)")")")" /usr/lib/jvm/java-21-openjdk; \
    fi

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    POETRY_VERSION=2.2.1 \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_CREATE=false \
    POETRY_CACHE_DIR=/tmp/poetry_cache

ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk

# /root/.local/bin is where pipx places the poetry executable.
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"

WORKDIR /app

# Configure pip to use the Aliyun PyPI mirror, then install Poetry via pipx.
RUN --mount=type=cache,target=/root/.cache/pip \
    pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
    pip config set global.trusted-host mirrors.aliyun.com && \
    pip install --upgrade --root-user-action=ignore pip \
    && pip install --root-user-action=ignore pipx \
    && pipx install "poetry==$POETRY_VERSION"

# Bring in the DataX distribution built in the datax-builder stage and make
# the MySQL driver jars available to the starrocksreader plugin as well.
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/

# Copy only dependency files first so the install layer stays cached until
# the dependency manifests change.
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/

# Install dependencies
RUN --mount=type=cache,target=$POETRY_CACHE_DIR \
    poetry install --no-root --only main

# Offline mode: NLTK data is copied from the build context.
ARG NLTK_DATA_LOCAL_PATH=./build-cache/resources/nltk_data
COPY ${NLTK_DATA_LOCAL_PATH} /usr/local/nltk_data

ENV NLTK_DATA=/usr/local/nltk_data

# Copy the rest of the application
COPY runtime/datamate-python /app

# Install the entrypoint with its executable bit set at copy time; a missing
# or unreadable script now fails the build instead of being masked.
COPY --chmod=755 runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh

# Expose the application port
EXPOSE 18000

ENTRYPOINT ["/docker-entrypoint.sh"]
|
||||
44
scripts/offline/Dockerfile.deer-flow-backend.offline
Normal file
44
scripts/offline/Dockerfile.deer-flow-backend.offline
Normal file
@@ -0,0 +1,44 @@
|
||||
# Offline variant of the deer-flow-backend Dockerfile.
# Difference from the online version: the deer-flow sources are copied from a
# local directory in the build context instead of being fetched with git clone.

FROM ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm

# Install uv.
COPY --from=ghcr.nju.edu.cn/astral-sh/uv:latest /uv /bin/uv

# Rewrite the first apt sources file that exists to use the Aliyun mirror,
# then install the system dependencies.
RUN for apt_sources in /etc/apt/sources.list.d/debian.sources /etc/apt/sources.list; do \
        if [ -f "$apt_sources" ]; then \
            sed -i 's/deb.debian.org/mirrors.aliyun.com/g' "$apt_sources" || exit 1; \
            break; \
        fi; \
    done && \
    apt-get update && \
    apt-get install -y \
        libpq-dev \
        git \
    && rm -rf /var/lib/apt/lists/*

# Make uv resolve packages from the Aliyun PyPI mirror.
ENV UV_INDEX_URL="https://mirrors.aliyun.com/pypi/simple/"

WORKDIR /app

# Offline mode: location (relative to the build context) of the
# pre-downloaded deer-flow source tree.
ARG RESOURCES_DIR=./build-cache/resources
ARG DEERFLOW_DIR=${RESOURCES_DIR}/deer-flow

# Copy the pre-downloaded deer-flow sources, then the project's own
# environment file and configuration on top of them.
COPY ${DEERFLOW_DIR} /app
COPY runtime/deer-flow/.env     /app/.env
COPY runtime/deer-flow/conf.yaml /app/conf.yaml

# Pre-cache the application dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked --no-install-project

# Install the application dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --locked

EXPOSE 8000

# Run the application.
CMD ["uv", "run", "--no-sync", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]
|
||||
75
scripts/offline/Dockerfile.deer-flow-frontend.offline
Normal file
75
scripts/offline/Dockerfile.deer-flow-frontend.offline
Normal file
@@ -0,0 +1,75 @@
|
||||
# Offline variant of the deer-flow-frontend Dockerfile.
# Difference from the online version: the deer-flow sources are copied from a
# local directory in the build context instead of being fetched with git clone.

##### DEPENDENCIES

FROM node:20-alpine AS deps
RUN apk add --no-cache libc6-compat openssl
WORKDIR /app

# Offline mode: location (relative to the build context) of the
# pre-downloaded deer-flow source tree.
ARG RESOURCES_DIR=./build-cache/resources
ARG DEERFLOW_DIR=${RESOURCES_DIR}/deer-flow

# Copy the web front-end sources from the local deer-flow checkout.
COPY ${DEERFLOW_DIR}/web /app

# Use the npmmirror registry and install dependencies with whichever package
# manager matches the lockfile found; the build fails when none is present.
RUN registry=https://registry.npmmirror.com; \
    npm config set registry "$registry" && \
    if [ -f yarn.lock ]; then \
        yarn config set registry "$registry" && yarn --frozen-lockfile; \
    elif [ -f package-lock.json ]; then \
        npm ci; \
    elif [ -f pnpm-lock.yaml ]; then \
        npm install -g pnpm && pnpm config set registry "$registry" && pnpm i; \
    else \
        echo "Lockfile not found." && exit 1; \
    fi
|
||||
|
||||
##### BUILDER

FROM node:20-alpine AS builder

# git is intentionally not installed here: the sources come from the build
# context, so nothing is cloned and `apk add` would only add a network
# dependency to the offline build.

WORKDIR /app
ARG NEXT_PUBLIC_API_URL="/deer-flow-backend"

# Offline mode: location (relative to the build context) of the
# pre-downloaded deer-flow source tree.
ARG RESOURCES_DIR=./build-cache/resources
ARG DEERFLOW_DIR=${RESOURCES_DIR}/deer-flow

# Copy only the web front-end, exactly as the deps stage does. Copying the
# directory directly keeps dotfiles (which a `mv web/*` glob skips) and avoids
# baking the whole repository into an intermediate layer.
COPY ${DEERFLOW_DIR}/web /app

# Reuse the dependencies installed in the deps stage.
COPY --from=deps /app/node_modules ./node_modules

ENV NEXT_TELEMETRY_DISABLED=1

# Use the npmmirror registry and run the production build with whichever
# package manager matches the lockfile found; environment validation is
# skipped because runtime variables are not available at build time.
RUN npm config set registry https://registry.npmmirror.com && \
    if [ -f yarn.lock ]; then yarn config set registry https://registry.npmmirror.com && SKIP_ENV_VALIDATION=1 yarn build; \
    elif [ -f package-lock.json ]; then SKIP_ENV_VALIDATION=1 npm run build; \
    elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm config set registry https://registry.npmmirror.com && SKIP_ENV_VALIDATION=1 pnpm run build; \
    else echo "Lockfile not found." && exit 1; \
    fi
|
||||
|
||||
##### RUNNER

FROM gcr.io/distroless/nodejs20-debian12 AS runner
WORKDIR /app

# Runtime configuration: production mode, telemetry off, listening port.
ENV NODE_ENV=production \
    NEXT_TELEMETRY_DISABLED=1 \
    PORT=3000

# Static inputs taken from the builder stage.
COPY --from=builder /app/next.config.js /app/
COPY --from=builder /app/public         /app/public
COPY --from=builder /app/package.json   /app/package.json

# Next.js standalone output plus the static assets it serves. The standalone
# copy comes after the files above, so anything it contains takes precedence.
COPY --from=builder /app/.next/standalone /app/
COPY --from=builder /app/.next/static     /app/.next/static

EXPOSE 3000

# The distroless nodejs image supplies the node entrypoint; only the script
# name is passed here.
CMD ["server.js"]
|
||||
54
scripts/offline/Dockerfile.runtime.offline
Normal file
54
scripts/offline/Dockerfile.runtime.offline
Normal file
@@ -0,0 +1,54 @@
|
||||
# Offline variant of the runtime Dockerfile.
# Difference from the online version: model files are copied from a local
# directory in the build context instead of being downloaded with wget.

FROM ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm

# Point apt at the Aliyun mirror (whichever sources file this Debian release
# uses) and install the system packages. apt-get is used rather than apt
# because apt's command-line interface is not intended for scripts.
RUN --mount=type=cache,target=/var/cache/apt \
    --mount=type=cache,target=/var/lib/apt \
    if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
    elif [ -f /etc/apt/sources.list ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
    fi \
    && apt-get update \
    && apt-get install -y libgl1 libglib2.0-0 vim libmagic1 libreoffice dos2unix swig poppler-utils tesseract-ocr

# Offline mode: location (relative to the build context) of the
# pre-downloaded model files.
ARG RESOURCES_DIR=./build-cache/resources
ARG MODELS_DIR=${RESOURCES_DIR}/models

# Copy the pre-downloaded PaddleOCR model archive and unpack it next to
# itself. COPY creates /home/models, so no separate mkdir layer is needed.
COPY ${MODELS_DIR}/ch_ppocr_mobile_v2.0_cls_infer.tar /home/models/
RUN tar -xf /home/models/ch_ppocr_mobile_v2.0_cls_infer.tar -C /home/models

COPY runtime/python-executor /opt/runtime
COPY runtime/ops /opt/runtime/datamate/ops
COPY runtime/ops/user /opt/runtime/user
COPY scripts/images/runtime/start.sh /opt/runtime/start.sh

ENV PYTHONPATH=/opt/runtime/datamate/
ENV UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
ENV UV_INDEX_STRATEGY=unsafe-best-match
# Make uv resolve packages from the Aliyun PyPI mirror.
ENV UV_INDEX_URL="https://mirrors.aliyun.com/pypi/simple/"

WORKDIR /opt/runtime

# Copy the pre-downloaded spaCy model wheel.
COPY ${MODELS_DIR}/zh_core_web_sm-3.8.0-py3-none-any.whl /tmp/

# Install the runtime package with all extras, the operator dependencies and
# the local spaCy wheel into the system interpreter, then register
# /usr/local/lib/ops/site-packages as an extra import path via a .pth file.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e .[all] --system \
    && uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
    && uv pip install /tmp/zh_core_web_sm-3.8.0-py3-none-any.whl --system \
    && echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth

# Set the container time zone to Asia/Shanghai and make the start script
# executable with Unix line endings.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && chmod +x /opt/runtime/start.sh \
    && dos2unix /opt/runtime/start.sh

EXPOSE 8081

ENTRYPOINT ["/opt/runtime/start.sh"]
|
||||
76
scripts/offline/Makefile.offline
Normal file
76
scripts/offline/Makefile.offline
Normal file
@@ -0,0 +1,76 @@
|
||||
# Offline-build extension for the project Makefile.
# Append this file to the main Makefile, or use it on its own.
# Usage: make -f Makefile.offline <target>

# Offline build settings; both can be overridden on the command line.
VERSION ?= latest
CACHE_DIR ?= ./build-cache

# ========== Offline build targets ==========

# Run export-cache.sh to write the offline build cache into $(CACHE_DIR).
offline-export:
	@echo "导出离线构建缓存..." && \
		bash scripts/offline/export-cache.sh $(CACHE_DIR)
.PHONY: offline-export
|
||||
|
||||
# Run build-offline.sh against $(CACHE_DIR), tagging images with $(VERSION).
offline-build:
	@echo "使用缓存进行离线构建..." && \
		bash scripts/offline/build-offline.sh $(CACHE_DIR) $(VERSION)
.PHONY: offline-build
|
||||
|
||||
# Unpack the newest build-cache-*.tar.gz from the current directory into
# $(CACHE_DIR) when that directory does not exist yet.
# The archive written by export-cache.sh contains buildkit/, images/ and
# resources/ at its top level (no enclosing directory), so it has to be
# extracted *into* $(CACHE_DIR) rather than into the current directory.
.PHONY: offline-setup
offline-setup:
	@echo "解压并设置离线缓存..."
	@if [ ! -d "$(CACHE_DIR)" ]; then \
		echo "查找缓存压缩包..."; \
		cache_file=$$(ls -t build-cache-*.tar.gz 2>/dev/null | head -1); \
		if [ -z "$$cache_file" ]; then \
			echo "错误: 未找到缓存压缩包 (build-cache-*.tar.gz)"; \
			exit 1; \
		fi; \
		echo "解压 $$cache_file..."; \
		mkdir -p "$(CACHE_DIR)" && tar -xzf "$$cache_file" -C "$(CACHE_DIR)" || { \
			rmdir "$(CACHE_DIR)" 2>/dev/null; \
			exit 1; \
		}; \
	fi
	@echo "✓ 离线缓存准备完成"
|
||||
|
||||
# Offline build of a single service: `make <service>-offline-build`.
# The stem ($*) is the service name. Images are tagged datamate-<service>,
# except deer-flow-* services, which keep their own name.
# No .PHONY entry: .PHONY does not accept patterns, so listing
# `%-offline-build` there has no effect on the targets this rule matches.
# A failed build now fails the target instead of only printing a warning.
%-offline-build:
	@echo "离线构建 $*..."
	@cache_dir="$(CACHE_DIR)/buildkit/$*-cache"; \
	image_name="$(if $(filter deer-flow%,$*),$*,datamate-$*)"; \
	if [ ! -d "$$cache_dir" ]; then \
		echo "错误: $* 的缓存不存在于 $$cache_dir"; \
		exit 1; \
	fi; \
	docker buildx build \
		--cache-from "type=local,src=$$cache_dir" \
		--network=none \
		-f scripts/images/$*/Dockerfile \
		-t "$$image_name:$(VERSION)" \
		--load \
		. || { echo "警告: $* 离线构建失败"; exit 1; }
|
||||
|
||||
# Offline counterpart of the main Makefile's build target: prepare the cache
# first (offline-setup), then run offline-build in a sub-make.
build-offline: offline-setup
	@$(MAKE) offline-build
.PHONY: build-offline
|
||||
|
||||
# Print a summary of the offline targets followed by a usage example.
# printf emits each argument on its own line.
help-offline:
	@printf '%s\n' \
		"离线构建命令:" \
		" make offline-export - 在有网环境导出构建缓存" \
		" make offline-setup - 解压并准备离线缓存" \
		" make offline-build - 在无网环境使用缓存构建" \
		" make <service>-offline-build - 离线构建单个服务" \
		"" \
		"示例:" \
		" # 有网环境导出缓存" \
		" make offline-export" \
		"" \
		" # 传输 build-cache-*.tar.gz 到无网环境" \
		" scp build-cache-20250202.tar.gz user@offline-server:/path/" \
		"" \
		" # 无网环境构建" \
		" make offline-setup" \
		" make offline-build"
.PHONY: help-offline
|
||||
245
scripts/offline/README.md
Normal file
245
scripts/offline/README.md
Normal file
@@ -0,0 +1,245 @@
|
||||
# BuildKit 离线构建方案
|
||||
|
||||
本方案使用 Docker BuildKit 的缓存机制,实现在弱网/无网环境下的镜像构建。
|
||||
|
||||
## 方案概述
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ 有网环境 (Build Machine) │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
|
||||
│ │ 基础镜像 │ │ BuildKit │ │ 外部资源 │ │
|
||||
│ │ docker pull │ + │ 缓存导出 │ + │ (模型/源码) │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────────────┘ │
|
||||
│ │ │ │ │
|
||||
│ └──────────────────┼──────────────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌──────────────────┐ │
|
||||
│ │ build-cache.tar.gz│ │
|
||||
│ └────────┬─────────┘ │
|
||||
└─────────────────────────────┼───────────────────────────────────┘
|
||||
│ 传输到无网环境
|
||||
▼
|
||||
┌─────────────────────────────────────────────────────────────────┐
|
||||
│ 无网环境 (Offline Machine) │
|
||||
│ ┌──────────────────┐ │
|
||||
│ │ build-cache.tar.gz│ │
|
||||
│ └────────┬─────────┘ │
|
||||
│ ▼ │
|
||||
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
|
||||
│ │ docker load │ │ BuildKit │ │ 本地资源挂载 │ │
|
||||
│ │ 基础镜像 │ + │ 缓存导入 │ + │ (模型/源码) │ │
|
||||
│ └─────────────┘ └─────────────┘ └─────────────────────┘ │
|
||||
│ │ │ │ │
|
||||
│ └──────────────────┼──────────────────┘ │
|
||||
│ ▼ │
|
||||
│ 构建成功! │
|
||||
└─────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## 快速开始
|
||||
|
||||
### 方法一:使用 Makefile 扩展(推荐)
|
||||
|
||||
#### 1. 合并 Makefile
|
||||
|
||||
在仓库根目录下,将 `scripts/offline/Makefile.offline` 追加到主 Makefile:
|
||||
|
||||
```bash
|
||||
# Linux/Mac
|
||||
cat scripts/offline/Makefile.offline >> Makefile
|
||||
|
||||
# Windows (PowerShell)
|
||||
Get-Content scripts/offline/Makefile.offline | Add-Content Makefile
|
||||
```
|
||||
|
||||
#### 2. 有网环境导出缓存
|
||||
|
||||
```bash
|
||||
# 导出所有缓存(包括基础镜像、BuildKit 缓存、外部资源)
|
||||
make offline-export
|
||||
|
||||
# 或者指定输出目录
|
||||
make offline-export CACHE_DIR=/path/to/cache
|
||||
```
|
||||
|
||||
执行完成后,会在缓存目录(默认 `./build-cache/`)下生成压缩包:`build-cache-YYYYMMDD.tar.gz`
|
||||
|
||||
#### 3. 传输到无网环境
|
||||
|
||||
```bash
|
||||
# 使用 scp 或其他方式传输
|
||||
scp build-cache-20250202.tar.gz user@offline-server:/opt/datamate/
|
||||
|
||||
# 或者使用 U 盘等物理介质
|
||||
```
|
||||
|
||||
#### 4. 无网环境构建
|
||||
|
||||
```bash
|
||||
# 解压缓存
|
||||
mkdir -p build-cache && tar -xzf build-cache-20250202.tar.gz -C build-cache
|
||||
|
||||
# 设置环境并构建
|
||||
make offline-setup
|
||||
make offline-build
|
||||
|
||||
# 或者指定版本号
|
||||
make offline-build VERSION=v1.0.0
|
||||
```
|
||||
|
||||
### 方法二:使用独立脚本
|
||||
|
||||
#### 导出缓存
|
||||
|
||||
```bash
|
||||
# 在仓库根目录执行(脚本中的 Dockerfile 路径和构建上下文均相对于仓库根目录)
|
||||
bash scripts/offline/export-cache.sh /path/to/output
|
||||
```
|
||||
|
||||
#### 离线构建
|
||||
|
||||
```bash
|
||||
# 在仓库根目录执行(脚本中的 Dockerfile 路径和构建上下文均相对于仓库根目录)
|
||||
bash scripts/offline/build-offline.sh /path/to/cache [version]
|
||||
```
|
||||
|
||||
## 详细说明
|
||||
|
||||
### 缓存内容
|
||||
|
||||
缓存目录结构:
|
||||
|
||||
```
|
||||
build-cache/
|
||||
├── buildkit/ # BuildKit 缓存
|
||||
│ ├── backend-cache/
|
||||
│ ├── backend-python-cache/
|
||||
│ ├── database-cache/
|
||||
│ ├── frontend-cache/
|
||||
│ ├── gateway-cache/
|
||||
│ ├── runtime-cache/
|
||||
│ ├── deer-flow-backend-cache/
|
||||
│ ├── deer-flow-frontend-cache/
|
||||
│ └── mineru-cache/
|
||||
├── images/
|
||||
│ └── base-images.tar # 基础镜像集合
|
||||
└── resources/ # 外部资源
|
||||
├── models/
|
||||
│ ├── ch_ppocr_mobile_v2.0_cls_infer.tar # PaddleOCR 模型
|
||||
│ └── zh_core_web_sm-3.8.0-py3-none-any.whl # spaCy 模型
|
||||
├── DataX/ # DataX 源码
|
||||
└── deer-flow/ # deer-flow 源码
|
||||
```
|
||||
|
||||
### 单个服务构建
|
||||
|
||||
```bash
|
||||
# 仅构建 backend
|
||||
make backend-offline-build
|
||||
|
||||
# 仅构建 runtime
|
||||
make runtime-offline-build
|
||||
|
||||
# 仅构建 deer-flow-backend
|
||||
make deer-flow-backend-offline-build
|
||||
```
|
||||
|
||||
### 增量更新
|
||||
|
||||
如果只有部分服务代码变更,可以只导出该服务的缓存:
|
||||
|
||||
```bash
|
||||
# 重新导出 backend 缓存
|
||||
docker buildx build \
|
||||
--cache-to type=local,dest=./build-cache/buildkit/backend-cache,mode=max \
|
||||
-f scripts/images/backend/Dockerfile \
|
||||
-t datamate-backend:cache .
|
||||
|
||||
# 传输并重新构建
|
||||
tar -czf build-cache-partial.tar.gz build-cache/buildkit/backend-cache
|
||||
# ... 传输到无网环境 ...
|
||||
make backend-offline-build
|
||||
```
|
||||
|
||||
## 故障排查
|
||||
|
||||
### 问题 1: 缓存导入失败
|
||||
|
||||
```
|
||||
ERROR: failed to solve: failed to read cache metadata
|
||||
```
|
||||
|
||||
**解决**: 缓存目录可能损坏,重新在有网环境导出。
|
||||
|
||||
### 问题 2: 基础镜像不存在
|
||||
|
||||
```
|
||||
ERROR: pull access denied
|
||||
```
|
||||
|
||||
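**解决**: 基础镜像由 `make offline-build`(即 `build-offline.sh` 的第 1 步)加载;`make offline-setup` 只负责解压缓存。也可以手动执行 `docker load -i build-cache/images/base-images.tar`。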
|
||||
|
||||
### 问题 3: 网络连接错误(无网环境)
|
||||
|
||||
```
|
||||
ERROR: failed to do request: dial tcp: lookup ...
|
||||
```
|
||||
|
||||
**解决**: 检查 Dockerfile 中是否还有网络依赖,可能需要修改 Dockerfile 使用本地资源。
|
||||
|
||||
### 问题 4: 内存不足
|
||||
|
||||
BuildKit 缓存可能占用大量内存,可以设置资源限制:
|
||||
|
||||
```bash
|
||||
# 创建带资源限制的 buildx 构建器
|
||||
docker buildx create --name offline-builder \
|
||||
--driver docker-container \
|
||||
--driver-opt memory=8g \
|
||||
--use
|
||||
```
|
||||
|
||||
## 限制说明
|
||||
|
||||
1. **镜像版本**: 基础镜像版本必须与缓存导出时一致
|
||||
2. **Dockerfile 变更**: 如果 Dockerfile 发生较大变更,可能需要重新导出缓存
|
||||
3. **资源文件**: mineru 镜像中的模型下载(`mineru-models-download`)仍需要网络,如果需要在完全无网环境使用,需要预先将模型文件挂载到镜像中
|
||||
|
||||
## 高级用法
|
||||
|
||||
### 自定义缓存位置
|
||||
|
||||
```bash
|
||||
make offline-export CACHE_DIR=/mnt/nas/build-cache
|
||||
make offline-build CACHE_DIR=/mnt/nas/build-cache
|
||||
```
|
||||
|
||||
### 导出特定平台缓存
|
||||
|
||||
```bash
|
||||
# 导出 ARM64 平台的缓存
|
||||
docker buildx build \
|
||||
--platform linux/arm64 \
|
||||
--cache-to type=local,dest=./build-cache/buildkit/backend-cache,mode=max \
|
||||
-f scripts/images/backend/Dockerfile .
|
||||
```
|
||||
|
||||
### 使用远程缓存(有网环境)
|
||||
|
||||
```bash
|
||||
# 导出到 S3/MinIO
|
||||
docker buildx build \
|
||||
--cache-to type=s3,region=us-east-1,bucket=mybucket,name=backend-cache \
|
||||
-f scripts/images/backend/Dockerfile .
|
||||
|
||||
# 从 S3 导入
|
||||
docker buildx build \
|
||||
--cache-from type=s3,region=us-east-1,bucket=mybucket,name=backend-cache \
|
||||
-f scripts/images/backend/Dockerfile .
|
||||
```
|
||||
|
||||
## 参考
|
||||
|
||||
- [Docker BuildKit Documentation](https://docs.docker.com/build/buildkit/)
|
||||
- [Cache Storage Backends](https://docs.docker.com/build/cache/backends/)
|
||||
107
scripts/offline/build-offline.sh
Normal file
107
scripts/offline/build-offline.sh
Normal file
@@ -0,0 +1,107 @@
|
||||
#!/bin/bash
# BuildKit offline build script - run this in the environment without network.
# Usage: ./build-offline.sh [cache-dir] [version]
#   cache-dir  directory holding the unpacked cache (default: ./build-cache)
#   version    tag applied to every built image    (default: latest)

set -e

CACHE_DIR="${1:-./build-cache}"
VERSION="${2:-latest}"
BUILDKIT_CACHE_DIR="$CACHE_DIR/buildkit"
IMAGES_DIR="$CACHE_DIR/images"
RESOURCES_DIR="$CACHE_DIR/resources"

# Nothing can be built without the unpacked cache directory.
[ -d "$CACHE_DIR" ] || {
    echo "错误: 缓存目录 $CACHE_DIR 不存在"
    echo "请先解压缓存包: tar -xzf build-cache-*.tar.gz"
    exit 1
}

# Select the dedicated buildx builder, creating it on first use.
if docker buildx inspect offline-builder > /dev/null 2>&1; then
    docker buildx use offline-builder
else
    echo "创建 buildx 构建器..."
    docker buildx create --name offline-builder --driver docker-container --use
fi
|
||||
|
||||
printf '%s\n' \
    "======================================" \
    "1. 加载基础镜像" \
    "======================================"

# Load the saved base images into the local Docker image store. A missing
# archive is only a warning: the images are then assumed to be present.
base_images_tar="$IMAGES_DIR/base-images.tar"
if [ ! -f "$base_images_tar" ]; then
    echo "警告: 基础镜像文件不存在,假设镜像已存在"
else
    echo "从 $base_images_tar 加载基础镜像..."
    docker load -i "$base_images_tar"
    echo "✓ 基础镜像加载完成"
fi
|
||||
|
||||
echo ""
echo "======================================"
echo "2. 离线构建服务"
echo "======================================"

# Service table (keep in sync with export-cache.sh).
# Entry format: <service name>:<image name>:<Dockerfile path>
SERVICES=(
    "backend:datamate-backend:scripts/images/backend/Dockerfile"
    "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
    "database:datamate-database:scripts/images/database/Dockerfile"
    "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
    "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
    "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
    "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
    "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
    "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)

# When a resources directory exists, hand its location to every build as the
# RESOURCES_DIR build argument. An array keeps the value intact even if the
# path contains spaces.
# NOTE(review): Dockerfiles that COPY from RESOURCES_DIR need a path relative
# to the build context (the current directory) - confirm when an absolute
# cache directory is used.
MOUNT_ARGS=()
if [ -d "$RESOURCES_DIR" ]; then
    echo "检测到资源目录,将用于本地资源挂载"
    MOUNT_ARGS=(--build-arg "RESOURCES_DIR=$RESOURCES_DIR")
fi

for service_config in "${SERVICES[@]}"; do
    IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
    cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"

    echo ""
    echo "--------------------------------------"
    echo "构建 [$service_name] -> $image_name:$VERSION"
    echo "--------------------------------------"

    # Services without an exported cache are skipped rather than failing.
    if [ ! -d "$cache_file" ]; then
        echo "警告: $service_name 的缓存不存在,跳过..."
        continue
    fi

    # Arguments shared by the offline attempt and the fallback attempt.
    build_args=(
        --cache-from "type=local,src=$cache_file"
        "${MOUNT_ARGS[@]}"
        -f "$dockerfile"
        -t "$image_name:$VERSION"
        --load
    )

    # First attempt: --network=none guarantees nothing is fetched.
    # On failure, retry once with network access allowed; if the retry also
    # fails, `set -e` aborts the script.
    docker buildx build --network=none "${build_args[@]}" . || {
        echo "错误: $service_name 构建失败"
        echo "尝试不使用 --network=none 重新构建..."
        docker buildx build "${build_args[@]}" .
    }

    echo "✓ $service_name 构建完成"
done

echo ""
echo "======================================"
echo "✓ 离线构建完成!"
echo "======================================"
echo ""
echo "构建的镜像列表:"
# grep exits non-zero when nothing matches; that must not fail the script.
docker images | grep -E "(datamate-|deer-flow-)" || true
|
||||
134
scripts/offline/export-cache.sh
Normal file
134
scripts/offline/export-cache.sh
Normal file
@@ -0,0 +1,134 @@
|
||||
#!/bin/bash
# BuildKit cache export script - run this in the environment with network.
# Usage: ./export-cache.sh [output-dir]
#   output-dir  directory that receives the cache (default: ./build-cache)

set -e

OUTPUT_DIR="${1:-./build-cache}"
BUILDKIT_CACHE_DIR="$OUTPUT_DIR/buildkit"
IMAGES_DIR="$OUTPUT_DIR/images"
RESOURCES_DIR="$OUTPUT_DIR/resources"

# Select the dedicated buildx builder, creating it on first use.
if docker buildx inspect offline-builder > /dev/null 2>&1; then
    docker buildx use offline-builder
else
    echo "创建 buildx 构建器..."
    docker buildx create --name offline-builder --driver docker-container --use
fi

# Create the three output sub-directories.
for out_dir in "$BUILDKIT_CACHE_DIR" "$IMAGES_DIR" "$RESOURCES_DIR"; do
    mkdir -p "$out_dir"
done
|
||||
|
||||
echo "======================================"
echo "1. 导出基础镜像"
echo "======================================"

# Base images referenced by the project's Dockerfiles.
BASE_IMAGES=(
    "maven:3-eclipse-temurin-21"
    "maven:3-eclipse-temurin-8"
    "eclipse-temurin:21-jdk"
    "mysql:8"
    "node:20-alpine"
    "nginx:1.29"
    "ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm"
    "ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm"
    "ghcr.nju.edu.cn/astral-sh/uv:latest"
    "python:3.12-slim"
    "python:3.11-slim"
    "gcr.io/distroless/nodejs20-debian12"
)

# Pull each image (best effort), then keep only those that are actually
# present locally. `docker save` fails as a whole when any listed image is
# missing, which would otherwise abort the entire export under `set -e`.
AVAILABLE_IMAGES=()
for img in "${BASE_IMAGES[@]}"; do
    echo "拉取: $img"
    docker pull "$img" || echo "警告: $img 拉取失败,可能已存在"
    if docker image inspect "$img" > /dev/null 2>&1; then
        AVAILABLE_IMAGES+=("$img")
    else
        echo "警告: $img 在本地不存在,不会包含在 base-images.tar 中"
    fi
done

if [ "${#AVAILABLE_IMAGES[@]}" -eq 0 ]; then
    echo "错误: 没有可保存的基础镜像"
    exit 1
fi

echo ""
echo "保存基础镜像到 $IMAGES_DIR/base-images.tar..."
docker save -o "$IMAGES_DIR/base-images.tar" "${AVAILABLE_IMAGES[@]}"
echo "✓ 基础镜像保存完成"
|
||||
|
||||
echo ""
echo "======================================"
echo "2. 导出 BuildKit 构建缓存"
echo "======================================"

# Service table.
# Entry format: <service name>:<image name>:<Dockerfile path>
SERVICES=(
    "backend:datamate-backend:scripts/images/backend/Dockerfile"
    "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
    "database:datamate-database:scripts/images/database/Dockerfile"
    "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
    "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
    "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
    "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
    "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
    "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)

for service_config in "${SERVICES[@]}"; do
    IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
    cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"

    echo ""
    echo "导出 [$service_name] 缓存到 $cache_file..."

    # Build normally so the cache is populated and written to $cache_file.
    # A failing service is reported and the loop moves on to the next one;
    # the success line is printed only when the build really succeeded.
    if docker buildx build \
        --cache-to "type=local,dest=$cache_file,mode=max" \
        -f "$dockerfile" \
        -t "$image_name:cache" \
        .; then
        echo "✓ $service_name 缓存导出完成"
    else
        echo "警告: $service_name 缓存导出失败"
    fi
done
|
||||
|
||||
echo ""
echo "======================================"
echo "3. 预下载外部资源"
echo "======================================"

# Download URL ($1) to DEST ($2), best effort.
# The data is written to "$DEST.part" and renamed only after wget succeeds,
# so a failed or interrupted download never leaves a truncated file that a
# later run would mistake for a finished one. Always returns 0.
download_resource() {
    local url="$1"
    local dest="$2"
    local partial="$dest.part"

    if wget -O "$partial" "$url"; then
        mv "$partial" "$dest"
    else
        rm -f "$partial"
        echo "警告: 下载失败: $url"
    fi
}

# Clone URL ($1) shallowly into DIR ($2), best effort. Always returns 0.
clone_resource() {
    local url="$1"
    local dir="$2"

    git clone --depth 1 "$url" "$dir" || echo "警告: 克隆失败: $url"
}

mkdir -p "$RESOURCES_DIR/models"

# PaddleOCR model. `-s` (exists and is non-empty) also re-downloads an empty
# file left behind by an earlier failed run.
if [ ! -s "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar" ]; then
    echo "下载 PaddleOCR 模型..."
    download_resource \
        "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
        "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar"
fi

# spaCy model
if [ ! -s "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl" ]; then
    echo "下载 spaCy 模型..."
    download_resource \
        "https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl" \
        "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl"
fi

# DataX sources
if [ ! -d "$RESOURCES_DIR/DataX" ]; then
    echo "克隆 DataX 源码..."
    clone_resource "https://gitee.com/alibaba/DataX.git" "$RESOURCES_DIR/DataX"
fi

# deer-flow sources (used by the deer-flow image builds)
if [ ! -d "$RESOURCES_DIR/deer-flow" ]; then
    echo "克隆 deer-flow 源码..."
    clone_resource \
        "https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git" \
        "$RESOURCES_DIR/deer-flow"
fi
|
||||
|
||||
echo ""
echo "======================================"
echo "4. 打包缓存"
echo "======================================"

# Compute the archive name once so the name reported below always matches
# the file that was created, even when the script runs across midnight.
ARCHIVE_NAME="build-cache-$(date +%Y%m%d).tar.gz"

# Archive the three cache directories with paths relative to $OUTPUT_DIR.
# The subshell keeps the caller's working directory unchanged.
(
    cd "$OUTPUT_DIR"
    tar -czf "$ARCHIVE_NAME" buildkit images resources
)

echo ""
echo "======================================"
echo "✓ 缓存导出完成!"
echo "======================================"
echo "缓存位置: $OUTPUT_DIR"
echo "传输文件: $OUTPUT_DIR/$ARCHIVE_NAME"
echo ""
echo "请将此压缩包传输到无网环境后解压使用"
|
||||
Reference in New Issue
Block a user