feat(build): 添加离线构建支持

- 新增 build-offline.sh 脚本实现无网环境构建
- 添加离线版 Dockerfiles 使用本地资源替代网络下载
- 创建 export-cache.sh 脚本在有网环境预下载依赖
- 集成 Makefile.offline.mk 提供便捷的离线构建命令
- 添加详细的离线构建文档和故障排查指南
- 实现基础镜像、BuildKit 缓存和外部资源的一键打包
This commit is contained in:
2026-02-02 21:44:44 +08:00
parent b36fdd2438
commit 9da187d2c6
9 changed files with 1085 additions and 0 deletions

257
Makefile.offline.mk Normal file
View File

@@ -0,0 +1,257 @@
# ============================================================================
# Makefile offline-build extension.
# Append this file's content to the end of the main Makefile, or include it
# standalone.
# ============================================================================
# Offline-build configuration. Both knobs are overridable on the command
# line, e.g. `make offline-export CACHE_DIR=/mnt/cache OFFLINE_VERSION=v1.0`.
CACHE_DIR ?= ./build-cache
OFFLINE_VERSION ?= latest
# Guarantee the dedicated docker-container buildx builder exists and is the
# active one; creates it on first use.
.PHONY: ensure-buildx
ensure-buildx:
	@if docker buildx inspect offline-builder > /dev/null 2>&1; then \
		docker buildx use offline-builder 2>/dev/null || true; \
	else \
		echo "创建 buildx 构建器..."; \
		docker buildx create --name offline-builder --driver docker-container --use 2>/dev/null || docker buildx use offline-builder; \
	fi
# ========== Offline cache export (run on a networked machine) ==========
# Orchestrates the four export steps in order; each sub-target runs as a
# separate $(MAKE) so a failure stops the pipeline at that step.
.PHONY: offline-export
offline-export: ensure-buildx
	@echo "======================================"
	@echo "导出离线构建缓存..."
	@echo "======================================"
	@mkdir -p $(addprefix $(CACHE_DIR)/,buildkit images resources)
	@$(MAKE) _offline-export-base-images
	@$(MAKE) _offline-export-cache
	@$(MAKE) _offline-export-resources
	@$(MAKE) _offline-package
# Single source of truth for every base image referenced by the Dockerfiles.
# The original listed these twice (once in a bash array for pulling, once as
# `docker save` arguments), which invited the two lists drifting apart.
OFFLINE_BASE_IMAGES := \
	maven:3-eclipse-temurin-21 \
	maven:3-eclipse-temurin-8 \
	eclipse-temurin:21-jdk \
	mysql:8 \
	node:20-alpine \
	nginx:1.29 \
	ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm \
	ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm \
	ghcr.nju.edu.cn/astral-sh/uv:latest \
	python:3.12-slim \
	python:3.11-slim \
	gcr.io/distroless/nodejs20-debian12

# Pull (best effort) and save all base images into a single tar that the
# offline machine loads with `docker load`.
.PHONY: _offline-export-base-images
_offline-export-base-images:
	@echo ""
	@echo "1. 导出基础镜像..."
	@for img in $(OFFLINE_BASE_IMAGES); do \
		echo " Pulling $$img..."; \
		docker pull "$$img" 2>/dev/null || true; \
	done
	@echo " Saving base images..."
	@docker save -o $(CACHE_DIR)/images/base-images.tar $(OFFLINE_BASE_IMAGES) \
		2>/dev/null || echo " Warning: Some images may not exist"
# Export a local BuildKit cache directory per service by building each image
# once with --cache-to. The original unrolled this into nine near-identical
# invocations; a single loop keeps them consistent.
# Image naming convention: deer-flow-* keeps its own name, everything else is
# prefixed "datamate-" (mirrors scripts/offline/export-cache.sh).
.PHONY: _offline-export-cache
_offline-export-cache:
	@echo ""
	@echo "2. 导出 BuildKit 缓存..."
	@for svc in backend backend-python database frontend gateway runtime deer-flow-backend deer-flow-frontend mineru; do \
		case $$svc in deer-flow-*) image=$$svc ;; *) image=datamate-$$svc ;; esac; \
		echo " $$svc..."; \
		docker buildx build --cache-to type=local,dest=$(CACHE_DIR)/buildkit/$$svc-cache,mode=max \
			-f scripts/images/$$svc/Dockerfile -t $$image:cache . 2>/dev/null \
			|| echo " Warning: $$svc cache export failed"; \
	done
# Pre-download external resources (models and vendored sources).
# FIX: `wget -O file url || echo warning` used to leave a zero-byte file
# behind on failure, which downstream COPY steps would silently package; the
# partial file is now removed before the warning is printed.
.PHONY: _offline-export-resources
_offline-export-resources:
	@echo ""
	@echo "3. 预下载外部资源..."
	@mkdir -p $(CACHE_DIR)/resources/models
	@echo " PaddleOCR model..."
	@wget -q -O $(CACHE_DIR)/resources/models/ch_ppocr_mobile_v2.0_cls_infer.tar \
		https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar 2>/dev/null \
		|| { rm -f $(CACHE_DIR)/resources/models/ch_ppocr_mobile_v2.0_cls_infer.tar; echo " Warning: PaddleOCR model download failed"; }
	@echo " spaCy model..."
	@wget -q -O $(CACHE_DIR)/resources/models/zh_core_web_sm-3.8.0-py3-none-any.whl \
		https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl 2>/dev/null \
		|| { rm -f $(CACHE_DIR)/resources/models/zh_core_web_sm-3.8.0-py3-none-any.whl; echo " Warning: spaCy model download failed"; }
	@echo " DataX source..."
	@if [ ! -d "$(CACHE_DIR)/resources/DataX" ]; then \
		git clone --depth 1 https://gitee.com/alibaba/DataX.git $(CACHE_DIR)/resources/DataX 2>/dev/null || echo " Warning: DataX clone failed"; \
	fi
	@echo " deer-flow source..."
	@if [ ! -d "$(CACHE_DIR)/resources/deer-flow" ]; then \
		git clone --depth 1 https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git $(CACHE_DIR)/resources/deer-flow 2>/dev/null || echo " Warning: deer-flow clone failed"; \
	fi
# Bundle the exported caches into a single dated tarball for transfer.
# FIX: the original `cd $(CACHE_DIR) && tar ... && cd -` ran in a throwaway
# shell anyway (each recipe line is its own shell), so `cd -` was dead code,
# and `2>/dev/null` hid tar failures; `tar -C` replaces both and a failure
# now stops make with a visible error.
.PHONY: _offline-package
_offline-package:
	@echo ""
	@echo "4. 打包缓存..."
	@tar -C $(CACHE_DIR) -czf "$(CACHE_DIR)/build-cache-$$(date +%Y%m%d).tar.gz" buildkit images resources
	@echo ""
	@echo "======================================"
	@echo "✓ 缓存导出完成!"
	@echo "======================================"
	@echo "传输文件: $(CACHE_DIR)/build-cache-$$(date +%Y%m%d).tar.gz"
# ========== Offline build (run on the air-gapped machine) ==========
# Prepare the offline environment: unpack the newest build-cache-*.tar.gz if
# the cache directory is missing, docker-load the saved base images, and make
# sure the buildx builder exists. Safe to re-run; an existing cache dir or a
# missing image tar only prints a notice.
.PHONY: offline-setup
offline-setup:
	@echo "======================================"
	@echo "设置离线构建环境..."
	@echo "======================================"
	@if [ ! -d "$(CACHE_DIR)" ]; then \
		echo "查找并解压缓存包..."; \
		cache_file=$$(ls -t build-cache-*.tar.gz 2>/dev/null | head -1); \
		if [ -z "$$cache_file" ]; then \
			echo "错误: 未找到缓存压缩包 (build-cache-*.tar.gz)"; \
			exit 1; \
		fi; \
		echo "解压 $$cache_file..."; \
		tar -xzf "$$cache_file"; \
	else \
		echo "缓存目录已存在: $(CACHE_DIR)"; \
	fi
	@echo ""
	@echo "加载基础镜像..."
	@if [ -f "$(CACHE_DIR)/images/base-images.tar" ]; then \
		docker load -i $(CACHE_DIR)/images/base-images.tar; \
	else \
		echo "警告: 基础镜像文件不存在,假设已手动加载"; \
	fi
	@$(MAKE) ensure-buildx
	@echo ""
	@echo "✓ 离线环境准备完成"
# End-to-end offline build: prepare the environment, then build every
# datamate service from the local caches.
.PHONY: offline-build
offline-build: offline-setup
	@echo ""
	@echo "======================================"
	@echo "开始离线构建..."
	@echo "======================================"
	@$(MAKE) _offline-build-services
# Build the datamate services from the local BuildKit caches.
# FIX: the original unrolled version built `database` twice — first with
# --network=none, then unconditionally again without it — so the restricted
# first attempt was always dead work. Each service is now built exactly once,
# and the six copy-pasted invocations are collapsed into one loop.
# runtime and backend-python receive RESOURCES_DIR for vendored local assets.
.PHONY: _offline-build-services
_offline-build-services: ensure-buildx
	@for svc in database gateway backend frontend runtime backend-python; do \
		echo ""; \
		echo "构建 datamate-$$svc..."; \
		extra=""; \
		case $$svc in runtime|backend-python) extra="--build-arg RESOURCES_DIR=$(CACHE_DIR)/resources" ;; esac; \
		docker buildx build \
			--cache-from type=local,src=$(CACHE_DIR)/buildkit/$$svc-cache \
			$$extra \
			-f scripts/images/$$svc/Dockerfile \
			-t datamate-$$svc:$(OFFLINE_VERSION) \
			--load . || echo " Failed"; \
	done
	@echo ""
	@echo "======================================"
	@echo "✓ 离线构建完成"
	@echo "======================================"
# Offline build of a single service, e.g. `make backend-offline-build`.
# FIX: the original declared `.PHONY: %-offline-build`, but .PHONY does not
# apply to pattern rules, so the declaration was silently ignored (pattern
# targets with no matching file are rebuilt every run anyway, so it is
# simply dropped). The recipe-time `$(eval IMAGE_NAME := ...)` hack, which
# mutated global make state during recipe expansion, is replaced with an
# inline $(if) — deer-flow-* images keep their name, others get "datamate-".
%-offline-build: offline-setup ensure-buildx
	@echo "离线构建 $*..."
	@if [ ! -d "$(CACHE_DIR)/buildkit/$*-cache" ]; then \
		echo "错误: $* 的缓存不存在"; \
		exit 1; \
	fi
	@docker buildx build \
		--cache-from type=local,src=$(CACHE_DIR)/buildkit/$*-cache \
		$(if $(filter runtime backend-python deer-flow%,$*),--build-arg RESOURCES_DIR=$(CACHE_DIR)/resources,) \
		-f scripts/images/$*/Dockerfile \
		-t $(if $(filter deer-flow%,$*),$*,datamate-$*):$(OFFLINE_VERSION) \
		--load .
# ========== Help ==========
# Usage summary for the offline-build targets. The output is intentionally
# Chinese, matching every other user-facing message in this file.
.PHONY: help-offline
help-offline:
	@echo "离线构建命令:"
	@echo " make offline-export [CACHE_DIR=./build-cache] - 在有网环境导出构建缓存"
	@echo " make offline-setup [CACHE_DIR=./build-cache] - 解压并准备离线缓存"
	@echo " make offline-build [CACHE_DIR=./build-cache] - 在无网环境构建所有服务"
	@echo " make <service>-offline-build - 离线构建单个服务"
	@echo " (如: make backend-offline-build)"
	@echo ""
	@echo "完整工作流程:"
	@echo " # 1. 有网环境导出缓存"
	@echo " make offline-export"
	@echo ""
	@echo " # 2. 传输缓存到无网环境"
	@echo " scp build-cache-*.tar.gz user@offline-server:/path/to/project/"
	@echo ""
	@echo " # 3. 无网环境构建"
	@echo " tar -xzf build-cache-*.tar.gz"
	@echo " make offline-build"

View File

@@ -0,0 +1,93 @@
# Offline variant of the backend-python Dockerfile.
# Change vs. the online version: DataX sources are COPY'd from a local,
# pre-downloaded path instead of `git clone` over the network.

# --- Stage 1: build DataX with Maven on JDK 8 ---
FROM maven:3-eclipse-temurin-8 AS datax-builder
# Write a Maven settings.xml routing all repositories through the Aliyun
# mirror.
# NOTE(review): this relies on the shell builtin `echo` expanding `\n`
# escapes (dash does); verify before switching base images, or prefer printf.
RUN mkdir -p /root/.m2 && \
echo '<?xml version="1.0" encoding="UTF-8"?>\n\
<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0"\n\
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n\
xsi:schemaLocation="http://maven.apache.org/SETTINGS/1.0.0 http://maven.apache.org/xsd/settings-1.0.0.xsd">\n\
<mirrors>\n\
<mirror>\n\
<id>aliyunmaven</id>\n\
<mirrorOf>*</mirrorOf>\n\
<name>阿里云公共仓库</name>\n\
<url>https://maven.aliyun.com/repository/public</url>\n\
</mirror>\n\
</mirrors>\n\
</settings>' > /root/.m2/settings.xml
# Offline mode: local DataX path supplied as a build arg (relative to the
# build context).
ARG DATAX_LOCAL_PATH=./build-cache/resources/DataX
# Copy the pre-downloaded DataX sources (fetched on a networked machine).
COPY ${DATAX_LOCAL_PATH} /DataX
# Overlay project-local DataX files. WORKDIR is still `/` here, so the
# relative destination `DataX/` resolves to /DataX as well.
COPY runtime/datax/ DataX/
# Switch the MySQL JDBC driver class to the Connector/J 8 name, then build.
RUN cd DataX && \
sed -i "s/com.mysql.jdbc.Driver/com.mysql.cj.jdbc.Driver/g" \
plugin-rdbms-util/src/main/java/com/alibaba/datax/plugin/rdbms/util/DataBaseType.java && \
mvn -U clean package assembly:assembly -Dmaven.test.skip=true

# --- Stage 2: Python runtime image ---
FROM python:3.12-slim
# Point apt at the Aliyun mirror and install system dependencies
# (JRE 21 for running DataX, NFS/GlusterFS clients, rsync).
RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
elif [ -f /etc/apt/sources.list ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
fi && \
apt-get update && \
apt-get install -y --no-install-recommends vim openjdk-21-jre nfs-common glusterfs-client rsync && \
rm -rf /var/lib/apt/lists/*
# Python and Poetry runtime knobs (no venv; install into the system env).
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
POETRY_VERSION=2.2.1 \
POETRY_NO_INTERACTION=1 \
POETRY_VIRTUALENVS_CREATE=false \
POETRY_CACHE_DIR=/tmp/poetry_cache
# NOTE(review): Debian's openjdk-21-jre typically installs under
# /usr/lib/jvm/java-21-openjdk-<arch>; confirm this arch-less JAVA_HOME path
# actually exists on the final image, otherwise it is dangling.
ENV JAVA_HOME=/usr/lib/jvm/java-21-openjdk
ENV PATH="/root/.local/bin:$JAVA_HOME/bin:$PATH"
WORKDIR /app
# Configure pip for the Aliyun mirror and install Poetry via pipx.
RUN --mount=type=cache,target=/root/.cache/pip \
pip config set global.index-url https://mirrors.aliyun.com/pypi/simple/ && \
pip config set global.trusted-host mirrors.aliyun.com && \
pip install --upgrade --root-user-action=ignore pip \
&& pip install --root-user-action=ignore pipx \
&& pipx install "poetry==$POETRY_VERSION"
# Bring in the DataX build; copy the MySQL JDBC jars into the starrocksreader
# plugin, which reuses them.
COPY --from=datax-builder /DataX/target/datax/datax /opt/datax
RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/
# Copy only dependency files first so layer caching keeps installs warm.
COPY runtime/datamate-python/pyproject.toml runtime/datamate-python/poetry.lock* /app/
# Install dependencies (no project code yet).
RUN --mount=type=cache,target=$POETRY_CACHE_DIR \
poetry install --no-root --only main
# Offline mode: vendored NLTK data instead of runtime downloads.
ARG NLTK_DATA_LOCAL_PATH=./build-cache/resources/nltk_data
COPY ${NLTK_DATA_LOCAL_PATH} /usr/local/nltk_data
ENV NLTK_DATA=/usr/local/nltk_data
# Copy the rest of the application.
COPY runtime/datamate-python /app
COPY runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh
RUN chmod +x /docker-entrypoint.sh || true
# Expose the application port.
EXPOSE 18000
ENTRYPOINT ["/docker-entrypoint.sh"]

View File

@@ -0,0 +1,44 @@
# Offline variant of the deer-flow-backend Dockerfile.
# Change vs. the online version: deer-flow sources are COPY'd from a local,
# pre-downloaded path instead of `git clone` over the network.
FROM ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm
# Install uv.
# NOTE(review): the base image already ships uv; this overwrites /bin/uv with
# the unpinned `latest` tag — confirm that is intentional.
COPY --from=ghcr.nju.edu.cn/astral-sh/uv:latest /uv /bin/uv
# Point apt at the Aliyun mirror and install system dependencies.
RUN if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
elif [ -f /etc/apt/sources.list ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
fi && \
apt-get update && apt-get install -y \
libpq-dev git \
&& rm -rf /var/lib/apt/lists/*
# Route uv's package resolution through the Aliyun PyPI mirror.
ENV UV_INDEX_URL="https://mirrors.aliyun.com/pypi/simple/"
WORKDIR /app
# Offline mode: local deer-flow path (relative to the build context).
ARG RESOURCES_DIR=./build-cache/resources
ARG DEERFLOW_DIR=${RESOURCES_DIR}/deer-flow
# Copy the pre-downloaded deer-flow sources, then project-specific config.
COPY ${DEERFLOW_DIR} /app
COPY runtime/deer-flow/.env /app/.env
COPY runtime/deer-flow/conf.yaml /app/conf.yaml
# Pre-cache the application dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked --no-install-project
# Install the application dependencies.
RUN --mount=type=cache,target=/root/.cache/uv \
uv sync --locked
EXPOSE 8000
# Run the application.
CMD ["uv", "run", "--no-sync", "python", "server.py", "--host", "0.0.0.0", "--port", "8000"]

View File

@@ -0,0 +1,75 @@
# Offline variant of the deer-flow-frontend Dockerfile.
# Change vs. the online version: deer-flow sources are copied from a local,
# pre-downloaded path instead of `git clone` over the network.

##### DEPENDENCIES
FROM node:20-alpine AS deps
RUN apk add --no-cache libc6-compat openssl
WORKDIR /app
# Offline mode: local deer-flow path (relative to the build context).
ARG RESOURCES_DIR=./build-cache/resources
ARG DEERFLOW_DIR=${RESOURCES_DIR}/deer-flow
# Copy the vendored web sources (Docker COPY includes dotfiles).
COPY ${DEERFLOW_DIR}/web /app
# Use the npmmirror registry and install with whichever lockfile exists.
RUN npm config set registry https://registry.npmmirror.com && \
if [ -f yarn.lock ]; then yarn config set registry https://registry.npmmirror.com && yarn --frozen-lockfile; \
elif [ -f package-lock.json ]; then npm ci; \
elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm config set registry https://registry.npmmirror.com && pnpm i; \
else echo "Lockfile not found." && exit 1; \
fi

##### BUILDER
FROM node:20-alpine AS builder
RUN apk add --no-cache git
WORKDIR /app
ARG NEXT_PUBLIC_API_URL="/deer-flow-backend"
# Offline mode: copy the vendored sources.
ARG RESOURCES_DIR=./build-cache/resources
ARG DEERFLOW_DIR=${RESOURCES_DIR}/deer-flow
COPY ${DEERFLOW_DIR} /deer-flow
# FIX: the original `mv /deer-flow/web/* /app` skipped dotfiles — the shell
# glob `*` does not match hidden entries — silently dropping files the deps
# stage's COPY *did* include (e.g. dot-prefixed config). `cp -a dir/.`
# copies everything, hidden files included.
RUN cp -a /deer-flow/web/. /app/ \
&& rm -rf /deer-flow
COPY --from=deps /app/node_modules ./node_modules
ENV NEXT_TELEMETRY_DISABLED=1
# Build with the npmmirror registry, matching the deps stage's manager choice.
RUN npm config set registry https://registry.npmmirror.com && \
if [ -f yarn.lock ]; then yarn config set registry https://registry.npmmirror.com && SKIP_ENV_VALIDATION=1 yarn build; \
elif [ -f package-lock.json ]; then SKIP_ENV_VALIDATION=1 npm run build; \
elif [ -f pnpm-lock.yaml ]; then npm install -g pnpm && pnpm config set registry https://registry.npmmirror.com && SKIP_ENV_VALIDATION=1 pnpm run build; \
else echo "Lockfile not found." && exit 1; \
fi

##### RUNNER
# Distroless runtime: ships only the Next.js standalone output and assets.
FROM gcr.io/distroless/nodejs20-debian12 AS runner
WORKDIR /app
ENV NODE_ENV=production
ENV NEXT_TELEMETRY_DISABLED=1
COPY --from=builder /app/next.config.js ./
COPY --from=builder /app/public ./public
COPY --from=builder /app/package.json ./package.json
COPY --from=builder /app/.next/standalone ./
COPY --from=builder /app/.next/static ./.next/static
EXPOSE 3000
ENV PORT=3000
CMD ["server.js"]

View File

@@ -0,0 +1,54 @@
# Offline variant of the runtime Dockerfile.
# Change vs. the online version: model files are COPY'd from local paths
# instead of being fetched with wget at build time.
FROM ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm
# Point apt at the Aliyun mirror and install system dependencies
# (GL/glib for CV libs, LibreOffice/poppler/tesseract for document parsing,
# swig for building native wheels, dos2unix for CRLF-safe scripts).
RUN --mount=type=cache,target=/var/cache/apt \
--mount=type=cache,target=/var/lib/apt \
if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
elif [ -f /etc/apt/sources.list ]; then \
sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
fi \
&& apt update \
&& apt install -y libgl1 libglib2.0-0 vim libmagic1 libreoffice dos2unix swig poppler-utils tesseract-ocr
# Offline mode: local model file paths (relative to the build context).
ARG RESOURCES_DIR=./build-cache/resources
ARG MODELS_DIR=${RESOURCES_DIR}/models
# Copy and unpack the vendored PaddleOCR classifier model.
RUN mkdir -p /home/models
COPY ${MODELS_DIR}/ch_ppocr_mobile_v2.0_cls_infer.tar /home/models/
RUN tar -xf /home/models/ch_ppocr_mobile_v2.0_cls_infer.tar -C /home/models
# Application code and operator packages.
COPY runtime/python-executor /opt/runtime
COPY runtime/ops /opt/runtime/datamate/ops
COPY runtime/ops/user /opt/runtime/user
COPY scripts/images/runtime/start.sh /opt/runtime/start.sh
ENV PYTHONPATH=/opt/runtime/datamate/
# CPU-only PyTorch wheel index as a secondary source.
ENV UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu"
ENV UV_INDEX_STRATEGY=unsafe-best-match
# Route uv's primary index through the Aliyun PyPI mirror.
ENV UV_INDEX_URL="https://mirrors.aliyun.com/pypi/simple/"
WORKDIR /opt/runtime
# Offline mode: install the vendored spaCy model wheel instead of downloading.
COPY ${MODELS_DIR}/zh_core_web_sm-3.8.0-py3-none-any.whl /tmp/
RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install -e .[all] --system \
&& uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
&& uv pip install /tmp/zh_core_web_sm-3.8.0-py3-none-any.whl --system \
&& echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth
# Shanghai timezone; make the entrypoint executable and CRLF-safe.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
&& chmod +x /opt/runtime/start.sh \
&& dos2unix /opt/runtime/start.sh
EXPOSE 8081
ENTRYPOINT ["/opt/runtime/start.sh"]

View File

@@ -0,0 +1,76 @@
# Makefile offline-build extension (script-backed variant).
# Append this content to the main Makefile, or use it standalone:
#   make -f Makefile.offline <target>
# Offline-build configuration (both overridable on the command line).
CACHE_DIR ?= ./build-cache
VERSION ?= latest
# ========== Offline-build targets ==========
# Thin wrappers delegating to the standalone scripts under scripts/offline/.
# Export all build caches on a networked machine (see export-cache.sh).
.PHONY: offline-export
offline-export:
	@echo "导出离线构建缓存..."
	@bash scripts/offline/export-cache.sh $(CACHE_DIR)

# Build every service from the exported cache (see build-offline.sh).
.PHONY: offline-build
offline-build:
	@echo "使用缓存进行离线构建..."
	@bash scripts/offline/build-offline.sh $(CACHE_DIR) $(VERSION)
# Unpack the newest build-cache-*.tar.gz if the cache directory is missing.
# Unlike the Makefile.offline.mk variant, this target does not docker-load
# the base images — build-offline.sh does that itself.
.PHONY: offline-setup
offline-setup:
	@echo "解压并设置离线缓存..."
	@if [ ! -d "$(CACHE_DIR)" ]; then \
		echo "查找缓存压缩包..."; \
		cache_file=$$(ls -t build-cache-*.tar.gz 2>/dev/null | head -1); \
		if [ -z "$$cache_file" ]; then \
			echo "错误: 未找到缓存压缩包 (build-cache-*.tar.gz)"; \
			exit 1; \
		fi; \
		echo "解压 $$cache_file..."; \
		tar -xzf "$$cache_file"; \
	fi
	@echo "✓ 离线缓存准备完成"
# Offline build of a single service, e.g. `make backend-offline-build`.
# FIX: the original declared `.PHONY: %-offline-build`, but .PHONY does not
# apply to pattern rules, so the declaration was silently ignored (pattern
# targets with no matching file are rebuilt every run regardless); it is
# dropped. The recipe-time `$(eval ...)` assignments, which mutated global
# make variables during recipe expansion, are replaced with inline $(if)s —
# deer-flow-* images keep their name, all others get the "datamate-" prefix.
# NOTE: the trailing `|| echo` keeps the target green even when the build
# fails (matching the original contract); check the log for the warning.
%-offline-build:
	@echo "离线构建 $*..."
	@if [ ! -d "$(CACHE_DIR)/buildkit/$*-cache" ]; then \
		echo "错误: $* 的缓存不存在于 $(CACHE_DIR)/buildkit/$*-cache"; \
		exit 1; \
	fi
	@docker buildx build \
		--cache-from type=local,src=$(CACHE_DIR)/buildkit/$*-cache \
		--network=none \
		-f scripts/images/$*/Dockerfile \
		-t $(if $(filter deer-flow%,$*),$*,datamate-$*):$(VERSION) \
		--load \
		. || echo "警告: $* 离线构建失败"
# Compatibility alias mirroring the main Makefile's build target naming
# (offline mode): prepare the cache, then run the full offline build.
.PHONY: build-offline
build-offline: offline-setup
	@$(MAKE) offline-build
# Usage summary for the script-backed offline-build targets. The output is
# intentionally Chinese, matching the rest of the user-facing messages.
.PHONY: help-offline
help-offline:
	@echo "离线构建命令:"
	@echo " make offline-export - 在有网环境导出构建缓存"
	@echo " make offline-setup - 解压并准备离线缓存"
	@echo " make offline-build - 在无网环境使用缓存构建"
	@echo " make <service>-offline-build - 离线构建单个服务"
	@echo ""
	@echo "示例:"
	@echo " # 有网环境导出缓存"
	@echo " make offline-export"
	@echo ""
	@echo " # 传输 build-cache-*.tar.gz 到无网环境"
	@echo " scp build-cache-20250202.tar.gz user@offline-server:/path/"
	@echo ""
	@echo " # 无网环境构建"
	@echo " make offline-setup"
	@echo " make offline-build"

245
scripts/offline/README.md Normal file
View File

@@ -0,0 +1,245 @@
# BuildKit 离线构建方案
本方案使用 Docker BuildKit 的缓存机制,实现在弱网/无网环境下的镜像构建。
## 方案概述
```
┌─────────────────────────────────────────────────────────────────┐
│ 有网环境 (Build Machine) │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
│ │ 基础镜像 │ │ BuildKit │ │ 外部资源 │ │
│ │ docker pull │ + │ 缓存导出 │ + │ (模型/源码) │ │
│ └─────────────┘ └─────────────┘ └─────────────────────┘ │
│ │ │ │ │
│ └──────────────────┼──────────────────┘ │
│ ▼ │
│ ┌──────────────────┐ │
│ │ build-cache.tar.gz│ │
│ └────────┬─────────┘ │
└─────────────────────────────┼───────────────────────────────────┘
│ 传输到无网环境
┌─────────────────────────────────────────────────────────────────┐
│ 无网环境 (Offline Machine) │
│ ┌──────────────────┐ │
│ │ build-cache.tar.gz│ │
│ └────────┬─────────┘ │
│ ▼ │
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │
│ │ docker load │ │ BuildKit │ │ 本地资源挂载 │ │
│ │ 基础镜像 │ + │ 缓存导入 │ + │ (模型/源码) │ │
│ └─────────────┘ └─────────────┘ └─────────────────────┘ │
│ │ │ │ │
│ └──────────────────┼──────────────────┘ │
│ ▼ │
│ 构建成功! │
└─────────────────────────────────────────────────────────────────┘
```
## 快速开始
### 方法一:使用 Makefile 扩展(推荐)
#### 1. 合并 Makefile
将 `Makefile.offline.mk` 追加到主 Makefile:
```bash
# Linux/Mac
cat Makefile.offline.mk >> Makefile
# Windows (PowerShell)
Get-Content Makefile.offline.mk | Add-Content Makefile
```
#### 2. 有网环境导出缓存
```bash
# 导出所有缓存(包括基础镜像、BuildKit 缓存、外部资源)
make offline-export
# 或者指定输出目录
make offline-export CACHE_DIR=/path/to/cache
```
执行完成后,会生成压缩包:`build-cache-YYYYMMDD.tar.gz`
#### 3. 传输到无网环境
```bash
# 使用 scp 或其他方式传输
scp build-cache-20250202.tar.gz user@offline-server:/opt/datamate/
# 或者使用 U 盘等物理介质
```
#### 4. 无网环境构建
```bash
# 解压缓存
tar -xzf build-cache-20250202.tar.gz
# 设置环境并构建
make offline-setup
make offline-build
# 或者指定版本号
make offline-build OFFLINE_VERSION=v1.0.0
```
### 方法二:使用独立脚本
#### 导出缓存
```bash
cd scripts/offline
./export-cache.sh /path/to/output
```
#### 离线构建
```bash
cd scripts/offline
./build-offline.sh /path/to/cache [version]
```
## 详细说明
### 缓存内容
缓存目录结构:
```
build-cache/
├── buildkit/ # BuildKit 缓存
│ ├── backend-cache/
│ ├── backend-python-cache/
│ ├── database-cache/
│ ├── frontend-cache/
│ ├── gateway-cache/
│ ├── runtime-cache/
│ ├── deer-flow-backend-cache/
│ ├── deer-flow-frontend-cache/
│ └── mineru-cache/
├── images/
│ └── base-images.tar # 基础镜像集合
└── resources/ # 外部资源
├── models/
│ ├── ch_ppocr_mobile_v2.0_cls_infer.tar # PaddleOCR 模型
│ └── zh_core_web_sm-3.8.0-py3-none-any.whl # spaCy 模型
├── DataX/ # DataX 源码
└── deer-flow/ # deer-flow 源码
```
### 单个服务构建
```bash
# 仅构建 backend
make backend-offline-build
# 仅构建 runtime
make runtime-offline-build
# 仅构建 deer-flow-backend
make deer-flow-backend-offline-build
```
### 增量更新
如果只有部分服务代码变更,可以只导出该服务的缓存:
```bash
# 重新导出 backend 缓存
docker buildx build \
--cache-to type=local,dest=./build-cache/buildkit/backend-cache,mode=max \
-f scripts/images/backend/Dockerfile \
-t datamate-backend:cache .
# 传输并重新构建
tar -czf build-cache-partial.tar.gz build-cache/buildkit/backend-cache
# ... 传输到无网环境 ...
make backend-offline-build
```
## 故障排查
### 问题 1: 缓存导入失败
```
ERROR: failed to solve: failed to read cache metadata
```
**解决**: 缓存目录可能损坏,重新在有网环境导出。
### 问题 2: 基础镜像不存在
```
ERROR: pull access denied
```
**解决**: 先执行 `make offline-setup` 加载基础镜像。
### 问题 3: 网络连接错误(无网环境)
```
ERROR: failed to do request: dial tcp: lookup ...
```
**解决**: 检查 Dockerfile 中是否还有网络依赖,可能需要修改 Dockerfile 使用本地资源。
### 问题 4: 内存不足
BuildKit 缓存可能占用大量内存,可以设置资源限制:
```bash
# 创建带资源限制的 buildx 构建器
docker buildx create --name offline-builder \
--driver docker-container \
--driver-opt memory=8g \
--use
```
## 限制说明
1. **镜像版本**: 基础镜像版本必须与缓存导出时一致
2. **Dockerfile 变更**: 如果 Dockerfile 发生较大变更,可能需要重新导出缓存
3. **资源文件**: mineru 镜像中的模型下载(`mineru-models-download`)仍需要网络,如果需要在完全无网环境使用,需要预先将模型文件挂载到镜像中
## 高级用法
### 自定义缓存位置
```bash
make offline-export CACHE_DIR=/mnt/nas/build-cache
make offline-build CACHE_DIR=/mnt/nas/build-cache
```
### 导出特定平台缓存
```bash
# 导出 ARM64 平台的缓存
docker buildx build \
--platform linux/arm64 \
--cache-to type=local,dest=./build-cache/buildkit/backend-cache,mode=max \
-f scripts/images/backend/Dockerfile .
```
### 使用远程缓存(有网环境)
```bash
# 导出到 S3/MinIO
docker buildx build \
--cache-to type=s3,region=us-east-1,bucket=mybucket,name=backend-cache \
-f scripts/images/backend/Dockerfile .
# 从 S3 导入
docker buildx build \
--cache-from type=s3,region=us-east-1,bucket=mybucket,name=backend-cache \
-f scripts/images/backend/Dockerfile .
```
## 参考
- [Docker BuildKit Documentation](https://docs.docker.com/build/buildkit/)
- [Cache Storage Backends](https://docs.docker.com/build/cache/backends/)

View File

@@ -0,0 +1,107 @@
#!/bin/bash
# BuildKit offline build script - run on the air-gapped machine.
# Usage: ./build-offline.sh [cache-dir] [version]
set -e

CACHE_DIR="${1:-./build-cache}"
VERSION="${2:-latest}"
BUILDKIT_CACHE_DIR="$CACHE_DIR/buildkit"
IMAGES_DIR="$CACHE_DIR/images"
RESOURCES_DIR="$CACHE_DIR/resources"

# The cache directory must have been unpacked beforehand.
if [ ! -d "$CACHE_DIR" ]; then
    echo "错误: 缓存目录 $CACHE_DIR 不存在"
    echo "请先解压缓存包: tar -xzf build-cache-*.tar.gz"
    exit 1
fi

# Make sure the dedicated buildx builder exists and is active.
if ! docker buildx inspect offline-builder > /dev/null 2>&1; then
    echo "创建 buildx 构建器..."
    docker buildx create --name offline-builder --driver docker-container --use
else
    docker buildx use offline-builder
fi

echo "======================================"
echo "1. 加载基础镜像"
echo "======================================"
if [ -f "$IMAGES_DIR/base-images.tar" ]; then
    echo "$IMAGES_DIR/base-images.tar 加载基础镜像..."
    docker load -i "$IMAGES_DIR/base-images.tar"
    echo "✓ 基础镜像加载完成"
else
    echo "警告: 基础镜像文件不存在,假设镜像已存在"
fi

echo ""
echo "======================================"
echo "2. 离线构建服务"
echo "======================================"
# Service configuration, "name:image:dockerfile" (keep in sync with
# export-cache.sh).
SERVICES=(
    "backend:datamate-backend:scripts/images/backend/Dockerfile"
    "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
    "database:datamate-database:scripts/images/database/Dockerfile"
    "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
    "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
    "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
    "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
    "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
    "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)

# FIX: MOUNT_ARGS was computed here but never passed to any `docker buildx
# build`, so the RESOURCES_DIR build-arg silently fell back to the
# Dockerfiles' defaults. It is now appended to both build invocations
# (intentionally unquoted so it word-splits into separate arguments; it is
# either empty or a single --build-arg KEY=VALUE pair with no spaces in the
# value).
MOUNT_ARGS=""
if [ -d "$RESOURCES_DIR" ]; then
    echo "检测到资源目录,将用于本地资源挂载"
    MOUNT_ARGS="--build-arg RESOURCES_DIR=$RESOURCES_DIR"
fi

for service_config in "${SERVICES[@]}"; do
    IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
    cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"
    echo ""
    echo "--------------------------------------"
    echo "构建 [$service_name] -> $image_name:$VERSION"
    echo "--------------------------------------"
    if [ ! -d "$cache_file" ]; then
        echo "警告: $service_name 的缓存不存在,跳过..."
        continue
    fi
    # First attempt is fully offline (--network=none guarantees no network
    # access); on failure, retry once with the default network in case some
    # build step still needs it.
    docker buildx build \
        --cache-from "type=local,src=$cache_file" \
        --network=none \
        $MOUNT_ARGS \
        -f "$dockerfile" \
        -t "$image_name:$VERSION" \
        --load \
        . || {
        echo "错误: $service_name 构建失败"
        echo "尝试不使用 --network=none 重新构建..."
        docker buildx build \
            --cache-from "type=local,src=$cache_file" \
            $MOUNT_ARGS \
            -f "$dockerfile" \
            -t "$image_name:$VERSION" \
            --load \
            .
    }
    echo "$service_name 构建完成"
done

echo ""
echo "======================================"
echo "✓ 离线构建完成!"
echo "======================================"
echo ""
echo "构建的镜像列表:"
docker images | grep -E "(datamate-|deer-flow-)" || true

View File

@@ -0,0 +1,134 @@
#!/bin/bash
# BuildKit cache export script - run on a networked machine.
# Usage: ./export-cache.sh [output-dir]
set -e

OUTPUT_DIR="${1:-./build-cache}"
BUILDKIT_CACHE_DIR="$OUTPUT_DIR/buildkit"
IMAGES_DIR="$OUTPUT_DIR/images"
RESOURCES_DIR="$OUTPUT_DIR/resources"

# Make sure the dedicated buildx builder exists and is active.
if ! docker buildx inspect offline-builder > /dev/null 2>&1; then
    echo "创建 buildx 构建器..."
    docker buildx create --name offline-builder --driver docker-container --use
else
    docker buildx use offline-builder
fi

mkdir -p "$BUILDKIT_CACHE_DIR" "$IMAGES_DIR" "$RESOURCES_DIR"

echo "======================================"
echo "1. 导出基础镜像"
echo "======================================"
# Every base image referenced by the Dockerfiles; pulled (best effort) and
# saved into a single tar for offline `docker load`.
BASE_IMAGES=(
    "maven:3-eclipse-temurin-21"
    "maven:3-eclipse-temurin-8"
    "eclipse-temurin:21-jdk"
    "mysql:8"
    "node:20-alpine"
    "nginx:1.29"
    "ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm"
    "ghcr.nju.edu.cn/astral-sh/uv:python3.12-bookworm"
    "ghcr.nju.edu.cn/astral-sh/uv:latest"
    "python:3.12-slim"
    "python:3.11-slim"
    "gcr.io/distroless/nodejs20-debian12"
)
for img in "${BASE_IMAGES[@]}"; do
    echo "拉取: $img"
    docker pull "$img" || echo "警告: $img 拉取失败,可能已存在"
done
echo ""
echo "保存基础镜像到 $IMAGES_DIR/base-images.tar..."
docker save -o "$IMAGES_DIR/base-images.tar" "${BASE_IMAGES[@]}"
echo "✓ 基础镜像保存完成"

echo ""
echo "======================================"
echo "2. 导出 BuildKit 构建缓存"
echo "======================================"
# Service configuration, "name:image:dockerfile" (keep in sync with
# build-offline.sh).
SERVICES=(
    "backend:datamate-backend:scripts/images/backend/Dockerfile"
    "backend-python:datamate-backend-python:scripts/images/backend-python/Dockerfile"
    "database:datamate-database:scripts/images/database/Dockerfile"
    "frontend:datamate-frontend:scripts/images/frontend/Dockerfile"
    "gateway:datamate-gateway:scripts/images/gateway/Dockerfile"
    "runtime:datamate-runtime:scripts/images/runtime/Dockerfile"
    "deer-flow-backend:deer-flow-backend:scripts/images/deer-flow-backend/Dockerfile"
    "deer-flow-frontend:deer-flow-frontend:scripts/images/deer-flow-frontend/Dockerfile"
    "mineru:datamate-mineru:scripts/images/mineru/Dockerfile"
)
for service_config in "${SERVICES[@]}"; do
    IFS=':' read -r service_name image_name dockerfile <<< "$service_config"
    cache_file="$BUILDKIT_CACHE_DIR/$service_name-cache"
    echo ""
    echo "导出 [$service_name] 缓存到 $cache_file..."
    # Build once with the network available to populate the local cache.
    docker buildx build \
        --cache-to "type=local,dest=$cache_file,mode=max" \
        -f "$dockerfile" \
        -t "$image_name:cache" \
        . || echo "警告: $service_name 缓存导出失败"
    echo "$service_name 缓存导出完成"
done

echo ""
echo "======================================"
echo "3. 预下载外部资源"
echo "======================================"
# FIX: `wget -O file url || true` used to leave a zero-byte file behind on
# failure; the `[ ! -f ]` guard then skipped the re-download on subsequent
# runs and a broken artifact was packaged. Partial files are now removed.
# PaddleOCR model
mkdir -p "$RESOURCES_DIR/models"
if [ ! -f "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar" ]; then
    echo "下载 PaddleOCR 模型..."
    wget -O "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar" \
        "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar" \
        || rm -f "$RESOURCES_DIR/models/ch_ppocr_mobile_v2.0_cls_infer.tar"
fi
# spaCy model
if [ ! -f "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl" ]; then
    echo "下载 spaCy 模型..."
    wget -O "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl" \
        "https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl" \
        || rm -f "$RESOURCES_DIR/models/zh_core_web_sm-3.8.0-py3-none-any.whl"
fi
# DataX sources (git clone creates the directory only on success, so the
# `[ ! -d ]` guard is already re-run safe)
if [ ! -d "$RESOURCES_DIR/DataX" ]; then
    echo "克隆 DataX 源码..."
    git clone --depth 1 "https://gitee.com/alibaba/DataX.git" "$RESOURCES_DIR/DataX" || true
fi
# deer-flow sources (used by the deer-flow builds)
if [ ! -d "$RESOURCES_DIR/deer-flow" ]; then
    echo "克隆 deer-flow 源码..."
    git clone --depth 1 "https://ghproxy.net/https://github.com/ModelEngine-Group/deer-flow.git" "$RESOURCES_DIR/deer-flow" || true
fi

echo ""
echo "======================================"
echo "4. 打包缓存"
echo "======================================"
cd "$OUTPUT_DIR"
tar -czf "build-cache-$(date +%Y%m%d).tar.gz" buildkit images resources
cd - > /dev/null

echo ""
echo "======================================"
echo "✓ 缓存导出完成!"
echo "======================================"
echo "缓存位置: $OUTPUT_DIR"
echo "传输文件: $OUTPUT_DIR/build-cache-$(date +%Y%m%d).tar.gz"
echo ""
echo "请将此压缩包传输到无网环境后解压使用"