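# Source: DataMate repository (forked copy) -- Dockerfile, 59 lines, 2.6 KiB.
# Summary of the change that introduced the caching used in this file:
#   - Mount the DataX sources from a build cache to avoid repeated clones and speed up builds.
#   - Add a cache mount for NLTK data, plus a check that fails the build when the download fails.
#   - Cache the PaddleOCR model download so it can be reused offline.
#   - Cache the spaCy model to make installation more reliable.
#   - Adapt the build flow to dependency downloads on weak networks.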
# Base image: uv on Python 3.11 / Debian bookworm, pulled from ghcr.nju.edu.cn
# (presumably a mirror of ghcr.io/astral-sh/uv -- confirm).
FROM ghcr.nju.edu.cn/astral-sh/uv:python3.11-bookworm

# Install the system packages needed by the runtime, using the Aliyun Debian mirror.
#
# Why the extra steps around the cache mounts:
#   * Debian-based official images ship /etc/apt/apt.conf.d/docker-clean, which
#     deletes downloaded .deb files after every install. Without removing it (and
#     enabling Keep-Downloaded-Packages) the /var/cache/apt mount stays empty and
#     every rebuild downloads everything again.
#   * sharing=locked serialises concurrent builds on the same cache, because apt
#     needs exclusive access to its directories.
#   * apt-get is used instead of apt: apt's CLI is not a stable scripting interface.
#   * DEBIAN_FRONTEND is set inline so it does not leak into the runtime environment.
#
# NOTE(review): --no-install-recommends is intentionally NOT added here; it would
# drop packages that libreoffice/tesseract pull in as recommendations and could
# change runtime behaviour. Evaluate separately.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache \
    && if [ -f /etc/apt/sources.list.d/debian.sources ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list.d/debian.sources; \
    elif [ -f /etc/apt/sources.list ]; then \
        sed -i 's/deb.debian.org/mirrors.aliyun.com/g' /etc/apt/sources.list; \
    fi \
    && apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        dos2unix \
        libgl1 \
        libglib2.0-0 \
        libmagic1 \
        libreoffice \
        poppler-utils \
        swig \
        tesseract-ocr \
        vim

# Fetch the PaddleOCR classifier model through a build cache so that rebuilds on a
# weak network can reuse a previous download, then unpack it into /home/models.
#
# The archive is downloaded to a ".part" file and only renamed to its final name
# after wget succeeds. Writing directly to the final path would leave a truncated
# file in the cache when the download is interrupted, and every later build would
# then treat that broken file as a valid cached model.
# A cached archive that tar cannot list is discarded and downloaded again, which
# also repairs caches that already contain a truncated file.
RUN --mount=type=cache,target=/tmp/models \
    model_tar=/tmp/models/ch_ppocr_mobile_v2.0_cls_infer.tar \
    && mkdir -p /home/models \
    && if [ -f "${model_tar}" ] && ! tar -tf "${model_tar}" > /dev/null 2>&1; then \
        echo "Cached PaddleOCR model is unreadable, discarding it"; \
        rm -f "${model_tar}"; \
    fi \
    && if [ ! -f "${model_tar}" ]; then \
        echo "Downloading PaddleOCR model..."; \
        wget -O "${model_tar}.part" https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar \
            && mv "${model_tar}.part" "${model_tar}" \
            || { rm -f "${model_tar}.part"; exit 1; }; \
    else \
        echo "Using cached PaddleOCR model"; \
    fi \
    && tar -xf "${model_tar}" -C /home/models

# Assemble the runtime tree under /opt/runtime from the build context:
#   runtime/python-executor          -> /opt/runtime/              (project root)
#   runtime/ops                      -> /opt/runtime/datamate/ops/
#   runtime/ops/user                 -> /opt/runtime/user/
#   scripts/images/runtime/start.sh  -> /opt/runtime/start.sh      (container entrypoint)
COPY runtime/python-executor /opt/runtime/
COPY runtime/ops /opt/runtime/datamate/ops/
COPY runtime/ops/user /opt/runtime/user/
COPY scripts/images/runtime/start.sh /opt/runtime/start.sh

# Environment for both the build steps below and the running container:
#   PYTHONPATH            adds /opt/runtime/datamate/ to Python's module search path.
#   UV_INDEX_URL          makes uv use the Aliyun PyPI mirror as its default index.
#   UV_EXTRA_INDEX_URL    adds the PyTorch CPU wheel index as an extra index.
#   UV_INDEX_STRATEGY     unsafe-best-match: see the uv documentation on index
#                         strategies for how versions are chosen across indexes.
ENV PYTHONPATH=/opt/runtime/datamate/ \
    UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" \
    UV_INDEX_STRATEGY=unsafe-best-match \
    UV_INDEX_URL="https://mirrors.aliyun.com/pypi/simple/"

# Subsequent instructions (and the container process) run from the project root.
WORKDIR /opt/runtime

# Install the Python dependencies system-wide with uv, then install the spaCy
# Chinese model from a wheel kept in a build cache (helps on weak networks).
#
#   * ".[all]" is quoted so the shell cannot treat the brackets as a glob pattern.
#   * The wheel is downloaded to a ".part" file and renamed only after wget
#     succeeds; otherwise an interrupted download would leave a truncated wheel in
#     the cache that later builds would reuse and fail on.
#   * A cached wheel that cannot be opened as a zip archive is discarded and
#     downloaded again, which repairs caches that already hold a truncated file.
#   * The final step writes ops.pth so that /usr/local/lib/ops/site-packages is
#     added to sys.path by the system Python 3.11 interpreter.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=cache,target=/tmp/spacy-models \
    spacy_whl=/tmp/spacy-models/zh_core_web_sm-3.8.0-py3-none-any.whl \
    && uv pip install -e ".[all]" --system \
    && uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
    && if [ -f "${spacy_whl}" ] && ! python3 -m zipfile -t "${spacy_whl}" > /dev/null 2>&1; then \
        echo "Cached spaCy model is unreadable, discarding it"; \
        rm -f "${spacy_whl}"; \
    fi \
    && if [ ! -f "${spacy_whl}" ]; then \
        echo "Downloading spaCy model..."; \
        wget -O "${spacy_whl}.part" https://ghproxy.net/https://github.com/explosion/spacy-models/releases/download/zh_core_web_sm-3.8.0/zh_core_web_sm-3.8.0-py3-none-any.whl \
            && mv "${spacy_whl}.part" "${spacy_whl}" \
            || { rm -f "${spacy_whl}.part"; exit 1; }; \
    else \
        echo "Using cached spaCy model"; \
    fi \
    && uv pip install "${spacy_whl}" --system \
    && echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth

# Point the system local time at Asia/Shanghai, then make the start script
# executable and convert its line endings to Unix style.
# "set -e" aborts the step on the first failing command.
RUN set -e; \
    ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime; \
    chmod +x /opt/runtime/start.sh; \
    dos2unix /opt/runtime/start.sh

# Documents the port the service is expected to listen on (does not publish it).
EXPOSE 8081/tcp

# Exec-form entrypoint: the container runs the start script directly, without a shell wrapper.
# NOTE(review): no USER instruction is set, so the script runs as root -- confirm this is intended.
ENTRYPOINT ["/opt/runtime/start.sh"]