支持mineru npu处理 (#174)

* feature: unstructured支持简单pdf处理

* feature: update values.yaml to enhance ray-cluster configuration with security context, environment variables, and resource limits

* feature: update deploy.yaml and process.py for mineru server configuration and PDF processing enhancements

* feature: update deploy.yaml and process.py for mineru server configuration and PDF processing enhancements

* feature: improve PDF processing logic and update dependencies in process.py and pyproject.toml

* feature: improve PDF processing logic and update dependencies in process.py and pyproject.toml

* feature: update Dockerfile for improved package source mirrors and add mineru-npu to build targets
This commit is contained in:
hhhhsc701
2025-12-17 16:31:06 +08:00
committed by GitHub
parent 3b4f8488e8
commit 924d977d6f
8 changed files with 110 additions and 35 deletions

View File

@@ -1,6 +1,8 @@
# Build stage: clones the DataX sources on top of the maven:3-eclipse-temurin-8 image.
FROM maven:3-eclipse-temurin-8 AS datax-builder

# Rewrite the Ubuntu archive/security hosts to the Huawei Cloud mirror, then
# refresh the package index, install git and clone DataX. All steps stay in a
# single RUN so the index refresh and the install always share one layer.
# The sed rewrite must come first: a leading "apt-get update && \" followed by
# a second RUN keyword would be joined into one shell command and fail.
# NOTE(review): newer Ubuntu bases keep their sources in
# /etc/apt/sources.list.d/ubuntu.sources; confirm /etc/apt/sources.list is the
# file actually used by this base image, otherwise the sed calls are no-ops.
RUN sed -i "s@http://.*archive.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list && \
    sed -i "s@http://.*security.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y git && \
    git clone https://github.com/alibaba/DataX.git
@@ -21,7 +23,9 @@ RUN cd /opt/backend && \
# Runtime stage based on the eclipse-temurin:21-jdk image.
FROM eclipse-temurin:21-jdk

# Rewrite the Ubuntu archive/security hosts to the Huawei Cloud mirror, then
# refresh the index and install the runtime tooling in the same layer.
# The apt cache and package lists are removed in this same RUN so they do not
# persist in the image layer.
# The sed rewrite must come first: a leading "apt-get update && \" followed by
# a second RUN keyword would be joined into one shell command and fail.
# NOTE(review): newer Ubuntu bases keep their sources in
# /etc/apt/sources.list.d/ubuntu.sources; confirm /etc/apt/sources.list is the
# file actually used by this base image, otherwise the sed calls are no-ops.
RUN sed -i "s@http://.*archive.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list && \
    sed -i "s@http://.*security.ubuntu.com@http://mirrors.huaweicloud.com@g" /etc/apt/sources.list && \
    apt-get update && \
    apt-get install -y vim wget curl nfs-common rsync python3 python3-pip python-is-python3 dos2unix && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*