diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 1d58b88..084182c 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -90,6 +90,7 @@ services: - dataset_volume:/dataset - flow_volume:/flow - operator-runtime-volume:/opt/runtime/datamate/ops/user + - operator-packages-volume:/usr/local/lib/ops/site-packages networks: [ datamate ] # 4) mineru @@ -150,6 +151,8 @@ volumes: name: datamate-operator-upload-volume operator-runtime-volume: name: datamate-operator-runtime-volume + operator-packages-volume: + name: datamate-operator-packages-volume mineru_log_volume: name: datamate-mineru_log_volume diff --git a/deployment/helm/datamate/values.yaml b/deployment/helm/datamate/values.yaml index 3fa3840..7ed5da6 100644 --- a/deployment/helm/datamate/values.yaml +++ b/deployment/helm/datamate/values.yaml @@ -170,6 +170,9 @@ runtime: - mountPath: /opt/runtime/datamate/ops/user name: operator-volume subPath: extract + - mountPath: /usr/local/lib/ops/site-packages + name: operator-volume + subPath: site-packages ray-cluster: enabled: true @@ -214,6 +217,9 @@ ray-cluster: - mountPath: /opt/runtime/datamate/ops/user name: operator-volume subPath: extract + - mountPath: /usr/local/lib/ops/site-packages + name: operator-volume + subPath: site-packages sidecarContainers: - name: runtime image: datamate-runtime @@ -262,3 +268,6 @@ ray-cluster: - mountPath: /opt/runtime/datamate/ops/user name: operator-volume subPath: extract + - mountPath: /usr/local/lib/ops/site-packages + name: operator-volume + subPath: site-packages diff --git a/runtime/ops/formatter/mineru_formatter/process.py b/runtime/ops/formatter/mineru_formatter/process.py index fcb7508..730accf 100644 --- a/runtime/ops/formatter/mineru_formatter/process.py +++ b/runtime/ops/formatter/mineru_formatter/process.py @@ -26,6 +26,7 @@ class MineruFormatter(Mapper): self.server_url = "http://datamate-mineru:8000" self.backend = "vlm-http-client" self.output_dir = "/dataset/outputs" + self.max_retries = 3 def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: start = time.time() @@ -51,16 +52,29 @@ class MineruFormatter(Mapper): content = "" for page in range(0, total_page, 10): logger.info(f"fileName: {filename}, total_page: {total_page}, page: {page}.") - await aio_do_parse( - output_dir=self.output_dir, - pdf_file_names=[filename_without_ext], - pdf_bytes_list=[pdf_bytes], - p_lang_list=["ch"], - backend=self.backend, - server_url=self.server_url, - start_page_id=page, - end_page_id=min(page + 9, total_page - 1), - ) + for attempt in range(self.max_retries): + try: + await aio_do_parse( + output_dir=self.output_dir, + pdf_file_names=[filename_without_ext], + pdf_bytes_list=[pdf_bytes], + p_lang_list=["ch"], + backend=self.backend, + server_url=self.server_url, + start_page_id=page, + end_page_id=min(page + 9, total_page - 1), + ) + break # 成功则跳出重试循环 + except Exception as e: + logger.warning( + f"Extract {filename} [{page}-{page + 9}] failed (attempt {attempt + 1}/{self.max_retries}). " + f"Error: {e}. Retrying in 5s..." + ) + if attempt < self.max_retries - 1: + await asyncio.sleep(5) + else: + logger.error(f"aio_do_parse failed after {self.max_retries} attempts.") + raise # 耗尽次数后抛出异常,交给上层 execute 处理 if os.path.exists(parse_dir): content += get_infer_result(".md", filename_without_ext, parse_dir) shutil.rmtree(parse_dir) diff --git a/scripts/images/runtime/Dockerfile b/scripts/images/runtime/Dockerfile index d1f886a..b70cf58 100644 --- a/scripts/images/runtime/Dockerfile +++ b/scripts/images/runtime/Dockerfile @@ -23,7 +23,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \ UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" uv pip install -e . --system --index-strategy unsafe-best-match \ && UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \ && uv pip uninstall torch torchvision --system \ - && python -m spacy download zh_core_web_sm + && python -m spacy download zh_core_web_sm \ + && echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \ && chmod +x /opt/runtime/start.sh \