feat: 支持运行data-juicer算子 (#215)

* feature: 增加data-juicer算子

* feat: 支持运行data-juicer算子

* feat: 支持data-juicer任务下发

* feat: 支持data-juicer结果数据集归档

* feat: 支持data-juicer结果数据集归档
This commit is contained in:
hhhhsc701
2025-12-31 09:20:41 +08:00
committed by GitHub
parent 63f4e3e447
commit 6a1eb85e8e
26 changed files with 709 additions and 120 deletions

View File

@@ -16,12 +16,14 @@ COPY runtime/ops/user /opt/runtime/user
# Container start script for the runtime image.
COPY scripts/images/runtime/start.sh /opt/runtime/start.sh

# Python import root plus uv resolver settings.
# UV_EXTRA_INDEX_URL points uv at the PyTorch CPU wheel index in addition to
# the default index; UV_INDEX_STRATEGY lets uv pick the best match across all
# configured indexes. Both are ENV (not ARG), so they also persist at runtime.
ENV PYTHONPATH=/opt/runtime/datamate/ \
    UV_EXTRA_INDEX_URL="https://download.pytorch.org/whl/cpu" \
    UV_INDEX_STRATEGY=unsafe-best-match

# All following relative paths (e.g. the editable install of ".") resolve here.
WORKDIR /opt/runtime
# Install the runtime package and the operator dependencies in a single layer.
# - The uv download cache lives on a BuildKit cache mount, so it speeds up
#   rebuilds without being written into the image.
# - Index configuration (UV_EXTRA_INDEX_URL / UV_INDEX_STRATEGY) is taken from
#   the ENV instructions earlier in this file, so it is not repeated inline.
# - ".[all]" is quoted so the shell cannot glob-expand the extras specifier.
# - NOTE(review): torch/torchvision are removed right after installation;
#   presumably they are supplied from /usr/local/lib/ops/site-packages at
#   runtime -- confirm.
# - The final step writes a .pth file so that Python adds
#   /usr/local/lib/ops/site-packages to sys.path on interpreter start-up.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install -e ".[all]" --system \
    && uv pip install -r /opt/runtime/datamate/ops/pyproject.toml --system \
    && uv pip uninstall torch torchvision --system \
    && python -m spacy download zh_core_web_sm \
    && echo "/usr/local/lib/ops/site-packages" > /usr/local/lib/python3.11/site-packages/ops.pth