feat: 支持运行data-juicer算子 (#215)

* feature: 增加data-juicer算子

* feat: 支持运行data-juicer算子

* feat: 支持data-juicer任务下发

* feat: 支持data-juicer结果数据集归档

* feat: 支持data-juicer结果数据集归档
This commit is contained in:
hhhhsc701
2025-12-31 09:20:41 +08:00
committed by GitHub
parent 63f4e3e447
commit 6a1eb85e8e
26 changed files with 709 additions and 120 deletions

View File

@@ -1,6 +1,11 @@
# -*- coding: utf-8 -*-
import os
from datamate.scheduler import cmd_scheduler
async def submit(task_id, config_path):
    """Hand the data-juicer commands for ``task_id`` to the command scheduler.

    Two command lines are submitted, in order, under the same ``task_id``:

    1. ``dj-process --config <config_path>``
    2. ``python <this module's directory>/data_juicer_executor.py
       --config_path=<config_path>``

    Args:
        task_id: Identifier forwarded unchanged to ``cmd_scheduler.submit``.
        config_path: Path interpolated verbatim into both command lines.
    """
    # NOTE(review): this block was recovered from a diff view whose +/- markers
    # were lost; the ``dj-process`` submission below looks like the pre-change
    # line that the executor-script submission replaced -- confirm against the
    # repository before relying on both commands being submitted.
    await cmd_scheduler.submit(task_id, f"dj-process --config {config_path}")

    # The executor script is resolved relative to this module's own location.
    executor_script = os.path.join(os.path.dirname(__file__), 'data_juicer_executor.py')
    command = f"python {executor_script} --config_path={config_path}"
    await cmd_scheduler.submit(task_id, command)