feat: 支持运行data-juicer算子 (#215)

* feature: 增加data-juicer算子

* feat: 支持运行data-juicer算子

* feat: 支持data-juicer任务下发

* feat: 支持data-juicer结果数据集归档

* feat: 支持data-juicer结果数据集归档
This commit is contained in:
hhhhsc701
2025-12-31 09:20:41 +08:00
committed by GitHub
parent 63f4e3e447
commit 6a1eb85e8e
26 changed files with 709 additions and 120 deletions

View File

@@ -3,7 +3,7 @@ name = "ops"
 version = "0.0.1"
 description = "Add your description here"
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.10"
 dependencies = [
 "beautifulsoup4>=4.14.3",
 "datasketch>=1.8.0",
@@ -12,14 +12,14 @@ dependencies = [
 "jieba>=0.42.1",
 "loguru>=0.7.3",
 "mineru>=2.6.5",
-"numpy==1.24.3",
+"numpy>=2.2.6",
 "python-multipart>=0.0.20",
-"opencv-contrib-python-headless==4.7.0.72",
-"opencv-python-headless==4.7.0.72",
+"opencv-contrib-python-headless>=4.12.0.88",
+"opencv-python-headless>=4.12.0.88",
 "openslide-python>=1.4.3",
-"paddleocr==2.8.1",
-"paddlepaddle==2.6.2",
-"pandas==1.5.3",
+"paddleocr==3.3.0",
+"paddlepaddle==3.2.2",
+"pandas>=2.2.3",
 "presidio-analyzer==2.2.25",
 "presidio-anonymizer==2.2.25",
 "pycryptodome>=3.23.0",
@@ -27,7 +27,7 @@ dependencies = [
 "python-docx>=1.2.0",
 "pytz>=2025.2",
 "six>=1.17.0",
-"spacy==3.7.0",
+"spacy>=3.7.0",
 "sqlalchemy>=2.0.44",
 "xmltodict>=1.0.2",
 "zhconv>=1.4.3",