bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子

* bugfix: 创建清洗任务时修改数据集状态;无法删除已在模板/运行任务的算子
This commit is contained in:
hhhhsc701
2025-11-27 17:34:53 +08:00
committed by GitHub
parent 91390cace0
commit f1bffdcd61
15 changed files with 46 additions and 15 deletions

View File

@@ -3,4 +3,4 @@
from datamate.core.base_op import OPERATORS
OPERATORS.register_module(module_name='MineruFormatter',
-module_path="ops.formatter.external_pdf_formatter.process")
+module_path="ops.formatter.mineru_formatter.process")

View File

@@ -25,6 +25,8 @@ class MineruFormatter(Mapper):
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
start = time.time()
filename = sample[self.filename_key]
+if not filename.lower().endswith(".pdf"):
+return sample
try:
data = {"source_path": sample[self.filepath_key], "export_path": sample[self.export_path_key]}
response = http_request(method="POST", url=self.pdf_extract_url, data=data)