From e862925a0640f9bfc625134d26c09b105d26f264 Mon Sep 17 00:00:00 2001
From: Jerry Yan <792602257@qq.com>
Date: Fri, 6 Feb 2026 18:46:44 +0800
Subject: [PATCH] =?UTF-8?q?feat(export):=20=E6=B7=BB=E5=8A=A0=E9=80=BB?=
 =?UTF-8?q?=E8=BE=91=E8=B7=AF=E5=BE=84=E6=9E=84=E5=BB=BA=E5=8A=9F=E8=83=BD?=
 =?UTF-8?q?=E6=94=AF=E6=8C=81=E6=96=87=E4=BB=B6=E7=AE=A1=E7=90=86?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 在导出服务中实现_build_logical_path方法用于构建相对路径
- 更新数据集文件记录以包含logical_path字段
- 在比率任务服务中实现build_logical_path静态方法
- 将逻辑路径信息添加到数据集文件记录中
- 规范化路径处理并替换反斜杠为正斜杠
- 添加无效路径验证防止目录遍历安全问题
---
 .../app/module/generation/service/export_service.py | 18 ++++++++++++++++++
 .../app/module/ratio/service/ratio_task.py          | 18 ++++++++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/runtime/datamate-python/app/module/generation/service/export_service.py b/runtime/datamate-python/app/module/generation/service/export_service.py
index ee3eb24..40cfb23 100644
--- a/runtime/datamate-python/app/module/generation/service/export_service.py
+++ b/runtime/datamate-python/app/module/generation/service/export_service.py
@@ -74,6 +74,7 @@ class SynthesisDatasetExporter:
             file_path = os.path.join(base_path, archived_file_name)
             os.makedirs(os.path.dirname(file_path), exist_ok=True)
             self._write_jsonl(file_path, records)
+            logical_path = self._build_logical_path(base_path, file_path)
 
             # 计算文件大小
             try:
@@ -85,6 +86,7 @@ class SynthesisDatasetExporter:
                 dataset_id=dataset.id,
                 file_name=archived_file_name,
                 file_path=file_path,
+                logical_path=logical_path,
                 file_type="jsonl",
                 file_size=file_size,
                 last_access_time=datetime.datetime.now(),
@@ -158,3 +160,19 @@ class SynthesisDatasetExporter:
             raise SynthesisExportError("Dataset path is empty")
         os.makedirs(dataset.path, exist_ok=True)
         return dataset.path
+
+    @staticmethod
+    def _build_logical_path(dataset_path: str, file_path: str) -> str:
+        """Return file_path relative to dataset_path, using "/" separators.
+
+        Raises SynthesisExportError when the file resolves to the dataset
+        directory itself or to a location outside of it.
+        """
+        normalized_dataset_path = os.path.abspath(dataset_path)
+        normalized_file_path = os.path.abspath(file_path)
+        relative_path = os.path.relpath(normalized_file_path, normalized_dataset_path).replace("\\", "/").strip()
+        # relpath yields a bare ".." (no trailing slash) when file_path is the
+        # dataset's parent, so it must be rejected besides the "../" prefix.
+        if relative_path in ("", ".", "..") or relative_path.startswith("../"):
+            raise SynthesisExportError(f"Invalid logical path generated for file: {file_path}")
+        return relative_path
diff --git a/runtime/datamate-python/app/module/ratio/service/ratio_task.py b/runtime/datamate-python/app/module/ratio/service/ratio_task.py
index 10247da..8d0304c 100644
--- a/runtime/datamate-python/app/module/ratio/service/ratio_task.py
+++ b/runtime/datamate-python/app/module/ratio/service/ratio_task.py
@@ -187,11 +187,13 @@ class RatioTaskService:
         dst_dir = os.path.dirname(new_path)
         await asyncio.to_thread(os.makedirs, dst_dir, exist_ok=True)
         await asyncio.to_thread(shutil.copy2, src_path, new_path)
+        logical_path = RatioTaskService.build_logical_path(dst_prefix, new_path)
 
         file_data = {
             "dataset_id": target_ds.id,  # type: ignore
             "file_name": file_name,
             "file_path": new_path,
+            "logical_path": logical_path,
             "file_type": f.file_type,
             "file_size": f.file_size,
             "check_sum": f.check_sum,
@@ -204,6 +206,22 @@ class RatioTaskService:
         session.add(DatasetFiles(**file_record))
         existing_paths.add(new_path)
 
+    @staticmethod
+    def build_logical_path(dataset_prefix: str, file_path: str) -> str:
+        """Return file_path relative to dataset_prefix, using "/" separators.
+
+        Raises ValueError when the file resolves to the dataset directory
+        itself or to a location outside of it.
+        """
+        normalized_dataset_prefix = os.path.abspath(dataset_prefix)
+        normalized_file_path = os.path.abspath(file_path)
+        relative_path = os.path.relpath(normalized_file_path, normalized_dataset_prefix).replace("\\", "/").strip()
+        # relpath yields a bare ".." (no trailing slash) when file_path is the
+        # dataset's parent, so it must be rejected besides the "../" prefix.
+        if relative_path in ("", ".", "..") or relative_path.startswith("../"):
+            raise ValueError(f"Invalid logical path generated for file: {file_path}")
+        return relative_path
+
     @staticmethod
     def get_new_file_name(dst_prefix: str, existing_paths: set[Any], f) -> str:
         file_name = f.file_name