diff --git a/runtime/datamate-python/app/module/dataset/service/pdf_extract.py b/runtime/datamate-python/app/module/dataset/service/pdf_extract.py
index a4e5fb1..a0f2e2a 100644
--- a/runtime/datamate-python/app/module/dataset/service/pdf_extract.py
+++ b/runtime/datamate-python/app/module/dataset/service/pdf_extract.py
@@ -1,5 +1,4 @@
 import csv
-import csv
 import datetime
 import os
 from io import StringIO
@@ -76,6 +75,7 @@ class PdfTextExtractService:
         source_path = self._resolve_source_path(file_record)
         dataset_path = self._resolve_dataset_path(dataset)
         target_path = self._resolve_target_path(dataset_path, source_path, file_record, file_id, file_type)
+        logical_path = self._build_logical_path(dataset_path, target_path)
 
         existing_record = await self._find_existing_text_record(dataset_id, target_path)
         if existing_record:
@@ -85,7 +85,7 @@ class PdfTextExtractService:
             file_size = self._get_file_size(target_path)
             parser_name = PARSER_BY_FILE_TYPE.get(file_type, "")
             record = await self._create_text_file_record(
-                dataset, file_record, target_path, file_size, parser_name, derived_file_type
+                dataset, file_record, target_path, logical_path, file_size, parser_name, derived_file_type
             )
             return self._build_response(dataset_id, file_id, record)
 
@@ -94,7 +94,7 @@ class PdfTextExtractService:
         self._write_text_file(target_path, text_content)
         file_size = self._get_file_size(target_path)
         record = await self._create_text_file_record(
-            dataset, file_record, target_path, file_size, parser_name, derived_file_type
+            dataset, file_record, target_path, logical_path, file_size, parser_name, derived_file_type
         )
         return self._build_response(dataset_id, file_id, record)
 
@@ -170,6 +170,19 @@ class PdfTextExtractService:
         target_dir.mkdir(parents=True, exist_ok=True)
         return target_dir / output_name
 
+    @staticmethod
+    def _build_logical_path(dataset_path: Path, target_path: Path) -> str:
+        """Return target_path relative to dataset_path, "/"-separated; raise HTTPException if outside or equal."""
+        dataset_root, resolved_target = dataset_path.resolve(), target_path.resolve()
+        try:
+            relative_path = resolved_target.relative_to(dataset_root)
+        except ValueError as exc:
+            raise HTTPException(status_code=400, detail="解析文件路径超出数据集目录") from exc
+        logical_path = str(relative_path).replace("\\", "/").strip()
+        if logical_path in ("", "."):  # target == dataset root gives Path("."), whose str() is "." and never ""
+            raise HTTPException(status_code=500, detail="解析文件逻辑路径为空")
+        return logical_path
+
     async def _find_existing_text_record(self, dataset_id: str, target_path: Path) -> DatasetFiles | None:
         result = await self.db.execute(
             select(DatasetFiles).where(
@@ -259,10 +272,12 @@ class PdfTextExtractService:
         dataset: Dataset,
         source_file: DatasetFiles,
         target_path: Path,
+        logical_path: str,
         file_size: int,
         parser_name: str,
         derived_file_type: str,
     ) -> DatasetFiles:
+        assert logical_path
         assert parser_name
         assert derived_file_type
         metadata = {
@@ -275,6 +290,7 @@ class PdfTextExtractService:
             dataset_id=dataset.id,  # type: ignore[arg-type]
             file_name=target_path.name,
             file_path=str(target_path),
+            logical_path=logical_path,
             file_type=derived_file_type,
             file_size=file_size,
             dataset_filemetadata=metadata,