From 2f3a8b38d010adafc0d7d4439d981dac9c5e5398 Mon Sep 17 00:00:00 2001
From: Jerry Yan <792602257@qq.com>
Date: Sat, 31 Jan 2026 19:10:22 +0800
Subject: [PATCH] =?UTF-8?q?fix(dataset):=20=E8=A7=A3=E5=86=B3=E6=95=B0?=
 =?UTF-8?q?=E6=8D=AE=E9=9B=86=E6=96=87=E4=BB=B6=E6=9F=A5=E8=AF=A2=E6=97=B6?=
 =?UTF-8?q?=E7=A9=BA=E7=9B=AE=E5=BD=95=E5=AF=BC=E8=87=B4=E5=BC=82=E5=B8=B8?=
 =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 添加目录存在性检查，避免文件系统访问异常
- 目录不存在时直接返回空分页结果，不再因 Files.list 触发 IOException 并记录 error 日志
- 优化数据集刚创建时的用户体验
- 其余增删行在文本上完全一致，仅为空白/行尾差异（实际逻辑改动只有新增的 4 行目录检查）
---
 .../DatasetFileApplicationService.java        | 552 +++++++++---------
 1 file changed, 278 insertions(+), 274 deletions(-)

diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
index 040ec6c..7c65d24 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
@@ -22,16 +22,16 @@ import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckIn
 import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
 import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
 import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository;
-import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
-import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
-import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
-import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
-import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
-import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
-import com.fasterxml.jackson.core.type.TypeReference;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import jakarta.servlet.http.HttpServletResponse;
+import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
+import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
+import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
+import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
+import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
+import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import jakarta.servlet.http.HttpServletResponse;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
 import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
@@ -40,24 +40,24 @@ import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.core.io.Resource;
 import org.springframework.core.io.UrlResource;
-import org.springframework.http.HttpHeaders;
-import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;
-import org.springframework.transaction.support.TransactionSynchronization;
-import org.springframework.transaction.support.TransactionSynchronizationManager;
+import org.springframework.http.HttpHeaders;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+import org.springframework.transaction.support.TransactionSynchronization;
+import org.springframework.transaction.support.TransactionSynchronizationManager;
 
 import java.io.File;
 import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.attribute.BasicFileAttributes;
-import java.time.LocalDateTime;
-import java.time.ZoneId;
-import java.time.format.DateTimeFormatter;
-import java.util.*;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.time.LocalDateTime;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
 import java.util.concurrent.CompletableFuture;
 import java.util.function.Function;
 import java.util.stream.Collectors;
@@ -70,24 +70,24 @@ import java.util.stream.Stream;
 @Service
 @Transactional
 public class DatasetFileApplicationService {
-    private static final String PDF_FILE_TYPE = "pdf";
-    private static final String DOC_FILE_TYPE = "doc";
-    private static final String DOCX_FILE_TYPE = "docx";
-    private static final String XLS_FILE_TYPE = "xls";
-    private static final String XLSX_FILE_TYPE = "xlsx";
-    private static final Set<String> DOCUMENT_TEXT_FILE_TYPES = Set.of(
-        PDF_FILE_TYPE,
-        DOC_FILE_TYPE,
-        DOCX_FILE_TYPE,
-        XLS_FILE_TYPE,
-        XLSX_FILE_TYPE
-    );
-    private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
-
-    private final DatasetFileRepository datasetFileRepository;
-    private final DatasetRepository datasetRepository;
-    private final FileService fileService;
-    private final PdfTextExtractAsyncService pdfTextExtractAsyncService;
+    private static final String PDF_FILE_TYPE = "pdf"; // file type names are lower case; lookups lower-case the type with Locale.ROOT first
+    private static final String DOC_FILE_TYPE = "doc";
+    private static final String DOCX_FILE_TYPE = "docx";
+    private static final String XLS_FILE_TYPE = "xls";
+    private static final String XLSX_FILE_TYPE = "xlsx";
+    private static final Set<String> DOCUMENT_TEXT_FILE_TYPES = Set.of( // types checked by isSourceDocument and triggerPdfTextExtraction
+        PDF_FILE_TYPE,
+        DOC_FILE_TYPE,
+        DOCX_FILE_TYPE,
+        XLS_FILE_TYPE,
+        XLSX_FILE_TYPE
+    );
+    private static final String DERIVED_METADATA_KEY = "derived_from_file_id"; // metadata JSON key whose non-null value marks a derived file
+
+    private final DatasetFileRepository datasetFileRepository;
+    private final DatasetRepository datasetRepository;
+    private final FileService fileService;
+    private final PdfTextExtractAsyncService pdfTextExtractAsyncService;
 
     @Value("${datamate.data-management.base-path:/dataset}")
     private String datasetBasePath;
@@ -123,61 +123,65 @@ public class DatasetFileApplicationService {
      * @param status                状态过滤
      * @param name                  文件名模糊查询
      * @param hasAnnotation         是否有标注
-     * @param excludeSourceDocuments 是否排除源文档（PDF/DOC/DOCX/XLS/XLSX）
+     * @param excludeSourceDocuments 是否排除源文档（PDF/DOC/DOCX/XLS/XLSX）
      * @param pagingQuery           分页参数
      * @return 分页文件列表
      */
     @Transactional(readOnly = true)
-    public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name,
-                                                      Boolean hasAnnotation, boolean excludeSourceDocuments, PagingQuery pagingQuery) {
-        IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
-        IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
-        
-        if (excludeSourceDocuments) {
-            // 过滤掉源文档文件（PDF/DOC/DOCX/XLS/XLSX），用于标注场景只展示派生文件
-            List<DatasetFile> filteredRecords = files.getRecords().stream()
-                .filter(file -> !isSourceDocument(file))
-                .collect(Collectors.toList());
-
-            // 重新构建分页结果
-            Page<DatasetFile> filteredPage = new Page<>(files.getCurrent(), files.getSize(), files.getTotal());
-            filteredPage.setRecords(filteredRecords);
-            return PagedResponse.of(filteredPage);
-        }
-        
-        return PagedResponse.of(files);
-    }
+    public PagedResponse<DatasetFile> getDatasetFiles(String datasetId, String fileType, String status, String name,
+                                                      Boolean hasAnnotation, boolean excludeSourceDocuments, PagingQuery pagingQuery) {
+        IPage<DatasetFile> page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize());
+        IPage<DatasetFile> files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, hasAnnotation, page);
+        // `files` is the page returned by the repository for the given criteria and paging parameters.
+        if (excludeSourceDocuments) {
+            // Filter out source documents (PDF/DOC/DOCX/XLS/XLSX) so annotation scenarios only show derived files
+            List<DatasetFile> filteredRecords = files.getRecords().stream()
+                .filter(file -> !isSourceDocument(file))
+                .collect(Collectors.toList());
+
+            // Rebuild the paged result; NOTE(review): total still comes from the unfiltered query, so it also counts the removed source documents
+            Page<DatasetFile> filteredPage = new Page<>(files.getCurrent(), files.getSize(), files.getTotal());
+            filteredPage.setRecords(filteredRecords);
+            return PagedResponse.of(filteredPage);
+        }
+        // No filtering requested: return the repository page unchanged.
+        return PagedResponse.of(files);
+    }
 
     /**
      * 获取数据集文件列表
      */
     @Transactional(readOnly = true)
-    public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, boolean excludeDerivedFiles, PagingQuery pagingQuery) {
-        Dataset dataset = datasetRepository.getById(datasetId);
-        int page = Math.max(pagingQuery.getPage(), 1);
-        int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 20 : pagingQuery.getSize();
-        if (dataset == null) {
-            return PagedResponse.of(new Page<>(page, size));
-        }
-        String datasetPath = dataset.getPath();
-        Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
-        Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
-            .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
-        Set<String> derivedFilePaths = excludeDerivedFiles
-            ? datasetFilesMap.values().stream()
-                .filter(this::isDerivedFile)
-                .map(DatasetFile::getFilePath)
-                .filter(Objects::nonNull)
-                .collect(Collectors.toSet())
-            : Collections.emptySet();
-        try (Stream<Path> pathStream = Files.list(queryPath)) {
-            List<Path> allFiles = pathStream
-                .filter(path -> path.toString().startsWith(datasetPath))
-                .filter(path -> !excludeDerivedFiles || Files.isDirectory(path) || !derivedFilePaths.contains(path.toString()))
-                .sorted(Comparator
-                    .comparing((Path path) -> !Files.isDirectory(path))
-                    .thenComparing(path -> path.getFileName().toString()))
-                .collect(Collectors.toList());
+    public PagedResponse<DatasetFile> getDatasetFilesWithDirectory(String datasetId, String prefix, boolean excludeDerivedFiles, PagingQuery pagingQuery) {
+        Dataset dataset = datasetRepository.getById(datasetId);
+        int page = Math.max(pagingQuery.getPage(), 1);
+        int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 20 : pagingQuery.getSize();
+        if (dataset == null) {
+            return PagedResponse.of(new Page<>(page, size));
+        }
+        String datasetPath = dataset.getPath();
+        Path queryPath = Path.of(dataset.getPath() + File.separator + prefix);
+        Map<String, DatasetFile> datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId)
+            .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity()));
+        Set<String> derivedFilePaths = excludeDerivedFiles
+            ? datasetFilesMap.values().stream()
+                .filter(this::isDerivedFile)
+                .map(DatasetFile::getFilePath)
+                .filter(Objects::nonNull)
+                .collect(Collectors.toSet())
+            : Collections.emptySet();
+        // Return an empty page unless the query path is an existing directory (a new dataset may not have one yet); Files.list would otherwise fail and log an error.
+        if (!Files.isDirectory(queryPath)) {
+            return new PagedResponse<>(page, size, 0, 0, Collections.emptyList());
+        }
+        try (Stream<Path> pathStream = Files.list(queryPath)) {
+            List<Path> allFiles = pathStream
+                .filter(path -> path.toString().startsWith(datasetPath))
+                .filter(path -> !excludeDerivedFiles || Files.isDirectory(path) || !derivedFilePaths.contains(path.toString()))
+                .sorted(Comparator
+                    .comparing((Path path) -> !Files.isDirectory(path))
+                    .thenComparing(path -> path.getFileName().toString()))
+                .collect(Collectors.toList());
 
             // 计算分页
             int total = allFiles.size();
@@ -195,15 +199,15 @@ public class DatasetFileApplicationService {
             List<DatasetFile> datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList();
 
             return new PagedResponse<>(page, size, total, totalPages, datasetFiles);
-        } catch (IOException e) {
-            log.error("list dataset path error", e);
-            return PagedResponse.of(new Page<>(page, size));
-        }
-    }
+        } catch (IOException e) {
+            log.error("list dataset path error", e);
+            return PagedResponse.of(new Page<>(page, size));
+        }
+    }
 
-    private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) {
-        DatasetFile datasetFile = new DatasetFile();
-        LocalDateTime localDateTime = LocalDateTime.now();
+    private DatasetFile getDatasetFile(Path path, Map<String, DatasetFile> datasetFilesMap) {
+        DatasetFile datasetFile = new DatasetFile();
+        LocalDateTime localDateTime = LocalDateTime.now();
         try {
             localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime();
         } catch (IOException e) {
@@ -254,37 +258,37 @@ public class DatasetFileApplicationService {
                 datasetFile = exist;
             }
         }
-        return datasetFile;
-    }
-
-    private boolean isSourceDocument(DatasetFile datasetFile) {
-        if (datasetFile == null) {
-            return false;
-        }
-        String fileType = datasetFile.getFileType();
-        if (fileType == null || fileType.isBlank()) {
-            return false;
-        }
-        return DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT));
-    }
-
-    private boolean isDerivedFile(DatasetFile datasetFile) {
-        if (datasetFile == null) {
-            return false;
-        }
-        String metadata = datasetFile.getMetadata();
-        if (metadata == null || metadata.isBlank()) {
-            return false;
-        }
-        try {
-            ObjectMapper mapper = new ObjectMapper();
-            Map<String, Object> metadataMap = mapper.readValue(metadata, new TypeReference<Map<String, Object>>() {});
-            return metadataMap.get(DERIVED_METADATA_KEY) != null;
-        } catch (Exception e) {
-            log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e);
-            return false;
-        }
-    }
+        return datasetFile;
+    }
+
+    private boolean isSourceDocument(DatasetFile datasetFile) { // true when the file type is one of DOCUMENT_TEXT_FILE_TYPES
+        if (datasetFile == null) {
+            return false;
+        }
+        String fileType = datasetFile.getFileType();
+        if (fileType == null || fileType.isBlank()) { // a missing or blank type is never treated as a source document
+            return false;
+        }
+        return DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT)); // case-insensitive match against the lower-case type set
+    }
+
+    private boolean isDerivedFile(DatasetFile datasetFile) { // true when the metadata JSON has a non-null "derived_from_file_id" entry
+        if (datasetFile == null) {
+            return false;
+        }
+        String metadata = datasetFile.getMetadata();
+        if (metadata == null || metadata.isBlank()) { // no metadata means the file cannot be marked as derived
+            return false;
+        }
+        try {
+            ObjectMapper mapper = new ObjectMapper(); // NOTE(review): a new ObjectMapper is built on every call; consider a shared instance
+            Map<String, Object> metadataMap = mapper.readValue(metadata, new TypeReference<Map<String, Object>>() {});
+            return metadataMap.get(DERIVED_METADATA_KEY) != null;
+        } catch (Exception e) { // metadata that cannot be parsed is treated as "not derived" and only logged at debug level
+            log.debug("Failed to parse dataset file metadata for derived detection: {}", datasetFile.getId(), e);
+            return false;
+        }
+    }
 
     /**
      * 获取文件详情
@@ -740,17 +744,17 @@ public class DatasetFileApplicationService {
         }
     }
 
-    /**
-     * 复制文件到数据集目录
-     *
-     * @param datasetId 数据集id
-     * @param req       复制文件请求
-     * @return 复制的文件列表
-     */
-    @Transactional
-    public List<DatasetFile> copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) {
-        Dataset dataset = datasetRepository.getById(datasetId);
-        BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
+    /**
+     * 复制文件到数据集目录
+     *
+     * @param datasetId 数据集id
+     * @param req       复制文件请求
+     * @return 复制的文件列表
+     */
+    @Transactional
+    public List<DatasetFile> copyFilesToDatasetDir(String datasetId, CopyFilesRequest req) {
+        Dataset dataset = datasetRepository.getById(datasetId);
+        BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
         List<DatasetFile> copiedFiles = new ArrayList<>();
         List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
         dataset.setFiles(existDatasetFiles);
@@ -780,80 +784,80 @@ public class DatasetFileApplicationService {
         datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100);
         dataset.active();
         datasetRepository.updateById(dataset);
-        CompletableFuture.runAsync(() -> copyFilesToDatasetDir(req.sourcePaths(), dataset));
-        return copiedFiles;
-    }
-
-    /**
-     * 复制文件到数据集目录（保留相对路径，适用于数据源导入）
-     *
-     * @param datasetId 数据集id
-     * @param sourceRoot 数据源根目录
-     * @param sourcePaths 源文件路径列表
-     * @return 复制的文件列表
-     */
-    @Transactional
-    public List<DatasetFile> copyFilesToDatasetDirWithSourceRoot(String datasetId, Path sourceRoot, List<String> sourcePaths) {
-        Dataset dataset = datasetRepository.getById(datasetId);
-        BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
-
-        Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
-        List<DatasetFile> copiedFiles = new ArrayList<>();
-        List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
-        dataset.setFiles(existDatasetFiles);
-        Map<String, DatasetFile> copyTargets = new LinkedHashMap<>();
-
-        for (String sourceFilePath : sourcePaths) {
-            if (sourceFilePath == null || sourceFilePath.isBlank()) {
-                continue;
-            }
-            Path sourcePath = Paths.get(sourceFilePath).toAbsolutePath().normalize();
-            if (!sourcePath.startsWith(normalizedRoot)) {
-                log.warn("Source file path is out of root: {}", sourceFilePath);
-                continue;
-            }
-            if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
-                log.warn("Source file does not exist or is not a regular file: {}", sourceFilePath);
-                continue;
-            }
-
-            Path relativePath = normalizedRoot.relativize(sourcePath);
-            String fileName = sourcePath.getFileName().toString();
-            File sourceFile = sourcePath.toFile();
-            LocalDateTime currentTime = LocalDateTime.now();
-            Path targetPath = Paths.get(dataset.getPath(), relativePath.toString());
-
-            DatasetFile datasetFile = DatasetFile.builder()
-                .id(UUID.randomUUID().toString())
-                .datasetId(datasetId)
-                .fileName(fileName)
-                .fileType(AnalyzerUtils.getExtension(fileName))
-                .fileSize(sourceFile.length())
-                .filePath(targetPath.toString())
-                .uploadTime(currentTime)
-                .lastAccessTime(currentTime)
-                .build();
-            setDatasetFileId(datasetFile, dataset);
-            dataset.addFile(datasetFile);
-            copiedFiles.add(datasetFile);
-            copyTargets.put(sourceFilePath, datasetFile);
-        }
-
-        if (copiedFiles.isEmpty()) {
-            return copiedFiles;
-        }
-        datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100);
-        dataset.active();
-        datasetRepository.updateById(dataset);
-        CompletableFuture.runAsync(() -> copyFilesToDatasetDirWithRelativePath(copyTargets, dataset, normalizedRoot));
-        return copiedFiles;
-    }
-
-    private void copyFilesToDatasetDir(List<String> sourcePaths, Dataset dataset) {
-        for (String sourcePath : sourcePaths) {
-            Path sourceFilePath = Paths.get(sourcePath);
-            Path targetFilePath = Paths.get(dataset.getPath(), sourceFilePath.getFileName().toString());
-            try {
+        CompletableFuture.runAsync(() -> copyFilesToDatasetDir(req.sourcePaths(), dataset));
+        return copiedFiles;
+    }
+
+    /**
+     * Copies files into the dataset directory while keeping their paths relative to the source root (intended for data source imports).
+     *
+     * @param datasetId   dataset id
+     * @param sourceRoot  root directory of the data source
+     * @param sourcePaths source file paths
+     * @return the dataset file records created for the accepted source files
+     */
+    @Transactional
+    public List<DatasetFile> copyFilesToDatasetDirWithSourceRoot(String datasetId, Path sourceRoot, List<String> sourcePaths) {
+        Dataset dataset = datasetRepository.getById(datasetId);
+        BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND);
+
+        Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
+        List<DatasetFile> copiedFiles = new ArrayList<>();
+        List<DatasetFile> existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId);
+        dataset.setFiles(existDatasetFiles);
+        Map<String, DatasetFile> copyTargets = new LinkedHashMap<>(); // source path -> record, in input order
+
+        for (String sourceFilePath : sourcePaths) {
+            if (sourceFilePath == null || sourceFilePath.isBlank()) {
+                continue;
+            }
+            Path sourcePath = Paths.get(sourceFilePath).toAbsolutePath().normalize();
+            if (!sourcePath.startsWith(normalizedRoot)) { // skip paths that resolve outside the source root
+                log.warn("Source file path is out of root: {}", sourceFilePath);
+                continue;
+            }
+            if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) { // only existing regular files are imported
+                log.warn("Source file does not exist or is not a regular file: {}", sourceFilePath);
+                continue;
+            }
+
+            Path relativePath = normalizedRoot.relativize(sourcePath);
+            String fileName = sourcePath.getFileName().toString();
+            File sourceFile = sourcePath.toFile();
+            LocalDateTime currentTime = LocalDateTime.now();
+            Path targetPath = Paths.get(dataset.getPath(), relativePath.toString()); // same relative location under the dataset path
+
+            DatasetFile datasetFile = DatasetFile.builder()
+                .id(UUID.randomUUID().toString())
+                .datasetId(datasetId)
+                .fileName(fileName)
+                .fileType(AnalyzerUtils.getExtension(fileName))
+                .fileSize(sourceFile.length())
+                .filePath(targetPath.toString())
+                .uploadTime(currentTime)
+                .lastAccessTime(currentTime)
+                .build();
+            setDatasetFileId(datasetFile, dataset);
+            dataset.addFile(datasetFile);
+            copiedFiles.add(datasetFile);
+            copyTargets.put(sourceFilePath, datasetFile);
+        }
+
+        if (copiedFiles.isEmpty()) { // nothing accepted: skip the database writes and the async copy
+            return copiedFiles;
+        }
+        datasetFileRepository.saveOrUpdateBatch(copiedFiles, 100);
+        dataset.active();
+        datasetRepository.updateById(dataset);
+        CompletableFuture.runAsync(() -> copyFilesToDatasetDirWithRelativePath(copyTargets, dataset, normalizedRoot)); // the physical copy runs asynchronously; NOTE(review): it is started before this method returns — confirm it does not rely on committed rows
+        return copiedFiles;
+    }
+
+    private void copyFilesToDatasetDir(List<String> sourcePaths, Dataset dataset) {
+        for (String sourcePath : sourcePaths) {
+            Path sourceFilePath = Paths.get(sourcePath);
+            Path targetFilePath = Paths.get(dataset.getPath(), sourceFilePath.getFileName().toString());
+            try {
                 Files.createDirectories(Path.of(dataset.getPath()));
                 Files.copy(sourceFilePath, targetFilePath);
                 DatasetFile datasetFile = datasetFileRepository.findByDatasetIdAndFileName(
@@ -863,39 +867,39 @@ public class DatasetFileApplicationService {
                 triggerPdfTextExtraction(dataset, datasetFile);
             } catch (IOException e) {
                 log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e);
-            }
-        }
-    }
-
-    private void copyFilesToDatasetDirWithRelativePath(
-        Map<String, DatasetFile> copyTargets,
-        Dataset dataset,
-        Path sourceRoot
-    ) {
-        Path datasetRoot = Paths.get(dataset.getPath()).toAbsolutePath().normalize();
-        Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
-        for (Map.Entry<String, DatasetFile> entry : copyTargets.entrySet()) {
-            Path sourcePath = Paths.get(entry.getKey()).toAbsolutePath().normalize();
-            if (!sourcePath.startsWith(normalizedRoot)) {
-                log.warn("Source file path is out of root: {}", sourcePath);
-                continue;
-            }
-            Path relativePath = normalizedRoot.relativize(sourcePath);
-            Path targetFilePath = datasetRoot.resolve(relativePath).normalize();
-            if (!targetFilePath.startsWith(datasetRoot)) {
-                log.warn("Target file path is out of dataset path: {}", targetFilePath);
-                continue;
-            }
-            try {
-                Files.createDirectories(targetFilePath.getParent());
-                Files.copy(sourcePath, targetFilePath);
-                triggerPdfTextExtraction(dataset, entry.getValue());
-            } catch (IOException e) {
-                log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e);
-            }
-        }
-    }
-
+            }
+        }
+    }
+
+    private void copyFilesToDatasetDirWithRelativePath( // copies each source file to the same relative location under the dataset directory
+        Map<String, DatasetFile> copyTargets,
+        Dataset dataset,
+        Path sourceRoot
+    ) {
+        Path datasetRoot = Paths.get(dataset.getPath()).toAbsolutePath().normalize();
+        Path normalizedRoot = sourceRoot.toAbsolutePath().normalize();
+        for (Map.Entry<String, DatasetFile> entry : copyTargets.entrySet()) { // key: source file path, value: its dataset file record
+            Path sourcePath = Paths.get(entry.getKey()).toAbsolutePath().normalize();
+            if (!sourcePath.startsWith(normalizedRoot)) {
+                log.warn("Source file path is out of root: {}", sourcePath);
+                continue;
+            }
+            Path relativePath = normalizedRoot.relativize(sourcePath);
+            Path targetFilePath = datasetRoot.resolve(relativePath).normalize();
+            if (!targetFilePath.startsWith(datasetRoot)) { // never write outside the dataset directory
+                log.warn("Target file path is out of dataset path: {}", targetFilePath);
+                continue;
+            }
+            try {
+                Files.createDirectories(targetFilePath.getParent());
+                Files.copy(sourcePath, targetFilePath); // no copy options: fails if the target already exists
+                triggerPdfTextExtraction(dataset, entry.getValue());
+            } catch (IOException e) { // a failed copy is logged and the remaining files are still processed
+                log.error("Failed to copy file from {} to {}", sourcePath, targetFilePath, e);
+            }
+        }
+    }
+
     /**
      * 添加文件到数据集（仅创建数据库记录，不执行文件系统操作）
      *
@@ -952,31 +956,31 @@ public class DatasetFileApplicationService {
         return addedFiles;
     }
 
-    private void triggerPdfTextExtraction(Dataset dataset, DatasetFile datasetFile) {
-        if (dataset == null || datasetFile == null) {
-            return;
-        }
-        if (dataset.getDatasetType() != DatasetType.TEXT) {
-            return;
-        }
-        String fileType = datasetFile.getFileType();
-        if (fileType == null || !DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT))) {
-            return;
-        }
-        String datasetId = dataset.getId();
-        String fileId = datasetFile.getId();
-        if (datasetId == null || fileId == null) {
-            return;
-        }
-        if (TransactionSynchronizationManager.isSynchronizationActive()) {
-            TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() {
-                @Override
-                public void afterCommit() {
-                    pdfTextExtractAsyncService.extractPdfText(datasetId, fileId);
-                }
-            });
-            return;
-        }
-        pdfTextExtractAsyncService.extractPdfText(datasetId, fileId);
-    }
-}
+    private void triggerPdfTextExtraction(Dataset dataset, DatasetFile datasetFile) { // requests text extraction for document-type files of TEXT datasets
+        if (dataset == null || datasetFile == null) {
+            return;
+        }
+        if (dataset.getDatasetType() != DatasetType.TEXT) { // only TEXT datasets are processed
+            return;
+        }
+        String fileType = datasetFile.getFileType();
+        if (fileType == null || !DOCUMENT_TEXT_FILE_TYPES.contains(fileType.toLowerCase(Locale.ROOT))) { // only pdf/doc/docx/xls/xlsx
+            return;
+        }
+        String datasetId = dataset.getId();
+        String fileId = datasetFile.getId();
+        if (datasetId == null || fileId == null) { // both ids are required by the extraction call
+            return;
+        }
+        if (TransactionSynchronizationManager.isSynchronizationActive()) { // synchronization is active on this thread: defer the call to afterCommit
+            TransactionSynchronizationManager.registerSynchronization(new TransactionSynchronization() {
+                @Override
+                public void afterCommit() {
+                    pdfTextExtractAsyncService.extractPdfText(datasetId, fileId);
+                }
+            });
+            return;
+        }
+        pdfTextExtractAsyncService.extractPdfText(datasetId, fileId); // no active synchronization: call right away
+    }
+}