feat(data-management): 添加Office文档预览功能

- 集成LibreOffice转换器实现DOC/DOCX转PDF功能
- 新增DatasetFilePreviewService处理预览文件管理
- 新增DatasetFilePreviewAsyncService异步转换任务
- 在文件删除时同步清理预览文件
- 前端实现Office文档预览状态轮询机制
- 添加预览API接口支持状态查询和转换触发
- 优化文件预览界面显示转换进度和错误信息
This commit is contained in:
2026-02-01 22:26:05 +08:00
parent f06d6e5a7e
commit 438acebb89
10 changed files with 833 additions and 179 deletions

View File

@@ -88,6 +88,7 @@ public class DatasetFileApplicationService {
private final DatasetRepository datasetRepository; private final DatasetRepository datasetRepository;
private final FileService fileService; private final FileService fileService;
private final PdfTextExtractAsyncService pdfTextExtractAsyncService; private final PdfTextExtractAsyncService pdfTextExtractAsyncService;
private final DatasetFilePreviewService datasetFilePreviewService;
@Value("${datamate.data-management.base-path:/dataset}") @Value("${datamate.data-management.base-path:/dataset}")
private String datasetBasePath; private String datasetBasePath;
@@ -99,11 +100,13 @@ public class DatasetFileApplicationService {
public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository, public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
DatasetRepository datasetRepository, DatasetRepository datasetRepository,
FileService fileService, FileService fileService,
PdfTextExtractAsyncService pdfTextExtractAsyncService) { PdfTextExtractAsyncService pdfTextExtractAsyncService,
DatasetFilePreviewService datasetFilePreviewService) {
this.datasetFileRepository = datasetFileRepository; this.datasetFileRepository = datasetFileRepository;
this.datasetRepository = datasetRepository; this.datasetRepository = datasetRepository;
this.fileService = fileService; this.fileService = fileService;
this.pdfTextExtractAsyncService = pdfTextExtractAsyncService; this.pdfTextExtractAsyncService = pdfTextExtractAsyncService;
this.datasetFilePreviewService = datasetFilePreviewService;
} }
/** /**
@@ -316,6 +319,7 @@ public class DatasetFileApplicationService {
datasetFileRepository.removeById(fileId); datasetFileRepository.removeById(fileId);
dataset.removeFile(file); dataset.removeFile(file);
datasetRepository.updateById(dataset); datasetRepository.updateById(dataset);
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, fileId);
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 // 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
if (file.getFilePath().startsWith(dataset.getPath())) { if (file.getFilePath().startsWith(dataset.getPath())) {
try { try {
@@ -688,6 +692,7 @@ public class DatasetFileApplicationService {
for (DatasetFile file : filesToDelete) { for (DatasetFile file : filesToDelete) {
datasetFileRepository.removeById(file.getId()); datasetFileRepository.removeById(file.getId());
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, file.getId());
} }
// 删除文件系统中的目录 // 删除文件系统中的目录

View File

@@ -0,0 +1,171 @@
package com.datamate.datamanagement.application;
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Set;
/**
* 数据集文件预览转换异步任务
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class DatasetFilePreviewAsyncService {
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
private static final String DATASET_PREVIEW_DIR = "dataset-previews";
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
private static final String PATH_SEPARATOR = "/";
private static final int MAX_ERROR_LENGTH = 500;
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
private final DatasetFileRepository datasetFileRepository;
private final DataManagementProperties dataManagementProperties;
private final ObjectMapper objectMapper = new ObjectMapper();
@Async
public void convertPreviewAsync(String fileId) {
if (StringUtils.isBlank(fileId)) {
return;
}
DatasetFile file = datasetFileRepository.getById(fileId);
if (file == null) {
return;
}
String extension = resolveFileExtension(resolveOriginalName(file));
if (!OFFICE_EXTENSIONS.contains(extension)) {
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "仅支持 DOC/DOCX 转换");
return;
}
if (StringUtils.isBlank(file.getFilePath())) {
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "源文件路径为空");
return;
}
Path sourcePath = Paths.get(file.getFilePath()).toAbsolutePath().normalize();
if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "源文件不存在");
return;
}
KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
.readPreviewInfo(file.getMetadata(), objectMapper);
String previewRelativePath = StringUtils.defaultIfBlank(
previewInfo.pdfPath(),
resolvePreviewRelativePath(file.getDatasetId(), file.getId())
);
Path targetPath = resolvePreviewStoragePath(previewRelativePath);
try {
ensureParentDirectory(targetPath);
LibreOfficeConverter.convertToPdf(sourcePath, targetPath);
updatePreviewStatus(file, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
} catch (Exception e) {
log.error("dataset preview convert failed, fileId: {}", file.getId(), e);
updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, previewRelativePath, trimError(e.getMessage()));
}
}
private void updatePreviewStatus(
DatasetFile file,
KnowledgeItemPreviewStatus status,
String previewRelativePath,
String error
) {
if (file == null) {
return;
}
String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
file.getMetadata(),
objectMapper,
status,
previewRelativePath,
error,
nowText()
);
file.setMetadata(updatedMetadata);
datasetFileRepository.updateById(file);
}
private String resolveOriginalName(DatasetFile file) {
if (file == null) {
return "";
}
if (StringUtils.isNotBlank(file.getFileName())) {
return file.getFileName();
}
if (StringUtils.isNotBlank(file.getFilePath())) {
return Paths.get(file.getFilePath()).getFileName().toString();
}
return "";
}
private String resolveFileExtension(String fileName) {
if (StringUtils.isBlank(fileName)) {
return "";
}
int dotIndex = fileName.lastIndexOf('.');
if (dotIndex <= 0 || dotIndex >= fileName.length() - 1) {
return "";
}
return fileName.substring(dotIndex + 1).toLowerCase();
}
private String resolvePreviewRelativePath(String datasetId, String fileId) {
String relativePath = Paths.get(DATASET_PREVIEW_DIR, datasetId, fileId + PREVIEW_FILE_SUFFIX)
.toString();
return relativePath.replace("\\", PATH_SEPARATOR);
}
private Path resolvePreviewStoragePath(String relativePath) {
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
Path root = resolveUploadRootPath();
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
if (!target.startsWith(root)) {
throw new IllegalArgumentException("invalid preview path");
}
return target;
}
private Path resolveUploadRootPath() {
String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
return Paths.get(uploadDir).toAbsolutePath().normalize();
}
private void ensureParentDirectory(Path targetPath) {
try {
Path parent = targetPath.getParent();
if (parent != null) {
Files.createDirectories(parent);
}
} catch (Exception e) {
throw new IllegalStateException("创建预览目录失败", e);
}
}
private String trimError(String error) {
if (StringUtils.isBlank(error)) {
return "";
}
if (error.length() <= MAX_ERROR_LENGTH) {
return error;
}
return error.substring(0, MAX_ERROR_LENGTH);
}
private String nowText() {
return LocalDateTime.now().format(PREVIEW_TIME_FORMATTER);
}
}

View File

@@ -0,0 +1,233 @@
package com.datamate.datamanagement.application;
import com.datamate.common.infrastructure.exception.BusinessAssert;
import com.datamate.common.infrastructure.exception.CommonErrorCode;
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.interfaces.dto.DatasetFilePreviewStatusResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Objects;
import java.util.Set;
/**
 * Manages PDF previews of dataset Office documents (DOC/DOCX): reports the preview status,
 * triggers the asynchronous conversion, locates ready preview files and removes preview files.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class DatasetFilePreviewService {
    private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
    private static final String DATASET_PREVIEW_DIR = "dataset-previews";
    private static final String PREVIEW_FILE_SUFFIX = ".pdf";
    private static final String PATH_SEPARATOR = "/";
    private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
    /**
     * Age after which a PROCESSING status is considered abandoned and may be re-triggered.
     * NOTE(review): chosen to exceed the converter's own timeout; confirm against deployment needs.
     */
    private static final long PROCESSING_STALE_MINUTES = 10;

    private final DatasetFileRepository datasetFileRepository;
    private final DataManagementProperties dataManagementProperties;
    private final DatasetFilePreviewAsyncService datasetFilePreviewAsyncService;
    private final ObjectMapper objectMapper = new ObjectMapper();

    /**
     * Returns the preview status stored in the file metadata.
     *
     * <p>If the metadata says {@code READY} but the preview PDF is missing on disk, the status is
     * rewritten to {@code FAILED} and that updated status is returned.
     *
     * @param datasetId id of the dataset the file must belong to
     * @param fileId    id of the dataset file
     * @return the current preview status
     */
    public DatasetFilePreviewStatusResponse getPreviewStatus(String datasetId, String fileId) {
        DatasetFile file = requireDatasetFile(datasetId, fileId);
        assertOfficeDocument(file);
        KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
                .readPreviewInfo(file.getMetadata(), objectMapper);
        if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && !previewPdfExists(file, previewInfo)) {
            previewInfo = markPreviewFailed(file, previewInfo, "预览文件不存在");
        }
        return buildResponse(previewInfo);
    }

    /**
     * Makes sure a preview exists or is being produced.
     *
     * <p>Returns immediately when the preview is ready and present on disk, or when a conversion
     * is in progress and not stale. Otherwise the status is set to {@code PROCESSING} and the
     * asynchronous conversion is started. A {@code PROCESSING} status older than
     * {@code PROCESSING_STALE_MINUTES} is treated as abandoned and re-triggered, so a conversion
     * that never reported back does not block previews forever.
     *
     * @param datasetId id of the dataset the file must belong to
     * @param fileId    id of the dataset file
     * @return the preview status after this call
     */
    public DatasetFilePreviewStatusResponse ensurePreview(String datasetId, String fileId) {
        DatasetFile file = requireDatasetFile(datasetId, fileId);
        assertOfficeDocument(file);
        KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
                .readPreviewInfo(file.getMetadata(), objectMapper);
        if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && previewPdfExists(file, previewInfo)) {
            return buildResponse(previewInfo);
        }
        if (previewInfo.status() == KnowledgeItemPreviewStatus.PROCESSING && !isProcessingStale(previewInfo)) {
            return buildResponse(previewInfo);
        }
        String previewRelativePath = resolvePreviewRelativePath(file.getDatasetId(), file.getId());
        String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
                file.getMetadata(),
                objectMapper,
                KnowledgeItemPreviewStatus.PROCESSING,
                previewRelativePath,
                null,
                nowText()
        );
        file.setMetadata(updatedMetadata);
        datasetFileRepository.updateById(file);
        datasetFilePreviewAsyncService.convertPreviewAsync(file.getId());
        KnowledgeItemPreviewMetadataHelper.PreviewInfo refreshed = KnowledgeItemPreviewMetadataHelper
                .readPreviewInfo(updatedMetadata, objectMapper);
        return buildResponse(refreshed);
    }

    /**
     * Tells whether the file name has a supported Office extension (doc or docx, case-insensitive).
     *
     * @param fileName file name to inspect; may be null or blank
     * @return {@code true} for DOC/DOCX names
     */
    public boolean isOfficeDocument(String fileName) {
        // resolveFileExtension already lower-cases and yields "" when there is no extension.
        return OFFICE_EXTENSIONS.contains(resolveFileExtension(fileName));
    }

    /**
     * Locates the preview PDF of a file whose preview status is {@code READY}.
     *
     * <p>When the status is {@code READY} but the PDF is missing, the status is rewritten to
     * {@code FAILED}.
     *
     * @param datasetId dataset id used to build the default preview path
     * @param file      dataset file; may be null
     * @return the preview file, or {@code null} when the file is null, not ready or missing on disk
     */
    public PreviewFile resolveReadyPreviewFile(String datasetId, DatasetFile file) {
        if (file == null) {
            return null;
        }
        KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
                .readPreviewInfo(file.getMetadata(), objectMapper);
        if (previewInfo.status() != KnowledgeItemPreviewStatus.READY) {
            return null;
        }
        String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(datasetId, file.getId()));
        Path filePath = resolvePreviewStoragePath(relativePath);
        if (!Files.exists(filePath) || !Files.isRegularFile(filePath)) {
            markPreviewFailed(file, previewInfo, "预览文件不存在");
            return null;
        }
        String previewName = resolvePreviewPdfName(file);
        return new PreviewFile(filePath, previewName);
    }

    /**
     * Deletes the default preview PDF of a file without ever throwing.
     *
     * <p>Path resolution is part of the guarded section: this method is called from file-deletion
     * flows, where a null id or an invalid upload-dir configuration must not abort the deletion.
     *
     * @param datasetId id of the dataset
     * @param fileId    id of the dataset file
     */
    public void deletePreviewFileQuietly(String datasetId, String fileId) {
        try {
            String relativePath = resolvePreviewRelativePath(datasetId, fileId);
            Path filePath = resolvePreviewStoragePath(relativePath);
            Files.deleteIfExists(filePath);
        } catch (Exception e) {
            log.warn("delete dataset preview pdf error, fileId: {}", fileId, e);
        }
    }

    /** Maps preview info to the response DTO; a missing status is reported as {@code PENDING}. */
    private DatasetFilePreviewStatusResponse buildResponse(KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
        DatasetFilePreviewStatusResponse response = new DatasetFilePreviewStatusResponse();
        KnowledgeItemPreviewStatus status = previewInfo.status() == null
                ? KnowledgeItemPreviewStatus.PENDING
                : previewInfo.status();
        response.setStatus(status);
        response.setPreviewError(previewInfo.error());
        response.setUpdatedAt(previewInfo.updatedAt());
        return response;
    }

    /**
     * Tells whether a PROCESSING status is older than {@code PROCESSING_STALE_MINUTES}.
     * A blank or unparsable timestamp is treated as not stale, which keeps the conversion
     * from being re-triggered on every call when its age cannot be determined.
     */
    private boolean isProcessingStale(KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
        String updatedAt = previewInfo.updatedAt();
        if (StringUtils.isBlank(updatedAt)) {
            return false;
        }
        try {
            LocalDateTime startedAt = LocalDateTime.parse(updatedAt, PREVIEW_TIME_FORMATTER);
            return startedAt.isBefore(LocalDateTime.now().minusMinutes(PROCESSING_STALE_MINUTES));
        } catch (java.time.format.DateTimeParseException e) {
            log.warn("unparsable preview updatedAt: {}", updatedAt, e);
            return false;
        }
    }

    /** Loads the file and asserts that both ids are non-blank and the file belongs to the dataset. */
    private DatasetFile requireDatasetFile(String datasetId, String fileId) {
        BusinessAssert.isTrue(StringUtils.isNotBlank(datasetId), CommonErrorCode.PARAM_ERROR);
        BusinessAssert.isTrue(StringUtils.isNotBlank(fileId), CommonErrorCode.PARAM_ERROR);
        DatasetFile datasetFile = datasetFileRepository.getById(fileId);
        BusinessAssert.notNull(datasetFile, CommonErrorCode.PARAM_ERROR);
        BusinessAssert.isTrue(Objects.equals(datasetFile.getDatasetId(), datasetId), CommonErrorCode.PARAM_ERROR);
        return datasetFile;
    }

    /** Asserts that the file has a DOC/DOCX extension. */
    private void assertOfficeDocument(DatasetFile file) {
        BusinessAssert.notNull(file, CommonErrorCode.PARAM_ERROR);
        String extension = resolveFileExtension(resolveOriginalName(file));
        BusinessAssert.isTrue(OFFICE_EXTENSIONS.contains(extension), CommonErrorCode.PARAM_ERROR);
    }

    /** Returns the stored file name, falling back to the last segment of the file path, else "". */
    private String resolveOriginalName(DatasetFile file) {
        if (file == null) {
            return "";
        }
        if (StringUtils.isNotBlank(file.getFileName())) {
            return file.getFileName();
        }
        if (StringUtils.isNotBlank(file.getFilePath())) {
            return Paths.get(file.getFilePath()).getFileName().toString();
        }
        return "";
    }

    /** Returns the lower-cased extension without the dot, or "" when there is none. */
    private String resolveFileExtension(String fileName) {
        if (StringUtils.isBlank(fileName)) {
            return "";
        }
        int dotIndex = fileName.lastIndexOf('.');
        // A leading dot (hidden file) or a trailing dot does not count as an extension.
        if (dotIndex <= 0 || dotIndex >= fileName.length() - 1) {
            return "";
        }
        return fileName.substring(dotIndex + 1).toLowerCase();
    }

    /** Derives the download name of the preview: the original name with its extension replaced by ".pdf". */
    private String resolvePreviewPdfName(DatasetFile file) {
        String originalName = resolveOriginalName(file);
        if (StringUtils.isBlank(originalName)) {
            return "预览.pdf";
        }
        int dotIndex = originalName.lastIndexOf('.');
        if (dotIndex <= 0) {
            return originalName + PREVIEW_FILE_SUFFIX;
        }
        return originalName.substring(0, dotIndex) + PREVIEW_FILE_SUFFIX;
    }

    /** Checks that the preview PDF (stored path, else default path) exists as a regular file. */
    private boolean previewPdfExists(DatasetFile file, KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
        String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(file.getDatasetId(), file.getId()));
        Path filePath = resolvePreviewStoragePath(relativePath);
        return Files.exists(filePath) && Files.isRegularFile(filePath);
    }

    /** Persists a {@code FAILED} status with the given error and returns the re-read preview info. */
    private KnowledgeItemPreviewMetadataHelper.PreviewInfo markPreviewFailed(
            DatasetFile file,
            KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo,
            String error
    ) {
        String relativePath = StringUtils.defaultIfBlank(previewInfo.pdfPath(), resolvePreviewRelativePath(file.getDatasetId(), file.getId()));
        String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
                file.getMetadata(),
                objectMapper,
                KnowledgeItemPreviewStatus.FAILED,
                relativePath,
                error,
                nowText()
        );
        file.setMetadata(updatedMetadata);
        datasetFileRepository.updateById(file);
        return KnowledgeItemPreviewMetadataHelper.readPreviewInfo(updatedMetadata, objectMapper);
    }

    /** Builds the default preview path relative to the upload root, always using "/" separators. */
    private String resolvePreviewRelativePath(String datasetId, String fileId) {
        String relativePath = Paths.get(DATASET_PREVIEW_DIR, datasetId, fileId + PREVIEW_FILE_SUFFIX)
                .toString();
        return relativePath.replace("\\", PATH_SEPARATOR);
    }

    /**
     * Resolves a relative preview path against the upload root and asserts that the result
     * stays inside that root.
     */
    Path resolvePreviewStoragePath(String relativePath) {
        String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
        Path root = resolveUploadRootPath();
        Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
        BusinessAssert.isTrue(target.startsWith(root), CommonErrorCode.PARAM_ERROR);
        return target;
    }

    /** Returns the configured upload directory as an absolute, normalized path; it must be non-blank. */
    private Path resolveUploadRootPath() {
        String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
        BusinessAssert.isTrue(StringUtils.isNotBlank(uploadDir), CommonErrorCode.PARAM_ERROR);
        return Paths.get(uploadDir).toAbsolutePath().normalize();
    }

    /** Returns the current local date-time formatted as ISO_LOCAL_DATE_TIME. */
    private String nowText() {
        return LocalDateTime.now().format(PREVIEW_TIME_FORMATTER);
    }

    /**
     * A preview PDF on disk together with the file name to present to the client.
     *
     * @param filePath absolute path of the preview PDF
     * @param fileName display name of the preview PDF
     */
    public record PreviewFile(Path filePath, String fileName) {
    }
}

View File

@@ -12,16 +12,11 @@ import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.nio.file.Paths; import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.time.Duration;
import java.time.LocalDateTime; import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter; import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Set; import java.util.Set;
/** /**
@@ -36,8 +31,6 @@ public class KnowledgeItemPreviewAsyncService {
private static final String PREVIEW_SUB_DIR = "preview"; private static final String PREVIEW_SUB_DIR = "preview";
private static final String PREVIEW_FILE_SUFFIX = ".pdf"; private static final String PREVIEW_FILE_SUFFIX = ".pdf";
private static final String PATH_SEPARATOR = "/"; private static final String PATH_SEPARATOR = "/";
private static final String LIBREOFFICE_COMMAND = "soffice";
private static final Duration CONVERT_TIMEOUT = Duration.ofMinutes(5);
private static final int MAX_ERROR_LENGTH = 500; private static final int MAX_ERROR_LENGTH = 500;
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME; private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
@@ -79,7 +72,7 @@ public class KnowledgeItemPreviewAsyncService {
ensureParentDirectory(targetPath); ensureParentDirectory(targetPath);
try { try {
convertOfficeToPdfByLibreOffice(sourcePath, targetPath); LibreOfficeConverter.convertToPdf(sourcePath, targetPath);
updatePreviewStatus(item, KnowledgeItemPreviewStatus.READY, previewRelativePath, null); updatePreviewStatus(item, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
} catch (Exception e) { } catch (Exception e) {
log.error("preview convert failed, itemId: {}", item.getId(), e); log.error("preview convert failed, itemId: {}", item.getId(), e);
@@ -87,69 +80,6 @@ public class KnowledgeItemPreviewAsyncService {
} }
} }
private void convertOfficeToPdfByLibreOffice(Path sourcePath, Path targetPath) throws Exception {
Path outputDir = targetPath.getParent();
ensureParentDirectory(targetPath);
List<String> command = List.of(
LIBREOFFICE_COMMAND,
"--headless",
"--nologo",
"--nolockcheck",
"--nodefault",
"--nofirststartwizard",
"--convert-to",
"pdf",
"--outdir",
outputDir.toString(),
sourcePath.toString()
);
ProcessBuilder processBuilder = new ProcessBuilder(command);
processBuilder.redirectErrorStream(true);
Process process = processBuilder.start();
boolean finished = process.waitFor(CONVERT_TIMEOUT.toMillis(), java.util.concurrent.TimeUnit.MILLISECONDS);
String output = readProcessOutput(process.getInputStream());
if (!finished) {
process.destroyForcibly();
throw new IllegalStateException("LibreOffice 转换超时");
}
if (process.exitValue() != 0) {
throw new IllegalStateException("LibreOffice 转换失败: " + output);
}
Path generated = outputDir.resolve(stripExtension(sourcePath.getFileName().toString()) + PREVIEW_FILE_SUFFIX);
if (!Files.exists(generated)) {
throw new IllegalStateException("LibreOffice 输出文件不存在");
}
if (!generated.equals(targetPath)) {
Files.move(generated, targetPath, StandardCopyOption.REPLACE_EXISTING);
}
}
private String readProcessOutput(InputStream inputStream) throws IOException {
if (inputStream == null) {
return "";
}
byte[] buffer = new byte[1024];
StringBuilder builder = new StringBuilder();
int total = 0;
int read;
while ((read = inputStream.read(buffer)) >= 0) {
if (read == 0) {
continue;
}
int remaining = MAX_ERROR_LENGTH - total;
if (remaining <= 0) {
break;
}
int toAppend = Math.min(remaining, read);
builder.append(new String(buffer, 0, toAppend, StandardCharsets.UTF_8));
total += toAppend;
if (total >= MAX_ERROR_LENGTH) {
break;
}
}
return builder.toString();
}
private void updatePreviewStatus( private void updatePreviewStatus(
KnowledgeItem item, KnowledgeItem item,
KnowledgeItemPreviewStatus status, KnowledgeItemPreviewStatus status,
@@ -195,14 +125,6 @@ public class KnowledgeItemPreviewAsyncService {
return fileName.substring(dotIndex + 1).toLowerCase(); return fileName.substring(dotIndex + 1).toLowerCase();
} }
private String stripExtension(String fileName) {
if (StringUtils.isBlank(fileName)) {
return "preview";
}
int dotIndex = fileName.lastIndexOf('.');
return dotIndex <= 0 ? fileName : fileName.substring(0, dotIndex);
}
private String resolvePreviewRelativePath(String setId, String itemId) { private String resolvePreviewRelativePath(String setId, String itemId) {
String relativePath = Paths.get(KNOWLEDGE_ITEM_UPLOAD_DIR, setId, PREVIEW_SUB_DIR, itemId + PREVIEW_FILE_SUFFIX) String relativePath = Paths.get(KNOWLEDGE_ITEM_UPLOAD_DIR, setId, PREVIEW_SUB_DIR, itemId + PREVIEW_FILE_SUFFIX)
.toString(); .toString();

View File

@@ -0,0 +1,93 @@
package com.datamate.datamanagement.application;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.time.Duration;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
/**
 * Utility that converts documents to PDF by running the LibreOffice command line
 * ({@code soffice --headless --convert-to pdf}).
 */
public final class LibreOfficeConverter {
    private static final String LIBREOFFICE_COMMAND = "soffice";
    private static final Duration CONVERT_TIMEOUT = Duration.ofMinutes(5);
    /** Maximum number of bytes of process output included in a failure message. */
    private static final int MAX_OUTPUT_LENGTH = 500;
    private static final String PDF_SUFFIX = ".pdf";
    private static final String WORK_DIR_PREFIX = "lo-convert-";
    private static final String OUTPUT_LOG_NAME = "soffice-output.log";

    private LibreOfficeConverter() {
    }

    /**
     * Converts {@code sourcePath} to a PDF stored at {@code targetPath}, replacing an existing file.
     *
     * <p>LibreOffice names its output after the source file, so the conversion runs in a private
     * working directory next to the target and the result is moved into place afterwards. This
     * keeps concurrent conversions of equally named sources from overwriting each other. The
     * working directory is removed when the method returns.
     *
     * @param sourcePath document to convert
     * @param targetPath destination of the generated PDF; its parent directory is created if missing
     * @throws NullPointerException     if either argument is null
     * @throws IllegalArgumentException if {@code sourcePath} has no file name or {@code targetPath}
     *                                  has no parent directory
     * @throws IllegalStateException    if the conversion times out, exits with a non-zero code,
     *                                  or produces no output file
     * @throws IOException              if the process cannot be started or files cannot be moved
     * @throws InterruptedException     if the calling thread is interrupted while waiting
     */
    public static void convertToPdf(Path sourcePath, Path targetPath) throws Exception {
        Objects.requireNonNull(sourcePath, "sourcePath must not be null");
        Objects.requireNonNull(targetPath, "targetPath must not be null");
        Path sourceFileName = sourcePath.getFileName();
        if (sourceFileName == null) {
            throw new IllegalArgumentException("sourcePath has no file name: " + sourcePath);
        }
        // Resolve against the working directory first so a bare file name still has a parent.
        Path outputDir = targetPath.toAbsolutePath().getParent();
        if (outputDir == null) {
            throw new IllegalArgumentException("targetPath has no parent directory: " + targetPath);
        }
        Files.createDirectories(outputDir);
        // Same directory as the target so the final move stays on one file system.
        Path workDir = Files.createTempDirectory(outputDir, WORK_DIR_PREFIX);
        try {
            runLibreOffice(sourcePath, workDir, workDir.resolve(OUTPUT_LOG_NAME));
            Path generated = workDir.resolve(stripExtension(sourceFileName.toString()) + PDF_SUFFIX);
            if (!Files.exists(generated)) {
                throw new IllegalStateException("LibreOffice 输出文件不存在");
            }
            Files.move(generated, targetPath, StandardCopyOption.REPLACE_EXISTING);
        } finally {
            deleteRecursivelyQuietly(workDir);
        }
    }

    /**
     * Runs LibreOffice and waits for it, enforcing {@code CONVERT_TIMEOUT}.
     *
     * <p>Process output goes to {@code outputLog} rather than a pipe: an unread pipe can fill up
     * and stall the process, and reading a pipe after a timeout blocks until the process exits.
     */
    private static void runLibreOffice(Path sourcePath, Path outputDir, Path outputLog)
            throws IOException, InterruptedException {
        List<String> command = List.of(
                LIBREOFFICE_COMMAND,
                "--headless",
                "--nologo",
                "--nolockcheck",
                "--nodefault",
                "--nofirststartwizard",
                "--convert-to",
                "pdf",
                "--outdir",
                outputDir.toString(),
                // Absolute path: a relative name starting with '-' cannot be mistaken for an option.
                sourcePath.toAbsolutePath().toString()
        );
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        processBuilder.redirectErrorStream(true);
        processBuilder.redirectOutput(outputLog.toFile());
        Process process = processBuilder.start();
        boolean finished;
        try {
            finished = process.waitFor(CONVERT_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            // Do not leave the child running when the caller gives up.
            process.destroyForcibly();
            Thread.currentThread().interrupt();
            throw e;
        }
        if (!finished) {
            process.destroyForcibly();
            throw new IllegalStateException("LibreOffice 转换超时");
        }
        if (process.exitValue() != 0) {
            throw new IllegalStateException("LibreOffice 转换失败: " + readProcessOutput(outputLog));
        }
    }

    /** Reads at most {@code MAX_OUTPUT_LENGTH} bytes of captured output, decoded as UTF-8. */
    private static String readProcessOutput(Path outputLog) {
        try (InputStream inputStream = Files.newInputStream(outputLog)) {
            return new String(inputStream.readNBytes(MAX_OUTPUT_LENGTH), StandardCharsets.UTF_8);
        } catch (IOException ignored) {
            // The output only enriches the failure message; its absence must not hide the failure.
            return "";
        }
    }

    /** Deletes a directory tree, children first; failures are ignored because this is cleanup only. */
    private static void deleteRecursivelyQuietly(Path directory) {
        try (Stream<Path> paths = Files.walk(directory)) {
            paths.sorted(Comparator.reverseOrder()).forEach(path -> {
                try {
                    Files.deleteIfExists(path);
                } catch (IOException ignored) {
                    // Best-effort cleanup of the temporary working directory.
                }
            });
        } catch (IOException | UncheckedIOException ignored) {
            // Best-effort cleanup of the temporary working directory.
        }
    }

    /** Returns the file name without its last extension; blank input yields "preview". */
    private static String stripExtension(String fileName) {
        if (fileName == null || fileName.isBlank()) {
            return "preview";
        }
        int dotIndex = fileName.lastIndexOf('.');
        return dotIndex <= 0 ? fileName : fileName.substring(0, dotIndex);
    }
}

View File

@@ -0,0 +1,16 @@
package com.datamate.datamanagement.interfaces.dto;
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
import lombok.Getter;
import lombok.Setter;
/**
 * Response body describing the preview status of a dataset file.
 */
@Getter
@Setter
public class DatasetFilePreviewStatusResponse {
// Current preview status of the file.
private KnowledgeItemPreviewStatus status;
// Error text associated with the preview, if any.
private String previewError;
// Time of the last status update, carried as text.
private String updatedAt;
}

View File

@@ -6,11 +6,13 @@ import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse; import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery; import com.datamate.common.interfaces.PagingQuery;
import com.datamate.datamanagement.application.DatasetFileApplicationService; import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.application.DatasetFilePreviewService;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile; import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest; import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFilePreviewStatusResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse; import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
@@ -19,6 +21,7 @@ import jakarta.validation.Valid;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource; import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders; import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus; import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
@@ -37,10 +40,13 @@ import java.util.List;
public class DatasetFileController { public class DatasetFileController {
private final DatasetFileApplicationService datasetFileApplicationService; private final DatasetFileApplicationService datasetFileApplicationService;
private final DatasetFilePreviewService datasetFilePreviewService;
@Autowired @Autowired
public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) { public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService,
DatasetFilePreviewService datasetFilePreviewService) {
this.datasetFileApplicationService = datasetFileApplicationService; this.datasetFileApplicationService = datasetFileApplicationService;
this.datasetFilePreviewService = datasetFilePreviewService;
} }
@GetMapping @GetMapping
@@ -120,6 +126,19 @@ public class DatasetFileController {
@PathVariable("fileId") String fileId) { @PathVariable("fileId") String fileId) {
try { try {
DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId); DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
if (datasetFilePreviewService.isOfficeDocument(datasetFile.getFileName())) {
DatasetFilePreviewService.PreviewFile previewFile = datasetFilePreviewService
.resolveReadyPreviewFile(datasetId, datasetFile);
if (previewFile == null) {
return ResponseEntity.status(HttpStatus.CONFLICT).build();
}
Resource previewResource = new UrlResource(previewFile.filePath().toUri());
return ResponseEntity.ok()
.contentType(MediaType.APPLICATION_PDF)
.header(HttpHeaders.CONTENT_DISPOSITION,
"inline; filename=\"" + previewFile.fileName() + "\"")
.body(previewResource);
}
Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId); Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
MediaType mediaType = MediaTypeFactory.getMediaType(resource) MediaType mediaType = MediaTypeFactory.getMediaType(resource)
.orElse(MediaType.APPLICATION_OCTET_STREAM); .orElse(MediaType.APPLICATION_OCTET_STREAM);
@@ -136,6 +155,18 @@ public class DatasetFileController {
} }
} }
/**
 * Returns the preview status of a dataset file by delegating to
 * {@code datasetFilePreviewService.getPreviewStatus}.
 *
 * @param datasetId dataset id taken from the request path
 * @param fileId    file id taken from the request path
 * @return the preview status reported by the preview service
 */
@GetMapping("/{fileId}/preview/status")
public DatasetFilePreviewStatusResponse getDatasetFilePreviewStatus(@PathVariable("datasetId") String datasetId,
@PathVariable("fileId") String fileId) {
return datasetFilePreviewService.getPreviewStatus(datasetId, fileId);
}
/**
 * Requests a preview for a dataset file by delegating to
 * {@code datasetFilePreviewService.ensurePreview}.
 *
 * @param datasetId dataset id taken from the request path
 * @param fileId    file id taken from the request path
 * @return the preview status reported by the preview service
 */
@PostMapping("/{fileId}/preview/convert")
public DatasetFilePreviewStatusResponse convertDatasetFilePreview(@PathVariable("datasetId") String datasetId,
@PathVariable("fileId") String fileId) {
return datasetFilePreviewService.ensurePreview(datasetId, fileId);
}
@IgnoreResponseWrap @IgnoreResponseWrap
@GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE) @GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)
public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) { public void downloadDatasetFileAsZip(@PathVariable("datasetId") String datasetId, HttpServletResponse response) {

View File

@@ -4,6 +4,7 @@ import {
Descriptions, Descriptions,
DescriptionsProps, DescriptionsProps,
Modal, Modal,
Spin,
Table, Table,
Input, Input,
} from "antd"; } from "antd";
@@ -52,6 +53,8 @@ export default function Overview({
previewFileType, previewFileType,
previewMediaUrl, previewMediaUrl,
previewLoading, previewLoading,
officePreviewStatus,
officePreviewError,
closePreview, closePreview,
handleDeleteFile, handleDeleteFile,
handleDownloadFile, handleDownloadFile,
@@ -447,11 +450,39 @@ export default function Overview({
</div> </div>
)} )}
{previewFileType === "pdf" && ( {previewFileType === "pdf" && (
<iframe <>
src={previewMediaUrl} {previewMediaUrl ? (
title={previewFileName || "PDF 预览"} <iframe
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }} src={previewMediaUrl}
/> title={previewFileName || "PDF 预览"}
style={{ width: "100%", height: `${PREVIEW_MAX_HEIGHT}px`, border: "none" }}
/>
) : (
<div
style={{
height: `${PREVIEW_MAX_HEIGHT}px`,
display: "flex",
flexDirection: "column",
alignItems: "center",
justifyContent: "center",
gap: 12,
color: "#666",
}}
>
{officePreviewStatus === "FAILED" ? (
<>
<div></div>
<div>{officePreviewError || "请稍后重试"}</div>
</>
) : (
<>
<Spin />
<div>...</div>
</>
)}
</div>
)}
</>
)} )}
{previewFileType === "video" && ( {previewFileType === "video" && (
<div style={{ textAlign: "center" }}> <div style={{ textAlign: "center" }}>

View File

@@ -4,7 +4,7 @@ import type {
} from "@/pages/DataManagement/dataset.model"; } from "@/pages/DataManagement/dataset.model";
import { DatasetType } from "@/pages/DataManagement/dataset.model"; import { DatasetType } from "@/pages/DataManagement/dataset.model";
import { App } from "antd"; import { App } from "antd";
import { useState } from "react"; import { useCallback, useEffect, useRef, useState } from "react";
import { import {
PREVIEW_TEXT_MAX_LENGTH, PREVIEW_TEXT_MAX_LENGTH,
resolvePreviewFileType, resolvePreviewFileType,
@@ -19,9 +19,33 @@ import {
createDatasetDirectoryUsingPost, createDatasetDirectoryUsingPost,
downloadDirectoryUsingGet, downloadDirectoryUsingGet,
deleteDirectoryUsingDelete, deleteDirectoryUsingDelete,
queryDatasetFilePreviewStatusUsingGet,
convertDatasetFilePreviewUsingPost,
} from "../dataset.api"; } from "../dataset.api";
import { useParams } from "react-router"; import { useParams } from "react-router";
// Lower-case file-name suffixes that isOfficeFileName treats as Office documents.
const OFFICE_FILE_EXTENSIONS = [".doc", ".docx"];
// Delay in milliseconds passed to window.setTimeout between two preview status checks.
const OFFICE_PREVIEW_POLL_INTERVAL = 2000;
// Upper bound on status checks for one preview before polling reports a timeout.
const OFFICE_PREVIEW_POLL_MAX_TIMES = 60;
// Preview states used by the UI; "UNSET" is the fallback normalizeOfficePreviewStatus
// returns for a missing or unrecognized status string.
type OfficePreviewStatus = "UNSET" | "PENDING" | "PROCESSING" | "READY" | "FAILED";
/**
 * Tells whether a file name ends with one of OFFICE_FILE_EXTENSIONS.
 * The comparison is case-insensitive; a missing name counts as an empty string.
 */
const isOfficeFileName = (fileName?: string) => {
  const normalizedName = (fileName || "").toLowerCase();
  for (const extension of OFFICE_FILE_EXTENSIONS) {
    if (normalizedName.endsWith(extension)) {
      return true;
    }
  }
  return false;
};
/**
 * Maps a raw status string onto OfficePreviewStatus.
 * Matching is case-insensitive; a missing, empty or unrecognized value yields "UNSET".
 */
const normalizeOfficePreviewStatus = (status?: string): OfficePreviewStatus => {
  const candidate = status ? status.toUpperCase() : "";
  switch (candidate) {
    case "PENDING":
    case "PROCESSING":
    case "READY":
    case "FAILED":
      return candidate as OfficePreviewStatus;
    default:
      return "UNSET";
  }
};
export function useFilesOperation(dataset: Dataset) { export function useFilesOperation(dataset: Dataset) {
const { message } = App.useApp(); const { message } = App.useApp();
@@ -44,6 +68,23 @@ export function useFilesOperation(dataset: Dataset) {
const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text"); const [previewFileType, setPreviewFileType] = useState<PreviewFileType>("text");
const [previewMediaUrl, setPreviewMediaUrl] = useState(""); const [previewMediaUrl, setPreviewMediaUrl] = useState("");
const [previewLoading, setPreviewLoading] = useState(false); const [previewLoading, setPreviewLoading] = useState(false);
// Conversion status of the Office preview currently shown; null while no Office preview is open.
const [officePreviewStatus, setOfficePreviewStatus] = useState<OfficePreviewStatus | null>(null);
// Error text displayed when the Office preview conversion fails or times out.
const [officePreviewError, setOfficePreviewError] = useState("");
// Handle returned by window.setTimeout for the pending status poll, or null when none is scheduled.
const officePreviewPollingRef = useRef<number | null>(null);
// Id of the file whose Office preview is open; poll callbacks compare against it to detect staleness.
const officePreviewFileRef = useRef<string | null>(null);
// Cancels the pending status-poll timer, if one is scheduled, and forgets its handle.
const clearOfficePreviewPolling = useCallback(() => {
  const pendingTimer = officePreviewPollingRef.current;
  if (!pendingTimer) {
    return;
  }
  window.clearTimeout(pendingTimer);
  officePreviewPollingRef.current = null;
}, []);
// Unmount cleanup for Office preview polling.
// Cancelling the timer alone is not enough: a status request that is already in flight
// would re-schedule the next poll once it resolves. Clearing the tracked file id makes
// that next tick see a mismatch and stop, so no polling outlives the component.
useEffect(() => {
  return () => {
    clearOfficePreviewPolling();
    officePreviewFileRef.current = null;
  };
}, [clearOfficePreviewPolling]);
const fetchFiles = async ( const fetchFiles = async (
prefix?: string, prefix?: string,
@@ -113,17 +154,61 @@ export function useFilesOperation(dataset: Dataset) {
return; return;
} }
const previewUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/preview`;
setPreviewFileName(file.fileName);
setPreviewContent("");
setPreviewMediaUrl("");
if (isOfficeFileName(file?.fileName)) {
setPreviewFileType("pdf");
setPreviewVisible(true);
setPreviewLoading(true);
setOfficePreviewStatus("PROCESSING");
setOfficePreviewError("");
officePreviewFileRef.current = file.id;
try {
const { data: statusData } = await queryDatasetFilePreviewStatusUsingGet(datasetId, file.id);
const currentStatus = normalizeOfficePreviewStatus(statusData?.status);
if (currentStatus === "READY") {
setPreviewMediaUrl(previewUrl);
setOfficePreviewStatus("READY");
setPreviewLoading(false);
return;
}
if (currentStatus === "PROCESSING" || currentStatus === "PENDING") {
pollOfficePreviewStatus(datasetId, file.id, 0);
return;
}
const { data } = await convertDatasetFilePreviewUsingPost(datasetId, file.id);
const status = normalizeOfficePreviewStatus(data?.status);
if (status === "READY") {
setPreviewMediaUrl(previewUrl);
setOfficePreviewStatus("READY");
} else if (status === "FAILED") {
setOfficePreviewStatus("FAILED");
setOfficePreviewError(data?.previewError || "转换失败,请稍后重试");
} else {
setOfficePreviewStatus("PROCESSING");
pollOfficePreviewStatus(datasetId, file.id, 0);
return;
}
} catch (error) {
console.error("触发预览转换失败", error);
message.error({ content: "触发预览转换失败" });
setOfficePreviewStatus("FAILED");
setOfficePreviewError("触发预览转换失败");
} finally {
setPreviewLoading(false);
}
return;
}
const fileType = resolvePreviewFileType(file?.fileName); const fileType = resolvePreviewFileType(file?.fileName);
if (!fileType) { if (!fileType) {
message.warning({ content: "不支持预览该文件类型" }); message.warning({ content: "不支持预览该文件类型" });
return; return;
} }
const previewUrl = `/api/data-management/datasets/${datasetId}/files/${file.id}/preview`;
setPreviewFileName(file.fileName);
setPreviewFileType(fileType); setPreviewFileType(fileType);
setPreviewContent("");
setPreviewMediaUrl("");
if (fileType === "text") { if (fileType === "text") {
setPreviewLoading(true); setPreviewLoading(true);
@@ -149,13 +234,62 @@ export function useFilesOperation(dataset: Dataset) {
}; };
const closePreview = () => { const closePreview = () => {
clearOfficePreviewPolling();
officePreviewFileRef.current = null;
setPreviewVisible(false); setPreviewVisible(false);
setPreviewContent(""); setPreviewContent("");
setPreviewMediaUrl(""); setPreviewMediaUrl("");
setPreviewFileName(""); setPreviewFileName("");
setPreviewFileType("text"); setPreviewFileType("text");
setOfficePreviewStatus(null);
setOfficePreviewError("");
}; };
/**
 * Schedules one delayed status check for an Office preview conversion and, while the
 * conversion has neither finished nor failed, re-schedules itself.
 *
 * @param datasetId dataset that owns the file
 * @param fileId    file whose preview status is polled
 * @param attempt   zero-based index of this check; polling reports a timeout once it
 *                  reaches OFFICE_PREVIEW_POLL_MAX_TIMES - 1
 */
const pollOfficePreviewStatus = useCallback(
  async (datasetId: string, fileId: string, attempt: number) => {
    // Only one poll timer may be pending at a time.
    clearOfficePreviewPolling();
    officePreviewPollingRef.current = window.setTimeout(async () => {
      // The preview is stale once it was closed or another file was opened.
      const isStale = () => officePreviewFileRef.current !== fileId;

      const finishWithFailure = (reason: string) => {
        setOfficePreviewStatus("FAILED");
        setOfficePreviewError(reason);
        setPreviewLoading(false);
      };

      if (isStale()) {
        return;
      }

      try {
        const { data } = await queryDatasetFilePreviewStatusUsingGet(datasetId, fileId);
        // Re-check after the request: the preview may have been closed or switched while
        // it was in flight, and a late response must not touch the current preview state.
        if (isStale()) {
          return;
        }
        const status = normalizeOfficePreviewStatus(data?.status);
        if (status === "READY") {
          setPreviewMediaUrl(`/api/data-management/datasets/${datasetId}/files/${fileId}/preview`);
          setOfficePreviewStatus("READY");
          setOfficePreviewError("");
          setPreviewLoading(false);
          return;
        }
        if (status === "FAILED") {
          finishWithFailure(data?.previewError || "转换失败,请稍后重试");
          return;
        }
      } catch (error) {
        console.error("轮询预览状态失败", error);
        if (isStale()) {
          return;
        }
      }

      // Still converting, or the status request itself failed: retry until the budget is spent.
      if (attempt >= OFFICE_PREVIEW_POLL_MAX_TIMES - 1) {
        finishWithFailure("转换超时,请稍后重试");
        return;
      }
      pollOfficePreviewStatus(datasetId, fileId, attempt + 1);
    }, OFFICE_PREVIEW_POLL_INTERVAL);
  },
  [clearOfficePreviewPolling]
);
const handleDeleteFile = async (file: DatasetFile) => { const handleDeleteFile = async (file: DatasetFile) => {
try { try {
await deleteDatasetFileUsingDelete(dataset.id, file.id); await deleteDatasetFileUsingDelete(dataset.id, file.id);
@@ -198,6 +332,8 @@ export function useFilesOperation(dataset: Dataset) {
previewFileType, previewFileType,
previewMediaUrl, previewMediaUrl,
previewLoading, previewLoading,
officePreviewStatus,
officePreviewError,
closePreview, closePreview,
fetchFiles, fetchFiles,
setFileList, setFileList,

View File

@@ -119,6 +119,22 @@ export function downloadFileByIdUsingGet(
); );
} }
// Dataset file preview status: GET .../files/{fileId}/preview/status
export function queryDatasetFilePreviewStatusUsingGet(
  datasetId: string | number,
  fileId: string | number
) {
  const statusUrl = `/api/data-management/datasets/${datasetId}/files/${fileId}/preview/status`;
  return get(statusUrl);
}
// Trigger dataset file preview conversion: POST .../files/{fileId}/preview/convert with an empty body
export function convertDatasetFilePreviewUsingPost(
  datasetId: string | number,
  fileId: string | number
) {
  const convertUrl = `/api/data-management/datasets/${datasetId}/files/${fileId}/preview/convert`;
  const emptyPayload = {};
  return post(convertUrl, emptyPayload);
}
// 删除数据集文件 // 删除数据集文件
export function deleteDatasetFileUsingDelete( export function deleteDatasetFileUsingDelete(
datasetId: string | number, datasetId: string | number,