feat(data-management): 添加Office文档预览功能

- 集成LibreOffice转换器实现DOC/DOCX转PDF功能
- 新增DatasetFilePreviewService处理预览文件管理
- 新增DatasetFilePreviewAsyncService异步转换任务
- 在文件删除时同步清理预览文件
- 前端实现Office文档预览状态轮询机制
- 添加预览API接口支持状态查询和转换触发
- 优化文件预览界面显示转换进度和错误信息
This commit is contained in:
2026-02-01 22:26:05 +08:00
parent f06d6e5a7e
commit 438acebb89
10 changed files with 833 additions and 179 deletions

View File

@@ -84,10 +84,11 @@ public class DatasetFileApplicationService {
);
private static final String DERIVED_METADATA_KEY = "derived_from_file_id";
private final DatasetFileRepository datasetFileRepository;
private final DatasetRepository datasetRepository;
private final FileService fileService;
private final PdfTextExtractAsyncService pdfTextExtractAsyncService;
private final DatasetFileRepository datasetFileRepository;
private final DatasetRepository datasetRepository;
private final FileService fileService;
private final PdfTextExtractAsyncService pdfTextExtractAsyncService;
private final DatasetFilePreviewService datasetFilePreviewService;
@Value("${datamate.data-management.base-path:/dataset}")
private String datasetBasePath;
@@ -96,15 +97,17 @@ public class DatasetFileApplicationService {
private DuplicateMethod duplicateMethod;
@Autowired
public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
DatasetRepository datasetRepository,
FileService fileService,
PdfTextExtractAsyncService pdfTextExtractAsyncService) {
this.datasetFileRepository = datasetFileRepository;
this.datasetRepository = datasetRepository;
this.fileService = fileService;
this.pdfTextExtractAsyncService = pdfTextExtractAsyncService;
}
public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository,
DatasetRepository datasetRepository,
FileService fileService,
PdfTextExtractAsyncService pdfTextExtractAsyncService,
DatasetFilePreviewService datasetFilePreviewService) {
this.datasetFileRepository = datasetFileRepository;
this.datasetRepository = datasetRepository;
this.fileService = fileService;
this.pdfTextExtractAsyncService = pdfTextExtractAsyncService;
this.datasetFilePreviewService = datasetFilePreviewService;
}
/**
* 获取数据集文件列表
@@ -309,18 +312,19 @@ public class DatasetFileApplicationService {
* 删除文件
*/
@Transactional
public void deleteDatasetFile(String datasetId, String fileId) {
DatasetFile file = getDatasetFile(datasetId, fileId);
Dataset dataset = datasetRepository.getById(datasetId);
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
datasetFileRepository.removeById(fileId);
dataset.removeFile(file);
datasetRepository.updateById(dataset);
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
if (file.getFilePath().startsWith(dataset.getPath())) {
try {
Path filePath = Paths.get(file.getFilePath());
Files.deleteIfExists(filePath);
public void deleteDatasetFile(String datasetId, String fileId) {
DatasetFile file = getDatasetFile(datasetId, fileId);
Dataset dataset = datasetRepository.getById(datasetId);
dataset.setFiles(new ArrayList<>(Collections.singleton(file)));
datasetFileRepository.removeById(fileId);
dataset.removeFile(file);
datasetRepository.updateById(dataset);
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, fileId);
// 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录
if (file.getFilePath().startsWith(dataset.getPath())) {
try {
Path filePath = Paths.get(file.getFilePath());
Files.deleteIfExists(filePath);
} catch (IOException ex) {
throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
}
@@ -686,9 +690,10 @@ public class DatasetFileApplicationService {
})
.collect(Collectors.toList());
for (DatasetFile file : filesToDelete) {
datasetFileRepository.removeById(file.getId());
}
for (DatasetFile file : filesToDelete) {
datasetFileRepository.removeById(file.getId());
datasetFilePreviewService.deletePreviewFileQuietly(datasetId, file.getId());
}
// 删除文件系统中的目录
try {

View File

@@ -0,0 +1,171 @@
package com.datamate.datamanagement.application;
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Set;
/**
* 数据集文件预览转换异步任务
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class DatasetFilePreviewAsyncService {
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
private static final String DATASET_PREVIEW_DIR = "dataset-previews";
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
private static final String PATH_SEPARATOR = "/";
private static final int MAX_ERROR_LENGTH = 500;
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
private final DatasetFileRepository datasetFileRepository;
private final DataManagementProperties dataManagementProperties;
private final ObjectMapper objectMapper = new ObjectMapper();
/**
 * Converts the DOC/DOCX file identified by {@code fileId} into a preview PDF and records the
 * outcome (READY or FAILED) in the file's preview metadata.
 *
 * <p>The method is annotated with Spring's {@code @Async}. Every step that can fail after the
 * input checks (path resolution, directory creation, conversion) runs inside one try block so
 * that any failure is written back as FAILED instead of leaving the record in its previous
 * state.
 *
 * <p>The entity is re-read from the repository once the conversion has finished: a conversion
 * can run for minutes, so the instance loaded at the start may be stale or the file may have
 * been deleted in the meantime.
 *
 * @param fileId id of the dataset file to convert; blank ids and unknown files are ignored
 */
@Async
public void convertPreviewAsync(String fileId) {
    if (StringUtils.isBlank(fileId)) {
        return;
    }
    DatasetFile file = datasetFileRepository.getById(fileId);
    if (file == null) {
        return;
    }
    String extension = resolveFileExtension(resolveOriginalName(file));
    if (!OFFICE_EXTENSIONS.contains(extension)) {
        updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "仅支持 DOC/DOCX 转换");
        return;
    }
    if (StringUtils.isBlank(file.getFilePath())) {
        updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "源文件路径为空");
        return;
    }

    // Declared outside the try so the failure handler can still record the intended path.
    String previewRelativePath = null;
    try {
        Path sourcePath = Paths.get(file.getFilePath()).toAbsolutePath().normalize();
        if (!Files.exists(sourcePath) || !Files.isRegularFile(sourcePath)) {
            updatePreviewStatus(file, KnowledgeItemPreviewStatus.FAILED, null, "源文件不存在");
            return;
        }
        KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
                .readPreviewInfo(file.getMetadata(), objectMapper);
        previewRelativePath = StringUtils.defaultIfBlank(
                previewInfo.pdfPath(),
                resolvePreviewRelativePath(file.getDatasetId(), file.getId())
        );
        Path targetPath = resolvePreviewStoragePath(previewRelativePath);
        ensureParentDirectory(targetPath);
        LibreOfficeConverter.convertToPdf(sourcePath, targetPath);

        DatasetFile latest = datasetFileRepository.getById(file.getId());
        if (latest == null) {
            // The file was deleted while the conversion was running; do not leave an orphaned PDF.
            Files.deleteIfExists(targetPath);
            return;
        }
        updatePreviewStatus(latest, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Keep the interrupt visible to the executor that owns this thread.
            Thread.currentThread().interrupt();
        }
        log.error("dataset preview convert failed, fileId: {}", file.getId(), e);
        // Exceptions such as NullPointerException carry no message; fall back to the type name
        // so the stored error is never empty.
        String reason = StringUtils.defaultIfBlank(e.getMessage(), e.getClass().getSimpleName());
        // updatePreviewStatus ignores a null entity, which covers a file deleted in the meantime.
        updatePreviewStatus(
                datasetFileRepository.getById(file.getId()),
                KnowledgeItemPreviewStatus.FAILED,
                previewRelativePath,
                trimError(reason)
        );
    }
}
/**
 * Writes a new preview state into the file's metadata and persists the entity.
 *
 * @param file                entity to update; {@code null} is ignored
 * @param status              preview status to record
 * @param previewRelativePath relative path of the preview PDF, may be {@code null}
 * @param error               error text to record, may be {@code null}
 */
private void updatePreviewStatus(
        DatasetFile file,
        KnowledgeItemPreviewStatus status,
        String previewRelativePath,
        String error
) {
    if (file != null) {
        file.setMetadata(KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
                file.getMetadata(), objectMapper, status, previewRelativePath, error, nowText()));
        datasetFileRepository.updateById(file);
    }
}
/**
 * Returns the name used to detect the file type: the stored file name when present, otherwise
 * the last element of the stored file path, otherwise an empty string.
 *
 * @param file dataset file, may be {@code null}
 * @return the resolved name, never {@code null}
 */
private String resolveOriginalName(DatasetFile file) {
    if (file != null) {
        if (StringUtils.isNotBlank(file.getFileName())) {
            return file.getFileName();
        }
        if (StringUtils.isNotBlank(file.getFilePath())) {
            Path storedPath = Paths.get(file.getFilePath());
            return storedPath.getFileName().toString();
        }
    }
    return "";
}
/**
 * Extracts the lower-cased extension (text after the last dot) of a file name.
 *
 * @param fileName file name, may be blank or {@code null}
 * @return the extension without the dot, or an empty string when the name is blank, has no dot,
 *         starts with its only dot, or ends with a dot
 */
private String resolveFileExtension(String fileName) {
    int lastDot = StringUtils.isBlank(fileName) ? -1 : fileName.lastIndexOf('.');
    boolean hasExtension = lastDot > 0 && lastDot < fileName.length() - 1;
    return hasExtension ? fileName.substring(lastDot + 1).toLowerCase() : "";
}
/**
 * Builds the default relative location of a file's preview PDF:
 * {@code dataset-previews/<datasetId>/<fileId>.pdf}, always using forward slashes.
 *
 * @param datasetId id of the owning dataset
 * @param fileId    id of the dataset file
 * @return the relative path with backslashes replaced by {@code /}
 */
private String resolvePreviewRelativePath(String datasetId, String fileId) {
    Path relative = Paths.get(DATASET_PREVIEW_DIR, datasetId, fileId + PREVIEW_FILE_SUFFIX);
    return relative.toString().replace("\\", PATH_SEPARATOR);
}
/**
 * Resolves a relative preview path against the upload root and rejects any result that would
 * fall outside that root.
 *
 * @param relativePath forward-slash relative path, {@code null} is treated as empty
 * @return the absolute, normalized storage path
 * @throws IllegalArgumentException if the resolved path is not located under the upload root
 */
private Path resolvePreviewStoragePath(String relativePath) {
    Path uploadRoot = resolveUploadRootPath();
    String platformRelative = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
    Path resolved = uploadRoot.resolve(platformRelative).toAbsolutePath().normalize();
    if (resolved.startsWith(uploadRoot)) {
        return resolved;
    }
    throw new IllegalArgumentException("invalid preview path");
}
/**
 * Returns the configured upload directory as an absolute, normalized path.
 *
 * <p>A blank setting is rejected explicitly: {@code Paths.get(null)} would fail with a bare
 * NullPointerException, and an empty string would silently resolve to the process working
 * directory and let previews be written there.
 *
 * @return the upload root
 * @throws IllegalStateException if no upload directory is configured
 */
private Path resolveUploadRootPath() {
    String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
    if (StringUtils.isBlank(uploadDir)) {
        throw new IllegalStateException("上传目录未配置");
    }
    return Paths.get(uploadDir).toAbsolutePath().normalize();
}
/**
 * Creates the parent directory of {@code targetPath} (including missing ancestors) when the
 * path has a parent.
 *
 * @param targetPath path whose parent directory must exist
 * @throws IllegalStateException if the directory cannot be created or the lookup fails
 */
private void ensureParentDirectory(Path targetPath) {
    try {
        Path parentDir = targetPath.getParent();
        if (parentDir == null) {
            return;
        }
        Files.createDirectories(parentDir);
    } catch (Exception e) {
        throw new IllegalStateException("创建预览目录失败", e);
    }
}
/**
 * Limits an error message to {@code MAX_ERROR_LENGTH} characters.
 *
 * @param error raw error text, may be blank or {@code null}
 * @return an empty string for blank input, otherwise the text cut to the maximum length
 */
private String trimError(String error) {
    if (StringUtils.isBlank(error)) {
        return "";
    }
    return error.length() > MAX_ERROR_LENGTH ? error.substring(0, MAX_ERROR_LENGTH) : error;
}
/**
 * Returns the current local date-time formatted with {@code PREVIEW_TIME_FORMATTER}.
 */
private String nowText() {
    LocalDateTime now = LocalDateTime.now();
    return PREVIEW_TIME_FORMATTER.format(now);
}
}

View File

@@ -0,0 +1,233 @@
package com.datamate.datamanagement.application;
import com.datamate.common.infrastructure.exception.BusinessAssert;
import com.datamate.common.infrastructure.exception.CommonErrorCode;
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.infrastructure.config.DataManagementProperties;
import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository;
import com.datamate.datamanagement.interfaces.dto.DatasetFilePreviewStatusResponse;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.Objects;
import java.util.Set;
/**
* 数据集文件预览转换服务
*/
@Service
@RequiredArgsConstructor
@Slf4j
public class DatasetFilePreviewService {
private static final Set<String> OFFICE_EXTENSIONS = Set.of("doc", "docx");
private static final String DATASET_PREVIEW_DIR = "dataset-previews";
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
private static final String PATH_SEPARATOR = "/";
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
private final DatasetFileRepository datasetFileRepository;
private final DataManagementProperties dataManagementProperties;
private final DatasetFilePreviewAsyncService datasetFilePreviewAsyncService;
private final ObjectMapper objectMapper = new ObjectMapper();
/**
 * Reports the preview state of an Office document in a dataset.
 *
 * <p>When the metadata says READY but the preview PDF is no longer on disk, the record is
 * switched to FAILED (and persisted) before the response is built.
 *
 * @param datasetId id of the dataset that must own the file
 * @param fileId    id of the dataset file
 * @return the current preview status
 */
public DatasetFilePreviewStatusResponse getPreviewStatus(String datasetId, String fileId) {
    DatasetFile datasetFile = requireDatasetFile(datasetId, fileId);
    assertOfficeDocument(datasetFile);
    KnowledgeItemPreviewMetadataHelper.PreviewInfo current = KnowledgeItemPreviewMetadataHelper
            .readPreviewInfo(datasetFile.getMetadata(), objectMapper);
    boolean readyButMissing = current.status() == KnowledgeItemPreviewStatus.READY
            && !previewPdfExists(datasetFile, current);
    if (!readyButMissing) {
        return buildResponse(current);
    }
    return buildResponse(markPreviewFailed(datasetFile, current, "预览文件不存在"));
}
/**
 * Minutes after which a PROCESSING record is considered abandoned and may be converted again.
 * Chosen as twice the five-minute conversion timeout used by {@code LibreOfficeConverter}.
 * NOTE(review): a task that waits longer than this in the async executor queue would be
 * submitted a second time — confirm against the executor configuration.
 */
private static final long STALE_PROCESSING_MINUTES = 10;

/**
 * Makes sure a preview PDF exists or is being produced for an Office document.
 *
 * <p>Returns immediately when a usable preview is READY or a conversion is in progress.
 * Otherwise the record is switched to PROCESSING and the asynchronous conversion is started.
 *
 * <p>A PROCESSING record whose timestamp is older than {@link #STALE_PROCESSING_MINUTES} is
 * treated as abandoned (for example the process stopped mid-conversion) and converted again;
 * without this the file could never be previewed again. If the conversion task cannot be
 * submitted the record is marked FAILED so the caller can retry.
 *
 * @param datasetId id of the dataset that must own the file
 * @param fileId    id of the dataset file
 * @return the preview status after this call
 */
public DatasetFilePreviewStatusResponse ensurePreview(String datasetId, String fileId) {
    DatasetFile file = requireDatasetFile(datasetId, fileId);
    assertOfficeDocument(file);
    KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo = KnowledgeItemPreviewMetadataHelper
            .readPreviewInfo(file.getMetadata(), objectMapper);
    if (previewInfo.status() == KnowledgeItemPreviewStatus.READY && previewPdfExists(file, previewInfo)) {
        return buildResponse(previewInfo);
    }
    if (previewInfo.status() == KnowledgeItemPreviewStatus.PROCESSING && !isProcessingStale(previewInfo)) {
        return buildResponse(previewInfo);
    }

    String previewRelativePath = resolvePreviewRelativePath(file.getDatasetId(), file.getId());
    String updatedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
            file.getMetadata(),
            objectMapper,
            KnowledgeItemPreviewStatus.PROCESSING,
            previewRelativePath,
            null,
            nowText()
    );
    file.setMetadata(updatedMetadata);
    datasetFileRepository.updateById(file);
    KnowledgeItemPreviewMetadataHelper.PreviewInfo processing = KnowledgeItemPreviewMetadataHelper
            .readPreviewInfo(updatedMetadata, objectMapper);

    try {
        datasetFilePreviewAsyncService.convertPreviewAsync(file.getId());
    } catch (RuntimeException e) {
        // Submission failed (e.g. the executor rejected the task): do not leave the record in
        // PROCESSING, because nothing is going to finish it.
        log.error("dataset preview convert submit failed, fileId: {}", file.getId(), e);
        return buildResponse(markPreviewFailed(file, processing, "预览转换任务提交失败"));
    }
    return buildResponse(processing);
}

/**
 * Tells whether a PROCESSING record is old enough to be considered abandoned. A missing or
 * unparseable timestamp counts as stale so that such a record can recover.
 */
private boolean isProcessingStale(KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
    String updatedAt = previewInfo.updatedAt();
    if (StringUtils.isBlank(updatedAt)) {
        return true;
    }
    try {
        LocalDateTime startedAt = LocalDateTime.parse(updatedAt, PREVIEW_TIME_FORMATTER);
        return startedAt.isBefore(LocalDateTime.now().minusMinutes(STALE_PROCESSING_MINUTES));
    } catch (RuntimeException e) {
        // LocalDateTime.parse reports bad input with DateTimeParseException (a RuntimeException).
        return true;
    }
}
/**
 * Tells whether a file name carries one of the supported Office extensions (doc, docx).
 * The comparison is case-insensitive because the extension helper lower-cases its result.
 *
 * @param fileName file name to inspect, may be blank or {@code null}
 * @return {@code true} when the extension is supported
 */
public boolean isOfficeDocument(String fileName) {
    String extension = resolveFileExtension(fileName);
    if (StringUtils.isBlank(extension)) {
        return false;
    }
    return OFFICE_EXTENSIONS.contains(extension);
}
/**
 * Locates the preview PDF of a file whose preview is READY.
 *
 * <p>If the metadata says READY but the PDF is missing on disk, the record is marked FAILED
 * and {@code null} is returned.
 *
 * @param datasetId id of the dataset, used for the default preview location
 * @param file      dataset file, may be {@code null}
 * @return the preview file and its display name, or {@code null} when no usable preview exists
 */
public PreviewFile resolveReadyPreviewFile(String datasetId, DatasetFile file) {
    if (file == null) {
        return null;
    }
    KnowledgeItemPreviewMetadataHelper.PreviewInfo info = KnowledgeItemPreviewMetadataHelper
            .readPreviewInfo(file.getMetadata(), objectMapper);
    if (info.status() != KnowledgeItemPreviewStatus.READY) {
        return null;
    }
    String relativePath = StringUtils.defaultIfBlank(
            info.pdfPath(),
            resolvePreviewRelativePath(datasetId, file.getId())
    );
    Path pdfPath = resolvePreviewStoragePath(relativePath);
    boolean available = Files.exists(pdfPath) && Files.isRegularFile(pdfPath);
    if (available) {
        return new PreviewFile(pdfPath, resolvePreviewPdfName(file));
    }
    markPreviewFailed(file, info, "预览文件不存在");
    return null;
}
/**
 * Deletes the preview PDF stored at the default location for the given file, never throwing.
 *
 * <p>Path resolution happens inside the try block: it can fail (for example when the upload
 * directory is not configured), and this method is called while dataset files are being
 * deleted, where a stray exception would abort the deletion.
 *
 * @param datasetId id of the owning dataset; nothing happens when blank
 * @param fileId    id of the dataset file; nothing happens when blank
 */
public void deletePreviewFileQuietly(String datasetId, String fileId) {
    if (StringUtils.isBlank(datasetId) || StringUtils.isBlank(fileId)) {
        return;
    }
    try {
        Path filePath = resolvePreviewStoragePath(resolvePreviewRelativePath(datasetId, fileId));
        Files.deleteIfExists(filePath);
    } catch (Exception e) {
        log.warn("delete dataset preview pdf error, fileId: {}", fileId, e);
    }
}
/**
 * Maps preview metadata to the API response; a missing status is reported as PENDING.
 *
 * @param previewInfo preview metadata read from the file
 * @return the response DTO
 */
private DatasetFilePreviewStatusResponse buildResponse(KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
    KnowledgeItemPreviewStatus recorded = previewInfo.status();
    DatasetFilePreviewStatusResponse response = new DatasetFilePreviewStatusResponse();
    response.setStatus(recorded != null ? recorded : KnowledgeItemPreviewStatus.PENDING);
    response.setPreviewError(previewInfo.error());
    response.setUpdatedAt(previewInfo.updatedAt());
    return response;
}
/**
 * Loads a dataset file and verifies that it belongs to the given dataset. Every check is
 * reported through {@code BusinessAssert} with {@code CommonErrorCode.PARAM_ERROR}.
 *
 * @param datasetId id of the expected owning dataset, must not be blank
 * @param fileId    id of the file, must not be blank
 * @return the loaded file
 */
private DatasetFile requireDatasetFile(String datasetId, String fileId) {
    BusinessAssert.isTrue(StringUtils.isNotBlank(datasetId), CommonErrorCode.PARAM_ERROR);
    BusinessAssert.isTrue(StringUtils.isNotBlank(fileId), CommonErrorCode.PARAM_ERROR);
    DatasetFile found = datasetFileRepository.getById(fileId);
    BusinessAssert.notNull(found, CommonErrorCode.PARAM_ERROR);
    boolean ownedByDataset = Objects.equals(datasetId, found.getDatasetId());
    BusinessAssert.isTrue(ownedByDataset, CommonErrorCode.PARAM_ERROR);
    return found;
}
/**
 * Asserts that the file exists and has a supported Office extension; both checks use
 * {@code CommonErrorCode.PARAM_ERROR}.
 *
 * @param file dataset file to check
 */
private void assertOfficeDocument(DatasetFile file) {
    BusinessAssert.notNull(file, CommonErrorCode.PARAM_ERROR);
    String originalName = resolveOriginalName(file);
    boolean supported = OFFICE_EXTENSIONS.contains(resolveFileExtension(originalName));
    BusinessAssert.isTrue(supported, CommonErrorCode.PARAM_ERROR);
}
/**
 * Returns the name used to detect the file type: the stored file name when present, otherwise
 * the last element of the stored file path, otherwise an empty string.
 *
 * @param file dataset file, may be {@code null}
 * @return the resolved name, never {@code null}
 */
private String resolveOriginalName(DatasetFile file) {
    if (file != null) {
        if (StringUtils.isNotBlank(file.getFileName())) {
            return file.getFileName();
        }
        if (StringUtils.isNotBlank(file.getFilePath())) {
            Path storedPath = Paths.get(file.getFilePath());
            return storedPath.getFileName().toString();
        }
    }
    return "";
}
/**
 * Extracts the lower-cased extension (text after the last dot) of a file name.
 *
 * @param fileName file name, may be blank or {@code null}
 * @return the extension without the dot, or an empty string when the name is blank, has no dot,
 *         starts with its only dot, or ends with a dot
 */
private String resolveFileExtension(String fileName) {
    int lastDot = StringUtils.isBlank(fileName) ? -1 : fileName.lastIndexOf('.');
    boolean hasExtension = lastDot > 0 && lastDot < fileName.length() - 1;
    return hasExtension ? fileName.substring(lastDot + 1).toLowerCase() : "";
}
/**
 * Derives the download name of the preview: the original name with its extension replaced by
 * {@code .pdf}, or a fixed default name when the original name is blank.
 *
 * @param file dataset file
 * @return the preview file name
 */
private String resolvePreviewPdfName(DatasetFile file) {
    String originalName = resolveOriginalName(file);
    if (StringUtils.isBlank(originalName)) {
        return "预览.pdf";
    }
    int lastDot = originalName.lastIndexOf('.');
    // A name without a dot, or one that only starts with a dot, is kept whole.
    String baseName = lastDot > 0 ? originalName.substring(0, lastDot) : originalName;
    return baseName + PREVIEW_FILE_SUFFIX;
}
/**
 * Checks whether the preview PDF referenced by the metadata (or the default location when the
 * metadata holds no path) exists as a regular file.
 *
 * @param file        dataset file
 * @param previewInfo preview metadata of the file
 * @return {@code true} when the PDF is present on disk
 */
private boolean previewPdfExists(DatasetFile file, KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo) {
    String defaultPath = resolvePreviewRelativePath(file.getDatasetId(), file.getId());
    Path pdfPath = resolvePreviewStoragePath(StringUtils.defaultIfBlank(previewInfo.pdfPath(), defaultPath));
    if (!Files.exists(pdfPath)) {
        return false;
    }
    return Files.isRegularFile(pdfPath);
}
/**
 * Records a FAILED preview state with the given error in the file's metadata, persists the
 * entity and returns the preview info re-read from the updated metadata.
 *
 * @param file        dataset file to update
 * @param previewInfo current preview metadata, used for the stored PDF path
 * @param error       error text to record
 * @return the preview info after the update
 */
private KnowledgeItemPreviewMetadataHelper.PreviewInfo markPreviewFailed(
        DatasetFile file,
        KnowledgeItemPreviewMetadataHelper.PreviewInfo previewInfo,
        String error
) {
    String defaultPath = resolvePreviewRelativePath(file.getDatasetId(), file.getId());
    String failedMetadata = KnowledgeItemPreviewMetadataHelper.applyPreviewInfo(
            file.getMetadata(),
            objectMapper,
            KnowledgeItemPreviewStatus.FAILED,
            StringUtils.defaultIfBlank(previewInfo.pdfPath(), defaultPath),
            error,
            nowText()
    );
    file.setMetadata(failedMetadata);
    datasetFileRepository.updateById(file);
    return KnowledgeItemPreviewMetadataHelper.readPreviewInfo(failedMetadata, objectMapper);
}
/**
 * Builds the default relative location of a file's preview PDF:
 * {@code dataset-previews/<datasetId>/<fileId>.pdf}, always using forward slashes.
 *
 * @param datasetId id of the owning dataset
 * @param fileId    id of the dataset file
 * @return the relative path with backslashes replaced by {@code /}
 */
private String resolvePreviewRelativePath(String datasetId, String fileId) {
    Path relative = Paths.get(DATASET_PREVIEW_DIR, datasetId, fileId + PREVIEW_FILE_SUFFIX);
    return relative.toString().replace("\\", PATH_SEPARATOR);
}
/**
 * Resolves a relative preview path against the upload root. The result must stay under the
 * upload root; this is asserted with {@code CommonErrorCode.PARAM_ERROR}.
 *
 * @param relativePath forward-slash relative path, {@code null} is treated as empty
 * @return the absolute, normalized storage path
 */
Path resolvePreviewStoragePath(String relativePath) {
    Path uploadRoot = resolveUploadRootPath();
    String platformRelative = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator);
    Path resolved = uploadRoot.resolve(platformRelative).toAbsolutePath().normalize();
    boolean insideRoot = resolved.startsWith(uploadRoot);
    BusinessAssert.isTrue(insideRoot, CommonErrorCode.PARAM_ERROR);
    return resolved;
}
/**
 * Returns the configured upload directory as an absolute, normalized path. A blank setting is
 * rejected through {@code BusinessAssert} with {@code CommonErrorCode.PARAM_ERROR}.
 *
 * @return the upload root
 */
private Path resolveUploadRootPath() {
    String configuredDir = dataManagementProperties.getFileStorage().getUploadDir();
    BusinessAssert.isTrue(StringUtils.isNotBlank(configuredDir), CommonErrorCode.PARAM_ERROR);
    Path uploadRoot = Paths.get(configuredDir);
    return uploadRoot.toAbsolutePath().normalize();
}
/**
 * Returns the current local date-time formatted with {@code PREVIEW_TIME_FORMATTER}.
 */
private String nowText() {
    LocalDateTime now = LocalDateTime.now();
    return PREVIEW_TIME_FORMATTER.format(now);
}
/**
 * A preview PDF that is ready to be served.
 *
 * @param filePath location of the PDF on disk
 * @param fileName file name to present to the client
 */
public record PreviewFile(Path filePath, String fileName) {}
}

View File

@@ -12,16 +12,11 @@ import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Set;
/**
@@ -36,8 +31,6 @@ public class KnowledgeItemPreviewAsyncService {
private static final String PREVIEW_SUB_DIR = "preview";
private static final String PREVIEW_FILE_SUFFIX = ".pdf";
private static final String PATH_SEPARATOR = "/";
private static final String LIBREOFFICE_COMMAND = "soffice";
private static final Duration CONVERT_TIMEOUT = Duration.ofMinutes(5);
private static final int MAX_ERROR_LENGTH = 500;
private static final DateTimeFormatter PREVIEW_TIME_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
@@ -79,7 +72,7 @@ public class KnowledgeItemPreviewAsyncService {
ensureParentDirectory(targetPath);
try {
convertOfficeToPdfByLibreOffice(sourcePath, targetPath);
LibreOfficeConverter.convertToPdf(sourcePath, targetPath);
updatePreviewStatus(item, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
} catch (Exception e) {
log.error("preview convert failed, itemId: {}", item.getId(), e);
@@ -87,69 +80,6 @@ public class KnowledgeItemPreviewAsyncService {
}
}
private void convertOfficeToPdfByLibreOffice(Path sourcePath, Path targetPath) throws Exception {
Path outputDir = targetPath.getParent();
ensureParentDirectory(targetPath);
List<String> command = List.of(
LIBREOFFICE_COMMAND,
"--headless",
"--nologo",
"--nolockcheck",
"--nodefault",
"--nofirststartwizard",
"--convert-to",
"pdf",
"--outdir",
outputDir.toString(),
sourcePath.toString()
);
ProcessBuilder processBuilder = new ProcessBuilder(command);
processBuilder.redirectErrorStream(true);
Process process = processBuilder.start();
boolean finished = process.waitFor(CONVERT_TIMEOUT.toMillis(), java.util.concurrent.TimeUnit.MILLISECONDS);
String output = readProcessOutput(process.getInputStream());
if (!finished) {
process.destroyForcibly();
throw new IllegalStateException("LibreOffice 转换超时");
}
if (process.exitValue() != 0) {
throw new IllegalStateException("LibreOffice 转换失败: " + output);
}
Path generated = outputDir.resolve(stripExtension(sourcePath.getFileName().toString()) + PREVIEW_FILE_SUFFIX);
if (!Files.exists(generated)) {
throw new IllegalStateException("LibreOffice 输出文件不存在");
}
if (!generated.equals(targetPath)) {
Files.move(generated, targetPath, StandardCopyOption.REPLACE_EXISTING);
}
}
private String readProcessOutput(InputStream inputStream) throws IOException {
if (inputStream == null) {
return "";
}
byte[] buffer = new byte[1024];
StringBuilder builder = new StringBuilder();
int total = 0;
int read;
while ((read = inputStream.read(buffer)) >= 0) {
if (read == 0) {
continue;
}
int remaining = MAX_ERROR_LENGTH - total;
if (remaining <= 0) {
break;
}
int toAppend = Math.min(remaining, read);
builder.append(new String(buffer, 0, toAppend, StandardCharsets.UTF_8));
total += toAppend;
if (total >= MAX_ERROR_LENGTH) {
break;
}
}
return builder.toString();
}
private void updatePreviewStatus(
KnowledgeItem item,
KnowledgeItemPreviewStatus status,
@@ -195,14 +125,6 @@ public class KnowledgeItemPreviewAsyncService {
return fileName.substring(dotIndex + 1).toLowerCase();
}
private String stripExtension(String fileName) {
if (StringUtils.isBlank(fileName)) {
return "preview";
}
int dotIndex = fileName.lastIndexOf('.');
return dotIndex <= 0 ? fileName : fileName.substring(0, dotIndex);
}
private String resolvePreviewRelativePath(String setId, String itemId) {
String relativePath = Paths.get(KNOWLEDGE_ITEM_UPLOAD_DIR, setId, PREVIEW_SUB_DIR, itemId + PREVIEW_FILE_SUFFIX)
.toString();

View File

@@ -0,0 +1,93 @@
package com.datamate.datamanagement.application;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.time.Duration;
import java.util.List;
import java.util.concurrent.TimeUnit;
/**
 * Converts documents to PDF by running the LibreOffice command line ({@code soffice}) in
 * headless mode.
 *
 * <p>Each conversion writes into its own temporary work directory and the result is moved to
 * the requested target afterwards. LibreOffice names its output after the source file, so
 * writing straight into a shared directory would let two sources with the same base name
 * (for example {@code a.doc} and {@code a.docx}) overwrite each other.
 */
public final class LibreOfficeConverter {
    private static final String LIBREOFFICE_COMMAND = "soffice";
    private static final Duration CONVERT_TIMEOUT = Duration.ofMinutes(5);
    /** Maximum number of bytes of process output included in a failure message. */
    private static final int MAX_OUTPUT_LENGTH = 500;
    private static final String WORK_DIR_PREFIX = ".lo-convert-";
    private static final String OUTPUT_LOG_NAME = "soffice-output.log";

    private LibreOfficeConverter() {
    }

    /**
     * Converts {@code sourcePath} to a PDF stored at {@code targetPath}, replacing an existing
     * file. The target's parent directory is created when missing.
     *
     * @param sourcePath document to convert; must be an existing regular file
     * @param targetPath where the PDF is stored; must have a parent directory
     * @throws IllegalArgumentException if an argument is {@code null}, the source is not a
     *                                  regular file, or the target has no parent directory
     * @throws IllegalStateException    if the conversion times out, exits with a non-zero code,
     *                                  or produces no output file
     * @throws IOException              if the process cannot be started or a file operation fails
     * @throws InterruptedException     if the calling thread is interrupted while waiting; the
     *                                  process is killed and the interrupt flag is restored
     */
    public static void convertToPdf(Path sourcePath, Path targetPath) throws Exception {
        if (sourcePath == null || targetPath == null) {
            throw new IllegalArgumentException("sourcePath and targetPath must not be null");
        }
        if (!Files.isRegularFile(sourcePath)) {
            throw new IllegalArgumentException("源文件不存在");
        }
        Path absoluteTarget = targetPath.toAbsolutePath();
        Path targetDir = absoluteTarget.getParent();
        if (targetDir == null) {
            throw new IllegalArgumentException("targetPath has no parent directory");
        }
        Files.createDirectories(targetDir);

        // The work directory lives next to the target so the final move stays on one file system.
        Path workDir = Files.createTempDirectory(targetDir, WORK_DIR_PREFIX);
        try {
            runLibreOffice(sourcePath, workDir, workDir.resolve(OUTPUT_LOG_NAME));
            Path generated = workDir.resolve(stripExtension(sourcePath.getFileName().toString()) + ".pdf");
            if (!Files.exists(generated)) {
                throw new IllegalStateException("LibreOffice 输出文件不存在");
            }
            Files.move(generated, absoluteTarget, StandardCopyOption.REPLACE_EXISTING);
        } finally {
            deleteQuietly(workDir);
        }
    }

    /**
     * Runs soffice and waits for it with a timeout. Process output is redirected to
     * {@code logFile} instead of a pipe: reading a pipe would block until the process exits
     * (defeating the timeout), and an undrained pipe can stall the child once its buffer fills.
     */
    private static void runLibreOffice(Path sourcePath, Path outputDir, Path logFile)
            throws IOException, InterruptedException {
        List<String> command = List.of(
                LIBREOFFICE_COMMAND,
                "--headless",
                "--nologo",
                "--nolockcheck",
                "--nodefault",
                "--nofirststartwizard",
                "--convert-to",
                "pdf",
                "--outdir",
                outputDir.toString(),
                sourcePath.toString()
        );
        ProcessBuilder processBuilder = new ProcessBuilder(command);
        processBuilder.redirectErrorStream(true);
        processBuilder.redirectOutput(logFile.toFile());
        Process process = processBuilder.start();

        boolean finished;
        try {
            finished = process.waitFor(CONVERT_TIMEOUT.toMillis(), TimeUnit.MILLISECONDS);
        } catch (InterruptedException e) {
            destroyProcessTree(process);
            Thread.currentThread().interrupt();
            throw e;
        }
        if (!finished) {
            destroyProcessTree(process);
            throw new IllegalStateException("LibreOffice 转换超时");
        }
        if (process.exitValue() != 0) {
            throw new IllegalStateException("LibreOffice 转换失败: " + readProcessOutput(logFile));
        }
    }

    /** Kills the process and any child processes it started. */
    private static void destroyProcessTree(Process process) {
        process.descendants().forEach(ProcessHandle::destroyForcibly);
        process.destroyForcibly();
    }

    /**
     * Reads at most {@code MAX_OUTPUT_LENGTH} bytes of captured process output as UTF-8. The
     * bytes are decoded in one step so a multi-byte character is never split between chunks.
     */
    private static String readProcessOutput(Path logFile) {
        try (InputStream in = Files.newInputStream(logFile)) {
            return new String(in.readNBytes(MAX_OUTPUT_LENGTH), StandardCharsets.UTF_8);
        } catch (IOException ignored) {
            // The output only enriches the error message; the failure itself is still reported.
            return "";
        }
    }

    /** Removes the work directory and its contents on a best-effort basis. */
    private static void deleteQuietly(Path directory) {
        try (java.util.stream.Stream<Path> entries = Files.walk(directory)) {
            // Reverse order visits children before their parent directory.
            entries.sorted(java.util.Comparator.reverseOrder()).forEach(path -> {
                try {
                    Files.deleteIfExists(path);
                } catch (IOException ignored) {
                    // Leftover temporary files must not turn a finished conversion into a failure.
                }
            });
        } catch (IOException | java.io.UncheckedIOException ignored) {
            // Same reasoning: cleanup is best effort.
        }
    }

    /** Returns the file name without its last extension; blank names map to {@code "preview"}. */
    private static String stripExtension(String fileName) {
        if (fileName == null || fileName.isBlank()) {
            return "preview";
        }
        int dotIndex = fileName.lastIndexOf('.');
        return dotIndex <= 0 ? fileName : fileName.substring(0, dotIndex);
    }
}

View File

@@ -0,0 +1,16 @@
package com.datamate.datamanagement.interfaces.dto;
import com.datamate.datamanagement.common.enums.KnowledgeItemPreviewStatus;
import lombok.Getter;
import lombok.Setter;
/**
 * Response describing the preview conversion state of a dataset file.
 * Getters and setters are generated by Lombok.
 */
@Getter
@Setter
public class DatasetFilePreviewStatusResponse {
// Preview status; DatasetFilePreviewService reports PENDING when the metadata holds no status.
private KnowledgeItemPreviewStatus status;
// Error text recorded with the preview state; may be null.
private String previewError;
// Time of the last preview state change as text; the preview services write it with
// DateTimeFormatter.ISO_LOCAL_DATE_TIME.
private String updatedAt;
}

View File

@@ -5,20 +5,23 @@ import com.datamate.common.infrastructure.common.Response;
import com.datamate.common.infrastructure.exception.SystemErrorCode;
import com.datamate.common.interfaces.PagedResponse;
import com.datamate.common.interfaces.PagingQuery;
import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.application.DatasetFileApplicationService;
import com.datamate.datamanagement.application.DatasetFilePreviewService;
import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
import com.datamate.datamanagement.interfaces.converter.DatasetConverter;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
import com.datamate.datamanagement.interfaces.dto.AddFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CopyFilesRequest;
import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest;
import com.datamate.datamanagement.interfaces.dto.DatasetFilePreviewStatusResponse;
import com.datamate.datamanagement.interfaces.dto.DatasetFileResponse;
import com.datamate.datamanagement.interfaces.dto.UploadFileRequest;
import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest;
import jakarta.servlet.http.HttpServletResponse;
import jakarta.validation.Valid;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.core.io.Resource;
import org.springframework.core.io.UrlResource;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpStatus;
import org.springframework.http.MediaType;
@@ -36,12 +39,15 @@ import java.util.List;
@RequestMapping("/data-management/datasets/{datasetId}/files")
public class DatasetFileController {
private final DatasetFileApplicationService datasetFileApplicationService;
private final DatasetFileApplicationService datasetFileApplicationService;
private final DatasetFilePreviewService datasetFilePreviewService;
@Autowired
public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService) {
this.datasetFileApplicationService = datasetFileApplicationService;
}
public DatasetFileController(DatasetFileApplicationService datasetFileApplicationService,
DatasetFilePreviewService datasetFilePreviewService) {
this.datasetFileApplicationService = datasetFileApplicationService;
this.datasetFilePreviewService = datasetFilePreviewService;
}
@GetMapping
public Response<PagedResponse<DatasetFile>> getDatasetFiles(
@@ -114,15 +120,28 @@ public class DatasetFileController {
}
}
@IgnoreResponseWrap
@GetMapping(value = "/{fileId}/preview", produces = MediaType.ALL_VALUE)
public ResponseEntity<Resource> previewDatasetFileById(@PathVariable("datasetId") String datasetId,
@PathVariable("fileId") String fileId) {
try {
DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
MediaType mediaType = MediaTypeFactory.getMediaType(resource)
.orElse(MediaType.APPLICATION_OCTET_STREAM);
@IgnoreResponseWrap
@GetMapping(value = "/{fileId}/preview", produces = MediaType.ALL_VALUE)
public ResponseEntity<Resource> previewDatasetFileById(@PathVariable("datasetId") String datasetId,
@PathVariable("fileId") String fileId) {
try {
DatasetFile datasetFile = datasetFileApplicationService.getDatasetFile(datasetId, fileId);
if (datasetFilePreviewService.isOfficeDocument(datasetFile.getFileName())) {
DatasetFilePreviewService.PreviewFile previewFile = datasetFilePreviewService
.resolveReadyPreviewFile(datasetId, datasetFile);
if (previewFile == null) {
return ResponseEntity.status(HttpStatus.CONFLICT).build();
}
Resource previewResource = new UrlResource(previewFile.filePath().toUri());
return ResponseEntity.ok()
.contentType(MediaType.APPLICATION_PDF)
.header(HttpHeaders.CONTENT_DISPOSITION,
"inline; filename=\"" + previewFile.fileName() + "\"")
.body(previewResource);
}
Resource resource = datasetFileApplicationService.downloadFile(datasetId, fileId);
MediaType mediaType = MediaTypeFactory.getMediaType(resource)
.orElse(MediaType.APPLICATION_OCTET_STREAM);
return ResponseEntity.ok()
.contentType(mediaType)
@@ -133,8 +152,20 @@ public class DatasetFileController {
return ResponseEntity.status(HttpStatus.NOT_FOUND).build();
} catch (Exception e) {
return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build();
}
}
}
}
/**
 * Returns the preview conversion status of a dataset file by delegating to
 * {@code DatasetFilePreviewService#getPreviewStatus}.
 *
 * @param datasetId id of the dataset taken from the request path
 * @param fileId    id of the file taken from the request path
 * @return the current preview status
 */
@GetMapping("/{fileId}/preview/status")
public DatasetFilePreviewStatusResponse getDatasetFilePreviewStatus(@PathVariable("datasetId") String datasetId,
                                                                    @PathVariable("fileId") String fileId) {
    DatasetFilePreviewStatusResponse previewStatus = datasetFilePreviewService.getPreviewStatus(datasetId, fileId);
    return previewStatus;
}
/**
 * Requests a preview for a dataset file by delegating to
 * {@code DatasetFilePreviewService#ensurePreview} and returns the resulting status.
 *
 * @param datasetId id of the dataset taken from the request path
 * @param fileId    id of the file taken from the request path
 * @return the preview status after the request
 */
@PostMapping("/{fileId}/preview/convert")
public DatasetFilePreviewStatusResponse convertDatasetFilePreview(@PathVariable("datasetId") String datasetId,
                                                                  @PathVariable("fileId") String fileId) {
    DatasetFilePreviewStatusResponse previewStatus = datasetFilePreviewService.ensurePreview(datasetId, fileId);
    return previewStatus;
}
@IgnoreResponseWrap
@GetMapping(value = "/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE)