refactor(data-management): 优化知识项目预览服务的文件转换逻辑

- 移除 docx4j 相关依赖和转换方法
- 统一 office 文件转换为 pdf 的处理方式,全部使用 libreoffice
- 删除单独的 docx 到 pdf 转换方法
- 重命名转换方法为 convertOfficeToPdfByLibreOffice
- 增强路径解析逻辑,添加多种候选路径处理
- 添加路径安全性验证和规范化处理
- 新增 extractRelativePathFromSegment 和 normalizeRelativePathValue 辅助方法
- 改进文件存在性检查和路径构建逻辑
This commit is contained in:
2026-02-01 21:18:14 +08:00
parent 340a0ad364
commit d4a44f3bf5

View File

@@ -8,14 +8,11 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.docx4j.Docx4J;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.springframework.scheduling.annotation.Async; import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
@@ -82,11 +79,7 @@ public class KnowledgeItemPreviewAsyncService {
ensureParentDirectory(targetPath); ensureParentDirectory(targetPath);
try { try {
if ("docx".equals(extension)) { convertOfficeToPdfByLibreOffice(sourcePath, targetPath);
convertDocxToPdf(sourcePath, targetPath);
} else {
convertDocToPdfByLibreOffice(sourcePath, targetPath);
}
updatePreviewStatus(item, KnowledgeItemPreviewStatus.READY, previewRelativePath, null); updatePreviewStatus(item, KnowledgeItemPreviewStatus.READY, previewRelativePath, null);
} catch (Exception e) { } catch (Exception e) {
log.error("preview convert failed, itemId: {}", item.getId(), e); log.error("preview convert failed, itemId: {}", item.getId(), e);
@@ -94,14 +87,7 @@ public class KnowledgeItemPreviewAsyncService {
} }
} }
private void convertDocxToPdf(Path sourcePath, Path targetPath) throws Exception { private void convertOfficeToPdfByLibreOffice(Path sourcePath, Path targetPath) throws Exception {
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.load(sourcePath.toFile());
try (OutputStream outputStream = Files.newOutputStream(targetPath)) {
Docx4J.toPDF(wordMLPackage, outputStream);
}
}
private void convertDocToPdfByLibreOffice(Path sourcePath, Path targetPath) throws Exception {
Path outputDir = targetPath.getParent(); Path outputDir = targetPath.getParent();
ensureParentDirectory(targetPath); ensureParentDirectory(targetPath);
List<String> command = List.of( List<String> command = List.of(
@@ -234,8 +220,70 @@ public class KnowledgeItemPreviewAsyncService {
} }
/**
 * Resolves a stored knowledge-item path string to a file location under the upload root.
 *
 * <p>Several candidate locations are derived from the input (it may be absolute or relative,
 * and may or may not already contain the {@code KNOWLEDGE_ITEM_UPLOAD_DIR} segment). The
 * candidates are kept in insertion order; the first one that exists as a regular file is
 * returned. If none exists, the first candidate is returned as a fallback.
 *
 * @param relativePath the stored path value; backslashes are treated as separators
 * @return the first existing candidate, or the first candidate when none exists on disk
 * @throws IllegalArgumentException if the input is blank, if no candidate could be derived,
 *         or if a derived candidate falls outside the upload root
 */
private Path resolveKnowledgeItemStoragePath(String relativePath) { private Path resolveKnowledgeItemStoragePath(String relativePath) {
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace("/", java.io.File.separator); if (StringUtils.isBlank(relativePath)) {
throw new IllegalArgumentException("invalid knowledge item path");
}
// Unify separators so the string-based prefix/segment checks below see one separator form.
String normalizedInput = relativePath.replace("\\", PATH_SEPARATOR).trim();
Path root = resolveUploadRootPath(); Path root = resolveUploadRootPath();
// LinkedHashSet: de-duplicates candidates while preserving the order they were added in,
// which is also the lookup priority used at the end of the method.
java.util.LinkedHashSet<Path> candidates = new java.util.LinkedHashSet<>();
Path inputPath = Paths.get(normalizedInput.replace(PATH_SEPARATOR, java.io.File.separator));
if (inputPath.isAbsolute()) {
// Absolute input: accept it as-is only when it already lies under the upload root.
Path normalizedAbsolute = inputPath.toAbsolutePath().normalize();
if (normalizedAbsolute.startsWith(root)) {
candidates.add(normalizedAbsolute);
}
// Also try re-rooting the part of the path that starts at the upload-dir segment.
String segmentRelativePath = extractRelativePathFromSegment(normalizedInput, KNOWLEDGE_ITEM_UPLOAD_DIR);
if (StringUtils.isNotBlank(segmentRelativePath)) {
candidates.add(buildKnowledgeItemStoragePath(root, segmentRelativePath));
}
// An absolute path that is neither under the root nor contains the segment is rejected.
if (candidates.isEmpty()) {
throw new IllegalArgumentException("invalid knowledge item path");
}
} else {
// Relative input, candidate 1: the value resolved directly against the root.
String normalizedRelative = normalizeRelativePathValue(normalizedInput);
if (StringUtils.isNotBlank(normalizedRelative)) {
candidates.add(buildKnowledgeItemStoragePath(root, normalizedRelative));
}
// Candidate 2: the portion starting at the upload-dir segment, resolved against the root.
String segmentRelativePath = extractRelativePathFromSegment(normalizedInput, KNOWLEDGE_ITEM_UPLOAD_DIR);
if (StringUtils.isNotBlank(segmentRelativePath)) {
candidates.add(buildKnowledgeItemStoragePath(root, segmentRelativePath));
}
// Candidate 3: when the value does not already start with the upload dir, try it with
// the upload dir prepended.
if (StringUtils.isNotBlank(normalizedRelative)
&& !normalizedRelative.startsWith(KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR)
&& !normalizedRelative.equals(KNOWLEDGE_ITEM_UPLOAD_DIR)) {
candidates.add(buildKnowledgeItemStoragePath(root, KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR + normalizedRelative));
}
}
// When the root directory itself is named like the upload dir, a value that starts with
// that prefix would repeat the directory name; add a candidate with the prefix removed.
if (root.getFileName() != null && KNOWLEDGE_ITEM_UPLOAD_DIR.equals(root.getFileName().toString())) {
String normalizedRelative = normalizeRelativePathValue(normalizedInput);
if (StringUtils.isNotBlank(normalizedRelative)
&& normalizedRelative.startsWith(KNOWLEDGE_ITEM_UPLOAD_DIR + PATH_SEPARATOR)) {
String withoutPrefix = normalizedRelative.substring(KNOWLEDGE_ITEM_UPLOAD_DIR.length() + PATH_SEPARATOR.length());
if (StringUtils.isNotBlank(withoutPrefix)) {
candidates.add(buildKnowledgeItemStoragePath(root, withoutPrefix));
}
}
}
// Pick the first candidate that exists as a regular file; remember the very first
// candidate so it can be returned when nothing exists on disk.
Path fallback = null;
for (Path candidate : candidates) {
if (fallback == null) {
fallback = candidate;
}
if (Files.exists(candidate) && Files.isRegularFile(candidate)) {
return candidate;
}
}
// No candidate at all (e.g. the relative value normalized to an empty string).
if (fallback == null) {
throw new IllegalArgumentException("invalid knowledge item path");
}
return fallback;
}
private Path buildKnowledgeItemStoragePath(Path root, String relativePath) {
String normalizedRelativePath = StringUtils.defaultString(relativePath).replace(PATH_SEPARATOR, java.io.File.separator);
Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize(); Path target = root.resolve(normalizedRelativePath).toAbsolutePath().normalize();
if (!target.startsWith(root)) { if (!target.startsWith(root)) {
throw new IllegalArgumentException("invalid knowledge item path"); throw new IllegalArgumentException("invalid knowledge item path");
@@ -243,6 +291,36 @@ public class KnowledgeItemPreviewAsyncService {
return target; return target;
} }
/**
 * Extracts the part of {@code rawPath} that starts at the directory named {@code segment}.
 *
 * <p>Backslashes are converted to {@code PATH_SEPARATOR} and leading separators are removed
 * before searching. The segment is only matched as a whole path component: a match must sit
 * at the start of the path or directly after a separator, so a directory whose name merely
 * ends with {@code segment} is not mistaken for it.
 *
 * @param rawPath the path string to search; may be absolute or relative
 * @param segment the directory name to look for
 * @return the normalized sub-path beginning with {@code segment}; {@code segment} itself when
 *         the whole path equals it; or {@code null} when either argument is blank or the
 *         segment does not occur as a path component followed by a separator
 */
private String extractRelativePathFromSegment(String rawPath, String segment) {
    if (StringUtils.isBlank(rawPath) || StringUtils.isBlank(segment)) {
        return null;
    }
    String normalized = rawPath.replace("\\", PATH_SEPARATOR).trim();
    while (normalized.startsWith(PATH_SEPARATOR)) {
        normalized = normalized.substring(PATH_SEPARATOR.length());
    }
    String segmentPrefix = segment + PATH_SEPARATOR;
    int index = normalized.indexOf(segmentPrefix);
    // Skip hits that begin in the middle of a longer component (e.g. "my-<segment>/");
    // only index 0 or a position right after a separator is a real component boundary.
    while (index > 0 && !normalized.startsWith(PATH_SEPARATOR, index - PATH_SEPARATOR.length())) {
        index = normalized.indexOf(segmentPrefix, index + 1);
    }
    if (index < 0) {
        return segment.equals(normalized) ? segment : null;
    }
    return normalizeRelativePathValue(normalized.substring(index));
}
/**
 * Normalizes a relative path string: converts backslashes to {@code PATH_SEPARATOR}, trims
 * surrounding whitespace, and drops any leading and trailing separators.
 *
 * @param relativePath the raw path value; may be {@code null} or blank
 * @return the cleaned path, or an empty string when the input is blank
 */
private String normalizeRelativePathValue(String relativePath) {
    if (StringUtils.isBlank(relativePath)) {
        return "";
    }
    final String unified = relativePath.replace("\\", PATH_SEPARATOR).trim();

    // Advance past leading separators one character at a time.
    int start = 0;
    while (unified.startsWith(PATH_SEPARATOR, start)) {
        start++;
    }
    String result = unified.substring(start);

    // Chop trailing separators one character at a time.
    while (result.endsWith(PATH_SEPARATOR)) {
        result = result.substring(0, result.length() - 1);
    }
    return result;
}
private Path resolveUploadRootPath() { private Path resolveUploadRootPath() {
String uploadDir = dataManagementProperties.getFileStorage().getUploadDir(); String uploadDir = dataManagementProperties.getFileStorage().getUploadDir();
return Paths.get(uploadDir).toAbsolutePath().normalize(); return Paths.get(uploadDir).toAbsolutePath().normalize();