diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/KnowledgeItemApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/KnowledgeItemApplicationService.java
index f87747d..3e6aa15 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/KnowledgeItemApplicationService.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/KnowledgeItemApplicationService.java
@@ -27,6 +27,7 @@ import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeItemRequest;
 import com.datamate.datamanagement.interfaces.dto.ImportKnowledgeItemsRequest;
 import com.datamate.datamanagement.interfaces.dto.KnowledgeItemPagingQuery;
 import com.datamate.datamanagement.interfaces.dto.KnowledgeItemResponse;
+import com.datamate.datamanagement.interfaces.dto.ReplaceKnowledgeItemFileRequest;
 import com.datamate.datamanagement.interfaces.dto.UpdateKnowledgeItemRequest;
 import com.datamate.datamanagement.interfaces.dto.UploadKnowledgeItemsRequest;
 import jakarta.servlet.http.HttpServletResponse;
@@ -366,6 +367,156 @@ public class KnowledgeItemApplicationService {
         }
     }
 
+    /**
+     * Streams a file-backed knowledge item to the client for inline preview.
+     *
+     * <p>The item must belong to {@code setId} and be file-backed (content type FILE or
+     * source type FILE_UPLOAD); its content field holds the stored relative path. The
+     * MIME type is probed from the file and falls back to application/octet-stream.
+     * Validation failures are reported through {@code BusinessAssert}.
+     *
+     * <p>NOTE(review): uploaded content is rendered inline under a probed MIME type;
+     * confirm that HTML/SVG uploads cannot be abused for script injection.
+     *
+     * @param setId    identifier of the knowledge set the item must belong to
+     * @param itemId   identifier of the knowledge item to preview
+     * @param response servlet response that receives the headers and the file bytes
+     * @throws BusinessException with FILE_SYSTEM_ERROR if the file cannot be streamed
+     */
+    @Transactional(readOnly = true)
+    public void previewKnowledgeItemFile(String setId, String itemId, HttpServletResponse response) {
+        BusinessAssert.notNull(response, CommonErrorCode.PARAM_ERROR);
+        KnowledgeItem knowledgeItem = knowledgeItemRepository.getById(itemId);
+        BusinessAssert.notNull(knowledgeItem, DataManagementErrorCode.KNOWLEDGE_ITEM_NOT_FOUND);
+        BusinessAssert.isTrue(Objects.equals(knowledgeItem.getSetId(), setId), CommonErrorCode.PARAM_ERROR);
+        BusinessAssert.isTrue(knowledgeItem.getContentType() == KnowledgeContentType.FILE
+                || knowledgeItem.getSourceType() == KnowledgeSourceType.FILE_UPLOAD,
+            CommonErrorCode.PARAM_ERROR);
+
+        String relativePath = knowledgeItem.getContent();
+        BusinessAssert.isTrue(StringUtils.isNotBlank(relativePath), CommonErrorCode.PARAM_ERROR);
+        Path filePath = resolveKnowledgeItemStoragePath(relativePath);
+        BusinessAssert.isTrue(Files.exists(filePath) && Files.isRegularFile(filePath), CommonErrorCode.PARAM_ERROR);
+
+        String previewName = StringUtils.isNotBlank(knowledgeItem.getSourceFileId())
+            ? knowledgeItem.getSourceFileId()
+            : filePath.getFileName().toString();
+
+        String contentType = null;
+        try {
+            contentType = Files.probeContentType(filePath);
+        } catch (IOException e) {
+            log.warn("probe knowledge item file content type failed, itemId: {}", itemId, e);
+        }
+        if (StringUtils.isBlank(contentType)) {
+            contentType = "application/octet-stream";
+        }
+
+        response.setContentType(contentType);
+        response.setCharacterEncoding(StandardCharsets.UTF_8.name());
+        // URLEncoder produces form encoding ('+' for spaces); RFC 5987 requires '%20'.
+        String encodedName = URLEncoder.encode(previewName, StandardCharsets.UTF_8).replace("+", "%20");
+        response.setHeader(HttpHeaders.CONTENT_DISPOSITION,
+            "inline; filename=\"" + encodedName + "\"; filename*=UTF-8''" + encodedName);
+
+        try (InputStream inputStream = Files.newInputStream(filePath)) {
+            inputStream.transferTo(response.getOutputStream());
+            response.flushBuffer();
+        } catch (IOException e) {
+            log.error("preview knowledge item file error, itemId: {}", itemId, e);
+            throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
+        }
+    }
+
+    /**
+     * Replaces the stored file of a file-backed knowledge item with a newly uploaded one.
+     *
+     * <p>The upload is saved under a fresh UUID-based name in the set directory, then the
+     * item's title, content path, content type, source type and source file name are
+     * updated. If the update fails, the new upload is removed and the item keeps its old
+     * file. The old file is removed only after the update succeeded, and a failure to
+     * remove it is logged rather than propagated.
+     *
+     * @param setId   identifier of the knowledge set the item must belong to
+     * @param itemId  identifier of the knowledge item whose file is replaced
+     * @param request multipart request carrying the replacement file
+     * @return the updated knowledge item
+     * @throws BusinessException with FILE_SYSTEM_ERROR if persisting the change fails unexpectedly
+     */
+    public KnowledgeItem replaceKnowledgeItemFile(String setId, String itemId, ReplaceKnowledgeItemFileRequest request) {
+        KnowledgeSet knowledgeSet = requireKnowledgeSet(setId);
+        KnowledgeItem knowledgeItem = knowledgeItemRepository.getById(itemId);
+        BusinessAssert.notNull(knowledgeItem, DataManagementErrorCode.KNOWLEDGE_ITEM_NOT_FOUND);
+        BusinessAssert.isTrue(Objects.equals(knowledgeItem.getSetId(), setId), CommonErrorCode.PARAM_ERROR);
+        BusinessAssert.isTrue(!isReadOnlyStatus(knowledgeItem.getStatus()),
+            DataManagementErrorCode.KNOWLEDGE_ITEM_STATUS_ERROR);
+        BusinessAssert.isTrue(!isReadOnlyStatus(knowledgeSet.getStatus()),
+            DataManagementErrorCode.KNOWLEDGE_SET_STATUS_ERROR);
+
+        MultipartFile file = request == null ? null : request.getFile();
+        BusinessAssert.notNull(file, CommonErrorCode.PARAM_ERROR);
+        BusinessAssert.isTrue(!file.isEmpty(), CommonErrorCode.PARAM_ERROR);
+        BusinessAssert.isTrue(knowledgeItem.getContentType() == KnowledgeContentType.FILE
+                || knowledgeItem.getSourceType() == KnowledgeSourceType.FILE_UPLOAD,
+            CommonErrorCode.PARAM_ERROR);
+
+        String oldRelativePath = knowledgeItem.getContent();
+        BusinessAssert.isTrue(StringUtils.isNotBlank(oldRelativePath), CommonErrorCode.PARAM_ERROR);
+        Path oldFilePath = resolveKnowledgeItemStoragePath(oldRelativePath);
+        BusinessAssert.isTrue(Files.exists(oldFilePath) && Files.isRegularFile(oldFilePath), CommonErrorCode.PARAM_ERROR);
+
+        Path uploadRoot = resolveUploadRootPath();
+        Path setDir = uploadRoot.resolve(KNOWLEDGE_ITEM_UPLOAD_DIR).resolve(setId).normalize();
+        BusinessAssert.isTrue(setDir.startsWith(uploadRoot), CommonErrorCode.PARAM_ERROR);
+        createDirectories(setDir);
+
+        String originalName = resolveOriginalFileName(file);
+        String safeOriginalName = sanitizeFileName(originalName);
+        if (StringUtils.isBlank(safeOriginalName)) {
+            safeOriginalName = "file";
+        }
+
+        String extension = getFileExtension(safeOriginalName);
+        String storedName = UUID.randomUUID().toString() +
+            (StringUtils.isBlank(extension) ? "" : "." + extension);
+        Path targetPath = setDir.resolve(storedName).normalize();
+        BusinessAssert.isTrue(targetPath.startsWith(setDir), CommonErrorCode.PARAM_ERROR);
+
+        saveMultipartFile(file, targetPath);
+
+        String title = stripExtension(safeOriginalName);
+        if (StringUtils.isBlank(title)) {
+            title = "未命名文件";
+        }
+        title = trimToLength(title, MAX_TITLE_LENGTH);
+
+        String sourceFileId = trimToLength(safeOriginalName, MAX_TITLE_LENGTH);
+        String newRelativePath = buildRelativeFilePath(setId, storedName);
+
+        try {
+            knowledgeItem.setTitle(title);
+            knowledgeItem.setContent(newRelativePath);
+            knowledgeItem.setContentType(KnowledgeContentType.FILE);
+            knowledgeItem.setSourceType(KnowledgeSourceType.FILE_UPLOAD);
+            knowledgeItem.setSourceFileId(sourceFileId);
+            knowledgeItemRepository.updateById(knowledgeItem);
+        } catch (Exception e) {
+            // The item still references the old file, so discard the orphaned upload.
+            deleteFileQuietly(targetPath);
+            if (e instanceof BusinessException) {
+                throw (BusinessException) e;
+            }
+            log.error("replace knowledge item file error, itemId: {}", itemId, e);
+            throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR);
+        }
+
+        // The item now points at the new file; a leftover old file is only wasted space,
+        // so a failed cleanup must not fail the request or delete the new upload.
+        deleteFileQuietly(oldFilePath);
+
+        return knowledgeItem;
+    }
+
     private byte[] resolveExportContent(KnowledgeItem item) {
         if (item.getContentType() == KnowledgeContentType.FILE) {
             String relativePath = item.getContent();
@@ -441,6 +592,19 @@ public class KnowledgeItemApplicationService {
         }
     }
 
+    /**
+     * Deletes the file if it exists, logging instead of throwing on I/O failure.
+     *
+     * @param filePath path of the file to delete
+     */
+    private void deleteFileQuietly(Path filePath) {
+        try {
+            Files.deleteIfExists(filePath);
+        } catch (IOException e) {
+            log.warn("delete knowledge item file quietly error, path: {}", filePath, e);
+        }
+    }
+
     private String resolveOriginalFileName(MultipartFile file) {
         String originalName = file.getOriginalFilename();
         if (StringUtils.isBlank(originalName)) {
diff --git
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/ReplaceKnowledgeItemFileRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/ReplaceKnowledgeItemFileRequest.java new file mode 100644 index 0000000..c48bb24 --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/ReplaceKnowledgeItemFileRequest.java @@ -0,0 +1,19 @@ +package com.datamate.datamanagement.interfaces.dto; + +import jakarta.validation.constraints.NotNull; +import lombok.Getter; +import lombok.Setter; +import org.springframework.web.multipart.MultipartFile; + +/** + * Request DTO for replacing the file of a knowledge item. + */ +@Getter +@Setter +public class ReplaceKnowledgeItemFileRequest { + /** + * The replacement file; must not be null. + */ + @NotNull(message = "文件不能为空") + private MultipartFile file; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/KnowledgeItemController.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/KnowledgeItemController.java index 3bee452..dc3e497 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/KnowledgeItemController.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/KnowledgeItemController.java @@ -9,6 +9,7 @@ import com.datamate.datamanagement.interfaces.dto.CreateKnowledgeItemRequest; import com.datamate.datamanagement.interfaces.dto.ImportKnowledgeItemsRequest; import com.datamate.datamanagement.interfaces.dto.KnowledgeItemPagingQuery; import com.datamate.datamanagement.interfaces.dto.KnowledgeItemResponse; +import com.datamate.datamanagement.interfaces.dto.ReplaceKnowledgeItemFileRequest; import com.datamate.datamanagement.interfaces.dto.UpdateKnowledgeItemRequest; import 
com.datamate.datamanagement.interfaces.dto.UploadKnowledgeItemsRequest; import jakarta.servlet.http.HttpServletResponse; @@ -71,6 +72,14 @@ public class KnowledgeItemController { knowledgeItemApplicationService.downloadKnowledgeItemFile(setId, itemId, response); } + @IgnoreResponseWrap + @GetMapping("/{itemId}/preview") + public void previewKnowledgeItemFile(@PathVariable("setId") String setId, + @PathVariable("itemId") String itemId, + HttpServletResponse response) { + knowledgeItemApplicationService.previewKnowledgeItemFile(setId, itemId, response); + } + @GetMapping("/{itemId}") public KnowledgeItemResponse getKnowledgeItemById(@PathVariable("setId") String setId, @PathVariable("itemId") String itemId) { @@ -86,6 +95,14 @@ public class KnowledgeItemController { return KnowledgeConverter.INSTANCE.convertToResponse(knowledgeItem); } + @PutMapping(value = "/{itemId}/file", consumes = MediaType.MULTIPART_FORM_DATA_VALUE) + public KnowledgeItemResponse replaceKnowledgeItemFile(@PathVariable("setId") String setId, + @PathVariable("itemId") String itemId, + @Valid ReplaceKnowledgeItemFileRequest request) { + KnowledgeItem knowledgeItem = knowledgeItemApplicationService.replaceKnowledgeItemFile(setId, itemId, request); + return KnowledgeConverter.INSTANCE.convertToResponse(knowledgeItem); + } + @DeleteMapping("/{itemId}") public void deleteKnowledgeItem(@PathVariable("setId") String setId, @PathVariable("itemId") String itemId) { diff --git a/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts b/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts index f3c95cb..e44d185 100644 --- a/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts +++ b/frontend/src/pages/DataManagement/Detail/useFilesOperation.ts @@ -4,6 +4,7 @@ import type { } from "@/pages/DataManagement/dataset.model"; import { App } from "antd"; import { useState } from "react"; +import { PREVIEW_TEXT_MAX_LENGTH, resolvePreviewFileType, truncatePreviewText } from 
"@/utils/filePreview"; import { deleteDatasetFileUsingDelete, downloadFileByIdUsingGet, @@ -15,11 +16,6 @@ import { } from "../dataset.api"; import { useParams } from "react-router"; -const MAX_PREVIEW_LENGTH = 50000; -const TEXT_FILE_EXTENSIONS = [".json", ".jsonl", ".txt", ".csv", ".tsv", ".xml", ".md", ".yaml", ".yml"]; -const IMAGE_FILE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".svg"]; -const VIDEO_FILE_EXTENSIONS = [".mp4", ".webm", ".ogg", ".mov", ".avi"]; -const AUDIO_FILE_EXTENSIONS = [".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a"]; export function useFilesOperation(dataset: Dataset) { const { message } = App.useApp(); @@ -99,23 +95,6 @@ export function useFilesOperation(dataset: Dataset) { setSelectedFiles([]); // 清空选中状态 }; - const resolvePreviewFileType = (fileName?: string) => { - const lowerName = (fileName || "").toLowerCase(); - if (TEXT_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) { - return "text"; - } - if (IMAGE_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) { - return "image"; - } - if (VIDEO_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) { - return "video"; - } - if (AUDIO_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) { - return "audio"; - } - return null; - }; - const handlePreviewFile = async (file: DatasetFile) => { const datasetId = dataset?.id || id; if (!datasetId) { @@ -146,13 +125,7 @@ export function useFilesOperation(dataset: Dataset) { throw new Error("下载失败"); } const text = await response.text(); - if (text.length > MAX_PREVIEW_LENGTH) { - setPreviewContent( - `${text.slice(0, MAX_PREVIEW_LENGTH)}\n\n... 
(内容过长,仅显示前 ${MAX_PREVIEW_LENGTH} 字符)` - ); - } else { - setPreviewContent(text); - } + setPreviewContent(truncatePreviewText(text, PREVIEW_TEXT_MAX_LENGTH)); setPreviewVisible(true); } catch (error) { console.error("Preview file content error:", error); diff --git a/frontend/src/pages/KnowledgeManagement/Detail/KnowledgeSetDetail.tsx b/frontend/src/pages/KnowledgeManagement/Detail/KnowledgeSetDetail.tsx index fe5222b..56b8f51 100644 --- a/frontend/src/pages/KnowledgeManagement/Detail/KnowledgeSetDetail.tsx +++ b/frontend/src/pages/KnowledgeManagement/Detail/KnowledgeSetDetail.tsx @@ -42,8 +42,16 @@ import CreateKnowledgeSet from "../components/CreateKnowledgeSet"; import KnowledgeItemEditor from "../components/KnowledgeItemEditor"; import ImportKnowledgeItemsDialog from "../components/ImportKnowledgeItemsDialog"; import { formatDate } from "@/utils/unit"; +import { PREVIEW_TEXT_MAX_LENGTH, resolvePreviewFileType, truncatePreviewText } from "@/utils/filePreview"; -const MAX_READ_LENGTH = 50000; +const PREVIEW_MAX_HEIGHT = 500; +const PREVIEW_MODAL_WIDTH = { + text: 800, + media: 700, +}; +const PREVIEW_TEXT_FONT_SIZE = 12; +const PREVIEW_TEXT_PADDING = 12; +const PREVIEW_AUDIO_PADDING = 40; const KnowledgeSetDetail = () => { const navigate = useNavigate(); @@ -57,6 +65,12 @@ const KnowledgeSetDetail = () => { const [readModalOpen, setReadModalOpen] = useState(false); const [readContent, setReadContent] = useState(""); const [readTitle, setReadTitle] = useState(""); + const [previewVisible, setPreviewVisible] = useState(false); + const [previewContent, setPreviewContent] = useState(""); + const [previewFileName, setPreviewFileName] = useState(""); + const [previewFileType, setPreviewFileType] = useState<"text" | "image" | "video" | "audio">("text"); + const [previewMediaUrl, setPreviewMediaUrl] = useState(""); + const [previewLoadingItemId, setPreviewLoadingItemId] = useState(null); const fetchKnowledgeSet = useCallback(async () => { if (!id) return; @@ -123,6 
+137,66 @@ const KnowledgeSetDetail = () => { ); }; + const resolvePreviewFileName = (record: KnowledgeItemView) => { + if (record.sourceFileId) { + return record.sourceFileId; + } + if (record.content) { + const segments = record.content.split("/"); + const lastSegment = segments[segments.length - 1]; + if (lastSegment) { + return lastSegment; + } + } + return record.title || "文件"; + }; + + const handlePreviewItemFile = async (record: KnowledgeItemView) => { + if (!id) return; + const fileName = resolvePreviewFileName(record); + const fileType = resolvePreviewFileType(fileName); + if (!fileType) { + message.warning("不支持预览该文件类型"); + return; + } + + const previewUrl = `/api/data-management/knowledge-sets/${id}/items/${record.id}/preview`; + setPreviewFileName(fileName); + setPreviewFileType(fileType); + setPreviewContent(""); + setPreviewMediaUrl(""); + + if (fileType === "text") { + setPreviewLoadingItemId(record.id); + try { + const response = await fetch(previewUrl); + if (!response.ok) { + throw new Error("下载失败"); + } + const text = await response.text(); + setPreviewContent(truncatePreviewText(text, PREVIEW_TEXT_MAX_LENGTH)); + setPreviewVisible(true); + } catch (error) { + console.error("预览知识条目文件失败", error); + message.error("预览失败,请稍后重试"); + } finally { + setPreviewLoadingItemId(null); + } + return; + } + + setPreviewMediaUrl(previewUrl); + setPreviewVisible(true); + }; + + const closePreview = () => { + setPreviewVisible(false); + setPreviewContent(""); + setPreviewMediaUrl(""); + setPreviewFileName(""); + setPreviewFileType("text"); + }; + const handleReadItem = async (record: KnowledgeItemView) => { if ( record.contentType === KnowledgeContentType.FILE || @@ -136,13 +210,7 @@ const KnowledgeSetDetail = () => { if (!record.sourceDatasetId || !record.sourceFileId) { const content = record.content || ""; - if (content.length > MAX_READ_LENGTH) { - setReadContent( - `${content.slice(0, MAX_READ_LENGTH)}\n\n... 
(内容过长,仅显示前 ${MAX_READ_LENGTH} 字符)` - ); - } else { - setReadContent(content); - } + setReadContent(truncatePreviewText(content, PREVIEW_TEXT_MAX_LENGTH)); setReadModalOpen(true); setReadItemId(null); return; @@ -156,13 +224,7 @@ const KnowledgeSetDetail = () => { throw new Error("下载失败"); } const text = await response.text(); - if (text.length > MAX_READ_LENGTH) { - setReadContent( - `${text.slice(0, MAX_READ_LENGTH)}\n\n... (内容过长,仅显示前 ${MAX_READ_LENGTH} 字符)` - ); - } else { - setReadContent(text); - } + setReadContent(truncatePreviewText(text, PREVIEW_TEXT_MAX_LENGTH)); setReadModalOpen(true); } catch (error) { console.error("读取知识条目失败", error); @@ -273,6 +335,18 @@ const KnowledgeSetDetail = () => { /> )} + {(record.contentType === KnowledgeContentType.FILE || + record.sourceType === KnowledgeSourceType.FILE_UPLOAD) && ( + + , + ]} + width={previewFileType === "text" ? PREVIEW_MODAL_WIDTH.text : PREVIEW_MODAL_WIDTH.media} + > + {previewFileType === "text" && ( +
+            {previewContent}
+          
+ )} + {previewFileType === "image" && ( +
+ {previewFileName} +
+ )} + {previewFileType === "video" && ( +
+ +
+ )} + {previewFileType === "audio" && ( +
+ +
+ )} + + ([]); const [fileList, setFileList] = useState([]); + const [replaceFileList, setReplaceFileList] = useState([]); + const [titleBeforeReplace, setTitleBeforeReplace] = useState(null); const isMultiFile = fileList.length > 1; const isFileItem = data?.contentType === KnowledgeContentType.FILE || @@ -89,16 +92,21 @@ export default function KnowledgeItemEditor({ tags: data.tags?.map((tag) => tag.name) || [], metadata: data.metadata, }); + setTitleBeforeReplace(null); } else { form.resetFields(); form.setFieldsValue({ status: KnowledgeStatusType.DRAFT, tags: [], }); + setTitleBeforeReplace(null); } setFileList([]); + setReplaceFileList([]); } else { setFileList([]); + setReplaceFileList([]); + setTitleBeforeReplace(null); } }, [open, data, form]); @@ -138,6 +146,32 @@ export default function KnowledgeItemEditor({ return true; }; + const handleReplaceFileBeforeUpload = (file: File) => { + if (!titleBeforeReplace) { + setTitleBeforeReplace(form.getFieldValue("title") || null); + } + setReplaceFileList([ + { + uid: `${Date.now()}-${file.name}`, + name: file.name, + status: "done", + originFileObj: file, + }, + ]); + form.setFieldsValue({ title: stripFileExtension(file.name) }); + message.success("已选择替换文件,提交后生效"); + return false; + }; + + const handleReplaceFileRemove = (removedFile: UploadFile) => { + setReplaceFileList((prev) => prev.filter((file) => file.uid !== removedFile.uid)); + if (titleBeforeReplace !== null) { + form.setFieldsValue({ title: titleBeforeReplace || undefined }); + setTitleBeforeReplace(null); + } + return true; + }; + const handleDownloadFile = async () => { if (!data?.id) { return; @@ -166,13 +200,26 @@ export default function KnowledgeItemEditor({ } if (data?.id) { - const payload = { + const payload: Record = { ...values, validFrom, validTo, tags: values.tags || [], }; + if (replaceFileList.length > 0) { + delete payload.title; + } await updateKnowledgeItemByIdUsingPut(setId, data.id, payload); + if (replaceFileList.length > 0) { + const 
formData = new FormData(); + const replaceFile = replaceFileList[0]?.originFileObj as File | undefined; + if (!replaceFile) { + message.warning("请先选择要替换的文件"); + return; + } + formData.append("file", replaceFile); + await replaceKnowledgeItemFileUsingPut(setId, data.id, formData); + } message.success("知识条目更新成功"); } else { if (fileList.length === 0) { @@ -224,6 +271,8 @@ export default function KnowledgeItemEditor({ form.resetFields(); setFileList([]); + setReplaceFileList([]); + setTitleBeforeReplace(null); onSuccess(); } catch { message.error("操作失败,请重试"); @@ -289,6 +338,20 @@ export default function KnowledgeItemEditor({ )} + {data?.id && isFileItem && !readOnly && ( + + + + +
仅支持单文件,替换后标题自动更新
+
+ )}
diff --git a/frontend/src/pages/KnowledgeManagement/knowledge-management.api.ts b/frontend/src/pages/KnowledgeManagement/knowledge-management.api.ts
index 9c68811..110f5ee 100644
--- a/frontend/src/pages/KnowledgeManagement/knowledge-management.api.ts
+++ b/frontend/src/pages/KnowledgeManagement/knowledge-management.api.ts
@@ -50,6 +50,11 @@ export function updateKnowledgeItemByIdUsingPut(setId: string, itemId: string, d
   return put(`/api/data-management/knowledge-sets/${setId}/items/${itemId}`, data);
 }
 
+// Replace the file of a knowledge item (multipart upload)
+export function replaceKnowledgeItemFileUsingPut(setId: string, itemId: string, data: FormData) {
+  return put(`/api/data-management/knowledge-sets/${setId}/items/${itemId}/file`, data);
+}
+
 // 删除知识条目
 export function deleteKnowledgeItemByIdUsingDelete(setId: string, itemId: string) {
   return del(`/api/data-management/knowledge-sets/${setId}/items/${itemId}`);
diff --git a/frontend/src/utils/filePreview.ts b/frontend/src/utils/filePreview.ts
new file mode 100644
index 0000000..dd4f9d6
--- /dev/null
+++ b/frontend/src/utils/filePreview.ts
@@ -0,0 +1,50 @@
+export const PREVIEW_TEXT_MAX_LENGTH = 50000;
+
+const TEXT_FILE_EXTENSIONS = [
+  ".json",
+  ".jsonl",
+  ".txt",
+  ".csv",
+  ".tsv",
+  ".xml",
+  ".md",
+  ".yaml",
+  ".yml",
+];
+const IMAGE_FILE_EXTENSIONS = [
+  ".jpg",
+  ".jpeg",
+  ".png",
+  ".gif",
+  ".bmp",
+  ".webp",
+  ".svg",
+];
+const VIDEO_FILE_EXTENSIONS = [".mp4", ".webm", ".ogg", ".mov", ".avi"];
+const AUDIO_FILE_EXTENSIONS = [".mp3", ".wav", ".ogg", ".aac", ".flac", ".m4a"];
+
+export type PreviewFileType = "text" | "image" | "video" | "audio";
+
+export const resolvePreviewFileType = (fileName?: string): PreviewFileType | null => {
+  const lowerName = (fileName || "").toLowerCase();
+  if (TEXT_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
+    return "text";
+  }
+  if (IMAGE_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
+    return "image";
+  }
+  if (VIDEO_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
+    return "video";
+  }
+  if (AUDIO_FILE_EXTENSIONS.some((ext) => lowerName.endsWith(ext))) {
+    return "audio";
+  }
+  return null;
+};
+
+export const truncatePreviewText = (text: string, maxLength = PREVIEW_TEXT_MAX_LENGTH) => {
+  if (text.length > maxLength) {
+    return `${text.slice(0, maxLength)}\n\n... (内容过长,仅显示前 ${maxLength} 字符)`;
+  }
+  return text;
+};
diff --git a/frontend/src/utils/request.ts b/frontend/src/utils/request.ts
index 9470a05..760baf8 100644
--- a/frontend/src/utils/request.ts
+++ b/frontend/src/utils/request.ts
@@ -335,7 +335,7 @@ class Request {
    * @param {object} options - 额外的fetch选项,包括showLoading, onUploadProgress, onDownloadProgress
    */
   async put(url, data = null, options = {}) {
-    const config = {
+    let config = {
       method: "PUT",
       headers: {
         ...this.defaultHeaders,
@@ -345,6 +345,18 @@ class Request {
       ...options,
     };
 
+    const isFormData = data instanceof FormData;
+    if (isFormData) {
+      config = {
+        method: "PUT",
+        headers: {
+          ...options.headers,
+        },
+        body: data,
+        ...options,
+      };
+    }
+
     return this.request(this.baseURL + url, config);
   }
 
diff --git a/runtime/datamate-python/app/module/dataset/schema/pdf_extract.py b/runtime/datamate-python/app/module/dataset/schema/pdf_extract.py
new file mode 100644
index 0000000..2ab5767
--- /dev/null
+++ b/runtime/datamate-python/app/module/dataset/schema/pdf_extract.py
@@ -0,0 +1,25 @@
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class PdfTextExtractRequest(BaseModel):
+    """Request identifying the dataset and PDF file to extract text from."""
+
+    # Accept both the camelCase aliases and the snake_case field names on input.
+    model_config = ConfigDict(populate_by_name=True)
+
+    dataset_id: str = Field(..., alias="datasetId", description="Dataset ID")
+    file_id: str = Field(..., alias="fileId", description="PDF file ID")
+
+
+class PdfTextExtractResponse(BaseModel):
+    """Response describing the text file generated from a source PDF file."""
+
+    # Accept both the camelCase aliases and the snake_case field names on input.
+    model_config = ConfigDict(populate_by_name=True)
+
+    dataset_id: str = Field(..., alias="datasetId", description="Dataset ID")
+    source_file_id: str = Field(..., alias="sourceFileId", description="Source PDF file ID")
+    text_file_id: str = Field(..., alias="textFileId", description="Generated text file ID")
+    text_file_name: str = Field(..., alias="textFileName", description="Generated text file name")
+    text_file_path: str = Field(..., alias="textFilePath", description="Generated text file path")
+    text_file_size: int = Field(..., alias="textFileSize", description="Generated text file size")