You've already forked DataMate
feat: Add original file ID to document metadata in RagEtlService
This commit is contained in:
@@ -106,6 +106,7 @@ public class RagEtlService {
|
|||||||
document = new HtmlToTextDocumentTransformer().transform(document);
|
document = new HtmlToTextDocumentTransformer().transform(document);
|
||||||
}
|
}
|
||||||
document.metadata().put("rag_file_id", ragFile.getId());
|
document.metadata().put("rag_file_id", ragFile.getId());
|
||||||
|
document.metadata().put("original_file_id", ragFile.getFileId());
|
||||||
// 使用文档分块器对文档进行分块
|
// 使用文档分块器对文档进行分块
|
||||||
DocumentSplitter splitter = documentSplitter(event.addFilesReq());
|
DocumentSplitter splitter = documentSplitter(event.addFilesReq());
|
||||||
List<TextSegment> split = splitter.split(document);
|
List<TextSegment> split = splitter.split(document);
|
||||||
|
|||||||
Reference in New Issue
Block a user