You've already forked DataMate
@@ -53,18 +53,17 @@ TAVILY_API_KEY=tvly-xxx
|
||||
# MOI_RETRIEVAL_SIZE=10
|
||||
# MOI_LIST_LIMIT=10
|
||||
|
||||
|
||||
# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start )
|
||||
# RAG_PROVIDER=milvus
|
||||
# MILVUS_URI=<endpoint_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_USER=<username_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_PASSWORD=<password_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
MILVUS_URI=http://milvus-standalone:19530
|
||||
MILVUS_USER=root
|
||||
MILVUS_PASSWORD=Milvus
|
||||
MILVUS_AUTO_LOAD_EXAMPLES=false
|
||||
# MILVUS_COLLECTION=documents
|
||||
# MILVUS_EMBEDDING_PROVIDER=openai # supported values: openai, dashscope
|
||||
# MILVUS_EMBEDDING_BASE_URL=
|
||||
# MILVUS_EMBEDDING_MODEL=
|
||||
# MILVUS_EMBEDDING_API_KEY=
|
||||
# MILVUS_AUTO_LOAD_EXAMPLES=true
|
||||
|
||||
# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux)
|
||||
# RAG_PROVIDER=milvus
|
||||
|
||||
@@ -6,7 +6,6 @@ Description: MinerU PDF文本抽取
|
||||
Create: 2025/10/29 17:24
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
from loguru import logger
|
||||
from typing import Dict, Any
|
||||
@@ -20,7 +19,7 @@ class MineruFormatter(Mapper):
|
||||
|
||||
def __init__(self, *args, **kwargs):
    """Initialise the formatter and resolve the PDF-extract endpoint.

    All positional and keyword arguments are forwarded unchanged to the
    parent class initialiser.

    The service base URL is read from the ``EXTERNAL_PDF_BASE_URL``
    environment variable and falls back to ``http://datamate-mineru:9001``
    when the variable is unset or empty. ``self.pdf_extract_url`` is the
    base URL with ``/api/pdf-extract`` appended.
    """
    super(MineruFormatter, self).__init__(*args, **kwargs)
    # Keep a single, environment-driven assignment: a second unconditional
    # assignment of the literal URL would silently discard the override.
    # `or` (rather than only the getenv default) also covers a variable that
    # is present but empty.
    configured_url = os.getenv("EXTERNAL_PDF_BASE_URL") or "http://datamate-mineru:9001"
    # Drop any trailing slash so the joined endpoint never contains "//".
    self.base_url = configured_url.rstrip("/")
    self.pdf_extract_url = f"{self.base_url}/api/pdf-extract"
|
||||
|
||||
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
||||
Reference in New Issue
Block a user