Develop hsc (#58)

feature: 优化镜像构建/部署
This commit is contained in:
hhhhsc701
2025-11-06 17:14:54 +08:00
committed by GitHub
parent d84152b45f
commit f78475e29f
27 changed files with 540 additions and 304 deletions

View File

@@ -53,18 +53,17 @@ TAVILY_API_KEY=tvly-xxx
# MOI_RETRIEVAL_SIZE=10
# MOI_LIST_LIMIT=10
# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start )
# RAG_PROVIDER=milvus
# MILVUS_URI=<endpoint_of_self_hosted_milvus_or_zilliz_cloud>
# MILVUS_USER=<username_of_self_hosted_milvus_or_zilliz_cloud>
# MILVUS_PASSWORD=<password_of_self_hosted_milvus_or_zilliz_cloud>
MILVUS_URI=http://milvus-standalone:19530
MILVUS_USER=root
MILVUS_PASSWORD=Milvus
MILVUS_AUTO_LOAD_EXAMPLES=false
# MILVUS_COLLECTION=documents
# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope
# MILVUS_EMBEDDING_BASE_URL=
# MILVUS_EMBEDDING_MODEL=
# MILVUS_EMBEDDING_API_KEY=
# MILVUS_AUTO_LOAD_EXAMPLES=true
# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux)
# RAG_PROVIDER=milvus

View File

@@ -6,7 +6,6 @@ Description: MinerU PDF文本抽取
Create: 2025/10/29 17:24
"""
import json
import os
import time
from loguru import logger
from typing import Dict, Any
@@ -20,7 +19,7 @@ class MineruFormatter(Mapper):
def __init__(self, *args, **kwargs):
super(MineruFormatter, self).__init__(*args, **kwargs)
self.base_url = os.getenv("EXTERNAL_PDF_BASE_URL", "http://datamate-mineru:9001")
self.base_url = "http://datamate-mineru:9001"
self.pdf_extract_url = f"{self.base_url}/api/pdf-extract"
def execute(self, sample: Dict[str, Any]) -> Dict[str, Any]: