You've already forked DataMate
99
runtime/deer-flow/.env.example
Normal file
99
runtime/deer-flow/.env.example
Normal file
@@ -0,0 +1,99 @@
|
||||
# Application Settings
|
||||
DEBUG=True
|
||||
APP_ENV=development
|
||||
|
||||
# docker build args
|
||||
NEXT_PUBLIC_API_URL="/deer-flow-backend"
|
||||
|
||||
AGENT_RECURSION_LIMIT=30
|
||||
|
||||
# CORS settings
|
||||
# Comma-separated list of allowed origins for CORS requests
|
||||
# Example: ALLOWED_ORIGINS=http://localhost:3000,http://example.com
|
||||
ALLOWED_ORIGINS=*
|
||||
|
||||
# Enable or disable MCP server configuration, the default is false.
|
||||
# Please secure your front-end and back-end before enabling this feature in a managed environment.
|
||||
# Otherwise, your system could be compromised.
|
||||
ENABLE_MCP_SERVER_CONFIGURATION=true
|
||||
|
||||
# Enable or disable PYTHON_REPL configuration, the default is false.
|
||||
# Please secure your deployment before enabling this feature in a managed environment.
|
||||
# Otherwise, your system could be compromised.
|
||||
ENABLE_PYTHON_REPL=false
|
||||
|
||||
# Search Engine, Supported values: tavily (recommended), duckduckgo, brave_search, arxiv, searx
|
||||
SEARCH_API=tavily
|
||||
TAVILY_API_KEY=tvly-xxx
|
||||
# SEARX_HOST=xxx # Required only if SEARCH_API is searx (compatible with both Searx and SearxNG)
|
||||
# BRAVE_SEARCH_API_KEY=xxx # Required only if SEARCH_API is brave_search
|
||||
# JINA_API_KEY=jina_xxx # Optional, default is None
|
||||
|
||||
# Optional, RAG provider
|
||||
# RAG_PROVIDER=vikingdb_knowledge_base
|
||||
# VIKINGDB_KNOWLEDGE_BASE_API_URL="api-knowledgebase.mlp.cn-beijing.volces.com"
|
||||
# VIKINGDB_KNOWLEDGE_BASE_API_AK="AKxxx"
|
||||
# VIKINGDB_KNOWLEDGE_BASE_API_SK=""
|
||||
# VIKINGDB_KNOWLEDGE_BASE_RETRIEVAL_SIZE=15
|
||||
|
||||
# RAG_PROVIDER=ragflow
|
||||
# RAGFLOW_API_URL="http://localhost:9388"
|
||||
# RAGFLOW_API_KEY="ragflow-xxx"
|
||||
# RAGFLOW_RETRIEVAL_SIZE=10
|
||||
# RAGFLOW_CROSS_LANGUAGES=English,Chinese,Spanish,French,German,Japanese,Korean # Optional. To use RAGFlow's cross-language search, please separate each language with a single comma
|
||||
|
||||
# RAG_PROVIDER=dify
|
||||
# DIFY_API_URL="https://api.dify.ai/v1"
|
||||
# DIFY_API_KEY="dataset-xxx"
|
||||
|
||||
# MOI is a hybrid database that mainly serves enterprise users (https://www.matrixorigin.io/matrixone-intelligence)
|
||||
# RAG_PROVIDER=moi
|
||||
# MOI_API_URL="https://cluster.matrixonecloud.cn"
|
||||
# MOI_API_KEY="xxx-xxx-xxx-xxx"
|
||||
# MOI_RETRIEVAL_SIZE=10
|
||||
# MOI_LIST_LIMIT=10
|
||||
|
||||
|
||||
# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start )
|
||||
# RAG_PROVIDER=milvus
|
||||
# MILVUS_URI=<endpoint_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_USER=<username_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_PASSWORD=<password_of_self_hosted_milvus_or_zilliz_cloud>
|
||||
# MILVUS_COLLECTION=documents
|
||||
# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope
|
||||
# MILVUS_EMBEDDING_BASE_URL=
|
||||
# MILVUS_EMBEDDING_MODEL=
|
||||
# MILVUS_EMBEDDING_API_KEY=
|
||||
# MILVUS_AUTO_LOAD_EXAMPLES=true
|
||||
|
||||
# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux)
|
||||
# RAG_PROVIDER=milvus
|
||||
# MILVUS_URI=./milvus_demo.db
|
||||
# MILVUS_COLLECTION=documents
|
||||
# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope
|
||||
# MILVUS_EMBEDDING_BASE_URL=
|
||||
# MILVUS_EMBEDDING_MODEL=
|
||||
# MILVUS_EMBEDDING_API_KEY=
|
||||
# MILVUS_AUTO_LOAD_EXAMPLES=true
|
||||
|
||||
# Optional, volcengine TTS for generating podcast
|
||||
VOLCENGINE_TTS_APPID=xxx
|
||||
VOLCENGINE_TTS_ACCESS_TOKEN=xxx
|
||||
# VOLCENGINE_TTS_CLUSTER=volcano_tts # Optional, default is volcano_tts
|
||||
# VOLCENGINE_TTS_VOICE_TYPE=BV700_V2_streaming # Optional, default is BV700_V2_streaming
|
||||
|
||||
# Optional, for LangSmith tracing and monitoring
|
||||
# LANGSMITH_TRACING=true
|
||||
# LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
|
||||
# LANGSMITH_API_KEY="xxx"
|
||||
# LANGSMITH_PROJECT="xxx"
|
||||
|
||||
# [!NOTE]
|
||||
# For model settings and other configurations, please refer to `docs/configuration_guide.md`
|
||||
|
||||
# Optional, for the LangGraph MongoDB checkpointer
|
||||
# Enable LangGraph checkpoint saver, supports MongoDB, Postgres
|
||||
#LANGGRAPH_CHECKPOINT_SAVER=true
|
||||
# Set the database URL for saving checkpoints
|
||||
#LANGGRAPH_CHECKPOINT_DB_URL=mongodb://localhost:27017/
|
||||
#LANGGRAPH_CHECKPOINT_DB_URL=postgresql://localhost:5432/postgres
|
||||
71
runtime/deer-flow/conf.yaml.example
Normal file
71
runtime/deer-flow/conf.yaml.example
Normal file
@@ -0,0 +1,71 @@
|
||||
# [!NOTE]
|
||||
# Read the `docs/configuration_guide.md` carefully, and update the
|
||||
# configurations to match your specific settings and requirements.
|
||||
# - Replace `api_key` with your own credentials.
|
||||
# - Replace `base_url` and `model` name if you want to use a custom model.
|
||||
# - Set `verify_ssl` to `false` if your LLM server uses self-signed certificates
|
||||
# - A restart is required every time you change the `conf.yaml` file.
|
||||
|
||||
BASIC_MODEL:
|
||||
base_url: https://ark.cn-beijing.volces.com/api/v3
|
||||
model: "doubao-1-5-pro-32k-250115"
|
||||
api_key: xxxx
|
||||
# max_retries: 3 # Maximum number of retries for LLM calls
|
||||
# verify_ssl: false # Uncomment this line to disable SSL certificate verification for self-signed certificates
|
||||
|
||||
# Local model configuration example:
|
||||
|
||||
# Ollama (Tested and supported for local development)
|
||||
# BASIC_MODEL:
|
||||
# base_url: "http://localhost:11434/v1" # Ollama OpenAI compatible endpoint
|
||||
# model: "qwen3:14b" # or "llama3.2", etc.
|
||||
# api_key: "ollama" # Ollama doesn't need real API key
|
||||
# max_retries: 3
|
||||
# verify_ssl: false # Local deployment usually doesn't need SSL verification
|
||||
|
||||
# To use Google AI Studio as your basic platform:
|
||||
# BASIC_MODEL:
|
||||
# platform: "google_aistudio"
|
||||
# model: "gemini-2.5-flash" # or "gemini-1.5-pro", "gemini-2.5-flash-exp", etc.
|
||||
# api_key: your_gemini_api_key # Get from https://aistudio.google.com/app/apikey
|
||||
# max_retries: 3
|
||||
|
||||
# Reasoning model is optional.
|
||||
# Uncomment the following settings if you want to use reasoning model
|
||||
# for planning.
|
||||
|
||||
# REASONING_MODEL:
|
||||
# base_url: https://ark.cn-beijing.volces.com/api/v3
|
||||
# model: "doubao-1-5-thinking-pro-m-250428"
|
||||
# api_key: xxxx
|
||||
# max_retries: 3 # Maximum number of retries for LLM calls
|
||||
|
||||
|
||||
# OTHER SETTINGS:
|
||||
# Search engine configuration (Only supports Tavily currently)
|
||||
# SEARCH_ENGINE:
|
||||
# engine: tavily
|
||||
# # Only include results from these domains
|
||||
# include_domains:
|
||||
# - example.com
|
||||
# - trusted-news.com
|
||||
# - reliable-source.org
|
||||
# - gov.cn
|
||||
# - edu.cn
|
||||
# # Exclude results from these domains
|
||||
# exclude_domains:
|
||||
# - example.com
|
||||
# # Include an answer in the search results
|
||||
# include_answer: false
|
||||
# # Search depth: "basic" or "advanced"
|
||||
# search_depth: "advanced"
|
||||
# # Include raw content from pages
|
||||
# include_raw_content: true
|
||||
# # Include images in search results
|
||||
# include_images: true
|
||||
# # Include descriptions for images
|
||||
# include_image_descriptions: true
|
||||
# # Minimum score threshold for results (0-1)
|
||||
# min_score_threshold: 0.0
|
||||
# # Maximum content length per page
|
||||
# max_content_length_per_page: 4000
|
||||
177
runtime/deer-flow/feature_collection.patch
Normal file
177
runtime/deer-flow/feature_collection.patch
Normal file
@@ -0,0 +1,177 @@
|
||||
diff --git a/src/rag/milvus.py b/src/rag/milvus.py
|
||||
index de589d4..c1b9b98 100644
|
||||
--- a/src/rag/milvus.py
|
||||
+++ b/src/rag/milvus.py
|
||||
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
|
||||
from langchain_milvus.vectorstores import Milvus as LangchainMilvus
|
||||
from langchain_openai import OpenAIEmbeddings
|
||||
from openai import OpenAI
|
||||
-from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient
|
||||
+from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, utility
|
||||
|
||||
from src.config.loader import get_bool_env, get_int_env, get_str_env
|
||||
from src.rag.retriever import Chunk, Document, Resource, Retriever
|
||||
@@ -397,6 +397,36 @@ class MilvusRetriever(Retriever):
|
||||
except Exception as e:
|
||||
raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
||||
|
||||
+ def _connect_with_collection(self, collection_name) -> None:
|
||||
+ """Create the underlying Milvus client (idempotent)."""
|
||||
+ try:
|
||||
+ # Check if using Milvus Lite (file-based) vs server-based Milvus
|
||||
+ if self._is_milvus_lite():
|
||||
+ # Use MilvusClient for Milvus Lite (local file database)
|
||||
+ self.client = MilvusClient(self.uri)
|
||||
+ # Ensure collection exists
|
||||
+ self._ensure_collection_exists()
|
||||
+ else:
|
||||
+ connection_args = {
|
||||
+ "uri": self.uri,
|
||||
+ }
|
||||
+ # Add user/password only if provided
|
||||
+ if self.user:
|
||||
+ connection_args["user"] = self.user
|
||||
+ if self.password:
|
||||
+ connection_args["password"] = self.password
|
||||
+
|
||||
+ # Create LangChain client (it will handle collection creation automatically)
|
||||
+ self.client = LangchainMilvus(
|
||||
+ embedding_function=self.embedding_model,
|
||||
+ collection_name=collection_name,
|
||||
+ connection_args=connection_args,
|
||||
+ # optional (if collection already exists with different schema, be careful)
|
||||
+ drop_old=False,
|
||||
+ )
|
||||
+ except Exception as e:
|
||||
+ raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
||||
+
|
||||
def _is_milvus_lite(self) -> bool:
|
||||
"""Return True if the URI points to a local Milvus Lite file.
|
||||
Milvus Lite uses local file paths (often ``*.db``) without an HTTP/HTTPS
|
||||
@@ -476,26 +506,12 @@ class MilvusRetriever(Retriever):
|
||||
else:
|
||||
# Use similarity_search_by_vector for lightweight listing.
|
||||
# If a query is provided embed it; else use a zero vector.
|
||||
- docs: Iterable[Any] = self.client.similarity_search(
|
||||
- query,
|
||||
- k=100,
|
||||
- expr="source == 'examples'", # Limit to 100 results
|
||||
- )
|
||||
- for d in docs:
|
||||
- meta = getattr(d, "metadata", {}) or {}
|
||||
- # check if the resource is in the list of resources
|
||||
- if resources and any(
|
||||
- r.uri == meta.get(self.url_field, "")
|
||||
- or r.uri == f"milvus://{meta.get(self.id_field, '')}"
|
||||
- for r in resources
|
||||
- ):
|
||||
- continue
|
||||
+ connections = utility.list_collections(using=f"{self.uri}-{self.user}")
|
||||
+ for connection in connections:
|
||||
resources.append(
|
||||
Resource(
|
||||
- uri=meta.get(self.url_field, "")
|
||||
- or f"milvus://{meta.get(self.id_field, '')}",
|
||||
- title=meta.get(self.title_field, "")
|
||||
- or meta.get(self.id_field, "Unnamed"),
|
||||
+ uri=f"milvus://{connection}",
|
||||
+ title=connection,
|
||||
description="Stored Milvus document",
|
||||
)
|
||||
)
|
||||
@@ -621,38 +637,32 @@ class MilvusRetriever(Retriever):
|
||||
|
||||
else:
|
||||
# For LangChain Milvus, use similarity search
|
||||
- search_results = self.client.similarity_search_with_score(
|
||||
- query=query, k=self.top_k
|
||||
- )
|
||||
+ if not resources:
|
||||
+ return []
|
||||
|
||||
documents = {}
|
||||
+ for resource in resources:
|
||||
+ self._connect_with_collection(resource.title)
|
||||
+ search_results = self.client.similarity_search_with_score(
|
||||
+ query=query, k=self.top_k
|
||||
+ )
|
||||
|
||||
- for doc, score in search_results:
|
||||
- metadata = doc.metadata or {}
|
||||
- doc_id = metadata.get(self.id_field, "")
|
||||
- title = metadata.get(self.title_field, "")
|
||||
- url = metadata.get(self.url_field, "")
|
||||
- content = doc.page_content
|
||||
-
|
||||
- # Skip if resource filtering is requested and this doc is not in the list
|
||||
- if resources:
|
||||
- doc_in_resources = False
|
||||
- for resource in resources:
|
||||
- if (url and url in resource.uri) or doc_id in resource.uri:
|
||||
- doc_in_resources = True
|
||||
- break
|
||||
- if not doc_in_resources:
|
||||
- continue
|
||||
-
|
||||
- # Create or update document
|
||||
- if doc_id not in documents:
|
||||
- documents[doc_id] = Document(
|
||||
- id=doc_id, url=url, title=title, chunks=[]
|
||||
- )
|
||||
+ for doc, score in search_results:
|
||||
+ metadata = doc.metadata or {}
|
||||
+ doc_id = metadata.get(self.id_field, "")
|
||||
+ title = metadata.get(self.title_field, "")
|
||||
+ url = metadata.get(self.url_field, "")
|
||||
+ content = doc.page_content
|
||||
+
|
||||
+ # Create or update document
|
||||
+ if doc_id not in documents:
|
||||
+ documents[doc_id] = Document(
|
||||
+ id=doc_id, url=url, title=title, chunks=[]
|
||||
+ )
|
||||
|
||||
- # Add chunk to document
|
||||
- chunk = Chunk(content=content, similarity=score)
|
||||
- documents[doc_id].chunks.append(chunk)
|
||||
+ # Add chunk to document
|
||||
+ chunk = Chunk(content=content, similarity=score)
|
||||
+ documents[doc_id].chunks.append(chunk)
|
||||
|
||||
return list(documents.values())
|
||||
|
||||
diff --git a/web/src/components/deer-flow/theme-provider-wrapper.tsx b/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
||||
index 6da0db8..1a99bcf 100644
|
||||
--- a/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
||||
+++ b/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
||||
@@ -18,9 +18,9 @@ export function ThemeProviderWrapper({
|
||||
return (
|
||||
<ThemeProvider
|
||||
attribute="class"
|
||||
- defaultTheme={"dark"}
|
||||
+ defaultTheme={"light"}
|
||||
enableSystem={isChatPage}
|
||||
- forcedTheme={isChatPage ? undefined : "dark"}
|
||||
+ forcedTheme={isChatPage ? undefined : "light"}
|
||||
disableTransitionOnChange
|
||||
>
|
||||
{children}
|
||||
diff --git a/web/src/core/api/resolve-service-url.ts b/web/src/core/api/resolve-service-url.ts
|
||||
index a87b777..d93e987 100644
|
||||
--- a/web/src/core/api/resolve-service-url.ts
|
||||
+++ b/web/src/core/api/resolve-service-url.ts
|
||||
@@ -4,9 +4,13 @@
|
||||
import { env } from "~/env";
|
||||
|
||||
export function resolveServiceURL(path: string) {
|
||||
- let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "http://localhost:8000/api/";
|
||||
+ let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "/api/";
|
||||
if (!BASE_URL.endsWith("/")) {
|
||||
BASE_URL += "/";
|
||||
}
|
||||
+
|
||||
+ const origin = window.location.origin;
|
||||
+ BASE_URL = origin + BASE_URL;
|
||||
+
|
||||
return new URL(path, BASE_URL).toString();
|
||||
}
|
||||
Reference in New Issue
Block a user