feature: 对接deer-flow (#54)

feature: 对接deer-flow
This commit is contained in:
hhhhsc701
2025-11-04 20:30:40 +08:00
committed by GitHub
parent dc30b0d892
commit f3958f08d9
710 changed files with 112812 additions and 52 deletions

View File

@@ -0,0 +1,99 @@
# Application Settings
DEBUG=True
APP_ENV=development
# docker build args
NEXT_PUBLIC_API_URL="/deer-flow-backend"
AGENT_RECURSION_LIMIT=30
# CORS settings
# Comma-separated list of allowed origins for CORS requests
# Example: ALLOWED_ORIGINS=http://localhost:3000,http://example.com
ALLOWED_ORIGINS=*
# Enable or disable MCP server configuration, the default is false.
# Please enable this feature before securing your front-end and back-end in a managed environment.
# Otherwise, your system could be compromised.
ENABLE_MCP_SERVER_CONFIGURATION=true
# Enable or disable PYTHON_REPL configuration, the default is false.
# Please enable this feature before securing your front-end and back-end in a managed environment.
# Otherwise, your system could be compromised.
ENABLE_PYTHON_REPL=false
# Search Engine, Supported values: tavily (recommended), duckduckgo, brave_search, arxiv, searx
SEARCH_API=tavily
TAVILY_API_KEY=tvly-xxx
# SEARX_HOST=xxx # Required only if SEARCH_API is searx.(compatible with both Searx and SearxNG)
# BRAVE_SEARCH_API_KEY=xxx # Required only if SEARCH_API is brave_search
# JINA_API_KEY=jina_xxx # Optional, default is None
# Optional, RAG provider
# RAG_PROVIDER=vikingdb_knowledge_base
# VIKINGDB_KNOWLEDGE_BASE_API_URL="api-knowledgebase.mlp.cn-beijing.volces.com"
# VIKINGDB_KNOWLEDGE_BASE_API_AK="AKxxx"
# VIKINGDB_KNOWLEDGE_BASE_API_SK=""
# VIKINGDB_KNOWLEDGE_BASE_RETRIEVAL_SIZE=15
# RAG_PROVIDER=ragflow
# RAGFLOW_API_URL="http://localhost:9388"
# RAGFLOW_API_KEY="ragflow-xxx"
# RAGFLOW_RETRIEVAL_SIZE=10
# RAGFLOW_CROSS_LANGUAGES=English,Chinese,Spanish,French,German,Japanese,Korean # Optional. To use RAGFlow's cross-language search, please separate each language with a single comma
# RAG_PROVIDER=dify
# DIFY_API_URL="https://api.dify.ai/v1"
# DIFY_API_KEY="dataset-xxx"
# MOI is a hybrid database that mainly serves enterprise users (https://www.matrixorigin.io/matrixone-intelligence)
# RAG_PROVIDER=moi
# MOI_API_URL="https://cluster.matrixonecloud.cn"
# MOI_API_KEY="xxx-xxx-xxx-xxx"
# MOI_RETRIEVAL_SIZE=10
# MOI_LIST_LIMIT=10
# RAG_PROVIDER: milvus (using free milvus instance on zilliz cloud: https://docs.zilliz.com/docs/quick-start )
# RAG_PROVIDER=milvus
# MILVUS_URI=<endpoint_of_self_hosted_milvus_or_zilliz_cloud>
# MILVUS_USER=<username_of_self_hosted_milvus_or_zilliz_cloud>
# MILVUS_PASSWORD=<password_of_self_hosted_milvus_or_zilliz_cloud>
# MILVUS_COLLECTION=documents
# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope
# MILVUS_EMBEDDING_BASE_URL=
# MILVUS_EMBEDDING_MODEL=
# MILVUS_EMBEDDING_API_KEY=
# MILVUS_AUTO_LOAD_EXAMPLES=true
# RAG_PROVIDER: milvus (using milvus lite on Mac or Linux)
# RAG_PROVIDER=milvus
# MILVUS_URI=./milvus_demo.db
# MILVUS_COLLECTION=documents
# MILVUS_EMBEDDING_PROVIDER=openai # support openai,dashscope
# MILVUS_EMBEDDING_BASE_URL=
# MILVUS_EMBEDDING_MODEL=
# MILVUS_EMBEDDING_API_KEY=
# MILVUS_AUTO_LOAD_EXAMPLES=true
# Optional, volcengine TTS for generating podcast
VOLCENGINE_TTS_APPID=xxx
VOLCENGINE_TTS_ACCESS_TOKEN=xxx
# VOLCENGINE_TTS_CLUSTER=volcano_tts # Optional, default is volcano_tts
# VOLCENGINE_TTS_VOICE_TYPE=BV700_V2_streaming # Optional, default is BV700_V2_streaming
# Optional, for LangSmith tracing and monitoring
# LANGSMITH_TRACING=true
# LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
# LANGSMITH_API_KEY="xxx"
# LANGSMITH_PROJECT="xxx"
# [!NOTE]
# For model settings and other configurations, please refer to `docs/configuration_guide.md`
# Optional, for LangGraph MongoDB checkpointer
# Enable LangGraph checkpoint saver, supports MongoDB, Postgres
#LANGGRAPH_CHECKPOINT_SAVER=true
# Set the database URL for saving checkpoints
#LANGGRAPH_CHECKPOINT_DB_URL=mongodb://localhost:27017/
#LANGGRAPH_CHECKPOINT_DB_URL=postgresql://localhost:5432/postgres

View File

@@ -0,0 +1,71 @@
# [!NOTE]
# Read the `docs/configuration_guide.md` carefully, and update the
# configurations to match your specific settings and requirements.
# - Replace `api_key` with your own credentials.
# - Replace `base_url` and `model` name if you want to use a custom model.
# - Set `verify_ssl` to `false` if your LLM server uses self-signed certificates
# - A restart is required every time you change the `conf.yaml` file.
BASIC_MODEL:
base_url: https://ark.cn-beijing.volces.com/api/v3
model: "doubao-1-5-pro-32k-250115"
api_key: xxxx
# max_retries: 3 # Maximum number of retries for LLM calls
# verify_ssl: false # Uncomment this line to disable SSL certificate verification for self-signed certificates
# Local model configuration example:
# Ollama (Tested and supported for local development)
# BASIC_MODEL:
# base_url: "http://localhost:11434/v1" # Ollama OpenAI compatible endpoint
# model: "qwen3:14b" # or "llama3.2", etc.
# api_key: "ollama" # Ollama doesn't need real API key
# max_retries: 3
# verify_ssl: false # Local deployment usually doesn't need SSL verification
# To use Google AI Studio as your basic platform:
# BASIC_MODEL:
# platform: "google_aistudio"
# model: "gemini-2.5-flash" # or "gemini-1.5-pro", "gemini-2.5-flash-exp", etc.
# api_key: your_gemini_api_key # Get from https://aistudio.google.com/app/apikey
# max_retries: 3
# Reasoning model is optional.
# Uncomment the following settings if you want to use reasoning model
# for planning.
# REASONING_MODEL:
# base_url: https://ark.cn-beijing.volces.com/api/v3
# model: "doubao-1-5-thinking-pro-m-250428"
# api_key: xxxx
# max_retries: 3 # Maximum number of retries for LLM calls
# OTHER SETTINGS:
# Search engine configuration (Only supports Tavily currently)
# SEARCH_ENGINE:
# engine: tavily
# # Only include results from these domains
# include_domains:
# - example.com
# - trusted-news.com
# - reliable-source.org
# - gov.cn
# - edu.cn
# # Exclude results from these domains
# exclude_domains:
# - example.com
# # Include an answer in the search results
# include_answer: false
# # Search depth: "basic" or "advanced"
# search_depth: "advanced"
# # Include raw content from pages
# include_raw_content: true
# # Include images in search results
# include_images: true
# # Include descriptions for images
# include_image_descriptions: true
# # Minimum score threshold for results (0-1)
# min_score_threshold: 0.0
# # Maximum content length per page
# max_content_length_per_page: 4000

View File

@@ -0,0 +1,177 @@
diff --git a/src/rag/milvus.py b/src/rag/milvus.py
index de589d4..c1b9b98 100644
--- a/src/rag/milvus.py
+++ b/src/rag/milvus.py
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
from langchain_milvus.vectorstores import Milvus as LangchainMilvus
from langchain_openai import OpenAIEmbeddings
from openai import OpenAI
-from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient
+from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, utility
from src.config.loader import get_bool_env, get_int_env, get_str_env
from src.rag.retriever import Chunk, Document, Resource, Retriever
@@ -397,6 +397,36 @@ class MilvusRetriever(Retriever):
except Exception as e:
raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
+ def _connect_with_collection(self, collection_name) -> None:
+ """Create the underlying Milvus client (idempotent)."""
+ try:
+ # Check if using Milvus Lite (file-based) vs server-based Milvus
+ if self._is_milvus_lite():
+ # Use MilvusClient for Milvus Lite (local file database)
+ self.client = MilvusClient(self.uri)
+ # Ensure collection exists
+ self._ensure_collection_exists()
+ else:
+ connection_args = {
+ "uri": self.uri,
+ }
+ # Add user/password only if provided
+ if self.user:
+ connection_args["user"] = self.user
+ if self.password:
+ connection_args["password"] = self.password
+
+ # Create LangChain client (it will handle collection creation automatically)
+ self.client = LangchainMilvus(
+ embedding_function=self.embedding_model,
+ collection_name=collection_name,
+ connection_args=connection_args,
+ # optional (if collection already exists with different schema, be careful)
+ drop_old=False,
+ )
+ except Exception as e:
+ raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
+
def _is_milvus_lite(self) -> bool:
"""Return True if the URI points to a local Milvus Lite file.
Milvus Lite uses local file paths (often ``*.db``) without an HTTP/HTTPS
@@ -476,26 +506,12 @@ class MilvusRetriever(Retriever):
else:
# Use similarity_search_by_vector for lightweight listing.
# If a query is provided embed it; else use a zero vector.
- docs: Iterable[Any] = self.client.similarity_search(
- query,
- k=100,
- expr="source == 'examples'", # Limit to 100 results
- )
- for d in docs:
- meta = getattr(d, "metadata", {}) or {}
- # check if the resource is in the list of resources
- if resources and any(
- r.uri == meta.get(self.url_field, "")
- or r.uri == f"milvus://{meta.get(self.id_field, '')}"
- for r in resources
- ):
- continue
+ connections = utility.list_collections(using=f"{self.uri}-{self.user}")
+ for connection in connections:
resources.append(
Resource(
- uri=meta.get(self.url_field, "")
- or f"milvus://{meta.get(self.id_field, '')}",
- title=meta.get(self.title_field, "")
- or meta.get(self.id_field, "Unnamed"),
+ uri=f"milvus://{connection}",
+ title=connection,
description="Stored Milvus document",
)
)
@@ -621,38 +637,32 @@ class MilvusRetriever(Retriever):
else:
# For LangChain Milvus, use similarity search
- search_results = self.client.similarity_search_with_score(
- query=query, k=self.top_k
- )
+ if not resources:
+ return []
documents = {}
+ for resource in resources:
+ self._connect_with_collection(resource.title)
+ search_results = self.client.similarity_search_with_score(
+ query=query, k=self.top_k
+ )
- for doc, score in search_results:
- metadata = doc.metadata or {}
- doc_id = metadata.get(self.id_field, "")
- title = metadata.get(self.title_field, "")
- url = metadata.get(self.url_field, "")
- content = doc.page_content
-
- # Skip if resource filtering is requested and this doc is not in the list
- if resources:
- doc_in_resources = False
- for resource in resources:
- if (url and url in resource.uri) or doc_id in resource.uri:
- doc_in_resources = True
- break
- if not doc_in_resources:
- continue
-
- # Create or update document
- if doc_id not in documents:
- documents[doc_id] = Document(
- id=doc_id, url=url, title=title, chunks=[]
- )
+ for doc, score in search_results:
+ metadata = doc.metadata or {}
+ doc_id = metadata.get(self.id_field, "")
+ title = metadata.get(self.title_field, "")
+ url = metadata.get(self.url_field, "")
+ content = doc.page_content
+
+ # Create or update document
+ if doc_id not in documents:
+ documents[doc_id] = Document(
+ id=doc_id, url=url, title=title, chunks=[]
+ )
- # Add chunk to document
- chunk = Chunk(content=content, similarity=score)
- documents[doc_id].chunks.append(chunk)
+ # Add chunk to document
+ chunk = Chunk(content=content, similarity=score)
+ documents[doc_id].chunks.append(chunk)
return list(documents.values())
diff --git a/web/src/components/deer-flow/theme-provider-wrapper.tsx b/web/src/components/deer-flow/theme-provider-wrapper.tsx
index 6da0db8..1a99bcf 100644
--- a/web/src/components/deer-flow/theme-provider-wrapper.tsx
+++ b/web/src/components/deer-flow/theme-provider-wrapper.tsx
@@ -18,9 +18,9 @@ export function ThemeProviderWrapper({
return (
<ThemeProvider
attribute="class"
- defaultTheme={"dark"}
+ defaultTheme={"light"}
enableSystem={isChatPage}
- forcedTheme={isChatPage ? undefined : "dark"}
+ forcedTheme={isChatPage ? undefined : "light"}
disableTransitionOnChange
>
{children}
diff --git a/web/src/core/api/resolve-service-url.ts b/web/src/core/api/resolve-service-url.ts
index a87b777..d93e987 100644
--- a/web/src/core/api/resolve-service-url.ts
+++ b/web/src/core/api/resolve-service-url.ts
@@ -4,9 +4,13 @@
import { env } from "~/env";
export function resolveServiceURL(path: string) {
- let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "http://localhost:8000/api/";
+ let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "/api/";
if (!BASE_URL.endsWith("/")) {
BASE_URL += "/";
}
+
+ const origin = window.location.origin;
+ BASE_URL = origin + BASE_URL;
+
return new URL(path, BASE_URL).toString();
}