You've already forked DataMate
178 lines
7.9 KiB
Diff
178 lines
7.9 KiB
Diff
diff --git a/src/rag/milvus.py b/src/rag/milvus.py
|
|
index de589d4..c1b9b98 100644
|
|
--- a/src/rag/milvus.py
|
|
+++ b/src/rag/milvus.py
|
|
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
|
|
from langchain_milvus.vectorstores import Milvus as LangchainMilvus
|
|
from langchain_openai import OpenAIEmbeddings
|
|
from openai import OpenAI
|
|
-from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient
|
|
+from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, utility
|
|
|
|
from src.config.loader import get_bool_env, get_int_env, get_str_env
|
|
from src.rag.retriever import Chunk, Document, Resource, Retriever
|
|
@@ -397,6 +397,36 @@ class MilvusRetriever(Retriever):
|
|
except Exception as e:
|
|
raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
|
|
|
+ def _connect_with_collection(self, collection_name) -> None:
|
|
+ """Create the underlying Milvus client (idempotent); server-based Milvus is bound to ``collection_name``."""
|
|
+ try:
|
|
+ # Check if using Milvus Lite (file-based) vs server-based Milvus
|
|
+ if self._is_milvus_lite():
|
|
+ # Use MilvusClient for Milvus Lite (local file database)
|
|
+ self.client = MilvusClient(self.uri)
|
|
+ # Ensure collection exists
|
|
+ self._ensure_collection_exists()
|
|
+ else:
|
|
+ connection_args = {
|
|
+ "uri": self.uri,
|
|
+ }
|
|
+ # Add user/password only if provided
|
|
+ if self.user:
|
|
+ connection_args["user"] = self.user
|
|
+ if self.password:
|
|
+ connection_args["password"] = self.password
|
|
+
|
|
+ # Create LangChain client (it will handle collection creation automatically)
|
|
+ self.client = LangchainMilvus(
|
|
+ embedding_function=self.embedding_model,
|
|
+ collection_name=collection_name,
|
|
+ connection_args=connection_args,
|
|
+ # optional (if collection already exists with different schema, be careful)
|
|
+ drop_old=False,
|
|
+ )
|
|
+ except Exception as e:
|
|
+ raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
|
|
+
|
|
def _is_milvus_lite(self) -> bool:
|
|
"""Return True if the URI points to a local Milvus Lite file.
|
|
Milvus Lite uses local file paths (often ``*.db``) without an HTTP/HTTPS
|
|
@@ -476,26 +506,12 @@ class MilvusRetriever(Retriever):
|
|
else:
|
|
# Use similarity_search_by_vector for lightweight listing.
|
|
# If a query is provided embed it; else use a zero vector.
|
|
- docs: Iterable[Any] = self.client.similarity_search(
|
|
- query,
|
|
- k=100,
|
|
- expr="source == 'examples'", # Limit to 100 results
|
|
- )
|
|
- for d in docs:
|
|
- meta = getattr(d, "metadata", {}) or {}
|
|
- # check if the resource is in the list of resources
|
|
- if resources and any(
|
|
- r.uri == meta.get(self.url_field, "")
|
|
- or r.uri == f"milvus://{meta.get(self.id_field, '')}"
|
|
- for r in resources
|
|
- ):
|
|
- continue
|
|
+ connections = utility.list_collections(using=f"{self.uri}-{self.user}")
|
|
+ for connection in connections:
|
|
resources.append(
|
|
Resource(
|
|
- uri=meta.get(self.url_field, "")
|
|
- or f"milvus://{meta.get(self.id_field, '')}",
|
|
- title=meta.get(self.title_field, "")
|
|
- or meta.get(self.id_field, "Unnamed"),
|
|
+ uri=f"milvus://{connection}",
|
|
+ title=connection,
|
|
description="Stored Milvus document",
|
|
)
|
|
)
|
|
@@ -621,38 +637,32 @@ class MilvusRetriever(Retriever):
|
|
|
|
else:
|
|
# For LangChain Milvus, use similarity search
|
|
- search_results = self.client.similarity_search_with_score(
|
|
- query=query, k=self.top_k
|
|
- )
|
|
+ if not resources:
|
|
+ return []
|
|
|
|
documents = {}
|
|
+ for resource in resources:
|
|
+ self._connect_with_collection(resource.title)
|
|
+ search_results = self.client.similarity_search_with_score(
|
|
+ query=query, k=self.top_k
|
|
+ )
|
|
|
|
- for doc, score in search_results:
|
|
- metadata = doc.metadata or {}
|
|
- doc_id = metadata.get(self.id_field, "")
|
|
- title = metadata.get(self.title_field, "")
|
|
- url = metadata.get(self.url_field, "")
|
|
- content = doc.page_content
|
|
-
|
|
- # Skip if resource filtering is requested and this doc is not in the list
|
|
- if resources:
|
|
- doc_in_resources = False
|
|
- for resource in resources:
|
|
- if (url and url in resource.uri) or doc_id in resource.uri:
|
|
- doc_in_resources = True
|
|
- break
|
|
- if not doc_in_resources:
|
|
- continue
|
|
-
|
|
- # Create or update document
|
|
- if doc_id not in documents:
|
|
- documents[doc_id] = Document(
|
|
- id=doc_id, url=url, title=title, chunks=[]
|
|
- )
|
|
+ for doc, score in search_results:
|
|
+ metadata = doc.metadata or {}
|
|
+ doc_id = metadata.get(self.id_field, "")
|
|
+ title = metadata.get(self.title_field, "")
|
|
+ url = metadata.get(self.url_field, "")
|
|
+ content = doc.page_content
|
|
+
|
|
+ # Create or update document
|
|
+ if doc_id not in documents:
|
|
+ documents[doc_id] = Document(
|
|
+ id=doc_id, url=url, title=title, chunks=[]
|
|
+ )
|
|
|
|
- # Add chunk to document
|
|
- chunk = Chunk(content=content, similarity=score)
|
|
- documents[doc_id].chunks.append(chunk)
|
|
+ # Add chunk to document
|
|
+ chunk = Chunk(content=content, similarity=score)
|
|
+ documents[doc_id].chunks.append(chunk)
|
|
|
|
return list(documents.values())
|
|
|
|
diff --git a/web/src/components/deer-flow/theme-provider-wrapper.tsx b/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
|
index 6da0db8..1a99bcf 100644
|
|
--- a/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
|
+++ b/web/src/components/deer-flow/theme-provider-wrapper.tsx
|
|
@@ -18,9 +18,9 @@ export function ThemeProviderWrapper({
|
|
return (
|
|
<ThemeProvider
|
|
attribute="class"
|
|
- defaultTheme={"dark"}
|
|
+ defaultTheme={"light"}
|
|
enableSystem={isChatPage}
|
|
- forcedTheme={isChatPage ? undefined : "dark"}
|
|
+ forcedTheme={isChatPage ? undefined : "light"}
|
|
disableTransitionOnChange
|
|
>
|
|
{children}
|
|
diff --git a/web/src/core/api/resolve-service-url.ts b/web/src/core/api/resolve-service-url.ts
|
|
index a87b777..d93e987 100644
|
|
--- a/web/src/core/api/resolve-service-url.ts
|
|
+++ b/web/src/core/api/resolve-service-url.ts
|
|
@@ -4,9 +4,13 @@
|
|
import { env } from "~/env";
|
|
|
|
export function resolveServiceURL(path: string) {
|
|
- let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "http://localhost:8000/api/";
|
|
+ let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "/api/";
|
|
if (!BASE_URL.endsWith("/")) {
|
|
BASE_URL += "/";
|
|
}
|
|
+
|
|
+ const origin = window.location.origin;
|
|
+ BASE_URL = origin + BASE_URL;
|
|
+
|
|
return new URL(path, BASE_URL).toString();
|
|
}
|