diff --git a/src/rag/milvus.py b/src/rag/milvus.py
index de589d4..c1b9b98 100644
--- a/src/rag/milvus.py
+++ b/src/rag/milvus.py
@@ -9,7 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
 from langchain_milvus.vectorstores import Milvus as LangchainMilvus
 from langchain_openai import OpenAIEmbeddings
 from openai import OpenAI
-from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient
+from pymilvus import CollectionSchema, DataType, FieldSchema, MilvusClient, utility
 
 from src.config.loader import get_bool_env, get_int_env, get_str_env
 from src.rag.retriever import Chunk, Document, Resource, Retriever
@@ -397,6 +397,36 @@ class MilvusRetriever(Retriever):
         except Exception as e:
             raise ConnectionError(f"Failed to connect to Milvus: {str(e)}")
 
+    def _connect_with_collection(self, collection_name: str) -> None:
+        """(Re)create ``self.client`` bound to ``collection_name``.
+
+        Not idempotent: every call replaces ``self.client``. The Milvus Lite
+        branch does not use ``collection_name``. Raises ConnectionError.
+        """
+        try:
+            if self._is_milvus_lite():
+                # Use MilvusClient for Milvus Lite (local file database)
+                self.client = MilvusClient(self.uri)
+                self._ensure_collection_exists()
+            else:
+                connection_args = {"uri": self.uri}
+                # Add user/password only if provided
+                if self.user:
+                    connection_args["user"] = self.user
+                if self.password:
+                    connection_args["password"] = self.password
+
+                # Create LangChain client (it will handle collection creation automatically)
+                self.client = LangchainMilvus(
+                    embedding_function=self.embedding_model,
+                    collection_name=collection_name,
+                    connection_args=connection_args,
+                    # optional (if collection already exists with different schema, be careful)
+                    drop_old=False,
+                )
+        except Exception as e:
+            raise ConnectionError(f"Failed to connect to Milvus: {e}") from e
+
     def _is_milvus_lite(self) -> bool:
         """Return True if the URI points to a local Milvus Lite file.
Milvus Lite uses local file paths (often ``*.db``) without an HTTP/HTTPS
@@ -476,26 +506,17 @@ class MilvusRetriever(Retriever):
             else:
-                # Use similarity_search_by_vector for lightweight listing.
-                # If a query is provided embed it; else use a zero vector.
-                docs: Iterable[Any] = self.client.similarity_search(
-                    query,
-                    k=100,
-                    expr="source == 'examples'",  # Limit to 100 results
-                )
-                for d in docs:
-                    meta = getattr(d, "metadata", {}) or {}
-                    # check if the resource is in the list of resources
-                    if resources and any(
-                        r.uri == meta.get(self.url_field, "")
-                        or r.uri == f"milvus://{meta.get(self.id_field, '')}"
-                        for r in resources
-                    ):
-                        continue
+                # Expose every collection on the server as one resource.
+                # Prefer the alias stored on the LangChain client; the
+                # "<uri>-<user>" string is kept only as a fallback because it
+                # presumably mirrors an internal alias format (TODO confirm).
+                alias = getattr(self.client, "alias", None) or (
+                    f"{self.uri}-{self.user}"
+                )
+                collection_names = utility.list_collections(using=alias)
+                for collection_name in collection_names:
                     resources.append(
                         Resource(
-                            uri=meta.get(self.url_field, "")
-                            or f"milvus://{meta.get(self.id_field, '')}",
-                            title=meta.get(self.title_field, "")
-                            or meta.get(self.id_field, "Unnamed"),
+                            uri=f"milvus://{collection_name}",
+                            title=collection_name,
                             description="Stored Milvus document",
                         )
                     )
@@ -621,38 +642,33 @@ class MilvusRetriever(Retriever):
 
             else:
                 # For LangChain Milvus, use similarity search
-                search_results = self.client.similarity_search_with_score(
-                    query=query, k=self.top_k
-                )
+                if not resources:
+                    return []
 
                 documents = {}
+                for resource in resources:
+                    # The resource title is used as the collection name; this rebinds self.client.
+                    self._connect_with_collection(resource.title)
+                    search_results = self.client.similarity_search_with_score(
+                        query=query, k=self.top_k
+                    )
 
-                for doc, score in search_results:
-                    metadata = doc.metadata or {}
-                    doc_id = metadata.get(self.id_field, "")
-                    title = metadata.get(self.title_field, "")
-                    url = metadata.get(self.url_field, "")
-                    content = doc.page_content
-
-                    # Skip if resource filtering is requested and this doc is not in the list
-                    if resources:
-                        doc_in_resources = False
-                        for resource in resources:
-                            if (url and url in resource.uri) or doc_id in resource.uri:
-                                doc_in_resources = True
-                                break
-                        if not doc_in_resources:
-                            continue
-
-                    # Create or update document
-                    if doc_id not in documents:
-                        documents[doc_id] = Document(
-                            id=doc_id, url=url, title=title, chunks=[]
-                        )
+                    for doc, score in search_results:
+                        metadata = doc.metadata or {}
+                        doc_id = metadata.get(self.id_field, "")
+                        title = metadata.get(self.title_field, "")
+                        url = metadata.get(self.url_field, "")
+                        content = doc.page_content
+
+                        # Create or update document
+                        if doc_id not in documents:
+                            documents[doc_id] = Document(
+                                id=doc_id, url=url, title=title, chunks=[]
+                            )
 
-                    # Add chunk to document
-                    chunk = Chunk(content=content, similarity=score)
-                    documents[doc_id].chunks.append(chunk)
+                        # Add chunk to document
+                        chunk = Chunk(content=content, similarity=score)
+                        documents[doc_id].chunks.append(chunk)
 
                 return list(documents.values())
 
diff --git a/web/src/components/deer-flow/theme-provider-wrapper.tsx b/web/src/components/deer-flow/theme-provider-wrapper.tsx
index 6da0db8..1a99bcf 100644
--- a/web/src/components/deer-flow/theme-provider-wrapper.tsx
+++ b/web/src/components/deer-flow/theme-provider-wrapper.tsx
@@ -18,9 +18,9 @@ export function ThemeProviderWrapper({
   return (
       {children}
diff --git a/web/src/core/api/resolve-service-url.ts b/web/src/core/api/resolve-service-url.ts
index a87b777..d93e987 100644
--- a/web/src/core/api/resolve-service-url.ts
+++ b/web/src/core/api/resolve-service-url.ts
@@ -4,9 +4,18 @@
 import { env } from "~/env";
 
 export function resolveServiceURL(path: string) {
-  let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "http://localhost:8000/api/";
+  let BASE_URL = env.NEXT_PUBLIC_API_URL ?? "/api/";
   if (!BASE_URL.endsWith("/")) {
     BASE_URL += "/";
   }
-  return new URL(path, BASE_URL).toString();
+
+  // Resolve a relative base such as "/api/" against the page origin. An
+  // absolute NEXT_PUBLIC_API_URL is used as-is: prefixing the origin to it
+  // would produce an invalid URL. `window` is undefined outside the browser,
+  // so fall back to the previous default backend origin in that case.
+  const origin =
+    typeof window !== "undefined"
+      ? window.location.origin
+      : "http://localhost:8000";
+  return new URL(path, new URL(BASE_URL, origin)).toString();
 }