"""Milvus 向量检索客户端。 通过 pymilvus 连接 Milvus,对查询文本进行 embedding 后执行混合搜索, 返回 top-K 文档片段。 失败策略:fail-open —— Milvus 不可用时返回空列表 + 日志告警。 """ from __future__ import annotations import asyncio from pydantic import SecretStr from app.core.logging import get_logger from app.module.kg_graphrag.models import VectorChunk logger = get_logger(__name__) class MilvusVectorRetriever: """Milvus 向量检索器。""" def __init__( self, *, uri: str = "http://milvus-standalone:19530", embedding_model: str = "text-embedding-3-small", embedding_base_url: str | None = None, embedding_api_key: SecretStr = SecretStr("EMPTY"), ) -> None: self._uri = uri self._embedding_model = embedding_model self._embedding_base_url = embedding_base_url self._embedding_api_key = embedding_api_key # Lazy init self._milvus_client = None self._embeddings = None @classmethod def from_settings(cls) -> MilvusVectorRetriever: from app.core.config import settings embedding_model = ( settings.graphrag_embedding_model or settings.kg_alignment_embedding_model ) return cls( uri=settings.graphrag_milvus_uri, embedding_model=embedding_model, embedding_base_url=settings.kg_llm_base_url, embedding_api_key=settings.kg_llm_api_key, ) def _get_embeddings(self): if self._embeddings is None: from langchain_openai import OpenAIEmbeddings self._embeddings = OpenAIEmbeddings( model=self._embedding_model, base_url=self._embedding_base_url, api_key=self._embedding_api_key, ) return self._embeddings def _get_milvus_client(self): if self._milvus_client is None: from pymilvus import MilvusClient self._milvus_client = MilvusClient(uri=self._uri) logger.info("Connected to Milvus at %s", self._uri) return self._milvus_client async def has_collection(self, collection_name: str) -> bool: """检查 Milvus 中是否存在指定 collection(防止越权访问不存在的库)。""" try: client = self._get_milvus_client() return await asyncio.to_thread(client.has_collection, collection_name) except Exception: logger.exception("Milvus has_collection check failed for %s", collection_name) return False async def search( self, collection_name: str, query: str, top_k: int = 5, ) -> list[VectorChunk]: """向量搜索:embed query -> Milvus search -> 返回 top-K 文档片段。 Fail-open: Milvus 不可用时返回空列表。 """ try: return await self._search_impl(collection_name, query, top_k) except Exception: logger.exception( "Milvus search failed for collection=%s (fail-open, returning empty)", collection_name, ) return [] async def _search_impl( self, collection_name: str, query: str, top_k: int, ) -> list[VectorChunk]: # 1. Embed query query_vector = await self._get_embeddings().aembed_query(query) # 2. Milvus search(同步 I/O,通过 to_thread 避免阻塞事件循环) client = self._get_milvus_client() results = await asyncio.to_thread( client.search, collection_name=collection_name, data=[query_vector], limit=top_k, output_fields=["text", "metadata"], search_params={"metric_type": "COSINE", "params": {"nprobe": 16}}, ) # 3. 转换为 VectorChunk chunks: list[VectorChunk] = [] if results and len(results) > 0: for hit in results[0]: entity = hit.get("entity", {}) chunks.append( VectorChunk( id=str(hit.get("id", "")), text=entity.get("text", ""), score=float(hit.get("distance", 0.0)), metadata=entity.get("metadata", {}), ) ) return chunks