DataMate/runtime/datamate-python/app/module/kg_graphrag/milvus_client.py

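"""Milvus vector retrieval client.

Connects to Milvus through pymilvus, embeds the query text, runs a vector
similarity search, and returns the top-K document chunks.

Failure policy: fail-open -- when Milvus is unavailable, an empty list is
returned and the failure is logged.
"""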

from __future__ import annotations

import asyncio
import threading

from pydantic import SecretStr

from app.core.logging import get_logger
from app.module.kg_graphrag.models import VectorChunk

logger = get_logger(__name__)


class MilvusVectorRetriever:
    """Retrieve document chunks from Milvus by vector similarity.

    The query text is embedded with an OpenAI-compatible embedding endpoint and
    the resulting vector is searched against a Milvus collection.

    Both the Milvus client and the embedding client are created lazily on first
    use, so constructing a retriever performs no network I/O.

    Failure policy is fail-open: the public coroutines log the error and return
    an empty / negative result instead of raising.
    """

    def __init__(
        self,
        *,
        uri: str = "http://milvus-standalone:19530",
        embedding_model: str = "text-embedding-3-small",
        embedding_base_url: str | None = None,
        embedding_api_key: SecretStr = SecretStr("EMPTY"),
    ) -> None:
        """Store connection settings; no connection is opened here.

        Args:
            uri: Milvus endpoint URI.
            embedding_model: Name of the embedding model used for queries.
            embedding_base_url: Base URL of the OpenAI-compatible embedding
                service, or ``None`` to use the library default.
            embedding_api_key: API key for the embedding service.
        """
        self._uri = uri
        self._embedding_model = embedding_model
        self._embedding_base_url = embedding_base_url
        self._embedding_api_key = embedding_api_key
        # Lazy init
        self._milvus_client = None
        self._embeddings = None
        # The first client construction runs in a worker thread (see
        # _aget_milvus_client); the lock keeps concurrent first calls from
        # opening more than one connection.
        self._client_lock = threading.Lock()

    @classmethod
    def from_settings(cls) -> MilvusVectorRetriever:
        """Build a retriever from the application settings.

        The embedding model is ``settings.graphrag_embedding_model`` when set,
        otherwise ``settings.kg_alignment_embedding_model``.
        """
        # Imported lazily so importing this module does not load the settings.
        from app.core.config import settings

        embedding_model = (
            settings.graphrag_embedding_model
            or settings.kg_alignment_embedding_model
        )
        return cls(
            uri=settings.graphrag_milvus_uri,
            embedding_model=embedding_model,
            embedding_base_url=settings.kg_llm_base_url,
            embedding_api_key=settings.kg_llm_api_key,
        )

    def _get_embeddings(self):
        """Return the cached embedding client, creating it on first use."""
        if self._embeddings is None:
            from langchain_openai import OpenAIEmbeddings

            self._embeddings = OpenAIEmbeddings(
                model=self._embedding_model,
                base_url=self._embedding_base_url,
                api_key=self._embedding_api_key,
            )
        return self._embeddings

    def _get_milvus_client(self):
        """Return the cached Milvus client, connecting on first use.

        This call is synchronous and may block while the connection is being
        established; coroutines should go through ``_aget_milvus_client``.
        """
        if self._milvus_client is None:
            with self._client_lock:
                # Re-check under the lock: another thread may have connected
                # while this one was waiting.
                if self._milvus_client is None:
                    from pymilvus import MilvusClient

                    self._milvus_client = MilvusClient(uri=self._uri)
                    logger.info("Connected to Milvus at %s", self._uri)
        return self._milvus_client

    async def _aget_milvus_client(self):
        """Return the Milvus client without blocking the event loop.

        Constructing ``MilvusClient`` opens the connection synchronously, so
        the first call is pushed to a worker thread; later calls return the
        cached instance directly.
        """
        if self._milvus_client is not None:
            return self._milvus_client
        return await asyncio.to_thread(self._get_milvus_client)

    async def has_collection(self, collection_name: str) -> bool:
        """Check whether ``collection_name`` exists in Milvus.

        Returns:
            The result of the Milvus ``has_collection`` call, or ``False`` when
            the check itself fails (the error is logged).
        """
        try:
            client = await self._aget_milvus_client()
            return await asyncio.to_thread(client.has_collection, collection_name)
        except Exception:
            logger.exception("Milvus has_collection check failed for %s", collection_name)
            return False

    async def search(
        self,
        collection_name: str,
        query: str,
        top_k: int = 5,
    ) -> list[VectorChunk]:
        """Embed ``query`` and return the ``top_k`` closest chunks.

        Args:
            collection_name: Milvus collection to search.
            query: Query text to embed.
            top_k: Maximum number of chunks to return.

        Returns:
            The matching chunks. An empty list is returned for a blank query,
            a non-positive ``top_k``, or any failure (fail-open; the error is
            logged).
        """
        # Nothing meaningful can be retrieved for these inputs, so skip the
        # embedding request and the Milvus round trip.
        if top_k <= 0 or not query or not query.strip():
            return []

        try:
            return await self._search_impl(collection_name, query, top_k)
        except Exception:
            logger.exception(
                "Milvus search failed for collection=%s (fail-open, returning empty)",
                collection_name,
            )
            return []

    async def _search_impl(
        self,
        collection_name: str,
        query: str,
        top_k: int,
    ) -> list[VectorChunk]:
        """Run the embed -> search -> convert pipeline; errors propagate."""
        # 1. Embed query
        query_vector = await self._get_embeddings().aembed_query(query)

        # 2. Milvus search (synchronous I/O, run in a worker thread so the
        #    event loop is not blocked)
        client = await self._aget_milvus_client()
        results = await asyncio.to_thread(
            client.search,
            collection_name=collection_name,
            data=[query_vector],
            limit=top_k,
            output_fields=["text", "metadata"],
            # NOTE(review): metric and nprobe are hard-coded; presumably they
            # match how the target collections are indexed -- confirm.
            search_params={"metric_type": "COSINE", "params": {"nprobe": 16}},
        )

        # 3. Convert to VectorChunk. One query vector was sent, so only the
        #    first result list is read.
        if not results:
            return []

        chunks: list[VectorChunk] = []
        for hit in results[0]:
            # Treat a null field the same as a missing one so a single
            # incomplete row does not fail the whole search.
            entity = hit.get("entity") or {}
            chunks.append(
                VectorChunk(
                    id=str(hit.get("id", "")),
                    text=entity.get("text") or "",
                    score=float(hit.get("distance") or 0.0),
                    metadata=entity.get("metadata") or {},
                )
            )
        return chunks