import logging

from qdrant_client import models
from qdrant_client.conversions import common_types as types
from qdrant_client.models import NamedVector, SparseVector

from app.infrastructure.models.my_models import HybridSearchResponse
from app.infrastructure.repository.query_search_repository import (
    QuerySearchRepository,
)
from app.modules.denseEmbeddings.denseEmbeddings import DenseEmbeddings
from app.qdrant import QdrantConnectionDb

logger = logging.getLogger(__name__)


class HybridSearcher:
    """Combine sparse and dense query vectors into one fused search.

    The searcher asks ``dense_embeddings`` for both vector kinds, builds a
    prefetch that fuses the two candidate lists, and delegates the actual
    lookup to ``query_search_repository``.
    """

    def __init__(
        self,
        dense_embeddings: DenseEmbeddings,
        query_search_repository: QuerySearchRepository,
    ):
        """Store the collaborators used by :meth:`hybrid_search`.

        :param dense_embeddings: provider of ``get_sparse_vector`` and
            ``get_dense_vector`` for a query string.
        :param query_search_repository: repository exposing
            ``find_text_by_hybrid_search``.
        """
        self.dense_embeddings = dense_embeddings
        self.query_search_repository = query_search_repository

    def sparse_dense_rrf_prefetch(
        self,
        sparse_vector: SparseVector,
        dense_vector: NamedVector,
        *,
        limit: int = 10,
    ) -> models.Prefetch:
        """Build a prefetch that fuses dense and sparse candidates with RRF.

        :param sparse_vector: sparse query vector, searched against the
            ``"text-sparse"`` named vector.
        :param dense_vector: named dense vector; only its ``.vector`` values
            are used, searched against the ``"text-dense"`` named vector.
        :param limit: number of candidates requested from each of the two
            inner prefetches (defaults to 10, the previously fixed value).
        :return: a ``models.Prefetch`` whose inner prefetches are merged by
            ``models.Fusion.RRF``.
        """
        return models.Prefetch(
            prefetch=[
                models.Prefetch(
                    query=dense_vector.vector,
                    using="text-dense",
                    limit=limit,
                ),
                models.Prefetch(
                    query=sparse_vector,
                    using="text-sparse",
                    limit=limit,
                ),
            ],
            query=models.FusionQuery(
                fusion=models.Fusion.RRF,
            ),
        )

    def hybrid_search(self, user_query: str) -> HybridSearchResponse:
        """Run a hybrid search for ``user_query``.

        Never raises for ordinary errors: any ``Exception`` is logged and
        converted into a failure response.

        :param user_query: raw query text to embed and search for.
        :return: ``HybridSearchResponse`` with ``success=True`` and ``data``
            set to a list of ``{"chunk-text": ...}`` dicts (one per returned
            point), or ``success=False`` and an explanatory ``message``.
        """
        try:
            sparse_vector = self.dense_embeddings.get_sparse_vector(user_query)
            dense_vector = self.dense_embeddings.get_dense_vector(user_query)

            prefetch_context = self.sparse_dense_rrf_prefetch(
                sparse_vector, dense_vector
            )
            result = self.query_search_repository.find_text_by_hybrid_search(
                prefetch_context, dense_vector
            )

            # A point without a payload, or without the "chunk-text" key,
            # raises here and is reported through the failure branch below.
            response_data = [
                {"chunk-text": point.payload["chunk-text"]}
                for point in result.points
            ]
            return HybridSearchResponse(success=True, data=response_data)
        except Exception as e:
            # Broad on purpose: callers receive a failure response instead of
            # an exception. Log the traceback so the root cause is not lost.
            logger.exception("Hybrid search failed")
            return HybridSearchResponse(
                success=False, message=f"Database operation failed: {e}"
            )