Spaces:
Running
Running
import logging

from qdrant_client import models
from qdrant_client.conversions import common_types as types
from qdrant_client.models import NamedVector, SparseVector

from app.infrastructure.models.my_models import HybridSearchResponse
from app.infrastructure.repository.query_search_repository import QuerySearchRepository
from app.modules.denseEmbeddings.denseEmbeddings import DenseEmbeddings
from app.qdrant import QdrantConnectionDb
class HybridSearcher:
    """Answer a text query by fusing a dense and a sparse vector search.

    The embedding provider turns the query into one sparse and one dense
    vector; both are wrapped in a fused prefetch and handed to the query
    repository, whose result points are reduced to their ``chunk-text``
    payload field.
    """

    def __init__(
        self,
        dense_embeddings: DenseEmbeddings,
        query_search_repository: QuerySearchRepository,
    ):
        """
        Store the collaborators used by :meth:`hybrid_search`.

        :param dense_embeddings: provider of ``get_sparse_vector`` and
            ``get_dense_vector`` for a query string
        :param query_search_repository: repository exposing
            ``find_text_by_hybrid_search``
        """
        self.dense_embeddings = dense_embeddings
        self.query_search_repository = query_search_repository

    def sparse_dense_rrf_prefetch(
        self,
        sparse_vector: SparseVector,
        dense_vector: NamedVector,
        limit: int = 10,
    ) -> models.Prefetch:
        """
        Build a prefetch that fuses a dense and a sparse sub-query with RRF.

        :param sparse_vector: sparse query, searched using ``"text-sparse"``
        :param dense_vector: named dense query; only its ``vector`` attribute
            is used, searched using ``"text-dense"``
        :param limit: limit passed to each of the two sub-prefetches
            (defaults to 10, the previously hard-coded value)
        :return: a ``models.Prefetch`` whose nested prefetches are combined
            by ``models.FusionQuery(fusion=models.Fusion.RRF)``
        """
        dense_prefetch = models.Prefetch(
            query=dense_vector.vector,
            using="text-dense",
            limit=limit,
        )
        sparse_prefetch = models.Prefetch(
            query=sparse_vector,
            using="text-sparse",
            limit=limit,
        )
        return models.Prefetch(
            prefetch=[dense_prefetch, sparse_prefetch],
            query=models.FusionQuery(
                fusion=models.Fusion.RRF,
            ),
        )

    def hybrid_search(self, user_query: str) -> HybridSearchResponse:
        """
        Run a hybrid search for ``user_query``.

        This method never raises for ordinary errors: any ``Exception`` is
        logged with its traceback and converted into a failure response.

        :param user_query: the query text to embed and search for
        :return: ``HybridSearchResponse`` with ``success=True`` and ``data``
            set to a list of ``{"chunk-text": ...}`` dicts (one per result
            point), or ``success=False`` and an explanatory ``message``
        """
        try:
            sparse_vector = self.dense_embeddings.get_sparse_vector(user_query)
            dense_vector = self.dense_embeddings.get_dense_vector(user_query)
            prefetch_context = self.sparse_dense_rrf_prefetch(
                sparse_vector, dense_vector
            )
            result = self.query_search_repository.find_text_by_hybrid_search(
                prefetch_context, dense_vector
            )
            # NOTE(review): a point without a payload or without the
            # "chunk-text" key raises here and is therefore reported through
            # the failure branch below, not skipped.
            response_data = [
                {"chunk-text": point.payload["chunk-text"]} for point in result.points
            ]
            return HybridSearchResponse(success=True, data=response_data)
        except Exception as e:
            # Boundary handler: keep the traceback in the logs, because the
            # response only carries the stringified exception.
            logging.getLogger(__name__).exception("Hybrid search failed")
            return HybridSearchResponse(
                success=False, message=f"Database operation failed: {str(e)}"
            )