# Hugging Face file-viewer header (not part of the module):
# last commit "Update to Qdrant db" (47b5f0c) by abadesalex; file size 2.35 kB.
import logging

from qdrant_client import models
from qdrant_client.conversions import common_types as types
from qdrant_client.models import NamedVector, SparseVector

from app.infrastructure.models.my_models import HybridSearchResponse
from app.infrastructure.repository.query_search_repository import QuerySearchRepository
from app.modules.denseEmbeddings.denseEmbeddings import DenseEmbeddings
from app.qdrant import QdrantConnectionDb
class HybridSearcher:
    """Run a hybrid (dense + sparse) text search for a user query.

    Query vectors are obtained from ``dense_embeddings`` and the search itself
    is delegated to ``query_search_repository``; this class only builds the
    prefetch structure and wraps the outcome in a ``HybridSearchResponse``.
    """

    # Values passed as ``using`` for the two prefetch branches.
    # NOTE(review): presumably these must match the vector names configured on
    # the target collection - confirm against the collection setup.
    _DENSE_VECTOR_NAME = "text-dense"
    _SPARSE_VECTOR_NAME = "text-sparse"

    def __init__(
        self,
        dense_embeddings: DenseEmbeddings,
        query_search_repository: QuerySearchRepository,
    ):
        """Store the collaborators used by :meth:`hybrid_search`.

        :param dense_embeddings: object providing ``get_sparse_vector`` and
            ``get_dense_vector`` for a query string
        :param query_search_repository: object providing
            ``find_text_by_hybrid_search``
        """
        self.dense_embeddings = dense_embeddings
        self.query_search_repository = query_search_repository

    def sparse_dense_rrf_prefetch(
        self,
        sparse_vector: SparseVector,
        dense_vector: NamedVector,
        limit: int = 10,
    ) -> models.Prefetch:
        """Build a prefetch that fuses a dense and a sparse sub-query with RRF.

        :param sparse_vector: sparse query vector, used for the sparse branch
        :param dense_vector: named dense vector; only its ``vector`` attribute
            is used for the dense branch
        :param limit: ``limit`` applied to each of the two inner prefetches
            (defaults to 10, the previously hard-coded value)
        :return: a ``models.Prefetch`` with two nested prefetches and a
            ``FusionQuery`` using ``Fusion.RRF``
        """
        dense_branch = models.Prefetch(
            query=dense_vector.vector,
            using=self._DENSE_VECTOR_NAME,
            limit=limit,
        )
        sparse_branch = models.Prefetch(
            query=sparse_vector,
            using=self._SPARSE_VECTOR_NAME,
            limit=limit,
        )
        return models.Prefetch(
            prefetch=[dense_branch, sparse_branch],
            query=models.FusionQuery(fusion=models.Fusion.RRF),
        )

    def hybrid_search(self, user_query: str) -> HybridSearchResponse:
        """Search for text chunks matching ``user_query``.

        Never raises for ordinary errors: any ``Exception`` raised while
        embedding, querying or reading the results is logged with its
        traceback and reported through a failure response.

        :param user_query: the raw query text
        :return: ``HybridSearchResponse`` with ``success=True`` and ``data``
            set to a list of ``{"chunk-text": ...}`` dicts (one per returned
            point), or ``success=False`` and a ``message`` describing the
            error
        """
        try:
            sparse_vector = self.dense_embeddings.get_sparse_vector(user_query)
            dense_vector = self.dense_embeddings.get_dense_vector(user_query)
            prefetch_context = self.sparse_dense_rrf_prefetch(
                sparse_vector, dense_vector
            )
            result = self.query_search_repository.find_text_by_hybrid_search(
                prefetch_context, dense_vector
            )
            # A point without a payload or without the "chunk-text" key raises
            # here and is reported through the failure branch below.
            response_data = [
                {"chunk-text": point.payload["chunk-text"]}
                for point in result.points
            ]
            return HybridSearchResponse(success=True, data=response_data)
        except Exception as e:
            # The response only carries str(e); log the full traceback so the
            # root cause is not lost.
            logging.getLogger(__name__).exception("Hybrid search failed")
            return HybridSearchResponse(
                success=False, message=f"Database operation failed: {e}"
            )