import os
from typing import Any, Dict, Iterable, List, Optional

from dotenv import load_dotenv
from qdrant_client import QdrantClient, models

# Populate os.environ from a .env file, if any, before the client reads
# QDRANT_API_KEY below.
load_dotenv()


class HybridClient:
    """Wrapper around ``QdrantClient`` set up with a dense and a sparse model.

    The instance registers ``DENSE_MODEL`` and ``SPARSE_MODEL`` on the
    underlying client and exposes helpers to create a collection, insert
    text chunks, and query by text.
    """

    DENSE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
    SPARSE_MODEL = "prithivida/Splade_PP_en_v1"

    # Endpoint used when the caller does not supply one.
    DEFAULT_URL = (
        "https://e8c7892c-84a5-4b73-9281-27d52258c6d8"
        ".europe-west3-0.gcp.cloud.qdrant.io:6333"
    )

    # Chunk keys that are stripped before the chunk is stored as metadata.
    _DROPPED_METADATA_KEYS = ("color", "size")

    def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None):
        """Connect to Qdrant and register the embedding models.

        Args:
            url: Qdrant endpoint. Defaults to ``DEFAULT_URL``.
            api_key: API key. Defaults to the ``QDRANT_API_KEY`` environment
                variable (which may itself be unset, yielding ``None``).
        """
        self.qdrant_client = QdrantClient(
            url=url if url is not None else self.DEFAULT_URL,
            api_key=api_key if api_key is not None else os.getenv("QDRANT_API_KEY"),
        )
        self.qdrant_client.set_model(self.DENSE_MODEL)
        self.qdrant_client.set_sparse_model(self.SPARSE_MODEL)

    def create(self, collection: str) -> Optional[str]:
        """Create ``collection`` if it does not exist yet.

        The collection is created with the client's fastembed dense and
        sparse vector parameters and INT8 scalar quantization
        (``quantile=0.99``, ``always_ram=False``).

        Returns:
            The collection name when it was created, ``None`` when a
            collection with that name already existed.
        """
        if self.qdrant_client.collection_exists(collection):
            return None

        self.qdrant_client.create_collection(
            collection_name=collection,
            vectors_config=self.qdrant_client.get_fastembed_vector_params(),
            sparse_vectors_config=self.qdrant_client.get_fastembed_sparse_vector_params(),
            quantization_config=models.ScalarQuantization(
                scalar=models.ScalarQuantizationConfig(
                    type=models.ScalarType.INT8,
                    quantile=0.99,
                    always_ram=False,
                ),
            ),
        )
        print(f"--- {collection} collection created")
        return collection

    def insert(self, collection: str, chunks: Iterable[Dict[str, Any]]) -> None:
        """Add text chunks to ``collection``.

        Each chunk's ``"text"`` value becomes the document; the remaining
        keys, minus ``"color"`` and ``"size"``, are stored as its metadata.
        The caller's dictionaries are left untouched.

        Args:
            collection: Target collection name.
            chunks: Mappings that each contain a ``"text"`` key.

        Raises:
            KeyError: If a chunk has no ``"text"`` key.
        """
        documents: List[Any] = []
        metadata: List[Dict[str, Any]] = []
        for chunk in chunks:
            # Work on a shallow copy so the caller's chunk is not mutated.
            payload = dict(chunk)
            documents.append(payload.pop("text"))
            for key in self._DROPPED_METADATA_KEYS:
                # These keys are optional; drop them only when present.
                payload.pop(key, None)
            metadata.append(payload)

        self.qdrant_client.add(
            collection_name=collection,
            documents=documents,
            metadata=metadata,
            # NOTE(review): presumably 0 means "use all available cores" for
            # embedding - confirm against the qdrant-client documentation.
            parallel=0,
        )
        print("--- pdf inserted")

    def search(self, collection: str, text: str, limit: int = 10):
        """Query ``collection`` with ``text`` and return up to ``limit`` hits.

        No filter is applied. The result of ``QdrantClient.query`` is
        returned unchanged.
        """
        return self.qdrant_client.query(
            collection_name=collection,
            query_text=text,
            query_filter=None,
            limit=limit,
        )

    def get_chapter_name(self, collection: str):
        """Return the point stored under id ``0`` in ``collection``.

        The whole retrieved point is returned, not just a name field.

        NOTE(review): ``insert`` does not pass explicit ids, so confirm that
        something actually writes a point with id 0.

        Raises:
            IndexError: If the collection has no point with id 0.
        """
        points = self.qdrant_client.retrieve(collection_name=collection, ids=[0])
        if not points:
            raise IndexError(f"no point with id 0 in collection {collection!r}")
        return points[0]