from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS


class VectorDB:
    """FAISS vector store backed by HuggingFace sentence-transformer embeddings.

    On construction this builds the embedding function and either restores a
    persisted index from ``local_folder`` (when ``is_load_local`` is True) or
    starts from a fresh, effectively-empty index.
    """

    # NOTE(review): the canonical HF model id is "all-MiniLM-L6-v2" (capital L).
    # The lowercase "l6" appears to resolve on the Hub today, but confirm before
    # relying on it — the string is left unchanged here to avoid a behavior change.
    embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
    model_kwargs = {'device': 'cpu'}
    encode_kwargs = {'normalize_embeddings': False}
    # Folder used both for load_local (restore) and save_local (persist).
    local_folder = "db/faiss_db"
    # When True, __init__ attempts to restore a previously saved index.
    is_load_local = False
    # Instance-state placeholders; set in __init__.
    text_embeddings = None
    docs_db = None

    def __init__(self):
        """Build the embedding function and the backing FAISS store."""
        self.text_embeddings = self.init_text_embeddings(
            self.embedding_model, self.model_kwargs, self.encode_kwargs
        )
        self.docs_db = self.init_vector_db(self.local_folder, self.text_embeddings)

    def init_text_embeddings(self, embedding_model: str, model_kwargs: dict,
                             encode_kwargs: dict) -> HuggingFaceEmbeddings:
        """Return a HuggingFaceEmbeddings instance for the given model/config.

        :param embedding_model: HF model id to embed with.
        :param model_kwargs: kwargs forwarded to the underlying model (e.g. device).
        :param encode_kwargs: kwargs forwarded to ``encode`` (e.g. normalization).
        """
        return HuggingFaceEmbeddings(
            model_name=embedding_model,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )

    def _empty_vector_db(self, text_embeddings: HuggingFaceEmbeddings) -> FAISS:
        """Create a fresh index seeded with one empty document.

        FAISS.from_documents cannot build an index from zero documents, so a
        single empty Document is used as a placeholder.
        """
        return FAISS.from_documents([Document(page_content="")],
                                    embedding=text_embeddings)

    def init_vector_db(self, folder_path: str,
                       text_embeddings: HuggingFaceEmbeddings) -> FAISS:
        """Restore the persisted index when configured, else build a fresh one.

        A failed restore falls back to a fresh index rather than raising, so the
        application can still start; the failure is reported instead of being
        silently swallowed.
        """
        if self.is_load_local:
            try:
                return FAISS.load_local(folder_path=folder_path,
                                        embeddings=text_embeddings)
            except Exception as e:
                # Best-effort fallback: report why the restore failed, then
                # continue with an empty index.
                print(f"Failed to load vector db from '{folder_path}': {e}. "
                      f"Starting with an empty index.")
                return self._empty_vector_db(text_embeddings)
        return self._empty_vector_db(text_embeddings)

    def load_docs_into_vector_db(self, doc_chunks: list):
        """Add document chunks to the store; a no-op for an empty list."""
        if doc_chunks:
            if self.docs_db is None:
                self.docs_db = FAISS.from_documents(doc_chunks,
                                                    embedding=self.text_embeddings)
            else:
                self.docs_db.add_documents(doc_chunks)

    def save_vector_db(self):
        """Persist the index to ``local_folder``.

        Skipped when no index exists, or when the index was restored from disk
        (``is_load_local``) — presumably to avoid clobbering the persisted copy
        with the placeholder-seeded state; TODO(review) confirm that intent.
        """
        if self.docs_db is not None and not self.is_load_local:
            self.docs_db.save_local(self.local_folder)
        elif self.docs_db is None:
            print("No vector db to save.")
        else:
            print("Vector db was loaded from disk; skipping save.")