File size: 1,909 Bytes
8324134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.faiss import FAISS


class VectorDB:
    """FAISS vector store paired with a HuggingFace embedding model.

    Configuration is held in class attributes. Creating an instance builds
    the embedding model first and then the FAISS index: loaded from
    ``local_folder`` when ``is_load_local`` is true, otherwise created fresh.
    """

    # Model name handed to HuggingFaceEmbeddings as ``model_name``.
    embedding_model = "sentence-transformers/all-MiniLM-l6-v2"
    # Forwarded unchanged to HuggingFaceEmbeddings as ``model_kwargs``.
    model_kwargs = {'device': 'cpu'}
    # Forwarded unchanged to HuggingFaceEmbeddings as ``encode_kwargs``.
    encode_kwargs = {'normalize_embeddings': False}
    # Folder used both for loading and for saving the index.
    local_folder = "db/faiss_db"
    # When true, the index is loaded from ``local_folder`` and never saved back.
    is_load_local = False
    # Populated by __init__.
    text_embeddings = None
    docs_db = None

    def __init__(self):
        """Build the embedding model, then the vector index that uses it."""
        self.text_embeddings = self.init_text_embeddings(
            self.embedding_model, self.model_kwargs, self.encode_kwargs
        )
        self.docs_db = self.init_vector_db(self.local_folder, self.text_embeddings)

    def init_text_embeddings(self, embedding_model: str, model_kwargs: dict, encode_kwargs: dict):
        """Return a ``HuggingFaceEmbeddings`` built from the given settings.

        Args:
            embedding_model: Model name passed as ``model_name``.
            model_kwargs: Passed through as ``model_kwargs``.
            encode_kwargs: Passed through as ``encode_kwargs``.
        """
        return HuggingFaceEmbeddings(
            model_name=embedding_model,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )

    def _new_placeholder_db(self, text_embeddings):
        """Create a fresh FAISS index seeded with a single empty document."""
        # Presumably seeded because from_documents needs at least one
        # document to build an index -- confirm against the FAISS wrapper.
        return FAISS.from_documents([Document(page_content="")], embedding=text_embeddings)

    def init_vector_db(self, folder_path: str, text_embeddings: "HuggingFaceEmbeddings"):
        """Return the FAISS index to use for this instance.

        When ``is_load_local`` is true the index is loaded from
        ``folder_path``; if loading fails for any reason the error is
        reported and a fresh placeholder index is returned instead. When
        ``is_load_local`` is false a fresh placeholder index is always
        created.

        Args:
            folder_path: Folder to load a saved index from.
            text_embeddings: Embedding model used by the index.
        """
        if self.is_load_local:
            try:
                # NOTE(review): load_local deserializes data saved on disk;
                # only point it at folders you trust. Newer
                # langchain_community releases may require an explicit
                # opt-in argument here -- confirm for the installed version.
                return FAISS.load_local(folder_path=folder_path, embeddings=text_embeddings)
            except Exception as e:
                # Best-effort fallback is kept, but the reason is surfaced so
                # an unexpectedly empty index can be diagnosed.
                print(f"Could not load vector db from '{folder_path}': {e!r}. Creating a new one.")
        return self._new_placeholder_db(text_embeddings)

    def load_docs_into_vector_db(self, doc_chunks: list):
        """Add ``doc_chunks`` to the index, creating the index if needed.

        Does nothing when ``doc_chunks`` is empty or ``None``.

        Args:
            doc_chunks: Documents to embed and store.
        """
        if not doc_chunks:
            return
        if self.docs_db is None:
            self.docs_db = FAISS.from_documents(doc_chunks, embedding=self.text_embeddings)
        else:
            self.docs_db.add_documents(doc_chunks)

    def save_vector_db(self):
        """Persist the index to ``local_folder``.

        Saving is skipped, and a message printed, when there is no index or
        when ``is_load_local`` is true.
        """
        if self.docs_db is not None and not self.is_load_local:
            self.docs_db.save_local(self.local_folder)
        else:
            print("No vector db to save.")