dinhquangson committed on
Commit
7878029
1 Parent(s): 09c532f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -9
app.py CHANGED
@@ -12,7 +12,7 @@ from PyPDF2 import PdfReader
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.document_loaders import UnstructuredPDFLoader
14
  from langchain.embeddings import HuggingFaceBgeEmbeddings
15
- from langchain.vectorstores import FAISS
16
  from langchain.chat_models import ChatOpenAI
17
  from langchain.memory import ConversationBufferMemory
18
  from langchain.chains import ConversationalRetrievalChain
@@ -81,14 +81,9 @@ def get_vectorstore(text_chunks):
81
  A FAISS vector store containing the embeddings of the text chunks.
82
 
83
  """
84
- model = "sentence-transformers/all-mpnet-base-v2"
85
- encode_kwargs = {
86
- "normalize_embeddings": True
87
- } # set True to compute cosine similarity
88
- embeddings = HuggingFaceBgeEmbeddings(
89
- model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
90
- )
91
- vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
92
  return vectorstore
93
 
94
 
 
12
  from langchain.text_splitter import RecursiveCharacterTextSplitter
13
  from langchain.document_loaders import UnstructuredPDFLoader
14
  from langchain.embeddings import HuggingFaceBgeEmbeddings
15
+ from langchain.vectorstores import Chroma
16
  from langchain.chat_models import ChatOpenAI
17
  from langchain.memory import ConversationBufferMemory
18
  from langchain.chains import ConversationalRetrievalChain
 
81
  A FAISS vector store containing the embeddings of the text chunks.
82
 
83
  """
84
+ MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
85
+ hf_embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
86
+ vectorstore = Chroma.from_documents(texts, hf_embeddings, persist_directory="db")
 
 
 
 
 
87
  return vectorstore
88
 
89