Phi2-PDF-chat

Sleeping

App Files Community

dinhquangson committed on Feb 8

Commit

0c7d70e

•

1 Parent(s): 4f0218b

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -20

app.py CHANGED Viewed

@@ -17,8 +17,8 @@ from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-#from langchain.llms import HuggingFaceHub
-from llama_index.llms import LlamaCPP
@@ -110,27 +110,11 @@ def get_conversation_chain(vectorstore):
     ConversationalRetrievalChain
         A conversational retrieval chain for generating responses.
     llm = HuggingFaceHub(
-        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
         model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
     )
-    """
-    llm = LlamaCPP(
-        model_url=None,  # We'll load locally.
-        # Trying small version of an already small model
-        model_path='phi-2.Q4_K_M.gguf',
-        temperature=0.1,
-        max_new_tokens=512,
-        context_window=2048,  # Phi-2 2K context window - this could be a limitation for RAG as it has to put the content into this context window
-        generate_kwargs={},
-        # set to at least 1 to use GPU
-        # This is small model and there's no indication of layers offloaded to the GPU
-        model_kwargs={"n_gpu_layers": 32},
-        messages_to_prompt=messages_to_prompt,
-        completion_to_prompt=completion_to_prompt,
-        verbose=True
-    )
     # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")

 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
+from langchain.llms import HuggingFaceHub
+#from llama_index.llms import LlamaCPP
     ConversationalRetrievalChain
         A conversational retrieval chain for generating responses.
+    """
     llm = HuggingFaceHub(
+        repo_id="mlabonne/phixtral-2x2_8",
         model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
     )
     # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")