Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
0c7d70e
1
Parent(s):
4f0218b
Update app.py
Browse files
app.py
CHANGED
@@ -17,8 +17,8 @@ from langchain.chat_models import ChatOpenAI
|
|
17 |
from langchain.memory import ConversationBufferMemory
|
18 |
from langchain.chains import ConversationalRetrievalChain
|
19 |
from htmlTemplates import css, bot_template, user_template
|
20 |
-
|
21 |
-
from llama_index.llms import LlamaCPP
|
22 |
|
23 |
|
24 |
|
@@ -110,27 +110,11 @@ def get_conversation_chain(vectorstore):
|
|
110 |
ConversationalRetrievalChain
|
111 |
A conversational retrieval chain for generating responses.
|
112 |
|
113 |
-
|
114 |
llm = HuggingFaceHub(
|
115 |
-
repo_id="
|
116 |
model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
|
117 |
)
|
118 |
-
"""
|
119 |
-
llm = LlamaCPP(
|
120 |
-
model_url=None, # We'll load locally.
|
121 |
-
# Trying small version of an already small model
|
122 |
-
model_path='phi-2.Q4_K_M.gguf',
|
123 |
-
temperature=0.1,
|
124 |
-
max_new_tokens=512,
|
125 |
-
context_window=2048, # Phi-2 2K context window - this could be a limitation for RAG as it has to put the content into this context window
|
126 |
-
generate_kwargs={},
|
127 |
-
# set to at least 1 to use GPU
|
128 |
-
# This is small model and there's no indication of layers offloaded to the GPU
|
129 |
-
model_kwargs={"n_gpu_layers": 32},
|
130 |
-
messages_to_prompt=messages_to_prompt,
|
131 |
-
completion_to_prompt=completion_to_prompt,
|
132 |
-
verbose=True
|
133 |
-
)
|
134 |
|
135 |
# llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
|
136 |
|
|
|
17 |
from langchain.memory import ConversationBufferMemory
|
18 |
from langchain.chains import ConversationalRetrievalChain
|
19 |
from htmlTemplates import css, bot_template, user_template
|
20 |
+
from langchain.llms import HuggingFaceHub
|
21 |
+
#from llama_index.llms import LlamaCPP
|
22 |
|
23 |
|
24 |
|
|
|
110 |
ConversationalRetrievalChain
|
111 |
A conversational retrieval chain for generating responses.
|
112 |
|
113 |
+
"""
|
114 |
llm = HuggingFaceHub(
|
115 |
+
repo_id="mlabonne/phixtral-2x2_8",
|
116 |
model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
|
117 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
|
120 |
|