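# Gradio Space serving a LlamaCpp-backed financial Q&A model. A dropdown switches between a
# plain single-turn chain and a TF-IDF retrieval chain with conversation memory.
# Assumed (unpinned) runtime dependencies: gradio, langchain, langchain-community,
# llama-cpp-python, and scikit-learn (needed by TFIDFRetriever).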
import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.memory import ConversationBufferMemory
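# Stream tokens to stdout as they are generated so progress is visible in the Space logs.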
callbacks = [StreamingStdOutCallbackHandler()]

print("creating LLM started")
llm = LlamaCpp(
    model_path="finbrov1.gguf",
    temperature=0.75,
    max_tokens=100,
    top_p=0.95,  # top_p must lie in (0, 1]; the original value of 4 effectively disabled nucleus sampling
    callbacks=callbacks,
    verbose=True,  # verbose is required for the streaming callback to fire
)
print("creating LLM ended")
def greet(question, model_type):
    print(f"question is {question}")
    if model_type == "With memory":
        # Minimal TF-IDF retriever over a one-document placeholder corpus.
        retriever = TFIDFRetriever.from_texts(["Financial AI"])
        template = """You are the financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
        prompt1 = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=template,
        )
        # Retrieval QA chain that stuffs the retrieved documents into {context}
        # and keeps the running conversation in {history}, keyed on "question".
        llm_chain_model = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            verbose=False,
            chain_type_kwargs={
                "verbose": False,
                "prompt": prompt1,
                "memory": ConversationBufferMemory(
                    memory_key="history",
                    input_key="question"),
            },
        )
        print("retrieval chain created")
    else:
        template = """You are the financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
        prompt = PromptTemplate(template=template, input_variables=["question"])
        llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    out_gen = llm_chain_model.run(question)
    print(f"out is: {out_gen}")
    return out_gen
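# Wire up the UI: a free-text question box, the memory-mode dropdown, and a text output.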
demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
    ["With memory", "Without memory"], label="Memory status",
    info="With memory, responses are slower but use retrieved context and conversation history",
)], outputs="text")
demo.launch(debug=True, share=True)
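# The block below looks like an earlier configuration kept for reference; it targets the full
# finbro GGUF checkpoint with a larger context window and different sampling settings.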
# import gradio as gr
# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp
#
# callbacks = [StreamingStdOutCallbackHandler()]
# print("creating LLM started")
# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
# llm = LlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k=10,
#     callback_manager=callbacks,
#     n_ctx=2048,
#     verbose=True,  # verbose is required for the streaming callback to fire
# )
# print("creating LLM ended")
#
# def greet(question, model_type):
#     print("prompt started")
#     print(f"question is {question}")
#     template = """You are the financial expert:
# ### Instruction:
# {question}
# ### Input:
# ### Response:
# """
#     print("test1")
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     print("test2")
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     print("test3")
#     out_gen = llm_chain_model.run(question)
#     print("test4")
#     print(f"out is: {out_gen}")
#     return out_gen
#
# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
#     ["Without memory", "With memory"], label="Memory status",
#     info="With memory, responses are slower but stronger",
# )], outputs="text")
# demo.launch(debug=True, share=True)
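# To try it locally (an assumption about the environment, not part of the Space config):
#   python app.py
# then open the local or public Gradio URL printed by demo.launch(share=True).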