import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.memory import ConversationBufferMemory
callbacks = [StreamingStdOutCallbackHandler()]
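# Load the local GGUF model with llama.cpp; the streaming handler above echoes
# tokens to stdout as they are generated, which is useful in the Space logs.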
print("creating ll started")
llm = LlamaCpp(
model_path="finbrov1.gguf",
temperature=0.75,
max_tokens=100,
top_p=4,
callback_manager=callbacks,
verbose=True, # Verbose is required to pass to the callback manager
)
print("creating ll ended")
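# greet() backs the Gradio interface: "With memory" answers through a
# TF-IDF-backed RetrievalQA chain with conversation memory, while
# "Without memory" sends the question straight to a plain LLMChain.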
def greet(question, model_type):
    print(f"question is {question}")
    if model_type == "With memory":
        # Retrieval-augmented path: a tiny TF-IDF retriever plus conversation memory.
        retriever = TFIDFRetriever.from_texts(["Financial AI"])
        template = """You are the Financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
        prompt1 = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=template,
        )
        llm_chain_model = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type='stuff',
            retriever=retriever,
            verbose=False,
            chain_type_kwargs={
                "verbose": False,
                "prompt": prompt1,
                "memory": ConversationBufferMemory(
                    memory_key="history",
                    input_key="question"),
            }
        )
        print("memory-backed retrieval chain created")
    else:
        # Plain prompt-to-LLM path without retrieval or memory.
        template = """You are the Financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
        prompt = PromptTemplate(template=template, input_variables=["question"])
        llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    out_gen = llm_chain_model.run(question)
    print(f"out is: {out_gen}")
    return out_gen
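# Wire the function into a simple Gradio UI: a free-text question plus a
# dropdown that picks between the two chains defined in greet().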
demo = gr.Interface(
    fn=greet,
    inputs=[
        "text",
        gr.Dropdown(
            ["With memory", "Without memory"],
            label="Memory status",
            info="With memory enabled, the output is slower but stronger",
        ),
    ],
    outputs="text",
)
demo.launch(debug=True, share=True)
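# A minimal sketch of calling this app programmatically with gradio_client,
# assuming the default "/predict" endpoint that gr.Interface exposes and a
# locally running server; the URL below is illustrative, not part of this Space.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")
# answer = client.predict("What is compound interest?", "Without memory",
#                         api_name="/predict")
# print(answer)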
# import gradio as gr
# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp
# callbacks = [StreamingStdOutCallbackHandler()]
# print("creating ll started")
# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
# llm = LlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k=10,
#     callback_manager=callbacks,
#     n_ctx=2048,
#     verbose=True,  # Verbose is required to pass to the callback manager
# )
# # print("creating ll ended")
# def greet(question, model_type):
#     print("prompt started ")
#     print(f"question is {question}")
#     template = """You are the Finiantial expert:
# ### Instruction:
# {question}
# ### Input:
# ### Response:
# """
#     print("test1")
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     print("test2")
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     print("test3")
#     out_gen = llm_chain_model.run(question)
#     print("test4")
#     print(f"out is: {out_gen}")
#     return out_gen
# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
#     ["Without memory", "With memory"], label="Memory status", info="With using memory, the output will be slow but strong"
# ),], outputs="text")
# demo.launch(debug=True, share=True)