import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatLlamaCpp

callbacks = [StreamingStdOutCallbackHandler()]
print("creating ll started") | |
llm = ChatLlamaCpp(
    model_path="finbro-v0.1.0-llama-3-8B-instruct-1m.gguf",
    n_batch=8,
    temperature=0.85,
    max_tokens=256,
    top_p=0.95,
    top_k=10,
    callbacks=callbacks,  # callback_manager is deprecated; pass the handler list via callbacks
    n_ctx=2048,
    verbose=True,  # verbose is required so tokens reach the streaming callback
)
print("creating llm ended") | |
def greet(question, model_type):
    # Placeholder handler: echoes a fixed test string while the UI is wired up.
    # The full generation path (PromptTemplate + LLMChain) is in the
    # commented-out version at the bottom of this file.
    print(f"question is {question}")
    out_gen = "testsetestestetsetsets"
    return out_gen
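
# Hedged sketch (not part of the original app): one way the "With memory" option
# could be served, using the ConversationBufferMemory imported above. The names
# memory_template, memory_chain and greet_with_memory are illustrative assumptions;
# greet() could dispatch to greet_with_memory when model_type == "With memory".
memory = ConversationBufferMemory(memory_key="chat_history")
memory_template = """You are the Financial expert:
Conversation so far:
{chat_history}
### Instruction:
{question}
### Response:
"""
memory_prompt = PromptTemplate(
    template=memory_template, input_variables=["chat_history", "question"]
)
memory_chain = LLMChain(prompt=memory_prompt, llm=llm, memory=memory)

def greet_with_memory(question):
    # The chain injects the buffered history into {chat_history}, then stores the
    # new question/answer pair back into the buffer after each call.
    return memory_chain.run(question)
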
demo = gr.Interface(
    fn=greet,
    inputs=["text", gr.Dropdown(["With memory", "Without memory"], label="Memory status",
                                info="With memory, the output is slower but stronger")],
    outputs="text",
)
demo.launch(debug=True, share=True)
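
# Hedged sketch (also not part of the original app): the TFIDFRetriever and
# RetrievalQA imports above are currently unused; this shows one way they could
# back a retrieval-augmented answer. The sample texts and answer_with_context
# are illustrative assumptions. Kept commented out so it does not run on startup.
# retriever = TFIDFRetriever.from_texts([
#     "A stock split increases the share count while the price per share adjusts.",
#     "Diversification spreads portfolio risk across weakly correlated assets.",
# ])
# qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
#
# def answer_with_context(question):
#     # Retrieves the TF-IDF-closest texts and stuffs them into the prompt.
#     return qa_chain.run(question)
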
# Earlier version of this app that actually ran the question through
# PromptTemplate + LLMChain (kept for reference):
#
# import gradio as gr
# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp
#
# callbacks = [StreamingStdOutCallbackHandler()]
# print("creating llm started")
# M_NAME = "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf"
# llm = ChatLlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k=10,
#     callback_manager=callbacks,
#     n_ctx=2048,
#     verbose=True,  # Verbose is required to pass to the callback manager
# )
# # print("creating llm ended")
# def greet(question, model_type):
#     print("prompt started")
#     print(f"question is {question}")
#     template = """You are the Financial expert:
#     ### Instruction:
#     {question}
#     ### Input:
#     ### Response:
#     """
#     print("test1")
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     print("test2")
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     print("test3")
#     out_gen = llm_chain_model.run(question)
#     print("test4")
#     print(f"out is: {out_gen}")
#     return out_gen
#
# demo = gr.Interface(
#     fn=greet,
#     inputs=["text", gr.Dropdown(["Without memory", "With memory"], label="Memory status",
#                                 info="With memory, the output is slower but stronger")],
#     outputs="text",
# )
# demo.launch(debug=True, share=True)