import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatLlamaCpp

callbacks = [StreamingStdOutCallbackHandler()]

M_NAME = "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf"

print("creating llm started")
llm = ChatLlamaCpp(
    model_path=M_NAME,
    n_batch=8,
    temperature=0.85,
    max_tokens=256,
    top_p=0.95,
    top_k=10,
    callbacks=callbacks,
    n_ctx=2048,
    verbose=True,  # verbose output is required for the streaming callback handler
)
print("creating llm ended")


def greet(question, model_type):
    # model_type carries the "Memory status" dropdown value; memory is not
    # wired up yet (see the commented sketch at the end of the file).
    print(f"question is {question}")
    template = """You are the financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
    prompt = PromptTemplate(template=template, input_variables=["question"])
    llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    out_gen = llm_chain_model.run(question)
    print(f"out is: {out_gen}")
    return out_gen


demo = gr.Interface(
    fn=greet,
    inputs=[
        "text",
        gr.Dropdown(
            ["With memory", "Without memory"],
            label="Memory status",
            info="With memory, responses are slower but stronger",
        ),
    ],
    outputs="text",
)
demo.launch(debug=True, share=True)
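
# --- Sketch only, not part of the running app -------------------------------
# ConversationBufferMemory is imported above and the UI offers a "With memory"
# option, but greet() currently ignores it. The commented lines below are one
# hedged way the memory branch could be wired using LLMChain's memory support;
# the names `memory_prompt` and `memory_chain` and the {history} placeholder
# are illustrative assumptions, not part of the original code.
#
# memory_prompt = PromptTemplate(
#     template=(
#         "You are the financial expert:\n"
#         "{history}\n"
#         "### Instruction:\n"
#         "{question}\n"
#         "### Response:\n"
#     ),
#     input_variables=["history", "question"],
# )
# memory = ConversationBufferMemory(memory_key="history")
# memory_chain = LLMChain(prompt=memory_prompt, llm=llm, memory=memory)
#
# Inside greet(), the dropdown value could then select the chain:
#     chain = memory_chain if model_type == "With memory" else llm_chain_model
#     out_gen = chain.run(question)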