import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory

# Stream generated tokens to stdout while the model is producing output.
callbacks = [StreamingStdOutCallbackHandler()]

print("creating llm started")
llm = LlamaCpp(
    model_path="finbrov1.gguf",
    temperature=0.75,
    max_tokens=100,
    top_p=0.95,  # nucleus sampling probability must lie in (0, 1]
    callbacks=callbacks,
    verbose=True,  # Verbose is required to pass to the callback manager
)
print("creating llm finished")


def greet(question, model_type):
    print(f"question is {question}")
    if model_type == "With memory":
        # Minimal TF-IDF retriever over a tiny in-memory corpus.
        retriever = TFIDFRetriever.from_texts(["Financial AI"])
        template = """You are the Financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
        prompt1 = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=template,
        )
        # RetrievalQA "stuff" chain with conversation memory wired into the prompt.
        llm_chain_model = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            verbose=False,
            chain_type_kwargs={
                "verbose": False,
                "prompt": prompt1,
                "memory": ConversationBufferMemory(
                    memory_key="history", input_key="question"
                ),
            },
        )
        print("RetrievalQA chain created")
    else:
        template = """You are the Financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
        prompt = PromptTemplate(template=template, input_variables=["question"])
        llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    out_gen = llm_chain_model.run(question)
    print(f"out is: {out_gen}")
    return out_gen


demo = gr.Interface(
    fn=greet,
    inputs=[
        "text",
        gr.Dropdown(
            ["With memory", "Without memory"],
            label="Memory status",
            info="With memory the output is slower but more context-aware",
        ),
    ],
    outputs="text",
)

demo.launch(debug=True, share=True)


# Earlier version kept for reference (different GGUF model and sampling settings, no memory branch):
# import gradio as gr
# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp
#
# callbacks = [StreamingStdOutCallbackHandler()]
#
# print("creating llm started")
# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
# llm = LlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k=10,
#     callbacks=callbacks,
#     n_ctx=2048,
#     verbose=True,  # Verbose is required to pass to the callback manager
# )
# print("creating llm finished")
#
#
# def greet(question, model_type):
#     print("prompt started")
#     print(f"question is {question}")
#     template = """You are the Financial expert:
# ### Instruction:
# {question}
# ### Input:
# ### Response:
# """
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     out_gen = llm_chain_model.run(question)
#     print(f"out is: {out_gen}")
#     return out_gen
#
#
# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
#     ["Without memory", "With memory"], label="Memory status",
#     info="With memory the output is slower but more context-aware"
# ),], outputs="text")
# demo.launch(debug=True, share=True)
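
# Possible client-side usage, as a sketch only: once the app above is running, a separate
# Python session could query it through the gradio_client package. The local URL
# (http://127.0.0.1:7860) and api_name="/predict" are assumptions based on Gradio's
# defaults for a single gr.Interface, not values fixed by this script; use the share URL
# printed at launch if calling from another machine.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")  # or the public share URL printed at launch
# answer = client.predict(
#     "What is dollar-cost averaging?",  # value for the question textbox
#     "Without memory",                  # value for the "Memory status" dropdown
#     api_name="/predict",
# )
# print(answer)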