import gradio as gr
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain.retrievers import TFIDFRetriever
from langchain_community.chat_models import ChatLlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler

# Stream generated tokens to stdout while the model is producing output.
callbacks = [StreamingStdOutCallbackHandler()]

M_NAME = "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf"

print("creating llm started")
llm = ChatLlamaCpp(
    model_path=M_NAME,
    n_batch=8,
    temperature=0.85,
    max_tokens=256,
    top_p=0.95,
    top_k=10,
    callbacks=callbacks,
    n_ctx=2048,
    verbose=True,  # verbose is required to pass to the callback manager
)
print("creating llm ended")


def greet(question, model_type):
    print(f"question is {question}")
    if model_type == "With memory":
        # Retrieval-augmented chain with conversation memory: slower, but keeps chat history.
        retriever = TFIDFRetriever.from_texts(["Financial AI"])
        template = """You are the financial expert:
{history}
{context}
### Instruction:
{question}
### Input:
### Response:
"""
        prompt = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=template,
        )
        llm_chain_model = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            verbose=False,
            chain_type_kwargs={
                "verbose": False,
                "prompt": prompt,
                "memory": ConversationBufferMemory(
                    memory_key="history",
                    input_key="question",
                ),
            },
        )
    else:
        # Plain single-turn chain without memory.
        template = """You are the financial expert:
### Instruction:
{question}
### Input:
### Response:
"""
        prompt = PromptTemplate(template=template, input_variables=["question"])
        llm_chain_model = LLMChain(prompt=prompt, llm=llm)

    out_gen = llm_chain_model.run(question)
    print(f"out is: {out_gen}")
    return out_gen


demo = gr.Interface(
    fn=greet,
    inputs=[
        "text",
        gr.Dropdown(
            ["Without memory", "With memory"],
            label="Memory status",
            info="With memory the output is slower but stronger",
        ),
    ],
    outputs="text",
)
demo.launch(debug=True, share=True)