Spaces:

Danielrahmai1991
/

findemov3

Sleeping

App Files Files Community

findemov3 / app.py

Danielrahmai1991

Create app.py

f28abbd verified 2 months ago

raw

history blame

4.18 kB

	import gradio as gr

	from langchain_community.llms import LlamaCpp
	from langchain.prompts import PromptTemplate
	from langchain.chains import LLMChain
	from langchain_core.callbacks import StreamingStdOutCallbackHandler
	from langchain.retrievers import TFIDFRetriever
	from langchain.chains import RetrievalQA
	from langchain.memory import ConversationBufferMemory


	# Stream generated tokens to stdout as the model produces them.
	callbacks = [StreamingStdOutCallbackHandler()]
	print("creating ll started")
	# Load the local GGUF model once at import time; every request reuses it.
	llm = LlamaCpp(
	    model_path="finbrov1.gguf",
	    temperature=0.75,
	    max_tokens=100,
	    # top_p is the nucleus-sampling probability mass, so it has to be a
	    # fraction; the previous value of 4 was out of range. 0.95 matches the
	    # alternative configuration kept further down in this file.
	    top_p=0.95,
	    # Handlers go through `callbacks`; `callback_manager` is the deprecated
	    # spelling of the same argument.
	    callbacks=callbacks,
	    verbose=True,  # Verbose is required for the callbacks to receive output
	)
	# print("creating ll ended")






	# Prompt for the retrieval + memory chain: chat history and the retrieved
	# context are injected ahead of the user's instruction.
	_MEMORY_TEMPLATE = (
	    "You are the Finiantial expert:\n"
	    "{history}\n"
	    "{context}\n"
	    "### Instruction:\n"
	    "{question}\n"
	    "\n"
	    "### Input:\n"
	    "\n"
	    "\n"
	    "### Response:\n"
	)

	# Prompt for the plain chain that only sees the current question.
	_PLAIN_TEMPLATE = (
	    "You are the Finiantial expert:\n"
	    "### Instruction:\n"
	    "{question}\n"
	    "### Input:\n"
	    "### Response:\n"
	)

	# Maximum number of stored chat messages (questions + answers) replayed into
	# the prompt. Heuristic cap so the prompt cannot grow without bound; tune it
	# to the context window the model is loaded with.
	_MAX_HISTORY_MESSAGES = 4

	# Conversation memory shared by every "With memory" request.
	# NOTE(review): this is process-wide, so all users of the app share one
	# history - switch to per-session state if that is not acceptable.
	_memory = ConversationBufferMemory(memory_key="history", input_key="question")

	# Chains are built on first use and then reused, keyed by "uses memory?".
	_chains = {}


	def _build_memory_chain():
	    """Build the RetrievalQA chain that injects history and retrieved context."""
	    retriever = TFIDFRetriever.from_texts(["Finatial AI"])
	    prompt = PromptTemplate(
	        input_variables=["history", "context", "question"],
	        template=_MEMORY_TEMPLATE,
	    )
	    chain = RetrievalQA.from_chain_type(
	        llm=llm,
	        chain_type="stuff",
	        retriever=retriever,
	        verbose=False,
	        chain_type_kwargs={
	            "verbose": False,
	            "prompt": prompt,
	            "memory": _memory,
	        },
	    )
	    print("creating model created")
	    return chain


	def _build_plain_chain():
	    """Build the stateless LLMChain that only receives the question."""
	    prompt = PromptTemplate(template=_PLAIN_TEMPLATE, input_variables=["question"])
	    return LLMChain(prompt=prompt, llm=llm)


	def greet(question, model_type):
	    """Answer ``question`` with the financial model and return the text.

	    Args:
	        question: The user's question, inserted into the prompt's
	            instruction slot.
	        model_type: ``"With memory"`` selects the retrieval chain that also
	            replays earlier exchanges; any other value (including ``None``)
	            selects the plain single-turn chain.

	    Returns:
	        The text generated by the selected chain.
	    """
	    print(f"question is {question}")
	    use_memory = model_type == "With memory"

	    # Previously the chain (and its memory) was rebuilt on every call, so the
	    # "With memory" mode never actually saw any history. Build once, reuse.
	    chain = _chains.get(use_memory)
	    if chain is None:
	        builder = _build_memory_chain if use_memory else _build_plain_chain
	        chain = _chains[use_memory] = builder()

	    if use_memory:
	        # Drop the oldest messages in place so the replayed history stays
	        # within the cap before the next prompt is assembled.
	        stored = _memory.chat_memory.messages
	        if len(stored) > _MAX_HISTORY_MESSAGES:
	            del stored[:-_MAX_HISTORY_MESSAGES]

	    out_gen = chain.run(question)
	    print(f"out is: {out_gen}")
	    return out_gen

	# Dropdown that lets the user pick the memory-backed or the stateless chain.
	memory_selector = gr.Dropdown(
	    ["With memory", "Without memory"],
	    label="Memory status",
	    info="With using memory, the output will be slow but strong",
	)

	# Free-text question plus the memory selector in, generated text out.
	demo = gr.Interface(
	    fn=greet,
	    inputs=["text", memory_selector],
	    outputs="text",
	)
	demo.launch(debug=True, share=True)


	# import gradio as gr

	# from langchain_community.llms import LlamaCpp
	# from langchain.prompts import PromptTemplate
	# from langchain.chains import LLMChain
	# from langchain_core.callbacks import StreamingStdOutCallbackHandler
	# from langchain.retrievers import TFIDFRetriever
	# from langchain.chains import RetrievalQA
	# from langchain.memory import ConversationBufferMemory
	# from langchain_community.chat_models import ChatLlamaCpp

	# callbacks = [StreamingStdOutCallbackHandler()]
	# print("creating ll started")
	# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
	# llm = LlamaCpp(
	# model_path=M_NAME,
	# n_batch=8,
	# temperature=0.85,
	# max_tokens=256,
	# top_p=0.95,
	# top_k = 10,
	# callback_manager=callbacks,
	# n_ctx=2048,
	# verbose=True, # Verbose is required to pass to the callback manager
	# )
	# # print("creating ll ended")






	# def greet(question, model_type):
	# print("prompt started ")
	# print(f"question is {question}")
	# template = """You are the Finiantial expert:

	# ### Instruction:
	# {question}

	# ### Input:


	# ### Response:
	# """
	# print("test1")
	# prompt = PromptTemplate(template=template, input_variables=["question"])
	# print("test2")
	# llm_chain_model = LLMChain(prompt=prompt, llm=llm)
	# print("test3")
	# out_gen = llm_chain_model.run(question)
	# print("test4")
	# print(f"out is: {out_gen}")
	# return out_gen

	# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
	# ["Without memory", "With memory"], label="Memory status", info="With using memory, the output will be slow but strong"
	# ),], outputs="text")
	# demo.launch(debug=True, share=True)