import gradio as gr
from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatLlamaCpp

callbacks = [StreamingStdOutCallbackHandler()]
print("creating ll started") | |
llm = ChatLlamaCpp(
    model_path="finbro-v0.1.0-llama-3-8B-instruct-1m.gguf",
    n_batch=8,
    temperature=0.85,
    max_tokens=256,
    top_p=0.95,
    top_k=10,
    callbacks=callbacks,  # callback_manager is deprecated; pass the handler list via callbacks
    n_ctx=2048,
    verbose=True,  # verbose is required so tokens reach the streaming callback
)
print("creating llm ended") | |
def greet(question, model_type):
    # Placeholder handler: echoes a fixed test string while the UI is wired up.
    # The full generation path (PromptTemplate + LLMChain) is in the
    # commented-out version at the bottom of this file.
    print(f"question is {question}")
    out_gen = "testsetestestetsetsets"
    return out_gen
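
# Hedged sketch (not part of the original app): one way the "With memory" option
# could be served, using the ConversationBufferMemory imported above. The names
# memory_template, memory_chain and greet_with_memory are illustrative assumptions;
# greet() could dispatch to greet_with_memory when model_type == "With memory".
memory = ConversationBufferMemory(memory_key="chat_history")
memory_template = """You are the Financial expert:
Conversation so far:
{chat_history}
### Instruction:
{question}
### Response:
"""
memory_prompt = PromptTemplate(
    template=memory_template, input_variables=["chat_history", "question"]
)
memory_chain = LLMChain(prompt=memory_prompt, llm=llm, memory=memory)

def greet_with_memory(question):
    # The chain injects the buffered history into {chat_history}, then stores the
    # new question/answer pair back into the buffer after each call.
    return memory_chain.run(question)
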
demo = gr.Interface(
    fn=greet,
    inputs=["text", gr.Dropdown(["With memory", "Without memory"], label="Memory status",
                                info="With memory, the output is slower but stronger")],
    outputs="text",
)
demo.launch(debug=True, share=True)
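
# Hedged sketch (also not part of the original app): the TFIDFRetriever and
# RetrievalQA imports above are currently unused; this shows one way they could
# back a retrieval-augmented answer. The sample texts and answer_with_context
# are illustrative assumptions. Kept commented out so it does not run on startup.
# retriever = TFIDFRetriever.from_texts([
#     "A stock split increases the share count while the price per share adjusts.",
#     "Diversification spreads portfolio risk across weakly correlated assets.",
# ])
# qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
#
# def answer_with_context(question):
#     # Retrieves the TF-IDF-closest texts and stuffs them into the prompt.
#     return qa_chain.run(question)
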
# Earlier version of this app that actually ran the question through
# PromptTemplate + LLMChain (kept for reference):
#
# import gradio as gr
# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp
#
# callbacks = [StreamingStdOutCallbackHandler()]
# print("creating llm started")
# M_NAME = "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf"
# llm = ChatLlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k=10,
#     callback_manager=callbacks,
#     n_ctx=2048,
#     verbose=True,  # Verbose is required to pass to the callback manager
# )
# # print("creating llm ended")
# def greet(question, model_type):
#     print("prompt started")
#     print(f"question is {question}")
#     template = """You are the Financial expert:
#     ### Instruction:
#     {question}
#     ### Input:
#     ### Response:
#     """
#     print("test1")
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     print("test2")
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     print("test3")
#     out_gen = llm_chain_model.run(question)
#     print("test4")
#     print(f"out is: {out_gen}")
#     return out_gen
#
# demo = gr.Interface(
#     fn=greet,
#     inputs=["text", gr.Dropdown(["Without memory", "With memory"], label="Memory status",
#                                 info="With memory, the output is slower but stronger")],
#     outputs="text",
# )
# demo.launch(debug=True, share=True)