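"""Gradio demo that answers finance questions with a local llama.cpp model.

Two modes are exposed in the UI: "With memory" wraps the model in a RetrievalQA
chain with a TF-IDF retriever and conversation-buffer memory; "Without memory"
runs a plain LLMChain over a single prompt.
"""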
import gradio as gr

from langchain_community.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.chains import RetrievalQA
from langchain.memory import ConversationBufferMemory


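# Stream generated tokens to stdout while the model runs, and load the local
# GGUF model through llama.cpp (model_path is relative to the working directory).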
callbacks = [StreamingStdOutCallbackHandler()]
print("creating llm started")
llm = LlamaCpp(
    model_path="finbrov1.gguf",
    temperature=0.75,
    max_tokens=100,
    top_p=0.95,  # top_p is a probability mass; the previous value of 4 was out of range
    callbacks=callbacks,
    verbose=True,  # verbose is required for the streaming callback output
)
print("creating llm ended")






def greet(question, model_type):
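    """Answer a finance question, optionally using retrieval and conversation memory."""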
    print(f"question is {question}")
    if model_type == "With memory":
        retriever = TFIDFRetriever.from_texts(
            ["Financial AI"])
        
        
        template = """You are the Financial expert:
        {history}
        {context}
        ### Instruction:
        {question}
        
        ### Input:
        
        
        ### Response:
        """
        
        prompt1 = PromptTemplate(
            input_variables=["history", "context", "question"],
            template=template,
        )
        
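        # "stuff" chain: retrieved snippets are inserted into {context}, and
        # ConversationBufferMemory supplies prior turns through {history}.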
        llm_chain_model = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type='stuff',
            retriever=retriever,
            verbose=False,
            chain_type_kwargs={
                "verbose": False,
                "prompt": prompt1,
                "memory": ConversationBufferMemory(
                    memory_key="history",
                    input_key="question"),
            }
        )
        print("retrieval chain created")
    else:
        template = """You are the Financial expert:
        ### Instruction:
        {question}
        ### Input:
        ### Response:
        """
        
        prompt = PromptTemplate(template=template, input_variables=["question"])
        
        llm_chain_model = LLMChain(prompt=prompt, llm=llm)
    out_gen = llm_chain_model.run(question)  
    print(f"out is: {out_gen}")
    return out_gen

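# Minimal Gradio UI: a free-text question plus a dropdown to choose the chain type.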
demo = gr.Interface(
    fn=greet,
    inputs=["text", gr.Dropdown(["With memory", "Without memory"], label="Memory status",
                                info="With memory, the output is slower but stronger")],
    outputs="text")
demo.launch(debug=True, share=True)


# import gradio as gr

# from langchain_community.llms import LlamaCpp
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain_core.callbacks import StreamingStdOutCallbackHandler
# from langchain.retrievers import TFIDFRetriever
# from langchain.chains import RetrievalQA
# from langchain.memory import ConversationBufferMemory
# from langchain_community.chat_models import ChatLlamaCpp

# callbacks = [StreamingStdOutCallbackHandler()]
# print("creating llm started")
# M_NAME = "taddeusb90_finbro-v0.1.0-dolphin-2.9-llama-3-8B-instruct-131k_adapt_basic_model_16bit.gguf"
# llm = LlamaCpp(
#     model_path=M_NAME,
#     n_batch=8,
#     temperature=0.85,
#     max_tokens=256,
#     top_p=0.95,
#     top_k = 10,
#     callback_manager=callbacks,
#     n_ctx=2048,
#     verbose=True,  # Verbose is required to pass to the callback manager
# )
# # print("creating ll ended")






# def greet(question, model_type):
#     print("prompt started ")
#     print(f"question is {question}")
#     template = """You are the Financial expert:
    
#     ### Instruction:
#     {question}
    
#     ### Input:
    
    
#     ### Response:
#     """
#     print("test1")
#     prompt = PromptTemplate(template=template, input_variables=["question"])
#     print("test2")
#     llm_chain_model = LLMChain(prompt=prompt, llm=llm)
#     print("test3")
#     out_gen = llm_chain_model.run(question)  
#     print("test4")
#     print(f"out is: {out_gen}")
#     return out_gen

# demo = gr.Interface(fn=greet, inputs=["text", gr.Dropdown(
#             ["Without memory", "With memory"], label="Memory status", info="With memory, the output is slower but stronger"
#         ),], outputs="text")
# demo.launch(debug=True, share=True)