CaioXapelaum committed
Commit c464ed4
1 Parent(s): 2eb2cd2

Update app.py

Files changed (1)
  1. app.py +8 -8
app.py CHANGED
@@ -13,14 +13,14 @@ llm = None
 llm_model = None
 
 hf_hub_download(
-    repo_id="TheBloke/Open_Gpt4_8x7B_v0.2-GGUF",
-    filename="open_gpt4_8x7b_v0.2.Q5_K_M.gguf",
+    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q5_K_M.gguf",
     local_dir = "./models"
 )
 
 
 def get_messages_formatter_type(model_name):
-    return MessagesFormatterType.ALPACA
+    return MessagesFormatterType.LLAMA_3
 
 @spaces.GPU
 def respond(
@@ -36,13 +36,13 @@ def respond(
 ):
     global llm
     global llm_model
-    model = "open_gpt4_8x7b_v0.2.Q5_K_M.gguf"
+    model = "Llama-3.2-1B-Instruct-Q5_K_M.gguf"
 
     chat_template = get_messages_formatter_type(model)
 
     if llm is None or llm_model != model:
         llm = Llama(
-            model_path="models/open_gpt4_8x7b_v0.2.Q5_K_M.gguf",
+            model_path=f"models/{model}",
             flash_attn=True,
             n_gpu_layers=81,
             n_batch=1024,
@@ -97,7 +97,7 @@ def respond(
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
-        gr.Textbox(value="You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags.", label="System message"),
+        gr.Textbox(value="You are a helpful assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
@@ -139,8 +139,8 @@ demo = gr.ChatInterface(
     undo_btn="Undo",
     clear_btn="Clear",
     submit_btn="Send",
-    title="OpenGPT4",
-    description="Chat with *GPT-4* for free!",
+    title="Llama 3 Lightning",
+    description="Chat with Llama 3 Lightning",
     chatbot=gr.Chatbot(
         scale=1,
         likeable=False,
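
For context, the changed lines compose as in the short sketch below: a minimal, self-contained version of the updated download-and-load path, using the huggingface_hub, llama_cpp, and llama_cpp_agent packages that app.py already imports. Anything not visible in the diff (for example, that Llama() takes no further arguments here) is an assumption for illustration, not the app's exact code.

# Minimal sketch of the flow this commit changes; the settings shown are
# copied from the diff, everything else is assumed for illustration.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import MessagesFormatterType

# Fetch the new GGUF weights into ./models (repo and filename as in the diff).
hf_hub_download(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q5_K_M.gguf",
    local_dir="./models",
)

model = "Llama-3.2-1B-Instruct-Q5_K_M.gguf"

# Llama 3 models use the LLAMA_3 chat template, hence the formatter change
# away from ALPACA.
chat_template = MessagesFormatterType.LLAMA_3

# Load with the same llama.cpp options the app passes.
llm = Llama(
    model_path=f"models/{model}",
    flash_attn=True,
    n_gpu_layers=81,
    n_batch=1024,
)

Note the switch from a hard-coded model_path to f"models/{model}": the path and the reload check (llm_model != model) are now keyed to a single variable, so a future model swap only touches one line.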