CaioXapelaum committed
Commit c464ed4
1 Parent(s): 2eb2cd2

Update app.py

Files changed (1)
  1. app.py +8 -8
app.py CHANGED
@@ -13,14 +13,14 @@ llm = None
 llm_model = None
 
 hf_hub_download(
-    repo_id="TheBloke/Open_Gpt4_8x7B_v0.2-GGUF",
-    filename="open_gpt4_8x7b_v0.2.Q5_K_M.gguf",
+    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
+    filename="Llama-3.2-1B-Instruct-Q5_K_M.gguf",
     local_dir = "./models"
 )
 
 
 def get_messages_formatter_type(model_name):
-    return MessagesFormatterType.ALPACA
+    return MessagesFormatterType.LLAMA_3
 
 @spaces.GPU
 def respond(
@@ -36,13 +36,13 @@ def respond(
 ):
     global llm
     global llm_model
-    model = "open_gpt4_8x7b_v0.2.Q5_K_M.gguf"
+    model = "Llama-3.2-1B-Instruct-Q5_K_M.gguf"
 
     chat_template = get_messages_formatter_type(model)
 
     if llm is None or llm_model != model:
         llm = Llama(
-            model_path="models/open_gpt4_8x7b_v0.2.Q5_K_M.gguf",
+            model_path=f"models/{model}",
             flash_attn=True,
             n_gpu_layers=81,
             n_batch=1024,
@@ -97,7 +97,7 @@ def respond(
 demo = gr.ChatInterface(
     fn=respond,
     additional_inputs=[
-        gr.Textbox(value="You are a world-class AI system, capable of complex reasoning and reflection. Reason through the query inside <thinking> tags, and then provide your final response inside <output> tags. If you detect that you made a mistake in your reasoning at any point, correct yourself inside <reflection> tags.", label="System message"),
+        gr.Textbox(value="You are a helpful assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=8192, value=2048, step=1, label="Max tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
@@ -139,8 +139,8 @@ demo = gr.ChatInterface(
     undo_btn="Undo",
     clear_btn="Clear",
     submit_btn="Send",
-    title="OpenGPT4",
-    description="Chat with *GPT-4* for free!",
+    title="Llama 3 Lightning",
+    description="Chat with Llama 3 Lightning",
     chatbot=gr.Chatbot(
         scale=1,
         likeable=False,
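
For context, the changed lines compose as in the short sketch below: a minimal, self-contained version of the updated download-and-load path, using the huggingface_hub, llama_cpp, and llama_cpp_agent packages that app.py already imports. Anything not visible in the diff (for example, that Llama() takes no further arguments here) is an assumption for illustration, not the app's exact code.

# Minimal sketch of the flow this commit changes; the settings shown are
# copied from the diff, everything else is assumed for illustration.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
from llama_cpp_agent import MessagesFormatterType

# Fetch the new GGUF weights into ./models (repo and filename as in the diff).
hf_hub_download(
    repo_id="bartowski/Llama-3.2-1B-Instruct-GGUF",
    filename="Llama-3.2-1B-Instruct-Q5_K_M.gguf",
    local_dir="./models",
)

model = "Llama-3.2-1B-Instruct-Q5_K_M.gguf"

# Llama 3 models use the LLAMA_3 chat template, hence the formatter change
# away from ALPACA.
chat_template = MessagesFormatterType.LLAMA_3

# Load with the same llama.cpp options the app passes.
llm = Llama(
    model_path=f"models/{model}",
    flash_attn=True,
    n_gpu_layers=81,
    n_batch=1024,
)

Note the switch from a hard-coded model_path to f"models/{model}": the path and the reload check (llm_model != model) are now keyed to a single variable, so a future model swap only touches one line.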