Spaces:

mikemin027
/

Ministral-8B-Instruct-2410-GGUF

Running

mikemin027 committed 15 days ago

Commit

3eee526

•

1 Parent(s): b4ed75c

Attempt at fixing Client NameError #1

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,19 +2,14 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 from llama_cpp import Llama
 llm = Llama.from_pretrained(
-	repo_id="bartowski/Ministral-8B-Instruct-2410-GGUF",
-	filename="Ministral-8B-Instruct-2410-Q4_K_M.gguf",
-)
-llm.create_chat_completion(
-	messages = [
-		{
-			"role": "user",
-			"content": "What is the capital of France?"
-		}
-	]
 )
 def respond(
     message,
@@ -34,25 +29,22 @@ def respond(
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
-        gr.Textbox(value="You are a friendly, conversational chatbot who utitilizes relevant information and emojis to build efficient conversations.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
@@ -65,6 +57,5 @@ demo = gr.ChatInterface(
     ],
 )
 if __name__ == "__main__":
-    demo.launch()

 from huggingface_hub import InferenceClient
 from llama_cpp import Llama
+# Initialize the Llama model
 llm = Llama.from_pretrained(
+    repo_id="bartowski/Ministral-8B-Instruct-2410-GGUF",
+    filename="Ministral-8B-Instruct-2410-Q4_K_M.gguf",
 )
+# Initialize the inference client
+client = InferenceClient(model="bartowski/Ministral-8B-Instruct-2410-GGUF")
 def respond(
     message,
     messages.append({"role": "user", "content": message})
+    # Use llm for chat completion
+    response = llm.create_chat_completion(
+        messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True,
+    )
+    for token in response:
+        yield token['choices'][0]['delta']['content']
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Textbox(value="You are a friendly, conversational chatbot who utilizes relevant information and emojis to build efficient conversations.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(
     ],
 )
 if __name__ == "__main__":
+    demo.launch()