mikemin027 committed on
Commit
c11820f
1 Parent(s): b6b3aee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -20
app.py CHANGED
@@ -2,15 +2,14 @@ import gradio as gr
2
  from huggingface_hub import InferenceClient
3
  from llama_cpp import Llama
4
 
5
- # Initialize the Llama model
 
 
6
  llm = Llama.from_pretrained(
7
  repo_id="bartowski/Ministral-8B-Instruct-2410-GGUF",
8
  filename="Ministral-8B-Instruct-2410-Q4_K_M.gguf",
9
  )
10
 
11
- # Initialize the inference client
12
- client = InferenceClient(model="bartowski/Ministral-8B-Instruct-2410-GGUF")
13
-
14
  def respond(
15
  message,
16
  history: list[tuple[str, str]],
@@ -29,29 +28,24 @@ def respond(
29
 
30
  messages.append({"role": "user", "content": message})
31
 
32
- # Use llm for chat completion
33
- response = llm.create_chat_completion(
34
- messages=messages,
 
 
35
  max_tokens=max_tokens,
 
36
  temperature=temperature,
37
  top_p=top_p,
38
- stream=True,
39
- )
40
-
41
- for token in response:
42
- # Print the token to debug the structure
43
- print(token)
44
- # Adjust based on the actual structure of the response
45
- if 'choices' in token and len(token['choices']) > 0:
46
- content = token['choices'][0].get('delta', {}).get('content', '')
47
- yield content
48
- else:
49
- yield "No valid response received."
50
 
51
  demo = gr.ChatInterface(
52
  respond,
53
  additional_inputs=[
54
- gr.Textbox(value="You are a friendly, conversational chatbot who utilizes relevant information and emojis to build efficient conversations.", label="System message"),
55
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
56
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
57
  gr.Slider(
 
2
  from huggingface_hub import InferenceClient
3
  from llama_cpp import Llama
4
 
5
+ # Initialize the InferenceClient
6
+ client = InferenceClient()
7
+
8
  llm = Llama.from_pretrained(
9
  repo_id="bartowski/Ministral-8B-Instruct-2410-GGUF",
10
  filename="Ministral-8B-Instruct-2410-Q4_K_M.gguf",
11
  )
12
 
 
 
 
13
  def respond(
14
  message,
15
  history: list[tuple[str, str]],
 
28
 
29
  messages.append({"role": "user", "content": message})
30
 
31
+ response = ""
32
+
33
+ # Use the client to get the chat completion
34
+ for message in client.chat_completion(
35
+ messages,
36
  max_tokens=max_tokens,
37
+ stream=True,
38
  temperature=temperature,
39
  top_p=top_p,
40
+ ):
41
+ token = message['choices'][0]['delta']['content']
42
+ response += token
43
+ yield response
 
 
 
 
 
 
 
 
44
 
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
48
+ gr.Textbox(value="You are a friendly, conversational, helpful, and informative chatbot, designed to help users as best as possible. Responses should be quirky and fun to read, including the use of appropriate emojis in answers, wherever necesssary.", label="System message"),
49
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
50
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
51
  gr.Slider(