vericudebuget committed on
Commit c81a905
1 Parent(s): 7339212

Update app.py

Files changed (1)
  1. app.py +27 -32
app.py CHANGED
@@ -4,20 +4,24 @@ from pathlib import Path
 import datetime
 
 # Initialize the InferenceClient
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+client = InferenceClient(
+    "mistralai/Mixtral-8x7B-Instruct-v0.1"
+)
 
 def format_prompt(message, history, system_prompt):
-    prompt = ""
+    prompt = "<s>"
     for user_prompt, bot_response in history:
         prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response} "
+        prompt += f" {bot_response}</s> "
     prompt += f"[INST] {message} [/INST]"
-    # Add the empty system prompt
+    # Add the system prompt only once at the beginning
     prompt = system_prompt + prompt
     return prompt
 
-def generate(prompt, history, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
-    system_prompt = ''' ''' # Empty system prompt
+def generate(
+    prompt, history, user_system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0
+):
+    system_prompt = '''Instructions: Everything else is from the user. You are Milo, an AI assistant created by ConvoLite in 2024 (he/him). Be friendly and straight to the point. Have Bible-based values and principles. Make sure to follow the user request as best as possible as long as it is within these guidelines. Act as an expert in everything you do, provide advice and suggestions, and ask questions if necessary to get a better understanding of what the user wants.'''
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
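This hunk moves format_prompt onto Mixtral's instruction template: the transcript now opens with a <s> token and each completed bot turn is closed with </s>. A minimal sketch of the resulting string, with illustrative values (the history and system prompt below are made up):

    # Illustrative call only; shows the string the updated format_prompt builds.
    history = [("Hi", "Hello! How can I help?")]
    print(format_prompt("What is Gradio?", history, "You are Milo. "))
    # You are Milo. <s>[INST] Hi [/INST] Hello! How can I help?</s> [INST] What is Gradio? [/INST]

Note that the system prompt is prepended outside the opening <s>, so it sits before the instruction markers rather than inside the first [INST] block.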
@@ -35,10 +39,6 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=9048, top_p=0.95,
     now = datetime.datetime.now()
     formatted_time = now.strftime("%H.%M.%S, %B, %Y")
 
-    # Load chat history from localStorage
-    loaded_history = gr.javascript.call('loadChat')
-    history = loaded_history + history
-
     formatted_prompt = format_prompt(f"{prompt}", history, system_prompt)
 
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
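This hunk removes the localStorage round trip: gr.javascript is not part of Gradio's Python API, so both call() lines would raise AttributeError at runtime. The unchanged lines elided between the two hunks presumably assemble generate_kwargs; a sketch of the likely shape, using only documented InferenceClient.text_generation parameters (the exact contents are an assumption, since this diff does not show them):

    # Assumed shape of generate_kwargs, built in lines this diff does not show.
    generate_kwargs = dict(
        temperature=temperature,            # clamped to >= 1e-2 above
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,                     # assumption: sampling enabled
    )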
@@ -47,33 +47,28 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=9048, top_p=0.95,
     for response in stream:
         output += response.token.text
 
-    # Save the updated chat history to localStorage
-    new_history = history + [(prompt, output)]
-    gr.javascript.call('saveChat', [new_history])
-
-    return output
+    yield output
 
 additional_inputs = [
+    gr.Textbox(label="System Prompt", max_lines=4, interactive=True),
     gr.Slider(label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs"),
-    gr.Slider(label="Max new tokens", value=9048, minimum=256, maximum=9048, step=64, interactive=True, info="The maximum numbers of new tokens"),
+    gr.Slider(label="Max new tokens", value=10480, minimum=256, maximum=10480, step=64, interactive=True, info="The maximum number of new tokens that the AI can generate in a single message"),
     gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens"),
-    gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens")
+    gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens.")
 ]
 
 avatar_images = ("https://i.postimg.cc/pXjKKVXG/user-circle.png", "https://i.postimg.cc/qq04Yz93/CL3.png")
 
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(value=gr.javascript.call('loadChat'))
-    gr.ChatInterface(
-        fn=generate,
-        chatbot=gr.Chatbot(show_label=True, show_share_button=False, show_copy_button=True, likeable=True, layout="panel", height="auto", avatar_images=avatar_images),
-        additional_inputs=additional_inputs,
-        title="ConvoLite",
-        submit_btn="➢",
-        retry_btn="Retry",
-        undo_btn="↩ Undo",
-        clear_btn="Clear (New chat)",
-        stop_btn="Stop ▢",
-        concurrency_limit=20,
-        theme=gr.themes.Soft(primary_hue=gr.themes.colors.cyan),
-    ).launch(show_api=False)
+gr.ChatInterface(
+    fn=generate,
+    chatbot=gr.Chatbot(show_label=True, show_share_button=False, show_copy_button=True, likeable=True, layout="panel", height="auto", avatar_images=avatar_images),
+    additional_inputs=additional_inputs,
+    title="ConvoLite",
+    submit_btn="➢",
+    retry_btn="Retry",
+    undo_btn="↩ Undo",
+    clear_btn="Clear (New chat)",
+    stop_btn="Stop ▢",
+    concurrency_limit=20,
+    theme=gr.themes.Soft(primary_hue=gr.themes.colors.cyan),
+).launch(show_api=False)
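Replacing return output with yield output turns generate into a generator, which gr.ChatInterface treats as a streamed reply. As committed, the function yields once after the stream is fully consumed; yielding inside the token loop would repaint the chat bubble incrementally, e.g.:

    # Variation, not in this commit: yield per token for visible streaming.
    for response in stream:
        output += response.token.text
        yield output  # ChatInterface redraws the partial reply on each yield

Two loose ends worth flagging: the new System Prompt textbox is the first entry in additional_inputs, so it feeds user_system_prompt positionally, but generate immediately overwrites it with the hard-coded Milo prompt, leaving the textbox without effect; and the Max new tokens slider now allows up to 10480 while the function's default stays at 9048.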
 