vericudebuget committed
Commit 2cb9aa9
1 Parent(s): ce6faeb

Update app.py

Files changed (1)
  1. app.py +7 -20
app.py CHANGED
@@ -16,7 +16,6 @@ def format_prompt(message, history):
 def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=9048, top_p=0.95, repetition_penalty=1.0):
     temperature = max(float(temperature), 1e-2)
     top_p = float(top_p)
-
     generate_kwargs = dict(
         temperature=temperature,
         max_new_tokens=max_new_tokens,
@@ -32,15 +31,10 @@ def generate(prompt, history, system_prompt, temperature=0.9, max_new_tokens=904
     system_prompt = f"server log: ~This message was sent at {formatted_time}. The actual year is 2024.~"
 
     formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-
-    # Use a loading indicator while the model is generating the response
-    with gr.Blocks().loading_indicator():
-        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-        output = ""
-        for response in stream:
-            output += response.token.text
-            yield output
-
+    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    output = ""
+    for response in stream:
+        output += response.token.text
     return output
 
 additional_inputs = [
@@ -51,17 +45,10 @@ additional_inputs = [
     gr.Slider(label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens")
 ]
 
-# Path to your custom CSS file
-css_path = "theme.css"
-
-chat_interface = gr.ChatInterface(
+gr.ChatInterface(
     fn=generate,
     chatbot=gr.Chatbot(show_label=True, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
     additional_inputs=additional_inputs,
     title="ConvoLite",
-    concurrency_limit=50, # Increase the concurrency limit
-)
-
-with gr.Blocks(css=f"file={css_path}", theme=gr.themes.Soft()) as demo:
-    demo.add(chat_interface)
-    demo.launch(show_api=False)
+    concurrency_limit=20,
+).launch(show_api=False)
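
Note on the change: the commit drops the `with gr.Blocks().loading_indicator():` wrapper (which does not correspond to a documented Gradio API) and, with it, the `yield output` inside the token loop, so `generate` now returns the full response only once generation finishes rather than streaming partial text to the chat UI. A minimal sketch of the streaming variant, assuming `client` is the `huggingface_hub.InferenceClient` defined earlier in app.py (not shown in this diff):

# Minimal sketch, not the committed code: gr.ChatInterface streams when its
# fn is a generator, re-rendering the chatbot on each yield.
# Assumes `client` is the InferenceClient created earlier in app.py.
def generate_streaming(formatted_prompt, **generate_kwargs):
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""
    for response in stream:
        output += response.token.text
        yield output  # yield the cumulative text so the UI updates live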