awacke1 commited on
Commit
af8e1c9
β€’
1 Parent(s): e303946

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +196 -0
app.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from huggingface_hub import InferenceClient
2
+ import gradio as gr
3
+
4
# Serverless Inference API client pinned to the hosted
# Mistral-7B-Instruct-v0.1 endpoint; shared by every generation request.
client = InferenceClient(
    "mistralai/Mistral-7B-Instruct-v0.1"
)
9
def format_prompt(message, history):
    """Serialize a chat transcript into Mistral's instruct prompt format.

    Each past (user, assistant) pair becomes ``[INST] user [/INST] reply</s>``,
    and the new ``message`` is appended as a final open ``[INST]`` turn.
    """
    pieces = ["<s>"]
    for past_user, past_assistant in history:
        pieces.append(f"[INST] {past_user} [/INST]")
        pieces.append(f" {past_assistant}</s> ")
    pieces.append(f"[INST] {message} [/INST]")
    return "".join(pieces)
16
+
17
def generate(
    prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a completion from the Inference API for *prompt*.

    Yields the accumulated response text after every received token so the
    Gradio chat UI can render it incrementally. Sampling knobs arrive from
    the UI sliders and may come in as strings, hence the float() coercions.
    """
    # The API rejects non-positive temperatures; clamp to a small floor.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    sampling_options = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed keeps sampling reproducible between calls
    )

    stream = client.text_generation(
        format_prompt(prompt, history),
        **sampling_options,
        stream=True,
        details=True,
        return_full_text=False,
    )

    accumulated = ""
    for event in stream:
        accumulated += event.token.text
        yield accumulated
    return accumulated
43
+
44
+
45
def _sampling_slider(**config):
    # Factory for the tuning sliders: every one of them is interactive,
    # so that flag is hoisted here instead of repeated per slider.
    return gr.Slider(interactive=True, **config)


# Extra ChatInterface inputs; their order maps positionally onto generate's
# temperature / max_new_tokens / top_p / repetition_penalty parameters.
additional_inputs = [
    _sampling_slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        info="Higher values produce more diverse outputs",
    ),
    _sampling_slider(
        label="Max new tokens",
        value=256,
        minimum=0,
        maximum=1048,
        step=64,
        info="The maximum numbers of new tokens",
    ),
    _sampling_slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        info="Higher values sample more low-probability tokens",
    ),
    _sampling_slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        info="Penalize repeated tokens",
    ),
]
83
+
84
# Stylesheet passed to gr.Blocks: constrains the #mkd element to a fixed,
# scrollable, bordered box.
css = """
#mkd {
  height: 200px;
  overflow: auto;
  border: 1px solid #ccc;
}
"""
91
+
92
with gr.Blocks(css=css) as demo:

    # Chat UI wired to the streaming `generate` callback. The sliders in
    # `additional_inputs` map positionally onto generate's keyword arguments.
    gr.ChatInterface(
        generate,
        additional_inputs=additional_inputs,
        examples = [
            ["🎸 List top 3 songs by Everclear and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎀"],
            ["🎡 List top 3 songs by Taylor Swift and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎢"],
            ["πŸŽ™οΈ List top 3 songs by Adele and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎧"],
            ["🎼 List top 3 songs by Bruno Mars and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎷"],
            ["🎹 List top 3 songs by Lady Gaga and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎺"],
            ["🎻 List top 3 songs by Ed Sheeran and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. πŸ₯"],
            ["🎀 List top 3 songs by Drake and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎢"],
            ["🎧 List top 3 songs by Rihanna and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎡"],
            ["🎷 List top 3 songs by Justin Bieber and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎼"],
            ["🎢 List top 3 songs by BeyoncΓ© and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. πŸŽ™οΈ"],
            ["🎺 List top 3 songs by Katy Perry and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎹"],
            ["πŸ₯ List top 3 songs by Eminem and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎻"],
            ["🎀 List top 3 songs by Ariana Grande and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎧"],
            ["🎢 List top 3 songs by Billie Eilish and also list top 3 songs from when they were top ten on the charts. For each song, list the song name and chords and lyrics as well as the artist. 🎡"]
        ]
    )
    # Static model description rendered as raw HTML.
    gr.HTML("""<h2>πŸ€– Mistral Chat - Gradio πŸ€–</h2>
In this demo, you can chat with <a href='https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1'>Mistral-7B-Instruct</a> model. πŸ’¬
Learn more about the model <a href='https://huggingface.co/docs/transformers/main/model_doc/mistral'>here</a>. πŸ“š
<h2>πŸ›  Model Features πŸ› </h2>
<ul>
<li>πŸͺŸ Sliding Window Attention with 128K tokens span</li>
<li>πŸš€ GQA for faster inference</li>
<li>πŸ“ Byte-fallback BPE tokenizer</li>
</ul>
<h3>πŸ“œ License πŸ“œ Released under Apache 2.0 License</h3>
<h3>πŸ“¦ Usage πŸ“¦</h3>
<ul>
<li>πŸ“š Available on Huggingface Hub</li>
<li>🐍 Python code snippets for easy setup</li>
<li>πŸ“ˆ Expected speedups with Flash Attention 2</li>
</ul>
""")

    # Same feature overview as Markdown; this string is also fed to the
    # read-aloud widget below.
    markdown="""
| Feature | Description | Byline |
|---------|-------------|--------|
| πŸͺŸ Sliding Window Attention with 128K tokens span | Enables the model to have a larger context for each token. | Increases model's understanding of context, resulting in more coherent and contextually relevant outputs. |
| πŸš€ GQA for faster inference | Graph Query Attention allows faster computation during inference. | Speeds up the model inference time without sacrificing too much on accuracy. |
| πŸ“ Byte-fallback BPE tokenizer | Uses Byte Pair Encoding but can fall back to byte-level encoding. | Allows the tokenizer to handle a wider variety of input text while keeping token size manageable. |
| πŸ“œ License | Released under Apache 2.0 License | Gives you a permissive free software license, allowing you freedom to use, modify, and distribute the code. |
| πŸ“¦ Usage | | |
| πŸ“š Available on Huggingface Hub | The model can be easily downloaded and set up from Huggingface. | Makes it easier to integrate the model into various projects. |
| 🐍 Python code snippets for easy setup | Provides Python code snippets for quick and easy model setup. | Facilitates rapid development and deployment, especially useful for prototyping. |
| πŸ“ˆ Expected speedups with Flash Attention 2 | Upcoming update expected to bring speed improvements. | Keep an eye out for this update to benefit from performance gains. |
# πŸ›  Model Features and More πŸ› 
## Features
- πŸͺŸ Sliding Window Attention with 128K tokens span
- **Byline**: Increases model's understanding of context, resulting in more coherent and contextually relevant outputs.
- πŸš€ GQA for faster inference
- **Byline**: Speeds up the model inference time without sacrificing too much on accuracy.
- πŸ“ Byte-fallback BPE tokenizer
- **Byline**: Allows the tokenizer to handle a wider variety of input text while keeping token size manageable.
- πŸ“œ License: Released under Apache 2.0 License
- **Byline**: Gives you a permissive free software license, allowing you freedom to use, modify, and distribute the code.
## Usage πŸ“¦
- πŸ“š Available on Huggingface Hub
- **Byline**: Makes it easier to integrate the model into various projects.
- 🐍 Python code snippets for easy setup
- **Byline**: Facilitates rapid development and deployment, especially useful for prototyping.
- πŸ“ˆ Expected speedups with Flash Attention 2
- **Byline**: Keep an eye out for this update to benefit from performance gains.
"""
    gr.Markdown(markdown)


    def SpeechSynthesis(result):
        # Embed a self-contained HTML page whose button reads `result`
        # aloud via the browser's SpeechSynthesis Web API. The text is
        # interpolated into a <textarea> between the two fragments.
        documentHTML5='''
<!DOCTYPE html>
<html>
<head>
    <title>Read It Aloud</title>
    <script type="text/javascript">
        function readAloud() {
            const text = document.getElementById("textArea").value;
            const speech = new SpeechSynthesisUtterance(text);
            window.speechSynthesis.speak(speech);
        }
    </script>
</head>
<body>
    <h1>πŸ”Š Read It Aloud</h1>
    <textarea id="textArea" rows="10" cols="80">
'''
        documentHTML5 = documentHTML5 + result
        documentHTML5 = documentHTML5 + '''
    </textarea>
    <br>
    <button onclick="readAloud()">πŸ”Š Read Aloud</button>
</body>
</html>
'''
        gr.HTML(documentHTML5)
        # components.html(documentHTML5, width=1280, height=1024)
        #return result
    SpeechSynthesis(markdown)
194


# Enable the request queue (required for streaming generator responses)
# and start the Gradio server.
demo.queue().launch(debug=True)