Spaces:
Sleeping
Sleeping
Mostafa Shahin
committed on
Commit
•
6403fc5
1
Parent(s):
6837b6d
Plot prob out of softmax instead of logits
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import matplotlib.pyplot as plt
|
|
10 |
from scipy.io import wavfile
|
11 |
from scipy.signal import spectrogram
|
12 |
import numpy as np
|
13 |
-
|
14 |
|
15 |
engine = transcriber.transcribe_SA(model_path='models/SA',verbose=0)
|
16 |
phonemizer = Phonemize.phonemization()
|
@@ -109,17 +109,21 @@ def create_spectrogram_with_att(wav_file, att_contour, att ):
|
|
109 |
|
110 |
ax_att = ax.twinx()
|
111 |
# Plot the att contour
|
112 |
-
|
|
|
113 |
ax_att.plot(time_att, att_contour, color='blue', label=f'{att} Contour')
|
|
|
114 |
ax_att.legend()
|
115 |
|
116 |
return fig
|
117 |
|
118 |
|
119 |
def plot_contour(audio_file, att):
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
123 |
fig = create_spectrogram_with_att(audio_file, att_contour, att)
|
124 |
return fig
|
125 |
|
@@ -140,7 +144,7 @@ with gr.Blocks() as gui:
|
|
140 |
get_phoneme.click(fn=phonemize_prompt, inputs=[prompt, is_ipa], outputs=prompt_phonemes)
|
141 |
|
142 |
record_audio = gr.Audio(sources=["microphone","upload"], type="filepath")
|
143 |
-
att_list = gr.Dropdown(label="Select Attributes", choices=Attributes, value=['vowel', 'voiced', 'consonant'] ,multiselect=True)
|
144 |
process = gr.Button("Process Audio")
|
145 |
|
146 |
recognition = gr.HTML(label='Output')
|
@@ -179,7 +183,7 @@ with gr.Blocks() as gui:
|
|
179 |
assess.click(fn=get_assessment, inputs= [prompt_phonemes], outputs=diff)
|
180 |
|
181 |
with gr.Tab("Analysis"):
|
182 |
-
selected_att = gr.Dropdown( Attributes, label="Select an Attribute to plot", value='voiced', interactive=True)
|
183 |
do_plot = gr.Button('Plot')
|
184 |
plot_block = gr.Plot(label='Spectrogram with Attribute Contour')
|
185 |
do_plot.click(plot_contour, inputs=[record_audio,selected_att], outputs=plot_block)
|
|
|
10 |
from scipy.io import wavfile
|
11 |
from scipy.signal import spectrogram
|
12 |
import numpy as np
|
13 |
+
from torch import nn
|
14 |
|
15 |
engine = transcriber.transcribe_SA(model_path='models/SA',verbose=0)
|
16 |
phonemizer = Phonemize.phonemization()
|
|
|
109 |
|
110 |
ax_att = ax.twinx()
|
111 |
# Plot the att contour
|
112 |
+
x_points = att_contour.shape[0]
|
113 |
+
time_att = np.arange(0, x_points * 0.02, 0.02)[:x_points]
|
114 |
ax_att.plot(time_att, att_contour, color='blue', label=f'{att} Contour')
|
115 |
+
ax_att.set_ylim(0,1)
|
116 |
ax_att.legend()
|
117 |
|
118 |
return fig
|
119 |
|
120 |
|
121 |
def plot_contour(audio_file, att):
|
122 |
+
indx_n = engine.processor.tokenizer.convert_tokens_to_ids([f'n_{att}'])[0]
|
123 |
+
indx_p = engine.processor.tokenizer.convert_tokens_to_ids([f'p_{att}'])[0]
|
124 |
+
index_all = [engine.processor.tokenizer.pad_token_id, indx_n, indx_p]
|
125 |
+
prob = nn.functional.softmax(engine.logits.squeeze()[:,index_all], dim=-1)
|
126 |
+
att_contour = prob[:,-1]
|
127 |
fig = create_spectrogram_with_att(audio_file, att_contour, att)
|
128 |
return fig
|
129 |
|
|
|
144 |
get_phoneme.click(fn=phonemize_prompt, inputs=[prompt, is_ipa], outputs=prompt_phonemes)
|
145 |
|
146 |
record_audio = gr.Audio(sources=["microphone","upload"], type="filepath")
|
147 |
+
att_list = gr.Dropdown(label="Select Attributes", choices=sorted(Attributes), value=['vowel', 'voiced', 'consonant'] ,multiselect=True)
|
148 |
process = gr.Button("Process Audio")
|
149 |
|
150 |
recognition = gr.HTML(label='Output')
|
|
|
183 |
assess.click(fn=get_assessment, inputs= [prompt_phonemes], outputs=diff)
|
184 |
|
185 |
with gr.Tab("Analysis"):
|
186 |
+
selected_att = gr.Dropdown( sorted(Attributes), label="Select an Attribute to plot", value='voiced', interactive=True)
|
187 |
do_plot = gr.Button('Plot')
|
188 |
plot_block = gr.Plot(label='Spectrogram with Attribute Contour')
|
189 |
do_plot.click(plot_contour, inputs=[record_audio,selected_att], outputs=plot_block)
|