Mostafa Shahin committed
Commit 6403fc5
1 Parent(s): 6837b6d

Plot prob out of softmax instead of logits

Files changed (1)
app.py +11 -7
app.py CHANGED
@@ -10,7 +10,7 @@ import matplotlib.pyplot as plt
 from scipy.io import wavfile
 from scipy.signal import spectrogram
 import numpy as np
-
+from torch import nn
 
 engine = transcriber.transcribe_SA(model_path='models/SA',verbose=0)
 phonemizer = Phonemize.phonemization()
@@ -109,17 +109,21 @@ def create_spectrogram_with_att(wav_file, att_contour, att ):
 
     ax_att = ax.twinx()
     # Plot the att contour
-    time_att = np.arange(0, len(att_contour) * 0.02, 0.02) # Assuming pitch_contour is sampled every 20 ms
+    x_points = att_contour.shape[0]
+    time_att = np.arange(0, x_points * 0.02, 0.02)[:x_points]
     ax_att.plot(time_att, att_contour, color='blue', label=f'{att} Contour')
+    ax_att.set_ylim(0,1)
     ax_att.legend()
 
     return fig
 
 
 def plot_contour(audio_file, att):
-    indx = engine.processor.tokenizer.convert_tokens_to_ids([f'p_{att}'])
-    att_contour = engine.logits.squeeze()[:,indx]
-    att_contour = scale_vector(att_contour, 0, 6000)
+    indx_n = engine.processor.tokenizer.convert_tokens_to_ids([f'n_{att}'])[0]
+    indx_p = engine.processor.tokenizer.convert_tokens_to_ids([f'p_{att}'])[0]
+    index_all = [engine.processor.tokenizer.pad_token_id, indx_n, indx_p]
+    prob = nn.functional.softmax(engine.logits.squeeze()[:,index_all], dim=-1)
+    att_contour = prob[:,-1]
     fig = create_spectrogram_with_att(audio_file, att_contour, att)
     return fig
 
@@ -140,7 +144,7 @@ with gr.Blocks() as gui:
     get_phoneme.click(fn=phonemize_prompt, inputs=[prompt, is_ipa], outputs=prompt_phonemes)
 
     record_audio = gr.Audio(sources=["microphone","upload"], type="filepath")
-    att_list = gr.Dropdown(label="Select Attributes", choices=Attributes, value=['vowel', 'voiced', 'consonant'] ,multiselect=True)
+    att_list = gr.Dropdown(label="Select Attributes", choices=sorted(Attributes), value=['vowel', 'voiced', 'consonant'] ,multiselect=True)
     process = gr.Button("Process Audio")
 
     recognition = gr.HTML(label='Output')
@@ -179,7 +183,7 @@ with gr.Blocks() as gui:
     assess.click(fn=get_assessment, inputs= [prompt_phonemes], outputs=diff)
 
     with gr.Tab("Analysis"):
-        selected_att = gr.Dropdown( Attributes, label="Select an Attribute to plot", value='voiced', interactive=True)
+        selected_att = gr.Dropdown( sorted(Attributes), label="Select an Attribute to plot", value='voiced', interactive=True)
         do_plot = gr.Button('Plot')
         plot_block = gr.Plot(label='Spectrogram with Attribute Contour')
         do_plot.click(plot_contour, inputs=[record_audio,selected_att], outputs=plot_block)
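For context, the core of the change is that plot_contour now plots probabilities instead of raw logits: each frame's logits are restricted to the [pad, n_<att>, p_<att>] columns, a softmax over those three classes converts them to probabilities, and the positive-class column becomes the contour. Because that contour lives in [0, 1], the axis is clamped with ax_att.set_ylim(0,1) and the old scale_vector(att_contour, 0, 6000) rescaling is no longer needed. Below is a minimal, self-contained sketch of that computation; the random logits and hard-coded token ids are stand-ins for engine.logits and the tokenizer lookups, not the real values.

import numpy as np
import torch
from torch import nn

T, V = 200, 120                      # frames x vocabulary size (illustrative only)
logits = torch.randn(T, V)           # stand-in for engine.logits.squeeze()
pad_id, indx_n, indx_p = 0, 37, 38   # stand-ins for the real token ids

index_all = [pad_id, indx_n, indx_p]
prob = nn.functional.softmax(logits[:, index_all], dim=-1)  # softmax over the 3 selected classes only
att_contour = prob[:, -1]                                   # P(p_<att>) per frame, bounded in [0, 1]

# 20 ms frame step, as in the app; the slice guards against np.arange's
# float step occasionally yielding one extra sample.
x_points = att_contour.shape[0]
time_att = np.arange(0, x_points * 0.02, 0.02)[:x_points]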