Spaces:
Sleeping
Sleeping
Mostafa Shahin
committed on
Commit
•
6403fc5
1
Parent(s):
6837b6d
Plot prob out of softmax instead of logits
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import matplotlib.pyplot as plt
|
|
10 |
from scipy.io import wavfile
|
11 |
from scipy.signal import spectrogram
|
12 |
import numpy as np
|
13 |
-
|
14 |
|
15 |
engine = transcriber.transcribe_SA(model_path='models/SA',verbose=0)
|
16 |
phonemizer = Phonemize.phonemization()
|
@@ -109,17 +109,21 @@ def create_spectrogram_with_att(wav_file, att_contour, att ):
|
|
109 |
|
110 |
ax_att = ax.twinx()
|
111 |
# Plot the att contour
|
112 |
-
|
|
|
113 |
ax_att.plot(time_att, att_contour, color='blue', label=f'{att} Contour')
|
|
|
114 |
ax_att.legend()
|
115 |
|
116 |
return fig
|
117 |
|
118 |
|
119 |
def plot_contour(audio_file, att):
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
123 |
fig = create_spectrogram_with_att(audio_file, att_contour, att)
|
124 |
return fig
|
125 |
|
@@ -140,7 +144,7 @@ with gr.Blocks() as gui:
|
|
140 |
get_phoneme.click(fn=phonemize_prompt, inputs=[prompt, is_ipa], outputs=prompt_phonemes)
|
141 |
|
142 |
record_audio = gr.Audio(sources=["microphone","upload"], type="filepath")
|
143 |
-
att_list = gr.Dropdown(label="Select Attributes", choices=Attributes, value=['vowel', 'voiced', 'consonant'] ,multiselect=True)
|
144 |
process = gr.Button("Process Audio")
|
145 |
|
146 |
recognition = gr.HTML(label='Output')
|
@@ -179,7 +183,7 @@ with gr.Blocks() as gui:
|
|
179 |
assess.click(fn=get_assessment, inputs= [prompt_phonemes], outputs=diff)
|
180 |
|
181 |
with gr.Tab("Analysis"):
|
182 |
-
selected_att = gr.Dropdown( Attributes, label="Select an Attribute to plot", value='voiced', interactive=True)
|
183 |
do_plot = gr.Button('Plot')
|
184 |
plot_block = gr.Plot(label='Spectrogram with Attribute Contour')
|
185 |
do_plot.click(plot_contour, inputs=[record_audio,selected_att], outputs=plot_block)
|
|
|
10 |
from scipy.io import wavfile
|
11 |
from scipy.signal import spectrogram
|
12 |
import numpy as np
|
13 |
+
from torch import nn
|
14 |
|
15 |
engine = transcriber.transcribe_SA(model_path='models/SA',verbose=0)
|
16 |
phonemizer = Phonemize.phonemization()
|
|
|
109 |
|
110 |
ax_att = ax.twinx()
|
111 |
# Plot the att contour
|
112 |
+
x_points = att_contour.shape[0]
|
113 |
+
time_att = np.arange(0, x_points * 0.02, 0.02)[:x_points]
|
114 |
ax_att.plot(time_att, att_contour, color='blue', label=f'{att} Contour')
|
115 |
+
ax_att.set_ylim(0,1)
|
116 |
ax_att.legend()
|
117 |
|
118 |
return fig
|
119 |
|
120 |
|
121 |
def plot_contour(audio_file, att):
|
122 |
+
indx_n = engine.processor.tokenizer.convert_tokens_to_ids([f'n_{att}'])[0]
|
123 |
+
indx_p = engine.processor.tokenizer.convert_tokens_to_ids([f'p_{att}'])[0]
|
124 |
+
index_all = [engine.processor.tokenizer.pad_token_id, indx_n, indx_p]
|
125 |
+
prob = nn.functional.softmax(engine.logits.squeeze()[:,index_all], dim=-1)
|
126 |
+
att_contour = prob[:,-1]
|
127 |
fig = create_spectrogram_with_att(audio_file, att_contour, att)
|
128 |
return fig
|
129 |
|
|
|
144 |
get_phoneme.click(fn=phonemize_prompt, inputs=[prompt, is_ipa], outputs=prompt_phonemes)
|
145 |
|
146 |
record_audio = gr.Audio(sources=["microphone","upload"], type="filepath")
|
147 |
+
att_list = gr.Dropdown(label="Select Attributes", choices=sorted(Attributes), value=['vowel', 'voiced', 'consonant'] ,multiselect=True)
|
148 |
process = gr.Button("Process Audio")
|
149 |
|
150 |
recognition = gr.HTML(label='Output')
|
|
|
183 |
assess.click(fn=get_assessment, inputs= [prompt_phonemes], outputs=diff)
|
184 |
|
185 |
with gr.Tab("Analysis"):
|
186 |
+
selected_att = gr.Dropdown( sorted(Attributes), label="Select an Attribute to plot", value='voiced', interactive=True)
|
187 |
do_plot = gr.Button('Plot')
|
188 |
plot_block = gr.Plot(label='Spectrogram with Attribute Contour')
|
189 |
do_plot.click(plot_contour, inputs=[record_audio,selected_att], outputs=plot_block)
|