Pipe1213 committed on
Commit f272b23
Parent: 39de7b5

Update app.py

Files changed (1)
  1. app.py +33 -39
app.py CHANGED
@@ -25,20 +25,34 @@ def get_text(text, hps):
     text_norm = torch.LongTensor(text_norm)
     return text_norm
 
-hps = utils.get_hparams_from_file("configs/vctk_base.json")
-
-net_g = SynthesizerTrn(
-    len(symbols),
-    hps.data.filter_length // 2 + 1,
-    hps.train.segment_size // hps.data.hop_length,
-    n_speakers=hps.data.n_speakers,
-    **hps.model)
-_ = net_g.eval()
-
-_ = utils.load_checkpoint("fr_wa_finetuned_pho/G_125000.pth", net_g, None)
-
-def tts(text, speaker_id):
+def load_model(model_path, hps):
+    net_g = SynthesizerTrn(
+        len(symbols),
+        hps.data.filter_length // 2 + 1,
+        hps.train.segment_size // hps.data.hop_length,
+        n_speakers=hps.data.n_speakers,
+        **hps.model)
+    _ = net_g.eval()
+    _ = utils.load_checkpoint(model_path, net_g, None)
+    return net_g
+
+hps = utils.get_hparams_from_file("configs/vctk_base.json")
+
+# Define a dictionary to store the model paths
+model_paths = {
+    "Model 1": "fr_wa_finetuned_pho/G_125000.pth",
+    "Model 2": "fr_wa_finetuned/G_198000.pth",
+    "Model 3": "path_to_model_3_checkpoint.pth",
+    "Model 4": "path_to_model_4_checkpoint.pth"
+}
+
+# Load the initial model
+net_g = load_model(model_paths["Model 1"], hps)
+
+def tts(text, speaker_id, model_choice):
+    global net_g
+    net_g = load_model(model_paths[model_choice], hps)
+
     if len(text) > 2000:
         return "Error: Text is too long", None
     sid = torch.LongTensor([speaker_id])  # speaker identity
@@ -47,7 +61,6 @@ def tts(text, speaker_id):
     with torch.no_grad():
         x_tst = stn_tst.unsqueeze(0)
         x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
-        # print(stn_tst.size())
         audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667, noise_scale_w=0.8, length_scale=1)[0][
             0, 0].data.float().numpy()
     return "Success", (hps.data.sampling_rate, audio)
@@ -55,34 +68,15 @@ def tts(text, speaker_id):
 app = gr.Blocks()
 with app:
     with gr.Tabs():
-        with gr.TabItem("Phonemes_finetuned"):
-            tts_input1 = gr.TextArea(label="Text in Walloon in phonemes IPA (2000 words limitation)", value="")
-            tts_input2 = gr.Dropdown(label="Speaker", choices=["Male", "Female"], type="index", value="Male")
-            tts_submit = gr.Button("Generate", variant="primary")
-            tts_output1 = gr.Textbox(label="Message")
-            tts_output2 = gr.Audio(label="Output")
-            tts_submit.click(tts, [tts_input1, tts_input2], [tts_output1, tts_output2])
-        with gr.TabItem("Graphemes_finetuned"):
-            tts_input1 = gr.TextArea(label="Text in Walloon in graphemes (2000 words limitation)", value="")
-            tts_input2 = gr.Dropdown(label="Speaker", choices=["Male", "Female"], type="index", value="Male")
-            tts_submit = gr.Button("Generate", variant="primary")
-            tts_output1 = gr.Textbox(label="Message")
-            tts_output2 = gr.Audio(label="Output")
-            tts_submit.click(tts, [tts_input1, tts_input2], [tts_output1, tts_output2])
-        with gr.TabItem("Phonemes"):
-            tts_input1 = gr.TextArea(label="Text in Walloon in phonemes IPA (2000 words limitation)", value="")
-            tts_input2 = gr.Dropdown(label="Speaker", choices=["Male", "Female"], type="index", value="Male")
-            tts_submit = gr.Button("Generate", variant="primary")
-            tts_output1 = gr.Textbox(label="Message")
-            tts_output2 = gr.Audio(label="Output")
-            tts_submit.click(tts, [tts_input1, tts_input2], [tts_output1, tts_output2])
-        with gr.TabItem("Graphemes"):
-            tts_input1 = gr.TextArea(label="Text in Walloon in graphemes (2000 words limitation)", value="")
-            tts_input2 = gr.Dropdown(label="Speaker", choices=["Male", "Female"], type="index", value="Male")
-            tts_submit = gr.Button("Generate", variant="primary")
-            tts_output1 = gr.Textbox(label="Message")
-            tts_output2 = gr.Audio(label="Output")
-            tts_submit.click(tts, [tts_input1, tts_input2], [tts_output1, tts_output2])
+        for tab_name in ["Phonemes_finetuned", "Graphemes_finetuned", "Phonemes", "Graphemes"]:
+            with gr.TabItem(tab_name):
+                tts_input1 = gr.TextArea(label="Text in Walloon in phonemes IPA (2000 words limitation)", value="")
+                tts_input2 = gr.Dropdown(label="Speaker", choices=["Male", "Female"], type="index", value="Male")
+                model_choice = gr.Dropdown(label="Model", choices=list(model_paths.keys()), value="Model 1")
+                tts_submit = gr.Button("Generate", variant="primary")
+                tts_output1 = gr.Textbox(label="Message")
+                tts_output2 = gr.Audio(label="Output")
+                tts_submit.click(tts, [tts_input1, tts_input2, model_choice], [tts_output1, tts_output2])
 
 app.launch()
 
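
Note on the new model dropdown: as committed, tts() calls load_model() on every request, so the selected checkpoint is rebuilt and re-read from disk each time "Generate" is clicked. A minimal caching sketch (hypothetical, not part of this commit; it reuses the load_model, model_paths and hps names defined in app.py above):

_loaded_models = {}

def get_model(model_choice):
    # Build and load each checkpoint at most once, then reuse it.
    if model_choice not in _loaded_models:
        _loaded_models[model_choice] = load_model(model_paths[model_choice], hps)
    return _loaded_models[model_choice]

tts() could then start with net_g = get_model(model_choice) instead of reassigning the global on every call, with the rest of the function unchanged.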
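
A second hypothetical follow-up: the tab loop reuses the phonemes label for the "Graphemes" tabs. The labels from the removed per-tab blocks could be keyed by tab_name, for example:

tab_labels = {
    "Phonemes_finetuned": "Text in Walloon in phonemes IPA (2000 words limitation)",
    "Graphemes_finetuned": "Text in Walloon in graphemes (2000 words limitation)",
    "Phonemes": "Text in Walloon in phonemes IPA (2000 words limitation)",
    "Graphemes": "Text in Walloon in graphemes (2000 words limitation)",
}

# inside the loop, replacing the fixed label:
tts_input1 = gr.TextArea(label=tab_labels[tab_name], value="")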