Update app.py
Browse files
app.py
CHANGED
@@ -22,14 +22,14 @@ models_per_language = {
|
|
22 |
"marathi": ["ylacombe/mms-mar-finetuned-monospeaker"]
|
23 |
}
|
24 |
|
25 |
-
HUB_PATH = "ylacombe/
|
26 |
|
27 |
|
28 |
pipe_dict = {
|
29 |
-
"current_model": "ylacombe/
|
30 |
"pipe": pipeline("text-to-speech", model=HUB_PATH, device=0),
|
31 |
"original_pipe": pipeline("text-to-speech", model=default_model_per_language["marathi"], device=0),
|
32 |
-
"language": "
|
33 |
}
|
34 |
|
35 |
title = """
|
@@ -58,7 +58,7 @@ def generate_audio(text, model_id, language):
|
|
58 |
out = []
|
59 |
# first generate original model result
|
60 |
output = pipe_dict["original_pipe"](text)
|
61 |
-
output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=
|
62 |
visible=True)
|
63 |
out.append(output)
|
64 |
|
@@ -74,7 +74,7 @@ def generate_audio(text, model_id, language):
|
|
74 |
out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
|
75 |
else:
|
76 |
output = pipe_dict["pipe"](text)
|
77 |
-
output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=
|
78 |
visible=True)
|
79 |
out.append(output)
|
80 |
out.extend([gr.Audio(visible=False)]*(max_speakers-2))
|
|
|
22 |
"marathi": ["ylacombe/mms-mar-finetuned-monospeaker"]
|
23 |
}
|
24 |
|
25 |
+
HUB_PATH = "ylacombe/mms-mar-finetuned-monospeaker"
|
26 |
|
27 |
|
28 |
pipe_dict = {
|
29 |
+
"current_model": "ylacombe/mms-mar-finetuned-monospeaker",
|
30 |
"pipe": pipeline("text-to-speech", model=HUB_PATH, device=0),
|
31 |
"original_pipe": pipeline("text-to-speech", model=default_model_per_language["marathi"], device=0),
|
32 |
+
"language": "marathi",
|
33 |
}
|
34 |
|
35 |
title = """
|
|
|
58 |
out = []
|
59 |
# first generate original model result
|
60 |
output = pipe_dict["original_pipe"](text)
|
61 |
+
output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label=f"Non finetuned model prediction {default_model_per_language[language]}", show_label=True,
|
62 |
visible=True)
|
63 |
out.append(output)
|
64 |
|
|
|
74 |
out.extend([gr.Audio(visible=False)]*(max_speakers-num_speakers))
|
75 |
else:
|
76 |
output = pipe_dict["pipe"](text)
|
77 |
+
output = gr.Audio(value = (output["sampling_rate"], output["audio"].squeeze()), type="numpy", autoplay=True, label="Generated Audio - Mono speaker", show_label=True,
|
78 |
visible=True)
|
79 |
out.append(output)
|
80 |
out.extend([gr.Audio(visible=False)]*(max_speakers-2))
|