Spaces:
Sleeping
Sleeping
File size: 3,936 Bytes
c35d162 30c3950 d3b5ad0 30c3950 245b8ae 30c3950 f272b23 5a320c5 f272b23 30c3950 f272b23 30c3950 a4a82c5 f272b23 a4a82c5 12984b2 f272b23 5cf39a1 f272b23 de76d43 5cf39a1 a4a82c5 f272b23 a4a82c5 d06d609 a3d1f8d 5cf39a1 a3d1f8d 5cf39a1 b9a1bad 6110406 b9a1bad 5cf39a1 6110406 038f0e8 6110406 b9a1bad 5cf39a1 6110406 038f0e8 6110406 30c3950 6110406 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import gradio as gr
import os
# Build the Cython monotonic-alignment extension in place; the VITS modules
# imported below depend on it being compiled.
# NOTE(review): os.system with a `cd ... && ...` chain is shell-dependent and
# its exit status is ignored — consider subprocess.run([...], check=True,
# cwd="monotonic_align") so a failed build stops startup.
os.system('cd monotonic_align && python setup.py build_ext --inplace && cd ..')
import json
import math
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
# Project-local VITS modules (https://github.com/jaywalnut310/vits layout).
import commons
import utils
from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
from models import SynthesizerTrn
from text.symbols import symbols
#from text.symbols import symbols_ftgra
from text import text_to_sequence
from scipy.io.wavfile import write
def get_text(text, hps):
    """Convert raw input text into a LongTensor of symbol ids.

    Applies the text cleaners configured in ``hps.data.text_cleaners`` and,
    when ``hps.data.add_blank`` is set, interleaves a blank (0) token between
    symbols as expected by models trained with blanks.
    """
    sequence = text_to_sequence(text, hps.data.text_cleaners)
    if hps.data.add_blank:
        sequence = commons.intersperse(sequence, 0)
    return torch.LongTensor(sequence)
def load_model(model_path, hps):
    """Instantiate a SynthesizerTrn from ``hps`` and load a checkpoint.

    Returns the network in eval mode with weights restored from
    ``model_path``.
    """
    spec_channels = hps.data.filter_length // 2 + 1
    segment_frames = hps.train.segment_size // hps.data.hop_length
    model = SynthesizerTrn(
        len(symbols),
        spec_channels,
        segment_frames,
        n_speakers=hps.data.n_speakers,
        **hps.model,
    )
    model.eval()
    # Optimizer argument is None: inference only, no training state needed.
    utils.load_checkpoint(model_path, model, None)
    return model
# Shared hyper-parameters for all checkpoints (VCTK multi-speaker base config).
hps = utils.get_hparams_from_file("configs/vctk_base.json")
# Define a dictionary to store the model paths for each tab
model_paths = {
    "Phonemes_finetuned": "fr_wa_finetuned_pho/G_125000.pth",
    "Graphemes_finetuned": "fr_wa_finetuned/G_198000.pth",
    # NOTE(review): placeholder path — the "Phonemes" tab will fail to load
    # until a real checkpoint path is supplied here.
    "Phonemes": "path_to_phonemes_model.pth",
    "Graphemes": "wa_graphemes/G_168000.pth"
}
# Load the initial model
net_g = load_model(model_paths["Phonemes_finetuned"], hps)
# Cache of already-loaded models keyed by tab name. The original code called
# load_model() on every request, re-reading the checkpoint from disk for each
# synthesis; caching loads each checkpoint at most once.
_model_cache = {}

def tts(text, speaker_id, tab_name):
    """Synthesize ``text`` with the model belonging to ``tab_name``.

    Args:
        text: input string (phonemes or graphemes depending on the model).
        speaker_id: integer speaker index (dropdown index: 0=Male, 1=Female).
        tab_name: key into ``model_paths`` selecting the checkpoint.

    Returns:
        Tuple of (status message, (sampling_rate, waveform ndarray)) matching
        the Gradio Textbox and Audio outputs.
    """
    global net_g
    # Load the checkpoint only the first time this tab is used.
    if tab_name not in _model_cache:
        _model_cache[tab_name] = load_model(model_paths[tab_name], hps)
    net_g = _model_cache[tab_name]
    sid = torch.LongTensor([speaker_id])  # speaker identity
    stn_tst = get_text(text, hps)
    with torch.no_grad():
        x_tst = stn_tst.unsqueeze(0)
        x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
        audio = net_g.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=.667,
                            noise_scale_w=0.8, length_scale=1)[0][0, 0].data.float().numpy()
    return "Success", (hps.data.sampling_rate, audio)
def create_tab(tab_name):
    """Build one Gradio tab wired to the TTS model identified by ``tab_name``."""
    with gr.TabItem(tab_name):
        gr.Markdown(f"### {tab_name} TTS Model")
        text_input = gr.TextArea(
            label="Text in Walloon (Depending on the model the input should be on phonemes or characters)",
            value="")
        speaker_input = gr.Dropdown(
            label="Speaker", choices=["Male", "Female"], type="index", value="Male")
        generate_btn = gr.Button("Generate", variant="primary")
        message_out = gr.Textbox(label="Message")
        audio_out = gr.Audio(label="Output")
        # tab_name is a parameter of this function, so each tab's lambda
        # closes over its own value (no late-binding pitfall here).
        generate_btn.click(
            lambda text, speaker_id: tts(text, speaker_id, tab_name),
            [text_input, speaker_input],
            [message_out, audio_out])
# Assemble the Gradio UI: intro header, one tab per model, example table.
app = gr.Blocks()
with app:
    gr.Markdown(
        """
        # First Text to Speech (TTS) for Walloon
        Based on VITS (https://github.com/jaywalnut310/vits).
        Select the desired model and write the text in phonemes or graphemes depending on the model.
        For faster inference speed it is recommended to use short sentences.
        """
    )
    # One tab per entry in model_paths; each tab lazily drives tts() with its
    # own tab name.
    with gr.Tabs():
        create_tab("Phonemes_finetuned")
        create_tab("Graphemes_finetuned")
        create_tab("Phonemes")
        create_tab("Graphemes")
    gr.Markdown(
        """
        ### Examples
        | Input Text | Speaker | Input Method |
        |------------|---------|---------------|
        | li biːç ɛ l sɔlja ɛstẽ ki s maʁɡajẽ pɔ sawɛ kiː ski , dɛ døː , ɛstøː l py fwaʁ . m ɛ̃ s koː la , la k i vɛjɛ õ tsminɔː k aʁivef pjim pjam , d ɛ̃ õ bja nuː tsoː paltɔ . | Female | Phonemes |
        | Li bijhe et l’ solea estént ki s’ margayént po sawè kî çki, des deus, esteut l’ pus foirt. Mins ç’ côp la, la k’ i veyèt on tchminåd k' arivéve pyim piam, dins on bea noû tchôd paltot. | Male | Graphemes |
        """
    )
# Start the web server (blocks until shut down).
app.launch()
|