# Gradio demo: English text-to-speech with MeloTTS.
import os

import gradio as gr
import nltk

# MeloTTS needs the unidic dictionary for its Japanese text frontend, so download
# it before importing melo.api (hence the late import and the noqa below).
os.system("python -m unidic download")
from melo.api import TTS  # noqa: E402

# MeloTTS's English G2P relies on NLTK's averaged-perceptron POS tagger.
nltk.download("averaged_perceptron_tagger_eng")

# "auto" lets MeloTTS pick the best available device (CUDA, MPS, or CPU).
device = "auto"
model = TTS(language="EN", device=device)
speaker_ids = model.hps.data.spk2id


def inference(
        text: str, speed: float, speaker: str, progress=gr.Progress(track_tqdm=True)
):
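    """Synthesize `text` with the selected speaker and speed; return the path to the WAV file."""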
    try:
        out_path = "audio.wav"
        model.tts_to_file(
            text,
            speaker_ids[speaker],
            out_path,
            speed=speed,
            format="wav",
        )
    except Exception as e:
        # Raise so Gradio surfaces the error in the UI; returning the Error
        # object would just be passed to the Audio output.
        raise gr.Error(str(e)) from e
    return out_path


if __name__ == "__main__":
    theme = gr.themes.Soft(
        primary_hue=gr.themes.colors.emerald, secondary_hue=gr.themes.colors.emerald
    )

    sample_text = (
        "Hello, my name is Chi-ku-wa-bu. "
        "I am a text-to-speech system designed to assist you. "
        "How can I help you today?"
    )

    demo = gr.Interface(
        title="Text-to-Speech",
        description="Convert English text to speech",
        fn=inference,
        inputs=[
            gr.Textbox(label="Text to Synthesize", value=sample_text),
            gr.Slider(minimum=0.5, maximum=3.0, value=1.0, label="Speed"),
            gr.Dropdown(
                label="Speaker",
                choices=["EN-US", "EN-BR", "EN_INDIA", "EN-AU", "EN-Default"],
                value="EN-US",
            ),
        ],
        outputs=[gr.Audio(label="Generated Audio")],
        examples=[
            [
                sample_text,
                1.0,
                "EN-US",
            ],
        ],
        cache_examples=False,
        theme=theme,
    )
    # Queue requests so the shared TTS model handles them one at a time, then launch.
    demo.queue().launch()