# Source: Hugging Face Space "Prgckwb/tts" (status: Running; file size: 1,738 bytes)

import os

import gradio as gr
import nltk

# Run the UniDic dictionary download as a shell command. This is executed
# before the ``melo`` import below, which is why that import sits after a
# statement and carries an E402 suppression.
# NOTE(review): presumably MeloTTS needs the dictionary at import time -- confirm.
# The command's exit status is not checked.
os.system("python -m unidic download")
from melo.api import TTS  # noqa: E402

# Fetch the NLTK "averaged_perceptron_tagger_eng" resource at startup.
# NOTE(review): presumably required by the English text front-end -- confirm.
nltk.download("averaged_perceptron_tagger_eng")

# Device string handed to the TTS constructor.
# NOTE(review): "auto" presumably lets the library pick the device -- confirm.
device = "auto"
model = TTS(language="EN", device=device)
# Lookup taken from the loaded model's hyperparameters; `inference` indexes it
# with the speaker name chosen in the UI to get the value passed to
# `model.tts_to_file`.
speaker_ids = model.hps.data.spk2id


def inference(
        text: str, speed: float, speaker: str, progress=gr.Progress(track_tqdm=True)
) -> str:
    """Synthesize ``text`` to a WAV file and return the path of that file.

    Args:
        text: The text to synthesize.
        speed: Speed value forwarded unchanged to ``model.tts_to_file``.
        speaker: Key used to look up the speaker in the module-level
            ``speaker_ids`` mapping.
        progress: Gradio progress tracker declared with ``track_tqdm=True``.
            It is not referenced in the body; it is only declared in the
            signature.

    Returns:
        The path of the written audio file (``"audio.wav"``).

    Raises:
        gr.Error: If the speaker lookup or the synthesis call fails. The
            message is the text of the underlying exception.
    """
    out_path = "audio.wav"
    try:
        model.tts_to_file(
            text,
            speaker_ids[speaker],
            out_path,
            speed=speed,
            format="wav",
        )
    except Exception as e:
        # gr.Error is an exception type and has to be *raised* for Gradio to
        # report it to the user. Returning it (as the code did before) hands
        # an exception object to the Audio output component instead.
        raise gr.Error(str(e)) from e
    return out_path


if __name__ == "__main__":
    # Both accent hues use the same emerald palette.
    emerald = gr.themes.colors.emerald
    theme = gr.themes.Soft(primary_hue=emerald, secondary_hue=emerald)

    # Default prompt shown in the textbox and reused as the single example row.
    sample_text = " ".join(
        [
            "Hello, my name is Chi-ku-wa-bu.",
            "I am a text-to-speech system designed to assist you.",
            "How can I help you today?",
        ]
    )

    # Input widgets, in the positional order expected by `inference`.
    text_box = gr.Textbox(label="Text to Synthesize", value=sample_text)
    speed_slider = gr.Slider(minimum=0.5, maximum=3.0, value=1.0, label="Speed")
    speaker_dropdown = gr.Dropdown(
        label="Speaker",
        choices=["EN-US", "EN-BR", "EN_INDIA", "EN-AU", "EN-Default"],
        value="EN-US",
    )

    # Output widget; its initial value is the same path `inference` writes to.
    audio_player = gr.Audio(value="audio.wav")

    demo = gr.Interface(
        title="Text-to-Speech",
        description="Convert English text to speech",
        fn=inference,
        inputs=[text_box, speed_slider, speaker_dropdown],
        outputs=[audio_player],
        examples=[[sample_text, 1.0, "EN-US"]],
        cache_examples=False,
        theme=theme,
    )

    # Enable the request queue, then start the server.
    demo.queue()
    demo.launch()