Spaces:
Running
Running
File size: 3,801 Bytes
6eb1048 4d7078a 6eb1048 27e87f7 4d7078a 6eb1048 27e87f7 6eb1048 27e87f7 6eb1048 4d7078a 6eb1048 2bfbd08 6eb1048 4d7078a 1d3c496 4d7078a 27e87f7 4d7078a 2bfbd08 6eb1048 4d7078a 2bfbd08 4d7078a 2bfbd08 4d7078a 6eb1048 27e87f7 d7b3885 27e87f7 4d7078a 27e87f7 4d7078a 27e87f7 6eb1048 27e87f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import os
from TTS.utils.download import download_url
from TTS.utils.synthesizer import Synthesizer
import gradio as gr
import tempfile
import torch
import json
from TTS.tts.utils.synthesis import synthesis
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import Vits, VitsCharacters
from TTS.tts.utils.text.tokenizer import TTSTokenizer
import numpy as np
from TTS.utils.audio.numpy_transforms import save_wav
# Maximum number of input characters accepted per request; tts() truncates
# longer text to this length.
MAX_TXT_LEN = 800
# Template for the per-voice local model directory; it is formatted with
# "male" or "female".
BASE_DIR = "kbd-vits-tts-{}"
# Remote PyTorch checkpoint and config for each voice.
# NOTE(review): the male config is named "config_35000.json" while the male
# checkpoint is "checkpoint_56000.pth" — confirm they belong together.
MALE_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/checkpoint_56000.pth"
MALE_CONFIG_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/config_35000.json"
FEMALE_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/best_model_56351.pth"
FEMALE_CONFIG_URL = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/config.json"
# Remote ONNX exports used when the "Use ONNX" option is enabled.
MALE_ONNX_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/onnx/kbd_vits_male.onnx"
# NOTE(review): this URL points into the "kbd-vits-tts-male" repository even
# though the file is the female model — confirm this is intentional.
FEMALE_ONNX_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/onnx/kbd_vits_female.onnx"
def download_model_and_config(gender):
    """Fetch the checkpoint and config for one voice into its local directory.

    Args:
        gender: "male" selects the male URLs; any other value selects the
            female URLs (the original fallback is kept as is).

    Returns:
        The local directory (``BASE_DIR`` formatted with ``gender``) that
        holds ``model.pth`` and ``config.json``.
    """
    dir_path = BASE_DIR.format(gender)
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dir_path, exist_ok=True)

    is_male = gender == "male"
    downloads = (
        (MALE_MODEL_URL if is_male else FEMALE_MODEL_URL, "model.pth"),
        (MALE_CONFIG_URL if is_male else FEMALE_CONFIG_URL, "config.json"),
    )
    for url, filename in downloads:
        # Skip files that are already on disk so a restart does not fetch the
        # checkpoint again.
        # NOTE(review): download_url appears to refuse to overwrite an
        # existing file unless resume=True — confirm against the installed
        # TTS version.
        if os.path.exists(os.path.join(dir_path, filename)):
            continue
        download_url(url, dir_path, filename)
    return dir_path
# Fetch the files for both voices once at import time so that tts() can read
# the config and checkpoint from the local directories.
for _voice_key in ("male", "female"):
    download_model_and_config(_voice_key)
def _onnx_model_path(voice_key, model_dir):
    """Return a local path to the ONNX model for *voice_key*, fetching it once if missing."""
    onnx_path = os.path.join(model_dir, "model.onnx")
    if not os.path.exists(onnx_path):
        os.makedirs(model_dir, exist_ok=True)
        url = MALE_ONNX_MODEL_URL if voice_key == "male" else FEMALE_ONNX_MODEL_URL
        download_url(url, model_dir, "model.onnx")
    return onnx_path


def _new_wav_path():
    """Reserve a temporary ``.wav`` file and return its path.

    The handle is closed before returning so the caller can write to the path
    on any platform. The file is created with ``delete=False``, so it is not
    removed automatically.
    """
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
        return temp_file.name


def tts(text: str, voice: str = "Male", use_onnx: bool = True):
    """Synthesize *text* with the selected voice and return the WAV file path.

    Args:
        text: Input text. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped, Latin capital "I" is replaced by the palochka letter and
            the result is lower-cased before synthesis.
        voice: "Male" selects the male model; any other value selects the
            female model.
        use_onnx: When true, run the ONNX export of the model; otherwise use
            the PyTorch checkpoint through ``Synthesizer``.

    Returns:
        Path to a temporary ``.wav`` file containing the synthesized audio.
        The file is not deleted automatically.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

    voice_key = "male" if voice == "Male" else "female"
    model_dir = BASE_DIR.format(voice_key)
    config_file = f"{model_dir}/config.json"

    # Replace capital "I" with the "Palochka" symbol, then normalize case.
    text = text.replace("I", "ӏ").lower()

    out_path = _new_wav_path()

    if use_onnx:
        config = VitsConfig()
        config.load_json(config_file)

        # The model builds its own tokenizer from the config; that is the one
        # used below, so no separate TTSTokenizer is constructed here.
        vits = Vits.init_from_config(config)

        # NOTE(review): load_onnx is expected to need a local file rather than
        # a remote URL, so the ONNX export is downloaded once and loaded from
        # disk — confirm against the installed TTS version.
        vits.load_onnx(_onnx_model_path(voice_key, model_dir))

        token_ids = vits.tokenizer.text_to_ids(text)
        # Add a leading batch axis of size 1.
        text_inputs = np.asarray(token_ids, dtype=np.int64)[None, :]
        audio = vits.inference_onnx(text_inputs)

        # Write at the sample rate recorded in the model config instead of a
        # hard-coded 24000 so the header always matches the model output.
        save_wav(wav=audio[0], path=out_path, sample_rate=config.audio.sample_rate)
    else:
        synthesizer = Synthesizer(f"{model_dir}/model.pth", config_file)
        wavs = synthesizer.tts(text)
        synthesizer.save_wav(wavs, out_path)

    return out_path
# Gradio front end. The three inputs are passed positionally to tts() as
# (text, voice, use_onnx), and the returned file path feeds the audio output.
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.Textbox(label="Text", value="Дауэ ущыт?"),
        gr.Radio(choices=["Male", "Female"], value="Male", label="Voice"),
        gr.Checkbox(label="Use ONNX", value=True),
    ],
    outputs=gr.Audio(label="Output", type="filepath"),
    title="KBD TTS",
    live=False,
)

# Start the web server without creating a public share link.
iface.launch(share=False)