Spaces:

anzorq
/

vits-kbd-male

Running

App Files Files Community

vits-kbd-male / app.py

anzorq

Update app.py

2bfbd08 verified 5 months ago

raw

history blame

3.8 kB

	import os
	from TTS.utils.download import download_url
	from TTS.utils.synthesizer import Synthesizer
	import gradio as gr
	import tempfile
	import torch
	import json
	from TTS.tts.utils.synthesis import synthesis
	from TTS.tts.configs.vits_config import VitsConfig
	from TTS.tts.models.vits import Vits, VitsCharacters
	from TTS.tts.utils.text.tokenizer import TTSTokenizer
	import numpy as np
	from TTS.utils.audio.numpy_transforms import save_wav

	# Upper bound on the number of input characters synthesized per request.
	MAX_TXT_LEN = 800
	# Local directory template; "{}" is filled with "male" or "female".
	BASE_DIR = "kbd-vits-tts-{}"

	# Hugging Face repositories hosting the model artifacts.
	_MALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main"
	_FEMALE_REPO = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main"

	# PyTorch checkpoints and their matching configs.
	MALE_MODEL_URL = f"{_MALE_REPO}/checkpoint_56000.pth"
	MALE_CONFIG_URL = f"{_MALE_REPO}/config_35000.json"
	FEMALE_MODEL_URL = f"{_FEMALE_REPO}/best_model_56351.pth"
	FEMALE_CONFIG_URL = f"{_FEMALE_REPO}/config.json"

	# ONNX exports of the two voices.
	# NOTE(review): the female ONNX file is fetched from the *male* repository;
	# confirm that this is where the export is actually hosted.
	MALE_ONNX_MODEL_URL = f"{_MALE_REPO}/onnx/kbd_vits_male.onnx"
	FEMALE_ONNX_MODEL_URL = f"{_MALE_REPO}/onnx/kbd_vits_female.onnx"

	def download_model_and_config(gender):
	    """Make sure the checkpoint and config for ``gender`` exist locally.

	    Files are stored in ``BASE_DIR.format(gender)`` as ``model.pth`` and
	    ``config.json``. Each file is fetched only when it is not already on
	    disk, so a restart (or a previous run that fetched just one of the two
	    files) neither re-downloads nor skips anything.

	    Args:
	        gender: ``"male"`` selects the male URLs; any other value selects
	            the female URLs.

	    Returns:
	        The local directory path holding the two files.
	    """
	    dir_path = BASE_DIR.format(gender)
	    # exist_ok avoids the check-then-create race of exists() + makedirs().
	    os.makedirs(dir_path, exist_ok=True)

	    is_male = gender == "male"
	    targets = (
	        (MALE_MODEL_URL if is_male else FEMALE_MODEL_URL, "model.pth"),
	        (MALE_CONFIG_URL if is_male else FEMALE_CONFIG_URL, "config.json"),
	    )
	    for url, filename in targets:
	        # Check per file: an existing directory does not prove both files
	        # were downloaded.
	        if not os.path.exists(os.path.join(dir_path, filename)):
	            download_url(url, dir_path, filename)
	    return dir_path

	# Fetch both voices up front so the first request does not wait on a download.
	for _gender in ("male", "female"):
	    download_model_and_config(_gender)

	def _ensure_local_onnx_model(model_dir, url):
	    """Return a local path for the ONNX model at ``url``, downloading it into ``model_dir`` if absent."""
	    filename = os.path.basename(url)
	    local_path = os.path.join(model_dir, filename)
	    if not os.path.exists(local_path):
	        os.makedirs(model_dir, exist_ok=True)
	        download_url(url, model_dir, filename)
	    return local_path


	def _new_wav_path():
	    """Reserve a temporary ``.wav`` file and return its path (caller owns the file)."""
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	        return temp_file.name


	def tts(text: str, voice: str = "Male", use_onnx: bool = True):
	    """Synthesize ``text`` and return the path of the generated WAV file.

	    Args:
	        text: Input text. Anything beyond ``MAX_TXT_LEN`` characters is
	            dropped. Capital "I" is mapped to the palochka letter and the
	            text is lower-cased before synthesis.
	        voice: ``"Male"`` selects the male model; any other value selects
	            the female model.
	        use_onnx: When true, run the ONNX export of the model; otherwise
	            run the PyTorch checkpoint through ``Synthesizer``.

	    Returns:
	        Path to a temporary ``.wav`` file. The file is not deleted
	        automatically.
	    """
	    if len(text) > MAX_TXT_LEN:
	        text = text[:MAX_TXT_LEN]
	        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")

	    model_dir = BASE_DIR.format("male" if voice == "Male" else "female")
	    config_file = f"{model_dir}/config.json"

	    text = text.replace("I", "ӏ")  # Replace capital "I" with "Palochka" symbol
	    text = text.lower()

	    out_path = _new_wav_path()

	    if use_onnx:
	        onnx_model_url = MALE_ONNX_MODEL_URL if voice == "Male" else FEMALE_ONNX_MODEL_URL
	        # load_onnx is handed a file on disk rather than the remote URL:
	        # fetch the export once and reuse the local copy afterwards.
	        onnx_model_path = _ensure_local_onnx_model(model_dir, onnx_model_url)

	        config = VitsConfig()
	        config.load_json(config_file)

	        # init_from_config builds the model's own tokenizer, used below.
	        vits = Vits.init_from_config(config)
	        vits.load_onnx(onnx_model_path)

	        # Add a leading batch axis: the ids become a (1, n_tokens) array.
	        text_inputs = np.asarray(
	            vits.tokenizer.text_to_ids(text),
	            dtype=np.int64,
	        )[None, :]
	        audio = vits.inference_onnx(text_inputs)

	        # Take the rate from the loaded config so the WAV header follows
	        # the model configuration instead of a hard-coded number.
	        save_wav(wav=audio[0], path=out_path, sample_rate=config.audio.sample_rate)
	    else:
	        synthesizer = Synthesizer(f"{model_dir}/model.pth", config_file)
	        wavs = synthesizer.tts(text)
	        synthesizer.save_wav(wavs, out_path)

	    return out_path

	# Input widgets, in the positional order expected by tts(text, voice, use_onnx).
	_text_input = gr.Textbox(label="Text", value="Дауэ ущыт?")
	_voice_input = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
	_onnx_toggle = gr.Checkbox(label="Use ONNX", value=True)

	# tts() returns a file path, so the audio component is configured for paths.
	_audio_output = gr.Audio(label="Output", type='filepath')

	iface = gr.Interface(
	    fn=tts,
	    inputs=[_text_input, _voice_input, _onnx_toggle],
	    outputs=_audio_output,
	    title="KBD TTS",
	    live=False,
	)

	# Serve the app without requesting a public share link.
	iface.launch(share=False)