anzorq committed on
Commit
4d7078a
1 Parent(s): d7b3885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -12
app.py CHANGED
@@ -3,6 +3,15 @@ from TTS.utils.download import download_url
3
  from TTS.utils.synthesizer import Synthesizer
4
  import gradio as gr
5
  import tempfile
 
 
 
 
 
 
 
 
 
6
 
7
  MAX_TXT_LEN = 800
8
  BASE_DIR = "kbd-vits-tts-{}"
@@ -10,6 +19,8 @@ MALE_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/c
10
  MALE_CONFIG_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/config_35000.json"
11
  FEMALE_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/best_model_56351.pth"
12
  FEMALE_CONFIG_URL = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/config.json"
 
 
13
 
14
  def download_model_and_config(gender):
15
  dir_path = BASE_DIR.format(gender)
@@ -24,24 +35,61 @@ def download_model_and_config(gender):
24
  download_model_and_config("male")
25
  download_model_and_config("female")
26
 
27
- def tts(text: str, voice: str="Male"):
28
  if len(text) > MAX_TXT_LEN:
29
  text = text[:MAX_TXT_LEN]
30
  print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
31
  print(text)
32
 
33
- text = text.replace("I", "ӏ") #replace capital is with "Palochka" symbol
 
34
 
35
- model_dir = BASE_DIR.format("male" if voice == "Male" else "female")
 
 
 
36
 
37
- # synthesize
38
- synthesizer = Synthesizer(f"{model_dir}/model.pth", f"{model_dir}/config.json")
39
- wavs = synthesizer.tts(text)
40
 
41
- # return output
42
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
43
- synthesizer.save_wav(wavs, fp)
44
- return fp.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  iface = gr.Interface(
47
  fn=tts,
@@ -52,9 +100,13 @@ iface = gr.Interface(
52
  ),
53
  gr.Radio(
54
  choices=["Male", "Female"],
55
- value="Male", # Set Male as the default choice
56
  label="Voice"
57
- )
 
 
 
 
58
  ],
59
  outputs=gr.Audio(label="Output", type='filepath'),
60
  title="KBD TTS",
 
3
  from TTS.utils.synthesizer import Synthesizer
4
  import gradio as gr
5
  import tempfile
6
+ import torch
7
+ import onnxruntime as ort
8
+ import json
9
+ from TTS.tts.utils.synthesis import synthesis
10
+ from TTS.tts.configs.vits_config import VitsConfig
11
+ from TTS.tts.models.vits import Vits, VitsCharacters
12
+ from TTS.tts.utils.text.tokenizer import TTSTokenizer
13
+ import numpy as np
14
+ from TTS.utils.audio.numpy_transforms import save_wav
15
 
16
  MAX_TXT_LEN = 800
17
  BASE_DIR = "kbd-vits-tts-{}"
 
19
  MALE_CONFIG_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/config_35000.json"
20
  FEMALE_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/best_model_56351.pth"
21
  FEMALE_CONFIG_URL = "https://huggingface.co/anzorq/kbd-vits-tts-female/resolve/main/config.json"
22
+ MALE_ONNX_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/onnx/kbd_vits_male.onnx"
23
+ FEMALE_ONNX_MODEL_URL = "https://huggingface.co/anzorq/kbd-vits-tts-male/resolve/main/onnx/kbd_vits_female.onnx"
24
 
25
  def download_model_and_config(gender):
26
  dir_path = BASE_DIR.format(gender)
 
35
  download_model_and_config("male")
36
  download_model_and_config("female")
37
 
38
def _ensure_onnx_model(gender: str) -> str:
    """Return the local path of the ONNX model for *gender*, downloading it once.

    The model is stored next to the other files of that voice, in
    ``BASE_DIR.format(gender)``, under the name ``model.onnx``.

    Args:
        gender: ``"male"`` or ``"female"``; any other value selects the
            female URL, mirroring the voice selection in ``tts``.

    Returns:
        Path of the ONNX file on the local filesystem.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import urllib.request

    model_dir = BASE_DIR.format(gender)
    onnx_path = os.path.join(model_dir, "model.onnx")
    if not os.path.exists(onnx_path):
        # NOTE(review): FEMALE_ONNX_MODEL_URL points at the kbd-vits-tts-male
        # repository -- confirm that is where the female export is hosted.
        url = MALE_ONNX_MODEL_URL if gender == "male" else FEMALE_ONNX_MODEL_URL
        os.makedirs(model_dir, exist_ok=True)
        # Download to a side file first so that an interrupted transfer is
        # never mistaken for a complete model on the next call.
        partial_path = onnx_path + ".part"
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, onnx_path)
    return onnx_path


def tts(text: str, voice: str = "Male", use_onnx: bool = True):
    """Synthesize *text* with the selected voice and return a WAV file path.

    Args:
        text: Input text. Anything beyond ``MAX_TXT_LEN`` characters is
            dropped (a notice is printed). Capital Latin "I" is replaced
            with the Palochka symbol and the text is lower-cased before
            synthesis.
        voice: ``"Male"`` selects the male model; any other value selects
            the female model.
        use_onnx: When true, run inference through the ONNX export of the
            VITS model; otherwise use the ``Synthesizer`` with the
            ``model.pth`` checkpoint.

    Returns:
        Path of a temporary ``.wav`` file holding the synthesized audio.
        The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text)

    text = text.replace("I", "ӏ")  # Replace capital "I" with "Palochka" symbol
    text = text.lower()

    gender = "male" if voice == "Male" else "female"
    model_dir = BASE_DIR.format(gender)

    if use_onnx:
        # The loaders below are given a file on disk rather than the remote
        # URL, so make sure the export has been downloaded first.
        onnx_path = _ensure_onnx_model(gender)

        # Read the config of the selected voice from the same location the
        # non-ONNX branch uses, instead of a bare relative file name.
        config = VitsConfig()
        config.load_json(f"{model_dir}/config.json")

        # Create the Vits model instance and attach the ONNX session to it.
        vits = Vits.init_from_config(config)
        vits.load_onnx(onnx_path)

        # Token ids with a leading batch axis, as int64.
        text_inputs = np.asarray(
            vits.tokenizer.text_to_ids(text),
            dtype=np.int64,
        )[None, :]
        audio = vits.inference_onnx(text_inputs)

        # Reserve a temporary file name, then write once the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            out_path = temp_file.name
        # NOTE(review): the sample rate is hard-coded; confirm it matches the
        # rate in the model config.
        save_wav(wav=audio[0], path=out_path, sample_rate=24000)
    else:
        # Synthesize with the PyTorch checkpoint.
        synthesizer = Synthesizer(f"{model_dir}/model.pth", f"{model_dir}/config.json")
        wavs = synthesizer.tts(text)

        # Reserve a temporary file name, then write once the handle is closed.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            out_path = temp_file.name
        synthesizer.save_wav(wavs, out_path)

    return out_path
93
 
94
  iface = gr.Interface(
95
  fn=tts,
 
100
  ),
101
  gr.Radio(
102
  choices=["Male", "Female"],
103
+ value="Male",
104
  label="Voice"
105
+ ),
106
+ gr.Checkbox(
107
+ label="Use ONNX",
108
+ value=True,
109
+ ),
110
  ],
111
  outputs=gr.Audio(label="Output", type='filepath'),
112
  title="KBD TTS",