awacke1 committed on
Commit
ce220b5
1 Parent(s): 5fe3ad1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -38
app.py CHANGED
@@ -1,98 +1,64 @@
1
  from turtle import title
2
  import gradio as gr
3
-
4
  import git
5
  import os
6
  os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
7
  os.system('pip install -q -e TTS/')
8
  os.system('pip install -q torchaudio==0.9.0')
9
-
10
  import sys
11
  TTS_PATH = "TTS/"
12
-
13
- # add libraries into environment
14
  sys.path.append(TTS_PATH) # set this if TTS is not installed globally
15
-
16
  import os
17
  import string
18
  import time
19
  import argparse
20
  import json
21
-
22
  import numpy as np
23
  import IPython
24
  from IPython.display import Audio
25
-
26
-
27
  import torch
28
-
29
  from TTS.tts.utils.synthesis import synthesis
30
  from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
31
  try:
32
  from TTS.utils.audio import AudioProcessor
33
  except:
34
  from TTS.utils.audio import AudioProcessor
35
-
36
-
37
  from TTS.tts.models import setup_model
38
  from TTS.config import load_config
39
  from TTS.tts.models.vits import *
40
 
41
  OUT_PATH = 'out/'
42
-
43
- # create output path
44
  os.makedirs(OUT_PATH, exist_ok=True)
45
 
46
- # model vars
47
  MODEL_PATH = '/home/user/app/best_model_latest.pth.tar'
48
  CONFIG_PATH = '/home/user/app/config.json'
49
  TTS_LANGUAGES = "/home/user/app/language_ids.json"
50
  TTS_SPEAKERS = "/home/user/app/speakers.json"
51
  USE_CUDA = torch.cuda.is_available()
52
 
53
- # load the config
54
  C = load_config(CONFIG_PATH)
55
-
56
-
57
- # load the audio processor
58
  ap = AudioProcessor(**C.audio)
59
-
60
  speaker_embedding = None
61
-
62
  C.model_args['d_vector_file'] = TTS_SPEAKERS
63
  C.model_args['use_speaker_encoder_as_loss'] = False
64
-
65
  model = setup_model(C)
66
  model.language_manager.set_language_ids_from_file(TTS_LANGUAGES)
67
- # print(model.language_manager.num_languages, model.embedded_language_dim)
68
- # print(model.emb_l)
69
  cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
70
- # remove speaker encoder
71
  model_weights = cp['model'].copy()
72
  for key in list(model_weights.keys()):
73
  if "speaker_encoder" in key:
74
  del model_weights[key]
75
-
76
  model.load_state_dict(model_weights)
77
-
78
-
79
  model.eval()
80
-
81
  if USE_CUDA:
82
  model = model.cuda()
83
-
84
- # synthesize voice
85
  use_griffin_lim = False
86
-
87
  os.system('pip install -q pydub ffmpeg-normalize')
88
-
89
  CONFIG_SE_PATH = "config_se.json"
90
  CHECKPOINT_SE_PATH = "SE_checkpoint.pth.tar"
91
-
92
  from TTS.tts.utils.speakers import SpeakerManager
93
  from pydub import AudioSegment
94
  import librosa
95
-
96
  SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encoder_config_path=CONFIG_SE_PATH, use_cuda=USE_CUDA)
97
 
98
  def compute_spec(ref_file):
@@ -101,8 +67,6 @@ def compute_spec(ref_file):
101
  spec = torch.FloatTensor(spec).unsqueeze(0)
102
  return spec
103
 
104
-
105
-
106
  def greet(Text,Voicetoclone,VoiceMicrophone):
107
  text= "%s" % (Text)
108
  if Voicetoclone is not None:
@@ -130,7 +94,6 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
130
  text = text
131
  model.language_manager.language_id_mapping
132
  language_id = 0
133
-
134
  print(" > text: {}".format(text))
135
  wav, alignment, _, _ = synthesis(
136
  model,
@@ -160,6 +123,6 @@ demo = gr.Interface(
160
  fn=greet,
161
  inputs=[gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)'),gr.Audio(type="filepath", source="upload",label='Please upload a voice to clone (max. 30mb)'),gr.Audio(source="microphone", type="filepath", streaming=True)],
162
  outputs="audio",
163
- title="Bilal's Voice Cloning Tool"
164
  )
165
  demo.launch()
 
1
  from turtle import title
2
  import gradio as gr
 
3
  import git
4
  import os
5
  os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torchaudio-SE TTS')
6
  os.system('pip install -q -e TTS/')
7
  os.system('pip install -q torchaudio==0.9.0')
 
8
  import sys
9
  TTS_PATH = "TTS/"
 
 
10
  sys.path.append(TTS_PATH) # set this if TTS is not installed globally
 
11
  import os
12
  import string
13
  import time
14
  import argparse
15
  import json
 
16
  import numpy as np
17
  import IPython
18
  from IPython.display import Audio
 
 
19
  import torch
 
20
  from TTS.tts.utils.synthesis import synthesis
21
  from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
22
  try:
23
  from TTS.utils.audio import AudioProcessor
24
  except:
25
  from TTS.utils.audio import AudioProcessor
 
 
26
  from TTS.tts.models import setup_model
27
  from TTS.config import load_config
28
  from TTS.tts.models.vits import *
29
 
30
  OUT_PATH = 'out/'
 
 
31
  os.makedirs(OUT_PATH, exist_ok=True)
32
 
 
33
  MODEL_PATH = '/home/user/app/best_model_latest.pth.tar'
34
  CONFIG_PATH = '/home/user/app/config.json'
35
  TTS_LANGUAGES = "/home/user/app/language_ids.json"
36
  TTS_SPEAKERS = "/home/user/app/speakers.json"
37
  USE_CUDA = torch.cuda.is_available()
38
 
 
39
  C = load_config(CONFIG_PATH)
 
 
 
40
  ap = AudioProcessor(**C.audio)
 
41
  speaker_embedding = None
 
42
  C.model_args['d_vector_file'] = TTS_SPEAKERS
43
  C.model_args['use_speaker_encoder_as_loss'] = False
 
44
  model = setup_model(C)
45
  model.language_manager.set_language_ids_from_file(TTS_LANGUAGES)
 
 
46
  cp = torch.load(MODEL_PATH, map_location=torch.device('cpu'))
 
47
  model_weights = cp['model'].copy()
48
  for key in list(model_weights.keys()):
49
  if "speaker_encoder" in key:
50
  del model_weights[key]
 
51
  model.load_state_dict(model_weights)
 
 
52
  model.eval()
 
53
  if USE_CUDA:
54
  model = model.cuda()
 
 
55
  use_griffin_lim = False
 
56
  os.system('pip install -q pydub ffmpeg-normalize')
 
57
  CONFIG_SE_PATH = "config_se.json"
58
  CHECKPOINT_SE_PATH = "SE_checkpoint.pth.tar"
 
59
  from TTS.tts.utils.speakers import SpeakerManager
60
  from pydub import AudioSegment
61
  import librosa
 
62
  SE_speaker_manager = SpeakerManager(encoder_model_path=CHECKPOINT_SE_PATH, encoder_config_path=CONFIG_SE_PATH, use_cuda=USE_CUDA)
63
 
64
  def compute_spec(ref_file):
 
67
  spec = torch.FloatTensor(spec).unsqueeze(0)
68
  return spec
69
 
 
 
70
  def greet(Text,Voicetoclone,VoiceMicrophone):
71
  text= "%s" % (Text)
72
  if Voicetoclone is not None:
 
94
  text = text
95
  model.language_manager.language_id_mapping
96
  language_id = 0
 
97
  print(" > text: {}".format(text))
98
  wav, alignment, _, _ = synthesis(
99
  model,
 
123
  fn=greet,
124
  inputs=[gr.inputs.Textbox(label='What would you like the voice to say? (max. 2000 characters per request)'),gr.Audio(type="filepath", source="upload",label='Please upload a voice to clone (max. 30mb)'),gr.Audio(source="microphone", type="filepath", streaming=True)],
125
  outputs="audio",
126
+ title="Clone Any Voice"
127
  )
128
  demo.launch()