dmcartor committed on
Commit
7477f26
1 Parent(s): 4e0cd67

Update synthesise function again

Browse files
Files changed (1) hide show
  1. app.py +9 -13
app.py CHANGED
@@ -73,24 +73,20 @@ def translate(audio):
73
  return translated_text
74
 
75
  def synthesise(text):
76
- # Tokenize the translated text for the VITS model
77
  inputs = tts_tokenizer(text, return_tensors="pt").to(device)
78
 
79
  with torch.no_grad():
80
  output = tts_model(**inputs)
81
  print(f"TTS Model Output: {output}")
82
-
83
- # Extract the waveform
84
- speech = output['waveform'][0] # Access waveform from the model's output
85
-
86
- # Get the sampling rate from the TTS model configuration
87
- sampling_rate = tts_model.config.sampling_rate # Ensure you're using the correct rate
88
-
89
- # Convert to numpy format suitable for audio output
90
- speech_numpy = (speech.squeeze().cpu().numpy() * 32767).astype(np.int16)
91
-
92
- return sampling_rate, speech_numpy
93
-
94
 
95
  # Normalize audio
96
  def normalize_audio(audio):
 
73
  return translated_text
74
 
75
  def synthesise(text):
76
+ # Properly tokenize the translated text for the VITS model
77
  inputs = tts_tokenizer(text, return_tensors="pt").to(device)
78
 
79
  with torch.no_grad():
80
  output = tts_model(**inputs)
81
  print(f"TTS Model Output: {output}")
82
+
83
+ # Accessing the correct key for the waveform
84
+ speech = output['model_outputs'] # Correct key access for the synthesized waveform
85
+
86
+ # Convert to numpy format suitable for audio output
87
+ speech_numpy = speech.squeeze().cpu().numpy() # Remove batch and channel dimensions if necessary
88
+
89
+ return (speech_numpy * 32767).astype(np.int16) # Ensure correct format for audio output
 
 
 
 
90
 
91
  # Normalize audio
92
  def normalize_audio(audio):