Update synthesise function again
app.py CHANGED
@@ -73,24 +73,20 @@ def translate(audio):
     return translated_text
 
 def synthesise(text):
-    #
+    # Properly tokenize the translated text for the VITS model
     inputs = tts_tokenizer(text, return_tensors="pt").to(device)
 
     with torch.no_grad():
         output = tts_model(**inputs)
     print(f"TTS Model Output: {output}")
-
-
-
-
-
-
-
-    #
-    speech_numpy = (speech.squeeze().cpu().numpy() * 32767).astype(np.int16)
-
-    return sampling_rate, speech_numpy
-
+
+    # Accessing the correct key for the waveform
+    speech = output['model_outputs']  # Correct key access for the synthesized waveform
+
+    # Convert to numpy format suitable for audio output
+    speech_numpy = speech.squeeze().cpu().numpy()  # Remove batch and channel dimensions if necessary
+
+    return (speech_numpy * 32767).astype(np.int16)  # Ensure correct format for audio output
 
 # Normalize audio
 def normalize_audio(audio):
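For reference, below is a minimal, self-contained sketch of the updated synthesise() step. It assumes the TTS model is a transformers VitsModel (the diff only says "VITS model"); with that class the waveform is exposed as output.waveform rather than an output['model_outputs'] key, so the key access shown in the diff is swapped for the attribute here. The checkpoint name, and the tts_model / tts_tokenizer / device names reused from the diff, are assumptions, not confirmed by this commit.

import numpy as np
import torch
from transformers import VitsModel, VitsTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

# Assumed checkpoint for illustration; swap in whatever app.py actually loads
tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng").to(device)
tts_tokenizer = VitsTokenizer.from_pretrained("facebook/mms-tts-eng")

def synthesise(text):
    # Tokenize the translated text for the VITS model
    inputs = tts_tokenizer(text, return_tensors="pt").to(device)

    with torch.no_grad():
        output = tts_model(**inputs)

    # VitsModel returns the synthesized waveform as a float tensor in [-1, 1]
    speech = output.waveform

    # Drop the batch dimension and move the tensor to a CPU numpy array
    speech_numpy = speech.squeeze().cpu().numpy()

    # Scale to 16-bit PCM range, matching the diff's output format
    return (speech_numpy * 32767).astype(np.int16)

Note that the old version returned a (sampling_rate, speech_numpy) tuple while the new one returns only the array; if the caller (e.g. a Gradio Audio output) still needs the rate, tts_model.config.sampling_rate should provide it for a VitsModel checkpoint.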