dmcartor committed on
Commit
f5b2e41
1 Parent(s): 7477f26

Modifying synthesise function again

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -76,17 +76,20 @@ def synthesise(text):
76
  # Properly tokenize the translated text for the VITS model
77
  inputs = tts_tokenizer(text, return_tensors="pt").to(device)
78
 
 
79
  with torch.no_grad():
80
  output = tts_model(**inputs)
81
- print(f"TTS Model Output: {output}")
82
-
83
- # Accessing the correct key for the waveform
84
- speech = output['model_outputs'] # Correct key access for the synthesized waveform
85
-
86
- # Convert to numpy format suitable for audio output
87
- speech_numpy = speech.squeeze().cpu().numpy() # Remove batch and channel dimensions if necessary
88
-
89
- return (speech_numpy * 32767).astype(np.int16) # Ensure correct format for audio output
 
 
90
 
91
  # Normalize audio
92
  def normalize_audio(audio):
 
76
  # Properly tokenize the translated text for the VITS model
77
  inputs = tts_tokenizer(text, return_tensors="pt").to(device)
78
 
79
+ # Run the model to generate the waveform
80
  with torch.no_grad():
81
  output = tts_model(**inputs)
82
+
83
+ # Check the output and access the waveform
84
+ print(f"TTS Model Output: {output}")
85
+
86
+ # Access the synthesized waveform from the model output
87
+ speech = output.audio # The waveform is stored in the 'audio' key
88
+
89
+ # Convert to numpy format suitable for audio output
90
+ speech_numpy = (speech.squeeze().cpu().numpy() * 32767).astype(np.int16)
91
+
92
+ return speech_numpy
93
 
94
  # Normalize audio
95
  def normalize_audio(audio):