Modifying synthesise function again
Browse files
app.py
CHANGED
@@ -76,17 +76,20 @@ def synthesise(text):
|
|
76 |
# Properly tokenize the translated text for the VITS model
|
77 |
inputs = tts_tokenizer(text, return_tensors="pt").to(device)
|
78 |
|
|
|
79 |
with torch.no_grad():
|
80 |
output = tts_model(**inputs)
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
90 |
|
91 |
# Normalize audio
|
92 |
def normalize_audio(audio):
|
|
|
76 |
# Properly tokenize the translated text for the VITS model
|
77 |
inputs = tts_tokenizer(text, return_tensors="pt").to(device)
|
78 |
|
79 |
+
# Run the model to generate the waveform
|
80 |
with torch.no_grad():
|
81 |
output = tts_model(**inputs)
|
82 |
+
|
83 |
+
# Print the raw model output for inspection before extracting the waveform
|
84 |
+
print(f"TTS Model Output: {output}")
|
85 |
+
|
86 |
+
# Access the synthesized waveform from the model output
|
87 |
+
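speech = output.audio # Read the waveform from the output's 'audio' attribute. NOTE(review): transformers' VitsModelOutput documents this field as 'waveform' — confirm 'audio' exists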
|
88 |
+
|
89 |
+
# Convert to a 16-bit integer NumPy array (scale by 32767, cast to int16) for audio output
|
90 |
+
speech_numpy = (speech.squeeze().cpu().numpy() * 32767).astype(np.int16)
|
91 |
+
|
92 |
+
return speech_numpy
|
93 |
|
94 |
# Normalize audio
|
95 |
def normalize_audio(audio):
|