Spaces:
Build error
Build error
from google.cloud import speech | |
import types | |
from realtime_ai_character.audio.speech_to_text.base import SpeechToText | |
from realtime_ai_character.logger import get_logger | |
from realtime_ai_character.utils import Singleton | |
logger = get_logger(__name__)

# Per-platform recognition settings, looked up by platform name through
# ``config.__dict__[platform]`` (or as attributes: ``config.web``).
config = types.SimpleNamespace(
    # Web platform: WebM/Opus audio at 48 kHz.
    web={
        'encoding': speech.RecognitionConfig.AudioEncoding.WEBM_OPUS,
        'sample_rate_hertz': 48000,
        'language_code': 'en-US',
        'max_alternatives': 1,
    },
    # Terminal platform: 16-bit linear PCM at 44.1 kHz.
    terminal={
        'encoding': speech.RecognitionConfig.AudioEncoding.LINEAR16,
        'sample_rate_hertz': 44100,
        'language_code': 'en-US',
        'max_alternatives': 1,
    },
)
class Google(Singleton, SpeechToText):
    """Speech-to-text backend built on Google Cloud Speech ``recognize``."""

    def __init__(self):
        """Create the Google Speech client used by ``transcribe``."""
        super().__init__()
        logger.info("Setting up [Google Speech to Text]...")
        self.client = speech.SpeechClient()

    def transcribe(self, audio_bytes, platform, prompt='') -> str:
        """Transcribe one audio clip and return the top transcript.

        Args:
            audio_bytes: Raw audio payload, sent as the ``content`` of a
                ``speech.RecognitionAudio``.
            platform: Key into the module-level ``config`` namespace
                (``'web'`` or ``'terminal'``) selecting the encoding,
                sample rate, language and alternatives count.
            prompt: Comma-separated phrase hints. Blank entries and
                surrounding whitespace are discarded; an empty or ``None``
                prompt sends no speech context at all.

        Returns:
            The transcript of the first alternative of the first result,
            or ``''`` when the response has no results or no alternatives.

        Raises:
            KeyError: If ``platform`` is not a key of ``config``.
        """
        settings = dict(config.__dict__[platform])

        # ``''.split(',')`` yields ``['']``; without filtering, every call made
        # with the default prompt would submit a single empty phrase hint.
        candidates = (piece.strip() for piece in (prompt or '').split(','))
        phrases = [phrase for phrase in candidates if phrase]
        if phrases:
            settings['speech_contexts'] = [speech.SpeechContext(phrases=phrases)]

        batch_config = speech.RecognitionConfig(settings)
        response = self.client.recognize(
            config=batch_config,
            audio=speech.RecognitionAudio(content=audio_bytes),
        )

        if not response.results:
            return ''
        result = response.results[0]
        if not result.alternatives:
            return ''
        return result.alternatives[0].transcript