# pycui's picture
# Add RealChar deployment for HuggingFace (V0)
# babeaf6
import asyncio
import os
import types
import httpx
from realtime_ai_character.logger import get_logger
from realtime_ai_character.utils import Singleton
from realtime_ai_character.audio.text_to_speech.base import TextToSpeech
# Module-level logger named after this module.
logger = get_logger(__name__)

# When True, ElevenLabs.stream() returns immediately without contacting the API.
DEBUG = False

# Static settings for the ElevenLabs text-to-speech endpoint.
# NOTE: the API key is read with os.environ[...] at import time, so importing
# this module raises KeyError when ELEVEN_LABS_API_KEY is not set.
config = types.SimpleNamespace(
    # Voice ids used when a character has no dedicated voice configured.
    default_voice="21m00Tcm4TlvDq8ikWAM",
    default_female_voice="EXAVITQu4vr4xnSDxMaL",
    default_male_voice="ErXwobaYiN019PkySvjV",
    chunk_size=1024,
    # URL template; `{voice_id}` is filled in per request.
    url="https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream",
    # HTTP headers sent with every request.
    headers={
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": os.environ["ELEVEN_LABS_API_KEY"],
    },
    # JSON body fields merged with the text to synthesize.
    data={
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    },
)
class ElevenLabs(Singleton, TextToSpeech):
    """Text-to-speech backend that requests audio from the ElevenLabs API.

    Keeps a mapping from character names to ElevenLabs voice ids and forwards
    the synthesized audio bytes to a websocket.
    """

    def __init__(self):
        super().__init__()
        logger.info("Initializing [ElevenLabs Text To Speech] voices...")
        # Character name -> voice id. Each entry uses the matching environment
        # variable when it is set to a non-empty value, otherwise the
        # configured default male/female voice.
        self.voice_ids = {
            "Raiden Shogun And Ei": os.environ.get('RAIDEN_VOICE') or config.default_female_voice,
            "Loki": os.environ.get('LOKI_VOICE') or config.default_male_voice,
            "Reflection Pi": os.environ.get('PI_VOICE') or config.default_female_voice,
            "Elon Musk": os.environ.get('ELON_VOICE') or config.default_male_voice,
            "Bruce Wayne": os.environ.get('BRUCE_VOICE') or config.default_male_voice,
            "Steve Jobs": os.environ.get('JOBS_VOICE') or config.default_male_voice,
            "Sam Altman": os.environ.get('SAM_VOICE') or config.default_male_voice,
        }

    def get_voice_id(self, name):
        """Return the voice id for character `name`.

        Falls back to `config.default_voice` when the name is not a key of
        `self.voice_ids`.
        """
        return self.voice_ids.get(name, config.default_voice)

    async def stream(self, text, websocket, tts_event: asyncio.Event, characater_name="", first_sentence=False) -> None:
        """Synthesize `text` and send the resulting audio bytes to `websocket`.

        Args:
            text: The text to convert to speech.
            websocket: Object exposing an awaitable `send_bytes(chunk)` method.
            tts_event: When set, forwarding stops before the next chunk is sent.
            characater_name: Character whose voice is used (the misspelled
                parameter name is kept for caller compatibility). Unknown
                names use the default voice.
            first_sentence: When True, `optimize_streaming_latency=4` is added
                to the request URL.

        Returns:
            None. Nothing is sent when DEBUG is enabled or when the API answers
            with a non-200 status.
        """
        if DEBUG:
            return

        payload = {
            "text": text,
            **config.data,
        }
        url = config.url.format(voice_id=self.get_voice_id(characater_name))
        if first_sentence:
            url = url + '?optimize_streaming_latency=4'

        async with httpx.AsyncClient() as client:
            # NOTE(review): client.post() is not a streaming request in httpx, so
            # the body is downloaded before iteration starts. Switching to
            # client.stream() would change how many websocket messages the
            # client receives - confirm with the frontend before changing.
            response = await client.post(url, json=payload, headers=config.headers)

            # On failure the body is an error document, not audio; forwarding
            # it would hand the client bytes it cannot play.
            if response.status_code != 200:
                logger.error(
                    f"ElevenLabs text-to-speech request failed with status "
                    f"{response.status_code}: {response.text[:200]}"
                )
                return

            async for chunk in response.aiter_bytes():
                # Presumably yields control so an interruption can set
                # tts_event before the chunk is sent - TODO confirm.
                await asyncio.sleep(0.1)
                if tts_event.is_set():
                    # stop streaming audio
                    break
                await websocket.send_bytes(chunk)