# text-to-speech/db/make_audio.py
# Daryl Fung - "fix namepath" (commit 94c08a9)
import io
from pymilvus import Collection
import asyncio
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write
from db_connect import connect
# for audio storage
from audio_db.is3.is3 import StagedObject
# Module-level side effects: both calls run once, at import time, before
# generate_audio() can be invoked.
# connect() comes from the project's db_connect module - presumably it opens
# the Milvus connection that the Collection(...) lookups rely on; TODO confirm.
connect()
# preload_models() is imported from bark - presumably it loads the synthesis
# models up front so the first generation call does not pay for it; confirm
# against the bark documentation.
preload_models()
async def generate_audio():
    """Synthesize audio for every row of ``Response`` and move it to ``AudioResponse``.

    Rows are read one at a time from the ``Response`` collection. For each
    row the ``text`` field is turned into speech with bark (voice preset
    ``en_speaker_3``), encoded as WAV bytes, and uploaded through
    ``StagedObject``. Once every row has been processed, the collected
    ``(text, uploaded object id, embeddings)`` records are inserted into
    ``AudioResponse`` and flushed, and the processed rows are deleted from
    ``Response``.

    Returns:
        None. When ``Response`` yields no rows, nothing is inserted or
        deleted.

    Raises:
        Whatever the Milvus client, bark, scipy or the upload raise; the
        query iterator is closed in every case.
    """
    # This coroutine's own name shadows the ``generate_audio`` imported from
    # bark at module level, so calling ``generate_audio(text, ...)`` here
    # would invoke the coroutine itself. Re-import the synthesizer under an
    # unambiguous alias.
    from bark import generate_audio as synthesize_speech

    response = Collection("Response")
    audio_response = Collection("AudioResponse")

    rows = []
    ids_to_delete = []
    response_iterator = response.query_iterator(
        batch_size=1, output_fields=['text', 'embeddings']
    )
    try:
        while True:
            res = response_iterator.next()
            if len(res) == 0:
                print("query iteration finished, close")
                break
            record = res[0]

            # generate audio
            audio_array = synthesize_speech(
                record['text'], history_prompt="en_speaker_3"
            )
            bytes_io = io.BytesIO()
            write(bytes_io, SAMPLE_RATE, audio_array)
            # getvalue() returns the whole buffer regardless of where the
            # stream position was left by the writer.
            audio_bytes = bytes_io.getvalue()

            # store the audio
            obj = StagedObject(obj=audio_bytes, name='audio')
            uploaded_object = await obj.upload()

            # remember the record for AudioResponse and the row to remove
            rows.append(
                [record['text'], uploaded_object.obj_id, record['embeddings']]
            )
            # NOTE(review): 'id' is not listed in output_fields; this relies on
            # the query returning the primary key under that name - confirm.
            ids_to_delete.append(record['id'])
    finally:
        # close the iterator even when synthesis or upload fails midway
        response_iterator.close()

    if not rows:
        # Nothing was processed: skip the empty insert and the
        # "id in []" delete expression.
        return

    # rows are per-record; zip(*rows) transposes them into per-field columns
    audio_response.insert(list(zip(*rows)))
    audio_response.flush()

    # delete text to generate audio
    response.delete(expr=f"id in {str(ids_to_delete)}")
if __name__ == '__main__':
    # asyncio.run creates a fresh event loop, drives the coroutine to
    # completion and closes the loop afterwards; the previous manual
    # new_event_loop()/run_until_complete() pair never closed its loop.
    asyncio.run(generate_audio())