text-to-speech / db /make_audio.py
Daryl Fung
added flush for delete
fb74953
raw
history blame
2.23 kB
import io
from pymilvus import Collection
import asyncio
from bark import SAMPLE_RATE, generate_audio, preload_models
from scipy.io.wavfile import write
from db_connect import connect
# for audio storage
from audio_db.is3.is3 import StagedObject
# Runs at import time, before any Collection handle below is created.
# Presumably establishes the Milvus connection (project-local db_connect helper) — TODO confirm.
connect()
# Load the Bark models up front so later generate_audio() calls can use them.
preload_models()
# Collection handles used by the rest of this module:
#   response       - "Response": rows are read from here (text + embeddings) and
#                    deleted once handled.
#   audio_response - "AudioResponse": queried for existing text and receives the
#                    generated audio records.
response = Collection("Response")
audio_response = Collection("AudioResponse")
def check_if_exist(text):
    """Return whether ``AudioResponse`` already has a row with this exact text.

    Args:
        text: The response text to look up.

    Returns:
        bool: True if the equality query on the ``text`` field returns at
        least one row, False otherwise.
    """
    # The text is embedded in a double-quoted string literal of the filter
    # expression. Escape backslashes first, then double quotes, so text that
    # contains either character cannot end the literal early or be
    # misinterpreted as an escape sequence.
    escaped = text.replace("\\", "\\\\").replace('"', '\\"')
    matches = audio_response.query(expr=f'text == "{escaped}"')
    return len(matches) != 0
async def make_audio():
    """Generate audio for every row in ``Response`` and move it to ``AudioResponse``.

    Rows are read one at a time through a query iterator. For each row:

    * if its text already exists in ``AudioResponse`` the row is simply
      deleted from ``Response``;
    * otherwise audio is generated with Bark, serialized as WAV, uploaded via
      ``StagedObject`` and buffered as ``[text, obj_id, embeddings]``.

    Buffered rows are inserted into ``AudioResponse`` (and their source rows
    deleted from ``Response``) every ``batch_size`` rows, and once more when
    the iterator is exhausted so a trailing partial batch is not lost.

    Returns:
        None
    """
    batch_size = 5
    pending_rows = []  # row-oriented: [text, uploaded object id, embeddings]
    pending_ids = []   # ids of the Response rows backing pending_rows

    def _flush_pending():
        """Persist the buffered rows, delete their sources, and reset the buffers."""
        if not pending_rows:
            return
        # Transpose the row-oriented buffer into per-field columns for insert().
        audio_response.insert(list(zip(*pending_rows)))
        audio_response.flush()
        # Delete the source rows only after their audio records are stored.
        response.delete(expr=f"id in {str(pending_ids)}")
        response.flush()
        # Reset BOTH buffers; leaving the rows in place would re-insert them
        # with every subsequent batch.
        pending_rows.clear()
        pending_ids.clear()

    response_iterator = response.query_iterator(
        batch_size=1, output_fields=['text', 'embeddings']
    )
    try:
        while True:
            res = response_iterator.next()
            if len(res) == 0:
                print("query iteration finished, close")
                break

            row = res[0]
            text = row['text']

            # Audio for this text is already stored: drop the pending row.
            if check_if_exist(text):
                response.delete(expr=f"id in {str([row['id']])}")
                response.flush()
                continue

            # Generate the audio and serialize it as WAV into memory.
            audio_array = generate_audio(text, history_prompt="en_speaker_3")
            bytes_io = io.BytesIO()
            write(bytes_io, SAMPLE_RATE, audio_array)
            # getvalue() returns the whole buffer regardless of where the
            # stream position was left by write().
            audio_bytes = bytes_io.getvalue()

            # Store the audio; the returned object's obj_id is kept as the
            # reference in the AudioResponse record.
            obj = StagedObject(obj=audio_bytes, name='audio')
            uploaded_object = await obj.upload()

            pending_rows.append([text, uploaded_object.obj_id, row['embeddings']])
            pending_ids.append(row['id'])

            if len(pending_rows) >= batch_size:
                _flush_pending()

        # Persist whatever is left when the row count is not a multiple of
        # batch_size.
        _flush_pending()
    finally:
        # Close the iterator exactly once, including when an error occurs.
        response_iterator.close()
if __name__ == '__main__':
    # asyncio.run creates a fresh event loop, runs the coroutine to
    # completion and closes the loop afterwards, so the loop is not leaked.
    asyncio.run(make_audio())