"""Generate spoken audio for stored text responses.

For each entity read from the ``Response`` collection this script synthesizes
speech for its text, uploads the resulting WAV bytes as a ``StagedObject``,
records the upload in the ``AudioResponse`` collection, and deletes the
processed entity from ``Response``.
"""
import asyncio
import io

from bark import SAMPLE_RATE, generate_audio, preload_models
from pymilvus import Collection
from scipy.io.wavfile import write

from db_connect import connect
# for audio storage
from audio_db.is3.is3 import StagedObject

connect()
preload_models()


def _store_batch(response, audio_response, rows, ids_to_delete):
    """Insert one batch of audio records, then delete their source entities.

    Args:
        response: Collection the processed text entities are deleted from.
        audio_response: Collection the new audio records are inserted into.
        rows: Row-oriented records, each ``[text, obj_id, embeddings]``.
        ids_to_delete: ``id`` values of the source entities behind ``rows``.
    """
    # Transpose the rows into per-field columns (text, obj_id, embeddings).
    audio_response.insert(list(zip(*rows)))
    audio_response.flush()
    # Delete only after the insert has been flushed, so a failed insert does
    # not lose the source text.
    response.delete(expr=f"id in {str(ids_to_delete)}")


async def make_audio(batch_size: int = 5) -> None:
    """Convert every entity in ``Response`` into an ``AudioResponse`` record.

    Args:
        batch_size: Number of generated records to accumulate before they are
            inserted into ``AudioResponse`` and removed from ``Response``.
            A final, smaller batch is written when the iterator is exhausted.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    response = Collection("Response")
    audio_response = Collection("AudioResponse")

    # The iterator is asked for one entity per call; ``batch_size`` above only
    # controls how many results are written per insert.
    # NOTE(review): entities are deleted from ``Response`` while this iterator
    # is still open -- confirm the iterator tolerates concurrent deletes.
    response_iterator = response.query_iterator(
        batch_size=1, output_fields=['text', 'embeddings']
    )

    rows = []
    ids_to_delete = []
    try:
        while True:
            res = response_iterator.next()
            if not res:
                print("query iteration finished, close")
                break
            entity = res[0]

            # generate audio
            audio_array = generate_audio(
                entity['text'], history_prompt="en_speaker_3"
            )
            bytes_io = io.BytesIO()
            write(bytes_io, SAMPLE_RATE, audio_array)
            # getvalue() returns the whole buffer regardless of where the
            # stream position was left by the writer.
            audio_bytes = bytes_io.getvalue()

            # store the audio
            obj = StagedObject(obj=audio_bytes, name='audio')
            uploaded_object = await obj.upload()

            # save the audio record to AudioResponse
            rows.append(
                [entity['text'], uploaded_object.obj_id, entity['embeddings']]
            )
            # NOTE(review): 'id' is not listed in output_fields; this assumes
            # the query result still carries it -- confirm against the schema.
            ids_to_delete.append(entity['id'])

            if len(rows) >= batch_size:
                _store_batch(response, audio_response, rows, ids_to_delete)
                # Start fresh lists so already-stored rows are not inserted
                # again with the next batch.
                rows = []
                ids_to_delete = []
    finally:
        # Close exactly once, whether the loop finished or raised.
        response_iterator.close()

    # Persist the trailing partial batch; without this, audio that was already
    # uploaded would never be recorded and its source text never deleted.
    if rows:
        _store_batch(response, audio_response, rows, ids_to_delete)


if __name__ == '__main__':
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(make_audio())
    finally:
        # Release the loop's resources even if make_audio() fails.
        loop.close()