"""FastAPI text-to-speech service backed by a Milvus vector store.

Incoming text is embedded with a SentenceTransformer and matched against
pre-generated audio stored in Milvus.  On a cache miss the text is queued
for offline audio generation and a canned "please wait" clip is served
instead.
"""
import io

import numpy as np
import soundfile as sf
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pymilvus import Collection
from sentence_transformers import SentenceTransformer

from bark import SAMPLE_RATE  # noqa: F401 -- currently unused; kept for compatibility
from db.db_connect import connect, disconnect
from db.query_db import query

# Similarity cutoff applied to every Milvus lookup (was duplicated inline).
SIMILARITY_THRESHOLD = 0.8

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
app = FastAPI()


def get_wait_responses():
    """Embed the canned "please wait" phrases shipped with the service.

    Phrases live in ``db/wait_responses.txt``, separated by blank lines.
    Returns the embedding matrix produced by the sentence model.
    """
    with open('db/wait_responses.txt', 'r', encoding='utf-8') as file:
        content = file.read()
    return model.encode(content.split('\n\n'))


WAIT_RESPONSES_EMBEDDINGS = get_wait_responses()


def insert_response_to_generate_for_audio(text, embeddings):
    """Queue *text* (with its embeddings) in Milvus for offline audio generation.

    Any pymilvus exception propagates to the caller (resolves the old
    ``#TODO raise exception``); the connection is always released.
    """
    connect()
    try:
        collection = Collection("Response")
        collection.insert([[text], embeddings])
        collection.flush()
    finally:
        # Bug fix: the original skipped disconnect() whenever insert/flush
        # raised, leaking the Milvus connection.
        disconnect()


def _to_wav_response(audio_bytes):
    """Re-encode stored audio bytes as a ``audio/wav`` HTTP response.

    NOTE(review): assumes the stored blob is readable by soundfile (i.e.
    carries a container header) -- confirm against the generation pipeline.
    The original wrapped the bytes in ``np.frombuffer(..., np.int16)`` first,
    which reinterprets the identical buffer; passing the bytes directly is
    byte-for-byte equivalent.
    """
    data, sample_rate = sf.read(io.BytesIO(audio_bytes))
    out = io.BytesIO()
    sf.write(out, data, sample_rate, format='wav')
    return Response(
        content=out.getvalue(),
        media_type="audio/wav",  # Same as the Content-Type header
    )


@app.post('/tts')
async def transcribe(text: str):
    """Return cached audio for *text*; on a miss, queue it and serve a wait clip.

    Raises HTTPException(503) when no audio (not even a wait response) is
    available -- previously this path crashed with ``np.frombuffer(None)``.
    """
    embeddings = model.encode([text])
    audio = await query(embeddings, threshold=SIMILARITY_THRESHOLD)
    if audio is None:
        # Unknown text: store it so audio can be generated offline, then
        # fall back to one of the pre-recorded "please wait" responses.
        insert_response_to_generate_for_audio(text, embeddings)
        audio = await query(WAIT_RESPONSES_EMBEDDINGS, threshold=SIMILARITY_THRESHOLD)
    if audio is None:
        raise HTTPException(status_code=503, detail="Audio not available yet")
    return _to_wav_response(audio)


if __name__ == '__main__':
    uvicorn.run('app:app', host='0.0.0.0', port=7860)