text-to-speech / app.py
Daryl Fung
changed port
cf64ca3
raw
history blame
1.86 kB
import io

import numpy as np
import soundfile as sf
import uvicorn
from bark import SAMPLE_RATE
from fastapi import FastAPI
from fastapi import HTTPException
from fastapi.responses import Response
from pymilvus import Collection
from sentence_transformers import SentenceTransformer

from db.db_connect import connect, disconnect
from db.query_db import query
# Sentence-embedding model shared by the whole module; it is loaded once,
# when the module is imported.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
model = SentenceTransformer(_EMBEDDING_MODEL_NAME)

# FastAPI application object that the route decorators below attach to.
app = FastAPI()
def get_wait_responses():
    """Embed the canned wait responses stored in ``db/wait_responses.txt``.

    The file is split on blank lines (``'\\n\\n'``); every non-blank chunk is
    treated as one response and the list of chunks is encoded with the
    module-level sentence-transformer ``model``.

    Returns:
        The result of ``model.encode`` for the list of responses.

    Raises:
        OSError: If the responses file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding: the platform default is not UTF-8 everywhere, which
    # would make the embeddings depend on the host's locale.
    with open('db/wait_responses.txt', 'r', encoding='utf-8') as file:
        content = file.read()

    # A trailing blank line or three or more consecutive newlines produce
    # empty/whitespace-only chunks; skip them so no embedding is computed
    # for an empty string.
    contents = [chunk for chunk in content.split('\n\n') if chunk.strip()]
    return model.encode(contents)
# Embeddings of the canned wait responses, computed once at import time and
# used by the /tts endpoint as the fallback query when no stored audio
# matches the requested text.
WAIT_RESPONSES_EMBEDDINGS = get_wait_responses()
#TODO raise exception
def insert_response_to_generate_for_audio(text, embeddings):
    """Store *text* and its embeddings in the ``Response`` collection.

    The endpoint calls this when no audio exists yet for *text*, so that the
    text is recorded as a response for which audio should be generated.

    Args:
        text: The response text to store.
        embeddings: Embeddings for *text*, as produced by
            ``model.encode([text])`` in the caller.

    Raises:
        Any exception raised by the connection helpers or by the collection
        calls propagates to the caller; the connection is still released.
    """
    connect()
    try:
        collection = Collection("Response")
        # One list per field: the text column, then the embedding column.
        # NOTE(review): assumes the collection schema has exactly these two
        # insertable fields in this order -- confirm against the schema.
        data = [
            [text],
            embeddings,
        ]
        collection.insert(data)
        collection.flush()
    finally:
        # Always release the connection, even when insert/flush fails;
        # previously an error here skipped disconnect() entirely.
        disconnect()
@app.post('/tts')
async def transcribe(text: str):
    """Return WAV audio for *text*, falling back to a wait response.

    The text is embedded and looked up with ``query``. When ``query`` yields
    ``None`` the text is stored so audio can be generated for it, and the
    wait-response embeddings are queried instead.

    Args:
        text: The text whose audio is requested.

    Returns:
        A ``Response`` with ``audio/wav`` content.

    Raises:
        HTTPException: 503 when neither the requested audio nor a wait
            response could be found.
    """
    embeddings = model.encode([text])
    audio = await query(embeddings, threshold=0.8)

    if audio is None:
        # Nothing stored for this text yet: record it as a response to be
        # used to generate audio, and answer with a wait response for now.
        insert_response_to_generate_for_audio(text, embeddings)
        audio = await query(WAIT_RESPONSES_EMBEDDINGS, threshold=0.8)

    if audio is None:
        # Previously this fell through and crashed while decoding ``None``,
        # surfacing as an opaque 500. Report the condition explicitly.
        raise HTTPException(
            status_code=503,
            detail="No audio is available for this text yet.",
        )

    # ``audio`` holds an encoded sound file, so it can be handed to soundfile
    # as-is. The former detour through an int16 NumPy view added nothing and
    # raised ValueError for buffers with an odd number of bytes.
    samples, sample_rate = sf.read(io.BytesIO(audio))

    # Re-encode as WAV so the body always matches the declared media type.
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sample_rate, format='wav')

    return Response(
        content=wav_buffer.getvalue(),
        media_type="audio/wav",  # Same as the Content-Type header
    )
if __name__ == "__main__":
    # Run as a script: serve the app defined in this module on all network
    # interfaces, port 7860.
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=7860,
    )