Spaces:

thivav
/

image-to-audio

Sleeping

File size: 755 Bytes

c826025

# Text-to-audio demo using Suno's Bark model (weights loaded from suno/bark-small)
import scipy
import scipy.io.wavfile
import torch
from transformers import AutoProcessor
from transformers import BarkModel

# Load the Bark model and its processor from ONE checkpoint name so the two
# cannot drift apart (previously the model came from "suno/bark-small" while
# the processor came from "suno/bark").
BARK_CHECKPOINT = "suno/bark-small"

model = BarkModel.from_pretrained(BARK_CHECKPOINT)

# Use the first CUDA GPU when torch reports one, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print("device: ", device)

processor = AutoProcessor.from_pretrained(BARK_CHECKPOINT)

# Text handed to the processor.
# NOTE(review): this reads like an instruction for a chat model rather than
# the narration itself -- confirm this is the text that should be spoken.
text_prompt = "You are a story teller. You can generate a story based on a simple narrative, the story be no more than 20 words."

# Put the model on the selected device before building the inputs.
model = model.to(device)

# Run the prompt through the processor, then move the result to the same
# device as the model.
inputs = processor(text_prompt)
inputs_on_device = inputs.to(device)

# Generate the speech output from the processed prompt.
speech_output = model.generate(**inputs_on_device)

# Take the first generated item, bring it back to the CPU as a NumPy array,
# and write it to disk at the sample rate given by the generation config.
sampling_rate = model.generation_config.sample_rate
waveform = speech_output[0].cpu().numpy()
scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=waveform)