whisper-resume / whisper.py
chan4lk's picture
with voice clone
4cd2ebc
raw
history blame
573 Bytes
import torch
from transformers import pipeline
from datasets import load_dataset
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
"automatic-speech-recognition",
model="openai/whisper-tiny.en",
chunk_length_s=30,
device=device,
)
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
sample = ds[0]["audio"]
prediction = pipe(sample.copy(), batch_size=8)["text"]
# we can also return timestamps for the predictions
prediction = pipe(sample.copy(), batch_size=8, return_timestamps=True)["chunks"]