import datetime

import gradio as gr
import whisper

# Load the Whisper "base" checkpoint once at startup so every request reuses it.
model = whisper.load_model("base")
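# Assumption: checkpoint names as in the openai-whisper model registry; larger
# checkpoints trade speed for accuracy, e.g.:
# model = whisper.load_model("large-v3")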

def transcribe(inputs, timestamp):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = model.transcribe(inputs)

    output = ""
    if timestamp == "Yes":
        # One line per segment: "start end" timestamps, then the segment text.
        for segment in result["segments"]:
            start = datetime.timedelta(seconds=segment["start"])
            end = datetime.timedelta(seconds=segment["end"])
            output += f"{start} {end}\n{segment['text'].strip()}\n"
    else:
        output = result["text"]

    print(result)  # log the raw result (detected language, segments, text) for debugging
    return output
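
# Hypothetical smoke test without the UI ("sample.wav" is a placeholder for any
# local audio file ffmpeg can decode):
# print(transcribe("sample.wav", "Yes"))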

interface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload", "microphone"], type="filepath"),
        gr.Radio(["Yes", "No"], label="Timestamp", info="Include segment timestamps in the output."),
    ],
    outputs="text",
    title="Whisper: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! "
        "The demo runs OpenAI's open-source Whisper model locally."
    ),
)

interface.launch()
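
# Gradio can also create a temporary public link for sharing the demo:
# interface.launch(share=True)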