# Hugging Face Space (status: Running)
import gradio as gr | |
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, AutoTokenizer, BartForConditionalGeneration | |
import torch | |
import librosa | |
# Run inference on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face Hub checkpoints used by this app.
SUMMARIZER_CHECKPOINT = "facebook/bart-large-cnn"
ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"

# BART tokenizer/model pair used to summarize the transcribed text.
tokenizer = AutoTokenizer.from_pretrained(SUMMARIZER_CHECKPOINT)
summarizer = BartForConditionalGeneration.from_pretrained(SUMMARIZER_CHECKPOINT).to(device)

# Wav2Vec2 processor/model pair used to turn speech into text.
processor = Wav2Vec2Processor.from_pretrained(ASR_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(ASR_CHECKPOINT).to(device)
def _transcribe(audio_path):
    """Return the Wav2Vec2 transcription of the audio file at *audio_path*."""
    # Resample to 16 kHz on load so the waveform matches the rate handed to
    # the processor below.
    waveform, sampling_rate = librosa.load(audio_path, sr=16000)
    input_values = processor(
        waveform, sampling_rate=sampling_rate, return_tensors="pt"
    ).input_values.to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(input_values).logits

    # Take the highest-scoring token id per frame and let the processor
    # decode the id sequence into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


def _summarize(text):
    """Return a BART summary of *text* (input truncated to 1024 tokens)."""
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=1024
    ).to(device)
    result = summarizer.generate(
        inputs["input_ids"],
        # Pass the mask explicitly instead of letting generate() infer it.
        attention_mask=inputs["attention_mask"],
        min_length=10,
        max_length=256,
        no_repeat_ngram_size=2,
        encoder_no_repeat_ngram_size=2,
        repetition_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(result[0], skip_special_tokens=True)


def transcribe_and_summarize(audioFile):
    """Transcribe an audio file and summarize the resulting text.

    Args:
        audioFile: Path to the audio file to process. May be ``None`` or
            empty when the form is submitted without an upload.

    Returns:
        A ``(transcription, summary)`` tuple of strings. Both are empty when
        no audio was supplied; the summary is empty when the transcription
        contains no text.
    """
    # Nothing was uploaded: return empty outputs instead of letting the
    # audio loader fail on a missing path.
    if not audioFile:
        return "", ""

    transcription = _transcribe(audioFile)

    # With no recognized speech there is nothing to summarize; running the
    # summarizer on an empty string would only produce unrelated text.
    if not transcription.strip():
        return transcription, ""

    return transcription, _summarize(transcription)
# Gradio UI: one audio upload in, two text boxes out.
audio_input = gr.Audio(type="filepath", label="Upload Audio")
text_outputs = [
    gr.Textbox(label="Transcription"),
    gr.Textbox(label="Summary"),
]

iface = gr.Interface(
    fn=transcribe_and_summarize,
    inputs=audio_input,
    outputs=text_outputs,
    title="Audio Transcription and Summarization",
    description="Transcribe and summarize audio using Wav2Vec2 and BART.",
)

# Start serving the app.
iface.launch()