# Spaces status (page header captured along with this file): Runtime error
import functools
import re

import gradio as gr
from pytube import YouTube
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
# Summarization checkpoints offered in the UI dropdown.
# Keys are the labels shown to the user; values are the model identifiers
# passed to the summarization pipeline (currently identical to the labels).
models = {
    "Falconsai/text_summarization": "Falconsai/text_summarization",
    "suriya7/bart-finetuned-text-summarization": "suriya7/bart-finetuned-text-summarization",
}

# Model pre-selected in the dropdown; must be one of the keys of `models`.
default_model = "Falconsai/text_summarization"
# Function to create a summarization pipeline
@functools.lru_cache(maxsize=2)
def create_summarization_pipeline(model_name):
    """Return a summarization pipeline for ``model_name``.

    The pipeline is memoized per model name so that repeated requests reuse
    the already-loaded model instead of loading it again on every call.
    The cache is kept small and bounded because each entry holds a full
    model in memory.

    Args:
        model_name: Model identifier handed to ``pipeline`` as ``model``.

    Returns:
        The object returned by ``pipeline("summarization", model=model_name)``.
    """
    return pipeline("summarization", model=model_name)
# Matches the 11-character video ID in the common YouTube URL shapes:
# watch?v=<id>, youtu.be/<id>, /v/<id>, /e/<id>, /embed/<id>,
# /shorts/<id> and /live/<id>.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
    r"|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


# Function to extract video ID from URL
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from ``url``.

    Args:
        url: A YouTube URL. ``None``, non-string values and strings that do
            not contain a recognizable YouTube URL are all treated as
            "no ID found" rather than raising.

    Returns:
        The video ID as a string, or ``None`` when no ID could be found.
    """
    # Guard against None / non-string input, which would make re.search raise.
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
# Function to get YouTube transcript
def get_youtube_transcript(video_url):
    """Return the transcript of a YouTube video as text, or an error message.

    The transcript is requested through ``YouTubeTranscriptApi`` and rendered
    with ``TextFormatter``. If that fails with a message saying subtitles are
    disabled, the pytube-based fallback ``get_subtitles_with_pytube`` is used.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript text on success. On failure a human-readable message
        is returned instead of raising: either the "Video ID could not be
        extracted..." message, whatever the pytube fallback returns, or a
        message starting with "An error occurred while retrieving the
        transcript".
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted. Please check the URL format."

    # Try to get transcript using YouTubeTranscriptApi
    # NOTE(review): newer youtube-transcript-api releases appear to replace the
    # static get_transcript() with an instance-level fetch() - confirm against
    # the version pinned for this app.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        formatter = TextFormatter()
        text_transcript = formatter.format_transcript(transcript)
        return text_transcript
    except Exception as e:
        error_message = str(e)
        # The failure reason is only recognized through the exception text.
        if "Subtitles are disabled for this video" in error_message:
            # Try to get subtitles using pytube
            return get_subtitles_with_pytube(video_url)
        return f"An error occurred while retrieving the transcript: {error_message}"
def get_subtitles_with_pytube(video_url, language_code="en"):
    """Fetch a video's captions through pytube, or return an error message.

    Args:
        video_url: URL of the YouTube video.
        language_code: Language code of the caption track to fetch.
            Defaults to ``"en"``, the value that was previously hard-coded.

    Returns:
        The captions as produced by ``generate_srt_captions()`` when a track
        for ``language_code`` exists. Otherwise a human-readable message:
        the "Video ID could not be extracted..." message, "No subtitles
        available in the selected language.", or a message starting with
        "An error occurred while retrieving subtitles with pytube".
    """
    # The ID itself is not used below; extraction only validates the URL.
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted. Please check the URL format."

    try:
        yt = YouTube(video_url)
        captions = yt.captions.get_by_language_code(language_code)
        if captions:
            return captions.generate_srt_captions()
        return "No subtitles available in the selected language."
    except Exception as e:
        return f"An error occurred while retrieving subtitles with pytube: {str(e)}"
# Function to summarize YouTube video with selected model
def summarize_youtube_video(url, model_name):
    """Summarize the transcript of a YouTube video with the selected model.

    Args:
        url: URL of the YouTube video.
        model_name: Model identifier passed to
            ``create_summarization_pipeline``.

    Returns:
        The generated summary text. If the transcript could not be obtained,
        the failure message from the transcript helpers is returned as-is;
        if loading the model or summarizing fails, a message starting with
        "An error occurred while summarizing" is returned.
    """
    transcript = get_youtube_transcript(url)

    # The transcript helpers report failures as plain strings. Recognize every
    # such message by its prefix so that none of them is fed to the model and
    # "summarized". Prefix matching (rather than substring matching) avoids
    # rejecting a genuine transcript that merely contains one of the phrases.
    failure_prefixes = (
        "An error occurred",
        "Video ID could not be extracted",
        "No subtitles available",
    )
    if transcript.startswith(failure_prefixes):
        return transcript

    # Truncate the transcript if necessary.
    # This limit is counted in characters, not model tokens.
    max_chars = 1024
    transcript = transcript[:max_chars]

    try:
        # Model loading is inside the try block so that a failed download or
        # load is reported as a message, just like a failed summarization.
        summarization_pipeline = create_summarization_pipeline(model_name)
        summary = summarization_pipeline(
            transcript, min_length=10, max_length=1000, do_sample=False
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred while summarizing: {str(e)}"
# Define the Gradio interface: a URL textbox and a model dropdown as inputs,
# wired to summarize_youtube_video, with the summary shown in a textbox.
iface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=[
        gr.Textbox(
            label="Enter YouTube Video URL",
            placeholder="e.g. https://www.youtube.com/watch?v=abcdef12345",
        ),
        gr.Dropdown(
            choices=list(models.keys()),
            value=default_model,
            label="Select Summarization Model",
        ),
    ],
    outputs=gr.Textbox(label="Video Summary"),
    title="YouTube Video Summarizer",
    description="Enter the URL of a YouTube video and select a summarization model to get a summary of its transcript.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()