"""Gradio app that fetches a YouTube video's transcript and summarizes it."""

import re
from functools import lru_cache

import gradio as gr
from pytube import YouTube
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# Define the models. The keys are shown in the dropdown and are also the
# value handed to create_summarization_pipeline().
models = {
    "Falconsai/text_summarization": "Falconsai/text_summarization",
    "suriya7/bart-finetuned-text-summarization": "suriya7/bart-finetuned-text-summarization",
}

# Default model
default_model = "Falconsai/text_summarization"

# Maximum number of transcript characters handed to the summarizer.
# Adjust according to the model's maximum sequence length.
MAX_TRANSCRIPT_CHARS = 1024

# Compiled once at import time; captures the 11-character video id.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts)\/|\S*?[?&]v=)"
    r"|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)

# Every status/error message produced by the transcript helpers below starts
# with one of these prefixes. Matching on the *start* of the text (instead of
# searching anywhere in it) keeps a genuine transcript that merely contains
# one of these phrases from being mistaken for an error.
_ERROR_PREFIXES = (
    "Video ID could not be extracted",
    "No subtitles available",
    "An error occurred",
)


def _is_error_message(text):
    """Return True if *text* is one of the helpers' status messages."""
    return text.startswith(_ERROR_PREFIXES)


def _srt_to_plain_text(srt):
    """Strip cue numbers and timestamp lines from SRT text, keeping captions."""
    caption_lines = []
    for cue in re.split(r"\n\s*\n", srt.strip()):
        parts = cue.splitlines()
        # A cue is laid out as: index line, timing line, caption line(s).
        if parts and parts[0].strip().isdigit():
            parts = parts[1:]
        if parts and "-->" in parts[0]:
            parts = parts[1:]
        caption_lines.extend(part.strip() for part in parts if part.strip())
    return "\n".join(caption_lines)


# Function to create a summarization pipeline
@lru_cache(maxsize=len(models))
def create_summarization_pipeline(model_name):
    """Return a "summarization" pipeline for *model_name*.

    Results are memoized (bounded by the number of configured models) so the
    same pipeline object is reused instead of being rebuilt on every request.
    """
    return pipeline("summarization", model=model_name)


# Function to extract video ID from URL
def extract_video_id(url):
    """Return the 11-character video id found in *url*, or None.

    Empty or missing input yields None instead of raising.
    """
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None


# Function to get YouTube transcript
def get_youtube_transcript(video_url):
    """Return the transcript of *video_url* as plain text.

    On failure a human-readable message is returned instead of raising; such
    messages always start with one of ``_ERROR_PREFIXES``.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted. Please check the URL format."

    # Try to get transcript using YouTubeTranscriptApi
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return TextFormatter().format_transcript(transcript)
    except Exception as e:
        error_message = str(e)
        if "Subtitles are disabled for this video" in error_message:
            # Try to get subtitles using pytube
            fallback = get_subtitles_with_pytube(video_url)
            if _is_error_message(fallback):
                return fallback
            # pytube hands back SRT; drop the numbering/timestamps so only
            # the spoken text is passed on.
            return _srt_to_plain_text(fallback)
        return f"An error occurred while retrieving the transcript: {error_message}"


def get_subtitles_with_pytube(video_url):
    """Return English captions for *video_url* in SRT format via pytube.

    On failure a human-readable message is returned instead of raising.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted. Please check the URL format."

    try:
        yt = YouTube(video_url)
        # You can modify the language code
        captions = yt.captions.get_by_language_code('en')
        if captions:
            return captions.generate_srt_captions()
        return "No subtitles available in the selected language."
    except Exception as e:
        return f"An error occurred while retrieving subtitles with pytube: {str(e)}"


# Function to summarize YouTube video with selected model
def summarize_youtube_video(url, model_name):
    """Summarize the transcript of the video at *url* with *model_name*.

    Returns the summary text, or a human-readable message when the transcript
    cannot be retrieved, is empty, or summarization fails.
    """
    transcript = get_youtube_transcript(url)
    # Any status message from the transcript helpers is shown to the user
    # as-is rather than being fed to the model.
    if _is_error_message(transcript):
        return transcript

    transcript = transcript.strip()
    if not transcript:
        return "The transcript is empty; there is nothing to summarize."

    # Truncate the transcript if necessary
    transcript = transcript[:MAX_TRANSCRIPT_CHARS]

    try:
        # Created inside the try so a model-loading failure is reported the
        # same way as a summarization failure.
        summarization_pipeline = create_summarization_pipeline(model_name)
        summary = summarization_pipeline(
            transcript, min_length=10, max_length=1000, do_sample=False
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred while summarizing: {str(e)}"


# Define the Gradio interface
iface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=[
        gr.Textbox(
            label="Enter YouTube Video URL",
            placeholder="e.g. https://www.youtube.com/watch?v=abcdef12345",
        ),
        gr.Dropdown(
            choices=list(models.keys()),
            value=default_model,
            label="Select Summarization Model",
        ),
    ],
    outputs=gr.Textbox(label="Video Summary"),
    title="YouTube Video Summarizer",
    description="Enter the URL of a YouTube video and select a summarization model to get a summary of its transcript.",
)

if __name__ == "__main__":
    iface.launch()