Spaces:

HarshanaLF
/

TubeSummary

Runtime error

App Files Files Community

TubeSummary / app.py

HarshanaLF

Update app.py

ed408a8 verified 3 months ago

raw

history blame

3.59 kB

	import gradio as gr
	from transformers import pipeline
	from youtube_transcript_api import YouTubeTranscriptApi
	from youtube_transcript_api.formatters import TextFormatter
	import re
	from pytube import YouTube

	# Summarization models offered in the UI. Each entry maps a model identifier
	# to itself, so the name picked by the user is also the name that is loaded.
	models = {
	    model_id: model_id
	    for model_id in (
	        "Falconsai/text_summarization",
	        "suriya7/bart-finetuned-text-summarization",
	    )
	}

	# Model used as the initial selection
	default_model = "Falconsai/text_summarization"

	# Pipelines that have already been built, keyed by model name.
	_PIPELINE_CACHE = {}


	# Function to create a summarization pipeline
	def create_summarization_pipeline(model_name):
	    """Return a summarization pipeline for ``model_name``, building it once.

	    The first pipeline built for a given name is stored and handed back on
	    later calls, so the model is not set up again for every request.

	    Args:
	        model_name: Model identifier passed to ``pipeline`` as ``model``.

	    Returns:
	        The object returned by ``pipeline("summarization", model=model_name)``.
	    """
	    try:
	        return _PIPELINE_CACHE[model_name]
	    except KeyError:
	        # Only a successfully built pipeline is stored; if construction
	        # raises, the next call simply tries again.
	        summarizer = pipeline("summarization", model=model_name)
	        _PIPELINE_CACHE[model_name] = summarizer
	        return summarizer

	# Captures the 11-character video ID that follows "youtube.com/.../",
	# "youtube.com/v/", "youtube.com/e/", "youtube.com/embed/", a "v=" query
	# parameter, or "youtu.be/". The alternatives are separated by plain "|";
	# an escaped "\|" would match a literal pipe character and never a real URL.
	_VIDEO_ID_PATTERN = re.compile(
	    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?)\/|\S*?[?&]v=)"
	    r"|youtu\.be\/)([a-zA-Z0-9_-]{11})"
	)


	# Function to extract video ID from URL
	def extract_video_id(url):
	    """Return the 11-character YouTube video ID found in ``url``.

	    Args:
	        url: Text that may contain a YouTube link. ``None`` and the empty
	            string are accepted and treated as "no ID".

	    Returns:
	        The video ID as a string, or ``None`` when no supported link form
	        is found.
	    """
	    if not url:
	        return None
	    match = _VIDEO_ID_PATTERN.search(url)
	    return match.group(1) if match else None

	# Function to get YouTube transcript
	def get_youtube_transcript(video_url):
	    """Return the transcript text for ``video_url``, or a message on failure.

	    The transcript is requested from ``YouTubeTranscriptApi`` and rendered
	    with ``TextFormatter``. If that raises and the error text reports that
	    subtitles are disabled, the pytube-based lookup is used instead. Any
	    other error is returned as a string starting with "An error occurred".

	    Args:
	        video_url: URL of the YouTube video.

	    Returns:
	        The transcript text, or a human-readable message describing why no
	        transcript could be produced. Failures are reported through the
	        return value rather than by raising.
	    """
	    video_id = extract_video_id(video_url)
	    if video_id:
	        # Try to get transcript using YouTubeTranscriptApi
	        try:
	            segments = YouTubeTranscriptApi.get_transcript(video_id)
	            return TextFormatter().format_transcript(segments)
	        except Exception as exc:
	            reason = str(exc)
	            if "Subtitles are disabled for this video" not in reason:
	                return f"An error occurred while retrieving the transcript: {reason}"
	            # Try to get subtitles using pytube
	            return get_subtitles_with_pytube(video_url)

	    return "Video ID could not be extracted. Please check the URL format."

	def _srt_to_plain_text(srt_text):
	    """Drop SRT cue numbers and timing lines, keeping only the caption text."""
	    kept = []
	    # Cues are separated by blank lines; within a cue the optional first line
	    # is the sequence number and the next one is the "start --> end" timing.
	    for cue in re.split(r"\n\s*\n", srt_text.strip()):
	        cue_lines = cue.splitlines()
	        if cue_lines and cue_lines[0].strip().isdigit():
	            cue_lines = cue_lines[1:]
	        if cue_lines and "-->" in cue_lines[0]:
	            cue_lines = cue_lines[1:]
	        kept.extend(line.strip() for line in cue_lines if line.strip())
	    return "\n".join(kept)


	def get_subtitles_with_pytube(video_url):
	    """Return the English captions of ``video_url`` as plain text via pytube.

	    The captions are generated in SRT form and then reduced to their text
	    lines, so callers receive prose rather than cue numbers and timestamps.

	    Args:
	        video_url: URL of the YouTube video.

	    Returns:
	        The caption text, or a human-readable message when the URL has no
	        recognizable video ID, no English captions are found, or pytube
	        raises. Failures are reported through the return value.
	    """
	    if not extract_video_id(video_url):
	        return "Video ID could not be extracted. Please check the URL format."

	    try:
	        yt = YouTube(video_url)
	        captions = yt.captions.get_by_language_code('en')  # You can modify the language code
	        if captions:
	            return _srt_to_plain_text(captions.generate_srt_captions())
	        return "No subtitles available in the selected language."
	    except Exception as e:
	        return f"An error occurred while retrieving subtitles with pytube: {e}"

	# Leading text of every message the transcript helpers return in place of a
	# transcript. Matching on the prefix (rather than anywhere in the text) keeps
	# a real transcript that merely contains one of these phrases from being
	# mistaken for a failure.
	_TRANSCRIPT_FAILURE_PREFIXES = (
	    "An error occurred",
	    "Video ID could not be extracted",
	    "No subtitles available",
	)


	# Function to summarize YouTube video with selected model
	def summarize_youtube_video(url, model_name):
	    """Summarize the transcript of a YouTube video with the chosen model.

	    Args:
	        url: URL of the YouTube video.
	        model_name: Name of the summarization model to use.

	    Returns:
	        The summary text. If the transcript could not be obtained, the
	        message from the transcript step is returned unchanged; if loading
	        the model or summarizing fails, a message starting with
	        "An error occurred while summarizing" is returned.
	    """
	    transcript = get_youtube_transcript(url)
	    if transcript.startswith(_TRANSCRIPT_FAILURE_PREFIXES):
	        # Pass the failure message through instead of summarizing it.
	        return transcript

	    # Truncate the transcript if necessary
	    # NOTE(review): this limit counts characters, not model tokens — confirm
	    # that a 1024-character input is the intended cap.
	    max_length = 1024  # Adjust according to the model's maximum sequence length
	    transcript = transcript[:max_length]

	    try:
	        # Building the pipeline is inside the try so that a model that fails
	        # to load is reported as a message rather than raised to the caller.
	        summarization_pipeline = create_summarization_pipeline(model_name)
	        summary = summarization_pipeline(transcript, min_length=10, max_length=1000, do_sample=False)
	        return summary[0]['summary_text']
	    except Exception as e:
	        return f"An error occurred while summarizing: {e}"

	# Define the Gradio interface: a URL box and a model dropdown feed
	# summarize_youtube_video, and its return value is shown in a text box.
	_url_box = gr.Textbox(
	    label="Enter YouTube Video URL",
	    placeholder="e.g. https://www.youtube.com/watch?v=abcdef12345",
	)
	_model_picker = gr.Dropdown(
	    choices=list(models),
	    value=default_model,
	    label="Select Summarization Model",
	)
	_summary_box = gr.Textbox(label="Video Summary")

	iface = gr.Interface(
	    fn=summarize_youtube_video,
	    inputs=[_url_box, _model_picker],
	    outputs=_summary_box,
	    title="YouTube Video Summarizer",
	    description="Enter the URL of a YouTube video and select a summarization model to get a summary of its transcript.",
	)

	# Launch the app only when this file is executed as a script
	if __name__ == "__main__":
	    iface.launch()