File size: 3,594 Bytes
fd3d3dd
 
 
 
 
60a521e
fd3d3dd
ff1d636
 
 
 
 
fd3d3dd
ff1d636
 
 
 
 
 
 
 
fd3d3dd
 
 
 
 
 
 
ff1d636
fd3d3dd
 
 
fdf75ae
 
60a521e
fd3d3dd
 
 
 
 
 
fdf75ae
 
60a521e
 
fdf75ae
fd3d3dd
60a521e
 
 
 
 
 
 
 
 
 
 
 
 
 
ff1d636
 
fd3d3dd
 
 
ac54f8d
 
 
 
 
 
ff1d636
ed408a8
 
 
 
 
fd3d3dd
 
 
 
ff1d636
 
 
 
fd3d3dd
 
ff1d636
fd3d3dd
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import functools
import re

import gradio as gr
from pytube import YouTube
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

# Summarization checkpoints offered in the UI. Each entry maps a display
# name to the model identifier; here the two are the same string.
models = {
    checkpoint: checkpoint
    for checkpoint in (
        "Falconsai/text_summarization",
        "suriya7/bart-finetuned-text-summarization",
    )
}

# Checkpoint pre-selected in the model dropdown.
default_model = "Falconsai/text_summarization"

# Function to create a summarization pipeline
@functools.lru_cache(maxsize=4)
def create_summarization_pipeline(model_name):
    """Return a summarization pipeline for *model_name*.

    The result is memoized per model name so that repeated requests for the
    same model reuse the already-constructed pipeline instead of building a
    new one on every call. The cache is bounded so that at most a handful of
    pipelines are kept alive at once.

    Args:
        model_name: Model identifier passed to ``pipeline(..., model=...)``.
            Must be hashable (it is used as the cache key).

    Returns:
        The object returned by ``pipeline("summarization", model=model_name)``.
        NOTE(review): the same object is now shared between callers; confirm
        that is acceptable if requests are ever served concurrently.
    """
    return pipeline("summarization", model=model_name)

# Function to extract video ID from URL
def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or ``None``.

    Recognised URL shapes include ``youtube.com/watch?v=<id>`` (also when
    ``v=`` follows other query parameters), ``youtu.be/<id>``,
    ``youtube.com/embed/<id>``, ``youtube.com/e/<id>``, ``youtube.com/v/<id>``,
    ``youtube.com/shorts/<id>`` and ``youtube.com/live/<id>``.

    Args:
        url: The text to search. Anything that is not a ``str`` (for example
            ``None`` from an empty form field) is treated as "no ID".

    Returns:
        The captured ID string, or ``None`` when no ID can be found.
    """
    # re.search raises TypeError on non-string input; report "not found" instead.
    if not isinstance(url, str):
        return None

    regex = r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)|youtu\.be\/)([a-zA-Z0-9_-]{11})"
    match = re.search(regex, url)
    return match.group(1) if match else None

# Function to get YouTube transcript
def get_youtube_transcript(video_url):
    """Fetch the transcript of the video at *video_url* as text.

    Returns the transcript formatted by ``TextFormatter`` on success. On
    failure a human-readable message string is returned instead of raising:
    one message when no video ID can be extracted from the URL, another when
    the transcript lookup fails. If the failure text says subtitles are
    disabled, the result of ``get_subtitles_with_pytube`` is returned.
    """
    vid = extract_video_id(video_url)
    if not vid:
        return "Video ID could not be extracted. Please check the URL format."

    # Primary source: YouTubeTranscriptApi
    try:
        segments = YouTubeTranscriptApi.get_transcript(vid)
        return TextFormatter().format_transcript(segments)
    except Exception as exc:
        reason = str(exc)
        if "Subtitles are disabled for this video" not in reason:
            return f"An error occurred while retrieving the transcript: {reason}"
        # Subtitles disabled: fall back to pytube captions
        return get_subtitles_with_pytube(video_url)

def get_subtitles_with_pytube(video_url, language_code='en'):
    """Fetch captions for *video_url* through pytube.

    Args:
        video_url: URL of the YouTube video.
        language_code: Caption language code to look up. Defaults to ``'en'``,
            which was previously hard-coded.

    Returns:
        The output of the caption track's ``generate_srt_captions()`` when a
        track for *language_code* exists; otherwise a human-readable message
        string (URL without a recognisable video ID, no captions in that
        language, or any error raised by pytube). Never raises.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted. Please check the URL format."

    try:
        yt = YouTube(video_url)
        captions = yt.captions.get_by_language_code(language_code)
        if captions:
            return captions.generate_srt_captions()
        return "No subtitles available in the selected language."
    except Exception as e:
        # Errors are reported as text so the caller can show them to the user.
        return f"An error occurred while retrieving subtitles with pytube: {e}"

# Function to summarize YouTube video with selected model
def summarize_youtube_video(url, model_name):
    """Summarize the transcript of the YouTube video at *url*.

    Args:
        url: URL of the YouTube video.
        model_name: Identifier of the summarization model to use.

    Returns:
        The generated summary text. If the transcript cannot be obtained, the
        message string produced by the transcript lookup is returned unchanged.
        If the transcript is empty, or loading/running the model fails, a
        message starting with "An error occurred while summarizing" is
        returned. Never raises for those cases.
    """
    transcript = get_youtube_transcript(url)

    # The transcript lookup reports every failure as a plain message string.
    # Match all of those messages, and match them only at the start, so that
    # (a) "no video ID" / "no subtitles" messages are not summarized as if
    # they were a transcript and (b) a real transcript that merely contains
    # the words "An error occurred" is still summarized.
    failure_prefixes = (
        "An error occurred",
        "Video ID could not be extracted",
        "No subtitles available",
    )
    if transcript.startswith(failure_prefixes):
        return transcript

    if not transcript.strip():
        return "An error occurred while summarizing: the transcript is empty."

    # Truncate the transcript if necessary (character count, not tokens).
    max_length = 1024  # Adjust according to the model's maximum sequence length
    transcript = transcript[:max_length]

    try:
        # Model loading is inside the try so a load failure is reported the
        # same way as a failure during generation.
        summarization_pipeline = create_summarization_pipeline(model_name)
        summary = summarization_pipeline(
            transcript,
            min_length=10,
            max_length=1000,
            do_sample=False,
            # Let the tokenizer cut inputs that still exceed the model's
            # token limit after the character-based truncation above.
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return f"An error occurred while summarizing: {e}"

# Build the Gradio UI: a URL box and a model picker feed
# summarize_youtube_video, whose result is shown in a single text box.
_url_box = gr.Textbox(
    label="Enter YouTube Video URL",
    placeholder="e.g. https://www.youtube.com/watch?v=abcdef12345",
)
_model_picker = gr.Dropdown(
    choices=list(models),
    value=default_model,
    label="Select Summarization Model",
)
_summary_box = gr.Textbox(label="Video Summary")

iface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=[_url_box, _model_picker],
    outputs=_summary_box,
    title="YouTube Video Summarizer",
    description="Enter the URL of a YouTube video and select a summarization model to get a summary of its transcript.",
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()