File size: 5,848 Bytes
eeb01ec
b597f21
 
 
495e2d9
 
cfd5440
d728ac6
 
 
eeb01ec
b597f21
 
 
d728ac6
 
 
b597f21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d728ac6
b597f21
cfd5440
 
b597f21
 
 
 
 
cfd5440
 
b597f21
 
 
 
 
 
 
d728ac6
cfd5440
 
b597f21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495e2d9
 
 
 
 
 
 
b597f21
d728ac6
 
 
 
 
 
 
b597f21
 
 
 
 
 
 
 
 
 
 
 
 
cfd5440
 
 
 
 
b597f21
 
 
cfd5440
b597f21
d728ac6
eeb01ec
9959a9b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
import pyperclip
import urllib.parse as urlparse
from pytube import YouTube
import re
import subprocess
from lang_list import ORIGINAL_LANGUAGE_NAME_TO_CODE, S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES
import torch
from seamless_communication.models.inference import Translator
import time

# Identifiers for the supported streaming platforms. is_valid_url stores one of
# them in the hidden "Stream page" textbox and get_audio_from_video branches on it.
YOUTUBE = "youtube"
TWITCH = "twitch"

# Initialize a Translator object with a multitask model, vocoder on the GPU.
# translator = Translator("seamlessM4T_large", vocoder_name_or_card="vocoder_36langs", device=torch.device("cuda:0"))

def copy_url_from_clipboard():
    """Return whatever text pyperclip reports as the current clipboard content.

    Used as the click handler of the copy button; the returned string becomes
    the new value of the URL textbox.
    """
    # NOTE(review): presumably this reads the clipboard of the machine running
    # the Python process, not the browser's — confirm for remote deployments.
    clipboard_text = pyperclip.paste()
    return clipboard_text

def clear_video_url():
    """Return an empty string, used to blank the video URL textbox."""
    return str()

def get_youtube_thumbnail(video_id):
    """Build the thumbnail URL for a YouTube video.

    Args:
        video_id: YouTube video identifier.

    Returns:
        The ``img.youtube.com`` URL of the video's ``0.jpg`` thumbnail.
    """
    return f"https://img.youtube.com/vi/{video_id}/0.jpg"


def get_youtube_video_id(url):
    """Return the thumbnail URL of the video referenced by a YouTube URL.

    Despite its name, this function returns the thumbnail URL rather than the
    bare video id. Supported forms:

    * ``...?v=<id>`` (any host, as before),
    * ``youtu.be/<id>``,
    * ``youtube.com/{shorts,embed,live,v}/<id>``.

    Args:
        url: The URL typed or pasted by the user; the scheme may be missing.

    Returns:
        The thumbnail URL, or ``None`` when no video id can be extracted
        (including empty or malformed URLs).
    """
    if not url:
        return None

    candidate = url.strip()
    # urlparse only fills in the host when a scheme separator is present.
    if "://" not in candidate:
        candidate = "https://" + candidate

    try:
        parsed_url = urlparse.urlparse(candidate)
        host = (parsed_url.hostname or "").lower()
    except ValueError:
        # Raised by urlparse for malformed hosts (e.g. an unbalanced "[").
        return None

    # Classic watch URL: the id is in the "v" query parameter.
    video_id = urlparse.parse_qs(parsed_url.query).get("v")
    if video_id:
        return get_youtube_thumbnail(video_id[0])

    segments = [segment for segment in parsed_url.path.split("/") if segment]

    # Short links carry the id as the first path segment.
    if host == "youtu.be" or host.endswith(".youtu.be"):
        if segments:
            return get_youtube_thumbnail(segments[0])
        return None

    # Shorts / embed / live / legacy "v" URLs carry it as the second segment.
    is_youtube_host = (
        host == "youtube.com"
        or host.endswith(".youtube.com")
        or host == "youtube-nocookie.com"
        or host.endswith(".youtube-nocookie.com")
    )
    if is_youtube_host and len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        return get_youtube_thumbnail(segments[1])

    return None

def _shown_url_outputs(thumbnail, platform):
    """Build the seven component updates that reveal the translation controls."""
    return (
        gr.Image(value=thumbnail, visible=True, show_download_button=False, container=False),
        gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True),
        gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True),
        gr.Button(size="sm", value="translate", min_width="10px", scale=0, visible=True),
        # The platform travels to the download step through this hidden textbox.
        gr.Textbox(value=platform, label="Stream page", elem_id="stream_page", visible=False),
        gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False),
        gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True),
    )


def _hidden_url_outputs():
    """Build the seven component updates that hide the translation controls."""
    return (
        gr.update(visible=False),            # thumbnail image
        gr.update(visible=False),            # source language dropdown
        gr.update(visible=False),            # target language dropdown
        gr.update(visible=False),            # translate button
        gr.update(value="", visible=False),  # stream page: forget the platform
        gr.update(visible=False),            # original audio
        gr.update(visible=False),            # translated audio
    )


def is_valid_url(url):
    """Show or hide the translation controls depending on the entered URL.

    Intended as the ``change`` handler of the URL textbox. It always returns
    one update per wired output, in this order: image, source language
    dropdown, target language dropdown, translate button, stream page textbox,
    original audio, translated audio.

    Args:
        url: Current content of the URL textbox (may be empty or partial).

    Returns:
        A 7-tuple of component updates. For a YouTube URL with an extractable
        video id, or any URL containing "twitch", the controls are made
        visible and the stream page is set to ``YOUTUBE`` / ``TWITCH``.
        Otherwise every control is hidden, so a handler that runs on each
        keystroke never returns ``None`` for a multi-output event.
    """
    lowered = (url or "").lower()

    if "youtube" in lowered or "youtu.be" in lowered:
        thumbnail = get_youtube_video_id(url)
        if thumbnail:
            return _shown_url_outputs(thumbnail, YOUTUBE)
    elif "twitch" in lowered:  # also covers "twitch.tv"
        return _shown_url_outputs("assets/twitch.webp", TWITCH)

    return _hidden_url_outputs()

def _abr_to_kbps(abr):
    """Return the leading integer of a bitrate label such as "128kbps" (0 if absent)."""
    match = re.match(r"\d+", abr or "")
    return int(match.group(0)) if match else 0


def get_audio_from_video(url, stream_page):
    """Download the audio track of a video and return it as a Gradio audio update.

    Args:
        url: URL of the YouTube video or Twitch VOD.
        stream_page: Platform identifier, either ``YOUTUBE`` or ``TWITCH``.

    Returns:
        A ``gr.Audio`` pointing at the downloaded file (``audio.mp3`` for
        YouTube, ``audio.mkv`` for Twitch), or ``None`` when ``stream_page``
        is not a known platform.

    Raises:
        gr.Error: If no audio stream is available, no Twitch video id can be
            found in the URL, or ``twitch-dl`` fails.
    """
    if stream_page == YOUTUBE:
        yt = YouTube(url)
        audio_streams = yt.streams.filter(mime_type="audio/mp4")

        # Compare bitrates numerically: as strings, "128kbps" sorts before
        # "48kbps" and "256kbps", so a lexicographic sort picks the wrong one.
        best_stream = max(
            audio_streams,
            key=lambda stream: _abr_to_kbps(stream.abr),
            default=None,
        )
        if best_stream is None:
            raise gr.Error("No audio/mp4 stream is available for this video.")

        # NOTE(review): the stream is audio/mp4 but is saved with an .mp3 name;
        # the file name is kept unchanged in case other code relies on it.
        best_stream.download(filename="audio.mp3")

        return gr.Audio("audio.mp3", label="Original audio", elem_id="original_audio", visible=True)

    if stream_page == TWITCH:
        # Prefer the id after "videos/"; fall back to the first run of ten
        # digits, which is what this function matched originally.
        match = re.search(r"videos/(\d+)", url) or re.search(r"(\d{10})", url)
        if match is None:
            raise gr.Error("Could not find a Twitch video id in the URL.")
        video_id = match.group(1)

        # check=True so a failed download is reported instead of silently
        # returning a missing or stale audio.mkv.
        try:
            subprocess.run(
                ["twitch-dl", "download", "--overwrite", "-q", "audio_only", "--output", "audio.mkv", video_id],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as err:
            raise gr.Error("twitch-dl could not download the audio.") from err

        return gr.Audio("audio.mkv", label="Original audio", elem_id="original_audio", visible=True)

    # Unknown platform: nothing to download.
    return None

# def translate_audio(input_audio, target_languaje):
#     print("Translating audio...")
#     time.sleep(5)
#     print("Translating audio...")
#     _, wav, _ = translator.predict(input_audio, "s2st", target_languaje)
#     return gr.Audio(wav, label="Translated audio", elem_id="translated_audio", visible=True)


# Build the Gradio UI: URL input row, controls that start hidden, and event wiring.
with gr.Blocks() as demo:
    # URL entry row with paste-from-clipboard and clear buttons.
    with gr.Row(variant="panel"):
        url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
        # NOTE(review): min_width is passed as the string "10px"; Gradio appears
        # to expect an integer number of pixels here — confirm.
        copy_button   = gr.Button(size="sm", icon="icons/copy.svg",   value="", min_width="10px", scale=0)
        delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
    # Both buttons write their handler's return value into the URL textbox.
    copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
    delete_button.click(fn=clear_video_url, outputs=url_textbox)

    # Hidden textbox carrying the detected platform from is_valid_url to get_audio_from_video.
    stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
    # Every control below starts hidden; is_valid_url returns visible versions
    # of them once the URL is recognised.
    visible = False
    with gr.Row(equal_height=False):
        image = gr.Image(visible=visible, scale=1)
        with gr.Column():
            with gr.Row():
                source_languaje = gr.Dropdown(visible=visible, label="Source languaje", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True)
                target_languaje = gr.Dropdown(visible=visible, label="Target languaje", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True)
            translate_button = gr.Button(size="lg", value="translate", min_width="10px", visible=visible)

    original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible)
    translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
    # Re-evaluate the URL on every textbox change. The order of `outputs` must
    # match the order of the tuple returned by is_valid_url.
    url_textbox.change(fn=is_valid_url, inputs=url_textbox, outputs=[image, source_languaje, target_languaje, translate_button, stream_page, original_audio, translated_audio])
    # Clicking "translate" currently only downloads the audio; the translation
    # hook below is commented out.
    translate_button.click(fn=get_audio_from_video, inputs=[url_textbox, stream_page], outputs=original_audio)
    # original_audio.change(fn=translate_audio, inputs=[original_audio, target_languaje], outputs=translated_audio)

# Launch the app. This call is not guarded by `if __name__ == "__main__":`,
# so it also runs whenever the module is imported.
demo.launch()