translatube / translatube.py
Maximofn's picture
Pre-translate audio. The code is written but fails, so I commented it out.
d728ac6
raw
history blame
No virus
5.85 kB
import gradio as gr
import pyperclip
import urllib.parse as urlparse
from pytube import YouTube
import re
import subprocess
from lang_list import ORIGINAL_LANGUAGE_NAME_TO_CODE, S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES
import torch
from seamless_communication.models.inference import Translator
import time
# Identifiers for the supported streaming sites. They are stored in the hidden
# "Stream page" textbox and compared against in get_audio_from_video.
YOUTUBE = "youtube"
TWITCH = "twitch"
# Initialize a Translator object with a multitask model, vocoder on the GPU.
# The initialization below is currently commented out, so no model is loaded.
# translator = Translator("seamlessM4T_large", vocoder_name_or_card="vocoder_36langs", device=torch.device("cuda:0"))
def copy_url_from_clipboard():
    """Return whatever ``pyperclip.paste()`` reports as the clipboard content."""
    clipboard_text = pyperclip.paste()
    return clipboard_text
def clear_video_url():
    """Return an empty string (the value used to blank a text field)."""
    empty_text = str()
    return empty_text
def get_youtube_thumbnail(video_id):
    """Build the ``img.youtube.com`` URL of the ``0.jpg`` thumbnail for *video_id*."""
    return f"https://img.youtube.com/vi/{video_id}/0.jpg"
def get_youtube_video_id(url):
    """Return the thumbnail URL for the YouTube video referenced by *url*.

    Despite its name, this function does not return the video id itself: it
    extracts the id from *url* and passes it to ``get_youtube_thumbnail``.

    The id is looked up, in order, in:
      * the ``v`` query parameter (``.../watch?v=<id>``),
      * the first path segment of a ``youtu.be`` short link,
      * the segment following ``embed``, ``shorts``, ``live`` or ``v``.

    Args:
        url: The video URL as typed by the user (scheme optional).

    Returns:
        The thumbnail URL, or ``None`` when no video id can be found.
    """
    parsed_url = urlparse.urlparse(url)

    # Classic watch URL: the id is the first value of the "v" parameter.
    query_ids = urlparse.parse_qs(parsed_url.query).get('v')
    if query_ids:
        return get_youtube_thumbnail(query_ids[0])

    segments = [segment for segment in parsed_url.path.split("/") if segment]
    host = (parsed_url.hostname or "").lower()
    if not host and segments:
        # Without a scheme, urlparse leaves the host as the first path segment.
        host, segments = segments[0].lower(), segments[1:]

    # Short link: https://youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and segments:
        return get_youtube_thumbnail(segments[0])

    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return get_youtube_thumbnail(segments[1])

    return None
def is_valid_url(url):
    """Build the UI updates to apply when the video URL textbox changes.

    The URL is classified by substring: anything containing "youtube" or
    "youtu.be" is treated as YouTube, anything containing "twitch" as Twitch.

    Args:
        url: Current content of the video URL textbox.

    Returns:
        A 7-tuple of Gradio components, in the order
        ``(image, source_languaje, target_languaje, translate_button,
        stream_page, original_audio, translated_audio)``.
        For a recognized URL the components are made visible and the hidden
        "Stream page" textbox carries ``YOUTUBE`` or ``TWITCH``. For anything
        else (empty text, unsupported site, YouTube URL without a video id)
        every component is hidden, so the handler always yields one value
        per output instead of ``None``.
    """
    lowered_url = (url or "").lower()

    if "youtube" in lowered_url or "youtu.be" in lowered_url:
        preview_image = get_youtube_video_id(url)
        page_name = YOUTUBE
    elif "twitch" in lowered_url:  # also covers "twitch.tv"
        preview_image = "assets/twitch.webp"
        page_name = TWITCH
    else:
        preview_image = None
        page_name = ""

    if not preview_image:
        # Nothing usable was entered: hide the preview and the controls.
        return (
            gr.Image(visible=False),
            gr.Dropdown(visible=False),
            gr.Dropdown(visible=False),
            gr.Button(visible=False),
            gr.Textbox(value="", visible=False),
            gr.Audio(visible=False),
            gr.Audio(visible=False),
        )

    return (
        gr.Image(value=preview_image, visible=True, show_download_button=False, container=False),
        gr.Dropdown(visible=True, label="Source languaje", show_label=True, value="English", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True),
        gr.Dropdown(visible=True, label="Target languaje", show_label=True, value="Español", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True),
        gr.Button(size="sm", value="translate", min_width="10px", scale=0, visible=True),
        gr.Textbox(value=page_name, label="Stream page", elem_id="stream_page", visible=False),
        gr.Audio(label="Original audio", elem_id="original_audio", visible=True, interactive=False),
        gr.Audio(label="Translated audio", elem_id="translated_audio", visible=True),
    )
def _abr_to_kbps(stream):
    """Return the leading integer of ``stream.abr`` (e.g. "128kbps" -> 128), or 0."""
    match = re.match(r"\d+", stream.abr or "")
    return int(match.group(0)) if match else 0


def get_audio_from_video(url, stream_page):
    """Download the audio track of a video and return it as a Gradio Audio.

    Args:
        url: URL of the video.
        stream_page: ``YOUTUBE`` or ``TWITCH``; selects the download method.

    Returns:
        A ``gr.Audio`` pointing at the downloaded file (``audio.mp3`` for
        YouTube, ``audio.mkv`` for Twitch), or ``None`` when *stream_page*
        is neither of the two supported values.

    Raises:
        gr.Error: If the YouTube video exposes no ``audio/mp4`` stream, or no
            10-digit video id can be found in a Twitch URL.
        subprocess.CalledProcessError: If ``twitch-dl`` exits with an error.
    """
    if stream_page == YOUTUBE:
        yt = YouTube(url)
        audio_streams = list(yt.streams.filter(mime_type="audio/mp4"))
        if not audio_streams:
            raise gr.Error("No audio stream found for this YouTube video")

        # Pick the highest bitrate. The abr values are strings such as
        # "128kbps", so they must be compared numerically: a plain string
        # sort would rank "128kbps" before "48kbps".
        audio_stream = max(audio_streams, key=_abr_to_kbps)

        # Download the audio.
        # NOTE(review): the stream is audio/mp4 but is saved with an .mp3
        # extension; the file name is kept unchanged here.
        audio_stream.download(filename="audio.mp3")
        return gr.Audio("audio.mp3", label="Original audio", elem_id="original_audio", visible=True)

    elif stream_page == TWITCH:
        # Get the video id (first run of 10 digits in the URL).
        id_match = re.search(r"\d{10}", url)
        if id_match is None:
            raise gr.Error("Could not find a 10-digit Twitch video id in the URL")
        video_id = id_match.group(0)

        # Download the audio; check=True surfaces a failing twitch-dl run
        # instead of returning a path to a file that was never written.
        subprocess.run(
            ["twitch-dl", "download", "--overwrite", "-q", "audio_only", "--output", "audio.mkv", video_id],
            check=True,
        )
        return gr.Audio("audio.mkv", label="Original audio", elem_id="original_audio", visible=True)

    return None
# def translate_audio(input_audio, target_languaje):
# print("Translating audio...")
# time.sleep(5)
# print("Translating audio...")
# _, wav, _ = translator.predict(input_audio, "s2st", target_languaje)
# return gr.Audio(wav, label="Translated audio", elem_id="translated_audio", visible=True)
# Build the Gradio interface and wire the event handlers.
# NOTE(review): the nesting of the layout containers below is assumed;
# confirm it against the intended page layout.
with gr.Blocks() as demo:
    # URL entry row: textbox plus "paste from clipboard" and "clear" buttons.
    with gr.Row(variant="panel"):
        url_textbox = gr.Textbox(placeholder="Add video URL here", label="Video URL", elem_id="video_url", scale=1, interactive=True)
        copy_button = gr.Button(size="sm", icon="icons/copy.svg", value="", min_width="10px", scale=0)
        delete_button = gr.Button(size="sm", icon="icons/delete.svg", value="", min_width="10px", scale=0)
        copy_button.click(fn=copy_url_from_clipboard, outputs=url_textbox)
        delete_button.click(fn=clear_video_url, outputs=url_textbox)
    # Hidden textbox holding the detected site name (set by is_valid_url,
    # read by get_audio_from_video).
    stream_page = gr.Textbox(label="Stream page", elem_id="stream_page", visible=False)
    # Every component below starts hidden; is_valid_url returns replacements
    # with visible=True for them.
    visible = False
    with gr.Row(equal_height=False):
        image = gr.Image(visible=visible, scale=1)
        with gr.Column():
            with gr.Row():
                source_languaje = gr.Dropdown(visible=visible, label="Source languaje", choices=ORIGINAL_LANGUAGE_NAME_TO_CODE, scale=1, interactive=True)
                target_languaje = gr.Dropdown(visible=visible, label="Target languaje", choices=S2ST_TARGET_ORIGINAL_LANGUAGE_NAMES, scale=1, interactive=True)
            translate_button = gr.Button(size="lg", value="translate", min_width="10px", visible=visible)
    original_audio = gr.Audio(label="Original audio", elem_id="original_audio", visible=visible)
    translated_audio = gr.Audio(label="Translated audio", elem_id="translated_audio", visible=visible)
    # Re-evaluate the URL on every change of the textbox; the outputs list
    # must match the order of the tuple returned by is_valid_url.
    url_textbox.change(fn=is_valid_url, inputs=url_textbox, outputs=[image, source_languaje, target_languaje, translate_button, stream_page, original_audio, translated_audio])
    # The "translate" button currently only downloads the original audio;
    # the translation handler below is commented out.
    translate_button.click(fn=get_audio_from_video, inputs=[url_textbox, stream_page], outputs=original_audio)
    # original_audio.change(fn=translate_audio, inputs=[original_audio, target_languaje], outputs=translated_audio)
demo.launch()