Spaces: Running on Zero
artificialguybr committed
Commit 80b43a8 • 1 Parent(s): e7c3f3f

Update app.py
app.py
CHANGED
@@ -2,6 +2,7 @@ import tempfile
 import gradio as gr
 import subprocess
 import os, stat
+import uuid
 from googletrans import Translator
 from TTS.api import TTS
 import ffmpeg
@@ -13,8 +14,6 @@ import numpy as np
 import librosa
 from zipfile import ZipFile
 import shlex
-import librosa
-import numpy as np
 import cv2
 import torch
 import torchvision
@@ -28,39 +27,46 @@ st = os.stat('ffmpeg')
 os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
 
 def process_video(video, high_quality, target_language):
-
+    # Check video duration
+    video_info = ffmpeg.probe(video)
+    video_duration = float(video_info['streams'][0]['duration'])
+    if video_duration > 90:
+        return gr.Interface.Warnings("Video duration exceeds 1 minute and 30 seconds. Please upload a shorter video.")
+
+    run_uuid = uuid.uuid4().hex[:6]
+    output_filename = f"{run_uuid}_resized_video.mp4"
+
     if high_quality:
         ffmpeg.input(video).output(output_filename, vf='scale=-1:720').run()
         video_path = output_filename
     else:
         video_path = video
 
-    # Debugging Step 1: Check if video_path exists
     if not os.path.exists(video_path):
         return f"Error: {video_path} does not exist."
 
-    ffmpeg.input(video_path).output("output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
+    ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
 
-    y, sr = sf.read("output_audio.wav")
+    y, sr = sf.read(f"{run_uuid}_output_audio.wav")
     y = y.astype(np.float32)
     y_denoised = wiener(y)
-    sf.write("output_audio_denoised.wav", y_denoised, sr)
+    sf.write(f"{run_uuid}_output_audio_denoised.wav", y_denoised, sr)
 
-    sound = AudioSegment.from_file("output_audio_denoised.wav", format="wav")
-    sound = sound.apply_gain(0)
+    sound = AudioSegment.from_file(f"{run_uuid}_output_audio_denoised.wav", format="wav")
+    sound = sound.apply_gain(0)
     sound = sound.low_pass_filter(3000).high_pass_filter(100)
-    sound.export("output_audio_processed.wav", format="wav")
+    sound.export(f"{run_uuid}_output_audio_processed.wav", format="wav")
 
-    shell_command = f"ffmpeg -y -i output_audio_processed.wav -af lowpass=3000,highpass=100 output_audio_final.wav".split(" ")
+    shell_command = f"ffmpeg -y -i {run_uuid}_output_audio_processed.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
     subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
 
     model = whisper.load_model("base")
-    result = model.transcribe("output_audio_final.wav")
+    result = model.transcribe(f"{run_uuid}_output_audio_final.wav")
    whisper_text = result["text"]
     whisper_language = result['language']
-
+
     print(whisper_text)
-
+
     language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
     target_language_code = language_mapping[target_language]
     translator = Translator()
@@ -71,11 +77,9 @@ def process_video(video, high_quality, target_language):
         print("Failed to translate text. Likely an issue with token extraction in the Google Translate API.")
         translated_text = "Translation failed due to API issue."
 
-
-
     tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
-    tts.to('cuda')
-    tts.tts_to_file(translated_text, speaker_wav="output_audio_final.wav", file_path="output_synth.wav", language=target_language_code)
+    tts.to('cuda')
+    tts.tts_to_file(translated_text, speaker_wav=f"{run_uuid}_output_audio_final.wav", file_path=f"{run_uuid}_output_synth.wav", language=target_language_code)
 
     pad_top = 0
     pad_bottom = 15
@@ -83,15 +87,33 @@ def process_video(video, high_quality, target_language):
     pad_right = 0
     rescaleFactor = 1
 
-    # Debugging Step 2: Remove quotes around the video path
     video_path_fix = video_path
 
-    cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path_fix)} --audio 'output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile 'output_video.mp4'"
+    cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path_fix)} --audio '{run_uuid}_output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile '{run_uuid}_output_video.mp4'"
     subprocess.run(cmd, shell=True)
-
-    if not os.path.exists("output_video.mp4"):
-        raise FileNotFoundError("Error: output_video.mp4 was not generated.")
-
+
+    if not os.path.exists(f"{run_uuid}_output_video.mp4"):
+        raise FileNotFoundError(f"Error: {run_uuid}_output_video.mp4 was not generated.")
+
+    output_video_path = f"{run_uuid}_output_video.mp4"
+
+    # Cleanup: Delete all generated files except the final output video
+    files_to_delete = [
+        f"{run_uuid}_resized_video.mp4",
+        f"{run_uuid}_output_audio.wav",
+        f"{run_uuid}_output_audio_denoised.wav",
+        f"{run_uuid}_output_audio_processed.wav",
+        f"{run_uuid}_output_audio_final.wav",
+        f"{run_uuid}_output_synth.wav"
+    ]
+
+    for file in files_to_delete:
+        try:
+            os.remove(file)
+        except FileNotFoundError:
+            print(f"File {file} not found for deletion.")
+
+    return output_video_path
 
 iface = gr.Interface(
     fn=process_video,
@@ -100,7 +122,7 @@ iface = gr.Interface(
         gr.inputs.Checkbox(label="High Quality"),
        gr.inputs.Dropdown(choices=["English", "Spanish", "French", "German", "Italian", "Portuguese", "Polish", "Turkish", "Russian", "Dutch", "Czech", "Arabic", "Chinese (Simplified)"], label="Target Language for Dubbing")
     ],
-    outputs=gr.outputs.
+    outputs=gr.outputs.Video(),
     live=False
 )
 
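The substance of the commit: every intermediate file now carries a short per-run prefix (run_uuid) and is deleted once the dubbed video exists, so concurrent runs of the Space stop overwriting each other's output_*.wav files, and a 90-second duration guard rejects long uploads up front. Below is a minimal sketch of that naming-plus-cleanup pattern in isolation; run_pipeline and write_stub_outputs are illustrative stand-ins for the ffmpeg/Whisper/TTS/Wav2Lip steps, not functions from app.py.

    import os
    import uuid

    def write_stub_outputs(run_uuid: str) -> str:
        # Stand-in for the real pipeline steps: just create empty files.
        for name in (f"{run_uuid}_output_audio.wav", f"{run_uuid}_output_synth.wav"):
            open(name, "wb").close()
        final = f"{run_uuid}_output_video.mp4"
        open(final, "wb").close()
        return final

    def run_pipeline() -> str:
        run_uuid = uuid.uuid4().hex[:6]  # one short id shared by every file of this run
        final_output = write_stub_outputs(run_uuid)
        # Delete intermediates, tolerating any file a failed step never produced.
        for name in (f"{run_uuid}_output_audio.wav", f"{run_uuid}_output_synth.wav"):
            try:
                os.remove(name)
            except FileNotFoundError:
                pass
        return final_output

    if __name__ == "__main__":
        print(run_pipeline())  # e.g. 3f9a1c_output_video.mp4

A tempfile.TemporaryDirectory() could achieve a similar isolation, but keeping prefixed files in the working directory leaves the Wav2Lip command line and the Gradio return value unchanged.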