Spaces: Running on L40S
Update app.py
app.py CHANGED
@@ -22,6 +22,7 @@ from decord import VideoReader
 from PIL import Image
 import copy
 import cv2
+import subprocess
 
 import importlib
 import torch
@@ -349,7 +350,7 @@ def test_fn(model, device, iteration, candidate_json_path, test_path, cfg, audio
     res_motion = []
     counter = 0
     for path, is_continue in zip(path_list, is_continue_list):
-        if
+        if False:
             # time is limited if we create graph on hugging face, lets skip blending.
             res_motion_current = path_visualization(
                 graph, path, is_continue, os.path.join(save_dir, f"audio_{idx}_retri_{counter}.mp4"), audio_path=audio_path, return_motion=True, verbose_continue=True
@@ -481,7 +482,7 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
     new_width = int(original_width * (max_length / original_height))
 
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
-    out = cv2.VideoWriter(output_path, fourcc, fps, (new_width, new_height))
+    out = cv2.VideoWriter(output_path.replace(".mp4", "_fps.mp4"), fourcc, fps, (new_width, new_height))
 
     frames_to_save = fps * 20
     frame_count = 0
@@ -498,6 +499,14 @@ def save_first_10_seconds(video_path, output_path="./save_video.mp4", max_length
 
     cap.release()
     out.release()
+    command = [
+        'ffmpeg',
+        '-i', output_path.replace(".mp4", "_fps.mp4"),
+        '-vf', 'minterpolate=fps=30:mi_mode=mci:mc_mode=aobmc:vsbmc=1',
+        output_path
+    ]
+    subprocess.run(command)
+    os.remove(output_path.replace(".mp4", "_fps.mp4"))
 
 
 character_name_to_yaml = {
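The two hunks above split rendering into two passes: OpenCV writes the resized frames at the source fps into a temporary "_fps.mp4" file, then ffmpeg's minterpolate filter (motion-compensated interpolation, mi_mode=mci) resamples it to 30 fps. Note that `subprocess.run(command)` never checks ffmpeg's exit status, so the temporary file is deleted even when the encode fails. A minimal sketch of the same step with error handling; the helper name is illustrative:

```python
import os
import subprocess

def interpolate_to_30fps(src_path: str, dst_path: str) -> None:
    # minterpolate synthesizes intermediate frames to reach 30 fps;
    # -y overwrites dst_path if it already exists.
    command = [
        "ffmpeg", "-y",
        "-i", src_path,
        "-vf", "minterpolate=fps=30:mi_mode=mci:mc_mode=aobmc:vsbmc=1",
        dst_path,
    ]
    # check=True raises CalledProcessError on a non-zero exit, so the
    # temporary file is only removed after a successful encode.
    subprocess.run(command, check=True)
    os.remove(src_path)
```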
@@ -510,6 +519,7 @@ character_name_to_yaml = {
 
 @spaces.GPU(duration=200)
 def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
+    os.system("rm -r ./outputs/")
     cfg = prepare_all("./configs/gradio.yaml")
     cfg.seed = seed
     seed_everything(cfg.seed)
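`os.system("rm -r ./outputs/")` clears the previous run's results but depends on a POSIX shell and silently ignores failures. A portable sketch using the standard library instead:

```python
import shutil

# Same cleanup without spawning a shell; ignore_errors=True mirrors
# rm -r's tolerance of a missing folder.
shutil.rmtree("./outputs/", ignore_errors=True)
```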
@@ -601,8 +611,8 @@ examples_video = [
 ]
 
 combined_examples = [
-    ["./datasets/cached_audio/
-    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
+    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/female_test_V1.mp4", 2024],
+    # ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
 ]
 
 
@@ -641,23 +651,29 @@ def make_demo():
 
         # Create a gallery with 5 videos
         with gr.Row():
-            video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
-            video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
-            video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
-            video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
-            video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
+            video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0", watermark="./datasets/watermark.png")
+            video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1", watermark="./datasets/watermark.png")
+            video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2", watermark="./datasets/watermark.png")
+            video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3", watermark="./datasets/watermark.png")
+            video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4", watermark="./datasets/watermark.png")
         with gr.Row():
-            video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
-            video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
-            video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
-            video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
-            video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
+            video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5", watermark="./datasets/watermark.png")
+            video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6", watermark="./datasets/watermark.png")
+            video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7", watermark="./datasets/watermark.png")
+            video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8", watermark="./datasets/watermark.png")
+            video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9", watermark="./datasets/watermark.png")
 
         with gr.Row():
             gr.Markdown(
                 """
                 <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
-                This is an open-source project supported by Hugging Face's free
+                This is an open-source project supported by Hugging Face's free L40S GPU. Runtime is limited, so it operates in low-quality mode. Some generated results from high-quality mode are shown above.
+                <br>
+                News:
+                <br>
+                [10/15]: Add watermark, fix bugs on custom character by downgrades to py3.9
+                <br>
+                [10/14]: Hugging face supports free L40S GPU for this project now!
                 </div>
                 """
             )
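This hunk rebinds the same five names (video1 through video5) in the second row, which is harmless only because the handles are never used again. A sketch that builds both rows in a loop, assuming the same file layout and the `watermark` parameter used in this diff; it must be called inside a `gr.Blocks` context:

```python
import gradio as gr

# demo clip numbers in on-screen order ("Demo 0" .. "Demo 9"),
# matching the ordering in the diff above
DEMO_CLIPS = [1, 2, 3, 4, 5, 6, 0, 7, 8, 9]

def demo_gallery():
    videos = []
    for row in range(2):
        with gr.Row():
            for col in range(5):
                idx = 5 * row + col
                videos.append(gr.Video(
                    value=f"./datasets/cached_audio/demo{DEMO_CLIPS[idx]}.mp4",
                    label=f"Demo {idx}",
                    watermark="./datasets/watermark.png",
                ))
    return videos
```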
@@ -668,13 +684,15 @@ def make_demo():
                                       interactive=False,
                                       autoplay=False,
                                       loop=False,
-                                      show_share_button=True)
+                                      show_share_button=True,
+                                      watermark="./datasets/watermark.png")
             with gr.Column(scale=4):
                 video_output_2 = gr.Video(label="Generated video - 2",
                                       interactive=False,
                                       autoplay=False,
                                       loop=False,
-                                      show_share_button=True)
+                                      show_share_button=True,
+                                      watermark="./datasets/watermark.png")
             with gr.Column(scale=1):
                 file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
                 file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
@@ -682,8 +700,6 @@ def make_demo():
                     <div style="display: flex; justify-content: center; align-items: center; text-align: left;">
                     Details of the low-quality mode:
                     <br>
-                    0. for free users, hugging face zero-gpu has quota, if you see "over quota", please try it later, e.g., after 30 mins. for saving your quota, this project is estimated to run around 120~160s. by the following trade-off.
-                    <br>
                     1. lower resolution, video resized as long-side 512 and keep aspect ratio.
                     <br>
                     2. subgraph instead of full-graph, causing noticeable "frame jumps".
@@ -733,17 +749,16 @@ def make_demo():
             outputs=[video_output_1, video_output_2, file_output_1, file_output_2]
         )
 
-
-
-
-
-
-
-
-
-
-
-        # )
+        with gr.Row():
+            with gr.Column(scale=4):
+                gr.Examples(
+                    examples=combined_examples,
+                    inputs=[audio_input, video_input, seed_input],  # Both audio and video as inputs
+                    outputs=[video_output_1, video_output_2, file_output_1, file_output_2],
+                    fn=tango,  # Function that processes both audio and video inputs
+                    label="Select Combined Audio and Video Examples (Cached)",
+                    cache_examples=True
+                )
 
     return Interface
 
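With `cache_examples=True`, Gradio runs `fn` (here `tango`) on every listed example ahead of time and replays the stored outputs when a user clicks one, which is why both `fn` and `outputs` must be supplied; selecting a cached example then costs no GPU time at serving time.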
@@ -752,4 +767,4 @@ if __name__ == "__main__":
     os.environ["MASTER_PORT"]='8675'
 
     demo = make_demo()
-    demo.launch(share=True)
+    demo.launch(share=True)
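As far as I know, Gradio ignores `share=True` when an app runs inside a Hugging Face Space (it logs a notice and serves the app normally), so this flag mainly affects local runs.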