Spaces: Running on L40S
Update app.py
app.py CHANGED

@@ -482,32 +482,9 @@ character_name_to_yaml = {
     "101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
 }
 
-
-
-smplx_model = smplx.create(
-    "./emage/smplx_models/",
-    model_type='smplx',
-    gender='NEUTRAL_2020',
-    use_face_contour=False,
-    num_betas=300,
-    num_expression_coeffs=100,
-    ext='npz',
-    use_pca=False,
-)
-model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
-for param in model.parameters():
-    param.requires_grad = False
-model.smplx_model = smplx_model
-model.get_motion_reps = get_motion_reps_tensor
-
-checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
-checkpoint = torch.load(checkpoint_path)
-state_dict = checkpoint['model_state_dict']
-# new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
-model.load_state_dict(state_dict, strict=False)
-
-@spaces.GPU(duration=299)
-def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
+@spaces.GPU(duration=240)
+def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
+    cfg = prepare_all("./configs/gradio.yaml")
     cfg.seed = seed
     seed_everything(cfg.seed)
     experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
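Note on this hunk: on a ZeroGPU Space the GPU is attached only while a function decorated with @spaces.GPU is running, so the commit moves configuration and model construction out of module scope and into tango(), and lowers the requested duration from 299 s to 240 s. A minimal sketch of that pattern, with a toy torch.nn.Linear standing in for the real TANGO pipeline (the function name generate and the stand-in model are illustrative, not code from app.py):

import spaces
import torch

@spaces.GPU(duration=240)              # GPU is attached only while this call runs
def generate(x: float) -> float:
    # build or move the heavy objects inside the decorated call, not at import time
    model = torch.nn.Linear(1, 1).to("cuda").eval()
    with torch.no_grad():
        return model(torch.tensor([[x]], device="cuda")).item()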
@@ -542,13 +519,35 @@ def tango(audio_path, character_name, seed, create_graph=False, video_folder_pat
         os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}")
         cfg.data.test_meta_paths = json_save_path
 
+    smplx_model = smplx.create(
+        "./emage/smplx_models/",
+        model_type='smplx',
+        gender='NEUTRAL_2020',
+        use_face_contour=False,
+        num_betas=300,
+        num_expression_coeffs=100,
+        ext='npz',
+        use_pca=False,
+    )
+    model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
+    for param in model.parameters():
+        param.requires_grad = False
+    model.smplx_model = smplx_model
+    model.get_motion_reps = get_motion_reps_tensor
+
     local_rank = 0
     torch.cuda.set_device(local_rank)
     device = torch.device("cuda", local_rank)
-
+
     smplx_model = smplx_model.to(device).eval()
     model = model.to(device)
     model.smplx_model = model.smplx_model.to(device)
+
+    checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
+    checkpoint = torch.load(checkpoint_path)
+    state_dict = checkpoint['model_state_dict']
+    new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
+    model.load_state_dict(new_state_dict, strict=False)
 
     test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
    os.makedirs(test_path, exist_ok=True)
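Note: the checkpoint-loading lines added above also re-enable the 'module.' prefix stripping that was commented out before. Checkpoints saved from a model wrapped in torch.nn.DataParallel (or DistributedDataParallel) store every parameter as 'module.<name>', which does not match the keys of a bare model, and strict=False tolerates keys present on only one side. A small self-contained sketch of the same pattern (the two-layer network and the ckpt_demo.pth file name are illustrative only, not from app.py):

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 2))

# simulate a checkpoint written from a DataParallel-wrapped copy of the model
torch.save({'model_state_dict': nn.DataParallel(net).state_dict()}, 'ckpt_demo.pth')

checkpoint = torch.load('ckpt_demo.pth')
state_dict = checkpoint['model_state_dict']          # keys look like 'module.0.weight'
state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
net.load_state_dict(state_dict, strict=False)        # strict=False ignores mismatched keys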
@@ -572,7 +571,11 @@ examples_video = [
 ]
 
 combined_examples = [
-    [
+    ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker9_o7Ik1OB4TaE_00-00-38.15_00-00-42.33.mp4", 2024],
+    ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker7_iuYlGRnC7J8_00-00-0.00_00-00-3.25.mp4", 2024],
+    ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
+    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/1wrQ6Msp7wM_00-00-39.69_00-00-45.68.mp4", 2024],
+    ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/speaker8_jjRWaMCWs44_00-00-30.16_00-00-33.32.mp4", 2024],
 ]
 
 def make_demo():
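Note: combined_examples now carries five real rows, each pairing an example audio clip, a reference character video, and a seed, where before it held only a stub entry. In Gradio, such a list is normally attached to several input components at once through gr.Examples; a minimal sketch of that wiring (the component variable names audio_in, video_in, and seed_in are illustrative, not necessarily those used in app.py):

import gradio as gr

combined_examples = [
    ["./datasets/cached_audio/example_male_voice_9_seconds.wav",
     "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
]

with gr.Blocks() as demo:
    audio_in = gr.Audio(type="filepath", label="Audio")
    video_in = gr.Video(label="Character")
    seed_in = gr.Number(label="Seed", value=2024)
    # each example row fills the listed inputs in order: audio, video, seed
    gr.Examples(examples=combined_examples, inputs=[audio_in, video_in, seed_in])

demo.launch()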
@@ -594,31 +597,39 @@ def make_demo():
             <a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
             <a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
             <a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
+            </h2> \
+            <a style='font-size:18px;color: #000000'>This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some high-quality mode results are shown below. </a> </div>
             """
         )
 
-        gr.Markdown("""
-        <h4 style="text-align: left;">
-        This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
+        # gr.Markdown("""
+        # <h4 style="text-align: left;">
+        # This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
 
-        Details of the low-quality mode:
-        1. Lower resolution.
-        2. More discontinuous frames (causing noticeable "frame jumps").
-        3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
-        4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
-        5. You can provide a custom background video for your character, but it is limited to 20 seconds.
+        # Details of the low-quality mode:
+        # 1. Lower resolution.
+        # 2. More discontinuous frames (causing noticeable "frame jumps").
+        # 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
+        # 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
+        # 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
 
-        Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
-        </h4>
-        """)
+        # Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
+        # </h4>
+        # """)
 
         # Create a gallery with 5 videos
         with gr.Row():
-            video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo
-            video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo
-            video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo
-            video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo
-            video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo
+            video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
+            video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
+            video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
+            video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
+            video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
+        with gr.Row():
+            video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
+            video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
+            video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
+            video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
+            video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
 
 
         with gr.Row():
@@ -635,12 +646,31 @@ def make_demo():
                 loop=False,
                 show_share_button=True)
             with gr.Column(scale=1):
-                file_output_1 = gr.File(label="Download Motion and Visualize in Blender")
-                file_output_2 = gr.File(label="Download Motion and Visualize in Blender")
+                file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
+                file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
+                gr.Markdown("""
+                <h4 style="text-align: left;">
+                <a style='font-size:18px;color: #000000'> Details of the low-quality mode: </a>
+                <br>
+                <a style='font-size:18px;color: #000000'> 1. Lower resolution.</a>
+                <br>
+                <a style='font-size:18px;color: #000000'> 2. More discontinuous graph nodes (causing noticeable "frame jumps"). </a>
+                <br>
+                <a style='font-size:18px;color: #000000'> 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing. </a>
+                <br>
+                <a style='font-size:18px;color: #000000'> 4. only use first 8 seconds of your input audio.</a>
+                <br>
+                <a style='font-size:18px;color: #000000'> 5. custom character for a video up to 10 seconds. </a>
+                <br>
+                <br>
+                <a style='font-size:18px;color: #000000'> Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.</a>
+                </h4>
+                """)
 
         with gr.Row():
             with gr.Column(scale=1):
                 audio_input = gr.Audio(label="Upload your audio")
+                seed_input = gr.Number(label="Seed", value=2024, interactive=True)
             with gr.Column(scale=2):
                 gr.Examples(
                     examples=examples_audio,
@@ -659,9 +689,7 @@ def make_demo():
                     label="Character Examples",
                     cache_examples=False
                 )
-
-                seed_input = gr.Number(label="Seed", value=2024, interactive=True)
-
+
         # Fourth row: Generate video button
         with gr.Row():
             run_button = gr.Button("Generate Video")