H-Liu1997 committed on
Commit
18ce27f
1 Parent(s): 4b12aec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -50
app.py CHANGED
@@ -482,32 +482,9 @@ character_name_to_yaml = {
482
  "101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
483
  }
484
 
485
- cfg = prepare_all("./configs/gradio.yaml")
486
-
487
- smplx_model = smplx.create(
488
- "./emage/smplx_models/",
489
- model_type='smplx',
490
- gender='NEUTRAL_2020',
491
- use_face_contour=False,
492
- num_betas=300,
493
- num_expression_coeffs=100,
494
- ext='npz',
495
- use_pca=False,
496
- )
497
- model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
498
- for param in model.parameters():
499
- param.requires_grad = False
500
- model.smplx_model = smplx_model
501
- model.get_motion_reps = get_motion_reps_tensor
502
-
503
- checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
504
- checkpoint = torch.load(checkpoint_path)
505
- state_dict = checkpoint['model_state_dict']
506
- # new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
507
- model.load_state_dict(state_dict, strict=False)
508
-
509
- @spaces.GPU(duration=299)
510
- def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None, smplx_model=smplx_model, model=model, cfg=cfg):
511
  cfg.seed = seed
512
  seed_everything(cfg.seed)
513
  experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
@@ -542,13 +519,35 @@ def tango(audio_path, character_name, seed, create_graph=False, video_folder_pat
542
  os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}")
543
  cfg.data.test_meta_paths = json_save_path
544
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
  local_rank = 0
546
  torch.cuda.set_device(local_rank)
547
  device = torch.device("cuda", local_rank)
548
-
549
  smplx_model = smplx_model.to(device).eval()
550
  model = model.to(device)
551
  model.smplx_model = model.smplx_model.to(device)
 
 
 
 
 
 
552
 
553
  test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
554
  os.makedirs(test_path, exist_ok=True)
@@ -572,7 +571,11 @@ examples_video = [
572
  ]
573
 
574
  combined_examples = [
575
- [audio[0], video[0], 2024] for audio in examples_audio for video in examples_video
 
 
 
 
576
  ]
577
 
578
  def make_demo():
@@ -594,31 +597,39 @@ def make_demo():
594
  <a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
595
  <a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
596
  <a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
 
 
597
  """
598
  )
599
 
600
- gr.Markdown("""
601
- <h4 style="text-align: left;">
602
- This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
603
 
604
- Details of the low-quality mode:
605
- 1. Lower resolution.
606
- 2. More discontinuous frames (causing noticeable "frame jumps").
607
- 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
608
- 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
609
- 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
610
 
611
- Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
612
- </h4>
613
- """)
614
 
615
  # Create a gallery with 5 videos
616
  with gr.Row():
617
- video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 1")
618
- video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 2")
619
- video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 3")
620
- video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 4")
621
- video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 5")
 
 
 
 
 
 
622
 
623
 
624
  with gr.Row():
@@ -635,12 +646,31 @@ def make_demo():
635
  loop=False,
636
  show_share_button=True)
637
  with gr.Column(scale=1):
638
- file_output_1 = gr.File(label="Download Motion and Visualize in Blender")
639
- file_output_2 = gr.File(label="Download Motion and Visualize in Blender")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
640
 
641
  with gr.Row():
642
  with gr.Column(scale=1):
643
  audio_input = gr.Audio(label="Upload your audio")
 
644
  with gr.Column(scale=2):
645
  gr.Examples(
646
  examples=examples_audio,
@@ -659,9 +689,7 @@ def make_demo():
659
  label="Character Examples",
660
  cache_examples=False
661
  )
662
- with gr.Row():
663
- seed_input = gr.Number(label="Seed", value=2024, interactive=True)
664
-
665
  # Fourth row: Generate video button
666
  with gr.Row():
667
  run_button = gr.Button("Generate Video")
 
482
  "101099-00_18_09-00_18_19.mp4": "./datasets/data_json/show_oliver_test/Stupid_Watergate_-_Last_Week_Tonight_with_John_Oliver_HBO-FVFdsl29s_Q.mkv.json",
483
  }
484
 
485
+ @spaces.GPU(duration=240)
486
+ def tango(audio_path, character_name, seed, create_graph=False, video_folder_path=None):
487
+ cfg = prepare_all("./configs/gradio.yaml")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  cfg.seed = seed
489
  seed_everything(cfg.seed)
490
  experiment_ckpt_dir = experiment_log_dir = os.path.join(cfg.output_dir, cfg.exp_name)
 
519
  os.system(f"python ./create_graph.py --json_save_path {json_save_path} --graph_save_path {graph_save_path}")
520
  cfg.data.test_meta_paths = json_save_path
521
 
522
+ smplx_model = smplx.create(
523
+ "./emage/smplx_models/",
524
+ model_type='smplx',
525
+ gender='NEUTRAL_2020',
526
+ use_face_contour=False,
527
+ num_betas=300,
528
+ num_expression_coeffs=100,
529
+ ext='npz',
530
+ use_pca=False,
531
+ )
532
+ model = init_class(cfg.model.name_pyfile, cfg.model.class_name, cfg)
533
+ for param in model.parameters():
534
+ param.requires_grad = False
535
+ model.smplx_model = smplx_model
536
+ model.get_motion_reps = get_motion_reps_tensor
537
+
538
  local_rank = 0
539
  torch.cuda.set_device(local_rank)
540
  device = torch.device("cuda", local_rank)
541
+
542
  smplx_model = smplx_model.to(device).eval()
543
  model = model.to(device)
544
  model.smplx_model = model.smplx_model.to(device)
545
+
546
+ checkpoint_path = "./datasets/cached_ckpts/ckpt.pth"
547
+ checkpoint = torch.load(checkpoint_path)
548
+ state_dict = checkpoint['model_state_dict']
549
+ new_state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
550
+ model.load_state_dict(new_state_dict, strict=False)
551
 
552
  test_path = os.path.join(experiment_ckpt_dir, f"test_{0}")
553
  os.makedirs(test_path, exist_ok=True)
 
571
  ]
572
 
573
  combined_examples = [
574
+ ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker9_o7Ik1OB4TaE_00-00-38.15_00-00-42.33.mp4", 2024],
575
+ ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/speaker7_iuYlGRnC7J8_00-00-0.00_00-00-3.25.mp4", 2024],
576
+ ["./datasets/cached_audio/example_male_voice_9_seconds.wav", "./datasets/cached_audio/101099-00_18_09-00_18_19.mp4", 2024],
577
+ ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/1wrQ6Msp7wM_00-00-39.69_00-00-45.68.mp4", 2024],
578
+ ["./datasets/cached_audio/example_female_voice_9_seconds.wav", "./datasets/cached_audio/speaker8_jjRWaMCWs44_00-00-30.16_00-00-33.32.mp4", 2024],
579
  ]
580
 
581
  def make_demo():
 
597
  <a style='font-size:18px;color: #000000' href=''>[Github Repo]</a>\
598
  <a style='font-size:18px;color: #000000' href=''> [ArXiv] </a>\
599
  <a style='font-size:18px;color: #000000' href='https://pantomatrix.github.io/TANGO/'> [Project Page] </a> </div>
600
+ </h2> \
601
+ <a style='font-size:18px;color: #000000'>This is an open-source project supported by Hugging Face's free ZeroGPU. Runtime is limited to 300s, so it operates in low-quality mode. Some high-quality mode results are shown below. </a> </div>
602
  """
603
  )
604
 
605
+ # gr.Markdown("""
606
+ # <h4 style="text-align: left;">
607
+ # This demo is part of an open-source project supported by Hugging Face's free, zero-GPU runtime. Due to runtime cost considerations, it operates in low-quality mode. Some high-quality videos are shown below.
608
 
609
+ # Details of the low-quality mode:
610
+ # 1. Lower resolution.
611
+ # 2. More discontinuous frames (causing noticeable "frame jumps").
612
+ # 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing.
613
+ # 4. Accepts audio input of up to 8 seconds. If your input exceeds 8 seconds, only the first 8 seconds will be used.
614
+ # 5. You can provide a custom background video for your character, but it is limited to 20 seconds.
615
 
616
+ # Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.
617
+ # </h4>
618
+ # """)
619
 
620
  # Create a gallery with 5 videos
621
  with gr.Row():
622
+ video1 = gr.Video(value="./datasets/cached_audio/demo1.mp4", label="Demo 0")
623
+ video2 = gr.Video(value="./datasets/cached_audio/demo2.mp4", label="Demo 1")
624
+ video3 = gr.Video(value="./datasets/cached_audio/demo3.mp4", label="Demo 2")
625
+ video4 = gr.Video(value="./datasets/cached_audio/demo4.mp4", label="Demo 3")
626
+ video5 = gr.Video(value="./datasets/cached_audio/demo5.mp4", label="Demo 4")
627
+ with gr.Row():
628
+ video1 = gr.Video(value="./datasets/cached_audio/demo6.mp4", label="Demo 5")
629
+ video2 = gr.Video(value="./datasets/cached_audio/demo0.mp4", label="Demo 6")
630
+ video3 = gr.Video(value="./datasets/cached_audio/demo7.mp4", label="Demo 7")
631
+ video4 = gr.Video(value="./datasets/cached_audio/demo8.mp4", label="Demo 8")
632
+ video5 = gr.Video(value="./datasets/cached_audio/demo9.mp4", label="Demo 9")
633
 
634
 
635
  with gr.Row():
 
646
  loop=False,
647
  show_share_button=True)
648
  with gr.Column(scale=1):
649
+ file_output_1 = gr.File(label="Download 3D Motion and Visualize in Blender")
650
+ file_output_2 = gr.File(label="Download 3D Motion and Visualize in Blender")
651
+ gr.Markdown("""
652
+ <h4 style="text-align: left;">
653
+ <a style='font-size:18px;color: #000000'> Details of the low-quality mode: </a>
654
+ <br>
655
+ <a style='font-size:18px;color: #000000'> 1. Lower resolution.</a>
656
+ <br>
657
+ <a style='font-size:18px;color: #000000'> 2. More discontinuous graph nodes (causing noticeable "frame jumps"). </a>
658
+ <br>
659
+ <a style='font-size:18px;color: #000000'> 3. Utilizes open-source tools like SMPLerX-s-model, Wav2Lip, and FiLM for faster processing. </a>
660
+ <br>
661
+ <a style='font-size:18px;color: #000000'> 4. only use first 8 seconds of your input audio.</a>
662
+ <br>
663
+ <a style='font-size:18px;color: #000000'> 5. custom character for a video up to 10 seconds. </a>
664
+ <br>
665
+ <br>
666
+ <a style='font-size:18px;color: #000000'> Feel free to open an issue on GitHub or contact the authors if this does not meet your needs.</a>
667
+ </h4>
668
+ """)
669
 
670
  with gr.Row():
671
  with gr.Column(scale=1):
672
  audio_input = gr.Audio(label="Upload your audio")
673
+ seed_input = gr.Number(label="Seed", value=2024, interactive=True)
674
  with gr.Column(scale=2):
675
  gr.Examples(
676
  examples=examples_audio,
 
689
  label="Character Examples",
690
  cache_examples=False
691
  )
692
+
 
 
693
  # Fourth row: Generate video button
694
  with gr.Row():
695
  run_button = gr.Button("Generate Video")