# FoleyCrafter/configs/train/train_temporal_adapter.yaml
# Last commit: "init" (7f2690b), ymzhang319. Original file size: 921 bytes.
# Training configuration for the temporal adapter (per the file name,
# train_temporal_adapter.yaml).
#
# NOTE(review): the text this file was recovered from had all indentation
# stripped, which left `train_data` and `validation_data` as null and pushed
# their settings to the top level. The nesting below is reconstructed from
# key order; confirm group membership against the script that loads this file.

output_dir: "outputs"
# Empty string: no path is supplied here.
pretrained_model_path: ""

motion_module_path: "models/mm_sd_v15_v2.ckpt"

train_data:
  csv_path: "./curated.csv"
  # Presumably the audio sample rate in Hz — TODO confirm.
  audio_fps: 48000
  # Equals 10 x audio_fps; presumably a clip length in samples — TODO confirm.
  audio_size: 480000

validation_data:
  # The entries are image file paths, despite the key name.
  prompts:
    - "./data/input/lighthouse.png"
    - "./data/input/guitar.png"
    - "./data/input/lion.png"
    - "./data/input/gun.png"
  num_inference_steps: 25
  guidance_scale: 7.5
  sample_size: 512

# Each entry ends with a dot; presumably matched against parameter names as a
# prefix or substring — verify against the training script.
trainable_modules:
  - 'time_conv_in.'
  - 'conv_in.'

video_unet_checkpoint_path: "models/vggsound_unet.ckpt"
# Empty string: no path is supplied here.
audio_unet_checkpoint_path: ""

# Keep the decimal point and the signed exponent: YAML 1.1 loaders only
# resolve this form as a float (a bare `5e-5` may load as a string).
learning_rate: 5.0e-5
train_batch_size: 1  # max for mixed
gradient_accumulation_steps: 1

# Presumably -1 disables the epoch limit so that max_train_steps decides when
# training stops — TODO confirm.
max_train_epoch: -1
max_train_steps: 500000

checkpointing_epochs: 4000
checkpointing_steps: 500

validation_steps: 3000
# Presumably extra step numbers at which validation also runs — TODO confirm.
validation_steps_tuple: [2, 300, 1000]

global_seed: 42
mixed_precision_training: true
# Canonical lowercase boolean, consistent with the other flags in this file.
is_debug: false

# Empty string: no checkpoint path is supplied here.
resume_ckpt: ""
zero_no_label_mel: false