# Source: gradio-lipsync-wav2lip / train_basicsr.yml
# Uploaded by: nijisakai
# Duplicate from manavisrani07/gradio-lipsync-wav2lip
# Revision: 5b11db7
# general settings
# Quoted so the value is always loaded as a string.
# NOTE(review): the name mentions "500k" and "B16G1", while this file sets
# total_iter: 150000 and batch_size_per_gpu: 4 - confirm the name is still apt.
name: '001_ESRGAN_x4_f64b23_custom16k_500k_B16G1_wandb'
model_type: ESRGANModel
scale: 4
# set num_gpu: 0 for cpu mode
num_gpu: 1
manual_seed: 0
# dataset and data loader settings
# Nesting restored: every key below belongs to the `train` dataset entry.
# Left flat, `name`/`type` here collide with the top-level keys of the same name.
datasets:
  train:
    name: face_dataset
    type: PairedImageDataset
    # Ground-truth (hq) and low-quality (lq) image folders.
    dataroot_gt: data/hq
    dataroot_lq: data/lq
    filename_tmpl: '{}'
    io_backend:
      type: disk

    gt_size: 384
    use_flip: true
    use_rot: true

    # data loader
    use_shuffle: true
    num_worker_per_gpu: 1
    batch_size_per_gpu: 4
    dataset_enlarge_ratio: 1
    prefetch_mode: null
# network structures
# Generator. Parameters are nested under `network_g` so that they do not
# collide with the identically named keys of `network_d`.
network_g:
  type: RRDBNet
  num_in_ch: 3
  num_out_ch: 3
  num_feat: 64
  num_block: 23
# Discriminator. Parameters are nested under `network_d`.
# NOTE(review): the type name suggests a 128x128 input, while the train
# dataset uses gt_size: 384 - confirm the discriminator accepts that size.
network_d:
  type: VGGStyleDiscriminator128
  num_in_ch: 3
  num_feat: 64
# path
# No pretrained generator weights are given (null); a resume state file is
# configured instead.
path:
  pretrain_network_g: null
  strict_load_g: true
  resume_state: checkpoints/pretrained.state
# training settings
# Nesting restored: optimizers, scheduler and each loss are separate
# sub-mappings of `train`. Left flat, their `type`, `lr`, `weight_decay`,
# `betas` and `loss_weight` keys overwrite one another.
train:
  # The explicit !!float tag keeps exponent literals without a decimal point
  # (e.g. 1e-4) from being loaded as strings by YAML 1.1 loaders.
  optim_g:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]
  optim_d:
    type: Adam
    lr: !!float 1e-4
    weight_decay: 0
    betas: [0.9, 0.99]

  scheduler:
    type: MultiStepLR
    # NOTE(review): milestones 200000 and 300000 lie beyond total_iter
    # (150000) below, so they are presumably never reached - confirm intent.
    milestones: [50000, 100000, 200000, 300000]
    gamma: 0.5

  total_iter: 150000
  warmup_iter: -1  # no warm up

  # losses
  pixel_opt:
    type: L1Loss
    loss_weight: !!float 1e-2
    reduction: mean
  perceptual_opt:
    type: PerceptualLoss
    layer_weights:
      'conv5_4': 1  # before relu
    vgg_type: vgg19
    use_input_norm: true
    range_norm: false
    perceptual_weight: 1.0
    style_weight: 0
    criterion: l1
  gan_opt:
    type: GANLoss
    gan_type: vanilla
    real_label_val: 1.0
    fake_label_val: 0.0
    loss_weight: !!float 5e-3

  net_d_iters: 1
  net_d_init_iters: 0
# validation settings
# NOTE(review): no `val` entry is visible under `datasets` in this file -
# confirm that validation has a dataset to run on.
val:
  # !!float 25e2 loads as 2500.0.
  val_freq: !!float 25e2
  save_img: true

  metrics:
    psnr:  # metric name, can be arbitrary
      type: calculate_psnr
      crop_border: 4
      test_y_channel: false
# logging settings
logger:
  print_freq: 100
  # !!float 25e2 loads as 2500.0 (same value as val_freq).
  save_checkpoint_freq: !!float 25e2
  use_tb_logger: true
  # Both wandb fields are null here.
  wandb:
    project: null
    resume_id: null
# dist training settings
dist_params:
  backend: nccl
  port: 29500