File size: 4,868 Bytes
3d7026d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
trainer:
target: trainer.TrainerDifIR
model:
target: models.unet.UNetModelSwin
ckpt_path: null
params:
image_size: 64
in_channels: 3
model_channels: 160
out_channels: 3
attention_resolutions:
- 64
- 32
- 16
- 8
dropout: 0
channel_mult:
- 1
- 2
- 2
- 4
num_res_blocks:
- 2
- 2
- 2
- 2
conv_resample: true
dims: 2
use_fp16: false
num_head_channels: 32
use_scale_shift_norm: true
resblock_updown: false
swin_depth: 2
swin_embed_dim: 192
window_size: 8
mlp_ratio: 4
cond_lq: true
lq_size: 64
diffusion:
target: models.script_util.create_gaussian_diffusion
params:
sf: 4
schedule_name: exponential
schedule_kwargs:
power: 0.3
etas_end: 0.99
steps: 15
min_noise_level: 0.04
kappa: 2.0
weighted_mse: false
predict_type: xstart
timestep_respacing: null
scale_factor: 1.0
normalize_input: true
latent_flag: true
autoencoder:
target: ldm.models.autoencoder.VQModelTorch
ckpt_path: weights/autoencoder_vq_f4.pth
use_fp16: true
params:
embed_dim: 3
n_embed: 8192
ddconfig:
double_z: false
z_channels: 3
resolution: 256
in_channels: 3
out_ch: 3
ch: 128
ch_mult:
- 1
- 2
- 4
num_res_blocks: 2
attn_resolutions: []
dropout: 0.0
padding_mode: zeros
degradation:
sf: 4
resize_prob:
- 0.2
- 0.7
- 0.1
resize_range:
- 0.15
- 1.5
gaussian_noise_prob: 0.5
noise_range:
- 1
- 30
poisson_scale_range:
- 0.05
- 3.0
gray_noise_prob: 0.4
jpeg_range:
- 30
- 95
second_order_prob: 0.5
second_blur_prob: 0.8
resize_prob2:
- 0.3
- 0.4
- 0.3
resize_range2:
- 0.3
- 1.2
gaussian_noise_prob2: 0.5
noise_range2:
- 1
- 25
poisson_scale_range2:
- 0.05
- 2.5
gray_noise_prob2: 0.4
jpeg_range2:
- 30
- 95
gt_size: 256
resize_back: false
use_sharp: false
data:
train:
type: realesrgan
params:
dir_paths: []
txt_file_path:
- /content/ResShift/high_res/train.txt
im_exts:
- JPEG
io_backend:
type: disk
blur_kernel_size: 21
kernel_list:
- iso
- aniso
- generalized_iso
- generalized_aniso
- plateau_iso
- plateau_aniso
kernel_prob:
- 0.45
- 0.25
- 0.12
- 0.03
- 0.12
- 0.03
sinc_prob: 0.1
blur_sigma:
- 0.2
- 3.0
betag_range:
- 0.5
- 4.0
betap_range:
- 1
- 2.0
blur_kernel_size2: 15
kernel_list2:
- iso
- aniso
- generalized_iso
- generalized_aniso
- plateau_iso
- plateau_aniso
kernel_prob2:
- 0.45
- 0.25
- 0.12
- 0.03
- 0.12
- 0.03
sinc_prob2: 0.1
blur_sigma2:
- 0.2
- 1.5
betag_range2:
- 0.5
- 4.0
betap_range2:
- 1
- 2.0
final_sinc_prob: 0.8
gt_size: 256
crop_pad_size: 300
use_hflip: true
use_rot: false
rescale_gt: true
val:
type: base
params:
dir_path: testdata/Val_SR/lq
im_exts: png
transform_type: default
transform_kwargs:
mean: 0.5
std: 0.5
extra_dir_path: testdata/Val_SR/gt
extra_transform_type: default
extra_transform_kwargs:
mean: 0.5
std: 0.5
recursive: false
train:
lr: 5.0e-05
lr_min: 2.0e-05
lr_schedule: null
warmup_iterations: 100
batch:
- 8
- 1
microbatch: 1
num_workers: 4
prefetch_factor: 2
weight_decay: 0
ema_rate: 0.999
iterations: 1000
save_freq: 10000
log_freq:
- 200
- 2000
- 1
local_logging: true
tf_logging: false
use_ema_val: true
val_freq: ${train.save_freq}
val_y_channel: true
val_resolution: ${model.params.lq_size}
val_padding_mode: reflect
use_amp: true
seed: 123456
global_seeding: false
compile:
flag: false
mode: reduce-overhead
save_dir: logging/
resume: ''
cfg_path: configs/realsr_swinunet_realesrgan256.yaml
Number of parameters: 118.59M
Restoring autoencoder from weights/autoencoder_vq_f4.pth
Number of images in train data set: 1254
Number of images in val data set: 32
Train: 000200/001000, Loss/MSE: t(1):1.6e-01/1.6e-01, t(8):4.5e-01/4.5e-01, t(15):5.9e-01/5.9e-01, lr:5.00e-05
Train: 000400/001000, Loss/MSE: t(1):2.8e-02/2.8e-02, t(8):3.9e-01/3.9e-01, t(15):5.0e-01/5.0e-01, lr:5.00e-05
Train: 000600/001000, Loss/MSE: t(1):2.1e-02/2.1e-02, t(8):3.4e-01/3.4e-01, t(15):4.6e-01/4.6e-01, lr:5.00e-05
Train: 000800/001000, Loss/MSE: t(1):1.4e-02/1.4e-02, t(8):3.5e-01/3.5e-01, t(15):5.1e-01/5.1e-01, lr:5.00e-05
Train: 001000/001000, Loss/MSE: t(1):1.4e-02/1.4e-02, t(8):2.9e-01/2.9e-01, t(15):4.6e-01/4.6e-01, lr:5.00e-05
|