# Spaces:
# Build error
# Build error
# Dataset parameters
dataset_params:
  # Path to data. Data can be stored in several formats: .mp4 or .gif videos,
  # stacked .png images, or folders with frames.
  root_dir: data/taichi-png
  # Image shape, needed for the stacked .png format.
  frame_shape: [256, 256, 3]
  # In the case of TaiChi, a single video can be split into many chunks, or
  # there may be several videos for a single person. In this case an epoch can
  # be a pass over different videos (if id_sampling=True) or over different
  # chunks (if id_sampling=False).
  # If the name of the video is '12335#adsbf.mp4', the id is assumed to be 12335.
  id_sampling: True
  # List with pairs for animation, None for random pairs
  pairs_list: data/taichi256.csv
  # Augmentation parameters; see augmentation.py for all possible augmentations
  augmentation_params:
    flip_param:
      horizontal_flip: True
      time_flip: True
    jitter_param:
      brightness: 0.1
      contrast: 0.1
      saturation: 0.1
      hue: 0.1
# Defines model architecture
model_params:
  common_params:
    # Number of keypoints
    num_kp: 10
    # Number of channels per image
    num_channels: 3
    # Using first or zero order model
    estimate_jacobian: True
  kp_detector_params:
    # Softmax temperature for keypoint heatmaps
    temperature: 0.1
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 1024
    # Number of blocks in the U-Net. Can be increased or decreased depending
    # on resolution.
    num_blocks: 5
    # Keypoints are predicted on smaller images for better performance;
    # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
    scale_factor: 0.25
  generator_params:
    # Number of features multiplier
    block_expansion: 64
    # Maximum allowed number of features
    max_features: 512
    # Number of downsampling blocks in the Johnson architecture.
    # Can be increased or decreased depending on resolution.
    num_down_blocks: 2
    # Number of ResBlocks in the Johnson architecture.
    num_bottleneck_blocks: 6
    # Use occlusion map or not
    estimate_occlusion_map: True
    # NOTE(review): nested under generator_params on the assumption that the
    # generator owns the dense motion network -- confirm against the model code.
    dense_motion_params:
      # Number of features multiplier
      block_expansion: 64
      # Maximum allowed number of features
      max_features: 1024
      # Number of blocks in the U-Net. Can be increased or decreased depending
      # on resolution.
      num_blocks: 5
      # Dense motion is predicted on smaller images for better performance;
      # scale_factor=0.25 means that a 256x256 image will be resized to 64x64
      scale_factor: 0.25
  discriminator_params:
    # Discriminator can be multiscale. If you want 2 discriminators, one on the
    # original resolution and one on half of it, specify scales: [1, 0.5]
    scales: [1]
    # Number of features multiplier
    block_expansion: 32
    # Maximum allowed number of features
    max_features: 512
    # Number of blocks. Can be increased or decreased depending on resolution.
    num_blocks: 4
    # NOTE(review): presumably makes the discriminator use keypoints as an
    # additional input -- confirm against the discriminator code.
    use_kp: True
# Parameters of training
train_params:
  # Number of training epochs
  num_epochs: 150
  # For better I/O performance when the number of videos is small, the number
  # of epochs can be multiplied by this number.
  # Thus, effectively, with num_repeats=100 each epoch is 100 times larger.
  num_repeats: 150
  # Drop the learning rate by 10 times after these epochs
  epoch_milestones: []
  # Initial learning rate for all modules
  lr_generator: 2.0e-4
  lr_discriminator: 2.0e-4
  # NOTE(review): a learning rate of 0 presumably leaves the keypoint detector
  # weights unchanged during training -- confirm this is intended.
  lr_kp_detector: 0
  # NOTE(review): presumably the number of samples per training batch -- confirm.
  batch_size: 27
  # Scales for the perceptual pyramid loss. If scales = [1, 0.5, 0.25, 0.125]
  # and the image resolution is 256x256, then the loss will be computed on
  # resolutions 256x256, 128x128, 64x64, 32x32.
  scales: [1, 0.5, 0.25, 0.125]
  # Save a checkpoint this frequently. If checkpoint_freq=50, a checkpoint will
  # be saved every 50 epochs.
  checkpoint_freq: 50
  # Parameters of the transform for the equivariance loss
  transform_params:
    # Sigma for the affine part
    sigma_affine: 0.05
    # Sigma for the deformation part
    sigma_tps: 0.005
    # Number of points in the deformation grid
    points_tps: 5
  loss_weights:
    # Weight for LSGAN loss in generator
    generator_gan: 1
    # Weight for LSGAN loss in discriminator
    discriminator_gan: 1
    # Weights for feature matching loss; the number of weights should be the
    # same as the number of blocks in the discriminator.
    feature_matching: [10, 10, 10, 10]
    # Weights for perceptual loss.
    perceptual: [10, 10, 10, 10, 10]
    # Weight for value equivariance.
    equivariance_value: 10
    # Weight for jacobian equivariance.
    equivariance_jacobian: 10
# Parameters of reconstruction
reconstruction_params:
  # Maximum number of videos for reconstruction
  num_videos: 1000
  # Format for visualization; note that results will also be stored as
  # stacked .png.
  format: '.mp4'
# Parameters of animation
animate_params:
  # Maximum number of pairs for animation; the pairs will be either taken from
  # pairs_list or chosen at random.
  num_pairs: 50
  # Format for visualization; note that results will also be stored as
  # stacked .png.
  format: '.mp4'
  # Normalization of driving keypoints
  normalization_params:
    # Increase or decrease relative movement scale depending on the size of
    # the object
    adapt_movement_scale: False
    # Apply only relative displacement of the keypoint
    use_relative_movement: True
    # Apply only relative change in jacobian
    use_relative_jacobian: True
# Visualization parameters
visualizer_params:
  # Draw keypoints of this size; increase or decrease depending on resolution
  kp_size: 5
  # Draw white border around images
  draw_border: True
  # Color map for keypoints
  colormap: 'gist_rainbow'