checkpointing_steps: null | |
dataset_name: cifar10 | |
gradient_accumulation_steps: 1 | |
hub_model_id: null | |
hub_token: null | |
ignore_mismatched_sizes: false | |
image_column_name: img | |
label_column_name: label | |
learning_rate: 0.001 | |
lr_scheduler_type: cosine | |
max_eval_samples: null | |
max_train_samples: null | |
max_train_steps: 35000 | |
model_name_or_path: MODELS/cifnet-18-banana | |
num_train_epochs: 106 | |
num_warmup_steps: 6400 | |
num_workers: 4 | |
output_dir: OUTPUTS/cifnet-18-banana--lr0.001--sigmoid_4d_128-128-64-64 | |
per_device_eval_batch_size: 8 | |
per_device_train_batch_size: 128 | |
push_to_hub: false | |
report_to: tensorboard | |
resume_from_checkpoint: null | |
seed: 42 | |
train_dir: null | |
train_val_split: 0.15 | |
trust_remote_code: false | |
validation_dir: null | |
weight_decay: 0.0 | |
with_tracking: true | |