checkpointing_steps: null | |
dataset_name: cifar10 | |
gradient_accumulation_steps: 1 | |
hub_model_id: null | |
hub_token: null | |
ignore_mismatched_sizes: false | |
image_column_name: img | |
label_column_name: label | |
learning_rate: 0.001 | |
lr_scheduler_type: cosine | |
max_eval_samples: null | |
max_train_samples: null | |
max_train_steps: 64000 | |
model_name_or_path: MODELS/best_0407 | |
num_train_epochs: 193 | |
num_warmup_steps: 6400 | |
num_workers: 32 | |
output_dir: OUTPUTS/best_0407--lr0.001--Baseline_lr0.001 | |
per_device_eval_batch_size: 8 | |
per_device_train_batch_size: 64 | |
push_to_hub: false | |
report_to: tensorboard | |
resume_from_checkpoint: null | |
seed: 42 | |
train_dir: null | |
train_val_split: 0.15 | |
trust_remote_code: false | |
validation_dir: null | |
weight_decay: 0.0 | |
with_tracking: true | |