---
# Training run configuration: a flat mapping of scalar options, one per line,
# kept in alphabetical key order so diffs stay stable.
# NOTE(review): the key names look like the command-line arguments of an
# image-classification fine-tuning script — confirm against the consumer
# before renaming or removing any key.

checkpointing_steps: null
dataset_name: cifar10
gradient_accumulation_steps: 1
hub_model_id: null
hub_token: null
ignore_mismatched_sizes: false
# Column names are presumably those of the dataset named above — TODO confirm.
image_column_name: img
label_column_name: label
learning_rate: 0.001
lr_scheduler_type: cosine
max_eval_samples: null
max_train_samples: null
# NOTE(review): both max_train_steps and num_train_epochs are set; which one
# takes precedence depends on the consuming script — verify.
max_train_steps: 64000
model_name_or_path: MODELS/best_0409_2
num_train_epochs: 193
# 6400 is 10% of max_train_steps (64000).
num_warmup_steps: 6400
num_workers: 32
output_dir: OUTPUTS/best_0409_2--lr0.001--TwoAug
per_device_eval_batch_size: 8
per_device_train_batch_size: 64
push_to_hub: false
report_to: tensorboard
resume_from_checkpoint: null
seed: 42
train_dir: null
train_val_split: 0.15
trust_remote_code: false
validation_dir: null
weight_decay: 0.0
with_tracking: true