{ "best_metric": 0.43870946764945984, "best_model_checkpoint": "./mistral/06-03-24-Weni-pipeline_test_Zeroshot-2_max_steps-3224_batch_64_2024-03-06_ppid_7/checkpoint-200", "epoch": 0.49597024178549287, "eval_steps": 100, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 1.8498313426971436, "learning_rate": 1.1801242236024846e-05, "loss": 1.9347, "step": 20 }, { "epoch": 0.1, "grad_norm": 8.336210250854492, "learning_rate": 2.4223602484472053e-05, "loss": 1.6596, "step": 40 }, { "epoch": 0.15, "grad_norm": 2.0044610500335693, "learning_rate": 3.6645962732919256e-05, "loss": 1.3842, "step": 60 }, { "epoch": 0.2, "grad_norm": 0.6825340390205383, "learning_rate": 4.906832298136646e-05, "loss": 0.9527, "step": 80 }, { "epoch": 0.25, "grad_norm": 0.24993008375167847, "learning_rate": 6.149068322981368e-05, "loss": 0.5546, "step": 100 }, { "epoch": 0.25, "eval_loss": 0.49679049849510193, "eval_runtime": 179.6977, "eval_samples_per_second": 15.955, "eval_steps_per_second": 1.002, "step": 100 }, { "epoch": 0.3, "grad_norm": 0.2225101739168167, "learning_rate": 7.391304347826086e-05, "loss": 0.4941, "step": 120 }, { "epoch": 0.35, "grad_norm": 0.20095179975032806, "learning_rate": 8.633540372670808e-05, "loss": 0.467, "step": 140 }, { "epoch": 0.4, "grad_norm": 0.25985196232795715, "learning_rate": 9.875776397515528e-05, "loss": 0.4542, "step": 160 }, { "epoch": 0.45, "grad_norm": 0.22290097177028656, "learning_rate": 0.00011118012422360249, "loss": 0.4569, "step": 180 }, { "epoch": 0.5, "grad_norm": 0.2086612433195114, "learning_rate": 0.0001236024844720497, "loss": 0.4381, "step": 200 }, { "epoch": 0.5, "eval_loss": 0.43870946764945984, "eval_runtime": 179.4882, "eval_samples_per_second": 15.973, "eval_steps_per_second": 1.003, "step": 200 } ], "logging_steps": 20, "max_steps": 3224, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 100, "total_flos": 4.0748291337905766e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }