{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 128, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 1.9230769230769231e-07, "loss": 2.3131, "step": 5 }, { "epoch": 0.31, "learning_rate": 3.8461538461538463e-07, "loss": 2.3205, "step": 10 }, { "epoch": 0.47, "learning_rate": 4.99626950870707e-07, "loss": 2.1741, "step": 15 }, { "epoch": 0.62, "learning_rate": 4.954429235188896e-07, "loss": 1.9213, "step": 20 }, { "epoch": 0.78, "learning_rate": 4.866867588977609e-07, "loss": 1.8027, "step": 25 }, { "epoch": 0.94, "learning_rate": 4.7352156778691276e-07, "loss": 1.5125, "step": 30 }, { "epoch": 1.0, "eval_loss": 1.4529484510421753, "eval_runtime": 16.1705, "eval_samples_per_second": 7.05, "eval_steps_per_second": 0.928, "step": 32 }, { "epoch": 1.09, "learning_rate": 4.561925927872421e-07, "loss": 1.4467, "step": 35 }, { "epoch": 1.25, "learning_rate": 4.3502263991531294e-07, "loss": 1.3768, "step": 40 }, { "epoch": 1.41, "learning_rate": 4.104060653380402e-07, "loss": 1.2423, "step": 45 }, { "epoch": 1.56, "learning_rate": 3.828014292634508e-07, "loss": 1.1306, "step": 50 }, { "epoch": 1.72, "learning_rate": 3.527229538316371e-07, "loss": 1.0165, "step": 55 }, { "epoch": 1.88, "learning_rate": 3.207309441292325e-07, "loss": 0.9656, "step": 60 }, { "epoch": 2.0, "eval_loss": 0.9357134699821472, "eval_runtime": 15.8011, "eval_samples_per_second": 7.215, "eval_steps_per_second": 0.949, "step": 64 }, { "epoch": 2.03, "learning_rate": 2.8742135076578607e-07, "loss": 0.9752, "step": 65 }, { "epoch": 2.19, "learning_rate": 2.5341466844148774e-07, "loss": 0.8735, "step": 70 }, { "epoch": 2.34, "learning_rate": 2.1934437730492543e-07, "loss": 0.8911, "step": 75 }, { "epoch": 2.5, "learning_rate": 1.8584514241650663e-07, "loss": 0.8266, "step": 80 }, { "epoch": 2.66, "learning_rate": 1.5354099113921612e-07, "loss": 0.8245, "step": 85 }, { "epoch": 2.81, "learning_rate": 1.2303368868954847e-07, "loss": 0.7816, "step": 90 }, { "epoch": 2.97, "learning_rate": 9.489152839010797e-08, "loss": 0.7992, "step": 95 }, { "epoch": 3.0, "eval_loss": 0.8069692850112915, "eval_runtime": 15.7689, "eval_samples_per_second": 7.229, "eval_steps_per_second": 0.951, "step": 96 }, { "epoch": 3.12, "learning_rate": 6.963874544026108e-08, "loss": 0.7874, "step": 100 }, { "epoch": 3.28, "learning_rate": 4.774575140626316e-08, "loss": 0.7714, "step": 105 }, { "epoch": 3.44, "learning_rate": 2.9620371343832106e-08, "loss": 0.7559, "step": 110 }, { "epoch": 3.59, "learning_rate": 1.5600246788994938e-08, "loss": 0.7602, "step": 115 }, { "epoch": 3.75, "learning_rate": 5.946546135113861e-09, "loss": 0.7548, "step": 120 }, { "epoch": 3.91, "learning_rate": 8.390995598676065e-10, "loss": 0.7865, "step": 125 }, { "epoch": 4.0, "eval_loss": 0.7899278998374939, "eval_runtime": 15.7827, "eval_samples_per_second": 7.223, "eval_steps_per_second": 0.95, "step": 128 }, { "epoch": 4.0, "step": 128, "total_flos": 89583526084608.0, "train_loss": 1.1804139949381351, "train_runtime": 4681.6725, "train_samples_per_second": 0.871, "train_steps_per_second": 0.027 } ], "logging_steps": 5, "max_steps": 128, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 89583526084608.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }