{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.99722479185939, "eval_steps": 30, "global_step": 810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22, "learning_rate": 3.157894736842105e-06, "loss": 11.8786, "step": 30 }, { "epoch": 0.22, "eval_loss": 11.730859756469727, "eval_runtime": 60.9075, "eval_samples_per_second": 0.542, "eval_steps_per_second": 0.542, "step": 30 }, { "epoch": 0.44, "learning_rate": 6.31578947368421e-06, "loss": 11.386, "step": 60 }, { "epoch": 0.44, "eval_loss": 10.65054702758789, "eval_runtime": 60.8988, "eval_samples_per_second": 0.542, "eval_steps_per_second": 0.542, "step": 60 }, { "epoch": 0.67, "learning_rate": 9.473684210526315e-06, "loss": 9.8984, "step": 90 }, { "epoch": 0.67, "eval_loss": 8.621472358703613, "eval_runtime": 60.8346, "eval_samples_per_second": 0.542, "eval_steps_per_second": 0.542, "step": 90 }, { "epoch": 0.89, "learning_rate": 1.263157894736842e-05, "loss": 7.9555, "step": 120 }, { "epoch": 0.89, "eval_loss": 6.96145486831665, "eval_runtime": 60.9577, "eval_samples_per_second": 0.541, "eval_steps_per_second": 0.541, "step": 120 }, { "epoch": 1.11, "learning_rate": 1.578947368421053e-05, "loss": 6.6507, "step": 150 }, { "epoch": 1.11, "eval_loss": 6.004063606262207, "eval_runtime": 60.9978, "eval_samples_per_second": 0.541, "eval_steps_per_second": 0.541, "step": 150 }, { "epoch": 1.33, "learning_rate": 1.894736842105263e-05, "loss": 5.8685, "step": 180 }, { "epoch": 1.33, "eval_loss": 5.406551361083984, "eval_runtime": 60.6662, "eval_samples_per_second": 0.544, "eval_steps_per_second": 0.544, "step": 180 }, { "epoch": 1.55, "learning_rate": 1.9993250234920638e-05, "loss": 5.3925, "step": 210 }, { "epoch": 1.55, "eval_loss": 5.084054946899414, "eval_runtime": 61.1004, "eval_samples_per_second": 0.54, "eval_steps_per_second": 0.54, "step": 210 }, { "epoch": 1.78, "learning_rate": 1.9957838880989076e-05, "loss": 5.1386, "step": 240 }, { "epoch": 1.78, "eval_loss": 4.918034553527832, "eval_runtime": 60.9721, "eval_samples_per_second": 0.541, "eval_steps_per_second": 0.541, "step": 240 }, { "epoch": 2.0, "learning_rate": 1.989218589765658e-05, "loss": 5.0205, "step": 270 }, { "epoch": 2.0, "eval_loss": 4.811317443847656, "eval_runtime": 60.7077, "eval_samples_per_second": 0.544, "eval_steps_per_second": 0.544, "step": 270 }, { "epoch": 2.22, "learning_rate": 1.979649067087574e-05, "loss": 4.9088, "step": 300 }, { "epoch": 2.22, "eval_loss": 4.734280109405518, "eval_runtime": 61.3359, "eval_samples_per_second": 0.538, "eval_steps_per_second": 0.538, "step": 300 }, { "epoch": 2.44, "learning_rate": 1.967104382390511e-05, "loss": 4.8469, "step": 330 }, { "epoch": 2.44, "eval_loss": 4.680157661437988, "eval_runtime": 60.9063, "eval_samples_per_second": 0.542, "eval_steps_per_second": 0.542, "step": 330 }, { "epoch": 2.66, "learning_rate": 1.951622633469592e-05, "loss": 4.7796, "step": 360 }, { "epoch": 2.66, "eval_loss": 4.634165287017822, "eval_runtime": 61.2818, "eval_samples_per_second": 0.538, "eval_steps_per_second": 0.538, "step": 360 }, { "epoch": 2.89, "learning_rate": 1.933250837887457e-05, "loss": 4.7591, "step": 390 }, { "epoch": 2.89, "eval_loss": 4.599093437194824, "eval_runtime": 61.1388, "eval_samples_per_second": 0.54, "eval_steps_per_second": 0.54, "step": 390 }, { "epoch": 3.11, "learning_rate": 1.9120447901834708e-05, "loss": 4.7235, "step": 420 }, { "epoch": 3.11, "eval_loss": 4.5706963539123535, "eval_runtime": 60.7134, "eval_samples_per_second": 0.544, "eval_steps_per_second": 0.544, "step": 420 }, { "epoch": 3.33, "learning_rate": 1.888068892427538e-05, "loss": 6.7181, "step": 450 }, { "epoch": 3.33, "eval_loss": 5.124680995941162, "eval_runtime": 55.4445, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 450 }, { "epoch": 3.56, "learning_rate": 1.8613959586331364e-05, "loss": 5.0297, "step": 480 }, { "epoch": 3.56, "eval_loss": 4.7116570472717285, "eval_runtime": 54.0242, "eval_samples_per_second": 0.611, "eval_steps_per_second": 0.611, "step": 480 }, { "epoch": 3.78, "learning_rate": 1.8321069936235503e-05, "loss": 4.7846, "step": 510 }, { "epoch": 3.78, "eval_loss": 4.612066268920898, "eval_runtime": 55.4504, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 510 }, { "epoch": 4.0, "learning_rate": 1.800290947022884e-05, "loss": 4.733, "step": 540 }, { "epoch": 4.0, "eval_loss": 4.56680965423584, "eval_runtime": 55.4115, "eval_samples_per_second": 0.596, "eval_steps_per_second": 0.596, "step": 540 }, { "epoch": 4.22, "learning_rate": 1.766044443118978e-05, "loss": 4.6863, "step": 570 }, { "epoch": 4.22, "eval_loss": 4.538435459136963, "eval_runtime": 55.5112, "eval_samples_per_second": 0.594, "eval_steps_per_second": 0.594, "step": 570 }, { "epoch": 4.44, "learning_rate": 1.729471487418621e-05, "loss": 4.6477, "step": 600 }, { "epoch": 4.44, "eval_loss": 4.5178914070129395, "eval_runtime": 55.4747, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 600 }, { "epoch": 4.67, "learning_rate": 1.6906831507862446e-05, "loss": 4.6356, "step": 630 }, { "epoch": 4.67, "eval_loss": 4.49837589263916, "eval_runtime": 55.4914, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 630 }, { "epoch": 4.89, "learning_rate": 1.64979723212536e-05, "loss": 4.6119, "step": 660 }, { "epoch": 4.89, "eval_loss": 4.483817100524902, "eval_runtime": 55.3919, "eval_samples_per_second": 0.596, "eval_steps_per_second": 0.596, "step": 660 }, { "epoch": 5.11, "learning_rate": 1.606937900627157e-05, "loss": 4.5994, "step": 690 }, { "epoch": 5.11, "eval_loss": 4.471835613250732, "eval_runtime": 55.4563, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 690 }, { "epoch": 5.33, "learning_rate": 1.5622353186727542e-05, "loss": 4.5703, "step": 720 }, { "epoch": 5.33, "eval_loss": 4.460899353027344, "eval_runtime": 55.4262, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 720 }, { "epoch": 5.55, "learning_rate": 1.5158252465343242e-05, "loss": 4.5625, "step": 750 }, { "epoch": 5.55, "eval_loss": 4.450038909912109, "eval_runtime": 55.4182, "eval_samples_per_second": 0.595, "eval_steps_per_second": 0.595, "step": 750 }, { "epoch": 5.78, "learning_rate": 1.467848630075608e-05, "loss": 4.5649, "step": 780 }, { "epoch": 5.78, "eval_loss": 4.4421467781066895, "eval_runtime": 55.3928, "eval_samples_per_second": 0.596, "eval_steps_per_second": 0.596, "step": 780 }, { "epoch": 6.0, "learning_rate": 1.4184511727039612e-05, "loss": 4.559, "step": 810 }, { "epoch": 6.0, "eval_loss": 4.432287693023682, "eval_runtime": 54.0497, "eval_samples_per_second": 0.611, "eval_steps_per_second": 0.611, "step": 810 } ], "logging_steps": 30, "max_steps": 1900, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 30, "total_flos": 3.3050813792256e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }