{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 219, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0045662100456621, "grad_norm": 6.815864086151123, "learning_rate": 9.090909090909091e-06, "loss": 1.7729, "step": 1 }, { "epoch": 0.0228310502283105, "grad_norm": 5.818741798400879, "learning_rate": 4.545454545454546e-05, "loss": 1.7132, "step": 5 }, { "epoch": 0.045662100456621, "grad_norm": 2.6188273429870605, "learning_rate": 9.090909090909092e-05, "loss": 1.4372, "step": 10 }, { "epoch": 0.0684931506849315, "grad_norm": 1.4498921632766724, "learning_rate": 0.00013636363636363637, "loss": 1.2124, "step": 15 }, { "epoch": 0.091324200913242, "grad_norm": 1.0618371963500977, "learning_rate": 0.00018181818181818183, "loss": 1.0648, "step": 20 }, { "epoch": 0.1141552511415525, "grad_norm": 0.8222594857215881, "learning_rate": 0.00019988558131018186, "loss": 0.9718, "step": 25 }, { "epoch": 0.136986301369863, "grad_norm": 0.621496856212616, "learning_rate": 0.00019918730395931649, "loss": 0.9004, "step": 30 }, { "epoch": 0.1598173515981735, "grad_norm": 0.3935670554637909, "learning_rate": 0.00019785874696801202, "loss": 0.8609, "step": 35 }, { "epoch": 0.182648401826484, "grad_norm": 0.37850192189216614, "learning_rate": 0.00019590835257019714, "loss": 0.8519, "step": 40 }, { "epoch": 0.2054794520547945, "grad_norm": 0.33678698539733887, "learning_rate": 0.00019334851442746664, "loss": 0.8249, "step": 45 }, { "epoch": 0.228310502283105, "grad_norm": 0.5497208833694458, "learning_rate": 0.00019019549887431877, "loss": 0.8155, "step": 50 }, { "epoch": 0.2511415525114155, "grad_norm": 0.4304714798927307, "learning_rate": 0.00018646934155473022, "loss": 0.8072, "step": 55 }, { "epoch": 0.273972602739726, "grad_norm": 0.3267618417739868, "learning_rate": 0.00018219372010688515, "loss": 0.792, "step": 60 }, { "epoch": 0.2968036529680365, "grad_norm": 0.5633127689361572, "learning_rate": 0.00017739580370507532, "loss": 0.7879, "step": 65 }, { "epoch": 0.319634703196347, "grad_norm": 0.39324459433555603, "learning_rate": 0.0001721060804148482, "loss": 0.7944, "step": 70 }, { "epoch": 0.3424657534246575, "grad_norm": 0.3261910080909729, "learning_rate": 0.0001663581634584641, "loss": 0.7872, "step": 75 }, { "epoch": 0.365296803652968, "grad_norm": 0.3310278356075287, "learning_rate": 0.0001601885776217367, "loss": 0.7789, "step": 80 }, { "epoch": 0.3881278538812785, "grad_norm": 0.356475830078125, "learning_rate": 0.0001536365271595212, "loss": 0.7805, "step": 85 }, { "epoch": 0.410958904109589, "grad_norm": 0.6090440154075623, "learning_rate": 0.0001467436466746814, "loss": 0.7725, "step": 90 }, { "epoch": 0.4337899543378995, "grad_norm": 0.648769736289978, "learning_rate": 0.0001395537365535585, "loss": 0.7696, "step": 95 }, { "epoch": 0.45662100456621, "grad_norm": 0.3896843492984772, "learning_rate": 0.00013211248463910262, "loss": 0.7703, "step": 100 }, { "epoch": 0.4794520547945205, "grad_norm": 0.40256544947624207, "learning_rate": 0.00012446717591027624, "loss": 0.7577, "step": 105 }, { "epoch": 0.502283105022831, "grad_norm": 0.3406723141670227, "learning_rate": 0.00011666639201255506, "loss": 0.7572, "step": 110 }, { "epoch": 0.5251141552511416, "grad_norm": 0.4368165135383606, "learning_rate": 0.0001087597025488413, "loss": 0.7578, "step": 115 }, { "epoch": 0.547945205479452, "grad_norm": 0.45874515175819397, "learning_rate": 0.00010079735009246167, "loss": 0.7593, "step": 120 }, { "epoch": 0.5707762557077626, "grad_norm": 0.44529327750205994, "learning_rate": 9.282993092381625e-05, "loss": 0.7479, "step": 125 }, { "epoch": 0.593607305936073, "grad_norm": 0.40876489877700806, "learning_rate": 8.490807351941753e-05, "loss": 0.7486, "step": 130 }, { "epoch": 0.6164383561643836, "grad_norm": 0.3738647699356079, "learning_rate": 7.708211683634112e-05, "loss": 0.7478, "step": 135 }, { "epoch": 0.639269406392694, "grad_norm": 0.3480348587036133, "learning_rate": 6.940179043641005e-05, "loss": 0.7401, "step": 140 }, { "epoch": 0.6621004566210046, "grad_norm": 0.3515714108943939, "learning_rate": 6.191589848274368e-05, "loss": 0.7477, "step": 145 }, { "epoch": 0.684931506849315, "grad_norm": 0.4522218108177185, "learning_rate": 5.467200961669619e-05, "loss": 0.7447, "step": 150 }, { "epoch": 0.7077625570776256, "grad_norm": 0.331890344619751, "learning_rate": 4.7716154685841944e-05, "loss": 0.746, "step": 155 }, { "epoch": 0.730593607305936, "grad_norm": 0.4267323613166809, "learning_rate": 4.109253424377772e-05, "loss": 0.7525, "step": 160 }, { "epoch": 0.7534246575342466, "grad_norm": 0.34801220893859863, "learning_rate": 3.4843237680415156e-05, "loss": 0.7408, "step": 165 }, { "epoch": 0.776255707762557, "grad_norm": 0.3299520015716553, "learning_rate": 2.9007975767533714e-05, "loss": 0.7324, "step": 170 }, { "epoch": 0.7990867579908676, "grad_norm": 0.3563040494918823, "learning_rate": 2.3623828319116748e-05, "loss": 0.7487, "step": 175 }, { "epoch": 0.821917808219178, "grad_norm": 0.355241984128952, "learning_rate": 1.8725008569947365e-05, "loss": 0.7521, "step": 180 }, { "epoch": 0.8447488584474886, "grad_norm": 0.32830801606178284, "learning_rate": 1.4342645769705977e-05, "loss": 0.737, "step": 185 }, { "epoch": 0.867579908675799, "grad_norm": 0.32326367497444153, "learning_rate": 1.0504587374062391e-05, "loss": 0.7397, "step": 190 }, { "epoch": 0.8904109589041096, "grad_norm": 0.33771994709968567, "learning_rate": 7.235222089726279e-06, "loss": 0.7373, "step": 195 }, { "epoch": 0.91324200913242, "grad_norm": 0.33751899003982544, "learning_rate": 4.555324897906132e-06, "loss": 0.7426, "step": 200 }, { "epoch": 0.9360730593607306, "grad_norm": 0.34009331464767456, "learning_rate": 2.4819250409651607e-06, "loss": 0.7403, "step": 205 }, { "epoch": 0.958904109589041, "grad_norm": 0.3508203327655792, "learning_rate": 1.0281978111449375e-06, "loss": 0.7469, "step": 210 }, { "epoch": 0.9817351598173516, "grad_norm": 0.3122396469116211, "learning_rate": 2.0338082897886079e-07, "loss": 0.742, "step": 215 }, { "epoch": 1.0, "eval_loss": 1.8227200508117676, "eval_runtime": 1.5688, "eval_samples_per_second": 7.649, "eval_steps_per_second": 0.637, "step": 219 }, { "epoch": 1.0, "step": 219, "total_flos": 6.128945867501076e+17, "train_loss": 0.8279621209183784, "train_runtime": 2699.8939, "train_samples_per_second": 5.187, "train_steps_per_second": 0.081 } ], "logging_steps": 5, "max_steps": 219, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.128945867501076e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }