{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 134, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.007462686567164179, "grad_norm": 7.811283588409424, "learning_rate": 1.4285714285714285e-05, "loss": 2.0521, "step": 1 }, { "epoch": 0.03731343283582089, "grad_norm": 6.445086479187012, "learning_rate": 7.142857142857143e-05, "loss": 2.0103, "step": 5 }, { "epoch": 0.07462686567164178, "grad_norm": 4.3372368812561035, "learning_rate": 0.00014285714285714287, "loss": 1.6079, "step": 10 }, { "epoch": 0.11194029850746269, "grad_norm": 1.0932683944702148, "learning_rate": 0.00019996573249755572, "loss": 1.3371, "step": 15 }, { "epoch": 0.14925373134328357, "grad_norm": 0.9909504055976868, "learning_rate": 0.00019876883405951377, "loss": 1.1968, "step": 20 }, { "epoch": 0.1865671641791045, "grad_norm": 0.8857837915420532, "learning_rate": 0.0001958819734868193, "loss": 1.107, "step": 25 }, { "epoch": 0.22388059701492538, "grad_norm": 0.8879585266113281, "learning_rate": 0.0001913545457642601, "loss": 1.0563, "step": 30 }, { "epoch": 0.26119402985074625, "grad_norm": 0.5094943642616272, "learning_rate": 0.00018526401643540922, "loss": 1.0143, "step": 35 }, { "epoch": 0.29850746268656714, "grad_norm": 0.39295411109924316, "learning_rate": 0.0001777145961456971, "loss": 0.9871, "step": 40 }, { "epoch": 0.3358208955223881, "grad_norm": 0.5487385988235474, "learning_rate": 0.0001688354575693754, "loss": 0.9945, "step": 45 }, { "epoch": 0.373134328358209, "grad_norm": 0.48006948828697205, "learning_rate": 0.00015877852522924732, "loss": 0.9775, "step": 50 }, { "epoch": 0.41044776119402987, "grad_norm": 0.32778072357177734, "learning_rate": 0.00014771587602596084, "loss": 0.9614, "step": 55 }, { "epoch": 0.44776119402985076, "grad_norm": 0.6229733824729919, "learning_rate": 0.00013583679495453, "loss": 0.9528, "step": 60 }, { "epoch": 0.48507462686567165, "grad_norm": 0.37184789776802063, "learning_rate": 0.00012334453638559057, "loss": 0.9527, "step": 65 }, { "epoch": 0.5223880597014925, "grad_norm": 0.8213186264038086, "learning_rate": 0.00011045284632676536, "loss": 0.9422, "step": 70 }, { "epoch": 0.5597014925373134, "grad_norm": 0.5482317209243774, "learning_rate": 9.73823051692127e-05, "loss": 0.9439, "step": 75 }, { "epoch": 0.5970149253731343, "grad_norm": 0.535866379737854, "learning_rate": 8.435655349597689e-05, "loss": 0.9348, "step": 80 }, { "epoch": 0.6343283582089553, "grad_norm": 0.4782055914402008, "learning_rate": 7.159846552960774e-05, "loss": 0.932, "step": 85 }, { "epoch": 0.6716417910447762, "grad_norm": 0.44723376631736755, "learning_rate": 5.9326335692419995e-05, "loss": 0.9185, "step": 90 }, { "epoch": 0.7089552238805971, "grad_norm": 0.3611028790473938, "learning_rate": 4.7750143528405126e-05, "loss": 0.9224, "step": 95 }, { "epoch": 0.746268656716418, "grad_norm": 0.3897744119167328, "learning_rate": 3.7067960895016275e-05, "loss": 0.9175, "step": 100 }, { "epoch": 0.7835820895522388, "grad_norm": 0.3638187646865845, "learning_rate": 2.746256289877126e-05, "loss": 0.9237, "step": 105 }, { "epoch": 0.8208955223880597, "grad_norm": 0.36973854899406433, "learning_rate": 1.9098300562505266e-05, "loss": 0.9264, "step": 110 }, { "epoch": 0.8582089552238806, "grad_norm": 0.3612724244594574, "learning_rate": 1.2118288733803473e-05, "loss": 0.9275, "step": 115 }, { "epoch": 0.8955223880597015, "grad_norm": 0.3824576139450073, "learning_rate": 6.6419573502798374e-06, "loss": 0.9104, "step": 120 }, { "epoch": 0.9328358208955224, "grad_norm": 0.3507688343524933, "learning_rate": 2.7630079602323442e-06, "loss": 0.929, "step": 125 }, { "epoch": 0.9701492537313433, "grad_norm": 0.3513280153274536, "learning_rate": 5.478104631726711e-07, "loss": 0.9141, "step": 130 }, { "epoch": 1.0, "eval_loss": 1.4816477298736572, "eval_runtime": 1.5129, "eval_samples_per_second": 1.322, "eval_steps_per_second": 0.661, "step": 134 }, { "epoch": 1.0, "step": 134, "total_flos": 3.750131265144095e+17, "train_loss": 1.0418888739685515, "train_runtime": 1659.9836, "train_samples_per_second": 5.16, "train_steps_per_second": 0.081 } ], "logging_steps": 5, "max_steps": 134, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.750131265144095e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }