{ "best_metric": 0.0846095159649849, "best_model_checkpoint": "/cronus_data/avirinchipur/implicit_motives/schone_ach_roba_large/final/checkpoint-1660", "epoch": 4.992481203007519, "eval_steps": 500, "global_step": 1660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15037593984962405, "grad_norm": 2.906954526901245, "learning_rate": 9.698795180722893e-06, "loss": 0.3261, "step": 50 }, { "epoch": 0.3007518796992481, "grad_norm": 2.4998762607574463, "learning_rate": 9.397590361445785e-06, "loss": 0.2116, "step": 100 }, { "epoch": 0.45112781954887216, "grad_norm": 2.2140071392059326, "learning_rate": 9.096385542168675e-06, "loss": 0.2101, "step": 150 }, { "epoch": 0.6015037593984962, "grad_norm": 2.7675399780273438, "learning_rate": 8.795180722891567e-06, "loss": 0.2021, "step": 200 }, { "epoch": 0.7518796992481203, "grad_norm": 3.146375894546509, "learning_rate": 8.493975903614459e-06, "loss": 0.1879, "step": 250 }, { "epoch": 0.9022556390977443, "grad_norm": 2.361593008041382, "learning_rate": 8.19277108433735e-06, "loss": 0.185, "step": 300 }, { "epoch": 0.9984962406015038, "eval_loss": 0.15738171339035034, "eval_roc_auc": 0.9583146269309021, "eval_runtime": 37.5317, "eval_samples_per_second": 434.326, "eval_steps_per_second": 1.705, "step": 332 }, { "epoch": 1.0526315789473684, "grad_norm": 4.361939907073975, "learning_rate": 7.891566265060243e-06, "loss": 0.1854, "step": 350 }, { "epoch": 1.2030075187969924, "grad_norm": 2.5755579471588135, "learning_rate": 7.590361445783133e-06, "loss": 0.1681, "step": 400 }, { "epoch": 1.3533834586466165, "grad_norm": 2.735053539276123, "learning_rate": 7.289156626506025e-06, "loss": 0.1685, "step": 450 }, { "epoch": 1.5037593984962405, "grad_norm": 4.289336681365967, "learning_rate": 6.987951807228917e-06, "loss": 0.1537, "step": 500 }, { "epoch": 1.6541353383458648, "grad_norm": 3.1198232173919678, "learning_rate": 6.686746987951808e-06, "loss": 0.1658, "step": 550 }, { "epoch": 1.8045112781954886, "grad_norm": 2.722614049911499, "learning_rate": 6.385542168674699e-06, "loss": 0.168, "step": 600 }, { "epoch": 1.954887218045113, "grad_norm": 2.933856964111328, "learning_rate": 6.084337349397591e-06, "loss": 0.1607, "step": 650 }, { "epoch": 2.0, "eval_loss": 0.12831807136535645, "eval_roc_auc": 0.9730460251844583, "eval_runtime": 37.2749, "eval_samples_per_second": 437.318, "eval_steps_per_second": 1.717, "step": 665 }, { "epoch": 2.1052631578947367, "grad_norm": 2.1795623302459717, "learning_rate": 5.783132530120482e-06, "loss": 0.1448, "step": 700 }, { "epoch": 2.255639097744361, "grad_norm": 3.511441230773926, "learning_rate": 5.4819277108433745e-06, "loss": 0.1407, "step": 750 }, { "epoch": 2.406015037593985, "grad_norm": 3.4894182682037354, "learning_rate": 5.180722891566266e-06, "loss": 0.1462, "step": 800 }, { "epoch": 2.556390977443609, "grad_norm": 2.4312453269958496, "learning_rate": 4.8795180722891575e-06, "loss": 0.1395, "step": 850 }, { "epoch": 2.706766917293233, "grad_norm": 2.9382171630859375, "learning_rate": 4.578313253012049e-06, "loss": 0.1379, "step": 900 }, { "epoch": 2.857142857142857, "grad_norm": 2.9116907119750977, "learning_rate": 4.27710843373494e-06, "loss": 0.1357, "step": 950 }, { "epoch": 2.998496240601504, "eval_loss": 0.10705896466970444, "eval_roc_auc": 0.9818360739923565, "eval_runtime": 37.422, "eval_samples_per_second": 435.599, "eval_steps_per_second": 1.71, "step": 997 }, { "epoch": 3.007518796992481, "grad_norm": 2.7223000526428223, "learning_rate": 3.975903614457832e-06, "loss": 0.1395, "step": 1000 }, { "epoch": 3.1578947368421053, "grad_norm": 2.47904634475708, "learning_rate": 3.6746987951807235e-06, "loss": 0.1247, "step": 1050 }, { "epoch": 3.308270676691729, "grad_norm": 3.8209755420684814, "learning_rate": 3.3734939759036146e-06, "loss": 0.1138, "step": 1100 }, { "epoch": 3.4586466165413534, "grad_norm": 2.873736619949341, "learning_rate": 3.072289156626506e-06, "loss": 0.121, "step": 1150 }, { "epoch": 3.6090225563909772, "grad_norm": 2.8755416870117188, "learning_rate": 2.771084337349398e-06, "loss": 0.1184, "step": 1200 }, { "epoch": 3.7593984962406015, "grad_norm": 3.195455312728882, "learning_rate": 2.469879518072289e-06, "loss": 0.1201, "step": 1250 }, { "epoch": 3.909774436090226, "grad_norm": 5.824981212615967, "learning_rate": 2.168674698795181e-06, "loss": 0.1214, "step": 1300 }, { "epoch": 4.0, "eval_loss": 0.09776945412158966, "eval_roc_auc": 0.9869506244923937, "eval_runtime": 37.3331, "eval_samples_per_second": 436.636, "eval_steps_per_second": 1.714, "step": 1330 }, { "epoch": 4.06015037593985, "grad_norm": 2.435054063796997, "learning_rate": 1.8674698795180723e-06, "loss": 0.1167, "step": 1350 }, { "epoch": 4.2105263157894735, "grad_norm": 3.873215436935425, "learning_rate": 1.566265060240964e-06, "loss": 0.1077, "step": 1400 }, { "epoch": 4.360902255639098, "grad_norm": 3.5485968589782715, "learning_rate": 1.2650602409638555e-06, "loss": 0.1027, "step": 1450 }, { "epoch": 4.511278195488722, "grad_norm": 4.283125877380371, "learning_rate": 9.638554216867472e-07, "loss": 0.1144, "step": 1500 }, { "epoch": 4.661654135338345, "grad_norm": 4.050317287445068, "learning_rate": 6.626506024096387e-07, "loss": 0.1075, "step": 1550 }, { "epoch": 4.81203007518797, "grad_norm": 3.838085889816284, "learning_rate": 3.614457831325301e-07, "loss": 0.1078, "step": 1600 }, { "epoch": 4.962406015037594, "grad_norm": 3.882603645324707, "learning_rate": 6.02409638554217e-08, "loss": 0.1011, "step": 1650 }, { "epoch": 4.992481203007519, "eval_loss": 0.0846095159649849, "eval_roc_auc": 0.9888130720155254, "eval_runtime": 37.3263, "eval_samples_per_second": 436.716, "eval_steps_per_second": 1.715, "step": 1660 }, { "epoch": 4.992481203007519, "step": 1660, "total_flos": 4.323837898114577e+16, "train_loss": 0.14982930845524892, "train_runtime": 2902.6478, "train_samples_per_second": 146.465, "train_steps_per_second": 0.572 } ], "logging_steps": 50, "max_steps": 1660, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.323837898114577e+16, "train_batch_size": 128, "trial_name": null, "trial_params": null }