{ "best_metric": 0.20781023800373077, "best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_aragpt2-base/checkpoint-12684", "epoch": 17.0, "eval_steps": 500, "global_step": 17969, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.21245746314525604, "learning_rate": 4.865067829457365e-05, "loss": 0.9334, "step": 1057 }, { "epoch": 1.0, "eval_bleu": 0.003236761532472025, "eval_loss": 0.24612107872962952, "eval_rouge1": 0.1592859430436859, "eval_rouge2": 0.01850245530694966, "eval_rougeL": 0.15327183350590745, "eval_runtime": 47.2784, "eval_samples_per_second": 44.693, "eval_steps_per_second": 5.605, "step": 1057 }, { "epoch": 2.0, "grad_norm": 0.1900666505098343, "learning_rate": 4.609011627906977e-05, "loss": 0.0868, "step": 2114 }, { "epoch": 2.0, "eval_bleu": 0.01485892408066134, "eval_loss": 0.2332119345664978, "eval_rouge1": 0.24547054015158856, "eval_rouge2": 0.05101888171169662, "eval_rougeL": 0.23941488622001442, "eval_runtime": 106.4482, "eval_samples_per_second": 19.85, "eval_steps_per_second": 2.489, "step": 2114 }, { "epoch": 3.0, "grad_norm": 0.17272518575191498, "learning_rate": 4.3529554263565894e-05, "loss": 0.0767, "step": 3171 }, { "epoch": 3.0, "eval_bleu": 0.025242358861939478, "eval_loss": 0.23418210446834564, "eval_rouge1": 0.2960794609105263, "eval_rouge2": 0.078188861596172, "eval_rougeL": 0.2910032597311343, "eval_runtime": 46.6571, "eval_samples_per_second": 45.288, "eval_steps_per_second": 5.68, "step": 3171 }, { "epoch": 4.0, "grad_norm": 0.26465269923210144, "learning_rate": 4.096899224806201e-05, "loss": 0.0696, "step": 4228 }, { "epoch": 4.0, "eval_bleu": 0.040436621142289915, "eval_loss": 0.2277713567018509, "eval_rouge1": 0.3300020560602195, "eval_rouge2": 0.10496894572689558, "eval_rougeL": 0.32523455556499214, "eval_runtime": 46.6771, "eval_samples_per_second": 45.268, "eval_steps_per_second": 5.677, "step": 4228 }, { "epoch": 5.0, "grad_norm": 0.19601380825042725, "learning_rate": 3.840843023255814e-05, "loss": 0.0636, "step": 5285 }, { "epoch": 5.0, "eval_bleu": 0.05166825934118283, "eval_loss": 0.22189512848854065, "eval_rouge1": 0.3535589938019066, "eval_rouge2": 0.12152558383102884, "eval_rougeL": 0.3480324766784169, "eval_runtime": 76.0841, "eval_samples_per_second": 27.772, "eval_steps_per_second": 3.483, "step": 5285 }, { "epoch": 6.0, "grad_norm": 0.22665895521640778, "learning_rate": 3.5847868217054265e-05, "loss": 0.0587, "step": 6342 }, { "epoch": 6.0, "eval_bleu": 0.05899956949346335, "eval_loss": 0.22368572652339935, "eval_rouge1": 0.3654354164962346, "eval_rouge2": 0.13482081314433186, "eval_rougeL": 0.3611275649771175, "eval_runtime": 108.2192, "eval_samples_per_second": 19.525, "eval_steps_per_second": 2.449, "step": 6342 }, { "epoch": 7.0, "grad_norm": 0.1510099470615387, "learning_rate": 3.328730620155039e-05, "loss": 0.0542, "step": 7399 }, { "epoch": 7.0, "eval_bleu": 0.06674675327067288, "eval_loss": 0.21935084462165833, "eval_rouge1": 0.375454032579127, "eval_rouge2": 0.1440449212655856, "eval_rougeL": 0.37123789211821234, "eval_runtime": 169.6548, "eval_samples_per_second": 12.455, "eval_steps_per_second": 1.562, "step": 7399 }, { "epoch": 8.0, "grad_norm": 0.17433832585811615, "learning_rate": 3.0726744186046517e-05, "loss": 0.0502, "step": 8456 }, { "epoch": 8.0, "eval_bleu": 0.07150751079471653, "eval_loss": 0.20800183713436127, "eval_rouge1": 0.38017302561406896, "eval_rouge2": 0.1520586373563908, "eval_rougeL": 0.3761265584375465, "eval_runtime": 59.9519, "eval_samples_per_second": 35.245, "eval_steps_per_second": 4.42, "step": 8456 }, { "epoch": 9.0, "grad_norm": 0.23176324367523193, "learning_rate": 2.816618217054264e-05, "loss": 0.0468, "step": 9513 }, { "epoch": 9.0, "eval_bleu": 0.07701622828998697, "eval_loss": 0.21230556070804596, "eval_rouge1": 0.39305333263185294, "eval_rouge2": 0.16160655664305132, "eval_rougeL": 0.3889099045386816, "eval_runtime": 47.0282, "eval_samples_per_second": 44.93, "eval_steps_per_second": 5.635, "step": 9513 }, { "epoch": 10.0, "grad_norm": 0.22061537206172943, "learning_rate": 2.560562015503876e-05, "loss": 0.0438, "step": 10570 }, { "epoch": 10.0, "eval_bleu": 0.08116803822206035, "eval_loss": 0.21123112738132477, "eval_rouge1": 0.39211790695723037, "eval_rouge2": 0.16476729267221837, "eval_rougeL": 0.3884162333833129, "eval_runtime": 52.3744, "eval_samples_per_second": 40.344, "eval_steps_per_second": 5.06, "step": 10570 }, { "epoch": 11.0, "grad_norm": 0.29454106092453003, "learning_rate": 2.3045058139534884e-05, "loss": 0.0408, "step": 11627 }, { "epoch": 11.0, "eval_bleu": 0.08163331720700502, "eval_loss": 0.21018122136592865, "eval_rouge1": 0.39673991981574686, "eval_rouge2": 0.16533579969575996, "eval_rougeL": 0.3935895133910881, "eval_runtime": 169.7591, "eval_samples_per_second": 12.447, "eval_steps_per_second": 1.561, "step": 11627 }, { "epoch": 12.0, "grad_norm": 0.20639048516750336, "learning_rate": 2.0484496124031007e-05, "loss": 0.0384, "step": 12684 }, { "epoch": 12.0, "eval_bleu": 0.08445355735141684, "eval_loss": 0.20781023800373077, "eval_rouge1": 0.40183732359428986, "eval_rouge2": 0.17106600485502385, "eval_rougeL": 0.39778146195081354, "eval_runtime": 169.7164, "eval_samples_per_second": 12.45, "eval_steps_per_second": 1.561, "step": 12684 }, { "epoch": 13.0, "grad_norm": 0.206428661942482, "learning_rate": 1.7923934108527132e-05, "loss": 0.0363, "step": 13741 }, { "epoch": 13.0, "eval_bleu": 0.08696571570490616, "eval_loss": 0.2144923061132431, "eval_rouge1": 0.4023135478695373, "eval_rouge2": 0.1719689510122407, "eval_rougeL": 0.39862674092398764, "eval_runtime": 169.681, "eval_samples_per_second": 12.453, "eval_steps_per_second": 1.562, "step": 13741 }, { "epoch": 14.0, "grad_norm": 0.1995713710784912, "learning_rate": 1.5363372093023258e-05, "loss": 0.0343, "step": 14798 }, { "epoch": 14.0, "eval_bleu": 0.08783833270174173, "eval_loss": 0.21651192009449005, "eval_rouge1": 0.406251961398795, "eval_rouge2": 0.17568144009642983, "eval_rougeL": 0.4022547036607911, "eval_runtime": 169.5372, "eval_samples_per_second": 12.463, "eval_steps_per_second": 1.563, "step": 14798 }, { "epoch": 15.0, "grad_norm": 0.22589825093746185, "learning_rate": 1.280281007751938e-05, "loss": 0.0327, "step": 15855 }, { "epoch": 15.0, "eval_bleu": 0.09203652391095736, "eval_loss": 0.21689023077487946, "eval_rouge1": 0.4049011529575125, "eval_rouge2": 0.17920037507226205, "eval_rougeL": 0.4013754949817908, "eval_runtime": 169.7215, "eval_samples_per_second": 12.45, "eval_steps_per_second": 1.561, "step": 15855 }, { "epoch": 16.0, "grad_norm": 0.2390994280576706, "learning_rate": 1.0242248062015503e-05, "loss": 0.0313, "step": 16912 }, { "epoch": 16.0, "eval_bleu": 0.09196638031992929, "eval_loss": 0.21753403544425964, "eval_rouge1": 0.4078242153735353, "eval_rouge2": 0.18209517089320632, "eval_rougeL": 0.40483345816401317, "eval_runtime": 169.6048, "eval_samples_per_second": 12.458, "eval_steps_per_second": 1.562, "step": 16912 }, { "epoch": 17.0, "grad_norm": 0.31637752056121826, "learning_rate": 7.681686046511629e-06, "loss": 0.0301, "step": 17969 }, { "epoch": 17.0, "eval_bleu": 0.09441216110381762, "eval_loss": 0.2191299945116043, "eval_rouge1": 0.41033944929551286, "eval_rouge2": 0.18390791273310297, "eval_rougeL": 0.40661041910814566, "eval_runtime": 169.743, "eval_samples_per_second": 12.448, "eval_steps_per_second": 1.561, "step": 17969 }, { "epoch": 17.0, "step": 17969, "total_flos": 7.5086968651776e+16, "train_loss": 0.10163650903314878, "train_runtime": 11373.3667, "train_samples_per_second": 14.863, "train_steps_per_second": 1.859 } ], "logging_steps": 500, "max_steps": 21140, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.5086968651776e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }