XLMR_large32_2e-06_32_0.1_0.01 / trainer_state.json
AnnantJain's picture
Upload folder using huggingface_hub
28b1ed3 verified
{
"best_metric": 0.8623634558093347,
"best_model_checkpoint": "./XLMR_large32-multi-outputs/checkpoint-18000",
"epoch": 12.91248206599713,
"eval_steps": 1000,
"global_step": 18000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7173601147776184,
"grad_norm": 16.311723709106445,
"learning_rate": 8.964589870013447e-07,
"loss": 0.6986,
"step": 1000
},
{
"epoch": 0.7173601147776184,
"eval_accuracy": 0.5649409627611263,
"eval_f1": 0.6076986076986077,
"eval_loss": 0.6808627843856812,
"eval_precision": 0.5584545910687406,
"eval_recall": 0.6664670658682634,
"eval_runtime": 103.0001,
"eval_samples_per_second": 96.204,
"eval_steps_per_second": 1.505,
"step": 1000
},
{
"epoch": 1.4347202295552366,
"grad_norm": 10.006734848022461,
"learning_rate": 1.7929179740026893e-06,
"loss": 0.6307,
"step": 2000
},
{
"epoch": 1.4347202295552366,
"eval_accuracy": 0.7109698254112423,
"eval_f1": 0.686720630059068,
"eval_loss": 0.510513424873352,
"eval_precision": 0.7596805421103582,
"eval_recall": 0.6265469061876248,
"eval_runtime": 102.7492,
"eval_samples_per_second": 96.439,
"eval_steps_per_second": 1.509,
"step": 2000
},
{
"epoch": 2.152080344332855,
"grad_norm": 17.957138061523438,
"learning_rate": 1.9233796642255767e-06,
"loss": 0.4567,
"step": 3000
},
{
"epoch": 2.152080344332855,
"eval_accuracy": 0.75739226965385,
"eval_f1": 0.7636184857423796,
"eval_loss": 0.4005236327648163,
"eval_precision": 0.7525193798449612,
"eval_recall": 0.7750499001996008,
"eval_runtime": 102.7312,
"eval_samples_per_second": 96.456,
"eval_steps_per_second": 1.509,
"step": 3000
},
{
"epoch": 2.869440459110473,
"grad_norm": 12.724874496459961,
"learning_rate": 1.8237433368206048e-06,
"loss": 0.3935,
"step": 4000
},
{
"epoch": 2.869440459110473,
"eval_accuracy": 0.7773741043495812,
"eval_f1": 0.7708766098878271,
"eval_loss": 0.36614805459976196,
"eval_precision": 0.8035946297098311,
"eval_recall": 0.7407185628742515,
"eval_runtime": 102.7412,
"eval_samples_per_second": 96.446,
"eval_steps_per_second": 1.509,
"step": 4000
},
{
"epoch": 3.586800573888092,
"grad_norm": 16.976110458374023,
"learning_rate": 1.724107009415633e-06,
"loss": 0.3607,
"step": 5000
},
{
"epoch": 3.586800573888092,
"eval_accuracy": 0.8000807346856393,
"eval_f1": 0.8191363096868438,
"eval_loss": 0.34079551696777344,
"eval_precision": 0.7548376240955746,
"eval_recall": 0.8954091816367266,
"eval_runtime": 102.5685,
"eval_samples_per_second": 96.609,
"eval_steps_per_second": 1.511,
"step": 5000
},
{
"epoch": 4.30416068866571,
"grad_norm": 13.054981231689453,
"learning_rate": 1.624470682010661e-06,
"loss": 0.3314,
"step": 6000
},
{
"epoch": 4.30416068866571,
"eval_accuracy": 0.8120900191744879,
"eval_f1": 0.8181640625,
"eval_loss": 0.3178483843803406,
"eval_precision": 0.8009560229445507,
"eval_recall": 0.836127744510978,
"eval_runtime": 102.6877,
"eval_samples_per_second": 96.497,
"eval_steps_per_second": 1.509,
"step": 6000
},
{
"epoch": 5.0215208034433285,
"grad_norm": 22.831750869750977,
"learning_rate": 1.5248343546056892e-06,
"loss": 0.3034,
"step": 7000
},
{
"epoch": 5.0215208034433285,
"eval_accuracy": 0.8225855283075991,
"eval_f1": 0.8238476953907815,
"eval_loss": 0.3033134341239929,
"eval_precision": 0.8271629778672032,
"eval_recall": 0.820558882235529,
"eval_runtime": 102.5363,
"eval_samples_per_second": 96.639,
"eval_steps_per_second": 1.512,
"step": 7000
},
{
"epoch": 5.738880918220947,
"grad_norm": 34.70615005493164,
"learning_rate": 1.4251980272007174e-06,
"loss": 0.276,
"step": 8000
},
{
"epoch": 5.738880918220947,
"eval_accuracy": 0.838934302149561,
"eval_f1": 0.8336113427856547,
"eval_loss": 0.2884746193885803,
"eval_precision": 0.8725447402880838,
"eval_recall": 0.7980039920159681,
"eval_runtime": 102.5026,
"eval_samples_per_second": 96.671,
"eval_steps_per_second": 1.512,
"step": 8000
},
{
"epoch": 6.456241032998565,
"grad_norm": 12.14201831817627,
"learning_rate": 1.3255616997957455e-06,
"loss": 0.2633,
"step": 9000
},
{
"epoch": 6.456241032998565,
"eval_accuracy": 0.8357049147239883,
"eval_f1": 0.833979196410361,
"eval_loss": 0.3028285503387451,
"eval_precision": 0.8525854879065888,
"eval_recall": 0.8161676646706587,
"eval_runtime": 102.6676,
"eval_samples_per_second": 96.515,
"eval_steps_per_second": 1.51,
"step": 9000
},
{
"epoch": 7.173601147776184,
"grad_norm": 27.226970672607422,
"learning_rate": 1.2259253723907737e-06,
"loss": 0.248,
"step": 10000
},
{
"epoch": 7.173601147776184,
"eval_accuracy": 0.8506408315672621,
"eval_f1": 0.8506559031281534,
"eval_loss": 0.2664211690425873,
"eval_precision": 0.860204081632653,
"eval_recall": 0.8413173652694611,
"eval_runtime": 102.4829,
"eval_samples_per_second": 96.689,
"eval_steps_per_second": 1.512,
"step": 10000
},
{
"epoch": 7.890961262553802,
"grad_norm": 14.358187675476074,
"learning_rate": 1.1262890449858017e-06,
"loss": 0.2351,
"step": 11000
},
{
"epoch": 7.890961262553802,
"eval_accuracy": 0.85326470885054,
"eval_f1": 0.8551215623754483,
"eval_loss": 0.2719918191432953,
"eval_precision": 0.8537604456824512,
"eval_recall": 0.8564870259481038,
"eval_runtime": 102.5517,
"eval_samples_per_second": 96.624,
"eval_steps_per_second": 1.511,
"step": 11000
},
{
"epoch": 8.60832137733142,
"grad_norm": 7.578697681427002,
"learning_rate": 1.02665271758083e-06,
"loss": 0.2247,
"step": 12000
},
{
"epoch": 8.60832137733142,
"eval_accuracy": 0.8555858310626703,
"eval_f1": 0.852853470437018,
"eval_loss": 0.26294445991516113,
"eval_precision": 0.8795334040296925,
"eval_recall": 0.8277445109780439,
"eval_runtime": 102.496,
"eval_samples_per_second": 96.677,
"eval_steps_per_second": 1.512,
"step": 12000
},
{
"epoch": 9.32568149210904,
"grad_norm": 46.33930969238281,
"learning_rate": 9.27016390175858e-07,
"loss": 0.2158,
"step": 13000
},
{
"epoch": 9.32568149210904,
"eval_accuracy": 0.8530628721364416,
"eval_f1": 0.8558415841584158,
"eval_loss": 0.28966662287712097,
"eval_precision": 0.8491159135559921,
"eval_recall": 0.8626746506986028,
"eval_runtime": 102.3088,
"eval_samples_per_second": 96.854,
"eval_steps_per_second": 1.515,
"step": 13000
},
{
"epoch": 10.043041606886657,
"grad_norm": 13.620414733886719,
"learning_rate": 8.273800627708863e-07,
"loss": 0.2085,
"step": 14000
},
{
"epoch": 10.043041606886657,
"eval_accuracy": 0.8604299122010294,
"eval_f1": 0.8604017361461593,
"eval_loss": 0.2769163250923157,
"eval_precision": 0.8703287727179906,
"eval_recall": 0.8506986027944112,
"eval_runtime": 102.3036,
"eval_samples_per_second": 96.859,
"eval_steps_per_second": 1.515,
"step": 14000
},
{
"epoch": 10.760401721664275,
"grad_norm": 5.338364601135254,
"learning_rate": 7.277437353659144e-07,
"loss": 0.1999,
"step": 15000
},
{
"epoch": 10.760401721664275,
"eval_accuracy": 0.8576041982036532,
"eval_f1": 0.8572006881894545,
"eval_loss": 0.2730887830257416,
"eval_precision": 0.8694313282693492,
"eval_recall": 0.845309381237525,
"eval_runtime": 102.2525,
"eval_samples_per_second": 96.907,
"eval_steps_per_second": 1.516,
"step": 15000
},
{
"epoch": 11.477761836441895,
"grad_norm": 19.627927780151367,
"learning_rate": 6.281074079609425e-07,
"loss": 0.1981,
"step": 16000
},
{
"epoch": 11.477761836441895,
"eval_accuracy": 0.8634574629125038,
"eval_f1": 0.8587535233322894,
"eval_loss": 0.27952489256858826,
"eval_precision": 0.9001969796454367,
"eval_recall": 0.8209580838323354,
"eval_runtime": 102.2452,
"eval_samples_per_second": 96.914,
"eval_steps_per_second": 1.516,
"step": 16000
},
{
"epoch": 12.195121951219512,
"grad_norm": 19.674339294433594,
"learning_rate": 5.284710805559706e-07,
"loss": 0.1933,
"step": 17000
},
{
"epoch": 12.195121951219512,
"eval_accuracy": 0.858815218488243,
"eval_f1": 0.8589291116265,
"eval_loss": 0.28878509998321533,
"eval_precision": 0.8679437538210719,
"eval_recall": 0.8500998003992016,
"eval_runtime": 102.3747,
"eval_samples_per_second": 96.791,
"eval_steps_per_second": 1.514,
"step": 17000
},
{
"epoch": 12.91248206599713,
"grad_norm": 7.702261447906494,
"learning_rate": 4.288347531509988e-07,
"loss": 0.1888,
"step": 18000
},
{
"epoch": 12.91248206599713,
"eval_accuracy": 0.860127157129882,
"eval_f1": 0.8623634558093347,
"eval_loss": 0.2733915150165558,
"eval_precision": 0.858102766798419,
"eval_recall": 0.8666666666666667,
"eval_runtime": 102.3818,
"eval_samples_per_second": 96.785,
"eval_steps_per_second": 1.514,
"step": 18000
}
],
"logging_steps": 1000,
"max_steps": 22304,
"num_input_tokens_seen": 0,
"num_train_epochs": 16,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0007259177082022e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}