Mini-Spyra-0.1.3 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.746268656716418,
"eval_steps": 500,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.007462686567164179,
"grad_norm": 36.40082931518555,
"learning_rate": 5e-06,
"loss": 1.6703,
"step": 1
},
{
"epoch": 0.014925373134328358,
"grad_norm": 34.74425506591797,
"learning_rate": 1e-05,
"loss": 1.6634,
"step": 2
},
{
"epoch": 0.022388059701492536,
"grad_norm": 27.06759262084961,
"learning_rate": 9.89795918367347e-06,
"loss": 1.5168,
"step": 3
},
{
"epoch": 0.029850746268656716,
"grad_norm": 19.73917579650879,
"learning_rate": 9.795918367346939e-06,
"loss": 1.5485,
"step": 4
},
{
"epoch": 0.03731343283582089,
"grad_norm": 16.78091049194336,
"learning_rate": 9.693877551020408e-06,
"loss": 1.3194,
"step": 5
},
{
"epoch": 0.04477611940298507,
"grad_norm": 17.270545959472656,
"learning_rate": 9.591836734693878e-06,
"loss": 1.6497,
"step": 6
},
{
"epoch": 0.05223880597014925,
"grad_norm": 28.16309356689453,
"learning_rate": 9.489795918367348e-06,
"loss": 1.3361,
"step": 7
},
{
"epoch": 0.05970149253731343,
"grad_norm": 16.819095611572266,
"learning_rate": 9.387755102040818e-06,
"loss": 1.3992,
"step": 8
},
{
"epoch": 0.06716417910447761,
"grad_norm": 16.588680267333984,
"learning_rate": 9.285714285714288e-06,
"loss": 1.295,
"step": 9
},
{
"epoch": 0.07462686567164178,
"grad_norm": 18.916818618774414,
"learning_rate": 9.183673469387756e-06,
"loss": 1.5424,
"step": 10
},
{
"epoch": 0.08208955223880597,
"grad_norm": 20.738298416137695,
"learning_rate": 9.081632653061225e-06,
"loss": 1.7722,
"step": 11
},
{
"epoch": 0.08955223880597014,
"grad_norm": 17.011619567871094,
"learning_rate": 8.979591836734695e-06,
"loss": 1.3152,
"step": 12
},
{
"epoch": 0.09701492537313433,
"grad_norm": 17.751367568969727,
"learning_rate": 8.877551020408163e-06,
"loss": 1.5504,
"step": 13
},
{
"epoch": 0.1044776119402985,
"grad_norm": 16.80768394470215,
"learning_rate": 8.775510204081633e-06,
"loss": 1.3405,
"step": 14
},
{
"epoch": 0.11194029850746269,
"grad_norm": 16.026403427124023,
"learning_rate": 8.673469387755103e-06,
"loss": 1.5062,
"step": 15
},
{
"epoch": 0.11940298507462686,
"grad_norm": 23.68254852294922,
"learning_rate": 8.571428571428571e-06,
"loss": 1.5282,
"step": 16
},
{
"epoch": 0.12686567164179105,
"grad_norm": 14.981740951538086,
"learning_rate": 8.469387755102042e-06,
"loss": 0.9734,
"step": 17
},
{
"epoch": 0.13432835820895522,
"grad_norm": 15.60690975189209,
"learning_rate": 8.36734693877551e-06,
"loss": 1.2847,
"step": 18
},
{
"epoch": 0.1417910447761194,
"grad_norm": 14.372577667236328,
"learning_rate": 8.26530612244898e-06,
"loss": 1.3,
"step": 19
},
{
"epoch": 0.14925373134328357,
"grad_norm": 15.565217971801758,
"learning_rate": 8.16326530612245e-06,
"loss": 1.344,
"step": 20
},
{
"epoch": 0.15671641791044777,
"grad_norm": 16.995765686035156,
"learning_rate": 8.06122448979592e-06,
"loss": 1.1579,
"step": 21
},
{
"epoch": 0.16417910447761194,
"grad_norm": 19.22214698791504,
"learning_rate": 7.959183673469388e-06,
"loss": 1.3757,
"step": 22
},
{
"epoch": 0.17164179104477612,
"grad_norm": 15.240199089050293,
"learning_rate": 7.857142857142858e-06,
"loss": 1.3662,
"step": 23
},
{
"epoch": 0.1791044776119403,
"grad_norm": 13.559714317321777,
"learning_rate": 7.755102040816327e-06,
"loss": 1.4103,
"step": 24
},
{
"epoch": 0.1865671641791045,
"grad_norm": 11.714433670043945,
"learning_rate": 7.653061224489796e-06,
"loss": 0.4849,
"step": 25
},
{
"epoch": 0.19402985074626866,
"grad_norm": 9.63026237487793,
"learning_rate": 7.551020408163265e-06,
"loss": 0.8331,
"step": 26
},
{
"epoch": 0.20149253731343283,
"grad_norm": 14.981061935424805,
"learning_rate": 7.448979591836736e-06,
"loss": 1.1483,
"step": 27
},
{
"epoch": 0.208955223880597,
"grad_norm": 9.80345630645752,
"learning_rate": 7.346938775510205e-06,
"loss": 0.3934,
"step": 28
},
{
"epoch": 0.21641791044776118,
"grad_norm": 15.892987251281738,
"learning_rate": 7.244897959183675e-06,
"loss": 1.3524,
"step": 29
},
{
"epoch": 0.22388059701492538,
"grad_norm": 15.154711723327637,
"learning_rate": 7.1428571428571436e-06,
"loss": 1.2404,
"step": 30
},
{
"epoch": 0.23134328358208955,
"grad_norm": 21.161544799804688,
"learning_rate": 7.0408163265306125e-06,
"loss": 1.166,
"step": 31
},
{
"epoch": 0.23880597014925373,
"grad_norm": 16.721675872802734,
"learning_rate": 6.938775510204082e-06,
"loss": 1.3453,
"step": 32
},
{
"epoch": 0.2462686567164179,
"grad_norm": 15.240865707397461,
"learning_rate": 6.836734693877551e-06,
"loss": 1.017,
"step": 33
},
{
"epoch": 0.2537313432835821,
"grad_norm": 13.743293762207031,
"learning_rate": 6.734693877551021e-06,
"loss": 1.2646,
"step": 34
},
{
"epoch": 0.26119402985074625,
"grad_norm": 21.402868270874023,
"learning_rate": 6.63265306122449e-06,
"loss": 1.2539,
"step": 35
},
{
"epoch": 0.26865671641791045,
"grad_norm": 12.299694061279297,
"learning_rate": 6.530612244897959e-06,
"loss": 0.5733,
"step": 36
},
{
"epoch": 0.27611940298507465,
"grad_norm": 13.034706115722656,
"learning_rate": 6.4285714285714295e-06,
"loss": 1.3188,
"step": 37
},
{
"epoch": 0.2835820895522388,
"grad_norm": 13.541306495666504,
"learning_rate": 6.326530612244899e-06,
"loss": 0.8876,
"step": 38
},
{
"epoch": 0.291044776119403,
"grad_norm": 14.614362716674805,
"learning_rate": 6.224489795918368e-06,
"loss": 1.4944,
"step": 39
},
{
"epoch": 0.29850746268656714,
"grad_norm": 18.72150230407715,
"learning_rate": 6.122448979591837e-06,
"loss": 1.4622,
"step": 40
},
{
"epoch": 0.30597014925373134,
"grad_norm": 12.321627616882324,
"learning_rate": 6.020408163265307e-06,
"loss": 1.2637,
"step": 41
},
{
"epoch": 0.31343283582089554,
"grad_norm": 11.823638916015625,
"learning_rate": 5.918367346938776e-06,
"loss": 1.1925,
"step": 42
},
{
"epoch": 0.3208955223880597,
"grad_norm": 10.207959175109863,
"learning_rate": 5.816326530612246e-06,
"loss": 1.1677,
"step": 43
},
{
"epoch": 0.3283582089552239,
"grad_norm": 14.41779613494873,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.3201,
"step": 44
},
{
"epoch": 0.3358208955223881,
"grad_norm": 10.349492073059082,
"learning_rate": 5.6122448979591834e-06,
"loss": 1.0491,
"step": 45
},
{
"epoch": 0.34328358208955223,
"grad_norm": 9.687318801879883,
"learning_rate": 5.510204081632653e-06,
"loss": 0.5153,
"step": 46
},
{
"epoch": 0.35074626865671643,
"grad_norm": 12.616000175476074,
"learning_rate": 5.408163265306123e-06,
"loss": 1.3589,
"step": 47
},
{
"epoch": 0.3582089552238806,
"grad_norm": 15.669512748718262,
"learning_rate": 5.306122448979593e-06,
"loss": 0.7591,
"step": 48
},
{
"epoch": 0.3656716417910448,
"grad_norm": 11.46850299835205,
"learning_rate": 5.204081632653062e-06,
"loss": 1.0643,
"step": 49
},
{
"epoch": 0.373134328358209,
"grad_norm": 13.470056533813477,
"learning_rate": 5.1020408163265315e-06,
"loss": 1.1636,
"step": 50
},
{
"epoch": 0.3805970149253731,
"grad_norm": 12.210711479187012,
"learning_rate": 5e-06,
"loss": 0.5832,
"step": 51
},
{
"epoch": 0.3880597014925373,
"grad_norm": 12.775903701782227,
"learning_rate": 4.897959183673469e-06,
"loss": 0.8638,
"step": 52
},
{
"epoch": 0.39552238805970147,
"grad_norm": 12.06881046295166,
"learning_rate": 4.795918367346939e-06,
"loss": 1.234,
"step": 53
},
{
"epoch": 0.40298507462686567,
"grad_norm": 11.675975799560547,
"learning_rate": 4.693877551020409e-06,
"loss": 1.1502,
"step": 54
},
{
"epoch": 0.41044776119402987,
"grad_norm": 10.595233917236328,
"learning_rate": 4.591836734693878e-06,
"loss": 0.5959,
"step": 55
},
{
"epoch": 0.417910447761194,
"grad_norm": 13.459734916687012,
"learning_rate": 4.489795918367348e-06,
"loss": 1.4252,
"step": 56
},
{
"epoch": 0.4253731343283582,
"grad_norm": 10.65233325958252,
"learning_rate": 4.3877551020408165e-06,
"loss": 1.0071,
"step": 57
},
{
"epoch": 0.43283582089552236,
"grad_norm": 10.991082191467285,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.9786,
"step": 58
},
{
"epoch": 0.44029850746268656,
"grad_norm": 12.973753929138184,
"learning_rate": 4.183673469387755e-06,
"loss": 1.3687,
"step": 59
},
{
"epoch": 0.44776119402985076,
"grad_norm": 8.963390350341797,
"learning_rate": 4.081632653061225e-06,
"loss": 0.4663,
"step": 60
},
{
"epoch": 0.4552238805970149,
"grad_norm": 15.190298080444336,
"learning_rate": 3.979591836734694e-06,
"loss": 1.5676,
"step": 61
},
{
"epoch": 0.4626865671641791,
"grad_norm": 12.159441947937012,
"learning_rate": 3.877551020408164e-06,
"loss": 0.9444,
"step": 62
},
{
"epoch": 0.4701492537313433,
"grad_norm": 12.203204154968262,
"learning_rate": 3.7755102040816327e-06,
"loss": 1.0785,
"step": 63
},
{
"epoch": 0.47761194029850745,
"grad_norm": 13.535058975219727,
"learning_rate": 3.6734693877551024e-06,
"loss": 1.3801,
"step": 64
},
{
"epoch": 0.48507462686567165,
"grad_norm": 15.462494850158691,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.3773,
"step": 65
},
{
"epoch": 0.4925373134328358,
"grad_norm": 10.449372291564941,
"learning_rate": 3.469387755102041e-06,
"loss": 0.8747,
"step": 66
},
{
"epoch": 0.5,
"grad_norm": 15.766761779785156,
"learning_rate": 3.3673469387755105e-06,
"loss": 1.0549,
"step": 67
},
{
"epoch": 0.5074626865671642,
"grad_norm": 7.633336067199707,
"learning_rate": 3.2653061224489794e-06,
"loss": 0.344,
"step": 68
},
{
"epoch": 0.5149253731343284,
"grad_norm": 13.762042999267578,
"learning_rate": 3.1632653061224496e-06,
"loss": 1.2072,
"step": 69
},
{
"epoch": 0.5223880597014925,
"grad_norm": 11.871623992919922,
"learning_rate": 3.0612244897959185e-06,
"loss": 1.1146,
"step": 70
},
{
"epoch": 0.5298507462686567,
"grad_norm": 12.153115272521973,
"learning_rate": 2.959183673469388e-06,
"loss": 1.0648,
"step": 71
},
{
"epoch": 0.5373134328358209,
"grad_norm": 15.02953052520752,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.0374,
"step": 72
},
{
"epoch": 0.5447761194029851,
"grad_norm": 13.172088623046875,
"learning_rate": 2.7551020408163266e-06,
"loss": 1.2356,
"step": 73
},
{
"epoch": 0.5522388059701493,
"grad_norm": 11.575133323669434,
"learning_rate": 2.6530612244897964e-06,
"loss": 1.0695,
"step": 74
},
{
"epoch": 0.5597014925373134,
"grad_norm": 12.820709228515625,
"learning_rate": 2.5510204081632657e-06,
"loss": 1.1283,
"step": 75
},
{
"epoch": 0.5671641791044776,
"grad_norm": 12.87095832824707,
"learning_rate": 2.4489795918367347e-06,
"loss": 1.1721,
"step": 76
},
{
"epoch": 0.5746268656716418,
"grad_norm": 13.630508422851562,
"learning_rate": 2.3469387755102044e-06,
"loss": 1.2478,
"step": 77
},
{
"epoch": 0.582089552238806,
"grad_norm": 17.44233512878418,
"learning_rate": 2.244897959183674e-06,
"loss": 1.3778,
"step": 78
},
{
"epoch": 0.5895522388059702,
"grad_norm": 12.048669815063477,
"learning_rate": 2.1428571428571427e-06,
"loss": 0.8454,
"step": 79
},
{
"epoch": 0.5970149253731343,
"grad_norm": 10.956369400024414,
"learning_rate": 2.0408163265306125e-06,
"loss": 0.8798,
"step": 80
},
{
"epoch": 0.6044776119402985,
"grad_norm": 12.508173942565918,
"learning_rate": 1.938775510204082e-06,
"loss": 1.2105,
"step": 81
},
{
"epoch": 0.6119402985074627,
"grad_norm": 10.739660263061523,
"learning_rate": 1.8367346938775512e-06,
"loss": 1.0513,
"step": 82
},
{
"epoch": 0.6194029850746269,
"grad_norm": 14.237381935119629,
"learning_rate": 1.7346938775510206e-06,
"loss": 1.3152,
"step": 83
},
{
"epoch": 0.6268656716417911,
"grad_norm": 10.444908142089844,
"learning_rate": 1.6326530612244897e-06,
"loss": 0.7026,
"step": 84
},
{
"epoch": 0.6343283582089553,
"grad_norm": 12.019598960876465,
"learning_rate": 1.5306122448979593e-06,
"loss": 1.1211,
"step": 85
},
{
"epoch": 0.6417910447761194,
"grad_norm": 12.660603523254395,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.9115,
"step": 86
},
{
"epoch": 0.6492537313432836,
"grad_norm": 10.193121910095215,
"learning_rate": 1.3265306122448982e-06,
"loss": 0.9419,
"step": 87
},
{
"epoch": 0.6567164179104478,
"grad_norm": 14.029544830322266,
"learning_rate": 1.2244897959183673e-06,
"loss": 1.0446,
"step": 88
},
{
"epoch": 0.664179104477612,
"grad_norm": 10.461629867553711,
"learning_rate": 1.122448979591837e-06,
"loss": 0.9771,
"step": 89
},
{
"epoch": 0.6716417910447762,
"grad_norm": 7.607001781463623,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.5959,
"step": 90
},
{
"epoch": 0.6791044776119403,
"grad_norm": 10.331579208374023,
"learning_rate": 9.183673469387756e-07,
"loss": 0.9838,
"step": 91
},
{
"epoch": 0.6865671641791045,
"grad_norm": 10.43489933013916,
"learning_rate": 8.163265306122449e-07,
"loss": 0.9485,
"step": 92
},
{
"epoch": 0.6940298507462687,
"grad_norm": 11.645877838134766,
"learning_rate": 7.142857142857143e-07,
"loss": 1.1304,
"step": 93
},
{
"epoch": 0.7014925373134329,
"grad_norm": 11.262922286987305,
"learning_rate": 6.122448979591837e-07,
"loss": 0.7431,
"step": 94
},
{
"epoch": 0.7089552238805971,
"grad_norm": 12.494215965270996,
"learning_rate": 5.102040816326531e-07,
"loss": 1.1966,
"step": 95
},
{
"epoch": 0.7164179104477612,
"grad_norm": 11.587281227111816,
"learning_rate": 4.0816326530612243e-07,
"loss": 0.6951,
"step": 96
},
{
"epoch": 0.7238805970149254,
"grad_norm": 10.64450454711914,
"learning_rate": 3.0612244897959183e-07,
"loss": 0.6916,
"step": 97
},
{
"epoch": 0.7313432835820896,
"grad_norm": 11.353170394897461,
"learning_rate": 2.0408163265306121e-07,
"loss": 0.7753,
"step": 98
},
{
"epoch": 0.7388059701492538,
"grad_norm": 10.214649200439453,
"learning_rate": 1.0204081632653061e-07,
"loss": 0.6923,
"step": 99
},
{
"epoch": 0.746268656716418,
"grad_norm": 11.73704719543457,
"learning_rate": 0.0,
"loss": 1.1282,
"step": 100
},
{
"epoch": 0.746268656716418,
"step": 100,
"total_flos": 2305515375820800.0,
"train_loss": 1.1278739917278289,
"train_runtime": 528.5108,
"train_samples_per_second": 0.189,
"train_steps_per_second": 0.189
}
],
"logging_steps": 1,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2305515375820800.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
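
The log above is consistent with a standard linear schedule: a two-step warmup to the peak learning rate of 1e-05, followed by linear decay to 0 at the final step 100. Below is a minimal sketch for inspecting this state offline, assuming the file is saved locally as trainer_state.json and that matplotlib is installed (both are assumptions, not part of this upload):

import json
import matplotlib.pyplot as plt

# Load the exported Trainer state (assumed local path).
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the per-step records; the final summary entry has no "loss" key.
records = [r for r in state["log_history"] if "loss" in r]

steps = [r["step"] for r in records]
losses = [r["loss"] for r in records]
lrs = [r["learning_rate"] for r in records]

# Plot training loss and learning rate against the global step.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(6, 6))
ax1.plot(steps, losses)
ax1.set_ylabel("training loss")
ax2.plot(steps, lrs)
ax2.set_ylabel("learning rate")
ax2.set_xlabel("global step")
plt.tight_layout()
plt.show()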