{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.746268656716418,
  "eval_steps": 500,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007462686567164179,
      "grad_norm": 36.40082931518555,
      "learning_rate": 5e-06,
      "loss": 1.6703,
      "step": 1
    },
    {
      "epoch": 0.014925373134328358,
      "grad_norm": 34.74425506591797,
      "learning_rate": 1e-05,
      "loss": 1.6634,
      "step": 2
    },
    {
      "epoch": 0.022388059701492536,
      "grad_norm": 27.06759262084961,
      "learning_rate": 9.89795918367347e-06,
      "loss": 1.5168,
      "step": 3
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 19.73917579650879,
      "learning_rate": 9.795918367346939e-06,
      "loss": 1.5485,
      "step": 4
    },
    {
      "epoch": 0.03731343283582089,
      "grad_norm": 16.78091049194336,
      "learning_rate": 9.693877551020408e-06,
      "loss": 1.3194,
      "step": 5
    },
    {
      "epoch": 0.04477611940298507,
      "grad_norm": 17.270545959472656,
      "learning_rate": 9.591836734693878e-06,
      "loss": 1.6497,
      "step": 6
    },
    {
      "epoch": 0.05223880597014925,
      "grad_norm": 28.16309356689453,
      "learning_rate": 9.489795918367348e-06,
      "loss": 1.3361,
      "step": 7
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 16.819095611572266,
      "learning_rate": 9.387755102040818e-06,
      "loss": 1.3992,
      "step": 8
    },
    {
      "epoch": 0.06716417910447761,
      "grad_norm": 16.588680267333984,
      "learning_rate": 9.285714285714288e-06,
      "loss": 1.295,
      "step": 9
    },
    {
      "epoch": 0.07462686567164178,
      "grad_norm": 18.916818618774414,
      "learning_rate": 9.183673469387756e-06,
      "loss": 1.5424,
      "step": 10
    },
    {
      "epoch": 0.08208955223880597,
      "grad_norm": 20.738298416137695,
      "learning_rate": 9.081632653061225e-06,
      "loss": 1.7722,
      "step": 11
    },
    {
      "epoch": 0.08955223880597014,
      "grad_norm": 17.011619567871094,
      "learning_rate": 8.979591836734695e-06,
      "loss": 1.3152,
      "step": 12
    },
    {
      "epoch": 0.09701492537313433,
      "grad_norm": 17.751367568969727,
      "learning_rate": 8.877551020408163e-06,
      "loss": 1.5504,
      "step": 13
    },
    {
      "epoch": 0.1044776119402985,
      "grad_norm": 16.80768394470215,
      "learning_rate": 8.775510204081633e-06,
      "loss": 1.3405,
      "step": 14
    },
    {
      "epoch": 0.11194029850746269,
      "grad_norm": 16.026403427124023,
      "learning_rate": 8.673469387755103e-06,
      "loss": 1.5062,
      "step": 15
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 23.68254852294922,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.5282,
      "step": 16
    },
    {
      "epoch": 0.12686567164179105,
      "grad_norm": 14.981740951538086,
      "learning_rate": 8.469387755102042e-06,
      "loss": 0.9734,
      "step": 17
    },
    {
      "epoch": 0.13432835820895522,
      "grad_norm": 15.60690975189209,
      "learning_rate": 8.36734693877551e-06,
      "loss": 1.2847,
      "step": 18
    },
    {
      "epoch": 0.1417910447761194,
      "grad_norm": 14.372577667236328,
      "learning_rate": 8.26530612244898e-06,
      "loss": 1.3,
      "step": 19
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 15.565217971801758,
      "learning_rate": 8.16326530612245e-06,
      "loss": 1.344,
      "step": 20
    },
    {
      "epoch": 0.15671641791044777,
      "grad_norm": 16.995765686035156,
      "learning_rate": 8.06122448979592e-06,
      "loss": 1.1579,
      "step": 21
    },
    {
      "epoch": 0.16417910447761194,
      "grad_norm": 19.22214698791504,
      "learning_rate": 7.959183673469388e-06,
      "loss": 1.3757,
      "step": 22
    },
    {
      "epoch": 0.17164179104477612,
      "grad_norm": 15.240199089050293,
      "learning_rate": 7.857142857142858e-06,
      "loss": 1.3662,
      "step": 23
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 13.559714317321777,
      "learning_rate": 7.755102040816327e-06,
      "loss": 1.4103,
      "step": 24
    },
    {
      "epoch": 0.1865671641791045,
      "grad_norm": 11.714433670043945,
      "learning_rate": 7.653061224489796e-06,
      "loss": 0.4849,
      "step": 25
    },
    {
      "epoch": 0.19402985074626866,
      "grad_norm": 9.63026237487793,
      "learning_rate": 7.551020408163265e-06,
      "loss": 0.8331,
      "step": 26
    },
    {
      "epoch": 0.20149253731343283,
      "grad_norm": 14.981061935424805,
      "learning_rate": 7.448979591836736e-06,
      "loss": 1.1483,
      "step": 27
    },
    {
      "epoch": 0.208955223880597,
      "grad_norm": 9.80345630645752,
      "learning_rate": 7.346938775510205e-06,
      "loss": 0.3934,
      "step": 28
    },
    {
      "epoch": 0.21641791044776118,
      "grad_norm": 15.892987251281738,
      "learning_rate": 7.244897959183675e-06,
      "loss": 1.3524,
      "step": 29
    },
    {
      "epoch": 0.22388059701492538,
      "grad_norm": 15.154711723327637,
      "learning_rate": 7.1428571428571436e-06,
      "loss": 1.2404,
      "step": 30
    },
    {
      "epoch": 0.23134328358208955,
      "grad_norm": 21.161544799804688,
      "learning_rate": 7.0408163265306125e-06,
      "loss": 1.166,
      "step": 31
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 16.721675872802734,
      "learning_rate": 6.938775510204082e-06,
      "loss": 1.3453,
      "step": 32
    },
    {
      "epoch": 0.2462686567164179,
      "grad_norm": 15.240865707397461,
      "learning_rate": 6.836734693877551e-06,
      "loss": 1.017,
      "step": 33
    },
    {
      "epoch": 0.2537313432835821,
      "grad_norm": 13.743293762207031,
      "learning_rate": 6.734693877551021e-06,
      "loss": 1.2646,
      "step": 34
    },
    {
      "epoch": 0.26119402985074625,
      "grad_norm": 21.402868270874023,
      "learning_rate": 6.63265306122449e-06,
      "loss": 1.2539,
      "step": 35
    },
    {
      "epoch": 0.26865671641791045,
      "grad_norm": 12.299694061279297,
      "learning_rate": 6.530612244897959e-06,
      "loss": 0.5733,
      "step": 36
    },
    {
      "epoch": 0.27611940298507465,
      "grad_norm": 13.034706115722656,
      "learning_rate": 6.4285714285714295e-06,
      "loss": 1.3188,
      "step": 37
    },
    {
      "epoch": 0.2835820895522388,
      "grad_norm": 13.541306495666504,
      "learning_rate": 6.326530612244899e-06,
      "loss": 0.8876,
      "step": 38
    },
    {
      "epoch": 0.291044776119403,
      "grad_norm": 14.614362716674805,
      "learning_rate": 6.224489795918368e-06,
      "loss": 1.4944,
      "step": 39
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 18.72150230407715,
      "learning_rate": 6.122448979591837e-06,
      "loss": 1.4622,
      "step": 40
    },
    {
      "epoch": 0.30597014925373134,
      "grad_norm": 12.321627616882324,
      "learning_rate": 6.020408163265307e-06,
      "loss": 1.2637,
      "step": 41
    },
    {
      "epoch": 0.31343283582089554,
      "grad_norm": 11.823638916015625,
      "learning_rate": 5.918367346938776e-06,
      "loss": 1.1925,
      "step": 42
    },
    {
      "epoch": 0.3208955223880597,
      "grad_norm": 10.207959175109863,
      "learning_rate": 5.816326530612246e-06,
      "loss": 1.1677,
      "step": 43
    },
    {
      "epoch": 0.3283582089552239,
      "grad_norm": 14.41779613494873,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 1.3201,
      "step": 44
    },
    {
      "epoch": 0.3358208955223881,
      "grad_norm": 10.349492073059082,
      "learning_rate": 5.6122448979591834e-06,
      "loss": 1.0491,
      "step": 45
    },
    {
      "epoch": 0.34328358208955223,
      "grad_norm": 9.687318801879883,
      "learning_rate": 5.510204081632653e-06,
      "loss": 0.5153,
      "step": 46
    },
    {
      "epoch": 0.35074626865671643,
      "grad_norm": 12.616000175476074,
      "learning_rate": 5.408163265306123e-06,
      "loss": 1.3589,
      "step": 47
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 15.669512748718262,
      "learning_rate": 5.306122448979593e-06,
      "loss": 0.7591,
      "step": 48
    },
    {
      "epoch": 0.3656716417910448,
      "grad_norm": 11.46850299835205,
      "learning_rate": 5.204081632653062e-06,
      "loss": 1.0643,
      "step": 49
    },
    {
      "epoch": 0.373134328358209,
      "grad_norm": 13.470056533813477,
      "learning_rate": 5.1020408163265315e-06,
      "loss": 1.1636,
      "step": 50
    },
    {
      "epoch": 0.3805970149253731,
      "grad_norm": 12.210711479187012,
      "learning_rate": 5e-06,
      "loss": 0.5832,
      "step": 51
    },
    {
      "epoch": 0.3880597014925373,
      "grad_norm": 12.775903701782227,
      "learning_rate": 4.897959183673469e-06,
      "loss": 0.8638,
      "step": 52
    },
    {
      "epoch": 0.39552238805970147,
      "grad_norm": 12.06881046295166,
      "learning_rate": 4.795918367346939e-06,
      "loss": 1.234,
      "step": 53
    },
    {
      "epoch": 0.40298507462686567,
      "grad_norm": 11.675975799560547,
      "learning_rate": 4.693877551020409e-06,
      "loss": 1.1502,
      "step": 54
    },
    {
      "epoch": 0.41044776119402987,
      "grad_norm": 10.595233917236328,
      "learning_rate": 4.591836734693878e-06,
      "loss": 0.5959,
      "step": 55
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 13.459734916687012,
      "learning_rate": 4.489795918367348e-06,
      "loss": 1.4252,
      "step": 56
    },
    {
      "epoch": 0.4253731343283582,
      "grad_norm": 10.65233325958252,
      "learning_rate": 4.3877551020408165e-06,
      "loss": 1.0071,
      "step": 57
    },
    {
      "epoch": 0.43283582089552236,
      "grad_norm": 10.991082191467285,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 0.9786,
      "step": 58
    },
    {
      "epoch": 0.44029850746268656,
      "grad_norm": 12.973753929138184,
      "learning_rate": 4.183673469387755e-06,
      "loss": 1.3687,
      "step": 59
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 8.963390350341797,
      "learning_rate": 4.081632653061225e-06,
      "loss": 0.4663,
      "step": 60
    },
    {
      "epoch": 0.4552238805970149,
      "grad_norm": 15.190298080444336,
      "learning_rate": 3.979591836734694e-06,
      "loss": 1.5676,
      "step": 61
    },
    {
      "epoch": 0.4626865671641791,
      "grad_norm": 12.159441947937012,
      "learning_rate": 3.877551020408164e-06,
      "loss": 0.9444,
      "step": 62
    },
    {
      "epoch": 0.4701492537313433,
      "grad_norm": 12.203204154968262,
      "learning_rate": 3.7755102040816327e-06,
      "loss": 1.0785,
      "step": 63
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 13.535058975219727,
      "learning_rate": 3.6734693877551024e-06,
      "loss": 1.3801,
      "step": 64
    },
    {
      "epoch": 0.48507462686567165,
      "grad_norm": 15.462494850158691,
      "learning_rate": 3.5714285714285718e-06,
      "loss": 1.3773,
      "step": 65
    },
    {
      "epoch": 0.4925373134328358,
      "grad_norm": 10.449372291564941,
      "learning_rate": 3.469387755102041e-06,
      "loss": 0.8747,
      "step": 66
    },
    {
      "epoch": 0.5,
      "grad_norm": 15.766761779785156,
      "learning_rate": 3.3673469387755105e-06,
      "loss": 1.0549,
      "step": 67
    },
    {
      "epoch": 0.5074626865671642,
      "grad_norm": 7.633336067199707,
      "learning_rate": 3.2653061224489794e-06,
      "loss": 0.344,
      "step": 68
    },
    {
      "epoch": 0.5149253731343284,
      "grad_norm": 13.762042999267578,
      "learning_rate": 3.1632653061224496e-06,
      "loss": 1.2072,
      "step": 69
    },
    {
      "epoch": 0.5223880597014925,
      "grad_norm": 11.871623992919922,
      "learning_rate": 3.0612244897959185e-06,
      "loss": 1.1146,
      "step": 70
    },
    {
      "epoch": 0.5298507462686567,
      "grad_norm": 12.153115272521973,
      "learning_rate": 2.959183673469388e-06,
      "loss": 1.0648,
      "step": 71
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 15.02953052520752,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 1.0374,
      "step": 72
    },
    {
      "epoch": 0.5447761194029851,
      "grad_norm": 13.172088623046875,
      "learning_rate": 2.7551020408163266e-06,
      "loss": 1.2356,
      "step": 73
    },
    {
      "epoch": 0.5522388059701493,
      "grad_norm": 11.575133323669434,
      "learning_rate": 2.6530612244897964e-06,
      "loss": 1.0695,
      "step": 74
    },
    {
      "epoch": 0.5597014925373134,
      "grad_norm": 12.820709228515625,
      "learning_rate": 2.5510204081632657e-06,
      "loss": 1.1283,
      "step": 75
    },
    {
      "epoch": 0.5671641791044776,
      "grad_norm": 12.87095832824707,
      "learning_rate": 2.4489795918367347e-06,
      "loss": 1.1721,
      "step": 76
    },
    {
      "epoch": 0.5746268656716418,
      "grad_norm": 13.630508422851562,
      "learning_rate": 2.3469387755102044e-06,
      "loss": 1.2478,
      "step": 77
    },
    {
      "epoch": 0.582089552238806,
      "grad_norm": 17.44233512878418,
      "learning_rate": 2.244897959183674e-06,
      "loss": 1.3778,
      "step": 78
    },
    {
      "epoch": 0.5895522388059702,
      "grad_norm": 12.048669815063477,
      "learning_rate": 2.1428571428571427e-06,
      "loss": 0.8454,
      "step": 79
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 10.956369400024414,
      "learning_rate": 2.0408163265306125e-06,
      "loss": 0.8798,
      "step": 80
    },
    {
      "epoch": 0.6044776119402985,
      "grad_norm": 12.508173942565918,
      "learning_rate": 1.938775510204082e-06,
      "loss": 1.2105,
      "step": 81
    },
    {
      "epoch": 0.6119402985074627,
      "grad_norm": 10.739660263061523,
      "learning_rate": 1.8367346938775512e-06,
      "loss": 1.0513,
      "step": 82
    },
    {
      "epoch": 0.6194029850746269,
      "grad_norm": 14.237381935119629,
      "learning_rate": 1.7346938775510206e-06,
      "loss": 1.3152,
      "step": 83
    },
    {
      "epoch": 0.6268656716417911,
      "grad_norm": 10.444908142089844,
      "learning_rate": 1.6326530612244897e-06,
      "loss": 0.7026,
      "step": 84
    },
    {
      "epoch": 0.6343283582089553,
      "grad_norm": 12.019598960876465,
      "learning_rate": 1.5306122448979593e-06,
      "loss": 1.1211,
      "step": 85
    },
    {
      "epoch": 0.6417910447761194,
      "grad_norm": 12.660603523254395,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 0.9115,
      "step": 86
    },
    {
      "epoch": 0.6492537313432836,
      "grad_norm": 10.193121910095215,
      "learning_rate": 1.3265306122448982e-06,
      "loss": 0.9419,
      "step": 87
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 14.029544830322266,
      "learning_rate": 1.2244897959183673e-06,
      "loss": 1.0446,
      "step": 88
    },
    {
      "epoch": 0.664179104477612,
      "grad_norm": 10.461629867553711,
      "learning_rate": 1.122448979591837e-06,
      "loss": 0.9771,
      "step": 89
    },
    {
      "epoch": 0.6716417910447762,
      "grad_norm": 7.607001781463623,
      "learning_rate": 1.0204081632653063e-06,
      "loss": 0.5959,
      "step": 90
    },
    {
      "epoch": 0.6791044776119403,
      "grad_norm": 10.331579208374023,
      "learning_rate": 9.183673469387756e-07,
      "loss": 0.9838,
      "step": 91
    },
    {
      "epoch": 0.6865671641791045,
      "grad_norm": 10.43489933013916,
      "learning_rate": 8.163265306122449e-07,
      "loss": 0.9485,
      "step": 92
    },
    {
      "epoch": 0.6940298507462687,
      "grad_norm": 11.645877838134766,
      "learning_rate": 7.142857142857143e-07,
      "loss": 1.1304,
      "step": 93
    },
    {
      "epoch": 0.7014925373134329,
      "grad_norm": 11.262922286987305,
      "learning_rate": 6.122448979591837e-07,
      "loss": 0.7431,
      "step": 94
    },
    {
      "epoch": 0.7089552238805971,
      "grad_norm": 12.494215965270996,
      "learning_rate": 5.102040816326531e-07,
      "loss": 1.1966,
      "step": 95
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 11.587281227111816,
      "learning_rate": 4.0816326530612243e-07,
      "loss": 0.6951,
      "step": 96
    },
    {
      "epoch": 0.7238805970149254,
      "grad_norm": 10.64450454711914,
      "learning_rate": 3.0612244897959183e-07,
      "loss": 0.6916,
      "step": 97
    },
    {
      "epoch": 0.7313432835820896,
      "grad_norm": 11.353170394897461,
      "learning_rate": 2.0408163265306121e-07,
      "loss": 0.7753,
      "step": 98
    },
    {
      "epoch": 0.7388059701492538,
      "grad_norm": 10.214649200439453,
      "learning_rate": 1.0204081632653061e-07,
      "loss": 0.6923,
      "step": 99
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 11.73704719543457,
      "learning_rate": 0.0,
      "loss": 1.1282,
      "step": 100
    },
    {
      "epoch": 0.746268656716418,
      "step": 100,
      "total_flos": 2305515375820800.0,
      "train_loss": 1.1278739917278289,
      "train_runtime": 528.5108,
      "train_samples_per_second": 0.189,
      "train_steps_per_second": 0.189
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2305515375820800.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}