nart-100k-7b / trainer_state.json
jerryjalapeno's picture
Upload 11 files
50e61b8
raw
history blame
137 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9975284231339594,
"global_step": 1137,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.714285714285715e-07,
"loss": 1.4043,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 1.142857142857143e-06,
"loss": 1.417,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 1.7142857142857145e-06,
"loss": 1.1751,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 2.285714285714286e-06,
"loss": 1.1518,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.1192,
"step": 5
},
{
"epoch": 0.02,
"learning_rate": 3.428571428571429e-06,
"loss": 1.0952,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 4.000000000000001e-06,
"loss": 1.0354,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 4.571428571428572e-06,
"loss": 1.016,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 5.142857142857142e-06,
"loss": 0.9737,
"step": 9
},
{
"epoch": 0.03,
"learning_rate": 5.7142857142857145e-06,
"loss": 0.9711,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 6.285714285714286e-06,
"loss": 0.9546,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 6.857142857142858e-06,
"loss": 0.9303,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 7.428571428571429e-06,
"loss": 0.9083,
"step": 13
},
{
"epoch": 0.04,
"learning_rate": 8.000000000000001e-06,
"loss": 0.9051,
"step": 14
},
{
"epoch": 0.04,
"learning_rate": 8.571428571428571e-06,
"loss": 0.8831,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 9.142857142857144e-06,
"loss": 0.877,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 9.714285714285715e-06,
"loss": 0.873,
"step": 17
},
{
"epoch": 0.05,
"learning_rate": 1.0285714285714285e-05,
"loss": 0.8656,
"step": 18
},
{
"epoch": 0.05,
"learning_rate": 1.0857142857142858e-05,
"loss": 0.8567,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 1.1428571428571429e-05,
"loss": 0.845,
"step": 20
},
{
"epoch": 0.06,
"learning_rate": 1.2e-05,
"loss": 0.8401,
"step": 21
},
{
"epoch": 0.06,
"learning_rate": 1.2571428571428572e-05,
"loss": 0.8368,
"step": 22
},
{
"epoch": 0.06,
"learning_rate": 1.3142857142857145e-05,
"loss": 0.8391,
"step": 23
},
{
"epoch": 0.06,
"learning_rate": 1.3714285714285716e-05,
"loss": 0.8185,
"step": 24
},
{
"epoch": 0.07,
"learning_rate": 1.4285714285714287e-05,
"loss": 0.8262,
"step": 25
},
{
"epoch": 0.07,
"learning_rate": 1.4857142857142858e-05,
"loss": 0.8151,
"step": 26
},
{
"epoch": 0.07,
"learning_rate": 1.542857142857143e-05,
"loss": 0.817,
"step": 27
},
{
"epoch": 0.07,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.8086,
"step": 28
},
{
"epoch": 0.08,
"learning_rate": 1.6571428571428574e-05,
"loss": 0.7952,
"step": 29
},
{
"epoch": 0.08,
"learning_rate": 1.7142857142857142e-05,
"loss": 0.8136,
"step": 30
},
{
"epoch": 0.08,
"learning_rate": 1.7714285714285717e-05,
"loss": 0.7942,
"step": 31
},
{
"epoch": 0.08,
"learning_rate": 1.8285714285714288e-05,
"loss": 0.7942,
"step": 32
},
{
"epoch": 0.09,
"learning_rate": 1.885714285714286e-05,
"loss": 0.7965,
"step": 33
},
{
"epoch": 0.09,
"learning_rate": 1.942857142857143e-05,
"loss": 0.7875,
"step": 34
},
{
"epoch": 0.09,
"learning_rate": 2e-05,
"loss": 0.7844,
"step": 35
},
{
"epoch": 0.09,
"learning_rate": 1.999995936443837e-05,
"loss": 0.7812,
"step": 36
},
{
"epoch": 0.1,
"learning_rate": 1.9999837458083738e-05,
"loss": 0.7844,
"step": 37
},
{
"epoch": 0.1,
"learning_rate": 1.999963428192684e-05,
"loss": 0.7829,
"step": 38
},
{
"epoch": 0.1,
"learning_rate": 1.999934983761892e-05,
"loss": 0.7707,
"step": 39
},
{
"epoch": 0.11,
"learning_rate": 1.999898412747168e-05,
"loss": 0.7703,
"step": 40
},
{
"epoch": 0.11,
"learning_rate": 1.9998537154457298e-05,
"loss": 0.7724,
"step": 41
},
{
"epoch": 0.11,
"learning_rate": 1.9998008922208364e-05,
"loss": 0.7745,
"step": 42
},
{
"epoch": 0.11,
"learning_rate": 1.9997399435017893e-05,
"loss": 0.7595,
"step": 43
},
{
"epoch": 0.12,
"learning_rate": 1.9996708697839244e-05,
"loss": 0.7686,
"step": 44
},
{
"epoch": 0.12,
"learning_rate": 1.999593671628612e-05,
"loss": 0.7669,
"step": 45
},
{
"epoch": 0.12,
"learning_rate": 1.99950834966325e-05,
"loss": 0.7627,
"step": 46
},
{
"epoch": 0.12,
"learning_rate": 1.99941490458126e-05,
"loss": 0.7551,
"step": 47
},
{
"epoch": 0.13,
"learning_rate": 1.9993133371420807e-05,
"loss": 0.7633,
"step": 48
},
{
"epoch": 0.13,
"learning_rate": 1.9992036481711618e-05,
"loss": 0.768,
"step": 49
},
{
"epoch": 0.13,
"learning_rate": 1.9990858385599576e-05,
"loss": 0.7676,
"step": 50
},
{
"epoch": 0.13,
"learning_rate": 1.9989599092659207e-05,
"loss": 0.7513,
"step": 51
},
{
"epoch": 0.14,
"learning_rate": 1.998825861312492e-05,
"loss": 0.7466,
"step": 52
},
{
"epoch": 0.14,
"learning_rate": 1.9986836957890947e-05,
"loss": 0.758,
"step": 53
},
{
"epoch": 0.14,
"learning_rate": 1.998533413851124e-05,
"loss": 0.7506,
"step": 54
},
{
"epoch": 0.14,
"learning_rate": 1.9983750167199377e-05,
"loss": 0.7513,
"step": 55
},
{
"epoch": 0.15,
"learning_rate": 1.9982085056828476e-05,
"loss": 0.7544,
"step": 56
},
{
"epoch": 0.15,
"learning_rate": 1.9980338820931074e-05,
"loss": 0.7539,
"step": 57
},
{
"epoch": 0.15,
"learning_rate": 1.9978511473699024e-05,
"loss": 0.7503,
"step": 58
},
{
"epoch": 0.16,
"learning_rate": 1.9976603029983383e-05,
"loss": 0.7426,
"step": 59
},
{
"epoch": 0.16,
"learning_rate": 1.997461350529429e-05,
"loss": 0.7501,
"step": 60
},
{
"epoch": 0.16,
"learning_rate": 1.997254291580083e-05,
"loss": 0.7438,
"step": 61
},
{
"epoch": 0.16,
"learning_rate": 1.9970391278330923e-05,
"loss": 0.7458,
"step": 62
},
{
"epoch": 0.17,
"learning_rate": 1.9968158610371164e-05,
"loss": 0.7547,
"step": 63
},
{
"epoch": 0.17,
"learning_rate": 1.99658449300667e-05,
"loss": 0.746,
"step": 64
},
{
"epoch": 0.17,
"learning_rate": 1.9963450256221066e-05,
"loss": 0.7367,
"step": 65
},
{
"epoch": 0.17,
"learning_rate": 1.996097460829605e-05,
"loss": 0.7461,
"step": 66
},
{
"epoch": 0.18,
"learning_rate": 1.9958418006411522e-05,
"loss": 0.7442,
"step": 67
},
{
"epoch": 0.18,
"learning_rate": 1.9955780471345265e-05,
"loss": 0.7433,
"step": 68
},
{
"epoch": 0.18,
"learning_rate": 1.995306202453283e-05,
"loss": 0.7421,
"step": 69
},
{
"epoch": 0.18,
"learning_rate": 1.9950262688067334e-05,
"loss": 0.7416,
"step": 70
},
{
"epoch": 0.19,
"learning_rate": 1.99473824846993e-05,
"loss": 0.7374,
"step": 71
},
{
"epoch": 0.19,
"learning_rate": 1.9944421437836466e-05,
"loss": 0.7382,
"step": 72
},
{
"epoch": 0.19,
"learning_rate": 1.9941379571543597e-05,
"loss": 0.7431,
"step": 73
},
{
"epoch": 0.2,
"learning_rate": 1.9938256910542276e-05,
"loss": 0.7385,
"step": 74
},
{
"epoch": 0.2,
"learning_rate": 1.993505348021072e-05,
"loss": 0.7432,
"step": 75
},
{
"epoch": 0.2,
"learning_rate": 1.9931769306583568e-05,
"loss": 0.7322,
"step": 76
},
{
"epoch": 0.2,
"learning_rate": 1.992840441635167e-05,
"loss": 0.7396,
"step": 77
},
{
"epoch": 0.21,
"learning_rate": 1.9924958836861865e-05,
"loss": 0.7436,
"step": 78
},
{
"epoch": 0.21,
"learning_rate": 1.9921432596116763e-05,
"loss": 0.7413,
"step": 79
},
{
"epoch": 0.21,
"learning_rate": 1.991782572277452e-05,
"loss": 0.7385,
"step": 80
},
{
"epoch": 0.21,
"learning_rate": 1.99141382461486e-05,
"loss": 0.7323,
"step": 81
},
{
"epoch": 0.22,
"learning_rate": 1.9910370196207545e-05,
"loss": 0.7277,
"step": 82
},
{
"epoch": 0.22,
"learning_rate": 1.9906521603574713e-05,
"loss": 0.7288,
"step": 83
},
{
"epoch": 0.22,
"learning_rate": 1.990259249952805e-05,
"loss": 0.7276,
"step": 84
},
{
"epoch": 0.22,
"learning_rate": 1.9898582915999834e-05,
"loss": 0.7291,
"step": 85
},
{
"epoch": 0.23,
"learning_rate": 1.989449288557639e-05,
"loss": 0.7174,
"step": 86
},
{
"epoch": 0.23,
"learning_rate": 1.9890322441497857e-05,
"loss": 0.7335,
"step": 87
},
{
"epoch": 0.23,
"learning_rate": 1.9886071617657906e-05,
"loss": 0.7376,
"step": 88
},
{
"epoch": 0.23,
"learning_rate": 1.988174044860346e-05,
"loss": 0.7308,
"step": 89
},
{
"epoch": 0.24,
"learning_rate": 1.9877328969534413e-05,
"loss": 0.7304,
"step": 90
},
{
"epoch": 0.24,
"learning_rate": 1.9872837216303353e-05,
"loss": 0.7251,
"step": 91
},
{
"epoch": 0.24,
"learning_rate": 1.9868265225415263e-05,
"loss": 0.7372,
"step": 92
},
{
"epoch": 0.25,
"learning_rate": 1.9863613034027224e-05,
"loss": 0.7293,
"step": 93
},
{
"epoch": 0.25,
"learning_rate": 1.9858880679948122e-05,
"loss": 0.7286,
"step": 94
},
{
"epoch": 0.25,
"learning_rate": 1.9854068201638327e-05,
"loss": 0.7377,
"step": 95
},
{
"epoch": 0.25,
"learning_rate": 1.9849175638209393e-05,
"loss": 0.7208,
"step": 96
},
{
"epoch": 0.26,
"learning_rate": 1.9844203029423733e-05,
"loss": 0.7279,
"step": 97
},
{
"epoch": 0.26,
"learning_rate": 1.983915041569429e-05,
"loss": 0.7242,
"step": 98
},
{
"epoch": 0.26,
"learning_rate": 1.9834017838084234e-05,
"loss": 0.7255,
"step": 99
},
{
"epoch": 0.26,
"learning_rate": 1.9828805338306595e-05,
"loss": 0.7252,
"step": 100
},
{
"epoch": 0.27,
"learning_rate": 1.9823512958723942e-05,
"loss": 0.7376,
"step": 101
},
{
"epoch": 0.27,
"learning_rate": 1.981814074234804e-05,
"loss": 0.7278,
"step": 102
},
{
"epoch": 0.27,
"learning_rate": 1.9812688732839497e-05,
"loss": 0.7249,
"step": 103
},
{
"epoch": 0.27,
"learning_rate": 1.9807156974507403e-05,
"loss": 0.7308,
"step": 104
},
{
"epoch": 0.28,
"learning_rate": 1.9801545512308982e-05,
"loss": 0.7343,
"step": 105
},
{
"epoch": 0.28,
"learning_rate": 1.9795854391849217e-05,
"loss": 0.7273,
"step": 106
},
{
"epoch": 0.28,
"learning_rate": 1.979008365938048e-05,
"loss": 0.7322,
"step": 107
},
{
"epoch": 0.28,
"learning_rate": 1.978423336180217e-05,
"loss": 0.7298,
"step": 108
},
{
"epoch": 0.29,
"learning_rate": 1.9778303546660304e-05,
"loss": 0.7199,
"step": 109
},
{
"epoch": 0.29,
"learning_rate": 1.977229426214716e-05,
"loss": 0.7206,
"step": 110
},
{
"epoch": 0.29,
"learning_rate": 1.976620555710087e-05,
"loss": 0.7162,
"step": 111
},
{
"epoch": 0.3,
"learning_rate": 1.9760037481005018e-05,
"loss": 0.7153,
"step": 112
},
{
"epoch": 0.3,
"learning_rate": 1.9753790083988256e-05,
"loss": 0.7169,
"step": 113
},
{
"epoch": 0.3,
"learning_rate": 1.974746341682388e-05,
"loss": 0.7223,
"step": 114
},
{
"epoch": 0.3,
"learning_rate": 1.9741057530929425e-05,
"loss": 0.7238,
"step": 115
},
{
"epoch": 0.31,
"learning_rate": 1.9734572478366244e-05,
"loss": 0.715,
"step": 116
},
{
"epoch": 0.31,
"learning_rate": 1.972800831183909e-05,
"loss": 0.7172,
"step": 117
},
{
"epoch": 0.31,
"learning_rate": 1.972136508469568e-05,
"loss": 0.7246,
"step": 118
},
{
"epoch": 0.31,
"learning_rate": 1.9714642850926264e-05,
"loss": 0.7185,
"step": 119
},
{
"epoch": 0.32,
"learning_rate": 1.9707841665163196e-05,
"loss": 0.7194,
"step": 120
},
{
"epoch": 0.32,
"learning_rate": 1.9700961582680476e-05,
"loss": 0.7224,
"step": 121
},
{
"epoch": 0.32,
"learning_rate": 1.9694002659393306e-05,
"loss": 0.7164,
"step": 122
},
{
"epoch": 0.32,
"learning_rate": 1.9686964951857636e-05,
"loss": 0.7211,
"step": 123
},
{
"epoch": 0.33,
"learning_rate": 1.9679848517269708e-05,
"loss": 0.7103,
"step": 124
},
{
"epoch": 0.33,
"learning_rate": 1.9672653413465584e-05,
"loss": 0.7139,
"step": 125
},
{
"epoch": 0.33,
"learning_rate": 1.966537969892068e-05,
"loss": 0.713,
"step": 126
},
{
"epoch": 0.33,
"learning_rate": 1.9658027432749293e-05,
"loss": 0.7205,
"step": 127
},
{
"epoch": 0.34,
"learning_rate": 1.9650596674704118e-05,
"loss": 0.7158,
"step": 128
},
{
"epoch": 0.34,
"learning_rate": 1.9643087485175752e-05,
"loss": 0.7269,
"step": 129
},
{
"epoch": 0.34,
"learning_rate": 1.963549992519223e-05,
"loss": 0.7128,
"step": 130
},
{
"epoch": 0.35,
"learning_rate": 1.96278340564185e-05,
"loss": 0.7085,
"step": 131
},
{
"epoch": 0.35,
"learning_rate": 1.9620089941155945e-05,
"loss": 0.721,
"step": 132
},
{
"epoch": 0.35,
"learning_rate": 1.961226764234185e-05,
"loss": 0.7169,
"step": 133
},
{
"epoch": 0.35,
"learning_rate": 1.960436722354892e-05,
"loss": 0.7223,
"step": 134
},
{
"epoch": 0.36,
"learning_rate": 1.959638874898475e-05,
"loss": 0.7134,
"step": 135
},
{
"epoch": 0.36,
"learning_rate": 1.9588332283491297e-05,
"loss": 0.71,
"step": 136
},
{
"epoch": 0.36,
"learning_rate": 1.9580197892544354e-05,
"loss": 0.7096,
"step": 137
},
{
"epoch": 0.36,
"learning_rate": 1.957198564225304e-05,
"loss": 0.7057,
"step": 138
},
{
"epoch": 0.37,
"learning_rate": 1.9563695599359233e-05,
"loss": 0.711,
"step": 139
},
{
"epoch": 0.37,
"learning_rate": 1.9555327831237036e-05,
"loss": 0.7105,
"step": 140
},
{
"epoch": 0.37,
"learning_rate": 1.9546882405892247e-05,
"loss": 0.7171,
"step": 141
},
{
"epoch": 0.37,
"learning_rate": 1.9538359391961786e-05,
"loss": 0.7136,
"step": 142
},
{
"epoch": 0.38,
"learning_rate": 1.952975885871314e-05,
"loss": 0.7172,
"step": 143
},
{
"epoch": 0.38,
"learning_rate": 1.9521080876043816e-05,
"loss": 0.7065,
"step": 144
},
{
"epoch": 0.38,
"learning_rate": 1.951232551448075e-05,
"loss": 0.7064,
"step": 145
},
{
"epoch": 0.38,
"learning_rate": 1.9503492845179746e-05,
"loss": 0.7141,
"step": 146
},
{
"epoch": 0.39,
"learning_rate": 1.9494582939924908e-05,
"loss": 0.7063,
"step": 147
},
{
"epoch": 0.39,
"learning_rate": 1.9485595871128028e-05,
"loss": 0.714,
"step": 148
},
{
"epoch": 0.39,
"learning_rate": 1.9476531711828027e-05,
"loss": 0.7082,
"step": 149
},
{
"epoch": 0.4,
"learning_rate": 1.9467390535690343e-05,
"loss": 0.7059,
"step": 150
},
{
"epoch": 0.4,
"learning_rate": 1.9458172417006347e-05,
"loss": 0.7174,
"step": 151
},
{
"epoch": 0.4,
"learning_rate": 1.9448877430692723e-05,
"loss": 0.7118,
"step": 152
},
{
"epoch": 0.4,
"learning_rate": 1.9439505652290867e-05,
"loss": 0.7127,
"step": 153
},
{
"epoch": 0.41,
"learning_rate": 1.9430057157966272e-05,
"loss": 0.7096,
"step": 154
},
{
"epoch": 0.41,
"learning_rate": 1.942053202450792e-05,
"loss": 0.7051,
"step": 155
},
{
"epoch": 0.41,
"learning_rate": 1.9410930329327636e-05,
"loss": 0.7115,
"step": 156
},
{
"epoch": 0.41,
"learning_rate": 1.9401252150459478e-05,
"loss": 0.7182,
"step": 157
},
{
"epoch": 0.42,
"learning_rate": 1.939149756655909e-05,
"loss": 0.7161,
"step": 158
},
{
"epoch": 0.42,
"learning_rate": 1.9381666656903068e-05,
"loss": 0.716,
"step": 159
},
{
"epoch": 0.42,
"learning_rate": 1.9371759501388327e-05,
"loss": 0.7063,
"step": 160
},
{
"epoch": 0.42,
"learning_rate": 1.9361776180531428e-05,
"loss": 0.7126,
"step": 161
},
{
"epoch": 0.43,
"learning_rate": 1.935171677546794e-05,
"loss": 0.7113,
"step": 162
},
{
"epoch": 0.43,
"learning_rate": 1.934158136795178e-05,
"loss": 0.6949,
"step": 163
},
{
"epoch": 0.43,
"learning_rate": 1.933137004035454e-05,
"loss": 0.7086,
"step": 164
},
{
"epoch": 0.43,
"learning_rate": 1.9321082875664834e-05,
"loss": 0.7074,
"step": 165
},
{
"epoch": 0.44,
"learning_rate": 1.93107199574876e-05,
"loss": 0.7127,
"step": 166
},
{
"epoch": 0.44,
"learning_rate": 1.9300281370043433e-05,
"loss": 0.7118,
"step": 167
},
{
"epoch": 0.44,
"learning_rate": 1.9289767198167918e-05,
"loss": 0.7087,
"step": 168
},
{
"epoch": 0.45,
"learning_rate": 1.92791775273109e-05,
"loss": 0.7049,
"step": 169
},
{
"epoch": 0.45,
"learning_rate": 1.9268512443535826e-05,
"loss": 0.7015,
"step": 170
},
{
"epoch": 0.45,
"learning_rate": 1.9257772033519032e-05,
"loss": 0.7113,
"step": 171
},
{
"epoch": 0.45,
"learning_rate": 1.9246956384549035e-05,
"loss": 0.705,
"step": 172
},
{
"epoch": 0.46,
"learning_rate": 1.923606558452583e-05,
"loss": 0.7125,
"step": 173
},
{
"epoch": 0.46,
"learning_rate": 1.9225099721960175e-05,
"loss": 0.7083,
"step": 174
},
{
"epoch": 0.46,
"learning_rate": 1.921405888597286e-05,
"loss": 0.7069,
"step": 175
},
{
"epoch": 0.46,
"learning_rate": 1.9202943166294004e-05,
"loss": 0.7102,
"step": 176
},
{
"epoch": 0.47,
"learning_rate": 1.919175265326231e-05,
"loss": 0.7041,
"step": 177
},
{
"epoch": 0.47,
"learning_rate": 1.918048743782433e-05,
"loss": 0.7044,
"step": 178
},
{
"epoch": 0.47,
"learning_rate": 1.9169147611533737e-05,
"loss": 0.7013,
"step": 179
},
{
"epoch": 0.47,
"learning_rate": 1.9157733266550577e-05,
"loss": 0.709,
"step": 180
},
{
"epoch": 0.48,
"learning_rate": 1.914624449564051e-05,
"loss": 0.7059,
"step": 181
},
{
"epoch": 0.48,
"learning_rate": 1.9134681392174065e-05,
"loss": 0.6993,
"step": 182
},
{
"epoch": 0.48,
"learning_rate": 1.9123044050125892e-05,
"loss": 0.7117,
"step": 183
},
{
"epoch": 0.49,
"learning_rate": 1.9111332564073967e-05,
"loss": 0.7069,
"step": 184
},
{
"epoch": 0.49,
"learning_rate": 1.909954702919886e-05,
"loss": 0.7033,
"step": 185
},
{
"epoch": 0.49,
"learning_rate": 1.908768754128293e-05,
"loss": 0.7019,
"step": 186
},
{
"epoch": 0.49,
"learning_rate": 1.9075754196709574e-05,
"loss": 0.6988,
"step": 187
},
{
"epoch": 0.5,
"learning_rate": 1.906374709246242e-05,
"loss": 0.6959,
"step": 188
},
{
"epoch": 0.5,
"learning_rate": 1.905166632612455e-05,
"loss": 0.7012,
"step": 189
},
{
"epoch": 0.5,
"learning_rate": 1.9039511995877717e-05,
"loss": 0.7018,
"step": 190
},
{
"epoch": 0.5,
"learning_rate": 1.902728420050152e-05,
"loss": 0.7105,
"step": 191
},
{
"epoch": 0.51,
"learning_rate": 1.9014983039372633e-05,
"loss": 0.6964,
"step": 192
},
{
"epoch": 0.51,
"learning_rate": 1.900260861246397e-05,
"loss": 0.7082,
"step": 193
},
{
"epoch": 0.51,
"learning_rate": 1.8990161020343888e-05,
"loss": 0.7029,
"step": 194
},
{
"epoch": 0.51,
"learning_rate": 1.8977640364175367e-05,
"loss": 0.7039,
"step": 195
},
{
"epoch": 0.52,
"learning_rate": 1.8965046745715184e-05,
"loss": 0.6978,
"step": 196
},
{
"epoch": 0.52,
"learning_rate": 1.89523802673131e-05,
"loss": 0.7056,
"step": 197
},
{
"epoch": 0.52,
"learning_rate": 1.8939641031910997e-05,
"loss": 0.7023,
"step": 198
},
{
"epoch": 0.52,
"learning_rate": 1.8926829143042075e-05,
"loss": 0.7012,
"step": 199
},
{
"epoch": 0.53,
"learning_rate": 1.8913944704829996e-05,
"loss": 0.7058,
"step": 200
},
{
"epoch": 0.53,
"learning_rate": 1.8900987821988038e-05,
"loss": 0.6921,
"step": 201
},
{
"epoch": 0.53,
"learning_rate": 1.8887958599818238e-05,
"loss": 0.7128,
"step": 202
},
{
"epoch": 0.54,
"learning_rate": 1.887485714421055e-05,
"loss": 0.7057,
"step": 203
},
{
"epoch": 0.54,
"learning_rate": 1.886168356164198e-05,
"loss": 0.7052,
"step": 204
},
{
"epoch": 0.54,
"learning_rate": 1.8848437959175703e-05,
"loss": 0.6959,
"step": 205
},
{
"epoch": 0.54,
"learning_rate": 1.883512044446023e-05,
"loss": 0.7086,
"step": 206
},
{
"epoch": 0.55,
"learning_rate": 1.8821731125728493e-05,
"loss": 0.7021,
"step": 207
},
{
"epoch": 0.55,
"learning_rate": 1.880827011179699e-05,
"loss": 0.704,
"step": 208
},
{
"epoch": 0.55,
"learning_rate": 1.879473751206489e-05,
"loss": 0.704,
"step": 209
},
{
"epoch": 0.55,
"learning_rate": 1.878113343651316e-05,
"loss": 0.6995,
"step": 210
},
{
"epoch": 0.56,
"learning_rate": 1.876745799570364e-05,
"loss": 0.6994,
"step": 211
},
{
"epoch": 0.56,
"learning_rate": 1.875371130077818e-05,
"loss": 0.6939,
"step": 212
},
{
"epoch": 0.56,
"learning_rate": 1.873989346345771e-05,
"loss": 0.705,
"step": 213
},
{
"epoch": 0.56,
"learning_rate": 1.8726004596041348e-05,
"loss": 0.6989,
"step": 214
},
{
"epoch": 0.57,
"learning_rate": 1.871204481140548e-05,
"loss": 0.6957,
"step": 215
},
{
"epoch": 0.57,
"learning_rate": 1.869801422300284e-05,
"loss": 0.7077,
"step": 216
},
{
"epoch": 0.57,
"learning_rate": 1.8683912944861606e-05,
"loss": 0.7022,
"step": 217
},
{
"epoch": 0.57,
"learning_rate": 1.8669741091584435e-05,
"loss": 0.706,
"step": 218
},
{
"epoch": 0.58,
"learning_rate": 1.865549877834758e-05,
"loss": 0.7012,
"step": 219
},
{
"epoch": 0.58,
"learning_rate": 1.8641186120899918e-05,
"loss": 0.6916,
"step": 220
},
{
"epoch": 0.58,
"learning_rate": 1.8626803235562025e-05,
"loss": 0.705,
"step": 221
},
{
"epoch": 0.59,
"learning_rate": 1.8612350239225226e-05,
"loss": 0.6956,
"step": 222
},
{
"epoch": 0.59,
"learning_rate": 1.859782724935064e-05,
"loss": 0.7007,
"step": 223
},
{
"epoch": 0.59,
"learning_rate": 1.8583234383968246e-05,
"loss": 0.6974,
"step": 224
},
{
"epoch": 0.59,
"learning_rate": 1.8568571761675893e-05,
"loss": 0.6861,
"step": 225
},
{
"epoch": 0.6,
"learning_rate": 1.8553839501638365e-05,
"loss": 0.6959,
"step": 226
},
{
"epoch": 0.6,
"learning_rate": 1.8539037723586387e-05,
"loss": 0.6989,
"step": 227
},
{
"epoch": 0.6,
"learning_rate": 1.8524166547815677e-05,
"loss": 0.6944,
"step": 228
},
{
"epoch": 0.6,
"learning_rate": 1.850922609518595e-05,
"loss": 0.7017,
"step": 229
},
{
"epoch": 0.61,
"learning_rate": 1.8494216487119944e-05,
"loss": 0.7053,
"step": 230
},
{
"epoch": 0.61,
"learning_rate": 1.8479137845602426e-05,
"loss": 0.7007,
"step": 231
},
{
"epoch": 0.61,
"learning_rate": 1.8463990293179212e-05,
"loss": 0.7029,
"step": 232
},
{
"epoch": 0.61,
"learning_rate": 1.8448773952956164e-05,
"loss": 0.6971,
"step": 233
},
{
"epoch": 0.62,
"learning_rate": 1.843348894859818e-05,
"loss": 0.7081,
"step": 234
},
{
"epoch": 0.62,
"learning_rate": 1.8418135404328218e-05,
"loss": 0.7018,
"step": 235
},
{
"epoch": 0.62,
"learning_rate": 1.840271344492625e-05,
"loss": 0.7,
"step": 236
},
{
"epoch": 0.62,
"learning_rate": 1.8387223195728276e-05,
"loss": 0.6941,
"step": 237
},
{
"epoch": 0.63,
"learning_rate": 1.8371664782625287e-05,
"loss": 0.6997,
"step": 238
},
{
"epoch": 0.63,
"learning_rate": 1.8356038332062258e-05,
"loss": 0.6958,
"step": 239
},
{
"epoch": 0.63,
"learning_rate": 1.8340343971037106e-05,
"loss": 0.7004,
"step": 240
},
{
"epoch": 0.64,
"learning_rate": 1.8324581827099665e-05,
"loss": 0.6995,
"step": 241
},
{
"epoch": 0.64,
"learning_rate": 1.8308752028350652e-05,
"loss": 0.6985,
"step": 242
},
{
"epoch": 0.64,
"learning_rate": 1.829285470344062e-05,
"loss": 0.694,
"step": 243
},
{
"epoch": 0.64,
"learning_rate": 1.827688998156891e-05,
"loss": 0.6908,
"step": 244
},
{
"epoch": 0.65,
"learning_rate": 1.826085799248261e-05,
"loss": 0.6981,
"step": 245
},
{
"epoch": 0.65,
"learning_rate": 1.8244758866475498e-05,
"loss": 0.6971,
"step": 246
},
{
"epoch": 0.65,
"learning_rate": 1.8228592734386983e-05,
"loss": 0.6951,
"step": 247
},
{
"epoch": 0.65,
"learning_rate": 1.821235972760103e-05,
"loss": 0.6943,
"step": 248
},
{
"epoch": 0.66,
"learning_rate": 1.8196059978045117e-05,
"loss": 0.689,
"step": 249
},
{
"epoch": 0.66,
"learning_rate": 1.817969361818913e-05,
"loss": 0.6857,
"step": 250
},
{
"epoch": 0.66,
"learning_rate": 1.8163260781044317e-05,
"loss": 0.6972,
"step": 251
},
{
"epoch": 0.66,
"learning_rate": 1.8146761600162193e-05,
"loss": 0.7069,
"step": 252
},
{
"epoch": 0.67,
"learning_rate": 1.8130196209633455e-05,
"loss": 0.7136,
"step": 253
},
{
"epoch": 0.67,
"learning_rate": 1.811356474408689e-05,
"loss": 0.6976,
"step": 254
},
{
"epoch": 0.67,
"learning_rate": 1.809686733868829e-05,
"loss": 0.6875,
"step": 255
},
{
"epoch": 0.67,
"learning_rate": 1.8080104129139342e-05,
"loss": 0.6995,
"step": 256
},
{
"epoch": 0.68,
"learning_rate": 1.806327525167653e-05,
"loss": 0.6928,
"step": 257
},
{
"epoch": 0.68,
"learning_rate": 1.8046380843070035e-05,
"loss": 0.698,
"step": 258
},
{
"epoch": 0.68,
"learning_rate": 1.8029421040622614e-05,
"loss": 0.6931,
"step": 259
},
{
"epoch": 0.69,
"learning_rate": 1.8012395982168486e-05,
"loss": 0.6952,
"step": 260
},
{
"epoch": 0.69,
"learning_rate": 1.799530580607221e-05,
"loss": 0.6957,
"step": 261
},
{
"epoch": 0.69,
"learning_rate": 1.797815065122757e-05,
"loss": 0.6944,
"step": 262
},
{
"epoch": 0.69,
"learning_rate": 1.796093065705644e-05,
"loss": 0.6885,
"step": 263
},
{
"epoch": 0.7,
"learning_rate": 1.794364596350764e-05,
"loss": 0.6992,
"step": 264
},
{
"epoch": 0.7,
"learning_rate": 1.7926296711055818e-05,
"loss": 0.693,
"step": 265
},
{
"epoch": 0.7,
"learning_rate": 1.79088830407003e-05,
"loss": 0.6928,
"step": 266
},
{
"epoch": 0.7,
"learning_rate": 1.789140509396394e-05,
"loss": 0.6988,
"step": 267
},
{
"epoch": 0.71,
"learning_rate": 1.787386301289197e-05,
"loss": 0.6903,
"step": 268
},
{
"epoch": 0.71,
"learning_rate": 1.785625694005086e-05,
"loss": 0.6915,
"step": 269
},
{
"epoch": 0.71,
"learning_rate": 1.7838587018527135e-05,
"loss": 0.7003,
"step": 270
},
{
"epoch": 0.71,
"learning_rate": 1.782085339192624e-05,
"loss": 0.6888,
"step": 271
},
{
"epoch": 0.72,
"learning_rate": 1.780305620437134e-05,
"loss": 0.6816,
"step": 272
},
{
"epoch": 0.72,
"learning_rate": 1.778519560050219e-05,
"loss": 0.6891,
"step": 273
},
{
"epoch": 0.72,
"learning_rate": 1.7767271725473913e-05,
"loss": 0.6891,
"step": 274
},
{
"epoch": 0.72,
"learning_rate": 1.774928472495586e-05,
"loss": 0.6921,
"step": 275
},
{
"epoch": 0.73,
"learning_rate": 1.77312347451304e-05,
"loss": 0.6933,
"step": 276
},
{
"epoch": 0.73,
"learning_rate": 1.7713121932691755e-05,
"loss": 0.6944,
"step": 277
},
{
"epoch": 0.73,
"learning_rate": 1.769494643484478e-05,
"loss": 0.6853,
"step": 278
},
{
"epoch": 0.74,
"learning_rate": 1.7676708399303784e-05,
"loss": 0.691,
"step": 279
},
{
"epoch": 0.74,
"learning_rate": 1.765840797429134e-05,
"loss": 0.6983,
"step": 280
},
{
"epoch": 0.74,
"learning_rate": 1.764004530853705e-05,
"loss": 0.6988,
"step": 281
},
{
"epoch": 0.74,
"learning_rate": 1.7621620551276366e-05,
"loss": 0.6955,
"step": 282
},
{
"epoch": 0.75,
"learning_rate": 1.7603133852249357e-05,
"loss": 0.6907,
"step": 283
},
{
"epoch": 0.75,
"learning_rate": 1.75845853616995e-05,
"loss": 0.6917,
"step": 284
},
{
"epoch": 0.75,
"learning_rate": 1.7565975230372473e-05,
"loss": 0.6904,
"step": 285
},
{
"epoch": 0.75,
"learning_rate": 1.754730360951489e-05,
"loss": 0.6834,
"step": 286
},
{
"epoch": 0.76,
"learning_rate": 1.7528570650873115e-05,
"loss": 0.6916,
"step": 287
},
{
"epoch": 0.76,
"learning_rate": 1.7509776506692006e-05,
"loss": 0.6912,
"step": 288
},
{
"epoch": 0.76,
"learning_rate": 1.749092132971369e-05,
"loss": 0.6935,
"step": 289
},
{
"epoch": 0.76,
"learning_rate": 1.7472005273176306e-05,
"loss": 0.6893,
"step": 290
},
{
"epoch": 0.77,
"learning_rate": 1.7453028490812764e-05,
"loss": 0.6958,
"step": 291
},
{
"epoch": 0.77,
"learning_rate": 1.743399113684951e-05,
"loss": 0.6945,
"step": 292
},
{
"epoch": 0.77,
"learning_rate": 1.7414893366005263e-05,
"loss": 0.6931,
"step": 293
},
{
"epoch": 0.78,
"learning_rate": 1.7395735333489745e-05,
"loss": 0.6858,
"step": 294
},
{
"epoch": 0.78,
"learning_rate": 1.737651719500244e-05,
"loss": 0.6841,
"step": 295
},
{
"epoch": 0.78,
"learning_rate": 1.735723910673132e-05,
"loss": 0.6978,
"step": 296
},
{
"epoch": 0.78,
"learning_rate": 1.733790122535157e-05,
"loss": 0.6892,
"step": 297
},
{
"epoch": 0.79,
"learning_rate": 1.7318503708024326e-05,
"loss": 0.6828,
"step": 298
},
{
"epoch": 0.79,
"learning_rate": 1.7299046712395394e-05,
"loss": 0.6853,
"step": 299
},
{
"epoch": 0.79,
"learning_rate": 1.7279530396593956e-05,
"loss": 0.6845,
"step": 300
},
{
"epoch": 0.79,
"learning_rate": 1.725995491923131e-05,
"loss": 0.6894,
"step": 301
},
{
"epoch": 0.8,
"learning_rate": 1.7240320439399557e-05,
"loss": 0.6862,
"step": 302
},
{
"epoch": 0.8,
"learning_rate": 1.7220627116670314e-05,
"loss": 0.6892,
"step": 303
},
{
"epoch": 0.8,
"learning_rate": 1.7200875111093434e-05,
"loss": 0.6922,
"step": 304
},
{
"epoch": 0.8,
"learning_rate": 1.718106458319568e-05,
"loss": 0.6884,
"step": 305
},
{
"epoch": 0.81,
"learning_rate": 1.7161195693979444e-05,
"loss": 0.69,
"step": 306
},
{
"epoch": 0.81,
"learning_rate": 1.7141268604921414e-05,
"loss": 0.6882,
"step": 307
},
{
"epoch": 0.81,
"learning_rate": 1.7121283477971285e-05,
"loss": 0.6909,
"step": 308
},
{
"epoch": 0.81,
"learning_rate": 1.7101240475550427e-05,
"loss": 0.6874,
"step": 309
},
{
"epoch": 0.82,
"learning_rate": 1.7081139760550573e-05,
"loss": 0.6929,
"step": 310
},
{
"epoch": 0.82,
"learning_rate": 1.706098149633249e-05,
"loss": 0.6924,
"step": 311
},
{
"epoch": 0.82,
"learning_rate": 1.704076584672466e-05,
"loss": 0.6808,
"step": 312
},
{
"epoch": 0.83,
"learning_rate": 1.702049297602193e-05,
"loss": 0.6874,
"step": 313
},
{
"epoch": 0.83,
"learning_rate": 1.7000163048984202e-05,
"loss": 0.7009,
"step": 314
},
{
"epoch": 0.83,
"learning_rate": 1.6979776230835076e-05,
"loss": 0.68,
"step": 315
},
{
"epoch": 0.83,
"learning_rate": 1.6959332687260514e-05,
"loss": 0.6794,
"step": 316
},
{
"epoch": 0.84,
"learning_rate": 1.6938832584407494e-05,
"loss": 0.6944,
"step": 317
},
{
"epoch": 0.84,
"learning_rate": 1.6918276088882648e-05,
"loss": 0.6841,
"step": 318
},
{
"epoch": 0.84,
"learning_rate": 1.689766336775093e-05,
"loss": 0.6845,
"step": 319
},
{
"epoch": 0.84,
"learning_rate": 1.6876994588534234e-05,
"loss": 0.6865,
"step": 320
},
{
"epoch": 0.85,
"learning_rate": 1.6856269919210056e-05,
"loss": 0.6806,
"step": 321
},
{
"epoch": 0.85,
"learning_rate": 1.6835489528210107e-05,
"loss": 0.6858,
"step": 322
},
{
"epoch": 0.85,
"learning_rate": 1.6814653584418966e-05,
"loss": 0.683,
"step": 323
},
{
"epoch": 0.85,
"learning_rate": 1.6793762257172675e-05,
"loss": 0.6849,
"step": 324
},
{
"epoch": 0.86,
"learning_rate": 1.6772815716257414e-05,
"loss": 0.6901,
"step": 325
},
{
"epoch": 0.86,
"learning_rate": 1.675181413190806e-05,
"loss": 0.6878,
"step": 326
},
{
"epoch": 0.86,
"learning_rate": 1.6730757674806858e-05,
"loss": 0.6908,
"step": 327
},
{
"epoch": 0.86,
"learning_rate": 1.6709646516081992e-05,
"loss": 0.6954,
"step": 328
},
{
"epoch": 0.87,
"learning_rate": 1.6688480827306224e-05,
"loss": 0.6902,
"step": 329
},
{
"epoch": 0.87,
"learning_rate": 1.6667260780495483e-05,
"loss": 0.6877,
"step": 330
},
{
"epoch": 0.87,
"learning_rate": 1.664598654810748e-05,
"loss": 0.6837,
"step": 331
},
{
"epoch": 0.88,
"learning_rate": 1.662465830304028e-05,
"loss": 0.6842,
"step": 332
},
{
"epoch": 0.88,
"learning_rate": 1.6603276218630932e-05,
"loss": 0.6865,
"step": 333
},
{
"epoch": 0.88,
"learning_rate": 1.658184046865404e-05,
"loss": 0.6817,
"step": 334
},
{
"epoch": 0.88,
"learning_rate": 1.656035122732035e-05,
"loss": 0.6841,
"step": 335
},
{
"epoch": 0.89,
"learning_rate": 1.6538808669275335e-05,
"loss": 0.6803,
"step": 336
},
{
"epoch": 0.89,
"learning_rate": 1.6517212969597793e-05,
"loss": 0.6757,
"step": 337
},
{
"epoch": 0.89,
"learning_rate": 1.6495564303798392e-05,
"loss": 0.6739,
"step": 338
},
{
"epoch": 0.89,
"learning_rate": 1.647386284781828e-05,
"loss": 0.6898,
"step": 339
},
{
"epoch": 0.9,
"learning_rate": 1.645210877802762e-05,
"loss": 0.6958,
"step": 340
},
{
"epoch": 0.9,
"learning_rate": 1.6430302271224178e-05,
"loss": 0.6931,
"step": 341
},
{
"epoch": 0.9,
"learning_rate": 1.6408443504631897e-05,
"loss": 0.6943,
"step": 342
},
{
"epoch": 0.9,
"learning_rate": 1.6386532655899418e-05,
"loss": 0.6907,
"step": 343
},
{
"epoch": 0.91,
"learning_rate": 1.6364569903098672e-05,
"loss": 0.683,
"step": 344
},
{
"epoch": 0.91,
"learning_rate": 1.634255542472342e-05,
"loss": 0.6897,
"step": 345
},
{
"epoch": 0.91,
"learning_rate": 1.63204893996878e-05,
"loss": 0.6767,
"step": 346
},
{
"epoch": 0.91,
"learning_rate": 1.6298372007324873e-05,
"loss": 0.6837,
"step": 347
},
{
"epoch": 0.92,
"learning_rate": 1.6276203427385175e-05,
"loss": 0.6713,
"step": 348
},
{
"epoch": 0.92,
"learning_rate": 1.6253983840035243e-05,
"loss": 0.6828,
"step": 349
},
{
"epoch": 0.92,
"learning_rate": 1.6231713425856156e-05,
"loss": 0.6803,
"step": 350
},
{
"epoch": 0.93,
"learning_rate": 1.620939236584208e-05,
"loss": 0.6834,
"step": 351
},
{
"epoch": 0.93,
"learning_rate": 1.6187020841398773e-05,
"loss": 0.6787,
"step": 352
},
{
"epoch": 0.93,
"learning_rate": 1.6164599034342122e-05,
"loss": 0.6888,
"step": 353
},
{
"epoch": 0.93,
"learning_rate": 1.6142127126896682e-05,
"loss": 0.6882,
"step": 354
},
{
"epoch": 0.94,
"learning_rate": 1.6119605301694156e-05,
"loss": 0.6835,
"step": 355
},
{
"epoch": 0.94,
"learning_rate": 1.609703374177196e-05,
"loss": 0.688,
"step": 356
},
{
"epoch": 0.94,
"learning_rate": 1.6074412630571685e-05,
"loss": 0.6756,
"step": 357
},
{
"epoch": 0.94,
"learning_rate": 1.6051742151937655e-05,
"loss": 0.6828,
"step": 358
},
{
"epoch": 0.95,
"learning_rate": 1.6029022490115383e-05,
"loss": 0.6785,
"step": 359
},
{
"epoch": 0.95,
"learning_rate": 1.6006253829750125e-05,
"loss": 0.6883,
"step": 360
},
{
"epoch": 0.95,
"learning_rate": 1.5983436355885333e-05,
"loss": 0.6832,
"step": 361
},
{
"epoch": 0.95,
"learning_rate": 1.5960570253961185e-05,
"loss": 0.6873,
"step": 362
},
{
"epoch": 0.96,
"learning_rate": 1.593765570981306e-05,
"loss": 0.679,
"step": 363
},
{
"epoch": 0.96,
"learning_rate": 1.591469290967002e-05,
"loss": 0.6892,
"step": 364
},
{
"epoch": 0.96,
"learning_rate": 1.589168204015334e-05,
"loss": 0.6864,
"step": 365
},
{
"epoch": 0.96,
"learning_rate": 1.586862328827493e-05,
"loss": 0.6865,
"step": 366
},
{
"epoch": 0.97,
"learning_rate": 1.584551684143586e-05,
"loss": 0.6868,
"step": 367
},
{
"epoch": 0.97,
"learning_rate": 1.5822362887424817e-05,
"loss": 0.6811,
"step": 368
},
{
"epoch": 0.97,
"learning_rate": 1.579916161441658e-05,
"loss": 0.6845,
"step": 369
},
{
"epoch": 0.98,
"learning_rate": 1.5775913210970518e-05,
"loss": 0.6893,
"step": 370
},
{
"epoch": 0.98,
"learning_rate": 1.5752617866029005e-05,
"loss": 0.6874,
"step": 371
},
{
"epoch": 0.98,
"learning_rate": 1.5729275768915924e-05,
"loss": 0.685,
"step": 372
},
{
"epoch": 0.98,
"learning_rate": 1.5705887109335124e-05,
"loss": 0.6774,
"step": 373
},
{
"epoch": 0.99,
"learning_rate": 1.568245207736887e-05,
"loss": 0.6784,
"step": 374
},
{
"epoch": 0.99,
"learning_rate": 1.5658970863476298e-05,
"loss": 0.6826,
"step": 375
},
{
"epoch": 0.99,
"learning_rate": 1.563544365849187e-05,
"loss": 0.6794,
"step": 376
},
{
"epoch": 0.99,
"learning_rate": 1.5611870653623826e-05,
"loss": 0.6755,
"step": 377
},
{
"epoch": 1.0,
"learning_rate": 1.558825204045262e-05,
"loss": 0.6755,
"step": 378
},
{
"epoch": 1.0,
"learning_rate": 1.5564588010929375e-05,
"loss": 0.6837,
"step": 379
},
{
"epoch": 1.0,
"learning_rate": 1.5540878757374326e-05,
"loss": 0.6542,
"step": 380
},
{
"epoch": 1.0,
"learning_rate": 1.551712447247523e-05,
"loss": 0.6466,
"step": 381
},
{
"epoch": 1.01,
"learning_rate": 1.5493325349285824e-05,
"loss": 0.6388,
"step": 382
},
{
"epoch": 1.01,
"learning_rate": 1.5469481581224274e-05,
"loss": 0.64,
"step": 383
},
{
"epoch": 1.01,
"learning_rate": 1.544559336207154e-05,
"loss": 0.6419,
"step": 384
},
{
"epoch": 1.01,
"learning_rate": 1.5421660885969875e-05,
"loss": 0.6334,
"step": 385
},
{
"epoch": 1.02,
"learning_rate": 1.53976843474212e-05,
"loss": 0.6315,
"step": 386
},
{
"epoch": 1.02,
"learning_rate": 1.5373663941285536e-05,
"loss": 0.6332,
"step": 387
},
{
"epoch": 1.02,
"learning_rate": 1.534959986277942e-05,
"loss": 0.6447,
"step": 388
},
{
"epoch": 1.03,
"learning_rate": 1.5325492307474314e-05,
"loss": 0.6364,
"step": 389
},
{
"epoch": 1.03,
"learning_rate": 1.530134147129504e-05,
"loss": 0.6338,
"step": 390
},
{
"epoch": 1.03,
"learning_rate": 1.5277147550518156e-05,
"loss": 0.6409,
"step": 391
},
{
"epoch": 1.03,
"learning_rate": 1.5252910741770364e-05,
"loss": 0.6452,
"step": 392
},
{
"epoch": 1.04,
"learning_rate": 1.522863124202694e-05,
"loss": 0.6306,
"step": 393
},
{
"epoch": 1.04,
"learning_rate": 1.5204309248610103e-05,
"loss": 0.637,
"step": 394
},
{
"epoch": 1.04,
"learning_rate": 1.5179944959187423e-05,
"loss": 0.6401,
"step": 395
},
{
"epoch": 1.04,
"learning_rate": 1.515553857177022e-05,
"loss": 0.6346,
"step": 396
},
{
"epoch": 1.05,
"learning_rate": 1.5131090284711942e-05,
"loss": 0.6388,
"step": 397
},
{
"epoch": 1.05,
"learning_rate": 1.5106600296706568e-05,
"loss": 0.6294,
"step": 398
},
{
"epoch": 1.05,
"learning_rate": 1.508206880678698e-05,
"loss": 0.6393,
"step": 399
},
{
"epoch": 1.05,
"learning_rate": 1.5057496014323354e-05,
"loss": 0.6315,
"step": 400
},
{
"epoch": 1.06,
"learning_rate": 1.503288211902153e-05,
"loss": 0.6358,
"step": 401
},
{
"epoch": 1.06,
"learning_rate": 1.5008227320921402e-05,
"loss": 0.6313,
"step": 402
},
{
"epoch": 1.06,
"learning_rate": 1.4983531820395287e-05,
"loss": 0.6384,
"step": 403
},
{
"epoch": 1.07,
"learning_rate": 1.4958795818146287e-05,
"loss": 0.6383,
"step": 404
},
{
"epoch": 1.07,
"learning_rate": 1.4934019515206675e-05,
"loss": 0.6337,
"step": 405
},
{
"epoch": 1.07,
"learning_rate": 1.4909203112936244e-05,
"loss": 0.6336,
"step": 406
},
{
"epoch": 1.07,
"learning_rate": 1.4884346813020685e-05,
"loss": 0.6376,
"step": 407
},
{
"epoch": 1.08,
"learning_rate": 1.4859450817469938e-05,
"loss": 0.6446,
"step": 408
},
{
"epoch": 1.08,
"learning_rate": 1.4834515328616555e-05,
"loss": 0.6298,
"step": 409
},
{
"epoch": 1.08,
"learning_rate": 1.4809540549114059e-05,
"loss": 0.6257,
"step": 410
},
{
"epoch": 1.08,
"learning_rate": 1.4784526681935282e-05,
"loss": 0.643,
"step": 411
},
{
"epoch": 1.09,
"learning_rate": 1.4759473930370738e-05,
"loss": 0.6342,
"step": 412
},
{
"epoch": 1.09,
"learning_rate": 1.4734382498026946e-05,
"loss": 0.6334,
"step": 413
},
{
"epoch": 1.09,
"learning_rate": 1.4709252588824805e-05,
"loss": 0.633,
"step": 414
},
{
"epoch": 1.09,
"learning_rate": 1.4684084406997903e-05,
"loss": 0.6379,
"step": 415
},
{
"epoch": 1.1,
"learning_rate": 1.4658878157090879e-05,
"loss": 0.632,
"step": 416
},
{
"epoch": 1.1,
"learning_rate": 1.4633634043957767e-05,
"loss": 0.6392,
"step": 417
},
{
"epoch": 1.1,
"learning_rate": 1.46083522727603e-05,
"loss": 0.6397,
"step": 418
},
{
"epoch": 1.1,
"learning_rate": 1.4583033048966273e-05,
"loss": 0.6319,
"step": 419
},
{
"epoch": 1.11,
"learning_rate": 1.4557676578347868e-05,
"loss": 0.6351,
"step": 420
},
{
"epoch": 1.11,
"learning_rate": 1.4532283066979967e-05,
"loss": 0.627,
"step": 421
},
{
"epoch": 1.11,
"learning_rate": 1.4506852721238485e-05,
"loss": 0.6438,
"step": 422
},
{
"epoch": 1.12,
"learning_rate": 1.4481385747798705e-05,
"loss": 0.636,
"step": 423
},
{
"epoch": 1.12,
"learning_rate": 1.4455882353633576e-05,
"loss": 0.6398,
"step": 424
},
{
"epoch": 1.12,
"learning_rate": 1.4430342746012049e-05,
"loss": 0.6342,
"step": 425
},
{
"epoch": 1.12,
"learning_rate": 1.4404767132497386e-05,
"loss": 0.6419,
"step": 426
},
{
"epoch": 1.13,
"learning_rate": 1.4379155720945464e-05,
"loss": 0.6369,
"step": 427
},
{
"epoch": 1.13,
"learning_rate": 1.4353508719503114e-05,
"loss": 0.6307,
"step": 428
},
{
"epoch": 1.13,
"learning_rate": 1.4327826336606383e-05,
"loss": 0.6397,
"step": 429
},
{
"epoch": 1.13,
"learning_rate": 1.4302108780978889e-05,
"loss": 0.6307,
"step": 430
},
{
"epoch": 1.14,
"learning_rate": 1.4276356261630096e-05,
"loss": 0.6448,
"step": 431
},
{
"epoch": 1.14,
"learning_rate": 1.425056898785362e-05,
"loss": 0.6325,
"step": 432
},
{
"epoch": 1.14,
"learning_rate": 1.4224747169225527e-05,
"loss": 0.6488,
"step": 433
},
{
"epoch": 1.14,
"learning_rate": 1.4198891015602648e-05,
"loss": 0.636,
"step": 434
},
{
"epoch": 1.15,
"learning_rate": 1.4173000737120839e-05,
"loss": 0.6423,
"step": 435
},
{
"epoch": 1.15,
"learning_rate": 1.4147076544193303e-05,
"loss": 0.6419,
"step": 436
},
{
"epoch": 1.15,
"learning_rate": 1.4121118647508872e-05,
"loss": 0.6431,
"step": 437
},
{
"epoch": 1.15,
"learning_rate": 1.4095127258030283e-05,
"loss": 0.6334,
"step": 438
},
{
"epoch": 1.16,
"learning_rate": 1.406910258699248e-05,
"loss": 0.6304,
"step": 439
},
{
"epoch": 1.16,
"learning_rate": 1.4043044845900889e-05,
"loss": 0.6421,
"step": 440
},
{
"epoch": 1.16,
"learning_rate": 1.4016954246529697e-05,
"loss": 0.6346,
"step": 441
},
{
"epoch": 1.17,
"learning_rate": 1.3990831000920135e-05,
"loss": 0.6299,
"step": 442
},
{
"epoch": 1.17,
"learning_rate": 1.3964675321378756e-05,
"loss": 0.6327,
"step": 443
},
{
"epoch": 1.17,
"learning_rate": 1.3938487420475703e-05,
"loss": 0.631,
"step": 444
},
{
"epoch": 1.17,
"learning_rate": 1.3912267511042994e-05,
"loss": 0.637,
"step": 445
},
{
"epoch": 1.18,
"learning_rate": 1.3886015806172774e-05,
"loss": 0.6501,
"step": 446
},
{
"epoch": 1.18,
"learning_rate": 1.3859732519215596e-05,
"loss": 0.6364,
"step": 447
},
{
"epoch": 1.18,
"learning_rate": 1.3833417863778689e-05,
"loss": 0.6433,
"step": 448
},
{
"epoch": 1.18,
"learning_rate": 1.380707205372421e-05,
"loss": 0.6383,
"step": 449
},
{
"epoch": 1.19,
"learning_rate": 1.378069530316752e-05,
"loss": 0.6411,
"step": 450
},
{
"epoch": 1.19,
"learning_rate": 1.3754287826475428e-05,
"loss": 0.6436,
"step": 451
},
{
"epoch": 1.19,
"learning_rate": 1.3727849838264472e-05,
"loss": 0.6341,
"step": 452
},
{
"epoch": 1.19,
"learning_rate": 1.3701381553399147e-05,
"loss": 0.6356,
"step": 453
},
{
"epoch": 1.2,
"learning_rate": 1.3674883186990174e-05,
"loss": 0.6323,
"step": 454
},
{
"epoch": 1.2,
"learning_rate": 1.3648354954392758e-05,
"loss": 0.6309,
"step": 455
},
{
"epoch": 1.2,
"learning_rate": 1.3621797071204823e-05,
"loss": 0.6328,
"step": 456
},
{
"epoch": 1.2,
"learning_rate": 1.3595209753265266e-05,
"loss": 0.6282,
"step": 457
},
{
"epoch": 1.21,
"learning_rate": 1.3568593216652215e-05,
"loss": 0.6341,
"step": 458
},
{
"epoch": 1.21,
"learning_rate": 1.3541947677681246e-05,
"loss": 0.6337,
"step": 459
},
{
"epoch": 1.21,
"learning_rate": 1.3515273352903649e-05,
"loss": 0.637,
"step": 460
},
{
"epoch": 1.22,
"learning_rate": 1.3488570459104656e-05,
"loss": 0.6486,
"step": 461
},
{
"epoch": 1.22,
"learning_rate": 1.3461839213301684e-05,
"loss": 0.6347,
"step": 462
},
{
"epoch": 1.22,
"learning_rate": 1.3435079832742577e-05,
"loss": 0.6356,
"step": 463
},
{
"epoch": 1.22,
"learning_rate": 1.3408292534903817e-05,
"loss": 0.6386,
"step": 464
},
{
"epoch": 1.23,
"learning_rate": 1.338147753748879e-05,
"loss": 0.6397,
"step": 465
},
{
"epoch": 1.23,
"learning_rate": 1.3354635058425986e-05,
"loss": 0.6224,
"step": 466
},
{
"epoch": 1.23,
"learning_rate": 1.3327765315867253e-05,
"loss": 0.6332,
"step": 467
},
{
"epoch": 1.23,
"learning_rate": 1.3300868528186001e-05,
"loss": 0.6453,
"step": 468
},
{
"epoch": 1.24,
"learning_rate": 1.3273944913975452e-05,
"loss": 0.6288,
"step": 469
},
{
"epoch": 1.24,
"learning_rate": 1.3246994692046837e-05,
"loss": 0.6338,
"step": 470
},
{
"epoch": 1.24,
"learning_rate": 1.3220018081427637e-05,
"loss": 0.6377,
"step": 471
},
{
"epoch": 1.24,
"learning_rate": 1.31930153013598e-05,
"loss": 0.6313,
"step": 472
},
{
"epoch": 1.25,
"learning_rate": 1.3165986571297953e-05,
"loss": 0.6414,
"step": 473
},
{
"epoch": 1.25,
"learning_rate": 1.3138932110907616e-05,
"loss": 0.6339,
"step": 474
},
{
"epoch": 1.25,
"learning_rate": 1.3111852140063435e-05,
"loss": 0.634,
"step": 475
},
{
"epoch": 1.25,
"learning_rate": 1.3084746878847367e-05,
"loss": 0.6386,
"step": 476
},
{
"epoch": 1.26,
"learning_rate": 1.305761654754692e-05,
"loss": 0.6353,
"step": 477
},
{
"epoch": 1.26,
"learning_rate": 1.303046136665334e-05,
"loss": 0.6424,
"step": 478
},
{
"epoch": 1.26,
"learning_rate": 1.3003281556859837e-05,
"loss": 0.6363,
"step": 479
},
{
"epoch": 1.27,
"learning_rate": 1.2976077339059774e-05,
"loss": 0.6336,
"step": 480
},
{
"epoch": 1.27,
"learning_rate": 1.2948848934344885e-05,
"loss": 0.6336,
"step": 481
},
{
"epoch": 1.27,
"learning_rate": 1.2921596564003479e-05,
"loss": 0.6357,
"step": 482
},
{
"epoch": 1.27,
"learning_rate": 1.2894320449518624e-05,
"loss": 0.6393,
"step": 483
},
{
"epoch": 1.28,
"learning_rate": 1.286702081256637e-05,
"loss": 0.6414,
"step": 484
},
{
"epoch": 1.28,
"learning_rate": 1.283969787501393e-05,
"loss": 0.6235,
"step": 485
},
{
"epoch": 1.28,
"learning_rate": 1.281235185891789e-05,
"loss": 0.6428,
"step": 486
},
{
"epoch": 1.28,
"learning_rate": 1.2784982986522399e-05,
"loss": 0.6328,
"step": 487
},
{
"epoch": 1.29,
"learning_rate": 1.2757591480257342e-05,
"loss": 0.6327,
"step": 488
},
{
"epoch": 1.29,
"learning_rate": 1.2730177562736583e-05,
"loss": 0.638,
"step": 489
},
{
"epoch": 1.29,
"learning_rate": 1.27027414567561e-05,
"loss": 0.6353,
"step": 490
},
{
"epoch": 1.29,
"learning_rate": 1.2675283385292212e-05,
"loss": 0.6416,
"step": 491
},
{
"epoch": 1.3,
"learning_rate": 1.2647803571499744e-05,
"loss": 0.6411,
"step": 492
},
{
"epoch": 1.3,
"learning_rate": 1.2620302238710238e-05,
"loss": 0.6223,
"step": 493
},
{
"epoch": 1.3,
"learning_rate": 1.2592779610430111e-05,
"loss": 0.6301,
"step": 494
},
{
"epoch": 1.3,
"learning_rate": 1.2565235910338852e-05,
"loss": 0.6355,
"step": 495
},
{
"epoch": 1.31,
"learning_rate": 1.2537671362287207e-05,
"loss": 0.6344,
"step": 496
},
{
"epoch": 1.31,
"learning_rate": 1.2510086190295358e-05,
"loss": 0.6314,
"step": 497
},
{
"epoch": 1.31,
"learning_rate": 1.2482480618551092e-05,
"loss": 0.6365,
"step": 498
},
{
"epoch": 1.32,
"learning_rate": 1.2454854871407993e-05,
"loss": 0.6308,
"step": 499
},
{
"epoch": 1.32,
"learning_rate": 1.2427209173383612e-05,
"loss": 0.644,
"step": 500
},
{
"epoch": 1.32,
"learning_rate": 1.239954374915764e-05,
"loss": 0.6347,
"step": 501
},
{
"epoch": 1.32,
"learning_rate": 1.2371858823570085e-05,
"loss": 0.6298,
"step": 502
},
{
"epoch": 1.33,
"learning_rate": 1.234415462161945e-05,
"loss": 0.6334,
"step": 503
},
{
"epoch": 1.33,
"learning_rate": 1.2316431368460902e-05,
"loss": 0.6423,
"step": 504
},
{
"epoch": 1.33,
"learning_rate": 1.2288689289404424e-05,
"loss": 0.6354,
"step": 505
},
{
"epoch": 1.33,
"learning_rate": 1.226092860991301e-05,
"loss": 0.6385,
"step": 506
},
{
"epoch": 1.34,
"learning_rate": 1.2233149555600827e-05,
"loss": 0.6399,
"step": 507
},
{
"epoch": 1.34,
"learning_rate": 1.2205352352231364e-05,
"loss": 0.6458,
"step": 508
},
{
"epoch": 1.34,
"learning_rate": 1.2177537225715615e-05,
"loss": 0.6471,
"step": 509
},
{
"epoch": 1.34,
"learning_rate": 1.2149704402110243e-05,
"loss": 0.6308,
"step": 510
},
{
"epoch": 1.35,
"learning_rate": 1.2121854107615726e-05,
"loss": 0.6326,
"step": 511
},
{
"epoch": 1.35,
"learning_rate": 1.2093986568574537e-05,
"loss": 0.6301,
"step": 512
},
{
"epoch": 1.35,
"learning_rate": 1.2066102011469295e-05,
"loss": 0.6346,
"step": 513
},
{
"epoch": 1.36,
"learning_rate": 1.2038200662920933e-05,
"loss": 0.6359,
"step": 514
},
{
"epoch": 1.36,
"learning_rate": 1.2010282749686839e-05,
"loss": 0.6345,
"step": 515
},
{
"epoch": 1.36,
"learning_rate": 1.1982348498659031e-05,
"loss": 0.6402,
"step": 516
},
{
"epoch": 1.36,
"learning_rate": 1.1954398136862308e-05,
"loss": 0.6271,
"step": 517
},
{
"epoch": 1.37,
"learning_rate": 1.1926431891452397e-05,
"loss": 0.6343,
"step": 518
},
{
"epoch": 1.37,
"learning_rate": 1.1898449989714116e-05,
"loss": 0.6421,
"step": 519
},
{
"epoch": 1.37,
"learning_rate": 1.1870452659059522e-05,
"loss": 0.631,
"step": 520
},
{
"epoch": 1.37,
"learning_rate": 1.184244012702607e-05,
"loss": 0.6372,
"step": 521
},
{
"epoch": 1.38,
"learning_rate": 1.1814412621274751e-05,
"loss": 0.6321,
"step": 522
},
{
"epoch": 1.38,
"learning_rate": 1.1786370369588257e-05,
"loss": 0.6342,
"step": 523
},
{
"epoch": 1.38,
"learning_rate": 1.175831359986911e-05,
"loss": 0.6354,
"step": 524
},
{
"epoch": 1.38,
"learning_rate": 1.1730242540137835e-05,
"loss": 0.6354,
"step": 525
},
{
"epoch": 1.39,
"learning_rate": 1.1702157418531084e-05,
"loss": 0.6297,
"step": 526
},
{
"epoch": 1.39,
"learning_rate": 1.1674058463299798e-05,
"loss": 0.6406,
"step": 527
},
{
"epoch": 1.39,
"learning_rate": 1.164594590280734e-05,
"loss": 0.6342,
"step": 528
},
{
"epoch": 1.39,
"learning_rate": 1.161781996552765e-05,
"loss": 0.6469,
"step": 529
},
{
"epoch": 1.4,
"learning_rate": 1.1589680880043372e-05,
"loss": 0.6359,
"step": 530
},
{
"epoch": 1.4,
"learning_rate": 1.1561528875044026e-05,
"loss": 0.6318,
"step": 531
},
{
"epoch": 1.4,
"learning_rate": 1.1533364179324109e-05,
"loss": 0.6356,
"step": 532
},
{
"epoch": 1.41,
"learning_rate": 1.150518702178127e-05,
"loss": 0.6323,
"step": 533
},
{
"epoch": 1.41,
"learning_rate": 1.147699763141444e-05,
"loss": 0.6305,
"step": 534
},
{
"epoch": 1.41,
"learning_rate": 1.1448796237321949e-05,
"loss": 0.6319,
"step": 535
},
{
"epoch": 1.41,
"learning_rate": 1.14205830686997e-05,
"loss": 0.634,
"step": 536
},
{
"epoch": 1.42,
"learning_rate": 1.1392358354839286e-05,
"loss": 0.6322,
"step": 537
},
{
"epoch": 1.42,
"learning_rate": 1.1364122325126123e-05,
"loss": 0.6366,
"step": 538
},
{
"epoch": 1.42,
"learning_rate": 1.1335875209037598e-05,
"loss": 0.6439,
"step": 539
},
{
"epoch": 1.42,
"learning_rate": 1.1307617236141195e-05,
"loss": 0.6427,
"step": 540
},
{
"epoch": 1.43,
"learning_rate": 1.1279348636092634e-05,
"loss": 0.637,
"step": 541
},
{
"epoch": 1.43,
"learning_rate": 1.125106963863401e-05,
"loss": 0.6414,
"step": 542
},
{
"epoch": 1.43,
"learning_rate": 1.1222780473591902e-05,
"loss": 0.6347,
"step": 543
},
{
"epoch": 1.43,
"learning_rate": 1.1194481370875531e-05,
"loss": 0.637,
"step": 544
},
{
"epoch": 1.44,
"learning_rate": 1.1166172560474894e-05,
"loss": 0.6332,
"step": 545
},
{
"epoch": 1.44,
"learning_rate": 1.1137854272458866e-05,
"loss": 0.6368,
"step": 546
},
{
"epoch": 1.44,
"learning_rate": 1.110952673697335e-05,
"loss": 0.631,
"step": 547
},
{
"epoch": 1.44,
"learning_rate": 1.1081190184239418e-05,
"loss": 0.6326,
"step": 548
},
{
"epoch": 1.45,
"learning_rate": 1.1052844844551416e-05,
"loss": 0.631,
"step": 549
},
{
"epoch": 1.45,
"learning_rate": 1.1024490948275096e-05,
"loss": 0.6357,
"step": 550
},
{
"epoch": 1.45,
"learning_rate": 1.0996128725845764e-05,
"loss": 0.6268,
"step": 551
},
{
"epoch": 1.46,
"learning_rate": 1.096775840776639e-05,
"loss": 0.6288,
"step": 552
},
{
"epoch": 1.46,
"learning_rate": 1.0939380224605727e-05,
"loss": 0.6353,
"step": 553
},
{
"epoch": 1.46,
"learning_rate": 1.0910994406996463e-05,
"loss": 0.6324,
"step": 554
},
{
"epoch": 1.46,
"learning_rate": 1.0882601185633326e-05,
"loss": 0.6333,
"step": 555
},
{
"epoch": 1.47,
"learning_rate": 1.0854200791271215e-05,
"loss": 0.6289,
"step": 556
},
{
"epoch": 1.47,
"learning_rate": 1.0825793454723325e-05,
"loss": 0.6334,
"step": 557
},
{
"epoch": 1.47,
"learning_rate": 1.079737940685927e-05,
"loss": 0.6312,
"step": 558
},
{
"epoch": 1.47,
"learning_rate": 1.076895887860321e-05,
"loss": 0.636,
"step": 559
},
{
"epoch": 1.48,
"learning_rate": 1.0740532100931969e-05,
"loss": 0.6359,
"step": 560
},
{
"epoch": 1.48,
"learning_rate": 1.0712099304873161e-05,
"loss": 0.6316,
"step": 561
},
{
"epoch": 1.48,
"learning_rate": 1.0683660721503318e-05,
"loss": 0.6366,
"step": 562
},
{
"epoch": 1.48,
"learning_rate": 1.0655216581945997e-05,
"loss": 0.625,
"step": 563
},
{
"epoch": 1.49,
"learning_rate": 1.0626767117369916e-05,
"loss": 0.6315,
"step": 564
},
{
"epoch": 1.49,
"learning_rate": 1.0598312558987072e-05,
"loss": 0.6363,
"step": 565
},
{
"epoch": 1.49,
"learning_rate": 1.0569853138050856e-05,
"loss": 0.6294,
"step": 566
},
{
"epoch": 1.49,
"learning_rate": 1.0541389085854177e-05,
"loss": 0.6408,
"step": 567
},
{
"epoch": 1.5,
"learning_rate": 1.0512920633727583e-05,
"loss": 0.6291,
"step": 568
},
{
"epoch": 1.5,
"learning_rate": 1.048444801303739e-05,
"loss": 0.6323,
"step": 569
},
{
"epoch": 1.5,
"learning_rate": 1.0455971455183775e-05,
"loss": 0.6327,
"step": 570
},
{
"epoch": 1.51,
"learning_rate": 1.0427491191598926e-05,
"loss": 0.6443,
"step": 571
},
{
"epoch": 1.51,
"learning_rate": 1.0399007453745147e-05,
"loss": 0.6345,
"step": 572
},
{
"epoch": 1.51,
"learning_rate": 1.037052047311297e-05,
"loss": 0.6331,
"step": 573
},
{
"epoch": 1.51,
"learning_rate": 1.034203048121929e-05,
"loss": 0.6264,
"step": 574
},
{
"epoch": 1.52,
"learning_rate": 1.031353770960547e-05,
"loss": 0.6227,
"step": 575
},
{
"epoch": 1.52,
"learning_rate": 1.0285042389835466e-05,
"loss": 0.6286,
"step": 576
},
{
"epoch": 1.52,
"learning_rate": 1.0256544753493945e-05,
"loss": 0.64,
"step": 577
},
{
"epoch": 1.52,
"learning_rate": 1.022804503218439e-05,
"loss": 0.6298,
"step": 578
},
{
"epoch": 1.53,
"learning_rate": 1.0199543457527247e-05,
"loss": 0.6365,
"step": 579
},
{
"epoch": 1.53,
"learning_rate": 1.0171040261158012e-05,
"loss": 0.6306,
"step": 580
},
{
"epoch": 1.53,
"learning_rate": 1.014253567472536e-05,
"loss": 0.6356,
"step": 581
},
{
"epoch": 1.53,
"learning_rate": 1.011402992988927e-05,
"loss": 0.6403,
"step": 582
},
{
"epoch": 1.54,
"learning_rate": 1.0085523258319131e-05,
"loss": 0.6386,
"step": 583
},
{
"epoch": 1.54,
"learning_rate": 1.0057015891691866e-05,
"loss": 0.6288,
"step": 584
},
{
"epoch": 1.54,
"learning_rate": 1.0028508061690044e-05,
"loss": 0.6324,
"step": 585
},
{
"epoch": 1.54,
"learning_rate": 1e-05,
"loss": 0.6263,
"step": 586
},
{
"epoch": 1.55,
"learning_rate": 9.971491938309963e-06,
"loss": 0.6396,
"step": 587
},
{
"epoch": 1.55,
"learning_rate": 9.942984108308137e-06,
"loss": 0.6314,
"step": 588
},
{
"epoch": 1.55,
"learning_rate": 9.914476741680872e-06,
"loss": 0.6322,
"step": 589
},
{
"epoch": 1.56,
"learning_rate": 9.885970070110733e-06,
"loss": 0.6333,
"step": 590
},
{
"epoch": 1.56,
"learning_rate": 9.857464325274643e-06,
"loss": 0.6218,
"step": 591
},
{
"epoch": 1.56,
"learning_rate": 9.82895973884199e-06,
"loss": 0.6325,
"step": 592
},
{
"epoch": 1.56,
"learning_rate": 9.800456542472754e-06,
"loss": 0.6351,
"step": 593
},
{
"epoch": 1.57,
"learning_rate": 9.77195496781561e-06,
"loss": 0.6367,
"step": 594
},
{
"epoch": 1.57,
"learning_rate": 9.743455246506059e-06,
"loss": 0.6309,
"step": 595
},
{
"epoch": 1.57,
"learning_rate": 9.714957610164534e-06,
"loss": 0.6351,
"step": 596
},
{
"epoch": 1.57,
"learning_rate": 9.686462290394533e-06,
"loss": 0.6241,
"step": 597
},
{
"epoch": 1.58,
"learning_rate": 9.657969518780714e-06,
"loss": 0.6286,
"step": 598
},
{
"epoch": 1.58,
"learning_rate": 9.629479526887032e-06,
"loss": 0.6325,
"step": 599
},
{
"epoch": 1.58,
"learning_rate": 9.60099254625486e-06,
"loss": 0.6348,
"step": 600
},
{
"epoch": 1.58,
"learning_rate": 9.572508808401078e-06,
"loss": 0.6393,
"step": 601
},
{
"epoch": 1.59,
"learning_rate": 9.544028544816229e-06,
"loss": 0.6467,
"step": 602
},
{
"epoch": 1.59,
"learning_rate": 9.515551986962614e-06,
"loss": 0.628,
"step": 603
},
{
"epoch": 1.59,
"learning_rate": 9.487079366272419e-06,
"loss": 0.6306,
"step": 604
},
{
"epoch": 1.59,
"learning_rate": 9.458610914145826e-06,
"loss": 0.6315,
"step": 605
},
{
"epoch": 1.6,
"learning_rate": 9.430146861949147e-06,
"loss": 0.6213,
"step": 606
},
{
"epoch": 1.6,
"learning_rate": 9.401687441012928e-06,
"loss": 0.6356,
"step": 607
},
{
"epoch": 1.6,
"learning_rate": 9.373232882630087e-06,
"loss": 0.6304,
"step": 608
},
{
"epoch": 1.61,
"learning_rate": 9.344783418054005e-06,
"loss": 0.6303,
"step": 609
},
{
"epoch": 1.61,
"learning_rate": 9.316339278496685e-06,
"loss": 0.6314,
"step": 610
},
{
"epoch": 1.61,
"learning_rate": 9.287900695126844e-06,
"loss": 0.6289,
"step": 611
},
{
"epoch": 1.61,
"learning_rate": 9.259467899068034e-06,
"loss": 0.6357,
"step": 612
},
{
"epoch": 1.62,
"learning_rate": 9.231041121396795e-06,
"loss": 0.6249,
"step": 613
},
{
"epoch": 1.62,
"learning_rate": 9.202620593140732e-06,
"loss": 0.6372,
"step": 614
},
{
"epoch": 1.62,
"learning_rate": 9.174206545276678e-06,
"loss": 0.6335,
"step": 615
},
{
"epoch": 1.62,
"learning_rate": 9.145799208728785e-06,
"loss": 0.6356,
"step": 616
},
{
"epoch": 1.63,
"learning_rate": 9.117398814366677e-06,
"loss": 0.6287,
"step": 617
},
{
"epoch": 1.63,
"learning_rate": 9.089005593003539e-06,
"loss": 0.6316,
"step": 618
},
{
"epoch": 1.63,
"learning_rate": 9.060619775394276e-06,
"loss": 0.6224,
"step": 619
},
{
"epoch": 1.63,
"learning_rate": 9.032241592233612e-06,
"loss": 0.6287,
"step": 620
},
{
"epoch": 1.64,
"learning_rate": 9.003871274154237e-06,
"loss": 0.6362,
"step": 621
},
{
"epoch": 1.64,
"learning_rate": 8.975509051724908e-06,
"loss": 0.6266,
"step": 622
},
{
"epoch": 1.64,
"learning_rate": 8.947155155448588e-06,
"loss": 0.634,
"step": 623
},
{
"epoch": 1.65,
"learning_rate": 8.918809815760585e-06,
"loss": 0.6176,
"step": 624
},
{
"epoch": 1.65,
"learning_rate": 8.890473263026653e-06,
"loss": 0.6284,
"step": 625
},
{
"epoch": 1.65,
"learning_rate": 8.862145727541141e-06,
"loss": 0.634,
"step": 626
},
{
"epoch": 1.65,
"learning_rate": 8.833827439525109e-06,
"loss": 0.6292,
"step": 627
},
{
"epoch": 1.66,
"learning_rate": 8.805518629124472e-06,
"loss": 0.6383,
"step": 628
},
{
"epoch": 1.66,
"learning_rate": 8.777219526408103e-06,
"loss": 0.63,
"step": 629
},
{
"epoch": 1.66,
"learning_rate": 8.748930361365994e-06,
"loss": 0.6262,
"step": 630
},
{
"epoch": 1.66,
"learning_rate": 8.720651363907364e-06,
"loss": 0.6378,
"step": 631
},
{
"epoch": 1.67,
"learning_rate": 8.692382763858807e-06,
"loss": 0.629,
"step": 632
},
{
"epoch": 1.67,
"learning_rate": 8.664124790962407e-06,
"loss": 0.6281,
"step": 633
},
{
"epoch": 1.67,
"learning_rate": 8.63587767487388e-06,
"loss": 0.6281,
"step": 634
},
{
"epoch": 1.67,
"learning_rate": 8.607641645160721e-06,
"loss": 0.6182,
"step": 635
},
{
"epoch": 1.68,
"learning_rate": 8.579416931300303e-06,
"loss": 0.6332,
"step": 636
},
{
"epoch": 1.68,
"learning_rate": 8.551203762678056e-06,
"loss": 0.6259,
"step": 637
},
{
"epoch": 1.68,
"learning_rate": 8.523002368585564e-06,
"loss": 0.6334,
"step": 638
},
{
"epoch": 1.68,
"learning_rate": 8.494812978218732e-06,
"loss": 0.6295,
"step": 639
},
{
"epoch": 1.69,
"learning_rate": 8.466635820675891e-06,
"loss": 0.6311,
"step": 640
},
{
"epoch": 1.69,
"learning_rate": 8.438471124955979e-06,
"loss": 0.6278,
"step": 641
},
{
"epoch": 1.69,
"learning_rate": 8.410319119956626e-06,
"loss": 0.6304,
"step": 642
},
{
"epoch": 1.7,
"learning_rate": 8.382180034472353e-06,
"loss": 0.6339,
"step": 643
},
{
"epoch": 1.7,
"learning_rate": 8.35405409719266e-06,
"loss": 0.627,
"step": 644
},
{
"epoch": 1.7,
"learning_rate": 8.325941536700206e-06,
"loss": 0.6267,
"step": 645
},
{
"epoch": 1.7,
"learning_rate": 8.29784258146892e-06,
"loss": 0.629,
"step": 646
},
{
"epoch": 1.71,
"learning_rate": 8.269757459862169e-06,
"loss": 0.6298,
"step": 647
},
{
"epoch": 1.71,
"learning_rate": 8.241686400130895e-06,
"loss": 0.6335,
"step": 648
},
{
"epoch": 1.71,
"learning_rate": 8.213629630411747e-06,
"loss": 0.6317,
"step": 649
},
{
"epoch": 1.71,
"learning_rate": 8.18558737872525e-06,
"loss": 0.6291,
"step": 650
},
{
"epoch": 1.72,
"learning_rate": 8.157559872973932e-06,
"loss": 0.6358,
"step": 651
},
{
"epoch": 1.72,
"learning_rate": 8.129547340940481e-06,
"loss": 0.6232,
"step": 652
},
{
"epoch": 1.72,
"learning_rate": 8.101550010285887e-06,
"loss": 0.641,
"step": 653
},
{
"epoch": 1.72,
"learning_rate": 8.073568108547607e-06,
"loss": 0.6215,
"step": 654
},
{
"epoch": 1.73,
"learning_rate": 8.045601863137694e-06,
"loss": 0.6351,
"step": 655
},
{
"epoch": 1.73,
"learning_rate": 8.017651501340972e-06,
"loss": 0.6285,
"step": 656
},
{
"epoch": 1.73,
"learning_rate": 7.989717250313165e-06,
"loss": 0.6257,
"step": 657
},
{
"epoch": 1.73,
"learning_rate": 7.96179933707907e-06,
"loss": 0.6302,
"step": 658
},
{
"epoch": 1.74,
"learning_rate": 7.93389798853071e-06,
"loss": 0.6277,
"step": 659
},
{
"epoch": 1.74,
"learning_rate": 7.906013431425468e-06,
"loss": 0.6335,
"step": 660
},
{
"epoch": 1.74,
"learning_rate": 7.878145892384279e-06,
"loss": 0.6337,
"step": 661
},
{
"epoch": 1.75,
"learning_rate": 7.85029559788976e-06,
"loss": 0.6214,
"step": 662
},
{
"epoch": 1.75,
"learning_rate": 7.822462774284389e-06,
"loss": 0.6291,
"step": 663
},
{
"epoch": 1.75,
"learning_rate": 7.794647647768636e-06,
"loss": 0.6274,
"step": 664
},
{
"epoch": 1.75,
"learning_rate": 7.766850444399176e-06,
"loss": 0.6276,
"step": 665
},
{
"epoch": 1.76,
"learning_rate": 7.73907139008699e-06,
"loss": 0.6274,
"step": 666
},
{
"epoch": 1.76,
"learning_rate": 7.711310710595578e-06,
"loss": 0.634,
"step": 667
},
{
"epoch": 1.76,
"learning_rate": 7.6835686315391e-06,
"loss": 0.6195,
"step": 668
},
{
"epoch": 1.76,
"learning_rate": 7.655845378380551e-06,
"loss": 0.6304,
"step": 669
},
{
"epoch": 1.77,
"learning_rate": 7.6281411764299185e-06,
"loss": 0.6386,
"step": 670
},
{
"epoch": 1.77,
"learning_rate": 7.600456250842364e-06,
"loss": 0.6229,
"step": 671
},
{
"epoch": 1.77,
"learning_rate": 7.5727908266163945e-06,
"loss": 0.6372,
"step": 672
},
{
"epoch": 1.77,
"learning_rate": 7.545145128592009e-06,
"loss": 0.6296,
"step": 673
},
{
"epoch": 1.78,
"learning_rate": 7.51751938144891e-06,
"loss": 0.6333,
"step": 674
},
{
"epoch": 1.78,
"learning_rate": 7.489913809704643e-06,
"loss": 0.6277,
"step": 675
},
{
"epoch": 1.78,
"learning_rate": 7.4623286377127944e-06,
"loss": 0.6227,
"step": 676
},
{
"epoch": 1.78,
"learning_rate": 7.434764089661151e-06,
"loss": 0.6252,
"step": 677
},
{
"epoch": 1.79,
"learning_rate": 7.407220389569892e-06,
"loss": 0.6177,
"step": 678
},
{
"epoch": 1.79,
"learning_rate": 7.379697761289763e-06,
"loss": 0.6261,
"step": 679
},
{
"epoch": 1.79,
"learning_rate": 7.3521964285002575e-06,
"loss": 0.6314,
"step": 680
},
{
"epoch": 1.8,
"learning_rate": 7.324716614707794e-06,
"loss": 0.6305,
"step": 681
},
{
"epoch": 1.8,
"learning_rate": 7.297258543243904e-06,
"loss": 0.6387,
"step": 682
},
{
"epoch": 1.8,
"learning_rate": 7.269822437263423e-06,
"loss": 0.6266,
"step": 683
},
{
"epoch": 1.8,
"learning_rate": 7.242408519742659e-06,
"loss": 0.6189,
"step": 684
},
{
"epoch": 1.81,
"learning_rate": 7.215017013477607e-06,
"loss": 0.6222,
"step": 685
},
{
"epoch": 1.81,
"learning_rate": 7.1876481410821085e-06,
"loss": 0.6349,
"step": 686
},
{
"epoch": 1.81,
"learning_rate": 7.160302124986073e-06,
"loss": 0.6283,
"step": 687
},
{
"epoch": 1.81,
"learning_rate": 7.132979187433633e-06,
"loss": 0.6301,
"step": 688
},
{
"epoch": 1.82,
"learning_rate": 7.105679550481379e-06,
"loss": 0.6264,
"step": 689
},
{
"epoch": 1.82,
"learning_rate": 7.078403435996524e-06,
"loss": 0.6242,
"step": 690
},
{
"epoch": 1.82,
"learning_rate": 7.0511510656551175e-06,
"loss": 0.633,
"step": 691
},
{
"epoch": 1.82,
"learning_rate": 7.023922660940231e-06,
"loss": 0.6291,
"step": 692
},
{
"epoch": 1.83,
"learning_rate": 6.9967184431401665e-06,
"loss": 0.6246,
"step": 693
},
{
"epoch": 1.83,
"learning_rate": 6.969538633346663e-06,
"loss": 0.6246,
"step": 694
},
{
"epoch": 1.83,
"learning_rate": 6.942383452453083e-06,
"loss": 0.6287,
"step": 695
},
{
"epoch": 1.83,
"learning_rate": 6.915253121152637e-06,
"loss": 0.6306,
"step": 696
},
{
"epoch": 1.84,
"learning_rate": 6.888147859936569e-06,
"loss": 0.6267,
"step": 697
},
{
"epoch": 1.84,
"learning_rate": 6.861067889092385e-06,
"loss": 0.6255,
"step": 698
},
{
"epoch": 1.84,
"learning_rate": 6.834013428702049e-06,
"loss": 0.63,
"step": 699
},
{
"epoch": 1.85,
"learning_rate": 6.806984698640202e-06,
"loss": 0.6164,
"step": 700
},
{
"epoch": 1.85,
"learning_rate": 6.779981918572364e-06,
"loss": 0.6191,
"step": 701
},
{
"epoch": 1.85,
"learning_rate": 6.7530053079531664e-06,
"loss": 0.6285,
"step": 702
},
{
"epoch": 1.85,
"learning_rate": 6.72605508602455e-06,
"loss": 0.6368,
"step": 703
},
{
"epoch": 1.86,
"learning_rate": 6.699131471814001e-06,
"loss": 0.625,
"step": 704
},
{
"epoch": 1.86,
"learning_rate": 6.6722346841327515e-06,
"loss": 0.6193,
"step": 705
},
{
"epoch": 1.86,
"learning_rate": 6.645364941574015e-06,
"loss": 0.6276,
"step": 706
},
{
"epoch": 1.86,
"learning_rate": 6.6185224625112155e-06,
"loss": 0.6204,
"step": 707
},
{
"epoch": 1.87,
"learning_rate": 6.591707465096185e-06,
"loss": 0.6261,
"step": 708
},
{
"epoch": 1.87,
"learning_rate": 6.564920167257427e-06,
"loss": 0.6251,
"step": 709
},
{
"epoch": 1.87,
"learning_rate": 6.538160786698315e-06,
"loss": 0.6256,
"step": 710
},
{
"epoch": 1.87,
"learning_rate": 6.511429540895349e-06,
"loss": 0.6257,
"step": 711
},
{
"epoch": 1.88,
"learning_rate": 6.484726647096354e-06,
"loss": 0.6203,
"step": 712
},
{
"epoch": 1.88,
"learning_rate": 6.458052322318758e-06,
"loss": 0.6246,
"step": 713
},
{
"epoch": 1.88,
"learning_rate": 6.431406783347786e-06,
"loss": 0.6301,
"step": 714
},
{
"epoch": 1.88,
"learning_rate": 6.4047902467347355e-06,
"loss": 0.6182,
"step": 715
},
{
"epoch": 1.89,
"learning_rate": 6.378202928795183e-06,
"loss": 0.6363,
"step": 716
},
{
"epoch": 1.89,
"learning_rate": 6.3516450456072465e-06,
"loss": 0.6292,
"step": 717
},
{
"epoch": 1.89,
"learning_rate": 6.325116813009832e-06,
"loss": 0.6302,
"step": 718
},
{
"epoch": 1.9,
"learning_rate": 6.298618446600856e-06,
"loss": 0.6296,
"step": 719
},
{
"epoch": 1.9,
"learning_rate": 6.272150161735531e-06,
"loss": 0.6215,
"step": 720
},
{
"epoch": 1.9,
"learning_rate": 6.245712173524572e-06,
"loss": 0.625,
"step": 721
},
{
"epoch": 1.9,
"learning_rate": 6.219304696832482e-06,
"loss": 0.6324,
"step": 722
},
{
"epoch": 1.91,
"learning_rate": 6.1929279462757905e-06,
"loss": 0.6248,
"step": 723
},
{
"epoch": 1.91,
"learning_rate": 6.166582136221314e-06,
"loss": 0.6222,
"step": 724
},
{
"epoch": 1.91,
"learning_rate": 6.140267480784405e-06,
"loss": 0.6248,
"step": 725
},
{
"epoch": 1.91,
"learning_rate": 6.1139841938272284e-06,
"loss": 0.6313,
"step": 726
},
{
"epoch": 1.92,
"learning_rate": 6.08773248895701e-06,
"loss": 0.6238,
"step": 727
},
{
"epoch": 1.92,
"learning_rate": 6.061512579524298e-06,
"loss": 0.6241,
"step": 728
},
{
"epoch": 1.92,
"learning_rate": 6.035324678621248e-06,
"loss": 0.6257,
"step": 729
},
{
"epoch": 1.92,
"learning_rate": 6.009168999079867e-06,
"loss": 0.6207,
"step": 730
},
{
"epoch": 1.93,
"learning_rate": 5.983045753470308e-06,
"loss": 0.6371,
"step": 731
},
{
"epoch": 1.93,
"learning_rate": 5.9569551540991135e-06,
"loss": 0.6346,
"step": 732
},
{
"epoch": 1.93,
"learning_rate": 5.930897413007523e-06,
"loss": 0.6205,
"step": 733
},
{
"epoch": 1.94,
"learning_rate": 5.904872741969718e-06,
"loss": 0.6133,
"step": 734
},
{
"epoch": 1.94,
"learning_rate": 5.8788813524911324e-06,
"loss": 0.6331,
"step": 735
},
{
"epoch": 1.94,
"learning_rate": 5.8529234558066985e-06,
"loss": 0.6234,
"step": 736
},
{
"epoch": 1.94,
"learning_rate": 5.826999262879164e-06,
"loss": 0.6234,
"step": 737
},
{
"epoch": 1.95,
"learning_rate": 5.801108984397355e-06,
"loss": 0.6331,
"step": 738
},
{
"epoch": 1.95,
"learning_rate": 5.775252830774475e-06,
"loss": 0.6288,
"step": 739
},
{
"epoch": 1.95,
"learning_rate": 5.749431012146388e-06,
"loss": 0.6244,
"step": 740
},
{
"epoch": 1.95,
"learning_rate": 5.723643738369909e-06,
"loss": 0.6271,
"step": 741
},
{
"epoch": 1.96,
"learning_rate": 5.6978912190211145e-06,
"loss": 0.6245,
"step": 742
},
{
"epoch": 1.96,
"learning_rate": 5.672173663393618e-06,
"loss": 0.6277,
"step": 743
},
{
"epoch": 1.96,
"learning_rate": 5.646491280496893e-06,
"loss": 0.6248,
"step": 744
},
{
"epoch": 1.96,
"learning_rate": 5.620844279054536e-06,
"loss": 0.6283,
"step": 745
},
{
"epoch": 1.97,
"learning_rate": 5.595232867502619e-06,
"loss": 0.6231,
"step": 746
},
{
"epoch": 1.97,
"learning_rate": 5.569657253987952e-06,
"loss": 0.6283,
"step": 747
},
{
"epoch": 1.97,
"learning_rate": 5.544117646366425e-06,
"loss": 0.6212,
"step": 748
},
{
"epoch": 1.97,
"learning_rate": 5.518614252201295e-06,
"loss": 0.6199,
"step": 749
},
{
"epoch": 1.98,
"learning_rate": 5.493147278761517e-06,
"loss": 0.6233,
"step": 750
},
{
"epoch": 1.98,
"learning_rate": 5.46771693302004e-06,
"loss": 0.6112,
"step": 751
},
{
"epoch": 1.98,
"learning_rate": 5.442323421652136e-06,
"loss": 0.6314,
"step": 752
},
{
"epoch": 1.99,
"learning_rate": 5.416966951033731e-06,
"loss": 0.6314,
"step": 753
},
{
"epoch": 1.99,
"learning_rate": 5.391647727239706e-06,
"loss": 0.6284,
"step": 754
},
{
"epoch": 1.99,
"learning_rate": 5.366365956042236e-06,
"loss": 0.6279,
"step": 755
},
{
"epoch": 1.99,
"learning_rate": 5.341121842909119e-06,
"loss": 0.6316,
"step": 756
},
{
"epoch": 2.0,
"learning_rate": 5.3159155930021e-06,
"loss": 0.6229,
"step": 757
},
{
"epoch": 2.0,
"learning_rate": 5.290747411175195e-06,
"loss": 0.6235,
"step": 758
},
{
"epoch": 2.0,
"learning_rate": 5.265617501973055e-06,
"loss": 0.607,
"step": 759
},
{
"epoch": 2.0,
"learning_rate": 5.240526069629265e-06,
"loss": 0.5831,
"step": 760
},
{
"epoch": 2.01,
"learning_rate": 5.215473318064722e-06,
"loss": 0.5819,
"step": 761
},
{
"epoch": 2.01,
"learning_rate": 5.190459450885945e-06,
"loss": 0.5882,
"step": 762
},
{
"epoch": 2.01,
"learning_rate": 5.165484671383445e-06,
"loss": 0.5807,
"step": 763
},
{
"epoch": 2.01,
"learning_rate": 5.140549182530066e-06,
"loss": 0.5892,
"step": 764
},
{
"epoch": 2.02,
"learning_rate": 5.115653186979318e-06,
"loss": 0.5945,
"step": 765
},
{
"epoch": 2.02,
"learning_rate": 5.09079688706376e-06,
"loss": 0.5844,
"step": 766
},
{
"epoch": 2.02,
"learning_rate": 5.065980484793328e-06,
"loss": 0.5868,
"step": 767
},
{
"epoch": 2.02,
"learning_rate": 5.041204181853716e-06,
"loss": 0.578,
"step": 768
},
{
"epoch": 2.03,
"learning_rate": 5.016468179604712e-06,
"loss": 0.5867,
"step": 769
},
{
"epoch": 2.03,
"learning_rate": 4.991772679078598e-06,
"loss": 0.5787,
"step": 770
},
{
"epoch": 2.03,
"learning_rate": 4.96711788097847e-06,
"loss": 0.589,
"step": 771
},
{
"epoch": 2.04,
"learning_rate": 4.94250398567665e-06,
"loss": 0.5851,
"step": 772
},
{
"epoch": 2.04,
"learning_rate": 4.91793119321302e-06,
"loss": 0.5846,
"step": 773
},
{
"epoch": 2.04,
"learning_rate": 4.893399703293436e-06,
"loss": 0.5842,
"step": 774
},
{
"epoch": 2.04,
"learning_rate": 4.86890971528806e-06,
"loss": 0.5875,
"step": 775
},
{
"epoch": 2.05,
"learning_rate": 4.844461428229782e-06,
"loss": 0.5773,
"step": 776
},
{
"epoch": 2.05,
"learning_rate": 4.82005504081258e-06,
"loss": 0.5891,
"step": 777
},
{
"epoch": 2.05,
"learning_rate": 4.795690751389899e-06,
"loss": 0.5927,
"step": 778
},
{
"epoch": 2.05,
"learning_rate": 4.771368757973062e-06,
"loss": 0.5874,
"step": 779
},
{
"epoch": 2.06,
"learning_rate": 4.747089258229635e-06,
"loss": 0.5796,
"step": 780
},
{
"epoch": 2.06,
"learning_rate": 4.722852449481848e-06,
"loss": 0.5769,
"step": 781
},
{
"epoch": 2.06,
"learning_rate": 4.69865852870496e-06,
"loss": 0.5755,
"step": 782
},
{
"epoch": 2.06,
"learning_rate": 4.674507692525685e-06,
"loss": 0.5819,
"step": 783
},
{
"epoch": 2.07,
"learning_rate": 4.650400137220583e-06,
"loss": 0.5801,
"step": 784
},
{
"epoch": 2.07,
"learning_rate": 4.626336058714468e-06,
"loss": 0.578,
"step": 785
},
{
"epoch": 2.07,
"learning_rate": 4.6023156525788034e-06,
"loss": 0.5912,
"step": 786
},
{
"epoch": 2.07,
"learning_rate": 4.578339114030127e-06,
"loss": 0.5767,
"step": 787
},
{
"epoch": 2.08,
"learning_rate": 4.554406637928464e-06,
"loss": 0.5857,
"step": 788
},
{
"epoch": 2.08,
"learning_rate": 4.530518418775734e-06,
"loss": 0.5727,
"step": 789
},
{
"epoch": 2.08,
"learning_rate": 4.5066746507141755e-06,
"loss": 0.5783,
"step": 790
},
{
"epoch": 2.09,
"learning_rate": 4.482875527524774e-06,
"loss": 0.5809,
"step": 791
},
{
"epoch": 2.09,
"learning_rate": 4.459121242625678e-06,
"loss": 0.5822,
"step": 792
},
{
"epoch": 2.09,
"learning_rate": 4.435411989070624e-06,
"loss": 0.5769,
"step": 793
},
{
"epoch": 2.09,
"learning_rate": 4.411747959547384e-06,
"loss": 0.5782,
"step": 794
},
{
"epoch": 2.1,
"learning_rate": 4.388129346376177e-06,
"loss": 0.5783,
"step": 795
},
{
"epoch": 2.1,
"learning_rate": 4.364556341508134e-06,
"loss": 0.5786,
"step": 796
},
{
"epoch": 2.1,
"learning_rate": 4.341029136523701e-06,
"loss": 0.5704,
"step": 797
},
{
"epoch": 2.1,
"learning_rate": 4.317547922631132e-06,
"loss": 0.5818,
"step": 798
},
{
"epoch": 2.11,
"learning_rate": 4.2941128906648786e-06,
"loss": 0.5836,
"step": 799
},
{
"epoch": 2.11,
"learning_rate": 4.270724231084079e-06,
"loss": 0.5672,
"step": 800
},
{
"epoch": 2.11,
"learning_rate": 4.247382133971002e-06,
"loss": 0.5789,
"step": 801
},
{
"epoch": 2.11,
"learning_rate": 4.2240867890294845e-06,
"loss": 0.5761,
"step": 802
},
{
"epoch": 2.12,
"learning_rate": 4.200838385583417e-06,
"loss": 0.5927,
"step": 803
},
{
"epoch": 2.12,
"learning_rate": 4.177637112575185e-06,
"loss": 0.5832,
"step": 804
},
{
"epoch": 2.12,
"learning_rate": 4.154483158564143e-06,
"loss": 0.5871,
"step": 805
},
{
"epoch": 2.12,
"learning_rate": 4.13137671172507e-06,
"loss": 0.582,
"step": 806
},
{
"epoch": 2.13,
"learning_rate": 4.1083179598466625e-06,
"loss": 0.5769,
"step": 807
},
{
"epoch": 2.13,
"learning_rate": 4.085307090329978e-06,
"loss": 0.5869,
"step": 808
},
{
"epoch": 2.13,
"learning_rate": 4.062344290186947e-06,
"loss": 0.5821,
"step": 809
},
{
"epoch": 2.14,
"learning_rate": 4.039429746038818e-06,
"loss": 0.5775,
"step": 810
},
{
"epoch": 2.14,
"learning_rate": 4.0165636441146676e-06,
"loss": 0.5894,
"step": 811
},
{
"epoch": 2.14,
"learning_rate": 3.993746170249878e-06,
"loss": 0.5766,
"step": 812
},
{
"epoch": 2.14,
"learning_rate": 3.970977509884617e-06,
"loss": 0.5879,
"step": 813
},
{
"epoch": 2.15,
"learning_rate": 3.948257848062351e-06,
"loss": 0.5805,
"step": 814
},
{
"epoch": 2.15,
"learning_rate": 3.925587369428316e-06,
"loss": 0.5796,
"step": 815
},
{
"epoch": 2.15,
"learning_rate": 3.902966258228046e-06,
"loss": 0.5811,
"step": 816
},
{
"epoch": 2.15,
"learning_rate": 3.880394698305846e-06,
"loss": 0.5893,
"step": 817
},
{
"epoch": 2.16,
"learning_rate": 3.857872873103322e-06,
"loss": 0.5902,
"step": 818
},
{
"epoch": 2.16,
"learning_rate": 3.8354009656578775e-06,
"loss": 0.5836,
"step": 819
},
{
"epoch": 2.16,
"learning_rate": 3.812979158601231e-06,
"loss": 0.5802,
"step": 820
},
{
"epoch": 2.16,
"learning_rate": 3.7906076341579235e-06,
"loss": 0.5887,
"step": 821
},
{
"epoch": 2.17,
"learning_rate": 3.768286574143846e-06,
"loss": 0.5867,
"step": 822
},
{
"epoch": 2.17,
"learning_rate": 3.746016159964764e-06,
"loss": 0.5871,
"step": 823
},
{
"epoch": 2.17,
"learning_rate": 3.723796572614826e-06,
"loss": 0.5858,
"step": 824
},
{
"epoch": 2.17,
"learning_rate": 3.70162799267513e-06,
"loss": 0.5799,
"step": 825
},
{
"epoch": 2.18,
"learning_rate": 3.679510600312203e-06,
"loss": 0.5809,
"step": 826
},
{
"epoch": 2.18,
"learning_rate": 3.657444575276583e-06,
"loss": 0.5779,
"step": 827
},
{
"epoch": 2.18,
"learning_rate": 3.635430096901329e-06,
"loss": 0.5797,
"step": 828
},
{
"epoch": 2.19,
"learning_rate": 3.6134673441005864e-06,
"loss": 0.5832,
"step": 829
},
{
"epoch": 2.19,
"learning_rate": 3.591556495368106e-06,
"loss": 0.5852,
"step": 830
},
{
"epoch": 2.19,
"learning_rate": 3.5696977287758205e-06,
"loss": 0.5807,
"step": 831
},
{
"epoch": 2.19,
"learning_rate": 3.5478912219723826e-06,
"loss": 0.573,
"step": 832
},
{
"epoch": 2.2,
"learning_rate": 3.5261371521817247e-06,
"loss": 0.5744,
"step": 833
},
{
"epoch": 2.2,
"learning_rate": 3.5044356962016114e-06,
"loss": 0.589,
"step": 834
},
{
"epoch": 2.2,
"learning_rate": 3.4827870304022116e-06,
"loss": 0.582,
"step": 835
},
{
"epoch": 2.2,
"learning_rate": 3.461191330724669e-06,
"loss": 0.5777,
"step": 836
},
{
"epoch": 2.21,
"learning_rate": 3.4396487726796545e-06,
"loss": 0.5863,
"step": 837
},
{
"epoch": 2.21,
"learning_rate": 3.4181595313459613e-06,
"loss": 0.5799,
"step": 838
},
{
"epoch": 2.21,
"learning_rate": 3.396723781369067e-06,
"loss": 0.5741,
"step": 839
},
{
"epoch": 2.21,
"learning_rate": 3.375341696959722e-06,
"loss": 0.5796,
"step": 840
},
{
"epoch": 2.22,
"learning_rate": 3.3540134518925226e-06,
"loss": 0.5782,
"step": 841
},
{
"epoch": 2.22,
"learning_rate": 3.3327392195045173e-06,
"loss": 0.5842,
"step": 842
},
{
"epoch": 2.22,
"learning_rate": 3.311519172693778e-06,
"loss": 0.5803,
"step": 843
},
{
"epoch": 2.23,
"learning_rate": 3.290353483918013e-06,
"loss": 0.5796,
"step": 844
},
{
"epoch": 2.23,
"learning_rate": 3.269242325193147e-06,
"loss": 0.5883,
"step": 845
},
{
"epoch": 2.23,
"learning_rate": 3.248185868091941e-06,
"loss": 0.5852,
"step": 846
},
{
"epoch": 2.23,
"learning_rate": 3.2271842837425917e-06,
"loss": 0.5778,
"step": 847
},
{
"epoch": 2.24,
"learning_rate": 3.206237742827325e-06,
"loss": 0.5866,
"step": 848
},
{
"epoch": 2.24,
"learning_rate": 3.185346415581041e-06,
"loss": 0.5826,
"step": 849
},
{
"epoch": 2.24,
"learning_rate": 3.1645104717898946e-06,
"loss": 0.5771,
"step": 850
},
{
"epoch": 2.24,
"learning_rate": 3.1437300807899462e-06,
"loss": 0.5778,
"step": 851
},
{
"epoch": 2.25,
"learning_rate": 3.123005411465766e-06,
"loss": 0.5783,
"step": 852
},
{
"epoch": 2.25,
"learning_rate": 3.1023366322490733e-06,
"loss": 0.5772,
"step": 853
},
{
"epoch": 2.25,
"learning_rate": 3.0817239111173523e-06,
"loss": 0.5838,
"step": 854
},
{
"epoch": 2.25,
"learning_rate": 3.0611674155925085e-06,
"loss": 0.5853,
"step": 855
},
{
"epoch": 2.26,
"learning_rate": 3.0406673127394894e-06,
"loss": 0.5756,
"step": 856
},
{
"epoch": 2.26,
"learning_rate": 3.0202237691649263e-06,
"loss": 0.5787,
"step": 857
},
{
"epoch": 2.26,
"learning_rate": 2.999836951015801e-06,
"loss": 0.5849,
"step": 858
},
{
"epoch": 2.26,
"learning_rate": 2.9795070239780734e-06,
"loss": 0.5865,
"step": 859
},
{
"epoch": 2.27,
"learning_rate": 2.9592341532753455e-06,
"loss": 0.5793,
"step": 860
},
{
"epoch": 2.27,
"learning_rate": 2.939018503667511e-06,
"loss": 0.5851,
"step": 861
},
{
"epoch": 2.27,
"learning_rate": 2.91886023944943e-06,
"loss": 0.5747,
"step": 862
},
{
"epoch": 2.28,
"learning_rate": 2.8987595244495736e-06,
"loss": 0.5856,
"step": 863
},
{
"epoch": 2.28,
"learning_rate": 2.8787165220287183e-06,
"loss": 0.5763,
"step": 864
},
{
"epoch": 2.28,
"learning_rate": 2.8587313950785876e-06,
"loss": 0.5798,
"step": 865
},
{
"epoch": 2.28,
"learning_rate": 2.838804306020557e-06,
"loss": 0.5819,
"step": 866
},
{
"epoch": 2.29,
"learning_rate": 2.8189354168043183e-06,
"loss": 0.5862,
"step": 867
},
{
"epoch": 2.29,
"learning_rate": 2.7991248889065693e-06,
"loss": 0.5838,
"step": 868
},
{
"epoch": 2.29,
"learning_rate": 2.7793728833296906e-06,
"loss": 0.5847,
"step": 869
},
{
"epoch": 2.29,
"learning_rate": 2.7596795606004477e-06,
"loss": 0.5818,
"step": 870
},
{
"epoch": 2.3,
"learning_rate": 2.740045080768694e-06,
"loss": 0.5869,
"step": 871
},
{
"epoch": 2.3,
"learning_rate": 2.7204696034060464e-06,
"loss": 0.5826,
"step": 872
},
{
"epoch": 2.3,
"learning_rate": 2.7009532876046084e-06,
"loss": 0.5758,
"step": 873
},
{
"epoch": 2.3,
"learning_rate": 2.681496291975674e-06,
"loss": 0.5778,
"step": 874
},
{
"epoch": 2.31,
"learning_rate": 2.662098774648434e-06,
"loss": 0.5852,
"step": 875
},
{
"epoch": 2.31,
"learning_rate": 2.642760893268684e-06,
"loss": 0.588,
"step": 876
},
{
"epoch": 2.31,
"learning_rate": 2.6234828049975645e-06,
"loss": 0.5788,
"step": 877
},
{
"epoch": 2.31,
"learning_rate": 2.6042646665102587e-06,
"loss": 0.584,
"step": 878
},
{
"epoch": 2.32,
"learning_rate": 2.5851066339947397e-06,
"loss": 0.5792,
"step": 879
},
{
"epoch": 2.32,
"learning_rate": 2.566008863150492e-06,
"loss": 0.5783,
"step": 880
},
{
"epoch": 2.32,
"learning_rate": 2.546971509187238e-06,
"loss": 0.5811,
"step": 881
},
{
"epoch": 2.33,
"learning_rate": 2.5279947268237005e-06,
"loss": 0.5746,
"step": 882
},
{
"epoch": 2.33,
"learning_rate": 2.5090786702863126e-06,
"loss": 0.5843,
"step": 883
},
{
"epoch": 2.33,
"learning_rate": 2.490223493307997e-06,
"loss": 0.581,
"step": 884
},
{
"epoch": 2.33,
"learning_rate": 2.4714293491268904e-06,
"loss": 0.5882,
"step": 885
},
{
"epoch": 2.34,
"learning_rate": 2.452696390485114e-06,
"loss": 0.5826,
"step": 886
},
{
"epoch": 2.34,
"learning_rate": 2.4340247696275297e-06,
"loss": 0.5806,
"step": 887
},
{
"epoch": 2.34,
"learning_rate": 2.4154146383004995e-06,
"loss": 0.5724,
"step": 888
},
{
"epoch": 2.34,
"learning_rate": 2.396866147750644e-06,
"loss": 0.5809,
"step": 889
},
{
"epoch": 2.35,
"learning_rate": 2.3783794487236367e-06,
"loss": 0.5838,
"step": 890
},
{
"epoch": 2.35,
"learning_rate": 2.3599546914629534e-06,
"loss": 0.5704,
"step": 891
},
{
"epoch": 2.35,
"learning_rate": 2.3415920257086633e-06,
"loss": 0.5801,
"step": 892
},
{
"epoch": 2.35,
"learning_rate": 2.323291600696217e-06,
"loss": 0.5835,
"step": 893
},
{
"epoch": 2.36,
"learning_rate": 2.305053565155223e-06,
"loss": 0.5868,
"step": 894
},
{
"epoch": 2.36,
"learning_rate": 2.286878067308248e-06,
"loss": 0.5902,
"step": 895
},
{
"epoch": 2.36,
"learning_rate": 2.2687652548695993e-06,
"loss": 0.582,
"step": 896
},
{
"epoch": 2.36,
"learning_rate": 2.250715275044144e-06,
"loss": 0.5799,
"step": 897
},
{
"epoch": 2.37,
"learning_rate": 2.2327282745260893e-06,
"loss": 0.5761,
"step": 898
},
{
"epoch": 2.37,
"learning_rate": 2.214804399497815e-06,
"loss": 0.5821,
"step": 899
},
{
"epoch": 2.37,
"learning_rate": 2.1969437956286575e-06,
"loss": 0.5702,
"step": 900
},
{
"epoch": 2.38,
"learning_rate": 2.179146608073763e-06,
"loss": 0.5865,
"step": 901
},
{
"epoch": 2.38,
"learning_rate": 2.1614129814728648e-06,
"loss": 0.5766,
"step": 902
},
{
"epoch": 2.38,
"learning_rate": 2.143743059949144e-06,
"loss": 0.5818,
"step": 903
},
{
"epoch": 2.38,
"learning_rate": 2.126136987108034e-06,
"loss": 0.5722,
"step": 904
},
{
"epoch": 2.39,
"learning_rate": 2.1085949060360654e-06,
"loss": 0.5725,
"step": 905
},
{
"epoch": 2.39,
"learning_rate": 2.0911169592997027e-06,
"loss": 0.5779,
"step": 906
},
{
"epoch": 2.39,
"learning_rate": 2.073703288944183e-06,
"loss": 0.5826,
"step": 907
},
{
"epoch": 2.39,
"learning_rate": 2.056354036492363e-06,
"loss": 0.5777,
"step": 908
},
{
"epoch": 2.4,
"learning_rate": 2.0390693429435626e-06,
"loss": 0.5875,
"step": 909
},
{
"epoch": 2.4,
"learning_rate": 2.021849348772431e-06,
"loss": 0.5782,
"step": 910
},
{
"epoch": 2.4,
"learning_rate": 2.004694193927791e-06,
"loss": 0.5769,
"step": 911
},
{
"epoch": 2.4,
"learning_rate": 1.9876040178315194e-06,
"loss": 0.5811,
"step": 912
},
{
"epoch": 2.41,
"learning_rate": 1.970578959377388e-06,
"loss": 0.5903,
"step": 913
},
{
"epoch": 2.41,
"learning_rate": 1.9536191569299666e-06,
"loss": 0.5872,
"step": 914
},
{
"epoch": 2.41,
"learning_rate": 1.9367247483234743e-06,
"loss": 0.5797,
"step": 915
},
{
"epoch": 2.41,
"learning_rate": 1.919895870860662e-06,
"loss": 0.5771,
"step": 916
},
{
"epoch": 2.42,
"learning_rate": 1.903132661311714e-06,
"loss": 0.5915,
"step": 917
},
{
"epoch": 2.42,
"learning_rate": 1.8864352559131116e-06,
"loss": 0.5827,
"step": 918
},
{
"epoch": 2.42,
"learning_rate": 1.8698037903665479e-06,
"loss": 0.58,
"step": 919
},
{
"epoch": 2.43,
"learning_rate": 1.853238399837809e-06,
"loss": 0.5833,
"step": 920
},
{
"epoch": 2.43,
"learning_rate": 1.8367392189556843e-06,
"loss": 0.5775,
"step": 921
},
{
"epoch": 2.43,
"learning_rate": 1.8203063818108724e-06,
"loss": 0.5776,
"step": 922
},
{
"epoch": 2.43,
"learning_rate": 1.8039400219548876e-06,
"loss": 0.5794,
"step": 923
},
{
"epoch": 2.44,
"learning_rate": 1.7876402723989695e-06,
"loss": 0.5881,
"step": 924
},
{
"epoch": 2.44,
"learning_rate": 1.7714072656130199e-06,
"loss": 0.5831,
"step": 925
},
{
"epoch": 2.44,
"learning_rate": 1.7552411335245056e-06,
"loss": 0.5791,
"step": 926
},
{
"epoch": 2.44,
"learning_rate": 1.7391420075173915e-06,
"loss": 0.5816,
"step": 927
},
{
"epoch": 2.45,
"learning_rate": 1.7231100184310955e-06,
"loss": 0.5729,
"step": 928
},
{
"epoch": 2.45,
"learning_rate": 1.7071452965593839e-06,
"loss": 0.577,
"step": 929
},
{
"epoch": 2.45,
"learning_rate": 1.6912479716493512e-06,
"loss": 0.5858,
"step": 930
},
{
"epoch": 2.45,
"learning_rate": 1.675418172900336e-06,
"loss": 0.5747,
"step": 931
},
{
"epoch": 2.46,
"learning_rate": 1.6596560289628983e-06,
"loss": 0.5825,
"step": 932
},
{
"epoch": 2.46,
"learning_rate": 1.6439616679377445e-06,
"loss": 0.5763,
"step": 933
},
{
"epoch": 2.46,
"learning_rate": 1.6283352173747148e-06,
"loss": 0.5758,
"step": 934
},
{
"epoch": 2.46,
"learning_rate": 1.6127768042717252e-06,
"loss": 0.5808,
"step": 935
},
{
"epoch": 2.47,
"learning_rate": 1.5972865550737516e-06,
"loss": 0.5824,
"step": 936
},
{
"epoch": 2.47,
"learning_rate": 1.5818645956717827e-06,
"loss": 0.5798,
"step": 937
},
{
"epoch": 2.47,
"learning_rate": 1.5665110514018211e-06,
"loss": 0.5855,
"step": 938
},
{
"epoch": 2.48,
"learning_rate": 1.5512260470438422e-06,
"loss": 0.5887,
"step": 939
},
{
"epoch": 2.48,
"learning_rate": 1.5360097068207902e-06,
"loss": 0.5814,
"step": 940
},
{
"epoch": 2.48,
"learning_rate": 1.5208621543975766e-06,
"loss": 0.5807,
"step": 941
},
{
"epoch": 2.48,
"learning_rate": 1.5057835128800579e-06,
"loss": 0.5832,
"step": 942
},
{
"epoch": 2.49,
"learning_rate": 1.4907739048140524e-06,
"loss": 0.5793,
"step": 943
},
{
"epoch": 2.49,
"learning_rate": 1.4758334521843242e-06,
"loss": 0.5796,
"step": 944
},
{
"epoch": 2.49,
"learning_rate": 1.4609622764136155e-06,
"loss": 0.5816,
"step": 945
},
{
"epoch": 2.49,
"learning_rate": 1.4461604983616385e-06,
"loss": 0.5812,
"step": 946
},
{
"epoch": 2.5,
"learning_rate": 1.4314282383241097e-06,
"loss": 0.5756,
"step": 947
},
{
"epoch": 2.5,
"learning_rate": 1.4167656160317567e-06,
"loss": 0.5837,
"step": 948
},
{
"epoch": 2.5,
"learning_rate": 1.40217275064936e-06,
"loss": 0.5868,
"step": 949
},
{
"epoch": 2.5,
"learning_rate": 1.3876497607747774e-06,
"loss": 0.5728,
"step": 950
},
{
"epoch": 2.51,
"learning_rate": 1.3731967644379761e-06,
"loss": 0.5776,
"step": 951
},
{
"epoch": 2.51,
"learning_rate": 1.3588138791000838e-06,
"loss": 0.5861,
"step": 952
},
{
"epoch": 2.51,
"learning_rate": 1.3445012216524223e-06,
"loss": 0.5612,
"step": 953
},
{
"epoch": 2.52,
"learning_rate": 1.3302589084155692e-06,
"loss": 0.5837,
"step": 954
},
{
"epoch": 2.52,
"learning_rate": 1.3160870551383975e-06,
"loss": 0.5848,
"step": 955
},
{
"epoch": 2.52,
"learning_rate": 1.30198577699716e-06,
"loss": 0.5793,
"step": 956
},
{
"epoch": 2.52,
"learning_rate": 1.287955188594522e-06,
"loss": 0.582,
"step": 957
},
{
"epoch": 2.53,
"learning_rate": 1.2739954039586545e-06,
"loss": 0.5844,
"step": 958
},
{
"epoch": 2.53,
"learning_rate": 1.2601065365422915e-06,
"loss": 0.5829,
"step": 959
},
{
"epoch": 2.53,
"learning_rate": 1.2462886992218238e-06,
"loss": 0.5803,
"step": 960
},
{
"epoch": 2.53,
"learning_rate": 1.232542004296362e-06,
"loss": 0.5789,
"step": 961
},
{
"epoch": 2.54,
"learning_rate": 1.2188665634868424e-06,
"loss": 0.5808,
"step": 962
},
{
"epoch": 2.54,
"learning_rate": 1.2052624879351105e-06,
"loss": 0.5773,
"step": 963
},
{
"epoch": 2.54,
"learning_rate": 1.1917298882030115e-06,
"loss": 0.5763,
"step": 964
},
{
"epoch": 2.54,
"learning_rate": 1.1782688742715098e-06,
"loss": 0.5757,
"step": 965
},
{
"epoch": 2.55,
"learning_rate": 1.1648795555397719e-06,
"loss": 0.5868,
"step": 966
},
{
"epoch": 2.55,
"learning_rate": 1.1515620408242989e-06,
"loss": 0.5838,
"step": 967
},
{
"epoch": 2.55,
"learning_rate": 1.1383164383580248e-06,
"loss": 0.5791,
"step": 968
},
{
"epoch": 2.55,
"learning_rate": 1.1251428557894516e-06,
"loss": 0.5764,
"step": 969
},
{
"epoch": 2.56,
"learning_rate": 1.1120414001817647e-06,
"loss": 0.5737,
"step": 970
},
{
"epoch": 2.56,
"learning_rate": 1.0990121780119668e-06,
"loss": 0.579,
"step": 971
},
{
"epoch": 2.56,
"learning_rate": 1.0860552951700054e-06,
"loss": 0.5785,
"step": 972
},
{
"epoch": 2.57,
"learning_rate": 1.073170856957928e-06,
"loss": 0.5838,
"step": 973
},
{
"epoch": 2.57,
"learning_rate": 1.060358968089008e-06,
"loss": 0.5762,
"step": 974
},
{
"epoch": 2.57,
"learning_rate": 1.0476197326869043e-06,
"loss": 0.5786,
"step": 975
},
{
"epoch": 2.57,
"learning_rate": 1.0349532542848162e-06,
"loss": 0.5862,
"step": 976
},
{
"epoch": 2.58,
"learning_rate": 1.0223596358246346e-06,
"loss": 0.5856,
"step": 977
},
{
"epoch": 2.58,
"learning_rate": 1.0098389796561158e-06,
"loss": 0.5732,
"step": 978
},
{
"epoch": 2.58,
"learning_rate": 9.973913875360331e-07,
"loss": 0.5748,
"step": 979
},
{
"epoch": 2.58,
"learning_rate": 9.850169606273708e-07,
"loss": 0.5779,
"step": 980
},
{
"epoch": 2.59,
"learning_rate": 9.727157994984815e-07,
"loss": 0.579,
"step": 981
},
{
"epoch": 2.59,
"learning_rate": 9.604880041222854e-07,
"loss": 0.5798,
"step": 982
},
{
"epoch": 2.59,
"learning_rate": 9.4833367387545e-07,
"loss": 0.5804,
"step": 983
},
{
"epoch": 2.59,
"learning_rate": 9.362529075375848e-07,
"loss": 0.5731,
"step": 984
},
{
"epoch": 2.6,
"learning_rate": 9.242458032904311e-07,
"loss": 0.5738,
"step": 985
},
{
"epoch": 2.6,
"learning_rate": 9.123124587170728e-07,
"loss": 0.5695,
"step": 986
},
{
"epoch": 2.6,
"learning_rate": 9.004529708011455e-07,
"loss": 0.5843,
"step": 987
},
{
"epoch": 2.6,
"learning_rate": 8.88667435926035e-07,
"loss": 0.5733,
"step": 988
},
{
"epoch": 2.61,
"learning_rate": 8.769559498741109e-07,
"loss": 0.5767,
"step": 989
},
{
"epoch": 2.61,
"learning_rate": 8.653186078259346e-07,
"loss": 0.579,
"step": 990
},
{
"epoch": 2.61,
"learning_rate": 8.537555043594936e-07,
"loss": 0.5761,
"step": 991
},
{
"epoch": 2.62,
"learning_rate": 8.42266733449425e-07,
"loss": 0.5817,
"step": 992
},
{
"epoch": 2.62,
"learning_rate": 8.308523884662656e-07,
"loss": 0.5879,
"step": 993
},
{
"epoch": 2.62,
"learning_rate": 8.195125621756739e-07,
"loss": 0.5718,
"step": 994
},
{
"epoch": 2.62,
"learning_rate": 8.08247346737695e-07,
"loss": 0.5724,
"step": 995
},
{
"epoch": 2.63,
"learning_rate": 7.970568337059992e-07,
"loss": 0.5724,
"step": 996
},
{
"epoch": 2.63,
"learning_rate": 7.859411140271422e-07,
"loss": 0.5789,
"step": 997
},
{
"epoch": 2.63,
"learning_rate": 7.749002780398295e-07,
"loss": 0.5828,
"step": 998
},
{
"epoch": 2.63,
"learning_rate": 7.639344154741713e-07,
"loss": 0.5803,
"step": 999
},
{
"epoch": 2.64,
"learning_rate": 7.530436154509679e-07,
"loss": 0.5707,
"step": 1000
},
{
"epoch": 2.64,
"learning_rate": 7.422279664809706e-07,
"loss": 0.588,
"step": 1001
},
{
"epoch": 2.64,
"learning_rate": 7.314875564641777e-07,
"loss": 0.5756,
"step": 1002
},
{
"epoch": 2.64,
"learning_rate": 7.208224726891044e-07,
"loss": 0.5787,
"step": 1003
},
{
"epoch": 2.65,
"learning_rate": 7.102328018320859e-07,
"loss": 0.5688,
"step": 1004
},
{
"epoch": 2.65,
"learning_rate": 6.997186299565661e-07,
"loss": 0.5799,
"step": 1005
},
{
"epoch": 2.65,
"learning_rate": 6.892800425124035e-07,
"loss": 0.5768,
"step": 1006
},
{
"epoch": 2.65,
"learning_rate": 6.789171243351678e-07,
"loss": 0.5747,
"step": 1007
},
{
"epoch": 2.66,
"learning_rate": 6.686299596454604e-07,
"loss": 0.5741,
"step": 1008
},
{
"epoch": 2.66,
"learning_rate": 6.584186320482222e-07,
"loss": 0.5823,
"step": 1009
},
{
"epoch": 2.66,
"learning_rate": 6.482832245320625e-07,
"loss": 0.5801,
"step": 1010
},
{
"epoch": 2.67,
"learning_rate": 6.382238194685752e-07,
"loss": 0.581,
"step": 1011
},
{
"epoch": 2.67,
"learning_rate": 6.282404986116752e-07,
"loss": 0.5746,
"step": 1012
},
{
"epoch": 2.67,
"learning_rate": 6.18333343096933e-07,
"loss": 0.5813,
"step": 1013
},
{
"epoch": 2.67,
"learning_rate": 6.08502433440914e-07,
"loss": 0.5734,
"step": 1014
},
{
"epoch": 2.68,
"learning_rate": 5.987478495405252e-07,
"loss": 0.5803,
"step": 1015
},
{
"epoch": 2.68,
"learning_rate": 5.890696706723642e-07,
"loss": 0.5775,
"step": 1016
},
{
"epoch": 2.68,
"learning_rate": 5.794679754920796e-07,
"loss": 0.5712,
"step": 1017
},
{
"epoch": 2.68,
"learning_rate": 5.699428420337261e-07,
"loss": 0.5803,
"step": 1018
},
{
"epoch": 2.69,
"learning_rate": 5.604943477091351e-07,
"loss": 0.5704,
"step": 1019
},
{
"epoch": 2.69,
"learning_rate": 5.511225693072797e-07,
"loss": 0.5747,
"step": 1020
},
{
"epoch": 2.69,
"learning_rate": 5.418275829936537e-07,
"loss": 0.58,
"step": 1021
},
{
"epoch": 2.69,
"learning_rate": 5.32609464309658e-07,
"loss": 0.5792,
"step": 1022
},
{
"epoch": 2.7,
"learning_rate": 5.234682881719766e-07,
"loss": 0.5756,
"step": 1023
},
{
"epoch": 2.7,
"learning_rate": 5.144041288719747e-07,
"loss": 0.5775,
"step": 1024
},
{
"epoch": 2.7,
"learning_rate": 5.05417060075094e-07,
"loss": 0.5825,
"step": 1025
},
{
"epoch": 2.7,
"learning_rate": 4.965071548202538e-07,
"loss": 0.5771,
"step": 1026
},
{
"epoch": 2.71,
"learning_rate": 4.876744855192516e-07,
"loss": 0.5844,
"step": 1027
},
{
"epoch": 2.71,
"learning_rate": 4.789191239561852e-07,
"loss": 0.5794,
"step": 1028
},
{
"epoch": 2.71,
"learning_rate": 4.7024114128686017e-07,
"loss": 0.5857,
"step": 1029
},
{
"epoch": 2.72,
"learning_rate": 4.616406080382174e-07,
"loss": 0.5832,
"step": 1030
},
{
"epoch": 2.72,
"learning_rate": 4.531175941077537e-07,
"loss": 0.573,
"step": 1031
},
{
"epoch": 2.72,
"learning_rate": 4.4467216876296625e-07,
"loss": 0.5825,
"step": 1032
},
{
"epoch": 2.72,
"learning_rate": 4.363044006407724e-07,
"loss": 0.5805,
"step": 1033
},
{
"epoch": 2.73,
"learning_rate": 4.28014357746962e-07,
"loss": 0.5836,
"step": 1034
},
{
"epoch": 2.73,
"learning_rate": 4.198021074556469e-07,
"loss": 0.5786,
"step": 1035
},
{
"epoch": 2.73,
"learning_rate": 4.1166771650870687e-07,
"loss": 0.5712,
"step": 1036
},
{
"epoch": 2.73,
"learning_rate": 4.0361125101525124e-07,
"loss": 0.5753,
"step": 1037
},
{
"epoch": 2.74,
"learning_rate": 3.956327764510792e-07,
"loss": 0.5764,
"step": 1038
},
{
"epoch": 2.74,
"learning_rate": 3.8773235765815155e-07,
"loss": 0.5785,
"step": 1039
},
{
"epoch": 2.74,
"learning_rate": 3.799100588440574e-07,
"loss": 0.5748,
"step": 1040
},
{
"epoch": 2.74,
"learning_rate": 3.721659435814995e-07,
"loss": 0.5749,
"step": 1041
},
{
"epoch": 2.75,
"learning_rate": 3.645000748077709e-07,
"loss": 0.5708,
"step": 1042
},
{
"epoch": 2.75,
"learning_rate": 3.5691251482424914e-07,
"loss": 0.5824,
"step": 1043
},
{
"epoch": 2.75,
"learning_rate": 3.4940332529588506e-07,
"loss": 0.5802,
"step": 1044
},
{
"epoch": 2.75,
"learning_rate": 3.419725672507068e-07,
"loss": 0.5772,
"step": 1045
},
{
"epoch": 2.76,
"learning_rate": 3.346203010793203e-07,
"loss": 0.5756,
"step": 1046
},
{
"epoch": 2.76,
"learning_rate": 3.273465865344172e-07,
"loss": 0.5849,
"step": 1047
},
{
"epoch": 2.76,
"learning_rate": 3.201514827302943e-07,
"loss": 0.579,
"step": 1048
},
{
"epoch": 2.77,
"learning_rate": 3.1303504814236494e-07,
"loss": 0.583,
"step": 1049
},
{
"epoch": 2.77,
"learning_rate": 3.059973406066963e-07,
"loss": 0.573,
"step": 1050
},
{
"epoch": 2.77,
"learning_rate": 2.99038417319526e-07,
"loss": 0.5805,
"step": 1051
},
{
"epoch": 2.77,
"learning_rate": 2.921583348368051e-07,
"loss": 0.574,
"step": 1052
},
{
"epoch": 2.78,
"learning_rate": 2.853571490737372e-07,
"loss": 0.5788,
"step": 1053
},
{
"epoch": 2.78,
"learning_rate": 2.786349153043244e-07,
"loss": 0.5774,
"step": 1054
},
{
"epoch": 2.78,
"learning_rate": 2.7199168816091416e-07,
"loss": 0.5787,
"step": 1055
},
{
"epoch": 2.78,
"learning_rate": 2.6542752163375876e-07,
"loss": 0.5728,
"step": 1056
},
{
"epoch": 2.79,
"learning_rate": 2.589424690705777e-07,
"loss": 0.5748,
"step": 1057
},
{
"epoch": 2.79,
"learning_rate": 2.525365831761217e-07,
"loss": 0.5749,
"step": 1058
},
{
"epoch": 2.79,
"learning_rate": 2.4620991601174596e-07,
"loss": 0.5768,
"step": 1059
},
{
"epoch": 2.79,
"learning_rate": 2.399625189949839e-07,
"loss": 0.5769,
"step": 1060
},
{
"epoch": 2.8,
"learning_rate": 2.3379444289913344e-07,
"loss": 0.5835,
"step": 1061
},
{
"epoch": 2.8,
"learning_rate": 2.2770573785284133e-07,
"loss": 0.579,
"step": 1062
},
{
"epoch": 2.8,
"learning_rate": 2.2169645333969815e-07,
"loss": 0.5781,
"step": 1063
},
{
"epoch": 2.81,
"learning_rate": 2.1576663819783295e-07,
"loss": 0.5776,
"step": 1064
},
{
"epoch": 2.81,
"learning_rate": 2.0991634061952038e-07,
"loss": 0.5724,
"step": 1065
},
{
"epoch": 2.81,
"learning_rate": 2.0414560815078533e-07,
"loss": 0.5867,
"step": 1066
},
{
"epoch": 2.81,
"learning_rate": 1.9845448769102105e-07,
"loss": 0.5799,
"step": 1067
},
{
"epoch": 2.82,
"learning_rate": 1.928430254926006e-07,
"loss": 0.5792,
"step": 1068
},
{
"epoch": 2.82,
"learning_rate": 1.8731126716050707e-07,
"loss": 0.5837,
"step": 1069
},
{
"epoch": 2.82,
"learning_rate": 1.818592576519629e-07,
"loss": 0.5743,
"step": 1070
},
{
"epoch": 2.82,
"learning_rate": 1.764870412760611e-07,
"loss": 0.5742,
"step": 1071
},
{
"epoch": 2.83,
"learning_rate": 1.7119466169340794e-07,
"loss": 0.5819,
"step": 1072
},
{
"epoch": 2.83,
"learning_rate": 1.6598216191576643e-07,
"loss": 0.5744,
"step": 1073
},
{
"epoch": 2.83,
"learning_rate": 1.6084958430571006e-07,
"loss": 0.573,
"step": 1074
},
{
"epoch": 2.83,
"learning_rate": 1.5579697057627074e-07,
"loss": 0.5819,
"step": 1075
},
{
"epoch": 2.84,
"learning_rate": 1.508243617906091e-07,
"loss": 0.58,
"step": 1076
},
{
"epoch": 2.84,
"learning_rate": 1.459317983616737e-07,
"loss": 0.5815,
"step": 1077
},
{
"epoch": 2.84,
"learning_rate": 1.411193200518801e-07,
"loss": 0.5715,
"step": 1078
},
{
"epoch": 2.84,
"learning_rate": 1.3638696597277678e-07,
"loss": 0.5726,
"step": 1079
},
{
"epoch": 2.85,
"learning_rate": 1.317347745847386e-07,
"loss": 0.5774,
"step": 1080
},
{
"epoch": 2.85,
"learning_rate": 1.2716278369664825e-07,
"loss": 0.58,
"step": 1081
},
{
"epoch": 2.85,
"learning_rate": 1.226710304655876e-07,
"loss": 0.5754,
"step": 1082
},
{
"epoch": 2.86,
"learning_rate": 1.1825955139654121e-07,
"loss": 0.5837,
"step": 1083
},
{
"epoch": 2.86,
"learning_rate": 1.1392838234209336e-07,
"loss": 0.589,
"step": 1084
},
{
"epoch": 2.86,
"learning_rate": 1.0967755850214257e-07,
"loss": 0.5794,
"step": 1085
},
{
"epoch": 2.86,
"learning_rate": 1.0550711442361083e-07,
"loss": 0.5757,
"step": 1086
},
{
"epoch": 2.87,
"learning_rate": 1.0141708400016714e-07,
"loss": 0.5755,
"step": 1087
},
{
"epoch": 2.87,
"learning_rate": 9.74075004719477e-08,
"loss": 0.5738,
"step": 1088
},
{
"epoch": 2.87,
"learning_rate": 9.347839642528721e-08,
"loss": 0.5751,
"step": 1089
},
{
"epoch": 2.87,
"learning_rate": 8.962980379245589e-08,
"loss": 0.5811,
"step": 1090
},
{
"epoch": 2.88,
"learning_rate": 8.58617538513995e-08,
"loss": 0.5804,
"step": 1091
},
{
"epoch": 2.88,
"learning_rate": 8.217427722548077e-08,
"loss": 0.5866,
"step": 1092
},
{
"epoch": 2.88,
"learning_rate": 7.856740388323847e-08,
"loss": 0.5818,
"step": 1093
},
{
"epoch": 2.88,
"learning_rate": 7.504116313813759e-08,
"loss": 0.5878,
"step": 1094
},
{
"epoch": 2.89,
"learning_rate": 7.159558364833175e-08,
"loss": 0.581,
"step": 1095
},
{
"epoch": 2.89,
"learning_rate": 6.823069341643341e-08,
"loss": 0.5714,
"step": 1096
},
{
"epoch": 2.89,
"learning_rate": 6.494651978928179e-08,
"loss": 0.5755,
"step": 1097
},
{
"epoch": 2.89,
"learning_rate": 6.174308945772644e-08,
"loss": 0.5787,
"step": 1098
},
{
"epoch": 2.9,
"learning_rate": 5.862042845640403e-08,
"loss": 0.5689,
"step": 1099
},
{
"epoch": 2.9,
"learning_rate": 5.557856216353297e-08,
"loss": 0.5812,
"step": 1100
},
{
"epoch": 2.9,
"learning_rate": 5.261751530070136e-08,
"loss": 0.5773,
"step": 1101
},
{
"epoch": 2.91,
"learning_rate": 4.9737311932669395e-08,
"loss": 0.574,
"step": 1102
},
{
"epoch": 2.91,
"learning_rate": 4.693797546717505e-08,
"loss": 0.5809,
"step": 1103
},
{
"epoch": 2.91,
"learning_rate": 4.421952865473755e-08,
"loss": 0.5794,
"step": 1104
},
{
"epoch": 2.91,
"learning_rate": 4.1581993588482025e-08,
"loss": 0.5788,
"step": 1105
},
{
"epoch": 2.92,
"learning_rate": 3.90253917039507e-08,
"loss": 0.578,
"step": 1106
},
{
"epoch": 2.92,
"learning_rate": 3.6549743778934166e-08,
"loss": 0.5771,
"step": 1107
},
{
"epoch": 2.92,
"learning_rate": 3.4155069933301535e-08,
"loss": 0.5818,
"step": 1108
},
{
"epoch": 2.92,
"learning_rate": 3.18413896288372e-08,
"loss": 0.5699,
"step": 1109
},
{
"epoch": 2.93,
"learning_rate": 2.96087216690788e-08,
"loss": 0.576,
"step": 1110
},
{
"epoch": 2.93,
"learning_rate": 2.74570841991717e-08,
"loss": 0.5816,
"step": 1111
},
{
"epoch": 2.93,
"learning_rate": 2.5386494705713638e-08,
"loss": 0.5843,
"step": 1112
},
{
"epoch": 2.93,
"learning_rate": 2.3396970016619225e-08,
"loss": 0.5765,
"step": 1113
},
{
"epoch": 2.94,
"learning_rate": 2.1488526300978972e-08,
"loss": 0.5745,
"step": 1114
},
{
"epoch": 2.94,
"learning_rate": 1.9661179068928283e-08,
"loss": 0.5851,
"step": 1115
},
{
"epoch": 2.94,
"learning_rate": 1.791494317152531e-08,
"loss": 0.583,
"step": 1116
},
{
"epoch": 2.94,
"learning_rate": 1.624983280062331e-08,
"loss": 0.5796,
"step": 1117
},
{
"epoch": 2.95,
"learning_rate": 1.4665861488761813e-08,
"loss": 0.5775,
"step": 1118
},
{
"epoch": 2.95,
"learning_rate": 1.3163042109053393e-08,
"loss": 0.578,
"step": 1119
},
{
"epoch": 2.95,
"learning_rate": 1.1741386875081528e-08,
"loss": 0.5808,
"step": 1120
},
{
"epoch": 2.96,
"learning_rate": 1.040090734079624e-08,
"loss": 0.5815,
"step": 1121
},
{
"epoch": 2.96,
"learning_rate": 9.14161440042527e-09,
"loss": 0.5852,
"step": 1122
},
{
"epoch": 2.96,
"learning_rate": 7.963518288385264e-09,
"loss": 0.5739,
"step": 1123
},
{
"epoch": 2.96,
"learning_rate": 6.866628579195178e-09,
"loss": 0.5825,
"step": 1124
},
{
"epoch": 2.97,
"learning_rate": 5.850954187399671e-09,
"loss": 0.5807,
"step": 1125
},
{
"epoch": 2.97,
"learning_rate": 4.91650336750027e-09,
"loss": 0.5837,
"step": 1126
},
{
"epoch": 2.97,
"learning_rate": 4.063283713883203e-09,
"loss": 0.5748,
"step": 1127
},
{
"epoch": 2.97,
"learning_rate": 3.2913021607594574e-09,
"loss": 0.5764,
"step": 1128
},
{
"epoch": 2.98,
"learning_rate": 2.600564982110365e-09,
"loss": 0.573,
"step": 1129
},
{
"epoch": 2.98,
"learning_rate": 1.9910777916354316e-09,
"loss": 0.5839,
"step": 1130
},
{
"epoch": 2.98,
"learning_rate": 1.462845542704594e-09,
"loss": 0.5807,
"step": 1131
},
{
"epoch": 2.98,
"learning_rate": 1.0158725283204718e-09,
"loss": 0.5857,
"step": 1132
},
{
"epoch": 2.99,
"learning_rate": 6.50162381083952e-10,
"loss": 0.5813,
"step": 1133
},
{
"epoch": 2.99,
"learning_rate": 3.6571807316199135e-10,
"loss": 0.5791,
"step": 1134
},
{
"epoch": 2.99,
"learning_rate": 1.6254191626430272e-10,
"loss": 0.5773,
"step": 1135
},
{
"epoch": 2.99,
"learning_rate": 4.063556162892113e-11,
"loss": 0.5774,
"step": 1136
},
{
"epoch": 3.0,
"learning_rate": 0.0,
"loss": 0.5688,
"step": 1137
},
{
"epoch": 3.0,
"step": 1137,
"total_flos": 3.0126552732370207e+18,
"train_loss": 0.646743938392364,
"train_runtime": 27308.2637,
"train_samples_per_second": 10.668,
"train_steps_per_second": 0.042
}
],
"max_steps": 1137,
"num_train_epochs": 3,
"total_flos": 3.0126552732370207e+18,
"trial_name": null,
"trial_params": null
}