|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999106584472438, |
|
"eval_steps": 500, |
|
"global_step": 5596, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0001786831055123738, |
|
"grad_norm": 0.677041232585907, |
|
"learning_rate": 3.5714285714285716e-07, |
|
"loss": 2.5858, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.000893415527561869, |
|
"grad_norm": 0.87285977602005, |
|
"learning_rate": 1.7857142857142857e-06, |
|
"loss": 2.8564, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.001786831055123738, |
|
"grad_norm": 0.91197270154953, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 2.7743, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002680246582685607, |
|
"grad_norm": 0.8713774681091309, |
|
"learning_rate": 5.357142857142857e-06, |
|
"loss": 2.7189, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.003573662110247476, |
|
"grad_norm": 0.7496689558029175, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 2.7557, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.004467077637809345, |
|
"grad_norm": 0.7549262642860413, |
|
"learning_rate": 8.92857142857143e-06, |
|
"loss": 2.7256, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.005360493165371214, |
|
"grad_norm": 0.8223145008087158, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 2.7319, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.006253908692933083, |
|
"grad_norm": 0.7578197121620178, |
|
"learning_rate": 1.25e-05, |
|
"loss": 2.661, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.007147324220494952, |
|
"grad_norm": 0.6663674712181091, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.6396, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.008040739748056821, |
|
"grad_norm": 0.5226909518241882, |
|
"learning_rate": 1.6071428571428572e-05, |
|
"loss": 2.5739, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.00893415527561869, |
|
"grad_norm": 0.4958447515964508, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 2.4765, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.00982757080318056, |
|
"grad_norm": 0.40554603934288025, |
|
"learning_rate": 1.9642857142857145e-05, |
|
"loss": 2.4275, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.010720986330742428, |
|
"grad_norm": 0.43960413336753845, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 2.3822, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.011614401858304297, |
|
"grad_norm": 0.40701353549957275, |
|
"learning_rate": 2.3214285714285715e-05, |
|
"loss": 2.3326, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.012507817385866166, |
|
"grad_norm": 0.3899136483669281, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.3172, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.013401232913428035, |
|
"grad_norm": 0.444366455078125, |
|
"learning_rate": 2.6785714285714288e-05, |
|
"loss": 2.2801, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.014294648440989904, |
|
"grad_norm": 0.43577924370765686, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.23, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.015188063968551773, |
|
"grad_norm": 0.42386895418167114, |
|
"learning_rate": 3.0357142857142857e-05, |
|
"loss": 2.2422, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.016081479496113642, |
|
"grad_norm": 0.4131146967411041, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 2.1822, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.016974895023675513, |
|
"grad_norm": 0.43191808462142944, |
|
"learning_rate": 3.392857142857143e-05, |
|
"loss": 2.1113, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01786831055123738, |
|
"grad_norm": 0.49910837411880493, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 2.1751, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01876172607879925, |
|
"grad_norm": 0.4678761065006256, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 2.1023, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01965514160636112, |
|
"grad_norm": 0.5106930136680603, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 2.0749, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02054855713392299, |
|
"grad_norm": 0.5173302292823792, |
|
"learning_rate": 4.107142857142857e-05, |
|
"loss": 2.0946, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.021441972661484857, |
|
"grad_norm": 0.5672593712806702, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.0089, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.022335388189046727, |
|
"grad_norm": 0.5483328700065613, |
|
"learning_rate": 4.464285714285715e-05, |
|
"loss": 2.0303, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.023228803716608595, |
|
"grad_norm": 0.5528935790061951, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 1.9952, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.024122219244170465, |
|
"grad_norm": 0.6560689210891724, |
|
"learning_rate": 4.8214285714285716e-05, |
|
"loss": 1.9854, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.025015634771732333, |
|
"grad_norm": 0.6699485182762146, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0175, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.025909050299294203, |
|
"grad_norm": 0.6667202711105347, |
|
"learning_rate": 5.1785714285714296e-05, |
|
"loss": 1.9973, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.02680246582685607, |
|
"grad_norm": 0.624862790107727, |
|
"learning_rate": 5.3571428571428575e-05, |
|
"loss": 1.9596, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02769588135441794, |
|
"grad_norm": 0.7823134064674377, |
|
"learning_rate": 5.535714285714286e-05, |
|
"loss": 1.9863, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.02858929688197981, |
|
"grad_norm": 0.7916033864021301, |
|
"learning_rate": 5.714285714285714e-05, |
|
"loss": 1.9652, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02948271240954168, |
|
"grad_norm": 0.8077523708343506, |
|
"learning_rate": 5.8928571428571435e-05, |
|
"loss": 1.961, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.030376127937103547, |
|
"grad_norm": 0.7293629050254822, |
|
"learning_rate": 6.0714285714285715e-05, |
|
"loss": 1.9324, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.031269543464665414, |
|
"grad_norm": 0.7660003304481506, |
|
"learning_rate": 6.25e-05, |
|
"loss": 1.8978, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.032162958992227285, |
|
"grad_norm": 0.7684458494186401, |
|
"learning_rate": 6.428571428571429e-05, |
|
"loss": 1.8884, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.033056374519789156, |
|
"grad_norm": 0.8633774518966675, |
|
"learning_rate": 6.607142857142857e-05, |
|
"loss": 1.8827, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.033949790047351026, |
|
"grad_norm": 0.918566882610321, |
|
"learning_rate": 6.785714285714286e-05, |
|
"loss": 1.8765, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.03484320557491289, |
|
"grad_norm": 0.8094245791435242, |
|
"learning_rate": 6.964285714285715e-05, |
|
"loss": 1.8944, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.03573662110247476, |
|
"grad_norm": 0.8820126056671143, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 1.8934, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 0.9279444813728333, |
|
"learning_rate": 7.321428571428571e-05, |
|
"loss": 1.8604, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.0375234521575985, |
|
"grad_norm": 0.8216668963432312, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.8466, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.038416867685160366, |
|
"grad_norm": 0.9710394740104675, |
|
"learning_rate": 7.67857142857143e-05, |
|
"loss": 1.8802, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.03931028321272224, |
|
"grad_norm": 1.00575852394104, |
|
"learning_rate": 7.857142857142858e-05, |
|
"loss": 1.8543, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04020369874028411, |
|
"grad_norm": 0.9068517088890076, |
|
"learning_rate": 8.035714285714287e-05, |
|
"loss": 1.8208, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.04109711426784598, |
|
"grad_norm": 0.9185478091239929, |
|
"learning_rate": 8.214285714285714e-05, |
|
"loss": 1.8395, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04199052979540784, |
|
"grad_norm": 0.8614683747291565, |
|
"learning_rate": 8.392857142857144e-05, |
|
"loss": 1.7931, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.04288394532296971, |
|
"grad_norm": 1.0321149826049805, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 1.8134, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.043777360850531584, |
|
"grad_norm": 0.997043788433075, |
|
"learning_rate": 8.75e-05, |
|
"loss": 1.8195, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.044670776378093455, |
|
"grad_norm": 0.9486415386199951, |
|
"learning_rate": 8.92857142857143e-05, |
|
"loss": 1.811, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.04556419190565532, |
|
"grad_norm": 0.8874170780181885, |
|
"learning_rate": 9.107142857142857e-05, |
|
"loss": 1.808, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.04645760743321719, |
|
"grad_norm": 0.9631446003913879, |
|
"learning_rate": 9.285714285714286e-05, |
|
"loss": 1.7976, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.04735102296077906, |
|
"grad_norm": 0.9562223553657532, |
|
"learning_rate": 9.464285714285715e-05, |
|
"loss": 1.7817, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.04824443848834093, |
|
"grad_norm": 0.8817927241325378, |
|
"learning_rate": 9.642857142857143e-05, |
|
"loss": 1.7646, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.049137854015902795, |
|
"grad_norm": 0.9726014137268066, |
|
"learning_rate": 9.821428571428572e-05, |
|
"loss": 1.8197, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.050031269543464665, |
|
"grad_norm": 0.914252758026123, |
|
"learning_rate": 0.0001, |
|
"loss": 1.7556, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.050924685071026536, |
|
"grad_norm": 0.9449843764305115, |
|
"learning_rate": 0.00010178571428571428, |
|
"loss": 1.7112, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.05181810059858841, |
|
"grad_norm": 0.9346893429756165, |
|
"learning_rate": 0.00010357142857142859, |
|
"loss": 1.7619, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.05271151612615027, |
|
"grad_norm": 0.8253093957901001, |
|
"learning_rate": 0.00010535714285714286, |
|
"loss": 1.7342, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.05360493165371214, |
|
"grad_norm": 0.8210521936416626, |
|
"learning_rate": 0.00010714285714285715, |
|
"loss": 1.7528, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.05449834718127401, |
|
"grad_norm": 0.7791708707809448, |
|
"learning_rate": 0.00010892857142857142, |
|
"loss": 1.7881, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.05539176270883588, |
|
"grad_norm": 0.9075311422348022, |
|
"learning_rate": 0.00011071428571428572, |
|
"loss": 1.7569, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.05628517823639775, |
|
"grad_norm": 0.8817685842514038, |
|
"learning_rate": 0.00011250000000000001, |
|
"loss": 1.7444, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.05717859376395962, |
|
"grad_norm": 0.9592775106430054, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 1.7344, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.05807200929152149, |
|
"grad_norm": 0.8894655704498291, |
|
"learning_rate": 0.00011607142857142858, |
|
"loss": 1.738, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.05896542481908336, |
|
"grad_norm": 0.9590314030647278, |
|
"learning_rate": 0.00011785714285714287, |
|
"loss": 1.7336, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.05985884034664522, |
|
"grad_norm": 0.9731721878051758, |
|
"learning_rate": 0.00011964285714285714, |
|
"loss": 1.7242, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.060752255874207094, |
|
"grad_norm": 1.0074090957641602, |
|
"learning_rate": 0.00012142857142857143, |
|
"loss": 1.7719, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.061645671401768964, |
|
"grad_norm": 0.9376741051673889, |
|
"learning_rate": 0.00012321428571428572, |
|
"loss": 1.7299, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.06253908692933083, |
|
"grad_norm": 0.8597344756126404, |
|
"learning_rate": 0.000125, |
|
"loss": 1.7378, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0634325024568927, |
|
"grad_norm": 0.8836060762405396, |
|
"learning_rate": 0.0001267857142857143, |
|
"loss": 1.7379, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.06432591798445457, |
|
"grad_norm": 1.0358092784881592, |
|
"learning_rate": 0.00012857142857142858, |
|
"loss": 1.7175, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.06521933351201643, |
|
"grad_norm": 0.8426574468612671, |
|
"learning_rate": 0.00013035714285714286, |
|
"loss": 1.6632, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.06611274903957831, |
|
"grad_norm": 0.9615597128868103, |
|
"learning_rate": 0.00013214285714285715, |
|
"loss": 1.6667, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.06700616456714018, |
|
"grad_norm": 0.9134716391563416, |
|
"learning_rate": 0.00013392857142857144, |
|
"loss": 1.7246, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.06789958009470205, |
|
"grad_norm": 0.8629338145256042, |
|
"learning_rate": 0.00013571428571428572, |
|
"loss": 1.6696, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.06879299562226392, |
|
"grad_norm": 0.8393704295158386, |
|
"learning_rate": 0.0001375, |
|
"loss": 1.6838, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.06968641114982578, |
|
"grad_norm": 0.9799148440361023, |
|
"learning_rate": 0.0001392857142857143, |
|
"loss": 1.7022, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07057982667738766, |
|
"grad_norm": 0.8769751191139221, |
|
"learning_rate": 0.00014107142857142858, |
|
"loss": 1.6783, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.07147324220494952, |
|
"grad_norm": 0.8460381627082825, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 1.6509, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07236665773251139, |
|
"grad_norm": 0.7672830820083618, |
|
"learning_rate": 0.00014464285714285715, |
|
"loss": 1.6485, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 0.8759172558784485, |
|
"learning_rate": 0.00014642857142857141, |
|
"loss": 1.6723, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.07415348878763513, |
|
"grad_norm": 0.8765602707862854, |
|
"learning_rate": 0.00014821428571428573, |
|
"loss": 1.5925, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.075046904315197, |
|
"grad_norm": 0.8540732264518738, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.6583, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.07594031984275887, |
|
"grad_norm": 0.7945284843444824, |
|
"learning_rate": 0.00015178571428571427, |
|
"loss": 1.6644, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.07683373537032073, |
|
"grad_norm": 1.0458513498306274, |
|
"learning_rate": 0.0001535714285714286, |
|
"loss": 1.6946, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.07772715089788261, |
|
"grad_norm": 0.8617852330207825, |
|
"learning_rate": 0.00015535714285714287, |
|
"loss": 1.6218, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.07862056642544447, |
|
"grad_norm": 0.8525850176811218, |
|
"learning_rate": 0.00015714285714285716, |
|
"loss": 1.6579, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.07951398195300634, |
|
"grad_norm": 0.7932422161102295, |
|
"learning_rate": 0.00015892857142857142, |
|
"loss": 1.6593, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.08040739748056822, |
|
"grad_norm": 0.795437216758728, |
|
"learning_rate": 0.00016071428571428573, |
|
"loss": 1.6661, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.08130081300813008, |
|
"grad_norm": 0.8602275848388672, |
|
"learning_rate": 0.00016250000000000002, |
|
"loss": 1.6667, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.08219422853569196, |
|
"grad_norm": 0.7548096776008606, |
|
"learning_rate": 0.00016428571428571428, |
|
"loss": 1.6403, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.08308764406325382, |
|
"grad_norm": 0.8513688445091248, |
|
"learning_rate": 0.0001660714285714286, |
|
"loss": 1.6543, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.08398105959081568, |
|
"grad_norm": 0.836286187171936, |
|
"learning_rate": 0.00016785714285714288, |
|
"loss": 1.6227, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.08487447511837756, |
|
"grad_norm": 0.7904291749000549, |
|
"learning_rate": 0.00016964285714285714, |
|
"loss": 1.6433, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.08576789064593943, |
|
"grad_norm": 0.7850791215896606, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 1.6113, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.08666130617350129, |
|
"grad_norm": 0.6912005543708801, |
|
"learning_rate": 0.00017321428571428574, |
|
"loss": 1.6047, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.08755472170106317, |
|
"grad_norm": 0.8217949867248535, |
|
"learning_rate": 0.000175, |
|
"loss": 1.6206, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.08844813722862503, |
|
"grad_norm": 0.7975768446922302, |
|
"learning_rate": 0.00017678571428571428, |
|
"loss": 1.6711, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.08934155275618691, |
|
"grad_norm": 0.7934837937355042, |
|
"learning_rate": 0.0001785714285714286, |
|
"loss": 1.6195, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09023496828374877, |
|
"grad_norm": 0.8391872048377991, |
|
"learning_rate": 0.00018035714285714286, |
|
"loss": 1.6068, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.09112838381131064, |
|
"grad_norm": 0.8512648344039917, |
|
"learning_rate": 0.00018214285714285714, |
|
"loss": 1.6489, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.09202179933887251, |
|
"grad_norm": 0.7523573637008667, |
|
"learning_rate": 0.00018392857142857143, |
|
"loss": 1.6035, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.09291521486643438, |
|
"grad_norm": 0.6890606880187988, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 1.6162, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.09380863039399624, |
|
"grad_norm": 0.8024640679359436, |
|
"learning_rate": 0.0001875, |
|
"loss": 1.5836, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.09470204592155812, |
|
"grad_norm": 0.778583824634552, |
|
"learning_rate": 0.0001892857142857143, |
|
"loss": 1.6539, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.09559546144911998, |
|
"grad_norm": 0.6993560194969177, |
|
"learning_rate": 0.00019107142857142858, |
|
"loss": 1.5736, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.09648887697668186, |
|
"grad_norm": 0.6990346908569336, |
|
"learning_rate": 0.00019285714285714286, |
|
"loss": 1.5987, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.09738229250424373, |
|
"grad_norm": 0.7906347513198853, |
|
"learning_rate": 0.00019464285714285715, |
|
"loss": 1.625, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.09827570803180559, |
|
"grad_norm": 0.8062006831169128, |
|
"learning_rate": 0.00019642857142857144, |
|
"loss": 1.5445, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.09916912355936747, |
|
"grad_norm": 0.7047358751296997, |
|
"learning_rate": 0.00019821428571428572, |
|
"loss": 1.5868, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.10006253908692933, |
|
"grad_norm": 0.7451456785202026, |
|
"learning_rate": 0.0002, |
|
"loss": 1.632, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.1009559546144912, |
|
"grad_norm": 0.7321707606315613, |
|
"learning_rate": 0.00019999951355027364, |
|
"loss": 1.5762, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.10184937014205307, |
|
"grad_norm": 0.790035605430603, |
|
"learning_rate": 0.00019999805420582728, |
|
"loss": 1.5721, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.10274278566961494, |
|
"grad_norm": 0.7690011858940125, |
|
"learning_rate": 0.00019999562198085878, |
|
"loss": 1.5736, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.10363620119717681, |
|
"grad_norm": 0.7484734058380127, |
|
"learning_rate": 0.00019999221689903133, |
|
"loss": 1.5685, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.10452961672473868, |
|
"grad_norm": 0.6638728976249695, |
|
"learning_rate": 0.00019998783899347294, |
|
"loss": 1.5683, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.10542303225230054, |
|
"grad_norm": 0.6009637117385864, |
|
"learning_rate": 0.0001999824883067762, |
|
"loss": 1.6239, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.10631644777986242, |
|
"grad_norm": 0.6219035387039185, |
|
"learning_rate": 0.00019997616489099792, |
|
"loss": 1.5447, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.10720986330742428, |
|
"grad_norm": 0.6466681957244873, |
|
"learning_rate": 0.00019996886880765854, |
|
"loss": 1.5165, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.10810327883498615, |
|
"grad_norm": 0.7301130890846252, |
|
"learning_rate": 0.0001999606001277417, |
|
"loss": 1.5419, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.10899669436254802, |
|
"grad_norm": 0.635531485080719, |
|
"learning_rate": 0.0001999513589316933, |
|
"loss": 1.5127, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 0.7266018986701965, |
|
"learning_rate": 0.00019994114530942088, |
|
"loss": 1.5785, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.11078352541767177, |
|
"grad_norm": 0.7803109288215637, |
|
"learning_rate": 0.0001999299593602927, |
|
"loss": 1.5557, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.11167694094523363, |
|
"grad_norm": 0.7546414136886597, |
|
"learning_rate": 0.00019991780119313682, |
|
"loss": 1.5858, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.1125703564727955, |
|
"grad_norm": 0.7428862452507019, |
|
"learning_rate": 0.00019990467092623998, |
|
"loss": 1.5246, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.11346377200035737, |
|
"grad_norm": 0.6253552436828613, |
|
"learning_rate": 0.00019989056868734647, |
|
"loss": 1.5436, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.11435718752791924, |
|
"grad_norm": 0.6777628064155579, |
|
"learning_rate": 0.0001998754946136569, |
|
"loss": 1.5812, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1152506030554811, |
|
"grad_norm": 0.7340919971466064, |
|
"learning_rate": 0.00019985944885182687, |
|
"loss": 1.5125, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.11614401858304298, |
|
"grad_norm": 0.6683153510093689, |
|
"learning_rate": 0.00019984243155796546, |
|
"loss": 1.5239, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.11703743411060484, |
|
"grad_norm": 0.6631706357002258, |
|
"learning_rate": 0.00019982444289763388, |
|
"loss": 1.4991, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.11793084963816672, |
|
"grad_norm": 0.6204883456230164, |
|
"learning_rate": 0.00019980548304584364, |
|
"loss": 1.4839, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.11882426516572858, |
|
"grad_norm": 0.6456660032272339, |
|
"learning_rate": 0.00019978555218705513, |
|
"loss": 1.5401, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.11971768069329045, |
|
"grad_norm": 0.6324872970581055, |
|
"learning_rate": 0.00019976465051517548, |
|
"loss": 1.5242, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.12061109622085232, |
|
"grad_norm": 0.6695961952209473, |
|
"learning_rate": 0.00019974277823355698, |
|
"loss": 1.5427, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.12150451174841419, |
|
"grad_norm": 0.6281846165657043, |
|
"learning_rate": 0.00019971993555499494, |
|
"loss": 1.5168, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.12239792727597605, |
|
"grad_norm": 0.6930037140846252, |
|
"learning_rate": 0.00019969612270172567, |
|
"loss": 1.5093, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.12329134280353793, |
|
"grad_norm": 0.7301099300384521, |
|
"learning_rate": 0.00019967133990542423, |
|
"loss": 1.5472, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.12418475833109979, |
|
"grad_norm": 0.6746507287025452, |
|
"learning_rate": 0.0001996455874072024, |
|
"loss": 1.5237, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.12507817385866166, |
|
"grad_norm": 0.7017915844917297, |
|
"learning_rate": 0.00019961886545760598, |
|
"loss": 1.5288, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.12597158938622352, |
|
"grad_norm": 0.6702309846878052, |
|
"learning_rate": 0.00019959117431661273, |
|
"loss": 1.5484, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.1268650049137854, |
|
"grad_norm": 0.7421427369117737, |
|
"learning_rate": 0.00019956251425362967, |
|
"loss": 1.4334, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.12775842044134728, |
|
"grad_norm": 0.6117368340492249, |
|
"learning_rate": 0.0001995328855474903, |
|
"loss": 1.5339, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.12865183596890914, |
|
"grad_norm": 0.6836646199226379, |
|
"learning_rate": 0.00019950228848645218, |
|
"loss": 1.5339, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.129545251496471, |
|
"grad_norm": 0.6464391350746155, |
|
"learning_rate": 0.00019947072336819397, |
|
"loss": 1.5194, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.13043866702403287, |
|
"grad_norm": 0.7467032670974731, |
|
"learning_rate": 0.00019943819049981248, |
|
"loss": 1.4732, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.13133208255159476, |
|
"grad_norm": 0.6169785857200623, |
|
"learning_rate": 0.00019940469019781985, |
|
"loss": 1.503, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.13222549807915662, |
|
"grad_norm": 0.6561746001243591, |
|
"learning_rate": 0.00019937022278814032, |
|
"loss": 1.5135, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1331189136067185, |
|
"grad_norm": 0.6765868663787842, |
|
"learning_rate": 0.00019933478860610713, |
|
"loss": 1.498, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.13401232913428035, |
|
"grad_norm": 0.5688114762306213, |
|
"learning_rate": 0.00019929838799645925, |
|
"loss": 1.424, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.13490574466184221, |
|
"grad_norm": 0.6881040334701538, |
|
"learning_rate": 0.00019926102131333803, |
|
"loss": 1.4588, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.1357991601894041, |
|
"grad_norm": 0.7207677960395813, |
|
"learning_rate": 0.00019922268892028368, |
|
"loss": 1.4658, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.13669257571696597, |
|
"grad_norm": 0.645916759967804, |
|
"learning_rate": 0.0001991833911902319, |
|
"loss": 1.4797, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.13758599124452783, |
|
"grad_norm": 0.6428040266036987, |
|
"learning_rate": 0.00019914312850551, |
|
"loss": 1.5143, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1384794067720897, |
|
"grad_norm": 0.6166855096817017, |
|
"learning_rate": 0.0001991019012578335, |
|
"loss": 1.4823, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.13937282229965156, |
|
"grad_norm": 0.6110146641731262, |
|
"learning_rate": 0.00019905970984830204, |
|
"loss": 1.4651, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.14026623782721342, |
|
"grad_norm": 0.6262656450271606, |
|
"learning_rate": 0.00019901655468739562, |
|
"loss": 1.4983, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.14115965335477532, |
|
"grad_norm": 0.6442315578460693, |
|
"learning_rate": 0.00019897243619497056, |
|
"loss": 1.4735, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.14205306888233718, |
|
"grad_norm": 0.5954890251159668, |
|
"learning_rate": 0.00019892735480025545, |
|
"loss": 1.4986, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.14294648440989904, |
|
"grad_norm": 0.656102180480957, |
|
"learning_rate": 0.0001988813109418469, |
|
"loss": 1.4934, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1438398999374609, |
|
"grad_norm": 0.630328893661499, |
|
"learning_rate": 0.00019883430506770536, |
|
"loss": 1.4769, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.14473331546502277, |
|
"grad_norm": 0.6247830390930176, |
|
"learning_rate": 0.00019878633763515074, |
|
"loss": 1.4446, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.14562673099258466, |
|
"grad_norm": 0.6472021341323853, |
|
"learning_rate": 0.00019873740911085792, |
|
"loss": 1.4711, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 0.6813840270042419, |
|
"learning_rate": 0.00019868751997085225, |
|
"loss": 1.4533, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1474135620477084, |
|
"grad_norm": 0.6286069750785828, |
|
"learning_rate": 0.0001986366707005049, |
|
"loss": 1.4806, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.14830697757527025, |
|
"grad_norm": 0.6147023439407349, |
|
"learning_rate": 0.00019858486179452812, |
|
"loss": 1.4779, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.14920039310283212, |
|
"grad_norm": 0.6876225471496582, |
|
"learning_rate": 0.0001985320937569705, |
|
"loss": 1.5072, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.150093808630394, |
|
"grad_norm": 0.6138557195663452, |
|
"learning_rate": 0.00019847836710121198, |
|
"loss": 1.4605, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.15098722415795587, |
|
"grad_norm": 0.642737865447998, |
|
"learning_rate": 0.0001984236823499589, |
|
"loss": 1.4692, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.15188063968551774, |
|
"grad_norm": 0.6422574520111084, |
|
"learning_rate": 0.0001983680400352389, |
|
"loss": 1.4684, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1527740552130796, |
|
"grad_norm": 0.6301836967468262, |
|
"learning_rate": 0.00019831144069839578, |
|
"loss": 1.4375, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.15366747074064147, |
|
"grad_norm": 0.5667446255683899, |
|
"learning_rate": 0.00019825388489008415, |
|
"loss": 1.427, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.15456088626820333, |
|
"grad_norm": 0.6058502197265625, |
|
"learning_rate": 0.0001981953731702642, |
|
"loss": 1.4666, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.15545430179576522, |
|
"grad_norm": 0.6004565358161926, |
|
"learning_rate": 0.00019813590610819604, |
|
"loss": 1.4691, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.15634771732332708, |
|
"grad_norm": 0.6401507258415222, |
|
"learning_rate": 0.00019807548428243447, |
|
"loss": 1.4149, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.15724113285088895, |
|
"grad_norm": 0.6585138440132141, |
|
"learning_rate": 0.00019801410828082307, |
|
"loss": 1.478, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1581345483784508, |
|
"grad_norm": 0.6155353784561157, |
|
"learning_rate": 0.00019795177870048864, |
|
"loss": 1.446, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.15902796390601268, |
|
"grad_norm": 0.6294763684272766, |
|
"learning_rate": 0.00019788849614783534, |
|
"loss": 1.4208, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.15992137943357457, |
|
"grad_norm": 0.5941749811172485, |
|
"learning_rate": 0.00019782426123853873, |
|
"loss": 1.4909, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.16081479496113643, |
|
"grad_norm": 0.6547942161560059, |
|
"learning_rate": 0.0001977590745975399, |
|
"loss": 1.3606, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1617082104886983, |
|
"grad_norm": 0.6711387634277344, |
|
"learning_rate": 0.00019769293685903937, |
|
"loss": 1.4263, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.16260162601626016, |
|
"grad_norm": 0.6281640529632568, |
|
"learning_rate": 0.0001976258486664908, |
|
"loss": 1.4387, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.16349504154382202, |
|
"grad_norm": 0.5495440363883972, |
|
"learning_rate": 0.00019755781067259487, |
|
"loss": 1.4227, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.16438845707138391, |
|
"grad_norm": 0.6133942008018494, |
|
"learning_rate": 0.00019748882353929283, |
|
"loss": 1.3777, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.16528187259894578, |
|
"grad_norm": 0.6217373013496399, |
|
"learning_rate": 0.00019741888793776012, |
|
"loss": 1.4337, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.16617528812650764, |
|
"grad_norm": 0.5960021018981934, |
|
"learning_rate": 0.00019734800454839985, |
|
"loss": 1.4001, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.1670687036540695, |
|
"grad_norm": 0.6111594438552856, |
|
"learning_rate": 0.00019727617406083608, |
|
"loss": 1.4485, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.16796211918163137, |
|
"grad_norm": 0.6061040163040161, |
|
"learning_rate": 0.00019720339717390725, |
|
"loss": 1.4341, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.16885553470919323, |
|
"grad_norm": 0.5827285051345825, |
|
"learning_rate": 0.00019712967459565935, |
|
"loss": 1.4212, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.16974895023675513, |
|
"grad_norm": 0.625501275062561, |
|
"learning_rate": 0.00019705500704333888, |
|
"loss": 1.3902, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.170642365764317, |
|
"grad_norm": 0.6008924245834351, |
|
"learning_rate": 0.00019697939524338605, |
|
"loss": 1.4401, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.17153578129187885, |
|
"grad_norm": 0.5836713314056396, |
|
"learning_rate": 0.00019690283993142768, |
|
"loss": 1.4285, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.17242919681944072, |
|
"grad_norm": 0.6134169101715088, |
|
"learning_rate": 0.00019682534185226996, |
|
"loss": 1.4527, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.17332261234700258, |
|
"grad_norm": 0.593845784664154, |
|
"learning_rate": 0.0001967469017598913, |
|
"loss": 1.4293, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.17421602787456447, |
|
"grad_norm": 0.6239144802093506, |
|
"learning_rate": 0.00019666752041743485, |
|
"loss": 1.394, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.17510944340212634, |
|
"grad_norm": 0.5358985066413879, |
|
"learning_rate": 0.00019658719859720137, |
|
"loss": 1.4162, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.1760028589296882, |
|
"grad_norm": 0.5680092573165894, |
|
"learning_rate": 0.00019650593708064133, |
|
"loss": 1.4292, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.17689627445725006, |
|
"grad_norm": 0.6288170218467712, |
|
"learning_rate": 0.0001964237366583476, |
|
"loss": 1.4065, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.17778968998481193, |
|
"grad_norm": 0.6214203238487244, |
|
"learning_rate": 0.00019634059813004767, |
|
"loss": 1.3958, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.17868310551237382, |
|
"grad_norm": 0.5966234803199768, |
|
"learning_rate": 0.00019625652230459577, |
|
"loss": 1.4245, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.17957652103993568, |
|
"grad_norm": 0.5608589053153992, |
|
"learning_rate": 0.00019617150999996522, |
|
"loss": 1.3759, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.18046993656749755, |
|
"grad_norm": 0.6149086952209473, |
|
"learning_rate": 0.00019608556204324016, |
|
"loss": 1.3928, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.1813633520950594, |
|
"grad_norm": 0.5998888611793518, |
|
"learning_rate": 0.00019599867927060788, |
|
"loss": 1.4535, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.18225676762262127, |
|
"grad_norm": 0.6067023873329163, |
|
"learning_rate": 0.0001959108625273504, |
|
"loss": 1.4224, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 0.6208594441413879, |
|
"learning_rate": 0.0001958221126678363, |
|
"loss": 1.3772, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.18404359867774503, |
|
"grad_norm": 0.555916428565979, |
|
"learning_rate": 0.00019573243055551247, |
|
"loss": 1.4087, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.1849370142053069, |
|
"grad_norm": 0.5708112716674805, |
|
"learning_rate": 0.0001956418170628957, |
|
"loss": 1.3879, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.18583042973286876, |
|
"grad_norm": 0.6426623463630676, |
|
"learning_rate": 0.0001955502730715642, |
|
"loss": 1.4281, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.18672384526043062, |
|
"grad_norm": 0.6203565001487732, |
|
"learning_rate": 0.0001954577994721489, |
|
"loss": 1.457, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.18761726078799248, |
|
"grad_norm": 0.5595524907112122, |
|
"learning_rate": 0.00019536439716432496, |
|
"loss": 1.3567, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.18851067631555438, |
|
"grad_norm": 0.5930672287940979, |
|
"learning_rate": 0.00019527006705680297, |
|
"loss": 1.3994, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.18940409184311624, |
|
"grad_norm": 0.5650060176849365, |
|
"learning_rate": 0.00019517481006731997, |
|
"loss": 1.4198, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.1902975073706781, |
|
"grad_norm": 0.558860719203949, |
|
"learning_rate": 0.0001950786271226307, |
|
"loss": 1.399, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.19119092289823997, |
|
"grad_norm": 0.6348270177841187, |
|
"learning_rate": 0.00019498151915849855, |
|
"loss": 1.4218, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.19208433842580183, |
|
"grad_norm": 0.63995760679245, |
|
"learning_rate": 0.00019488348711968633, |
|
"loss": 1.3706, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.19297775395336372, |
|
"grad_norm": 0.5952381491661072, |
|
"learning_rate": 0.00019478453195994719, |
|
"loss": 1.4318, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.1938711694809256, |
|
"grad_norm": 0.6112159490585327, |
|
"learning_rate": 0.0001946846546420154, |
|
"loss": 1.3999, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.19476458500848745, |
|
"grad_norm": 0.5743675231933594, |
|
"learning_rate": 0.0001945838561375968, |
|
"loss": 1.4095, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.19565800053604931, |
|
"grad_norm": 0.542060911655426, |
|
"learning_rate": 0.00019448213742735942, |
|
"loss": 1.3946, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.19655141606361118, |
|
"grad_norm": 0.5920471549034119, |
|
"learning_rate": 0.0001943794995009242, |
|
"loss": 1.3708, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.19744483159117304, |
|
"grad_norm": 0.5597534775733948, |
|
"learning_rate": 0.00019427594335685478, |
|
"loss": 1.35, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.19833824711873493, |
|
"grad_norm": 0.531980037689209, |
|
"learning_rate": 0.00019417147000264852, |
|
"loss": 1.3503, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.1992316626462968, |
|
"grad_norm": 0.5867334604263306, |
|
"learning_rate": 0.0001940660804547259, |
|
"loss": 1.3673, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.20012507817385866, |
|
"grad_norm": 0.5700910091400146, |
|
"learning_rate": 0.00019395977573842142, |
|
"loss": 1.3664, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.20101849370142053, |
|
"grad_norm": 0.5407332181930542, |
|
"learning_rate": 0.000193852556887973, |
|
"loss": 1.3677, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.2019119092289824, |
|
"grad_norm": 0.5347940921783447, |
|
"learning_rate": 0.00019374442494651223, |
|
"loss": 1.3819, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.20280532475654428, |
|
"grad_norm": 0.5672542452812195, |
|
"learning_rate": 0.00019363538096605427, |
|
"loss": 1.3838, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.20369874028410614, |
|
"grad_norm": 0.5784164667129517, |
|
"learning_rate": 0.00019352542600748734, |
|
"loss": 1.3848, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.204592155811668, |
|
"grad_norm": 0.5457838177680969, |
|
"learning_rate": 0.00019341456114056263, |
|
"loss": 1.3567, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.20548557133922987, |
|
"grad_norm": 0.6002548336982727, |
|
"learning_rate": 0.00019330278744388385, |
|
"loss": 1.357, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.20637898686679174, |
|
"grad_norm": 0.58780437707901, |
|
"learning_rate": 0.00019319010600489663, |
|
"loss": 1.3618, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.20727240239435363, |
|
"grad_norm": 0.5515660047531128, |
|
"learning_rate": 0.00019307651791987816, |
|
"loss": 1.344, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2081658179219155, |
|
"grad_norm": 0.5853777527809143, |
|
"learning_rate": 0.00019296202429392622, |
|
"loss": 1.3699, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.20905923344947736, |
|
"grad_norm": 0.558180034160614, |
|
"learning_rate": 0.00019284662624094874, |
|
"loss": 1.325, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.20995264897703922, |
|
"grad_norm": 0.5694774389266968, |
|
"learning_rate": 0.00019273032488365267, |
|
"loss": 1.3608, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.21084606450460108, |
|
"grad_norm": 0.5699239373207092, |
|
"learning_rate": 0.00019261312135353332, |
|
"loss": 1.3384, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.21173948003216295, |
|
"grad_norm": 0.5678359270095825, |
|
"learning_rate": 0.0001924950167908632, |
|
"loss": 1.3271, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.21263289555972484, |
|
"grad_norm": 0.560370147228241, |
|
"learning_rate": 0.00019237601234468096, |
|
"loss": 1.3563, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.2135263110872867, |
|
"grad_norm": 0.5844460129737854, |
|
"learning_rate": 0.0001922561091727802, |
|
"loss": 1.3256, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.21441972661484857, |
|
"grad_norm": 0.5936894416809082, |
|
"learning_rate": 0.00019213530844169817, |
|
"loss": 1.3581, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.21531314214241043, |
|
"grad_norm": 0.6150529384613037, |
|
"learning_rate": 0.00019201361132670456, |
|
"loss": 1.3306, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.2162065576699723, |
|
"grad_norm": 0.5778645873069763, |
|
"learning_rate": 0.00019189101901178997, |
|
"loss": 1.381, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.21709997319753419, |
|
"grad_norm": 0.5521702170372009, |
|
"learning_rate": 0.00019176753268965432, |
|
"loss": 1.3191, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.21799338872509605, |
|
"grad_norm": 0.550817608833313, |
|
"learning_rate": 0.00019164315356169536, |
|
"loss": 1.3381, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.2188868042526579, |
|
"grad_norm": 0.6147415041923523, |
|
"learning_rate": 0.00019151788283799698, |
|
"loss": 1.3591, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 0.5430126786231995, |
|
"learning_rate": 0.00019139172173731733, |
|
"loss": 1.344, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.22067363530778164, |
|
"grad_norm": 0.5844172835350037, |
|
"learning_rate": 0.0001912646714870771, |
|
"loss": 1.371, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.22156705083534353, |
|
"grad_norm": 0.5882902145385742, |
|
"learning_rate": 0.0001911367333233474, |
|
"loss": 1.3712, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2224604663629054, |
|
"grad_norm": 0.5897734761238098, |
|
"learning_rate": 0.00019100790849083804, |
|
"loss": 1.3004, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.22335388189046726, |
|
"grad_norm": 0.554033637046814, |
|
"learning_rate": 0.00019087819824288504, |
|
"loss": 1.3803, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.22424729741802912, |
|
"grad_norm": 0.5678560733795166, |
|
"learning_rate": 0.0001907476038414387, |
|
"loss": 1.3817, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.225140712945591, |
|
"grad_norm": 0.5871914029121399, |
|
"learning_rate": 0.00019061612655705128, |
|
"loss": 1.3456, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.22603412847315285, |
|
"grad_norm": 0.587515115737915, |
|
"learning_rate": 0.00019048376766886448, |
|
"loss": 1.3484, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.22692754400071474, |
|
"grad_norm": 0.5953535437583923, |
|
"learning_rate": 0.00019035052846459727, |
|
"loss": 1.3351, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.2278209595282766, |
|
"grad_norm": 0.5097110271453857, |
|
"learning_rate": 0.00019021641024053308, |
|
"loss": 1.3122, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.22871437505583847, |
|
"grad_norm": 0.563342273235321, |
|
"learning_rate": 0.00019008141430150745, |
|
"loss": 1.3345, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.22960779058340033, |
|
"grad_norm": 0.5420668721199036, |
|
"learning_rate": 0.00018994554196089506, |
|
"loss": 1.3379, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.2305012061109622, |
|
"grad_norm": 0.5748648047447205, |
|
"learning_rate": 0.0001898087945405972, |
|
"loss": 1.2791, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2313946216385241, |
|
"grad_norm": 0.5751796364784241, |
|
"learning_rate": 0.00018967117337102883, |
|
"loss": 1.3039, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.23228803716608595, |
|
"grad_norm": 0.5670933127403259, |
|
"learning_rate": 0.00018953267979110545, |
|
"loss": 1.358, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.23318145269364782, |
|
"grad_norm": 0.6038489937782288, |
|
"learning_rate": 0.0001893933151482304, |
|
"loss": 1.3747, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.23407486822120968, |
|
"grad_norm": 0.567458987236023, |
|
"learning_rate": 0.00018925308079828152, |
|
"loss": 1.3369, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.23496828374877154, |
|
"grad_norm": 0.5277537107467651, |
|
"learning_rate": 0.00018911197810559803, |
|
"loss": 1.3464, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.23586169927633344, |
|
"grad_norm": 0.5678889155387878, |
|
"learning_rate": 0.00018897000844296727, |
|
"loss": 1.3414, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2367551148038953, |
|
"grad_norm": 0.548362135887146, |
|
"learning_rate": 0.00018882717319161128, |
|
"loss": 1.3431, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.23764853033145716, |
|
"grad_norm": 0.5988355278968811, |
|
"learning_rate": 0.00018868347374117344, |
|
"loss": 1.287, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.23854194585901903, |
|
"grad_norm": 0.6163780093193054, |
|
"learning_rate": 0.00018853891148970498, |
|
"loss": 1.3503, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.2394353613865809, |
|
"grad_norm": 0.5658857822418213, |
|
"learning_rate": 0.00018839348784365116, |
|
"loss": 1.3291, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.24032877691414276, |
|
"grad_norm": 0.5733643770217896, |
|
"learning_rate": 0.0001882472042178379, |
|
"loss": 1.3533, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.24122219244170465, |
|
"grad_norm": 0.5487879514694214, |
|
"learning_rate": 0.0001881000620354578, |
|
"loss": 1.3396, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.2421156079692665, |
|
"grad_norm": 0.5239369869232178, |
|
"learning_rate": 0.0001879520627280563, |
|
"loss": 1.3193, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.24300902349682837, |
|
"grad_norm": 0.5878532528877258, |
|
"learning_rate": 0.0001878032077355179, |
|
"loss": 1.3203, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.24390243902439024, |
|
"grad_norm": 0.5400727391242981, |
|
"learning_rate": 0.00018765349850605195, |
|
"loss": 1.3079, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.2447958545519521, |
|
"grad_norm": 0.5751279592514038, |
|
"learning_rate": 0.0001875029364961788, |
|
"loss": 1.2999, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.245689270079514, |
|
"grad_norm": 0.5734187960624695, |
|
"learning_rate": 0.00018735152317071534, |
|
"loss": 1.3283, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.24658268560707586, |
|
"grad_norm": 0.5625242590904236, |
|
"learning_rate": 0.00018719926000276106, |
|
"loss": 1.3355, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.24747610113463772, |
|
"grad_norm": 0.5596960186958313, |
|
"learning_rate": 0.0001870461484736834, |
|
"loss": 1.3156, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.24836951666219959, |
|
"grad_norm": 0.5672683119773865, |
|
"learning_rate": 0.00018689219007310369, |
|
"loss": 1.342, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.24926293218976145, |
|
"grad_norm": 0.5605707764625549, |
|
"learning_rate": 0.00018673738629888226, |
|
"loss": 1.3166, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.2501563477173233, |
|
"grad_norm": 0.5812191367149353, |
|
"learning_rate": 0.0001865817386571043, |
|
"loss": 1.3484, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.2510497632448852, |
|
"grad_norm": 0.5138891935348511, |
|
"learning_rate": 0.00018642524866206475, |
|
"loss": 1.3201, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.25194317877244704, |
|
"grad_norm": 0.5659753084182739, |
|
"learning_rate": 0.000186267917836254, |
|
"loss": 1.3117, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.25283659430000893, |
|
"grad_norm": 0.5799670815467834, |
|
"learning_rate": 0.00018610974771034275, |
|
"loss": 1.3161, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.2537300098275708, |
|
"grad_norm": 0.537071943283081, |
|
"learning_rate": 0.00018595073982316732, |
|
"loss": 1.3183, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.25462342535513266, |
|
"grad_norm": 0.5561192631721497, |
|
"learning_rate": 0.00018579089572171454, |
|
"loss": 1.321, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.25551684088269455, |
|
"grad_norm": 0.5795418620109558, |
|
"learning_rate": 0.00018563021696110682, |
|
"loss": 1.3301, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 0.537104070186615, |
|
"learning_rate": 0.0001854687051045869, |
|
"loss": 1.2937, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.2573036719378183, |
|
"grad_norm": 0.6000511050224304, |
|
"learning_rate": 0.00018530636172350287, |
|
"loss": 1.2987, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.25819708746538017, |
|
"grad_norm": 0.5180054903030396, |
|
"learning_rate": 0.00018514318839729242, |
|
"loss": 1.2903, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.259090502992942, |
|
"grad_norm": 0.5556187629699707, |
|
"learning_rate": 0.00018497918671346808, |
|
"loss": 1.3159, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2599839185205039, |
|
"grad_norm": 0.5588898062705994, |
|
"learning_rate": 0.0001848143582676013, |
|
"loss": 1.295, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.26087733404806573, |
|
"grad_norm": 0.5521529316902161, |
|
"learning_rate": 0.0001846487046633071, |
|
"loss": 1.2851, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2617707495756276, |
|
"grad_norm": 0.5741902589797974, |
|
"learning_rate": 0.0001844822275122285, |
|
"loss": 1.3095, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.2626641651031895, |
|
"grad_norm": 0.5776512026786804, |
|
"learning_rate": 0.00018431492843402084, |
|
"loss": 1.3194, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.26355758063075135, |
|
"grad_norm": 0.5787376165390015, |
|
"learning_rate": 0.00018414680905633586, |
|
"loss": 1.3132, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.26445099615831325, |
|
"grad_norm": 0.551239013671875, |
|
"learning_rate": 0.00018397787101480612, |
|
"loss": 1.2952, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.2653444116858751, |
|
"grad_norm": 0.535438060760498, |
|
"learning_rate": 0.0001838081159530289, |
|
"loss": 1.277, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.266237827213437, |
|
"grad_norm": 0.5279135704040527, |
|
"learning_rate": 0.0001836375455225502, |
|
"loss": 1.3103, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.26713124274099886, |
|
"grad_norm": 0.54332035779953, |
|
"learning_rate": 0.00018346616138284892, |
|
"loss": 1.3535, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.2680246582685607, |
|
"grad_norm": 0.519645631313324, |
|
"learning_rate": 0.0001832939652013203, |
|
"loss": 1.3083, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2689180737961226, |
|
"grad_norm": 0.5469849705696106, |
|
"learning_rate": 0.00018312095865326012, |
|
"loss": 1.2726, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.26981148932368443, |
|
"grad_norm": 0.5668709874153137, |
|
"learning_rate": 0.0001829471434218481, |
|
"loss": 1.2965, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.2707049048512463, |
|
"grad_norm": 0.5535402894020081, |
|
"learning_rate": 0.00018277252119813176, |
|
"loss": 1.2742, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.2715983203788082, |
|
"grad_norm": 0.520740807056427, |
|
"learning_rate": 0.00018259709368100962, |
|
"loss": 1.2838, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.27249173590637005, |
|
"grad_norm": 0.5902200937271118, |
|
"learning_rate": 0.00018242086257721515, |
|
"loss": 1.278, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.27338515143393194, |
|
"grad_norm": 0.5141775608062744, |
|
"learning_rate": 0.00018224382960129972, |
|
"loss": 1.2815, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.2742785669614938, |
|
"grad_norm": 0.5365716218948364, |
|
"learning_rate": 0.00018206599647561627, |
|
"loss": 1.3087, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.27517198248905567, |
|
"grad_norm": 0.5323083400726318, |
|
"learning_rate": 0.00018188736493030222, |
|
"loss": 1.2783, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.2760653980166175, |
|
"grad_norm": 0.5199279189109802, |
|
"learning_rate": 0.00018170793670326292, |
|
"loss": 1.2883, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.2769588135441794, |
|
"grad_norm": 0.5308305025100708, |
|
"learning_rate": 0.0001815277135401546, |
|
"loss": 1.2868, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.2778522290717413, |
|
"grad_norm": 0.5134423971176147, |
|
"learning_rate": 0.00018134669719436749, |
|
"loss": 1.2758, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.2787456445993031, |
|
"grad_norm": 0.5420312881469727, |
|
"learning_rate": 0.00018116488942700857, |
|
"loss": 1.2882, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.279639060126865, |
|
"grad_norm": 0.5505488514900208, |
|
"learning_rate": 0.00018098229200688462, |
|
"loss": 1.3117, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.28053247565442685, |
|
"grad_norm": 0.5319713950157166, |
|
"learning_rate": 0.00018079890671048493, |
|
"loss": 1.2958, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.28142589118198874, |
|
"grad_norm": 0.5935749411582947, |
|
"learning_rate": 0.0001806147353219641, |
|
"loss": 1.2753, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.28231930670955063, |
|
"grad_norm": 0.5329294204711914, |
|
"learning_rate": 0.00018042977963312451, |
|
"loss": 1.3191, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.28321272223711247, |
|
"grad_norm": 0.5126677751541138, |
|
"learning_rate": 0.00018024404144339906, |
|
"loss": 1.2924, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.28410613776467436, |
|
"grad_norm": 0.554897129535675, |
|
"learning_rate": 0.00018005752255983355, |
|
"loss": 1.2715, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.2849995532922362, |
|
"grad_norm": 0.5345707535743713, |
|
"learning_rate": 0.00017987022479706922, |
|
"loss": 1.2999, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.2858929688197981, |
|
"grad_norm": 0.5505250096321106, |
|
"learning_rate": 0.000179682149977325, |
|
"loss": 1.3081, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.28678638434736, |
|
"grad_norm": 0.5407662987709045, |
|
"learning_rate": 0.0001794932999303797, |
|
"loss": 1.2921, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.2876797998749218, |
|
"grad_norm": 0.5246217846870422, |
|
"learning_rate": 0.00017930367649355447, |
|
"loss": 1.3035, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.2885732154024837, |
|
"grad_norm": 0.5454705953598022, |
|
"learning_rate": 0.00017911328151169466, |
|
"loss": 1.302, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.28946663093004554, |
|
"grad_norm": 0.571116030216217, |
|
"learning_rate": 0.000178922116837152, |
|
"loss": 1.2515, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.29036004645760743, |
|
"grad_norm": 0.5103644728660583, |
|
"learning_rate": 0.00017873018432976658, |
|
"loss": 1.2696, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.2912534619851693, |
|
"grad_norm": 0.5452694296836853, |
|
"learning_rate": 0.00017853748585684867, |
|
"loss": 1.2893, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.29214687751273116, |
|
"grad_norm": 0.5386608242988586, |
|
"learning_rate": 0.0001783440232931607, |
|
"loss": 1.2519, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 0.5036607384681702, |
|
"learning_rate": 0.0001781497985208989, |
|
"loss": 1.2865, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.2939337085678549, |
|
"grad_norm": 0.4949742555618286, |
|
"learning_rate": 0.00017795481342967501, |
|
"loss": 1.218, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.2948271240954168, |
|
"grad_norm": 0.5089282989501953, |
|
"learning_rate": 0.0001777590699164979, |
|
"loss": 1.2993, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.2957205396229787, |
|
"grad_norm": 0.5262947678565979, |
|
"learning_rate": 0.00017756256988575513, |
|
"loss": 1.2875, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.2966139551505405, |
|
"grad_norm": 0.598348081111908, |
|
"learning_rate": 0.00017736531524919445, |
|
"loss": 1.3016, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.2975073706781024, |
|
"grad_norm": 0.5289075374603271, |
|
"learning_rate": 0.00017716730792590512, |
|
"loss": 1.3164, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.29840078620566424, |
|
"grad_norm": 0.5724149942398071, |
|
"learning_rate": 0.00017696854984229933, |
|
"loss": 1.299, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.29929420173322613, |
|
"grad_norm": 0.5685451626777649, |
|
"learning_rate": 0.00017676904293209336, |
|
"loss": 1.3032, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.300187617260788, |
|
"grad_norm": 0.5210792422294617, |
|
"learning_rate": 0.00017656878913628883, |
|
"loss": 1.2844, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.30108103278834986, |
|
"grad_norm": 0.5431951880455017, |
|
"learning_rate": 0.0001763677904031539, |
|
"loss": 1.274, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.30197444831591175, |
|
"grad_norm": 0.551347553730011, |
|
"learning_rate": 0.00017616604868820406, |
|
"loss": 1.3147, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.3028678638434736, |
|
"grad_norm": 0.54395991563797, |
|
"learning_rate": 0.0001759635659541834, |
|
"loss": 1.2935, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.3037612793710355, |
|
"grad_norm": 0.519129753112793, |
|
"learning_rate": 0.0001757603441710453, |
|
"loss": 1.2166, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3046546948985973, |
|
"grad_norm": 0.5352473258972168, |
|
"learning_rate": 0.0001755563853159334, |
|
"loss": 1.2378, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.3055481104261592, |
|
"grad_norm": 0.5558743476867676, |
|
"learning_rate": 0.00017535169137316227, |
|
"loss": 1.2911, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.3064415259537211, |
|
"grad_norm": 0.5650675892829895, |
|
"learning_rate": 0.0001751462643341982, |
|
"loss": 1.254, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.30733494148128293, |
|
"grad_norm": 0.5582188367843628, |
|
"learning_rate": 0.0001749401061976397, |
|
"loss": 1.246, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.3082283570088448, |
|
"grad_norm": 0.5283738970756531, |
|
"learning_rate": 0.0001747332189691981, |
|
"loss": 1.246, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.30912177253640666, |
|
"grad_norm": 0.5312855839729309, |
|
"learning_rate": 0.00017452560466167818, |
|
"loss": 1.2898, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.31001518806396855, |
|
"grad_norm": 0.5232421159744263, |
|
"learning_rate": 0.00017431726529495837, |
|
"loss": 1.282, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.31090860359153044, |
|
"grad_norm": 0.5114389061927795, |
|
"learning_rate": 0.00017410820289597126, |
|
"loss": 1.2814, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.3118020191190923, |
|
"grad_norm": 0.49061334133148193, |
|
"learning_rate": 0.00017389841949868378, |
|
"loss": 1.2584, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.31269543464665417, |
|
"grad_norm": 0.531428873538971, |
|
"learning_rate": 0.0001736879171440774, |
|
"loss": 1.2596, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.313588850174216, |
|
"grad_norm": 0.5237516164779663, |
|
"learning_rate": 0.00017347669788012846, |
|
"loss": 1.2213, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.3144822657017779, |
|
"grad_norm": 0.4916597306728363, |
|
"learning_rate": 0.00017326476376178796, |
|
"loss": 1.2573, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.3153756812293398, |
|
"grad_norm": 0.5719464421272278, |
|
"learning_rate": 0.00017305211685096178, |
|
"loss": 1.2306, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.3162690967569016, |
|
"grad_norm": 0.5756252408027649, |
|
"learning_rate": 0.00017283875921649057, |
|
"loss": 1.2842, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.3171625122844635, |
|
"grad_norm": 0.544870913028717, |
|
"learning_rate": 0.0001726246929341296, |
|
"loss": 1.264, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.31805592781202535, |
|
"grad_norm": 0.4986020028591156, |
|
"learning_rate": 0.00017240992008652855, |
|
"loss": 1.2919, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.31894934333958724, |
|
"grad_norm": 0.5383627414703369, |
|
"learning_rate": 0.00017219444276321127, |
|
"loss": 1.2679, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.31984275886714914, |
|
"grad_norm": 0.5600801706314087, |
|
"learning_rate": 0.00017197826306055547, |
|
"loss": 1.2589, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.32073617439471097, |
|
"grad_norm": 0.5386105179786682, |
|
"learning_rate": 0.0001717613830817723, |
|
"loss": 1.2812, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.32162958992227286, |
|
"grad_norm": 0.5082394480705261, |
|
"learning_rate": 0.00017154380493688583, |
|
"loss": 1.2717, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3225230054498347, |
|
"grad_norm": 0.5480278134346008, |
|
"learning_rate": 0.00017132553074271272, |
|
"loss": 1.2521, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.3234164209773966, |
|
"grad_norm": 0.522881269454956, |
|
"learning_rate": 0.00017110656262284135, |
|
"loss": 1.233, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.3243098365049585, |
|
"grad_norm": 0.5211268663406372, |
|
"learning_rate": 0.0001708869027076114, |
|
"loss": 1.2448, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.3252032520325203, |
|
"grad_norm": 0.5418733954429626, |
|
"learning_rate": 0.00017066655313409295, |
|
"loss": 1.2354, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3260966675600822, |
|
"grad_norm": 0.524122416973114, |
|
"learning_rate": 0.00017044551604606585, |
|
"loss": 1.2641, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.32699008308764405, |
|
"grad_norm": 0.5205156207084656, |
|
"learning_rate": 0.00017022379359399868, |
|
"loss": 1.2312, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.32788349861520594, |
|
"grad_norm": 0.5240566730499268, |
|
"learning_rate": 0.00017000138793502796, |
|
"loss": 1.2603, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.32877691414276783, |
|
"grad_norm": 0.5625032186508179, |
|
"learning_rate": 0.00016977830123293713, |
|
"loss": 1.2308, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 0.5633267164230347, |
|
"learning_rate": 0.00016955453565813548, |
|
"loss": 1.287, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.33056374519789156, |
|
"grad_norm": 0.5072932243347168, |
|
"learning_rate": 0.0001693300933876371, |
|
"loss": 1.259, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3314571607254534, |
|
"grad_norm": 0.532630205154419, |
|
"learning_rate": 0.00016910497660503957, |
|
"loss": 1.2352, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.3323505762530153, |
|
"grad_norm": 0.5094119310379028, |
|
"learning_rate": 0.00016887918750050292, |
|
"loss": 1.2672, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3332439917805771, |
|
"grad_norm": 0.5230539441108704, |
|
"learning_rate": 0.00016865272827072797, |
|
"loss": 1.2506, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.334137407308139, |
|
"grad_norm": 0.4917963445186615, |
|
"learning_rate": 0.00016842560111893543, |
|
"loss": 1.2685, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3350308228357009, |
|
"grad_norm": 0.5256093144416809, |
|
"learning_rate": 0.00016819780825484413, |
|
"loss": 1.2344, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.33592423836326274, |
|
"grad_norm": 0.5623132586479187, |
|
"learning_rate": 0.00016796935189464956, |
|
"loss": 1.246, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.33681765389082463, |
|
"grad_norm": 0.5536134839057922, |
|
"learning_rate": 0.00016774023426100238, |
|
"loss": 1.2428, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.33771106941838647, |
|
"grad_norm": 0.4987887442111969, |
|
"learning_rate": 0.0001675104575829868, |
|
"loss": 1.2414, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.33860448494594836, |
|
"grad_norm": 0.5539235472679138, |
|
"learning_rate": 0.0001672800240960989, |
|
"loss": 1.262, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.33949790047351025, |
|
"grad_norm": 0.5383912920951843, |
|
"learning_rate": 0.00016704893604222476, |
|
"loss": 1.2957, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3403913160010721, |
|
"grad_norm": 0.5472669005393982, |
|
"learning_rate": 0.00016681719566961887, |
|
"loss": 1.2723, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.341284731528634, |
|
"grad_norm": 0.5573126673698425, |
|
"learning_rate": 0.00016658480523288195, |
|
"loss": 1.214, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3421781470561958, |
|
"grad_norm": 0.5227344632148743, |
|
"learning_rate": 0.00016635176699293934, |
|
"loss": 1.2613, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.3430715625837577, |
|
"grad_norm": 0.5032819509506226, |
|
"learning_rate": 0.00016611808321701882, |
|
"loss": 1.2575, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3439649781113196, |
|
"grad_norm": 0.5216634273529053, |
|
"learning_rate": 0.00016588375617862858, |
|
"loss": 1.2253, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.34485839363888143, |
|
"grad_norm": 0.5421084761619568, |
|
"learning_rate": 0.000165648788157535, |
|
"loss": 1.2899, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.3457518091664433, |
|
"grad_norm": 0.5299046039581299, |
|
"learning_rate": 0.00016541318143974075, |
|
"loss": 1.263, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.34664522469400516, |
|
"grad_norm": 0.5358394980430603, |
|
"learning_rate": 0.00016517693831746225, |
|
"loss": 1.2614, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.34753864022156705, |
|
"grad_norm": 0.570563018321991, |
|
"learning_rate": 0.00016494006108910757, |
|
"loss": 1.257, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.34843205574912894, |
|
"grad_norm": 0.4944029450416565, |
|
"learning_rate": 0.000164702552059254, |
|
"loss": 1.2376, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.3493254712766908, |
|
"grad_norm": 0.5532763004302979, |
|
"learning_rate": 0.00016446441353862556, |
|
"loss": 1.3048, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.35021888680425267, |
|
"grad_norm": 0.5413596034049988, |
|
"learning_rate": 0.0001642256478440706, |
|
"loss": 1.2578, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3511123023318145, |
|
"grad_norm": 0.5195985436439514, |
|
"learning_rate": 0.00016398625729853924, |
|
"loss": 1.2549, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.3520057178593764, |
|
"grad_norm": 0.5096262097358704, |
|
"learning_rate": 0.00016374624423106087, |
|
"loss": 1.2538, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.3528991333869383, |
|
"grad_norm": 0.5297495126724243, |
|
"learning_rate": 0.00016350561097672122, |
|
"loss": 1.2156, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.3537925489145001, |
|
"grad_norm": 0.523977518081665, |
|
"learning_rate": 0.00016326435987663995, |
|
"loss": 1.2471, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.354685964442062, |
|
"grad_norm": 0.5540353059768677, |
|
"learning_rate": 0.0001630224932779477, |
|
"loss": 1.232, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.35557937996962385, |
|
"grad_norm": 0.5836973190307617, |
|
"learning_rate": 0.00016278001353376323, |
|
"loss": 1.2102, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.35647279549718575, |
|
"grad_norm": 0.52564537525177, |
|
"learning_rate": 0.0001625369230031707, |
|
"loss": 1.2415, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.35736621102474764, |
|
"grad_norm": 0.512956976890564, |
|
"learning_rate": 0.00016229322405119655, |
|
"loss": 1.2297, |
|
"step": 2000 |
|
}, |
|
{"epoch": 0.3582596265523095, "grad_norm": 0.5465174317359924, "learning_rate": 0.00016204891904878657, "loss": 1.2394, "step": 2005},
{"epoch": 0.35915304207987137, "grad_norm": 0.502322793006897, "learning_rate": 0.0001618040103727827, "loss": 1.2317, "step": 2010},
{"epoch": 0.3600464576074332, "grad_norm": 0.5260788202285767, "learning_rate": 0.00016155850040590016, "loss": 1.2446, "step": 2015},
{"epoch": 0.3609398731349951, "grad_norm": 0.5212415456771851, "learning_rate": 0.0001613123915367041, "loss": 1.2422, "step": 2020},
{"epoch": 0.36183328866255693, "grad_norm": 0.5056548714637756, "learning_rate": 0.00016106568615958632, "loss": 1.1996, "step": 2025},
{"epoch": 0.3627267041901188, "grad_norm": 0.52322918176651, "learning_rate": 0.00016081838667474213, "loss": 1.2173, "step": 2030},
{"epoch": 0.3636201197176807, "grad_norm": 0.5279656052589417, "learning_rate": 0.0001605704954881468, "loss": 1.2358, "step": 2035},
{"epoch": 0.36451353524524255, "grad_norm": 0.5513635277748108, "learning_rate": 0.00016032201501153242, "loss": 1.2278, "step": 2040},
{"epoch": 0.36540695077280444, "grad_norm": 0.5040432214736938, "learning_rate": 0.00016007294766236406, "loss": 1.2253, "step": 2045},
{"epoch": 0.3663003663003663, "grad_norm": 0.4764319062232971, "learning_rate": 0.00015982329586381675, "loss": 1.2304, "step": 2050},
{"epoch": 0.36719378182792817, "grad_norm": 0.4786640703678131, "learning_rate": 0.00015957306204475132, "loss": 1.2555, "step": 2055},
{"epoch": 0.36808719735549006, "grad_norm": 0.5668711066246033, "learning_rate": 0.00015932224863969135, "loss": 1.2294, "step": 2060},
{"epoch": 0.3689806128830519, "grad_norm": 0.5136781930923462, "learning_rate": 0.000159070858088799, "loss": 1.1752, "step": 2065},
{"epoch": 0.3698740284106138, "grad_norm": 0.5189619660377502, "learning_rate": 0.0001588188928378516, "loss": 1.2351, "step": 2070},
{"epoch": 0.3707674439381756, "grad_norm": 0.5765734910964966, "learning_rate": 0.00015856635533821774, "loss": 1.2, "step": 2075},
{"epoch": 0.3716608594657375, "grad_norm": 0.5305128693580627, "learning_rate": 0.00015831324804683328, "loss": 1.241, "step": 2080},
{"epoch": 0.3725542749932994, "grad_norm": 0.5297000408172607, "learning_rate": 0.0001580595734261777, "loss": 1.1959, "step": 2085},
{"epoch": 0.37344769052086124, "grad_norm": 0.5325464010238647, "learning_rate": 0.00015780533394425006, "loss": 1.2221, "step": 2090},
{"epoch": 0.37434110604842313, "grad_norm": 0.5105712413787842, "learning_rate": 0.00015755053207454483, "loss": 1.2422, "step": 2095},
{"epoch": 0.37523452157598497, "grad_norm": 0.5255334973335266, "learning_rate": 0.00015729517029602802, "loss": 1.2326, "step": 2100},
{"epoch": 0.37612793710354686, "grad_norm": 0.5344464778900146, "learning_rate": 0.00015703925109311295, "loss": 1.2188, "step": 2105},
{"epoch": 0.37702135263110875, "grad_norm": 0.5103366374969482, "learning_rate": 0.00015678277695563617, "loss": 1.2354, "step": 2110},
{"epoch": 0.3779147681586706, "grad_norm": 0.5386320948600769, "learning_rate": 0.00015652575037883318, "loss": 1.1967, "step": 2115},
{"epoch": 0.3788081836862325, "grad_norm": 0.5370935201644897, "learning_rate": 0.0001562681738633141, "loss": 1.2594, "step": 2120},
{"epoch": 0.3797015992137943, "grad_norm": 0.5193549990653992, "learning_rate": 0.00015601004991503946, "loss": 1.2144, "step": 2125},
{"epoch": 0.3805950147413562, "grad_norm": 0.5521916151046753, "learning_rate": 0.00015575138104529577, "loss": 1.2009, "step": 2130},
{"epoch": 0.3814884302689181, "grad_norm": 0.4977095425128937, "learning_rate": 0.00015549216977067099, "loss": 1.2222, "step": 2135},
{"epoch": 0.38238184579647994, "grad_norm": 0.4930327832698822, "learning_rate": 0.0001552324186130302, "loss": 1.2569, "step": 2140},
{"epoch": 0.3832752613240418, "grad_norm": 0.5264571905136108, "learning_rate": 0.00015497213009949104, "loss": 1.2016, "step": 2145},
{"epoch": 0.38416867685160366, "grad_norm": 0.505418598651886, "learning_rate": 0.000154711306762399, "loss": 1.2115, "step": 2150},
{"epoch": 0.38506209237916555, "grad_norm": 0.5414779186248779, "learning_rate": 0.00015444995113930287, "loss": 1.2452, "step": 2155},
{"epoch": 0.38595550790672745, "grad_norm": 0.534766435623169, "learning_rate": 0.00015418806577293013, "loss": 1.2098, "step": 2160},
{"epoch": 0.3868489234342893, "grad_norm": 0.5061149001121521, "learning_rate": 0.00015392565321116207, "loss": 1.2156, "step": 2165},
{"epoch": 0.3877423389618512, "grad_norm": 0.5177295207977295, "learning_rate": 0.00015366271600700902, "loss": 1.2279, "step": 2170},
{"epoch": 0.388635754489413, "grad_norm": 0.5544948577880859, "learning_rate": 0.00015339925671858563, "loss": 1.2138, "step": 2175},
{"epoch": 0.3895291700169749, "grad_norm": 0.531894862651825, "learning_rate": 0.0001531352779090859, "loss": 1.1939, "step": 2180},
{"epoch": 0.39042258554453674, "grad_norm": 0.5233749151229858, "learning_rate": 0.00015287078214675819, "loss": 1.2357, "step": 2185},
{"epoch": 0.39131600107209863, "grad_norm": 0.5275238156318665, "learning_rate": 0.00015260577200488034, "loss": 1.2322, "step": 2190},
{"epoch": 0.3922094165996605, "grad_norm": 0.5474215745925903, "learning_rate": 0.00015234025006173452, "loss": 1.2265, "step": 2195},
{"epoch": 0.39310283212722236, "grad_norm": 0.5120342373847961, "learning_rate": 0.00015207421890058237, "loss": 1.216, "step": 2200},
{"epoch": 0.39399624765478425, "grad_norm": 0.5480685830116272, "learning_rate": 0.0001518076811096395, "loss": 1.2065, "step": 2205},
{"epoch": 0.3948896631823461, "grad_norm": 0.47449925541877747, "learning_rate": 0.00015154063928205067, "loss": 1.1511, "step": 2210},
{"epoch": 0.395783078709908, "grad_norm": 0.49476173520088196, "learning_rate": 0.00015127309601586434, "loss": 1.1736, "step": 2215},
{"epoch": 0.39667649423746987, "grad_norm": 0.513217568397522, "learning_rate": 0.0001510050539140075, "loss": 1.2213, "step": 2220},
{"epoch": 0.3975699097650317, "grad_norm": 0.5108741521835327, "learning_rate": 0.00015073651558426026, "loss": 1.2367, "step": 2225},
{"epoch": 0.3984633252925936, "grad_norm": 0.5116496682167053, "learning_rate": 0.00015046748363923057, "loss": 1.2457, "step": 2230},
{"epoch": 0.39935674082015543, "grad_norm": 0.5216470956802368, "learning_rate": 0.00015019796069632878, "loss": 1.2356, "step": 2235},
{"epoch": 0.4002501563477173, "grad_norm": 0.5476000308990479, "learning_rate": 0.00014992794937774211, "loss": 1.2221, "step": 2240},
{"epoch": 0.4011435718752792, "grad_norm": 0.5119903683662415, "learning_rate": 0.00014965745231040919, "loss": 1.2492, "step": 2245},
{"epoch": 0.40203698740284105, "grad_norm": 0.5329665541648865, "learning_rate": 0.00014938647212599452, "loss": 1.2128, "step": 2250},
{"epoch": 0.40293040293040294, "grad_norm": 0.5195185542106628, "learning_rate": 0.00014911501146086281, "loss": 1.2172, "step": 2255},
{"epoch": 0.4038238184579648, "grad_norm": 0.4988158345222473, "learning_rate": 0.00014884307295605343, "loss": 1.2229, "step": 2260},
{"epoch": 0.40471723398552667, "grad_norm": 0.5173177719116211, "learning_rate": 0.00014857065925725452, "loss": 1.1982, "step": 2265},
{"epoch": 0.40561064951308856, "grad_norm": 0.5261069536209106, "learning_rate": 0.0001482977730147776, "loss": 1.2049, "step": 2270},
{"epoch": 0.4065040650406504, "grad_norm": 0.5446577072143555, "learning_rate": 0.00014802441688353127, "loss": 1.2377, "step": 2275},
{"epoch": 0.4073974805682123, "grad_norm": 0.5223033428192139, "learning_rate": 0.00014775059352299598, "loss": 1.1983, "step": 2280},
{"epoch": 0.4082908960957741, "grad_norm": 0.5107192397117615, "learning_rate": 0.00014747630559719762, "loss": 1.2022, "step": 2285},
{"epoch": 0.409184311623336, "grad_norm": 0.5348856449127197, "learning_rate": 0.00014720155577468193, "loss": 1.1886, "step": 2290},
{"epoch": 0.4100777271508979, "grad_norm": 0.5229494571685791, "learning_rate": 0.00014692634672848847, "loss": 1.2079, "step": 2295},
{"epoch": 0.41097114267845974, "grad_norm": 0.49456527829170227, "learning_rate": 0.00014665068113612449, "loss": 1.2135, "step": 2300},
{"epoch": 0.41186455820602164, "grad_norm": 0.5290020108222961, "learning_rate": 0.00014637456167953907, "loss": 1.2236, "step": 2305},
{"epoch": 0.41275797373358347, "grad_norm": 0.4855465888977051, "learning_rate": 0.00014609799104509685, "loss": 1.2053, "step": 2310},
{"epoch": 0.41365138926114536, "grad_norm": 0.5055922269821167, "learning_rate": 0.00014582097192355207, "loss": 1.1958, "step": 2315},
{"epoch": 0.41454480478870726, "grad_norm": 0.5334281325340271, "learning_rate": 0.00014554350701002222, "loss": 1.1807, "step": 2320},
{"epoch": 0.4154382203162691, "grad_norm": 0.5472596287727356, "learning_rate": 0.00014526559900396188, "loss": 1.1889, "step": 2325},
{"epoch": 0.416331635843831, "grad_norm": 0.5031034350395203, "learning_rate": 0.00014498725060913662, "loss": 1.2334, "step": 2330},
{"epoch": 0.4172250513713928, "grad_norm": 0.5575581789016724, "learning_rate": 0.00014470846453359636, "loss": 1.199, "step": 2335},
{"epoch": 0.4181184668989547, "grad_norm": 0.5209872126579285, "learning_rate": 0.00014442924348964938, "loss": 1.2144, "step": 2340},
{"epoch": 0.41901188242651655, "grad_norm": 0.5038667321205139, "learning_rate": 0.00014414959019383564, "loss": 1.2162, "step": 2345},
{"epoch": 0.41990529795407844, "grad_norm": 0.5263863801956177, "learning_rate": 0.00014386950736690053, "loss": 1.2165, "step": 2350},
{"epoch": 0.42079871348164033, "grad_norm": 0.5653106570243835, "learning_rate": 0.00014358899773376832, "loss": 1.1906, "step": 2355},
{"epoch": 0.42169212900920217, "grad_norm": 0.49144411087036133, "learning_rate": 0.00014330806402351574, "loss": 1.1886, "step": 2360},
{"epoch": 0.42258554453676406, "grad_norm": 0.5124874114990234, "learning_rate": 0.00014302670896934532, "loss": 1.2159, "step": 2365},
{"epoch": 0.4234789600643259, "grad_norm": 0.5377556085586548, "learning_rate": 0.00014274493530855878, "loss": 1.2051, "step": 2370},
{"epoch": 0.4243723755918878, "grad_norm": 0.5524542331695557, "learning_rate": 0.00014246274578253059, "loss": 1.2096, "step": 2375},
{"epoch": 0.4252657911194497, "grad_norm": 0.5271286368370056, "learning_rate": 0.00014218014313668105, "loss": 1.222, "step": 2380},
{"epoch": 0.4261592066470115, "grad_norm": 0.5286676287651062, "learning_rate": 0.00014189713012044977, "loss": 1.2163, "step": 2385},
{"epoch": 0.4270526221745734, "grad_norm": 0.5305603742599487, "learning_rate": 0.00014161370948726894, "loss": 1.1911, "step": 2390},
{"epoch": 0.42794603770213524, "grad_norm": 0.5002890825271606, "learning_rate": 0.00014132988399453617, "loss": 1.242, "step": 2395},
{"epoch": 0.42883945322969713, "grad_norm": 0.5160467028617859, "learning_rate": 0.00014104565640358824, "loss": 1.1924, "step": 2400},
{"epoch": 0.429732868757259, "grad_norm": 0.5422557592391968, "learning_rate": 0.0001407610294796738, "loss": 1.193, "step": 2405},
{"epoch": 0.43062628428482086, "grad_norm": 0.5061042904853821, "learning_rate": 0.00014047600599192666, "loss": 1.1835, "step": 2410},
{"epoch": 0.43151969981238275, "grad_norm": 0.507078230381012, "learning_rate": 0.0001401905887133387, "loss": 1.1788, "step": 2415},
{"epoch": 0.4324131153399446, "grad_norm": 0.5291483402252197, "learning_rate": 0.00013990478042073313, "loss": 1.1941, "step": 2420},
{"epoch": 0.4333065308675065, "grad_norm": 0.5093267560005188, "learning_rate": 0.00013961858389473727, "loss": 1.1938, "step": 2425},
{"epoch": 0.43419994639506837, "grad_norm": 0.5296744108200073, "learning_rate": 0.0001393320019197555, "loss": 1.1978, "step": 2430},
{"epoch": 0.4350933619226302, "grad_norm": 0.49533629417419434, "learning_rate": 0.00013904503728394234, "loss": 1.2015, "step": 2435},
{"epoch": 0.4359867774501921, "grad_norm": 0.5509293079376221, "learning_rate": 0.00013875769277917513, "loss": 1.1963, "step": 2440},
{"epoch": 0.43688019297775393, "grad_norm": 0.49859216809272766, "learning_rate": 0.000138469971201027, "loss": 1.1748, "step": 2445},
{"epoch": 0.4377736085053158, "grad_norm": 0.5663615465164185, "learning_rate": 0.00013818187534873954, "loss": 1.2001, "step": 2450},
{"epoch": 0.4386670240328777, "grad_norm": 0.5102608799934387, "learning_rate": 0.00013789340802519581, "loss": 1.2063, "step": 2455},
{"epoch": 0.43956043956043955, "grad_norm": 0.5029070973396301, "learning_rate": 0.0001376045720368928, "loss": 1.2071, "step": 2460},
{"epoch": 0.44045385508800144, "grad_norm": 0.5075359344482422, "learning_rate": 0.00013731537019391428, "loss": 1.152, "step": 2465},
{"epoch": 0.4413472706155633, "grad_norm": 0.5264108777046204, "learning_rate": 0.00013702580530990335, "loss": 1.2019, "step": 2470},
{"epoch": 0.4422406861431252, "grad_norm": 0.5055813789367676, "learning_rate": 0.00013673588020203517, "loss": 1.1786, "step": 2475},
{"epoch": 0.44313410167068706, "grad_norm": 0.5127353668212891, "learning_rate": 0.0001364455976909896, "loss": 1.1649, "step": 2480},
{"epoch": 0.4440275171982489, "grad_norm": 0.5117409825325012, "learning_rate": 0.00013615496060092355, "loss": 1.1762, "step": 2485},
{"epoch": 0.4449209327258108, "grad_norm": 0.5126619935035706, "learning_rate": 0.00013586397175944368, "loss": 1.2026, "step": 2490},
{"epoch": 0.4458143482533726, "grad_norm": 0.5334925055503845, "learning_rate": 0.0001355726339975788, "loss": 1.2209, "step": 2495},
{"epoch": 0.4467077637809345, "grad_norm": 0.5059501528739929, "learning_rate": 0.00013528095014975252, "loss": 1.1809, "step": 2500},
{"epoch": 0.44760117930849636, "grad_norm": 0.5418409705162048, "learning_rate": 0.0001349889230537553, "loss": 1.2321, "step": 2505},
{"epoch": 0.44849459483605825, "grad_norm": 0.5434291362762451, "learning_rate": 0.00013469655555071715, "loss": 1.1511, "step": 2510},
{"epoch": 0.44938801036362014, "grad_norm": 0.523363471031189, "learning_rate": 0.00013440385048507997, "loss": 1.2086, "step": 2515},
{"epoch": 0.450281425891182, "grad_norm": 0.5296551585197449, "learning_rate": 0.0001341108107045697, "loss": 1.1834, "step": 2520},
{"epoch": 0.45117484141874387, "grad_norm": 0.5411925911903381, "learning_rate": 0.00013381743906016878, "loss": 1.173, "step": 2525},
{"epoch": 0.4520682569463057, "grad_norm": 0.524066150188446, "learning_rate": 0.00013352373840608834, "loss": 1.2259, "step": 2530},
{"epoch": 0.4529616724738676, "grad_norm": 0.5258145928382874, "learning_rate": 0.00013322971159974043, "loss": 1.1724, "step": 2535},
{"epoch": 0.4538550880014295, "grad_norm": 0.5472233295440674, "learning_rate": 0.0001329353615017102, "loss": 1.1523, "step": 2540},
{"epoch": 0.4547485035289913, "grad_norm": 0.5172765254974365, "learning_rate": 0.00013264069097572816, "loss": 1.1904, "step": 2545},
{"epoch": 0.4556419190565532, "grad_norm": 0.49758392572402954, "learning_rate": 0.00013234570288864228, "loss": 1.1737, "step": 2550},
{"epoch": 0.45653533458411505, "grad_norm": 0.5258619785308838, "learning_rate": 0.00013205040011039004, "loss": 1.1746, "step": 2555},
{"epoch": 0.45742875011167694, "grad_norm": 0.4878065586090088, "learning_rate": 0.0001317547855139705, "loss": 1.1987, "step": 2560},
{"epoch": 0.45832216563923883, "grad_norm": 0.4854235053062439, "learning_rate": 0.00013145886197541651, "loss": 1.1605, "step": 2565},
{"epoch": 0.45921558116680067, "grad_norm": 0.48126521706581116, "learning_rate": 0.0001311626323737665, "loss": 1.2038, "step": 2570},
{"epoch": 0.46010899669436256, "grad_norm": 0.5124824643135071, "learning_rate": 0.00013086609959103672, "loss": 1.1791, "step": 2575},
{"epoch": 0.4610024122219244, "grad_norm": 0.5470382571220398, "learning_rate": 0.00013056926651219293, "loss": 1.2034, "step": 2580},
{"epoch": 0.4618958277494863, "grad_norm": 0.5247116684913635, "learning_rate": 0.00013027213602512258, "loss": 1.1979, "step": 2585},
{"epoch": 0.4627892432770482, "grad_norm": 0.5126326680183411, "learning_rate": 0.00012997471102060647, "loss": 1.2037, "step": 2590},
{"epoch": 0.46368265880461, "grad_norm": 0.5170397162437439, "learning_rate": 0.00012967699439229093, "loss": 1.186, "step": 2595},
{"epoch": 0.4645760743321719, "grad_norm": 0.5169094800949097, "learning_rate": 0.00012937898903665935, "loss": 1.1941, "step": 2600},
{"epoch": 0.46546948985973374, "grad_norm": 0.537554144859314, "learning_rate": 0.0001290806978530042, "loss": 1.1847, "step": 2605},
{"epoch": 0.46636290538729563, "grad_norm": 0.49902471899986267, "learning_rate": 0.00012878212374339883, "loss": 1.1983, "step": 2610},
{"epoch": 0.4672563209148575, "grad_norm": 0.5386156439781189, "learning_rate": 0.0001284832696126691, "loss": 1.1829, "step": 2615},
{"epoch": 0.46814973644241936, "grad_norm": 0.5420889854431152, "learning_rate": 0.00012818413836836515, "loss": 1.1821, "step": 2620},
{"epoch": 0.46904315196998125, "grad_norm": 0.5098642110824585, "learning_rate": 0.00012788473292073328, "loss": 1.1982, "step": 2625},
{"epoch": 0.4699365674975431, "grad_norm": 0.4834945499897003, "learning_rate": 0.00012758505618268743, "loss": 1.1951, "step": 2630},
{"epoch": 0.470829983025105, "grad_norm": 0.5107752680778503, "learning_rate": 0.000127285111069781, "loss": 1.1451, "step": 2635},
{"epoch": 0.4717233985526669, "grad_norm": 0.5363967418670654, "learning_rate": 0.00012698490050017824, "loss": 1.1832, "step": 2640},
{"epoch": 0.4726168140802287, "grad_norm": 0.469596266746521, "learning_rate": 0.0001266844273946262, "loss": 1.1397, "step": 2645},
{"epoch": 0.4735102296077906, "grad_norm": 0.5100436210632324, "learning_rate": 0.0001263836946764261, "loss": 1.1823, "step": 2650},
{"epoch": 0.47440364513535244, "grad_norm": 0.5279127359390259, "learning_rate": 0.0001260827052714049, "loss": 1.1407, "step": 2655},
{"epoch": 0.47529706066291433, "grad_norm": 0.49943119287490845, "learning_rate": 0.00012578146210788686, "loss": 1.16, "step": 2660},
{"epoch": 0.47619047619047616, "grad_norm": 0.5424295663833618, "learning_rate": 0.0001254799681166651, "loss": 1.1971, "step": 2665},
{"epoch": 0.47708389171803806, "grad_norm": 0.5269612669944763, "learning_rate": 0.00012517822623097296, "loss": 1.1883, "step": 2670},
{"epoch": 0.47797730724559995, "grad_norm": 0.4970255196094513, "learning_rate": 0.0001248762393864556, "loss": 1.169, "step": 2675},
{"epoch": 0.4788707227731618, "grad_norm": 0.46272140741348267, "learning_rate": 0.0001245740105211414, "loss": 1.1619, "step": 2680},
{"epoch": 0.4797641383007237, "grad_norm": 0.5113199949264526, "learning_rate": 0.00012427154257541333, "loss": 1.183, "step": 2685},
{"epoch": 0.4806575538282855, "grad_norm": 0.5340253114700317, "learning_rate": 0.0001239688384919804, "loss": 1.1534, "step": 2690},
{"epoch": 0.4815509693558474, "grad_norm": 0.5490040183067322, "learning_rate": 0.00012366590121584895, "loss": 1.1496, "step": 2695},
{"epoch": 0.4824443848834093, "grad_norm": 0.5425564646720886, "learning_rate": 0.0001233627336942941, "loss": 1.2009, "step": 2700},
{"epoch": 0.48333780041097113, "grad_norm": 0.4985598027706146, "learning_rate": 0.00012305933887683102, "loss": 1.1716, "step": 2705},
{"epoch": 0.484231215938533, "grad_norm": 0.48820316791534424, "learning_rate": 0.00012275571971518616, "loss": 1.189, "step": 2710},
{"epoch": 0.48512463146609486, "grad_norm": 0.5365601181983948, "learning_rate": 0.00012245187916326878, "loss": 1.1969, "step": 2715},
{"epoch": 0.48601804699365675, "grad_norm": 0.5123448371887207, "learning_rate": 0.00012214782017714185, "loss": 1.1424, "step": 2720},
{"epoch": 0.48691146252121864, "grad_norm": 0.5060442686080933, "learning_rate": 0.00012184354571499365, "loss": 1.1682, "step": 2725},
{"epoch": 0.4878048780487805, "grad_norm": 0.5641975998878479, "learning_rate": 0.00012153905873710878, "loss": 1.1952, "step": 2730},
{"epoch": 0.48869829357634237, "grad_norm": 0.4496929347515106, "learning_rate": 0.00012123436220583931, "loss": 1.1788, "step": 2735},
{"epoch": 0.4895917091039042, "grad_norm": 0.5671601295471191, "learning_rate": 0.00012092945908557616, "loss": 1.1792, "step": 2740},
{"epoch": 0.4904851246314661, "grad_norm": 0.5188336968421936, "learning_rate": 0.00012062435234272007, "loss": 1.1921, "step": 2745},
{"epoch": 0.491378540159028, "grad_norm": 0.5338768362998962, "learning_rate": 0.00012031904494565296, "loss": 1.1879, "step": 2750},
{"epoch": 0.4922719556865898, "grad_norm": 0.5573875308036804, "learning_rate": 0.00012001353986470878, "loss": 1.2, "step": 2755},
{"epoch": 0.4931653712141517, "grad_norm": 0.523671567440033, "learning_rate": 0.00011970784007214477, "loss": 1.1887, "step": 2760},
{"epoch": 0.49405878674171355, "grad_norm": 0.5382546186447144, "learning_rate": 0.00011940194854211258, "loss": 1.1676, "step": 2765},
{"epoch": 0.49495220226927544, "grad_norm": 0.5233120918273926, "learning_rate": 0.00011909586825062917, "loss": 1.178, "step": 2770},
{"epoch": 0.49584561779683733, "grad_norm": 0.5520097017288208, "learning_rate": 0.00011878960217554809, "loss": 1.1757, "step": 2775},
{"epoch": 0.49673903332439917, "grad_norm": 0.5060712695121765, "learning_rate": 0.00011848315329653028, "loss": 1.169, "step": 2780},
{"epoch": 0.49763244885196106, "grad_norm": 0.5251619815826416, "learning_rate": 0.0001181765245950152, "loss": 1.1813, "step": 2785},
{"epoch": 0.4985258643795229, "grad_norm": 0.48611682653427124, "learning_rate": 0.00011786971905419179, "loss": 1.1437, "step": 2790},
{"epoch": 0.4994192799070848, "grad_norm": 0.5000553727149963, "learning_rate": 0.00011756273965896953, "loss": 1.1668, "step": 2795},
{"epoch": 0.5003126954346466, "grad_norm": 0.548265278339386, "learning_rate": 0.00011725558939594924, "loss": 1.1859, "step": 2800},
{"epoch": 0.5012061109622086, "grad_norm": 0.5142680406570435, "learning_rate": 0.00011694827125339418, "loss": 1.176, "step": 2805},
{"epoch": 0.5020995264897704, "grad_norm": 0.5385056138038635, "learning_rate": 0.00011664078822120084, "loss": 1.195, "step": 2810},
{"epoch": 0.5029929420173322, "grad_norm": 0.4849853217601776, "learning_rate": 0.00011633314329086993, "loss": 1.1613, "step": 2815},
{"epoch": 0.5038863575448941, "grad_norm": 0.5373867154121399, "learning_rate": 0.00011602533945547737, "loss": 1.1389, "step": 2820},
{"epoch": 0.504779773072456, "grad_norm": 0.5037809014320374, "learning_rate": 0.00011571737970964496, "loss": 1.1717, "step": 2825},
{"epoch": 0.5056731886000179, "grad_norm": 0.4938678741455078, "learning_rate": 0.00011540926704951136, "loss": 1.1625, "step": 2830},
{"epoch": 0.5065666041275797, "grad_norm": 0.47168177366256714, "learning_rate": 0.000115101004472703, "loss": 1.1373, "step": 2835},
{"epoch": 0.5074600196551416, "grad_norm": 0.4792795479297638, "learning_rate": 0.00011479259497830472, "loss": 1.1635, "step": 2840},
{"epoch": 0.5083534351827035, "grad_norm": 0.5276928544044495, "learning_rate": 0.00011448404156683088, "loss": 1.1809, "step": 2845},
{"epoch": 0.5092468507102653, "grad_norm": 0.4980674684047699, "learning_rate": 0.00011417534724019592, "loss": 1.1669, "step": 2850},
{"epoch": 0.5101402662378273, "grad_norm": 0.5144380927085876, "learning_rate": 0.00011386651500168524, "loss": 1.1944, "step": 2855},
{"epoch": 0.5110336817653891, "grad_norm": 0.5162400007247925, "learning_rate": 0.00011355754785592596, "loss": 1.1526, "step": 2860},
{"epoch": 0.5119270972929509, "grad_norm": 0.5225709080696106, "learning_rate": 0.00011324844880885783, "loss": 1.1731, "step": 2865},
{"epoch": 0.5128205128205128, "grad_norm": 0.5327043533325195, "learning_rate": 0.00011293922086770375, "loss": 1.1483, "step": 2870},
{"epoch": 0.5137139283480747, "grad_norm": 0.60153728723526, "learning_rate": 0.00011262986704094065, "loss": 1.1987, "step": 2875},
{"epoch": 0.5146073438756366, "grad_norm": 0.5078974962234497, "learning_rate": 0.00011232039033827025, "loss": 1.15, "step": 2880},
{"epoch": 0.5155007594031984, "grad_norm": 0.5076168775558472, "learning_rate": 0.00011201079377058963, "loss": 1.14, "step": 2885},
{"epoch": 0.5163941749307603, "grad_norm": 0.5268626809120178, "learning_rate": 0.00011170108034996218, "loss": 1.1686, "step": 2890},
{"epoch": 0.5172875904583222, "grad_norm": 0.5654718279838562, "learning_rate": 0.00011139125308958804, "loss": 1.1656, "step": 2895},
{"epoch": 0.518181005985884, "grad_norm": 0.505577027797699, "learning_rate": 0.00011108131500377494, "loss": 1.1724, "step": 2900},
{"epoch": 0.5190744215134458, "grad_norm": 0.50611811876297, "learning_rate": 0.00011077126910790882, "loss": 1.1562, "step": 2905},
{"epoch": 0.5199678370410078, "grad_norm": 0.5131402015686035, "learning_rate": 0.0001104611184184245, "loss": 1.1485, "step": 2910},
{"epoch": 0.5208612525685696, "grad_norm": 0.498565137386322, "learning_rate": 0.00011015086595277633, "loss": 1.1614, "step": 2915},
{"epoch": 0.5217546680961315, "grad_norm": 0.4939316511154175, "learning_rate": 0.00010984051472940885, "loss": 1.1401, "step": 2920},
{"epoch": 0.5226480836236934, "grad_norm": 0.5283201336860657, "learning_rate": 0.00010953006776772747, "loss": 1.1557, "step": 2925},
{"epoch": 0.5235414991512553, "grad_norm": 0.492447167634964, "learning_rate": 0.00010921952808806888, "loss": 1.1519, "step": 2930},
{"epoch": 0.5244349146788171, "grad_norm": 0.5183736681938171, "learning_rate": 0.00010890889871167203, "loss": 1.1543, "step": 2935},
{"epoch": 0.525328330206379, "grad_norm": 0.5172650218009949, "learning_rate": 0.00010859818266064835, "loss": 1.1731, "step": 2940},
{"epoch": 0.5262217457339409, "grad_norm": 0.5232465267181396, "learning_rate": 0.00010828738295795262, "loss": 1.1628, "step": 2945},
{"epoch": 0.5271151612615027, "grad_norm": 0.5208895206451416, "learning_rate": 0.00010797650262735346, "loss": 1.1682, "step": 2950},
{"epoch": 0.5280085767890645, "grad_norm": 0.5442891716957092, "learning_rate": 0.00010766554469340386, "loss": 1.1796, "step": 2955},
{"epoch": 0.5289019923166265, "grad_norm": 0.5013934373855591, "learning_rate": 0.00010735451218141191, "loss": 1.1758, "step": 2960},
{"epoch": 0.5297954078441883, "grad_norm": 0.560184895992279, "learning_rate": 0.0001070434081174112, "loss": 1.1602, "step": 2965},
{"epoch": 0.5306888233717502, "grad_norm": 0.4728075861930847, "learning_rate": 0.00010673223552813147, "loss": 1.1579, "step": 2970},
{"epoch": 0.5315822388993121, "grad_norm": 0.547065794467926, "learning_rate": 0.00010642099744096914, "loss": 1.1606, "step": 2975},
{"epoch": 0.532475654426874, "grad_norm": 0.5507860779762268, "learning_rate": 0.00010610969688395782, "loss": 1.1524, "step": 2980},
{"epoch": 0.5333690699544358, "grad_norm": 0.5366077423095703, "learning_rate": 0.00010579833688573897, "loss": 1.1668, "step": 2985},
{"epoch": 0.5342624854819977, "grad_norm": 0.49021753668785095, "learning_rate": 0.00010548692047553227, "loss": 1.1554, "step": 2990},
{"epoch": 0.5351559010095596, "grad_norm": 0.5239299535751343, "learning_rate": 0.00010517545068310635, "loss": 1.1501, "step": 2995},
{"epoch": 0.5360493165371214, "grad_norm": 0.4920014441013336, "learning_rate": 0.00010486393053874902, "loss": 1.1705, "step": 3000},
{"epoch": 0.5369427320646832, "grad_norm": 0.5581287741661072, "learning_rate": 0.0001045523630732381, "loss": 1.1742, "step": 3005},
{"epoch": 0.5378361475922452, "grad_norm": 0.5146217346191406, "learning_rate": 0.00010424075131781178, "loss": 1.185, "step": 3010},
{"epoch": 0.538729563119807, "grad_norm": 0.5112408995628357, "learning_rate": 0.00010392909830413904, "loss": 1.162, "step": 3015},
{"epoch": 0.5396229786473689, "grad_norm": 0.526857316493988, "learning_rate": 0.00010361740706429046, "loss": 1.164, "step": 3020},
{"epoch": 0.5405163941749308, "grad_norm": 0.5244843363761902, "learning_rate": 0.00010330568063070832, "loss": 1.1951, "step": 3025},
{"epoch": 0.5414098097024926, "grad_norm": 0.5005354881286621, "learning_rate": 0.00010299392203617744, "loss": 1.1552, "step": 3030},
{"epoch": 0.5423032252300545, "grad_norm": 0.5191918015480042, "learning_rate": 0.00010268213431379543, "loss": 1.1868, "step": 3035},
{"epoch": 0.5431966407576164, "grad_norm": 0.4955528974533081, "learning_rate": 0.00010237032049694335, "loss": 1.1584, "step": 3040},
{"epoch": 0.5440900562851783, "grad_norm": 0.4983779489994049, "learning_rate": 0.00010205848361925618, "loss": 1.1532, "step": 3045},
{"epoch": 0.5449834718127401, "grad_norm": 0.5516743659973145, "learning_rate": 0.0001017466267145931, "loss": 1.1702, "step": 3050},
{"epoch": 0.5458768873403019, "grad_norm": 0.5391404032707214, "learning_rate": 0.0001014347528170083, "loss": 1.1573, "step": 3055},
{"epoch": 0.5467703028678639, "grad_norm": 0.5334770083427429, "learning_rate": 0.00010112286496072117, "loss": 1.1847, "step": 3060},
{"epoch": 0.5476637183954257, "grad_norm": 0.5390617251396179, "learning_rate": 0.00010081096618008699, "loss": 1.1428, "step": 3065},
{"epoch": 0.5485571339229875, "grad_norm": 0.5461019277572632, "learning_rate": 0.00010049905950956728, "loss": 1.1484, "step": 3070},
{"epoch": 0.5494505494505495, "grad_norm": 0.5262424945831299, "learning_rate": 0.00010018714798370035, "loss": 1.1679, "step": 3075},
{"epoch": 0.5503439649781113, "grad_norm": 0.4764571189880371, "learning_rate": 9.98752346370717e-05, "loss": 1.1585, "step": 3080},
{"epoch": 0.5512373805056732, "grad_norm": 0.49840331077575684, "learning_rate": 9.956332250428457e-05, "loss": 1.1572, "step": 3085},
{"epoch": 0.552130796033235, "grad_norm": 0.48860788345336914, "learning_rate": 9.925141461993043e-05, "loss": 1.126, "step": 3090},
{"epoch": 0.553024211560797, "grad_norm": 0.5242288708686829, "learning_rate": 9.893951401855932e-05, "loss": 1.1558, "step": 3095},
{"epoch": 0.5539176270883588, "grad_norm": 0.49476343393325806, "learning_rate": 9.862762373465055e-05, "loss": 1.1653, "step": 3100},
{"epoch": 0.5548110426159206, "grad_norm": 0.583516538143158, "learning_rate": 9.831574680258297e-05, "loss": 1.1412, "step": 3105},
{"epoch": 0.5557044581434826, "grad_norm": 0.5565661191940308, "learning_rate": 9.800388625660553e-05, "loss": 1.1802, "step": 3110},
{"epoch": 0.5565978736710444, "grad_norm": 0.5394544005393982, "learning_rate": 9.769204513080775e-05, "loss": 1.1222, "step": 3115},
{"epoch": 0.5574912891986062, "grad_norm": 0.5191746354103088, "learning_rate": 9.738022645909026e-05, "loss": 1.2078, "step": 3120},
{"epoch": 0.5583847047261682, "grad_norm": 0.5176223516464233, "learning_rate": 9.706843327513521e-05, "loss": 1.1232, "step": 3125},
{"epoch": 0.55927812025373, "grad_norm": 0.512442409992218, "learning_rate": 9.675666861237677e-05, "loss": 1.1489, "step": 3130},
{"epoch": 0.5601715357812919, "grad_norm": 0.5076617002487183, "learning_rate": 9.644493550397168e-05, "loss": 1.1471, "step": 3135},
{"epoch": 0.5610649513088537, "grad_norm": 0.5024195909500122, "learning_rate": 9.61332369827696e-05, "loss": 1.1461, "step": 3140},
{"epoch": 0.5619583668364156, "grad_norm": 0.5137572884559631, "learning_rate": 9.582157608128374e-05, "loss": 1.1283, "step": 3145},
{"epoch": 0.5628517823639775, "grad_norm": 0.5018545985221863, "learning_rate": 9.550995583166133e-05, "loss": 1.1518, "step": 3150},
{"epoch": 0.5637451978915393, "grad_norm": 0.4772183299064636, "learning_rate": 9.519837926565409e-05, "loss": 1.1379, "step": 3155},
{"epoch": 0.5646386134191013, "grad_norm": 0.5322695970535278, "learning_rate": 9.488684941458867e-05, "loss": 1.1748, "step": 3160},
{"epoch": 0.5655320289466631, "grad_norm": 0.5145288109779358, "learning_rate": 9.45753693093373e-05, "loss": 1.1556, "step": 3165},
{"epoch": 0.5664254444742249, "grad_norm": 0.5218089818954468, "learning_rate": 9.426394198028823e-05, "loss": 1.1221, "step": 3170},
{"epoch": 0.5673188600017869, "grad_norm": 0.49951624870300293, "learning_rate": 9.395257045731627e-05, "loss": 1.1255, "step": 3175},
{"epoch": 0.5682122755293487, "grad_norm": 0.5349987745285034, "learning_rate": 9.364125776975318e-05, "loss": 1.1647, "step": 3180},
{"epoch": 0.5691056910569106, "grad_norm": 0.5180804133415222, "learning_rate": 9.333000694635849e-05, "loss": 1.1653, "step": 3185},
{"epoch": 0.5699991065844724, "grad_norm": 0.49906909465789795, "learning_rate": 9.30188210152897e-05, "loss": 1.1138, "step": 3190},
{"epoch": 0.5708925221120343, "grad_norm": 0.5072324872016907, "learning_rate": 9.270770300407305e-05, "loss": 1.1542, "step": 3195},
{"epoch": 0.5717859376395962, "grad_norm": 0.52315354347229, "learning_rate": 9.239665593957398e-05, "loss": 1.1425, "step": 3200},
{"epoch": 0.572679353167158, "grad_norm": 0.5507954359054565, "learning_rate": 9.208568284796766e-05, "loss": 1.1462, "step": 3205},
{"epoch": 0.57357276869472, "grad_norm": 0.5008817911148071, "learning_rate": 9.177478675470956e-05, "loss": 1.1307, "step": 3210},
{"epoch": 0.5744661842222818, "grad_norm": 0.47194069623947144, "learning_rate": 9.146397068450612e-05, "loss": 1.13, "step": 3215},
{"epoch": 0.5753595997498436, "grad_norm": 0.5233185887336731, "learning_rate": 9.11532376612852e-05, "loss": 1.1596, "step": 3220},
{"epoch": 0.5762530152774055, "grad_norm": 0.5047646164894104, "learning_rate": 9.084259070816662e-05, "loss": 1.1768, "step": 3225},
{"epoch": 0.5771464308049674, "grad_norm": 0.5104073286056519, "learning_rate": 9.053203284743294e-05, "loss": 1.1276, "step": 3230},
{"epoch": 0.5780398463325293, "grad_norm": 0.48400700092315674, "learning_rate": 9.022156710049985e-05, "loss": 1.1409, "step": 3235},
{"epoch": 0.5789332618600911, "grad_norm": 0.5238465666770935, "learning_rate": 8.991119648788696e-05, "loss": 1.1316, "step": 3240},
{"epoch": 0.579826677387653, "grad_norm": 0.49904054403305054, "learning_rate": 8.960092402918819e-05, "loss": 1.1225, "step": 3245},
{"epoch": 0.5807200929152149, "grad_norm": 0.5016510486602783, "learning_rate": 8.929075274304267e-05, "loss": 1.1262, "step": 3250},
{"epoch": 0.5816135084427767, "grad_norm": 0.5361919403076172, "learning_rate": 8.898068564710508e-05, "loss": 1.1453, "step": 3255},
{"epoch": 0.5825069239703387, "grad_norm": 0.5483645796775818, "learning_rate": 8.86707257580165e-05, "loss": 1.1445, "step": 3260},
{"epoch": 0.5834003394979005, "grad_norm": 0.49957215785980225, "learning_rate": 8.836087609137502e-05, "loss": 1.1917, "step": 3265},
{"epoch": 0.5842937550254623, "grad_norm": 0.5269734859466553, "learning_rate": 8.805113966170635e-05, "loss": 1.1277, "step": 3270},
{"epoch": 0.5851871705530242, "grad_norm": 0.5271036028862, "learning_rate": 8.774151948243453e-05, "loss": 1.1263, "step": 3275},
{"epoch": 0.5860805860805861, "grad_norm": 0.5119196772575378, "learning_rate": 8.743201856585256e-05, "loss": 1.1596, "step": 3280},
{"epoch": 0.5869740016081479, "grad_norm": 0.5136052370071411, "learning_rate": 8.712263992309318e-05, "loss": 1.1191, "step": 3285},
{"epoch": 0.5878674171357098, "grad_norm": 0.500639796257019, "learning_rate": 8.681338656409951e-05, "loss": 1.1134, "step": 3290},
{"epoch": 0.5887608326632717, "grad_norm": 0.5073699951171875, "learning_rate": 8.650426149759578e-05, "loss": 1.1346, "step": 3295},
{"epoch": 0.5896542481908336, "grad_norm": 0.5222281813621521, "learning_rate": 8.619526773105813e-05, "loss": 1.1575, "step": 3300},
{"epoch": 0.5905476637183954, "grad_norm": 0.5326797366142273, "learning_rate": 8.588640827068512e-05, "loss": 1.1509, "step": 3305},
{"epoch": 0.5914410792459573, "grad_norm": 0.49078214168548584, "learning_rate": 8.557768612136882e-05, "loss": 1.0998, "step": 3310},
{"epoch": 0.5923344947735192, "grad_norm": 0.5205763578414917, "learning_rate": 8.52691042866653e-05, "loss": 1.1247, "step": 3315},
{"epoch": 0.593227910301081, "grad_norm": 0.49795159697532654, "learning_rate": 8.496066576876556e-05, "loss": 1.1234, "step": 3320},
{"epoch": 0.5941213258286429, "grad_norm": 0.5376335382461548, "learning_rate": 8.465237356846624e-05, "loss": 1.1647, "step": 3325},
{"epoch": 0.5950147413562048, "grad_norm": 0.5440948009490967, "learning_rate": 8.434423068514048e-05, "loss": 1.1377, "step": 3330},
{"epoch": 0.5959081568837666, "grad_norm": 0.5351749062538147, "learning_rate": 8.403624011670871e-05, "loss": 1.1429, "step": 3335},
{"epoch": 0.5968015724113285, "grad_norm": 0.5545377731323242, "learning_rate": 8.372840485960947e-05, "loss": 1.1869, "step": 3340},
{"epoch": 0.5976949879388904, "grad_norm": 0.5247533917427063, "learning_rate": 8.342072790877033e-05, "loss": 1.138, "step": 3345},
{"epoch": 0.5985884034664523, "grad_norm": 0.5115054845809937, "learning_rate": 8.311321225757858e-05, "loss": 1.1147, "step": 3350},
{"epoch": 0.5994818189940141, "grad_norm": 0.49062785506248474, "learning_rate": 8.280586089785236e-05, "loss": 1.1498, "step": 3355},
{"epoch": 0.600375234521576, "grad_norm": 0.5327452421188354, "learning_rate": 8.249867681981139e-05, "loss": 1.113, "step": 3360},
{"epoch": 0.6012686500491379, "grad_norm": 0.5428333878517151, "learning_rate": 8.219166301204781e-05, "loss": 1.1632, "step": 3365},
{"epoch": 0.6021620655766997, "grad_norm": 0.5033100247383118, "learning_rate": 8.188482246149736e-05, "loss": 1.1484, "step": 3370},
{"epoch": 0.6030554811042615, "grad_norm": 0.5400857329368591, "learning_rate": 8.157815815341002e-05, "loss": 1.1556, "step": 3375},
{"epoch": 0.6039488966318235, "grad_norm": 0.5341722369194031, "learning_rate": 8.127167307132119e-05, "loss": 1.1387, "step": 3380},
{"epoch": 0.6048423121593853, "grad_norm": 0.5450566411018372, "learning_rate": 8.096537019702255e-05, "loss": 1.1598, "step": 3385},
{"epoch": 0.6057357276869472, "grad_norm": 0.496113121509552, "learning_rate": 8.065925251053307e-05, "loss": 1.1664, "step": 3390},
{"epoch": 0.6066291432145091, "grad_norm": 0.5090356469154358, "learning_rate": 8.035332299007014e-05, "loss": 1.1338, "step": 3395},
{"epoch": 0.607522558742071, "grad_norm": 0.48691728711128235, "learning_rate": 8.004758461202023e-05, "loss": 1.1481, "step": 3400},
{"epoch": 0.6084159742696328, "grad_norm": 0.4871600270271301, "learning_rate": 7.974204035091046e-05, "loss": 1.1227, "step": 3405},
{"epoch": 0.6093093897971946, "grad_norm": 0.5205426216125488, "learning_rate": 7.943669317937923e-05, "loss": 1.1338, "step": 3410},
{"epoch": 0.6102028053247566, "grad_norm": 0.5116109848022461, "learning_rate": 7.913154606814753e-05, "loss": 1.1329, "step": 3415},
{"epoch": 0.6110962208523184, "grad_norm": 0.5520155429840088, "learning_rate": 7.882660198598993e-05, "loss": 1.1526, "step": 3420},
{"epoch": 0.6119896363798802, "grad_norm": 0.5377830266952515, "learning_rate": 7.852186389970571e-05, "loss": 1.1676, "step": 3425},
{"epoch": 0.6128830519074422, "grad_norm": 0.5186334252357483, "learning_rate": 7.821733477409005e-05, "loss": 1.1049, "step": 3430},
{"epoch": 0.613776467435004, "grad_norm": 0.5580330491065979, "learning_rate": 7.791301757190516e-05, "loss": 1.1103, "step": 3435},
{"epoch": 0.6146698829625659, "grad_norm": 0.5414636135101318, "learning_rate": 7.760891525385146e-05, "loss": 1.1243, "step": 3440},
{"epoch": 0.6155632984901278, "grad_norm": 0.494886577129364, "learning_rate": 7.730503077853873e-05, "loss": 1.1476, "step": 3445},
{"epoch": 0.6164567140176896, "grad_norm": 0.4783135950565338, "learning_rate": 7.700136710245731e-05, "loss": 1.0874, "step": 3450},
{"epoch": 0.6173501295452515, "grad_norm": 0.5152401924133301, "learning_rate": 7.669792717994946e-05, "loss": 1.1166, "step": 3455},
{"epoch": 0.6182435450728133, "grad_norm": 0.5359469056129456, "learning_rate": 7.639471396318057e-05, "loss": 1.1519, "step": 3460},
{"epoch": 0.6191369606003753, "grad_norm": 0.5105694532394409, "learning_rate": 7.609173040211035e-05, "loss": 1.1516, "step": 3465},
{"epoch": 0.6200303761279371, "grad_norm": 0.5673738718032837, "learning_rate": 7.578897944446417e-05, "loss": 1.168, "step": 3470},
{"epoch": 0.6209237916554989, "grad_norm": 0.5316708087921143, "learning_rate": 7.548646403570449e-05, "loss": 1.1237, "step": 3475},
{"epoch": 0.6218172071830609, "grad_norm": 0.5048865079879761, "learning_rate": 7.518418711900206e-05, "loss": 1.1546, "step": 3480},
{"epoch": 0.6227106227106227, "grad_norm": 0.5034053921699524, "learning_rate": 7.488215163520729e-05, "loss": 1.114, "step": 3485},
{"epoch": 0.6236040382381846, "grad_norm": 0.5447829365730286, "learning_rate": 7.458036052282185e-05, "loss": 1.0924, "step": 3490},
{"epoch": 0.6244974537657465, "grad_norm": 0.5631914138793945, "learning_rate": 7.427881671796973e-05, "loss": 1.1281, "step": 3495},
{"epoch": 0.6253908692933083, "grad_norm": 0.5400162935256958, "learning_rate": 7.3977523154369e-05, "loss": 1.1529, "step": 3500},
{"epoch": 0.6262842848208702, "grad_norm": 0.5325872302055359, "learning_rate": 7.367648276330305e-05, "loss": 1.1596, "step": 3505},
{"epoch": 0.627177700348432, "grad_norm": 0.5262747406959534, "learning_rate": 7.337569847359226e-05, "loss": 1.1311, "step": 3510},
{"epoch": 0.628071115875994, "grad_norm": 0.530774712562561, "learning_rate": 7.307517321156528e-05, "loss": 1.1147, "step": 3515},
{"epoch": 0.6289645314035558, "grad_norm": 0.5109870433807373, "learning_rate": 7.277490990103079e-05, "loss": 1.132, "step": 3520},
{"epoch": 0.6298579469311176, "grad_norm": 0.5231294631958008, "learning_rate": 7.247491146324887e-05, "loss": 1.1106, "step": 3525},
{"epoch": 0.6307513624586796, "grad_norm": 0.5209240913391113, "learning_rate": 7.217518081690265e-05, "loss": 1.1614, "step": 3530},
{"epoch": 0.6316447779862414, "grad_norm": 0.5387827157974243, "learning_rate": 7.187572087807e-05, "loss": 1.1656, "step": 3535},
{"epoch": 0.6325381935138032, "grad_norm": 0.5082910060882568, "learning_rate": 7.157653456019504e-05, "loss": 1.1274, "step": 3540},
{"epoch": 0.6334316090413651, "grad_norm": 0.5118305683135986, "learning_rate": 7.127762477405976e-05, "loss": 1.1485, "step": 3545},
{"epoch": 0.634325024568927, "grad_norm": 0.5459609031677246, "learning_rate": 7.097899442775584e-05, "loss": 1.1253, "step": 3550},
{"epoch": 0.6352184400964889, "grad_norm": 0.5178401470184326, "learning_rate": 7.068064642665631e-05, "loss": 1.1465, "step": 3555},
{"epoch": 0.6361118556240507, "grad_norm": 0.5017228722572327, "learning_rate": 7.038258367338723e-05,
|
"loss": 1.1346, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.6370052711516127, |
|
"grad_norm": 0.5513466000556946, |
|
"learning_rate": 7.008480906779948e-05, |
|
"loss": 1.1423, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.6378986866791745, |
|
"grad_norm": 0.49276652932167053, |
|
"learning_rate": 6.97873255069406e-05, |
|
"loss": 1.1522, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.6387921022067363, |
|
"grad_norm": 0.5311774015426636, |
|
"learning_rate": 6.949013588502651e-05, |
|
"loss": 1.1291, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.6396855177342983, |
|
"grad_norm": 0.4871313273906708, |
|
"learning_rate": 6.919324309341341e-05, |
|
"loss": 1.1199, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.6405789332618601, |
|
"grad_norm": 0.5568504333496094, |
|
"learning_rate": 6.889665002056966e-05, |
|
"loss": 1.1184, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.6414723487894219, |
|
"grad_norm": 0.4711260199546814, |
|
"learning_rate": 6.860035955204767e-05, |
|
"loss": 1.1204, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.6423657643169838, |
|
"grad_norm": 0.5173743963241577, |
|
"learning_rate": 6.830437457045568e-05, |
|
"loss": 1.1491, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.6432591798445457, |
|
"grad_norm": 0.5250765085220337, |
|
"learning_rate": 6.800869795543007e-05, |
|
"loss": 1.1086, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.6441525953721076, |
|
"grad_norm": 0.5202131867408752, |
|
"learning_rate": 6.771333258360694e-05, |
|
"loss": 1.1355, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.6450460108996694, |
|
"grad_norm": 0.5153080821037292, |
|
"learning_rate": 6.74182813285944e-05, |
|
"loss": 1.1181, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.6459394264272313, |
|
"grad_norm": 0.5040249228477478, |
|
"learning_rate": 6.712354706094452e-05, |
|
"loss": 1.1061, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.6468328419547932, |
|
"grad_norm": 0.511741042137146, |
|
"learning_rate": 6.682913264812533e-05, |
|
"loss": 1.14, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.647726257482355, |
|
"grad_norm": 0.5170619487762451, |
|
"learning_rate": 6.653504095449305e-05, |
|
"loss": 1.1019, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.648619673009917, |
|
"grad_norm": 0.5282120704650879, |
|
"learning_rate": 6.624127484126421e-05, |
|
"loss": 1.1406, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.6495130885374788, |
|
"grad_norm": 0.5090754628181458, |
|
"learning_rate": 6.594783716648769e-05, |
|
"loss": 1.1396, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.6504065040650406, |
|
"grad_norm": 0.48920685052871704, |
|
"learning_rate": 6.565473078501698e-05, |
|
"loss": 1.1186, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.6512999195926025, |
|
"grad_norm": 0.5252284407615662, |
|
"learning_rate": 6.536195854848248e-05, |
|
"loss": 1.0744, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.6521933351201644, |
|
"grad_norm": 0.5125118494033813, |
|
"learning_rate": 6.506952330526373e-05, |
|
"loss": 1.1227, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.6530867506477263, |
|
"grad_norm": 0.5054253339767456, |
|
"learning_rate": 6.477742790046152e-05, |
|
"loss": 1.1191, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.6539801661752881, |
|
"grad_norm": 0.5262144804000854, |
|
"learning_rate": 6.448567517587052e-05, |
|
"loss": 1.1542, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.65487358170285, |
|
"grad_norm": 0.5490496158599854, |
|
"learning_rate": 6.419426796995137e-05, |
|
"loss": 1.1468, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.6557669972304119, |
|
"grad_norm": 0.5373784303665161, |
|
"learning_rate": 6.390320911780317e-05, |
|
"loss": 1.1393, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.6566604127579737, |
|
"grad_norm": 0.5126230716705322, |
|
"learning_rate": 6.36125014511359e-05, |
|
"loss": 1.1385, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.6575538282855357, |
|
"grad_norm": 0.5112528204917908, |
|
"learning_rate": 6.332214779824288e-05, |
|
"loss": 1.1505, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.6584472438130975, |
|
"grad_norm": 0.539935827255249, |
|
"learning_rate": 6.303215098397321e-05, |
|
"loss": 1.1044, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 0.5397330522537231, |
|
"learning_rate": 6.274251382970427e-05, |
|
"loss": 1.1336, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.6602340748682212, |
|
"grad_norm": 0.5695128440856934, |
|
"learning_rate": 6.245323915331439e-05, |
|
"loss": 1.1468, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.6611274903957831, |
|
"grad_norm": 0.5581777095794678, |
|
"learning_rate": 6.216432976915527e-05, |
|
"loss": 1.1658, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.662020905923345, |
|
"grad_norm": 0.5400391817092896, |
|
"learning_rate": 6.187578848802475e-05, |
|
"loss": 1.1089, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.6629143214509068, |
|
"grad_norm": 0.5398588180541992, |
|
"learning_rate": 6.15876181171394e-05, |
|
"loss": 1.1412, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.6638077369784687, |
|
"grad_norm": 0.5350549817085266, |
|
"learning_rate": 6.129982146010713e-05, |
|
"loss": 1.1489, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.6647011525060306, |
|
"grad_norm": 0.49134427309036255, |
|
"learning_rate": 6.101240131690009e-05, |
|
"loss": 1.1069, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.6655945680335924, |
|
"grad_norm": 0.526871383190155, |
|
"learning_rate": 6.072536048382726e-05, |
|
"loss": 1.1264, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.6664879835611542, |
|
"grad_norm": 0.539218008518219, |
|
"learning_rate": 6.043870175350732e-05, |
|
"loss": 1.1582, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.6673813990887162, |
|
"grad_norm": 0.5474951267242432, |
|
"learning_rate": 6.0152427914841544e-05, |
|
"loss": 1.112, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.668274814616278, |
|
"grad_norm": 0.49627193808555603, |
|
"learning_rate": 5.9866541752986485e-05, |
|
"loss": 1.1059, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.6691682301438399, |
|
"grad_norm": 0.5210728645324707, |
|
"learning_rate": 5.958104604932706e-05, |
|
"loss": 1.1543, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.6700616456714018, |
|
"grad_norm": 0.510959267616272, |
|
"learning_rate": 5.9295943581449385e-05, |
|
"loss": 1.1369, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.6709550611989636, |
|
"grad_norm": 0.5323443412780762, |
|
"learning_rate": 5.901123712311385e-05, |
|
"loss": 1.1186, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.6718484767265255, |
|
"grad_norm": 0.5052646398544312, |
|
"learning_rate": 5.8726929444228016e-05, |
|
"loss": 1.1084, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.6727418922540874, |
|
"grad_norm": 0.5128947496414185, |
|
"learning_rate": 5.844302331081972e-05, |
|
"loss": 1.1218, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.6736353077816493, |
|
"grad_norm": 0.48404374718666077, |
|
"learning_rate": 5.8159521485010214e-05, |
|
"loss": 1.1055, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.6745287233092111, |
|
"grad_norm": 0.5284962058067322, |
|
"learning_rate": 5.787642672498719e-05, |
|
"loss": 1.1089, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.6754221388367729, |
|
"grad_norm": 0.49954989552497864, |
|
"learning_rate": 5.759374178497801e-05, |
|
"loss": 1.0823, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.6763155543643349, |
|
"grad_norm": 0.5159726142883301, |
|
"learning_rate": 5.731146941522292e-05, |
|
"loss": 1.1137, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.6772089698918967, |
|
"grad_norm": 0.5402414202690125, |
|
"learning_rate": 5.702961236194826e-05, |
|
"loss": 1.1067, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.6781023854194586, |
|
"grad_norm": 0.5399303436279297, |
|
"learning_rate": 5.674817336733975e-05, |
|
"loss": 1.106, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.6789958009470205, |
|
"grad_norm": 0.557214081287384, |
|
"learning_rate": 5.646715516951584e-05, |
|
"loss": 1.111, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.6798892164745823, |
|
"grad_norm": 0.5638577342033386, |
|
"learning_rate": 5.618656050250099e-05, |
|
"loss": 1.1255, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.6807826320021442, |
|
"grad_norm": 0.5255394577980042, |
|
"learning_rate": 5.5906392096199255e-05, |
|
"loss": 1.0976, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.6816760475297061, |
|
"grad_norm": 0.5245288610458374, |
|
"learning_rate": 5.562665267636751e-05, |
|
"loss": 1.1709, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.682569463057268, |
|
"grad_norm": 0.5460274815559387, |
|
"learning_rate": 5.5347344964588996e-05, |
|
"loss": 1.0736, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.6834628785848298, |
|
"grad_norm": 0.5235292911529541, |
|
"learning_rate": 5.506847167824696e-05, |
|
"loss": 1.1055, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.6843562941123916, |
|
"grad_norm": 0.5296208262443542, |
|
"learning_rate": 5.479003553049806e-05, |
|
"loss": 1.1081, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.6852497096399536, |
|
"grad_norm": 0.48945724964141846, |
|
"learning_rate": 5.4512039230246035e-05, |
|
"loss": 1.0935, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.6861431251675154, |
|
"grad_norm": 0.5146022439002991, |
|
"learning_rate": 5.42344854821154e-05, |
|
"loss": 1.128, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.6870365406950772, |
|
"grad_norm": 0.5302464962005615, |
|
"learning_rate": 5.395737698642503e-05, |
|
"loss": 1.1157, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.6879299562226392, |
|
"grad_norm": 0.502776026725769, |
|
"learning_rate": 5.368071643916194e-05, |
|
"loss": 1.1372, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.688823371750201, |
|
"grad_norm": 0.47847309708595276, |
|
"learning_rate": 5.3404506531955146e-05, |
|
"loss": 1.0927, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.6897167872777629, |
|
"grad_norm": 0.5378836989402771, |
|
"learning_rate": 5.3128749952049284e-05, |
|
"loss": 1.1137, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.6906102028053247, |
|
"grad_norm": 0.5127781629562378, |
|
"learning_rate": 5.2853449382278605e-05, |
|
"loss": 1.1158, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.6915036183328866, |
|
"grad_norm": 0.5223949551582336, |
|
"learning_rate": 5.2578607501040863e-05, |
|
"loss": 1.1043, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.6923970338604485, |
|
"grad_norm": 0.5131104588508606, |
|
"learning_rate": 5.2304226982271174e-05, |
|
"loss": 1.1444, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.6932904493880103, |
|
"grad_norm": 0.5290147662162781, |
|
"learning_rate": 5.203031049541621e-05, |
|
"loss": 1.1286, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.6941838649155723, |
|
"grad_norm": 0.5196212530136108, |
|
"learning_rate": 5.175686070540786e-05, |
|
"loss": 1.1189, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.6950772804431341, |
|
"grad_norm": 0.517926037311554, |
|
"learning_rate": 5.148388027263769e-05, |
|
"loss": 1.1648, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 0.5101890563964844, |
|
"learning_rate": 5.1211371852930766e-05, |
|
"loss": 1.0711, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.6968641114982579, |
|
"grad_norm": 0.5416128039360046, |
|
"learning_rate": 5.0939338097520095e-05, |
|
"loss": 1.1141, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.6977575270258197, |
|
"grad_norm": 0.5422283411026001, |
|
"learning_rate": 5.0667781653020584e-05, |
|
"loss": 1.0783, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.6986509425533816, |
|
"grad_norm": 0.5199434161186218, |
|
"learning_rate": 5.039670516140338e-05, |
|
"loss": 1.1424, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.6995443580809434, |
|
"grad_norm": 0.5065299868583679, |
|
"learning_rate": 5.012611125997018e-05, |
|
"loss": 1.0881, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.7004377736085053, |
|
"grad_norm": 0.49652373790740967, |
|
"learning_rate": 4.9856002581327565e-05, |
|
"loss": 1.1281, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.7013311891360672, |
|
"grad_norm": 0.5640977621078491, |
|
"learning_rate": 4.958638175336137e-05, |
|
"loss": 1.1016, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.702224604663629, |
|
"grad_norm": 0.5382977724075317, |
|
"learning_rate": 4.931725139921126e-05, |
|
"loss": 1.1271, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.703118020191191, |
|
"grad_norm": 0.5227957963943481, |
|
"learning_rate": 4.9048614137244865e-05, |
|
"loss": 1.0931, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.7040114357187528, |
|
"grad_norm": 0.5395195484161377, |
|
"learning_rate": 4.878047258103267e-05, |
|
"loss": 1.1264, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.7049048512463146, |
|
"grad_norm": 0.5258668065071106, |
|
"learning_rate": 4.8512829339322375e-05, |
|
"loss": 1.1525, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.7057982667738766, |
|
"grad_norm": 0.5186535716056824, |
|
"learning_rate": 4.8245687016013696e-05, |
|
"loss": 1.119, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7066916823014384, |
|
"grad_norm": 0.4964883327484131, |
|
"learning_rate": 4.797904821013278e-05, |
|
"loss": 1.1118, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.7075850978290003, |
|
"grad_norm": 0.4993223547935486, |
|
"learning_rate": 4.771291551580712e-05, |
|
"loss": 1.1115, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.7084785133565621, |
|
"grad_norm": 0.5137820243835449, |
|
"learning_rate": 4.744729152224024e-05, |
|
"loss": 1.0969, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.709371928884124, |
|
"grad_norm": 0.49753323197364807, |
|
"learning_rate": 4.71821788136865e-05, |
|
"loss": 1.0846, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.7102653444116859, |
|
"grad_norm": 0.5026699900627136, |
|
"learning_rate": 4.691757996942607e-05, |
|
"loss": 1.1105, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.7111587599392477, |
|
"grad_norm": 0.5345607995986938, |
|
"learning_rate": 4.665349756373957e-05, |
|
"loss": 1.1079, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.7120521754668097, |
|
"grad_norm": 0.5438992977142334, |
|
"learning_rate": 4.6389934165883306e-05, |
|
"loss": 1.1048, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.7129455909943715, |
|
"grad_norm": 0.48879095911979675, |
|
"learning_rate": 4.6126892340064096e-05, |
|
"loss": 1.1043, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.7138390065219333, |
|
"grad_norm": 0.5040327310562134, |
|
"learning_rate": 4.586437464541451e-05, |
|
"loss": 1.1026, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.7147324220494953, |
|
"grad_norm": 0.5185865759849548, |
|
"learning_rate": 4.560238363596771e-05, |
|
"loss": 1.0629, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.7156258375770571, |
|
"grad_norm": 0.4688703119754791, |
|
"learning_rate": 4.53409218606328e-05, |
|
"loss": 1.1158, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.716519253104619, |
|
"grad_norm": 0.5067079663276672, |
|
"learning_rate": 4.5079991863169966e-05, |
|
"loss": 1.0912, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.7174126686321808, |
|
"grad_norm": 0.5316655039787292, |
|
"learning_rate": 4.481959618216568e-05, |
|
"loss": 1.0901, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.7183060841597427, |
|
"grad_norm": 0.5138748288154602, |
|
"learning_rate": 4.455973735100818e-05, |
|
"loss": 1.1262, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.7191994996873046, |
|
"grad_norm": 0.5119633674621582, |
|
"learning_rate": 4.43004178978626e-05, |
|
"loss": 1.125, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.7200929152148664, |
|
"grad_norm": 0.5808505415916443, |
|
"learning_rate": 4.404164034564641e-05, |
|
"loss": 1.1447, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.7209863307424283, |
|
"grad_norm": 0.5414739847183228, |
|
"learning_rate": 4.378340721200501e-05, |
|
"loss": 1.1063, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.7218797462699902, |
|
"grad_norm": 0.5619556903839111, |
|
"learning_rate": 4.3525721009287215e-05, |
|
"loss": 1.1123, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.722773161797552, |
|
"grad_norm": 0.5197097063064575, |
|
"learning_rate": 4.326858424452063e-05, |
|
"loss": 1.0999, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.7236665773251139, |
|
"grad_norm": 0.5409570336341858, |
|
"learning_rate": 4.301199941938744e-05, |
|
"loss": 1.0848, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.7245599928526758, |
|
"grad_norm": 0.580436110496521, |
|
"learning_rate": 4.275596903020001e-05, |
|
"loss": 1.1184, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.7254534083802376, |
|
"grad_norm": 0.5301364660263062, |
|
"learning_rate": 4.250049556787655e-05, |
|
"loss": 1.1118, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.7263468239077995, |
|
"grad_norm": 0.5562632083892822, |
|
"learning_rate": 4.2245581517917065e-05, |
|
"loss": 1.0929, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.7272402394353614, |
|
"grad_norm": 0.5643694996833801, |
|
"learning_rate": 4.199122936037889e-05, |
|
"loss": 1.128, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.7281336549629233, |
|
"grad_norm": 0.5907604098320007, |
|
"learning_rate": 4.173744156985283e-05, |
|
"loss": 1.1354, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.7290270704904851, |
|
"grad_norm": 0.5513320565223694, |
|
"learning_rate": 4.148422061543884e-05, |
|
"loss": 1.1238, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.729920486018047, |
|
"grad_norm": 0.5430623292922974, |
|
"learning_rate": 4.123156896072219e-05, |
|
"loss": 1.0839, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.7308139015456089, |
|
"grad_norm": 0.5164535045623779, |
|
"learning_rate": 4.097948906374951e-05, |
|
"loss": 1.1311, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 0.5333124399185181, |
|
"learning_rate": 4.0727983377004716e-05, |
|
"loss": 1.1088, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 0.5494021773338318, |
|
"learning_rate": 4.047705434738527e-05, |
|
"loss": 1.0739, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.7334941481282945, |
|
"grad_norm": 0.5382459759712219, |
|
"learning_rate": 4.02267044161783e-05, |
|
"loss": 1.102, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.7343875636558563, |
|
"grad_norm": 0.5144065022468567, |
|
"learning_rate": 3.997693601903688e-05, |
|
"loss": 1.1283, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.7352809791834182, |
|
"grad_norm": 0.5298936367034912, |
|
"learning_rate": 3.9727751585956477e-05, |
|
"loss": 1.1328, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.7361743947109801, |
|
"grad_norm": 0.5441993474960327, |
|
"learning_rate": 3.9479153541251056e-05, |
|
"loss": 1.1399, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.737067810238542, |
|
"grad_norm": 0.5025814175605774, |
|
"learning_rate": 3.923114430352958e-05, |
|
"loss": 1.126, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.7379612257661038, |
|
"grad_norm": 0.512550950050354, |
|
"learning_rate": 3.8983726285672536e-05, |
|
"loss": 1.1322, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.7388546412936657, |
|
"grad_norm": 0.48689234256744385, |
|
"learning_rate": 3.8736901894808575e-05, |
|
"loss": 1.0553, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.7397480568212276, |
|
"grad_norm": 0.5119283199310303, |
|
"learning_rate": 3.849067353229078e-05, |
|
"loss": 1.1224, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.7406414723487894, |
|
"grad_norm": 0.5378916263580322, |
|
"learning_rate": 3.824504359367355e-05, |
|
"loss": 1.1229, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.7415348878763512, |
|
"grad_norm": 0.524563193321228, |
|
"learning_rate": 3.80000144686892e-05, |
|
"loss": 1.1329, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.7424283034039132, |
|
"grad_norm": 0.5638777017593384, |
|
"learning_rate": 3.775558854122475e-05, |
|
"loss": 1.0796, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.743321718931475, |
|
"grad_norm": 0.5309896469116211, |
|
"learning_rate": 3.7511768189298746e-05, |
|
"loss": 1.088, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.7442151344590369, |
|
"grad_norm": 0.5442638397216797, |
|
"learning_rate": 3.726855578503804e-05, |
|
"loss": 1.0958, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.7451085499865988, |
|
"grad_norm": 0.5414631962776184, |
|
"learning_rate": 3.70259536946548e-05, |
|
"loss": 1.103, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.7460019655141606, |
|
"grad_norm": 0.538829505443573, |
|
"learning_rate": 3.678396427842334e-05, |
|
"loss": 1.1218, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.7468953810417225, |
|
"grad_norm": 0.516124963760376, |
|
"learning_rate": 3.6542589890657476e-05, |
|
"loss": 1.0967, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.7477887965692843, |
|
"grad_norm": 0.5191131234169006, |
|
"learning_rate": 3.630183287968727e-05, |
|
"loss": 1.1197, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.7486822120968463, |
|
"grad_norm": 0.5277869701385498, |
|
"learning_rate": 3.606169558783635e-05, |
|
"loss": 1.0792, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.7495756276244081, |
|
"grad_norm": 0.5471330285072327, |
|
"learning_rate": 3.5822180351399136e-05, |
|
"loss": 1.0985, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.7504690431519699, |
|
"grad_norm": 0.5002536773681641, |
|
"learning_rate": 3.5583289500618e-05, |
|
"loss": 1.0913, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.7513624586795319, |
|
"grad_norm": 0.5431557297706604, |
|
"learning_rate": 3.53450253596608e-05, |
|
"loss": 1.0816, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.7522558742070937, |
|
"grad_norm": 0.5701403617858887, |
|
"learning_rate": 3.510739024659802e-05, |
|
"loss": 1.1358, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.7531492897346556, |
|
"grad_norm": 0.5729346871376038, |
|
"learning_rate": 3.487038647338038e-05, |
|
"loss": 1.0999, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.7540427052622175, |
|
"grad_norm": 0.5427317023277283, |
|
"learning_rate": 3.463401634581631e-05, |
|
"loss": 1.1184, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.7549361207897793, |
|
"grad_norm": 0.5459471344947815, |
|
"learning_rate": 3.4398282163549414e-05, |
|
"loss": 1.1179, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.7558295363173412, |
|
"grad_norm": 0.5358048677444458, |
|
"learning_rate": 3.416318622003634e-05, |
|
"loss": 1.0815, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.756722951844903, |
|
"grad_norm": 0.5081732869148254, |
|
"learning_rate": 3.39287308025242e-05, |
|
"loss": 1.1027, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.757616367372465, |
|
"grad_norm": 0.5418072938919067, |
|
"learning_rate": 3.369491819202849e-05, |
|
"loss": 1.112, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.7585097829000268, |
|
"grad_norm": 0.5014515519142151, |
|
"learning_rate": 3.34617506633108e-05, |
|
"loss": 1.1101, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.7594031984275886, |
|
"grad_norm": 0.5339967608451843, |
|
"learning_rate": 3.322923048485672e-05, |
|
"loss": 1.0815, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.7602966139551506, |
|
"grad_norm": 0.5515729188919067, |
|
"learning_rate": 3.2997359918853845e-05, |
|
"loss": 1.1077, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.7611900294827124, |
|
"grad_norm": 0.5655061602592468, |
|
"learning_rate": 3.276614122116962e-05, |
|
"loss": 1.0921, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.7620834450102743, |
|
"grad_norm": 0.5367369055747986, |
|
"learning_rate": 3.2535576641329514e-05, |
|
"loss": 1.091, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.7629768605378362, |
|
"grad_norm": 0.5201234817504883, |
|
"learning_rate": 3.230566842249497e-05, |
|
"loss": 1.0846, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.763870276065398, |
|
"grad_norm": 0.5395834445953369, |
|
"learning_rate": 3.2076418801441886e-05, |
|
"loss": 1.0972, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.7647636915929599, |
|
"grad_norm": 0.5469521284103394, |
|
"learning_rate": 3.1847830008538545e-05, |
|
"loss": 1.0966, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.7656571071205217, |
|
"grad_norm": 0.5283955931663513, |
|
"learning_rate": 3.1619904267724065e-05, |
|
"loss": 1.084, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.7665505226480837, |
|
"grad_norm": 0.5325857996940613, |
|
"learning_rate": 3.139264379648671e-05, |
|
"loss": 1.0797, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.7674439381756455, |
|
"grad_norm": 0.5412785410881042, |
|
"learning_rate": 3.116605080584235e-05, |
|
"loss": 1.0911, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.7683373537032073, |
|
"grad_norm": 0.5118827819824219, |
|
"learning_rate": 3.0940127500313e-05, |
|
"loss": 1.1005, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 0.5167849063873291, |
|
"learning_rate": 3.071487607790524e-05, |
|
"loss": 1.143, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.7701241847583311, |
|
"grad_norm": 0.5016911625862122, |
|
"learning_rate": 3.049029873008893e-05, |
|
"loss": 1.0874, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.771017600285893, |
|
"grad_norm": 0.5407574772834778, |
|
"learning_rate": 3.0266397641775835e-05, |
|
"loss": 1.106, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.7719110158134549, |
|
"grad_norm": 0.5007250905036926, |
|
"learning_rate": 3.004317499129845e-05, |
|
"loss": 1.1162, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.7728044313410167, |
|
"grad_norm": 0.5197978019714355, |
|
"learning_rate": 2.9820632950388695e-05, |
|
"loss": 1.0865, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.7736978468685786, |
|
"grad_norm": 0.5517399311065674, |
|
"learning_rate": 2.9598773684156878e-05, |
|
"loss": 1.1008, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.7745912623961404, |
|
"grad_norm": 0.5117592215538025, |
|
"learning_rate": 2.9377599351070595e-05, |
|
"loss": 1.1244, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.7754846779237023, |
|
"grad_norm": 0.5437847971916199, |
|
"learning_rate": 2.915711210293367e-05, |
|
"loss": 1.1578, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.7763780934512642, |
|
"grad_norm": 0.5368949770927429, |
|
"learning_rate": 2.8937314084865407e-05, |
|
"loss": 1.1185, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.777271508978826, |
|
"grad_norm": 0.5332816243171692, |
|
"learning_rate": 2.8718207435279486e-05, |
|
"loss": 1.1008, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.778164924506388, |
|
"grad_norm": 0.5594480633735657, |
|
"learning_rate": 2.849979428586331e-05, |
|
"loss": 1.0855, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.7790583400339498, |
|
"grad_norm": 0.5220233201980591, |
|
"learning_rate": 2.828207676155722e-05, |
|
"loss": 1.0946, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.7799517555615116, |
|
"grad_norm": 0.5092322826385498, |
|
"learning_rate": 2.80650569805338e-05, |
|
"loss": 1.0925, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.7808451710890735, |
|
"grad_norm": 0.5742801427841187, |
|
"learning_rate": 2.784873705417731e-05, |
|
"loss": 1.0748, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.7817385866166354, |
|
"grad_norm": 0.5175203680992126, |
|
"learning_rate": 2.7633119087063152e-05, |
|
"loss": 1.1221, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.7826320021441973, |
|
"grad_norm": 0.5183749794960022, |
|
"learning_rate": 2.7418205176937327e-05, |
|
"loss": 1.1412, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.7835254176717591, |
|
"grad_norm": 0.5045965909957886, |
|
"learning_rate": 2.7203997414696104e-05, |
|
"loss": 1.1163, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.784418833199321, |
|
"grad_norm": 0.5237119793891907, |
|
"learning_rate": 2.6990497884365586e-05, |
|
"loss": 1.0593, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.7853122487268829, |
|
"grad_norm": 0.5403294563293457, |
|
"learning_rate": 2.67777086630816e-05, |
|
"loss": 1.1331, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.7862056642544447, |
|
"grad_norm": 0.5277557373046875, |
|
"learning_rate": 2.6565631821069304e-05, |
|
"loss": 1.0992, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.7870990797820067, |
|
"grad_norm": 0.5190049409866333, |
|
"learning_rate": 2.6354269421623112e-05, |
|
"loss": 1.131, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.7879924953095685, |
|
"grad_norm": 0.4882740080356598, |
|
"learning_rate": 2.6143623521086647e-05, |
|
"loss": 1.0654, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.7888859108371303, |
|
"grad_norm": 0.5139947533607483, |
|
"learning_rate": 2.5933696168832743e-05, |
|
"loss": 1.0843, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.7897793263646922, |
|
"grad_norm": 0.48566868901252747, |
|
"learning_rate": 2.5724489407243447e-05, |
|
"loss": 1.1022, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.7906727418922541, |
|
"grad_norm": 0.5145934224128723, |
|
"learning_rate": 2.5516005271690203e-05, |
|
"loss": 1.1237, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.791566157419816, |
|
"grad_norm": 0.51795893907547, |
|
"learning_rate": 2.530824579051403e-05, |
|
"loss": 1.0836, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.7924595729473778, |
|
"grad_norm": 0.5562942624092102, |
|
"learning_rate": 2.510121298500573e-05, |
|
"loss": 1.1093, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.7933529884749397, |
|
"grad_norm": 0.5125170350074768, |
|
"learning_rate": 2.4894908869386424e-05, |
|
"loss": 1.1064, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.7942464040025016, |
|
"grad_norm": 0.5201259851455688, |
|
"learning_rate": 2.4689335450787675e-05, |
|
"loss": 1.1108, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.7951398195300634, |
|
"grad_norm": 0.5387787222862244, |
|
"learning_rate": 2.4484494729232155e-05, |
|
"loss": 1.0988, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.7960332350576254, |
|
"grad_norm": 0.5109546184539795, |
|
"learning_rate": 2.428038869761412e-05, |
|
"loss": 1.0924, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.7969266505851872, |
|
"grad_norm": 0.524856448173523, |
|
"learning_rate": 2.4077019341680042e-05, |
|
"loss": 1.0735, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.797820066112749, |
|
"grad_norm": 0.5550493001937866, |
|
"learning_rate": 2.387438864000926e-05, |
|
"loss": 1.1041, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.7987134816403109, |
|
"grad_norm": 0.49559786915779114, |
|
"learning_rate": 2.3672498563994762e-05, |
|
"loss": 1.1026, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.7996068971678728, |
|
"grad_norm": 0.559374213218689, |
|
"learning_rate": 2.3471351077824e-05, |
|
"loss": 1.1167, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.8005003126954346, |
|
"grad_norm": 0.5809065699577332, |
|
"learning_rate": 2.3270948138459735e-05, |
|
"loss": 1.1228, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.8013937282229965, |
|
"grad_norm": 0.5337952971458435, |
|
"learning_rate": 2.3071291695621135e-05, |
|
"loss": 1.0787, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.8022871437505584, |
|
"grad_norm": 0.5353108048439026, |
|
"learning_rate": 2.2872383691764586e-05, |
|
"loss": 1.0598, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.8031805592781203, |
|
"grad_norm": 0.5580396056175232, |
|
"learning_rate": 2.2674226062064996e-05, |
|
"loss": 1.0763, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.8040739748056821, |
|
"grad_norm": 0.5546059608459473, |
|
"learning_rate": 2.2476820734396843e-05, |
|
"loss": 1.067, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.8049673903332439, |
|
"grad_norm": 0.5651270151138306, |
|
"learning_rate": 2.2280169629315484e-05, |
|
"loss": 1.134, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 0.5143851637840271, |
|
"learning_rate": 2.208427466003844e-05, |
|
"loss": 1.0975, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.8067542213883677, |
|
"grad_norm": 0.5212395191192627, |
|
"learning_rate": 2.1889137732426802e-05, |
|
"loss": 1.1308, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.8076476369159296, |
|
"grad_norm": 0.5373924374580383, |
|
"learning_rate": 2.1694760744966668e-05, |
|
"loss": 1.134, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.8085410524434915, |
|
"grad_norm": 0.5924869179725647, |
|
"learning_rate": 2.1501145588750694e-05, |
|
"loss": 1.0626, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.8094344679710533, |
|
"grad_norm": 0.5431603193283081, |
|
"learning_rate": 2.1308294147459628e-05, |
|
"loss": 1.1114, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.8103278834986152, |
|
"grad_norm": 0.548326849937439, |
|
"learning_rate": 2.1116208297344155e-05, |
|
"loss": 1.1051, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.8112212990261771, |
|
"grad_norm": 0.5223492383956909, |
|
"learning_rate": 2.0924889907206425e-05, |
|
"loss": 1.071, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.812114714553739, |
|
"grad_norm": 0.5413901805877686, |
|
"learning_rate": 2.0734340838382015e-05, |
|
"loss": 1.0917, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 0.5205683708190918, |
|
"learning_rate": 2.0544562944721778e-05, |
|
"loss": 1.0875, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.8139015456088626, |
|
"grad_norm": 0.6497357487678528, |
|
"learning_rate": 2.0355558072573787e-05, |
|
"loss": 1.1216, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.8147949611364246, |
|
"grad_norm": 0.5288578867912292, |
|
"learning_rate": 2.016732806076542e-05, |
|
"loss": 1.0919, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.8156883766639864, |
|
"grad_norm": 0.5458259582519531, |
|
"learning_rate": 1.9979874740585426e-05, |
|
"loss": 1.1187, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.8165817921915483, |
|
"grad_norm": 0.5482889413833618, |
|
"learning_rate": 1.979319993576614e-05, |
|
"loss": 1.083, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.8174752077191102, |
|
"grad_norm": 0.535325288772583, |
|
"learning_rate": 1.9607305462465686e-05, |
|
"loss": 1.0897, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.818368623246672, |
|
"grad_norm": 0.5331825017929077, |
|
"learning_rate": 1.942219312925042e-05, |
|
"loss": 1.1001, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.8192620387742339, |
|
"grad_norm": 0.541584312915802, |
|
"learning_rate": 1.9237864737077204e-05, |
|
"loss": 1.07, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.8201554543017958, |
|
"grad_norm": 0.49262598156929016, |
|
"learning_rate": 1.9054322079275953e-05, |
|
"loss": 1.1154, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.8210488698293577, |
|
"grad_norm": 0.510992705821991, |
|
"learning_rate": 1.8871566941532182e-05, |
|
"loss": 1.0934, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.8219422853569195, |
|
"grad_norm": 0.47639334201812744, |
|
"learning_rate": 1.8689601101869604e-05, |
|
"loss": 1.0779, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.8228357008844813, |
|
"grad_norm": 0.5555627346038818, |
|
"learning_rate": 1.8508426330632933e-05, |
|
"loss": 1.0919, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 0.8237291164120433, |
|
"grad_norm": 0.5235790014266968, |
|
"learning_rate": 1.8328044390470478e-05, |
|
"loss": 1.0678, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.8246225319396051, |
|
"grad_norm": 0.5292229652404785, |
|
"learning_rate": 1.8148457036317157e-05, |
|
"loss": 1.0999, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 0.8255159474671669, |
|
"grad_norm": 0.5767297744750977, |
|
"learning_rate": 1.796966601537734e-05, |
|
"loss": 1.0964, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.8264093629947289, |
|
"grad_norm": 0.5446600317955017, |
|
"learning_rate": 1.7791673067107927e-05, |
|
"loss": 1.1066, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.8273027785222907, |
|
"grad_norm": 0.48151037096977234, |
|
"learning_rate": 1.7614479923201333e-05, |
|
"loss": 1.0794, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.8281961940498526, |
|
"grad_norm": 0.5238900780677795, |
|
"learning_rate": 1.7438088307568667e-05, |
|
"loss": 1.073, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 0.8290896095774145, |
|
"grad_norm": 0.5254760384559631, |
|
"learning_rate": 1.7262499936322997e-05, |
|
"loss": 1.139, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.8299830251049763, |
|
"grad_norm": 0.5375151038169861, |
|
"learning_rate": 1.708771651776263e-05, |
|
"loss": 1.1143, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 0.8308764406325382, |
|
"grad_norm": 0.5599473118782043, |
|
"learning_rate": 1.6913739752354464e-05, |
|
"loss": 1.1043, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.8317698561601, |
|
"grad_norm": 0.5429250597953796, |
|
"learning_rate": 1.6740571332717558e-05, |
|
"loss": 1.096, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 0.832663271687662, |
|
"grad_norm": 0.5355926752090454, |
|
"learning_rate": 1.6568212943606465e-05, |
|
"loss": 1.103, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.8335566872152238, |
|
"grad_norm": 0.5205219388008118, |
|
"learning_rate": 1.6396666261895034e-05, |
|
"loss": 1.0765, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 0.8344501027427856, |
|
"grad_norm": 0.5437434315681458, |
|
"learning_rate": 1.6225932956559943e-05, |
|
"loss": 1.0793, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.8353435182703476, |
|
"grad_norm": 0.5111892223358154, |
|
"learning_rate": 1.6056014688664656e-05, |
|
"loss": 1.1069, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.8362369337979094, |
|
"grad_norm": 0.5251837968826294, |
|
"learning_rate": 1.5886913111343037e-05, |
|
"loss": 1.0945, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.8371303493254713, |
|
"grad_norm": 0.5311691164970398, |
|
"learning_rate": 1.571862986978342e-05, |
|
"loss": 1.1, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 0.8380237648530331, |
|
"grad_norm": 0.515058696269989, |
|
"learning_rate": 1.555116660121253e-05, |
|
"loss": 1.1141, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.838917180380595, |
|
"grad_norm": 0.5322168469429016, |
|
"learning_rate": 1.538452493487956e-05, |
|
"loss": 1.1025, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 0.8398105959081569, |
|
"grad_norm": 0.5564744472503662, |
|
"learning_rate": 1.5218706492040435e-05, |
|
"loss": 1.0679, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.8407040114357187, |
|
"grad_norm": 0.52479088306427, |
|
"learning_rate": 1.5053712885941862e-05, |
|
"loss": 1.0985, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 0.8415974269632807, |
|
"grad_norm": 0.5248088240623474, |
|
"learning_rate": 1.4889545721805687e-05, |
|
"loss": 1.0503, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 0.525884747505188, |
|
"learning_rate": 1.4726206596813363e-05, |
|
"loss": 1.1073, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 0.8433842580184043, |
|
"grad_norm": 0.5724270343780518, |
|
"learning_rate": 1.456369710009038e-05, |
|
"loss": 1.126, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.8442776735459663, |
|
"grad_norm": 0.4863172173500061, |
|
"learning_rate": 1.4402018812690721e-05, |
|
"loss": 1.0695, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.8451710890735281, |
|
"grad_norm": 0.5609614849090576, |
|
"learning_rate": 1.4241173307581558e-05, |
|
"loss": 1.0684, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.84606450460109, |
|
"grad_norm": 0.5545142292976379, |
|
"learning_rate": 1.4081162149627936e-05, |
|
"loss": 1.0362, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 0.8469579201286518, |
|
"grad_norm": 0.5475736856460571, |
|
"learning_rate": 1.39219868955775e-05, |
|
"loss": 1.0926, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.8478513356562137, |
|
"grad_norm": 0.4896586537361145, |
|
"learning_rate": 1.3763649094045483e-05, |
|
"loss": 1.0675, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 0.8487447511837756, |
|
"grad_norm": 0.49385780096054077, |
|
"learning_rate": 1.3606150285499475e-05, |
|
"loss": 1.0739, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.8496381667113374, |
|
"grad_norm": 0.5389235019683838, |
|
"learning_rate": 1.3449492002244502e-05, |
|
"loss": 1.0808, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 0.8505315822388994, |
|
"grad_norm": 0.5244671702384949, |
|
"learning_rate": 1.329367576840812e-05, |
|
"loss": 1.0967, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.8514249977664612, |
|
"grad_norm": 0.5618093013763428, |
|
"learning_rate": 1.3138703099925676e-05, |
|
"loss": 1.0743, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 0.852318413294023, |
|
"grad_norm": 0.5471380949020386, |
|
"learning_rate": 1.2984575504525376e-05, |
|
"loss": 1.1035, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.853211828821585, |
|
"grad_norm": 0.5013295412063599, |
|
"learning_rate": 1.2831294481713763e-05, |
|
"loss": 1.0736, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.8541052443491468, |
|
"grad_norm": 0.5280576348304749, |
|
"learning_rate": 1.2678861522761066e-05, |
|
"loss": 1.0825, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.8549986598767086, |
|
"grad_norm": 0.5448872447013855, |
|
"learning_rate": 1.2527278110686712e-05, |
|
"loss": 1.0895, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 0.8558920754042705, |
|
"grad_norm": 0.5476821660995483, |
|
"learning_rate": 1.237654572024487e-05, |
|
"loss": 1.1082, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.8567854909318324, |
|
"grad_norm": 0.5306336283683777, |
|
"learning_rate": 1.2226665817910166e-05, |
|
"loss": 1.0715, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 0.8576789064593943, |
|
"grad_norm": 0.5052401423454285, |
|
"learning_rate": 1.2077639861863365e-05, |
|
"loss": 1.0731, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.8585723219869561, |
|
"grad_norm": 0.5403242707252502, |
|
"learning_rate": 1.1929469301977136e-05, |
|
"loss": 1.0941, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.859465737514518, |
|
"grad_norm": 0.5217272639274597, |
|
"learning_rate": 1.1782155579802034e-05, |
|
"loss": 1.0628, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.8603591530420799, |
|
"grad_norm": 0.570672869682312, |
|
"learning_rate": 1.1635700128552508e-05, |
|
"loss": 1.1157, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 0.8612525685696417, |
|
"grad_norm": 0.5487679839134216, |
|
"learning_rate": 1.1490104373092825e-05, |
|
"loss": 1.0981, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.8621459840972036, |
|
"grad_norm": 0.5410107970237732, |
|
"learning_rate": 1.1345369729923271e-05, |
|
"loss": 1.0917, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.8630393996247655, |
|
"grad_norm": 0.5344003438949585, |
|
"learning_rate": 1.1201497607166423e-05, |
|
"loss": 1.1058, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.8639328151523273, |
|
"grad_norm": 0.548433780670166, |
|
"learning_rate": 1.105848940455334e-05, |
|
"loss": 1.0724, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 0.8648262306798892, |
|
"grad_norm": 0.573233962059021, |
|
"learning_rate": 1.0916346513410081e-05, |
|
"loss": 1.0733, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.8657196462074511, |
|
"grad_norm": 0.5367952585220337, |
|
"learning_rate": 1.0775070316644042e-05, |
|
"loss": 1.0829, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 0.866613061735013, |
|
"grad_norm": 0.5424239635467529, |
|
"learning_rate": 1.0634662188730604e-05, |
|
"loss": 1.11, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.8675064772625748, |
|
"grad_norm": 0.5632114410400391, |
|
"learning_rate": 1.0495123495699588e-05, |
|
"loss": 1.0926, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 0.8683998927901367, |
|
"grad_norm": 0.517599880695343, |
|
"learning_rate": 1.0356455595122239e-05, |
|
"loss": 1.1214, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.8692933083176986, |
|
"grad_norm": 0.5182797908782959, |
|
"learning_rate": 1.02186598360978e-05, |
|
"loss": 1.0759, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 0.8701867238452604, |
|
"grad_norm": 0.5280758142471313, |
|
"learning_rate": 1.0081737559240445e-05, |
|
"loss": 1.1264, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.8710801393728222, |
|
"grad_norm": 0.534575343132019, |
|
"learning_rate": 9.945690096666249e-06, |
|
"loss": 1.0818, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.8719735549003842, |
|
"grad_norm": 0.5310872793197632, |
|
"learning_rate": 9.810518771980225e-06, |
|
"loss": 1.1092, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.872866970427946, |
|
"grad_norm": 0.5414004921913147, |
|
"learning_rate": 9.676224900263497e-06, |
|
"loss": 1.1235, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 0.8737603859555079, |
|
"grad_norm": 0.5505749583244324, |
|
"learning_rate": 9.542809788060358e-06, |
|
"loss": 1.1467, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.8746538014830698, |
|
"grad_norm": 0.507029116153717, |
|
"learning_rate": 9.410274733365753e-06, |
|
"loss": 1.0818, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 0.8755472170106317, |
|
"grad_norm": 0.5631771683692932, |
|
"learning_rate": 9.278621025612434e-06, |
|
"loss": 1.0975, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.8764406325381935, |
|
"grad_norm": 0.6328200697898865, |
|
"learning_rate": 9.147849945658648e-06, |
|
"loss": 1.0853, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 0.8773340480657554, |
|
"grad_norm": 0.5625194907188416, |
|
"learning_rate": 9.017962765775523e-06, |
|
"loss": 1.1083, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.8782274635933173, |
|
"grad_norm": 0.5809071063995361, |
|
"learning_rate": 8.888960749634712e-06, |
|
"loss": 1.0833, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 0.5429468154907227, |
|
"learning_rate": 8.760845152296116e-06, |
|
"loss": 1.0814, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.8800142946484409, |
|
"grad_norm": 0.5104892253875732, |
|
"learning_rate": 8.63361722019569e-06, |
|
"loss": 1.0644, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.8809077101760029, |
|
"grad_norm": 0.5056647658348083, |
|
"learning_rate": 8.507278191133261e-06, |
|
"loss": 1.0679, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.8818011257035647, |
|
"grad_norm": 0.5692788362503052, |
|
"learning_rate": 8.38182929426059e-06, |
|
"loss": 1.112, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 0.8826945412311266, |
|
"grad_norm": 0.560130774974823, |
|
"learning_rate": 8.257271750069295e-06, |
|
"loss": 1.1309, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.8835879567586885, |
|
"grad_norm": 0.5144816040992737, |
|
"learning_rate": 8.133606770379055e-06, |
|
"loss": 1.0819, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 0.8844813722862503, |
|
"grad_norm": 0.484380304813385, |
|
"learning_rate": 8.010835558325735e-06, |
|
"loss": 1.0844, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.8853747878138122, |
|
"grad_norm": 0.5501832365989685, |
|
"learning_rate": 7.88895930834983e-06, |
|
"loss": 1.1013, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 0.8862682033413741, |
|
"grad_norm": 0.49795544147491455, |
|
"learning_rate": 7.767979206184694e-06, |
|
"loss": 1.0627, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.887161618868936, |
|
"grad_norm": 0.5382062792778015, |
|
"learning_rate": 7.64789642884508e-06, |
|
"loss": 1.0758, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 0.8880550343964978, |
|
"grad_norm": 0.5314489603042603, |
|
"learning_rate": 7.528712144615679e-06, |
|
"loss": 1.0501, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.8889484499240596, |
|
"grad_norm": 0.5101165175437927, |
|
"learning_rate": 7.4104275130397085e-06, |
|
"loss": 1.0807, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.8898418654516216, |
|
"grad_norm": 0.5354017019271851, |
|
"learning_rate": 7.29304368490773e-06, |
|
"loss": 1.0803, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.8907352809791834, |
|
"grad_norm": 0.5763533711433411, |
|
"learning_rate": 7.176561802246373e-06, |
|
"loss": 1.1026, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 0.8916286965067453, |
|
"grad_norm": 0.537377655506134, |
|
"learning_rate": 7.06098299830722e-06, |
|
"loss": 1.0888, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.8925221120343072, |
|
"grad_norm": 0.5555933713912964, |
|
"learning_rate": 6.946308397555823e-06, |
|
"loss": 1.1351, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 0.893415527561869, |
|
"grad_norm": 0.5403766632080078, |
|
"learning_rate": 6.832539115660752e-06, |
|
"loss": 1.0891, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.8943089430894309, |
|
"grad_norm": 0.517660915851593, |
|
"learning_rate": 6.719676259482721e-06, |
|
"loss": 1.0607, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 0.8952023586169927, |
|
"grad_norm": 0.5434561371803284, |
|
"learning_rate": 6.607720927063843e-06, |
|
"loss": 1.0982, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.8960957741445547, |
|
"grad_norm": 0.563258707523346, |
|
"learning_rate": 6.496674207616926e-06, |
|
"loss": 1.0701, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 0.8969891896721165, |
|
"grad_norm": 0.511574387550354, |
|
"learning_rate": 6.386537181514896e-06, |
|
"loss": 1.0808, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.8978826051996783, |
|
"grad_norm": 0.49810346961021423, |
|
"learning_rate": 6.277310920280299e-06, |
|
"loss": 1.0664, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.8987760207272403, |
|
"grad_norm": 0.526667058467865, |
|
"learning_rate": 6.1689964865748185e-06, |
|
"loss": 1.0714, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.8996694362548021, |
|
"grad_norm": 0.5600285530090332, |
|
"learning_rate": 6.061594934188985e-06, |
|
"loss": 1.0706, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 0.900562851782364, |
|
"grad_norm": 0.5716047883033752, |
|
"learning_rate": 5.955107308031915e-06, |
|
"loss": 1.0806, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.9014562673099259, |
|
"grad_norm": 0.5603742599487305, |
|
"learning_rate": 5.849534644121146e-06, |
|
"loss": 1.0755, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 0.9023496828374877, |
|
"grad_norm": 0.5195134878158569, |
|
"learning_rate": 5.744877969572537e-06, |
|
"loss": 1.0537, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.9032430983650496, |
|
"grad_norm": 0.5341048240661621, |
|
"learning_rate": 5.6411383025903205e-06, |
|
"loss": 1.0951, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 0.9041365138926114, |
|
"grad_norm": 0.55425626039505, |
|
"learning_rate": 5.538316652457121e-06, |
|
"loss": 1.0755, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.9050299294201734, |
|
"grad_norm": 0.520702600479126, |
|
"learning_rate": 5.436414019524216e-06, |
|
"loss": 1.1157, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 0.9059233449477352, |
|
"grad_norm": 0.5679965019226074, |
|
"learning_rate": 5.335431395201784e-06, |
|
"loss": 1.0801, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.906816760475297, |
|
"grad_norm": 0.5292661190032959, |
|
"learning_rate": 5.235369761949216e-06, |
|
"loss": 1.0908, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.907710176002859, |
|
"grad_norm": 0.5407772660255432, |
|
"learning_rate": 5.136230093265593e-06, |
|
"loss": 1.0639, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.9086035915304208, |
|
"grad_norm": 0.5404497981071472, |
|
"learning_rate": 5.038013353680204e-06, |
|
"loss": 1.1004, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 0.9094970070579826, |
|
"grad_norm": 0.5132922530174255, |
|
"learning_rate": 4.940720498743179e-06, |
|
"loss": 1.107, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.9103904225855446, |
|
"grad_norm": 0.49404093623161316, |
|
"learning_rate": 4.8443524750161676e-06, |
|
"loss": 1.1043, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 0.9112838381131064, |
|
"grad_norm": 0.5417544841766357, |
|
"learning_rate": 4.74891022006313e-06, |
|
"loss": 1.0917, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.9121772536406683, |
|
"grad_norm": 0.5480242371559143, |
|
"learning_rate": 4.654394662441264e-06, |
|
"loss": 1.0834, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 0.9130706691682301, |
|
"grad_norm": 0.555147647857666, |
|
"learning_rate": 4.560806721691913e-06, |
|
"loss": 1.0887, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.913964084695792, |
|
"grad_norm": 0.5397526621818542, |
|
"learning_rate": 4.468147308331605e-06, |
|
"loss": 1.1017, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.9148575002233539, |
|
"grad_norm": 0.510307252407074, |
|
"learning_rate": 4.376417323843318e-06, |
|
"loss": 1.0739, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 0.5409055948257446, |
|
"learning_rate": 4.28561766066754e-06, |
|
"loss": 1.101, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.9166443312784777, |
|
"grad_norm": 0.552873969078064, |
|
"learning_rate": 4.195749202193699e-06, |
|
"loss": 1.0913, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.9175377468060395, |
|
"grad_norm": 0.5643073916435242, |
|
"learning_rate": 4.106812822751538e-06, |
|
"loss": 1.0779, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 0.9184311623336013, |
|
"grad_norm": 0.523567259311676, |
|
"learning_rate": 4.018809387602596e-06, |
|
"loss": 1.0872, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.9193245778611632, |
|
"grad_norm": 0.5306380391120911, |
|
"learning_rate": 3.931739752931829e-06, |
|
"loss": 1.0917, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.9202179933887251, |
|
"grad_norm": 0.5733050107955933, |
|
"learning_rate": 3.845604765839228e-06, |
|
"loss": 1.111, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.921111408916287, |
|
"grad_norm": 0.5619934797286987, |
|
"learning_rate": 3.760405264331612e-06, |
|
"loss": 1.065, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 0.9220048244438488, |
|
"grad_norm": 0.5529298186302185, |
|
"learning_rate": 3.676142077314448e-06, |
|
"loss": 1.0899, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.9228982399714107, |
|
"grad_norm": 0.5153440237045288, |
|
"learning_rate": 3.592816024583856e-06, |
|
"loss": 1.0995, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 0.9237916554989726, |
|
"grad_norm": 0.5353826880455017, |
|
"learning_rate": 3.510427916818526e-06, |
|
"loss": 1.0906, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.9246850710265344, |
|
"grad_norm": 0.5490701794624329, |
|
"learning_rate": 3.4289785555719157e-06, |
|
"loss": 1.1228, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.9255784865540964, |
|
"grad_norm": 0.5379487872123718, |
|
"learning_rate": 3.348468733264398e-06, |
|
"loss": 1.0582, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.9264719020816582, |
|
"grad_norm": 0.5375136733055115, |
|
"learning_rate": 3.268899233175604e-06, |
|
"loss": 1.0934, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 0.92736531760922, |
|
"grad_norm": 0.51712965965271, |
|
"learning_rate": 3.19027082943677e-06, |
|
"loss": 1.0704, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.9282587331367819, |
|
"grad_norm": 0.512758195400238, |
|
"learning_rate": 3.1125842870232014e-06, |
|
"loss": 1.124, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 0.9291521486643438, |
|
"grad_norm": 0.58779376745224, |
|
"learning_rate": 3.0358403617468446e-06, |
|
"loss": 1.1403, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.9300455641919056, |
|
"grad_norm": 0.5201031565666199, |
|
"learning_rate": 2.960039800248915e-06, |
|
"loss": 1.0889, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 0.9309389797194675, |
|
"grad_norm": 0.49719128012657166, |
|
"learning_rate": 2.885183339992692e-06, |
|
"loss": 1.0881, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.9318323952470294, |
|
"grad_norm": 0.5498881340026855, |
|
"learning_rate": 2.8112717092562358e-06, |
|
"loss": 1.0472, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 0.9327258107745913, |
|
"grad_norm": 0.5250979661941528, |
|
"learning_rate": 2.738305627125415e-06, |
|
"loss": 1.0962, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.9336192263021531, |
|
"grad_norm": 0.5712759494781494, |
|
"learning_rate": 2.6662858034868454e-06, |
|
"loss": 1.1149, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.934512641829715, |
|
"grad_norm": 0.5062239170074463, |
|
"learning_rate": 2.5952129390209854e-06, |
|
"loss": 1.0905, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.9354060573572769, |
|
"grad_norm": 0.5184369683265686, |
|
"learning_rate": 2.525087725195352e-06, |
|
"loss": 1.0709, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 0.9362994728848387, |
|
"grad_norm": 0.5225459933280945, |
|
"learning_rate": 2.4559108442577585e-06, |
|
"loss": 1.0738, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.9371928884124006, |
|
"grad_norm": 0.5459494590759277, |
|
"learning_rate": 2.38768296922971e-06, |
|
"loss": 1.064, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 0.9380863039399625, |
|
"grad_norm": 0.47445785999298096, |
|
"learning_rate": 2.3204047638998195e-06, |
|
"loss": 1.0625, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.9389797194675243, |
|
"grad_norm": 0.5076881051063538, |
|
"learning_rate": 2.2540768828173795e-06, |
|
"loss": 1.0736, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 0.9398731349950862, |
|
"grad_norm": 0.5733228921890259, |
|
"learning_rate": 2.1886999712860014e-06, |
|
"loss": 1.1301, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.9407665505226481, |
|
"grad_norm": 0.5537548065185547, |
|
"learning_rate": 2.1242746653572845e-06, |
|
"loss": 1.1, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 0.94165996605021, |
|
"grad_norm": 0.5685492753982544, |
|
"learning_rate": 2.060801591824668e-06, |
|
"loss": 1.1201, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.9425533815777718, |
|
"grad_norm": 0.5240213871002197, |
|
"learning_rate": 1.9982813682173586e-06, |
|
"loss": 1.0814, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.9434467971053337, |
|
"grad_norm": 0.5279719829559326, |
|
"learning_rate": 1.936714602794254e-06, |
|
"loss": 1.0812, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.9443402126328956, |
|
"grad_norm": 0.5382205247879028, |
|
"learning_rate": 1.8761018945380849e-06, |
|
"loss": 1.06, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 0.9452336281604574, |
|
"grad_norm": 0.503291666507721, |
|
"learning_rate": 1.8164438331495614e-06, |
|
"loss": 1.1014, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.9461270436880193, |
|
"grad_norm": 0.5597132444381714, |
|
"learning_rate": 1.7577409990416237e-06, |
|
"loss": 1.0768, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 0.9470204592155812, |
|
"grad_norm": 0.5632822513580322, |
|
"learning_rate": 1.6999939633338236e-06, |
|
"loss": 1.1121, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.947913874743143, |
|
"grad_norm": 0.5778818130493164, |
|
"learning_rate": 1.6432032878467729e-06, |
|
"loss": 1.0763, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 0.9488072902707049, |
|
"grad_norm": 0.5174503922462463, |
|
"learning_rate": 1.587369525096627e-06, |
|
"loss": 1.0799, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.9497007057982668, |
|
"grad_norm": 0.5436321496963501, |
|
"learning_rate": 1.5324932182897656e-06, |
|
"loss": 1.0975, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 0.9505941213258287, |
|
"grad_norm": 0.5373766422271729, |
|
"learning_rate": 1.4785749013174754e-06, |
|
"loss": 1.0808, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.9514875368533905, |
|
"grad_norm": 0.5411360859870911, |
|
"learning_rate": 1.4256150987507544e-06, |
|
"loss": 1.1184, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 0.5489704608917236, |
|
"learning_rate": 1.3736143258352707e-06, |
|
"loss": 1.0733, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.9532743679085143, |
|
"grad_norm": 0.5345862507820129, |
|
"learning_rate": 1.322573088486212e-06, |
|
"loss": 1.1047, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 0.9541677834360761, |
|
"grad_norm": 0.4998050630092621, |
|
"learning_rate": 1.272491883283533e-06, |
|
"loss": 1.1046, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.955061198963638, |
|
"grad_norm": 0.5264286994934082, |
|
"learning_rate": 1.2233711974669714e-06, |
|
"loss": 1.0887, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 0.9559546144911999, |
|
"grad_norm": 0.5480632185935974, |
|
"learning_rate": 1.1752115089314398e-06, |
|
"loss": 1.1008, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.9568480300187617, |
|
"grad_norm": 0.5870484113693237, |
|
"learning_rate": 1.1280132862222737e-06, |
|
"loss": 1.0944, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 0.9577414455463236, |
|
"grad_norm": 0.5417635440826416, |
|
"learning_rate": 1.081776988530725e-06, |
|
"loss": 1.0934, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.9586348610738855, |
|
"grad_norm": 0.5396819114685059, |
|
"learning_rate": 1.0365030656894759e-06, |
|
"loss": 1.1019, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 0.9595282766014473, |
|
"grad_norm": 0.521092414855957, |
|
"learning_rate": 9.921919581682759e-07, |
|
"loss": 1.0899, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.9604216921290092, |
|
"grad_norm": 0.5292440056800842, |
|
"learning_rate": 9.488440970696566e-07, |
|
"loss": 1.1232, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.961315107656571, |
|
"grad_norm": 0.5165191292762756, |
|
"learning_rate": 9.064599041247124e-07, |
|
"loss": 1.1398, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.962208523184133, |
|
"grad_norm": 0.5134493708610535, |
|
"learning_rate": 8.650397916890263e-07, |
|
"loss": 1.0735, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 0.9631019387116948, |
|
"grad_norm": 0.5425078868865967, |
|
"learning_rate": 8.245841627386397e-07, |
|
"loss": 1.1054, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.9639953542392566, |
|
"grad_norm": 0.556138277053833, |
|
"learning_rate": 7.850934108661556e-07, |
|
"loss": 1.0938, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 0.9648887697668186, |
|
"grad_norm": 0.5389893054962158, |
|
"learning_rate": 7.465679202768749e-07, |
|
"loss": 1.1052, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.9657821852943804, |
|
"grad_norm": 0.47252729535102844, |
|
"learning_rate": 7.090080657850884e-07, |
|
"loss": 1.063, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 0.9666756008219423, |
|
"grad_norm": 0.4975879490375519, |
|
"learning_rate": 6.724142128104239e-07, |
|
"loss": 1.0983, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.9675690163495042, |
|
"grad_norm": 0.5652892589569092, |
|
"learning_rate": 6.367867173742603e-07, |
|
"loss": 1.075, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 0.968462431877066, |
|
"grad_norm": 0.5284938216209412, |
|
"learning_rate": 6.021259260963085e-07, |
|
"loss": 1.0788, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.9693558474046279, |
|
"grad_norm": 0.4806806445121765, |
|
"learning_rate": 5.684321761912247e-07, |
|
"loss": 1.0439, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.9702492629321897, |
|
"grad_norm": 0.5021379590034485, |
|
"learning_rate": 5.357057954653244e-07, |
|
"loss": 1.0704, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.9711426784597517, |
|
"grad_norm": 0.5142560005187988, |
|
"learning_rate": 5.039471023133624e-07, |
|
"loss": 1.1126, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 0.9720360939873135, |
|
"grad_norm": 0.5299791693687439, |
|
"learning_rate": 4.7315640571550246e-07, |
|
"loss": 1.1047, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.9729295095148753, |
|
"grad_norm": 0.49153584241867065, |
|
"learning_rate": 4.433340052342749e-07, |
|
"loss": 1.072, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 0.9738229250424373, |
|
"grad_norm": 0.5386480093002319, |
|
"learning_rate": 4.1448019101163473e-07, |
|
"loss": 1.0694, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.9747163405699991, |
|
"grad_norm": 0.5521134734153748, |
|
"learning_rate": 3.865952437661968e-07, |
|
"loss": 1.1142, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.5171558856964111, |
|
"learning_rate": 3.5967943479043867e-07, |
|
"loss": 1.1047, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.9765031716251228, |
|
"grad_norm": 0.5417544841766357, |
|
"learning_rate": 3.3373302594814637e-07, |
|
"loss": 1.0995, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 0.9773965871526847, |
|
"grad_norm": 0.5449820160865784, |
|
"learning_rate": 3.0875626967176165e-07, |
|
"loss": 1.1006, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.9782900026802466, |
|
"grad_norm": 0.5323817133903503, |
|
"learning_rate": 2.8474940896003887e-07, |
|
"loss": 1.139, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.9791834182078084, |
|
"grad_norm": 0.5092723369598389, |
|
"learning_rate": 2.617126773755696e-07, |
|
"loss": 1.0711, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.9800768337353704, |
|
"grad_norm": 0.5194780230522156, |
|
"learning_rate": 2.3964629904259514e-07, |
|
"loss": 1.1153, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 0.9809702492629322, |
|
"grad_norm": 0.5183728933334351, |
|
"learning_rate": 2.1855048864479754e-07, |
|
"loss": 1.0979, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.981863664790494, |
|
"grad_norm": 0.5222030878067017, |
|
"learning_rate": 1.984254514232009e-07, |
|
"loss": 1.0832, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 0.982757080318056, |
|
"grad_norm": 0.5462630987167358, |
|
"learning_rate": 1.7927138317417324e-07, |
|
"loss": 1.0618, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.9836504958456178, |
|
"grad_norm": 0.5219123959541321, |
|
"learning_rate": 1.6108847024755015e-07, |
|
"loss": 1.1152, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 0.9845439113731796, |
|
"grad_norm": 0.5546027421951294, |
|
"learning_rate": 1.4387688954478063e-07, |
|
"loss": 1.1397, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.9854373269007415, |
|
"grad_norm": 0.49054640531539917, |
|
"learning_rate": 1.276368085172397e-07, |
|
"loss": 1.0925, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 0.9863307424283034, |
|
"grad_norm": 0.5430747270584106, |
|
"learning_rate": 1.1236838516459625e-07, |
|
"loss": 1.091, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.9872241579558653, |
|
"grad_norm": 0.5430759191513062, |
|
"learning_rate": 9.807176803325879e-08, |
|
"loss": 1.1034, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.9881175734834271, |
|
"grad_norm": 0.554993212223053, |
|
"learning_rate": 8.474709621492105e-08, |
|
"loss": 1.105, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 0.5086091160774231, |
|
"learning_rate": 7.239449934525189e-08, |
|
"loss": 1.0637, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 0.9899044045385509, |
|
"grad_norm": 0.5667893290519714, |
|
"learning_rate": 6.101409760260746e-08, |
|
"loss": 1.0987, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.9907978200661127, |
|
"grad_norm": 0.5301327705383301, |
|
"learning_rate": 5.0606001706843264e-08, |
|
"loss": 1.0638, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 0.9916912355936747, |
|
"grad_norm": 0.5770794153213501, |
|
"learning_rate": 4.1170312918259456e-08, |
|
"loss": 1.089, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.9925846511212365, |
|
"grad_norm": 0.5558438897132874, |
|
"learning_rate": 3.2707123036646026e-08, |
|
"loss": 1.0793, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 0.9934780666487983, |
|
"grad_norm": 0.5180490016937256, |
|
"learning_rate": 2.5216514400305813e-08, |
|
"loss": 1.1154, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.9943714821763602, |
|
"grad_norm": 0.5610867142677307, |
|
"learning_rate": 1.869855988534397e-08, |
|
"loss": 1.0993, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 0.9952648977039221, |
|
"grad_norm": 0.5481888651847839, |
|
"learning_rate": 1.31533229049019e-08, |
|
"loss": 1.1059, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.996158313231484, |
|
"grad_norm": 0.5289728045463562, |
|
"learning_rate": 8.580857408546639e-09, |
|
"loss": 1.0708, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.9970517287590458, |
|
"grad_norm": 0.5380046963691711, |
|
"learning_rate": 4.9812078817934596e-09, |
|
"loss": 1.0486, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.9979451442866077, |
|
"grad_norm": 0.5321928858757019, |
|
"learning_rate": 2.3544093455951654e-09, |
|
"loss": 1.0629, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 0.9988385598141696, |
|
"grad_norm": 0.5315663814544678, |
|
"learning_rate": 7.004873560645387e-10, |
|
"loss": 1.0994, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.9997319753417314, |
|
"grad_norm": 0.5456522703170776, |
|
"learning_rate": 1.9458004196781787e-11, |
|
"loss": 1.0959, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 0.9999106584472438, |
|
"eval_loss": 1.0571272373199463, |
|
"eval_runtime": 870.817, |
|
"eval_samples_per_second": 5.123, |
|
"eval_steps_per_second": 0.641, |
|
"step": 5596 |
|
}, |
|
{ |
|
"epoch": 0.9999106584472438, |
|
"step": 5596, |
|
"total_flos": 4.151601388859687e+18, |
|
"train_loss": 1.267410275018411, |
|
"train_runtime": 31869.1034, |
|
"train_samples_per_second": 1.405, |
|
"train_steps_per_second": 0.176 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 5596, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.151601388859687e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|