|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9990224828934506, |
|
"eval_steps": 500, |
|
"global_step": 511, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0019550342130987292, |
|
"grad_norm": 28.436507384573893, |
|
"learning_rate": 5.769230769230769e-06, |
|
"loss": 1.9755, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009775171065493646, |
|
"grad_norm": 16.326267191478145, |
|
"learning_rate": 2.8846153846153845e-05, |
|
"loss": 1.7228, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.019550342130987292, |
|
"grad_norm": 7.77688805789614, |
|
"learning_rate": 5.769230769230769e-05, |
|
"loss": 1.1824, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02932551319648094, |
|
"grad_norm": 23.182562040558295, |
|
"learning_rate": 8.653846153846152e-05, |
|
"loss": 1.2132, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.039100684261974585, |
|
"grad_norm": 3.8046090746822716, |
|
"learning_rate": 0.00011538461538461538, |
|
"loss": 1.0978, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04887585532746823, |
|
"grad_norm": 6.364935901579011, |
|
"learning_rate": 0.00014423076923076922, |
|
"loss": 1.2143, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.05865102639296188, |
|
"grad_norm": 4.553589249592707, |
|
"learning_rate": 0.00017307692307692304, |
|
"loss": 1.1357, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06842619745845552, |
|
"grad_norm": 5.819131618520497, |
|
"learning_rate": 0.00020192307692307691, |
|
"loss": 1.2003, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07820136852394917, |
|
"grad_norm": 8.631833955662083, |
|
"learning_rate": 0.00023076923076923076, |
|
"loss": 1.2049, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08797653958944282, |
|
"grad_norm": 27.119527389294134, |
|
"learning_rate": 0.0002596153846153846, |
|
"loss": 2.0482, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09775171065493646, |
|
"grad_norm": 32.33385091368177, |
|
"learning_rate": 0.00028846153846153843, |
|
"loss": 1.6455, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10752688172043011, |
|
"grad_norm": 45.94902359405063, |
|
"learning_rate": 0.0002999683799255387, |
|
"loss": 1.9763, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11730205278592376, |
|
"grad_norm": 128.59363629595714, |
|
"learning_rate": 0.0002997751944121241, |
|
"loss": 1.5422, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1270772238514174, |
|
"grad_norm": 10.71651832916556, |
|
"learning_rate": 0.0002994066160471166, |
|
"loss": 1.7548, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13685239491691104, |
|
"grad_norm": 4.189410908699583, |
|
"learning_rate": 0.0002988630764507904, |
|
"loss": 1.3404, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1466275659824047, |
|
"grad_norm": 33.78267011030123, |
|
"learning_rate": 0.00029814521213014585, |
|
"loss": 1.4341, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15640273704789834, |
|
"grad_norm": 4.447241036402298, |
|
"learning_rate": 0.00029725386373353455, |
|
"loss": 1.4355, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16617790811339198, |
|
"grad_norm": 19.324376604180756, |
|
"learning_rate": 0.00029619007506622504, |
|
"loss": 1.4037, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.17595307917888564, |
|
"grad_norm": 56.398325865658165, |
|
"learning_rate": 0.00029495509186806487, |
|
"loss": 2.1883, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18572825024437928, |
|
"grad_norm": 117.39758059322921, |
|
"learning_rate": 0.0002935503603546683, |
|
"loss": 2.0507, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.19550342130987292, |
|
"grad_norm": 7.6884925370685995, |
|
"learning_rate": 0.00029197752552383914, |
|
"loss": 1.5932, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20527859237536658, |
|
"grad_norm": 963.0390862127407, |
|
"learning_rate": 0.000290238429229211, |
|
"loss": 5.223, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21505376344086022, |
|
"grad_norm": 258.2156128276988, |
|
"learning_rate": 0.00028833510802336203, |
|
"loss": 4.5213, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22482893450635386, |
|
"grad_norm": 264.762100287562, |
|
"learning_rate": 0.0002862697907729285, |
|
"loss": 2.0849, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.23460410557184752, |
|
"grad_norm": 18.694604887076736, |
|
"learning_rate": 0.0002840448960485118, |
|
"loss": 1.8192, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24437927663734116, |
|
"grad_norm": 67.0812503804155, |
|
"learning_rate": 0.00028166302929243326, |
|
"loss": 1.3915, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2541544477028348, |
|
"grad_norm": 21.92554603766507, |
|
"learning_rate": 0.0002791269797676551, |
|
"loss": 1.5317, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.26392961876832843, |
|
"grad_norm": 7.078011892011075, |
|
"learning_rate": 0.00027643971729144056, |
|
"loss": 1.4673, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27370478983382207, |
|
"grad_norm": 9.631691414307214, |
|
"learning_rate": 0.0002736043887575761, |
|
"loss": 1.3131, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.28347996089931576, |
|
"grad_norm": 23.096760351073254, |
|
"learning_rate": 0.00027062431445123124, |
|
"loss": 1.572, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2932551319648094, |
|
"grad_norm": 3.17350344043114, |
|
"learning_rate": 0.0002675029841607691, |
|
"loss": 1.3668, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.30303030303030304, |
|
"grad_norm": 6.678371795997961, |
|
"learning_rate": 0.00026424405309106216, |
|
"loss": 1.3082, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3128054740957967, |
|
"grad_norm": 4.718771263163467, |
|
"learning_rate": 0.00026085133758309883, |
|
"loss": 1.3286, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3225806451612903, |
|
"grad_norm": 8.790382644648172, |
|
"learning_rate": 0.00025732881064489233, |
|
"loss": 1.3241, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.33235581622678395, |
|
"grad_norm": 4.416208770248871, |
|
"learning_rate": 0.0002536805972989267, |
|
"loss": 1.3357, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3421309872922776, |
|
"grad_norm": 3.1002678359027285, |
|
"learning_rate": 0.0002499109697515875, |
|
"loss": 1.4037, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3519061583577713, |
|
"grad_norm": 7.124469693986668, |
|
"learning_rate": 0.0002460243423902342, |
|
"loss": 1.625, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3616813294232649, |
|
"grad_norm": 51.006744297270856, |
|
"learning_rate": 0.00024202526661377277, |
|
"loss": 1.6499, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.37145650048875856, |
|
"grad_norm": 14.662023479458805, |
|
"learning_rate": 0.00023791842550278217, |
|
"loss": 1.8342, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3812316715542522, |
|
"grad_norm": 4.973242239066626, |
|
"learning_rate": 0.00023370862833543648, |
|
"loss": 1.6823, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.39100684261974583, |
|
"grad_norm": 95.12537291599145, |
|
"learning_rate": 0.0002294008049556441, |
|
"loss": 1.5268, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.40078201368523947, |
|
"grad_norm": 3.394429345078054, |
|
"learning_rate": 0.000225, |
|
"loss": 1.45, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.41055718475073316, |
|
"grad_norm": 3.5902349674560456, |
|
"learning_rate": 0.00022051136699031057, |
|
"loss": 1.2502, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4203323558162268, |
|
"grad_norm": 260.2217345574012, |
|
"learning_rate": 0.00021594016229861007, |
|
"loss": 1.4486, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.43010752688172044, |
|
"grad_norm": 4.8647475236367725, |
|
"learning_rate": 0.0002112917389917347, |
|
"loss": 1.486, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4398826979472141, |
|
"grad_norm": 1.9024756731821182, |
|
"learning_rate": 0.0002065715405626634, |
|
"loss": 1.2628, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4496578690127077, |
|
"grad_norm": 10.37907922884646, |
|
"learning_rate": 0.00020178509455596596, |
|
"loss": 1.2518, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.45943304007820135, |
|
"grad_norm": 1.9405506982628546, |
|
"learning_rate": 0.00019693800609482315, |
|
"loss": 1.2849, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.46920821114369504, |
|
"grad_norm": 2.6160283264932462, |
|
"learning_rate": 0.00019203595131719932, |
|
"loss": 1.2548, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.4789833822091887, |
|
"grad_norm": 2.1695347772705373, |
|
"learning_rate": 0.00018708467072885382, |
|
"loss": 1.3377, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4887585532746823, |
|
"grad_norm": 2.2144620011763374, |
|
"learning_rate": 0.00018208996248097458, |
|
"loss": 1.3093, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.49853372434017595, |
|
"grad_norm": 2.1880901805448403, |
|
"learning_rate": 0.00017705767558030754, |
|
"loss": 1.245, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5083088954056696, |
|
"grad_norm": 2.991429961153096, |
|
"learning_rate": 0.0001719937030397311, |
|
"loss": 1.2559, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5180840664711632, |
|
"grad_norm": 23.064953881729117, |
|
"learning_rate": 0.00016690397497729818, |
|
"loss": 1.288, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5278592375366569, |
|
"grad_norm": 1.3455036549144204, |
|
"learning_rate": 0.00016179445167182677, |
|
"loss": 1.2717, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5376344086021505, |
|
"grad_norm": 1.1846238387921606, |
|
"learning_rate": 0.00015667111658317054, |
|
"loss": 1.2394, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5474095796676441, |
|
"grad_norm": 137.50996798714343, |
|
"learning_rate": 0.00015153996934534348, |
|
"loss": 1.3296, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5571847507331378, |
|
"grad_norm": 1.1488279921928382, |
|
"learning_rate": 0.00014640701874070455, |
|
"loss": 1.2874, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5669599217986315, |
|
"grad_norm": 1.2689991198054311, |
|
"learning_rate": 0.00014127827566342863, |
|
"loss": 1.2561, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5767350928641252, |
|
"grad_norm": 2.140511295392104, |
|
"learning_rate": 0.0001361597460805047, |
|
"loss": 1.2205, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5865102639296188, |
|
"grad_norm": 12.286388401977892, |
|
"learning_rate": 0.000131057423998504, |
|
"loss": 1.252, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5962854349951124, |
|
"grad_norm": 2.5541457654289395, |
|
"learning_rate": 0.00012597728444435418, |
|
"loss": 1.215, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6060606060606061, |
|
"grad_norm": 1.1732833800621696, |
|
"learning_rate": 0.00012092527646833949, |
|
"loss": 1.2053, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6158357771260997, |
|
"grad_norm": 1.4481798374657, |
|
"learning_rate": 0.00011590731617752066, |
|
"loss": 1.2061, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6256109481915934, |
|
"grad_norm": 0.9912604590459435, |
|
"learning_rate": 0.00011092927980773267, |
|
"loss": 1.1604, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.635386119257087, |
|
"grad_norm": 1.0322599469502478, |
|
"learning_rate": 0.00010599699684227311, |
|
"loss": 1.1369, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6451612903225806, |
|
"grad_norm": 1.5437893851108073, |
|
"learning_rate": 0.00010111624318534006, |
|
"loss": 1.1721, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6549364613880743, |
|
"grad_norm": 1.383693940282455, |
|
"learning_rate": 9.629273439821313e-05, |
|
"loss": 1.1094, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6647116324535679, |
|
"grad_norm": 2.598856235735631, |
|
"learning_rate": 9.15321190060981e-05, |
|
"loss": 1.1251, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6744868035190615, |
|
"grad_norm": 1.0440965009342513, |
|
"learning_rate": 8.683997188347435e-05, |
|
"loss": 1.0953, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6842619745845552, |
|
"grad_norm": 1.2172823916521676, |
|
"learning_rate": 8.222178772568959e-05, |
|
"loss": 1.0839, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6940371456500489, |
|
"grad_norm": 0.9291103607651107, |
|
"learning_rate": 7.768297461444765e-05, |
|
"loss": 1.0786, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7038123167155426, |
|
"grad_norm": 36.656353739168324, |
|
"learning_rate": 7.32288476847252e-05, |
|
"loss": 1.1001, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7135874877810362, |
|
"grad_norm": 0.7923067460462172, |
|
"learning_rate": 6.886462290053158e-05, |
|
"loss": 1.0793, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7233626588465298, |
|
"grad_norm": 0.8419278909431203, |
|
"learning_rate": 6.4595410946803e-05, |
|
"loss": 1.0869, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7331378299120235, |
|
"grad_norm": 1.0339571657214093, |
|
"learning_rate": 6.04262112445821e-05, |
|
"loss": 1.0128, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7429130009775171, |
|
"grad_norm": 0.7333799573780848, |
|
"learning_rate": 5.636190609649249e-05, |
|
"loss": 1.0101, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7526881720430108, |
|
"grad_norm": 0.8595033406539786, |
|
"learning_rate": 5.240725496936372e-05, |
|
"loss": 1.0224, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7624633431085044, |
|
"grad_norm": 0.7112388580251547, |
|
"learning_rate": 4.8566888920701196e-05, |
|
"loss": 1.0016, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.772238514173998, |
|
"grad_norm": 0.8167634152987004, |
|
"learning_rate": 4.48453051755301e-05, |
|
"loss": 0.9793, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7820136852394917, |
|
"grad_norm": 0.6682157986400304, |
|
"learning_rate": 4.12468618599611e-05, |
|
"loss": 1.0015, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7917888563049853, |
|
"grad_norm": 0.7797058649722416, |
|
"learning_rate": 3.777577289764752e-05, |
|
"loss": 0.9784, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8015640273704789, |
|
"grad_norm": 0.6768885831261553, |
|
"learning_rate": 3.443610307510907e-05, |
|
"loss": 0.9605, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8113391984359726, |
|
"grad_norm": 0.6416162504789994, |
|
"learning_rate": 3.1231763281701305e-05, |
|
"loss": 0.971, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8211143695014663, |
|
"grad_norm": 0.6784950124595185, |
|
"learning_rate": 2.816650592980495e-05, |
|
"loss": 0.9553, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.83088954056696, |
|
"grad_norm": 0.689853020596839, |
|
"learning_rate": 2.5243920560598184e-05, |
|
"loss": 0.9351, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8406647116324536, |
|
"grad_norm": 0.6460996639595076, |
|
"learning_rate": 2.24674296405579e-05, |
|
"loss": 0.9313, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8504398826979472, |
|
"grad_norm": 0.6636695831614443, |
|
"learning_rate": 1.98402845536117e-05, |
|
"loss": 0.9266, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8602150537634409, |
|
"grad_norm": 0.6119153060968766, |
|
"learning_rate": 1.736556179363543e-05, |
|
"loss": 0.9134, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8699902248289345, |
|
"grad_norm": 0.5955119176542452, |
|
"learning_rate": 1.5046159361753224e-05, |
|
"loss": 0.9198, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8797653958944281, |
|
"grad_norm": 0.6324852203366718, |
|
"learning_rate": 1.2884793372660207e-05, |
|
"loss": 0.9051, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8895405669599218, |
|
"grad_norm": 0.6803072103352026, |
|
"learning_rate": 1.0883994873941815e-05, |
|
"loss": 0.8923, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.8993157380254154, |
|
"grad_norm": 2.125025596226289, |
|
"learning_rate": 9.046106882113751e-06, |
|
"loss": 0.9315, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 0.5812394213670566, |
|
"learning_rate": 7.373281638854328e-06, |
|
"loss": 0.9131, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9188660801564027, |
|
"grad_norm": 0.6819326019234143, |
|
"learning_rate": 5.867478090641892e-06, |
|
"loss": 0.9521, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9286412512218963, |
|
"grad_norm": 0.6009777818830081, |
|
"learning_rate": 4.530459594748592e-06, |
|
"loss": 0.8585, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9384164222873901, |
|
"grad_norm": 0.5996940109383458, |
|
"learning_rate": 3.363791854277348e-06, |
|
"loss": 0.8938, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9481915933528837, |
|
"grad_norm": 0.6175664551394122, |
|
"learning_rate": 2.3688410846596282e-06, |
|
"loss": 0.8891, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9579667644183774, |
|
"grad_norm": 2.0911762607681768, |
|
"learning_rate": 1.5467724137617043e-06, |
|
"loss": 0.924, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.967741935483871, |
|
"grad_norm": 0.6696352577624144, |
|
"learning_rate": 8.985485174722973e-07, |
|
"loss": 0.9077, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9775171065493646, |
|
"grad_norm": 0.5766498486118373, |
|
"learning_rate": 4.249284923700358e-07, |
|
"loss": 0.9012, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9872922776148583, |
|
"grad_norm": 0.6254945643083392, |
|
"learning_rate": 1.2646696679042833e-07, |
|
"loss": 0.9035, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.9970674486803519, |
|
"grad_norm": 0.6202670688713676, |
|
"learning_rate": 3.5134513334200697e-09, |
|
"loss": 0.9303, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.9990224828934506, |
|
"eval_loss": 3.596351385116577, |
|
"eval_runtime": 2.2495, |
|
"eval_samples_per_second": 2.667, |
|
"eval_steps_per_second": 0.445, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.9990224828934506, |
|
"step": 511, |
|
"total_flos": 26722078556160.0, |
|
"train_loss": 1.3314139091805235, |
|
"train_runtime": 8881.3456, |
|
"train_samples_per_second": 1.842, |
|
"train_steps_per_second": 0.058 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 511, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 26722078556160.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|