|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9381931732529343, |
|
"eval_steps": 500, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004307095940562076, |
|
"grad_norm": 9.035698890686035, |
|
"learning_rate": 5.730659025787966e-07, |
|
"loss": 1.2789, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008614191881124151, |
|
"grad_norm": 6.42362642288208, |
|
"learning_rate": 1.1461318051575932e-06, |
|
"loss": 1.1797, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.012921287821686228, |
|
"grad_norm": 1.6813161373138428, |
|
"learning_rate": 1.7191977077363897e-06, |
|
"loss": 0.9736, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.017228383762248303, |
|
"grad_norm": 0.9371042251586914, |
|
"learning_rate": 2.2922636103151864e-06, |
|
"loss": 0.871, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02153547970281038, |
|
"grad_norm": 0.7695503234863281, |
|
"learning_rate": 2.865329512893983e-06, |
|
"loss": 0.8025, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.025842575643372456, |
|
"grad_norm": 0.716374397277832, |
|
"learning_rate": 3.4383954154727795e-06, |
|
"loss": 0.7698, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.030149671583934532, |
|
"grad_norm": 0.6864265203475952, |
|
"learning_rate": 4.011461318051576e-06, |
|
"loss": 0.7701, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.034456767524496605, |
|
"grad_norm": 0.7581704258918762, |
|
"learning_rate": 4.584527220630373e-06, |
|
"loss": 0.7619, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.03876386346505868, |
|
"grad_norm": 0.6616791486740112, |
|
"learning_rate": 5.157593123209169e-06, |
|
"loss": 0.7296, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04307095940562076, |
|
"grad_norm": 0.6397051811218262, |
|
"learning_rate": 5.730659025787966e-06, |
|
"loss": 0.7453, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.047378055346182835, |
|
"grad_norm": 0.6572911143302917, |
|
"learning_rate": 6.303724928366762e-06, |
|
"loss": 0.767, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.05168515128674491, |
|
"grad_norm": 0.669222354888916, |
|
"learning_rate": 6.876790830945559e-06, |
|
"loss": 0.7369, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.05599224722730699, |
|
"grad_norm": 0.6517964601516724, |
|
"learning_rate": 7.449856733524356e-06, |
|
"loss": 0.7186, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.060299343167869064, |
|
"grad_norm": 0.6209223866462708, |
|
"learning_rate": 8.022922636103152e-06, |
|
"loss": 0.7155, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.06460643910843114, |
|
"grad_norm": 0.6591508388519287, |
|
"learning_rate": 8.595988538681949e-06, |
|
"loss": 0.7289, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.06891353504899321, |
|
"grad_norm": 0.5842370390892029, |
|
"learning_rate": 9.169054441260746e-06, |
|
"loss": 0.7183, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0732206309895553, |
|
"grad_norm": 0.7117204070091248, |
|
"learning_rate": 9.742120343839543e-06, |
|
"loss": 0.7192, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.07752772693011736, |
|
"grad_norm": 0.6163178086280823, |
|
"learning_rate": 1.0315186246418338e-05, |
|
"loss": 0.7193, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.08183482287067945, |
|
"grad_norm": 0.5932906270027161, |
|
"learning_rate": 1.0888252148997137e-05, |
|
"loss": 0.714, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.08614191881124152, |
|
"grad_norm": 0.5982919335365295, |
|
"learning_rate": 1.1461318051575932e-05, |
|
"loss": 0.7058, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0904490147518036, |
|
"grad_norm": 0.6208463907241821, |
|
"learning_rate": 1.2034383954154729e-05, |
|
"loss": 0.7189, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.09475611069236567, |
|
"grad_norm": 0.5887411236763, |
|
"learning_rate": 1.2607449856733524e-05, |
|
"loss": 0.7249, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.09906320663292775, |
|
"grad_norm": 0.5963988900184631, |
|
"learning_rate": 1.3180515759312323e-05, |
|
"loss": 0.7293, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.10337030257348982, |
|
"grad_norm": 0.5715692043304443, |
|
"learning_rate": 1.3753581661891118e-05, |
|
"loss": 0.6845, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.1076773985140519, |
|
"grad_norm": 0.639398455619812, |
|
"learning_rate": 1.4326647564469915e-05, |
|
"loss": 0.6994, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.11198449445461398, |
|
"grad_norm": 0.6884477734565735, |
|
"learning_rate": 1.4899713467048712e-05, |
|
"loss": 0.7126, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.11629159039517606, |
|
"grad_norm": 0.6021578907966614, |
|
"learning_rate": 1.5472779369627507e-05, |
|
"loss": 0.7215, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.12059868633573813, |
|
"grad_norm": 0.6716468930244446, |
|
"learning_rate": 1.6045845272206304e-05, |
|
"loss": 0.6969, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.1249057822763002, |
|
"grad_norm": 0.5783571600914001, |
|
"learning_rate": 1.66189111747851e-05, |
|
"loss": 0.7111, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.12921287821686228, |
|
"grad_norm": 0.5546681880950928, |
|
"learning_rate": 1.7191977077363898e-05, |
|
"loss": 0.7, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13351997415742436, |
|
"grad_norm": 0.5409330129623413, |
|
"learning_rate": 1.7765042979942695e-05, |
|
"loss": 0.696, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.13782707009798642, |
|
"grad_norm": 0.5752865672111511, |
|
"learning_rate": 1.833810888252149e-05, |
|
"loss": 0.6883, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1421341660385485, |
|
"grad_norm": 0.6340565085411072, |
|
"learning_rate": 1.891117478510029e-05, |
|
"loss": 0.6881, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1464412619791106, |
|
"grad_norm": 0.5298891067504883, |
|
"learning_rate": 1.9484240687679085e-05, |
|
"loss": 0.6935, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.15074835791967267, |
|
"grad_norm": 0.5659753680229187, |
|
"learning_rate": 1.9999998871916207e-05, |
|
"loss": 0.7103, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.15505545386023473, |
|
"grad_norm": 0.6017744541168213, |
|
"learning_rate": 1.999986350216883e-05, |
|
"loss": 0.6855, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1593625498007968, |
|
"grad_norm": 0.5426760911941528, |
|
"learning_rate": 1.999950251916212e-05, |
|
"loss": 0.6914, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1636696457413589, |
|
"grad_norm": 0.5532637238502502, |
|
"learning_rate": 1.999891593104044e-05, |
|
"loss": 0.6895, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.16797674168192098, |
|
"grad_norm": 0.5581168532371521, |
|
"learning_rate": 1.9998103751038177e-05, |
|
"loss": 0.6897, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.17228383762248303, |
|
"grad_norm": 0.5208210945129395, |
|
"learning_rate": 1.9997065997479442e-05, |
|
"loss": 0.6889, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17659093356304512, |
|
"grad_norm": 0.5863595604896545, |
|
"learning_rate": 1.9995802693777644e-05, |
|
"loss": 0.6905, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1808980295036072, |
|
"grad_norm": 0.5605342984199524, |
|
"learning_rate": 1.9994313868434988e-05, |
|
"loss": 0.6815, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.18520512544416926, |
|
"grad_norm": 0.5580301880836487, |
|
"learning_rate": 1.9992599555041798e-05, |
|
"loss": 0.7067, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.18951222138473134, |
|
"grad_norm": 0.558312177658081, |
|
"learning_rate": 1.999065979227579e-05, |
|
"loss": 0.7061, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.19381931732529342, |
|
"grad_norm": 0.5273975133895874, |
|
"learning_rate": 1.998849462390118e-05, |
|
"loss": 0.6905, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1981264132658555, |
|
"grad_norm": 0.4772217571735382, |
|
"learning_rate": 1.9986104098767703e-05, |
|
"loss": 0.686, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.20243350920641756, |
|
"grad_norm": 0.5336763858795166, |
|
"learning_rate": 1.9983488270809515e-05, |
|
"loss": 0.6861, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.20674060514697964, |
|
"grad_norm": 0.4961983859539032, |
|
"learning_rate": 1.9980647199043966e-05, |
|
"loss": 0.6882, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.21104770108754173, |
|
"grad_norm": 0.5408128499984741, |
|
"learning_rate": 1.9977580947570275e-05, |
|
"loss": 0.7001, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2153547970281038, |
|
"grad_norm": 0.5350680351257324, |
|
"learning_rate": 1.997428958556809e-05, |
|
"loss": 0.6931, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.21966189296866587, |
|
"grad_norm": 0.5455281734466553, |
|
"learning_rate": 1.9970773187295917e-05, |
|
"loss": 0.6919, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.22396898890922795, |
|
"grad_norm": 0.524664580821991, |
|
"learning_rate": 1.9967031832089438e-05, |
|
"loss": 0.6738, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.22827608484979003, |
|
"grad_norm": 0.48598727583885193, |
|
"learning_rate": 1.9963065604359746e-05, |
|
"loss": 0.6678, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.23258318079035212, |
|
"grad_norm": 0.5560494065284729, |
|
"learning_rate": 1.9958874593591418e-05, |
|
"loss": 0.694, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.23689027673091417, |
|
"grad_norm": 0.5516777038574219, |
|
"learning_rate": 1.99544588943405e-05, |
|
"loss": 0.6715, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.24119737267147626, |
|
"grad_norm": 0.5097941756248474, |
|
"learning_rate": 1.9949818606232393e-05, |
|
"loss": 0.6782, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.24550446861203834, |
|
"grad_norm": 0.5353350639343262, |
|
"learning_rate": 1.9944953833959567e-05, |
|
"loss": 0.6904, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.2498115645526004, |
|
"grad_norm": 0.5160298943519592, |
|
"learning_rate": 1.9939864687279237e-05, |
|
"loss": 0.6756, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.2541186604931625, |
|
"grad_norm": 0.5377163887023926, |
|
"learning_rate": 1.993455128101087e-05, |
|
"loss": 0.712, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.25842575643372456, |
|
"grad_norm": 0.47318100929260254, |
|
"learning_rate": 1.992901373503359e-05, |
|
"loss": 0.6648, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2627328523742866, |
|
"grad_norm": 0.4977729916572571, |
|
"learning_rate": 1.992325217428348e-05, |
|
"loss": 0.6893, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.26703994831484873, |
|
"grad_norm": 0.5569038391113281, |
|
"learning_rate": 1.991726672875077e-05, |
|
"loss": 0.6876, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.2713470442554108, |
|
"grad_norm": 0.544884443283081, |
|
"learning_rate": 1.9911057533476884e-05, |
|
"loss": 0.6736, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.27565414019597284, |
|
"grad_norm": 0.5159808993339539, |
|
"learning_rate": 1.9904624728551417e-05, |
|
"loss": 0.674, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.27996123613653495, |
|
"grad_norm": 0.48680537939071655, |
|
"learning_rate": 1.989796845910896e-05, |
|
"loss": 0.6903, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.284268332077097, |
|
"grad_norm": 0.527867317199707, |
|
"learning_rate": 1.9891088875325827e-05, |
|
"loss": 0.6693, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.2885754280176591, |
|
"grad_norm": 0.5441365838050842, |
|
"learning_rate": 1.988398613241666e-05, |
|
"loss": 0.6721, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.2928825239582212, |
|
"grad_norm": 0.5693966150283813, |
|
"learning_rate": 1.9876660390630954e-05, |
|
"loss": 0.6684, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.29718961989878323, |
|
"grad_norm": 0.5607503652572632, |
|
"learning_rate": 1.986911181524941e-05, |
|
"loss": 0.6783, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.30149671583934534, |
|
"grad_norm": 0.5421719551086426, |
|
"learning_rate": 1.9861340576580225e-05, |
|
"loss": 0.6658, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3058038117799074, |
|
"grad_norm": 0.497612863779068, |
|
"learning_rate": 1.9853346849955236e-05, |
|
"loss": 0.6816, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.31011090772046945, |
|
"grad_norm": 0.5503632426261902, |
|
"learning_rate": 1.984513081572598e-05, |
|
"loss": 0.6663, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.31441800366103156, |
|
"grad_norm": 0.5319767594337463, |
|
"learning_rate": 1.983669265925961e-05, |
|
"loss": 0.6513, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3187250996015936, |
|
"grad_norm": 0.5350950956344604, |
|
"learning_rate": 1.9828032570934726e-05, |
|
"loss": 0.6699, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3230321955421557, |
|
"grad_norm": 0.5330127477645874, |
|
"learning_rate": 1.9819150746137067e-05, |
|
"loss": 0.6786, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3273392914827178, |
|
"grad_norm": 0.4740910232067108, |
|
"learning_rate": 1.981004738525512e-05, |
|
"loss": 0.6867, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.33164638742327984, |
|
"grad_norm": 0.5131900906562805, |
|
"learning_rate": 1.980072269367557e-05, |
|
"loss": 0.6618, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.33595348336384195, |
|
"grad_norm": 0.4712623059749603, |
|
"learning_rate": 1.97911768817787e-05, |
|
"loss": 0.6863, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.340260579304404, |
|
"grad_norm": 0.5240254998207092, |
|
"learning_rate": 1.9781410164933626e-05, |
|
"loss": 0.6941, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.34456767524496607, |
|
"grad_norm": 0.5192612409591675, |
|
"learning_rate": 1.9771422763493434e-05, |
|
"loss": 0.6726, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.3488747711855282, |
|
"grad_norm": 0.4864448010921478, |
|
"learning_rate": 1.9761214902790217e-05, |
|
"loss": 0.6541, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.35318186712609023, |
|
"grad_norm": 0.5248873829841614, |
|
"learning_rate": 1.9750786813129995e-05, |
|
"loss": 0.6713, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.3574889630666523, |
|
"grad_norm": 0.5010212659835815, |
|
"learning_rate": 1.9740138729787505e-05, |
|
"loss": 0.6793, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.3617960590072144, |
|
"grad_norm": 0.4966225326061249, |
|
"learning_rate": 1.9729270893000913e-05, |
|
"loss": 0.6692, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.36610315494777645, |
|
"grad_norm": 0.48576685786247253, |
|
"learning_rate": 1.9718183547966366e-05, |
|
"loss": 0.6812, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.3704102508883385, |
|
"grad_norm": 0.5232109427452087, |
|
"learning_rate": 1.9706876944832486e-05, |
|
"loss": 0.6567, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.3747173468289006, |
|
"grad_norm": 0.4847777485847473, |
|
"learning_rate": 1.9695351338694713e-05, |
|
"loss": 0.6638, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.3790244427694627, |
|
"grad_norm": 0.49412795901298523, |
|
"learning_rate": 1.9683606989589553e-05, |
|
"loss": 0.6731, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.3833315387100248, |
|
"grad_norm": 0.5143546462059021, |
|
"learning_rate": 1.9671644162488716e-05, |
|
"loss": 0.6779, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.38763863465058684, |
|
"grad_norm": 0.5516107082366943, |
|
"learning_rate": 1.965946312729312e-05, |
|
"loss": 0.6798, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3919457305911489, |
|
"grad_norm": 0.5140990018844604, |
|
"learning_rate": 1.9647064158826825e-05, |
|
"loss": 0.6473, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.396252826531711, |
|
"grad_norm": 0.4911974370479584, |
|
"learning_rate": 1.9634447536830815e-05, |
|
"loss": 0.6565, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.40055992247227307, |
|
"grad_norm": 0.4995877742767334, |
|
"learning_rate": 1.9621613545956703e-05, |
|
"loss": 0.6514, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.4048670184128351, |
|
"grad_norm": 0.48752328753471375, |
|
"learning_rate": 1.9608562475760287e-05, |
|
"loss": 0.6751, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.40917411435339723, |
|
"grad_norm": 0.4956004321575165, |
|
"learning_rate": 1.9595294620695036e-05, |
|
"loss": 0.6492, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.4134812102939593, |
|
"grad_norm": 0.48215603828430176, |
|
"learning_rate": 1.958181028010544e-05, |
|
"loss": 0.6741, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.4177883062345214, |
|
"grad_norm": 0.48835939168930054, |
|
"learning_rate": 1.9568109758220253e-05, |
|
"loss": 0.6638, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.42209540217508346, |
|
"grad_norm": 0.47754788398742676, |
|
"learning_rate": 1.9554193364145635e-05, |
|
"loss": 0.6657, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.4264024981156455, |
|
"grad_norm": 0.5080917477607727, |
|
"learning_rate": 1.9540061411858172e-05, |
|
"loss": 0.6675, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.4307095940562076, |
|
"grad_norm": 0.4634297788143158, |
|
"learning_rate": 1.9525714220197802e-05, |
|
"loss": 0.6693, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.4350166899967697, |
|
"grad_norm": 0.4760366678237915, |
|
"learning_rate": 1.951115211286061e-05, |
|
"loss": 0.6721, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.43932378593733173, |
|
"grad_norm": 0.5227916836738586, |
|
"learning_rate": 1.9496375418391525e-05, |
|
"loss": 0.6691, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.44363088187789385, |
|
"grad_norm": 0.5157990455627441, |
|
"learning_rate": 1.948138447017692e-05, |
|
"loss": 0.6774, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.4479379778184559, |
|
"grad_norm": 0.49596408009529114, |
|
"learning_rate": 1.9466179606437087e-05, |
|
"loss": 0.6313, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.45224507375901796, |
|
"grad_norm": 0.47041237354278564, |
|
"learning_rate": 1.945076117021859e-05, |
|
"loss": 0.6724, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.45655216969958007, |
|
"grad_norm": 0.5206364989280701, |
|
"learning_rate": 1.9435129509386538e-05, |
|
"loss": 0.6843, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.4608592656401421, |
|
"grad_norm": 0.5067657828330994, |
|
"learning_rate": 1.9419284976616745e-05, |
|
"loss": 0.6649, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.46516636158070424, |
|
"grad_norm": 1.3445152044296265, |
|
"learning_rate": 1.9403227929387756e-05, |
|
"loss": 0.6548, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.4694734575212663, |
|
"grad_norm": 0.5465224385261536, |
|
"learning_rate": 1.93869587299728e-05, |
|
"loss": 0.6427, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.47378055346182835, |
|
"grad_norm": 0.49137911200523376, |
|
"learning_rate": 1.9370477745431587e-05, |
|
"loss": 0.6519, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.47808764940239046, |
|
"grad_norm": 0.48190736770629883, |
|
"learning_rate": 1.935378534760206e-05, |
|
"loss": 0.6615, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.4823947453429525, |
|
"grad_norm": 0.4869353771209717, |
|
"learning_rate": 1.9336881913091992e-05, |
|
"loss": 0.65, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.48670184128351457, |
|
"grad_norm": 0.4473590552806854, |
|
"learning_rate": 1.931976782327048e-05, |
|
"loss": 0.6821, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.4910089372240767, |
|
"grad_norm": 0.4703207314014435, |
|
"learning_rate": 1.9302443464259352e-05, |
|
"loss": 0.657, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.49531603316463874, |
|
"grad_norm": 0.48172295093536377, |
|
"learning_rate": 1.9284909226924457e-05, |
|
"loss": 0.6581, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.4996231291052008, |
|
"grad_norm": 0.4986841082572937, |
|
"learning_rate": 1.9267165506866835e-05, |
|
"loss": 0.664, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.5039302250457629, |
|
"grad_norm": 0.4936910569667816, |
|
"learning_rate": 1.9249212704413803e-05, |
|
"loss": 0.6409, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.508237320986325, |
|
"grad_norm": 0.48618724942207336, |
|
"learning_rate": 1.9231051224609918e-05, |
|
"loss": 0.6566, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.512544416926887, |
|
"grad_norm": 0.5300356149673462, |
|
"learning_rate": 1.921268147720784e-05, |
|
"loss": 0.6533, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.5168515128674491, |
|
"grad_norm": 0.4799743890762329, |
|
"learning_rate": 1.919410387665908e-05, |
|
"loss": 0.6677, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5211586088080112, |
|
"grad_norm": 0.5317394137382507, |
|
"learning_rate": 1.9175318842104667e-05, |
|
"loss": 0.6464, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.5254657047485732, |
|
"grad_norm": 0.49199768900871277, |
|
"learning_rate": 1.9156326797365665e-05, |
|
"loss": 0.6655, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.5297728006891353, |
|
"grad_norm": 0.4916874170303345, |
|
"learning_rate": 1.913712817093364e-05, |
|
"loss": 0.6372, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.5340798966296975, |
|
"grad_norm": 0.48562970757484436, |
|
"learning_rate": 1.9117723395960972e-05, |
|
"loss": 0.6639, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.5383869925702595, |
|
"grad_norm": 0.5152992010116577, |
|
"learning_rate": 1.909811291025109e-05, |
|
"loss": 0.6609, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.5426940885108216, |
|
"grad_norm": 0.48352181911468506, |
|
"learning_rate": 1.907829715624859e-05, |
|
"loss": 0.6726, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.5470011844513837, |
|
"grad_norm": 0.5064017176628113, |
|
"learning_rate": 1.905827658102926e-05, |
|
"loss": 0.6698, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.5513082803919457, |
|
"grad_norm": 0.46494290232658386, |
|
"learning_rate": 1.9038051636289997e-05, |
|
"loss": 0.68, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.5556153763325078, |
|
"grad_norm": 0.4788792133331299, |
|
"learning_rate": 1.9017622778338585e-05, |
|
"loss": 0.6501, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.5599224722730699, |
|
"grad_norm": 0.4712987542152405, |
|
"learning_rate": 1.8996990468083448e-05, |
|
"loss": 0.6488, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.5642295682136319, |
|
"grad_norm": 0.4997137784957886, |
|
"learning_rate": 1.8976155171023216e-05, |
|
"loss": 0.6518, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.568536664154194, |
|
"grad_norm": 0.5003030896186829, |
|
"learning_rate": 1.895511735723623e-05, |
|
"loss": 0.6317, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.5728437600947561, |
|
"grad_norm": 0.4551664888858795, |
|
"learning_rate": 1.8933877501369944e-05, |
|
"loss": 0.6634, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.5771508560353182, |
|
"grad_norm": 0.532534122467041, |
|
"learning_rate": 1.891243608263021e-05, |
|
"loss": 0.6656, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.5814579519758802, |
|
"grad_norm": 0.47166600823402405, |
|
"learning_rate": 1.889079358477047e-05, |
|
"loss": 0.657, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.5857650479164423, |
|
"grad_norm": 0.45552805066108704, |
|
"learning_rate": 1.8868950496080832e-05, |
|
"loss": 0.6652, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.5900721438570045, |
|
"grad_norm": 0.5267536044120789, |
|
"learning_rate": 1.884690730937707e-05, |
|
"loss": 0.6463, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.5943792397975665, |
|
"grad_norm": 0.49093228578567505, |
|
"learning_rate": 1.882466452198949e-05, |
|
"loss": 0.6604, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.5986863357381286, |
|
"grad_norm": 0.5105960369110107, |
|
"learning_rate": 1.880222263575172e-05, |
|
"loss": 0.6457, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.6029934316786907, |
|
"grad_norm": 0.47326135635375977, |
|
"learning_rate": 1.8779582156989384e-05, |
|
"loss": 0.6464, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.6073005276192527, |
|
"grad_norm": 0.4910115599632263, |
|
"learning_rate": 1.875674359650867e-05, |
|
"loss": 0.6547, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.6116076235598148, |
|
"grad_norm": 0.48352956771850586, |
|
"learning_rate": 1.873370746958482e-05, |
|
"loss": 0.654, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.6159147195003769, |
|
"grad_norm": 0.4722056984901428, |
|
"learning_rate": 1.871047429595049e-05, |
|
"loss": 0.6372, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.6202218154409389, |
|
"grad_norm": 0.4340212345123291, |
|
"learning_rate": 1.868704459978405e-05, |
|
"loss": 0.6507, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.624528911381501, |
|
"grad_norm": 0.48497867584228516, |
|
"learning_rate": 1.8663418909697723e-05, |
|
"loss": 0.6349, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.6288360073220631, |
|
"grad_norm": 0.4707370102405548, |
|
"learning_rate": 1.863959775872567e-05, |
|
"loss": 0.6445, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.6331431032626251, |
|
"grad_norm": 0.5151925683021545, |
|
"learning_rate": 1.861558168431199e-05, |
|
"loss": 0.6493, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.6374501992031872, |
|
"grad_norm": 0.47226110100746155, |
|
"learning_rate": 1.8591371228298554e-05, |
|
"loss": 0.6211, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.6417572951437494, |
|
"grad_norm": 0.48166829347610474, |
|
"learning_rate": 1.856696693691281e-05, |
|
"loss": 0.6476, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.6460643910843114, |
|
"grad_norm": 0.5039719343185425, |
|
"learning_rate": 1.8542369360755448e-05, |
|
"loss": 0.636, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.6503714870248735, |
|
"grad_norm": 0.45818519592285156, |
|
"learning_rate": 1.8517579054787974e-05, |
|
"loss": 0.658, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.6546785829654356, |
|
"grad_norm": 0.4803057014942169, |
|
"learning_rate": 1.8492596578320194e-05, |
|
"loss": 0.6468, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.6589856789059977, |
|
"grad_norm": 0.480227530002594, |
|
"learning_rate": 1.8467422494997593e-05, |
|
"loss": 0.641, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.6632927748465597, |
|
"grad_norm": 0.49187588691711426, |
|
"learning_rate": 1.844205737278863e-05, |
|
"loss": 0.6572, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.6675998707871218, |
|
"grad_norm": 0.49701517820358276, |
|
"learning_rate": 1.84165017839719e-05, |
|
"loss": 0.6567, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.6719069667276839, |
|
"grad_norm": 0.48368483781814575, |
|
"learning_rate": 1.8390756305123246e-05, |
|
"loss": 0.669, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.6762140626682459, |
|
"grad_norm": 0.5007254481315613, |
|
"learning_rate": 1.836482151710273e-05, |
|
"loss": 0.6448, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.680521158608808, |
|
"grad_norm": 0.44526585936546326, |
|
"learning_rate": 1.8338698005041556e-05, |
|
"loss": 0.6386, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.6848282545493701, |
|
"grad_norm": 0.4812663197517395, |
|
"learning_rate": 1.8312386358328828e-05, |
|
"loss": 0.6447, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.6891353504899321, |
|
"grad_norm": 0.4910503029823303, |
|
"learning_rate": 1.828588717059829e-05, |
|
"loss": 0.6449, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.6934424464304942, |
|
"grad_norm": 0.47431930899620056, |
|
"learning_rate": 1.8259201039714914e-05, |
|
"loss": 0.6372, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.6977495423710564, |
|
"grad_norm": 0.5024338364601135, |
|
"learning_rate": 1.8232328567761416e-05, |
|
"loss": 0.6433, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.7020566383116184, |
|
"grad_norm": 0.47510799765586853, |
|
"learning_rate": 1.820527036102467e-05, |
|
"loss": 0.6601, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.7063637342521805, |
|
"grad_norm": 0.47990313172340393, |
|
"learning_rate": 1.8178027029982027e-05, |
|
"loss": 0.6463, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.7106708301927426, |
|
"grad_norm": 0.5117030739784241, |
|
"learning_rate": 1.8150599189287553e-05, |
|
"loss": 0.6455, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.7149779261333046, |
|
"grad_norm": 0.4917861819267273, |
|
"learning_rate": 1.8122987457758147e-05, |
|
"loss": 0.6688, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.7192850220738667, |
|
"grad_norm": 0.49872297048568726, |
|
"learning_rate": 1.8095192458359588e-05, |
|
"loss": 0.6513, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.7235921180144288, |
|
"grad_norm": 0.47510796785354614, |
|
"learning_rate": 1.806721481819247e-05, |
|
"loss": 0.649, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.7278992139549908, |
|
"grad_norm": 0.4924173057079315, |
|
"learning_rate": 1.8039055168478074e-05, |
|
"loss": 0.6177, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.7322063098955529, |
|
"grad_norm": 0.4918348789215088, |
|
"learning_rate": 1.8010714144544104e-05, |
|
"loss": 0.6543, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.736513405836115, |
|
"grad_norm": 0.45298415422439575, |
|
"learning_rate": 1.7982192385810372e-05, |
|
"loss": 0.6367, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.740820501776677, |
|
"grad_norm": 0.46879851818084717, |
|
"learning_rate": 1.795349053577435e-05, |
|
"loss": 0.6414, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.7451275977172391, |
|
"grad_norm": 0.4573706388473511, |
|
"learning_rate": 1.7924609241996672e-05, |
|
"loss": 0.628, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.7494346936578012, |
|
"grad_norm": 0.46929094195365906, |
|
"learning_rate": 1.7895549156086514e-05, |
|
"loss": 0.6478, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.7537417895983634, |
|
"grad_norm": 0.5428628325462341, |
|
"learning_rate": 1.78663109336869e-05, |
|
"loss": 0.6405, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.7580488855389254, |
|
"grad_norm": 0.47853079438209534, |
|
"learning_rate": 1.78368952344599e-05, |
|
"loss": 0.6442, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.7623559814794875, |
|
"grad_norm": 0.46747061610221863, |
|
"learning_rate": 1.7807302722071742e-05, |
|
"loss": 0.6369, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.7666630774200496, |
|
"grad_norm": 0.5107671022415161, |
|
"learning_rate": 1.7777534064177864e-05, |
|
"loss": 0.6322, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.7709701733606116, |
|
"grad_norm": 0.5013517141342163, |
|
"learning_rate": 1.7747589932407826e-05, |
|
"loss": 0.6384, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.7752772693011737, |
|
"grad_norm": 0.5039073824882507, |
|
"learning_rate": 1.7717471002350162e-05, |
|
"loss": 0.6504, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.7795843652417358, |
|
"grad_norm": 0.4767347276210785, |
|
"learning_rate": 1.7687177953537148e-05, |
|
"loss": 0.645, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.7838914611822978, |
|
"grad_norm": 0.4766087532043457, |
|
"learning_rate": 1.7656711469429464e-05, |
|
"loss": 0.6249, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.7881985571228599, |
|
"grad_norm": 0.5031486749649048, |
|
"learning_rate": 1.7626072237400764e-05, |
|
"loss": 0.6263, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.792505653063422, |
|
"grad_norm": 0.444658488035202, |
|
"learning_rate": 1.759526094872219e-05, |
|
"loss": 0.6561, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.796812749003984, |
|
"grad_norm": 0.5070600509643555, |
|
"learning_rate": 1.7564278298546758e-05, |
|
"loss": 0.6477, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.8011198449445461, |
|
"grad_norm": 0.45487794280052185, |
|
"learning_rate": 1.753312498589367e-05, |
|
"loss": 0.6257, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.8054269408851082, |
|
"grad_norm": 0.4745471477508545, |
|
"learning_rate": 1.7501801713632568e-05, |
|
"loss": 0.6586, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.8097340368256702, |
|
"grad_norm": 0.4743909537792206, |
|
"learning_rate": 1.7470309188467645e-05, |
|
"loss": 0.6255, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.8140411327662324, |
|
"grad_norm": 0.5165956020355225, |
|
"learning_rate": 1.7438648120921736e-05, |
|
"loss": 0.6592, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.8183482287067945, |
|
"grad_norm": 0.455861359834671, |
|
"learning_rate": 1.740681922532025e-05, |
|
"loss": 0.6467, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.8226553246473565, |
|
"grad_norm": 0.468013733625412, |
|
"learning_rate": 1.7374823219775073e-05, |
|
"loss": 0.6382, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.8269624205879186, |
|
"grad_norm": 0.46119919419288635, |
|
"learning_rate": 1.7342660826168374e-05, |
|
"loss": 0.6437, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.8312695165284807, |
|
"grad_norm": 0.4399983286857605, |
|
"learning_rate": 1.73103327701363e-05, |
|
"loss": 0.6379, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.8355766124690428, |
|
"grad_norm": 0.46829739212989807, |
|
"learning_rate": 1.7277839781052617e-05, |
|
"loss": 0.6402, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.8398837084096048, |
|
"grad_norm": 0.5193459987640381, |
|
"learning_rate": 1.7245182592012248e-05, |
|
"loss": 0.6348, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.8441908043501669, |
|
"grad_norm": 0.5310715436935425, |
|
"learning_rate": 1.7212361939814735e-05, |
|
"loss": 0.6351, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.848497900290729, |
|
"grad_norm": 0.4883059561252594, |
|
"learning_rate": 1.7179378564947615e-05, |
|
"loss": 0.6401, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.852804996231291, |
|
"grad_norm": 0.5028474926948547, |
|
"learning_rate": 1.7146233211569723e-05, |
|
"loss": 0.6559, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.8571120921718531, |
|
"grad_norm": 0.48668941855430603, |
|
"learning_rate": 1.7112926627494385e-05, |
|
"loss": 0.6572, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.8614191881124152, |
|
"grad_norm": 0.4668605327606201, |
|
"learning_rate": 1.7079459564172555e-05, |
|
"loss": 0.6321, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8657262840529772, |
|
"grad_norm": 0.4556910991668701, |
|
"learning_rate": 1.7045832776675863e-05, |
|
"loss": 0.6268, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.8700333799935394, |
|
"grad_norm": 0.45260846614837646, |
|
"learning_rate": 1.701204702367958e-05, |
|
"loss": 0.6271, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.8743404759341015, |
|
"grad_norm": 0.4828309714794159, |
|
"learning_rate": 1.6978103067445494e-05, |
|
"loss": 0.6351, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.8786475718746635, |
|
"grad_norm": 0.4691152274608612, |
|
"learning_rate": 1.6944001673804723e-05, |
|
"loss": 0.6512, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.8829546678152256, |
|
"grad_norm": 0.4812765419483185, |
|
"learning_rate": 1.6909743612140417e-05, |
|
"loss": 0.6335, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.8872617637557877, |
|
"grad_norm": 0.4415755867958069, |
|
"learning_rate": 1.687532965537043e-05, |
|
"loss": 0.6541, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.8915688596963497, |
|
"grad_norm": 0.4993227422237396, |
|
"learning_rate": 1.6840760579929846e-05, |
|
"loss": 0.6318, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.8958759556369118, |
|
"grad_norm": 0.4628779888153076, |
|
"learning_rate": 1.6806037165753498e-05, |
|
"loss": 0.6369, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.9001830515774739, |
|
"grad_norm": 0.5235878229141235, |
|
"learning_rate": 1.677116019625834e-05, |
|
"loss": 0.6415, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.9044901475180359, |
|
"grad_norm": 0.4750138819217682, |
|
"learning_rate": 1.6736130458325793e-05, |
|
"loss": 0.6101, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.908797243458598, |
|
"grad_norm": 0.5292583107948303, |
|
"learning_rate": 1.6700948742283977e-05, |
|
"loss": 0.6248, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.9131043393991601, |
|
"grad_norm": 0.45959070324897766, |
|
"learning_rate": 1.6665615841889885e-05, |
|
"loss": 0.6339, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.9174114353397222, |
|
"grad_norm": 0.48287901282310486, |
|
"learning_rate": 1.6630132554311486e-05, |
|
"loss": 0.6161, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.9217185312802842, |
|
"grad_norm": 0.4725618064403534, |
|
"learning_rate": 1.6594499680109722e-05, |
|
"loss": 0.627, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.9260256272208464, |
|
"grad_norm": 0.4820912778377533, |
|
"learning_rate": 1.6558718023220457e-05, |
|
"loss": 0.6399, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.9303327231614085, |
|
"grad_norm": 0.48815685510635376, |
|
"learning_rate": 1.6522788390936328e-05, |
|
"loss": 0.6437, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.9346398191019705, |
|
"grad_norm": 0.4747340679168701, |
|
"learning_rate": 1.648671159388855e-05, |
|
"loss": 0.6455, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.9389469150425326, |
|
"grad_norm": 0.4894673526287079, |
|
"learning_rate": 1.6450488446028612e-05, |
|
"loss": 0.6545, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.9432540109830947, |
|
"grad_norm": 0.4756160080432892, |
|
"learning_rate": 1.641411976460991e-05, |
|
"loss": 0.6498, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.9475611069236567, |
|
"grad_norm": 0.45228078961372375, |
|
"learning_rate": 1.637760637016932e-05, |
|
"loss": 0.6438, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.9518682028642188, |
|
"grad_norm": 0.49898287653923035, |
|
"learning_rate": 1.6340949086508676e-05, |
|
"loss": 0.6518, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.9561752988047809, |
|
"grad_norm": 0.4354493021965027, |
|
"learning_rate": 1.6304148740676204e-05, |
|
"loss": 0.6125, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.9604823947453429, |
|
"grad_norm": 0.45118704438209534, |
|
"learning_rate": 1.6267206162947823e-05, |
|
"loss": 0.6146, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.964789490685905, |
|
"grad_norm": 0.4822487533092499, |
|
"learning_rate": 1.6230122186808443e-05, |
|
"loss": 0.6425, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.9690965866264671, |
|
"grad_norm": 0.490903377532959, |
|
"learning_rate": 1.619289764893317e-05, |
|
"loss": 0.6353, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.9734036825670291, |
|
"grad_norm": 0.4738866686820984, |
|
"learning_rate": 1.615553338916839e-05, |
|
"loss": 0.6315, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.9777107785075912, |
|
"grad_norm": 0.46285027265548706, |
|
"learning_rate": 1.6118030250512863e-05, |
|
"loss": 0.6501, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.9820178744481534, |
|
"grad_norm": 0.46414172649383545, |
|
"learning_rate": 1.6080389079098657e-05, |
|
"loss": 0.6501, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.9863249703887154, |
|
"grad_norm": 0.5042113661766052, |
|
"learning_rate": 1.604261072417211e-05, |
|
"loss": 0.6319, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.9906320663292775, |
|
"grad_norm": 0.43653419613838196, |
|
"learning_rate": 1.600469603807464e-05, |
|
"loss": 0.6461, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.9949391622698396, |
|
"grad_norm": 0.4572006165981293, |
|
"learning_rate": 1.5966645876223505e-05, |
|
"loss": 0.6477, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.9992462582104016, |
|
"grad_norm": 0.43867436051368713, |
|
"learning_rate": 1.5928461097092532e-05, |
|
"loss": 0.6288, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.0035533541509638, |
|
"grad_norm": 0.5620077848434448, |
|
"learning_rate": 1.589014256219273e-05, |
|
"loss": 0.5378, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.0078604500915258, |
|
"grad_norm": 0.4836018681526184, |
|
"learning_rate": 1.5851691136052842e-05, |
|
"loss": 0.5421, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.0121675460320878, |
|
"grad_norm": 0.49632197618484497, |
|
"learning_rate": 1.581310768619988e-05, |
|
"loss": 0.5237, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.01647464197265, |
|
"grad_norm": 0.49445948004722595, |
|
"learning_rate": 1.5774393083139513e-05, |
|
"loss": 0.5313, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.020781737913212, |
|
"grad_norm": 0.5299666523933411, |
|
"learning_rate": 1.5735548200336435e-05, |
|
"loss": 0.5326, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.025088833853774, |
|
"grad_norm": 0.5012844204902649, |
|
"learning_rate": 1.569657391419468e-05, |
|
"loss": 0.5401, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.0293959297943363, |
|
"grad_norm": 0.4741289019584656, |
|
"learning_rate": 1.565747110403781e-05, |
|
"loss": 0.5052, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.0337030257348983, |
|
"grad_norm": 0.4950823485851288, |
|
"learning_rate": 1.5618240652089123e-05, |
|
"loss": 0.5294, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.0380101216754603, |
|
"grad_norm": 0.4934958517551422, |
|
"learning_rate": 1.557888344345171e-05, |
|
"loss": 0.5278, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.0423172176160225, |
|
"grad_norm": 0.467101514339447, |
|
"learning_rate": 1.5539400366088503e-05, |
|
"loss": 0.504, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.0466243135565845, |
|
"grad_norm": 0.5479716062545776, |
|
"learning_rate": 1.5499792310802238e-05, |
|
"loss": 0.5256, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.0509314094971465, |
|
"grad_norm": 0.4706737697124481, |
|
"learning_rate": 1.5460060171215362e-05, |
|
"loss": 0.5251, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.0552385054377087, |
|
"grad_norm": 0.5142565965652466, |
|
"learning_rate": 1.5420204843749857e-05, |
|
"loss": 0.5333, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.0595456013782707, |
|
"grad_norm": 0.5430694222450256, |
|
"learning_rate": 1.5380227227607032e-05, |
|
"loss": 0.5391, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.0638526973188327, |
|
"grad_norm": 0.4780258536338806, |
|
"learning_rate": 1.5340128224747225e-05, |
|
"loss": 0.5338, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.068159793259395, |
|
"grad_norm": 0.47647717595100403, |
|
"learning_rate": 1.5299908739869464e-05, |
|
"loss": 0.5178, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.072466889199957, |
|
"grad_norm": 0.5330241918563843, |
|
"learning_rate": 1.525956968039103e-05, |
|
"loss": 0.5027, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.076773985140519, |
|
"grad_norm": 0.4681854546070099, |
|
"learning_rate": 1.5219111956427027e-05, |
|
"loss": 0.5315, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 0.5060921311378479, |
|
"learning_rate": 1.5178536480769803e-05, |
|
"loss": 0.5103, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.0853881770216431, |
|
"grad_norm": 0.497199147939682, |
|
"learning_rate": 1.5137844168868391e-05, |
|
"loss": 0.5302, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.0896952729622051, |
|
"grad_norm": 0.4658927321434021, |
|
"learning_rate": 1.5097035938807834e-05, |
|
"loss": 0.5196, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.0940023689027674, |
|
"grad_norm": 0.5109249353408813, |
|
"learning_rate": 1.5056112711288475e-05, |
|
"loss": 0.5099, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.0983094648433294, |
|
"grad_norm": 0.5212246775627136, |
|
"learning_rate": 1.5015075409605189e-05, |
|
"loss": 0.4911, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.1026165607838914, |
|
"grad_norm": 0.47850698232650757, |
|
"learning_rate": 1.497392495962656e-05, |
|
"loss": 0.5225, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.1069236567244536, |
|
"grad_norm": 0.4982755184173584, |
|
"learning_rate": 1.4932662289773969e-05, |
|
"loss": 0.5278, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.1112307526650156, |
|
"grad_norm": 0.49975791573524475, |
|
"learning_rate": 1.4891288331000668e-05, |
|
"loss": 0.5261, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.1155378486055776, |
|
"grad_norm": 0.5002388954162598, |
|
"learning_rate": 1.484980401677077e-05, |
|
"loss": 0.5313, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.1198449445461398, |
|
"grad_norm": 0.4950617253780365, |
|
"learning_rate": 1.4808210283038183e-05, |
|
"loss": 0.5286, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.1241520404867018, |
|
"grad_norm": 0.49831753969192505, |
|
"learning_rate": 1.47665080682255e-05, |
|
"loss": 0.5133, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.128459136427264, |
|
"grad_norm": 0.6730148792266846, |
|
"learning_rate": 1.4724698313202825e-05, |
|
"loss": 0.5224, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.132766232367826, |
|
"grad_norm": 0.5355139374732971, |
|
"learning_rate": 1.4682781961266546e-05, |
|
"loss": 0.5188, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.137073328308388, |
|
"grad_norm": 0.5199829936027527, |
|
"learning_rate": 1.4640759958118045e-05, |
|
"loss": 0.5121, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.14138042424895, |
|
"grad_norm": 0.5292408466339111, |
|
"learning_rate": 1.4598633251842373e-05, |
|
"loss": 0.5267, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.1456875201895123, |
|
"grad_norm": 0.5363121032714844, |
|
"learning_rate": 1.4556402792886856e-05, |
|
"loss": 0.5147, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.1499946161300743, |
|
"grad_norm": 0.5359490513801575, |
|
"learning_rate": 1.4514069534039649e-05, |
|
"loss": 0.5155, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.1543017120706365, |
|
"grad_norm": 0.4707220792770386, |
|
"learning_rate": 1.4471634430408244e-05, |
|
"loss": 0.5419, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.1586088080111985, |
|
"grad_norm": 0.4798811376094818, |
|
"learning_rate": 1.4429098439397901e-05, |
|
"loss": 0.5152, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.1629159039517605, |
|
"grad_norm": 0.4730081260204315, |
|
"learning_rate": 1.4386462520690087e-05, |
|
"loss": 0.5283, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.1672229998923225, |
|
"grad_norm": 0.524276614189148, |
|
"learning_rate": 1.4343727636220785e-05, |
|
"loss": 0.5087, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.1715300958328847, |
|
"grad_norm": 0.5093454122543335, |
|
"learning_rate": 1.430089475015882e-05, |
|
"loss": 0.5371, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.1758371917734467, |
|
"grad_norm": 0.5228180289268494, |
|
"learning_rate": 1.4257964828884077e-05, |
|
"loss": 0.5121, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.180144287714009, |
|
"grad_norm": 0.5263434052467346, |
|
"learning_rate": 1.4214938840965729e-05, |
|
"loss": 0.5104, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.184451383654571, |
|
"grad_norm": 0.5519675612449646, |
|
"learning_rate": 1.417181775714036e-05, |
|
"loss": 0.5081, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.188758479595133, |
|
"grad_norm": 0.48901626467704773, |
|
"learning_rate": 1.4128602550290078e-05, |
|
"loss": 0.5332, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.1930655755356951, |
|
"grad_norm": 0.5022098422050476, |
|
"learning_rate": 1.4085294195420563e-05, |
|
"loss": 0.5267, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.1973726714762571, |
|
"grad_norm": 0.5244942307472229, |
|
"learning_rate": 1.4041893669639053e-05, |
|
"loss": 0.5309, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.2016797674168191, |
|
"grad_norm": 0.5060109496116638, |
|
"learning_rate": 1.399840195213233e-05, |
|
"loss": 0.509, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.2059868633573814, |
|
"grad_norm": 0.48709142208099365, |
|
"learning_rate": 1.3954820024144595e-05, |
|
"loss": 0.5249, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.2102939592979434, |
|
"grad_norm": 0.48755279183387756, |
|
"learning_rate": 1.3911148868955357e-05, |
|
"loss": 0.5216, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.2146010552385054, |
|
"grad_norm": 0.4871668219566345, |
|
"learning_rate": 1.3867389471857229e-05, |
|
"loss": 0.5199, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.2189081511790676, |
|
"grad_norm": 0.5313363671302795, |
|
"learning_rate": 1.3823542820133706e-05, |
|
"loss": 0.5146, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.2232152471196296, |
|
"grad_norm": 0.48473960161209106, |
|
"learning_rate": 1.3779609903036894e-05, |
|
"loss": 0.5126, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.2275223430601916, |
|
"grad_norm": 0.5411814451217651, |
|
"learning_rate": 1.3735591711765189e-05, |
|
"loss": 0.5186, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.2318294390007538, |
|
"grad_norm": 0.5286210775375366, |
|
"learning_rate": 1.3691489239440899e-05, |
|
"loss": 0.513, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.2361365349413158, |
|
"grad_norm": 0.47112423181533813, |
|
"learning_rate": 1.3647303481087858e-05, |
|
"loss": 0.5268, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.2404436308818778, |
|
"grad_norm": 0.5465208888053894, |
|
"learning_rate": 1.3603035433608977e-05, |
|
"loss": 0.5109, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.24475072682244, |
|
"grad_norm": 0.4758882522583008, |
|
"learning_rate": 1.3558686095763732e-05, |
|
"loss": 0.5307, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.249057822763002, |
|
"grad_norm": 0.5721794962882996, |
|
"learning_rate": 1.3514256468145645e-05, |
|
"loss": 0.5104, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.2533649187035643, |
|
"grad_norm": 0.5125982761383057, |
|
"learning_rate": 1.3469747553159714e-05, |
|
"loss": 0.5278, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.2576720146441263, |
|
"grad_norm": 0.5272653698921204, |
|
"learning_rate": 1.342516035499978e-05, |
|
"loss": 0.5276, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.2619791105846883, |
|
"grad_norm": 0.5423816442489624, |
|
"learning_rate": 1.3380495879625884e-05, |
|
"loss": 0.5408, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.2662862065252503, |
|
"grad_norm": 0.4817509055137634, |
|
"learning_rate": 1.333575513474157e-05, |
|
"loss": 0.5152, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.2705933024658125, |
|
"grad_norm": 0.5113592147827148, |
|
"learning_rate": 1.3290939129771143e-05, |
|
"loss": 0.5397, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.2749003984063745, |
|
"grad_norm": 0.5106224417686462, |
|
"learning_rate": 1.3246048875836898e-05, |
|
"loss": 0.5269, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.2792074943469367, |
|
"grad_norm": 0.5446826219558716, |
|
"learning_rate": 1.3201085385736313e-05, |
|
"loss": 0.5252, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.2835145902874987, |
|
"grad_norm": 0.484943151473999, |
|
"learning_rate": 1.3156049673919184e-05, |
|
"loss": 0.525, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.2878216862280607, |
|
"grad_norm": 0.5692194700241089, |
|
"learning_rate": 1.3110942756464764e-05, |
|
"loss": 0.5197, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.2921287821686227, |
|
"grad_norm": 0.5009827017784119, |
|
"learning_rate": 1.3065765651058802e-05, |
|
"loss": 0.5325, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.296435878109185, |
|
"grad_norm": 0.4953298568725586, |
|
"learning_rate": 1.3020519376970613e-05, |
|
"loss": 0.5095, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.300742974049747, |
|
"grad_norm": 0.5116891264915466, |
|
"learning_rate": 1.2975204955030068e-05, |
|
"loss": 0.5263, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.3050500699903091, |
|
"grad_norm": 0.4844088554382324, |
|
"learning_rate": 1.2929823407604567e-05, |
|
"loss": 0.5113, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.3093571659308711, |
|
"grad_norm": 0.4732029438018799, |
|
"learning_rate": 1.2884375758575967e-05, |
|
"loss": 0.532, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.3136642618714331, |
|
"grad_norm": 0.5469485521316528, |
|
"learning_rate": 1.2838863033317484e-05, |
|
"loss": 0.519, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.3179713578119951, |
|
"grad_norm": 0.4888254702091217, |
|
"learning_rate": 1.2793286258670565e-05, |
|
"loss": 0.5097, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.3222784537525574, |
|
"grad_norm": 0.5359517335891724, |
|
"learning_rate": 1.2747646462921717e-05, |
|
"loss": 0.5246, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.3265855496931194, |
|
"grad_norm": 0.5013801455497742, |
|
"learning_rate": 1.2701944675779299e-05, |
|
"loss": 0.524, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.3308926456336816, |
|
"grad_norm": 0.49307557940483093, |
|
"learning_rate": 1.2656181928350301e-05, |
|
"loss": 0.5403, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.3351997415742436, |
|
"grad_norm": 0.47625210881233215, |
|
"learning_rate": 1.2610359253117078e-05, |
|
"loss": 0.5275, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.3395068375148056, |
|
"grad_norm": 0.5096368789672852, |
|
"learning_rate": 1.2564477683914053e-05, |
|
"loss": 0.5231, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.3438139334553676, |
|
"grad_norm": 0.4992668926715851, |
|
"learning_rate": 1.2518538255904389e-05, |
|
"loss": 0.5235, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.3481210293959298, |
|
"grad_norm": 0.491062194108963, |
|
"learning_rate": 1.2472542005556647e-05, |
|
"loss": 0.5432, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.3524281253364918, |
|
"grad_norm": 0.48666131496429443, |
|
"learning_rate": 1.2426489970621385e-05, |
|
"loss": 0.531, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.356735221277054, |
|
"grad_norm": 0.4706876575946808, |
|
"learning_rate": 1.2380383190107757e-05, |
|
"loss": 0.5188, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.361042317217616, |
|
"grad_norm": 0.4910385310649872, |
|
"learning_rate": 1.2334222704260063e-05, |
|
"loss": 0.5106, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.365349413158178, |
|
"grad_norm": 0.506514847278595, |
|
"learning_rate": 1.2288009554534291e-05, |
|
"loss": 0.5292, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.36965650909874, |
|
"grad_norm": 0.49671700596809387, |
|
"learning_rate": 1.2241744783574596e-05, |
|
"loss": 0.5284, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.3739636050393023, |
|
"grad_norm": 0.4892718195915222, |
|
"learning_rate": 1.219542943518981e-05, |
|
"loss": 0.5215, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.3782707009798643, |
|
"grad_norm": 0.5412102937698364, |
|
"learning_rate": 1.2149064554329864e-05, |
|
"loss": 0.5256, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.3825777969204265, |
|
"grad_norm": 0.4869970679283142, |
|
"learning_rate": 1.2102651187062227e-05, |
|
"loss": 0.5218, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.3868848928609885, |
|
"grad_norm": 0.5195066332817078, |
|
"learning_rate": 1.2056190380548299e-05, |
|
"loss": 0.5269, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.3911919888015505, |
|
"grad_norm": 0.5343438982963562, |
|
"learning_rate": 1.2009683183019788e-05, |
|
"loss": 0.5301, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.3954990847421127, |
|
"grad_norm": 0.522270679473877, |
|
"learning_rate": 1.1963130643755055e-05, |
|
"loss": 0.545, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.3998061806826747, |
|
"grad_norm": 0.501485288143158, |
|
"learning_rate": 1.191653381305545e-05, |
|
"loss": 0.5253, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.4041132766232367, |
|
"grad_norm": 0.5288712382316589, |
|
"learning_rate": 1.186989374222161e-05, |
|
"loss": 0.5181, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.408420372563799, |
|
"grad_norm": 0.5131502151489258, |
|
"learning_rate": 1.1823211483529733e-05, |
|
"loss": 0.5138, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.412727468504361, |
|
"grad_norm": 0.4853404462337494, |
|
"learning_rate": 1.1776488090207852e-05, |
|
"loss": 0.5319, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.417034564444923, |
|
"grad_norm": 0.5093010663986206, |
|
"learning_rate": 1.1729724616412062e-05, |
|
"loss": 0.5155, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.4213416603854852, |
|
"grad_norm": 0.5078168511390686, |
|
"learning_rate": 1.1682922117202736e-05, |
|
"loss": 0.5206, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.4256487563260472, |
|
"grad_norm": 0.5315324664115906, |
|
"learning_rate": 1.163608164852073e-05, |
|
"loss": 0.5314, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.4299558522666094, |
|
"grad_norm": 0.4705192446708679, |
|
"learning_rate": 1.1589204267163545e-05, |
|
"loss": 0.4966, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.4342629482071714, |
|
"grad_norm": 0.48757535219192505, |
|
"learning_rate": 1.15422910307615e-05, |
|
"loss": 0.5299, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.4385700441477334, |
|
"grad_norm": 0.5582148432731628, |
|
"learning_rate": 1.1495342997753864e-05, |
|
"loss": 0.5201, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.4428771400882954, |
|
"grad_norm": 0.5134326219558716, |
|
"learning_rate": 1.1448361227364963e-05, |
|
"loss": 0.5061, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.4471842360288576, |
|
"grad_norm": 0.5316387414932251, |
|
"learning_rate": 1.1401346779580303e-05, |
|
"loss": 0.5145, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.4514913319694196, |
|
"grad_norm": 0.5328738689422607, |
|
"learning_rate": 1.1354300715122637e-05, |
|
"loss": 0.5288, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.4557984279099818, |
|
"grad_norm": 0.5279168486595154, |
|
"learning_rate": 1.1307224095428058e-05, |
|
"loss": 0.5031, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.4601055238505438, |
|
"grad_norm": 0.5049686431884766, |
|
"learning_rate": 1.1260117982622021e-05, |
|
"loss": 0.5004, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.4644126197911058, |
|
"grad_norm": 0.47000184655189514, |
|
"learning_rate": 1.1212983439495392e-05, |
|
"loss": 0.5267, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.4687197157316678, |
|
"grad_norm": 0.49505382776260376, |
|
"learning_rate": 1.1165821529480483e-05, |
|
"loss": 0.5278, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.47302681167223, |
|
"grad_norm": 0.568454384803772, |
|
"learning_rate": 1.1118633316627037e-05, |
|
"loss": 0.5116, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.477333907612792, |
|
"grad_norm": 0.5094279646873474, |
|
"learning_rate": 1.1071419865578241e-05, |
|
"loss": 0.5181, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.4816410035533543, |
|
"grad_norm": 0.5605435371398926, |
|
"learning_rate": 1.1024182241546686e-05, |
|
"loss": 0.5191, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.4859480994939163, |
|
"grad_norm": 0.49941274523735046, |
|
"learning_rate": 1.097692151029036e-05, |
|
"loss": 0.5036, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.4902551954344783, |
|
"grad_norm": 0.5064433813095093, |
|
"learning_rate": 1.0929638738088571e-05, |
|
"loss": 0.5195, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.4945622913750403, |
|
"grad_norm": 0.5021061301231384, |
|
"learning_rate": 1.088233499171792e-05, |
|
"loss": 0.522, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.4988693873156025, |
|
"grad_norm": 0.5188096761703491, |
|
"learning_rate": 1.0835011338428217e-05, |
|
"loss": 0.5156, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.5031764832561645, |
|
"grad_norm": 0.6124559640884399, |
|
"learning_rate": 1.0787668845918393e-05, |
|
"loss": 0.5145, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.5074835791967267, |
|
"grad_norm": 0.48937344551086426, |
|
"learning_rate": 1.074030858231244e-05, |
|
"loss": 0.515, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5117906751372887, |
|
"grad_norm": 0.518526017665863, |
|
"learning_rate": 1.0692931616135283e-05, |
|
"loss": 0.505, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.5160977710778507, |
|
"grad_norm": 0.5395667552947998, |
|
"learning_rate": 1.0645539016288686e-05, |
|
"loss": 0.5076, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.5204048670184127, |
|
"grad_norm": 0.495190292596817, |
|
"learning_rate": 1.059813185202714e-05, |
|
"loss": 0.523, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.524711962958975, |
|
"grad_norm": 0.49644342064857483, |
|
"learning_rate": 1.055071119293373e-05, |
|
"loss": 0.5038, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.5290190588995372, |
|
"grad_norm": 0.483696848154068, |
|
"learning_rate": 1.0503278108896e-05, |
|
"loss": 0.5103, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.5333261548400992, |
|
"grad_norm": 0.5149986147880554, |
|
"learning_rate": 1.0455833670081831e-05, |
|
"loss": 0.5402, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.5376332507806612, |
|
"grad_norm": 0.4734952449798584, |
|
"learning_rate": 1.0408378946915282e-05, |
|
"loss": 0.5292, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.5419403467212232, |
|
"grad_norm": 0.5490080118179321, |
|
"learning_rate": 1.0360915010052443e-05, |
|
"loss": 0.5155, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.5462474426617852, |
|
"grad_norm": 0.5176838636398315, |
|
"learning_rate": 1.0313442930357278e-05, |
|
"loss": 0.5111, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.5505545386023474, |
|
"grad_norm": 0.5659157633781433, |
|
"learning_rate": 1.026596377887747e-05, |
|
"loss": 0.5152, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.5548616345429096, |
|
"grad_norm": 0.5195504426956177, |
|
"learning_rate": 1.0218478626820256e-05, |
|
"loss": 0.5178, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.5591687304834716, |
|
"grad_norm": 0.533338189125061, |
|
"learning_rate": 1.0170988545528248e-05, |
|
"loss": 0.5138, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.5634758264240336, |
|
"grad_norm": 0.5108840465545654, |
|
"learning_rate": 1.0123494606455278e-05, |
|
"loss": 0.5273, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.5677829223645956, |
|
"grad_norm": 0.4785379469394684, |
|
"learning_rate": 1.0075997881142208e-05, |
|
"loss": 0.5071, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.5720900183051576, |
|
"grad_norm": 0.49497827887535095, |
|
"learning_rate": 1.0028499441192765e-05, |
|
"loss": 0.5132, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.5763971142457198, |
|
"grad_norm": 0.5214102864265442, |
|
"learning_rate": 9.981000358249368e-06, |
|
"loss": 0.5133, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.580704210186282, |
|
"grad_norm": 0.47462400794029236, |
|
"learning_rate": 9.933501703968928e-06, |
|
"loss": 0.5226, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.585011306126844, |
|
"grad_norm": 0.4743979275226593, |
|
"learning_rate": 9.8860045499987e-06, |
|
"loss": 0.5219, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.589318402067406, |
|
"grad_norm": 0.5265910625457764, |
|
"learning_rate": 9.838509967952076e-06, |
|
"loss": 0.4945, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.593625498007968, |
|
"grad_norm": 0.5075172185897827, |
|
"learning_rate": 9.791019029384437e-06, |
|
"loss": 0.5175, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.59793259394853, |
|
"grad_norm": 0.5206677913665771, |
|
"learning_rate": 9.743532805768948e-06, |
|
"loss": 0.5188, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.6022396898890923, |
|
"grad_norm": 0.4802674651145935, |
|
"learning_rate": 9.696052368472406e-06, |
|
"loss": 0.5064, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.6065467858296545, |
|
"grad_norm": 0.5289535522460938, |
|
"learning_rate": 9.648578788731044e-06, |
|
"loss": 0.5281, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.6108538817702165, |
|
"grad_norm": 0.47722700238227844, |
|
"learning_rate": 9.601113137626394e-06, |
|
"loss": 0.5151, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.6151609777107785, |
|
"grad_norm": 0.4994152784347534, |
|
"learning_rate": 9.553656486061098e-06, |
|
"loss": 0.52, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.6194680736513405, |
|
"grad_norm": 0.48130089044570923, |
|
"learning_rate": 9.506209904734753e-06, |
|
"loss": 0.5336, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.6237751695919027, |
|
"grad_norm": 0.48449528217315674, |
|
"learning_rate": 9.45877446411976e-06, |
|
"loss": 0.5252, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.6280822655324647, |
|
"grad_norm": 0.5411643981933594, |
|
"learning_rate": 9.411351234437163e-06, |
|
"loss": 0.5187, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.632389361473027, |
|
"grad_norm": 0.5133873820304871, |
|
"learning_rate": 9.363941285632507e-06, |
|
"loss": 0.5217, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.636696457413589, |
|
"grad_norm": 0.5814666748046875, |
|
"learning_rate": 9.3165456873517e-06, |
|
"loss": 0.5, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.641003553354151, |
|
"grad_norm": 0.52715665102005, |
|
"learning_rate": 9.269165508916883e-06, |
|
"loss": 0.5184, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.645310649294713, |
|
"grad_norm": 0.48196879029273987, |
|
"learning_rate": 9.221801819302288e-06, |
|
"loss": 0.5191, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.6496177452352752, |
|
"grad_norm": 0.49397778511047363, |
|
"learning_rate": 9.174455687110142e-06, |
|
"loss": 0.5013, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.6539248411758372, |
|
"grad_norm": 0.5037091970443726, |
|
"learning_rate": 9.127128180546548e-06, |
|
"loss": 0.5298, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.6582319371163994, |
|
"grad_norm": 0.5031833052635193, |
|
"learning_rate": 9.079820367397384e-06, |
|
"loss": 0.4929, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.6625390330569614, |
|
"grad_norm": 0.5380353927612305, |
|
"learning_rate": 9.032533315004207e-06, |
|
"loss": 0.4968, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.6668461289975234, |
|
"grad_norm": 0.5191226005554199, |
|
"learning_rate": 8.98526809024018e-06, |
|
"loss": 0.5267, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.6711532249380854, |
|
"grad_norm": 0.5179468393325806, |
|
"learning_rate": 8.938025759486007e-06, |
|
"loss": 0.5159, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.6754603208786476, |
|
"grad_norm": 0.4779166579246521, |
|
"learning_rate": 8.89080738860585e-06, |
|
"loss": 0.5211, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.6797674168192096, |
|
"grad_norm": 0.5136571526527405, |
|
"learning_rate": 8.843614042923318e-06, |
|
"loss": 0.5003, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.6840745127597718, |
|
"grad_norm": 0.540773332118988, |
|
"learning_rate": 8.796446787197383e-06, |
|
"loss": 0.5131, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.6883816087003338, |
|
"grad_norm": 0.5126665234565735, |
|
"learning_rate": 8.749306685598409e-06, |
|
"loss": 0.5093, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.6926887046408958, |
|
"grad_norm": 0.47659188508987427, |
|
"learning_rate": 8.702194801684112e-06, |
|
"loss": 0.5158, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.6969958005814578, |
|
"grad_norm": 0.47945475578308105, |
|
"learning_rate": 8.655112198375564e-06, |
|
"loss": 0.5026, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.70130289652202, |
|
"grad_norm": 0.4939498007297516, |
|
"learning_rate": 8.60805993793323e-06, |
|
"loss": 0.5099, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.7056099924625823, |
|
"grad_norm": 0.5328351259231567, |
|
"learning_rate": 8.561039081932975e-06, |
|
"loss": 0.52, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.7099170884031443, |
|
"grad_norm": 0.49865198135375977, |
|
"learning_rate": 8.514050691242145e-06, |
|
"loss": 0.5077, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.7142241843437063, |
|
"grad_norm": 0.49807870388031006, |
|
"learning_rate": 8.467095825995605e-06, |
|
"loss": 0.4976, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.7185312802842683, |
|
"grad_norm": 0.5023031234741211, |
|
"learning_rate": 8.420175545571837e-06, |
|
"loss": 0.5233, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.7228383762248303, |
|
"grad_norm": 0.49054110050201416, |
|
"learning_rate": 8.373290908569026e-06, |
|
"loss": 0.5115, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.7271454721653925, |
|
"grad_norm": 0.47637811303138733, |
|
"learning_rate": 8.32644297278119e-06, |
|
"loss": 0.5103, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.7314525681059547, |
|
"grad_norm": 0.5239661931991577, |
|
"learning_rate": 8.279632795174304e-06, |
|
"loss": 0.5161, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.7357596640465167, |
|
"grad_norm": 0.5000544190406799, |
|
"learning_rate": 8.232861431862457e-06, |
|
"loss": 0.5113, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.7400667599870787, |
|
"grad_norm": 0.5361005067825317, |
|
"learning_rate": 8.186129938084028e-06, |
|
"loss": 0.5137, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.7443738559276407, |
|
"grad_norm": 0.48270535469055176, |
|
"learning_rate": 8.139439368177868e-06, |
|
"loss": 0.5116, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.7486809518682027, |
|
"grad_norm": 0.48645904660224915, |
|
"learning_rate": 8.092790775559522e-06, |
|
"loss": 0.517, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.752988047808765, |
|
"grad_norm": 0.4865799844264984, |
|
"learning_rate": 8.046185212697459e-06, |
|
"loss": 0.5202, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.7572951437493272, |
|
"grad_norm": 0.5095897912979126, |
|
"learning_rate": 7.999623731089327e-06, |
|
"loss": 0.5186, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.7616022396898892, |
|
"grad_norm": 0.49918055534362793, |
|
"learning_rate": 7.953107381238226e-06, |
|
"loss": 0.5091, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.7659093356304512, |
|
"grad_norm": 0.5209227204322815, |
|
"learning_rate": 7.906637212629011e-06, |
|
"loss": 0.5098, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.7702164315710132, |
|
"grad_norm": 0.5320930480957031, |
|
"learning_rate": 7.860214273704614e-06, |
|
"loss": 0.5172, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.7745235275115752, |
|
"grad_norm": 0.4841155707836151, |
|
"learning_rate": 7.813839611842387e-06, |
|
"loss": 0.4851, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.7788306234521374, |
|
"grad_norm": 0.5300472378730774, |
|
"learning_rate": 7.767514273330473e-06, |
|
"loss": 0.4953, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.7831377193926996, |
|
"grad_norm": 0.5021957159042358, |
|
"learning_rate": 7.721239303344201e-06, |
|
"loss": 0.5112, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.7874448153332616, |
|
"grad_norm": 0.498737096786499, |
|
"learning_rate": 7.675015745922499e-06, |
|
"loss": 0.5045, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.7917519112738236, |
|
"grad_norm": 0.4690532684326172, |
|
"learning_rate": 7.628844643944349e-06, |
|
"loss": 0.5102, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.7960590072143856, |
|
"grad_norm": 0.5077162384986877, |
|
"learning_rate": 7.582727039105255e-06, |
|
"loss": 0.5105, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.8003661031549478, |
|
"grad_norm": 0.47492554783821106, |
|
"learning_rate": 7.536663971893724e-06, |
|
"loss": 0.5008, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.8046731990955098, |
|
"grad_norm": 0.5036799907684326, |
|
"learning_rate": 7.4906564815678205e-06, |
|
"loss": 0.5179, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.808980295036072, |
|
"grad_norm": 0.5044455528259277, |
|
"learning_rate": 7.444705606131697e-06, |
|
"loss": 0.5171, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.813287390976634, |
|
"grad_norm": 0.5645790696144104, |
|
"learning_rate": 7.39881238231218e-06, |
|
"loss": 0.5111, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.817594486917196, |
|
"grad_norm": 0.4966265857219696, |
|
"learning_rate": 7.352977845535387e-06, |
|
"loss": 0.5144, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.821901582857758, |
|
"grad_norm": 0.5225628614425659, |
|
"learning_rate": 7.307203029903354e-06, |
|
"loss": 0.5115, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.8262086787983203, |
|
"grad_norm": 0.5282090902328491, |
|
"learning_rate": 7.261488968170713e-06, |
|
"loss": 0.5251, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 1.8305157747388823, |
|
"grad_norm": 0.5346629023551941, |
|
"learning_rate": 7.21583669172139e-06, |
|
"loss": 0.5042, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.8348228706794445, |
|
"grad_norm": 0.5141210556030273, |
|
"learning_rate": 7.170247230545335e-06, |
|
"loss": 0.5199, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 1.8391299666200065, |
|
"grad_norm": 0.5251668691635132, |
|
"learning_rate": 7.124721613215275e-06, |
|
"loss": 0.4936, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 1.8434370625605685, |
|
"grad_norm": 0.5125293731689453, |
|
"learning_rate": 7.079260866863523e-06, |
|
"loss": 0.5161, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 1.8477441585011305, |
|
"grad_norm": 0.4881208837032318, |
|
"learning_rate": 7.033866017158797e-06, |
|
"loss": 0.5142, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 1.8520512544416927, |
|
"grad_norm": 0.5215027928352356, |
|
"learning_rate": 6.9885380882830735e-06, |
|
"loss": 0.5097, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.8563583503822547, |
|
"grad_norm": 0.4931368827819824, |
|
"learning_rate": 6.943278102908491e-06, |
|
"loss": 0.5123, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 1.860665446322817, |
|
"grad_norm": 0.5080362558364868, |
|
"learning_rate": 6.898087082174267e-06, |
|
"loss": 0.5093, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.864972542263379, |
|
"grad_norm": 0.537807285785675, |
|
"learning_rate": 6.852966045663671e-06, |
|
"loss": 0.5245, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 1.869279638203941, |
|
"grad_norm": 0.5395597815513611, |
|
"learning_rate": 6.807916011381008e-06, |
|
"loss": 0.5016, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 1.873586734144503, |
|
"grad_norm": 0.48623430728912354, |
|
"learning_rate": 6.762937995728663e-06, |
|
"loss": 0.4962, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.8778938300850652, |
|
"grad_norm": 0.5058403611183167, |
|
"learning_rate": 6.718033013484147e-06, |
|
"loss": 0.5401, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 1.8822009260256274, |
|
"grad_norm": 0.5220633149147034, |
|
"learning_rate": 6.673202077777239e-06, |
|
"loss": 0.5112, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 1.8865080219661894, |
|
"grad_norm": 0.5163370966911316, |
|
"learning_rate": 6.6284462000670924e-06, |
|
"loss": 0.5231, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 1.8908151179067514, |
|
"grad_norm": 0.508660614490509, |
|
"learning_rate": 6.583766390119437e-06, |
|
"loss": 0.5304, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 1.8951222138473134, |
|
"grad_norm": 0.568144679069519, |
|
"learning_rate": 6.539163655983786e-06, |
|
"loss": 0.5086, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.8994293097878754, |
|
"grad_norm": 0.5001341700553894, |
|
"learning_rate": 6.494639003970701e-06, |
|
"loss": 0.5084, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 1.9037364057284376, |
|
"grad_norm": 0.5228297710418701, |
|
"learning_rate": 6.450193438629078e-06, |
|
"loss": 0.504, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 1.9080435016689998, |
|
"grad_norm": 0.4816001057624817, |
|
"learning_rate": 6.40582796272349e-06, |
|
"loss": 0.5102, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 1.9123505976095618, |
|
"grad_norm": 0.5058324933052063, |
|
"learning_rate": 6.361543577211566e-06, |
|
"loss": 0.524, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 1.9166576935501238, |
|
"grad_norm": 0.5428106188774109, |
|
"learning_rate": 6.317341281221392e-06, |
|
"loss": 0.5082, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.9209647894906858, |
|
"grad_norm": 0.5131290555000305, |
|
"learning_rate": 6.273222072028991e-06, |
|
"loss": 0.5316, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 1.9252718854312478, |
|
"grad_norm": 0.5238609910011292, |
|
"learning_rate": 6.2291869450358074e-06, |
|
"loss": 0.5021, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 1.92957898137181, |
|
"grad_norm": 0.4843258261680603, |
|
"learning_rate": 6.1852368937462585e-06, |
|
"loss": 0.5048, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 1.9338860773123723, |
|
"grad_norm": 0.5138316750526428, |
|
"learning_rate": 6.141372909745307e-06, |
|
"loss": 0.5352, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 1.9381931732529343, |
|
"grad_norm": 0.49319642782211304, |
|
"learning_rate": 6.097595982676103e-06, |
|
"loss": 0.5065, |
|
"step": 4500 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6963, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 300, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4861580908953600.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|