{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 200, "global_step": 1779, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016863406408094434, "grad_norm": 5.893406867980957, "learning_rate": 4.0000000000000003e-07, "loss": 2.8131, "step": 10 }, { "epoch": 0.03372681281618887, "grad_norm": 4.519655704498291, "learning_rate": 8.000000000000001e-07, "loss": 2.8225, "step": 20 }, { "epoch": 0.050590219224283306, "grad_norm": 4.535500526428223, "learning_rate": 1.2000000000000002e-06, "loss": 2.8042, "step": 30 }, { "epoch": 0.06745362563237774, "grad_norm": 4.360924243927002, "learning_rate": 1.6000000000000001e-06, "loss": 2.7337, "step": 40 }, { "epoch": 0.08431703204047218, "grad_norm": 5.550849914550781, "learning_rate": 2.0000000000000003e-06, "loss": 2.6976, "step": 50 }, { "epoch": 0.10118043844856661, "grad_norm": 4.623074054718018, "learning_rate": 2.4000000000000003e-06, "loss": 2.6382, "step": 60 }, { "epoch": 0.11804384485666104, "grad_norm": 6.715169906616211, "learning_rate": 2.8000000000000003e-06, "loss": 2.6356, "step": 70 }, { "epoch": 0.13490725126475547, "grad_norm": 5.305912017822266, "learning_rate": 3.2000000000000003e-06, "loss": 2.558, "step": 80 }, { "epoch": 0.15177065767284992, "grad_norm": 5.895500183105469, "learning_rate": 3.6000000000000003e-06, "loss": 2.4923, "step": 90 }, { "epoch": 0.16863406408094436, "grad_norm": 5.270618438720703, "learning_rate": 4.000000000000001e-06, "loss": 2.3955, "step": 100 }, { "epoch": 0.18549747048903878, "grad_norm": 5.943336486816406, "learning_rate": 4.4e-06, "loss": 2.3467, "step": 110 }, { "epoch": 0.20236087689713322, "grad_norm": 6.1944403648376465, "learning_rate": 4.800000000000001e-06, "loss": 2.3161, "step": 120 }, { "epoch": 0.21922428330522767, "grad_norm": 6.080775260925293, "learning_rate": 5.2e-06, "loss": 2.2254, "step": 130 }, { "epoch": 0.23608768971332209, "grad_norm": 6.028081893920898, "learning_rate": 5.600000000000001e-06, "loss": 2.205, "step": 140 }, { "epoch": 0.25295109612141653, "grad_norm": 5.6627631187438965, "learning_rate": 6e-06, "loss": 2.1021, "step": 150 }, { "epoch": 0.26981450252951095, "grad_norm": 7.71439266204834, "learning_rate": 6.4000000000000006e-06, "loss": 2.1383, "step": 160 }, { "epoch": 0.2866779089376054, "grad_norm": 8.687724113464355, "learning_rate": 6.800000000000001e-06, "loss": 1.9821, "step": 170 }, { "epoch": 0.30354131534569984, "grad_norm": 6.372674942016602, "learning_rate": 7.2000000000000005e-06, "loss": 1.9015, "step": 180 }, { "epoch": 0.32040472175379425, "grad_norm": 7.2390217781066895, "learning_rate": 7.600000000000001e-06, "loss": 1.9485, "step": 190 }, { "epoch": 0.3372681281618887, "grad_norm": 7.15785026550293, "learning_rate": 8.000000000000001e-06, "loss": 1.7671, "step": 200 }, { "epoch": 0.3372681281618887, "eval_accuracy": 0.5554272517321016, "eval_accuracy_label_arts, culture, entertainment and media": 0.5833333333333334, "eval_accuracy_label_conflict, war and peace": 0.7553191489361702, "eval_accuracy_label_crime, law and justice": 0.8959537572254336, "eval_accuracy_label_disaster, accident, and emergency incident": 0.32061068702290074, "eval_accuracy_label_economy, business, and finance": 0.6708860759493671, "eval_accuracy_label_environment": 0.125, "eval_accuracy_label_health": 0.7, "eval_accuracy_label_human interest": 0.0, "eval_accuracy_label_labour": 0.5, "eval_accuracy_label_lifestyle and leisure": 0.5, "eval_accuracy_label_politics": 0.28776978417266186, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.0, "eval_accuracy_label_society": 0.03508771929824561, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.5206037462663565, "eval_loss": 1.5660511255264282, "eval_precision": 0.5827940940390139, "eval_recall": 0.5554272517321016, "eval_runtime": 6.8757, "eval_samples_per_second": 125.95, "eval_steps_per_second": 7.999, "step": 200 }, { "epoch": 0.35413153456998314, "grad_norm": 7.729678630828857, "learning_rate": 8.400000000000001e-06, "loss": 1.7632, "step": 210 }, { "epoch": 0.37099494097807756, "grad_norm": 8.155638694763184, "learning_rate": 8.8e-06, "loss": 1.6068, "step": 220 }, { "epoch": 0.38785834738617203, "grad_norm": 7.142002105712891, "learning_rate": 9.200000000000002e-06, "loss": 1.4778, "step": 230 }, { "epoch": 0.40472175379426645, "grad_norm": 7.332587242126465, "learning_rate": 9.600000000000001e-06, "loss": 1.5736, "step": 240 }, { "epoch": 0.42158516020236086, "grad_norm": 13.657541275024414, "learning_rate": 1e-05, "loss": 1.4757, "step": 250 }, { "epoch": 0.43844856661045534, "grad_norm": 14.87531566619873, "learning_rate": 1.04e-05, "loss": 1.4364, "step": 260 }, { "epoch": 0.45531197301854975, "grad_norm": 15.269392967224121, "learning_rate": 1.0800000000000002e-05, "loss": 1.3215, "step": 270 }, { "epoch": 0.47217537942664417, "grad_norm": 10.192849159240723, "learning_rate": 1.1200000000000001e-05, "loss": 1.3129, "step": 280 }, { "epoch": 0.48903878583473864, "grad_norm": 8.036419868469238, "learning_rate": 1.16e-05, "loss": 1.2399, "step": 290 }, { "epoch": 0.5059021922428331, "grad_norm": 10.625975608825684, "learning_rate": 1.2e-05, "loss": 1.3016, "step": 300 }, { "epoch": 0.5227655986509275, "grad_norm": 14.144831657409668, "learning_rate": 1.2400000000000002e-05, "loss": 1.2514, "step": 310 }, { "epoch": 0.5396290050590219, "grad_norm": 11.439717292785645, "learning_rate": 1.2800000000000001e-05, "loss": 1.1768, "step": 320 }, { "epoch": 0.5564924114671164, "grad_norm": 18.485063552856445, "learning_rate": 1.3200000000000002e-05, "loss": 1.1273, "step": 330 }, { "epoch": 0.5733558178752108, "grad_norm": 12.227529525756836, "learning_rate": 1.3600000000000002e-05, "loss": 1.0002, "step": 340 }, { "epoch": 0.5902192242833052, "grad_norm": 10.209256172180176, "learning_rate": 1.4e-05, "loss": 1.1188, "step": 350 }, { "epoch": 0.6070826306913997, "grad_norm": 8.667159080505371, "learning_rate": 1.4400000000000001e-05, "loss": 1.0151, "step": 360 }, { "epoch": 0.6239460370994941, "grad_norm": 8.289810180664062, "learning_rate": 1.48e-05, "loss": 1.0148, "step": 370 }, { "epoch": 0.6408094435075885, "grad_norm": 8.391195297241211, "learning_rate": 1.5200000000000002e-05, "loss": 0.9461, "step": 380 }, { "epoch": 0.657672849915683, "grad_norm": 10.535934448242188, "learning_rate": 1.5600000000000003e-05, "loss": 0.9695, "step": 390 }, { "epoch": 0.6745362563237775, "grad_norm": 10.750452041625977, "learning_rate": 1.6000000000000003e-05, "loss": 1.0248, "step": 400 }, { "epoch": 0.6745362563237775, "eval_accuracy": 0.6709006928406467, "eval_accuracy_label_arts, culture, entertainment and media": 0.9166666666666666, "eval_accuracy_label_conflict, war and peace": 0.7978723404255319, "eval_accuracy_label_crime, law and justice": 0.815028901734104, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8625954198473282, "eval_accuracy_label_economy, business, and finance": 0.7215189873417721, "eval_accuracy_label_environment": 0.375, "eval_accuracy_label_health": 0.9, "eval_accuracy_label_human interest": 0.25, "eval_accuracy_label_labour": 1.0, "eval_accuracy_label_lifestyle and leisure": 0.5, "eval_accuracy_label_politics": 0.30935251798561153, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.4166666666666667, "eval_accuracy_label_society": 0.19298245614035087, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.6591476133220594, "eval_loss": 1.0774492025375366, "eval_precision": 0.6983965116011075, "eval_recall": 0.6709006928406467, "eval_runtime": 6.9138, "eval_samples_per_second": 125.256, "eval_steps_per_second": 7.955, "step": 400 }, { "epoch": 0.6913996627318718, "grad_norm": 11.188483238220215, "learning_rate": 1.64e-05, "loss": 0.8975, "step": 410 }, { "epoch": 0.7082630691399663, "grad_norm": 8.035601615905762, "learning_rate": 1.6800000000000002e-05, "loss": 0.9736, "step": 420 }, { "epoch": 0.7251264755480608, "grad_norm": 10.406453132629395, "learning_rate": 1.72e-05, "loss": 0.8802, "step": 430 }, { "epoch": 0.7419898819561551, "grad_norm": 14.611414909362793, "learning_rate": 1.76e-05, "loss": 1.0106, "step": 440 }, { "epoch": 0.7588532883642496, "grad_norm": 8.613202095031738, "learning_rate": 1.8e-05, "loss": 0.8411, "step": 450 }, { "epoch": 0.7757166947723441, "grad_norm": 15.136896133422852, "learning_rate": 1.8400000000000003e-05, "loss": 0.736, "step": 460 }, { "epoch": 0.7925801011804384, "grad_norm": 18.369375228881836, "learning_rate": 1.88e-05, "loss": 0.8807, "step": 470 }, { "epoch": 0.8094435075885329, "grad_norm": 5.840969562530518, "learning_rate": 1.9200000000000003e-05, "loss": 0.8024, "step": 480 }, { "epoch": 0.8263069139966274, "grad_norm": 14.514450073242188, "learning_rate": 1.9600000000000002e-05, "loss": 0.8334, "step": 490 }, { "epoch": 0.8431703204047217, "grad_norm": 20.5344295501709, "learning_rate": 2e-05, "loss": 0.917, "step": 500 }, { "epoch": 0.8600337268128162, "grad_norm": 8.904403686523438, "learning_rate": 1.9843627834245506e-05, "loss": 0.7659, "step": 510 }, { "epoch": 0.8768971332209107, "grad_norm": 8.624781608581543, "learning_rate": 1.968725566849101e-05, "loss": 0.8243, "step": 520 }, { "epoch": 0.893760539629005, "grad_norm": 14.050222396850586, "learning_rate": 1.9530883502736514e-05, "loss": 0.8085, "step": 530 }, { "epoch": 0.9106239460370995, "grad_norm": 8.78807258605957, "learning_rate": 1.9374511336982018e-05, "loss": 0.6006, "step": 540 }, { "epoch": 0.927487352445194, "grad_norm": 12.83305549621582, "learning_rate": 1.9218139171227522e-05, "loss": 0.8491, "step": 550 }, { "epoch": 0.9443507588532883, "grad_norm": 10.532402038574219, "learning_rate": 1.9061767005473026e-05, "loss": 0.7823, "step": 560 }, { "epoch": 0.9612141652613828, "grad_norm": 21.147523880004883, "learning_rate": 1.890539483971853e-05, "loss": 0.8382, "step": 570 }, { "epoch": 0.9780775716694773, "grad_norm": 8.727555274963379, "learning_rate": 1.8749022673964038e-05, "loss": 0.7173, "step": 580 }, { "epoch": 0.9949409780775716, "grad_norm": 9.259013175964355, "learning_rate": 1.859265050820954e-05, "loss": 0.7727, "step": 590 }, { "epoch": 1.0118043844856661, "grad_norm": 6.446993350982666, "learning_rate": 1.8436278342455046e-05, "loss": 0.5845, "step": 600 }, { "epoch": 1.0118043844856661, "eval_accuracy": 0.6535796766743649, "eval_accuracy_label_arts, culture, entertainment and media": 0.9166666666666666, "eval_accuracy_label_conflict, war and peace": 0.7287234042553191, "eval_accuracy_label_crime, law and justice": 0.6763005780346821, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8778625954198473, "eval_accuracy_label_economy, business, and finance": 0.7215189873417721, "eval_accuracy_label_environment": 0.4375, "eval_accuracy_label_health": 0.8, "eval_accuracy_label_human interest": 0.0, "eval_accuracy_label_labour": 1.0, "eval_accuracy_label_lifestyle and leisure": 0.75, "eval_accuracy_label_politics": 0.3669064748201439, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.4166666666666667, "eval_accuracy_label_society": 0.43859649122807015, "eval_accuracy_label_sport": 0.9230769230769231, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.6563434776934847, "eval_loss": 0.990666389465332, "eval_precision": 0.682947679005421, "eval_recall": 0.6535796766743649, "eval_runtime": 6.9215, "eval_samples_per_second": 125.118, "eval_steps_per_second": 7.946, "step": 600 }, { "epoch": 1.0286677908937605, "grad_norm": 10.464855194091797, "learning_rate": 1.8279906176700547e-05, "loss": 0.565, "step": 610 }, { "epoch": 1.045531197301855, "grad_norm": 10.188838958740234, "learning_rate": 1.8123534010946054e-05, "loss": 0.5325, "step": 620 }, { "epoch": 1.0623946037099494, "grad_norm": 11.83839225769043, "learning_rate": 1.7967161845191555e-05, "loss": 0.603, "step": 630 }, { "epoch": 1.0792580101180438, "grad_norm": 12.935112953186035, "learning_rate": 1.7810789679437062e-05, "loss": 0.5319, "step": 640 }, { "epoch": 1.0961214165261384, "grad_norm": 8.650348663330078, "learning_rate": 1.7654417513682566e-05, "loss": 0.4803, "step": 650 }, { "epoch": 1.1129848229342327, "grad_norm": 12.213525772094727, "learning_rate": 1.749804534792807e-05, "loss": 0.6975, "step": 660 }, { "epoch": 1.129848229342327, "grad_norm": 12.859235763549805, "learning_rate": 1.7341673182173575e-05, "loss": 0.6273, "step": 670 }, { "epoch": 1.1467116357504217, "grad_norm": 8.133111953735352, "learning_rate": 1.718530101641908e-05, "loss": 0.5179, "step": 680 }, { "epoch": 1.163575042158516, "grad_norm": 8.366670608520508, "learning_rate": 1.7028928850664583e-05, "loss": 0.5622, "step": 690 }, { "epoch": 1.1804384485666104, "grad_norm": 12.844072341918945, "learning_rate": 1.6872556684910087e-05, "loss": 0.6347, "step": 700 }, { "epoch": 1.197301854974705, "grad_norm": 3.2994637489318848, "learning_rate": 1.671618451915559e-05, "loss": 0.4805, "step": 710 }, { "epoch": 1.2141652613827993, "grad_norm": 6.4653167724609375, "learning_rate": 1.6559812353401095e-05, "loss": 0.5172, "step": 720 }, { "epoch": 1.2310286677908937, "grad_norm": 6.744974613189697, "learning_rate": 1.64034401876466e-05, "loss": 0.5601, "step": 730 }, { "epoch": 1.2478920741989883, "grad_norm": 9.451184272766113, "learning_rate": 1.6247068021892107e-05, "loss": 0.5289, "step": 740 }, { "epoch": 1.2647554806070826, "grad_norm": 5.093151092529297, "learning_rate": 1.6090695856137607e-05, "loss": 0.5674, "step": 750 }, { "epoch": 1.281618887015177, "grad_norm": 16.99575424194336, "learning_rate": 1.5934323690383115e-05, "loss": 0.4681, "step": 760 }, { "epoch": 1.2984822934232714, "grad_norm": 17.180509567260742, "learning_rate": 1.5777951524628616e-05, "loss": 0.5974, "step": 770 }, { "epoch": 1.315345699831366, "grad_norm": 11.804935455322266, "learning_rate": 1.5621579358874123e-05, "loss": 0.5693, "step": 780 }, { "epoch": 1.3322091062394603, "grad_norm": 12.154623985290527, "learning_rate": 1.5465207193119627e-05, "loss": 0.6001, "step": 790 }, { "epoch": 1.3490725126475547, "grad_norm": 11.09798526763916, "learning_rate": 1.530883502736513e-05, "loss": 0.6104, "step": 800 }, { "epoch": 1.3490725126475547, "eval_accuracy": 0.7240184757505773, "eval_accuracy_label_arts, culture, entertainment and media": 0.8333333333333334, "eval_accuracy_label_conflict, war and peace": 0.7021276595744681, "eval_accuracy_label_crime, law and justice": 0.8323699421965318, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8778625954198473, "eval_accuracy_label_economy, business, and finance": 0.7848101265822784, "eval_accuracy_label_environment": 0.5, "eval_accuracy_label_health": 0.7, "eval_accuracy_label_human interest": 0.25, "eval_accuracy_label_labour": 1.0, "eval_accuracy_label_lifestyle and leisure": 0.75, "eval_accuracy_label_politics": 0.6330935251798561, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.25, "eval_accuracy_label_society": 0.3684210526315789, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.7232835318516546, "eval_loss": 0.8674274682998657, "eval_precision": 0.7332860565477397, "eval_recall": 0.7240184757505773, "eval_runtime": 6.8885, "eval_samples_per_second": 125.717, "eval_steps_per_second": 7.984, "step": 800 }, { "epoch": 1.3659359190556493, "grad_norm": 12.72695255279541, "learning_rate": 1.5152462861610635e-05, "loss": 0.5333, "step": 810 }, { "epoch": 1.3827993254637436, "grad_norm": 9.709075927734375, "learning_rate": 1.4996090695856138e-05, "loss": 0.488, "step": 820 }, { "epoch": 1.399662731871838, "grad_norm": 11.909979820251465, "learning_rate": 1.4839718530101644e-05, "loss": 0.6037, "step": 830 }, { "epoch": 1.4165261382799326, "grad_norm": 16.916475296020508, "learning_rate": 1.4683346364347146e-05, "loss": 0.5647, "step": 840 }, { "epoch": 1.433389544688027, "grad_norm": 13.278093338012695, "learning_rate": 1.4526974198592652e-05, "loss": 0.5078, "step": 850 }, { "epoch": 1.4502529510961213, "grad_norm": 7.877659320831299, "learning_rate": 1.4370602032838158e-05, "loss": 0.5108, "step": 860 }, { "epoch": 1.4671163575042159, "grad_norm": 7.198855400085449, "learning_rate": 1.421422986708366e-05, "loss": 0.537, "step": 870 }, { "epoch": 1.4839797639123102, "grad_norm": 7.947418212890625, "learning_rate": 1.4057857701329166e-05, "loss": 0.4431, "step": 880 }, { "epoch": 1.5008431703204046, "grad_norm": 8.317455291748047, "learning_rate": 1.3901485535574668e-05, "loss": 0.4532, "step": 890 }, { "epoch": 1.5177065767284992, "grad_norm": 8.073487281799316, "learning_rate": 1.3745113369820174e-05, "loss": 0.4624, "step": 900 }, { "epoch": 1.5345699831365935, "grad_norm": 9.39900016784668, "learning_rate": 1.3588741204065676e-05, "loss": 0.3739, "step": 910 }, { "epoch": 1.551433389544688, "grad_norm": 7.912290096282959, "learning_rate": 1.3432369038311182e-05, "loss": 0.497, "step": 920 }, { "epoch": 1.5682967959527825, "grad_norm": 12.181069374084473, "learning_rate": 1.3275996872556686e-05, "loss": 0.4859, "step": 930 }, { "epoch": 1.5851602023608768, "grad_norm": 9.931648254394531, "learning_rate": 1.311962470680219e-05, "loss": 0.4945, "step": 940 }, { "epoch": 1.6020236087689712, "grad_norm": 7.948305606842041, "learning_rate": 1.2963252541047694e-05, "loss": 0.3742, "step": 950 }, { "epoch": 1.6188870151770658, "grad_norm": 9.558001518249512, "learning_rate": 1.2806880375293199e-05, "loss": 0.4684, "step": 960 }, { "epoch": 1.6357504215851602, "grad_norm": 8.381938934326172, "learning_rate": 1.2650508209538703e-05, "loss": 0.4564, "step": 970 }, { "epoch": 1.6526138279932545, "grad_norm": 7.728443622589111, "learning_rate": 1.2494136043784208e-05, "loss": 0.4493, "step": 980 }, { "epoch": 1.669477234401349, "grad_norm": 14.073138236999512, "learning_rate": 1.233776387802971e-05, "loss": 0.4085, "step": 990 }, { "epoch": 1.6863406408094435, "grad_norm": 6.431807518005371, "learning_rate": 1.2181391712275217e-05, "loss": 0.4223, "step": 1000 }, { "epoch": 1.6863406408094435, "eval_accuracy": 0.7240184757505773, "eval_accuracy_label_arts, culture, entertainment and media": 0.75, "eval_accuracy_label_conflict, war and peace": 0.675531914893617, "eval_accuracy_label_crime, law and justice": 0.884393063583815, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8549618320610687, "eval_accuracy_label_economy, business, and finance": 0.7341772151898734, "eval_accuracy_label_environment": 0.5, "eval_accuracy_label_health": 0.9, "eval_accuracy_label_human interest": 0.3333333333333333, "eval_accuracy_label_labour": 1.0, "eval_accuracy_label_lifestyle and leisure": 0.625, "eval_accuracy_label_politics": 0.6474820143884892, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.3333333333333333, "eval_accuracy_label_society": 0.3684210526315789, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 0.0, "eval_f1": 0.7249701679054074, "eval_loss": 0.8601514101028442, "eval_precision": 0.7387214985313825, "eval_recall": 0.7240184757505773, "eval_runtime": 6.9046, "eval_samples_per_second": 125.423, "eval_steps_per_second": 7.966, "step": 1000 }, { "epoch": 1.7032040472175378, "grad_norm": 5.857812404632568, "learning_rate": 1.2025019546520719e-05, "loss": 0.4261, "step": 1010 }, { "epoch": 1.7200674536256324, "grad_norm": 6.620238304138184, "learning_rate": 1.1868647380766225e-05, "loss": 0.4071, "step": 1020 }, { "epoch": 1.7369308600337268, "grad_norm": 13.199769973754883, "learning_rate": 1.1712275215011727e-05, "loss": 0.3632, "step": 1030 }, { "epoch": 1.7537942664418211, "grad_norm": 8.069077491760254, "learning_rate": 1.1555903049257233e-05, "loss": 0.3977, "step": 1040 }, { "epoch": 1.7706576728499157, "grad_norm": 12.82445240020752, "learning_rate": 1.1399530883502739e-05, "loss": 0.4245, "step": 1050 }, { "epoch": 1.78752107925801, "grad_norm": 15.626612663269043, "learning_rate": 1.1243158717748241e-05, "loss": 0.3924, "step": 1060 }, { "epoch": 1.8043844856661044, "grad_norm": 14.491767883300781, "learning_rate": 1.1086786551993747e-05, "loss": 0.427, "step": 1070 }, { "epoch": 1.821247892074199, "grad_norm": 5.425060749053955, "learning_rate": 1.093041438623925e-05, "loss": 0.4013, "step": 1080 }, { "epoch": 1.8381112984822934, "grad_norm": 10.305951118469238, "learning_rate": 1.0774042220484755e-05, "loss": 0.4036, "step": 1090 }, { "epoch": 1.8549747048903877, "grad_norm": 10.018141746520996, "learning_rate": 1.0617670054730258e-05, "loss": 0.4123, "step": 1100 }, { "epoch": 1.8718381112984823, "grad_norm": 13.072948455810547, "learning_rate": 1.0461297888975763e-05, "loss": 0.4586, "step": 1110 }, { "epoch": 1.8887015177065767, "grad_norm": 7.460719585418701, "learning_rate": 1.030492572322127e-05, "loss": 0.3912, "step": 1120 }, { "epoch": 1.905564924114671, "grad_norm": 5.454730033874512, "learning_rate": 1.0148553557466772e-05, "loss": 0.452, "step": 1130 }, { "epoch": 1.9224283305227656, "grad_norm": 8.758441925048828, "learning_rate": 9.992181391712276e-06, "loss": 0.3622, "step": 1140 }, { "epoch": 1.93929173693086, "grad_norm": 8.622238159179688, "learning_rate": 9.835809225957781e-06, "loss": 0.439, "step": 1150 }, { "epoch": 1.9561551433389543, "grad_norm": 6.299844741821289, "learning_rate": 9.679437060203286e-06, "loss": 0.4006, "step": 1160 }, { "epoch": 1.973018549747049, "grad_norm": 14.24021053314209, "learning_rate": 9.52306489444879e-06, "loss": 0.3616, "step": 1170 }, { "epoch": 1.9898819561551433, "grad_norm": 20.004261016845703, "learning_rate": 9.366692728694294e-06, "loss": 0.4253, "step": 1180 }, { "epoch": 2.0067453625632377, "grad_norm": 12.190637588500977, "learning_rate": 9.210320562939798e-06, "loss": 0.3859, "step": 1190 }, { "epoch": 2.0236087689713322, "grad_norm": 3.0981788635253906, "learning_rate": 9.053948397185302e-06, "loss": 0.3104, "step": 1200 }, { "epoch": 2.0236087689713322, "eval_accuracy": 0.7263279445727483, "eval_accuracy_label_arts, culture, entertainment and media": 0.8333333333333334, "eval_accuracy_label_conflict, war and peace": 0.7180851063829787, "eval_accuracy_label_crime, law and justice": 0.8323699421965318, "eval_accuracy_label_disaster, accident, and emergency incident": 0.9083969465648855, "eval_accuracy_label_economy, business, and finance": 0.7721518987341772, "eval_accuracy_label_environment": 0.4375, "eval_accuracy_label_health": 0.7, "eval_accuracy_label_human interest": 0.25, "eval_accuracy_label_labour": 0.5, "eval_accuracy_label_lifestyle and leisure": 0.75, "eval_accuracy_label_politics": 0.5611510791366906, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.4166666666666667, "eval_accuracy_label_society": 0.47368421052631576, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.7266120737327485, "eval_loss": 0.856505811214447, "eval_precision": 0.7326492002435036, "eval_recall": 0.7263279445727483, "eval_runtime": 6.9128, "eval_samples_per_second": 125.275, "eval_steps_per_second": 7.956, "step": 1200 }, { "epoch": 2.040472175379427, "grad_norm": 5.126299858093262, "learning_rate": 8.897576231430806e-06, "loss": 0.3066, "step": 1210 }, { "epoch": 2.057335581787521, "grad_norm": 5.153671741485596, "learning_rate": 8.74120406567631e-06, "loss": 0.3275, "step": 1220 }, { "epoch": 2.0741989881956155, "grad_norm": 6.91109561920166, "learning_rate": 8.584831899921814e-06, "loss": 0.2581, "step": 1230 }, { "epoch": 2.09106239460371, "grad_norm": 9.57040023803711, "learning_rate": 8.428459734167318e-06, "loss": 0.2809, "step": 1240 }, { "epoch": 2.1079258010118043, "grad_norm": 3.024446964263916, "learning_rate": 8.272087568412822e-06, "loss": 0.2288, "step": 1250 }, { "epoch": 2.124789207419899, "grad_norm": 6.808653831481934, "learning_rate": 8.115715402658327e-06, "loss": 0.2406, "step": 1260 }, { "epoch": 2.1416526138279934, "grad_norm": 15.430974960327148, "learning_rate": 7.959343236903832e-06, "loss": 0.3031, "step": 1270 }, { "epoch": 2.1585160202360876, "grad_norm": 6.104337215423584, "learning_rate": 7.802971071149336e-06, "loss": 0.2575, "step": 1280 }, { "epoch": 2.175379426644182, "grad_norm": 3.1404926776885986, "learning_rate": 7.64659890539484e-06, "loss": 0.264, "step": 1290 }, { "epoch": 2.1922428330522767, "grad_norm": 11.821499824523926, "learning_rate": 7.490226739640345e-06, "loss": 0.2602, "step": 1300 }, { "epoch": 2.209106239460371, "grad_norm": 4.936356544494629, "learning_rate": 7.333854573885849e-06, "loss": 0.3428, "step": 1310 }, { "epoch": 2.2259696458684655, "grad_norm": 12.609466552734375, "learning_rate": 7.177482408131353e-06, "loss": 0.2467, "step": 1320 }, { "epoch": 2.24283305227656, "grad_norm": 10.444657325744629, "learning_rate": 7.021110242376858e-06, "loss": 0.3149, "step": 1330 }, { "epoch": 2.259696458684654, "grad_norm": 9.078226089477539, "learning_rate": 6.864738076622362e-06, "loss": 0.3207, "step": 1340 }, { "epoch": 2.2765598650927488, "grad_norm": 7.427735328674316, "learning_rate": 6.708365910867866e-06, "loss": 0.2458, "step": 1350 }, { "epoch": 2.2934232715008434, "grad_norm": 6.552321910858154, "learning_rate": 6.551993745113371e-06, "loss": 0.273, "step": 1360 }, { "epoch": 2.3102866779089375, "grad_norm": 5.591136932373047, "learning_rate": 6.395621579358875e-06, "loss": 0.2345, "step": 1370 }, { "epoch": 2.327150084317032, "grad_norm": 9.349672317504883, "learning_rate": 6.239249413604379e-06, "loss": 0.3254, "step": 1380 }, { "epoch": 2.3440134907251267, "grad_norm": 7.311215400695801, "learning_rate": 6.082877247849883e-06, "loss": 0.2511, "step": 1390 }, { "epoch": 2.360876897133221, "grad_norm": 9.266729354858398, "learning_rate": 5.926505082095387e-06, "loss": 0.2855, "step": 1400 }, { "epoch": 2.360876897133221, "eval_accuracy": 0.7240184757505773, "eval_accuracy_label_arts, culture, entertainment and media": 0.75, "eval_accuracy_label_conflict, war and peace": 0.7393617021276596, "eval_accuracy_label_crime, law and justice": 0.8323699421965318, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8549618320610687, "eval_accuracy_label_economy, business, and finance": 0.7974683544303798, "eval_accuracy_label_environment": 0.5, "eval_accuracy_label_health": 0.7, "eval_accuracy_label_human interest": 0.3333333333333333, "eval_accuracy_label_labour": 0.5, "eval_accuracy_label_lifestyle and leisure": 0.625, "eval_accuracy_label_politics": 0.5899280575539568, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.4166666666666667, "eval_accuracy_label_society": 0.38596491228070173, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.7283152161972767, "eval_loss": 0.8981179594993591, "eval_precision": 0.7401859274408267, "eval_recall": 0.7240184757505773, "eval_runtime": 6.8981, "eval_samples_per_second": 125.541, "eval_steps_per_second": 7.973, "step": 1400 }, { "epoch": 2.3777403035413154, "grad_norm": 12.713851928710938, "learning_rate": 5.770132916340891e-06, "loss": 0.2885, "step": 1410 }, { "epoch": 2.39460370994941, "grad_norm": 5.382158279418945, "learning_rate": 5.6137607505863955e-06, "loss": 0.3191, "step": 1420 }, { "epoch": 2.411467116357504, "grad_norm": 4.609128475189209, "learning_rate": 5.4573885848319e-06, "loss": 0.273, "step": 1430 }, { "epoch": 2.4283305227655987, "grad_norm": 5.821180820465088, "learning_rate": 5.301016419077405e-06, "loss": 0.2397, "step": 1440 }, { "epoch": 2.4451939291736933, "grad_norm": 3.8270256519317627, "learning_rate": 5.1446442533229095e-06, "loss": 0.3575, "step": 1450 }, { "epoch": 2.4620573355817874, "grad_norm": 4.910329341888428, "learning_rate": 4.9882720875684136e-06, "loss": 0.2622, "step": 1460 }, { "epoch": 2.478920741989882, "grad_norm": 6.537694931030273, "learning_rate": 4.831899921813918e-06, "loss": 0.1981, "step": 1470 }, { "epoch": 2.4957841483979766, "grad_norm": 9.600226402282715, "learning_rate": 4.675527756059422e-06, "loss": 0.1977, "step": 1480 }, { "epoch": 2.5126475548060707, "grad_norm": 5.246051788330078, "learning_rate": 4.519155590304926e-06, "loss": 0.2452, "step": 1490 }, { "epoch": 2.5295109612141653, "grad_norm": 5.955575942993164, "learning_rate": 4.36278342455043e-06, "loss": 0.2239, "step": 1500 }, { "epoch": 2.54637436762226, "grad_norm": 9.812554359436035, "learning_rate": 4.206411258795935e-06, "loss": 0.2773, "step": 1510 }, { "epoch": 2.563237774030354, "grad_norm": 11.0867280960083, "learning_rate": 4.050039093041439e-06, "loss": 0.1866, "step": 1520 }, { "epoch": 2.5801011804384486, "grad_norm": 6.4792609214782715, "learning_rate": 3.893666927286943e-06, "loss": 0.2556, "step": 1530 }, { "epoch": 2.5969645868465427, "grad_norm": 9.363513946533203, "learning_rate": 3.737294761532447e-06, "loss": 0.2593, "step": 1540 }, { "epoch": 2.6138279932546373, "grad_norm": 11.11075496673584, "learning_rate": 3.580922595777952e-06, "loss": 0.3482, "step": 1550 }, { "epoch": 2.630691399662732, "grad_norm": 2.9576575756073, "learning_rate": 3.4245504300234562e-06, "loss": 0.2412, "step": 1560 }, { "epoch": 2.6475548060708265, "grad_norm": 9.084090232849121, "learning_rate": 3.2681782642689603e-06, "loss": 0.3092, "step": 1570 }, { "epoch": 2.6644182124789206, "grad_norm": 7.185946941375732, "learning_rate": 3.111806098514465e-06, "loss": 0.2406, "step": 1580 }, { "epoch": 2.681281618887015, "grad_norm": 5.467381000518799, "learning_rate": 2.955433932759969e-06, "loss": 0.2709, "step": 1590 }, { "epoch": 2.6981450252951094, "grad_norm": 6.529063701629639, "learning_rate": 2.799061767005473e-06, "loss": 0.217, "step": 1600 }, { "epoch": 2.6981450252951094, "eval_accuracy": 0.73094688221709, "eval_accuracy_label_arts, culture, entertainment and media": 0.75, "eval_accuracy_label_conflict, war and peace": 0.7446808510638298, "eval_accuracy_label_crime, law and justice": 0.838150289017341, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8931297709923665, "eval_accuracy_label_economy, business, and finance": 0.8481012658227848, "eval_accuracy_label_environment": 0.375, "eval_accuracy_label_health": 0.8, "eval_accuracy_label_human interest": 0.3333333333333333, "eval_accuracy_label_labour": 0.5, "eval_accuracy_label_lifestyle and leisure": 0.5, "eval_accuracy_label_politics": 0.539568345323741, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.4166666666666667, "eval_accuracy_label_society": 0.45614035087719296, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.7291730687142569, "eval_loss": 0.8666742444038391, "eval_precision": 0.7358354972874402, "eval_recall": 0.73094688221709, "eval_runtime": 6.9197, "eval_samples_per_second": 125.151, "eval_steps_per_second": 7.948, "step": 1600 }, { "epoch": 2.715008431703204, "grad_norm": 5.216823101043701, "learning_rate": 2.642689601250977e-06, "loss": 0.2422, "step": 1610 }, { "epoch": 2.7318718381112985, "grad_norm": 14.297146797180176, "learning_rate": 2.486317435496482e-06, "loss": 0.2569, "step": 1620 }, { "epoch": 2.748735244519393, "grad_norm": 3.5069735050201416, "learning_rate": 2.329945269741986e-06, "loss": 0.284, "step": 1630 }, { "epoch": 2.7655986509274872, "grad_norm": 10.029553413391113, "learning_rate": 2.1735731039874907e-06, "loss": 0.305, "step": 1640 }, { "epoch": 2.782462057335582, "grad_norm": 13.829554557800293, "learning_rate": 2.0172009382329948e-06, "loss": 0.3018, "step": 1650 }, { "epoch": 2.799325463743676, "grad_norm": 7.969460487365723, "learning_rate": 1.860828772478499e-06, "loss": 0.2687, "step": 1660 }, { "epoch": 2.8161888701517706, "grad_norm": 4.048108100891113, "learning_rate": 1.7044566067240032e-06, "loss": 0.2845, "step": 1670 }, { "epoch": 2.833052276559865, "grad_norm": 3.3585705757141113, "learning_rate": 1.5480844409695075e-06, "loss": 0.2792, "step": 1680 }, { "epoch": 2.8499156829679597, "grad_norm": 3.1764752864837646, "learning_rate": 1.391712275215012e-06, "loss": 0.2244, "step": 1690 }, { "epoch": 2.866779089376054, "grad_norm": 9.154349327087402, "learning_rate": 1.2353401094605161e-06, "loss": 0.2469, "step": 1700 }, { "epoch": 2.8836424957841484, "grad_norm": 8.833173751831055, "learning_rate": 1.0789679437060204e-06, "loss": 0.2882, "step": 1710 }, { "epoch": 2.9005059021922426, "grad_norm": 9.59762954711914, "learning_rate": 9.225957779515247e-07, "loss": 0.2442, "step": 1720 }, { "epoch": 2.917369308600337, "grad_norm": 10.648268699645996, "learning_rate": 7.66223612197029e-07, "loss": 0.2421, "step": 1730 }, { "epoch": 2.9342327150084317, "grad_norm": 3.897761821746826, "learning_rate": 6.098514464425332e-07, "loss": 0.2391, "step": 1740 }, { "epoch": 2.9510961214165263, "grad_norm": 5.289120674133301, "learning_rate": 4.5347928068803755e-07, "loss": 0.2567, "step": 1750 }, { "epoch": 2.9679595278246205, "grad_norm": 6.413928985595703, "learning_rate": 2.9710711493354186e-07, "loss": 0.2597, "step": 1760 }, { "epoch": 2.984822934232715, "grad_norm": 3.9349653720855713, "learning_rate": 1.4073494917904614e-07, "loss": 0.2551, "step": 1770 }, { "epoch": 3.0, "step": 1779, "total_flos": 7485003163459584.0, "train_loss": 0.7494140876820411, "train_runtime": 1316.4618, "train_samples_per_second": 43.214, "train_steps_per_second": 1.351 }, { "epoch": 3.0, "eval_accuracy": 0.7286374133949192, "eval_accuracy_label_arts, culture, entertainment and media": 0.8333333333333334, "eval_accuracy_label_conflict, war and peace": 0.723404255319149, "eval_accuracy_label_crime, law and justice": 0.791907514450867, "eval_accuracy_label_disaster, accident, and emergency incident": 0.8931297709923665, "eval_accuracy_label_economy, business, and finance": 0.7974683544303798, "eval_accuracy_label_environment": 0.4375, "eval_accuracy_label_health": 0.7, "eval_accuracy_label_human interest": 0.3333333333333333, "eval_accuracy_label_labour": 0.5, "eval_accuracy_label_lifestyle and leisure": 0.5, "eval_accuracy_label_politics": 0.6330935251798561, "eval_accuracy_label_religion": 0.0, "eval_accuracy_label_science and technology": 0.4166666666666667, "eval_accuracy_label_society": 0.45614035087719296, "eval_accuracy_label_sport": 0.9615384615384616, "eval_accuracy_label_weather": 1.0, "eval_f1": 0.730012835488787, "eval_loss": 0.8615460395812988, "eval_precision": 0.7350865087902028, "eval_recall": 0.7286374133949192, "eval_runtime": 6.8008, "eval_samples_per_second": 127.338, "eval_steps_per_second": 8.087, "step": 1779 } ], "logging_steps": 10, "max_steps": 1779, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "total_flos": 7485003163459584.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }