classify-news-category-iptc / trainer_state.json
ilsilfverskiold's picture
ilsilfverskiold/iptc-newscodes-multilingual-text-classification
f1dd9a3 verified
raw
history blame
43.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 200,
"global_step": 1779,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016863406408094434,
"grad_norm": 5.893406867980957,
"learning_rate": 4.0000000000000003e-07,
"loss": 2.8131,
"step": 10
},
{
"epoch": 0.03372681281618887,
"grad_norm": 4.519655704498291,
"learning_rate": 8.000000000000001e-07,
"loss": 2.8225,
"step": 20
},
{
"epoch": 0.050590219224283306,
"grad_norm": 4.535500526428223,
"learning_rate": 1.2000000000000002e-06,
"loss": 2.8042,
"step": 30
},
{
"epoch": 0.06745362563237774,
"grad_norm": 4.360924243927002,
"learning_rate": 1.6000000000000001e-06,
"loss": 2.7337,
"step": 40
},
{
"epoch": 0.08431703204047218,
"grad_norm": 5.550849914550781,
"learning_rate": 2.0000000000000003e-06,
"loss": 2.6976,
"step": 50
},
{
"epoch": 0.10118043844856661,
"grad_norm": 4.623074054718018,
"learning_rate": 2.4000000000000003e-06,
"loss": 2.6382,
"step": 60
},
{
"epoch": 0.11804384485666104,
"grad_norm": 6.715169906616211,
"learning_rate": 2.8000000000000003e-06,
"loss": 2.6356,
"step": 70
},
{
"epoch": 0.13490725126475547,
"grad_norm": 5.305912017822266,
"learning_rate": 3.2000000000000003e-06,
"loss": 2.558,
"step": 80
},
{
"epoch": 0.15177065767284992,
"grad_norm": 5.895500183105469,
"learning_rate": 3.6000000000000003e-06,
"loss": 2.4923,
"step": 90
},
{
"epoch": 0.16863406408094436,
"grad_norm": 5.270618438720703,
"learning_rate": 4.000000000000001e-06,
"loss": 2.3955,
"step": 100
},
{
"epoch": 0.18549747048903878,
"grad_norm": 5.943336486816406,
"learning_rate": 4.4e-06,
"loss": 2.3467,
"step": 110
},
{
"epoch": 0.20236087689713322,
"grad_norm": 6.1944403648376465,
"learning_rate": 4.800000000000001e-06,
"loss": 2.3161,
"step": 120
},
{
"epoch": 0.21922428330522767,
"grad_norm": 6.080775260925293,
"learning_rate": 5.2e-06,
"loss": 2.2254,
"step": 130
},
{
"epoch": 0.23608768971332209,
"grad_norm": 6.028081893920898,
"learning_rate": 5.600000000000001e-06,
"loss": 2.205,
"step": 140
},
{
"epoch": 0.25295109612141653,
"grad_norm": 5.6627631187438965,
"learning_rate": 6e-06,
"loss": 2.1021,
"step": 150
},
{
"epoch": 0.26981450252951095,
"grad_norm": 7.71439266204834,
"learning_rate": 6.4000000000000006e-06,
"loss": 2.1383,
"step": 160
},
{
"epoch": 0.2866779089376054,
"grad_norm": 8.687724113464355,
"learning_rate": 6.800000000000001e-06,
"loss": 1.9821,
"step": 170
},
{
"epoch": 0.30354131534569984,
"grad_norm": 6.372674942016602,
"learning_rate": 7.2000000000000005e-06,
"loss": 1.9015,
"step": 180
},
{
"epoch": 0.32040472175379425,
"grad_norm": 7.2390217781066895,
"learning_rate": 7.600000000000001e-06,
"loss": 1.9485,
"step": 190
},
{
"epoch": 0.3372681281618887,
"grad_norm": 7.15785026550293,
"learning_rate": 8.000000000000001e-06,
"loss": 1.7671,
"step": 200
},
{
"epoch": 0.3372681281618887,
"eval_accuracy": 0.5554272517321016,
"eval_accuracy_label_arts, culture, entertainment and media": 0.5833333333333334,
"eval_accuracy_label_conflict, war and peace": 0.7553191489361702,
"eval_accuracy_label_crime, law and justice": 0.8959537572254336,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.32061068702290074,
"eval_accuracy_label_economy, business, and finance": 0.6708860759493671,
"eval_accuracy_label_environment": 0.125,
"eval_accuracy_label_health": 0.7,
"eval_accuracy_label_human interest": 0.0,
"eval_accuracy_label_labour": 0.5,
"eval_accuracy_label_lifestyle and leisure": 0.5,
"eval_accuracy_label_politics": 0.28776978417266186,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.0,
"eval_accuracy_label_society": 0.03508771929824561,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.5206037462663565,
"eval_loss": 1.5660511255264282,
"eval_precision": 0.5827940940390139,
"eval_recall": 0.5554272517321016,
"eval_runtime": 6.8757,
"eval_samples_per_second": 125.95,
"eval_steps_per_second": 7.999,
"step": 200
},
{
"epoch": 0.35413153456998314,
"grad_norm": 7.729678630828857,
"learning_rate": 8.400000000000001e-06,
"loss": 1.7632,
"step": 210
},
{
"epoch": 0.37099494097807756,
"grad_norm": 8.155638694763184,
"learning_rate": 8.8e-06,
"loss": 1.6068,
"step": 220
},
{
"epoch": 0.38785834738617203,
"grad_norm": 7.142002105712891,
"learning_rate": 9.200000000000002e-06,
"loss": 1.4778,
"step": 230
},
{
"epoch": 0.40472175379426645,
"grad_norm": 7.332587242126465,
"learning_rate": 9.600000000000001e-06,
"loss": 1.5736,
"step": 240
},
{
"epoch": 0.42158516020236086,
"grad_norm": 13.657541275024414,
"learning_rate": 1e-05,
"loss": 1.4757,
"step": 250
},
{
"epoch": 0.43844856661045534,
"grad_norm": 14.87531566619873,
"learning_rate": 1.04e-05,
"loss": 1.4364,
"step": 260
},
{
"epoch": 0.45531197301854975,
"grad_norm": 15.269392967224121,
"learning_rate": 1.0800000000000002e-05,
"loss": 1.3215,
"step": 270
},
{
"epoch": 0.47217537942664417,
"grad_norm": 10.192849159240723,
"learning_rate": 1.1200000000000001e-05,
"loss": 1.3129,
"step": 280
},
{
"epoch": 0.48903878583473864,
"grad_norm": 8.036419868469238,
"learning_rate": 1.16e-05,
"loss": 1.2399,
"step": 290
},
{
"epoch": 0.5059021922428331,
"grad_norm": 10.625975608825684,
"learning_rate": 1.2e-05,
"loss": 1.3016,
"step": 300
},
{
"epoch": 0.5227655986509275,
"grad_norm": 14.144831657409668,
"learning_rate": 1.2400000000000002e-05,
"loss": 1.2514,
"step": 310
},
{
"epoch": 0.5396290050590219,
"grad_norm": 11.439717292785645,
"learning_rate": 1.2800000000000001e-05,
"loss": 1.1768,
"step": 320
},
{
"epoch": 0.5564924114671164,
"grad_norm": 18.485063552856445,
"learning_rate": 1.3200000000000002e-05,
"loss": 1.1273,
"step": 330
},
{
"epoch": 0.5733558178752108,
"grad_norm": 12.227529525756836,
"learning_rate": 1.3600000000000002e-05,
"loss": 1.0002,
"step": 340
},
{
"epoch": 0.5902192242833052,
"grad_norm": 10.209256172180176,
"learning_rate": 1.4e-05,
"loss": 1.1188,
"step": 350
},
{
"epoch": 0.6070826306913997,
"grad_norm": 8.667159080505371,
"learning_rate": 1.4400000000000001e-05,
"loss": 1.0151,
"step": 360
},
{
"epoch": 0.6239460370994941,
"grad_norm": 8.289810180664062,
"learning_rate": 1.48e-05,
"loss": 1.0148,
"step": 370
},
{
"epoch": 0.6408094435075885,
"grad_norm": 8.391195297241211,
"learning_rate": 1.5200000000000002e-05,
"loss": 0.9461,
"step": 380
},
{
"epoch": 0.657672849915683,
"grad_norm": 10.535934448242188,
"learning_rate": 1.5600000000000003e-05,
"loss": 0.9695,
"step": 390
},
{
"epoch": 0.6745362563237775,
"grad_norm": 10.750452041625977,
"learning_rate": 1.6000000000000003e-05,
"loss": 1.0248,
"step": 400
},
{
"epoch": 0.6745362563237775,
"eval_accuracy": 0.6709006928406467,
"eval_accuracy_label_arts, culture, entertainment and media": 0.9166666666666666,
"eval_accuracy_label_conflict, war and peace": 0.7978723404255319,
"eval_accuracy_label_crime, law and justice": 0.815028901734104,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8625954198473282,
"eval_accuracy_label_economy, business, and finance": 0.7215189873417721,
"eval_accuracy_label_environment": 0.375,
"eval_accuracy_label_health": 0.9,
"eval_accuracy_label_human interest": 0.25,
"eval_accuracy_label_labour": 1.0,
"eval_accuracy_label_lifestyle and leisure": 0.5,
"eval_accuracy_label_politics": 0.30935251798561153,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.4166666666666667,
"eval_accuracy_label_society": 0.19298245614035087,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.6591476133220594,
"eval_loss": 1.0774492025375366,
"eval_precision": 0.6983965116011075,
"eval_recall": 0.6709006928406467,
"eval_runtime": 6.9138,
"eval_samples_per_second": 125.256,
"eval_steps_per_second": 7.955,
"step": 400
},
{
"epoch": 0.6913996627318718,
"grad_norm": 11.188483238220215,
"learning_rate": 1.64e-05,
"loss": 0.8975,
"step": 410
},
{
"epoch": 0.7082630691399663,
"grad_norm": 8.035601615905762,
"learning_rate": 1.6800000000000002e-05,
"loss": 0.9736,
"step": 420
},
{
"epoch": 0.7251264755480608,
"grad_norm": 10.406453132629395,
"learning_rate": 1.72e-05,
"loss": 0.8802,
"step": 430
},
{
"epoch": 0.7419898819561551,
"grad_norm": 14.611414909362793,
"learning_rate": 1.76e-05,
"loss": 1.0106,
"step": 440
},
{
"epoch": 0.7588532883642496,
"grad_norm": 8.613202095031738,
"learning_rate": 1.8e-05,
"loss": 0.8411,
"step": 450
},
{
"epoch": 0.7757166947723441,
"grad_norm": 15.136896133422852,
"learning_rate": 1.8400000000000003e-05,
"loss": 0.736,
"step": 460
},
{
"epoch": 0.7925801011804384,
"grad_norm": 18.369375228881836,
"learning_rate": 1.88e-05,
"loss": 0.8807,
"step": 470
},
{
"epoch": 0.8094435075885329,
"grad_norm": 5.840969562530518,
"learning_rate": 1.9200000000000003e-05,
"loss": 0.8024,
"step": 480
},
{
"epoch": 0.8263069139966274,
"grad_norm": 14.514450073242188,
"learning_rate": 1.9600000000000002e-05,
"loss": 0.8334,
"step": 490
},
{
"epoch": 0.8431703204047217,
"grad_norm": 20.5344295501709,
"learning_rate": 2e-05,
"loss": 0.917,
"step": 500
},
{
"epoch": 0.8600337268128162,
"grad_norm": 8.904403686523438,
"learning_rate": 1.9843627834245506e-05,
"loss": 0.7659,
"step": 510
},
{
"epoch": 0.8768971332209107,
"grad_norm": 8.624781608581543,
"learning_rate": 1.968725566849101e-05,
"loss": 0.8243,
"step": 520
},
{
"epoch": 0.893760539629005,
"grad_norm": 14.050222396850586,
"learning_rate": 1.9530883502736514e-05,
"loss": 0.8085,
"step": 530
},
{
"epoch": 0.9106239460370995,
"grad_norm": 8.78807258605957,
"learning_rate": 1.9374511336982018e-05,
"loss": 0.6006,
"step": 540
},
{
"epoch": 0.927487352445194,
"grad_norm": 12.83305549621582,
"learning_rate": 1.9218139171227522e-05,
"loss": 0.8491,
"step": 550
},
{
"epoch": 0.9443507588532883,
"grad_norm": 10.532402038574219,
"learning_rate": 1.9061767005473026e-05,
"loss": 0.7823,
"step": 560
},
{
"epoch": 0.9612141652613828,
"grad_norm": 21.147523880004883,
"learning_rate": 1.890539483971853e-05,
"loss": 0.8382,
"step": 570
},
{
"epoch": 0.9780775716694773,
"grad_norm": 8.727555274963379,
"learning_rate": 1.8749022673964038e-05,
"loss": 0.7173,
"step": 580
},
{
"epoch": 0.9949409780775716,
"grad_norm": 9.259013175964355,
"learning_rate": 1.859265050820954e-05,
"loss": 0.7727,
"step": 590
},
{
"epoch": 1.0118043844856661,
"grad_norm": 6.446993350982666,
"learning_rate": 1.8436278342455046e-05,
"loss": 0.5845,
"step": 600
},
{
"epoch": 1.0118043844856661,
"eval_accuracy": 0.6535796766743649,
"eval_accuracy_label_arts, culture, entertainment and media": 0.9166666666666666,
"eval_accuracy_label_conflict, war and peace": 0.7287234042553191,
"eval_accuracy_label_crime, law and justice": 0.6763005780346821,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8778625954198473,
"eval_accuracy_label_economy, business, and finance": 0.7215189873417721,
"eval_accuracy_label_environment": 0.4375,
"eval_accuracy_label_health": 0.8,
"eval_accuracy_label_human interest": 0.0,
"eval_accuracy_label_labour": 1.0,
"eval_accuracy_label_lifestyle and leisure": 0.75,
"eval_accuracy_label_politics": 0.3669064748201439,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.4166666666666667,
"eval_accuracy_label_society": 0.43859649122807015,
"eval_accuracy_label_sport": 0.9230769230769231,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.6563434776934847,
"eval_loss": 0.990666389465332,
"eval_precision": 0.682947679005421,
"eval_recall": 0.6535796766743649,
"eval_runtime": 6.9215,
"eval_samples_per_second": 125.118,
"eval_steps_per_second": 7.946,
"step": 600
},
{
"epoch": 1.0286677908937605,
"grad_norm": 10.464855194091797,
"learning_rate": 1.8279906176700547e-05,
"loss": 0.565,
"step": 610
},
{
"epoch": 1.045531197301855,
"grad_norm": 10.188838958740234,
"learning_rate": 1.8123534010946054e-05,
"loss": 0.5325,
"step": 620
},
{
"epoch": 1.0623946037099494,
"grad_norm": 11.83839225769043,
"learning_rate": 1.7967161845191555e-05,
"loss": 0.603,
"step": 630
},
{
"epoch": 1.0792580101180438,
"grad_norm": 12.935112953186035,
"learning_rate": 1.7810789679437062e-05,
"loss": 0.5319,
"step": 640
},
{
"epoch": 1.0961214165261384,
"grad_norm": 8.650348663330078,
"learning_rate": 1.7654417513682566e-05,
"loss": 0.4803,
"step": 650
},
{
"epoch": 1.1129848229342327,
"grad_norm": 12.213525772094727,
"learning_rate": 1.749804534792807e-05,
"loss": 0.6975,
"step": 660
},
{
"epoch": 1.129848229342327,
"grad_norm": 12.859235763549805,
"learning_rate": 1.7341673182173575e-05,
"loss": 0.6273,
"step": 670
},
{
"epoch": 1.1467116357504217,
"grad_norm": 8.133111953735352,
"learning_rate": 1.718530101641908e-05,
"loss": 0.5179,
"step": 680
},
{
"epoch": 1.163575042158516,
"grad_norm": 8.366670608520508,
"learning_rate": 1.7028928850664583e-05,
"loss": 0.5622,
"step": 690
},
{
"epoch": 1.1804384485666104,
"grad_norm": 12.844072341918945,
"learning_rate": 1.6872556684910087e-05,
"loss": 0.6347,
"step": 700
},
{
"epoch": 1.197301854974705,
"grad_norm": 3.2994637489318848,
"learning_rate": 1.671618451915559e-05,
"loss": 0.4805,
"step": 710
},
{
"epoch": 1.2141652613827993,
"grad_norm": 6.4653167724609375,
"learning_rate": 1.6559812353401095e-05,
"loss": 0.5172,
"step": 720
},
{
"epoch": 1.2310286677908937,
"grad_norm": 6.744974613189697,
"learning_rate": 1.64034401876466e-05,
"loss": 0.5601,
"step": 730
},
{
"epoch": 1.2478920741989883,
"grad_norm": 9.451184272766113,
"learning_rate": 1.6247068021892107e-05,
"loss": 0.5289,
"step": 740
},
{
"epoch": 1.2647554806070826,
"grad_norm": 5.093151092529297,
"learning_rate": 1.6090695856137607e-05,
"loss": 0.5674,
"step": 750
},
{
"epoch": 1.281618887015177,
"grad_norm": 16.99575424194336,
"learning_rate": 1.5934323690383115e-05,
"loss": 0.4681,
"step": 760
},
{
"epoch": 1.2984822934232714,
"grad_norm": 17.180509567260742,
"learning_rate": 1.5777951524628616e-05,
"loss": 0.5974,
"step": 770
},
{
"epoch": 1.315345699831366,
"grad_norm": 11.804935455322266,
"learning_rate": 1.5621579358874123e-05,
"loss": 0.5693,
"step": 780
},
{
"epoch": 1.3322091062394603,
"grad_norm": 12.154623985290527,
"learning_rate": 1.5465207193119627e-05,
"loss": 0.6001,
"step": 790
},
{
"epoch": 1.3490725126475547,
"grad_norm": 11.09798526763916,
"learning_rate": 1.530883502736513e-05,
"loss": 0.6104,
"step": 800
},
{
"epoch": 1.3490725126475547,
"eval_accuracy": 0.7240184757505773,
"eval_accuracy_label_arts, culture, entertainment and media": 0.8333333333333334,
"eval_accuracy_label_conflict, war and peace": 0.7021276595744681,
"eval_accuracy_label_crime, law and justice": 0.8323699421965318,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8778625954198473,
"eval_accuracy_label_economy, business, and finance": 0.7848101265822784,
"eval_accuracy_label_environment": 0.5,
"eval_accuracy_label_health": 0.7,
"eval_accuracy_label_human interest": 0.25,
"eval_accuracy_label_labour": 1.0,
"eval_accuracy_label_lifestyle and leisure": 0.75,
"eval_accuracy_label_politics": 0.6330935251798561,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.25,
"eval_accuracy_label_society": 0.3684210526315789,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.7232835318516546,
"eval_loss": 0.8674274682998657,
"eval_precision": 0.7332860565477397,
"eval_recall": 0.7240184757505773,
"eval_runtime": 6.8885,
"eval_samples_per_second": 125.717,
"eval_steps_per_second": 7.984,
"step": 800
},
{
"epoch": 1.3659359190556493,
"grad_norm": 12.72695255279541,
"learning_rate": 1.5152462861610635e-05,
"loss": 0.5333,
"step": 810
},
{
"epoch": 1.3827993254637436,
"grad_norm": 9.709075927734375,
"learning_rate": 1.4996090695856138e-05,
"loss": 0.488,
"step": 820
},
{
"epoch": 1.399662731871838,
"grad_norm": 11.909979820251465,
"learning_rate": 1.4839718530101644e-05,
"loss": 0.6037,
"step": 830
},
{
"epoch": 1.4165261382799326,
"grad_norm": 16.916475296020508,
"learning_rate": 1.4683346364347146e-05,
"loss": 0.5647,
"step": 840
},
{
"epoch": 1.433389544688027,
"grad_norm": 13.278093338012695,
"learning_rate": 1.4526974198592652e-05,
"loss": 0.5078,
"step": 850
},
{
"epoch": 1.4502529510961213,
"grad_norm": 7.877659320831299,
"learning_rate": 1.4370602032838158e-05,
"loss": 0.5108,
"step": 860
},
{
"epoch": 1.4671163575042159,
"grad_norm": 7.198855400085449,
"learning_rate": 1.421422986708366e-05,
"loss": 0.537,
"step": 870
},
{
"epoch": 1.4839797639123102,
"grad_norm": 7.947418212890625,
"learning_rate": 1.4057857701329166e-05,
"loss": 0.4431,
"step": 880
},
{
"epoch": 1.5008431703204046,
"grad_norm": 8.317455291748047,
"learning_rate": 1.3901485535574668e-05,
"loss": 0.4532,
"step": 890
},
{
"epoch": 1.5177065767284992,
"grad_norm": 8.073487281799316,
"learning_rate": 1.3745113369820174e-05,
"loss": 0.4624,
"step": 900
},
{
"epoch": 1.5345699831365935,
"grad_norm": 9.39900016784668,
"learning_rate": 1.3588741204065676e-05,
"loss": 0.3739,
"step": 910
},
{
"epoch": 1.551433389544688,
"grad_norm": 7.912290096282959,
"learning_rate": 1.3432369038311182e-05,
"loss": 0.497,
"step": 920
},
{
"epoch": 1.5682967959527825,
"grad_norm": 12.181069374084473,
"learning_rate": 1.3275996872556686e-05,
"loss": 0.4859,
"step": 930
},
{
"epoch": 1.5851602023608768,
"grad_norm": 9.931648254394531,
"learning_rate": 1.311962470680219e-05,
"loss": 0.4945,
"step": 940
},
{
"epoch": 1.6020236087689712,
"grad_norm": 7.948305606842041,
"learning_rate": 1.2963252541047694e-05,
"loss": 0.3742,
"step": 950
},
{
"epoch": 1.6188870151770658,
"grad_norm": 9.558001518249512,
"learning_rate": 1.2806880375293199e-05,
"loss": 0.4684,
"step": 960
},
{
"epoch": 1.6357504215851602,
"grad_norm": 8.381938934326172,
"learning_rate": 1.2650508209538703e-05,
"loss": 0.4564,
"step": 970
},
{
"epoch": 1.6526138279932545,
"grad_norm": 7.728443622589111,
"learning_rate": 1.2494136043784208e-05,
"loss": 0.4493,
"step": 980
},
{
"epoch": 1.669477234401349,
"grad_norm": 14.073138236999512,
"learning_rate": 1.233776387802971e-05,
"loss": 0.4085,
"step": 990
},
{
"epoch": 1.6863406408094435,
"grad_norm": 6.431807518005371,
"learning_rate": 1.2181391712275217e-05,
"loss": 0.4223,
"step": 1000
},
{
"epoch": 1.6863406408094435,
"eval_accuracy": 0.7240184757505773,
"eval_accuracy_label_arts, culture, entertainment and media": 0.75,
"eval_accuracy_label_conflict, war and peace": 0.675531914893617,
"eval_accuracy_label_crime, law and justice": 0.884393063583815,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8549618320610687,
"eval_accuracy_label_economy, business, and finance": 0.7341772151898734,
"eval_accuracy_label_environment": 0.5,
"eval_accuracy_label_health": 0.9,
"eval_accuracy_label_human interest": 0.3333333333333333,
"eval_accuracy_label_labour": 1.0,
"eval_accuracy_label_lifestyle and leisure": 0.625,
"eval_accuracy_label_politics": 0.6474820143884892,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.3333333333333333,
"eval_accuracy_label_society": 0.3684210526315789,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 0.0,
"eval_f1": 0.7249701679054074,
"eval_loss": 0.8601514101028442,
"eval_precision": 0.7387214985313825,
"eval_recall": 0.7240184757505773,
"eval_runtime": 6.9046,
"eval_samples_per_second": 125.423,
"eval_steps_per_second": 7.966,
"step": 1000
},
{
"epoch": 1.7032040472175378,
"grad_norm": 5.857812404632568,
"learning_rate": 1.2025019546520719e-05,
"loss": 0.4261,
"step": 1010
},
{
"epoch": 1.7200674536256324,
"grad_norm": 6.620238304138184,
"learning_rate": 1.1868647380766225e-05,
"loss": 0.4071,
"step": 1020
},
{
"epoch": 1.7369308600337268,
"grad_norm": 13.199769973754883,
"learning_rate": 1.1712275215011727e-05,
"loss": 0.3632,
"step": 1030
},
{
"epoch": 1.7537942664418211,
"grad_norm": 8.069077491760254,
"learning_rate": 1.1555903049257233e-05,
"loss": 0.3977,
"step": 1040
},
{
"epoch": 1.7706576728499157,
"grad_norm": 12.82445240020752,
"learning_rate": 1.1399530883502739e-05,
"loss": 0.4245,
"step": 1050
},
{
"epoch": 1.78752107925801,
"grad_norm": 15.626612663269043,
"learning_rate": 1.1243158717748241e-05,
"loss": 0.3924,
"step": 1060
},
{
"epoch": 1.8043844856661044,
"grad_norm": 14.491767883300781,
"learning_rate": 1.1086786551993747e-05,
"loss": 0.427,
"step": 1070
},
{
"epoch": 1.821247892074199,
"grad_norm": 5.425060749053955,
"learning_rate": 1.093041438623925e-05,
"loss": 0.4013,
"step": 1080
},
{
"epoch": 1.8381112984822934,
"grad_norm": 10.305951118469238,
"learning_rate": 1.0774042220484755e-05,
"loss": 0.4036,
"step": 1090
},
{
"epoch": 1.8549747048903877,
"grad_norm": 10.018141746520996,
"learning_rate": 1.0617670054730258e-05,
"loss": 0.4123,
"step": 1100
},
{
"epoch": 1.8718381112984823,
"grad_norm": 13.072948455810547,
"learning_rate": 1.0461297888975763e-05,
"loss": 0.4586,
"step": 1110
},
{
"epoch": 1.8887015177065767,
"grad_norm": 7.460719585418701,
"learning_rate": 1.030492572322127e-05,
"loss": 0.3912,
"step": 1120
},
{
"epoch": 1.905564924114671,
"grad_norm": 5.454730033874512,
"learning_rate": 1.0148553557466772e-05,
"loss": 0.452,
"step": 1130
},
{
"epoch": 1.9224283305227656,
"grad_norm": 8.758441925048828,
"learning_rate": 9.992181391712276e-06,
"loss": 0.3622,
"step": 1140
},
{
"epoch": 1.93929173693086,
"grad_norm": 8.622238159179688,
"learning_rate": 9.835809225957781e-06,
"loss": 0.439,
"step": 1150
},
{
"epoch": 1.9561551433389543,
"grad_norm": 6.299844741821289,
"learning_rate": 9.679437060203286e-06,
"loss": 0.4006,
"step": 1160
},
{
"epoch": 1.973018549747049,
"grad_norm": 14.24021053314209,
"learning_rate": 9.52306489444879e-06,
"loss": 0.3616,
"step": 1170
},
{
"epoch": 1.9898819561551433,
"grad_norm": 20.004261016845703,
"learning_rate": 9.366692728694294e-06,
"loss": 0.4253,
"step": 1180
},
{
"epoch": 2.0067453625632377,
"grad_norm": 12.190637588500977,
"learning_rate": 9.210320562939798e-06,
"loss": 0.3859,
"step": 1190
},
{
"epoch": 2.0236087689713322,
"grad_norm": 3.0981788635253906,
"learning_rate": 9.053948397185302e-06,
"loss": 0.3104,
"step": 1200
},
{
"epoch": 2.0236087689713322,
"eval_accuracy": 0.7263279445727483,
"eval_accuracy_label_arts, culture, entertainment and media": 0.8333333333333334,
"eval_accuracy_label_conflict, war and peace": 0.7180851063829787,
"eval_accuracy_label_crime, law and justice": 0.8323699421965318,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.9083969465648855,
"eval_accuracy_label_economy, business, and finance": 0.7721518987341772,
"eval_accuracy_label_environment": 0.4375,
"eval_accuracy_label_health": 0.7,
"eval_accuracy_label_human interest": 0.25,
"eval_accuracy_label_labour": 0.5,
"eval_accuracy_label_lifestyle and leisure": 0.75,
"eval_accuracy_label_politics": 0.5611510791366906,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.4166666666666667,
"eval_accuracy_label_society": 0.47368421052631576,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.7266120737327485,
"eval_loss": 0.856505811214447,
"eval_precision": 0.7326492002435036,
"eval_recall": 0.7263279445727483,
"eval_runtime": 6.9128,
"eval_samples_per_second": 125.275,
"eval_steps_per_second": 7.956,
"step": 1200
},
{
"epoch": 2.040472175379427,
"grad_norm": 5.126299858093262,
"learning_rate": 8.897576231430806e-06,
"loss": 0.3066,
"step": 1210
},
{
"epoch": 2.057335581787521,
"grad_norm": 5.153671741485596,
"learning_rate": 8.74120406567631e-06,
"loss": 0.3275,
"step": 1220
},
{
"epoch": 2.0741989881956155,
"grad_norm": 6.91109561920166,
"learning_rate": 8.584831899921814e-06,
"loss": 0.2581,
"step": 1230
},
{
"epoch": 2.09106239460371,
"grad_norm": 9.57040023803711,
"learning_rate": 8.428459734167318e-06,
"loss": 0.2809,
"step": 1240
},
{
"epoch": 2.1079258010118043,
"grad_norm": 3.024446964263916,
"learning_rate": 8.272087568412822e-06,
"loss": 0.2288,
"step": 1250
},
{
"epoch": 2.124789207419899,
"grad_norm": 6.808653831481934,
"learning_rate": 8.115715402658327e-06,
"loss": 0.2406,
"step": 1260
},
{
"epoch": 2.1416526138279934,
"grad_norm": 15.430974960327148,
"learning_rate": 7.959343236903832e-06,
"loss": 0.3031,
"step": 1270
},
{
"epoch": 2.1585160202360876,
"grad_norm": 6.104337215423584,
"learning_rate": 7.802971071149336e-06,
"loss": 0.2575,
"step": 1280
},
{
"epoch": 2.175379426644182,
"grad_norm": 3.1404926776885986,
"learning_rate": 7.64659890539484e-06,
"loss": 0.264,
"step": 1290
},
{
"epoch": 2.1922428330522767,
"grad_norm": 11.821499824523926,
"learning_rate": 7.490226739640345e-06,
"loss": 0.2602,
"step": 1300
},
{
"epoch": 2.209106239460371,
"grad_norm": 4.936356544494629,
"learning_rate": 7.333854573885849e-06,
"loss": 0.3428,
"step": 1310
},
{
"epoch": 2.2259696458684655,
"grad_norm": 12.609466552734375,
"learning_rate": 7.177482408131353e-06,
"loss": 0.2467,
"step": 1320
},
{
"epoch": 2.24283305227656,
"grad_norm": 10.444657325744629,
"learning_rate": 7.021110242376858e-06,
"loss": 0.3149,
"step": 1330
},
{
"epoch": 2.259696458684654,
"grad_norm": 9.078226089477539,
"learning_rate": 6.864738076622362e-06,
"loss": 0.3207,
"step": 1340
},
{
"epoch": 2.2765598650927488,
"grad_norm": 7.427735328674316,
"learning_rate": 6.708365910867866e-06,
"loss": 0.2458,
"step": 1350
},
{
"epoch": 2.2934232715008434,
"grad_norm": 6.552321910858154,
"learning_rate": 6.551993745113371e-06,
"loss": 0.273,
"step": 1360
},
{
"epoch": 2.3102866779089375,
"grad_norm": 5.591136932373047,
"learning_rate": 6.395621579358875e-06,
"loss": 0.2345,
"step": 1370
},
{
"epoch": 2.327150084317032,
"grad_norm": 9.349672317504883,
"learning_rate": 6.239249413604379e-06,
"loss": 0.3254,
"step": 1380
},
{
"epoch": 2.3440134907251267,
"grad_norm": 7.311215400695801,
"learning_rate": 6.082877247849883e-06,
"loss": 0.2511,
"step": 1390
},
{
"epoch": 2.360876897133221,
"grad_norm": 9.266729354858398,
"learning_rate": 5.926505082095387e-06,
"loss": 0.2855,
"step": 1400
},
{
"epoch": 2.360876897133221,
"eval_accuracy": 0.7240184757505773,
"eval_accuracy_label_arts, culture, entertainment and media": 0.75,
"eval_accuracy_label_conflict, war and peace": 0.7393617021276596,
"eval_accuracy_label_crime, law and justice": 0.8323699421965318,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8549618320610687,
"eval_accuracy_label_economy, business, and finance": 0.7974683544303798,
"eval_accuracy_label_environment": 0.5,
"eval_accuracy_label_health": 0.7,
"eval_accuracy_label_human interest": 0.3333333333333333,
"eval_accuracy_label_labour": 0.5,
"eval_accuracy_label_lifestyle and leisure": 0.625,
"eval_accuracy_label_politics": 0.5899280575539568,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.4166666666666667,
"eval_accuracy_label_society": 0.38596491228070173,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.7283152161972767,
"eval_loss": 0.8981179594993591,
"eval_precision": 0.7401859274408267,
"eval_recall": 0.7240184757505773,
"eval_runtime": 6.8981,
"eval_samples_per_second": 125.541,
"eval_steps_per_second": 7.973,
"step": 1400
},
{
"epoch": 2.3777403035413154,
"grad_norm": 12.713851928710938,
"learning_rate": 5.770132916340891e-06,
"loss": 0.2885,
"step": 1410
},
{
"epoch": 2.39460370994941,
"grad_norm": 5.382158279418945,
"learning_rate": 5.6137607505863955e-06,
"loss": 0.3191,
"step": 1420
},
{
"epoch": 2.411467116357504,
"grad_norm": 4.609128475189209,
"learning_rate": 5.4573885848319e-06,
"loss": 0.273,
"step": 1430
},
{
"epoch": 2.4283305227655987,
"grad_norm": 5.821180820465088,
"learning_rate": 5.301016419077405e-06,
"loss": 0.2397,
"step": 1440
},
{
"epoch": 2.4451939291736933,
"grad_norm": 3.8270256519317627,
"learning_rate": 5.1446442533229095e-06,
"loss": 0.3575,
"step": 1450
},
{
"epoch": 2.4620573355817874,
"grad_norm": 4.910329341888428,
"learning_rate": 4.9882720875684136e-06,
"loss": 0.2622,
"step": 1460
},
{
"epoch": 2.478920741989882,
"grad_norm": 6.537694931030273,
"learning_rate": 4.831899921813918e-06,
"loss": 0.1981,
"step": 1470
},
{
"epoch": 2.4957841483979766,
"grad_norm": 9.600226402282715,
"learning_rate": 4.675527756059422e-06,
"loss": 0.1977,
"step": 1480
},
{
"epoch": 2.5126475548060707,
"grad_norm": 5.246051788330078,
"learning_rate": 4.519155590304926e-06,
"loss": 0.2452,
"step": 1490
},
{
"epoch": 2.5295109612141653,
"grad_norm": 5.955575942993164,
"learning_rate": 4.36278342455043e-06,
"loss": 0.2239,
"step": 1500
},
{
"epoch": 2.54637436762226,
"grad_norm": 9.812554359436035,
"learning_rate": 4.206411258795935e-06,
"loss": 0.2773,
"step": 1510
},
{
"epoch": 2.563237774030354,
"grad_norm": 11.0867280960083,
"learning_rate": 4.050039093041439e-06,
"loss": 0.1866,
"step": 1520
},
{
"epoch": 2.5801011804384486,
"grad_norm": 6.4792609214782715,
"learning_rate": 3.893666927286943e-06,
"loss": 0.2556,
"step": 1530
},
{
"epoch": 2.5969645868465427,
"grad_norm": 9.363513946533203,
"learning_rate": 3.737294761532447e-06,
"loss": 0.2593,
"step": 1540
},
{
"epoch": 2.6138279932546373,
"grad_norm": 11.11075496673584,
"learning_rate": 3.580922595777952e-06,
"loss": 0.3482,
"step": 1550
},
{
"epoch": 2.630691399662732,
"grad_norm": 2.9576575756073,
"learning_rate": 3.4245504300234562e-06,
"loss": 0.2412,
"step": 1560
},
{
"epoch": 2.6475548060708265,
"grad_norm": 9.084090232849121,
"learning_rate": 3.2681782642689603e-06,
"loss": 0.3092,
"step": 1570
},
{
"epoch": 2.6644182124789206,
"grad_norm": 7.185946941375732,
"learning_rate": 3.111806098514465e-06,
"loss": 0.2406,
"step": 1580
},
{
"epoch": 2.681281618887015,
"grad_norm": 5.467381000518799,
"learning_rate": 2.955433932759969e-06,
"loss": 0.2709,
"step": 1590
},
{
"epoch": 2.6981450252951094,
"grad_norm": 6.529063701629639,
"learning_rate": 2.799061767005473e-06,
"loss": 0.217,
"step": 1600
},
{
"epoch": 2.6981450252951094,
"eval_accuracy": 0.73094688221709,
"eval_accuracy_label_arts, culture, entertainment and media": 0.75,
"eval_accuracy_label_conflict, war and peace": 0.7446808510638298,
"eval_accuracy_label_crime, law and justice": 0.838150289017341,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8931297709923665,
"eval_accuracy_label_economy, business, and finance": 0.8481012658227848,
"eval_accuracy_label_environment": 0.375,
"eval_accuracy_label_health": 0.8,
"eval_accuracy_label_human interest": 0.3333333333333333,
"eval_accuracy_label_labour": 0.5,
"eval_accuracy_label_lifestyle and leisure": 0.5,
"eval_accuracy_label_politics": 0.539568345323741,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.4166666666666667,
"eval_accuracy_label_society": 0.45614035087719296,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.7291730687142569,
"eval_loss": 0.8666742444038391,
"eval_precision": 0.7358354972874402,
"eval_recall": 0.73094688221709,
"eval_runtime": 6.9197,
"eval_samples_per_second": 125.151,
"eval_steps_per_second": 7.948,
"step": 1600
},
{
"epoch": 2.715008431703204,
"grad_norm": 5.216823101043701,
"learning_rate": 2.642689601250977e-06,
"loss": 0.2422,
"step": 1610
},
{
"epoch": 2.7318718381112985,
"grad_norm": 14.297146797180176,
"learning_rate": 2.486317435496482e-06,
"loss": 0.2569,
"step": 1620
},
{
"epoch": 2.748735244519393,
"grad_norm": 3.5069735050201416,
"learning_rate": 2.329945269741986e-06,
"loss": 0.284,
"step": 1630
},
{
"epoch": 2.7655986509274872,
"grad_norm": 10.029553413391113,
"learning_rate": 2.1735731039874907e-06,
"loss": 0.305,
"step": 1640
},
{
"epoch": 2.782462057335582,
"grad_norm": 13.829554557800293,
"learning_rate": 2.0172009382329948e-06,
"loss": 0.3018,
"step": 1650
},
{
"epoch": 2.799325463743676,
"grad_norm": 7.969460487365723,
"learning_rate": 1.860828772478499e-06,
"loss": 0.2687,
"step": 1660
},
{
"epoch": 2.8161888701517706,
"grad_norm": 4.048108100891113,
"learning_rate": 1.7044566067240032e-06,
"loss": 0.2845,
"step": 1670
},
{
"epoch": 2.833052276559865,
"grad_norm": 3.3585705757141113,
"learning_rate": 1.5480844409695075e-06,
"loss": 0.2792,
"step": 1680
},
{
"epoch": 2.8499156829679597,
"grad_norm": 3.1764752864837646,
"learning_rate": 1.391712275215012e-06,
"loss": 0.2244,
"step": 1690
},
{
"epoch": 2.866779089376054,
"grad_norm": 9.154349327087402,
"learning_rate": 1.2353401094605161e-06,
"loss": 0.2469,
"step": 1700
},
{
"epoch": 2.8836424957841484,
"grad_norm": 8.833173751831055,
"learning_rate": 1.0789679437060204e-06,
"loss": 0.2882,
"step": 1710
},
{
"epoch": 2.9005059021922426,
"grad_norm": 9.59762954711914,
"learning_rate": 9.225957779515247e-07,
"loss": 0.2442,
"step": 1720
},
{
"epoch": 2.917369308600337,
"grad_norm": 10.648268699645996,
"learning_rate": 7.66223612197029e-07,
"loss": 0.2421,
"step": 1730
},
{
"epoch": 2.9342327150084317,
"grad_norm": 3.897761821746826,
"learning_rate": 6.098514464425332e-07,
"loss": 0.2391,
"step": 1740
},
{
"epoch": 2.9510961214165263,
"grad_norm": 5.289120674133301,
"learning_rate": 4.5347928068803755e-07,
"loss": 0.2567,
"step": 1750
},
{
"epoch": 2.9679595278246205,
"grad_norm": 6.413928985595703,
"learning_rate": 2.9710711493354186e-07,
"loss": 0.2597,
"step": 1760
},
{
"epoch": 2.984822934232715,
"grad_norm": 3.9349653720855713,
"learning_rate": 1.4073494917904614e-07,
"loss": 0.2551,
"step": 1770
},
{
"epoch": 3.0,
"step": 1779,
"total_flos": 7485003163459584.0,
"train_loss": 0.7494140876820411,
"train_runtime": 1316.4618,
"train_samples_per_second": 43.214,
"train_steps_per_second": 1.351
},
{
"epoch": 3.0,
"eval_accuracy": 0.7286374133949192,
"eval_accuracy_label_arts, culture, entertainment and media": 0.8333333333333334,
"eval_accuracy_label_conflict, war and peace": 0.723404255319149,
"eval_accuracy_label_crime, law and justice": 0.791907514450867,
"eval_accuracy_label_disaster, accident, and emergency incident": 0.8931297709923665,
"eval_accuracy_label_economy, business, and finance": 0.7974683544303798,
"eval_accuracy_label_environment": 0.4375,
"eval_accuracy_label_health": 0.7,
"eval_accuracy_label_human interest": 0.3333333333333333,
"eval_accuracy_label_labour": 0.5,
"eval_accuracy_label_lifestyle and leisure": 0.5,
"eval_accuracy_label_politics": 0.6330935251798561,
"eval_accuracy_label_religion": 0.0,
"eval_accuracy_label_science and technology": 0.4166666666666667,
"eval_accuracy_label_society": 0.45614035087719296,
"eval_accuracy_label_sport": 0.9615384615384616,
"eval_accuracy_label_weather": 1.0,
"eval_f1": 0.730012835488787,
"eval_loss": 0.8615460395812988,
"eval_precision": 0.7350865087902028,
"eval_recall": 0.7286374133949192,
"eval_runtime": 6.8008,
"eval_samples_per_second": 127.338,
"eval_steps_per_second": 8.087,
"step": 1779
}
],
"logging_steps": 10,
"max_steps": 1779,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"total_flos": 7485003163459584.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}