healjai_finetuned_phayath / trainer_state.json
mmenuu's picture
Upload 12 files
328f19d verified
raw
history blame
42.7 kB
{
"best_metric": 0.762063227953411,
"best_model_checkpoint": "finetuned_models/wisesight_sentiment/checkpoint-2400",
"epoch": 5.991124260355029,
"eval_steps": 100,
"global_step": 8100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07396449704142012,
"eval_class_f1": {
"neg": 0.018604651162790697,
"neu": 0.6997558991049634,
"pos": 0.0,
"q": 0.0
},
"eval_loss": 1.0810712575912476,
"eval_macro_average_f1": 0.17959013756693853,
"eval_micro_average_f1": 0.5391014975041597,
"eval_runtime": 6.8397,
"eval_samples_per_second": 351.476,
"eval_steps_per_second": 22.077,
"step": 100
},
{
"epoch": 0.14792899408284024,
"eval_class_f1": {
"neg": 0.6223055295220243,
"neu": 0.7497702909647779,
"pos": 0.0,
"q": 0.0
},
"eval_loss": 0.8820463418960571,
"eval_macro_average_f1": 0.34301895512170055,
"eval_micro_average_f1": 0.6472545757071547,
"eval_runtime": 6.8712,
"eval_samples_per_second": 349.865,
"eval_steps_per_second": 21.976,
"step": 200
},
{
"epoch": 0.22189349112426035,
"eval_class_f1": {
"neg": 0.7588757396449703,
"neu": 0.7731384829505916,
"pos": 0.27037037037037037,
"q": 0.0
},
"eval_loss": 0.7263810038566589,
"eval_macro_average_f1": 0.4505961482414831,
"eval_micro_average_f1": 0.7059068219633944,
"eval_runtime": 7.0136,
"eval_samples_per_second": 342.761,
"eval_steps_per_second": 21.529,
"step": 300
},
{
"epoch": 0.2958579881656805,
"eval_class_f1": {
"neg": 0.7806637806637807,
"neu": 0.760541586073501,
"pos": 0.501891551071879,
"q": 0.0909090909090909
},
"eval_loss": 0.6896220445632935,
"eval_macro_average_f1": 0.5335015021795629,
"eval_micro_average_f1": 0.7175540765391015,
"eval_runtime": 7.0909,
"eval_samples_per_second": 339.026,
"eval_steps_per_second": 21.295,
"step": 400
},
{
"epoch": 0.3698224852071006,
"grad_norm": 5.410265922546387,
"learning_rate": 1.828817733990148e-05,
"loss": 0.8994,
"step": 500
},
{
"epoch": 0.3698224852071006,
"eval_class_f1": {
"neg": 0.7767988252569751,
"neu": 0.7759882869692534,
"pos": 0.41987179487179493,
"q": 0.3777777777777778
},
"eval_loss": 0.6673027276992798,
"eval_macro_average_f1": 0.5876091712189503,
"eval_micro_average_f1": 0.7225457570715474,
"eval_runtime": 7.071,
"eval_samples_per_second": 339.978,
"eval_steps_per_second": 21.355,
"step": 500
},
{
"epoch": 0.4437869822485207,
"eval_class_f1": {
"neg": 0.7704042715484364,
"neu": 0.7415287628053587,
"pos": 0.5426356589147286,
"q": 0.14285714285714285
},
"eval_loss": 0.6752218008041382,
"eval_macro_average_f1": 0.5493564590314166,
"eval_micro_average_f1": 0.7050748752079867,
"eval_runtime": 7.2386,
"eval_samples_per_second": 332.107,
"eval_steps_per_second": 20.86,
"step": 600
},
{
"epoch": 0.5177514792899408,
"eval_class_f1": {
"neg": 0.7780979827089337,
"neu": 0.7535296490520371,
"pos": 0.5532435740514076,
"q": 0.3870967741935484
},
"eval_loss": 0.667128324508667,
"eval_macro_average_f1": 0.6179919950014817,
"eval_micro_average_f1": 0.7171381031613977,
"eval_runtime": 7.3636,
"eval_samples_per_second": 326.472,
"eval_steps_per_second": 20.506,
"step": 700
},
{
"epoch": 0.591715976331361,
"eval_class_f1": {
"neg": 0.7555923777961889,
"neu": 0.7529501332318232,
"pos": 0.5357575757575757,
"q": 0.4161073825503356
},
"eval_loss": 0.662220299243927,
"eval_macro_average_f1": 0.6151018673339809,
"eval_micro_average_f1": 0.7059068219633944,
"eval_runtime": 7.3848,
"eval_samples_per_second": 325.535,
"eval_steps_per_second": 20.448,
"step": 800
},
{
"epoch": 0.665680473372781,
"eval_class_f1": {
"neg": 0.7756714060031595,
"neu": 0.7722698471859858,
"pos": 0.5253807106598984,
"q": 0.2535211267605634
},
"eval_loss": 0.6270455121994019,
"eval_macro_average_f1": 0.5817107726524018,
"eval_micro_average_f1": 0.7250415973377704,
"eval_runtime": 7.3625,
"eval_samples_per_second": 326.52,
"eval_steps_per_second": 20.509,
"step": 900
},
{
"epoch": 0.7396449704142012,
"grad_norm": 7.580224514007568,
"learning_rate": 2.9252054794520548e-05,
"loss": 0.6495,
"step": 1000
},
{
"epoch": 0.7396449704142012,
"eval_class_f1": {
"neg": 0.7862993298585256,
"neu": 0.7885968159940763,
"pos": 0.5219858156028369,
"q": 0.30508474576271183
},
"eval_loss": 0.6415818929672241,
"eval_macro_average_f1": 0.6004916768045376,
"eval_micro_average_f1": 0.742928452579035,
"eval_runtime": 7.3324,
"eval_samples_per_second": 327.861,
"eval_steps_per_second": 20.594,
"step": 1000
},
{
"epoch": 0.8136094674556213,
"eval_class_f1": {
"neg": 0.7824267782426777,
"neu": 0.7750972762645915,
"pos": 0.550531914893617,
"q": 0.1923076923076923
},
"eval_loss": 0.6599770188331604,
"eval_macro_average_f1": 0.5750909154271446,
"eval_micro_average_f1": 0.7358569051580699,
"eval_runtime": 7.4168,
"eval_samples_per_second": 324.129,
"eval_steps_per_second": 20.359,
"step": 1100
},
{
"epoch": 0.8875739644970414,
"eval_class_f1": {
"neg": 0.7531806615776081,
"neu": 0.7700414000752727,
"pos": 0.5685164212910533,
"q": 0.35955056179775274
},
"eval_loss": 0.6348879933357239,
"eval_macro_average_f1": 0.6128222611854217,
"eval_micro_average_f1": 0.721297836938436,
"eval_runtime": 7.4408,
"eval_samples_per_second": 323.084,
"eval_steps_per_second": 20.294,
"step": 1200
},
{
"epoch": 0.9615384615384616,
"eval_class_f1": {
"neg": 0.7920646583394563,
"neu": 0.783076923076923,
"pos": 0.535014005602241,
"q": 0.46616541353383456
},
"eval_loss": 0.6110679507255554,
"eval_macro_average_f1": 0.6440802501381137,
"eval_micro_average_f1": 0.7400166389351082,
"eval_runtime": 7.4775,
"eval_samples_per_second": 321.498,
"eval_steps_per_second": 20.194,
"step": 1300
},
{
"epoch": 1.0355029585798816,
"eval_class_f1": {
"neg": 0.8015094339622642,
"neu": 0.8065934065934067,
"pos": 0.5252225519287834,
"q": 0.379746835443038
},
"eval_loss": 0.6416576504707336,
"eval_macro_average_f1": 0.628268056981873,
"eval_micro_average_f1": 0.7587354409317804,
"eval_runtime": 7.358,
"eval_samples_per_second": 326.721,
"eval_steps_per_second": 20.522,
"step": 1400
},
{
"epoch": 1.1094674556213018,
"grad_norm": 3.8226146697998047,
"learning_rate": 2.72013698630137e-05,
"loss": 0.6084,
"step": 1500
},
{
"epoch": 1.1094674556213018,
"eval_class_f1": {
"neg": 0.7846277021617293,
"neu": 0.8024917552216929,
"pos": 0.5813953488372093,
"q": 0.2857142857142857
},
"eval_loss": 0.6498740315437317,
"eval_macro_average_f1": 0.6135572729837293,
"eval_micro_average_f1": 0.7562396006655574,
"eval_runtime": 7.2975,
"eval_samples_per_second": 329.428,
"eval_steps_per_second": 20.692,
"step": 1500
},
{
"epoch": 1.183431952662722,
"eval_class_f1": {
"neg": 0.7917525773195877,
"neu": 0.7863247863247863,
"pos": 0.5444126074498568,
"q": 0.345679012345679
},
"eval_loss": 0.685055673122406,
"eval_macro_average_f1": 0.6170422458599774,
"eval_micro_average_f1": 0.7454242928452579,
"eval_runtime": 7.1996,
"eval_samples_per_second": 333.908,
"eval_steps_per_second": 20.973,
"step": 1600
},
{
"epoch": 1.2573964497041419,
"eval_class_f1": {
"neg": 0.7966231772831925,
"neu": 0.7783018867924529,
"pos": 0.5695216907675196,
"q": 0.25806451612903225
},
"eval_loss": 0.6685267090797424,
"eval_macro_average_f1": 0.6006278177430493,
"eval_micro_average_f1": 0.7375207986688852,
"eval_runtime": 7.2849,
"eval_samples_per_second": 329.999,
"eval_steps_per_second": 20.728,
"step": 1700
},
{
"epoch": 1.331360946745562,
"eval_class_f1": {
"neg": 0.8018362662586075,
"neu": 0.7977570093457944,
"pos": 0.5578947368421052,
"q": 0.36363636363636365
},
"eval_loss": 0.6347253918647766,
"eval_macro_average_f1": 0.6302810940207177,
"eval_micro_average_f1": 0.7549916805324459,
"eval_runtime": 7.1263,
"eval_samples_per_second": 337.343,
"eval_steps_per_second": 21.189,
"step": 1800
},
{
"epoch": 1.4053254437869822,
"eval_class_f1": {
"neg": 0.7917329093799682,
"neu": 0.7925512104283055,
"pos": 0.5742821473158551,
"q": 0.28125
},
"eval_loss": 0.6284430027008057,
"eval_macro_average_f1": 0.6099540667810323,
"eval_micro_average_f1": 0.7491680532445923,
"eval_runtime": 7.2447,
"eval_samples_per_second": 331.831,
"eval_steps_per_second": 20.843,
"step": 1900
},
{
"epoch": 1.4792899408284024,
"grad_norm": 4.046507835388184,
"learning_rate": 2.5146575342465757e-05,
"loss": 0.5135,
"step": 2000
},
{
"epoch": 1.4792899408284024,
"eval_class_f1": {
"neg": 0.7920792079207921,
"neu": 0.7867370007535796,
"pos": 0.5517241379310345,
"q": 0.27586206896551724
},
"eval_loss": 0.6431812644004822,
"eval_macro_average_f1": 0.601600603892731,
"eval_micro_average_f1": 0.7437603993344426,
"eval_runtime": 7.2356,
"eval_samples_per_second": 332.247,
"eval_steps_per_second": 20.869,
"step": 2000
},
{
"epoch": 1.5532544378698225,
"eval_class_f1": {
"neg": 0.7887550200803214,
"neu": 0.7950581395348836,
"pos": 0.555407209612817,
"q": 0.29032258064516125
},
"eval_loss": 0.6327183842658997,
"eval_macro_average_f1": 0.6073857374682958,
"eval_micro_average_f1": 0.7495840266222962,
"eval_runtime": 7.3333,
"eval_samples_per_second": 327.82,
"eval_steps_per_second": 20.591,
"step": 2100
},
{
"epoch": 1.6272189349112427,
"eval_class_f1": {
"neg": 0.7658119658119659,
"neu": 0.8002847988608045,
"pos": 0.56951871657754,
"q": 0.4197530864197531
},
"eval_loss": 0.6534045338630676,
"eval_macro_average_f1": 0.6388421419175159,
"eval_micro_average_f1": 0.7495840266222962,
"eval_runtime": 7.2578,
"eval_samples_per_second": 331.231,
"eval_steps_per_second": 20.805,
"step": 2200
},
{
"epoch": 1.7011834319526629,
"eval_class_f1": {
"neg": 0.7832369942196531,
"neu": 0.7650099403578529,
"pos": 0.5829268292682928,
"q": 0.35955056179775274
},
"eval_loss": 0.6581071019172668,
"eval_macro_average_f1": 0.6226810814108878,
"eval_micro_average_f1": 0.7316971713810316,
"eval_runtime": 7.1824,
"eval_samples_per_second": 334.708,
"eval_steps_per_second": 21.024,
"step": 2300
},
{
"epoch": 1.7751479289940828,
"eval_class_f1": {
"neg": 0.7951807228915662,
"neu": 0.8109843081312411,
"pos": 0.5441595441595442,
"q": 0.3157894736842105
},
"eval_loss": 0.6206311583518982,
"eval_macro_average_f1": 0.6165285122166405,
"eval_micro_average_f1": 0.762063227953411,
"eval_runtime": 7.2501,
"eval_samples_per_second": 331.583,
"eval_steps_per_second": 20.827,
"step": 2400
},
{
"epoch": 1.849112426035503,
"grad_norm": 6.195135593414307,
"learning_rate": 2.3091780821917807e-05,
"loss": 0.4995,
"step": 2500
},
{
"epoch": 1.849112426035503,
"eval_class_f1": {
"neg": 0.7932148626817447,
"neu": 0.8,
"pos": 0.5830164765525983,
"q": 0.196078431372549
},
"eval_loss": 0.6029447913169861,
"eval_macro_average_f1": 0.5930774426517229,
"eval_micro_average_f1": 0.7562396006655574,
"eval_runtime": 7.1935,
"eval_samples_per_second": 334.192,
"eval_steps_per_second": 20.991,
"step": 2500
},
{
"epoch": 1.9230769230769231,
"eval_class_f1": {
"neg": 0.8059236165237724,
"neu": 0.7899159663865546,
"pos": 0.579415501905972,
"q": 0.36666666666666664
},
"eval_loss": 0.6066814064979553,
"eval_macro_average_f1": 0.6354804378707414,
"eval_micro_average_f1": 0.7491680532445923,
"eval_runtime": 7.2817,
"eval_samples_per_second": 330.143,
"eval_steps_per_second": 20.737,
"step": 2600
},
{
"epoch": 1.997041420118343,
"eval_class_f1": {
"neg": 0.8003157063930545,
"neu": 0.797884397431054,
"pos": 0.5773447015834348,
"q": 0.3835616438356164
},
"eval_loss": 0.630171537399292,
"eval_macro_average_f1": 0.63977661231079,
"eval_micro_average_f1": 0.7545757071547421,
"eval_runtime": 7.2376,
"eval_samples_per_second": 332.156,
"eval_steps_per_second": 20.863,
"step": 2700
},
{
"epoch": 2.0710059171597632,
"eval_class_f1": {
"neg": 0.7848509266720386,
"neu": 0.7945103857566765,
"pos": 0.5853051058530511,
"q": 0.35294117647058826
},
"eval_loss": 0.7064331769943237,
"eval_macro_average_f1": 0.6294018986880886,
"eval_micro_average_f1": 0.7508319467554077,
"eval_runtime": 7.2934,
"eval_samples_per_second": 329.612,
"eval_steps_per_second": 20.704,
"step": 2800
},
{
"epoch": 2.1449704142011834,
"eval_class_f1": {
"neg": 0.797752808988764,
"neu": 0.8026412325752018,
"pos": 0.5824742268041238,
"q": 0.26666666666666666
},
"eval_loss": 0.7201129794120789,
"eval_macro_average_f1": 0.612383733758689,
"eval_micro_average_f1": 0.7591514143094842,
"eval_runtime": 7.1883,
"eval_samples_per_second": 334.433,
"eval_steps_per_second": 21.006,
"step": 2900
},
{
"epoch": 2.2189349112426036,
"grad_norm": 6.065237045288086,
"learning_rate": 2.1036986301369864e-05,
"loss": 0.4003,
"step": 3000
},
{
"epoch": 2.2189349112426036,
"eval_class_f1": {
"neg": 0.7861271676300579,
"neu": 0.800578034682081,
"pos": 0.5670391061452514,
"q": 0.3185840707964602
},
"eval_loss": 0.7178497910499573,
"eval_macro_average_f1": 0.6180820948134627,
"eval_micro_average_f1": 0.7508319467554077,
"eval_runtime": 7.2106,
"eval_samples_per_second": 333.398,
"eval_steps_per_second": 20.941,
"step": 3000
},
{
"epoch": 2.2928994082840237,
"eval_class_f1": {
"neg": 0.7955801104972374,
"neu": 0.781854043392505,
"pos": 0.5852585258525853,
"q": 0.28865979381443296
},
"eval_loss": 0.7727176547050476,
"eval_macro_average_f1": 0.6128381183891901,
"eval_micro_average_f1": 0.7383527454242929,
"eval_runtime": 7.2299,
"eval_samples_per_second": 332.51,
"eval_steps_per_second": 20.886,
"step": 3100
},
{
"epoch": 2.366863905325444,
"eval_class_f1": {
"neg": 0.7893462469733656,
"neu": 0.788983997022702,
"pos": 0.5606060606060607,
"q": 0.2888888888888889
},
"eval_loss": 0.7219040393829346,
"eval_macro_average_f1": 0.6069562983727543,
"eval_micro_average_f1": 0.7420965058236273,
"eval_runtime": 7.2669,
"eval_samples_per_second": 330.815,
"eval_steps_per_second": 20.779,
"step": 3200
},
{
"epoch": 2.440828402366864,
"eval_class_f1": {
"neg": 0.8073115003808072,
"neu": 0.7814829344841114,
"pos": 0.5855338691159586,
"q": 0.26666666666666666
},
"eval_loss": 0.7229210734367371,
"eval_macro_average_f1": 0.610248742661886,
"eval_micro_average_f1": 0.7450083194675541,
"eval_runtime": 7.1283,
"eval_samples_per_second": 337.248,
"eval_steps_per_second": 21.183,
"step": 3300
},
{
"epoch": 2.5147928994082838,
"eval_class_f1": {
"neg": 0.7984790874524715,
"neu": 0.7856049004594182,
"pos": 0.5773447015834348,
"q": 0.3
},
"eval_loss": 0.7037935853004456,
"eval_macro_average_f1": 0.615357172373831,
"eval_micro_average_f1": 0.747504159733777,
"eval_runtime": 7.2219,
"eval_samples_per_second": 332.876,
"eval_steps_per_second": 20.909,
"step": 3400
},
{
"epoch": 2.5887573964497044,
"grad_norm": 3.8475677967071533,
"learning_rate": 1.8982191780821918e-05,
"loss": 0.3579,
"step": 3500
},
{
"epoch": 2.5887573964497044,
"eval_class_f1": {
"neg": 0.7871815940838127,
"neu": 0.7871305649083427,
"pos": 0.5738916256157636,
"q": 0.37735849056603776
},
"eval_loss": 0.7569752931594849,
"eval_macro_average_f1": 0.6313905687934891,
"eval_micro_average_f1": 0.7420965058236273,
"eval_runtime": 7.3391,
"eval_samples_per_second": 327.56,
"eval_steps_per_second": 20.575,
"step": 3500
},
{
"epoch": 2.662721893491124,
"eval_class_f1": {
"neg": 0.8064269319051262,
"neu": 0.7905718701700155,
"pos": 0.5779927448609432,
"q": 0.3255813953488372
},
"eval_loss": 0.7201011180877686,
"eval_macro_average_f1": 0.6251432355712305,
"eval_micro_average_f1": 0.75,
"eval_runtime": 7.2188,
"eval_samples_per_second": 333.02,
"eval_steps_per_second": 20.918,
"step": 3600
},
{
"epoch": 2.7366863905325443,
"eval_class_f1": {
"neg": 0.7847896440129449,
"neu": 0.7701911822083495,
"pos": 0.5797413793103448,
"q": 0.3703703703703704
},
"eval_loss": 0.7302864789962769,
"eval_macro_average_f1": 0.6262731439755023,
"eval_micro_average_f1": 0.7304492512479202,
"eval_runtime": 7.2541,
"eval_samples_per_second": 331.401,
"eval_steps_per_second": 20.816,
"step": 3700
},
{
"epoch": 2.8106508875739644,
"eval_class_f1": {
"neg": 0.7971698113207547,
"neu": 0.8014842300556586,
"pos": 0.5839793281653747,
"q": 0.3283582089552239
},
"eval_loss": 0.7112248539924622,
"eval_macro_average_f1": 0.627747894624253,
"eval_micro_average_f1": 0.7587354409317804,
"eval_runtime": 7.2905,
"eval_samples_per_second": 329.743,
"eval_steps_per_second": 20.712,
"step": 3800
},
{
"epoch": 2.8846153846153846,
"eval_class_f1": {
"neg": 0.7999999999999999,
"neu": 0.7944066515495087,
"pos": 0.5961995249406176,
"q": 0.2545454545454545
},
"eval_loss": 0.7105884552001953,
"eval_macro_average_f1": 0.6112879077588952,
"eval_micro_average_f1": 0.7549916805324459,
"eval_runtime": 7.4167,
"eval_samples_per_second": 324.132,
"eval_steps_per_second": 20.359,
"step": 3900
},
{
"epoch": 2.9585798816568047,
"grad_norm": 8.97050666809082,
"learning_rate": 1.6927397260273975e-05,
"loss": 0.3409,
"step": 4000
},
{
"epoch": 2.9585798816568047,
"eval_class_f1": {
"neg": 0.803088803088803,
"neu": 0.7901328273244782,
"pos": 0.5671641791044775,
"q": 0.3513513513513513
},
"eval_loss": 0.7364293932914734,
"eval_macro_average_f1": 0.6279342902172774,
"eval_micro_average_f1": 0.7495840266222962,
"eval_runtime": 7.2488,
"eval_samples_per_second": 331.641,
"eval_steps_per_second": 20.831,
"step": 4000
},
{
"epoch": 3.032544378698225,
"eval_class_f1": {
"neg": 0.7924836601307189,
"neu": 0.7892777364110202,
"pos": 0.5696969696969698,
"q": 0.3287671232876712
},
"eval_loss": 0.8425710201263428,
"eval_macro_average_f1": 0.6200563723815951,
"eval_micro_average_f1": 0.7454242928452579,
"eval_runtime": 7.1671,
"eval_samples_per_second": 335.422,
"eval_steps_per_second": 21.068,
"step": 4100
},
{
"epoch": 3.106508875739645,
"eval_class_f1": {
"neg": 0.7883817427385893,
"neu": 0.7684537684537684,
"pos": 0.5720338983050848,
"q": 0.35294117647058826
},
"eval_loss": 0.9264113306999207,
"eval_macro_average_f1": 0.6204526464920077,
"eval_micro_average_f1": 0.7275374376039934,
"eval_runtime": 7.2924,
"eval_samples_per_second": 329.656,
"eval_steps_per_second": 20.706,
"step": 4200
},
{
"epoch": 3.1804733727810652,
"eval_class_f1": {
"neg": 0.8064269319051262,
"neu": 0.7787333854573885,
"pos": 0.5774647887323944,
"q": 0.32967032967032966
},
"eval_loss": 0.9222328662872314,
"eval_macro_average_f1": 0.6230738589413097,
"eval_micro_average_f1": 0.7420965058236273,
"eval_runtime": 7.2012,
"eval_samples_per_second": 333.833,
"eval_steps_per_second": 20.969,
"step": 4300
},
{
"epoch": 3.2544378698224854,
"eval_class_f1": {
"neg": 0.7999999999999999,
"neu": 0.7803557617942769,
"pos": 0.5765124555160142,
"q": 0.35955056179775274
},
"eval_loss": 0.9496058821678162,
"eval_macro_average_f1": 0.6291046947770109,
"eval_micro_average_f1": 0.7420965058236273,
"eval_runtime": 7.3334,
"eval_samples_per_second": 327.814,
"eval_steps_per_second": 20.591,
"step": 4400
},
{
"epoch": 3.328402366863905,
"grad_norm": 12.435276985168457,
"learning_rate": 1.4872602739726027e-05,
"loss": 0.2249,
"step": 4500
},
{
"epoch": 3.328402366863905,
"eval_class_f1": {
"neg": 0.8012718600953895,
"neu": 0.784238714613619,
"pos": 0.5663082437275986,
"q": 0.32323232323232326
},
"eval_loss": 0.9026820063591003,
"eval_macro_average_f1": 0.6187627854172325,
"eval_micro_average_f1": 0.7412645590682196,
"eval_runtime": 7.1404,
"eval_samples_per_second": 336.677,
"eval_steps_per_second": 21.147,
"step": 4500
},
{
"epoch": 3.4023668639053253,
"eval_class_f1": {
"neg": 0.8043647700701482,
"neu": 0.7884322678843227,
"pos": 0.5676328502415459,
"q": 0.2898550724637681
},
"eval_loss": 0.943065345287323,
"eval_macro_average_f1": 0.6125712401649462,
"eval_micro_average_f1": 0.747504159733777,
"eval_runtime": 7.2681,
"eval_samples_per_second": 330.759,
"eval_steps_per_second": 20.776,
"step": 4600
},
{
"epoch": 3.4763313609467454,
"eval_class_f1": {
"neg": 0.8018504240555128,
"neu": 0.7930382141505864,
"pos": 0.5692503176620076,
"q": 0.345679012345679
},
"eval_loss": 0.9825762510299683,
"eval_macro_average_f1": 0.6274544920534464,
"eval_micro_average_f1": 0.7512479201331115,
"eval_runtime": 7.2921,
"eval_samples_per_second": 329.672,
"eval_steps_per_second": 20.707,
"step": 4700
},
{
"epoch": 3.5502958579881656,
"eval_class_f1": {
"neg": 0.7946498819826908,
"neu": 0.7813455657492355,
"pos": 0.5795053003533569,
"q": 0.3055555555555555
},
"eval_loss": 0.9374552965164185,
"eval_macro_average_f1": 0.6152640759102097,
"eval_micro_average_f1": 0.7420965058236273,
"eval_runtime": 7.387,
"eval_samples_per_second": 325.436,
"eval_steps_per_second": 20.441,
"step": 4800
},
{
"epoch": 3.6242603550295858,
"eval_class_f1": {
"neg": 0.7984790874524715,
"neu": 0.7785547785547785,
"pos": 0.5748218527315915,
"q": 0.3116883116883117
},
"eval_loss": 0.9656402468681335,
"eval_macro_average_f1": 0.6158860076067884,
"eval_micro_average_f1": 0.7408485856905158,
"eval_runtime": 7.4821,
"eval_samples_per_second": 321.299,
"eval_steps_per_second": 20.181,
"step": 4900
},
{
"epoch": 3.698224852071006,
"grad_norm": 0.6623280644416809,
"learning_rate": 1.2817808219178083e-05,
"loss": 0.2207,
"step": 5000
},
{
"epoch": 3.698224852071006,
"eval_class_f1": {
"neg": 0.7987616099071206,
"neu": 0.7862857142857143,
"pos": 0.5647348951911221,
"q": 0.3
},
"eval_loss": 0.9422620534896851,
"eval_macro_average_f1": 0.6124455548459892,
"eval_micro_average_f1": 0.7441763727121464,
"eval_runtime": 7.2765,
"eval_samples_per_second": 330.379,
"eval_steps_per_second": 20.752,
"step": 5000
},
{
"epoch": 3.772189349112426,
"eval_class_f1": {
"neg": 0.8,
"neu": 0.7671342685370742,
"pos": 0.5714285714285714,
"q": 0.3414634146341463
},
"eval_loss": 0.9625053405761719,
"eval_macro_average_f1": 0.620006563649948,
"eval_micro_average_f1": 0.7296173044925125,
"eval_runtime": 7.3445,
"eval_samples_per_second": 327.318,
"eval_steps_per_second": 20.559,
"step": 5100
},
{
"epoch": 3.8461538461538463,
"eval_class_f1": {
"neg": 0.8018942383583267,
"neu": 0.7968691762951919,
"pos": 0.5706874189364461,
"q": 0.25287356321839083
},
"eval_loss": 0.9822611212730408,
"eval_macro_average_f1": 0.6055810992020889,
"eval_micro_average_f1": 0.7520798668885191,
"eval_runtime": 7.4499,
"eval_samples_per_second": 322.688,
"eval_steps_per_second": 20.269,
"step": 5200
},
{
"epoch": 3.9201183431952664,
"eval_class_f1": {
"neg": 0.7891268533772653,
"neu": 0.7875375375375375,
"pos": 0.5810968494749125,
"q": 0.273972602739726
},
"eval_loss": 0.9442653656005859,
"eval_macro_average_f1": 0.6079334607823603,
"eval_micro_average_f1": 0.7433444259567388,
"eval_runtime": 7.3731,
"eval_samples_per_second": 326.052,
"eval_steps_per_second": 20.48,
"step": 5300
},
{
"epoch": 3.994082840236686,
"eval_class_f1": {
"neg": 0.8046511627906977,
"neu": 0.7945516458569808,
"pos": 0.5775,
"q": 0.32
},
"eval_loss": 0.9429491758346558,
"eval_macro_average_f1": 0.6241757021619195,
"eval_micro_average_f1": 0.7537437603993344,
"eval_runtime": 7.3966,
"eval_samples_per_second": 325.013,
"eval_steps_per_second": 20.415,
"step": 5400
},
{
"epoch": 4.068047337278107,
"grad_norm": 2.4124114513397217,
"learning_rate": 1.0763013698630138e-05,
"loss": 0.2077,
"step": 5500
},
{
"epoch": 4.068047337278107,
"eval_class_f1": {
"neg": 0.8063781321184511,
"neu": 0.7866927592954991,
"pos": 0.5862884160756501,
"q": 0.3333333333333333
},
"eval_loss": 1.1077452898025513,
"eval_macro_average_f1": 0.6281731602057334,
"eval_micro_average_f1": 0.7483361064891847,
"eval_runtime": 7.369,
"eval_samples_per_second": 326.23,
"eval_steps_per_second": 20.491,
"step": 5500
},
{
"epoch": 4.1420118343195265,
"eval_class_f1": {
"neg": 0.7993920972644377,
"neu": 0.7660256410256411,
"pos": 0.5726775956284154,
"q": 0.29629629629629634
},
"eval_loss": 1.1472598314285278,
"eval_macro_average_f1": 0.6085979075536977,
"eval_micro_average_f1": 0.7304492512479202,
"eval_runtime": 7.5033,
"eval_samples_per_second": 320.394,
"eval_steps_per_second": 20.125,
"step": 5600
},
{
"epoch": 4.215976331360947,
"eval_class_f1": {
"neg": 0.8024502297090352,
"neu": 0.7868978805394989,
"pos": 0.5731559854897219,
"q": 0.3
},
"eval_loss": 1.169406533241272,
"eval_macro_average_f1": 0.615626023934564,
"eval_micro_average_f1": 0.7462562396006656,
"eval_runtime": 7.4833,
"eval_samples_per_second": 321.249,
"eval_steps_per_second": 20.178,
"step": 5700
},
{
"epoch": 4.289940828402367,
"eval_class_f1": {
"neg": 0.803951367781155,
"neu": 0.7902550437761706,
"pos": 0.5685019206145967,
"q": 0.30952380952380953
},
"eval_loss": 1.1968339681625366,
"eval_macro_average_f1": 0.6180580354239329,
"eval_micro_average_f1": 0.7495840266222962,
"eval_runtime": 7.4382,
"eval_samples_per_second": 323.197,
"eval_steps_per_second": 20.301,
"step": 5800
},
{
"epoch": 4.363905325443787,
"eval_class_f1": {
"neg": 0.8024786986831913,
"neu": 0.7916030534351146,
"pos": 0.5773955773955775,
"q": 0.3373493975903615
},
"eval_loss": 1.1896393299102783,
"eval_macro_average_f1": 0.6272066817760612,
"eval_micro_average_f1": 0.7504159733777038,
"eval_runtime": 7.423,
"eval_samples_per_second": 323.857,
"eval_steps_per_second": 20.342,
"step": 5900
},
{
"epoch": 4.437869822485207,
"grad_norm": 0.9506312608718872,
"learning_rate": 8.708219178082192e-06,
"loss": 0.1324,
"step": 6000
},
{
"epoch": 4.437869822485207,
"eval_class_f1": {
"neg": 0.8024316109422492,
"neu": 0.7925840092699884,
"pos": 0.5878048780487805,
"q": 0.28915662650602414
},
"eval_loss": 1.2535008192062378,
"eval_macro_average_f1": 0.6179942811917606,
"eval_micro_average_f1": 0.7516638935108153,
"eval_runtime": 7.3808,
"eval_samples_per_second": 325.711,
"eval_steps_per_second": 20.459,
"step": 6000
},
{
"epoch": 4.511834319526627,
"eval_class_f1": {
"neg": 0.7901821060965954,
"neu": 0.7819374758780393,
"pos": 0.5821064552661382,
"q": 0.2535211267605634
},
"eval_loss": 1.2182434797286987,
"eval_macro_average_f1": 0.601936791000334,
"eval_micro_average_f1": 0.7396006655574043,
"eval_runtime": 7.3498,
"eval_samples_per_second": 327.085,
"eval_steps_per_second": 20.545,
"step": 6100
},
{
"epoch": 4.585798816568047,
"eval_class_f1": {
"neg": 0.7945425361155697,
"neu": 0.7956989247311828,
"pos": 0.5761006289308177,
"q": 0.22857142857142854
},
"eval_loss": 1.2836171388626099,
"eval_macro_average_f1": 0.5987283795872497,
"eval_micro_average_f1": 0.7508319467554077,
"eval_runtime": 7.4332,
"eval_samples_per_second": 323.412,
"eval_steps_per_second": 20.314,
"step": 6200
},
{
"epoch": 4.659763313609467,
"eval_class_f1": {
"neg": 0.8003025718608169,
"neu": 0.7733970529669454,
"pos": 0.5691609977324262,
"q": 0.3225806451612903
},
"eval_loss": 1.2842472791671753,
"eval_macro_average_f1": 0.6163603169303697,
"eval_micro_average_f1": 0.7346089850249584,
"eval_runtime": 7.4941,
"eval_samples_per_second": 320.786,
"eval_steps_per_second": 20.149,
"step": 6300
},
{
"epoch": 4.733727810650888,
"eval_class_f1": {
"neg": 0.799375487900078,
"neu": 0.7936865839909809,
"pos": 0.5614489003880984,
"q": 0.3225806451612903
},
"eval_loss": 1.3067219257354736,
"eval_macro_average_f1": 0.619272904360112,
"eval_micro_average_f1": 0.7487520798668885,
"eval_runtime": 7.3805,
"eval_samples_per_second": 325.723,
"eval_steps_per_second": 20.459,
"step": 6400
},
{
"epoch": 4.8076923076923075,
"grad_norm": 0.2732117772102356,
"learning_rate": 6.653424657534246e-06,
"loss": 0.1441,
"step": 6500
},
{
"epoch": 4.8076923076923075,
"eval_class_f1": {
"neg": 0.8027628549501151,
"neu": 0.7751572327044026,
"pos": 0.5694760820045559,
"q": 0.3132530120481927
},
"eval_loss": 1.2718240022659302,
"eval_macro_average_f1": 0.6151622954268166,
"eval_micro_average_f1": 0.7371048252911814,
"eval_runtime": 7.3822,
"eval_samples_per_second": 325.649,
"eval_steps_per_second": 20.455,
"step": 6500
},
{
"epoch": 4.881656804733728,
"eval_class_f1": {
"neg": 0.796875,
"neu": 0.7868601986249045,
"pos": 0.5735115431348725,
"q": 0.29885057471264365
},
"eval_loss": 1.261472225189209,
"eval_macro_average_f1": 0.6140243291181051,
"eval_micro_average_f1": 0.7441763727121464,
"eval_runtime": 7.5114,
"eval_samples_per_second": 320.048,
"eval_steps_per_second": 20.103,
"step": 6600
},
{
"epoch": 4.955621301775148,
"eval_class_f1": {
"neg": 0.7930763178599529,
"neu": 0.7766536964980545,
"pos": 0.5714285714285715,
"q": 0.35294117647058826
},
"eval_loss": 1.2753080129623413,
"eval_macro_average_f1": 0.6235249405642919,
"eval_micro_average_f1": 0.7358569051580699,
"eval_runtime": 7.3656,
"eval_samples_per_second": 326.381,
"eval_steps_per_second": 20.501,
"step": 6700
},
{
"epoch": 5.029585798816568,
"eval_class_f1": {
"neg": 0.7962962962962963,
"neu": 0.7754943776657619,
"pos": 0.5657276995305165,
"q": 0.345679012345679
},
"eval_loss": 1.3079357147216797,
"eval_macro_average_f1": 0.6207993464595634,
"eval_micro_average_f1": 0.7366888519134775,
"eval_runtime": 7.4026,
"eval_samples_per_second": 324.753,
"eval_steps_per_second": 20.398,
"step": 6800
},
{
"epoch": 5.103550295857988,
"eval_class_f1": {
"neg": 0.7972136222910216,
"neu": 0.7786790266512167,
"pos": 0.5721040189125295,
"q": 0.3703703703703704
},
"eval_loss": 1.3499900102615356,
"eval_macro_average_f1": 0.6295917595562845,
"eval_micro_average_f1": 0.740432612312812,
"eval_runtime": 7.3777,
"eval_samples_per_second": 325.846,
"eval_steps_per_second": 20.467,
"step": 6900
},
{
"epoch": 5.177514792899408,
"grad_norm": 11.024497985839844,
"learning_rate": 4.598630136986302e-06,
"loss": 0.1111,
"step": 7000
},
{
"epoch": 5.177514792899408,
"eval_class_f1": {
"neg": 0.7956147220046985,
"neu": 0.7807853602744949,
"pos": 0.5693606755126658,
"q": 0.3544303797468354
},
"eval_loss": 1.4051584005355835,
"eval_macro_average_f1": 0.6250477843846737,
"eval_micro_average_f1": 0.7412645590682196,
"eval_runtime": 7.2531,
"eval_samples_per_second": 331.444,
"eval_steps_per_second": 20.819,
"step": 7000
},
{
"epoch": 5.2514792899408285,
"eval_class_f1": {
"neg": 0.7925407925407926,
"neu": 0.7769230769230769,
"pos": 0.5737898465171192,
"q": 0.27027027027027023
},
"eval_loss": 1.4020917415618896,
"eval_macro_average_f1": 0.6033809965628147,
"eval_micro_average_f1": 0.7375207986688852,
"eval_runtime": 7.404,
"eval_samples_per_second": 324.69,
"eval_steps_per_second": 20.394,
"step": 7100
},
{
"epoch": 5.325443786982248,
"eval_class_f1": {
"neg": 0.7949326999208235,
"neu": 0.7753846153846154,
"pos": 0.5727482678983833,
"q": 0.27848101265822783
},
"eval_loss": 1.4238033294677734,
"eval_macro_average_f1": 0.6053866489655125,
"eval_micro_average_f1": 0.7358569051580699,
"eval_runtime": 7.4328,
"eval_samples_per_second": 323.433,
"eval_steps_per_second": 20.315,
"step": 7200
},
{
"epoch": 5.399408284023669,
"eval_class_f1": {
"neg": 0.7969348659003831,
"neu": 0.7798306389530408,
"pos": 0.5721212121212121,
"q": 0.3
},
"eval_loss": 1.4431192874908447,
"eval_macro_average_f1": 0.612221679243659,
"eval_micro_average_f1": 0.7408485856905158,
"eval_runtime": 7.3682,
"eval_samples_per_second": 326.266,
"eval_steps_per_second": 20.493,
"step": 7300
},
{
"epoch": 5.4733727810650885,
"eval_class_f1": {
"neg": 0.7940717628705148,
"neu": 0.783072817384674,
"pos": 0.5703883495145632,
"q": 0.3037974683544304
},
"eval_loss": 1.4316595792770386,
"eval_macro_average_f1": 0.6128325995310456,
"eval_micro_average_f1": 0.7416805324459235,
"eval_runtime": 7.3736,
"eval_samples_per_second": 326.03,
"eval_steps_per_second": 20.479,
"step": 7400
},
{
"epoch": 5.547337278106509,
"grad_norm": 0.4265735149383545,
"learning_rate": 2.543835616438356e-06,
"loss": 0.0933,
"step": 7500
},
{
"epoch": 5.547337278106509,
"eval_class_f1": {
"neg": 0.7978311386522074,
"neu": 0.7788089713843775,
"pos": 0.567409144196952,
"q": 0.30769230769230765
},
"eval_loss": 1.4399964809417725,
"eval_macro_average_f1": 0.6129353904814612,
"eval_micro_average_f1": 0.7387687188019967,
"eval_runtime": 7.2697,
"eval_samples_per_second": 330.689,
"eval_steps_per_second": 20.771,
"step": 7500
},
{
"epoch": 5.621301775147929,
"eval_class_f1": {
"neg": 0.7984375,
"neu": 0.7812379853902346,
"pos": 0.5714285714285714,
"q": 0.32500000000000007
},
"eval_loss": 1.4240373373031616,
"eval_macro_average_f1": 0.6190260142047015,
"eval_micro_average_f1": 0.7412645590682196,
"eval_runtime": 7.4341,
"eval_samples_per_second": 323.375,
"eval_steps_per_second": 20.312,
"step": 7600
},
{
"epoch": 5.695266272189349,
"eval_class_f1": {
"neg": 0.7987470634299139,
"neu": 0.7843286420692278,
"pos": 0.5703883495145632,
"q": 0.30769230769230765
},
"eval_loss": 1.4332064390182495,
"eval_macro_average_f1": 0.6152890906765031,
"eval_micro_average_f1": 0.7437603993344426,
"eval_runtime": 7.4434,
"eval_samples_per_second": 322.969,
"eval_steps_per_second": 20.286,
"step": 7700
},
{
"epoch": 5.769230769230769,
"eval_class_f1": {
"neg": 0.7981220657276996,
"neu": 0.781874039938556,
"pos": 0.5731132075471698,
"q": 0.30769230769230765
},
"eval_loss": 1.4344979524612427,
"eval_macro_average_f1": 0.6152004052264332,
"eval_micro_average_f1": 0.7416805324459235,
"eval_runtime": 7.3808,
"eval_samples_per_second": 325.708,
"eval_steps_per_second": 20.458,
"step": 7800
},
{
"epoch": 5.84319526627219,
"eval_class_f1": {
"neg": 0.7990654205607477,
"neu": 0.7815384615384616,
"pos": 0.5724465558194775,
"q": 0.3414634146341463
},
"eval_loss": 1.4412455558776855,
"eval_macro_average_f1": 0.6236284631382082,
"eval_micro_average_f1": 0.7420965058236273,
"eval_runtime": 7.3915,
"eval_samples_per_second": 325.237,
"eval_steps_per_second": 20.429,
"step": 7900
},
{
"epoch": 5.9171597633136095,
"grad_norm": 16.41318702697754,
"learning_rate": 4.89041095890411e-07,
"loss": 0.1006,
"step": 8000
},
{
"epoch": 5.9171597633136095,
"eval_class_f1": {
"neg": 0.7987519500780033,
"neu": 0.7813098429720413,
"pos": 0.5700598802395208,
"q": 0.32500000000000007
},
"eval_loss": 1.4469937086105347,
"eval_macro_average_f1": 0.6187804183223914,
"eval_micro_average_f1": 0.7416805324459235,
"eval_runtime": 7.3689,
"eval_samples_per_second": 326.236,
"eval_steps_per_second": 20.492,
"step": 8000
},
{
"epoch": 5.991124260355029,
"eval_class_f1": {
"neg": 0.7990654205607477,
"neu": 0.781441717791411,
"pos": 0.569377990430622,
"q": 0.32500000000000007
},
"eval_loss": 1.4454258680343628,
"eval_macro_average_f1": 0.6187212821956952,
"eval_micro_average_f1": 0.7416805324459235,
"eval_runtime": 7.448,
"eval_samples_per_second": 322.77,
"eval_steps_per_second": 20.274,
"step": 8100
}
],
"logging_steps": 500,
"max_steps": 8112,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 100,
"total_flos": 1.0485727069042368e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}