QingyiSi's picture
Upload 1268 files
4697198
{
"best_metric": 0.6676326990127563,
"best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved-alpaca-belle30b/checkpoint-13400",
"epoch": 2.8910463861920173,
"global_step": 13400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.9999999999999995e-05,
"loss": 1.6143,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00011999999999999999,
"loss": 1.2447,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00017699999999999997,
"loss": 0.9529,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 0.000237,
"loss": 0.8899,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 0.00029699999999999996,
"loss": 0.8614,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 0.00029958710612097066,
"loss": 0.8402,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 0.00029915248098515027,
"loss": 0.8335,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 0.00029871785584932993,
"loss": 0.8303,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 0.0002982832307135096,
"loss": 0.8261,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 0.0002978486055776892,
"loss": 0.807,
"step": 200
},
{
"epoch": 0.04,
"eval_loss": 0.8271128535270691,
"eval_runtime": 49.877,
"eval_samples_per_second": 40.099,
"eval_steps_per_second": 0.642,
"step": 200
},
{
"epoch": 0.05,
"learning_rate": 0.00029741398044186887,
"loss": 0.808,
"step": 220
},
{
"epoch": 0.05,
"learning_rate": 0.00029697935530604853,
"loss": 0.8092,
"step": 240
},
{
"epoch": 0.06,
"learning_rate": 0.00029654473017022814,
"loss": 0.8045,
"step": 260
},
{
"epoch": 0.06,
"learning_rate": 0.0002961101050344078,
"loss": 0.8007,
"step": 280
},
{
"epoch": 0.06,
"learning_rate": 0.00029567547989858747,
"loss": 0.793,
"step": 300
},
{
"epoch": 0.07,
"learning_rate": 0.0002952408547627671,
"loss": 0.7886,
"step": 320
},
{
"epoch": 0.07,
"learning_rate": 0.00029480622962694674,
"loss": 0.7854,
"step": 340
},
{
"epoch": 0.08,
"learning_rate": 0.0002943716044911264,
"loss": 0.783,
"step": 360
},
{
"epoch": 0.08,
"learning_rate": 0.000293936979355306,
"loss": 0.7797,
"step": 380
},
{
"epoch": 0.09,
"learning_rate": 0.0002935023542194857,
"loss": 0.7801,
"step": 400
},
{
"epoch": 0.09,
"eval_loss": 0.793747067451477,
"eval_runtime": 49.8962,
"eval_samples_per_second": 40.083,
"eval_steps_per_second": 0.641,
"step": 400
},
{
"epoch": 0.09,
"learning_rate": 0.00029306772908366534,
"loss": 0.7879,
"step": 420
},
{
"epoch": 0.09,
"learning_rate": 0.00029263310394784495,
"loss": 0.7745,
"step": 440
},
{
"epoch": 0.1,
"learning_rate": 0.0002921984788120246,
"loss": 0.7725,
"step": 460
},
{
"epoch": 0.1,
"learning_rate": 0.0002917638536762043,
"loss": 0.7659,
"step": 480
},
{
"epoch": 0.11,
"learning_rate": 0.0002913292285403839,
"loss": 0.7658,
"step": 500
},
{
"epoch": 0.11,
"learning_rate": 0.00029089460340456355,
"loss": 0.7722,
"step": 520
},
{
"epoch": 0.12,
"learning_rate": 0.0002904599782687432,
"loss": 0.773,
"step": 540
},
{
"epoch": 0.12,
"learning_rate": 0.0002900253531329228,
"loss": 0.7749,
"step": 560
},
{
"epoch": 0.13,
"learning_rate": 0.0002895907279971025,
"loss": 0.7734,
"step": 580
},
{
"epoch": 0.13,
"learning_rate": 0.00028915610286128215,
"loss": 0.7607,
"step": 600
},
{
"epoch": 0.13,
"eval_loss": 0.7771433591842651,
"eval_runtime": 49.9486,
"eval_samples_per_second": 40.041,
"eval_steps_per_second": 0.641,
"step": 600
},
{
"epoch": 0.13,
"learning_rate": 0.00028872147772546176,
"loss": 0.7657,
"step": 620
},
{
"epoch": 0.14,
"learning_rate": 0.00028828685258964137,
"loss": 0.7602,
"step": 640
},
{
"epoch": 0.14,
"learning_rate": 0.00028785222745382103,
"loss": 0.7619,
"step": 660
},
{
"epoch": 0.15,
"learning_rate": 0.0002874176023180007,
"loss": 0.7587,
"step": 680
},
{
"epoch": 0.15,
"learning_rate": 0.0002869829771821803,
"loss": 0.7553,
"step": 700
},
{
"epoch": 0.16,
"learning_rate": 0.00028654835204635997,
"loss": 0.7565,
"step": 720
},
{
"epoch": 0.16,
"learning_rate": 0.00028611372691053963,
"loss": 0.7586,
"step": 740
},
{
"epoch": 0.16,
"learning_rate": 0.00028567910177471924,
"loss": 0.7556,
"step": 760
},
{
"epoch": 0.17,
"learning_rate": 0.0002852444766388989,
"loss": 0.7487,
"step": 780
},
{
"epoch": 0.17,
"learning_rate": 0.00028480985150307857,
"loss": 0.7516,
"step": 800
},
{
"epoch": 0.17,
"eval_loss": 0.7632888555526733,
"eval_runtime": 49.913,
"eval_samples_per_second": 40.07,
"eval_steps_per_second": 0.641,
"step": 800
},
{
"epoch": 0.18,
"learning_rate": 0.0002843752263672582,
"loss": 0.7527,
"step": 820
},
{
"epoch": 0.18,
"learning_rate": 0.00028394060123143784,
"loss": 0.7407,
"step": 840
},
{
"epoch": 0.19,
"learning_rate": 0.0002835059760956175,
"loss": 0.744,
"step": 860
},
{
"epoch": 0.19,
"learning_rate": 0.0002830713509597971,
"loss": 0.7456,
"step": 880
},
{
"epoch": 0.19,
"learning_rate": 0.0002826367258239768,
"loss": 0.7429,
"step": 900
},
{
"epoch": 0.2,
"learning_rate": 0.00028220210068815644,
"loss": 0.7516,
"step": 920
},
{
"epoch": 0.2,
"learning_rate": 0.00028176747555233605,
"loss": 0.7381,
"step": 940
},
{
"epoch": 0.21,
"learning_rate": 0.0002813328504165157,
"loss": 0.7256,
"step": 960
},
{
"epoch": 0.21,
"learning_rate": 0.0002808982252806954,
"loss": 0.7443,
"step": 980
},
{
"epoch": 0.22,
"learning_rate": 0.000280463600144875,
"loss": 0.7389,
"step": 1000
},
{
"epoch": 0.22,
"eval_loss": 0.7532852292060852,
"eval_runtime": 49.9829,
"eval_samples_per_second": 40.014,
"eval_steps_per_second": 0.64,
"step": 1000
},
{
"epoch": 0.22,
"learning_rate": 0.00028002897500905465,
"loss": 0.7374,
"step": 1020
},
{
"epoch": 0.22,
"learning_rate": 0.0002795943498732343,
"loss": 0.7296,
"step": 1040
},
{
"epoch": 0.23,
"learning_rate": 0.0002791597247374139,
"loss": 0.7424,
"step": 1060
},
{
"epoch": 0.23,
"learning_rate": 0.0002787250996015936,
"loss": 0.7328,
"step": 1080
},
{
"epoch": 0.24,
"learning_rate": 0.00027829047446577325,
"loss": 0.7367,
"step": 1100
},
{
"epoch": 0.24,
"learning_rate": 0.00027785584932995286,
"loss": 0.7419,
"step": 1120
},
{
"epoch": 0.25,
"learning_rate": 0.0002774212241941325,
"loss": 0.7347,
"step": 1140
},
{
"epoch": 0.25,
"learning_rate": 0.0002769865990583122,
"loss": 0.7292,
"step": 1160
},
{
"epoch": 0.25,
"learning_rate": 0.0002765519739224918,
"loss": 0.7394,
"step": 1180
},
{
"epoch": 0.26,
"learning_rate": 0.00027611734878667146,
"loss": 0.7358,
"step": 1200
},
{
"epoch": 0.26,
"eval_loss": 0.7463639974594116,
"eval_runtime": 49.9963,
"eval_samples_per_second": 40.003,
"eval_steps_per_second": 0.64,
"step": 1200
},
{
"epoch": 0.26,
"learning_rate": 0.0002756827236508511,
"loss": 0.7266,
"step": 1220
},
{
"epoch": 0.27,
"learning_rate": 0.00027524809851503073,
"loss": 0.7336,
"step": 1240
},
{
"epoch": 0.27,
"learning_rate": 0.0002748134733792104,
"loss": 0.7296,
"step": 1260
},
{
"epoch": 0.28,
"learning_rate": 0.00027437884824339006,
"loss": 0.73,
"step": 1280
},
{
"epoch": 0.28,
"learning_rate": 0.00027394422310756967,
"loss": 0.7312,
"step": 1300
},
{
"epoch": 0.28,
"learning_rate": 0.00027350959797174933,
"loss": 0.7307,
"step": 1320
},
{
"epoch": 0.29,
"learning_rate": 0.000273074972835929,
"loss": 0.7246,
"step": 1340
},
{
"epoch": 0.29,
"learning_rate": 0.0002726403477001086,
"loss": 0.7299,
"step": 1360
},
{
"epoch": 0.3,
"learning_rate": 0.00027220572256428827,
"loss": 0.7251,
"step": 1380
},
{
"epoch": 0.3,
"learning_rate": 0.00027177109742846793,
"loss": 0.7286,
"step": 1400
},
{
"epoch": 0.3,
"eval_loss": 0.7393819093704224,
"eval_runtime": 49.9896,
"eval_samples_per_second": 40.008,
"eval_steps_per_second": 0.64,
"step": 1400
},
{
"epoch": 0.31,
"learning_rate": 0.00027133647229264754,
"loss": 0.7186,
"step": 1420
},
{
"epoch": 0.31,
"learning_rate": 0.0002709018471568272,
"loss": 0.7215,
"step": 1440
},
{
"epoch": 0.31,
"learning_rate": 0.00027046722202100687,
"loss": 0.7295,
"step": 1460
},
{
"epoch": 0.32,
"learning_rate": 0.0002700325968851865,
"loss": 0.7198,
"step": 1480
},
{
"epoch": 0.32,
"learning_rate": 0.00026959797174936614,
"loss": 0.7184,
"step": 1500
},
{
"epoch": 0.33,
"learning_rate": 0.0002691633466135458,
"loss": 0.7283,
"step": 1520
},
{
"epoch": 0.33,
"learning_rate": 0.0002687287214777254,
"loss": 0.7378,
"step": 1540
},
{
"epoch": 0.34,
"learning_rate": 0.0002682940963419051,
"loss": 0.7196,
"step": 1560
},
{
"epoch": 0.34,
"learning_rate": 0.00026785947120608474,
"loss": 0.7152,
"step": 1580
},
{
"epoch": 0.35,
"learning_rate": 0.00026742484607026435,
"loss": 0.7184,
"step": 1600
},
{
"epoch": 0.35,
"eval_loss": 0.7342154383659363,
"eval_runtime": 49.9957,
"eval_samples_per_second": 40.003,
"eval_steps_per_second": 0.64,
"step": 1600
},
{
"epoch": 0.35,
"learning_rate": 0.000266990220934444,
"loss": 0.7164,
"step": 1620
},
{
"epoch": 0.35,
"learning_rate": 0.0002665555957986237,
"loss": 0.7136,
"step": 1640
},
{
"epoch": 0.36,
"learning_rate": 0.0002661209706628033,
"loss": 0.7203,
"step": 1660
},
{
"epoch": 0.36,
"learning_rate": 0.00026568634552698295,
"loss": 0.7158,
"step": 1680
},
{
"epoch": 0.37,
"learning_rate": 0.0002652517203911626,
"loss": 0.7145,
"step": 1700
},
{
"epoch": 0.37,
"learning_rate": 0.0002648170952553422,
"loss": 0.7111,
"step": 1720
},
{
"epoch": 0.38,
"learning_rate": 0.0002643824701195219,
"loss": 0.7155,
"step": 1740
},
{
"epoch": 0.38,
"learning_rate": 0.00026394784498370155,
"loss": 0.718,
"step": 1760
},
{
"epoch": 0.38,
"learning_rate": 0.00026351321984788116,
"loss": 0.7125,
"step": 1780
},
{
"epoch": 0.39,
"learning_rate": 0.0002630785947120608,
"loss": 0.7163,
"step": 1800
},
{
"epoch": 0.39,
"eval_loss": 0.7301950454711914,
"eval_runtime": 49.9689,
"eval_samples_per_second": 40.025,
"eval_steps_per_second": 0.64,
"step": 1800
},
{
"epoch": 0.39,
"learning_rate": 0.0002626439695762405,
"loss": 0.7121,
"step": 1820
},
{
"epoch": 0.4,
"learning_rate": 0.0002622093444404201,
"loss": 0.7092,
"step": 1840
},
{
"epoch": 0.4,
"learning_rate": 0.00026177471930459976,
"loss": 0.7133,
"step": 1860
},
{
"epoch": 0.41,
"learning_rate": 0.0002613400941687794,
"loss": 0.7171,
"step": 1880
},
{
"epoch": 0.41,
"learning_rate": 0.00026090546903295903,
"loss": 0.7235,
"step": 1900
},
{
"epoch": 0.41,
"learning_rate": 0.0002604708438971387,
"loss": 0.7086,
"step": 1920
},
{
"epoch": 0.42,
"learning_rate": 0.00026003621876131836,
"loss": 0.7136,
"step": 1940
},
{
"epoch": 0.42,
"learning_rate": 0.00025960159362549797,
"loss": 0.7031,
"step": 1960
},
{
"epoch": 0.43,
"learning_rate": 0.00025916696848967763,
"loss": 0.7084,
"step": 1980
},
{
"epoch": 0.43,
"learning_rate": 0.0002587323433538573,
"loss": 0.7091,
"step": 2000
},
{
"epoch": 0.43,
"eval_loss": 0.726446270942688,
"eval_runtime": 50.0519,
"eval_samples_per_second": 39.959,
"eval_steps_per_second": 0.639,
"step": 2000
},
{
"epoch": 0.44,
"learning_rate": 0.0002582977182180369,
"loss": 0.7119,
"step": 2020
},
{
"epoch": 0.44,
"learning_rate": 0.00025786309308221657,
"loss": 0.7186,
"step": 2040
},
{
"epoch": 0.44,
"learning_rate": 0.00025742846794639623,
"loss": 0.703,
"step": 2060
},
{
"epoch": 0.45,
"learning_rate": 0.00025699384281057584,
"loss": 0.7078,
"step": 2080
},
{
"epoch": 0.45,
"learning_rate": 0.0002565592176747555,
"loss": 0.7084,
"step": 2100
},
{
"epoch": 0.46,
"learning_rate": 0.00025612459253893517,
"loss": 0.7014,
"step": 2120
},
{
"epoch": 0.46,
"learning_rate": 0.0002556899674031148,
"loss": 0.7076,
"step": 2140
},
{
"epoch": 0.47,
"learning_rate": 0.00025525534226729444,
"loss": 0.7103,
"step": 2160
},
{
"epoch": 0.47,
"learning_rate": 0.0002548207171314741,
"loss": 0.7118,
"step": 2180
},
{
"epoch": 0.47,
"learning_rate": 0.0002543860919956537,
"loss": 0.7028,
"step": 2200
},
{
"epoch": 0.47,
"eval_loss": 0.7220268845558167,
"eval_runtime": 49.9937,
"eval_samples_per_second": 40.005,
"eval_steps_per_second": 0.64,
"step": 2200
},
{
"epoch": 0.48,
"learning_rate": 0.0002539514668598334,
"loss": 0.707,
"step": 2220
},
{
"epoch": 0.48,
"learning_rate": 0.00025351684172401304,
"loss": 0.7045,
"step": 2240
},
{
"epoch": 0.49,
"learning_rate": 0.00025308221658819265,
"loss": 0.6905,
"step": 2260
},
{
"epoch": 0.49,
"learning_rate": 0.0002526475914523723,
"loss": 0.6982,
"step": 2280
},
{
"epoch": 0.5,
"learning_rate": 0.000252212966316552,
"loss": 0.706,
"step": 2300
},
{
"epoch": 0.5,
"learning_rate": 0.0002517783411807316,
"loss": 0.6992,
"step": 2320
},
{
"epoch": 0.5,
"learning_rate": 0.00025134371604491125,
"loss": 0.6939,
"step": 2340
},
{
"epoch": 0.51,
"learning_rate": 0.00025090909090909086,
"loss": 0.7037,
"step": 2360
},
{
"epoch": 0.51,
"learning_rate": 0.0002504744657732705,
"loss": 0.7127,
"step": 2380
},
{
"epoch": 0.52,
"learning_rate": 0.00025003984063745014,
"loss": 0.702,
"step": 2400
},
{
"epoch": 0.52,
"eval_loss": 0.7191869020462036,
"eval_runtime": 50.0038,
"eval_samples_per_second": 39.997,
"eval_steps_per_second": 0.64,
"step": 2400
},
{
"epoch": 0.52,
"learning_rate": 0.0002496052155016298,
"loss": 0.7033,
"step": 2420
},
{
"epoch": 0.53,
"learning_rate": 0.00024917059036580946,
"loss": 0.7028,
"step": 2440
},
{
"epoch": 0.53,
"learning_rate": 0.00024873596522998907,
"loss": 0.6967,
"step": 2460
},
{
"epoch": 0.54,
"learning_rate": 0.00024830134009416874,
"loss": 0.7068,
"step": 2480
},
{
"epoch": 0.54,
"learning_rate": 0.0002478667149583484,
"loss": 0.7105,
"step": 2500
},
{
"epoch": 0.54,
"learning_rate": 0.000247432089822528,
"loss": 0.6968,
"step": 2520
},
{
"epoch": 0.55,
"learning_rate": 0.00024699746468670767,
"loss": 0.7025,
"step": 2540
},
{
"epoch": 0.55,
"learning_rate": 0.00024656283955088734,
"loss": 0.6942,
"step": 2560
},
{
"epoch": 0.56,
"learning_rate": 0.00024612821441506694,
"loss": 0.6948,
"step": 2580
},
{
"epoch": 0.56,
"learning_rate": 0.0002456935892792466,
"loss": 0.6979,
"step": 2600
},
{
"epoch": 0.56,
"eval_loss": 0.715853750705719,
"eval_runtime": 50.0426,
"eval_samples_per_second": 39.966,
"eval_steps_per_second": 0.639,
"step": 2600
},
{
"epoch": 0.57,
"learning_rate": 0.00024525896414342627,
"loss": 0.6967,
"step": 2620
},
{
"epoch": 0.57,
"learning_rate": 0.0002448243390076059,
"loss": 0.7012,
"step": 2640
},
{
"epoch": 0.57,
"learning_rate": 0.00024438971387178554,
"loss": 0.697,
"step": 2660
},
{
"epoch": 0.58,
"learning_rate": 0.0002439550887359652,
"loss": 0.6931,
"step": 2680
},
{
"epoch": 0.58,
"learning_rate": 0.00024352046360014485,
"loss": 0.6856,
"step": 2700
},
{
"epoch": 0.59,
"learning_rate": 0.00024308583846432448,
"loss": 0.697,
"step": 2720
},
{
"epoch": 0.59,
"learning_rate": 0.00024265121332850415,
"loss": 0.6996,
"step": 2740
},
{
"epoch": 0.6,
"learning_rate": 0.00024221658819268378,
"loss": 0.698,
"step": 2760
},
{
"epoch": 0.6,
"learning_rate": 0.00024178196305686342,
"loss": 0.6952,
"step": 2780
},
{
"epoch": 0.6,
"learning_rate": 0.00024134733792104308,
"loss": 0.7049,
"step": 2800
},
{
"epoch": 0.6,
"eval_loss": 0.7124837040901184,
"eval_runtime": 50.0654,
"eval_samples_per_second": 39.948,
"eval_steps_per_second": 0.639,
"step": 2800
},
{
"epoch": 0.61,
"learning_rate": 0.00024091271278522272,
"loss": 0.6927,
"step": 2820
},
{
"epoch": 0.61,
"learning_rate": 0.00024047808764940235,
"loss": 0.6996,
"step": 2840
},
{
"epoch": 0.62,
"learning_rate": 0.00024004346251358202,
"loss": 0.6921,
"step": 2860
},
{
"epoch": 0.62,
"learning_rate": 0.00023960883737776165,
"loss": 0.695,
"step": 2880
},
{
"epoch": 0.63,
"learning_rate": 0.0002391742122419413,
"loss": 0.6887,
"step": 2900
},
{
"epoch": 0.63,
"learning_rate": 0.00023873958710612095,
"loss": 0.6915,
"step": 2920
},
{
"epoch": 0.63,
"learning_rate": 0.0002383049619703006,
"loss": 0.6915,
"step": 2940
},
{
"epoch": 0.64,
"learning_rate": 0.00023787033683448023,
"loss": 0.6916,
"step": 2960
},
{
"epoch": 0.64,
"learning_rate": 0.0002374357116986599,
"loss": 0.687,
"step": 2980
},
{
"epoch": 0.65,
"learning_rate": 0.00023700108656283953,
"loss": 0.6997,
"step": 3000
},
{
"epoch": 0.65,
"eval_loss": 0.7098860144615173,
"eval_runtime": 50.0652,
"eval_samples_per_second": 39.948,
"eval_steps_per_second": 0.639,
"step": 3000
},
{
"epoch": 0.65,
"learning_rate": 0.00023656646142701916,
"loss": 0.6895,
"step": 3020
},
{
"epoch": 0.66,
"learning_rate": 0.00023613183629119883,
"loss": 0.6861,
"step": 3040
},
{
"epoch": 0.66,
"learning_rate": 0.00023569721115537846,
"loss": 0.6988,
"step": 3060
},
{
"epoch": 0.66,
"learning_rate": 0.0002352625860195581,
"loss": 0.6852,
"step": 3080
},
{
"epoch": 0.67,
"learning_rate": 0.00023482796088373776,
"loss": 0.6863,
"step": 3100
},
{
"epoch": 0.67,
"learning_rate": 0.0002343933357479174,
"loss": 0.6943,
"step": 3120
},
{
"epoch": 0.68,
"learning_rate": 0.00023395871061209704,
"loss": 0.686,
"step": 3140
},
{
"epoch": 0.68,
"learning_rate": 0.0002335240854762767,
"loss": 0.684,
"step": 3160
},
{
"epoch": 0.69,
"learning_rate": 0.00023308946034045634,
"loss": 0.6866,
"step": 3180
},
{
"epoch": 0.69,
"learning_rate": 0.00023265483520463597,
"loss": 0.6859,
"step": 3200
},
{
"epoch": 0.69,
"eval_loss": 0.7077216506004333,
"eval_runtime": 50.0526,
"eval_samples_per_second": 39.958,
"eval_steps_per_second": 0.639,
"step": 3200
},
{
"epoch": 0.69,
"learning_rate": 0.00023222021006881564,
"loss": 0.6845,
"step": 3220
},
{
"epoch": 0.7,
"learning_rate": 0.00023178558493299527,
"loss": 0.7011,
"step": 3240
},
{
"epoch": 0.7,
"learning_rate": 0.0002313509597971749,
"loss": 0.69,
"step": 3260
},
{
"epoch": 0.71,
"learning_rate": 0.00023091633466135457,
"loss": 0.6931,
"step": 3280
},
{
"epoch": 0.71,
"learning_rate": 0.0002304817095255342,
"loss": 0.6998,
"step": 3300
},
{
"epoch": 0.72,
"learning_rate": 0.00023004708438971385,
"loss": 0.6933,
"step": 3320
},
{
"epoch": 0.72,
"learning_rate": 0.0002296124592538935,
"loss": 0.6859,
"step": 3340
},
{
"epoch": 0.72,
"learning_rate": 0.00022917783411807315,
"loss": 0.6972,
"step": 3360
},
{
"epoch": 0.73,
"learning_rate": 0.00022874320898225278,
"loss": 0.6868,
"step": 3380
},
{
"epoch": 0.73,
"learning_rate": 0.00022830858384643245,
"loss": 0.6902,
"step": 3400
},
{
"epoch": 0.73,
"eval_loss": 0.7059928178787231,
"eval_runtime": 50.0118,
"eval_samples_per_second": 39.991,
"eval_steps_per_second": 0.64,
"step": 3400
},
{
"epoch": 0.74,
"learning_rate": 0.00022787395871061208,
"loss": 0.6819,
"step": 3420
},
{
"epoch": 0.74,
"learning_rate": 0.00022743933357479172,
"loss": 0.6833,
"step": 3440
},
{
"epoch": 0.75,
"learning_rate": 0.00022700470843897138,
"loss": 0.6826,
"step": 3460
},
{
"epoch": 0.75,
"learning_rate": 0.00022657008330315102,
"loss": 0.694,
"step": 3480
},
{
"epoch": 0.76,
"learning_rate": 0.00022613545816733066,
"loss": 0.6827,
"step": 3500
},
{
"epoch": 0.76,
"learning_rate": 0.00022570083303151032,
"loss": 0.6844,
"step": 3520
},
{
"epoch": 0.76,
"learning_rate": 0.00022526620789568996,
"loss": 0.6893,
"step": 3540
},
{
"epoch": 0.77,
"learning_rate": 0.0002248315827598696,
"loss": 0.6843,
"step": 3560
},
{
"epoch": 0.77,
"learning_rate": 0.00022439695762404926,
"loss": 0.6843,
"step": 3580
},
{
"epoch": 0.78,
"learning_rate": 0.0002239623324882289,
"loss": 0.691,
"step": 3600
},
{
"epoch": 0.78,
"eval_loss": 0.7041522264480591,
"eval_runtime": 50.0554,
"eval_samples_per_second": 39.956,
"eval_steps_per_second": 0.639,
"step": 3600
},
{
"epoch": 0.78,
"learning_rate": 0.00022352770735240853,
"loss": 0.6846,
"step": 3620
},
{
"epoch": 0.79,
"learning_rate": 0.0002230930822165882,
"loss": 0.689,
"step": 3640
},
{
"epoch": 0.79,
"learning_rate": 0.00022265845708076783,
"loss": 0.6777,
"step": 3660
},
{
"epoch": 0.79,
"learning_rate": 0.00022222383194494747,
"loss": 0.6903,
"step": 3680
},
{
"epoch": 0.8,
"learning_rate": 0.00022178920680912713,
"loss": 0.684,
"step": 3700
},
{
"epoch": 0.8,
"learning_rate": 0.00022135458167330677,
"loss": 0.6867,
"step": 3720
},
{
"epoch": 0.81,
"learning_rate": 0.0002209199565374864,
"loss": 0.6697,
"step": 3740
},
{
"epoch": 0.81,
"learning_rate": 0.00022048533140166607,
"loss": 0.6864,
"step": 3760
},
{
"epoch": 0.82,
"learning_rate": 0.0002200507062658457,
"loss": 0.6813,
"step": 3780
},
{
"epoch": 0.82,
"learning_rate": 0.00021961608113002534,
"loss": 0.6807,
"step": 3800
},
{
"epoch": 0.82,
"eval_loss": 0.7024796009063721,
"eval_runtime": 50.022,
"eval_samples_per_second": 39.982,
"eval_steps_per_second": 0.64,
"step": 3800
},
{
"epoch": 0.82,
"learning_rate": 0.000219181455994205,
"loss": 0.6824,
"step": 3820
},
{
"epoch": 0.83,
"learning_rate": 0.00021874683085838464,
"loss": 0.6814,
"step": 3840
},
{
"epoch": 0.83,
"learning_rate": 0.00021831220572256427,
"loss": 0.6789,
"step": 3860
},
{
"epoch": 0.84,
"learning_rate": 0.00021787758058674394,
"loss": 0.6752,
"step": 3880
},
{
"epoch": 0.84,
"learning_rate": 0.00021744295545092358,
"loss": 0.6826,
"step": 3900
},
{
"epoch": 0.85,
"learning_rate": 0.0002170083303151032,
"loss": 0.6874,
"step": 3920
},
{
"epoch": 0.85,
"learning_rate": 0.00021657370517928288,
"loss": 0.6761,
"step": 3940
},
{
"epoch": 0.85,
"learning_rate": 0.0002161390800434625,
"loss": 0.6795,
"step": 3960
},
{
"epoch": 0.86,
"learning_rate": 0.00021570445490764215,
"loss": 0.6781,
"step": 3980
},
{
"epoch": 0.86,
"learning_rate": 0.0002152698297718218,
"loss": 0.6754,
"step": 4000
},
{
"epoch": 0.86,
"eval_loss": 0.7004331350326538,
"eval_runtime": 50.0568,
"eval_samples_per_second": 39.955,
"eval_steps_per_second": 0.639,
"step": 4000
},
{
"epoch": 0.87,
"learning_rate": 0.00021483520463600145,
"loss": 0.6791,
"step": 4020
},
{
"epoch": 0.87,
"learning_rate": 0.00021440057950018108,
"loss": 0.6863,
"step": 4040
},
{
"epoch": 0.88,
"learning_rate": 0.00021396595436436075,
"loss": 0.6846,
"step": 4060
},
{
"epoch": 0.88,
"learning_rate": 0.00021353132922854036,
"loss": 0.6814,
"step": 4080
},
{
"epoch": 0.88,
"learning_rate": 0.00021309670409272,
"loss": 0.6825,
"step": 4100
},
{
"epoch": 0.89,
"learning_rate": 0.00021266207895689963,
"loss": 0.6827,
"step": 4120
},
{
"epoch": 0.89,
"learning_rate": 0.0002122274538210793,
"loss": 0.6769,
"step": 4140
},
{
"epoch": 0.9,
"learning_rate": 0.00021179282868525893,
"loss": 0.6869,
"step": 4160
},
{
"epoch": 0.9,
"learning_rate": 0.00021135820354943857,
"loss": 0.6815,
"step": 4180
},
{
"epoch": 0.91,
"learning_rate": 0.00021092357841361823,
"loss": 0.6725,
"step": 4200
},
{
"epoch": 0.91,
"eval_loss": 0.6981337666511536,
"eval_runtime": 50.0559,
"eval_samples_per_second": 39.955,
"eval_steps_per_second": 0.639,
"step": 4200
},
{
"epoch": 0.91,
"learning_rate": 0.00021051068453458889,
"loss": 0.6731,
"step": 4220
},
{
"epoch": 0.91,
"learning_rate": 0.00021007605939876855,
"loss": 0.6792,
"step": 4240
},
{
"epoch": 0.92,
"learning_rate": 0.00020964143426294819,
"loss": 0.6755,
"step": 4260
},
{
"epoch": 0.92,
"learning_rate": 0.00020920680912712782,
"loss": 0.6833,
"step": 4280
},
{
"epoch": 0.93,
"learning_rate": 0.0002087721839913075,
"loss": 0.6693,
"step": 4300
},
{
"epoch": 0.93,
"learning_rate": 0.00020833755885548712,
"loss": 0.6728,
"step": 4320
},
{
"epoch": 0.94,
"learning_rate": 0.00020790293371966676,
"loss": 0.6812,
"step": 4340
},
{
"epoch": 0.94,
"learning_rate": 0.00020746830858384642,
"loss": 0.6734,
"step": 4360
},
{
"epoch": 0.94,
"learning_rate": 0.00020703368344802606,
"loss": 0.6813,
"step": 4380
},
{
"epoch": 0.95,
"learning_rate": 0.0002065990583122057,
"loss": 0.6779,
"step": 4400
},
{
"epoch": 0.95,
"eval_loss": 0.6968498826026917,
"eval_runtime": 50.0697,
"eval_samples_per_second": 39.944,
"eval_steps_per_second": 0.639,
"step": 4400
},
{
"epoch": 0.95,
"learning_rate": 0.00020616443317638536,
"loss": 0.6712,
"step": 4420
},
{
"epoch": 0.96,
"learning_rate": 0.000205729808040565,
"loss": 0.6846,
"step": 4440
},
{
"epoch": 0.96,
"learning_rate": 0.00020529518290474463,
"loss": 0.6694,
"step": 4460
},
{
"epoch": 0.97,
"learning_rate": 0.0002048605577689243,
"loss": 0.6753,
"step": 4480
},
{
"epoch": 0.97,
"learning_rate": 0.00020442593263310393,
"loss": 0.6792,
"step": 4500
},
{
"epoch": 0.98,
"learning_rate": 0.00020399130749728357,
"loss": 0.6738,
"step": 4520
},
{
"epoch": 0.98,
"learning_rate": 0.00020355668236146323,
"loss": 0.6699,
"step": 4540
},
{
"epoch": 0.98,
"learning_rate": 0.00020312205722564287,
"loss": 0.6737,
"step": 4560
},
{
"epoch": 0.99,
"learning_rate": 0.0002026874320898225,
"loss": 0.6837,
"step": 4580
},
{
"epoch": 0.99,
"learning_rate": 0.00020225280695400217,
"loss": 0.6701,
"step": 4600
},
{
"epoch": 0.99,
"eval_loss": 0.6954157948493958,
"eval_runtime": 50.0724,
"eval_samples_per_second": 39.942,
"eval_steps_per_second": 0.639,
"step": 4600
},
{
"epoch": 1.0,
"learning_rate": 0.0002018181818181818,
"loss": 0.6677,
"step": 4620
},
{
"epoch": 1.0,
"learning_rate": 0.00020138355668236144,
"loss": 0.6706,
"step": 4640
},
{
"epoch": 1.01,
"learning_rate": 0.0002009489315465411,
"loss": 0.6741,
"step": 4660
},
{
"epoch": 1.01,
"learning_rate": 0.00020051430641072074,
"loss": 0.6757,
"step": 4680
},
{
"epoch": 1.01,
"learning_rate": 0.00020007968127490038,
"loss": 0.6773,
"step": 4700
},
{
"epoch": 1.02,
"learning_rate": 0.00019964505613908004,
"loss": 0.6728,
"step": 4720
},
{
"epoch": 1.02,
"learning_rate": 0.00019921043100325968,
"loss": 0.6715,
"step": 4740
},
{
"epoch": 1.03,
"learning_rate": 0.00019877580586743931,
"loss": 0.6679,
"step": 4760
},
{
"epoch": 1.03,
"learning_rate": 0.00019834118073161898,
"loss": 0.6729,
"step": 4780
},
{
"epoch": 1.04,
"learning_rate": 0.00019790655559579861,
"loss": 0.6749,
"step": 4800
},
{
"epoch": 1.04,
"eval_loss": 0.6941403746604919,
"eval_runtime": 50.0645,
"eval_samples_per_second": 39.948,
"eval_steps_per_second": 0.639,
"step": 4800
},
{
"epoch": 1.04,
"learning_rate": 0.00019747193045997825,
"loss": 0.6661,
"step": 4820
},
{
"epoch": 1.04,
"learning_rate": 0.0001970373053241579,
"loss": 0.6638,
"step": 4840
},
{
"epoch": 1.05,
"learning_rate": 0.00019660268018833755,
"loss": 0.6715,
"step": 4860
},
{
"epoch": 1.05,
"learning_rate": 0.0001961680550525172,
"loss": 0.6721,
"step": 4880
},
{
"epoch": 1.06,
"learning_rate": 0.00019573342991669682,
"loss": 0.6695,
"step": 4900
},
{
"epoch": 1.06,
"learning_rate": 0.0001952988047808765,
"loss": 0.6809,
"step": 4920
},
{
"epoch": 1.07,
"learning_rate": 0.00019486417964505612,
"loss": 0.6701,
"step": 4940
},
{
"epoch": 1.07,
"learning_rate": 0.00019442955450923576,
"loss": 0.6747,
"step": 4960
},
{
"epoch": 1.07,
"learning_rate": 0.00019399492937341542,
"loss": 0.6713,
"step": 4980
},
{
"epoch": 1.08,
"learning_rate": 0.00019356030423759506,
"loss": 0.6746,
"step": 5000
},
{
"epoch": 1.08,
"eval_loss": 0.6935788989067078,
"eval_runtime": 50.0137,
"eval_samples_per_second": 39.989,
"eval_steps_per_second": 0.64,
"step": 5000
},
{
"epoch": 1.08,
"learning_rate": 0.0001931256791017747,
"loss": 0.672,
"step": 5020
},
{
"epoch": 1.09,
"learning_rate": 0.00019269105396595436,
"loss": 0.6673,
"step": 5040
},
{
"epoch": 1.09,
"learning_rate": 0.000192256428830134,
"loss": 0.6706,
"step": 5060
},
{
"epoch": 1.1,
"learning_rate": 0.00019182180369431363,
"loss": 0.6677,
"step": 5080
},
{
"epoch": 1.1,
"learning_rate": 0.0001913871785584933,
"loss": 0.67,
"step": 5100
},
{
"epoch": 1.1,
"learning_rate": 0.00019095255342267293,
"loss": 0.6693,
"step": 5120
},
{
"epoch": 1.11,
"learning_rate": 0.00019051792828685257,
"loss": 0.671,
"step": 5140
},
{
"epoch": 1.11,
"learning_rate": 0.00019008330315103223,
"loss": 0.6748,
"step": 5160
},
{
"epoch": 1.12,
"learning_rate": 0.00018964867801521187,
"loss": 0.6698,
"step": 5180
},
{
"epoch": 1.12,
"learning_rate": 0.0001892140528793915,
"loss": 0.662,
"step": 5200
},
{
"epoch": 1.12,
"eval_loss": 0.6918168663978577,
"eval_runtime": 50.0897,
"eval_samples_per_second": 39.928,
"eval_steps_per_second": 0.639,
"step": 5200
},
{
"epoch": 1.13,
"learning_rate": 0.00018877942774357117,
"loss": 0.66,
"step": 5220
},
{
"epoch": 1.13,
"learning_rate": 0.0001883448026077508,
"loss": 0.6705,
"step": 5240
},
{
"epoch": 1.13,
"learning_rate": 0.00018791017747193044,
"loss": 0.6693,
"step": 5260
},
{
"epoch": 1.14,
"learning_rate": 0.0001874755523361101,
"loss": 0.6546,
"step": 5280
},
{
"epoch": 1.14,
"learning_rate": 0.00018704092720028974,
"loss": 0.6673,
"step": 5300
},
{
"epoch": 1.15,
"learning_rate": 0.00018660630206446938,
"loss": 0.671,
"step": 5320
},
{
"epoch": 1.15,
"learning_rate": 0.00018617167692864904,
"loss": 0.675,
"step": 5340
},
{
"epoch": 1.16,
"learning_rate": 0.00018573705179282868,
"loss": 0.6744,
"step": 5360
},
{
"epoch": 1.16,
"learning_rate": 0.00018530242665700832,
"loss": 0.6643,
"step": 5380
},
{
"epoch": 1.17,
"learning_rate": 0.00018486780152118798,
"loss": 0.6686,
"step": 5400
},
{
"epoch": 1.17,
"eval_loss": 0.6908227801322937,
"eval_runtime": 50.0742,
"eval_samples_per_second": 39.941,
"eval_steps_per_second": 0.639,
"step": 5400
},
{
"epoch": 1.17,
"learning_rate": 0.00018443317638536762,
"loss": 0.6666,
"step": 5420
},
{
"epoch": 1.17,
"learning_rate": 0.00018399855124954725,
"loss": 0.6658,
"step": 5440
},
{
"epoch": 1.18,
"learning_rate": 0.0001835639261137269,
"loss": 0.671,
"step": 5460
},
{
"epoch": 1.18,
"learning_rate": 0.00018312930097790653,
"loss": 0.6736,
"step": 5480
},
{
"epoch": 1.19,
"learning_rate": 0.00018269467584208616,
"loss": 0.6697,
"step": 5500
},
{
"epoch": 1.19,
"learning_rate": 0.00018226005070626583,
"loss": 0.6718,
"step": 5520
},
{
"epoch": 1.2,
"learning_rate": 0.00018182542557044546,
"loss": 0.6701,
"step": 5540
},
{
"epoch": 1.2,
"learning_rate": 0.0001813908004346251,
"loss": 0.6696,
"step": 5560
},
{
"epoch": 1.2,
"learning_rate": 0.00018095617529880476,
"loss": 0.6611,
"step": 5580
},
{
"epoch": 1.21,
"learning_rate": 0.0001805215501629844,
"loss": 0.6638,
"step": 5600
},
{
"epoch": 1.21,
"eval_loss": 0.689289927482605,
"eval_runtime": 50.1304,
"eval_samples_per_second": 39.896,
"eval_steps_per_second": 0.638,
"step": 5600
},
{
"epoch": 1.21,
"learning_rate": 0.00018008692502716404,
"loss": 0.6646,
"step": 5620
},
{
"epoch": 1.22,
"learning_rate": 0.0001796522998913437,
"loss": 0.6717,
"step": 5640
},
{
"epoch": 1.22,
"learning_rate": 0.00017921767475552334,
"loss": 0.6647,
"step": 5660
},
{
"epoch": 1.23,
"learning_rate": 0.00017878304961970297,
"loss": 0.672,
"step": 5680
},
{
"epoch": 1.23,
"learning_rate": 0.00017834842448388264,
"loss": 0.6645,
"step": 5700
},
{
"epoch": 1.23,
"learning_rate": 0.00017791379934806227,
"loss": 0.6768,
"step": 5720
},
{
"epoch": 1.24,
"learning_rate": 0.0001774791742122419,
"loss": 0.6748,
"step": 5740
},
{
"epoch": 1.24,
"learning_rate": 0.00017704454907642157,
"loss": 0.6722,
"step": 5760
},
{
"epoch": 1.25,
"learning_rate": 0.0001766099239406012,
"loss": 0.6631,
"step": 5780
},
{
"epoch": 1.25,
"learning_rate": 0.00017617529880478084,
"loss": 0.6647,
"step": 5800
},
{
"epoch": 1.25,
"eval_loss": 0.688850462436676,
"eval_runtime": 50.0542,
"eval_samples_per_second": 39.957,
"eval_steps_per_second": 0.639,
"step": 5800
},
{
"epoch": 1.26,
"learning_rate": 0.0001757406736689605,
"loss": 0.66,
"step": 5820
},
{
"epoch": 1.26,
"learning_rate": 0.00017530604853314014,
"loss": 0.6682,
"step": 5840
},
{
"epoch": 1.26,
"learning_rate": 0.00017487142339731978,
"loss": 0.6589,
"step": 5860
},
{
"epoch": 1.27,
"learning_rate": 0.00017443679826149944,
"loss": 0.6691,
"step": 5880
},
{
"epoch": 1.27,
"learning_rate": 0.00017400217312567908,
"loss": 0.6726,
"step": 5900
},
{
"epoch": 1.28,
"learning_rate": 0.00017356754798985872,
"loss": 0.6628,
"step": 5920
},
{
"epoch": 1.28,
"learning_rate": 0.00017313292285403838,
"loss": 0.6719,
"step": 5940
},
{
"epoch": 1.29,
"learning_rate": 0.00017269829771821802,
"loss": 0.6648,
"step": 5960
},
{
"epoch": 1.29,
"learning_rate": 0.00017226367258239765,
"loss": 0.6594,
"step": 5980
},
{
"epoch": 1.29,
"learning_rate": 0.00017182904744657732,
"loss": 0.6717,
"step": 6000
},
{
"epoch": 1.29,
"eval_loss": 0.6876093745231628,
"eval_runtime": 50.1763,
"eval_samples_per_second": 39.859,
"eval_steps_per_second": 0.638,
"step": 6000
},
{
"epoch": 1.3,
"learning_rate": 0.00017139442231075695,
"loss": 0.6632,
"step": 6020
},
{
"epoch": 1.3,
"learning_rate": 0.0001709597971749366,
"loss": 0.6619,
"step": 6040
},
{
"epoch": 1.31,
"learning_rate": 0.00017052517203911625,
"loss": 0.667,
"step": 6060
},
{
"epoch": 1.31,
"learning_rate": 0.0001700905469032959,
"loss": 0.6625,
"step": 6080
},
{
"epoch": 1.32,
"learning_rate": 0.00016965592176747553,
"loss": 0.6661,
"step": 6100
},
{
"epoch": 1.32,
"learning_rate": 0.0001692212966316552,
"loss": 0.656,
"step": 6120
},
{
"epoch": 1.32,
"learning_rate": 0.00016878667149583483,
"loss": 0.6668,
"step": 6140
},
{
"epoch": 1.33,
"learning_rate": 0.00016835204636001446,
"loss": 0.6669,
"step": 6160
},
{
"epoch": 1.33,
"learning_rate": 0.00016791742122419413,
"loss": 0.6662,
"step": 6180
},
{
"epoch": 1.34,
"learning_rate": 0.00016748279608837376,
"loss": 0.6692,
"step": 6200
},
{
"epoch": 1.34,
"eval_loss": 0.6869744658470154,
"eval_runtime": 50.1517,
"eval_samples_per_second": 39.879,
"eval_steps_per_second": 0.638,
"step": 6200
},
{
"epoch": 1.34,
"learning_rate": 0.0001670481709525534,
"loss": 0.6571,
"step": 6220
},
{
"epoch": 1.35,
"learning_rate": 0.00016661354581673306,
"loss": 0.6659,
"step": 6240
},
{
"epoch": 1.35,
"learning_rate": 0.0001661789206809127,
"loss": 0.6622,
"step": 6260
},
{
"epoch": 1.35,
"learning_rate": 0.00016574429554509234,
"loss": 0.6522,
"step": 6280
},
{
"epoch": 1.36,
"learning_rate": 0.000165309670409272,
"loss": 0.667,
"step": 6300
},
{
"epoch": 1.36,
"learning_rate": 0.00016487504527345164,
"loss": 0.6644,
"step": 6320
},
{
"epoch": 1.37,
"learning_rate": 0.00016444042013763127,
"loss": 0.6625,
"step": 6340
},
{
"epoch": 1.37,
"learning_rate": 0.00016400579500181094,
"loss": 0.6686,
"step": 6360
},
{
"epoch": 1.38,
"learning_rate": 0.00016357116986599057,
"loss": 0.6562,
"step": 6380
},
{
"epoch": 1.38,
"learning_rate": 0.0001631365447301702,
"loss": 0.6595,
"step": 6400
},
{
"epoch": 1.38,
"eval_loss": 0.685205340385437,
"eval_runtime": 50.162,
"eval_samples_per_second": 39.871,
"eval_steps_per_second": 0.638,
"step": 6400
},
{
"epoch": 1.39,
"learning_rate": 0.00016270191959434987,
"loss": 0.6595,
"step": 6420
},
{
"epoch": 1.39,
"learning_rate": 0.0001622672944585295,
"loss": 0.6644,
"step": 6440
},
{
"epoch": 1.39,
"learning_rate": 0.00016183266932270915,
"loss": 0.6647,
"step": 6460
},
{
"epoch": 1.4,
"learning_rate": 0.0001613980441868888,
"loss": 0.6655,
"step": 6480
},
{
"epoch": 1.4,
"learning_rate": 0.00016096341905106845,
"loss": 0.6564,
"step": 6500
},
{
"epoch": 1.41,
"learning_rate": 0.00016052879391524808,
"loss": 0.6578,
"step": 6520
},
{
"epoch": 1.41,
"learning_rate": 0.00016009416877942775,
"loss": 0.6624,
"step": 6540
},
{
"epoch": 1.42,
"learning_rate": 0.00015965954364360738,
"loss": 0.6633,
"step": 6560
},
{
"epoch": 1.42,
"learning_rate": 0.00015922491850778702,
"loss": 0.6616,
"step": 6580
},
{
"epoch": 1.42,
"learning_rate": 0.00015879029337196668,
"loss": 0.6607,
"step": 6600
},
{
"epoch": 1.42,
"eval_loss": 0.6847727298736572,
"eval_runtime": 50.1562,
"eval_samples_per_second": 39.875,
"eval_steps_per_second": 0.638,
"step": 6600
},
{
"epoch": 1.43,
"learning_rate": 0.00015835566823614632,
"loss": 0.6564,
"step": 6620
},
{
"epoch": 1.43,
"learning_rate": 0.00015792104310032596,
"loss": 0.66,
"step": 6640
},
{
"epoch": 1.44,
"learning_rate": 0.00015748641796450562,
"loss": 0.6589,
"step": 6660
},
{
"epoch": 1.44,
"learning_rate": 0.00015705179282868526,
"loss": 0.6596,
"step": 6680
},
{
"epoch": 1.45,
"learning_rate": 0.0001566171676928649,
"loss": 0.6663,
"step": 6700
},
{
"epoch": 1.45,
"learning_rate": 0.00015618254255704456,
"loss": 0.6603,
"step": 6720
},
{
"epoch": 1.45,
"learning_rate": 0.0001557479174212242,
"loss": 0.6674,
"step": 6740
},
{
"epoch": 1.46,
"learning_rate": 0.00015531329228540383,
"loss": 0.6603,
"step": 6760
},
{
"epoch": 1.46,
"learning_rate": 0.0001548786671495835,
"loss": 0.6612,
"step": 6780
},
{
"epoch": 1.47,
"learning_rate": 0.00015444404201376313,
"loss": 0.6609,
"step": 6800
},
{
"epoch": 1.47,
"eval_loss": 0.683903694152832,
"eval_runtime": 50.079,
"eval_samples_per_second": 39.937,
"eval_steps_per_second": 0.639,
"step": 6800
},
{
"epoch": 1.47,
"learning_rate": 0.00015400941687794277,
"loss": 0.6557,
"step": 6820
},
{
"epoch": 1.48,
"learning_rate": 0.00015357479174212243,
"loss": 0.6627,
"step": 6840
},
{
"epoch": 1.48,
"learning_rate": 0.00015314016660630207,
"loss": 0.6667,
"step": 6860
},
{
"epoch": 1.48,
"learning_rate": 0.0001527055414704817,
"loss": 0.6633,
"step": 6880
},
{
"epoch": 1.49,
"learning_rate": 0.00015227091633466137,
"loss": 0.6565,
"step": 6900
},
{
"epoch": 1.49,
"learning_rate": 0.000151836291198841,
"loss": 0.6588,
"step": 6920
},
{
"epoch": 1.5,
"learning_rate": 0.00015140166606302064,
"loss": 0.6687,
"step": 6940
},
{
"epoch": 1.5,
"learning_rate": 0.0001509670409272003,
"loss": 0.6611,
"step": 6960
},
{
"epoch": 1.51,
"learning_rate": 0.00015053241579137994,
"loss": 0.6576,
"step": 6980
},
{
"epoch": 1.51,
"learning_rate": 0.00015009779065555957,
"loss": 0.6576,
"step": 7000
},
{
"epoch": 1.51,
"eval_loss": 0.6830142736434937,
"eval_runtime": 50.1233,
"eval_samples_per_second": 39.902,
"eval_steps_per_second": 0.638,
"step": 7000
},
{
"epoch": 1.51,
"learning_rate": 0.0001496631655197392,
"loss": 0.6617,
"step": 7020
},
{
"epoch": 1.52,
"learning_rate": 0.00014922854038391885,
"loss": 0.6533,
"step": 7040
},
{
"epoch": 1.52,
"learning_rate": 0.0001487939152480985,
"loss": 0.6524,
"step": 7060
},
{
"epoch": 1.53,
"learning_rate": 0.00014835929011227815,
"loss": 0.6597,
"step": 7080
},
{
"epoch": 1.53,
"learning_rate": 0.00014792466497645778,
"loss": 0.656,
"step": 7100
},
{
"epoch": 1.54,
"learning_rate": 0.00014749003984063745,
"loss": 0.6501,
"step": 7120
},
{
"epoch": 1.54,
"learning_rate": 0.00014705541470481708,
"loss": 0.6563,
"step": 7140
},
{
"epoch": 1.54,
"learning_rate": 0.00014662078956899672,
"loss": 0.6496,
"step": 7160
},
{
"epoch": 1.55,
"learning_rate": 0.00014618616443317638,
"loss": 0.6602,
"step": 7180
},
{
"epoch": 1.55,
"learning_rate": 0.00014575153929735602,
"loss": 0.6617,
"step": 7200
},
{
"epoch": 1.55,
"eval_loss": 0.6818540096282959,
"eval_runtime": 50.1175,
"eval_samples_per_second": 39.906,
"eval_steps_per_second": 0.639,
"step": 7200
},
{
"epoch": 1.56,
"learning_rate": 0.00014531691416153566,
"loss": 0.6655,
"step": 7220
},
{
"epoch": 1.56,
"learning_rate": 0.00014488228902571532,
"loss": 0.6544,
"step": 7240
},
{
"epoch": 1.57,
"learning_rate": 0.00014444766388989496,
"loss": 0.655,
"step": 7260
},
{
"epoch": 1.57,
"learning_rate": 0.0001440130387540746,
"loss": 0.6535,
"step": 7280
},
{
"epoch": 1.57,
"learning_rate": 0.00014357841361825426,
"loss": 0.6584,
"step": 7300
},
{
"epoch": 1.58,
"learning_rate": 0.0001431437884824339,
"loss": 0.6602,
"step": 7320
},
{
"epoch": 1.58,
"learning_rate": 0.00014270916334661353,
"loss": 0.6689,
"step": 7340
},
{
"epoch": 1.59,
"learning_rate": 0.0001422745382107932,
"loss": 0.6613,
"step": 7360
},
{
"epoch": 1.59,
"learning_rate": 0.00014183991307497283,
"loss": 0.659,
"step": 7380
},
{
"epoch": 1.6,
"learning_rate": 0.00014140528793915247,
"loss": 0.6463,
"step": 7400
},
{
"epoch": 1.6,
"eval_loss": 0.681868851184845,
"eval_runtime": 50.1388,
"eval_samples_per_second": 39.889,
"eval_steps_per_second": 0.638,
"step": 7400
},
{
"epoch": 1.6,
"learning_rate": 0.00014097066280333213,
"loss": 0.6617,
"step": 7420
},
{
"epoch": 1.61,
"learning_rate": 0.00014053603766751177,
"loss": 0.6648,
"step": 7440
},
{
"epoch": 1.61,
"learning_rate": 0.0001401014125316914,
"loss": 0.6528,
"step": 7460
},
{
"epoch": 1.61,
"learning_rate": 0.00013966678739587107,
"loss": 0.6655,
"step": 7480
},
{
"epoch": 1.62,
"learning_rate": 0.0001392321622600507,
"loss": 0.6609,
"step": 7500
},
{
"epoch": 1.62,
"learning_rate": 0.00013879753712423034,
"loss": 0.6528,
"step": 7520
},
{
"epoch": 1.63,
"learning_rate": 0.00013836291198841,
"loss": 0.6561,
"step": 7540
},
{
"epoch": 1.63,
"learning_rate": 0.00013792828685258964,
"loss": 0.6682,
"step": 7560
},
{
"epoch": 1.64,
"learning_rate": 0.00013749366171676928,
"loss": 0.6677,
"step": 7580
},
{
"epoch": 1.64,
"learning_rate": 0.00013705903658094894,
"loss": 0.6599,
"step": 7600
},
{
"epoch": 1.64,
"eval_loss": 0.6807426810264587,
"eval_runtime": 50.3308,
"eval_samples_per_second": 39.737,
"eval_steps_per_second": 0.636,
"step": 7600
},
{
"epoch": 1.64,
"learning_rate": 0.00013662441144512855,
"loss": 0.6525,
"step": 7620
},
{
"epoch": 1.65,
"learning_rate": 0.0001361897863093082,
"loss": 0.6574,
"step": 7640
},
{
"epoch": 1.65,
"learning_rate": 0.00013575516117348785,
"loss": 0.6516,
"step": 7660
},
{
"epoch": 1.66,
"learning_rate": 0.00013532053603766749,
"loss": 0.6533,
"step": 7680
},
{
"epoch": 1.66,
"learning_rate": 0.00013488591090184715,
"loss": 0.6577,
"step": 7700
},
{
"epoch": 1.67,
"learning_rate": 0.00013445128576602679,
"loss": 0.6592,
"step": 7720
},
{
"epoch": 1.67,
"learning_rate": 0.00013401666063020642,
"loss": 0.6585,
"step": 7740
},
{
"epoch": 1.67,
"learning_rate": 0.00013358203549438609,
"loss": 0.6607,
"step": 7760
},
{
"epoch": 1.68,
"learning_rate": 0.00013314741035856572,
"loss": 0.6617,
"step": 7780
},
{
"epoch": 1.68,
"learning_rate": 0.00013271278522274536,
"loss": 0.6443,
"step": 7800
},
{
"epoch": 1.68,
"eval_loss": 0.6800745725631714,
"eval_runtime": 50.165,
"eval_samples_per_second": 39.868,
"eval_steps_per_second": 0.638,
"step": 7800
},
{
"epoch": 1.69,
"learning_rate": 0.00013227816008692502,
"loss": 0.6587,
"step": 7820
},
{
"epoch": 1.69,
"learning_rate": 0.00013184353495110466,
"loss": 0.6613,
"step": 7840
},
{
"epoch": 1.7,
"learning_rate": 0.0001314089098152843,
"loss": 0.654,
"step": 7860
},
{
"epoch": 1.7,
"learning_rate": 0.00013097428467946396,
"loss": 0.6523,
"step": 7880
},
{
"epoch": 1.7,
"learning_rate": 0.0001305396595436436,
"loss": 0.6563,
"step": 7900
},
{
"epoch": 1.71,
"learning_rate": 0.00013010503440782323,
"loss": 0.6524,
"step": 7920
},
{
"epoch": 1.71,
"learning_rate": 0.0001296704092720029,
"loss": 0.6523,
"step": 7940
},
{
"epoch": 1.72,
"learning_rate": 0.00012923578413618253,
"loss": 0.6493,
"step": 7960
},
{
"epoch": 1.72,
"learning_rate": 0.00012880115900036217,
"loss": 0.6538,
"step": 7980
},
{
"epoch": 1.73,
"learning_rate": 0.00012836653386454183,
"loss": 0.6512,
"step": 8000
},
{
"epoch": 1.73,
"eval_loss": 0.6790341734886169,
"eval_runtime": 50.1317,
"eval_samples_per_second": 39.895,
"eval_steps_per_second": 0.638,
"step": 8000
},
{
"epoch": 1.73,
"learning_rate": 0.00012793190872872147,
"loss": 0.6562,
"step": 8020
},
{
"epoch": 1.73,
"learning_rate": 0.0001274972835929011,
"loss": 0.6556,
"step": 8040
},
{
"epoch": 1.74,
"learning_rate": 0.00012706265845708077,
"loss": 0.65,
"step": 8060
},
{
"epoch": 1.74,
"learning_rate": 0.0001266280333212604,
"loss": 0.661,
"step": 8080
},
{
"epoch": 1.75,
"learning_rate": 0.00012619340818544004,
"loss": 0.655,
"step": 8100
},
{
"epoch": 1.75,
"learning_rate": 0.0001257587830496197,
"loss": 0.6534,
"step": 8120
},
{
"epoch": 1.76,
"learning_rate": 0.00012532415791379934,
"loss": 0.6517,
"step": 8140
},
{
"epoch": 1.76,
"learning_rate": 0.00012488953277797898,
"loss": 0.6605,
"step": 8160
},
{
"epoch": 1.76,
"learning_rate": 0.00012445490764215864,
"loss": 0.6556,
"step": 8180
},
{
"epoch": 1.77,
"learning_rate": 0.00012402028250633828,
"loss": 0.6492,
"step": 8200
},
{
"epoch": 1.77,
"eval_loss": 0.6781870126724243,
"eval_runtime": 50.0809,
"eval_samples_per_second": 39.935,
"eval_steps_per_second": 0.639,
"step": 8200
},
{
"epoch": 1.77,
"learning_rate": 0.00012358565737051791,
"loss": 0.6541,
"step": 8220
},
{
"epoch": 1.78,
"learning_rate": 0.00012315103223469758,
"loss": 0.6517,
"step": 8240
},
{
"epoch": 1.78,
"learning_rate": 0.00012271640709887721,
"loss": 0.6483,
"step": 8260
},
{
"epoch": 1.79,
"learning_rate": 0.00012228178196305685,
"loss": 0.6619,
"step": 8280
},
{
"epoch": 1.79,
"learning_rate": 0.0001218471568272365,
"loss": 0.6556,
"step": 8300
},
{
"epoch": 1.8,
"learning_rate": 0.00012141253169141615,
"loss": 0.6471,
"step": 8320
},
{
"epoch": 1.8,
"learning_rate": 0.00012097790655559579,
"loss": 0.6611,
"step": 8340
},
{
"epoch": 1.8,
"learning_rate": 0.00012054328141977544,
"loss": 0.6506,
"step": 8360
},
{
"epoch": 1.81,
"learning_rate": 0.00012010865628395509,
"loss": 0.6611,
"step": 8380
},
{
"epoch": 1.81,
"learning_rate": 0.00011967403114813472,
"loss": 0.6557,
"step": 8400
},
{
"epoch": 1.81,
"eval_loss": 0.6776989102363586,
"eval_runtime": 50.1344,
"eval_samples_per_second": 39.893,
"eval_steps_per_second": 0.638,
"step": 8400
},
{
"epoch": 1.82,
"learning_rate": 0.00011923940601231437,
"loss": 0.6504,
"step": 8420
},
{
"epoch": 1.82,
"learning_rate": 0.00011880478087649402,
"loss": 0.6552,
"step": 8440
},
{
"epoch": 1.83,
"learning_rate": 0.00011839188699746468,
"loss": 0.641,
"step": 8460
},
{
"epoch": 1.83,
"learning_rate": 0.00011795726186164432,
"loss": 0.6535,
"step": 8480
},
{
"epoch": 1.83,
"learning_rate": 0.00011752263672582397,
"loss": 0.6568,
"step": 8500
},
{
"epoch": 1.84,
"learning_rate": 0.00011708801159000362,
"loss": 0.6621,
"step": 8520
},
{
"epoch": 1.84,
"learning_rate": 0.00011665338645418325,
"loss": 0.6607,
"step": 8540
},
{
"epoch": 1.85,
"learning_rate": 0.0001162187613183629,
"loss": 0.6516,
"step": 8560
},
{
"epoch": 1.85,
"learning_rate": 0.00011578413618254255,
"loss": 0.6497,
"step": 8580
},
{
"epoch": 1.86,
"learning_rate": 0.00011534951104672219,
"loss": 0.6559,
"step": 8600
},
{
"epoch": 1.86,
"eval_loss": 0.6773191094398499,
"eval_runtime": 50.1605,
"eval_samples_per_second": 39.872,
"eval_steps_per_second": 0.638,
"step": 8600
},
{
"epoch": 1.86,
"learning_rate": 0.00011491488591090184,
"loss": 0.6595,
"step": 8620
},
{
"epoch": 1.86,
"learning_rate": 0.00011448026077508149,
"loss": 0.6495,
"step": 8640
},
{
"epoch": 1.87,
"learning_rate": 0.00011404563563926113,
"loss": 0.6518,
"step": 8660
},
{
"epoch": 1.87,
"learning_rate": 0.00011361101050344078,
"loss": 0.6511,
"step": 8680
},
{
"epoch": 1.88,
"learning_rate": 0.00011317638536762043,
"loss": 0.6495,
"step": 8700
},
{
"epoch": 1.88,
"learning_rate": 0.00011274176023180006,
"loss": 0.6485,
"step": 8720
},
{
"epoch": 1.89,
"learning_rate": 0.00011230713509597971,
"loss": 0.6543,
"step": 8740
},
{
"epoch": 1.89,
"learning_rate": 0.00011187250996015936,
"loss": 0.6509,
"step": 8760
},
{
"epoch": 1.89,
"learning_rate": 0.000111437884824339,
"loss": 0.656,
"step": 8780
},
{
"epoch": 1.9,
"learning_rate": 0.00011100325968851865,
"loss": 0.6557,
"step": 8800
},
{
"epoch": 1.9,
"eval_loss": 0.6773696541786194,
"eval_runtime": 50.1296,
"eval_samples_per_second": 39.897,
"eval_steps_per_second": 0.638,
"step": 8800
},
{
"epoch": 1.9,
"learning_rate": 0.0001105686345526983,
"loss": 0.6509,
"step": 8820
},
{
"epoch": 1.91,
"learning_rate": 0.00011013400941687794,
"loss": 0.65,
"step": 8840
},
{
"epoch": 1.91,
"learning_rate": 0.00010969938428105759,
"loss": 0.6447,
"step": 8860
},
{
"epoch": 1.92,
"learning_rate": 0.00010926475914523724,
"loss": 0.6563,
"step": 8880
},
{
"epoch": 1.92,
"learning_rate": 0.00010883013400941687,
"loss": 0.6545,
"step": 8900
},
{
"epoch": 1.92,
"learning_rate": 0.00010839550887359652,
"loss": 0.6509,
"step": 8920
},
{
"epoch": 1.93,
"learning_rate": 0.00010796088373777617,
"loss": 0.6434,
"step": 8940
},
{
"epoch": 1.93,
"learning_rate": 0.00010752625860195581,
"loss": 0.6412,
"step": 8960
},
{
"epoch": 1.94,
"learning_rate": 0.00010709163346613546,
"loss": 0.6512,
"step": 8980
},
{
"epoch": 1.94,
"learning_rate": 0.00010665700833031508,
"loss": 0.6478,
"step": 9000
},
{
"epoch": 1.94,
"eval_loss": 0.6760911345481873,
"eval_runtime": 50.1795,
"eval_samples_per_second": 39.857,
"eval_steps_per_second": 0.638,
"step": 9000
},
{
"epoch": 1.95,
"learning_rate": 0.00010622238319449473,
"loss": 0.6545,
"step": 9020
},
{
"epoch": 1.95,
"learning_rate": 0.00010578775805867438,
"loss": 0.6468,
"step": 9040
},
{
"epoch": 1.95,
"learning_rate": 0.00010535313292285402,
"loss": 0.6527,
"step": 9060
},
{
"epoch": 1.96,
"learning_rate": 0.00010491850778703367,
"loss": 0.6621,
"step": 9080
},
{
"epoch": 1.96,
"learning_rate": 0.00010448388265121332,
"loss": 0.6496,
"step": 9100
},
{
"epoch": 1.97,
"learning_rate": 0.00010404925751539295,
"loss": 0.6512,
"step": 9120
},
{
"epoch": 1.97,
"learning_rate": 0.0001036146323795726,
"loss": 0.6491,
"step": 9140
},
{
"epoch": 1.98,
"learning_rate": 0.00010318000724375225,
"loss": 0.6482,
"step": 9160
},
{
"epoch": 1.98,
"learning_rate": 0.00010274538210793189,
"loss": 0.6456,
"step": 9180
},
{
"epoch": 1.98,
"learning_rate": 0.00010231075697211154,
"loss": 0.6458,
"step": 9200
},
{
"epoch": 1.98,
"eval_loss": 0.6748936772346497,
"eval_runtime": 50.1856,
"eval_samples_per_second": 39.852,
"eval_steps_per_second": 0.638,
"step": 9200
},
{
"epoch": 1.99,
"learning_rate": 0.00010187613183629119,
"loss": 0.6473,
"step": 9220
},
{
"epoch": 1.99,
"learning_rate": 0.00010144150670047083,
"loss": 0.6496,
"step": 9240
},
{
"epoch": 2.0,
"learning_rate": 0.00010100688156465048,
"loss": 0.6566,
"step": 9260
},
{
"epoch": 2.0,
"learning_rate": 0.00010057225642883013,
"loss": 0.6475,
"step": 9280
},
{
"epoch": 2.01,
"learning_rate": 0.00010013763129300976,
"loss": 0.6536,
"step": 9300
},
{
"epoch": 2.01,
"learning_rate": 9.970300615718941e-05,
"loss": 0.646,
"step": 9320
},
{
"epoch": 2.02,
"learning_rate": 9.926838102136906e-05,
"loss": 0.6503,
"step": 9340
},
{
"epoch": 2.02,
"learning_rate": 9.88337558855487e-05,
"loss": 0.6527,
"step": 9360
},
{
"epoch": 2.02,
"learning_rate": 9.839913074972835e-05,
"loss": 0.6514,
"step": 9380
},
{
"epoch": 2.03,
"learning_rate": 9.7964505613908e-05,
"loss": 0.6548,
"step": 9400
},
{
"epoch": 2.03,
"eval_loss": 0.6744834780693054,
"eval_runtime": 50.1696,
"eval_samples_per_second": 39.865,
"eval_steps_per_second": 0.638,
"step": 9400
},
{
"epoch": 2.03,
"learning_rate": 9.752988047808764e-05,
"loss": 0.6483,
"step": 9420
},
{
"epoch": 2.04,
"learning_rate": 9.709525534226729e-05,
"loss": 0.6522,
"step": 9440
},
{
"epoch": 2.04,
"learning_rate": 9.666063020644694e-05,
"loss": 0.6538,
"step": 9460
},
{
"epoch": 2.05,
"learning_rate": 9.622600507062657e-05,
"loss": 0.6449,
"step": 9480
},
{
"epoch": 2.05,
"learning_rate": 9.579137993480622e-05,
"loss": 0.6451,
"step": 9500
},
{
"epoch": 2.05,
"learning_rate": 9.535675479898587e-05,
"loss": 0.6355,
"step": 9520
},
{
"epoch": 2.06,
"learning_rate": 9.492212966316551e-05,
"loss": 0.6494,
"step": 9540
},
{
"epoch": 2.06,
"learning_rate": 9.448750452734516e-05,
"loss": 0.6435,
"step": 9560
},
{
"epoch": 2.07,
"learning_rate": 9.405287939152481e-05,
"loss": 0.651,
"step": 9580
},
{
"epoch": 2.07,
"learning_rate": 9.361825425570445e-05,
"loss": 0.6493,
"step": 9600
},
{
"epoch": 2.07,
"eval_loss": 0.674017071723938,
"eval_runtime": 50.1402,
"eval_samples_per_second": 39.888,
"eval_steps_per_second": 0.638,
"step": 9600
},
{
"epoch": 2.08,
"learning_rate": 9.31836291198841e-05,
"loss": 0.6469,
"step": 9620
},
{
"epoch": 2.08,
"learning_rate": 9.274900398406375e-05,
"loss": 0.65,
"step": 9640
},
{
"epoch": 2.08,
"learning_rate": 9.231437884824338e-05,
"loss": 0.6536,
"step": 9660
},
{
"epoch": 2.09,
"learning_rate": 9.187975371242303e-05,
"loss": 0.6488,
"step": 9680
},
{
"epoch": 2.09,
"learning_rate": 9.144512857660268e-05,
"loss": 0.6391,
"step": 9700
},
{
"epoch": 2.1,
"learning_rate": 9.101050344078232e-05,
"loss": 0.644,
"step": 9720
},
{
"epoch": 2.1,
"learning_rate": 9.057587830496197e-05,
"loss": 0.6507,
"step": 9740
},
{
"epoch": 2.11,
"learning_rate": 9.014125316914162e-05,
"loss": 0.6404,
"step": 9760
},
{
"epoch": 2.11,
"learning_rate": 8.970662803332126e-05,
"loss": 0.6509,
"step": 9780
},
{
"epoch": 2.11,
"learning_rate": 8.92720028975009e-05,
"loss": 0.6435,
"step": 9800
},
{
"epoch": 2.11,
"eval_loss": 0.6735255122184753,
"eval_runtime": 50.1703,
"eval_samples_per_second": 39.864,
"eval_steps_per_second": 0.638,
"step": 9800
},
{
"epoch": 2.12,
"learning_rate": 8.883737776168056e-05,
"loss": 0.6374,
"step": 9820
},
{
"epoch": 2.12,
"learning_rate": 8.840275262586019e-05,
"loss": 0.6445,
"step": 9840
},
{
"epoch": 2.13,
"learning_rate": 8.796812749003983e-05,
"loss": 0.6495,
"step": 9860
},
{
"epoch": 2.13,
"learning_rate": 8.753350235421946e-05,
"loss": 0.6482,
"step": 9880
},
{
"epoch": 2.14,
"learning_rate": 8.709887721839911e-05,
"loss": 0.6441,
"step": 9900
},
{
"epoch": 2.14,
"learning_rate": 8.666425208257877e-05,
"loss": 0.6525,
"step": 9920
},
{
"epoch": 2.14,
"learning_rate": 8.62296269467584e-05,
"loss": 0.6453,
"step": 9940
},
{
"epoch": 2.15,
"learning_rate": 8.579500181093805e-05,
"loss": 0.6498,
"step": 9960
},
{
"epoch": 2.15,
"learning_rate": 8.53603766751177e-05,
"loss": 0.6471,
"step": 9980
},
{
"epoch": 2.16,
"learning_rate": 8.492575153929734e-05,
"loss": 0.6419,
"step": 10000
},
{
"epoch": 2.16,
"eval_loss": 0.6730753779411316,
"eval_runtime": 50.1885,
"eval_samples_per_second": 39.85,
"eval_steps_per_second": 0.638,
"step": 10000
},
{
"epoch": 2.16,
"learning_rate": 8.449112640347699e-05,
"loss": 0.6447,
"step": 10020
},
{
"epoch": 2.17,
"learning_rate": 8.405650126765664e-05,
"loss": 0.6444,
"step": 10040
},
{
"epoch": 2.17,
"learning_rate": 8.362187613183627e-05,
"loss": 0.6393,
"step": 10060
},
{
"epoch": 2.17,
"learning_rate": 8.318725099601592e-05,
"loss": 0.6464,
"step": 10080
},
{
"epoch": 2.18,
"learning_rate": 8.275262586019557e-05,
"loss": 0.6458,
"step": 10100
},
{
"epoch": 2.18,
"learning_rate": 8.231800072437521e-05,
"loss": 0.6402,
"step": 10120
},
{
"epoch": 2.19,
"learning_rate": 8.188337558855486e-05,
"loss": 0.6409,
"step": 10140
},
{
"epoch": 2.19,
"learning_rate": 8.144875045273451e-05,
"loss": 0.6512,
"step": 10160
},
{
"epoch": 2.2,
"learning_rate": 8.101412531691415e-05,
"loss": 0.6498,
"step": 10180
},
{
"epoch": 2.2,
"learning_rate": 8.05795001810938e-05,
"loss": 0.6393,
"step": 10200
},
{
"epoch": 2.2,
"eval_loss": 0.6726437211036682,
"eval_runtime": 50.1492,
"eval_samples_per_second": 39.881,
"eval_steps_per_second": 0.638,
"step": 10200
},
{
"epoch": 2.2,
"learning_rate": 8.014487504527345e-05,
"loss": 0.6458,
"step": 10220
},
{
"epoch": 2.21,
"learning_rate": 7.971024990945308e-05,
"loss": 0.6466,
"step": 10240
},
{
"epoch": 2.21,
"learning_rate": 7.927562477363273e-05,
"loss": 0.644,
"step": 10260
},
{
"epoch": 2.22,
"learning_rate": 7.884099963781238e-05,
"loss": 0.6467,
"step": 10280
},
{
"epoch": 2.22,
"learning_rate": 7.840637450199202e-05,
"loss": 0.6436,
"step": 10300
},
{
"epoch": 2.23,
"learning_rate": 7.797174936617167e-05,
"loss": 0.6422,
"step": 10320
},
{
"epoch": 2.23,
"learning_rate": 7.753712423035132e-05,
"loss": 0.645,
"step": 10340
},
{
"epoch": 2.24,
"learning_rate": 7.710249909453096e-05,
"loss": 0.6423,
"step": 10360
},
{
"epoch": 2.24,
"learning_rate": 7.666787395871061e-05,
"loss": 0.6557,
"step": 10380
},
{
"epoch": 2.24,
"learning_rate": 7.623324882289026e-05,
"loss": 0.646,
"step": 10400
},
{
"epoch": 2.24,
"eval_loss": 0.6725419759750366,
"eval_runtime": 50.1975,
"eval_samples_per_second": 39.843,
"eval_steps_per_second": 0.637,
"step": 10400
},
{
"epoch": 2.25,
"learning_rate": 7.57986236870699e-05,
"loss": 0.6503,
"step": 10420
},
{
"epoch": 2.25,
"learning_rate": 7.536399855124954e-05,
"loss": 0.6428,
"step": 10440
},
{
"epoch": 2.26,
"learning_rate": 7.49293734154292e-05,
"loss": 0.6438,
"step": 10460
},
{
"epoch": 2.26,
"learning_rate": 7.449474827960883e-05,
"loss": 0.6427,
"step": 10480
},
{
"epoch": 2.27,
"learning_rate": 7.406012314378847e-05,
"loss": 0.6458,
"step": 10500
},
{
"epoch": 2.27,
"learning_rate": 7.362549800796812e-05,
"loss": 0.6423,
"step": 10520
},
{
"epoch": 2.27,
"learning_rate": 7.319087287214777e-05,
"loss": 0.6466,
"step": 10540
},
{
"epoch": 2.28,
"learning_rate": 7.27562477363274e-05,
"loss": 0.6394,
"step": 10560
},
{
"epoch": 2.28,
"learning_rate": 7.232162260050705e-05,
"loss": 0.6362,
"step": 10580
},
{
"epoch": 2.29,
"learning_rate": 7.18869974646867e-05,
"loss": 0.6399,
"step": 10600
},
{
"epoch": 2.29,
"eval_loss": 0.6719211935997009,
"eval_runtime": 50.1808,
"eval_samples_per_second": 39.856,
"eval_steps_per_second": 0.638,
"step": 10600
},
{
"epoch": 2.29,
"learning_rate": 7.145237232886634e-05,
"loss": 0.6378,
"step": 10620
},
{
"epoch": 2.3,
"learning_rate": 7.101774719304599e-05,
"loss": 0.634,
"step": 10640
},
{
"epoch": 2.3,
"learning_rate": 7.058312205722564e-05,
"loss": 0.6374,
"step": 10660
},
{
"epoch": 2.3,
"learning_rate": 7.014849692140528e-05,
"loss": 0.6464,
"step": 10680
},
{
"epoch": 2.31,
"learning_rate": 6.971387178558493e-05,
"loss": 0.643,
"step": 10700
},
{
"epoch": 2.31,
"learning_rate": 6.927924664976458e-05,
"loss": 0.6384,
"step": 10720
},
{
"epoch": 2.32,
"learning_rate": 6.884462151394421e-05,
"loss": 0.6451,
"step": 10740
},
{
"epoch": 2.32,
"learning_rate": 6.840999637812386e-05,
"loss": 0.6465,
"step": 10760
},
{
"epoch": 2.33,
"learning_rate": 6.799710249909452e-05,
"loss": 0.646,
"step": 10780
},
{
"epoch": 2.33,
"learning_rate": 6.756247736327417e-05,
"loss": 0.6525,
"step": 10800
},
{
"epoch": 2.33,
"eval_loss": 0.6714358925819397,
"eval_runtime": 50.1294,
"eval_samples_per_second": 39.897,
"eval_steps_per_second": 0.638,
"step": 10800
},
{
"epoch": 2.33,
"learning_rate": 6.712785222745382e-05,
"loss": 0.6423,
"step": 10820
},
{
"epoch": 2.34,
"learning_rate": 6.669322709163345e-05,
"loss": 0.6449,
"step": 10840
},
{
"epoch": 2.34,
"learning_rate": 6.62586019558131e-05,
"loss": 0.6325,
"step": 10860
},
{
"epoch": 2.35,
"learning_rate": 6.582397681999275e-05,
"loss": 0.6558,
"step": 10880
},
{
"epoch": 2.35,
"learning_rate": 6.538935168417239e-05,
"loss": 0.6419,
"step": 10900
},
{
"epoch": 2.36,
"learning_rate": 6.495472654835204e-05,
"loss": 0.6466,
"step": 10920
},
{
"epoch": 2.36,
"learning_rate": 6.452010141253169e-05,
"loss": 0.6357,
"step": 10940
},
{
"epoch": 2.36,
"learning_rate": 6.408547627671133e-05,
"loss": 0.6366,
"step": 10960
},
{
"epoch": 2.37,
"learning_rate": 6.365085114089098e-05,
"loss": 0.6466,
"step": 10980
},
{
"epoch": 2.37,
"learning_rate": 6.321622600507063e-05,
"loss": 0.6542,
"step": 11000
},
{
"epoch": 2.37,
"eval_loss": 0.6710445880889893,
"eval_runtime": 50.2479,
"eval_samples_per_second": 39.803,
"eval_steps_per_second": 0.637,
"step": 11000
},
{
"epoch": 2.38,
"learning_rate": 6.278160086925026e-05,
"loss": 0.6481,
"step": 11020
},
{
"epoch": 2.38,
"learning_rate": 6.23469757334299e-05,
"loss": 0.6425,
"step": 11040
},
{
"epoch": 2.39,
"learning_rate": 6.191235059760955e-05,
"loss": 0.6439,
"step": 11060
},
{
"epoch": 2.39,
"learning_rate": 6.14777254617892e-05,
"loss": 0.6424,
"step": 11080
},
{
"epoch": 2.39,
"learning_rate": 6.104310032596884e-05,
"loss": 0.6404,
"step": 11100
},
{
"epoch": 2.4,
"learning_rate": 6.060847519014849e-05,
"loss": 0.6387,
"step": 11120
},
{
"epoch": 2.4,
"learning_rate": 6.017385005432814e-05,
"loss": 0.6462,
"step": 11140
},
{
"epoch": 2.41,
"learning_rate": 5.973922491850778e-05,
"loss": 0.6431,
"step": 11160
},
{
"epoch": 2.41,
"learning_rate": 5.9304599782687424e-05,
"loss": 0.638,
"step": 11180
},
{
"epoch": 2.42,
"learning_rate": 5.8869974646867074e-05,
"loss": 0.6344,
"step": 11200
},
{
"epoch": 2.42,
"eval_loss": 0.6704220771789551,
"eval_runtime": 50.1558,
"eval_samples_per_second": 39.876,
"eval_steps_per_second": 0.638,
"step": 11200
},
{
"epoch": 2.42,
"learning_rate": 5.843534951104672e-05,
"loss": 0.6448,
"step": 11220
},
{
"epoch": 2.43,
"learning_rate": 5.800072437522636e-05,
"loss": 0.6449,
"step": 11240
},
{
"epoch": 2.43,
"learning_rate": 5.756609923940601e-05,
"loss": 0.6399,
"step": 11260
},
{
"epoch": 2.43,
"learning_rate": 5.7131474103585654e-05,
"loss": 0.638,
"step": 11280
},
{
"epoch": 2.44,
"learning_rate": 5.66968489677653e-05,
"loss": 0.6418,
"step": 11300
},
{
"epoch": 2.44,
"learning_rate": 5.626222383194495e-05,
"loss": 0.6482,
"step": 11320
},
{
"epoch": 2.45,
"learning_rate": 5.582759869612459e-05,
"loss": 0.6392,
"step": 11340
},
{
"epoch": 2.45,
"learning_rate": 5.5392973560304233e-05,
"loss": 0.6363,
"step": 11360
},
{
"epoch": 2.46,
"learning_rate": 5.4958348424483883e-05,
"loss": 0.6503,
"step": 11380
},
{
"epoch": 2.46,
"learning_rate": 5.452372328866353e-05,
"loss": 0.6453,
"step": 11400
},
{
"epoch": 2.46,
"eval_loss": 0.670009195804596,
"eval_runtime": 50.155,
"eval_samples_per_second": 39.876,
"eval_steps_per_second": 0.638,
"step": 11400
},
{
"epoch": 2.46,
"learning_rate": 5.408909815284317e-05,
"loss": 0.6384,
"step": 11420
},
{
"epoch": 2.47,
"learning_rate": 5.365447301702282e-05,
"loss": 0.6449,
"step": 11440
},
{
"epoch": 2.47,
"learning_rate": 5.3219847881202456e-05,
"loss": 0.6406,
"step": 11460
},
{
"epoch": 2.48,
"learning_rate": 5.27852227453821e-05,
"loss": 0.6363,
"step": 11480
},
{
"epoch": 2.48,
"learning_rate": 5.235059760956174e-05,
"loss": 0.6482,
"step": 11500
},
{
"epoch": 2.49,
"learning_rate": 5.191597247374139e-05,
"loss": 0.6503,
"step": 11520
},
{
"epoch": 2.49,
"learning_rate": 5.1481347337921036e-05,
"loss": 0.6479,
"step": 11540
},
{
"epoch": 2.49,
"learning_rate": 5.10684534588917e-05,
"loss": 0.6437,
"step": 11560
},
{
"epoch": 2.5,
"learning_rate": 5.063382832307134e-05,
"loss": 0.6398,
"step": 11580
},
{
"epoch": 2.5,
"learning_rate": 5.0199203187250985e-05,
"loss": 0.6456,
"step": 11600
},
{
"epoch": 2.5,
"eval_loss": 0.6702134013175964,
"eval_runtime": 50.1834,
"eval_samples_per_second": 39.854,
"eval_steps_per_second": 0.638,
"step": 11600
},
{
"epoch": 2.51,
"learning_rate": 4.9764578051430635e-05,
"loss": 0.646,
"step": 11620
},
{
"epoch": 2.51,
"learning_rate": 4.932995291561028e-05,
"loss": 0.6375,
"step": 11640
},
{
"epoch": 2.52,
"learning_rate": 4.889532777978992e-05,
"loss": 0.6393,
"step": 11660
},
{
"epoch": 2.52,
"learning_rate": 4.846070264396957e-05,
"loss": 0.638,
"step": 11680
},
{
"epoch": 2.52,
"learning_rate": 4.8026077508149215e-05,
"loss": 0.6411,
"step": 11700
},
{
"epoch": 2.53,
"learning_rate": 4.759145237232886e-05,
"loss": 0.6467,
"step": 11720
},
{
"epoch": 2.53,
"learning_rate": 4.715682723650851e-05,
"loss": 0.6369,
"step": 11740
},
{
"epoch": 2.54,
"learning_rate": 4.672220210068815e-05,
"loss": 0.637,
"step": 11760
},
{
"epoch": 2.54,
"learning_rate": 4.6287576964867795e-05,
"loss": 0.6486,
"step": 11780
},
{
"epoch": 2.55,
"learning_rate": 4.5852951829047445e-05,
"loss": 0.637,
"step": 11800
},
{
"epoch": 2.55,
"eval_loss": 0.6698750257492065,
"eval_runtime": 50.1539,
"eval_samples_per_second": 39.877,
"eval_steps_per_second": 0.638,
"step": 11800
},
{
"epoch": 2.55,
"learning_rate": 4.541832669322709e-05,
"loss": 0.639,
"step": 11820
},
{
"epoch": 2.55,
"learning_rate": 4.498370155740673e-05,
"loss": 0.6366,
"step": 11840
},
{
"epoch": 2.56,
"learning_rate": 4.454907642158638e-05,
"loss": 0.6409,
"step": 11860
},
{
"epoch": 2.56,
"learning_rate": 4.4114451285766025e-05,
"loss": 0.6394,
"step": 11880
},
{
"epoch": 2.57,
"learning_rate": 4.367982614994567e-05,
"loss": 0.6351,
"step": 11900
},
{
"epoch": 2.57,
"learning_rate": 4.324520101412532e-05,
"loss": 0.6391,
"step": 11920
},
{
"epoch": 2.58,
"learning_rate": 4.281057587830496e-05,
"loss": 0.6267,
"step": 11940
},
{
"epoch": 2.58,
"learning_rate": 4.2375950742484604e-05,
"loss": 0.6461,
"step": 11960
},
{
"epoch": 2.58,
"learning_rate": 4.194132560666425e-05,
"loss": 0.6483,
"step": 11980
},
{
"epoch": 2.59,
"learning_rate": 4.150670047084389e-05,
"loss": 0.6461,
"step": 12000
},
{
"epoch": 2.59,
"eval_loss": 0.6692882180213928,
"eval_runtime": 50.1673,
"eval_samples_per_second": 39.867,
"eval_steps_per_second": 0.638,
"step": 12000
},
{
"epoch": 2.59,
"learning_rate": 4.1072075335023534e-05,
"loss": 0.6429,
"step": 12020
},
{
"epoch": 2.6,
"learning_rate": 4.0637450199203184e-05,
"loss": 0.6416,
"step": 12040
},
{
"epoch": 2.6,
"learning_rate": 4.020282506338283e-05,
"loss": 0.6356,
"step": 12060
},
{
"epoch": 2.61,
"learning_rate": 3.976819992756247e-05,
"loss": 0.6402,
"step": 12080
},
{
"epoch": 2.61,
"learning_rate": 3.933357479174212e-05,
"loss": 0.6395,
"step": 12100
},
{
"epoch": 2.61,
"learning_rate": 3.8898949655921764e-05,
"loss": 0.6432,
"step": 12120
},
{
"epoch": 2.62,
"learning_rate": 3.846432452010141e-05,
"loss": 0.6386,
"step": 12140
},
{
"epoch": 2.62,
"learning_rate": 3.802969938428106e-05,
"loss": 0.6396,
"step": 12160
},
{
"epoch": 2.63,
"learning_rate": 3.75950742484607e-05,
"loss": 0.6423,
"step": 12180
},
{
"epoch": 2.63,
"learning_rate": 3.7160449112640344e-05,
"loss": 0.649,
"step": 12200
},
{
"epoch": 2.63,
"eval_loss": 0.6691960096359253,
"eval_runtime": 50.1649,
"eval_samples_per_second": 39.869,
"eval_steps_per_second": 0.638,
"step": 12200
},
{
"epoch": 2.64,
"learning_rate": 3.672582397681999e-05,
"loss": 0.6547,
"step": 12220
},
{
"epoch": 2.64,
"learning_rate": 3.629119884099964e-05,
"loss": 0.642,
"step": 12240
},
{
"epoch": 2.65,
"learning_rate": 3.585657370517928e-05,
"loss": 0.634,
"step": 12260
},
{
"epoch": 2.65,
"learning_rate": 3.542194856935892e-05,
"loss": 0.6447,
"step": 12280
},
{
"epoch": 2.65,
"learning_rate": 3.498732343353857e-05,
"loss": 0.6285,
"step": 12300
},
{
"epoch": 2.66,
"learning_rate": 3.455269829771822e-05,
"loss": 0.6436,
"step": 12320
},
{
"epoch": 2.66,
"learning_rate": 3.411807316189786e-05,
"loss": 0.6349,
"step": 12340
},
{
"epoch": 2.67,
"learning_rate": 3.36834480260775e-05,
"loss": 0.6425,
"step": 12360
},
{
"epoch": 2.67,
"learning_rate": 3.324882289025715e-05,
"loss": 0.6393,
"step": 12380
},
{
"epoch": 2.68,
"learning_rate": 3.2814197754436796e-05,
"loss": 0.6367,
"step": 12400
},
{
"epoch": 2.68,
"eval_loss": 0.6687243580818176,
"eval_runtime": 50.3508,
"eval_samples_per_second": 39.721,
"eval_steps_per_second": 0.636,
"step": 12400
},
{
"epoch": 2.68,
"learning_rate": 3.237957261861644e-05,
"loss": 0.6386,
"step": 12420
},
{
"epoch": 2.68,
"learning_rate": 3.194494748279609e-05,
"loss": 0.6526,
"step": 12440
},
{
"epoch": 2.69,
"learning_rate": 3.151032234697573e-05,
"loss": 0.6357,
"step": 12460
},
{
"epoch": 2.69,
"learning_rate": 3.1075697211155376e-05,
"loss": 0.6353,
"step": 12480
},
{
"epoch": 2.7,
"learning_rate": 3.0641072075335026e-05,
"loss": 0.6449,
"step": 12500
},
{
"epoch": 2.7,
"learning_rate": 3.0206446939514663e-05,
"loss": 0.6425,
"step": 12520
},
{
"epoch": 2.71,
"learning_rate": 2.977182180369431e-05,
"loss": 0.6374,
"step": 12540
},
{
"epoch": 2.71,
"learning_rate": 2.9337196667873956e-05,
"loss": 0.6324,
"step": 12560
},
{
"epoch": 2.71,
"learning_rate": 2.89025715320536e-05,
"loss": 0.6502,
"step": 12580
},
{
"epoch": 2.72,
"learning_rate": 2.8467946396233246e-05,
"loss": 0.637,
"step": 12600
},
{
"epoch": 2.72,
"eval_loss": 0.6683821082115173,
"eval_runtime": 50.2054,
"eval_samples_per_second": 39.836,
"eval_steps_per_second": 0.637,
"step": 12600
},
{
"epoch": 2.72,
"learning_rate": 2.8033321260412892e-05,
"loss": 0.647,
"step": 12620
},
{
"epoch": 2.73,
"learning_rate": 2.7598696124592536e-05,
"loss": 0.632,
"step": 12640
},
{
"epoch": 2.73,
"learning_rate": 2.7164070988772182e-05,
"loss": 0.6411,
"step": 12660
},
{
"epoch": 2.74,
"learning_rate": 2.672944585295183e-05,
"loss": 0.632,
"step": 12680
},
{
"epoch": 2.74,
"learning_rate": 2.6294820717131472e-05,
"loss": 0.6389,
"step": 12700
},
{
"epoch": 2.74,
"learning_rate": 2.586019558131112e-05,
"loss": 0.6337,
"step": 12720
},
{
"epoch": 2.75,
"learning_rate": 2.542557044549076e-05,
"loss": 0.6439,
"step": 12740
},
{
"epoch": 2.75,
"learning_rate": 2.4990945309670405e-05,
"loss": 0.6364,
"step": 12760
},
{
"epoch": 2.76,
"learning_rate": 2.4556320173850052e-05,
"loss": 0.6402,
"step": 12780
},
{
"epoch": 2.76,
"learning_rate": 2.4121695038029695e-05,
"loss": 0.6376,
"step": 12800
},
{
"epoch": 2.76,
"eval_loss": 0.6680713295936584,
"eval_runtime": 50.1757,
"eval_samples_per_second": 39.86,
"eval_steps_per_second": 0.638,
"step": 12800
},
{
"epoch": 2.77,
"learning_rate": 2.3687069902209342e-05,
"loss": 0.6316,
"step": 12820
},
{
"epoch": 2.77,
"learning_rate": 2.325244476638899e-05,
"loss": 0.6393,
"step": 12840
},
{
"epoch": 2.77,
"learning_rate": 2.281781963056863e-05,
"loss": 0.6372,
"step": 12860
},
{
"epoch": 2.78,
"learning_rate": 2.2383194494748278e-05,
"loss": 0.6466,
"step": 12880
},
{
"epoch": 2.78,
"learning_rate": 2.1948569358927925e-05,
"loss": 0.6392,
"step": 12900
},
{
"epoch": 2.79,
"learning_rate": 2.1513944223107568e-05,
"loss": 0.6389,
"step": 12920
},
{
"epoch": 2.79,
"learning_rate": 2.107931908728721e-05,
"loss": 0.64,
"step": 12940
},
{
"epoch": 2.8,
"learning_rate": 2.0644693951466858e-05,
"loss": 0.6362,
"step": 12960
},
{
"epoch": 2.8,
"learning_rate": 2.02100688156465e-05,
"loss": 0.6364,
"step": 12980
},
{
"epoch": 2.8,
"learning_rate": 1.9775443679826148e-05,
"loss": 0.6372,
"step": 13000
},
{
"epoch": 2.8,
"eval_loss": 0.6680414080619812,
"eval_runtime": 50.2211,
"eval_samples_per_second": 39.824,
"eval_steps_per_second": 0.637,
"step": 13000
},
{
"epoch": 2.81,
"learning_rate": 1.9340818544005794e-05,
"loss": 0.6336,
"step": 13020
},
{
"epoch": 2.81,
"learning_rate": 1.8906193408185438e-05,
"loss": 0.6348,
"step": 13040
},
{
"epoch": 2.82,
"learning_rate": 1.8471568272365084e-05,
"loss": 0.6338,
"step": 13060
},
{
"epoch": 2.82,
"learning_rate": 1.8036943136544728e-05,
"loss": 0.6396,
"step": 13080
},
{
"epoch": 2.83,
"learning_rate": 1.7602318000724374e-05,
"loss": 0.641,
"step": 13100
},
{
"epoch": 2.83,
"learning_rate": 1.7167692864904017e-05,
"loss": 0.6369,
"step": 13120
},
{
"epoch": 2.83,
"learning_rate": 1.6733067729083664e-05,
"loss": 0.6345,
"step": 13140
},
{
"epoch": 2.84,
"learning_rate": 1.629844259326331e-05,
"loss": 0.649,
"step": 13160
},
{
"epoch": 2.84,
"learning_rate": 1.5863817457442954e-05,
"loss": 0.6409,
"step": 13180
},
{
"epoch": 2.85,
"learning_rate": 1.54291923216226e-05,
"loss": 0.63,
"step": 13200
},
{
"epoch": 2.85,
"eval_loss": 0.6678950190544128,
"eval_runtime": 50.1908,
"eval_samples_per_second": 39.848,
"eval_steps_per_second": 0.638,
"step": 13200
},
{
"epoch": 2.85,
"learning_rate": 1.4994567185802244e-05,
"loss": 0.6428,
"step": 13220
},
{
"epoch": 2.86,
"learning_rate": 1.4559942049981889e-05,
"loss": 0.645,
"step": 13240
},
{
"epoch": 2.86,
"learning_rate": 1.4125316914161534e-05,
"loss": 0.6434,
"step": 13260
},
{
"epoch": 2.87,
"learning_rate": 1.369069177834118e-05,
"loss": 0.6462,
"step": 13280
},
{
"epoch": 2.87,
"learning_rate": 1.3256066642520825e-05,
"loss": 0.6387,
"step": 13300
},
{
"epoch": 2.87,
"learning_rate": 1.2821441506700468e-05,
"loss": 0.6311,
"step": 13320
},
{
"epoch": 2.88,
"learning_rate": 1.2386816370880113e-05,
"loss": 0.6446,
"step": 13340
},
{
"epoch": 2.88,
"learning_rate": 1.195219123505976e-05,
"loss": 0.6426,
"step": 13360
},
{
"epoch": 2.89,
"learning_rate": 1.1517566099239405e-05,
"loss": 0.6369,
"step": 13380
},
{
"epoch": 2.89,
"learning_rate": 1.108294096341905e-05,
"loss": 0.6467,
"step": 13400
},
{
"epoch": 2.89,
"eval_loss": 0.6676326990127563,
"eval_runtime": 50.1589,
"eval_samples_per_second": 39.873,
"eval_steps_per_second": 0.638,
"step": 13400
}
],
"max_steps": 13905,
"num_train_epochs": 3,
"total_flos": 1.7033837289458893e+20,
"trial_name": null,
"trial_params": null
}