whisper-tiny-fr / trainer_state.json
JaepaX's picture
whisper-tiny-french-best
4cba801 verified
{
"best_metric": 37.65067359962184,
"best_model_checkpoint": "./whisper-tiny-fr-micro-train/checkpoint-8222",
"epoch": 0.08564583333333334,
"eval_steps": 4111,
"global_step": 8222,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 19.87736701965332,
"learning_rate": 5e-09,
"loss": 0.9486,
"step": 25
},
{
"epoch": 0.0,
"grad_norm": 18.398113250732422,
"learning_rate": 1e-08,
"loss": 0.9083,
"step": 50
},
{
"epoch": 0.0,
"grad_norm": 16.93355941772461,
"learning_rate": 1.5e-08,
"loss": 0.8204,
"step": 75
},
{
"epoch": 0.0,
"grad_norm": 11.64875602722168,
"learning_rate": 2e-08,
"loss": 0.7006,
"step": 100
},
{
"epoch": 0.0,
"grad_norm": 8.734498977661133,
"learning_rate": 2.5e-08,
"loss": 0.648,
"step": 125
},
{
"epoch": 0.0,
"grad_norm": 7.067263603210449,
"learning_rate": 3e-08,
"loss": 0.537,
"step": 150
},
{
"epoch": 0.0,
"grad_norm": 7.97986364364624,
"learning_rate": 3.4999999999999996e-08,
"loss": 0.5752,
"step": 175
},
{
"epoch": 0.0,
"grad_norm": 7.728494167327881,
"learning_rate": 4e-08,
"loss": 0.5863,
"step": 200
},
{
"epoch": 0.0,
"grad_norm": 9.38297176361084,
"learning_rate": 4.5e-08,
"loss": 0.5172,
"step": 225
},
{
"epoch": 0.0,
"grad_norm": 7.568984508514404,
"learning_rate": 5e-08,
"loss": 0.4682,
"step": 250
},
{
"epoch": 0.0,
"grad_norm": 8.042702674865723,
"learning_rate": 5.5e-08,
"loss": 0.4214,
"step": 275
},
{
"epoch": 0.0,
"grad_norm": 7.661986827850342,
"learning_rate": 6e-08,
"loss": 0.4431,
"step": 300
},
{
"epoch": 0.0,
"grad_norm": 9.154074668884277,
"learning_rate": 6.5e-08,
"loss": 0.4056,
"step": 325
},
{
"epoch": 0.0,
"grad_norm": 8.346107482910156,
"learning_rate": 6.999999999999999e-08,
"loss": 0.4079,
"step": 350
},
{
"epoch": 0.0,
"grad_norm": 6.246629238128662,
"learning_rate": 7.5e-08,
"loss": 0.3897,
"step": 375
},
{
"epoch": 0.0,
"grad_norm": 7.129103183746338,
"learning_rate": 8e-08,
"loss": 0.3536,
"step": 400
},
{
"epoch": 0.0,
"grad_norm": 6.834921836853027,
"learning_rate": 8.5e-08,
"loss": 0.3401,
"step": 425
},
{
"epoch": 0.0,
"grad_norm": 8.863313674926758,
"learning_rate": 9e-08,
"loss": 0.3627,
"step": 450
},
{
"epoch": 0.0,
"grad_norm": 7.284473896026611,
"learning_rate": 9.499999999999999e-08,
"loss": 0.356,
"step": 475
},
{
"epoch": 0.01,
"grad_norm": 5.816940784454346,
"learning_rate": 1e-07,
"loss": 0.3539,
"step": 500
},
{
"epoch": 0.01,
"grad_norm": 8.022565841674805,
"learning_rate": 9.997382198952879e-08,
"loss": 0.3577,
"step": 525
},
{
"epoch": 0.01,
"grad_norm": 7.51448917388916,
"learning_rate": 9.994764397905758e-08,
"loss": 0.3513,
"step": 550
},
{
"epoch": 0.01,
"grad_norm": 7.016752243041992,
"learning_rate": 9.992146596858639e-08,
"loss": 0.3687,
"step": 575
},
{
"epoch": 0.01,
"grad_norm": 6.761058330535889,
"learning_rate": 9.989528795811518e-08,
"loss": 0.3495,
"step": 600
},
{
"epoch": 0.01,
"grad_norm": 6.693453311920166,
"learning_rate": 9.986910994764397e-08,
"loss": 0.3325,
"step": 625
},
{
"epoch": 0.01,
"grad_norm": 6.049990653991699,
"learning_rate": 9.984293193717277e-08,
"loss": 0.3609,
"step": 650
},
{
"epoch": 0.01,
"grad_norm": 6.6787109375,
"learning_rate": 9.981675392670157e-08,
"loss": 0.3317,
"step": 675
},
{
"epoch": 0.01,
"grad_norm": 6.8440937995910645,
"learning_rate": 9.979057591623035e-08,
"loss": 0.332,
"step": 700
},
{
"epoch": 0.01,
"grad_norm": 6.892059326171875,
"learning_rate": 9.976439790575916e-08,
"loss": 0.3379,
"step": 725
},
{
"epoch": 0.01,
"grad_norm": 8.142931938171387,
"learning_rate": 9.973821989528795e-08,
"loss": 0.3308,
"step": 750
},
{
"epoch": 0.01,
"grad_norm": 7.152769565582275,
"learning_rate": 9.971204188481675e-08,
"loss": 0.3352,
"step": 775
},
{
"epoch": 0.01,
"grad_norm": 6.680529594421387,
"learning_rate": 9.968586387434554e-08,
"loss": 0.3343,
"step": 800
},
{
"epoch": 0.01,
"grad_norm": 6.2912445068359375,
"learning_rate": 9.965968586387435e-08,
"loss": 0.3233,
"step": 825
},
{
"epoch": 0.01,
"grad_norm": 6.337522983551025,
"learning_rate": 9.963350785340313e-08,
"loss": 0.3147,
"step": 850
},
{
"epoch": 0.01,
"grad_norm": 7.505101203918457,
"learning_rate": 9.960732984293193e-08,
"loss": 0.3384,
"step": 875
},
{
"epoch": 0.01,
"grad_norm": 6.080435276031494,
"learning_rate": 9.958115183246073e-08,
"loss": 0.3397,
"step": 900
},
{
"epoch": 0.01,
"grad_norm": 7.185611248016357,
"learning_rate": 9.955497382198953e-08,
"loss": 0.3448,
"step": 925
},
{
"epoch": 0.01,
"grad_norm": 6.759621620178223,
"learning_rate": 9.952879581151831e-08,
"loss": 0.3193,
"step": 950
},
{
"epoch": 0.01,
"grad_norm": 5.393013000488281,
"learning_rate": 9.950261780104712e-08,
"loss": 0.3134,
"step": 975
},
{
"epoch": 0.01,
"grad_norm": 6.275601387023926,
"learning_rate": 9.947643979057591e-08,
"loss": 0.3227,
"step": 1000
},
{
"epoch": 0.01,
"grad_norm": 8.202522277832031,
"learning_rate": 9.94502617801047e-08,
"loss": 0.3366,
"step": 1025
},
{
"epoch": 0.01,
"grad_norm": 6.580430507659912,
"learning_rate": 9.94240837696335e-08,
"loss": 0.4026,
"step": 1050
},
{
"epoch": 0.01,
"grad_norm": 8.456107139587402,
"learning_rate": 9.93979057591623e-08,
"loss": 0.3868,
"step": 1075
},
{
"epoch": 0.01,
"grad_norm": 8.176070213317871,
"learning_rate": 9.937172774869109e-08,
"loss": 0.4297,
"step": 1100
},
{
"epoch": 0.01,
"grad_norm": 9.07604694366455,
"learning_rate": 9.934554973821989e-08,
"loss": 0.4861,
"step": 1125
},
{
"epoch": 0.01,
"grad_norm": 8.36545181274414,
"learning_rate": 9.931937172774869e-08,
"loss": 0.4939,
"step": 1150
},
{
"epoch": 0.01,
"grad_norm": 9.944046974182129,
"learning_rate": 9.929319371727748e-08,
"loss": 0.5198,
"step": 1175
},
{
"epoch": 0.01,
"grad_norm": 12.03496265411377,
"learning_rate": 9.926701570680629e-08,
"loss": 0.5353,
"step": 1200
},
{
"epoch": 0.01,
"grad_norm": 14.10308837890625,
"learning_rate": 9.924083769633508e-08,
"loss": 0.5005,
"step": 1225
},
{
"epoch": 0.01,
"grad_norm": 12.214973449707031,
"learning_rate": 9.921465968586387e-08,
"loss": 0.5879,
"step": 1250
},
{
"epoch": 0.01,
"grad_norm": 11.323634147644043,
"learning_rate": 9.918848167539266e-08,
"loss": 0.5031,
"step": 1275
},
{
"epoch": 0.01,
"grad_norm": 10.742391586303711,
"learning_rate": 9.916230366492147e-08,
"loss": 0.5495,
"step": 1300
},
{
"epoch": 0.01,
"grad_norm": 14.457928657531738,
"learning_rate": 9.913612565445025e-08,
"loss": 0.5263,
"step": 1325
},
{
"epoch": 0.01,
"grad_norm": 11.978686332702637,
"learning_rate": 9.910994764397906e-08,
"loss": 0.5477,
"step": 1350
},
{
"epoch": 0.01,
"grad_norm": 11.699676513671875,
"learning_rate": 9.908376963350785e-08,
"loss": 0.5293,
"step": 1375
},
{
"epoch": 0.01,
"grad_norm": 11.737068176269531,
"learning_rate": 9.905759162303664e-08,
"loss": 0.5622,
"step": 1400
},
{
"epoch": 0.01,
"grad_norm": 10.408597946166992,
"learning_rate": 9.903141361256544e-08,
"loss": 0.5639,
"step": 1425
},
{
"epoch": 0.02,
"grad_norm": 11.709553718566895,
"learning_rate": 9.900523560209424e-08,
"loss": 0.5421,
"step": 1450
},
{
"epoch": 0.02,
"grad_norm": 10.107832908630371,
"learning_rate": 9.897905759162302e-08,
"loss": 0.5291,
"step": 1475
},
{
"epoch": 0.02,
"grad_norm": 11.955233573913574,
"learning_rate": 9.895287958115183e-08,
"loss": 0.5508,
"step": 1500
},
{
"epoch": 0.02,
"grad_norm": 12.00100040435791,
"learning_rate": 9.892670157068062e-08,
"loss": 0.5271,
"step": 1525
},
{
"epoch": 0.02,
"grad_norm": 11.11552619934082,
"learning_rate": 9.890052356020942e-08,
"loss": 0.5338,
"step": 1550
},
{
"epoch": 0.02,
"grad_norm": 8.751993179321289,
"learning_rate": 9.887434554973821e-08,
"loss": 0.5178,
"step": 1575
},
{
"epoch": 0.02,
"grad_norm": 10.523124694824219,
"learning_rate": 9.884816753926702e-08,
"loss": 0.5574,
"step": 1600
},
{
"epoch": 0.02,
"grad_norm": 14.987282752990723,
"learning_rate": 9.88219895287958e-08,
"loss": 0.5406,
"step": 1625
},
{
"epoch": 0.02,
"grad_norm": 12.370256423950195,
"learning_rate": 9.87958115183246e-08,
"loss": 0.4805,
"step": 1650
},
{
"epoch": 0.02,
"grad_norm": 9.747725486755371,
"learning_rate": 9.87696335078534e-08,
"loss": 0.5259,
"step": 1675
},
{
"epoch": 0.02,
"grad_norm": 12.413991928100586,
"learning_rate": 9.87434554973822e-08,
"loss": 0.5225,
"step": 1700
},
{
"epoch": 0.02,
"grad_norm": 11.440505981445312,
"learning_rate": 9.871727748691098e-08,
"loss": 0.5511,
"step": 1725
},
{
"epoch": 0.02,
"grad_norm": 11.07944107055664,
"learning_rate": 9.869109947643979e-08,
"loss": 0.4913,
"step": 1750
},
{
"epoch": 0.02,
"grad_norm": 12.481764793395996,
"learning_rate": 9.866492146596858e-08,
"loss": 0.5618,
"step": 1775
},
{
"epoch": 0.02,
"grad_norm": 10.33045768737793,
"learning_rate": 9.863874345549738e-08,
"loss": 0.5099,
"step": 1800
},
{
"epoch": 0.02,
"grad_norm": 11.342964172363281,
"learning_rate": 9.861256544502617e-08,
"loss": 0.5454,
"step": 1825
},
{
"epoch": 0.02,
"grad_norm": 10.811851501464844,
"learning_rate": 9.858638743455498e-08,
"loss": 0.5118,
"step": 1850
},
{
"epoch": 0.02,
"grad_norm": 12.243831634521484,
"learning_rate": 9.856020942408377e-08,
"loss": 0.5191,
"step": 1875
},
{
"epoch": 0.02,
"grad_norm": 10.51577377319336,
"learning_rate": 9.853403141361256e-08,
"loss": 0.4916,
"step": 1900
},
{
"epoch": 0.02,
"grad_norm": 9.325318336486816,
"learning_rate": 9.850785340314135e-08,
"loss": 0.5178,
"step": 1925
},
{
"epoch": 0.02,
"grad_norm": 9.348186492919922,
"learning_rate": 9.848167539267015e-08,
"loss": 0.5066,
"step": 1950
},
{
"epoch": 0.02,
"grad_norm": 11.930258750915527,
"learning_rate": 9.845549738219895e-08,
"loss": 0.4747,
"step": 1975
},
{
"epoch": 0.02,
"grad_norm": 11.170626640319824,
"learning_rate": 9.842931937172775e-08,
"loss": 0.5285,
"step": 2000
},
{
"epoch": 0.02,
"grad_norm": 10.741945266723633,
"learning_rate": 9.840314136125654e-08,
"loss": 0.5686,
"step": 2025
},
{
"epoch": 0.02,
"grad_norm": 10.902135848999023,
"learning_rate": 9.837696335078533e-08,
"loss": 0.5909,
"step": 2050
},
{
"epoch": 0.02,
"grad_norm": 8.929906845092773,
"learning_rate": 9.835078534031414e-08,
"loss": 0.578,
"step": 2075
},
{
"epoch": 0.02,
"grad_norm": 11.585110664367676,
"learning_rate": 9.832460732984292e-08,
"loss": 0.8891,
"step": 2100
},
{
"epoch": 0.02,
"grad_norm": 11.564123153686523,
"learning_rate": 9.829842931937173e-08,
"loss": 1.0214,
"step": 2125
},
{
"epoch": 0.02,
"grad_norm": 10.137656211853027,
"learning_rate": 9.827225130890052e-08,
"loss": 0.7967,
"step": 2150
},
{
"epoch": 0.02,
"grad_norm": 9.988815307617188,
"learning_rate": 9.824607329842931e-08,
"loss": 0.7586,
"step": 2175
},
{
"epoch": 0.02,
"grad_norm": 8.484098434448242,
"learning_rate": 9.82198952879581e-08,
"loss": 0.6455,
"step": 2200
},
{
"epoch": 0.02,
"grad_norm": 8.596495628356934,
"learning_rate": 9.819371727748691e-08,
"loss": 0.6966,
"step": 2225
},
{
"epoch": 0.02,
"grad_norm": 8.861817359924316,
"learning_rate": 9.816753926701569e-08,
"loss": 0.8732,
"step": 2250
},
{
"epoch": 0.02,
"grad_norm": 9.609010696411133,
"learning_rate": 9.81413612565445e-08,
"loss": 0.803,
"step": 2275
},
{
"epoch": 0.02,
"grad_norm": 8.113046646118164,
"learning_rate": 9.811518324607329e-08,
"loss": 0.8018,
"step": 2300
},
{
"epoch": 0.02,
"grad_norm": 7.831557750701904,
"learning_rate": 9.808900523560209e-08,
"loss": 0.7681,
"step": 2325
},
{
"epoch": 0.02,
"grad_norm": 9.451202392578125,
"learning_rate": 9.806282722513088e-08,
"loss": 0.6863,
"step": 2350
},
{
"epoch": 0.02,
"grad_norm": 6.153475284576416,
"learning_rate": 9.803664921465969e-08,
"loss": 0.5344,
"step": 2375
},
{
"epoch": 0.03,
"grad_norm": 6.556187152862549,
"learning_rate": 9.801047120418847e-08,
"loss": 0.5072,
"step": 2400
},
{
"epoch": 0.03,
"grad_norm": 6.695789337158203,
"learning_rate": 9.798429319371727e-08,
"loss": 0.4882,
"step": 2425
},
{
"epoch": 0.03,
"grad_norm": 5.124952793121338,
"learning_rate": 9.795811518324607e-08,
"loss": 0.4023,
"step": 2450
},
{
"epoch": 0.03,
"grad_norm": 5.724789142608643,
"learning_rate": 9.793193717277487e-08,
"loss": 0.436,
"step": 2475
},
{
"epoch": 0.03,
"grad_norm": 6.060319423675537,
"learning_rate": 9.790575916230365e-08,
"loss": 0.3939,
"step": 2500
},
{
"epoch": 0.03,
"grad_norm": 5.216397285461426,
"learning_rate": 9.787958115183246e-08,
"loss": 0.3305,
"step": 2525
},
{
"epoch": 0.03,
"grad_norm": 5.101900577545166,
"learning_rate": 9.785340314136125e-08,
"loss": 0.3212,
"step": 2550
},
{
"epoch": 0.03,
"grad_norm": 4.3815484046936035,
"learning_rate": 9.782722513089004e-08,
"loss": 0.3153,
"step": 2575
},
{
"epoch": 0.03,
"grad_norm": 7.231525897979736,
"learning_rate": 9.780104712041885e-08,
"loss": 0.4177,
"step": 2600
},
{
"epoch": 0.03,
"grad_norm": 5.76875638961792,
"learning_rate": 9.777486910994764e-08,
"loss": 0.5512,
"step": 2625
},
{
"epoch": 0.03,
"grad_norm": 5.580086708068848,
"learning_rate": 9.774869109947644e-08,
"loss": 0.5036,
"step": 2650
},
{
"epoch": 0.03,
"grad_norm": 6.0146894454956055,
"learning_rate": 9.772251308900523e-08,
"loss": 0.5229,
"step": 2675
},
{
"epoch": 0.03,
"grad_norm": 5.919321060180664,
"learning_rate": 9.769633507853404e-08,
"loss": 0.6031,
"step": 2700
},
{
"epoch": 0.03,
"grad_norm": 7.249564170837402,
"learning_rate": 9.767015706806282e-08,
"loss": 0.6529,
"step": 2725
},
{
"epoch": 0.03,
"grad_norm": 5.037733554840088,
"learning_rate": 9.764397905759162e-08,
"loss": 0.5845,
"step": 2750
},
{
"epoch": 0.03,
"grad_norm": 5.596995830535889,
"learning_rate": 9.761780104712042e-08,
"loss": 0.4844,
"step": 2775
},
{
"epoch": 0.03,
"grad_norm": 6.465144157409668,
"learning_rate": 9.759162303664921e-08,
"loss": 0.7091,
"step": 2800
},
{
"epoch": 0.03,
"grad_norm": 8.079314231872559,
"learning_rate": 9.7565445026178e-08,
"loss": 1.0112,
"step": 2825
},
{
"epoch": 0.03,
"grad_norm": 6.964105606079102,
"learning_rate": 9.753926701570681e-08,
"loss": 0.8826,
"step": 2850
},
{
"epoch": 0.03,
"grad_norm": 5.710263252258301,
"learning_rate": 9.751308900523559e-08,
"loss": 0.7581,
"step": 2875
},
{
"epoch": 0.03,
"grad_norm": 4.680161476135254,
"learning_rate": 9.74869109947644e-08,
"loss": 0.6771,
"step": 2900
},
{
"epoch": 0.03,
"grad_norm": 6.1621198654174805,
"learning_rate": 9.746073298429319e-08,
"loss": 0.5596,
"step": 2925
},
{
"epoch": 0.03,
"grad_norm": 5.297918796539307,
"learning_rate": 9.743455497382198e-08,
"loss": 0.4256,
"step": 2950
},
{
"epoch": 0.03,
"grad_norm": 5.257850646972656,
"learning_rate": 9.740837696335078e-08,
"loss": 0.3977,
"step": 2975
},
{
"epoch": 0.03,
"grad_norm": 5.193603992462158,
"learning_rate": 9.738219895287958e-08,
"loss": 0.3957,
"step": 3000
},
{
"epoch": 0.03,
"grad_norm": 4.629751682281494,
"learning_rate": 9.735602094240836e-08,
"loss": 0.3431,
"step": 3025
},
{
"epoch": 0.03,
"grad_norm": 4.40083122253418,
"learning_rate": 9.732984293193717e-08,
"loss": 0.3242,
"step": 3050
},
{
"epoch": 0.03,
"grad_norm": 4.38535737991333,
"learning_rate": 9.730366492146596e-08,
"loss": 0.3124,
"step": 3075
},
{
"epoch": 0.03,
"grad_norm": 4.32893180847168,
"learning_rate": 9.727748691099476e-08,
"loss": 0.2964,
"step": 3100
},
{
"epoch": 0.03,
"grad_norm": 4.1819586753845215,
"learning_rate": 9.725130890052355e-08,
"loss": 0.2668,
"step": 3125
},
{
"epoch": 0.03,
"grad_norm": 4.960391521453857,
"learning_rate": 9.722513089005235e-08,
"loss": 0.2789,
"step": 3150
},
{
"epoch": 0.03,
"grad_norm": 4.490744113922119,
"learning_rate": 9.719895287958115e-08,
"loss": 0.2566,
"step": 3175
},
{
"epoch": 0.03,
"grad_norm": 4.861118316650391,
"learning_rate": 9.717277486910994e-08,
"loss": 0.279,
"step": 3200
},
{
"epoch": 0.03,
"grad_norm": 5.386078357696533,
"learning_rate": 9.714659685863873e-08,
"loss": 0.2882,
"step": 3225
},
{
"epoch": 0.03,
"grad_norm": 6.265291213989258,
"learning_rate": 9.712041884816754e-08,
"loss": 0.3766,
"step": 3250
},
{
"epoch": 0.03,
"grad_norm": 6.723097801208496,
"learning_rate": 9.709424083769633e-08,
"loss": 0.5386,
"step": 3275
},
{
"epoch": 0.03,
"grad_norm": 5.949530601501465,
"learning_rate": 9.706806282722513e-08,
"loss": 0.4818,
"step": 3300
},
{
"epoch": 0.03,
"grad_norm": 5.125253200531006,
"learning_rate": 9.704188481675392e-08,
"loss": 0.4862,
"step": 3325
},
{
"epoch": 0.03,
"grad_norm": 5.845962047576904,
"learning_rate": 9.701570680628271e-08,
"loss": 0.4502,
"step": 3350
},
{
"epoch": 0.04,
"grad_norm": 5.178995609283447,
"learning_rate": 9.698952879581152e-08,
"loss": 0.4086,
"step": 3375
},
{
"epoch": 0.04,
"grad_norm": 4.950035095214844,
"learning_rate": 9.696335078534031e-08,
"loss": 0.4167,
"step": 3400
},
{
"epoch": 0.04,
"grad_norm": 7.225176811218262,
"learning_rate": 9.693717277486911e-08,
"loss": 0.3945,
"step": 3425
},
{
"epoch": 0.04,
"grad_norm": 6.313861846923828,
"learning_rate": 9.69109947643979e-08,
"loss": 0.4021,
"step": 3450
},
{
"epoch": 0.04,
"grad_norm": 5.976010799407959,
"learning_rate": 9.68848167539267e-08,
"loss": 0.3821,
"step": 3475
},
{
"epoch": 0.04,
"grad_norm": 6.867140769958496,
"learning_rate": 9.685863874345549e-08,
"loss": 0.3901,
"step": 3500
},
{
"epoch": 0.04,
"grad_norm": 5.82126522064209,
"learning_rate": 9.683246073298429e-08,
"loss": 0.3798,
"step": 3525
},
{
"epoch": 0.04,
"grad_norm": 5.741916656494141,
"learning_rate": 9.680628272251309e-08,
"loss": 0.3842,
"step": 3550
},
{
"epoch": 0.04,
"grad_norm": 5.43148946762085,
"learning_rate": 9.678010471204188e-08,
"loss": 0.3762,
"step": 3575
},
{
"epoch": 0.04,
"grad_norm": 4.983076095581055,
"learning_rate": 9.675392670157067e-08,
"loss": 0.3496,
"step": 3600
},
{
"epoch": 0.04,
"grad_norm": 5.233561992645264,
"learning_rate": 9.672774869109948e-08,
"loss": 0.3225,
"step": 3625
},
{
"epoch": 0.04,
"grad_norm": 4.534473419189453,
"learning_rate": 9.670157068062826e-08,
"loss": 0.3009,
"step": 3650
},
{
"epoch": 0.04,
"grad_norm": 5.9857306480407715,
"learning_rate": 9.667539267015707e-08,
"loss": 0.4075,
"step": 3675
},
{
"epoch": 0.04,
"grad_norm": 7.715012073516846,
"learning_rate": 9.664921465968586e-08,
"loss": 0.5876,
"step": 3700
},
{
"epoch": 0.04,
"grad_norm": 7.0109405517578125,
"learning_rate": 9.662303664921465e-08,
"loss": 0.6805,
"step": 3725
},
{
"epoch": 0.04,
"grad_norm": 6.128580093383789,
"learning_rate": 9.659685863874345e-08,
"loss": 0.5924,
"step": 3750
},
{
"epoch": 0.04,
"grad_norm": 5.6104865074157715,
"learning_rate": 9.657068062827225e-08,
"loss": 0.4888,
"step": 3775
},
{
"epoch": 0.04,
"grad_norm": 5.251614093780518,
"learning_rate": 9.654450261780103e-08,
"loss": 0.4286,
"step": 3800
},
{
"epoch": 0.04,
"grad_norm": 5.3208184242248535,
"learning_rate": 9.651832460732984e-08,
"loss": 0.3889,
"step": 3825
},
{
"epoch": 0.04,
"grad_norm": 5.454063892364502,
"learning_rate": 9.649214659685863e-08,
"loss": 0.4181,
"step": 3850
},
{
"epoch": 0.04,
"grad_norm": 5.304567813873291,
"learning_rate": 9.646596858638742e-08,
"loss": 0.3816,
"step": 3875
},
{
"epoch": 0.04,
"grad_norm": 4.866218566894531,
"learning_rate": 9.643979057591623e-08,
"loss": 0.3475,
"step": 3900
},
{
"epoch": 0.04,
"grad_norm": 4.873610019683838,
"learning_rate": 9.641361256544502e-08,
"loss": 0.3369,
"step": 3925
},
{
"epoch": 0.04,
"grad_norm": 4.69268798828125,
"learning_rate": 9.638743455497382e-08,
"loss": 0.3515,
"step": 3950
},
{
"epoch": 0.04,
"grad_norm": 5.367419719696045,
"learning_rate": 9.636125654450261e-08,
"loss": 0.3328,
"step": 3975
},
{
"epoch": 0.04,
"grad_norm": 4.6179728507995605,
"learning_rate": 9.633507853403142e-08,
"loss": 0.3536,
"step": 4000
},
{
"epoch": 0.04,
"grad_norm": 4.710158348083496,
"learning_rate": 9.630890052356021e-08,
"loss": 0.3446,
"step": 4025
},
{
"epoch": 0.04,
"grad_norm": 4.824263095855713,
"learning_rate": 9.6282722513089e-08,
"loss": 0.3488,
"step": 4050
},
{
"epoch": 0.04,
"grad_norm": 5.474529266357422,
"learning_rate": 9.62565445026178e-08,
"loss": 0.3648,
"step": 4075
},
{
"epoch": 0.04,
"grad_norm": 5.825191497802734,
"learning_rate": 9.62303664921466e-08,
"loss": 0.3782,
"step": 4100
},
{
"epoch": 0.04,
"eval_loss": 0.7290233969688416,
"eval_runtime": 275.8897,
"eval_samples_per_second": 9.801,
"eval_steps_per_second": 1.225,
"eval_wer": 39.41739541479556,
"step": 4111
},
{
"epoch": 0.04,
"grad_norm": 4.9900221824646,
"learning_rate": 9.620418848167538e-08,
"loss": 0.3366,
"step": 4125
},
{
"epoch": 0.04,
"grad_norm": 6.866960525512695,
"learning_rate": 9.617801047120419e-08,
"loss": 0.384,
"step": 4150
},
{
"epoch": 0.04,
"grad_norm": 6.261806011199951,
"learning_rate": 9.615183246073298e-08,
"loss": 0.4945,
"step": 4175
},
{
"epoch": 0.04,
"grad_norm": 6.251190185546875,
"learning_rate": 9.612565445026178e-08,
"loss": 0.5011,
"step": 4200
},
{
"epoch": 0.04,
"grad_norm": 7.063992023468018,
"learning_rate": 9.609947643979057e-08,
"loss": 0.4765,
"step": 4225
},
{
"epoch": 0.04,
"grad_norm": 6.903201103210449,
"learning_rate": 9.607329842931938e-08,
"loss": 0.4501,
"step": 4250
},
{
"epoch": 0.04,
"grad_norm": 6.0563788414001465,
"learning_rate": 9.604712041884816e-08,
"loss": 0.459,
"step": 4275
},
{
"epoch": 0.04,
"grad_norm": 6.9955363273620605,
"learning_rate": 9.602094240837696e-08,
"loss": 0.4237,
"step": 4300
},
{
"epoch": 0.05,
"grad_norm": 6.026924133300781,
"learning_rate": 9.599476439790576e-08,
"loss": 0.3898,
"step": 4325
},
{
"epoch": 0.05,
"grad_norm": 5.720476150512695,
"learning_rate": 9.596858638743455e-08,
"loss": 0.4239,
"step": 4350
},
{
"epoch": 0.05,
"grad_norm": 6.680058479309082,
"learning_rate": 9.594240837696334e-08,
"loss": 0.4321,
"step": 4375
},
{
"epoch": 0.05,
"grad_norm": 8.304168701171875,
"learning_rate": 9.591623036649215e-08,
"loss": 0.5794,
"step": 4400
},
{
"epoch": 0.05,
"grad_norm": 8.107504844665527,
"learning_rate": 9.589005235602093e-08,
"loss": 0.8686,
"step": 4425
},
{
"epoch": 0.05,
"grad_norm": 8.911792755126953,
"learning_rate": 9.586387434554973e-08,
"loss": 0.9618,
"step": 4450
},
{
"epoch": 0.05,
"grad_norm": 6.706320285797119,
"learning_rate": 9.583769633507853e-08,
"loss": 0.8042,
"step": 4475
},
{
"epoch": 0.05,
"grad_norm": 6.71433687210083,
"learning_rate": 9.581151832460732e-08,
"loss": 0.6088,
"step": 4500
},
{
"epoch": 0.05,
"grad_norm": 6.675333023071289,
"learning_rate": 9.578534031413611e-08,
"loss": 0.551,
"step": 4525
},
{
"epoch": 0.05,
"grad_norm": 6.766154766082764,
"learning_rate": 9.575916230366492e-08,
"loss": 0.5113,
"step": 4550
},
{
"epoch": 0.05,
"grad_norm": 6.36196231842041,
"learning_rate": 9.573298429319371e-08,
"loss": 0.4785,
"step": 4575
},
{
"epoch": 0.05,
"grad_norm": 6.1699395179748535,
"learning_rate": 9.570680628272251e-08,
"loss": 0.4663,
"step": 4600
},
{
"epoch": 0.05,
"grad_norm": 6.362920761108398,
"learning_rate": 9.56806282722513e-08,
"loss": 0.4394,
"step": 4625
},
{
"epoch": 0.05,
"grad_norm": 6.1348347663879395,
"learning_rate": 9.56544502617801e-08,
"loss": 0.4343,
"step": 4650
},
{
"epoch": 0.05,
"grad_norm": 6.3059563636779785,
"learning_rate": 9.56282722513089e-08,
"loss": 0.4535,
"step": 4675
},
{
"epoch": 0.05,
"grad_norm": 7.463464260101318,
"learning_rate": 9.560209424083769e-08,
"loss": 0.7034,
"step": 4700
},
{
"epoch": 0.05,
"grad_norm": 7.093417644500732,
"learning_rate": 9.557591623036649e-08,
"loss": 0.8337,
"step": 4725
},
{
"epoch": 0.05,
"grad_norm": 6.7604193687438965,
"learning_rate": 9.554973821989528e-08,
"loss": 0.7934,
"step": 4750
},
{
"epoch": 0.05,
"grad_norm": 6.093296051025391,
"learning_rate": 9.552356020942409e-08,
"loss": 0.7055,
"step": 4775
},
{
"epoch": 0.05,
"grad_norm": 6.788339138031006,
"learning_rate": 9.549738219895288e-08,
"loss": 0.6884,
"step": 4800
},
{
"epoch": 0.05,
"grad_norm": 6.2128496170043945,
"learning_rate": 9.547120418848167e-08,
"loss": 0.5722,
"step": 4825
},
{
"epoch": 0.05,
"grad_norm": 6.026149272918701,
"learning_rate": 9.544502617801047e-08,
"loss": 0.5802,
"step": 4850
},
{
"epoch": 0.05,
"grad_norm": 6.711429119110107,
"learning_rate": 9.541884816753927e-08,
"loss": 0.5129,
"step": 4875
},
{
"epoch": 0.05,
"grad_norm": 6.273972988128662,
"learning_rate": 9.539267015706805e-08,
"loss": 0.4283,
"step": 4900
},
{
"epoch": 0.05,
"grad_norm": 5.497582912445068,
"learning_rate": 9.536649214659686e-08,
"loss": 0.4075,
"step": 4925
},
{
"epoch": 0.05,
"grad_norm": 5.759308815002441,
"learning_rate": 9.534031413612565e-08,
"loss": 0.4438,
"step": 4950
},
{
"epoch": 0.05,
"grad_norm": 6.2068305015563965,
"learning_rate": 9.531413612565445e-08,
"loss": 0.4686,
"step": 4975
},
{
"epoch": 0.05,
"grad_norm": 5.611216068267822,
"learning_rate": 9.528795811518324e-08,
"loss": 0.4714,
"step": 5000
},
{
"epoch": 0.05,
"grad_norm": 5.2035040855407715,
"learning_rate": 9.526178010471204e-08,
"loss": 0.4933,
"step": 5025
},
{
"epoch": 0.05,
"grad_norm": 6.796937942504883,
"learning_rate": 9.523560209424082e-08,
"loss": 0.518,
"step": 5050
},
{
"epoch": 0.05,
"grad_norm": 5.768625259399414,
"learning_rate": 9.520942408376963e-08,
"loss": 0.5254,
"step": 5075
},
{
"epoch": 0.05,
"grad_norm": 5.743659019470215,
"learning_rate": 9.518324607329842e-08,
"loss": 0.5098,
"step": 5100
},
{
"epoch": 0.05,
"grad_norm": 6.624993801116943,
"learning_rate": 9.515706806282722e-08,
"loss": 0.4855,
"step": 5125
},
{
"epoch": 0.05,
"grad_norm": 6.45778751373291,
"learning_rate": 9.513089005235601e-08,
"loss": 0.5223,
"step": 5150
},
{
"epoch": 0.05,
"grad_norm": 5.325904369354248,
"learning_rate": 9.510471204188482e-08,
"loss": 0.5041,
"step": 5175
},
{
"epoch": 0.05,
"grad_norm": 5.208452224731445,
"learning_rate": 9.507853403141361e-08,
"loss": 0.5157,
"step": 5200
},
{
"epoch": 0.05,
"grad_norm": 5.317996501922607,
"learning_rate": 9.50523560209424e-08,
"loss": 0.5614,
"step": 5225
},
{
"epoch": 0.05,
"grad_norm": 6.383024215698242,
"learning_rate": 9.50261780104712e-08,
"loss": 0.5186,
"step": 5250
},
{
"epoch": 0.05,
"grad_norm": 4.965906620025635,
"learning_rate": 9.499999999999999e-08,
"loss": 0.4887,
"step": 5275
},
{
"epoch": 0.06,
"grad_norm": 5.874698162078857,
"learning_rate": 9.49738219895288e-08,
"loss": 0.4882,
"step": 5300
},
{
"epoch": 0.06,
"grad_norm": 5.321093559265137,
"learning_rate": 9.494764397905759e-08,
"loss": 0.4929,
"step": 5325
},
{
"epoch": 0.06,
"grad_norm": 6.657257556915283,
"learning_rate": 9.492146596858638e-08,
"loss": 0.4467,
"step": 5350
},
{
"epoch": 0.06,
"grad_norm": 5.798694133758545,
"learning_rate": 9.489528795811518e-08,
"loss": 0.5027,
"step": 5375
},
{
"epoch": 0.06,
"grad_norm": 6.4486236572265625,
"learning_rate": 9.486910994764398e-08,
"loss": 0.5157,
"step": 5400
},
{
"epoch": 0.06,
"grad_norm": 5.78603458404541,
"learning_rate": 9.484293193717276e-08,
"loss": 0.568,
"step": 5425
},
{
"epoch": 0.06,
"grad_norm": 6.391395568847656,
"learning_rate": 9.481675392670157e-08,
"loss": 0.5768,
"step": 5450
},
{
"epoch": 0.06,
"grad_norm": 6.703619003295898,
"learning_rate": 9.479057591623036e-08,
"loss": 0.5885,
"step": 5475
},
{
"epoch": 0.06,
"grad_norm": 6.5529937744140625,
"learning_rate": 9.476439790575916e-08,
"loss": 0.5355,
"step": 5500
},
{
"epoch": 0.06,
"grad_norm": 5.757615566253662,
"learning_rate": 9.473821989528795e-08,
"loss": 0.4787,
"step": 5525
},
{
"epoch": 0.06,
"grad_norm": 5.5016703605651855,
"learning_rate": 9.471204188481676e-08,
"loss": 0.4435,
"step": 5550
},
{
"epoch": 0.06,
"grad_norm": 6.2132368087768555,
"learning_rate": 9.468586387434555e-08,
"loss": 0.5157,
"step": 5575
},
{
"epoch": 0.06,
"grad_norm": 5.654526710510254,
"learning_rate": 9.465968586387434e-08,
"loss": 0.5769,
"step": 5600
},
{
"epoch": 0.06,
"grad_norm": 5.5138139724731445,
"learning_rate": 9.463350785340314e-08,
"loss": 0.5805,
"step": 5625
},
{
"epoch": 0.06,
"grad_norm": 5.938875198364258,
"learning_rate": 9.460732984293194e-08,
"loss": 0.6114,
"step": 5650
},
{
"epoch": 0.06,
"grad_norm": 4.941293239593506,
"learning_rate": 9.458115183246072e-08,
"loss": 0.5762,
"step": 5675
},
{
"epoch": 0.06,
"grad_norm": 6.395961284637451,
"learning_rate": 9.455497382198953e-08,
"loss": 0.5745,
"step": 5700
},
{
"epoch": 0.06,
"grad_norm": 5.585537910461426,
"learning_rate": 9.452879581151832e-08,
"loss": 0.5571,
"step": 5725
},
{
"epoch": 0.06,
"grad_norm": 5.933156490325928,
"learning_rate": 9.450261780104711e-08,
"loss": 0.4811,
"step": 5750
},
{
"epoch": 0.06,
"grad_norm": 7.242075443267822,
"learning_rate": 9.447643979057591e-08,
"loss": 0.464,
"step": 5775
},
{
"epoch": 0.06,
"grad_norm": 5.613156318664551,
"learning_rate": 9.445026178010471e-08,
"loss": 0.5033,
"step": 5800
},
{
"epoch": 0.06,
"grad_norm": 6.406403541564941,
"learning_rate": 9.44240837696335e-08,
"loss": 0.4344,
"step": 5825
},
{
"epoch": 0.06,
"grad_norm": 4.892160415649414,
"learning_rate": 9.43979057591623e-08,
"loss": 0.4187,
"step": 5850
},
{
"epoch": 0.06,
"grad_norm": 5.776142120361328,
"learning_rate": 9.43717277486911e-08,
"loss": 0.4124,
"step": 5875
},
{
"epoch": 0.06,
"grad_norm": 5.972835063934326,
"learning_rate": 9.434554973821989e-08,
"loss": 0.4042,
"step": 5900
},
{
"epoch": 0.06,
"grad_norm": 6.167374610900879,
"learning_rate": 9.431937172774868e-08,
"loss": 0.4208,
"step": 5925
},
{
"epoch": 0.06,
"grad_norm": 6.850512504577637,
"learning_rate": 9.429319371727749e-08,
"loss": 0.3989,
"step": 5950
},
{
"epoch": 0.06,
"grad_norm": 6.674014091491699,
"learning_rate": 9.426701570680628e-08,
"loss": 0.3725,
"step": 5975
},
{
"epoch": 0.06,
"grad_norm": 7.24482536315918,
"learning_rate": 9.424083769633507e-08,
"loss": 0.376,
"step": 6000
},
{
"epoch": 0.06,
"grad_norm": 6.7198710441589355,
"learning_rate": 9.421465968586388e-08,
"loss": 0.3846,
"step": 6025
},
{
"epoch": 0.06,
"grad_norm": 6.8929829597473145,
"learning_rate": 9.418848167539266e-08,
"loss": 0.3857,
"step": 6050
},
{
"epoch": 0.06,
"grad_norm": 8.093165397644043,
"learning_rate": 9.416230366492147e-08,
"loss": 0.3766,
"step": 6075
},
{
"epoch": 0.06,
"grad_norm": 6.204592227935791,
"learning_rate": 9.413612565445026e-08,
"loss": 0.3779,
"step": 6100
},
{
"epoch": 0.06,
"grad_norm": 5.946498870849609,
"learning_rate": 9.410994764397905e-08,
"loss": 0.3719,
"step": 6125
},
{
"epoch": 0.06,
"grad_norm": 7.825682163238525,
"learning_rate": 9.408376963350785e-08,
"loss": 0.3891,
"step": 6150
},
{
"epoch": 0.06,
"grad_norm": 7.207645416259766,
"learning_rate": 9.405759162303665e-08,
"loss": 0.3901,
"step": 6175
},
{
"epoch": 0.06,
"grad_norm": 6.809023857116699,
"learning_rate": 9.403141361256543e-08,
"loss": 0.4059,
"step": 6200
},
{
"epoch": 0.06,
"grad_norm": 6.104794979095459,
"learning_rate": 9.400523560209424e-08,
"loss": 0.4059,
"step": 6225
},
{
"epoch": 0.07,
"grad_norm": 6.525493621826172,
"learning_rate": 9.397905759162303e-08,
"loss": 0.4047,
"step": 6250
},
{
"epoch": 0.07,
"grad_norm": 6.874316215515137,
"learning_rate": 9.395287958115183e-08,
"loss": 0.514,
"step": 6275
},
{
"epoch": 0.07,
"grad_norm": 5.96618127822876,
"learning_rate": 9.392670157068062e-08,
"loss": 0.4962,
"step": 6300
},
{
"epoch": 0.07,
"grad_norm": 6.455708026885986,
"learning_rate": 9.390052356020942e-08,
"loss": 0.5045,
"step": 6325
},
{
"epoch": 0.07,
"grad_norm": 6.469492435455322,
"learning_rate": 9.387434554973822e-08,
"loss": 0.8458,
"step": 6350
},
{
"epoch": 0.07,
"grad_norm": 9.225332260131836,
"learning_rate": 9.384816753926701e-08,
"loss": 0.8835,
"step": 6375
},
{
"epoch": 0.07,
"grad_norm": 6.529109954833984,
"learning_rate": 9.38219895287958e-08,
"loss": 0.7166,
"step": 6400
},
{
"epoch": 0.07,
"grad_norm": 7.395893096923828,
"learning_rate": 9.379581151832461e-08,
"loss": 0.7075,
"step": 6425
},
{
"epoch": 0.07,
"grad_norm": 8.16038990020752,
"learning_rate": 9.376963350785339e-08,
"loss": 0.9168,
"step": 6450
},
{
"epoch": 0.07,
"grad_norm": 7.322926044464111,
"learning_rate": 9.37434554973822e-08,
"loss": 0.7444,
"step": 6475
},
{
"epoch": 0.07,
"grad_norm": 7.18267297744751,
"learning_rate": 9.371727748691099e-08,
"loss": 0.6744,
"step": 6500
},
{
"epoch": 0.07,
"grad_norm": 7.361169815063477,
"learning_rate": 9.369109947643978e-08,
"loss": 0.5583,
"step": 6525
},
{
"epoch": 0.07,
"grad_norm": 8.085954666137695,
"learning_rate": 9.366492146596858e-08,
"loss": 0.5442,
"step": 6550
},
{
"epoch": 0.07,
"grad_norm": 7.492279052734375,
"learning_rate": 9.363874345549738e-08,
"loss": 0.5684,
"step": 6575
},
{
"epoch": 0.07,
"grad_norm": 6.951526641845703,
"learning_rate": 9.361256544502618e-08,
"loss": 0.5311,
"step": 6600
},
{
"epoch": 0.07,
"grad_norm": 6.271228790283203,
"learning_rate": 9.358638743455497e-08,
"loss": 0.473,
"step": 6625
},
{
"epoch": 0.07,
"grad_norm": 5.724484443664551,
"learning_rate": 9.356020942408376e-08,
"loss": 0.4471,
"step": 6650
},
{
"epoch": 0.07,
"grad_norm": 5.2642669677734375,
"learning_rate": 9.353403141361256e-08,
"loss": 0.4008,
"step": 6675
},
{
"epoch": 0.07,
"grad_norm": 5.970279216766357,
"learning_rate": 9.350785340314136e-08,
"loss": 0.3922,
"step": 6700
},
{
"epoch": 0.07,
"grad_norm": 6.13707160949707,
"learning_rate": 9.348167539267016e-08,
"loss": 0.4149,
"step": 6725
},
{
"epoch": 0.07,
"grad_norm": 5.1920061111450195,
"learning_rate": 9.345549738219895e-08,
"loss": 0.3732,
"step": 6750
},
{
"epoch": 0.07,
"grad_norm": 6.059106349945068,
"learning_rate": 9.342931937172774e-08,
"loss": 0.3783,
"step": 6775
},
{
"epoch": 0.07,
"grad_norm": 5.317996025085449,
"learning_rate": 9.340314136125655e-08,
"loss": 0.3701,
"step": 6800
},
{
"epoch": 0.07,
"grad_norm": 5.347188472747803,
"learning_rate": 9.337696335078533e-08,
"loss": 0.3466,
"step": 6825
},
{
"epoch": 0.07,
"grad_norm": 5.118027687072754,
"learning_rate": 9.335078534031414e-08,
"loss": 0.363,
"step": 6850
},
{
"epoch": 0.07,
"grad_norm": 4.868067264556885,
"learning_rate": 9.332460732984293e-08,
"loss": 0.3696,
"step": 6875
},
{
"epoch": 0.07,
"grad_norm": 5.714309215545654,
"learning_rate": 9.329842931937172e-08,
"loss": 0.3768,
"step": 6900
},
{
"epoch": 0.07,
"grad_norm": 5.903509616851807,
"learning_rate": 9.327225130890052e-08,
"loss": 0.3625,
"step": 6925
},
{
"epoch": 0.07,
"grad_norm": 5.700974941253662,
"learning_rate": 9.324607329842932e-08,
"loss": 0.3717,
"step": 6950
},
{
"epoch": 0.07,
"grad_norm": 6.056822776794434,
"learning_rate": 9.32198952879581e-08,
"loss": 0.3601,
"step": 6975
},
{
"epoch": 0.07,
"grad_norm": 6.140659809112549,
"learning_rate": 9.319371727748691e-08,
"loss": 0.3691,
"step": 7000
},
{
"epoch": 0.07,
"grad_norm": 6.195953369140625,
"learning_rate": 9.31675392670157e-08,
"loss": 0.3632,
"step": 7025
},
{
"epoch": 0.07,
"grad_norm": 4.96120023727417,
"learning_rate": 9.314136125654451e-08,
"loss": 0.3449,
"step": 7050
},
{
"epoch": 0.07,
"grad_norm": 6.803286075592041,
"learning_rate": 9.311518324607329e-08,
"loss": 0.3601,
"step": 7075
},
{
"epoch": 0.07,
"grad_norm": 5.16037654876709,
"learning_rate": 9.30890052356021e-08,
"loss": 0.3478,
"step": 7100
},
{
"epoch": 0.07,
"grad_norm": 5.407104969024658,
"learning_rate": 9.306282722513089e-08,
"loss": 0.3498,
"step": 7125
},
{
"epoch": 0.07,
"grad_norm": 5.451097011566162,
"learning_rate": 9.303664921465968e-08,
"loss": 0.3574,
"step": 7150
},
{
"epoch": 0.07,
"grad_norm": 5.362937927246094,
"learning_rate": 9.301047120418847e-08,
"loss": 0.3477,
"step": 7175
},
{
"epoch": 0.07,
"grad_norm": 5.407390117645264,
"learning_rate": 9.298429319371728e-08,
"loss": 0.3575,
"step": 7200
},
{
"epoch": 0.08,
"grad_norm": 5.426994800567627,
"learning_rate": 9.295811518324606e-08,
"loss": 0.3454,
"step": 7225
},
{
"epoch": 0.08,
"grad_norm": 6.192265510559082,
"learning_rate": 9.293193717277487e-08,
"loss": 0.36,
"step": 7250
},
{
"epoch": 0.08,
"grad_norm": 5.969931125640869,
"learning_rate": 9.290575916230366e-08,
"loss": 0.3479,
"step": 7275
},
{
"epoch": 0.08,
"grad_norm": 5.602126121520996,
"learning_rate": 9.287958115183245e-08,
"loss": 0.3527,
"step": 7300
},
{
"epoch": 0.08,
"grad_norm": 6.191224575042725,
"learning_rate": 9.285340314136125e-08,
"loss": 0.3915,
"step": 7325
},
{
"epoch": 0.08,
"grad_norm": 5.79760217666626,
"learning_rate": 9.282722513089005e-08,
"loss": 0.3922,
"step": 7350
},
{
"epoch": 0.08,
"grad_norm": 8.519009590148926,
"learning_rate": 9.280104712041885e-08,
"loss": 0.4254,
"step": 7375
},
{
"epoch": 0.08,
"grad_norm": 5.360806941986084,
"learning_rate": 9.277486910994764e-08,
"loss": 0.4391,
"step": 7400
},
{
"epoch": 0.08,
"grad_norm": 5.539173603057861,
"learning_rate": 9.274869109947645e-08,
"loss": 0.3988,
"step": 7425
},
{
"epoch": 0.08,
"grad_norm": 7.067492961883545,
"learning_rate": 9.272251308900523e-08,
"loss": 0.3779,
"step": 7450
},
{
"epoch": 0.08,
"grad_norm": 5.135078430175781,
"learning_rate": 9.269633507853403e-08,
"loss": 0.3904,
"step": 7475
},
{
"epoch": 0.08,
"grad_norm": 5.269252300262451,
"learning_rate": 9.267015706806283e-08,
"loss": 0.3597,
"step": 7500
},
{
"epoch": 0.08,
"grad_norm": 7.094182014465332,
"learning_rate": 9.264397905759162e-08,
"loss": 0.3766,
"step": 7525
},
{
"epoch": 0.08,
"grad_norm": 5.993140697479248,
"learning_rate": 9.261780104712041e-08,
"loss": 0.3377,
"step": 7550
},
{
"epoch": 0.08,
"grad_norm": 6.09189510345459,
"learning_rate": 9.259162303664922e-08,
"loss": 0.3779,
"step": 7575
},
{
"epoch": 0.08,
"grad_norm": 5.466849327087402,
"learning_rate": 9.2565445026178e-08,
"loss": 0.3602,
"step": 7600
},
{
"epoch": 0.08,
"grad_norm": 5.297680854797363,
"learning_rate": 9.25392670157068e-08,
"loss": 0.3318,
"step": 7625
},
{
"epoch": 0.08,
"grad_norm": 5.143691539764404,
"learning_rate": 9.25130890052356e-08,
"loss": 0.334,
"step": 7650
},
{
"epoch": 0.08,
"grad_norm": 5.337982654571533,
"learning_rate": 9.248691099476439e-08,
"loss": 0.3343,
"step": 7675
},
{
"epoch": 0.08,
"grad_norm": 5.539205551147461,
"learning_rate": 9.246073298429318e-08,
"loss": 0.3527,
"step": 7700
},
{
"epoch": 0.08,
"grad_norm": 5.057958126068115,
"learning_rate": 9.243455497382199e-08,
"loss": 0.3441,
"step": 7725
},
{
"epoch": 0.08,
"grad_norm": 5.447077751159668,
"learning_rate": 9.240837696335077e-08,
"loss": 0.3368,
"step": 7750
},
{
"epoch": 0.08,
"grad_norm": 5.604344844818115,
"learning_rate": 9.238219895287958e-08,
"loss": 0.3357,
"step": 7775
},
{
"epoch": 0.08,
"grad_norm": 6.193871021270752,
"learning_rate": 9.235602094240837e-08,
"loss": 0.3841,
"step": 7800
},
{
"epoch": 0.08,
"grad_norm": 5.70228910446167,
"learning_rate": 9.232984293193718e-08,
"loss": 0.3991,
"step": 7825
},
{
"epoch": 0.08,
"grad_norm": 6.8992743492126465,
"learning_rate": 9.230366492146596e-08,
"loss": 0.4435,
"step": 7850
},
{
"epoch": 0.08,
"grad_norm": 7.393523693084717,
"learning_rate": 9.227748691099476e-08,
"loss": 0.4094,
"step": 7875
},
{
"epoch": 0.08,
"grad_norm": 5.266127586364746,
"learning_rate": 9.225130890052356e-08,
"loss": 0.3806,
"step": 7900
},
{
"epoch": 0.08,
"grad_norm": 5.960921287536621,
"learning_rate": 9.222513089005235e-08,
"loss": 0.3749,
"step": 7925
},
{
"epoch": 0.08,
"grad_norm": 6.215056896209717,
"learning_rate": 9.219895287958114e-08,
"loss": 0.3956,
"step": 7950
},
{
"epoch": 0.08,
"grad_norm": 4.992290019989014,
"learning_rate": 9.217277486910995e-08,
"loss": 0.414,
"step": 7975
},
{
"epoch": 0.08,
"grad_norm": 5.627460479736328,
"learning_rate": 9.214659685863874e-08,
"loss": 0.4508,
"step": 8000
},
{
"epoch": 0.08,
"grad_norm": 7.53002405166626,
"learning_rate": 9.212041884816754e-08,
"loss": 0.4771,
"step": 8025
},
{
"epoch": 0.08,
"grad_norm": 6.5475172996521,
"learning_rate": 9.209424083769633e-08,
"loss": 0.4636,
"step": 8050
},
{
"epoch": 0.08,
"grad_norm": 6.499009132385254,
"learning_rate": 9.206806282722512e-08,
"loss": 0.5024,
"step": 8075
},
{
"epoch": 0.08,
"grad_norm": 5.928787708282471,
"learning_rate": 9.204188481675393e-08,
"loss": 0.482,
"step": 8100
},
{
"epoch": 0.08,
"grad_norm": 6.647201061248779,
"learning_rate": 9.201570680628272e-08,
"loss": 0.4901,
"step": 8125
},
{
"epoch": 0.08,
"grad_norm": 7.4282355308532715,
"learning_rate": 9.198952879581152e-08,
"loss": 0.509,
"step": 8150
},
{
"epoch": 0.09,
"grad_norm": 8.04277229309082,
"learning_rate": 9.196335078534031e-08,
"loss": 0.5403,
"step": 8175
},
{
"epoch": 0.09,
"grad_norm": 8.562540054321289,
"learning_rate": 9.193717277486911e-08,
"loss": 0.5798,
"step": 8200
},
{
"epoch": 0.09,
"eval_loss": 0.7510205507278442,
"eval_runtime": 275.1584,
"eval_samples_per_second": 9.827,
"eval_steps_per_second": 1.228,
"eval_wer": 37.65067359962184,
"step": 8222
}
],
"logging_steps": 25,
"max_steps": 96000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 4111,
"total_flos": 3.23866357530624e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}