xezpeleta committed on
Commit
cfe270a
1 Parent(s): ea283e7

End of training

.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 wandb/run-20241005_141414-821qpm7o/files/output.log filter=lfs diff=lfs merge=lfs -text
 wandb/run-20241005_141414-821qpm7o/run-821qpm7o.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20241007_131849-0rbzerob/run-0rbzerob.wandb filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,22 +1,25 @@
 ---
 library_name: transformers
+language:
+- eu
 license: apache-2.0
 base_model: openai/whisper-large-v3
 tags:
+- whisper-event
 - generated_from_trainer
 datasets:
-- common_voice_17_0
+- mozilla-foundation/common_voice_17_0
 metrics:
 - wer
 model-index:
-- name: openai/whisper-large-v3
+- name: Whisper Large Basque
   results:
   - task:
       name: Automatic Speech Recognition
       type: automatic-speech-recognition
     dataset:
-      name: common_voice_17_0
-      type: common_voice_17_0
+      name: mozilla-foundation/common_voice_17_0 eu
+      type: mozilla-foundation/common_voice_17_0
       config: eu
       split: test
       args: eu
@@ -29,9 +32,9 @@ model-index:
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# openai/whisper-large-v3
+# Whisper Large Basque
 
-This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the common_voice_17_0 dataset.
+This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the mozilla-foundation/common_voice_17_0 eu dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.1259
 - Wer: 7.2154
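For context on using the resulting checkpoint, below is a minimal inference sketch with the 🤗 Transformers `pipeline` API. The repository id is a placeholder (this commit page does not state it), and the `language`/`task` generation hints are assumptions that may need adjusting for your Transformers version.

```python
# Minimal sketch: transcribe Basque audio with the fine-tuned checkpoint.
# "your-namespace/whisper-large-v3-eu" is a placeholder; substitute the
# actual repository id of this model.
import torch
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="your-namespace/whisper-large-v3-eu",  # placeholder repo id
    torch_dtype=torch.float16,
    device="cuda:0" if torch.cuda.is_available() else "cpu",
)

# Whisper accepts generation hints; "basque"/"transcribe" are assumptions here.
result = asr(
    "sample_eu.wav",  # illustrative audio file path
    generate_kwargs={"language": "basque", "task": "transcribe"},
)
print(result["text"])
```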
all_results.json CHANGED
@@ -1,8 +1,14 @@
 {
-    "eval_loss": 0.9277587532997131,
+    "epoch": 5.048,
+    "eval_loss": 0.12588092684745789,
     "eval_model_preparation_time": 0.0102,
-    "eval_runtime": 4165.1595,
-    "eval_samples_per_second": 3.272,
-    "eval_steps_per_second": 0.409,
-    "eval_wer": 44.29532045879292
+    "eval_runtime": 4097.4891,
+    "eval_samples_per_second": 3.326,
+    "eval_steps_per_second": 0.416,
+    "eval_wer": 7.215361500971087,
+    "total_flos": 4.891718061785088e+20,
+    "train_loss": 0.0,
+    "train_runtime": 289.8068,
+    "train_samples_per_second": 552.092,
+    "train_steps_per_second": 34.506
 }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
 {
-    "eval_loss": 0.9277587532997131,
-    "eval_model_preparation_time": 0.0102,
-    "eval_runtime": 4165.1595,
-    "eval_samples_per_second": 3.272,
-    "eval_steps_per_second": 0.409,
-    "eval_wer": 44.29532045879292
+    "epoch": 5.048,
+    "eval_loss": 0.12588092684745789,
+    "eval_runtime": 4097.4891,
+    "eval_samples_per_second": 3.326,
+    "eval_steps_per_second": 0.416,
+    "eval_wer": 7.215361500971087
 }
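The `eval_wer` above is a word error rate expressed as a percentage. Below is a small sketch of how such a score is commonly computed with the `evaluate` library; the transcript lists are made-up examples, and the percentage scaling is an assumption about how the reported number was derived.

```python
# Sketch: compute a percentage WER in the style of eval_results.json.
# The predictions/references below are illustrative, not from this model.
import evaluate

wer_metric = evaluate.load("wer")

predictions = ["kaixo mundua", "zer moduz zaude"]       # example model transcripts
references = ["kaixo mundua", "zer moduz zaude gaur"]   # example ground truth

# evaluate returns a fraction; multiplying by 100 gives a percentage
# comparable to eval_wer (e.g. 7.215361500971087 above).
wer = 100 * wer_metric.compute(predictions=predictions, references=references)
print(f"WER: {wer:.4f}%")
```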
train_results.json ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 5.048,
+    "total_flos": 4.891718061785088e+20,
+    "train_loss": 0.0,
+    "train_runtime": 289.8068,
+    "train_samples_per_second": 552.092,
+    "train_steps_per_second": 34.506
+}
trainer_state.json ADDED
@@ -0,0 +1,2724 @@
1
+ {
2
+ "best_metric": 7.215361500971087,
3
+ "best_model_checkpoint": "./checkpoint-9000",
4
+ "epoch": 5.048,
5
+ "eval_steps": 500,
6
+ "global_step": 9000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0025,
13
+ "grad_norm": 6.131621360778809,
14
+ "learning_rate": 2.1875e-07,
15
+ "loss": 0.9345,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.005,
20
+ "grad_norm": 6.021520137786865,
21
+ "learning_rate": 4.375e-07,
22
+ "loss": 0.8231,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.0075,
27
+ "grad_norm": 5.526496410369873,
28
+ "learning_rate": 6.5625e-07,
29
+ "loss": 0.5623,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.01,
34
+ "grad_norm": 4.9277825355529785,
35
+ "learning_rate": 8.75e-07,
36
+ "loss": 0.4173,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.0125,
41
+ "grad_norm": 4.292990684509277,
42
+ "learning_rate": 1.09375e-06,
43
+ "loss": 0.385,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.015,
48
+ "grad_norm": 5.749295234680176,
49
+ "learning_rate": 1.3125e-06,
50
+ "loss": 0.3931,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.0175,
55
+ "grad_norm": 3.8306965827941895,
56
+ "learning_rate": 1.5312499999999997e-06,
57
+ "loss": 0.3516,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.02,
62
+ "grad_norm": 4.687748908996582,
63
+ "learning_rate": 1.75e-06,
64
+ "loss": 0.3235,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.0225,
69
+ "grad_norm": 4.232759952545166,
70
+ "learning_rate": 1.96875e-06,
71
+ "loss": 0.3314,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.025,
76
+ "grad_norm": 4.185751914978027,
77
+ "learning_rate": 2.1875e-06,
78
+ "loss": 0.309,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.0275,
83
+ "grad_norm": 4.818612098693848,
84
+ "learning_rate": 2.40625e-06,
85
+ "loss": 0.2991,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.03,
90
+ "grad_norm": 4.171736717224121,
91
+ "learning_rate": 2.625e-06,
92
+ "loss": 0.2832,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.0325,
97
+ "grad_norm": 5.217376708984375,
98
+ "learning_rate": 2.8437499999999997e-06,
99
+ "loss": 0.2873,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.035,
104
+ "grad_norm": 4.671106815338135,
105
+ "learning_rate": 3.0624999999999995e-06,
106
+ "loss": 0.2957,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.0375,
111
+ "grad_norm": 3.9175262451171875,
112
+ "learning_rate": 3.2812499999999997e-06,
113
+ "loss": 0.2634,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.04,
118
+ "grad_norm": 4.647582054138184,
119
+ "learning_rate": 3.5e-06,
120
+ "loss": 0.2541,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.0425,
125
+ "grad_norm": 3.25675368309021,
126
+ "learning_rate": 3.7187499999999998e-06,
127
+ "loss": 0.2244,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.045,
132
+ "grad_norm": 4.597206115722656,
133
+ "learning_rate": 3.9375e-06,
134
+ "loss": 0.2492,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.0475,
139
+ "grad_norm": 4.602332592010498,
140
+ "learning_rate": 4.156249999999999e-06,
141
+ "loss": 0.246,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.05,
146
+ "grad_norm": 3.6419622898101807,
147
+ "learning_rate": 4.375e-06,
148
+ "loss": 0.2208,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.05,
153
+ "eval_loss": 0.2592349350452423,
154
+ "eval_runtime": 4116.5906,
155
+ "eval_samples_per_second": 3.311,
156
+ "eval_steps_per_second": 0.414,
157
+ "eval_wer": 20.691487412510533,
158
+ "step": 500
159
+ },
160
+ {
161
+ "epoch": 0.0525,
162
+ "grad_norm": 3.6599488258361816,
163
+ "learning_rate": 4.363486842105263e-06,
164
+ "loss": 0.2539,
165
+ "step": 525
166
+ },
167
+ {
168
+ "epoch": 0.055,
169
+ "grad_norm": 3.6934616565704346,
170
+ "learning_rate": 4.351973684210526e-06,
171
+ "loss": 0.2313,
172
+ "step": 550
173
+ },
174
+ {
175
+ "epoch": 0.0575,
176
+ "grad_norm": 3.7546138763427734,
177
+ "learning_rate": 4.340460526315789e-06,
178
+ "loss": 0.2272,
179
+ "step": 575
180
+ },
181
+ {
182
+ "epoch": 0.06,
183
+ "grad_norm": 3.096877098083496,
184
+ "learning_rate": 4.3289473684210525e-06,
185
+ "loss": 0.2373,
186
+ "step": 600
187
+ },
188
+ {
189
+ "epoch": 0.0625,
190
+ "grad_norm": 3.572812795639038,
191
+ "learning_rate": 4.3174342105263155e-06,
192
+ "loss": 0.2285,
193
+ "step": 625
194
+ },
195
+ {
196
+ "epoch": 0.065,
197
+ "grad_norm": 3.3494396209716797,
198
+ "learning_rate": 4.3059210526315785e-06,
199
+ "loss": 0.2293,
200
+ "step": 650
201
+ },
202
+ {
203
+ "epoch": 0.0675,
204
+ "grad_norm": 3.5156869888305664,
205
+ "learning_rate": 4.2944078947368415e-06,
206
+ "loss": 0.2063,
207
+ "step": 675
208
+ },
209
+ {
210
+ "epoch": 0.07,
211
+ "grad_norm": 3.698807716369629,
212
+ "learning_rate": 4.282894736842105e-06,
213
+ "loss": 0.2113,
214
+ "step": 700
215
+ },
216
+ {
217
+ "epoch": 0.0725,
218
+ "grad_norm": 3.716585636138916,
219
+ "learning_rate": 4.271381578947368e-06,
220
+ "loss": 0.2055,
221
+ "step": 725
222
+ },
223
+ {
224
+ "epoch": 0.075,
225
+ "grad_norm": 4.204227924346924,
226
+ "learning_rate": 4.2598684210526314e-06,
227
+ "loss": 0.2114,
228
+ "step": 750
229
+ },
230
+ {
231
+ "epoch": 0.0775,
232
+ "grad_norm": 3.479562282562256,
233
+ "learning_rate": 4.2483552631578944e-06,
234
+ "loss": 0.2224,
235
+ "step": 775
236
+ },
237
+ {
238
+ "epoch": 0.08,
239
+ "grad_norm": 4.5203094482421875,
240
+ "learning_rate": 4.2368421052631575e-06,
241
+ "loss": 0.2523,
242
+ "step": 800
243
+ },
244
+ {
245
+ "epoch": 0.0825,
246
+ "grad_norm": 3.6081738471984863,
247
+ "learning_rate": 4.2253289473684205e-06,
248
+ "loss": 0.2383,
249
+ "step": 825
250
+ },
251
+ {
252
+ "epoch": 0.085,
253
+ "grad_norm": 3.2602758407592773,
254
+ "learning_rate": 4.2138157894736835e-06,
255
+ "loss": 0.1808,
256
+ "step": 850
257
+ },
258
+ {
259
+ "epoch": 0.0875,
260
+ "grad_norm": 3.6786868572235107,
261
+ "learning_rate": 4.202302631578947e-06,
262
+ "loss": 0.1747,
263
+ "step": 875
264
+ },
265
+ {
266
+ "epoch": 0.09,
267
+ "grad_norm": 3.1120803356170654,
268
+ "learning_rate": 4.19078947368421e-06,
269
+ "loss": 0.1662,
270
+ "step": 900
271
+ },
272
+ {
273
+ "epoch": 0.0925,
274
+ "grad_norm": 3.1962203979492188,
275
+ "learning_rate": 4.179276315789473e-06,
276
+ "loss": 0.1771,
277
+ "step": 925
278
+ },
279
+ {
280
+ "epoch": 0.095,
281
+ "grad_norm": 3.172363758087158,
282
+ "learning_rate": 4.167763157894736e-06,
283
+ "loss": 0.1751,
284
+ "step": 950
285
+ },
286
+ {
287
+ "epoch": 0.0975,
288
+ "grad_norm": 2.4304590225219727,
289
+ "learning_rate": 4.156249999999999e-06,
290
+ "loss": 0.1701,
291
+ "step": 975
292
+ },
293
+ {
294
+ "epoch": 0.1,
295
+ "grad_norm": 3.193345308303833,
296
+ "learning_rate": 4.144736842105262e-06,
297
+ "loss": 0.1489,
298
+ "step": 1000
299
+ },
300
+ {
301
+ "epoch": 0.1,
302
+ "eval_loss": 0.1971057653427124,
303
+ "eval_runtime": 4130.6867,
304
+ "eval_samples_per_second": 3.3,
305
+ "eval_steps_per_second": 0.413,
306
+ "eval_wer": 14.68265601524424,
307
+ "step": 1000
308
+ },
309
+ {
310
+ "epoch": 0.1025,
311
+ "grad_norm": 3.322065591812134,
312
+ "learning_rate": 4.133223684210526e-06,
313
+ "loss": 0.1701,
314
+ "step": 1025
315
+ },
316
+ {
317
+ "epoch": 0.105,
318
+ "grad_norm": 3.5462722778320312,
319
+ "learning_rate": 4.121710526315789e-06,
320
+ "loss": 0.1875,
321
+ "step": 1050
322
+ },
323
+ {
324
+ "epoch": 0.1075,
325
+ "grad_norm": 3.39326810836792,
326
+ "learning_rate": 4.110197368421052e-06,
327
+ "loss": 0.1506,
328
+ "step": 1075
329
+ },
330
+ {
331
+ "epoch": 0.11,
332
+ "grad_norm": 2.9165821075439453,
333
+ "learning_rate": 4.098684210526315e-06,
334
+ "loss": 0.1525,
335
+ "step": 1100
336
+ },
337
+ {
338
+ "epoch": 0.1125,
339
+ "grad_norm": 3.262007236480713,
340
+ "learning_rate": 4.087171052631578e-06,
341
+ "loss": 0.157,
342
+ "step": 1125
343
+ },
344
+ {
345
+ "epoch": 0.115,
346
+ "grad_norm": 2.4523119926452637,
347
+ "learning_rate": 4.075657894736842e-06,
348
+ "loss": 0.1416,
349
+ "step": 1150
350
+ },
351
+ {
352
+ "epoch": 0.1175,
353
+ "grad_norm": 2.7651798725128174,
354
+ "learning_rate": 4.064144736842105e-06,
355
+ "loss": 0.1527,
356
+ "step": 1175
357
+ },
358
+ {
359
+ "epoch": 0.12,
360
+ "grad_norm": 3.609523296356201,
361
+ "learning_rate": 4.052631578947368e-06,
362
+ "loss": 0.1822,
363
+ "step": 1200
364
+ },
365
+ {
366
+ "epoch": 0.1225,
367
+ "grad_norm": 3.8101985454559326,
368
+ "learning_rate": 4.041118421052631e-06,
369
+ "loss": 0.1703,
370
+ "step": 1225
371
+ },
372
+ {
373
+ "epoch": 0.125,
374
+ "grad_norm": 3.8921287059783936,
375
+ "learning_rate": 4.029605263157894e-06,
376
+ "loss": 0.1924,
377
+ "step": 1250
378
+ },
379
+ {
380
+ "epoch": 0.1275,
381
+ "grad_norm": 4.463279724121094,
382
+ "learning_rate": 4.018092105263157e-06,
383
+ "loss": 0.1818,
384
+ "step": 1275
385
+ },
386
+ {
387
+ "epoch": 0.13,
388
+ "grad_norm": 3.6556308269500732,
389
+ "learning_rate": 4.00657894736842e-06,
390
+ "loss": 0.1726,
391
+ "step": 1300
392
+ },
393
+ {
394
+ "epoch": 0.1325,
395
+ "grad_norm": 2.98067569732666,
396
+ "learning_rate": 3.995065789473683e-06,
397
+ "loss": 0.174,
398
+ "step": 1325
399
+ },
400
+ {
401
+ "epoch": 0.135,
402
+ "grad_norm": 2.8287429809570312,
403
+ "learning_rate": 3.983552631578947e-06,
404
+ "loss": 0.1631,
405
+ "step": 1350
406
+ },
407
+ {
408
+ "epoch": 0.1375,
409
+ "grad_norm": 2.6438794136047363,
410
+ "learning_rate": 3.97203947368421e-06,
411
+ "loss": 0.1475,
412
+ "step": 1375
413
+ },
414
+ {
415
+ "epoch": 0.14,
416
+ "grad_norm": 3.513123035430908,
417
+ "learning_rate": 3.960526315789473e-06,
418
+ "loss": 0.1457,
419
+ "step": 1400
420
+ },
421
+ {
422
+ "epoch": 0.1425,
423
+ "grad_norm": 2.4688916206359863,
424
+ "learning_rate": 3.949013157894737e-06,
425
+ "loss": 0.1375,
426
+ "step": 1425
427
+ },
428
+ {
429
+ "epoch": 0.145,
430
+ "grad_norm": 4.005943775177002,
431
+ "learning_rate": 3.9375e-06,
432
+ "loss": 0.1623,
433
+ "step": 1450
434
+ },
435
+ {
436
+ "epoch": 0.1475,
437
+ "grad_norm": 2.91786789894104,
438
+ "learning_rate": 3.925986842105263e-06,
439
+ "loss": 0.1701,
440
+ "step": 1475
441
+ },
442
+ {
443
+ "epoch": 0.15,
444
+ "grad_norm": 3.5332415103912354,
445
+ "learning_rate": 3.914473684210526e-06,
446
+ "loss": 0.1973,
447
+ "step": 1500
448
+ },
449
+ {
450
+ "epoch": 0.15,
451
+ "eval_loss": 0.17469166219234467,
452
+ "eval_runtime": 4132.0041,
453
+ "eval_samples_per_second": 3.299,
454
+ "eval_steps_per_second": 0.412,
455
+ "eval_wer": 12.377697973542453,
456
+ "step": 1500
457
+ },
458
+ {
459
+ "epoch": 0.1525,
460
+ "grad_norm": 4.05070686340332,
461
+ "learning_rate": 3.902960526315789e-06,
462
+ "loss": 0.1796,
463
+ "step": 1525
464
+ },
465
+ {
466
+ "epoch": 0.155,
467
+ "grad_norm": 2.989821195602417,
468
+ "learning_rate": 3.891447368421052e-06,
469
+ "loss": 0.1561,
470
+ "step": 1550
471
+ },
472
+ {
473
+ "epoch": 0.1575,
474
+ "grad_norm": 2.9603219032287598,
475
+ "learning_rate": 3.879934210526315e-06,
476
+ "loss": 0.1609,
477
+ "step": 1575
478
+ },
479
+ {
480
+ "epoch": 0.16,
481
+ "grad_norm": 3.2663583755493164,
482
+ "learning_rate": 3.868421052631579e-06,
483
+ "loss": 0.1833,
484
+ "step": 1600
485
+ },
486
+ {
487
+ "epoch": 0.1625,
488
+ "grad_norm": 3.459775686264038,
489
+ "learning_rate": 3.856907894736842e-06,
490
+ "loss": 0.1727,
491
+ "step": 1625
492
+ },
493
+ {
494
+ "epoch": 0.165,
495
+ "grad_norm": 3.427720069885254,
496
+ "learning_rate": 3.845394736842105e-06,
497
+ "loss": 0.181,
498
+ "step": 1650
499
+ },
500
+ {
501
+ "epoch": 0.1675,
502
+ "grad_norm": 4.471118450164795,
503
+ "learning_rate": 3.833881578947368e-06,
504
+ "loss": 0.1536,
505
+ "step": 1675
506
+ },
507
+ {
508
+ "epoch": 0.17,
509
+ "grad_norm": 3.1428306102752686,
510
+ "learning_rate": 3.822368421052632e-06,
511
+ "loss": 0.1372,
512
+ "step": 1700
513
+ },
514
+ {
515
+ "epoch": 1.0021,
516
+ "grad_norm": 2.8270132541656494,
517
+ "learning_rate": 3.8108552631578944e-06,
518
+ "loss": 0.1454,
519
+ "step": 1725
520
+ },
521
+ {
522
+ "epoch": 1.0046,
523
+ "grad_norm": 3.0873589515686035,
524
+ "learning_rate": 3.799342105263158e-06,
525
+ "loss": 0.1303,
526
+ "step": 1750
527
+ },
528
+ {
529
+ "epoch": 1.0071,
530
+ "grad_norm": 3.187711000442505,
531
+ "learning_rate": 3.787828947368421e-06,
532
+ "loss": 0.1383,
533
+ "step": 1775
534
+ },
535
+ {
536
+ "epoch": 1.0096,
537
+ "grad_norm": 3.1710643768310547,
538
+ "learning_rate": 3.776315789473684e-06,
539
+ "loss": 0.1626,
540
+ "step": 1800
541
+ },
542
+ {
543
+ "epoch": 1.0121,
544
+ "grad_norm": 3.4516818523406982,
545
+ "learning_rate": 3.7648026315789473e-06,
546
+ "loss": 0.1405,
547
+ "step": 1825
548
+ },
549
+ {
550
+ "epoch": 1.0146,
551
+ "grad_norm": 2.930408000946045,
552
+ "learning_rate": 3.7532894736842103e-06,
553
+ "loss": 0.143,
554
+ "step": 1850
555
+ },
556
+ {
557
+ "epoch": 1.0171,
558
+ "grad_norm": 3.066941261291504,
559
+ "learning_rate": 3.7417763157894733e-06,
560
+ "loss": 0.1437,
561
+ "step": 1875
562
+ },
563
+ {
564
+ "epoch": 1.0196,
565
+ "grad_norm": 3.389916181564331,
566
+ "learning_rate": 3.7302631578947363e-06,
567
+ "loss": 0.1289,
568
+ "step": 1900
569
+ },
570
+ {
571
+ "epoch": 1.0221,
572
+ "grad_norm": 3.048574209213257,
573
+ "learning_rate": 3.7187499999999998e-06,
574
+ "loss": 0.1415,
575
+ "step": 1925
576
+ },
577
+ {
578
+ "epoch": 1.0246,
579
+ "grad_norm": 2.5267295837402344,
580
+ "learning_rate": 3.7072368421052628e-06,
581
+ "loss": 0.1386,
582
+ "step": 1950
583
+ },
584
+ {
585
+ "epoch": 1.0271,
586
+ "grad_norm": 3.151757001876831,
587
+ "learning_rate": 3.6957236842105258e-06,
588
+ "loss": 0.1436,
589
+ "step": 1975
590
+ },
591
+ {
592
+ "epoch": 1.0296,
593
+ "grad_norm": 3.629039764404297,
594
+ "learning_rate": 3.684210526315789e-06,
595
+ "loss": 0.1353,
596
+ "step": 2000
597
+ },
598
+ {
599
+ "epoch": 1.0296,
600
+ "eval_loss": 0.1527385264635086,
601
+ "eval_runtime": 4116.9756,
602
+ "eval_samples_per_second": 3.311,
603
+ "eval_steps_per_second": 0.414,
604
+ "eval_wer": 10.719520685990693,
605
+ "step": 2000
606
+ },
607
+ {
608
+ "epoch": 1.0321,
609
+ "grad_norm": 1.8788173198699951,
610
+ "learning_rate": 3.6726973684210522e-06,
611
+ "loss": 0.1322,
612
+ "step": 2025
613
+ },
614
+ {
615
+ "epoch": 1.0346,
616
+ "grad_norm": 2.587233066558838,
617
+ "learning_rate": 3.6611842105263157e-06,
618
+ "loss": 0.1176,
619
+ "step": 2050
620
+ },
621
+ {
622
+ "epoch": 1.0371,
623
+ "grad_norm": 4.001532077789307,
624
+ "learning_rate": 3.6496710526315787e-06,
625
+ "loss": 0.1233,
626
+ "step": 2075
627
+ },
628
+ {
629
+ "epoch": 1.0396,
630
+ "grad_norm": 3.3947739601135254,
631
+ "learning_rate": 3.638157894736842e-06,
632
+ "loss": 0.1188,
633
+ "step": 2100
634
+ },
635
+ {
636
+ "epoch": 1.0421,
637
+ "grad_norm": 3.4743120670318604,
638
+ "learning_rate": 3.626644736842105e-06,
639
+ "loss": 0.1318,
640
+ "step": 2125
641
+ },
642
+ {
643
+ "epoch": 1.0446,
644
+ "grad_norm": 2.9288718700408936,
645
+ "learning_rate": 3.615131578947368e-06,
646
+ "loss": 0.1224,
647
+ "step": 2150
648
+ },
649
+ {
650
+ "epoch": 1.0471,
651
+ "grad_norm": 2.6081368923187256,
652
+ "learning_rate": 3.603618421052631e-06,
653
+ "loss": 0.1232,
654
+ "step": 2175
655
+ },
656
+ {
657
+ "epoch": 1.0496,
658
+ "grad_norm": 2.4068429470062256,
659
+ "learning_rate": 3.5921052631578946e-06,
660
+ "loss": 0.1073,
661
+ "step": 2200
662
+ },
663
+ {
664
+ "epoch": 1.0521,
665
+ "grad_norm": 3.049074411392212,
666
+ "learning_rate": 3.5805921052631576e-06,
667
+ "loss": 0.1071,
668
+ "step": 2225
669
+ },
670
+ {
671
+ "epoch": 1.0546,
672
+ "grad_norm": 2.0809032917022705,
673
+ "learning_rate": 3.5690789473684206e-06,
674
+ "loss": 0.1217,
675
+ "step": 2250
676
+ },
677
+ {
678
+ "epoch": 1.0571,
679
+ "grad_norm": 3.0854332447052,
680
+ "learning_rate": 3.5575657894736836e-06,
681
+ "loss": 0.1332,
682
+ "step": 2275
683
+ },
684
+ {
685
+ "epoch": 1.0596,
686
+ "grad_norm": 3.580145835876465,
687
+ "learning_rate": 3.546052631578947e-06,
688
+ "loss": 0.131,
689
+ "step": 2300
690
+ },
691
+ {
692
+ "epoch": 1.0621,
693
+ "grad_norm": 3.8924479484558105,
694
+ "learning_rate": 3.53453947368421e-06,
695
+ "loss": 0.136,
696
+ "step": 2325
697
+ },
698
+ {
699
+ "epoch": 1.0646,
700
+ "grad_norm": 2.8398871421813965,
701
+ "learning_rate": 3.523026315789473e-06,
702
+ "loss": 0.1081,
703
+ "step": 2350
704
+ },
705
+ {
706
+ "epoch": 1.0671,
707
+ "grad_norm": 3.007026195526123,
708
+ "learning_rate": 3.511513157894737e-06,
709
+ "loss": 0.1115,
710
+ "step": 2375
711
+ },
712
+ {
713
+ "epoch": 1.0695999999999999,
714
+ "grad_norm": 1.5712552070617676,
715
+ "learning_rate": 3.5e-06,
716
+ "loss": 0.1183,
717
+ "step": 2400
718
+ },
719
+ {
720
+ "epoch": 1.0721,
721
+ "grad_norm": 3.844963312149048,
722
+ "learning_rate": 3.488486842105263e-06,
723
+ "loss": 0.113,
724
+ "step": 2425
725
+ },
726
+ {
727
+ "epoch": 1.0746,
728
+ "grad_norm": 2.8939759731292725,
729
+ "learning_rate": 3.476973684210526e-06,
730
+ "loss": 0.1115,
731
+ "step": 2450
732
+ },
733
+ {
734
+ "epoch": 1.0771,
735
+ "grad_norm": 1.8150537014007568,
736
+ "learning_rate": 3.4654605263157894e-06,
737
+ "loss": 0.1117,
738
+ "step": 2475
739
+ },
740
+ {
741
+ "epoch": 1.0796000000000001,
742
+ "grad_norm": 2.839418649673462,
743
+ "learning_rate": 3.4539473684210524e-06,
744
+ "loss": 0.1065,
745
+ "step": 2500
746
+ },
747
+ {
748
+ "epoch": 1.0796000000000001,
749
+ "eval_loss": 0.1456422209739685,
750
+ "eval_runtime": 4133.4016,
751
+ "eval_samples_per_second": 3.298,
752
+ "eval_steps_per_second": 0.412,
753
+ "eval_wer": 9.869361281102277,
754
+ "step": 2500
755
+ },
756
+ {
757
+ "epoch": 1.0821,
758
+ "grad_norm": 3.4274985790252686,
759
+ "learning_rate": 3.4424342105263154e-06,
760
+ "loss": 0.1067,
761
+ "step": 2525
762
+ },
763
+ {
764
+ "epoch": 1.0846,
765
+ "grad_norm": 2.2946057319641113,
766
+ "learning_rate": 3.4309210526315784e-06,
767
+ "loss": 0.1038,
768
+ "step": 2550
769
+ },
770
+ {
771
+ "epoch": 1.0871,
772
+ "grad_norm": 2.5364551544189453,
773
+ "learning_rate": 3.419407894736842e-06,
774
+ "loss": 0.1073,
775
+ "step": 2575
776
+ },
777
+ {
778
+ "epoch": 1.0896,
779
+ "grad_norm": 2.9779515266418457,
780
+ "learning_rate": 3.4083552631578944e-06,
781
+ "loss": 0.1067,
782
+ "step": 2600
783
+ },
784
+ {
785
+ "epoch": 1.0921,
786
+ "grad_norm": 2.502685308456421,
787
+ "learning_rate": 3.3968421052631574e-06,
788
+ "loss": 0.1229,
789
+ "step": 2625
790
+ },
791
+ {
792
+ "epoch": 1.0946,
793
+ "grad_norm": 2.181756019592285,
794
+ "learning_rate": 3.3853289473684205e-06,
795
+ "loss": 0.1071,
796
+ "step": 2650
797
+ },
798
+ {
799
+ "epoch": 1.0971,
800
+ "grad_norm": 2.428738594055176,
801
+ "learning_rate": 3.3738157894736843e-06,
802
+ "loss": 0.101,
803
+ "step": 2675
804
+ },
805
+ {
806
+ "epoch": 1.0996,
807
+ "grad_norm": 3.797952651977539,
808
+ "learning_rate": 3.3623026315789473e-06,
809
+ "loss": 0.1198,
810
+ "step": 2700
811
+ },
812
+ {
813
+ "epoch": 1.1021,
814
+ "grad_norm": 2.9902758598327637,
815
+ "learning_rate": 3.3507894736842103e-06,
816
+ "loss": 0.1013,
817
+ "step": 2725
818
+ },
819
+ {
820
+ "epoch": 1.1046,
821
+ "grad_norm": 3.0514307022094727,
822
+ "learning_rate": 3.3392763157894734e-06,
823
+ "loss": 0.1075,
824
+ "step": 2750
825
+ },
826
+ {
827
+ "epoch": 1.1071,
828
+ "grad_norm": 3.2877554893493652,
829
+ "learning_rate": 3.327763157894737e-06,
830
+ "loss": 0.1059,
831
+ "step": 2775
832
+ },
833
+ {
834
+ "epoch": 1.1096,
835
+ "grad_norm": 2.3952691555023193,
836
+ "learning_rate": 3.31625e-06,
837
+ "loss": 0.0926,
838
+ "step": 2800
839
+ },
840
+ {
841
+ "epoch": 1.1121,
842
+ "grad_norm": 2.2840464115142822,
843
+ "learning_rate": 3.304736842105263e-06,
844
+ "loss": 0.1048,
845
+ "step": 2825
846
+ },
847
+ {
848
+ "epoch": 1.1146,
849
+ "grad_norm": 2.7062416076660156,
850
+ "learning_rate": 3.293223684210526e-06,
851
+ "loss": 0.1049,
852
+ "step": 2850
853
+ },
854
+ {
855
+ "epoch": 1.1171,
856
+ "grad_norm": 2.971315860748291,
857
+ "learning_rate": 3.2817105263157893e-06,
858
+ "loss": 0.1073,
859
+ "step": 2875
860
+ },
861
+ {
862
+ "epoch": 1.1196,
863
+ "grad_norm": 2.8689844608306885,
864
+ "learning_rate": 3.2701973684210523e-06,
865
+ "loss": 0.1141,
866
+ "step": 2900
867
+ },
868
+ {
869
+ "epoch": 1.1221,
870
+ "grad_norm": 3.6150734424591064,
871
+ "learning_rate": 3.2586842105263153e-06,
872
+ "loss": 0.1066,
873
+ "step": 2925
874
+ },
875
+ {
876
+ "epoch": 1.1246,
877
+ "grad_norm": 2.3004024028778076,
878
+ "learning_rate": 3.2471710526315783e-06,
879
+ "loss": 0.1248,
880
+ "step": 2950
881
+ },
882
+ {
883
+ "epoch": 1.1271,
884
+ "grad_norm": 2.5995240211486816,
885
+ "learning_rate": 3.2356578947368417e-06,
886
+ "loss": 0.0972,
887
+ "step": 2975
888
+ },
889
+ {
890
+ "epoch": 1.1296,
891
+ "grad_norm": 2.957960367202759,
892
+ "learning_rate": 3.224144736842105e-06,
893
+ "loss": 0.106,
894
+ "step": 3000
895
+ },
896
+ {
897
+ "epoch": 1.1296,
898
+ "eval_loss": 0.13624447584152222,
899
+ "eval_runtime": 4123.4662,
900
+ "eval_samples_per_second": 3.305,
901
+ "eval_steps_per_second": 0.413,
902
+ "eval_wer": 9.09249148008355,
903
+ "step": 3000
904
+ },
905
+ {
906
+ "epoch": 1.1320999999999999,
907
+ "grad_norm": 2.653007984161377,
908
+ "learning_rate": 3.212631578947368e-06,
909
+ "loss": 0.1083,
910
+ "step": 3025
911
+ },
912
+ {
913
+ "epoch": 1.1346,
914
+ "grad_norm": 2.6895744800567627,
915
+ "learning_rate": 3.2011184210526316e-06,
916
+ "loss": 0.1119,
917
+ "step": 3050
918
+ },
919
+ {
920
+ "epoch": 1.1371,
921
+ "grad_norm": 2.1507463455200195,
922
+ "learning_rate": 3.1896052631578946e-06,
923
+ "loss": 0.0944,
924
+ "step": 3075
925
+ },
926
+ {
927
+ "epoch": 1.1396,
928
+ "grad_norm": 3.61063289642334,
929
+ "learning_rate": 3.1780921052631576e-06,
930
+ "loss": 0.095,
931
+ "step": 3100
932
+ },
933
+ {
934
+ "epoch": 1.1421000000000001,
935
+ "grad_norm": 2.570584774017334,
936
+ "learning_rate": 3.1665789473684206e-06,
937
+ "loss": 0.1076,
938
+ "step": 3125
939
+ },
940
+ {
941
+ "epoch": 1.1446,
942
+ "grad_norm": 3.05507230758667,
943
+ "learning_rate": 3.155065789473684e-06,
944
+ "loss": 0.1175,
945
+ "step": 3150
946
+ },
947
+ {
948
+ "epoch": 1.1471,
949
+ "grad_norm": 2.82817006111145,
950
+ "learning_rate": 3.143552631578947e-06,
951
+ "loss": 0.0965,
952
+ "step": 3175
953
+ },
954
+ {
955
+ "epoch": 1.1496,
956
+ "grad_norm": 2.336517572402954,
957
+ "learning_rate": 3.13203947368421e-06,
958
+ "loss": 0.0955,
959
+ "step": 3200
960
+ },
961
+ {
962
+ "epoch": 1.1521,
963
+ "grad_norm": 3.8640036582946777,
964
+ "learning_rate": 3.120526315789473e-06,
965
+ "loss": 0.1044,
966
+ "step": 3225
967
+ },
968
+ {
969
+ "epoch": 1.1546,
970
+ "grad_norm": 3.7205588817596436,
971
+ "learning_rate": 3.1090131578947366e-06,
972
+ "loss": 0.1013,
973
+ "step": 3250
974
+ },
975
+ {
976
+ "epoch": 1.1571,
977
+ "grad_norm": 2.1962900161743164,
978
+ "learning_rate": 3.0974999999999996e-06,
979
+ "loss": 0.0978,
980
+ "step": 3275
981
+ },
982
+ {
983
+ "epoch": 1.1596,
984
+ "grad_norm": 3.3310599327087402,
985
+ "learning_rate": 3.0859868421052626e-06,
986
+ "loss": 0.1089,
987
+ "step": 3300
988
+ },
989
+ {
990
+ "epoch": 1.1621,
991
+ "grad_norm": 2.699566602706909,
992
+ "learning_rate": 3.074473684210526e-06,
993
+ "loss": 0.1078,
994
+ "step": 3325
995
+ },
996
+ {
997
+ "epoch": 1.1646,
998
+ "grad_norm": 3.79370379447937,
999
+ "learning_rate": 3.0629605263157894e-06,
1000
+ "loss": 0.1118,
1001
+ "step": 3350
1002
+ },
1003
+ {
1004
+ "epoch": 1.1671,
1005
+ "grad_norm": 1.9741384983062744,
1006
+ "learning_rate": 3.0514473684210525e-06,
1007
+ "loss": 0.1119,
1008
+ "step": 3375
1009
+ },
1010
+ {
1011
+ "epoch": 1.1696,
1012
+ "grad_norm": 2.29034686088562,
1013
+ "learning_rate": 3.0399342105263155e-06,
1014
+ "loss": 0.1015,
1015
+ "step": 3400
1016
+ },
1017
+ {
1018
+ "epoch": 2.0017,
1019
+ "grad_norm": 2.011443853378296,
1020
+ "learning_rate": 3.028421052631579e-06,
1021
+ "loss": 0.0708,
1022
+ "step": 3425
1023
+ },
1024
+ {
1025
+ "epoch": 2.0042,
1026
+ "grad_norm": 1.2196134328842163,
1027
+ "learning_rate": 3.016907894736842e-06,
1028
+ "loss": 0.0668,
1029
+ "step": 3450
1030
+ },
1031
+ {
1032
+ "epoch": 2.0067,
1033
+ "grad_norm": 2.863933563232422,
1034
+ "learning_rate": 3.005394736842105e-06,
1035
+ "loss": 0.0673,
1036
+ "step": 3475
1037
+ },
1038
+ {
1039
+ "epoch": 2.0092,
1040
+ "grad_norm": 1.9341013431549072,
1041
+ "learning_rate": 2.9938815789473684e-06,
1042
+ "loss": 0.0718,
1043
+ "step": 3500
1044
+ },
1045
+ {
1046
+ "epoch": 2.0092,
1047
+ "eval_loss": 0.13255682587623596,
1048
+ "eval_runtime": 4133.4892,
1049
+ "eval_samples_per_second": 3.297,
1050
+ "eval_steps_per_second": 0.412,
1051
+ "eval_wer": 8.542819451060867,
1052
+ "step": 3500
1053
+ },
1054
+ {
1055
+ "epoch": 2.0117,
1056
+ "grad_norm": 2.795734405517578,
1057
+ "learning_rate": 2.9823684210526314e-06,
1058
+ "loss": 0.071,
1059
+ "step": 3525
1060
+ },
1061
+ {
1062
+ "epoch": 2.0142,
1063
+ "grad_norm": 1.982479214668274,
1064
+ "learning_rate": 2.9708552631578944e-06,
1065
+ "loss": 0.0629,
1066
+ "step": 3550
1067
+ },
1068
+ {
1069
+ "epoch": 2.0167,
1070
+ "grad_norm": 3.168161630630493,
1071
+ "learning_rate": 2.9593421052631574e-06,
1072
+ "loss": 0.0593,
1073
+ "step": 3575
1074
+ },
1075
+ {
1076
+ "epoch": 2.0192,
1077
+ "grad_norm": 2.259500741958618,
1078
+ "learning_rate": 2.947828947368421e-06,
1079
+ "loss": 0.0696,
1080
+ "step": 3600
1081
+ },
1082
+ {
1083
+ "epoch": 2.0217,
1084
+ "grad_norm": 2.1626062393188477,
1085
+ "learning_rate": 2.936315789473684e-06,
1086
+ "loss": 0.0687,
1087
+ "step": 3625
1088
+ },
1089
+ {
1090
+ "epoch": 2.0242,
1091
+ "grad_norm": 2.4419946670532227,
1092
+ "learning_rate": 2.924802631578947e-06,
1093
+ "loss": 0.0686,
1094
+ "step": 3650
1095
+ },
1096
+ {
1097
+ "epoch": 2.0267,
1098
+ "grad_norm": 2.445758819580078,
1099
+ "learning_rate": 2.9132894736842103e-06,
1100
+ "loss": 0.0631,
1101
+ "step": 3675
1102
+ },
1103
+ {
1104
+ "epoch": 2.0292,
1105
+ "grad_norm": 2.614476442337036,
1106
+ "learning_rate": 2.9017763157894737e-06,
1107
+ "loss": 0.0647,
1108
+ "step": 3700
1109
+ },
1110
+ {
1111
+ "epoch": 2.0317,
1112
+ "grad_norm": 1.4166672229766846,
1113
+ "learning_rate": 2.8902631578947367e-06,
1114
+ "loss": 0.0653,
1115
+ "step": 3725
1116
+ },
1117
+ {
1118
+ "epoch": 2.0342,
1119
+ "grad_norm": 1.8435245752334595,
1120
+ "learning_rate": 2.8787499999999998e-06,
1121
+ "loss": 0.0567,
1122
+ "step": 3750
1123
+ },
1124
+ {
1125
+ "epoch": 2.0367,
1126
+ "grad_norm": 1.8179950714111328,
1127
+ "learning_rate": 2.867236842105263e-06,
1128
+ "loss": 0.0636,
1129
+ "step": 3775
1130
+ },
1131
+ {
1132
+ "epoch": 2.0392,
1133
+ "grad_norm": 1.487122893333435,
1134
+ "learning_rate": 2.855723684210526e-06,
1135
+ "loss": 0.0598,
1136
+ "step": 3800
1137
+ },
1138
+ {
1139
+ "epoch": 2.0417,
1140
+ "grad_norm": 2.9211690425872803,
1141
+ "learning_rate": 2.8442105263157892e-06,
1142
+ "loss": 0.0599,
1143
+ "step": 3825
1144
+ },
1145
+ {
1146
+ "epoch": 2.0442,
1147
+ "grad_norm": 2.5018093585968018,
1148
+ "learning_rate": 2.8326973684210522e-06,
1149
+ "loss": 0.055,
1150
+ "step": 3850
1151
+ },
1152
+ {
1153
+ "epoch": 2.0467,
1154
+ "grad_norm": 2.186502456665039,
1155
+ "learning_rate": 2.8211842105263157e-06,
1156
+ "loss": 0.0533,
1157
+ "step": 3875
1158
+ },
1159
+ {
1160
+ "epoch": 2.0492,
1161
+ "grad_norm": 1.039233922958374,
1162
+ "learning_rate": 2.8096710526315787e-06,
1163
+ "loss": 0.0514,
1164
+ "step": 3900
1165
+ },
1166
+ {
1167
+ "epoch": 2.0517,
1168
+ "grad_norm": 1.871267557144165,
1169
+ "learning_rate": 2.7981578947368417e-06,
1170
+ "loss": 0.0512,
1171
+ "step": 3925
1172
+ },
1173
+ {
1174
+ "epoch": 2.0542,
1175
+ "grad_norm": 2.0849483013153076,
1176
+ "learning_rate": 2.7866447368421047e-06,
1177
+ "loss": 0.0579,
1178
+ "step": 3950
1179
+ },
1180
+ {
1181
+ "epoch": 2.0567,
1182
+ "grad_norm": 1.6887531280517578,
1183
+ "learning_rate": 2.775131578947368e-06,
1184
+ "loss": 0.0575,
1185
+ "step": 3975
1186
+ },
1187
+ {
1188
+ "epoch": 2.0592,
1189
+ "grad_norm": 1.88097083568573,
1190
+ "learning_rate": 2.763618421052631e-06,
1191
+ "loss": 0.0683,
1192
+ "step": 4000
1193
+ },
1194
+ {
1195
+ "epoch": 2.0592,
1196
+ "eval_loss": 0.1342601627111435,
1197
+ "eval_runtime": 4125.8373,
1198
+ "eval_samples_per_second": 3.304,
1199
+ "eval_steps_per_second": 0.413,
1200
+ "eval_wer": 8.485103888013485,
1201
+ "step": 4000
1202
+ },
1203
+ {
1204
+ "epoch": 2.0617,
1205
+ "grad_norm": 2.1877427101135254,
1206
+ "learning_rate": 2.7521052631578946e-06,
1207
+ "loss": 0.0614,
1208
+ "step": 4025
1209
+ },
1210
+ {
1211
+ "epoch": 2.0642,
1212
+ "grad_norm": 1.4176368713378906,
1213
+ "learning_rate": 2.740592105263158e-06,
1214
+ "loss": 0.0559,
1215
+ "step": 4050
1216
+ },
1217
+ {
1218
+ "epoch": 2.0667,
1219
+ "grad_norm": 2.4362101554870605,
1220
+ "learning_rate": 2.729078947368421e-06,
1221
+ "loss": 0.0593,
1222
+ "step": 4075
1223
+ },
1224
+ {
1225
+ "epoch": 2.0692,
1226
+ "grad_norm": 1.8663033246994019,
1227
+ "learning_rate": 2.717565789473684e-06,
1228
+ "loss": 0.0591,
1229
+ "step": 4100
1230
+ },
1231
+ {
1232
+ "epoch": 2.0717,
1233
+ "grad_norm": 1.627626657485962,
1234
+ "learning_rate": 2.706052631578947e-06,
1235
+ "loss": 0.0637,
1236
+ "step": 4125
1237
+ },
1238
+ {
1239
+ "epoch": 2.0742,
1240
+ "grad_norm": 2.2072463035583496,
1241
+ "learning_rate": 2.6945394736842105e-06,
1242
+ "loss": 0.0571,
1243
+ "step": 4150
1244
+ },
1245
+ {
1246
+ "epoch": 2.0767,
1247
+ "grad_norm": 1.7411611080169678,
1248
+ "learning_rate": 2.6830263157894735e-06,
1249
+ "loss": 0.0588,
1250
+ "step": 4175
1251
+ },
1252
+ {
1253
+ "epoch": 2.0792,
1254
+ "grad_norm": 1.324000358581543,
1255
+ "learning_rate": 2.6715131578947365e-06,
1256
+ "loss": 0.0482,
1257
+ "step": 4200
1258
+ },
1259
+ {
1260
+ "epoch": 2.0817,
1261
+ "grad_norm": 1.4138795137405396,
1262
+ "learning_rate": 2.6599999999999995e-06,
1263
+ "loss": 0.0477,
1264
+ "step": 4225
1265
+ },
1266
+ {
1267
+ "epoch": 2.0842,
1268
+ "grad_norm": 2.403547763824463,
1269
+ "learning_rate": 2.648486842105263e-06,
1270
+ "loss": 0.0558,
1271
+ "step": 4250
1272
+ },
1273
+ {
1274
+ "epoch": 2.0867,
1275
+ "grad_norm": 1.3718703985214233,
1276
+ "learning_rate": 2.636973684210526e-06,
1277
+ "loss": 0.0546,
1278
+ "step": 4275
1279
+ },
1280
+ {
1281
+ "epoch": 2.0892,
1282
+ "grad_norm": 2.296445369720459,
1283
+ "learning_rate": 2.625460526315789e-06,
1284
+ "loss": 0.0554,
1285
+ "step": 4300
1286
+ },
1287
+ {
1288
+ "epoch": 2.0917,
1289
+ "grad_norm": 2.3471312522888184,
1290
+ "learning_rate": 2.613947368421052e-06,
1291
+ "loss": 0.051,
1292
+ "step": 4325
1293
+ },
1294
+ {
1295
+ "epoch": 2.0942,
1296
+ "grad_norm": 1.6061975955963135,
1297
+ "learning_rate": 2.602434210526316e-06,
1298
+ "loss": 0.0548,
1299
+ "step": 4350
1300
+ },
1301
+ {
1302
+ "epoch": 2.0967,
1303
+ "grad_norm": 2.979126453399658,
1304
+ "learning_rate": 2.590921052631579e-06,
1305
+ "loss": 0.0492,
1306
+ "step": 4375
1307
+ },
1308
+ {
1309
+ "epoch": 2.0992,
1310
+ "grad_norm": 1.7963169813156128,
1311
+ "learning_rate": 2.579407894736842e-06,
1312
+ "loss": 0.0514,
1313
+ "step": 4400
1314
+ },
1315
+ {
1316
+ "epoch": 2.1017,
1317
+ "grad_norm": 2.4996039867401123,
1318
+ "learning_rate": 2.5678947368421053e-06,
1319
+ "loss": 0.0399,
1320
+ "step": 4425
1321
+ },
1322
+ {
1323
+ "epoch": 2.1042,
1324
+ "grad_norm": 1.7498191595077515,
1325
+ "learning_rate": 2.5563815789473683e-06,
1326
+ "loss": 0.0522,
1327
+ "step": 4450
1328
+ },
1329
+ {
1330
+ "epoch": 2.1067,
1331
+ "grad_norm": 1.413889765739441,
1332
+ "learning_rate": 2.5448684210526313e-06,
1333
+ "loss": 0.0517,
1334
+ "step": 4475
1335
+ },
1336
+ {
1337
+ "epoch": 2.1092,
1338
+ "grad_norm": 2.0956978797912598,
1339
+ "learning_rate": 2.5333552631578943e-06,
1340
+ "loss": 0.0482,
1341
+ "step": 4500
1342
+ },
1343
+ {
1344
+ "epoch": 2.1092,
1345
+ "eval_loss": 0.1336347758769989,
1346
+ "eval_runtime": 4119.9162,
1347
+ "eval_samples_per_second": 3.308,
1348
+ "eval_steps_per_second": 0.414,
1349
+ "eval_wer": 8.104914067939463,
1350
+ "step": 4500
1351
+ },
1352
+ {
1353
+ "epoch": 2.1117,
1354
+ "grad_norm": 3.138298749923706,
1355
+ "learning_rate": 2.5218421052631578e-06,
1356
+ "loss": 0.0568,
1357
+ "step": 4525
1358
+ },
1359
+ {
1360
+ "epoch": 2.1142,
1361
+ "grad_norm": 1.4262772798538208,
1362
+ "learning_rate": 2.510328947368421e-06,
1363
+ "loss": 0.0475,
1364
+ "step": 4550
1365
+ },
1366
+ {
1367
+ "epoch": 2.1167,
1368
+ "grad_norm": 3.3500139713287354,
1369
+ "learning_rate": 2.498815789473684e-06,
1370
+ "loss": 0.0474,
1371
+ "step": 4575
1372
+ },
1373
+ {
1374
+ "epoch": 2.1192,
1375
+ "grad_norm": 4.509912014007568,
1376
+ "learning_rate": 2.4873026315789472e-06,
1377
+ "loss": 0.0586,
1378
+ "step": 4600
1379
+ },
1380
+ {
1381
+ "epoch": 2.1217,
1382
+ "grad_norm": 2.1386468410491943,
1383
+ "learning_rate": 2.4757894736842102e-06,
1384
+ "loss": 0.062,
1385
+ "step": 4625
1386
+ },
1387
+ {
1388
+ "epoch": 2.1242,
1389
+ "grad_norm": 1.1121129989624023,
1390
+ "learning_rate": 2.4642763157894733e-06,
1391
+ "loss": 0.0563,
1392
+ "step": 4650
1393
+ },
1394
+ {
1395
+ "epoch": 2.1267,
1396
+ "grad_norm": 1.677538514137268,
1397
+ "learning_rate": 2.4527631578947363e-06,
1398
+ "loss": 0.0519,
1399
+ "step": 4675
1400
+ },
1401
+ {
1402
+ "epoch": 2.1292,
1403
+ "grad_norm": 1.579513430595398,
1404
+ "learning_rate": 2.44125e-06,
1405
+ "loss": 0.0544,
1406
+ "step": 4700
1407
+ },
1408
+ {
1409
+ "epoch": 2.1317,
1410
+ "grad_norm": 2.1100914478302,
1411
+ "learning_rate": 2.429736842105263e-06,
1412
+ "loss": 0.0578,
1413
+ "step": 4725
1414
+ },
1415
+ {
1416
+ "epoch": 2.1342,
1417
+ "grad_norm": 1.779682993888855,
1418
+ "learning_rate": 2.418223684210526e-06,
1419
+ "loss": 0.0486,
1420
+ "step": 4750
1421
+ },
1422
+ {
1423
+ "epoch": 2.1367,
1424
+ "grad_norm": 1.7443439960479736,
1425
+ "learning_rate": 2.4067105263157896e-06,
1426
+ "loss": 0.0534,
1427
+ "step": 4775
1428
+ },
1429
+ {
1430
+ "epoch": 2.1391999999999998,
1431
+ "grad_norm": 1.9388935565948486,
1432
+ "learning_rate": 2.3951973684210526e-06,
1433
+ "loss": 0.0516,
1434
+ "step": 4800
1435
+ },
1436
+ {
1437
+ "epoch": 2.1417,
1438
+ "grad_norm": 1.82517409324646,
1439
+ "learning_rate": 2.3836842105263156e-06,
1440
+ "loss": 0.0451,
1441
+ "step": 4825
1442
+ },
1443
+ {
1444
+ "epoch": 2.1442,
1445
+ "grad_norm": 1.9101967811584473,
1446
+ "learning_rate": 2.3721710526315786e-06,
1447
+ "loss": 0.0546,
1448
+ "step": 4850
1449
+ },
1450
+ {
1451
+ "epoch": 2.1467,
1452
+ "grad_norm": 1.7242915630340576,
1453
+ "learning_rate": 2.360657894736842e-06,
1454
+ "loss": 0.0495,
1455
+ "step": 4875
1456
+ },
1457
+ {
1458
+ "epoch": 2.1492,
1459
+ "grad_norm": 1.9127079248428345,
1460
+ "learning_rate": 2.349144736842105e-06,
1461
+ "loss": 0.0465,
1462
+ "step": 4900
1463
+ },
1464
+ {
1465
+ "epoch": 2.1517,
1466
+ "grad_norm": 2.7716519832611084,
1467
+ "learning_rate": 2.337631578947368e-06,
1468
+ "loss": 0.0493,
1469
+ "step": 4925
1470
+ },
1471
+ {
1472
+ "epoch": 2.1542,
1473
+ "grad_norm": 3.141706705093384,
1474
+ "learning_rate": 2.326118421052631e-06,
1475
+ "loss": 0.046,
1476
+ "step": 4950
1477
+ },
1478
+ {
1479
+ "epoch": 2.1567,
1480
+ "grad_norm": 2.2624270915985107,
1481
+ "learning_rate": 2.3146052631578945e-06,
1482
+ "loss": 0.0522,
1483
+ "step": 4975
1484
+ },
1485
+ {
1486
+ "epoch": 2.1592000000000002,
1487
+ "grad_norm": 1.2777652740478516,
1488
+ "learning_rate": 2.3030921052631575e-06,
1489
+ "loss": 0.0548,
1490
+ "step": 5000
1491
+ },
1492
+ {
1493
+ "epoch": 2.1592000000000002,
1494
+ "eval_loss": 0.13162237405776978,
1495
+ "eval_runtime": 4127.2085,
1496
+ "eval_samples_per_second": 3.302,
1497
+ "eval_steps_per_second": 0.413,
1498
+ "eval_wer": 7.9244384184103485,
1499
+ "step": 5000
1500
+ },
1501
+ {
1502
+ "epoch": 2.1617,
1503
+ "grad_norm": 2.106818675994873,
1504
+ "learning_rate": 2.2915789473684206e-06,
1505
+ "loss": 0.0527,
1506
+ "step": 5025
1507
+ },
1508
+ {
1509
+ "epoch": 2.1642,
1510
+ "grad_norm": 2.2705554962158203,
1511
+ "learning_rate": 2.2800657894736844e-06,
1512
+ "loss": 0.0483,
1513
+ "step": 5050
1514
+ },
1515
+ {
1516
+ "epoch": 2.1667,
1517
+ "grad_norm": 1.5468271970748901,
1518
+ "learning_rate": 2.2685526315789474e-06,
1519
+ "loss": 0.0516,
1520
+ "step": 5075
1521
+ },
1522
+ {
1523
+ "epoch": 2.1692,
1524
+ "grad_norm": 2.0331270694732666,
1525
+ "learning_rate": 2.2570394736842104e-06,
1526
+ "loss": 0.0551,
1527
+ "step": 5100
1528
+ },
1529
+ {
1530
+ "epoch": 3.0013,
1531
+ "grad_norm": 1.107423186302185,
1532
+ "learning_rate": 2.2455263157894734e-06,
1533
+ "loss": 0.0434,
1534
+ "step": 5125
1535
+ },
1536
+ {
1537
+ "epoch": 3.0038,
1538
+ "grad_norm": 3.9103100299835205,
1539
+ "learning_rate": 2.234013157894737e-06,
1540
+ "loss": 0.0362,
1541
+ "step": 5150
1542
+ },
1543
+ {
1544
+ "epoch": 3.0063,
1545
+ "grad_norm": 1.193088173866272,
1546
+ "learning_rate": 2.2225e-06,
1547
+ "loss": 0.0327,
1548
+ "step": 5175
1549
+ },
1550
+ {
1551
+ "epoch": 3.0088,
1552
+ "grad_norm": 1.0432852506637573,
1553
+ "learning_rate": 2.210986842105263e-06,
1554
+ "loss": 0.0326,
1555
+ "step": 5200
1556
+ },
1557
+ {
1558
+ "epoch": 3.0113,
1559
+ "grad_norm": 0.7116020917892456,
1560
+ "learning_rate": 2.199473684210526e-06,
1561
+ "loss": 0.0296,
1562
+ "step": 5225
1563
+ },
1564
+ {
1565
+ "epoch": 3.0138,
1566
+ "grad_norm": 2.009617805480957,
1567
+ "learning_rate": 2.1879605263157894e-06,
1568
+ "loss": 0.0367,
1569
+ "step": 5250
1570
+ },
1571
+ {
1572
+ "epoch": 3.0163,
1573
+ "grad_norm": 1.9047244787216187,
1574
+ "learning_rate": 2.1764473684210524e-06,
1575
+ "loss": 0.0347,
1576
+ "step": 5275
1577
+ },
1578
+ {
1579
+ "epoch": 3.0188,
1580
+ "grad_norm": 1.630439043045044,
1581
+ "learning_rate": 2.164934210526316e-06,
1582
+ "loss": 0.0291,
1583
+ "step": 5300
1584
+ },
1585
+ {
1586
+ "epoch": 3.0213,
1587
+ "grad_norm": 1.4158824682235718,
1588
+ "learning_rate": 2.153421052631579e-06,
1589
+ "loss": 0.0321,
1590
+ "step": 5325
1591
+ },
1592
+ {
1593
+ "epoch": 3.0238,
1594
+ "grad_norm": 1.2792794704437256,
1595
+ "learning_rate": 2.141907894736842e-06,
1596
+ "loss": 0.0338,
1597
+ "step": 5350
1598
+ },
1599
+ {
1600
+ "epoch": 3.0263,
1601
+ "grad_norm": 1.6505346298217773,
1602
+ "learning_rate": 2.1303947368421053e-06,
1603
+ "loss": 0.0348,
1604
+ "step": 5375
1605
+ },
1606
+ {
1607
+ "epoch": 3.0288,
1608
+ "grad_norm": 1.5343618392944336,
1609
+ "learning_rate": 2.1188815789473683e-06,
1610
+ "loss": 0.0318,
1611
+ "step": 5400
1612
+ },
1613
+ {
1614
+ "epoch": 3.0313,
1615
+ "grad_norm": 1.8325493335723877,
1616
+ "learning_rate": 2.1073684210526313e-06,
1617
+ "loss": 0.0333,
1618
+ "step": 5425
1619
+ },
1620
+ {
1621
+ "epoch": 3.0338,
1622
+ "grad_norm": 1.7224900722503662,
1623
+ "learning_rate": 2.0958552631578943e-06,
1624
+ "loss": 0.0322,
1625
+ "step": 5450
1626
+ },
1627
+ {
1628
+ "epoch": 3.0362999999999998,
1629
+ "grad_norm": 1.3443737030029297,
1630
+ "learning_rate": 2.0843421052631577e-06,
1631
+ "loss": 0.0304,
1632
+ "step": 5475
1633
+ },
1634
+ {
1635
+ "epoch": 3.0388,
1636
+ "grad_norm": 1.3260679244995117,
1637
+ "learning_rate": 2.0728289473684207e-06,
1638
+ "loss": 0.0282,
1639
+ "step": 5500
1640
+ },
1641
+ {
1642
+ "epoch": 3.0388,
1643
+ "eval_loss": 0.13909843564033508,
1644
+ "eval_runtime": 4135.2147,
1645
+ "eval_samples_per_second": 3.296,
1646
+ "eval_steps_per_second": 0.412,
1647
+ "eval_wer": 7.8181684927992965,
1648
+ "step": 5500
1649
+ },
1650
+ {
1651
+ "epoch": 3.0413,
1652
+ "grad_norm": 1.0075204372406006,
1653
+ "learning_rate": 2.061315789473684e-06,
1654
+ "loss": 0.0308,
1655
+ "step": 5525
1656
+ },
1657
+ {
1658
+ "epoch": 3.0438,
1659
+ "grad_norm": 1.0206842422485352,
1660
+ "learning_rate": 2.049802631578947e-06,
1661
+ "loss": 0.0306,
1662
+ "step": 5550
1663
+ },
1664
+ {
1665
+ "epoch": 3.0463,
1666
+ "grad_norm": 1.411301851272583,
1667
+ "learning_rate": 2.03828947368421e-06,
1668
+ "loss": 0.0243,
1669
+ "step": 5575
1670
+ },
1671
+ {
1672
+ "epoch": 3.0488,
1673
+ "grad_norm": 0.959862470626831,
1674
+ "learning_rate": 2.0267763157894732e-06,
1675
+ "loss": 0.0272,
1676
+ "step": 5600
1677
+ },
1678
+ {
1679
+ "epoch": 3.0513,
1680
+ "grad_norm": 2.2999842166900635,
1681
+ "learning_rate": 2.0152631578947367e-06,
1682
+ "loss": 0.0246,
1683
+ "step": 5625
1684
+ },
1685
+ {
1686
+ "epoch": 3.0538,
1687
+ "grad_norm": 2.890066146850586,
1688
+ "learning_rate": 2.00375e-06,
1689
+ "loss": 0.0299,
1690
+ "step": 5650
1691
+ },
1692
+ {
1693
+ "epoch": 3.0563,
1694
+ "grad_norm": 1.7101376056671143,
1695
+ "learning_rate": 1.992236842105263e-06,
1696
+ "loss": 0.0322,
1697
+ "step": 5675
1698
+ },
1699
+ {
1700
+ "epoch": 3.0588,
1701
+ "grad_norm": 1.531943917274475,
1702
+ "learning_rate": 1.980723684210526e-06,
1703
+ "loss": 0.0345,
1704
+ "step": 5700
1705
+ },
1706
+ {
1707
+ "epoch": 3.0613,
1708
+ "grad_norm": 1.6334413290023804,
1709
+ "learning_rate": 1.969210526315789e-06,
1710
+ "loss": 0.032,
1711
+ "step": 5725
1712
+ },
1713
+ {
1714
+ "epoch": 3.0638,
1715
+ "grad_norm": 2.112278461456299,
1716
+ "learning_rate": 1.9576973684210526e-06,
1717
+ "loss": 0.0304,
1718
+ "step": 5750
1719
+ },
1720
+ {
1721
+ "epoch": 3.0663,
1722
+ "grad_norm": 1.7582517862319946,
1723
+ "learning_rate": 1.9461842105263156e-06,
1724
+ "loss": 0.0254,
1725
+ "step": 5775
1726
+ },
1727
+ {
1728
+ "epoch": 3.0688,
1729
+ "grad_norm": 1.3391777276992798,
1730
+ "learning_rate": 1.934671052631579e-06,
1731
+ "loss": 0.0316,
1732
+ "step": 5800
1733
+ },
1734
+ {
1735
+ "epoch": 3.0713,
1736
+ "grad_norm": 0.8350562453269958,
1737
+ "learning_rate": 1.923157894736842e-06,
1738
+ "loss": 0.0329,
1739
+ "step": 5825
1740
+ },
1741
+ {
1742
+ "epoch": 3.0738,
1743
+ "grad_norm": 0.7084619402885437,
1744
+ "learning_rate": 1.911644736842105e-06,
1745
+ "loss": 0.0325,
1746
+ "step": 5850
1747
+ },
1748
+ {
1749
+ "epoch": 3.0763,
1750
+ "grad_norm": 1.2961277961730957,
1751
+ "learning_rate": 1.9001315789473683e-06,
1752
+ "loss": 0.0313,
1753
+ "step": 5875
1754
+ },
1755
+ {
1756
+ "epoch": 3.0788,
1757
+ "grad_norm": 1.032840371131897,
1758
+ "learning_rate": 1.8886184210526315e-06,
1759
+ "loss": 0.0224,
1760
+ "step": 5900
1761
+ },
1762
+ {
1763
+ "epoch": 3.0813,
1764
+ "grad_norm": 1.2073044776916504,
1765
+ "learning_rate": 1.8771052631578945e-06,
1766
+ "loss": 0.0215,
1767
+ "step": 5925
1768
+ },
1769
+ {
1770
+ "epoch": 3.0838,
1771
+ "grad_norm": 0.8210967779159546,
1772
+ "learning_rate": 1.8655921052631577e-06,
1773
+ "loss": 0.0258,
1774
+ "step": 5950
1775
+ },
1776
+ {
1777
+ "epoch": 3.0863,
1778
+ "grad_norm": 1.5273653268814087,
1779
+ "learning_rate": 1.854078947368421e-06,
1780
+ "loss": 0.0254,
1781
+ "step": 5975
1782
+ },
1783
+ {
1784
+ "epoch": 3.0888,
1785
+ "grad_norm": 3.194197177886963,
1786
+ "learning_rate": 1.8425657894736842e-06,
1787
+ "loss": 0.025,
1788
+ "step": 6000
1789
+ },
1790
+ {
1791
+ "epoch": 3.0888,
1792
+ "eval_loss": 0.14247554540634155,
1793
+ "eval_runtime": 4123.5746,
1794
+ "eval_samples_per_second": 3.305,
1795
+ "eval_steps_per_second": 0.413,
1796
+ "eval_wer": 7.940928579281029,
1797
+ "step": 6000
1798
+ },
1799
+ {
1800
+ "epoch": 3.0913,
1801
+ "grad_norm": 2.1373400688171387,
1802
+ "learning_rate": 1.8310526315789472e-06,
1803
+ "loss": 0.031,
1804
+ "step": 6025
1805
+ },
1806
+ {
1807
+ "epoch": 3.0938,
1808
+ "grad_norm": 1.0779415369033813,
1809
+ "learning_rate": 1.8195394736842104e-06,
1810
+ "loss": 0.024,
1811
+ "step": 6050
1812
+ },
1813
+ {
1814
+ "epoch": 3.0963,
1815
+ "grad_norm": 0.9637121558189392,
1816
+ "learning_rate": 1.8080263157894734e-06,
1817
+ "loss": 0.0282,
1818
+ "step": 6075
1819
+ },
1820
+ {
1821
+ "epoch": 3.0987999999999998,
1822
+ "grad_norm": 1.1645703315734863,
1823
+ "learning_rate": 1.7965131578947366e-06,
1824
+ "loss": 0.0278,
1825
+ "step": 6100
1826
+ },
1827
+ {
1828
+ "epoch": 3.1013,
1829
+ "grad_norm": 1.2814173698425293,
1830
+ "learning_rate": 1.7849999999999996e-06,
1831
+ "loss": 0.0199,
1832
+ "step": 6125
1833
+ },
1834
+ {
1835
+ "epoch": 3.1038,
1836
+ "grad_norm": 1.458809494972229,
1837
+ "learning_rate": 1.773486842105263e-06,
1838
+ "loss": 0.0264,
1839
+ "step": 6150
1840
+ },
1841
+ {
1842
+ "epoch": 3.1063,
1843
+ "grad_norm": 1.6669671535491943,
1844
+ "learning_rate": 1.7619736842105263e-06,
1845
+ "loss": 0.0272,
1846
+ "step": 6175
1847
+ },
1848
+ {
1849
+ "epoch": 3.1088,
1850
+ "grad_norm": 1.5049173831939697,
1851
+ "learning_rate": 1.7504605263157893e-06,
1852
+ "loss": 0.0243,
1853
+ "step": 6200
1854
+ },
1855
+ {
1856
+ "epoch": 3.1113,
1857
+ "grad_norm": 0.861107587814331,
1858
+ "learning_rate": 1.7389473684210525e-06,
1859
+ "loss": 0.0274,
1860
+ "step": 6225
1861
+ },
1862
+ {
1863
+ "epoch": 3.1138,
1864
+ "grad_norm": 1.0454998016357422,
1865
+ "learning_rate": 1.7274342105263155e-06,
1866
+ "loss": 0.0258,
1867
+ "step": 6250
1868
+ },
1869
+ {
1870
+ "epoch": 3.1163,
1871
+ "grad_norm": 1.7108014822006226,
1872
+ "learning_rate": 1.7159210526315788e-06,
1873
+ "loss": 0.0259,
1874
+ "step": 6275
1875
+ },
1876
+ {
1877
+ "epoch": 3.1188,
1878
+ "grad_norm": 0.8804712295532227,
1879
+ "learning_rate": 1.704407894736842e-06,
1880
+ "loss": 0.0255,
1881
+ "step": 6300
1882
+ },
1883
+ {
1884
+ "epoch": 3.1213,
1885
+ "grad_norm": 2.0050883293151855,
1886
+ "learning_rate": 1.6928947368421052e-06,
1887
+ "loss": 0.0304,
1888
+ "step": 6325
1889
+ },
1890
+ {
1891
+ "epoch": 3.1238,
1892
+ "grad_norm": 1.4400875568389893,
1893
+ "learning_rate": 1.6813815789473682e-06,
1894
+ "loss": 0.0333,
1895
+ "step": 6350
1896
+ },
1897
+ {
1898
+ "epoch": 3.1263,
1899
+ "grad_norm": 1.4423948526382446,
1900
+ "learning_rate": 1.6698684210526315e-06,
1901
+ "loss": 0.0279,
1902
+ "step": 6375
1903
+ },
1904
+ {
1905
+ "epoch": 3.1288,
1906
+ "grad_norm": 1.3972327709197998,
1907
+ "learning_rate": 1.6583552631578947e-06,
1908
+ "loss": 0.0255,
1909
+ "step": 6400
1910
+ },
1911
+ {
1912
+ "epoch": 3.1313,
1913
+ "grad_norm": 1.6908966302871704,
1914
+ "learning_rate": 1.6468421052631577e-06,
1915
+ "loss": 0.0267,
1916
+ "step": 6425
1917
+ },
1918
+ {
1919
+ "epoch": 3.1338,
1920
+ "grad_norm": 0.9540082216262817,
1921
+ "learning_rate": 1.635328947368421e-06,
1922
+ "loss": 0.0265,
1923
+ "step": 6450
1924
+ },
1925
+ {
1926
+ "epoch": 3.1363,
1927
+ "grad_norm": 1.41488778591156,
1928
+ "learning_rate": 1.6238157894736841e-06,
1929
+ "loss": 0.0224,
1930
+ "step": 6475
1931
+ },
1932
+ {
1933
+ "epoch": 3.1388,
1934
+ "grad_norm": 0.4790860116481781,
1935
+ "learning_rate": 1.6123026315789474e-06,
1936
+ "loss": 0.0274,
1937
+ "step": 6500
1938
+ },
1939
+ {
1940
+ "epoch": 3.1388,
1941
+ "eval_loss": 0.13914132118225098,
1942
+ "eval_runtime": 4133.8202,
1943
+ "eval_samples_per_second": 3.297,
1944
+ "eval_steps_per_second": 0.412,
1945
+ "eval_wer": 7.731137088204039,
1946
+ "step": 6500
1947
+ },
1948
+ {
1949
+ "epoch": 3.1413,
1950
+ "grad_norm": 2.5638585090637207,
1951
+ "learning_rate": 1.6007894736842104e-06,
1952
+ "loss": 0.025,
1953
+ "step": 6525
1954
+ },
1955
+ {
1956
+ "epoch": 3.1438,
1957
+ "grad_norm": 1.8847306966781616,
1958
+ "learning_rate": 1.5892763157894736e-06,
1959
+ "loss": 0.0294,
1960
+ "step": 6550
1961
+ },
1962
+ {
1963
+ "epoch": 3.1463,
1964
+ "grad_norm": 1.0196236371994019,
1965
+ "learning_rate": 1.5777631578947366e-06,
1966
+ "loss": 0.0255,
1967
+ "step": 6575
1968
+ },
1969
+ {
1970
+ "epoch": 3.1488,
1971
+ "grad_norm": 1.0703202486038208,
1972
+ "learning_rate": 1.5662499999999998e-06,
1973
+ "loss": 0.0246,
1974
+ "step": 6600
1975
+ },
1976
+ {
1977
+ "epoch": 3.1513,
1978
+ "grad_norm": 2.646519422531128,
1979
+ "learning_rate": 1.5547368421052628e-06,
1980
+ "loss": 0.0213,
1981
+ "step": 6625
1982
+ },
1983
+ {
1984
+ "epoch": 3.1538,
1985
+ "grad_norm": 1.7430530786514282,
1986
+ "learning_rate": 1.5432236842105263e-06,
1987
+ "loss": 0.0267,
1988
+ "step": 6650
1989
+ },
1990
+ {
1991
+ "epoch": 3.1563,
1992
+ "grad_norm": 1.0606240034103394,
1993
+ "learning_rate": 1.5317105263157895e-06,
1994
+ "loss": 0.0269,
1995
+ "step": 6675
1996
+ },
1997
+ {
1998
+ "epoch": 3.1588,
1999
+ "grad_norm": 1.4670476913452148,
2000
+ "learning_rate": 1.5201973684210525e-06,
2001
+ "loss": 0.0271,
2002
+ "step": 6700
2003
+ },
2004
+ {
2005
+ "epoch": 3.1612999999999998,
2006
+ "grad_norm": 2.345014810562134,
2007
+ "learning_rate": 1.5086842105263157e-06,
2008
+ "loss": 0.0252,
2009
+ "step": 6725
2010
+ },
2011
+ {
2012
+ "epoch": 3.1638,
2013
+ "grad_norm": 2.9098987579345703,
2014
+ "learning_rate": 1.4971710526315787e-06,
2015
+ "loss": 0.0272,
2016
+ "step": 6750
2017
+ },
2018
+ {
2019
+ "epoch": 3.1663,
2020
+ "grad_norm": 0.5682694911956787,
2021
+ "learning_rate": 1.485657894736842e-06,
2022
+ "loss": 0.0237,
2023
+ "step": 6775
2024
+ },
2025
+ {
2026
+ "epoch": 3.1688,
2027
+ "grad_norm": 1.4645904302597046,
2028
+ "learning_rate": 1.4746052631578947e-06,
2029
+ "loss": 0.0303,
2030
+ "step": 6800
2031
+ },
2032
+ {
2033
+ "epoch": 4.0009,
2034
+ "grad_norm": 1.3764489889144897,
2035
+ "learning_rate": 1.4630921052631578e-06,
2036
+ "loss": 0.0242,
2037
+ "step": 6825
2038
+ },
2039
+ {
2040
+ "epoch": 4.0034,
2041
+ "grad_norm": 0.8848748803138733,
2042
+ "learning_rate": 1.451578947368421e-06,
2043
+ "loss": 0.0163,
2044
+ "step": 6850
2045
+ },
2046
+ {
2047
+ "epoch": 4.0059,
2048
+ "grad_norm": 0.619125485420227,
2049
+ "learning_rate": 1.440065789473684e-06,
2050
+ "loss": 0.0188,
2051
+ "step": 6875
2052
+ },
2053
+ {
2054
+ "epoch": 4.0084,
2055
+ "grad_norm": 0.9328649044036865,
2056
+ "learning_rate": 1.4285526315789472e-06,
2057
+ "loss": 0.0173,
2058
+ "step": 6900
2059
+ },
2060
+ {
2061
+ "epoch": 4.0109,
2062
+ "grad_norm": 1.77474045753479,
2063
+ "learning_rate": 1.4170394736842104e-06,
2064
+ "loss": 0.0146,
2065
+ "step": 6925
2066
+ },
2067
+ {
2068
+ "epoch": 4.0134,
2069
+ "grad_norm": 1.3934537172317505,
2070
+ "learning_rate": 1.4055263157894737e-06,
2071
+ "loss": 0.0156,
2072
+ "step": 6950
2073
+ },
2074
+ {
2075
+ "epoch": 4.0159,
2076
+ "grad_norm": 1.2856354713439941,
2077
+ "learning_rate": 1.3940131578947367e-06,
2078
+ "loss": 0.0173,
2079
+ "step": 6975
2080
+ },
2081
+ {
2082
+ "epoch": 4.0184,
2083
+ "grad_norm": 2.1229758262634277,
2084
+ "learning_rate": 1.3824999999999999e-06,
2085
+ "loss": 0.0155,
2086
+ "step": 7000
2087
+ },
2088
+ {
2089
+ "epoch": 4.0184,
2090
+ "eval_loss": 0.14916160702705383,
2091
+ "eval_runtime": 4128.7355,
2092
+ "eval_samples_per_second": 3.301,
2093
+ "eval_steps_per_second": 0.413,
2094
+ "eval_wer": 7.697240646414307,
2095
+ "step": 7000
2096
+ },
2097
+ {
2098
+ "epoch": 4.0209,
2099
+ "grad_norm": 0.44512999057769775,
2100
+ "learning_rate": 1.3709868421052631e-06,
2101
+ "loss": 0.0153,
2102
+ "step": 7025
2103
+ },
2104
+ {
2105
+ "epoch": 4.0234,
2106
+ "grad_norm": 1.8791674375534058,
2107
+ "learning_rate": 1.3594736842105261e-06,
2108
+ "loss": 0.0165,
2109
+ "step": 7050
2110
+ },
2111
+ {
2112
+ "epoch": 4.0259,
2113
+ "grad_norm": 5.244405746459961,
2114
+ "learning_rate": 1.3479605263157894e-06,
2115
+ "loss": 0.0179,
2116
+ "step": 7075
2117
+ },
2118
+ {
2119
+ "epoch": 4.0284,
2120
+ "grad_norm": 1.1926153898239136,
2121
+ "learning_rate": 1.3364473684210526e-06,
2122
+ "loss": 0.0161,
2123
+ "step": 7100
2124
+ },
2125
+ {
2126
+ "epoch": 4.0309,
2127
+ "grad_norm": 1.1147819757461548,
2128
+ "learning_rate": 1.3249342105263158e-06,
2129
+ "loss": 0.015,
2130
+ "step": 7125
2131
+ },
2132
+ {
2133
+ "epoch": 4.0334,
2134
+ "grad_norm": 1.9370721578598022,
2135
+ "learning_rate": 1.3134210526315788e-06,
2136
+ "loss": 0.0142,
2137
+ "step": 7150
2138
+ },
2139
+ {
2140
+ "epoch": 4.0359,
2141
+ "grad_norm": 0.49344903230667114,
2142
+ "learning_rate": 1.301907894736842e-06,
2143
+ "loss": 0.0134,
2144
+ "step": 7175
2145
+ },
2146
+ {
2147
+ "epoch": 4.0384,
2148
+ "grad_norm": 1.8190902471542358,
2149
+ "learning_rate": 1.290394736842105e-06,
2150
+ "loss": 0.0168,
2151
+ "step": 7200
2152
+ },
2153
+ {
2154
+ "epoch": 4.0409,
2155
+ "grad_norm": 0.7560425400733948,
2156
+ "learning_rate": 1.2788815789473683e-06,
2157
+ "loss": 0.0143,
2158
+ "step": 7225
2159
+ },
2160
+ {
2161
+ "epoch": 4.0434,
2162
+ "grad_norm": 1.0451087951660156,
2163
+ "learning_rate": 1.2673684210526313e-06,
2164
+ "loss": 0.0149,
2165
+ "step": 7250
2166
+ },
2167
+ {
2168
+ "epoch": 4.0459,
2169
+ "grad_norm": 1.0334726572036743,
2170
+ "learning_rate": 1.2558552631578947e-06,
2171
+ "loss": 0.0136,
2172
+ "step": 7275
2173
+ },
2174
+ {
2175
+ "epoch": 4.0484,
2176
+ "grad_norm": 0.6531663537025452,
2177
+ "learning_rate": 1.244342105263158e-06,
2178
+ "loss": 0.0137,
2179
+ "step": 7300
2180
+ },
2181
+ {
2182
+ "epoch": 4.0509,
2183
+ "grad_norm": 0.8954887986183167,
2184
+ "learning_rate": 1.232828947368421e-06,
2185
+ "loss": 0.0118,
2186
+ "step": 7325
2187
+ },
2188
+ {
2189
+ "epoch": 4.0534,
2190
+ "grad_norm": 1.0640511512756348,
2191
+ "learning_rate": 1.2213157894736842e-06,
2192
+ "loss": 0.0126,
2193
+ "step": 7350
2194
+ },
2195
+ {
2196
+ "epoch": 4.0559,
2197
+ "grad_norm": 0.2824617922306061,
2198
+ "learning_rate": 1.2098026315789472e-06,
2199
+ "loss": 0.0139,
2200
+ "step": 7375
2201
+ },
2202
+ {
2203
+ "epoch": 4.0584,
2204
+ "grad_norm": 1.0095443725585938,
2205
+ "learning_rate": 1.1982894736842104e-06,
2206
+ "loss": 0.018,
2207
+ "step": 7400
2208
+ },
2209
+ {
2210
+ "epoch": 4.0609,
2211
+ "grad_norm": 1.1475225687026978,
2212
+ "learning_rate": 1.1867763157894734e-06,
2213
+ "loss": 0.0133,
2214
+ "step": 7425
2215
+ },
2216
+ {
2217
+ "epoch": 4.0634,
2218
+ "grad_norm": 1.5951991081237793,
2219
+ "learning_rate": 1.1752631578947369e-06,
2220
+ "loss": 0.013,
2221
+ "step": 7450
2222
+ },
2223
+ {
2224
+ "epoch": 4.0659,
2225
+ "grad_norm": 0.3482917249202728,
2226
+ "learning_rate": 1.1637499999999999e-06,
2227
+ "loss": 0.0154,
2228
+ "step": 7475
2229
+ },
2230
+ {
2231
+ "epoch": 4.0684,
2232
+ "grad_norm": 1.1572391986846924,
2233
+ "learning_rate": 1.152236842105263e-06,
2234
+ "loss": 0.0189,
2235
+ "step": 7500
2236
+ },
2237
+ {
2238
+ "epoch": 4.0684,
2239
+ "eval_loss": 0.15172211825847626,
2240
+ "eval_runtime": 4117.5679,
2241
+ "eval_samples_per_second": 3.31,
2242
+ "eval_steps_per_second": 0.414,
2243
+ "eval_wer": 7.656931364285977,
2244
+ "step": 7500
2245
+ },
2246
+ {
2247
+ "epoch": 4.0709,
2248
+ "grad_norm": 1.3942557573318481,
2249
+ "learning_rate": 1.140723684210526e-06,
2250
+ "loss": 0.0143,
2251
+ "step": 7525
2252
+ },
2253
+ {
2254
+ "epoch": 4.0734,
2255
+ "grad_norm": 0.8097572326660156,
2256
+ "learning_rate": 1.1292105263157893e-06,
2257
+ "loss": 0.0127,
2258
+ "step": 7550
2259
+ },
2260
+ {
2261
+ "epoch": 4.0759,
2262
+ "grad_norm": 0.740375816822052,
2263
+ "learning_rate": 1.1176973684210526e-06,
2264
+ "loss": 0.0124,
2265
+ "step": 7575
2266
+ },
2267
+ {
2268
+ "epoch": 4.0784,
2269
+ "grad_norm": 0.8702480792999268,
2270
+ "learning_rate": 1.1061842105263156e-06,
2271
+ "loss": 0.0137,
2272
+ "step": 7600
2273
+ },
2274
+ {
2275
+ "epoch": 4.0809,
2276
+ "grad_norm": 1.223105788230896,
2277
+ "learning_rate": 1.094671052631579e-06,
2278
+ "loss": 0.0137,
2279
+ "step": 7625
2280
+ },
2281
+ {
2282
+ "epoch": 4.0834,
2283
+ "grad_norm": 0.43614983558654785,
2284
+ "learning_rate": 1.083157894736842e-06,
2285
+ "loss": 0.0109,
2286
+ "step": 7650
2287
+ },
2288
+ {
2289
+ "epoch": 4.0859,
2290
+ "grad_norm": 1.0974986553192139,
2291
+ "learning_rate": 1.0716447368421052e-06,
2292
+ "loss": 0.0118,
2293
+ "step": 7675
2294
+ },
2295
+ {
2296
+ "epoch": 4.0884,
2297
+ "grad_norm": 0.7234652042388916,
2298
+ "learning_rate": 1.0601315789473682e-06,
2299
+ "loss": 0.0125,
2300
+ "step": 7700
2301
+ },
2302
+ {
2303
+ "epoch": 4.0909,
2304
+ "grad_norm": 0.7752431035041809,
2305
+ "learning_rate": 1.0486184210526315e-06,
2306
+ "loss": 0.0135,
2307
+ "step": 7725
2308
+ },
2309
+ {
2310
+ "epoch": 4.0934,
2311
+ "grad_norm": 0.8796952366828918,
2312
+ "learning_rate": 1.0371052631578947e-06,
2313
+ "loss": 0.0158,
2314
+ "step": 7750
2315
+ },
2316
+ {
2317
+ "epoch": 4.0959,
2318
+ "grad_norm": 3.9135661125183105,
2319
+ "learning_rate": 1.0255921052631577e-06,
2320
+ "loss": 0.0139,
2321
+ "step": 7775
2322
+ },
2323
+ {
2324
+ "epoch": 4.0984,
2325
+ "grad_norm": 0.4837290942668915,
2326
+ "learning_rate": 1.014078947368421e-06,
2327
+ "loss": 0.0103,
2328
+ "step": 7800
2329
+ },
2330
+ {
2331
+ "epoch": 4.1009,
2332
+ "grad_norm": 1.1155998706817627,
2333
+ "learning_rate": 1.0025657894736842e-06,
2334
+ "loss": 0.0106,
2335
+ "step": 7825
2336
+ },
2337
+ {
2338
+ "epoch": 4.1034,
2339
+ "grad_norm": 2.628676652908325,
2340
+ "learning_rate": 9.910526315789474e-07,
2341
+ "loss": 0.0089,
2342
+ "step": 7850
2343
+ },
2344
+ {
2345
+ "epoch": 4.1059,
2346
+ "grad_norm": 1.716665506362915,
2347
+ "learning_rate": 9.795394736842104e-07,
2348
+ "loss": 0.0132,
2349
+ "step": 7875
2350
+ },
2351
+ {
2352
+ "epoch": 4.1084,
2353
+ "grad_norm": 1.6751716136932373,
2354
+ "learning_rate": 9.680263157894736e-07,
2355
+ "loss": 0.0137,
2356
+ "step": 7900
2357
+ },
2358
+ {
2359
+ "epoch": 4.1109,
2360
+ "grad_norm": 0.9773244261741638,
2361
+ "learning_rate": 9.565131578947368e-07,
2362
+ "loss": 0.0111,
2363
+ "step": 7925
2364
+ },
2365
+ {
2366
+ "epoch": 4.1134,
2367
+ "grad_norm": 1.44219172000885,
2368
+ "learning_rate": 9.45e-07,
2369
+ "loss": 0.0139,
2370
+ "step": 7950
2371
+ },
2372
+ {
2373
+ "epoch": 4.1159,
2374
+ "grad_norm": 0.8723123073577881,
2375
+ "learning_rate": 9.334868421052631e-07,
2376
+ "loss": 0.0117,
2377
+ "step": 7975
2378
+ },
2379
+ {
2380
+ "epoch": 4.1184,
2381
+ "grad_norm": 0.6484673023223877,
2382
+ "learning_rate": 9.219736842105263e-07,
2383
+ "loss": 0.0139,
2384
+ "step": 8000
2385
+ },
2386
+ {
2387
+ "epoch": 4.1184,
2388
+ "eval_loss": 0.15393850207328796,
2389
+ "eval_runtime": 4128.9341,
2390
+ "eval_samples_per_second": 3.301,
2391
+ "eval_steps_per_second": 0.413,
2392
+ "eval_wer": 7.626699402689728,
2393
+ "step": 8000
2394
+ },
2395
+ {
2396
+ "epoch": 4.1209,
2397
+ "grad_norm": 1.3702197074890137,
2398
+ "learning_rate": 9.104605263157894e-07,
2399
+ "loss": 0.0158,
2400
+ "step": 8025
2401
+ },
2402
+ {
2403
+ "epoch": 4.1234,
2404
+ "grad_norm": 1.425645351409912,
2405
+ "learning_rate": 8.989473684210525e-07,
2406
+ "loss": 0.0117,
2407
+ "step": 8050
2408
+ },
2409
+ {
2410
+ "epoch": 4.1259,
2411
+ "grad_norm": 1.4255399703979492,
2412
+ "learning_rate": 8.874342105263158e-07,
2413
+ "loss": 0.015,
2414
+ "step": 8075
2415
+ },
2416
+ {
2417
+ "epoch": 4.1284,
2418
+ "grad_norm": 0.6988621950149536,
2419
+ "learning_rate": 8.759210526315789e-07,
2420
+ "loss": 0.0141,
2421
+ "step": 8100
2422
+ },
2423
+ {
2424
+ "epoch": 4.1309,
2425
+ "grad_norm": 1.1563546657562256,
2426
+ "learning_rate": 8.64407894736842e-07,
2427
+ "loss": 0.0122,
2428
+ "step": 8125
2429
+ },
2430
+ {
2431
+ "epoch": 4.1334,
2432
+ "grad_norm": 1.2023714780807495,
2433
+ "learning_rate": 8.528947368421051e-07,
2434
+ "loss": 0.013,
2435
+ "step": 8150
2436
+ },
2437
+ {
2438
+ "epoch": 4.1359,
2439
+ "grad_norm": 0.9450110197067261,
2440
+ "learning_rate": 8.413815789473683e-07,
2441
+ "loss": 0.0123,
2442
+ "step": 8175
2443
+ },
2444
+ {
2445
+ "epoch": 4.1384,
2446
+ "grad_norm": 0.9265995621681213,
2447
+ "learning_rate": 8.298684210526316e-07,
2448
+ "loss": 0.0114,
2449
+ "step": 8200
2450
+ },
2451
+ {
2452
+ "epoch": 4.1409,
2453
+ "grad_norm": 0.4234980046749115,
2454
+ "learning_rate": 8.183552631578947e-07,
2455
+ "loss": 0.0085,
2456
+ "step": 8225
2457
+ },
2458
+ {
2459
+ "epoch": 4.1434,
2460
+ "grad_norm": 1.3323073387145996,
2461
+ "learning_rate": 8.068421052631579e-07,
2462
+ "loss": 0.014,
2463
+ "step": 8250
2464
+ },
2465
+ {
2466
+ "epoch": 4.1459,
2467
+ "grad_norm": 1.2050007581710815,
2468
+ "learning_rate": 7.95328947368421e-07,
2469
+ "loss": 0.0106,
2470
+ "step": 8275
2471
+ },
2472
+ {
2473
+ "epoch": 4.1484,
2474
+ "grad_norm": 1.261042594909668,
2475
+ "learning_rate": 7.838157894736841e-07,
2476
+ "loss": 0.0107,
2477
+ "step": 8300
2478
+ },
2479
+ {
2480
+ "epoch": 4.1509,
2481
+ "grad_norm": 1.2892303466796875,
2482
+ "learning_rate": 7.723026315789474e-07,
2483
+ "loss": 0.0145,
2484
+ "step": 8325
2485
+ },
2486
+ {
2487
+ "epoch": 4.1534,
2488
+ "grad_norm": 1.1626112461090088,
2489
+ "learning_rate": 7.607894736842105e-07,
2490
+ "loss": 0.0139,
2491
+ "step": 8350
2492
+ },
2493
+ {
2494
+ "epoch": 4.1559,
2495
+ "grad_norm": 1.0547322034835815,
2496
+ "learning_rate": 7.492763157894736e-07,
2497
+ "loss": 0.0154,
2498
+ "step": 8375
2499
+ },
2500
+ {
2501
+ "epoch": 4.1584,
2502
+ "grad_norm": 0.44805532693862915,
2503
+ "learning_rate": 7.377631578947367e-07,
2504
+ "loss": 0.0109,
2505
+ "step": 8400
2506
+ },
2507
+ {
2508
+ "epoch": 4.1609,
2509
+ "grad_norm": 0.7095866203308105,
2510
+ "learning_rate": 7.262499999999999e-07,
2511
+ "loss": 0.0114,
2512
+ "step": 8425
2513
+ },
2514
+ {
2515
+ "epoch": 4.1634,
2516
+ "grad_norm": 1.4220194816589355,
2517
+ "learning_rate": 7.14736842105263e-07,
2518
+ "loss": 0.0134,
2519
+ "step": 8450
2520
+ },
2521
+ {
2522
+ "epoch": 4.1659,
2523
+ "grad_norm": 1.0814168453216553,
2524
+ "learning_rate": 7.032236842105263e-07,
2525
+ "loss": 0.0142,
2526
+ "step": 8475
2527
+ },
2528
+ {
2529
+ "epoch": 4.1684,
2530
+ "grad_norm": 0.7026916146278381,
2531
+ "learning_rate": 6.917105263157895e-07,
2532
+ "loss": 0.0141,
2533
+ "step": 8500
2534
+ },
2535
+ {
2536
+ "epoch": 4.1684,
2537
+ "eval_loss": 0.15496784448623657,
2538
+ "eval_runtime": 4124.1829,
2539
+ "eval_samples_per_second": 3.305,
2540
+ "eval_steps_per_second": 0.413,
2541
+ "eval_wer": 7.542416358239584,
2542
+ "step": 8500
2543
+ },
2544
+ {
2545
+ "epoch": 5.0005,
2546
+ "grad_norm": 4.648550033569336,
2547
+ "learning_rate": 6.801973684210526e-07,
2548
+ "loss": 0.0285,
2549
+ "step": 8525
2550
+ },
2551
+ {
2552
+ "epoch": 5.003,
2553
+ "grad_norm": 1.9204503297805786,
2554
+ "learning_rate": 6.691447368421053e-07,
2555
+ "loss": 0.0761,
2556
+ "step": 8550
2557
+ },
2558
+ {
2559
+ "epoch": 5.0055,
2560
+ "grad_norm": 1.7285746335983276,
2561
+ "learning_rate": 6.576315789473684e-07,
2562
+ "loss": 0.0602,
2563
+ "step": 8575
2564
+ },
2565
+ {
2566
+ "epoch": 5.008,
2567
+ "grad_norm": 1.1516830921173096,
2568
+ "learning_rate": 6.461184210526315e-07,
2569
+ "loss": 0.0585,
2570
+ "step": 8600
2571
+ },
2572
+ {
2573
+ "epoch": 5.0105,
2574
+ "grad_norm": 3.3867828845977783,
2575
+ "learning_rate": 6.346052631578947e-07,
2576
+ "loss": 0.0656,
2577
+ "step": 8625
2578
+ },
2579
+ {
2580
+ "epoch": 5.013,
2581
+ "grad_norm": 4.064920902252197,
2582
+ "learning_rate": 6.230921052631579e-07,
2583
+ "loss": 0.0683,
2584
+ "step": 8650
2585
+ },
2586
+ {
2587
+ "epoch": 5.0155,
2588
+ "grad_norm": 3.695047378540039,
2589
+ "learning_rate": 6.11578947368421e-07,
2590
+ "loss": 0.0659,
2591
+ "step": 8675
2592
+ },
2593
+ {
2594
+ "epoch": 5.018,
2595
+ "grad_norm": 2.9087939262390137,
2596
+ "learning_rate": 6.000657894736842e-07,
2597
+ "loss": 0.0611,
2598
+ "step": 8700
2599
+ },
2600
+ {
2601
+ "epoch": 5.0205,
2602
+ "grad_norm": 3.368290424346924,
2603
+ "learning_rate": 5.885526315789473e-07,
2604
+ "loss": 0.0603,
2605
+ "step": 8725
2606
+ },
2607
+ {
2608
+ "epoch": 5.023,
2609
+ "grad_norm": 3.7565319538116455,
2610
+ "learning_rate": 5.770394736842104e-07,
2611
+ "loss": 0.0614,
2612
+ "step": 8750
2613
+ },
2614
+ {
2615
+ "epoch": 5.0255,
2616
+ "grad_norm": 2.4887771606445312,
2617
+ "learning_rate": 5.655263157894735e-07,
2618
+ "loss": 0.0497,
2619
+ "step": 8775
2620
+ },
2621
+ {
2622
+ "epoch": 5.028,
2623
+ "grad_norm": 2.1670076847076416,
2624
+ "learning_rate": 5.540131578947369e-07,
2625
+ "loss": 0.0662,
2626
+ "step": 8800
2627
+ },
2628
+ {
2629
+ "epoch": 5.0305,
2630
+ "grad_norm": 1.3746148347854614,
2631
+ "learning_rate": 5.425e-07,
2632
+ "loss": 0.0507,
2633
+ "step": 8825
2634
+ },
2635
+ {
2636
+ "epoch": 5.033,
2637
+ "grad_norm": 1.8274154663085938,
2638
+ "learning_rate": 5.309868421052631e-07,
2639
+ "loss": 0.0449,
2640
+ "step": 8850
2641
+ },
2642
+ {
2643
+ "epoch": 5.0355,
2644
+ "grad_norm": 2.9424078464508057,
2645
+ "learning_rate": 5.194736842105262e-07,
2646
+ "loss": 0.0529,
2647
+ "step": 8875
2648
+ },
2649
+ {
2650
+ "epoch": 5.038,
2651
+ "grad_norm": 2.457754611968994,
2652
+ "learning_rate": 5.079605263157895e-07,
2653
+ "loss": 0.042,
2654
+ "step": 8900
2655
+ },
2656
+ {
2657
+ "epoch": 5.0405,
2658
+ "grad_norm": 2.208768606185913,
2659
+ "learning_rate": 4.964473684210526e-07,
2660
+ "loss": 0.0407,
2661
+ "step": 8925
2662
+ },
2663
+ {
2664
+ "epoch": 5.043,
2665
+ "grad_norm": 1.9554438591003418,
2666
+ "learning_rate": 4.849342105263158e-07,
2667
+ "loss": 0.0465,
2668
+ "step": 8950
2669
+ },
2670
+ {
2671
+ "epoch": 5.0455,
2672
+ "grad_norm": 1.1464567184448242,
2673
+ "learning_rate": 4.734210526315789e-07,
2674
+ "loss": 0.0537,
2675
+ "step": 8975
2676
+ },
2677
+ {
2678
+ "epoch": 5.048,
2679
+ "grad_norm": 3.1216509342193604,
2680
+ "learning_rate": 4.6190789473684203e-07,
2681
+ "loss": 0.0368,
2682
+ "step": 9000
2683
+ },
2684
+ {
2685
+ "epoch": 5.048,
2686
+ "eval_loss": 0.12588092684745789,
2687
+ "eval_runtime": 4149.257,
2688
+ "eval_samples_per_second": 3.285,
2689
+ "eval_steps_per_second": 0.411,
2690
+ "eval_wer": 7.215361500971087,
2691
+ "step": 9000
2692
+ },
2693
+ {
2694
+ "epoch": 5.048,
2695
+ "step": 9000,
2696
+ "total_flos": 4.891718061785088e+20,
2697
+ "train_loss": 0.0,
2698
+ "train_runtime": 289.8068,
2699
+ "train_samples_per_second": 552.092,
2700
+ "train_steps_per_second": 34.506
2701
+ }
2702
+ ],
2703
+ "logging_steps": 25,
2704
+ "max_steps": 10000,
2705
+ "num_input_tokens_seen": 0,
2706
+ "num_train_epochs": 9223372036854775807,
2707
+ "save_steps": 1000,
2708
+ "stateful_callbacks": {
2709
+ "TrainerControl": {
2710
+ "args": {
2711
+ "should_epoch_stop": false,
2712
+ "should_evaluate": false,
2713
+ "should_log": false,
2714
+ "should_save": true,
2715
+ "should_training_stop": false
2716
+ },
2717
+ "attributes": {}
2718
+ }
2719
+ },
2720
+ "total_flos": 4.891718061785088e+20,
2721
+ "train_batch_size": 16,
2722
+ "trial_name": null,
2723
+ "trial_params": null
2724
+ }
wandb/run-20241007_131849-0rbzerob/files/output.log CHANGED
The diff for this file is too large to render.
 
wandb/run-20241007_131849-0rbzerob/run-0rbzerob.wandb CHANGED
Binary files a/wandb/run-20241007_131849-0rbzerob/run-0rbzerob.wandb and b/wandb/run-20241007_131849-0rbzerob/run-0rbzerob.wandb differ