|
{ |
|
"best_metric": 37.65067359962184, |
|
"best_model_checkpoint": "./whisper-tiny-fr-micro-train/checkpoint-8222", |
|
"epoch": 0.08564583333333334, |
|
"eval_steps": 4111, |
|
"global_step": 8222, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 19.87736701965332, |
|
"learning_rate": 5e-09, |
|
"loss": 0.9486, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 18.398113250732422, |
|
"learning_rate": 1e-08, |
|
"loss": 0.9083, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.93355941772461, |
|
"learning_rate": 1.5e-08, |
|
"loss": 0.8204, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 11.64875602722168, |
|
"learning_rate": 2e-08, |
|
"loss": 0.7006, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.734498977661133, |
|
"learning_rate": 2.5e-08, |
|
"loss": 0.648, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.067263603210449, |
|
"learning_rate": 3e-08, |
|
"loss": 0.537, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.97986364364624, |
|
"learning_rate": 3.4999999999999996e-08, |
|
"loss": 0.5752, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.728494167327881, |
|
"learning_rate": 4e-08, |
|
"loss": 0.5863, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 9.38297176361084, |
|
"learning_rate": 4.5e-08, |
|
"loss": 0.5172, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.568984508514404, |
|
"learning_rate": 5e-08, |
|
"loss": 0.4682, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.042702674865723, |
|
"learning_rate": 5.5e-08, |
|
"loss": 0.4214, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.661986827850342, |
|
"learning_rate": 6e-08, |
|
"loss": 0.4431, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 9.154074668884277, |
|
"learning_rate": 6.5e-08, |
|
"loss": 0.4056, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.346107482910156, |
|
"learning_rate": 6.999999999999999e-08, |
|
"loss": 0.4079, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 6.246629238128662, |
|
"learning_rate": 7.5e-08, |
|
"loss": 0.3897, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.129103183746338, |
|
"learning_rate": 8e-08, |
|
"loss": 0.3536, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 6.834921836853027, |
|
"learning_rate": 8.5e-08, |
|
"loss": 0.3401, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 8.863313674926758, |
|
"learning_rate": 9e-08, |
|
"loss": 0.3627, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 7.284473896026611, |
|
"learning_rate": 9.499999999999999e-08, |
|
"loss": 0.356, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.816940784454346, |
|
"learning_rate": 1e-07, |
|
"loss": 0.3539, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.022565841674805, |
|
"learning_rate": 9.997382198952879e-08, |
|
"loss": 0.3577, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.51448917388916, |
|
"learning_rate": 9.994764397905758e-08, |
|
"loss": 0.3513, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.016752243041992, |
|
"learning_rate": 9.992146596858639e-08, |
|
"loss": 0.3687, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.761058330535889, |
|
"learning_rate": 9.989528795811518e-08, |
|
"loss": 0.3495, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.693453311920166, |
|
"learning_rate": 9.986910994764397e-08, |
|
"loss": 0.3325, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.049990653991699, |
|
"learning_rate": 9.984293193717277e-08, |
|
"loss": 0.3609, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.6787109375, |
|
"learning_rate": 9.981675392670157e-08, |
|
"loss": 0.3317, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.8440937995910645, |
|
"learning_rate": 9.979057591623035e-08, |
|
"loss": 0.332, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.892059326171875, |
|
"learning_rate": 9.976439790575916e-08, |
|
"loss": 0.3379, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.142931938171387, |
|
"learning_rate": 9.973821989528795e-08, |
|
"loss": 0.3308, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.152769565582275, |
|
"learning_rate": 9.971204188481675e-08, |
|
"loss": 0.3352, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.680529594421387, |
|
"learning_rate": 9.968586387434554e-08, |
|
"loss": 0.3343, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.2912445068359375, |
|
"learning_rate": 9.965968586387435e-08, |
|
"loss": 0.3233, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.337522983551025, |
|
"learning_rate": 9.963350785340313e-08, |
|
"loss": 0.3147, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.505101203918457, |
|
"learning_rate": 9.960732984293193e-08, |
|
"loss": 0.3384, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.080435276031494, |
|
"learning_rate": 9.958115183246073e-08, |
|
"loss": 0.3397, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 7.185611248016357, |
|
"learning_rate": 9.955497382198953e-08, |
|
"loss": 0.3448, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.759621620178223, |
|
"learning_rate": 9.952879581151831e-08, |
|
"loss": 0.3193, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 5.393013000488281, |
|
"learning_rate": 9.950261780104712e-08, |
|
"loss": 0.3134, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.275601387023926, |
|
"learning_rate": 9.947643979057591e-08, |
|
"loss": 0.3227, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.202522277832031, |
|
"learning_rate": 9.94502617801047e-08, |
|
"loss": 0.3366, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 6.580430507659912, |
|
"learning_rate": 9.94240837696335e-08, |
|
"loss": 0.4026, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.456107139587402, |
|
"learning_rate": 9.93979057591623e-08, |
|
"loss": 0.3868, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.176070213317871, |
|
"learning_rate": 9.937172774869109e-08, |
|
"loss": 0.4297, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 9.07604694366455, |
|
"learning_rate": 9.934554973821989e-08, |
|
"loss": 0.4861, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 8.36545181274414, |
|
"learning_rate": 9.931937172774869e-08, |
|
"loss": 0.4939, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 9.944046974182129, |
|
"learning_rate": 9.929319371727748e-08, |
|
"loss": 0.5198, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 12.03496265411377, |
|
"learning_rate": 9.926701570680629e-08, |
|
"loss": 0.5353, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.10308837890625, |
|
"learning_rate": 9.924083769633508e-08, |
|
"loss": 0.5005, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 12.214973449707031, |
|
"learning_rate": 9.921465968586387e-08, |
|
"loss": 0.5879, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.323634147644043, |
|
"learning_rate": 9.918848167539266e-08, |
|
"loss": 0.5031, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 10.742391586303711, |
|
"learning_rate": 9.916230366492147e-08, |
|
"loss": 0.5495, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.457928657531738, |
|
"learning_rate": 9.913612565445025e-08, |
|
"loss": 0.5263, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.978686332702637, |
|
"learning_rate": 9.910994764397906e-08, |
|
"loss": 0.5477, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.699676513671875, |
|
"learning_rate": 9.908376963350785e-08, |
|
"loss": 0.5293, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.737068176269531, |
|
"learning_rate": 9.905759162303664e-08, |
|
"loss": 0.5622, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 10.408597946166992, |
|
"learning_rate": 9.903141361256544e-08, |
|
"loss": 0.5639, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.709553718566895, |
|
"learning_rate": 9.900523560209424e-08, |
|
"loss": 0.5421, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.107832908630371, |
|
"learning_rate": 9.897905759162302e-08, |
|
"loss": 0.5291, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.955233573913574, |
|
"learning_rate": 9.895287958115183e-08, |
|
"loss": 0.5508, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.00100040435791, |
|
"learning_rate": 9.892670157068062e-08, |
|
"loss": 0.5271, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.11552619934082, |
|
"learning_rate": 9.890052356020942e-08, |
|
"loss": 0.5338, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.751993179321289, |
|
"learning_rate": 9.887434554973821e-08, |
|
"loss": 0.5178, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.523124694824219, |
|
"learning_rate": 9.884816753926702e-08, |
|
"loss": 0.5574, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 14.987282752990723, |
|
"learning_rate": 9.88219895287958e-08, |
|
"loss": 0.5406, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.370256423950195, |
|
"learning_rate": 9.87958115183246e-08, |
|
"loss": 0.4805, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.747725486755371, |
|
"learning_rate": 9.87696335078534e-08, |
|
"loss": 0.5259, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.413991928100586, |
|
"learning_rate": 9.87434554973822e-08, |
|
"loss": 0.5225, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.440505981445312, |
|
"learning_rate": 9.871727748691098e-08, |
|
"loss": 0.5511, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.07944107055664, |
|
"learning_rate": 9.869109947643979e-08, |
|
"loss": 0.4913, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.481764793395996, |
|
"learning_rate": 9.866492146596858e-08, |
|
"loss": 0.5618, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.33045768737793, |
|
"learning_rate": 9.863874345549738e-08, |
|
"loss": 0.5099, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.342964172363281, |
|
"learning_rate": 9.861256544502617e-08, |
|
"loss": 0.5454, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.811851501464844, |
|
"learning_rate": 9.858638743455498e-08, |
|
"loss": 0.5118, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.243831634521484, |
|
"learning_rate": 9.856020942408377e-08, |
|
"loss": 0.5191, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.51577377319336, |
|
"learning_rate": 9.853403141361256e-08, |
|
"loss": 0.4916, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.325318336486816, |
|
"learning_rate": 9.850785340314135e-08, |
|
"loss": 0.5178, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.348186492919922, |
|
"learning_rate": 9.848167539267015e-08, |
|
"loss": 0.5066, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.930258750915527, |
|
"learning_rate": 9.845549738219895e-08, |
|
"loss": 0.4747, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.170626640319824, |
|
"learning_rate": 9.842931937172775e-08, |
|
"loss": 0.5285, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.741945266723633, |
|
"learning_rate": 9.840314136125654e-08, |
|
"loss": 0.5686, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.902135848999023, |
|
"learning_rate": 9.837696335078533e-08, |
|
"loss": 0.5909, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.929906845092773, |
|
"learning_rate": 9.835078534031414e-08, |
|
"loss": 0.578, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.585110664367676, |
|
"learning_rate": 9.832460732984292e-08, |
|
"loss": 0.8891, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.564123153686523, |
|
"learning_rate": 9.829842931937173e-08, |
|
"loss": 1.0214, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.137656211853027, |
|
"learning_rate": 9.827225130890052e-08, |
|
"loss": 0.7967, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.988815307617188, |
|
"learning_rate": 9.824607329842931e-08, |
|
"loss": 0.7586, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.484098434448242, |
|
"learning_rate": 9.82198952879581e-08, |
|
"loss": 0.6455, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.596495628356934, |
|
"learning_rate": 9.819371727748691e-08, |
|
"loss": 0.6966, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.861817359924316, |
|
"learning_rate": 9.816753926701569e-08, |
|
"loss": 0.8732, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.609010696411133, |
|
"learning_rate": 9.81413612565445e-08, |
|
"loss": 0.803, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 8.113046646118164, |
|
"learning_rate": 9.811518324607329e-08, |
|
"loss": 0.8018, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 7.831557750701904, |
|
"learning_rate": 9.808900523560209e-08, |
|
"loss": 0.7681, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 9.451202392578125, |
|
"learning_rate": 9.806282722513088e-08, |
|
"loss": 0.6863, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 6.153475284576416, |
|
"learning_rate": 9.803664921465969e-08, |
|
"loss": 0.5344, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.556187152862549, |
|
"learning_rate": 9.801047120418847e-08, |
|
"loss": 0.5072, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.695789337158203, |
|
"learning_rate": 9.798429319371727e-08, |
|
"loss": 0.4882, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.124952793121338, |
|
"learning_rate": 9.795811518324607e-08, |
|
"loss": 0.4023, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.724789142608643, |
|
"learning_rate": 9.793193717277487e-08, |
|
"loss": 0.436, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.060319423675537, |
|
"learning_rate": 9.790575916230365e-08, |
|
"loss": 0.3939, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.216397285461426, |
|
"learning_rate": 9.787958115183246e-08, |
|
"loss": 0.3305, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.101900577545166, |
|
"learning_rate": 9.785340314136125e-08, |
|
"loss": 0.3212, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.3815484046936035, |
|
"learning_rate": 9.782722513089004e-08, |
|
"loss": 0.3153, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.231525897979736, |
|
"learning_rate": 9.780104712041885e-08, |
|
"loss": 0.4177, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.76875638961792, |
|
"learning_rate": 9.777486910994764e-08, |
|
"loss": 0.5512, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.580086708068848, |
|
"learning_rate": 9.774869109947644e-08, |
|
"loss": 0.5036, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.0146894454956055, |
|
"learning_rate": 9.772251308900523e-08, |
|
"loss": 0.5229, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.919321060180664, |
|
"learning_rate": 9.769633507853404e-08, |
|
"loss": 0.6031, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 7.249564170837402, |
|
"learning_rate": 9.767015706806282e-08, |
|
"loss": 0.6529, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.037733554840088, |
|
"learning_rate": 9.764397905759162e-08, |
|
"loss": 0.5845, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.596995830535889, |
|
"learning_rate": 9.761780104712042e-08, |
|
"loss": 0.4844, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.465144157409668, |
|
"learning_rate": 9.759162303664921e-08, |
|
"loss": 0.7091, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 8.079314231872559, |
|
"learning_rate": 9.7565445026178e-08, |
|
"loss": 1.0112, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.964105606079102, |
|
"learning_rate": 9.753926701570681e-08, |
|
"loss": 0.8826, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.710263252258301, |
|
"learning_rate": 9.751308900523559e-08, |
|
"loss": 0.7581, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.680161476135254, |
|
"learning_rate": 9.74869109947644e-08, |
|
"loss": 0.6771, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.1621198654174805, |
|
"learning_rate": 9.746073298429319e-08, |
|
"loss": 0.5596, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.297918796539307, |
|
"learning_rate": 9.743455497382198e-08, |
|
"loss": 0.4256, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.257850646972656, |
|
"learning_rate": 9.740837696335078e-08, |
|
"loss": 0.3977, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.193603992462158, |
|
"learning_rate": 9.738219895287958e-08, |
|
"loss": 0.3957, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.629751682281494, |
|
"learning_rate": 9.735602094240836e-08, |
|
"loss": 0.3431, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.40083122253418, |
|
"learning_rate": 9.732984293193717e-08, |
|
"loss": 0.3242, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.38535737991333, |
|
"learning_rate": 9.730366492146596e-08, |
|
"loss": 0.3124, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.32893180847168, |
|
"learning_rate": 9.727748691099476e-08, |
|
"loss": 0.2964, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.1819586753845215, |
|
"learning_rate": 9.725130890052355e-08, |
|
"loss": 0.2668, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.960391521453857, |
|
"learning_rate": 9.722513089005235e-08, |
|
"loss": 0.2789, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.490744113922119, |
|
"learning_rate": 9.719895287958115e-08, |
|
"loss": 0.2566, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 4.861118316650391, |
|
"learning_rate": 9.717277486910994e-08, |
|
"loss": 0.279, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.386078357696533, |
|
"learning_rate": 9.714659685863873e-08, |
|
"loss": 0.2882, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.265291213989258, |
|
"learning_rate": 9.712041884816754e-08, |
|
"loss": 0.3766, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 6.723097801208496, |
|
"learning_rate": 9.709424083769633e-08, |
|
"loss": 0.5386, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.949530601501465, |
|
"learning_rate": 9.706806282722513e-08, |
|
"loss": 0.4818, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.125253200531006, |
|
"learning_rate": 9.704188481675392e-08, |
|
"loss": 0.4862, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 5.845962047576904, |
|
"learning_rate": 9.701570680628271e-08, |
|
"loss": 0.4502, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.178995609283447, |
|
"learning_rate": 9.698952879581152e-08, |
|
"loss": 0.4086, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.950035095214844, |
|
"learning_rate": 9.696335078534031e-08, |
|
"loss": 0.4167, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.225176811218262, |
|
"learning_rate": 9.693717277486911e-08, |
|
"loss": 0.3945, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.313861846923828, |
|
"learning_rate": 9.69109947643979e-08, |
|
"loss": 0.4021, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.976010799407959, |
|
"learning_rate": 9.68848167539267e-08, |
|
"loss": 0.3821, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.867140769958496, |
|
"learning_rate": 9.685863874345549e-08, |
|
"loss": 0.3901, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.82126522064209, |
|
"learning_rate": 9.683246073298429e-08, |
|
"loss": 0.3798, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.741916656494141, |
|
"learning_rate": 9.680628272251309e-08, |
|
"loss": 0.3842, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.43148946762085, |
|
"learning_rate": 9.678010471204188e-08, |
|
"loss": 0.3762, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.983076095581055, |
|
"learning_rate": 9.675392670157067e-08, |
|
"loss": 0.3496, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.233561992645264, |
|
"learning_rate": 9.672774869109948e-08, |
|
"loss": 0.3225, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.534473419189453, |
|
"learning_rate": 9.670157068062826e-08, |
|
"loss": 0.3009, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.9857306480407715, |
|
"learning_rate": 9.667539267015707e-08, |
|
"loss": 0.4075, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.715012073516846, |
|
"learning_rate": 9.664921465968586e-08, |
|
"loss": 0.5876, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.0109405517578125, |
|
"learning_rate": 9.662303664921465e-08, |
|
"loss": 0.6805, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.128580093383789, |
|
"learning_rate": 9.659685863874345e-08, |
|
"loss": 0.5924, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.6104865074157715, |
|
"learning_rate": 9.657068062827225e-08, |
|
"loss": 0.4888, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.251614093780518, |
|
"learning_rate": 9.654450261780103e-08, |
|
"loss": 0.4286, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.3208184242248535, |
|
"learning_rate": 9.651832460732984e-08, |
|
"loss": 0.3889, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.454063892364502, |
|
"learning_rate": 9.649214659685863e-08, |
|
"loss": 0.4181, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.304567813873291, |
|
"learning_rate": 9.646596858638742e-08, |
|
"loss": 0.3816, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.866218566894531, |
|
"learning_rate": 9.643979057591623e-08, |
|
"loss": 0.3475, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.873610019683838, |
|
"learning_rate": 9.641361256544502e-08, |
|
"loss": 0.3369, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.69268798828125, |
|
"learning_rate": 9.638743455497382e-08, |
|
"loss": 0.3515, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.367419719696045, |
|
"learning_rate": 9.636125654450261e-08, |
|
"loss": 0.3328, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.6179728507995605, |
|
"learning_rate": 9.633507853403142e-08, |
|
"loss": 0.3536, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.710158348083496, |
|
"learning_rate": 9.630890052356021e-08, |
|
"loss": 0.3446, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.824263095855713, |
|
"learning_rate": 9.6282722513089e-08, |
|
"loss": 0.3488, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.474529266357422, |
|
"learning_rate": 9.62565445026178e-08, |
|
"loss": 0.3648, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 5.825191497802734, |
|
"learning_rate": 9.62303664921466e-08, |
|
"loss": 0.3782, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.7290233969688416, |
|
"eval_runtime": 275.8897, |
|
"eval_samples_per_second": 9.801, |
|
"eval_steps_per_second": 1.225, |
|
"eval_wer": 39.41739541479556, |
|
"step": 4111 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.9900221824646, |
|
"learning_rate": 9.620418848167538e-08, |
|
"loss": 0.3366, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.866960525512695, |
|
"learning_rate": 9.617801047120419e-08, |
|
"loss": 0.384, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.261806011199951, |
|
"learning_rate": 9.615183246073298e-08, |
|
"loss": 0.4945, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.251190185546875, |
|
"learning_rate": 9.612565445026178e-08, |
|
"loss": 0.5011, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 7.063992023468018, |
|
"learning_rate": 9.609947643979057e-08, |
|
"loss": 0.4765, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.903201103210449, |
|
"learning_rate": 9.607329842931938e-08, |
|
"loss": 0.4501, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.0563788414001465, |
|
"learning_rate": 9.604712041884816e-08, |
|
"loss": 0.459, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 6.9955363273620605, |
|
"learning_rate": 9.602094240837696e-08, |
|
"loss": 0.4237, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.026924133300781, |
|
"learning_rate": 9.599476439790576e-08, |
|
"loss": 0.3898, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.720476150512695, |
|
"learning_rate": 9.596858638743455e-08, |
|
"loss": 0.4239, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.680058479309082, |
|
"learning_rate": 9.594240837696334e-08, |
|
"loss": 0.4321, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.304168701171875, |
|
"learning_rate": 9.591623036649215e-08, |
|
"loss": 0.5794, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.107504844665527, |
|
"learning_rate": 9.589005235602093e-08, |
|
"loss": 0.8686, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.911792755126953, |
|
"learning_rate": 9.586387434554973e-08, |
|
"loss": 0.9618, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.706320285797119, |
|
"learning_rate": 9.583769633507853e-08, |
|
"loss": 0.8042, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.71433687210083, |
|
"learning_rate": 9.581151832460732e-08, |
|
"loss": 0.6088, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.675333023071289, |
|
"learning_rate": 9.578534031413611e-08, |
|
"loss": 0.551, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.766154766082764, |
|
"learning_rate": 9.575916230366492e-08, |
|
"loss": 0.5113, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.36196231842041, |
|
"learning_rate": 9.573298429319371e-08, |
|
"loss": 0.4785, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.1699395179748535, |
|
"learning_rate": 9.570680628272251e-08, |
|
"loss": 0.4663, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.362920761108398, |
|
"learning_rate": 9.56806282722513e-08, |
|
"loss": 0.4394, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.1348347663879395, |
|
"learning_rate": 9.56544502617801e-08, |
|
"loss": 0.4343, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.3059563636779785, |
|
"learning_rate": 9.56282722513089e-08, |
|
"loss": 0.4535, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 7.463464260101318, |
|
"learning_rate": 9.560209424083769e-08, |
|
"loss": 0.7034, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 7.093417644500732, |
|
"learning_rate": 9.557591623036649e-08, |
|
"loss": 0.8337, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.7604193687438965, |
|
"learning_rate": 9.554973821989528e-08, |
|
"loss": 0.7934, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.093296051025391, |
|
"learning_rate": 9.552356020942409e-08, |
|
"loss": 0.7055, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.788339138031006, |
|
"learning_rate": 9.549738219895288e-08, |
|
"loss": 0.6884, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.2128496170043945, |
|
"learning_rate": 9.547120418848167e-08, |
|
"loss": 0.5722, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.026149272918701, |
|
"learning_rate": 9.544502617801047e-08, |
|
"loss": 0.5802, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.711429119110107, |
|
"learning_rate": 9.541884816753927e-08, |
|
"loss": 0.5129, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.273972988128662, |
|
"learning_rate": 9.539267015706805e-08, |
|
"loss": 0.4283, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.497582912445068, |
|
"learning_rate": 9.536649214659686e-08, |
|
"loss": 0.4075, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.759308815002441, |
|
"learning_rate": 9.534031413612565e-08, |
|
"loss": 0.4438, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.2068305015563965, |
|
"learning_rate": 9.531413612565445e-08, |
|
"loss": 0.4686, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.611216068267822, |
|
"learning_rate": 9.528795811518324e-08, |
|
"loss": 0.4714, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.2035040855407715, |
|
"learning_rate": 9.526178010471204e-08, |
|
"loss": 0.4933, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.796937942504883, |
|
"learning_rate": 9.523560209424082e-08, |
|
"loss": 0.518, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.768625259399414, |
|
"learning_rate": 9.520942408376963e-08, |
|
"loss": 0.5254, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.743659019470215, |
|
"learning_rate": 9.518324607329842e-08, |
|
"loss": 0.5098, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.624993801116943, |
|
"learning_rate": 9.515706806282722e-08, |
|
"loss": 0.4855, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.45778751373291, |
|
"learning_rate": 9.513089005235601e-08, |
|
"loss": 0.5223, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.325904369354248, |
|
"learning_rate": 9.510471204188482e-08, |
|
"loss": 0.5041, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.208452224731445, |
|
"learning_rate": 9.507853403141361e-08, |
|
"loss": 0.5157, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 5.317996501922607, |
|
"learning_rate": 9.50523560209424e-08, |
|
"loss": 0.5614, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 6.383024215698242, |
|
"learning_rate": 9.50261780104712e-08, |
|
"loss": 0.5186, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 4.965906620025635, |
|
"learning_rate": 9.499999999999999e-08, |
|
"loss": 0.4887, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.874698162078857, |
|
"learning_rate": 9.49738219895288e-08, |
|
"loss": 0.4882, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.321093559265137, |
|
"learning_rate": 9.494764397905759e-08, |
|
"loss": 0.4929, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.657257556915283, |
|
"learning_rate": 9.492146596858638e-08, |
|
"loss": 0.4467, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.798694133758545, |
|
"learning_rate": 9.489528795811518e-08, |
|
"loss": 0.5027, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.4486236572265625, |
|
"learning_rate": 9.486910994764398e-08, |
|
"loss": 0.5157, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.78603458404541, |
|
"learning_rate": 9.484293193717276e-08, |
|
"loss": 0.568, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.391395568847656, |
|
"learning_rate": 9.481675392670157e-08, |
|
"loss": 0.5768, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.703619003295898, |
|
"learning_rate": 9.479057591623036e-08, |
|
"loss": 0.5885, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.5529937744140625, |
|
"learning_rate": 9.476439790575916e-08, |
|
"loss": 0.5355, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.757615566253662, |
|
"learning_rate": 9.473821989528795e-08, |
|
"loss": 0.4787, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.5016703605651855, |
|
"learning_rate": 9.471204188481676e-08, |
|
"loss": 0.4435, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.2132368087768555, |
|
"learning_rate": 9.468586387434555e-08, |
|
"loss": 0.5157, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.654526710510254, |
|
"learning_rate": 9.465968586387434e-08, |
|
"loss": 0.5769, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.5138139724731445, |
|
"learning_rate": 9.463350785340314e-08, |
|
"loss": 0.5805, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.938875198364258, |
|
"learning_rate": 9.460732984293194e-08, |
|
"loss": 0.6114, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.941293239593506, |
|
"learning_rate": 9.458115183246072e-08, |
|
"loss": 0.5762, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.395961284637451, |
|
"learning_rate": 9.455497382198953e-08, |
|
"loss": 0.5745, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.585537910461426, |
|
"learning_rate": 9.452879581151832e-08, |
|
"loss": 0.5571, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.933156490325928, |
|
"learning_rate": 9.450261780104711e-08, |
|
"loss": 0.4811, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.242075443267822, |
|
"learning_rate": 9.447643979057591e-08, |
|
"loss": 0.464, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.613156318664551, |
|
"learning_rate": 9.445026178010471e-08, |
|
"loss": 0.5033, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.406403541564941, |
|
"learning_rate": 9.44240837696335e-08, |
|
"loss": 0.4344, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.892160415649414, |
|
"learning_rate": 9.43979057591623e-08, |
|
"loss": 0.4187, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.776142120361328, |
|
"learning_rate": 9.43717277486911e-08, |
|
"loss": 0.4124, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.972835063934326, |
|
"learning_rate": 9.434554973821989e-08, |
|
"loss": 0.4042, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.167374610900879, |
|
"learning_rate": 9.431937172774868e-08, |
|
"loss": 0.4208, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.850512504577637, |
|
"learning_rate": 9.429319371727749e-08, |
|
"loss": 0.3989, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.674014091491699, |
|
"learning_rate": 9.426701570680628e-08, |
|
"loss": 0.3725, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.24482536315918, |
|
"learning_rate": 9.424083769633507e-08, |
|
"loss": 0.376, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.7198710441589355, |
|
"learning_rate": 9.421465968586388e-08, |
|
"loss": 0.3846, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.8929829597473145, |
|
"learning_rate": 9.418848167539266e-08, |
|
"loss": 0.3857, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 8.093165397644043, |
|
"learning_rate": 9.416230366492147e-08, |
|
"loss": 0.3766, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.204592227935791, |
|
"learning_rate": 9.413612565445026e-08, |
|
"loss": 0.3779, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 5.946498870849609, |
|
"learning_rate": 9.410994764397905e-08, |
|
"loss": 0.3719, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.825682163238525, |
|
"learning_rate": 9.408376963350785e-08, |
|
"loss": 0.3891, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 7.207645416259766, |
|
"learning_rate": 9.405759162303665e-08, |
|
"loss": 0.3901, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.809023857116699, |
|
"learning_rate": 9.403141361256543e-08, |
|
"loss": 0.4059, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 6.104794979095459, |
|
"learning_rate": 9.400523560209424e-08, |
|
"loss": 0.4059, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.525493621826172, |
|
"learning_rate": 9.397905759162303e-08, |
|
"loss": 0.4047, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.874316215515137, |
|
"learning_rate": 9.395287958115183e-08, |
|
"loss": 0.514, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.96618127822876, |
|
"learning_rate": 9.392670157068062e-08, |
|
"loss": 0.4962, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.455708026885986, |
|
"learning_rate": 9.390052356020942e-08, |
|
"loss": 0.5045, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.469492435455322, |
|
"learning_rate": 9.387434554973822e-08, |
|
"loss": 0.8458, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 9.225332260131836, |
|
"learning_rate": 9.384816753926701e-08, |
|
"loss": 0.8835, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.529109954833984, |
|
"learning_rate": 9.38219895287958e-08, |
|
"loss": 0.7166, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.395893096923828, |
|
"learning_rate": 9.379581151832461e-08, |
|
"loss": 0.7075, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.16038990020752, |
|
"learning_rate": 9.376963350785339e-08, |
|
"loss": 0.9168, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.322926044464111, |
|
"learning_rate": 9.37434554973822e-08, |
|
"loss": 0.7444, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.18267297744751, |
|
"learning_rate": 9.371727748691099e-08, |
|
"loss": 0.6744, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.361169815063477, |
|
"learning_rate": 9.369109947643978e-08, |
|
"loss": 0.5583, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.085954666137695, |
|
"learning_rate": 9.366492146596858e-08, |
|
"loss": 0.5442, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 7.492279052734375, |
|
"learning_rate": 9.363874345549738e-08, |
|
"loss": 0.5684, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.951526641845703, |
|
"learning_rate": 9.361256544502618e-08, |
|
"loss": 0.5311, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.271228790283203, |
|
"learning_rate": 9.358638743455497e-08, |
|
"loss": 0.473, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.724484443664551, |
|
"learning_rate": 9.356020942408376e-08, |
|
"loss": 0.4471, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.2642669677734375, |
|
"learning_rate": 9.353403141361256e-08, |
|
"loss": 0.4008, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.970279216766357, |
|
"learning_rate": 9.350785340314136e-08, |
|
"loss": 0.3922, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.13707160949707, |
|
"learning_rate": 9.348167539267016e-08, |
|
"loss": 0.4149, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.1920061111450195, |
|
"learning_rate": 9.345549738219895e-08, |
|
"loss": 0.3732, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.059106349945068, |
|
"learning_rate": 9.342931937172774e-08, |
|
"loss": 0.3783, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.317996025085449, |
|
"learning_rate": 9.340314136125655e-08, |
|
"loss": 0.3701, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.347188472747803, |
|
"learning_rate": 9.337696335078533e-08, |
|
"loss": 0.3466, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.118027687072754, |
|
"learning_rate": 9.335078534031414e-08, |
|
"loss": 0.363, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.868067264556885, |
|
"learning_rate": 9.332460732984293e-08, |
|
"loss": 0.3696, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.714309215545654, |
|
"learning_rate": 9.329842931937172e-08, |
|
"loss": 0.3768, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.903509616851807, |
|
"learning_rate": 9.327225130890052e-08, |
|
"loss": 0.3625, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.700974941253662, |
|
"learning_rate": 9.324607329842932e-08, |
|
"loss": 0.3717, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.056822776794434, |
|
"learning_rate": 9.32198952879581e-08, |
|
"loss": 0.3601, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.140659809112549, |
|
"learning_rate": 9.319371727748691e-08, |
|
"loss": 0.3691, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.195953369140625, |
|
"learning_rate": 9.31675392670157e-08, |
|
"loss": 0.3632, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.96120023727417, |
|
"learning_rate": 9.314136125654451e-08, |
|
"loss": 0.3449, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 6.803286075592041, |
|
"learning_rate": 9.311518324607329e-08, |
|
"loss": 0.3601, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.16037654876709, |
|
"learning_rate": 9.30890052356021e-08, |
|
"loss": 0.3478, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.407104969024658, |
|
"learning_rate": 9.306282722513089e-08, |
|
"loss": 0.3498, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.451097011566162, |
|
"learning_rate": 9.303664921465968e-08, |
|
"loss": 0.3574, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.362937927246094, |
|
"learning_rate": 9.301047120418847e-08, |
|
"loss": 0.3477, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.407390117645264, |
|
"learning_rate": 9.298429319371728e-08, |
|
"loss": 0.3575, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.426994800567627, |
|
"learning_rate": 9.295811518324606e-08, |
|
"loss": 0.3454, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.192265510559082, |
|
"learning_rate": 9.293193717277487e-08, |
|
"loss": 0.36, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.969931125640869, |
|
"learning_rate": 9.290575916230366e-08, |
|
"loss": 0.3479, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.602126121520996, |
|
"learning_rate": 9.287958115183245e-08, |
|
"loss": 0.3527, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.191224575042725, |
|
"learning_rate": 9.285340314136125e-08, |
|
"loss": 0.3915, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.79760217666626, |
|
"learning_rate": 9.282722513089005e-08, |
|
"loss": 0.3922, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 8.519009590148926, |
|
"learning_rate": 9.280104712041885e-08, |
|
"loss": 0.4254, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.360806941986084, |
|
"learning_rate": 9.277486910994764e-08, |
|
"loss": 0.4391, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.539173603057861, |
|
"learning_rate": 9.274869109947645e-08, |
|
"loss": 0.3988, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.067492961883545, |
|
"learning_rate": 9.272251308900523e-08, |
|
"loss": 0.3779, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.135078430175781, |
|
"learning_rate": 9.269633507853403e-08, |
|
"loss": 0.3904, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.269252300262451, |
|
"learning_rate": 9.267015706806283e-08, |
|
"loss": 0.3597, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.094182014465332, |
|
"learning_rate": 9.264397905759162e-08, |
|
"loss": 0.3766, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.993140697479248, |
|
"learning_rate": 9.261780104712041e-08, |
|
"loss": 0.3377, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.09189510345459, |
|
"learning_rate": 9.259162303664922e-08, |
|
"loss": 0.3779, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.466849327087402, |
|
"learning_rate": 9.2565445026178e-08, |
|
"loss": 0.3602, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.297680854797363, |
|
"learning_rate": 9.25392670157068e-08, |
|
"loss": 0.3318, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.143691539764404, |
|
"learning_rate": 9.25130890052356e-08, |
|
"loss": 0.334, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.337982654571533, |
|
"learning_rate": 9.248691099476439e-08, |
|
"loss": 0.3343, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.539205551147461, |
|
"learning_rate": 9.246073298429318e-08, |
|
"loss": 0.3527, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.057958126068115, |
|
"learning_rate": 9.243455497382199e-08, |
|
"loss": 0.3441, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.447077751159668, |
|
"learning_rate": 9.240837696335077e-08, |
|
"loss": 0.3368, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.604344844818115, |
|
"learning_rate": 9.238219895287958e-08, |
|
"loss": 0.3357, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.193871021270752, |
|
"learning_rate": 9.235602094240837e-08, |
|
"loss": 0.3841, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.70228910446167, |
|
"learning_rate": 9.232984293193718e-08, |
|
"loss": 0.3991, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.8992743492126465, |
|
"learning_rate": 9.230366492146596e-08, |
|
"loss": 0.4435, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.393523693084717, |
|
"learning_rate": 9.227748691099476e-08, |
|
"loss": 0.4094, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.266127586364746, |
|
"learning_rate": 9.225130890052356e-08, |
|
"loss": 0.3806, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.960921287536621, |
|
"learning_rate": 9.222513089005235e-08, |
|
"loss": 0.3749, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.215056896209717, |
|
"learning_rate": 9.219895287958114e-08, |
|
"loss": 0.3956, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.992290019989014, |
|
"learning_rate": 9.217277486910995e-08, |
|
"loss": 0.414, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.627460479736328, |
|
"learning_rate": 9.214659685863874e-08, |
|
"loss": 0.4508, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.53002405166626, |
|
"learning_rate": 9.212041884816754e-08, |
|
"loss": 0.4771, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.5475172996521, |
|
"learning_rate": 9.209424083769633e-08, |
|
"loss": 0.4636, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.499009132385254, |
|
"learning_rate": 9.206806282722512e-08, |
|
"loss": 0.5024, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 5.928787708282471, |
|
"learning_rate": 9.204188481675393e-08, |
|
"loss": 0.482, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 6.647201061248779, |
|
"learning_rate": 9.201570680628272e-08, |
|
"loss": 0.4901, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.4282355308532715, |
|
"learning_rate": 9.198952879581152e-08, |
|
"loss": 0.509, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 8.04277229309082, |
|
"learning_rate": 9.196335078534031e-08, |
|
"loss": 0.5403, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 8.562540054321289, |
|
"learning_rate": 9.193717277486911e-08, |
|
"loss": 0.5798, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.7510205507278442, |
|
"eval_runtime": 275.1584, |
|
"eval_samples_per_second": 9.827, |
|
"eval_steps_per_second": 1.228, |
|
"eval_wer": 37.65067359962184, |
|
"step": 8222 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 96000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 4111, |
|
"total_flos": 3.23866357530624e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|