diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,113560 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.992557195415909, + "global_step": 189000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.5091, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.6233, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.8e-05, + "loss": 0.5031, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 6.400000000000001e-05, + "loss": 0.5441, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 8e-05, + "loss": 0.6067, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 9.6e-05, + "loss": 0.6633, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011200000000000001, + "loss": 0.4737, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012800000000000002, + "loss": 0.5007, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.000144, + "loss": 0.5474, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016, + "loss": 0.5314, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.00017600000000000002, + "loss": 0.5054, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.000192, + "loss": 0.4627, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020800000000000001, + "loss": 0.477, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022400000000000002, + "loss": 0.7119, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024, + "loss": 0.5165, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025600000000000004, + "loss": 0.4349, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027200000000000005, + "loss": 0.5688, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 0.000288, + "loss": 0.6779, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 0.000304, + "loss": 0.4452, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032, + "loss": 0.4647, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.000336, + "loss": 0.4946, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035200000000000005, + "loss": 0.5094, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036800000000000005, + "loss": 0.5185, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 0.000384, + "loss": 0.5356, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004, + "loss": 0.5815, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00041600000000000003, + "loss": 0.5643, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043200000000000004, + "loss": 0.4796, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044800000000000005, + "loss": 0.445, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 0.000464, + "loss": 0.4843, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 0.00048, + "loss": 0.5037, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.000496, + "loss": 0.5654, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005120000000000001, + "loss": 0.5071, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 0.000528, + "loss": 0.5664, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005440000000000001, + "loss": 0.5739, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 0.00056, + "loss": 0.5219, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 0.000576, + "loss": 0.6144, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 0.000592, + "loss": 0.5183, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 0.000608, + "loss": 0.568, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006240000000000001, + "loss": 0.5101, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 0.00064, + "loss": 0.5742, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 0.000656, + "loss": 0.5648, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 0.000672, + "loss": 0.443, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 0.000688, + "loss": 0.5076, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007040000000000001, + "loss": 0.5114, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 0.00072, + "loss": 0.5598, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007360000000000001, + "loss": 0.4879, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 0.000752, + "loss": 0.5685, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 0.000768, + "loss": 0.656, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 0.000784, + "loss": 0.6048, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008, + "loss": 0.4491, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007999575992707075, + "loss": 0.6598, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 0.000799915198541415, + "loss": 0.5416, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007998727978121224, + "loss": 0.5244, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007998303970828299, + "loss": 0.5654, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007997879963535373, + "loss": 0.5205, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007997455956242447, + "loss": 0.5595, + "step": 560 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007997031948949523, + "loss": 0.5699, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007996607941656597, + "loss": 0.5969, + "step": 580 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007996183934363671, + "loss": 0.5695, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007995759927070746, + "loss": 0.5458, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007995335919777821, + "loss": 0.5963, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007994911912484895, + "loss": 0.4507, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 0.000799448790519197, + "loss": 0.5426, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007994063897899044, + "loss": 0.5186, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007993639890606119, + "loss": 0.585, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007993215883313193, + "loss": 0.6048, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007992791876020268, + "loss": 0.5359, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007992367868727342, + "loss": 0.5294, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007991943861434417, + "loss": 0.5608, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007991519854141492, + "loss": 0.582, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007991095846848566, + "loss": 0.494, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 0.000799067183955564, + "loss": 0.5367, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007990247832262716, + "loss": 0.5862, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 0.000798982382496979, + "loss": 0.5445, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007989399817676864, + "loss": 0.5066, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007988975810383938, + "loss": 0.5879, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007988551803091014, + "loss": 0.5377, + "step": 770 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007988127795798088, + "loss": 0.7732, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007987703788505162, + "loss": 0.5179, + "step": 790 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007987279781212237, + "loss": 0.5247, + "step": 800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007986855773919312, + "loss": 0.5853, + "step": 810 + }, + { + "epoch": 0.03, + "learning_rate": 0.0007986431766626387, + "loss": 0.5689, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007986007759333461, + "loss": 0.5968, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007985583752040535, + "loss": 0.6044, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 0.000798515974474761, + "loss": 0.4833, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007984735737454685, + "loss": 0.4947, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007984311730161759, + "loss": 0.5956, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007983887722868833, + "loss": 0.576, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007983463715575909, + "loss": 0.6091, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007983039708282983, + "loss": 0.6454, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007982615700990057, + "loss": 0.6375, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007982191693697132, + "loss": 0.6126, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007981767686404207, + "loss": 0.6092, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007981343679111281, + "loss": 0.5463, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007980919671818356, + "loss": 0.5512, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 0.000798049566452543, + "loss": 0.6774, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007980071657232505, + "loss": 0.4869, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007979647649939579, + "loss": 0.6152, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007979223642646654, + "loss": 0.5422, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007978799635353728, + "loss": 0.5472, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007978375628060803, + "loss": 0.5053, + "step": 1010 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007977951620767878, + "loss": 0.5338, + "step": 1020 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007977527613474952, + "loss": 0.5285, + "step": 1030 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007977103606182026, + "loss": 0.5798, + "step": 1040 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007976679598889102, + "loss": 0.5947, + "step": 1050 + }, + { + "epoch": 0.04, + "learning_rate": 0.0007976255591596176, + "loss": 0.5286, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 0.000797583158430325, + "loss": 0.57, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007975407577010325, + "loss": 0.4947, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007974983569717399, + "loss": 0.5894, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007974559562424474, + "loss": 0.5931, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007974135555131549, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007973711547838623, + "loss": 0.5528, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007973287540545698, + "loss": 0.5854, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007972863533252773, + "loss": 0.59, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007972439525959847, + "loss": 0.5969, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007972015518666921, + "loss": 0.5385, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007971591511373996, + "loss": 0.5943, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007971167504081071, + "loss": 0.6122, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007970743496788145, + "loss": 0.6109, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 0.000797031948949522, + "loss": 0.6666, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007969895482202294, + "loss": 0.5948, + "step": 1210 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007969471474909369, + "loss": 0.6014, + "step": 1220 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007969047467616444, + "loss": 0.6087, + "step": 1230 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007968623460323518, + "loss": 0.48, + "step": 1240 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007968199453030592, + "loss": 0.5745, + "step": 1250 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007967775445737668, + "loss": 0.6438, + "step": 1260 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007967351438444741, + "loss": 0.5564, + "step": 1270 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007966927431151816, + "loss": 0.5707, + "step": 1280 + }, + { + "epoch": 0.05, + "learning_rate": 0.000796650342385889, + "loss": 0.5371, + "step": 1290 + }, + { + "epoch": 0.05, + "learning_rate": 0.0007966079416565965, + "loss": 0.5599, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 0.000796565540927304, + "loss": 0.5122, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007965231401980114, + "loss": 0.6212, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007964807394687189, + "loss": 0.5456, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007964383387394264, + "loss": 0.5815, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007963959380101338, + "loss": 0.5772, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007963535372808412, + "loss": 0.5782, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007963111365515487, + "loss": 0.6232, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007962687358222562, + "loss": 0.5326, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007962263350929636, + "loss": 0.4819, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007961839343636711, + "loss": 0.6134, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007961415336343785, + "loss": 0.6849, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 0.000796099132905086, + "loss": 0.5251, + "step": 1420 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007960567321757935, + "loss": 0.6464, + "step": 1430 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007960143314465009, + "loss": 0.6525, + "step": 1440 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007959719307172083, + "loss": 0.7665, + "step": 1450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007959295299879159, + "loss": 0.5674, + "step": 1460 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007958871292586233, + "loss": 0.5564, + "step": 1470 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007958447285293307, + "loss": 0.5398, + "step": 1480 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007958023278000382, + "loss": 0.5429, + "step": 1490 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007957599270707457, + "loss": 0.5074, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007957175263414531, + "loss": 0.5954, + "step": 1510 + }, + { + "epoch": 0.06, + "learning_rate": 0.0007956751256121606, + "loss": 0.59, + "step": 1520 + }, + { + "epoch": 0.06, + "learning_rate": 0.000795632724882868, + "loss": 0.6186, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007955903241535755, + "loss": 0.6087, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 0.000795547923424283, + "loss": 0.5917, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007955055226949904, + "loss": 0.5571, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007954631219656978, + "loss": 0.4796, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007954207212364054, + "loss": 0.5097, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007953783205071127, + "loss": 0.5406, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007953359197778202, + "loss": 0.5884, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007952935190485277, + "loss": 0.6562, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007952511183192351, + "loss": 0.5142, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007952087175899426, + "loss": 0.626, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007951663168606501, + "loss": 0.5783, + "step": 1640 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007951239161313574, + "loss": 0.6906, + "step": 1650 + }, + { + "epoch": 0.07, + "learning_rate": 0.000795081515402065, + "loss": 0.5203, + "step": 1660 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007950391146727725, + "loss": 0.5111, + "step": 1670 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007949967139434798, + "loss": 0.5662, + "step": 1680 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007949543132141873, + "loss": 0.6169, + "step": 1690 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007949119124848948, + "loss": 0.7241, + "step": 1700 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007948695117556022, + "loss": 0.5481, + "step": 1710 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007948271110263097, + "loss": 0.577, + "step": 1720 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007947847102970172, + "loss": 0.6324, + "step": 1730 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007947423095677246, + "loss": 0.6097, + "step": 1740 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007946999088384321, + "loss": 0.5152, + "step": 1750 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007946575081091396, + "loss": 0.6197, + "step": 1760 + }, + { + "epoch": 0.07, + "learning_rate": 0.0007946151073798469, + "loss": 0.4844, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007945727066505545, + "loss": 0.7067, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 0.000794530305921262, + "loss": 0.7044, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007944879051919693, + "loss": 0.4668, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007944455044626768, + "loss": 0.6165, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007944031037333842, + "loss": 0.5812, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007943607030040917, + "loss": 0.6261, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007943183022747992, + "loss": 0.6095, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007942759015455066, + "loss": 0.5262, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007942335008162141, + "loss": 0.583, + "step": 1860 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007941911000869216, + "loss": 0.5823, + "step": 1870 + }, + { + "epoch": 0.08, + "learning_rate": 0.000794148699357629, + "loss": 0.6352, + "step": 1880 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007941062986283364, + "loss": 0.528, + "step": 1890 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007940638978990439, + "loss": 0.6448, + "step": 1900 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007940214971697513, + "loss": 0.6355, + "step": 1910 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007939790964404588, + "loss": 0.6835, + "step": 1920 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007939366957111663, + "loss": 0.6171, + "step": 1930 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007938942949818737, + "loss": 0.5404, + "step": 1940 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007938518942525812, + "loss": 0.7288, + "step": 1950 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007938094935232887, + "loss": 0.4581, + "step": 1960 + }, + { + "epoch": 0.08, + "learning_rate": 0.000793767092793996, + "loss": 0.6394, + "step": 1970 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007937246920647035, + "loss": 0.5071, + "step": 1980 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007936822913354111, + "loss": 0.5476, + "step": 1990 + }, + { + "epoch": 0.08, + "learning_rate": 0.0007936398906061184, + "loss": 0.5368, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007935974898768259, + "loss": 0.6201, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007935550891475334, + "loss": 0.6034, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007935126884182408, + "loss": 0.5521, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007934702876889483, + "loss": 0.5972, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007934278869596558, + "loss": 0.6811, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007933854862303631, + "loss": 0.4694, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007933430855010707, + "loss": 0.5801, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007933006847717782, + "loss": 0.6109, + "step": 2080 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007932582840424855, + "loss": 0.5923, + "step": 2090 + }, + { + "epoch": 0.09, + "learning_rate": 0.000793215883313193, + "loss": 0.5756, + "step": 2100 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007931734825839006, + "loss": 0.6424, + "step": 2110 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007931310818546079, + "loss": 0.5795, + "step": 2120 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007930886811253154, + "loss": 0.4616, + "step": 2130 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007930462803960229, + "loss": 0.6285, + "step": 2140 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007930038796667303, + "loss": 0.7408, + "step": 2150 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007929614789374378, + "loss": 0.6614, + "step": 2160 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007929190782081453, + "loss": 0.5848, + "step": 2170 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007928766774788526, + "loss": 0.5521, + "step": 2180 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007928342767495602, + "loss": 0.5437, + "step": 2190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007927918760202675, + "loss": 0.5712, + "step": 2200 + }, + { + "epoch": 0.09, + "learning_rate": 0.000792749475290975, + "loss": 0.606, + "step": 2210 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007927070745616825, + "loss": 0.6201, + "step": 2220 + }, + { + "epoch": 0.09, + "learning_rate": 0.00079266467383239, + "loss": 0.5566, + "step": 2230 + }, + { + "epoch": 0.09, + "learning_rate": 0.0007926222731030974, + "loss": 0.5775, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007925798723738049, + "loss": 0.6357, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007925374716445122, + "loss": 0.4891, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007924950709152198, + "loss": 0.5384, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007924526701859273, + "loss": 0.5898, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007924102694566346, + "loss": 0.5556, + "step": 2290 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007923678687273421, + "loss": 0.7283, + "step": 2300 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007923254679980497, + "loss": 0.508, + "step": 2310 + }, + { + "epoch": 0.1, + "learning_rate": 0.000792283067268757, + "loss": 0.4856, + "step": 2320 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007922406665394645, + "loss": 0.5468, + "step": 2330 + }, + { + "epoch": 0.1, + "learning_rate": 0.000792198265810172, + "loss": 0.5032, + "step": 2340 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007921558650808794, + "loss": 0.5627, + "step": 2350 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007921134643515869, + "loss": 0.6195, + "step": 2360 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007920710636222944, + "loss": 0.5602, + "step": 2370 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007920286628930017, + "loss": 0.6204, + "step": 2380 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007919862621637093, + "loss": 0.5698, + "step": 2390 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007919438614344168, + "loss": 0.5808, + "step": 2400 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007919014607051241, + "loss": 0.6035, + "step": 2410 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007918590599758316, + "loss": 0.6023, + "step": 2420 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007918166592465391, + "loss": 0.5229, + "step": 2430 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007917742585172465, + "loss": 0.596, + "step": 2440 + }, + { + "epoch": 0.1, + "learning_rate": 0.000791731857787954, + "loss": 0.6354, + "step": 2450 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007916894570586615, + "loss": 0.5765, + "step": 2460 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007916470563293689, + "loss": 0.6014, + "step": 2470 + }, + { + "epoch": 0.1, + "learning_rate": 0.0007916046556000764, + "loss": 0.6596, + "step": 2480 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007915622548707839, + "loss": 0.6122, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007915198541414912, + "loss": 0.5848, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007914774534121987, + "loss": 0.5931, + "step": 2510 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007914350526829062, + "loss": 0.5904, + "step": 2520 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007913926519536136, + "loss": 0.5554, + "step": 2530 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007913502512243211, + "loss": 0.5614, + "step": 2540 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007913078504950286, + "loss": 0.4867, + "step": 2550 + }, + { + "epoch": 0.11, + "learning_rate": 0.000791265449765736, + "loss": 0.5689, + "step": 2560 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007912230490364435, + "loss": 0.544, + "step": 2570 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007911806483071509, + "loss": 0.663, + "step": 2580 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007911382475778583, + "loss": 0.559, + "step": 2590 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007910958468485659, + "loss": 0.5476, + "step": 2600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007910534461192733, + "loss": 0.6038, + "step": 2610 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007910110453899807, + "loss": 0.6968, + "step": 2620 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007909686446606882, + "loss": 0.4827, + "step": 2630 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007909262439313957, + "loss": 0.668, + "step": 2640 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007908838432021031, + "loss": 0.5254, + "step": 2650 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007908414424728106, + "loss": 0.6104, + "step": 2660 + }, + { + "epoch": 0.11, + "learning_rate": 0.000790799041743518, + "loss": 0.6409, + "step": 2670 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007907566410142255, + "loss": 0.6134, + "step": 2680 + }, + { + "epoch": 0.11, + "learning_rate": 0.000790714240284933, + "loss": 0.7575, + "step": 2690 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007906718395556403, + "loss": 0.5912, + "step": 2700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0007906294388263478, + "loss": 0.5487, + "step": 2710 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007905870380970554, + "loss": 0.6304, + "step": 2720 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007905446373677627, + "loss": 0.5316, + "step": 2730 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007905022366384702, + "loss": 0.6676, + "step": 2740 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007904598359091777, + "loss": 0.5876, + "step": 2750 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007904174351798851, + "loss": 0.602, + "step": 2760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007903750344505926, + "loss": 0.497, + "step": 2770 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007903326337213001, + "loss": 0.6571, + "step": 2780 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007902902329920074, + "loss": 0.5879, + "step": 2790 + }, + { + "epoch": 0.12, + "learning_rate": 0.000790247832262715, + "loss": 0.6539, + "step": 2800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007902054315334224, + "loss": 0.6027, + "step": 2810 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007901630308041298, + "loss": 0.5757, + "step": 2820 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007901206300748373, + "loss": 0.6731, + "step": 2830 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007900782293455448, + "loss": 0.4728, + "step": 2840 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007900358286162522, + "loss": 0.5114, + "step": 2850 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007899934278869597, + "loss": 0.6053, + "step": 2860 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007899510271576671, + "loss": 0.6404, + "step": 2870 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007899086264283746, + "loss": 0.5954, + "step": 2880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007898662256990821, + "loss": 0.5363, + "step": 2890 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007898238249697895, + "loss": 0.5591, + "step": 2900 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007897814242404969, + "loss": 0.4779, + "step": 2910 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007897390235112045, + "loss": 0.5933, + "step": 2920 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007896966227819119, + "loss": 0.6104, + "step": 2930 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007896542220526193, + "loss": 0.6662, + "step": 2940 + }, + { + "epoch": 0.12, + "learning_rate": 0.0007896118213233268, + "loss": 0.5963, + "step": 2950 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007895694205940343, + "loss": 0.5619, + "step": 2960 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007895270198647417, + "loss": 0.6362, + "step": 2970 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007894846191354492, + "loss": 0.6881, + "step": 2980 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007894422184061566, + "loss": 0.5541, + "step": 2990 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007893998176768641, + "loss": 0.5718, + "step": 3000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007893574169475716, + "loss": 0.653, + "step": 3010 + }, + { + "epoch": 0.13, + "learning_rate": 0.000789315016218279, + "loss": 0.6233, + "step": 3020 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007892726154889864, + "loss": 0.6744, + "step": 3030 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007892302147596939, + "loss": 0.6081, + "step": 3040 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007891878140304014, + "loss": 0.5098, + "step": 3050 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007891454133011088, + "loss": 0.5829, + "step": 3060 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007891030125718163, + "loss": 0.5751, + "step": 3070 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007890606118425238, + "loss": 0.6398, + "step": 3080 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007890182111132312, + "loss": 0.51, + "step": 3090 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007889758103839387, + "loss": 0.5058, + "step": 3100 + }, + { + "epoch": 0.13, + "learning_rate": 0.000788933409654646, + "loss": 0.551, + "step": 3110 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007888910089253535, + "loss": 0.6021, + "step": 3120 + }, + { + "epoch": 0.13, + "learning_rate": 0.000788848608196061, + "loss": 0.5232, + "step": 3130 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007888062074667685, + "loss": 0.5788, + "step": 3140 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007887638067374759, + "loss": 0.6711, + "step": 3150 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007887214060081834, + "loss": 0.6358, + "step": 3160 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007886790052788909, + "loss": 0.6255, + "step": 3170 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007886366045495983, + "loss": 0.6984, + "step": 3180 + }, + { + "epoch": 0.13, + "learning_rate": 0.0007885942038203057, + "loss": 0.6655, + "step": 3190 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007885518030910131, + "loss": 0.6883, + "step": 3200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007885094023617207, + "loss": 0.6157, + "step": 3210 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007884670016324281, + "loss": 0.5707, + "step": 3220 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007884246009031355, + "loss": 0.6344, + "step": 3230 + }, + { + "epoch": 0.14, + "learning_rate": 0.000788382200173843, + "loss": 0.5579, + "step": 3240 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007883397994445505, + "loss": 0.6221, + "step": 3250 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007882973987152579, + "loss": 0.6806, + "step": 3260 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007882549979859654, + "loss": 0.6406, + "step": 3270 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007882125972566729, + "loss": 0.6485, + "step": 3280 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007881701965273803, + "loss": 0.6076, + "step": 3290 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007881277957980878, + "loss": 0.7057, + "step": 3300 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007880853950687952, + "loss": 0.6198, + "step": 3310 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007880429943395026, + "loss": 0.694, + "step": 3320 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007880005936102102, + "loss": 0.6509, + "step": 3330 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007879581928809176, + "loss": 0.6122, + "step": 3340 + }, + { + "epoch": 0.14, + "learning_rate": 0.000787915792151625, + "loss": 0.6295, + "step": 3350 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007878733914223325, + "loss": 0.7357, + "step": 3360 + }, + { + "epoch": 0.14, + "learning_rate": 0.00078783099069304, + "loss": 0.5658, + "step": 3370 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007877885899637474, + "loss": 0.692, + "step": 3380 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007877461892344549, + "loss": 0.6278, + "step": 3390 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007877037885051623, + "loss": 0.6803, + "step": 3400 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007876613877758698, + "loss": 0.5734, + "step": 3410 + }, + { + "epoch": 0.14, + "learning_rate": 0.0007876189870465772, + "loss": 0.6874, + "step": 3420 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007875765863172847, + "loss": 0.5183, + "step": 3430 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007875341855879921, + "loss": 0.5669, + "step": 3440 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007874917848586996, + "loss": 0.6534, + "step": 3450 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007874493841294071, + "loss": 0.6402, + "step": 3460 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007874069834001145, + "loss": 0.5163, + "step": 3470 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007873645826708219, + "loss": 0.6744, + "step": 3480 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007873221819415295, + "loss": 0.6753, + "step": 3490 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007872797812122369, + "loss": 0.5066, + "step": 3500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007872373804829443, + "loss": 0.5706, + "step": 3510 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007871949797536518, + "loss": 0.6045, + "step": 3520 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007871525790243593, + "loss": 0.5469, + "step": 3530 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007871101782950667, + "loss": 0.5884, + "step": 3540 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007870677775657742, + "loss": 0.4794, + "step": 3550 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007870253768364816, + "loss": 0.5506, + "step": 3560 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007869829761071891, + "loss": 0.6138, + "step": 3570 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007869405753778966, + "loss": 0.7031, + "step": 3580 + }, + { + "epoch": 0.15, + "learning_rate": 0.000786898174648604, + "loss": 0.6951, + "step": 3590 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007868557739193114, + "loss": 0.6586, + "step": 3600 + }, + { + "epoch": 0.15, + "learning_rate": 0.000786813373190019, + "loss": 0.5815, + "step": 3610 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007867709724607264, + "loss": 0.7233, + "step": 3620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007867285717314338, + "loss": 0.6076, + "step": 3630 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007866861710021412, + "loss": 0.5623, + "step": 3640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007866437702728487, + "loss": 0.6109, + "step": 3650 + }, + { + "epoch": 0.15, + "learning_rate": 0.0007866013695435562, + "loss": 0.6527, + "step": 3660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007865589688142636, + "loss": 0.5235, + "step": 3670 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007865165680849711, + "loss": 0.5523, + "step": 3680 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007864741673556786, + "loss": 0.6074, + "step": 3690 + }, + { + "epoch": 0.16, + "learning_rate": 0.000786431766626386, + "loss": 0.6749, + "step": 3700 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007863893658970935, + "loss": 0.584, + "step": 3710 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007863469651678009, + "loss": 0.5507, + "step": 3720 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007863045644385083, + "loss": 0.5906, + "step": 3730 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007862621637092158, + "loss": 0.5458, + "step": 3740 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007862197629799233, + "loss": 0.6944, + "step": 3750 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007861773622506307, + "loss": 0.5771, + "step": 3760 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007861349615213382, + "loss": 0.5659, + "step": 3770 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007860925607920457, + "loss": 0.5658, + "step": 3780 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007860501600627531, + "loss": 0.7003, + "step": 3790 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007860077593334605, + "loss": 0.5329, + "step": 3800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007859653586041681, + "loss": 0.5499, + "step": 3810 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007859229578748755, + "loss": 0.6325, + "step": 3820 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007858805571455829, + "loss": 0.6094, + "step": 3830 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007858381564162904, + "loss": 0.7074, + "step": 3840 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007857957556869978, + "loss": 0.6479, + "step": 3850 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007857533549577053, + "loss": 0.6194, + "step": 3860 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007857109542284128, + "loss": 0.5824, + "step": 3870 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007856685534991202, + "loss": 0.7079, + "step": 3880 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007856261527698277, + "loss": 0.5648, + "step": 3890 + }, + { + "epoch": 0.16, + "learning_rate": 0.0007855837520405352, + "loss": 0.7314, + "step": 3900 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007855413513112426, + "loss": 0.6453, + "step": 3910 + }, + { + "epoch": 0.17, + "learning_rate": 0.00078549895058195, + "loss": 0.555, + "step": 3920 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007854565498526575, + "loss": 0.4925, + "step": 3930 + }, + { + "epoch": 0.17, + "learning_rate": 0.000785414149123365, + "loss": 0.5157, + "step": 3940 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007853717483940724, + "loss": 0.5987, + "step": 3950 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007853293476647799, + "loss": 0.7067, + "step": 3960 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007852869469354873, + "loss": 0.436, + "step": 3970 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007852445462061948, + "loss": 0.643, + "step": 3980 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007852021454769023, + "loss": 0.6058, + "step": 3990 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007851597447476097, + "loss": 0.6332, + "step": 4000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007851173440183171, + "loss": 0.5234, + "step": 4010 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007850749432890247, + "loss": 0.4634, + "step": 4020 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007850325425597321, + "loss": 0.5965, + "step": 4030 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007849901418304395, + "loss": 0.6243, + "step": 4040 + }, + { + "epoch": 0.17, + "learning_rate": 0.000784947741101147, + "loss": 0.6651, + "step": 4050 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007849053403718544, + "loss": 0.6369, + "step": 4060 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007848629396425619, + "loss": 0.5679, + "step": 4070 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007848205389132694, + "loss": 0.6032, + "step": 4080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007847781381839767, + "loss": 0.5698, + "step": 4090 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007847357374546843, + "loss": 0.6337, + "step": 4100 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007846933367253918, + "loss": 0.6551, + "step": 4110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007846509359960991, + "loss": 0.573, + "step": 4120 + }, + { + "epoch": 0.17, + "learning_rate": 0.0007846085352668066, + "loss": 0.6287, + "step": 4130 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007845661345375142, + "loss": 0.6004, + "step": 4140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007845237338082215, + "loss": 0.5903, + "step": 4150 + }, + { + "epoch": 0.18, + "learning_rate": 0.000784481333078929, + "loss": 0.5827, + "step": 4160 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007844389323496364, + "loss": 0.5596, + "step": 4170 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007843965316203439, + "loss": 0.5913, + "step": 4180 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007843541308910514, + "loss": 0.5476, + "step": 4190 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007843117301617588, + "loss": 0.5515, + "step": 4200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007842693294324662, + "loss": 0.5908, + "step": 4210 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007842269287031738, + "loss": 0.5775, + "step": 4220 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007841845279738812, + "loss": 0.6579, + "step": 4230 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007841421272445886, + "loss": 0.6182, + "step": 4240 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007840997265152961, + "loss": 0.6763, + "step": 4250 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007840573257860036, + "loss": 0.5892, + "step": 4260 + }, + { + "epoch": 0.18, + "learning_rate": 0.000784014925056711, + "loss": 0.7075, + "step": 4270 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007839725243274185, + "loss": 0.5989, + "step": 4280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007839301235981259, + "loss": 0.7097, + "step": 4290 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007838877228688334, + "loss": 0.5612, + "step": 4300 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007838453221395409, + "loss": 0.6322, + "step": 4310 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007838029214102483, + "loss": 0.5991, + "step": 4320 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007837605206809557, + "loss": 0.5368, + "step": 4330 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007837181199516633, + "loss": 0.6674, + "step": 4340 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007836757192223706, + "loss": 0.5854, + "step": 4350 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007836333184930781, + "loss": 0.644, + "step": 4360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0007835909177637856, + "loss": 0.6525, + "step": 4370 + }, + { + "epoch": 0.19, + "learning_rate": 0.000783548517034493, + "loss": 0.5781, + "step": 4380 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007835061163052005, + "loss": 0.6052, + "step": 4390 + }, + { + "epoch": 0.19, + "learning_rate": 0.000783463715575908, + "loss": 0.5652, + "step": 4400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007834213148466153, + "loss": 0.6408, + "step": 4410 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007833789141173229, + "loss": 0.6128, + "step": 4420 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007833365133880304, + "loss": 0.5614, + "step": 4430 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007832941126587377, + "loss": 0.5681, + "step": 4440 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007832517119294452, + "loss": 0.6266, + "step": 4450 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007832093112001527, + "loss": 0.6191, + "step": 4460 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007831669104708601, + "loss": 0.5644, + "step": 4470 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007831245097415676, + "loss": 0.6388, + "step": 4480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007830821090122751, + "loss": 0.574, + "step": 4490 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007830397082829825, + "loss": 0.616, + "step": 4500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00078299730755369, + "loss": 0.5824, + "step": 4510 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007829549068243975, + "loss": 0.567, + "step": 4520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007829125060951048, + "loss": 0.5877, + "step": 4530 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007828701053658123, + "loss": 0.6796, + "step": 4540 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007828277046365199, + "loss": 0.6504, + "step": 4550 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007827853039072272, + "loss": 0.5854, + "step": 4560 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007827429031779347, + "loss": 0.6635, + "step": 4570 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007827005024486422, + "loss": 0.6903, + "step": 4580 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007826581017193496, + "loss": 0.7791, + "step": 4590 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007826157009900571, + "loss": 0.6974, + "step": 4600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0007825733002607646, + "loss": 0.5247, + "step": 4610 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007825308995314719, + "loss": 0.7485, + "step": 4620 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007824884988021795, + "loss": 0.6153, + "step": 4630 + }, + { + "epoch": 0.2, + "learning_rate": 0.000782446098072887, + "loss": 0.5295, + "step": 4640 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007824036973435943, + "loss": 0.5912, + "step": 4650 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007823612966143018, + "loss": 0.6401, + "step": 4660 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007823188958850092, + "loss": 0.6331, + "step": 4670 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007822764951557167, + "loss": 0.6078, + "step": 4680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007822340944264242, + "loss": 0.6459, + "step": 4690 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007821916936971315, + "loss": 0.6144, + "step": 4700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007821492929678391, + "loss": 0.6134, + "step": 4710 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007821068922385466, + "loss": 0.5599, + "step": 4720 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007820644915092539, + "loss": 0.5863, + "step": 4730 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007820220907799614, + "loss": 0.5725, + "step": 4740 + }, + { + "epoch": 0.2, + "learning_rate": 0.000781979690050669, + "loss": 0.5583, + "step": 4750 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007819372893213763, + "loss": 0.7017, + "step": 4760 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007818948885920838, + "loss": 0.4631, + "step": 4770 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007818524878627913, + "loss": 0.6202, + "step": 4780 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007818100871334987, + "loss": 0.6626, + "step": 4790 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007817676864042062, + "loss": 0.595, + "step": 4800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007817252856749137, + "loss": 0.5791, + "step": 4810 + }, + { + "epoch": 0.2, + "learning_rate": 0.000781682884945621, + "loss": 0.5932, + "step": 4820 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007816404842163286, + "loss": 0.5971, + "step": 4830 + }, + { + "epoch": 0.2, + "learning_rate": 0.0007815980834870361, + "loss": 0.6643, + "step": 4840 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007815556827577434, + "loss": 0.6145, + "step": 4850 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007815132820284509, + "loss": 0.6516, + "step": 4860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007814708812991585, + "loss": 0.6677, + "step": 4870 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007814284805698658, + "loss": 0.6253, + "step": 4880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007813860798405733, + "loss": 0.6701, + "step": 4890 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007813436791112808, + "loss": 0.638, + "step": 4900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007813012783819882, + "loss": 0.5344, + "step": 4910 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007812588776526957, + "loss": 0.6352, + "step": 4920 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007812164769234032, + "loss": 0.6374, + "step": 4930 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007811740761941105, + "loss": 0.563, + "step": 4940 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007811316754648181, + "loss": 0.6436, + "step": 4950 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007810892747355255, + "loss": 0.5727, + "step": 4960 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007810468740062329, + "loss": 0.6346, + "step": 4970 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007810044732769404, + "loss": 0.5473, + "step": 4980 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007809620725476479, + "loss": 0.5893, + "step": 4990 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007809196718183553, + "loss": 0.5588, + "step": 5000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007808772710890628, + "loss": 0.5835, + "step": 5010 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007808348703597701, + "loss": 0.6042, + "step": 5020 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007807924696304777, + "loss": 0.5172, + "step": 5030 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007807500689011852, + "loss": 0.6127, + "step": 5040 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007807076681718925, + "loss": 0.6139, + "step": 5050 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007806652674426, + "loss": 0.6042, + "step": 5060 + }, + { + "epoch": 0.21, + "learning_rate": 0.0007806228667133075, + "loss": 0.5899, + "step": 5070 + }, + { + "epoch": 0.21, + "learning_rate": 0.000780580465984015, + "loss": 0.5925, + "step": 5080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007805380652547224, + "loss": 0.6487, + "step": 5090 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007804956645254299, + "loss": 0.6454, + "step": 5100 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007804532637961373, + "loss": 0.4416, + "step": 5110 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007804108630668448, + "loss": 0.5783, + "step": 5120 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007803684623375523, + "loss": 0.6036, + "step": 5130 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007803260616082596, + "loss": 0.6761, + "step": 5140 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007802836608789671, + "loss": 0.552, + "step": 5150 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007802412601496747, + "loss": 0.4905, + "step": 5160 + }, + { + "epoch": 0.22, + "learning_rate": 0.000780198859420382, + "loss": 0.5744, + "step": 5170 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007801564586910895, + "loss": 0.638, + "step": 5180 + }, + { + "epoch": 0.22, + "learning_rate": 0.000780114057961797, + "loss": 0.6112, + "step": 5190 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007800716572325044, + "loss": 0.5127, + "step": 5200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007800292565032119, + "loss": 0.6041, + "step": 5210 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007799868557739194, + "loss": 0.7574, + "step": 5220 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007799444550446267, + "loss": 0.6615, + "step": 5230 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007799020543153343, + "loss": 0.6202, + "step": 5240 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007798596535860418, + "loss": 0.5936, + "step": 5250 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007798172528567491, + "loss": 0.535, + "step": 5260 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007797748521274566, + "loss": 0.695, + "step": 5270 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007797324513981641, + "loss": 0.6237, + "step": 5280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007796900506688715, + "loss": 0.673, + "step": 5290 + }, + { + "epoch": 0.22, + "learning_rate": 0.000779647649939579, + "loss": 0.5976, + "step": 5300 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007796052492102865, + "loss": 0.6039, + "step": 5310 + }, + { + "epoch": 0.22, + "learning_rate": 0.0007795628484809939, + "loss": 0.5627, + "step": 5320 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007795204477517014, + "loss": 0.5844, + "step": 5330 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007794780470224088, + "loss": 0.7201, + "step": 5340 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007794356462931162, + "loss": 0.5847, + "step": 5350 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007793932455638238, + "loss": 0.6687, + "step": 5360 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007793508448345312, + "loss": 0.635, + "step": 5370 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007793084441052386, + "loss": 0.4912, + "step": 5380 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007792660433759461, + "loss": 0.6875, + "step": 5390 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007792236426466536, + "loss": 0.5787, + "step": 5400 + }, + { + "epoch": 0.23, + "learning_rate": 0.000779181241917361, + "loss": 0.578, + "step": 5410 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007791388411880685, + "loss": 0.5157, + "step": 5420 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790964404587759, + "loss": 0.5754, + "step": 5430 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790540397294834, + "loss": 0.6248, + "step": 5440 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007790116390001909, + "loss": 0.6231, + "step": 5450 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007789692382708983, + "loss": 0.6599, + "step": 5460 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007789268375416057, + "loss": 0.601, + "step": 5470 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007788844368123133, + "loss": 0.542, + "step": 5480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007788420360830207, + "loss": 0.5781, + "step": 5490 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007787996353537281, + "loss": 0.5232, + "step": 5500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007787572346244356, + "loss": 0.6865, + "step": 5510 + }, + { + "epoch": 0.23, + "learning_rate": 0.000778714833895143, + "loss": 0.664, + "step": 5520 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007786724331658505, + "loss": 0.6189, + "step": 5530 + }, + { + "epoch": 0.23, + "learning_rate": 0.000778630032436558, + "loss": 0.6569, + "step": 5540 + }, + { + "epoch": 0.23, + "learning_rate": 0.0007785876317072653, + "loss": 0.5482, + "step": 5550 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007785452309779729, + "loss": 0.6417, + "step": 5560 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007785028302486803, + "loss": 0.6044, + "step": 5570 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007784604295193877, + "loss": 0.5786, + "step": 5580 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007784180287900952, + "loss": 0.6359, + "step": 5590 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007783756280608027, + "loss": 0.5617, + "step": 5600 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007783332273315101, + "loss": 0.7343, + "step": 5610 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007782908266022176, + "loss": 0.5628, + "step": 5620 + }, + { + "epoch": 0.24, + "learning_rate": 0.000778248425872925, + "loss": 0.5783, + "step": 5630 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007782060251436325, + "loss": 0.6466, + "step": 5640 + }, + { + "epoch": 0.24, + "learning_rate": 0.00077816362441434, + "loss": 0.4999, + "step": 5650 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007781212236850474, + "loss": 0.6204, + "step": 5660 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007780788229557548, + "loss": 0.5278, + "step": 5670 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007780364222264623, + "loss": 0.5897, + "step": 5680 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007779940214971698, + "loss": 0.5271, + "step": 5690 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007779516207678772, + "loss": 0.6375, + "step": 5700 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007779092200385847, + "loss": 0.658, + "step": 5710 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007778668193092922, + "loss": 0.5876, + "step": 5720 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007778244185799996, + "loss": 0.6133, + "step": 5730 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007777820178507071, + "loss": 0.5796, + "step": 5740 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007777396171214145, + "loss": 0.5593, + "step": 5750 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007776972163921219, + "loss": 0.5959, + "step": 5760 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007776548156628295, + "loss": 0.5671, + "step": 5770 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007776124149335369, + "loss": 0.5223, + "step": 5780 + }, + { + "epoch": 0.24, + "learning_rate": 0.0007775700142042443, + "loss": 0.7194, + "step": 5790 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007775276134749518, + "loss": 0.6266, + "step": 5800 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007774852127456593, + "loss": 0.5637, + "step": 5810 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007774428120163667, + "loss": 0.5577, + "step": 5820 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007774004112870742, + "loss": 0.5734, + "step": 5830 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007773580105577817, + "loss": 0.57, + "step": 5840 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007773156098284891, + "loss": 0.6176, + "step": 5850 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007772732090991966, + "loss": 0.6607, + "step": 5860 + }, + { + "epoch": 0.25, + "learning_rate": 0.000777230808369904, + "loss": 0.5944, + "step": 5870 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007771884076406114, + "loss": 0.7022, + "step": 5880 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007771460069113189, + "loss": 0.6541, + "step": 5890 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007771036061820264, + "loss": 0.5973, + "step": 5900 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007770612054527338, + "loss": 0.5962, + "step": 5910 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007770188047234413, + "loss": 0.6234, + "step": 5920 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007769764039941488, + "loss": 0.6357, + "step": 5930 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007769340032648562, + "loss": 0.5536, + "step": 5940 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007768916025355636, + "loss": 0.6613, + "step": 5950 + }, + { + "epoch": 0.25, + "learning_rate": 0.000776849201806271, + "loss": 0.6475, + "step": 5960 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007768068010769786, + "loss": 0.596, + "step": 5970 + }, + { + "epoch": 0.25, + "learning_rate": 0.000776764400347686, + "loss": 0.5162, + "step": 5980 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007767219996183935, + "loss": 0.6393, + "step": 5990 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007766795988891009, + "loss": 0.5547, + "step": 6000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007766371981598084, + "loss": 0.6589, + "step": 6010 + }, + { + "epoch": 0.25, + "learning_rate": 0.0007765947974305159, + "loss": 0.5306, + "step": 6020 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007765523967012233, + "loss": 0.6374, + "step": 6030 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007765099959719307, + "loss": 0.5988, + "step": 6040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007764675952426383, + "loss": 0.5984, + "step": 6050 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007764251945133457, + "loss": 0.6709, + "step": 6060 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007763827937840531, + "loss": 0.5298, + "step": 6070 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007763403930547605, + "loss": 0.6266, + "step": 6080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007762979923254681, + "loss": 0.5356, + "step": 6090 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007762555915961755, + "loss": 0.6789, + "step": 6100 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007762131908668829, + "loss": 0.5834, + "step": 6110 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007761707901375904, + "loss": 0.7174, + "step": 6120 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007761283894082979, + "loss": 0.6057, + "step": 6130 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007760859886790053, + "loss": 0.5919, + "step": 6140 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007760435879497128, + "loss": 0.547, + "step": 6150 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007760011872204202, + "loss": 0.6068, + "step": 6160 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007759587864911277, + "loss": 0.593, + "step": 6170 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007759163857618352, + "loss": 0.6016, + "step": 6180 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007758739850325426, + "loss": 0.5751, + "step": 6190 + }, + { + "epoch": 0.26, + "learning_rate": 0.00077583158430325, + "loss": 0.6359, + "step": 6200 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007757891835739575, + "loss": 0.5639, + "step": 6210 + }, + { + "epoch": 0.26, + "learning_rate": 0.000775746782844665, + "loss": 0.5254, + "step": 6220 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007757043821153724, + "loss": 0.6913, + "step": 6230 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007756619813860798, + "loss": 0.5439, + "step": 6240 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007756195806567874, + "loss": 0.6438, + "step": 6250 + }, + { + "epoch": 0.26, + "learning_rate": 0.0007755771799274948, + "loss": 0.6363, + "step": 6260 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007755347791982022, + "loss": 0.633, + "step": 6270 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007754923784689097, + "loss": 0.5649, + "step": 6280 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007754499777396172, + "loss": 0.6257, + "step": 6290 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007754075770103246, + "loss": 0.5736, + "step": 6300 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007753651762810321, + "loss": 0.5462, + "step": 6310 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007753227755517395, + "loss": 0.6175, + "step": 6320 + }, + { + "epoch": 0.27, + "learning_rate": 0.000775280374822447, + "loss": 0.6101, + "step": 6330 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007752379740931545, + "loss": 0.5474, + "step": 6340 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007751955733638619, + "loss": 0.7151, + "step": 6350 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007751531726345693, + "loss": 0.6211, + "step": 6360 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007751107719052769, + "loss": 0.574, + "step": 6370 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007750683711759843, + "loss": 0.6345, + "step": 6380 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007750259704466917, + "loss": 0.6718, + "step": 6390 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007749835697173992, + "loss": 0.5703, + "step": 6400 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007749411689881066, + "loss": 0.7077, + "step": 6410 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007748987682588141, + "loss": 0.5817, + "step": 6420 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007748563675295216, + "loss": 0.6521, + "step": 6430 + }, + { + "epoch": 0.27, + "learning_rate": 0.000774813966800229, + "loss": 0.6118, + "step": 6440 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007747715660709365, + "loss": 0.5046, + "step": 6450 + }, + { + "epoch": 0.27, + "learning_rate": 0.000774729165341644, + "loss": 0.6397, + "step": 6460 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007746867646123514, + "loss": 0.6057, + "step": 6470 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007746443638830588, + "loss": 0.5903, + "step": 6480 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007746019631537662, + "loss": 0.5182, + "step": 6490 + }, + { + "epoch": 0.27, + "learning_rate": 0.0007745595624244737, + "loss": 0.6889, + "step": 6500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007745171616951812, + "loss": 0.5525, + "step": 6510 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007744747609658886, + "loss": 0.5793, + "step": 6520 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007744323602365961, + "loss": 0.4952, + "step": 6530 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007743899595073036, + "loss": 0.5174, + "step": 6540 + }, + { + "epoch": 0.28, + "learning_rate": 0.000774347558778011, + "loss": 0.5308, + "step": 6550 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007743051580487184, + "loss": 0.5757, + "step": 6560 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007742627573194259, + "loss": 0.6377, + "step": 6570 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007742203565901334, + "loss": 0.5611, + "step": 6580 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007741779558608408, + "loss": 0.5419, + "step": 6590 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007741355551315483, + "loss": 0.593, + "step": 6600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007740931544022557, + "loss": 0.6184, + "step": 6610 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007740507536729632, + "loss": 0.6429, + "step": 6620 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007740083529436707, + "loss": 0.5284, + "step": 6630 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007739659522143781, + "loss": 0.6045, + "step": 6640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007739235514850855, + "loss": 0.601, + "step": 6650 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007738811507557931, + "loss": 0.6243, + "step": 6660 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007738387500265005, + "loss": 0.6177, + "step": 6670 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007737963492972079, + "loss": 0.5173, + "step": 6680 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007737539485679154, + "loss": 0.6091, + "step": 6690 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007737115478386229, + "loss": 0.6482, + "step": 6700 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007736691471093303, + "loss": 0.7593, + "step": 6710 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007736267463800378, + "loss": 0.6338, + "step": 6720 + }, + { + "epoch": 0.28, + "learning_rate": 0.0007735843456507452, + "loss": 0.6173, + "step": 6730 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007735419449214527, + "loss": 0.5905, + "step": 6740 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007734995441921602, + "loss": 0.5481, + "step": 6750 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007734571434628676, + "loss": 0.589, + "step": 6760 + }, + { + "epoch": 0.29, + "learning_rate": 0.000773414742733575, + "loss": 0.5981, + "step": 6770 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007733723420042826, + "loss": 0.6859, + "step": 6780 + }, + { + "epoch": 0.29, + "learning_rate": 0.00077332994127499, + "loss": 0.5649, + "step": 6790 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007732875405456974, + "loss": 0.6478, + "step": 6800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007732451398164049, + "loss": 0.5209, + "step": 6810 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007732027390871123, + "loss": 0.6234, + "step": 6820 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007731603383578198, + "loss": 0.6824, + "step": 6830 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007731179376285273, + "loss": 0.5669, + "step": 6840 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007730755368992346, + "loss": 0.6288, + "step": 6850 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007730331361699422, + "loss": 0.5869, + "step": 6860 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007729907354406497, + "loss": 0.5, + "step": 6870 + }, + { + "epoch": 0.29, + "learning_rate": 0.000772948334711357, + "loss": 0.6136, + "step": 6880 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007729059339820645, + "loss": 0.6823, + "step": 6890 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007728635332527721, + "loss": 0.5519, + "step": 6900 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007728211325234794, + "loss": 0.5885, + "step": 6910 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007727787317941869, + "loss": 0.6006, + "step": 6920 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007727363310648944, + "loss": 0.7368, + "step": 6930 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007726939303356018, + "loss": 0.5831, + "step": 6940 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007726515296063093, + "loss": 0.562, + "step": 6950 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007726091288770168, + "loss": 0.6877, + "step": 6960 + }, + { + "epoch": 0.29, + "learning_rate": 0.0007725667281477241, + "loss": 0.6029, + "step": 6970 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007725243274184317, + "loss": 0.56, + "step": 6980 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007724819266891392, + "loss": 0.6132, + "step": 6990 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007724395259598465, + "loss": 0.5266, + "step": 7000 + }, + { + "epoch": 0.3, + "learning_rate": 0.000772397125230554, + "loss": 0.6888, + "step": 7010 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007723547245012614, + "loss": 0.645, + "step": 7020 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007723123237719689, + "loss": 0.6033, + "step": 7030 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007722699230426764, + "loss": 0.5054, + "step": 7040 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007722275223133838, + "loss": 0.5736, + "step": 7050 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007721851215840913, + "loss": 0.6939, + "step": 7060 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007721427208547988, + "loss": 0.6571, + "step": 7070 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007721003201255062, + "loss": 0.6274, + "step": 7080 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007720579193962136, + "loss": 0.5362, + "step": 7090 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007720155186669211, + "loss": 0.519, + "step": 7100 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007719731179376285, + "loss": 0.6264, + "step": 7110 + }, + { + "epoch": 0.3, + "learning_rate": 0.000771930717208336, + "loss": 0.6095, + "step": 7120 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007718883164790435, + "loss": 0.5955, + "step": 7130 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007718459157497509, + "loss": 0.6268, + "step": 7140 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007718035150204584, + "loss": 0.5433, + "step": 7150 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007717611142911659, + "loss": 0.691, + "step": 7160 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007717187135618732, + "loss": 0.5381, + "step": 7170 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007716763128325807, + "loss": 0.626, + "step": 7180 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007716339121032883, + "loss": 0.6366, + "step": 7190 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007715915113739956, + "loss": 0.636, + "step": 7200 + }, + { + "epoch": 0.3, + "learning_rate": 0.0007715491106447031, + "loss": 0.564, + "step": 7210 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007715067099154106, + "loss": 0.7273, + "step": 7220 + }, + { + "epoch": 0.31, + "learning_rate": 0.000771464309186118, + "loss": 0.7586, + "step": 7230 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007714219084568255, + "loss": 0.6252, + "step": 7240 + }, + { + "epoch": 0.31, + "learning_rate": 0.000771379507727533, + "loss": 0.6642, + "step": 7250 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007713371069982403, + "loss": 0.5657, + "step": 7260 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007712947062689479, + "loss": 0.5254, + "step": 7270 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007712523055396554, + "loss": 0.5148, + "step": 7280 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007712099048103627, + "loss": 0.689, + "step": 7290 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007711675040810702, + "loss": 0.5358, + "step": 7300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007711251033517778, + "loss": 0.4977, + "step": 7310 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007710827026224851, + "loss": 0.5207, + "step": 7320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007710403018931926, + "loss": 0.6051, + "step": 7330 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007709979011639001, + "loss": 0.6496, + "step": 7340 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007709555004346075, + "loss": 0.809, + "step": 7350 + }, + { + "epoch": 0.31, + "learning_rate": 0.000770913099705315, + "loss": 0.6206, + "step": 7360 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007708706989760225, + "loss": 0.5263, + "step": 7370 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007708282982467298, + "loss": 0.6258, + "step": 7380 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007707858975174374, + "loss": 0.605, + "step": 7390 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007707434967881449, + "loss": 0.6053, + "step": 7400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007707010960588522, + "loss": 0.7063, + "step": 7410 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007706586953295597, + "loss": 0.6702, + "step": 7420 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007706162946002672, + "loss": 0.6451, + "step": 7430 + }, + { + "epoch": 0.31, + "learning_rate": 0.0007705738938709746, + "loss": 0.6194, + "step": 7440 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007705314931416821, + "loss": 0.4984, + "step": 7450 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007704890924123894, + "loss": 0.7647, + "step": 7460 + }, + { + "epoch": 0.32, + "learning_rate": 0.000770446691683097, + "loss": 0.6545, + "step": 7470 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007704042909538045, + "loss": 0.6128, + "step": 7480 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007703618902245118, + "loss": 0.5173, + "step": 7490 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007703194894952193, + "loss": 0.7232, + "step": 7500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007702770887659269, + "loss": 0.573, + "step": 7510 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007702346880366342, + "loss": 0.6052, + "step": 7520 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007701922873073417, + "loss": 0.6709, + "step": 7530 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007701498865780492, + "loss": 0.6687, + "step": 7540 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007701074858487566, + "loss": 0.5686, + "step": 7550 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007700650851194641, + "loss": 0.5664, + "step": 7560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007700226843901716, + "loss": 0.6329, + "step": 7570 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007699802836608789, + "loss": 0.589, + "step": 7580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007699378829315865, + "loss": 0.6654, + "step": 7590 + }, + { + "epoch": 0.32, + "learning_rate": 0.000769895482202294, + "loss": 0.56, + "step": 7600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007698530814730013, + "loss": 0.6502, + "step": 7610 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007698106807437088, + "loss": 0.6265, + "step": 7620 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007697682800144163, + "loss": 0.565, + "step": 7630 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007697258792851237, + "loss": 0.5124, + "step": 7640 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007696834785558312, + "loss": 0.529, + "step": 7650 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007696410778265387, + "loss": 0.6316, + "step": 7660 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007695986770972461, + "loss": 0.5952, + "step": 7670 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007695562763679536, + "loss": 0.6284, + "step": 7680 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007695138756386611, + "loss": 0.7009, + "step": 7690 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007694714749093684, + "loss": 0.5168, + "step": 7700 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007694290741800759, + "loss": 0.6853, + "step": 7710 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007693866734507834, + "loss": 0.5777, + "step": 7720 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007693442727214908, + "loss": 0.624, + "step": 7730 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007693018719921983, + "loss": 0.5905, + "step": 7740 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007692594712629058, + "loss": 0.5593, + "step": 7750 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007692170705336132, + "loss": 0.6197, + "step": 7760 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007691746698043207, + "loss": 0.5957, + "step": 7770 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007691322690750281, + "loss": 0.5709, + "step": 7780 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007690898683457356, + "loss": 0.6195, + "step": 7790 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007690474676164431, + "loss": 0.621, + "step": 7800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007690050668871505, + "loss": 0.5229, + "step": 7810 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007689626661578579, + "loss": 0.5913, + "step": 7820 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007689202654285654, + "loss": 0.5522, + "step": 7830 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007688778646992729, + "loss": 0.6063, + "step": 7840 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007688354639699803, + "loss": 0.6173, + "step": 7850 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007687930632406878, + "loss": 0.5803, + "step": 7860 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007687506625113953, + "loss": 0.6865, + "step": 7870 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007687082617821027, + "loss": 0.5696, + "step": 7880 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007686658610528102, + "loss": 0.5863, + "step": 7890 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007686234603235175, + "loss": 0.7458, + "step": 7900 + }, + { + "epoch": 0.33, + "learning_rate": 0.000768581059594225, + "loss": 0.5145, + "step": 7910 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007685386588649326, + "loss": 0.7213, + "step": 7920 + }, + { + "epoch": 0.34, + "learning_rate": 0.00076849625813564, + "loss": 0.638, + "step": 7930 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007684538574063474, + "loss": 0.659, + "step": 7940 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007684114566770549, + "loss": 0.6848, + "step": 7950 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007683690559477623, + "loss": 0.6549, + "step": 7960 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007683266552184698, + "loss": 0.5442, + "step": 7970 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007682842544891773, + "loss": 0.5427, + "step": 7980 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007682418537598846, + "loss": 0.6002, + "step": 7990 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007681994530305922, + "loss": 0.6659, + "step": 8000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007681570523012997, + "loss": 0.6429, + "step": 8010 + }, + { + "epoch": 0.34, + "learning_rate": 0.000768114651572007, + "loss": 0.6183, + "step": 8020 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007680722508427145, + "loss": 0.5832, + "step": 8030 + }, + { + "epoch": 0.34, + "learning_rate": 0.000768029850113422, + "loss": 0.6756, + "step": 8040 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679874493841294, + "loss": 0.6405, + "step": 8050 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679450486548369, + "loss": 0.6598, + "step": 8060 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679026479255443, + "loss": 0.6456, + "step": 8070 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007678602471962518, + "loss": 0.5152, + "step": 8080 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007678178464669593, + "loss": 0.6053, + "step": 8090 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007677754457376667, + "loss": 0.5916, + "step": 8100 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007677330450083741, + "loss": 0.542, + "step": 8110 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007676906442790817, + "loss": 0.5207, + "step": 8120 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007676482435497891, + "loss": 0.6265, + "step": 8130 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007676058428204965, + "loss": 0.5757, + "step": 8140 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767563442091204, + "loss": 0.6004, + "step": 8150 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007675210413619115, + "loss": 0.5795, + "step": 8160 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007674786406326189, + "loss": 0.5835, + "step": 8170 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007674362399033264, + "loss": 0.5072, + "step": 8180 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007673938391740338, + "loss": 0.6108, + "step": 8190 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007673514384447413, + "loss": 0.6604, + "step": 8200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007673090377154488, + "loss": 0.6534, + "step": 8210 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007672666369861562, + "loss": 0.6648, + "step": 8220 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007672242362568636, + "loss": 0.605, + "step": 8230 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007671818355275711, + "loss": 0.6421, + "step": 8240 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007671394347982786, + "loss": 0.6396, + "step": 8250 + }, + { + "epoch": 0.35, + "learning_rate": 0.000767097034068986, + "loss": 0.7274, + "step": 8260 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007670546333396935, + "loss": 0.7185, + "step": 8270 + }, + { + "epoch": 0.35, + "learning_rate": 0.000767012232610401, + "loss": 0.5677, + "step": 8280 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007669698318811084, + "loss": 0.5207, + "step": 8290 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007669274311518159, + "loss": 0.5518, + "step": 8300 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007668850304225233, + "loss": 0.6022, + "step": 8310 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007668426296932308, + "loss": 0.5682, + "step": 8320 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007668002289639383, + "loss": 0.5575, + "step": 8330 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007667578282346457, + "loss": 0.541, + "step": 8340 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007667154275053531, + "loss": 0.726, + "step": 8350 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007666730267760606, + "loss": 0.5154, + "step": 8360 + }, + { + "epoch": 0.35, + "learning_rate": 0.000766630626046768, + "loss": 0.512, + "step": 8370 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007665882253174755, + "loss": 0.7272, + "step": 8380 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007665458245881829, + "loss": 0.654, + "step": 8390 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007665034238588905, + "loss": 0.6862, + "step": 8400 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007664610231295979, + "loss": 0.6486, + "step": 8410 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007664186224003053, + "loss": 0.6841, + "step": 8420 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007663762216710127, + "loss": 0.6623, + "step": 8430 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007663338209417202, + "loss": 0.7007, + "step": 8440 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007662914202124277, + "loss": 0.6529, + "step": 8450 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007662490194831351, + "loss": 0.5428, + "step": 8460 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007662066187538426, + "loss": 0.5843, + "step": 8470 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007661642180245501, + "loss": 0.5791, + "step": 8480 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007661218172952575, + "loss": 0.655, + "step": 8490 + }, + { + "epoch": 0.36, + "learning_rate": 0.000766079416565965, + "loss": 0.6185, + "step": 8500 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007660370158366724, + "loss": 0.5906, + "step": 8510 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007659946151073798, + "loss": 0.5526, + "step": 8520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007659522143780874, + "loss": 0.564, + "step": 8530 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007659098136487948, + "loss": 0.6993, + "step": 8540 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007658674129195022, + "loss": 0.5585, + "step": 8550 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007658250121902097, + "loss": 0.5103, + "step": 8560 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007657826114609172, + "loss": 0.5449, + "step": 8570 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007657402107316246, + "loss": 0.4888, + "step": 8580 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007656978100023321, + "loss": 0.6759, + "step": 8590 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007656554092730395, + "loss": 0.572, + "step": 8600 + }, + { + "epoch": 0.36, + "learning_rate": 0.000765613008543747, + "loss": 0.6625, + "step": 8610 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007655706078144545, + "loss": 0.6337, + "step": 8620 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007655282070851619, + "loss": 0.653, + "step": 8630 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007654858063558693, + "loss": 0.6106, + "step": 8640 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007654434056265768, + "loss": 0.6629, + "step": 8650 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007654010048972843, + "loss": 0.6565, + "step": 8660 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007653586041679917, + "loss": 0.5824, + "step": 8670 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007653162034386991, + "loss": 0.5425, + "step": 8680 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007652738027094067, + "loss": 0.6052, + "step": 8690 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007652314019801141, + "loss": 0.5812, + "step": 8700 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007651890012508215, + "loss": 0.6381, + "step": 8710 + }, + { + "epoch": 0.37, + "learning_rate": 0.000765146600521529, + "loss": 0.6073, + "step": 8720 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007651041997922365, + "loss": 0.5368, + "step": 8730 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007650617990629439, + "loss": 0.7608, + "step": 8740 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007650193983336514, + "loss": 0.4941, + "step": 8750 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007649769976043588, + "loss": 0.632, + "step": 8760 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007649345968750663, + "loss": 0.7651, + "step": 8770 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007648921961457738, + "loss": 0.5062, + "step": 8780 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007648497954164812, + "loss": 0.5611, + "step": 8790 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007648073946871886, + "loss": 0.627, + "step": 8800 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007647649939578962, + "loss": 0.6366, + "step": 8810 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007647225932286036, + "loss": 0.6125, + "step": 8820 + }, + { + "epoch": 0.37, + "learning_rate": 0.000764680192499311, + "loss": 0.6978, + "step": 8830 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007646377917700184, + "loss": 0.5558, + "step": 8840 + }, + { + "epoch": 0.37, + "learning_rate": 0.000764595391040726, + "loss": 0.64, + "step": 8850 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007645529903114334, + "loss": 0.6389, + "step": 8860 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007645105895821409, + "loss": 0.6405, + "step": 8870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007644681888528483, + "loss": 0.6921, + "step": 8880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007644257881235558, + "loss": 0.6883, + "step": 8890 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007643833873942633, + "loss": 0.6687, + "step": 8900 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007643409866649707, + "loss": 0.6088, + "step": 8910 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007642985859356781, + "loss": 0.6154, + "step": 8920 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007642561852063857, + "loss": 0.6552, + "step": 8930 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007642137844770931, + "loss": 0.5808, + "step": 8940 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007641713837478005, + "loss": 0.5133, + "step": 8950 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007641289830185079, + "loss": 0.5386, + "step": 8960 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007640865822892154, + "loss": 0.6349, + "step": 8970 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007640441815599229, + "loss": 0.5621, + "step": 8980 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007640017808306303, + "loss": 0.6052, + "step": 8990 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007639593801013377, + "loss": 0.6937, + "step": 9000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007639169793720453, + "loss": 0.6184, + "step": 9010 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007638745786427527, + "loss": 0.7246, + "step": 9020 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007638321779134601, + "loss": 0.6943, + "step": 9030 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007637897771841676, + "loss": 0.4638, + "step": 9040 + }, + { + "epoch": 0.38, + "learning_rate": 0.000763747376454875, + "loss": 0.5698, + "step": 9050 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007637049757255825, + "loss": 0.6173, + "step": 9060 + }, + { + "epoch": 0.38, + "learning_rate": 0.00076366257499629, + "loss": 0.6143, + "step": 9070 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007636201742669974, + "loss": 0.591, + "step": 9080 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007635777735377049, + "loss": 0.5914, + "step": 9090 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007635353728084124, + "loss": 0.7357, + "step": 9100 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007634929720791198, + "loss": 0.6463, + "step": 9110 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007634505713498272, + "loss": 0.6637, + "step": 9120 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007634081706205347, + "loss": 0.5815, + "step": 9130 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007633657698912422, + "loss": 0.6009, + "step": 9140 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007633233691619496, + "loss": 0.5554, + "step": 9150 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007632809684326571, + "loss": 0.6193, + "step": 9160 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007632385677033645, + "loss": 0.6058, + "step": 9170 + }, + { + "epoch": 0.39, + "learning_rate": 0.000763196166974072, + "loss": 0.5501, + "step": 9180 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007631537662447795, + "loss": 0.5933, + "step": 9190 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007631113655154869, + "loss": 0.6403, + "step": 9200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007630689647861943, + "loss": 0.631, + "step": 9210 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007630265640569019, + "loss": 0.6228, + "step": 9220 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007629841633276093, + "loss": 0.5897, + "step": 9230 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007629417625983167, + "loss": 0.5963, + "step": 9240 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007628993618690242, + "loss": 0.6296, + "step": 9250 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007628569611397316, + "loss": 0.6764, + "step": 9260 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007628145604104391, + "loss": 0.6023, + "step": 9270 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007627721596811466, + "loss": 0.7838, + "step": 9280 + }, + { + "epoch": 0.39, + "learning_rate": 0.000762729758951854, + "loss": 0.6439, + "step": 9290 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007626873582225615, + "loss": 0.5452, + "step": 9300 + }, + { + "epoch": 0.39, + "learning_rate": 0.000762644957493269, + "loss": 0.7239, + "step": 9310 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007626025567639763, + "loss": 0.6873, + "step": 9320 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007625601560346838, + "loss": 0.608, + "step": 9330 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007625177553053914, + "loss": 0.7264, + "step": 9340 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007624753545760987, + "loss": 0.5253, + "step": 9350 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007624329538468062, + "loss": 0.5611, + "step": 9360 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007623905531175136, + "loss": 0.6189, + "step": 9370 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007623481523882211, + "loss": 0.6604, + "step": 9380 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007623057516589286, + "loss": 0.6627, + "step": 9390 + }, + { + "epoch": 0.4, + "learning_rate": 0.000762263350929636, + "loss": 0.6491, + "step": 9400 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007622209502003434, + "loss": 0.5831, + "step": 9410 + }, + { + "epoch": 0.4, + "learning_rate": 0.000762178549471051, + "loss": 0.7038, + "step": 9420 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007621361487417584, + "loss": 0.673, + "step": 9430 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007620937480124658, + "loss": 0.5855, + "step": 9440 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007620513472831733, + "loss": 0.632, + "step": 9450 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007620089465538808, + "loss": 0.6441, + "step": 9460 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007619665458245882, + "loss": 0.5871, + "step": 9470 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007619241450952957, + "loss": 0.7076, + "step": 9480 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007618817443660031, + "loss": 0.6391, + "step": 9490 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007618393436367106, + "loss": 0.5737, + "step": 9500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007617969429074181, + "loss": 0.6174, + "step": 9510 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007617545421781255, + "loss": 0.6152, + "step": 9520 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007617121414488329, + "loss": 0.5467, + "step": 9530 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007616697407195405, + "loss": 0.61, + "step": 9540 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007616273399902479, + "loss": 0.5968, + "step": 9550 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007615849392609553, + "loss": 0.6215, + "step": 9560 + }, + { + "epoch": 0.4, + "learning_rate": 0.0007615425385316628, + "loss": 0.5158, + "step": 9570 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007615001378023702, + "loss": 0.6269, + "step": 9580 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007614577370730777, + "loss": 0.6019, + "step": 9590 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007614153363437852, + "loss": 0.5946, + "step": 9600 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007613729356144925, + "loss": 0.6059, + "step": 9610 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007613305348852001, + "loss": 0.6349, + "step": 9620 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007612881341559076, + "loss": 0.6576, + "step": 9630 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007612457334266149, + "loss": 0.6598, + "step": 9640 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007612033326973224, + "loss": 0.6276, + "step": 9650 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007611609319680299, + "loss": 0.5596, + "step": 9660 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007611185312387373, + "loss": 0.5251, + "step": 9670 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007610761305094448, + "loss": 0.6251, + "step": 9680 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007610337297801523, + "loss": 0.5493, + "step": 9690 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007609913290508597, + "loss": 0.6198, + "step": 9700 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007609489283215672, + "loss": 0.5803, + "step": 9710 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007609065275922747, + "loss": 0.6944, + "step": 9720 + }, + { + "epoch": 0.41, + "learning_rate": 0.000760864126862982, + "loss": 0.6522, + "step": 9730 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007608217261336895, + "loss": 0.7118, + "step": 9740 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007607793254043971, + "loss": 0.561, + "step": 9750 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007607369246751044, + "loss": 0.6794, + "step": 9760 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007606945239458119, + "loss": 0.6302, + "step": 9770 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007606521232165194, + "loss": 0.6144, + "step": 9780 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007606097224872268, + "loss": 0.6316, + "step": 9790 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007605673217579343, + "loss": 0.6788, + "step": 9800 + }, + { + "epoch": 0.41, + "learning_rate": 0.0007605249210286418, + "loss": 0.7701, + "step": 9810 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007604825202993492, + "loss": 0.5893, + "step": 9820 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007604401195700567, + "loss": 0.6793, + "step": 9830 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007603977188407642, + "loss": 0.6783, + "step": 9840 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007603553181114715, + "loss": 0.6744, + "step": 9850 + }, + { + "epoch": 0.42, + "learning_rate": 0.000760312917382179, + "loss": 0.6276, + "step": 9860 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007602705166528864, + "loss": 0.5408, + "step": 9870 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007602281159235939, + "loss": 0.487, + "step": 9880 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007601857151943014, + "loss": 0.675, + "step": 9890 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007601433144650088, + "loss": 0.5671, + "step": 9900 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007601009137357163, + "loss": 0.7367, + "step": 9910 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007600585130064238, + "loss": 0.5942, + "step": 9920 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007600161122771311, + "loss": 0.6035, + "step": 9930 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007599737115478386, + "loss": 0.6635, + "step": 9940 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007599313108185462, + "loss": 0.5992, + "step": 9950 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007598889100892535, + "loss": 0.6101, + "step": 9960 + }, + { + "epoch": 0.42, + "learning_rate": 0.000759846509359961, + "loss": 0.6378, + "step": 9970 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007598041086306685, + "loss": 0.6128, + "step": 9980 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007597617079013759, + "loss": 0.5432, + "step": 9990 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007597193071720834, + "loss": 0.5998, + "step": 10000 + }, + { + "epoch": 0.42, + "eval_loss": 0.6361307501792908, + "eval_runtime": 337.8435, + "eval_samples_per_second": 15.555, + "eval_steps_per_second": 3.889, + "step": 10000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007596769064427909, + "loss": 0.6544, + "step": 10010 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007596345057134982, + "loss": 0.6739, + "step": 10020 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007595921049842058, + "loss": 0.6349, + "step": 10030 + }, + { + "epoch": 0.42, + "learning_rate": 0.0007595497042549133, + "loss": 0.6995, + "step": 10040 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007595073035256206, + "loss": 0.5983, + "step": 10050 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007594649027963281, + "loss": 0.6614, + "step": 10060 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007594225020670357, + "loss": 0.6029, + "step": 10070 + }, + { + "epoch": 0.43, + "learning_rate": 0.000759380101337743, + "loss": 0.7124, + "step": 10080 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007593377006084505, + "loss": 0.6148, + "step": 10090 + }, + { + "epoch": 0.43, + "learning_rate": 0.000759295299879158, + "loss": 0.5817, + "step": 10100 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007592528991498654, + "loss": 0.5396, + "step": 10110 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007592104984205729, + "loss": 0.63, + "step": 10120 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007591680976912804, + "loss": 0.6856, + "step": 10130 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007591256969619877, + "loss": 0.5574, + "step": 10140 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007590832962326953, + "loss": 0.621, + "step": 10150 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007590408955034028, + "loss": 0.6546, + "step": 10160 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007589984947741101, + "loss": 0.628, + "step": 10170 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007589560940448176, + "loss": 0.6099, + "step": 10180 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007589136933155251, + "loss": 0.5651, + "step": 10190 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007588712925862325, + "loss": 0.7003, + "step": 10200 + }, + { + "epoch": 0.43, + "learning_rate": 0.00075882889185694, + "loss": 0.6097, + "step": 10210 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007587864911276473, + "loss": 0.6286, + "step": 10220 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007587440903983549, + "loss": 0.5699, + "step": 10230 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007587016896690624, + "loss": 0.5349, + "step": 10240 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007586592889397697, + "loss": 0.7123, + "step": 10250 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007586168882104772, + "loss": 0.5462, + "step": 10260 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007585744874811847, + "loss": 0.6152, + "step": 10270 + }, + { + "epoch": 0.43, + "learning_rate": 0.0007585320867518921, + "loss": 0.6061, + "step": 10280 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007584896860225996, + "loss": 0.5084, + "step": 10290 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007584472852933071, + "loss": 0.5812, + "step": 10300 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007584048845640145, + "loss": 0.6344, + "step": 10310 + }, + { + "epoch": 0.44, + "learning_rate": 0.000758362483834722, + "loss": 0.5583, + "step": 10320 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007583200831054295, + "loss": 0.5861, + "step": 10330 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007582776823761368, + "loss": 0.6502, + "step": 10340 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007582352816468444, + "loss": 0.5275, + "step": 10350 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007581928809175519, + "loss": 0.5731, + "step": 10360 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007581504801882592, + "loss": 0.6453, + "step": 10370 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007581080794589667, + "loss": 0.5865, + "step": 10380 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007580656787296742, + "loss": 0.714, + "step": 10390 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007580232780003816, + "loss": 0.7056, + "step": 10400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007579808772710891, + "loss": 0.5485, + "step": 10410 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007579384765417966, + "loss": 0.5998, + "step": 10420 + }, + { + "epoch": 0.44, + "learning_rate": 0.000757896075812504, + "loss": 0.6239, + "step": 10430 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007578536750832115, + "loss": 0.5806, + "step": 10440 + }, + { + "epoch": 0.44, + "learning_rate": 0.000757811274353919, + "loss": 0.6128, + "step": 10450 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007577688736246263, + "loss": 0.5991, + "step": 10460 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007577264728953338, + "loss": 0.6084, + "step": 10470 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007576840721660414, + "loss": 0.564, + "step": 10480 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007576416714367487, + "loss": 0.5061, + "step": 10490 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007575992707074562, + "loss": 0.6631, + "step": 10500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007575568699781637, + "loss": 0.6126, + "step": 10510 + }, + { + "epoch": 0.44, + "learning_rate": 0.0007575144692488711, + "loss": 0.5286, + "step": 10520 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007574720685195786, + "loss": 0.7224, + "step": 10530 + }, + { + "epoch": 0.45, + "learning_rate": 0.000757429667790286, + "loss": 0.6591, + "step": 10540 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007573872670609934, + "loss": 0.53, + "step": 10550 + }, + { + "epoch": 0.45, + "learning_rate": 0.000757344866331701, + "loss": 0.591, + "step": 10560 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007573024656024084, + "loss": 0.6099, + "step": 10570 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007572600648731158, + "loss": 0.7179, + "step": 10580 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007572176641438233, + "loss": 0.5985, + "step": 10590 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007571752634145308, + "loss": 0.6045, + "step": 10600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007571328626852382, + "loss": 0.5584, + "step": 10610 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007570904619559457, + "loss": 0.7706, + "step": 10620 + }, + { + "epoch": 0.45, + "learning_rate": 0.000757048061226653, + "loss": 0.5897, + "step": 10630 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007570056604973606, + "loss": 0.6053, + "step": 10640 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007569632597680681, + "loss": 0.6208, + "step": 10650 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007569208590387755, + "loss": 0.4623, + "step": 10660 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007568784583094829, + "loss": 0.6884, + "step": 10670 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007568360575801905, + "loss": 0.6803, + "step": 10680 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007567936568508979, + "loss": 0.6405, + "step": 10690 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007567512561216053, + "loss": 0.5893, + "step": 10700 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007567088553923128, + "loss": 0.6655, + "step": 10710 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007566664546630203, + "loss": 0.5431, + "step": 10720 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007566240539337277, + "loss": 0.5302, + "step": 10730 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007565816532044352, + "loss": 0.6615, + "step": 10740 + }, + { + "epoch": 0.45, + "learning_rate": 0.0007565392524751425, + "loss": 0.622, + "step": 10750 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007564968517458501, + "loss": 0.6325, + "step": 10760 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007564544510165576, + "loss": 0.5363, + "step": 10770 + }, + { + "epoch": 0.46, + "learning_rate": 0.000756412050287265, + "loss": 0.5953, + "step": 10780 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007563696495579724, + "loss": 0.6159, + "step": 10790 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007563272488286799, + "loss": 0.6203, + "step": 10800 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007562848480993873, + "loss": 0.65, + "step": 10810 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007562424473700948, + "loss": 0.522, + "step": 10820 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007562000466408022, + "loss": 0.5774, + "step": 10830 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007561576459115097, + "loss": 0.7589, + "step": 10840 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007561152451822172, + "loss": 0.6015, + "step": 10850 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007560728444529246, + "loss": 0.6641, + "step": 10860 + }, + { + "epoch": 0.46, + "learning_rate": 0.000756030443723632, + "loss": 0.6256, + "step": 10870 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007559880429943396, + "loss": 0.5818, + "step": 10880 + }, + { + "epoch": 0.46, + "learning_rate": 0.000755945642265047, + "loss": 0.6654, + "step": 10890 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007559032415357544, + "loss": 0.5756, + "step": 10900 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007558608408064619, + "loss": 0.686, + "step": 10910 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007558184400771694, + "loss": 0.5812, + "step": 10920 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007557760393478768, + "loss": 0.5893, + "step": 10930 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007557336386185843, + "loss": 0.6391, + "step": 10940 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007556912378892917, + "loss": 0.6423, + "step": 10950 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007556488371599992, + "loss": 0.5752, + "step": 10960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007556064364307067, + "loss": 0.5765, + "step": 10970 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007555640357014141, + "loss": 0.5854, + "step": 10980 + }, + { + "epoch": 0.46, + "learning_rate": 0.0007555216349721215, + "loss": 0.6918, + "step": 10990 + }, + { + "epoch": 0.47, + "learning_rate": 0.000755479234242829, + "loss": 0.5194, + "step": 11000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007554368335135365, + "loss": 0.7056, + "step": 11010 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007553944327842439, + "loss": 0.6055, + "step": 11020 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007553520320549514, + "loss": 0.5804, + "step": 11030 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007553096313256589, + "loss": 0.6088, + "step": 11040 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007552672305963663, + "loss": 0.6886, + "step": 11050 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007552248298670738, + "loss": 0.6164, + "step": 11060 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007551824291377812, + "loss": 0.6539, + "step": 11070 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007551400284084886, + "loss": 0.6394, + "step": 11080 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007550976276791962, + "loss": 0.5296, + "step": 11090 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007550552269499036, + "loss": 0.5987, + "step": 11100 + }, + { + "epoch": 0.47, + "learning_rate": 0.000755012826220611, + "loss": 0.6983, + "step": 11110 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007549704254913185, + "loss": 0.6313, + "step": 11120 + }, + { + "epoch": 0.47, + "learning_rate": 0.000754928024762026, + "loss": 0.6948, + "step": 11130 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007548856240327334, + "loss": 0.5854, + "step": 11140 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007548432233034408, + "loss": 0.623, + "step": 11150 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007548008225741483, + "loss": 0.5919, + "step": 11160 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007547584218448558, + "loss": 0.6124, + "step": 11170 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007547160211155632, + "loss": 0.6113, + "step": 11180 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007546736203862707, + "loss": 0.6173, + "step": 11190 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007546312196569781, + "loss": 0.5483, + "step": 11200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007545888189276856, + "loss": 0.507, + "step": 11210 + }, + { + "epoch": 0.47, + "learning_rate": 0.000754546418198393, + "loss": 0.6492, + "step": 11220 + }, + { + "epoch": 0.47, + "learning_rate": 0.0007545040174691005, + "loss": 0.6521, + "step": 11230 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007544616167398079, + "loss": 0.6518, + "step": 11240 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007544192160105155, + "loss": 0.4971, + "step": 11250 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007543768152812229, + "loss": 0.7763, + "step": 11260 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007543344145519303, + "loss": 0.5626, + "step": 11270 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007542920138226377, + "loss": 0.5704, + "step": 11280 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007542496130933453, + "loss": 0.6156, + "step": 11290 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007542072123640527, + "loss": 0.6445, + "step": 11300 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007541648116347601, + "loss": 0.6429, + "step": 11310 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007541224109054676, + "loss": 0.5081, + "step": 11320 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007540800101761751, + "loss": 0.5698, + "step": 11330 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007540376094468825, + "loss": 0.614, + "step": 11340 + }, + { + "epoch": 0.48, + "learning_rate": 0.00075399520871759, + "loss": 0.6224, + "step": 11350 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007539528079882974, + "loss": 0.6508, + "step": 11360 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007539104072590049, + "loss": 0.5994, + "step": 11370 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007538680065297124, + "loss": 0.5786, + "step": 11380 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007538256058004198, + "loss": 0.5275, + "step": 11390 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007537832050711272, + "loss": 0.6075, + "step": 11400 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007537408043418347, + "loss": 0.5497, + "step": 11410 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007536984036125422, + "loss": 0.4839, + "step": 11420 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007536560028832496, + "loss": 0.642, + "step": 11430 + }, + { + "epoch": 0.48, + "learning_rate": 0.000753613602153957, + "loss": 0.5893, + "step": 11440 + }, + { + "epoch": 0.48, + "learning_rate": 0.0007535712014246646, + "loss": 0.6042, + "step": 11450 + }, + { + "epoch": 0.48, + "learning_rate": 0.000753528800695372, + "loss": 0.6363, + "step": 11460 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007534863999660794, + "loss": 0.6551, + "step": 11470 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007534439992367869, + "loss": 0.5657, + "step": 11480 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007534015985074944, + "loss": 0.6136, + "step": 11490 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007533591977782018, + "loss": 0.6537, + "step": 11500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007533167970489093, + "loss": 0.6369, + "step": 11510 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007532743963196167, + "loss": 0.5707, + "step": 11520 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007532319955903242, + "loss": 0.6284, + "step": 11530 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007531895948610317, + "loss": 0.6201, + "step": 11540 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007531471941317391, + "loss": 0.588, + "step": 11550 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007531047934024465, + "loss": 0.5642, + "step": 11560 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007530623926731541, + "loss": 0.6118, + "step": 11570 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007530199919438615, + "loss": 0.5893, + "step": 11580 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007529775912145689, + "loss": 0.6033, + "step": 11590 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007529351904852764, + "loss": 0.7123, + "step": 11600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007528927897559838, + "loss": 0.6357, + "step": 11610 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007528503890266913, + "loss": 0.5915, + "step": 11620 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007528079882973988, + "loss": 0.5912, + "step": 11630 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007527655875681062, + "loss": 0.6112, + "step": 11640 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007527231868388137, + "loss": 0.5619, + "step": 11650 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007526807861095212, + "loss": 0.6299, + "step": 11660 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007526383853802286, + "loss": 0.5745, + "step": 11670 + }, + { + "epoch": 0.49, + "learning_rate": 0.000752595984650936, + "loss": 0.5975, + "step": 11680 + }, + { + "epoch": 0.49, + "learning_rate": 0.0007525535839216434, + "loss": 0.6203, + "step": 11690 + }, + { + "epoch": 0.49, + "learning_rate": 0.000752511183192351, + "loss": 0.6461, + "step": 11700 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007524687824630584, + "loss": 0.6729, + "step": 11710 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007524263817337658, + "loss": 0.7198, + "step": 11720 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007523839810044733, + "loss": 0.5475, + "step": 11730 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007523415802751808, + "loss": 0.6529, + "step": 11740 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007522991795458882, + "loss": 0.7057, + "step": 11750 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007522567788165956, + "loss": 0.5945, + "step": 11760 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007522143780873031, + "loss": 0.6071, + "step": 11770 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007521719773580107, + "loss": 0.6047, + "step": 11780 + }, + { + "epoch": 0.5, + "learning_rate": 0.000752129576628718, + "loss": 0.6428, + "step": 11790 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007520871758994255, + "loss": 0.5286, + "step": 11800 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007520447751701329, + "loss": 0.5209, + "step": 11810 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007520023744408404, + "loss": 0.7729, + "step": 11820 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007519599737115479, + "loss": 0.6752, + "step": 11830 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007519175729822553, + "loss": 0.6177, + "step": 11840 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007518751722529628, + "loss": 0.5883, + "step": 11850 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007518327715236703, + "loss": 0.6387, + "step": 11860 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007517903707943777, + "loss": 0.6131, + "step": 11870 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007517479700650851, + "loss": 0.5791, + "step": 11880 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007517055693357926, + "loss": 0.5556, + "step": 11890 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007516631686065001, + "loss": 0.5669, + "step": 11900 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007516207678772075, + "loss": 0.6502, + "step": 11910 + }, + { + "epoch": 0.5, + "learning_rate": 0.000751578367147915, + "loss": 0.5086, + "step": 11920 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007515359664186224, + "loss": 0.5305, + "step": 11930 + }, + { + "epoch": 0.5, + "learning_rate": 0.0007514935656893299, + "loss": 0.5678, + "step": 11940 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007514511649600374, + "loss": 0.5955, + "step": 11950 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007514087642307448, + "loss": 0.7589, + "step": 11960 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007513663635014522, + "loss": 0.698, + "step": 11970 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007513239627721598, + "loss": 0.5837, + "step": 11980 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007512815620428672, + "loss": 0.537, + "step": 11990 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007512391613135746, + "loss": 0.7069, + "step": 12000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007511967605842821, + "loss": 0.6812, + "step": 12010 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007511543598549895, + "loss": 0.6666, + "step": 12020 + }, + { + "epoch": 0.51, + "learning_rate": 0.000751111959125697, + "loss": 0.6355, + "step": 12030 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007510695583964045, + "loss": 0.4884, + "step": 12040 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007510271576671118, + "loss": 0.6222, + "step": 12050 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007509847569378194, + "loss": 0.5423, + "step": 12060 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007509423562085269, + "loss": 0.6097, + "step": 12070 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007508999554792342, + "loss": 0.6327, + "step": 12080 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007508575547499417, + "loss": 0.5862, + "step": 12090 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007508151540206493, + "loss": 0.5047, + "step": 12100 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007507727532913566, + "loss": 0.591, + "step": 12110 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007507303525620641, + "loss": 0.6165, + "step": 12120 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007506879518327716, + "loss": 0.6532, + "step": 12130 + }, + { + "epoch": 0.51, + "learning_rate": 0.000750645551103479, + "loss": 0.6097, + "step": 12140 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007506031503741865, + "loss": 0.6374, + "step": 12150 + }, + { + "epoch": 0.51, + "learning_rate": 0.000750560749644894, + "loss": 0.508, + "step": 12160 + }, + { + "epoch": 0.51, + "learning_rate": 0.0007505183489156013, + "loss": 0.5172, + "step": 12170 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007504759481863089, + "loss": 0.7089, + "step": 12180 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007504335474570164, + "loss": 0.6947, + "step": 12190 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007503911467277237, + "loss": 0.5599, + "step": 12200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007503487459984312, + "loss": 0.6658, + "step": 12210 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007503063452691386, + "loss": 0.5542, + "step": 12220 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007502639445398461, + "loss": 0.5889, + "step": 12230 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007502215438105536, + "loss": 0.6069, + "step": 12240 + }, + { + "epoch": 0.52, + "learning_rate": 0.000750179143081261, + "loss": 0.6269, + "step": 12250 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007501367423519685, + "loss": 0.5793, + "step": 12260 + }, + { + "epoch": 0.52, + "learning_rate": 0.000750094341622676, + "loss": 0.6086, + "step": 12270 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007500519408933834, + "loss": 0.6366, + "step": 12280 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007500095401640908, + "loss": 0.5846, + "step": 12290 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007499671394347984, + "loss": 0.6141, + "step": 12300 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007499247387055058, + "loss": 0.6024, + "step": 12310 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007498823379762132, + "loss": 0.6157, + "step": 12320 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007498399372469207, + "loss": 0.6804, + "step": 12330 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007497975365176281, + "loss": 0.5805, + "step": 12340 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007497551357883356, + "loss": 0.633, + "step": 12350 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007497127350590431, + "loss": 0.6117, + "step": 12360 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007496703343297504, + "loss": 0.6397, + "step": 12370 + }, + { + "epoch": 0.52, + "learning_rate": 0.000749627933600458, + "loss": 0.683, + "step": 12380 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007495855328711655, + "loss": 0.6531, + "step": 12390 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007495431321418728, + "loss": 0.6204, + "step": 12400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0007495007314125803, + "loss": 0.6636, + "step": 12410 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007494583306832878, + "loss": 0.6741, + "step": 12420 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007494159299539952, + "loss": 0.5562, + "step": 12430 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007493735292247027, + "loss": 0.6214, + "step": 12440 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007493311284954102, + "loss": 0.6067, + "step": 12450 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007492887277661176, + "loss": 0.5956, + "step": 12460 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007492463270368251, + "loss": 0.5562, + "step": 12470 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007492039263075326, + "loss": 0.6638, + "step": 12480 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007491615255782399, + "loss": 0.664, + "step": 12490 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007491191248489474, + "loss": 0.684, + "step": 12500 + }, + { + "epoch": 0.53, + "learning_rate": 0.000749076724119655, + "loss": 0.5465, + "step": 12510 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007490343233903623, + "loss": 0.7079, + "step": 12520 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007489919226610698, + "loss": 0.6375, + "step": 12530 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007489495219317773, + "loss": 0.5776, + "step": 12540 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007489071212024847, + "loss": 0.6072, + "step": 12550 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007488647204731922, + "loss": 0.7531, + "step": 12560 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007488223197438997, + "loss": 0.639, + "step": 12570 + }, + { + "epoch": 0.53, + "learning_rate": 0.000748779919014607, + "loss": 0.5889, + "step": 12580 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007487375182853146, + "loss": 0.5962, + "step": 12590 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007486951175560221, + "loss": 0.5892, + "step": 12600 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007486527168267294, + "loss": 0.6007, + "step": 12610 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007486103160974369, + "loss": 0.63, + "step": 12620 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007485679153681445, + "loss": 0.5749, + "step": 12630 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007485255146388518, + "loss": 0.6368, + "step": 12640 + }, + { + "epoch": 0.53, + "learning_rate": 0.0007484831139095593, + "loss": 0.5417, + "step": 12650 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007484407131802666, + "loss": 0.641, + "step": 12660 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007483983124509742, + "loss": 0.5266, + "step": 12670 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007483559117216817, + "loss": 0.6357, + "step": 12680 + }, + { + "epoch": 0.54, + "learning_rate": 0.000748313510992389, + "loss": 0.5623, + "step": 12690 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007482711102630965, + "loss": 0.5317, + "step": 12700 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007482287095338041, + "loss": 0.5722, + "step": 12710 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007481863088045114, + "loss": 0.5847, + "step": 12720 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007481439080752189, + "loss": 0.6959, + "step": 12730 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007481015073459264, + "loss": 0.758, + "step": 12740 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007480591066166338, + "loss": 0.6306, + "step": 12750 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007480167058873413, + "loss": 0.6342, + "step": 12760 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007479743051580488, + "loss": 0.6282, + "step": 12770 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007479319044287561, + "loss": 0.7212, + "step": 12780 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007478895036994637, + "loss": 0.6278, + "step": 12790 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007478471029701712, + "loss": 0.6269, + "step": 12800 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007478047022408785, + "loss": 0.567, + "step": 12810 + }, + { + "epoch": 0.54, + "learning_rate": 0.000747762301511586, + "loss": 0.6161, + "step": 12820 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007477199007822936, + "loss": 0.6015, + "step": 12830 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007476775000530009, + "loss": 0.6014, + "step": 12840 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007476350993237084, + "loss": 0.6351, + "step": 12850 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007475926985944159, + "loss": 0.6446, + "step": 12860 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007475502978651233, + "loss": 0.5391, + "step": 12870 + }, + { + "epoch": 0.54, + "learning_rate": 0.0007475078971358308, + "loss": 0.6163, + "step": 12880 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007474654964065383, + "loss": 0.5519, + "step": 12890 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007474230956772456, + "loss": 0.5735, + "step": 12900 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007473806949479532, + "loss": 0.5562, + "step": 12910 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007473382942186607, + "loss": 0.6746, + "step": 12920 + }, + { + "epoch": 0.55, + "learning_rate": 0.000747295893489368, + "loss": 0.5621, + "step": 12930 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007472534927600755, + "loss": 0.6022, + "step": 12940 + }, + { + "epoch": 0.55, + "learning_rate": 0.000747211092030783, + "loss": 0.5434, + "step": 12950 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007471686913014904, + "loss": 0.6853, + "step": 12960 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007471262905721979, + "loss": 0.6753, + "step": 12970 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007470838898429053, + "loss": 0.58, + "step": 12980 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007470414891136128, + "loss": 0.5815, + "step": 12990 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007469990883843203, + "loss": 0.6307, + "step": 13000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007469566876550277, + "loss": 0.6604, + "step": 13010 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007469142869257351, + "loss": 0.6728, + "step": 13020 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007468718861964426, + "loss": 0.6022, + "step": 13030 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007468294854671501, + "loss": 0.5398, + "step": 13040 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007467870847378575, + "loss": 0.6341, + "step": 13050 + }, + { + "epoch": 0.55, + "learning_rate": 0.000746744684008565, + "loss": 0.6798, + "step": 13060 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007467022832792725, + "loss": 0.5191, + "step": 13070 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007466598825499799, + "loss": 0.576, + "step": 13080 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007466174818206874, + "loss": 0.5796, + "step": 13090 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007465750810913947, + "loss": 0.6727, + "step": 13100 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007465326803621022, + "loss": 0.5564, + "step": 13110 + }, + { + "epoch": 0.55, + "learning_rate": 0.0007464902796328098, + "loss": 0.5987, + "step": 13120 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007464478789035171, + "loss": 0.6462, + "step": 13130 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007464054781742246, + "loss": 0.6555, + "step": 13140 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007463630774449321, + "loss": 0.7378, + "step": 13150 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007463206767156395, + "loss": 0.5343, + "step": 13160 + }, + { + "epoch": 0.56, + "learning_rate": 0.000746278275986347, + "loss": 0.6108, + "step": 13170 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007462358752570545, + "loss": 0.5885, + "step": 13180 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007461934745277618, + "loss": 0.7035, + "step": 13190 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007461510737984694, + "loss": 0.644, + "step": 13200 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007461086730691769, + "loss": 0.4994, + "step": 13210 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007460662723398842, + "loss": 0.6407, + "step": 13220 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007460238716105917, + "loss": 0.6107, + "step": 13230 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007459814708812993, + "loss": 0.6146, + "step": 13240 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007459390701520066, + "loss": 0.6068, + "step": 13250 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007458966694227141, + "loss": 0.6297, + "step": 13260 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007458542686934215, + "loss": 0.6471, + "step": 13270 + }, + { + "epoch": 0.56, + "learning_rate": 0.000745811867964129, + "loss": 0.6486, + "step": 13280 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007457694672348365, + "loss": 0.6516, + "step": 13290 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007457270665055439, + "loss": 0.6181, + "step": 13300 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007456846657762513, + "loss": 0.6484, + "step": 13310 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007456422650469589, + "loss": 0.5871, + "step": 13320 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007455998643176663, + "loss": 0.691, + "step": 13330 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007455574635883737, + "loss": 0.5649, + "step": 13340 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007455150628590812, + "loss": 0.5493, + "step": 13350 + }, + { + "epoch": 0.56, + "learning_rate": 0.0007454726621297887, + "loss": 0.6166, + "step": 13360 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007454302614004961, + "loss": 0.5412, + "step": 13370 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007453878606712036, + "loss": 0.71, + "step": 13380 + }, + { + "epoch": 0.57, + "learning_rate": 0.000745345459941911, + "loss": 0.5896, + "step": 13390 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007453030592126185, + "loss": 0.6035, + "step": 13400 + }, + { + "epoch": 0.57, + "learning_rate": 0.000745260658483326, + "loss": 0.618, + "step": 13410 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007452182577540334, + "loss": 0.5391, + "step": 13420 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007451758570247408, + "loss": 0.5666, + "step": 13430 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007451334562954484, + "loss": 0.6124, + "step": 13440 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007450910555661558, + "loss": 0.6676, + "step": 13450 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007450486548368632, + "loss": 0.6854, + "step": 13460 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007450062541075707, + "loss": 0.668, + "step": 13470 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007449638533782782, + "loss": 0.6037, + "step": 13480 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007449214526489856, + "loss": 0.7304, + "step": 13490 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007448790519196931, + "loss": 0.6132, + "step": 13500 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007448366511904005, + "loss": 0.643, + "step": 13510 + }, + { + "epoch": 0.57, + "learning_rate": 0.000744794250461108, + "loss": 0.5895, + "step": 13520 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007447518497318155, + "loss": 0.6195, + "step": 13530 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007447094490025229, + "loss": 0.5637, + "step": 13540 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007446670482732303, + "loss": 0.6548, + "step": 13550 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007446246475439378, + "loss": 0.6058, + "step": 13560 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007445822468146453, + "loss": 0.6111, + "step": 13570 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007445398460853527, + "loss": 0.6542, + "step": 13580 + }, + { + "epoch": 0.57, + "learning_rate": 0.0007444974453560601, + "loss": 0.6391, + "step": 13590 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007444550446267677, + "loss": 0.676, + "step": 13600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007444126438974751, + "loss": 0.8043, + "step": 13610 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007443702431681825, + "loss": 0.5862, + "step": 13620 + }, + { + "epoch": 0.58, + "learning_rate": 0.00074432784243889, + "loss": 0.5719, + "step": 13630 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007442854417095974, + "loss": 0.5825, + "step": 13640 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007442430409803049, + "loss": 0.5895, + "step": 13650 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007442006402510123, + "loss": 0.6612, + "step": 13660 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007441582395217198, + "loss": 0.6712, + "step": 13670 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007441158387924273, + "loss": 0.648, + "step": 13680 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007440734380631347, + "loss": 0.5609, + "step": 13690 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007440310373338422, + "loss": 0.6083, + "step": 13700 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007439886366045496, + "loss": 0.5155, + "step": 13710 + }, + { + "epoch": 0.58, + "learning_rate": 0.000743946235875257, + "loss": 0.6379, + "step": 13720 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007439038351459646, + "loss": 0.6733, + "step": 13730 + }, + { + "epoch": 0.58, + "learning_rate": 0.000743861434416672, + "loss": 0.6182, + "step": 13740 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007438190336873794, + "loss": 0.7276, + "step": 13750 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007437766329580869, + "loss": 0.6636, + "step": 13760 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007437342322287944, + "loss": 0.5745, + "step": 13770 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007436918314995018, + "loss": 0.7242, + "step": 13780 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007436494307702093, + "loss": 0.6628, + "step": 13790 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007436070300409168, + "loss": 0.5946, + "step": 13800 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007435646293116242, + "loss": 0.5858, + "step": 13810 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007435222285823317, + "loss": 0.5847, + "step": 13820 + }, + { + "epoch": 0.58, + "learning_rate": 0.0007434798278530391, + "loss": 0.606, + "step": 13830 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007434374271237465, + "loss": 0.6959, + "step": 13840 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007433950263944541, + "loss": 0.7436, + "step": 13850 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007433526256651615, + "loss": 0.5982, + "step": 13860 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007433102249358689, + "loss": 0.6031, + "step": 13870 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007432678242065764, + "loss": 0.5581, + "step": 13880 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007432254234772839, + "loss": 0.6424, + "step": 13890 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007431830227479913, + "loss": 0.5949, + "step": 13900 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007431406220186987, + "loss": 0.731, + "step": 13910 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007430982212894062, + "loss": 0.6413, + "step": 13920 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007430558205601137, + "loss": 0.5959, + "step": 13930 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007430134198308211, + "loss": 0.6236, + "step": 13940 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007429710191015286, + "loss": 0.5956, + "step": 13950 + }, + { + "epoch": 0.59, + "learning_rate": 0.000742928618372236, + "loss": 0.6389, + "step": 13960 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007428862176429435, + "loss": 0.7018, + "step": 13970 + }, + { + "epoch": 0.59, + "learning_rate": 0.000742843816913651, + "loss": 0.5867, + "step": 13980 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007428014161843584, + "loss": 0.562, + "step": 13990 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007427590154550658, + "loss": 0.706, + "step": 14000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007427166147257734, + "loss": 0.6874, + "step": 14010 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007426742139964808, + "loss": 0.561, + "step": 14020 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007426318132671882, + "loss": 0.7459, + "step": 14030 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007425894125378957, + "loss": 0.6497, + "step": 14040 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007425470118086032, + "loss": 0.7178, + "step": 14050 + }, + { + "epoch": 0.59, + "learning_rate": 0.0007425046110793106, + "loss": 0.5097, + "step": 14060 + }, + { + "epoch": 0.6, + "learning_rate": 0.000742462210350018, + "loss": 0.5795, + "step": 14070 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007424198096207255, + "loss": 0.6202, + "step": 14080 + }, + { + "epoch": 0.6, + "learning_rate": 0.000742377408891433, + "loss": 0.4939, + "step": 14090 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007423350081621405, + "loss": 0.6458, + "step": 14100 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007422926074328479, + "loss": 0.7222, + "step": 14110 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007422502067035553, + "loss": 0.613, + "step": 14120 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007422078059742629, + "loss": 0.627, + "step": 14130 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007421654052449703, + "loss": 0.6498, + "step": 14140 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007421230045156777, + "loss": 0.6762, + "step": 14150 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007420806037863851, + "loss": 0.5524, + "step": 14160 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007420382030570926, + "loss": 0.6521, + "step": 14170 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007419958023278001, + "loss": 0.6267, + "step": 14180 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007419534015985075, + "loss": 0.6594, + "step": 14190 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007419110008692149, + "loss": 0.5393, + "step": 14200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007418686001399225, + "loss": 0.6549, + "step": 14210 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007418261994106299, + "loss": 0.6943, + "step": 14220 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007417837986813373, + "loss": 0.6557, + "step": 14230 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007417413979520448, + "loss": 0.5756, + "step": 14240 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007416989972227522, + "loss": 0.6499, + "step": 14250 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007416565964934597, + "loss": 0.5379, + "step": 14260 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007416141957641672, + "loss": 0.6383, + "step": 14270 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007415717950348746, + "loss": 0.5249, + "step": 14280 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007415293943055821, + "loss": 0.6645, + "step": 14290 + }, + { + "epoch": 0.6, + "learning_rate": 0.0007414869935762896, + "loss": 0.6242, + "step": 14300 + }, + { + "epoch": 0.61, + "learning_rate": 0.000741444592846997, + "loss": 0.6758, + "step": 14310 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007414021921177044, + "loss": 0.6297, + "step": 14320 + }, + { + "epoch": 0.61, + "learning_rate": 0.000741359791388412, + "loss": 0.7331, + "step": 14330 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007413173906591194, + "loss": 0.5878, + "step": 14340 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007412749899298268, + "loss": 0.6685, + "step": 14350 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007412325892005343, + "loss": 0.6424, + "step": 14360 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007411901884712417, + "loss": 0.5827, + "step": 14370 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007411477877419492, + "loss": 0.6836, + "step": 14380 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007411053870126567, + "loss": 0.6079, + "step": 14390 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007410629862833641, + "loss": 0.6642, + "step": 14400 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007410205855540716, + "loss": 0.5933, + "step": 14410 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007409781848247791, + "loss": 0.5551, + "step": 14420 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007409357840954865, + "loss": 0.6871, + "step": 14430 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007408933833661939, + "loss": 0.6032, + "step": 14440 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007408509826369014, + "loss": 0.688, + "step": 14450 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007408085819076089, + "loss": 0.7228, + "step": 14460 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007407661811783163, + "loss": 0.6899, + "step": 14470 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007407237804490238, + "loss": 0.5755, + "step": 14480 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007406813797197312, + "loss": 0.6081, + "step": 14490 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007406389789904387, + "loss": 0.6162, + "step": 14500 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007405965782611462, + "loss": 0.7299, + "step": 14510 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007405541775318535, + "loss": 0.5963, + "step": 14520 + }, + { + "epoch": 0.61, + "learning_rate": 0.000740511776802561, + "loss": 0.6647, + "step": 14530 + }, + { + "epoch": 0.61, + "learning_rate": 0.0007404693760732686, + "loss": 0.6602, + "step": 14540 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007404269753439759, + "loss": 0.6536, + "step": 14550 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007403845746146834, + "loss": 0.565, + "step": 14560 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007403421738853908, + "loss": 0.5678, + "step": 14570 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007402997731560983, + "loss": 0.6437, + "step": 14580 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007402573724268058, + "loss": 0.5605, + "step": 14590 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007402149716975132, + "loss": 0.5891, + "step": 14600 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007401725709682206, + "loss": 0.7367, + "step": 14610 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007401301702389282, + "loss": 0.6042, + "step": 14620 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007400877695096356, + "loss": 0.6288, + "step": 14630 + }, + { + "epoch": 0.62, + "learning_rate": 0.000740045368780343, + "loss": 0.5818, + "step": 14640 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007400029680510505, + "loss": 0.6382, + "step": 14650 + }, + { + "epoch": 0.62, + "learning_rate": 0.000739960567321758, + "loss": 0.5987, + "step": 14660 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007399181665924654, + "loss": 0.6191, + "step": 14670 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007398757658631729, + "loss": 0.6835, + "step": 14680 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007398333651338803, + "loss": 0.6823, + "step": 14690 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007397909644045878, + "loss": 0.6701, + "step": 14700 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007397485636752953, + "loss": 0.489, + "step": 14710 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007397061629460027, + "loss": 0.6434, + "step": 14720 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007396637622167101, + "loss": 0.6888, + "step": 14730 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007396213614874177, + "loss": 0.5761, + "step": 14740 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007395789607581251, + "loss": 0.605, + "step": 14750 + }, + { + "epoch": 0.62, + "learning_rate": 0.0007395365600288325, + "loss": 0.6319, + "step": 14760 + }, + { + "epoch": 0.62, + "learning_rate": 0.00073949415929954, + "loss": 0.692, + "step": 14770 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007394517585702474, + "loss": 0.6627, + "step": 14780 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007394093578409549, + "loss": 0.5894, + "step": 14790 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007393669571116624, + "loss": 0.6542, + "step": 14800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007393245563823697, + "loss": 0.6613, + "step": 14810 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007392821556530773, + "loss": 0.5248, + "step": 14820 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007392397549237848, + "loss": 0.5872, + "step": 14830 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007391973541944921, + "loss": 0.6033, + "step": 14840 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007391549534651996, + "loss": 0.5938, + "step": 14850 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007391125527359072, + "loss": 0.6402, + "step": 14860 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007390701520066145, + "loss": 0.6271, + "step": 14870 + }, + { + "epoch": 0.63, + "learning_rate": 0.000739027751277322, + "loss": 0.643, + "step": 14880 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007389853505480295, + "loss": 0.6084, + "step": 14890 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007389429498187369, + "loss": 0.5807, + "step": 14900 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007389005490894444, + "loss": 0.6567, + "step": 14910 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007388581483601519, + "loss": 0.6591, + "step": 14920 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007388157476308592, + "loss": 0.5358, + "step": 14930 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007387733469015668, + "loss": 0.597, + "step": 14940 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007387309461722743, + "loss": 0.6052, + "step": 14950 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007386885454429816, + "loss": 0.5493, + "step": 14960 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007386461447136891, + "loss": 0.645, + "step": 14970 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007386037439843966, + "loss": 0.6871, + "step": 14980 + }, + { + "epoch": 0.63, + "learning_rate": 0.000738561343255104, + "loss": 0.7201, + "step": 14990 + }, + { + "epoch": 0.63, + "learning_rate": 0.0007385189425258115, + "loss": 0.604, + "step": 15000 + }, + { + "epoch": 0.63, + "learning_rate": 0.000738476541796519, + "loss": 0.6668, + "step": 15010 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007384341410672264, + "loss": 0.5855, + "step": 15020 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007383917403379339, + "loss": 0.6276, + "step": 15030 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007383493396086414, + "loss": 0.5422, + "step": 15040 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007383069388793487, + "loss": 0.6336, + "step": 15050 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007382645381500562, + "loss": 0.6637, + "step": 15060 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007382221374207638, + "loss": 0.6478, + "step": 15070 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007381797366914711, + "loss": 0.5825, + "step": 15080 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007381373359621786, + "loss": 0.645, + "step": 15090 + }, + { + "epoch": 0.64, + "learning_rate": 0.000738094935232886, + "loss": 0.6042, + "step": 15100 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007380525345035935, + "loss": 0.748, + "step": 15110 + }, + { + "epoch": 0.64, + "learning_rate": 0.000738010133774301, + "loss": 0.5841, + "step": 15120 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007379677330450083, + "loss": 0.5532, + "step": 15130 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007379253323157158, + "loss": 0.561, + "step": 15140 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007378829315864234, + "loss": 0.666, + "step": 15150 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007378405308571307, + "loss": 0.6019, + "step": 15160 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007377981301278382, + "loss": 0.5926, + "step": 15170 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007377557293985457, + "loss": 0.5976, + "step": 15180 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007377133286692531, + "loss": 0.5917, + "step": 15190 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007376709279399606, + "loss": 0.5789, + "step": 15200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007376285272106681, + "loss": 0.5984, + "step": 15210 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007375861264813754, + "loss": 0.5242, + "step": 15220 + }, + { + "epoch": 0.64, + "learning_rate": 0.000737543725752083, + "loss": 0.5126, + "step": 15230 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007375013250227905, + "loss": 0.6455, + "step": 15240 + }, + { + "epoch": 0.64, + "learning_rate": 0.0007374589242934978, + "loss": 0.7547, + "step": 15250 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007374165235642053, + "loss": 0.6853, + "step": 15260 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007373741228349129, + "loss": 0.6655, + "step": 15270 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007373317221056202, + "loss": 0.5715, + "step": 15280 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007372893213763277, + "loss": 0.6872, + "step": 15290 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007372469206470352, + "loss": 0.5997, + "step": 15300 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007372045199177426, + "loss": 0.6356, + "step": 15310 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007371621191884501, + "loss": 0.6224, + "step": 15320 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007371197184591576, + "loss": 0.5724, + "step": 15330 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007370773177298649, + "loss": 0.7233, + "step": 15340 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007370349170005725, + "loss": 0.5913, + "step": 15350 + }, + { + "epoch": 0.65, + "learning_rate": 0.00073699251627128, + "loss": 0.5893, + "step": 15360 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007369501155419873, + "loss": 0.6799, + "step": 15370 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007369077148126948, + "loss": 0.6619, + "step": 15380 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007368653140834024, + "loss": 0.632, + "step": 15390 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007368229133541097, + "loss": 0.5894, + "step": 15400 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007367805126248172, + "loss": 0.6487, + "step": 15410 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007367381118955245, + "loss": 0.6319, + "step": 15420 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007366957111662321, + "loss": 0.6552, + "step": 15430 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007366533104369396, + "loss": 0.6324, + "step": 15440 + }, + { + "epoch": 0.65, + "learning_rate": 0.000736610909707647, + "loss": 0.6085, + "step": 15450 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007365685089783544, + "loss": 0.548, + "step": 15460 + }, + { + "epoch": 0.65, + "learning_rate": 0.000736526108249062, + "loss": 0.6562, + "step": 15470 + }, + { + "epoch": 0.65, + "learning_rate": 0.0007364837075197694, + "loss": 0.6543, + "step": 15480 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007364413067904768, + "loss": 0.6441, + "step": 15490 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007363989060611843, + "loss": 0.6044, + "step": 15500 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007363565053318918, + "loss": 0.6424, + "step": 15510 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007363141046025992, + "loss": 0.6432, + "step": 15520 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007362717038733067, + "loss": 0.5247, + "step": 15530 + }, + { + "epoch": 0.66, + "learning_rate": 0.000736229303144014, + "loss": 0.5856, + "step": 15540 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007361869024147216, + "loss": 0.5221, + "step": 15550 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007361445016854291, + "loss": 0.6036, + "step": 15560 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007361021009561364, + "loss": 0.6727, + "step": 15570 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007360597002268439, + "loss": 0.5766, + "step": 15580 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007360172994975514, + "loss": 0.5932, + "step": 15590 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007359748987682588, + "loss": 0.7062, + "step": 15600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007359324980389663, + "loss": 0.5923, + "step": 15610 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007358900973096738, + "loss": 0.4876, + "step": 15620 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007358476965803812, + "loss": 0.614, + "step": 15630 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007358052958510887, + "loss": 0.5965, + "step": 15640 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007357628951217962, + "loss": 0.6867, + "step": 15650 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007357204943925035, + "loss": 0.632, + "step": 15660 + }, + { + "epoch": 0.66, + "learning_rate": 0.000735678093663211, + "loss": 0.5183, + "step": 15670 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007356356929339186, + "loss": 0.578, + "step": 15680 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007355932922046259, + "loss": 0.7461, + "step": 15690 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007355508914753334, + "loss": 0.5942, + "step": 15700 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007355084907460409, + "loss": 0.5023, + "step": 15710 + }, + { + "epoch": 0.66, + "learning_rate": 0.0007354660900167483, + "loss": 0.5746, + "step": 15720 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007354236892874558, + "loss": 0.5986, + "step": 15730 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007353812885581632, + "loss": 0.5728, + "step": 15740 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007353388878288706, + "loss": 0.6462, + "step": 15750 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007352964870995782, + "loss": 0.604, + "step": 15760 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007352540863702856, + "loss": 0.6255, + "step": 15770 + }, + { + "epoch": 0.67, + "learning_rate": 0.000735211685640993, + "loss": 0.7649, + "step": 15780 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007351692849117005, + "loss": 0.5214, + "step": 15790 + }, + { + "epoch": 0.67, + "learning_rate": 0.000735126884182408, + "loss": 0.6155, + "step": 15800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007350844834531154, + "loss": 0.698, + "step": 15810 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007350420827238229, + "loss": 0.6477, + "step": 15820 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007349996819945304, + "loss": 0.6177, + "step": 15830 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007349572812652378, + "loss": 0.6556, + "step": 15840 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007349148805359453, + "loss": 0.6655, + "step": 15850 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007348724798066527, + "loss": 0.5396, + "step": 15860 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007348300790773601, + "loss": 0.631, + "step": 15870 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007347876783480677, + "loss": 0.6247, + "step": 15880 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007347452776187751, + "loss": 0.6959, + "step": 15890 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007347028768894825, + "loss": 0.6202, + "step": 15900 + }, + { + "epoch": 0.67, + "learning_rate": 0.00073466047616019, + "loss": 0.573, + "step": 15910 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007346180754308975, + "loss": 0.6539, + "step": 15920 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007345756747016049, + "loss": 0.5947, + "step": 15930 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007345332739723124, + "loss": 0.7541, + "step": 15940 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007344908732430197, + "loss": 0.6038, + "step": 15950 + }, + { + "epoch": 0.67, + "learning_rate": 0.0007344484725137273, + "loss": 0.7052, + "step": 15960 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007344060717844348, + "loss": 0.5246, + "step": 15970 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007343636710551421, + "loss": 0.5291, + "step": 15980 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007343212703258496, + "loss": 0.6291, + "step": 15990 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007342788695965572, + "loss": 0.5963, + "step": 16000 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007342364688672645, + "loss": 0.5817, + "step": 16010 + }, + { + "epoch": 0.68, + "learning_rate": 0.000734194068137972, + "loss": 0.5988, + "step": 16020 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007341516674086794, + "loss": 0.5936, + "step": 16030 + }, + { + "epoch": 0.68, + "learning_rate": 0.000734109266679387, + "loss": 0.5444, + "step": 16040 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007340668659500944, + "loss": 0.5372, + "step": 16050 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007340244652208018, + "loss": 0.6743, + "step": 16060 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007339820644915092, + "loss": 0.6394, + "step": 16070 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007339396637622168, + "loss": 0.5999, + "step": 16080 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007338972630329242, + "loss": 0.5811, + "step": 16090 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007338548623036316, + "loss": 0.6296, + "step": 16100 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007338124615743391, + "loss": 0.6402, + "step": 16110 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007337700608450466, + "loss": 0.605, + "step": 16120 + }, + { + "epoch": 0.68, + "learning_rate": 0.000733727660115754, + "loss": 0.6436, + "step": 16130 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007336852593864615, + "loss": 0.6466, + "step": 16140 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007336428586571689, + "loss": 0.7211, + "step": 16150 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007336004579278764, + "loss": 0.6473, + "step": 16160 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007335580571985839, + "loss": 0.5904, + "step": 16170 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007335156564692913, + "loss": 0.6351, + "step": 16180 + }, + { + "epoch": 0.68, + "learning_rate": 0.0007334732557399987, + "loss": 0.7375, + "step": 16190 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007334308550107062, + "loss": 0.6142, + "step": 16200 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007333884542814137, + "loss": 0.6657, + "step": 16210 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007333460535521211, + "loss": 0.6089, + "step": 16220 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007333036528228286, + "loss": 0.6561, + "step": 16230 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007332612520935361, + "loss": 0.6024, + "step": 16240 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007332188513642435, + "loss": 0.6674, + "step": 16250 + }, + { + "epoch": 0.69, + "learning_rate": 0.000733176450634951, + "loss": 0.6292, + "step": 16260 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007331340499056584, + "loss": 0.6667, + "step": 16270 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007330916491763658, + "loss": 0.6541, + "step": 16280 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007330492484470734, + "loss": 0.521, + "step": 16290 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007330068477177808, + "loss": 0.6272, + "step": 16300 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007329644469884882, + "loss": 0.6003, + "step": 16310 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007329220462591957, + "loss": 0.6223, + "step": 16320 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007328796455299032, + "loss": 0.5614, + "step": 16330 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007328372448006106, + "loss": 0.6057, + "step": 16340 + }, + { + "epoch": 0.69, + "learning_rate": 0.000732794844071318, + "loss": 0.675, + "step": 16350 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007327524433420256, + "loss": 0.6642, + "step": 16360 + }, + { + "epoch": 0.69, + "learning_rate": 0.000732710042612733, + "loss": 0.6538, + "step": 16370 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007326676418834404, + "loss": 0.703, + "step": 16380 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007326252411541479, + "loss": 0.5156, + "step": 16390 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007325828404248553, + "loss": 0.5663, + "step": 16400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007325404396955628, + "loss": 0.6556, + "step": 16410 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007324980389662703, + "loss": 0.6383, + "step": 16420 + }, + { + "epoch": 0.69, + "learning_rate": 0.0007324556382369777, + "loss": 0.5925, + "step": 16430 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007324132375076852, + "loss": 0.5572, + "step": 16440 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007323708367783927, + "loss": 0.4902, + "step": 16450 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007323284360491001, + "loss": 0.6336, + "step": 16460 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007322860353198075, + "loss": 0.6811, + "step": 16470 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007322436345905149, + "loss": 0.6521, + "step": 16480 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007322012338612225, + "loss": 0.5733, + "step": 16490 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007321588331319299, + "loss": 0.694, + "step": 16500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007321164324026373, + "loss": 0.5726, + "step": 16510 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007320740316733448, + "loss": 0.5779, + "step": 16520 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007320316309440523, + "loss": 0.5764, + "step": 16530 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007319892302147597, + "loss": 0.6296, + "step": 16540 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007319468294854672, + "loss": 0.5741, + "step": 16550 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007319044287561746, + "loss": 0.7486, + "step": 16560 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007318620280268821, + "loss": 0.6184, + "step": 16570 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007318196272975896, + "loss": 0.6578, + "step": 16580 + }, + { + "epoch": 0.7, + "learning_rate": 0.000731777226568297, + "loss": 0.5588, + "step": 16590 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007317348258390044, + "loss": 0.571, + "step": 16600 + }, + { + "epoch": 0.7, + "learning_rate": 0.000731692425109712, + "loss": 0.5996, + "step": 16610 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007316500243804194, + "loss": 0.6088, + "step": 16620 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007316076236511268, + "loss": 0.6024, + "step": 16630 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007315652229218342, + "loss": 0.607, + "step": 16640 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007315228221925418, + "loss": 0.6863, + "step": 16650 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007314804214632492, + "loss": 0.6516, + "step": 16660 + }, + { + "epoch": 0.7, + "learning_rate": 0.0007314380207339566, + "loss": 0.5095, + "step": 16670 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007313956200046641, + "loss": 0.6025, + "step": 16680 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007313532192753716, + "loss": 0.5959, + "step": 16690 + }, + { + "epoch": 0.71, + "learning_rate": 0.000731310818546079, + "loss": 0.5485, + "step": 16700 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007312684178167865, + "loss": 0.6573, + "step": 16710 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007312260170874939, + "loss": 0.5794, + "step": 16720 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007311836163582014, + "loss": 0.5564, + "step": 16730 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007311412156289089, + "loss": 0.6392, + "step": 16740 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007310988148996163, + "loss": 0.5766, + "step": 16750 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007310564141703237, + "loss": 0.7061, + "step": 16760 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007310140134410313, + "loss": 0.7205, + "step": 16770 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007309716127117387, + "loss": 0.5958, + "step": 16780 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007309292119824461, + "loss": 0.6157, + "step": 16790 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007308868112531536, + "loss": 0.5907, + "step": 16800 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007308444105238611, + "loss": 0.6223, + "step": 16810 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007308020097945685, + "loss": 0.6177, + "step": 16820 + }, + { + "epoch": 0.71, + "learning_rate": 0.000730759609065276, + "loss": 0.6976, + "step": 16830 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007307172083359834, + "loss": 0.5278, + "step": 16840 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007306748076066909, + "loss": 0.5984, + "step": 16850 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007306324068773984, + "loss": 0.6882, + "step": 16860 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007305900061481058, + "loss": 0.5681, + "step": 16870 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007305476054188132, + "loss": 0.604, + "step": 16880 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007305052046895208, + "loss": 0.5527, + "step": 16890 + }, + { + "epoch": 0.71, + "learning_rate": 0.0007304628039602282, + "loss": 0.5396, + "step": 16900 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007304204032309356, + "loss": 0.6247, + "step": 16910 + }, + { + "epoch": 0.72, + "learning_rate": 0.000730378002501643, + "loss": 0.6183, + "step": 16920 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007303356017723505, + "loss": 0.6076, + "step": 16930 + }, + { + "epoch": 0.72, + "learning_rate": 0.000730293201043058, + "loss": 0.7084, + "step": 16940 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007302508003137655, + "loss": 0.7182, + "step": 16950 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007302083995844728, + "loss": 0.6645, + "step": 16960 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007301659988551804, + "loss": 0.6664, + "step": 16970 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007301235981258879, + "loss": 0.6011, + "step": 16980 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007300811973965952, + "loss": 0.5842, + "step": 16990 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007300387966673027, + "loss": 0.4616, + "step": 17000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007299963959380101, + "loss": 0.5858, + "step": 17010 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007299539952087176, + "loss": 0.6305, + "step": 17020 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007299115944794251, + "loss": 0.6119, + "step": 17030 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007298691937501325, + "loss": 0.697, + "step": 17040 + }, + { + "epoch": 0.72, + "learning_rate": 0.00072982679302084, + "loss": 0.6877, + "step": 17050 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007297843922915475, + "loss": 0.6244, + "step": 17060 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007297419915622549, + "loss": 0.6343, + "step": 17070 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007296995908329623, + "loss": 0.6527, + "step": 17080 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007296571901036698, + "loss": 0.6032, + "step": 17090 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007296147893743773, + "loss": 0.6217, + "step": 17100 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007295723886450847, + "loss": 0.5998, + "step": 17110 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007295299879157922, + "loss": 0.571, + "step": 17120 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007294875871864996, + "loss": 0.6792, + "step": 17130 + }, + { + "epoch": 0.72, + "learning_rate": 0.0007294451864572071, + "loss": 0.6104, + "step": 17140 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007294027857279146, + "loss": 0.5999, + "step": 17150 + }, + { + "epoch": 0.73, + "learning_rate": 0.000729360384998622, + "loss": 0.5071, + "step": 17160 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007293179842693294, + "loss": 0.6451, + "step": 17170 + }, + { + "epoch": 0.73, + "learning_rate": 0.000729275583540037, + "loss": 0.6632, + "step": 17180 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007292331828107444, + "loss": 0.6124, + "step": 17190 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007291907820814518, + "loss": 0.5391, + "step": 17200 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007291483813521593, + "loss": 0.6107, + "step": 17210 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007291059806228668, + "loss": 0.5494, + "step": 17220 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007290635798935742, + "loss": 0.5335, + "step": 17230 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007290211791642817, + "loss": 0.5614, + "step": 17240 + }, + { + "epoch": 0.73, + "learning_rate": 0.000728978778434989, + "loss": 0.6793, + "step": 17250 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007289363777056966, + "loss": 0.5121, + "step": 17260 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007288939769764041, + "loss": 0.5717, + "step": 17270 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007288515762471114, + "loss": 0.657, + "step": 17280 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007288091755178189, + "loss": 0.585, + "step": 17290 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007287667747885265, + "loss": 0.6849, + "step": 17300 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007287243740592338, + "loss": 0.6092, + "step": 17310 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007286819733299413, + "loss": 0.6287, + "step": 17320 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007286395726006488, + "loss": 0.6077, + "step": 17330 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007285971718713562, + "loss": 0.5957, + "step": 17340 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007285547711420637, + "loss": 0.5763, + "step": 17350 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007285123704127712, + "loss": 0.7659, + "step": 17360 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007284699696834785, + "loss": 0.5984, + "step": 17370 + }, + { + "epoch": 0.73, + "learning_rate": 0.0007284275689541861, + "loss": 0.5336, + "step": 17380 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007283851682248936, + "loss": 0.5709, + "step": 17390 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007283427674956009, + "loss": 0.632, + "step": 17400 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007283003667663084, + "loss": 0.6221, + "step": 17410 + }, + { + "epoch": 0.74, + "learning_rate": 0.000728257966037016, + "loss": 0.5207, + "step": 17420 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007282155653077233, + "loss": 0.6199, + "step": 17430 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007281731645784308, + "loss": 0.5875, + "step": 17440 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007281307638491382, + "loss": 0.6093, + "step": 17450 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007280883631198457, + "loss": 0.6651, + "step": 17460 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007280459623905532, + "loss": 0.569, + "step": 17470 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007280035616612606, + "loss": 0.6293, + "step": 17480 + }, + { + "epoch": 0.74, + "learning_rate": 0.000727961160931968, + "loss": 0.5955, + "step": 17490 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007279187602026756, + "loss": 0.5986, + "step": 17500 + }, + { + "epoch": 0.74, + "learning_rate": 0.000727876359473383, + "loss": 0.7122, + "step": 17510 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007278339587440904, + "loss": 0.5792, + "step": 17520 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007277915580147979, + "loss": 0.644, + "step": 17530 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007277491572855053, + "loss": 0.6263, + "step": 17540 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007277067565562128, + "loss": 0.5893, + "step": 17550 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007276643558269203, + "loss": 0.6411, + "step": 17560 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007276219550976276, + "loss": 0.6298, + "step": 17570 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007275795543683352, + "loss": 0.6352, + "step": 17580 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007275371536390427, + "loss": 0.5532, + "step": 17590 + }, + { + "epoch": 0.74, + "learning_rate": 0.00072749475290975, + "loss": 0.6109, + "step": 17600 + }, + { + "epoch": 0.74, + "learning_rate": 0.0007274523521804575, + "loss": 0.5958, + "step": 17610 + }, + { + "epoch": 0.75, + "learning_rate": 0.000727409951451165, + "loss": 0.6146, + "step": 17620 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007273675507218724, + "loss": 0.6172, + "step": 17630 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007273251499925799, + "loss": 0.6996, + "step": 17640 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007272827492632874, + "loss": 0.7388, + "step": 17650 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007272403485339948, + "loss": 0.661, + "step": 17660 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007271979478047023, + "loss": 0.5107, + "step": 17670 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007271555470754098, + "loss": 0.5982, + "step": 17680 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007271131463461171, + "loss": 0.6044, + "step": 17690 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007270707456168246, + "loss": 0.7058, + "step": 17700 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007270283448875322, + "loss": 0.6115, + "step": 17710 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007269859441582395, + "loss": 0.6186, + "step": 17720 + }, + { + "epoch": 0.75, + "learning_rate": 0.000726943543428947, + "loss": 0.5408, + "step": 17730 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007269011426996545, + "loss": 0.7947, + "step": 17740 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007268587419703619, + "loss": 0.5893, + "step": 17750 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007268163412410694, + "loss": 0.6298, + "step": 17760 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007267739405117769, + "loss": 0.5958, + "step": 17770 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007267315397824842, + "loss": 0.5858, + "step": 17780 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007266891390531918, + "loss": 0.6175, + "step": 17790 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007266467383238993, + "loss": 0.6409, + "step": 17800 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007266043375946066, + "loss": 0.6391, + "step": 17810 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007265619368653141, + "loss": 0.5389, + "step": 17820 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007265195361360217, + "loss": 0.6465, + "step": 17830 + }, + { + "epoch": 0.75, + "learning_rate": 0.000726477135406729, + "loss": 0.6272, + "step": 17840 + }, + { + "epoch": 0.75, + "learning_rate": 0.0007264347346774365, + "loss": 0.6083, + "step": 17850 + }, + { + "epoch": 0.76, + "learning_rate": 0.000726392333948144, + "loss": 0.6784, + "step": 17860 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007263499332188514, + "loss": 0.6383, + "step": 17870 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007263075324895589, + "loss": 0.5568, + "step": 17880 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007262651317602662, + "loss": 0.6551, + "step": 17890 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007262227310309737, + "loss": 0.645, + "step": 17900 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007261803303016813, + "loss": 0.5597, + "step": 17910 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007261379295723886, + "loss": 0.6426, + "step": 17920 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007260955288430961, + "loss": 0.7612, + "step": 17930 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007260531281138036, + "loss": 0.6687, + "step": 17940 + }, + { + "epoch": 0.76, + "learning_rate": 0.000726010727384511, + "loss": 0.7261, + "step": 17950 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007259683266552185, + "loss": 0.617, + "step": 17960 + }, + { + "epoch": 0.76, + "learning_rate": 0.000725925925925926, + "loss": 0.5772, + "step": 17970 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007258835251966333, + "loss": 0.5861, + "step": 17980 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007258411244673409, + "loss": 0.5667, + "step": 17990 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007257987237380484, + "loss": 0.5959, + "step": 18000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007257563230087557, + "loss": 0.4942, + "step": 18010 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007257139222794632, + "loss": 0.5567, + "step": 18020 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007256715215501708, + "loss": 0.4675, + "step": 18030 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007256291208208781, + "loss": 0.6557, + "step": 18040 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007255867200915856, + "loss": 0.6902, + "step": 18050 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007255443193622931, + "loss": 0.5983, + "step": 18060 + }, + { + "epoch": 0.76, + "learning_rate": 0.0007255019186330005, + "loss": 0.5857, + "step": 18070 + }, + { + "epoch": 0.76, + "learning_rate": 0.000725459517903708, + "loss": 0.5751, + "step": 18080 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007254171171744155, + "loss": 0.5528, + "step": 18090 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007253747164451228, + "loss": 0.6601, + "step": 18100 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007253323157158304, + "loss": 0.63, + "step": 18110 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007252899149865379, + "loss": 0.6909, + "step": 18120 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007252475142572452, + "loss": 0.5727, + "step": 18130 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007252051135279527, + "loss": 0.5623, + "step": 18140 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007251627127986602, + "loss": 0.5867, + "step": 18150 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007251203120693676, + "loss": 0.4831, + "step": 18160 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007250779113400751, + "loss": 0.6079, + "step": 18170 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007250355106107825, + "loss": 0.5672, + "step": 18180 + }, + { + "epoch": 0.77, + "learning_rate": 0.00072499310988149, + "loss": 0.6574, + "step": 18190 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007249507091521975, + "loss": 0.628, + "step": 18200 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007249083084229049, + "loss": 0.578, + "step": 18210 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007248659076936123, + "loss": 0.6557, + "step": 18220 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007248235069643198, + "loss": 0.5247, + "step": 18230 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007247811062350273, + "loss": 0.6387, + "step": 18240 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007247387055057347, + "loss": 0.5896, + "step": 18250 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007246963047764422, + "loss": 0.5839, + "step": 18260 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007246539040471497, + "loss": 0.5796, + "step": 18270 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007246115033178571, + "loss": 0.6407, + "step": 18280 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007245691025885646, + "loss": 0.5823, + "step": 18290 + }, + { + "epoch": 0.77, + "learning_rate": 0.000724526701859272, + "loss": 0.6114, + "step": 18300 + }, + { + "epoch": 0.77, + "learning_rate": 0.0007244843011299795, + "loss": 0.5612, + "step": 18310 + }, + { + "epoch": 0.77, + "learning_rate": 0.000724441900400687, + "loss": 0.5973, + "step": 18320 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007243994996713943, + "loss": 0.5664, + "step": 18330 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007243570989421018, + "loss": 0.615, + "step": 18340 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007243146982128093, + "loss": 0.6891, + "step": 18350 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007242722974835167, + "loss": 0.7602, + "step": 18360 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007242298967542242, + "loss": 0.5697, + "step": 18370 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007241874960249317, + "loss": 0.511, + "step": 18380 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007241450952956392, + "loss": 0.5168, + "step": 18390 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007241026945663466, + "loss": 0.5934, + "step": 18400 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007240602938370541, + "loss": 0.6624, + "step": 18410 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007240178931077614, + "loss": 0.7241, + "step": 18420 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007239754923784689, + "loss": 0.6129, + "step": 18430 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007239330916491765, + "loss": 0.6398, + "step": 18440 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007238906909198838, + "loss": 0.5879, + "step": 18450 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007238482901905913, + "loss": 0.7035, + "step": 18460 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007238058894612988, + "loss": 0.6507, + "step": 18470 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007237634887320062, + "loss": 0.6584, + "step": 18480 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007237210880027137, + "loss": 0.5595, + "step": 18490 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007236786872734211, + "loss": 0.6436, + "step": 18500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007236362865441285, + "loss": 0.6588, + "step": 18510 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007235938858148361, + "loss": 0.6075, + "step": 18520 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007235514850855435, + "loss": 0.6023, + "step": 18530 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007235090843562509, + "loss": 0.6874, + "step": 18540 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007234666836269584, + "loss": 0.608, + "step": 18550 + }, + { + "epoch": 0.78, + "learning_rate": 0.0007234242828976659, + "loss": 0.6512, + "step": 18560 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007233818821683733, + "loss": 0.5649, + "step": 18570 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007233394814390808, + "loss": 0.6006, + "step": 18580 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007232970807097882, + "loss": 0.5809, + "step": 18590 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007232546799804957, + "loss": 0.5728, + "step": 18600 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007232122792512032, + "loss": 0.5182, + "step": 18610 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007231698785219106, + "loss": 0.6434, + "step": 18620 + }, + { + "epoch": 0.79, + "learning_rate": 0.000723127477792618, + "loss": 0.5726, + "step": 18630 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007230850770633256, + "loss": 0.6834, + "step": 18640 + }, + { + "epoch": 0.79, + "learning_rate": 0.000723042676334033, + "loss": 0.6384, + "step": 18650 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007230002756047404, + "loss": 0.5545, + "step": 18660 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007229578748754479, + "loss": 0.4982, + "step": 18670 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007229154741461554, + "loss": 0.6851, + "step": 18680 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007228730734168628, + "loss": 0.6098, + "step": 18690 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007228306726875703, + "loss": 0.6371, + "step": 18700 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007227882719582777, + "loss": 0.5767, + "step": 18710 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007227458712289852, + "loss": 0.6275, + "step": 18720 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007227034704996927, + "loss": 0.5842, + "step": 18730 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007226610697704, + "loss": 0.6876, + "step": 18740 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007226186690411075, + "loss": 0.5441, + "step": 18750 + }, + { + "epoch": 0.79, + "learning_rate": 0.000722576268311815, + "loss": 0.6453, + "step": 18760 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007225338675825225, + "loss": 0.7198, + "step": 18770 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007224914668532299, + "loss": 0.6716, + "step": 18780 + }, + { + "epoch": 0.79, + "learning_rate": 0.0007224490661239373, + "loss": 0.6273, + "step": 18790 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007224066653946449, + "loss": 0.59, + "step": 18800 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007223642646653523, + "loss": 0.6018, + "step": 18810 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007223218639360597, + "loss": 0.6353, + "step": 18820 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007222794632067671, + "loss": 0.6088, + "step": 18830 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007222370624774747, + "loss": 0.5748, + "step": 18840 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007221946617481821, + "loss": 0.5847, + "step": 18850 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007221522610188895, + "loss": 0.664, + "step": 18860 + }, + { + "epoch": 0.8, + "learning_rate": 0.000722109860289597, + "loss": 0.6428, + "step": 18870 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007220674595603045, + "loss": 0.5602, + "step": 18880 + }, + { + "epoch": 0.8, + "learning_rate": 0.000722025058831012, + "loss": 0.6027, + "step": 18890 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007219826581017194, + "loss": 0.5566, + "step": 18900 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007219402573724268, + "loss": 0.6563, + "step": 18910 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007218978566431343, + "loss": 0.6298, + "step": 18920 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007218554559138418, + "loss": 0.7829, + "step": 18930 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007218130551845492, + "loss": 0.6354, + "step": 18940 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007217706544552566, + "loss": 0.6258, + "step": 18950 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007217282537259641, + "loss": 0.6694, + "step": 18960 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007216858529966716, + "loss": 0.5583, + "step": 18970 + }, + { + "epoch": 0.8, + "learning_rate": 0.000721643452267379, + "loss": 0.6371, + "step": 18980 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007216010515380865, + "loss": 0.574, + "step": 18990 + }, + { + "epoch": 0.8, + "learning_rate": 0.000721558650808794, + "loss": 0.5414, + "step": 19000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007215162500795014, + "loss": 0.61, + "step": 19010 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007214738493502089, + "loss": 0.5119, + "step": 19020 + }, + { + "epoch": 0.8, + "learning_rate": 0.0007214314486209163, + "loss": 0.6398, + "step": 19030 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007213890478916237, + "loss": 0.7128, + "step": 19040 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007213466471623313, + "loss": 0.6507, + "step": 19050 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007213042464330387, + "loss": 0.6263, + "step": 19060 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007212618457037461, + "loss": 0.5896, + "step": 19070 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007212194449744536, + "loss": 0.6075, + "step": 19080 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007211770442451611, + "loss": 0.6228, + "step": 19090 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007211346435158685, + "loss": 0.6643, + "step": 19100 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007210922427865759, + "loss": 0.6432, + "step": 19110 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007210498420572834, + "loss": 0.5934, + "step": 19120 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007210074413279909, + "loss": 0.5739, + "step": 19130 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007209650405986983, + "loss": 0.7134, + "step": 19140 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007209226398694058, + "loss": 0.5658, + "step": 19150 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007208802391401132, + "loss": 0.6782, + "step": 19160 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007208378384108207, + "loss": 0.6519, + "step": 19170 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007207954376815282, + "loss": 0.7282, + "step": 19180 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007207530369522356, + "loss": 0.5826, + "step": 19190 + }, + { + "epoch": 0.81, + "learning_rate": 0.000720710636222943, + "loss": 0.6852, + "step": 19200 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007206682354936506, + "loss": 0.5673, + "step": 19210 + }, + { + "epoch": 0.81, + "learning_rate": 0.000720625834764358, + "loss": 0.6116, + "step": 19220 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007205834340350654, + "loss": 0.6033, + "step": 19230 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007205410333057729, + "loss": 0.5422, + "step": 19240 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007204986325764804, + "loss": 0.7235, + "step": 19250 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007204562318471878, + "loss": 0.5592, + "step": 19260 + }, + { + "epoch": 0.81, + "learning_rate": 0.0007204138311178953, + "loss": 0.6231, + "step": 19270 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007203714303886027, + "loss": 0.6741, + "step": 19280 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007203290296593102, + "loss": 0.615, + "step": 19290 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007202866289300177, + "loss": 0.6004, + "step": 19300 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007202442282007251, + "loss": 0.5678, + "step": 19310 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007202018274714325, + "loss": 0.531, + "step": 19320 + }, + { + "epoch": 0.82, + "learning_rate": 0.00072015942674214, + "loss": 0.6414, + "step": 19330 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007201170260128475, + "loss": 0.6653, + "step": 19340 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007200746252835549, + "loss": 0.6284, + "step": 19350 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007200322245542623, + "loss": 0.6153, + "step": 19360 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007199898238249699, + "loss": 0.6316, + "step": 19370 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007199474230956773, + "loss": 0.6466, + "step": 19380 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007199050223663847, + "loss": 0.6101, + "step": 19390 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007198626216370921, + "loss": 0.6005, + "step": 19400 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007198202209077997, + "loss": 0.6207, + "step": 19410 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007197778201785071, + "loss": 0.6286, + "step": 19420 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007197354194492145, + "loss": 0.625, + "step": 19430 + }, + { + "epoch": 0.82, + "learning_rate": 0.000719693018719922, + "loss": 0.6569, + "step": 19440 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007196506179906295, + "loss": 0.6067, + "step": 19450 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007196082172613369, + "loss": 0.6574, + "step": 19460 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007195658165320444, + "loss": 0.6847, + "step": 19470 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007195234158027518, + "loss": 0.5976, + "step": 19480 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007194810150734593, + "loss": 0.5956, + "step": 19490 + }, + { + "epoch": 0.82, + "learning_rate": 0.0007194386143441668, + "loss": 0.4963, + "step": 19500 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007193962136148742, + "loss": 0.6015, + "step": 19510 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007193538128855816, + "loss": 0.7437, + "step": 19520 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007193114121562892, + "loss": 0.6133, + "step": 19530 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007192690114269966, + "loss": 0.6524, + "step": 19540 + }, + { + "epoch": 0.83, + "learning_rate": 0.000719226610697704, + "loss": 0.6267, + "step": 19550 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007191842099684115, + "loss": 0.6482, + "step": 19560 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007191418092391189, + "loss": 0.6271, + "step": 19570 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007190994085098264, + "loss": 0.56, + "step": 19580 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007190570077805339, + "loss": 0.603, + "step": 19590 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007190146070512413, + "loss": 0.6183, + "step": 19600 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007189722063219488, + "loss": 0.5323, + "step": 19610 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007189298055926563, + "loss": 0.5964, + "step": 19620 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007188874048633637, + "loss": 0.5781, + "step": 19630 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007188450041340711, + "loss": 0.6134, + "step": 19640 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007188026034047786, + "loss": 0.6201, + "step": 19650 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007187602026754861, + "loss": 0.5976, + "step": 19660 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007187178019461935, + "loss": 0.6749, + "step": 19670 + }, + { + "epoch": 0.83, + "learning_rate": 0.000718675401216901, + "loss": 0.5905, + "step": 19680 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007186330004876084, + "loss": 0.5686, + "step": 19690 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007185905997583159, + "loss": 0.68, + "step": 19700 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007185481990290234, + "loss": 0.6849, + "step": 19710 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007185057982997307, + "loss": 0.548, + "step": 19720 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007184633975704382, + "loss": 0.5168, + "step": 19730 + }, + { + "epoch": 0.83, + "learning_rate": 0.0007184209968411458, + "loss": 0.6682, + "step": 19740 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007183785961118531, + "loss": 0.631, + "step": 19750 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007183361953825606, + "loss": 0.65, + "step": 19760 + }, + { + "epoch": 0.84, + "learning_rate": 0.000718293794653268, + "loss": 0.5683, + "step": 19770 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007182513939239755, + "loss": 0.5944, + "step": 19780 + }, + { + "epoch": 0.84, + "learning_rate": 0.000718208993194683, + "loss": 0.5564, + "step": 19790 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007181665924653904, + "loss": 0.567, + "step": 19800 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007181241917360978, + "loss": 0.5472, + "step": 19810 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007180817910068054, + "loss": 0.5707, + "step": 19820 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007180393902775128, + "loss": 0.6035, + "step": 19830 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007179969895482202, + "loss": 0.5128, + "step": 19840 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007179545888189277, + "loss": 0.5397, + "step": 19850 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007179121880896353, + "loss": 0.678, + "step": 19860 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007178697873603426, + "loss": 0.735, + "step": 19870 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007178273866310501, + "loss": 0.6383, + "step": 19880 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007177849859017575, + "loss": 0.574, + "step": 19890 + }, + { + "epoch": 0.84, + "learning_rate": 0.000717742585172465, + "loss": 0.6756, + "step": 19900 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007177001844431725, + "loss": 0.587, + "step": 19910 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007176577837138799, + "loss": 0.6241, + "step": 19920 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007176153829845873, + "loss": 0.614, + "step": 19930 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007175729822552949, + "loss": 0.6061, + "step": 19940 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007175305815260023, + "loss": 0.5588, + "step": 19950 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007174881807967097, + "loss": 0.6382, + "step": 19960 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007174457800674172, + "loss": 0.6457, + "step": 19970 + }, + { + "epoch": 0.84, + "learning_rate": 0.0007174033793381247, + "loss": 0.5767, + "step": 19980 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007173609786088321, + "loss": 0.5303, + "step": 19990 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007173185778795396, + "loss": 0.5425, + "step": 20000 + }, + { + "epoch": 0.85, + "eval_loss": 0.6291659474372864, + "eval_runtime": 337.8327, + "eval_samples_per_second": 15.555, + "eval_steps_per_second": 3.889, + "step": 20000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007172761771502469, + "loss": 0.5866, + "step": 20010 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007172337764209545, + "loss": 0.6541, + "step": 20020 + }, + { + "epoch": 0.85, + "learning_rate": 0.000717191375691662, + "loss": 0.6159, + "step": 20030 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007171489749623693, + "loss": 0.6534, + "step": 20040 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007171065742330768, + "loss": 0.6131, + "step": 20050 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007170641735037844, + "loss": 0.6717, + "step": 20060 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007170217727744917, + "loss": 0.5898, + "step": 20070 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007169793720451992, + "loss": 0.6734, + "step": 20080 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007169369713159067, + "loss": 0.5304, + "step": 20090 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007168945705866141, + "loss": 0.6221, + "step": 20100 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007168521698573216, + "loss": 0.646, + "step": 20110 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007168097691280291, + "loss": 0.6391, + "step": 20120 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007167673683987364, + "loss": 0.6085, + "step": 20130 + }, + { + "epoch": 0.85, + "learning_rate": 0.000716724967669444, + "loss": 0.6449, + "step": 20140 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007166825669401515, + "loss": 0.582, + "step": 20150 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007166401662108588, + "loss": 0.6431, + "step": 20160 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007165977654815663, + "loss": 0.6114, + "step": 20170 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007165553647522738, + "loss": 0.5759, + "step": 20180 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007165129640229812, + "loss": 0.4313, + "step": 20190 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007164705632936887, + "loss": 0.6361, + "step": 20200 + }, + { + "epoch": 0.85, + "learning_rate": 0.0007164281625643962, + "loss": 0.6138, + "step": 20210 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007163857618351036, + "loss": 0.607, + "step": 20220 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007163433611058111, + "loss": 0.5974, + "step": 20230 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007163009603765186, + "loss": 0.5623, + "step": 20240 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007162585596472259, + "loss": 0.5691, + "step": 20250 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007162161589179334, + "loss": 0.5758, + "step": 20260 + }, + { + "epoch": 0.86, + "learning_rate": 0.000716173758188641, + "loss": 0.5985, + "step": 20270 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007161313574593483, + "loss": 0.6141, + "step": 20280 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007160889567300558, + "loss": 0.592, + "step": 20290 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007160465560007632, + "loss": 0.5901, + "step": 20300 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007160041552714707, + "loss": 0.5923, + "step": 20310 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007159617545421782, + "loss": 0.6363, + "step": 20320 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007159193538128855, + "loss": 0.6146, + "step": 20330 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007158769530835931, + "loss": 0.5526, + "step": 20340 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007158345523543006, + "loss": 0.6905, + "step": 20350 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007157921516250079, + "loss": 0.6717, + "step": 20360 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007157497508957154, + "loss": 0.5869, + "step": 20370 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007157073501664229, + "loss": 0.6797, + "step": 20380 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007156649494371303, + "loss": 0.6152, + "step": 20390 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007156225487078378, + "loss": 0.6495, + "step": 20400 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007155801479785453, + "loss": 0.6464, + "step": 20410 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007155377472492527, + "loss": 0.5877, + "step": 20420 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007154953465199602, + "loss": 0.5404, + "step": 20430 + }, + { + "epoch": 0.86, + "learning_rate": 0.0007154529457906677, + "loss": 0.6085, + "step": 20440 + }, + { + "epoch": 0.86, + "learning_rate": 0.000715410545061375, + "loss": 0.6544, + "step": 20450 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007153681443320825, + "loss": 0.7411, + "step": 20460 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007153257436027901, + "loss": 0.6482, + "step": 20470 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007152833428734974, + "loss": 0.5788, + "step": 20480 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007152409421442049, + "loss": 0.5449, + "step": 20490 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007151985414149124, + "loss": 0.6632, + "step": 20500 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007151561406856198, + "loss": 0.5706, + "step": 20510 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007151137399563273, + "loss": 0.6137, + "step": 20520 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007150713392270348, + "loss": 0.6483, + "step": 20530 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007150289384977421, + "loss": 0.501, + "step": 20540 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007149865377684497, + "loss": 0.6377, + "step": 20550 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007149441370391572, + "loss": 0.6234, + "step": 20560 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007149017363098645, + "loss": 0.6803, + "step": 20570 + }, + { + "epoch": 0.87, + "learning_rate": 0.000714859335580572, + "loss": 0.594, + "step": 20580 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007148169348512796, + "loss": 0.5463, + "step": 20590 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007147745341219869, + "loss": 0.593, + "step": 20600 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007147321333926944, + "loss": 0.4335, + "step": 20610 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007146897326634018, + "loss": 0.6657, + "step": 20620 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007146473319341093, + "loss": 0.6455, + "step": 20630 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007146049312048168, + "loss": 0.5391, + "step": 20640 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007145625304755242, + "loss": 0.6609, + "step": 20650 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007145201297462316, + "loss": 0.6325, + "step": 20660 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007144777290169392, + "loss": 0.5776, + "step": 20670 + }, + { + "epoch": 0.87, + "learning_rate": 0.0007144353282876466, + "loss": 0.6096, + "step": 20680 + }, + { + "epoch": 0.87, + "learning_rate": 0.000714392927558354, + "loss": 0.7119, + "step": 20690 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007143505268290615, + "loss": 0.6445, + "step": 20700 + }, + { + "epoch": 0.88, + "learning_rate": 0.000714308126099769, + "loss": 0.6244, + "step": 20710 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007142657253704764, + "loss": 0.6829, + "step": 20720 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007142233246411839, + "loss": 0.5619, + "step": 20730 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007141809239118912, + "loss": 0.5975, + "step": 20740 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007141385231825988, + "loss": 0.5039, + "step": 20750 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007140961224533063, + "loss": 0.6294, + "step": 20760 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007140537217240136, + "loss": 0.6314, + "step": 20770 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007140113209947211, + "loss": 0.5863, + "step": 20780 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007139689202654286, + "loss": 0.6469, + "step": 20790 + }, + { + "epoch": 0.88, + "learning_rate": 0.000713926519536136, + "loss": 0.6072, + "step": 20800 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007138841188068435, + "loss": 0.6536, + "step": 20810 + }, + { + "epoch": 0.88, + "learning_rate": 0.000713841718077551, + "loss": 0.6049, + "step": 20820 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007137993173482584, + "loss": 0.5867, + "step": 20830 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007137569166189659, + "loss": 0.5829, + "step": 20840 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007137145158896734, + "loss": 0.6715, + "step": 20850 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007136721151603807, + "loss": 0.6044, + "step": 20860 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007136297144310883, + "loss": 0.6245, + "step": 20870 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007135873137017958, + "loss": 0.5775, + "step": 20880 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007135449129725031, + "loss": 0.6605, + "step": 20890 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007135025122432106, + "loss": 0.6844, + "step": 20900 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007134601115139181, + "loss": 0.6052, + "step": 20910 + }, + { + "epoch": 0.88, + "learning_rate": 0.0007134177107846255, + "loss": 0.6249, + "step": 20920 + }, + { + "epoch": 0.89, + "learning_rate": 0.000713375310055333, + "loss": 0.6222, + "step": 20930 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007133329093260404, + "loss": 0.6625, + "step": 20940 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007132905085967479, + "loss": 0.691, + "step": 20950 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007132481078674554, + "loss": 0.5479, + "step": 20960 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007132057071381628, + "loss": 0.5582, + "step": 20970 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007131633064088702, + "loss": 0.6459, + "step": 20980 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007131209056795777, + "loss": 0.6081, + "step": 20990 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007130785049502852, + "loss": 0.6229, + "step": 21000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007130361042209926, + "loss": 0.6742, + "step": 21010 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007129937034917001, + "loss": 0.661, + "step": 21020 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007129513027624076, + "loss": 0.5608, + "step": 21030 + }, + { + "epoch": 0.89, + "learning_rate": 0.000712908902033115, + "loss": 0.6013, + "step": 21040 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007128665013038225, + "loss": 0.5647, + "step": 21050 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007128241005745299, + "loss": 0.6708, + "step": 21060 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007127816998452373, + "loss": 0.6044, + "step": 21070 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007127392991159449, + "loss": 0.5185, + "step": 21080 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007126968983866523, + "loss": 0.5722, + "step": 21090 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007126544976573597, + "loss": 0.6297, + "step": 21100 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007126120969280672, + "loss": 0.6384, + "step": 21110 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007125696961987747, + "loss": 0.5636, + "step": 21120 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007125272954694821, + "loss": 0.5653, + "step": 21130 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007124848947401896, + "loss": 0.5446, + "step": 21140 + }, + { + "epoch": 0.89, + "learning_rate": 0.000712442494010897, + "loss": 0.6408, + "step": 21150 + }, + { + "epoch": 0.89, + "learning_rate": 0.0007124000932816045, + "loss": 0.6568, + "step": 21160 + }, + { + "epoch": 0.9, + "learning_rate": 0.000712357692552312, + "loss": 0.5491, + "step": 21170 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007123152918230193, + "loss": 0.6168, + "step": 21180 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007122728910937268, + "loss": 0.685, + "step": 21190 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007122304903644344, + "loss": 0.5489, + "step": 21200 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007121880896351417, + "loss": 0.5288, + "step": 21210 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007121456889058492, + "loss": 0.6228, + "step": 21220 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007121032881765566, + "loss": 0.6075, + "step": 21230 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007120608874472641, + "loss": 0.6071, + "step": 21240 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007120184867179716, + "loss": 0.6104, + "step": 21250 + }, + { + "epoch": 0.9, + "learning_rate": 0.000711976085988679, + "loss": 0.6406, + "step": 21260 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007119336852593864, + "loss": 0.6664, + "step": 21270 + }, + { + "epoch": 0.9, + "learning_rate": 0.000711891284530094, + "loss": 0.6303, + "step": 21280 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007118488838008014, + "loss": 0.5787, + "step": 21290 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007118064830715088, + "loss": 0.5408, + "step": 21300 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007117640823422163, + "loss": 0.6108, + "step": 21310 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007117216816129238, + "loss": 0.5309, + "step": 21320 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007116792808836312, + "loss": 0.6953, + "step": 21330 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007116368801543387, + "loss": 0.6161, + "step": 21340 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007115944794250461, + "loss": 0.6533, + "step": 21350 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007115520786957536, + "loss": 0.715, + "step": 21360 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007115096779664611, + "loss": 0.5497, + "step": 21370 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007114672772371685, + "loss": 0.6655, + "step": 21380 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007114248765078759, + "loss": 0.6049, + "step": 21390 + }, + { + "epoch": 0.9, + "learning_rate": 0.0007113824757785835, + "loss": 0.5993, + "step": 21400 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007113400750492909, + "loss": 0.6398, + "step": 21410 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007112976743199983, + "loss": 0.6148, + "step": 21420 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007112552735907058, + "loss": 0.6681, + "step": 21430 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007112128728614133, + "loss": 0.6382, + "step": 21440 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007111704721321207, + "loss": 0.7087, + "step": 21450 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007111280714028282, + "loss": 0.6605, + "step": 21460 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007110856706735356, + "loss": 0.6312, + "step": 21470 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007110432699442431, + "loss": 0.6083, + "step": 21480 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007110008692149506, + "loss": 0.7642, + "step": 21490 + }, + { + "epoch": 0.91, + "learning_rate": 0.000710958468485658, + "loss": 0.5539, + "step": 21500 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007109160677563654, + "loss": 0.5903, + "step": 21510 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007108736670270729, + "loss": 0.5887, + "step": 21520 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007108312662977804, + "loss": 0.6, + "step": 21530 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007107888655684878, + "loss": 0.6409, + "step": 21540 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007107464648391952, + "loss": 0.5514, + "step": 21550 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007107040641099028, + "loss": 0.6284, + "step": 21560 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007106616633806102, + "loss": 0.5739, + "step": 21570 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007106192626513176, + "loss": 0.6499, + "step": 21580 + }, + { + "epoch": 0.91, + "learning_rate": 0.000710576861922025, + "loss": 0.692, + "step": 21590 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007105344611927325, + "loss": 0.6065, + "step": 21600 + }, + { + "epoch": 0.91, + "learning_rate": 0.00071049206046344, + "loss": 0.6687, + "step": 21610 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007104496597341475, + "loss": 0.6152, + "step": 21620 + }, + { + "epoch": 0.91, + "learning_rate": 0.0007104072590048549, + "loss": 0.6808, + "step": 21630 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007103648582755624, + "loss": 0.707, + "step": 21640 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007103224575462699, + "loss": 0.567, + "step": 21650 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007102800568169773, + "loss": 0.5303, + "step": 21660 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007102376560876847, + "loss": 0.5269, + "step": 21670 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007101952553583921, + "loss": 0.5847, + "step": 21680 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007101528546290997, + "loss": 0.6181, + "step": 21690 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007101104538998071, + "loss": 0.654, + "step": 21700 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007100680531705145, + "loss": 0.6816, + "step": 21710 + }, + { + "epoch": 0.92, + "learning_rate": 0.000710025652441222, + "loss": 0.651, + "step": 21720 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007099832517119295, + "loss": 0.5812, + "step": 21730 + }, + { + "epoch": 0.92, + "learning_rate": 0.000709940850982637, + "loss": 0.6924, + "step": 21740 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007098984502533444, + "loss": 0.6462, + "step": 21750 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007098560495240518, + "loss": 0.5935, + "step": 21760 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007098136487947593, + "loss": 0.6244, + "step": 21770 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007097712480654668, + "loss": 0.6925, + "step": 21780 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007097288473361742, + "loss": 0.5978, + "step": 21790 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007096864466068816, + "loss": 0.6391, + "step": 21800 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007096440458775892, + "loss": 0.562, + "step": 21810 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007096016451482966, + "loss": 0.619, + "step": 21820 + }, + { + "epoch": 0.92, + "learning_rate": 0.000709559244419004, + "loss": 0.6623, + "step": 21830 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007095168436897115, + "loss": 0.6397, + "step": 21840 + }, + { + "epoch": 0.92, + "learning_rate": 0.000709474442960419, + "loss": 0.6841, + "step": 21850 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007094320422311264, + "loss": 0.5645, + "step": 21860 + }, + { + "epoch": 0.92, + "learning_rate": 0.0007093896415018338, + "loss": 0.5963, + "step": 21870 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007093472407725413, + "loss": 0.5978, + "step": 21880 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007093048400432488, + "loss": 0.708, + "step": 21890 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007092624393139562, + "loss": 0.5719, + "step": 21900 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007092200385846637, + "loss": 0.6186, + "step": 21910 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007091776378553711, + "loss": 0.5051, + "step": 21920 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007091352371260786, + "loss": 0.6176, + "step": 21930 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007090928363967861, + "loss": 0.6242, + "step": 21940 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007090504356674935, + "loss": 0.668, + "step": 21950 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007090080349382009, + "loss": 0.715, + "step": 21960 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007089656342089085, + "loss": 0.584, + "step": 21970 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007089232334796159, + "loss": 0.6717, + "step": 21980 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007088808327503233, + "loss": 0.6739, + "step": 21990 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007088384320210308, + "loss": 0.578, + "step": 22000 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007087960312917383, + "loss": 0.597, + "step": 22010 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007087536305624457, + "loss": 0.6072, + "step": 22020 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007087112298331532, + "loss": 0.5684, + "step": 22030 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007086688291038606, + "loss": 0.5653, + "step": 22040 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007086264283745681, + "loss": 0.6884, + "step": 22050 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007085840276452756, + "loss": 0.7782, + "step": 22060 + }, + { + "epoch": 0.93, + "learning_rate": 0.000708541626915983, + "loss": 0.6606, + "step": 22070 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007084992261866904, + "loss": 0.6001, + "step": 22080 + }, + { + "epoch": 0.93, + "learning_rate": 0.000708456825457398, + "loss": 0.5788, + "step": 22090 + }, + { + "epoch": 0.93, + "learning_rate": 0.0007084144247281054, + "loss": 0.773, + "step": 22100 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007083720239988128, + "loss": 0.6497, + "step": 22110 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007083296232695203, + "loss": 0.6063, + "step": 22120 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007082872225402277, + "loss": 0.6154, + "step": 22130 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007082448218109352, + "loss": 0.5729, + "step": 22140 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007082024210816427, + "loss": 0.6894, + "step": 22150 + }, + { + "epoch": 0.94, + "learning_rate": 0.00070816002035235, + "loss": 0.6178, + "step": 22160 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007081176196230576, + "loss": 0.629, + "step": 22170 + }, + { + "epoch": 0.94, + "learning_rate": 0.000708075218893765, + "loss": 0.5572, + "step": 22180 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007080328181644724, + "loss": 0.6512, + "step": 22190 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007079904174351799, + "loss": 0.656, + "step": 22200 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007079480167058873, + "loss": 0.6024, + "step": 22210 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007079056159765948, + "loss": 0.6325, + "step": 22220 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007078632152473023, + "loss": 0.6019, + "step": 22230 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007078208145180097, + "loss": 0.6477, + "step": 22240 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007077784137887172, + "loss": 0.5758, + "step": 22250 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007077360130594247, + "loss": 0.6005, + "step": 22260 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007076936123301321, + "loss": 0.6484, + "step": 22270 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007076512116008395, + "loss": 0.6001, + "step": 22280 + }, + { + "epoch": 0.94, + "learning_rate": 0.000707608810871547, + "loss": 0.6617, + "step": 22290 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007075664101422545, + "loss": 0.6509, + "step": 22300 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007075240094129619, + "loss": 0.6199, + "step": 22310 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007074816086836694, + "loss": 0.5061, + "step": 22320 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007074392079543768, + "loss": 0.5844, + "step": 22330 + }, + { + "epoch": 0.94, + "learning_rate": 0.0007073968072250843, + "loss": 0.6367, + "step": 22340 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007073544064957918, + "loss": 0.5865, + "step": 22350 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007073120057664992, + "loss": 0.6757, + "step": 22360 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007072696050372067, + "loss": 0.6322, + "step": 22370 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007072272043079142, + "loss": 0.5538, + "step": 22380 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007071848035786216, + "loss": 0.5663, + "step": 22390 + }, + { + "epoch": 0.95, + "learning_rate": 0.000707142402849329, + "loss": 0.5554, + "step": 22400 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007071000021200365, + "loss": 0.5334, + "step": 22410 + }, + { + "epoch": 0.95, + "learning_rate": 0.000707057601390744, + "loss": 0.6138, + "step": 22420 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007070152006614514, + "loss": 0.6498, + "step": 22430 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007069727999321589, + "loss": 0.637, + "step": 22440 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007069303992028663, + "loss": 0.5299, + "step": 22450 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007068879984735738, + "loss": 0.6525, + "step": 22460 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007068455977442813, + "loss": 0.6105, + "step": 22470 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007068031970149886, + "loss": 0.6262, + "step": 22480 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007067607962856961, + "loss": 0.5891, + "step": 22490 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007067183955564037, + "loss": 0.6563, + "step": 22500 + }, + { + "epoch": 0.95, + "learning_rate": 0.000706675994827111, + "loss": 0.5711, + "step": 22510 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007066335940978185, + "loss": 0.6365, + "step": 22520 + }, + { + "epoch": 0.95, + "learning_rate": 0.000706591193368526, + "loss": 0.635, + "step": 22530 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007065487926392334, + "loss": 0.6868, + "step": 22540 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007065063919099409, + "loss": 0.597, + "step": 22550 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007064639911806484, + "loss": 0.5961, + "step": 22560 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007064215904513557, + "loss": 0.6529, + "step": 22570 + }, + { + "epoch": 0.95, + "learning_rate": 0.0007063791897220633, + "loss": 0.5869, + "step": 22580 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007063367889927708, + "loss": 0.6421, + "step": 22590 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007062943882634781, + "loss": 0.705, + "step": 22600 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007062519875341856, + "loss": 0.6322, + "step": 22610 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007062095868048932, + "loss": 0.5739, + "step": 22620 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007061671860756005, + "loss": 0.6547, + "step": 22630 + }, + { + "epoch": 0.96, + "learning_rate": 0.000706124785346308, + "loss": 0.6634, + "step": 22640 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007060823846170154, + "loss": 0.5673, + "step": 22650 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007060399838877229, + "loss": 0.536, + "step": 22660 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007059975831584304, + "loss": 0.5347, + "step": 22670 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007059551824291378, + "loss": 0.5288, + "step": 22680 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007059127816998452, + "loss": 0.5419, + "step": 22690 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007058703809705528, + "loss": 0.6644, + "step": 22700 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007058279802412602, + "loss": 0.6728, + "step": 22710 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007057855795119676, + "loss": 0.6428, + "step": 22720 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007057431787826751, + "loss": 0.651, + "step": 22730 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007057007780533825, + "loss": 0.6546, + "step": 22740 + }, + { + "epoch": 0.96, + "learning_rate": 0.00070565837732409, + "loss": 0.6206, + "step": 22750 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007056159765947975, + "loss": 0.5655, + "step": 22760 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007055735758655048, + "loss": 0.6946, + "step": 22770 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007055311751362124, + "loss": 0.6061, + "step": 22780 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007054887744069199, + "loss": 0.6721, + "step": 22790 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007054463736776272, + "loss": 0.5201, + "step": 22800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0007054039729483347, + "loss": 0.649, + "step": 22810 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007053615722190423, + "loss": 0.6368, + "step": 22820 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007053191714897496, + "loss": 0.6428, + "step": 22830 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007052767707604571, + "loss": 0.6092, + "step": 22840 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007052343700311646, + "loss": 0.6464, + "step": 22850 + }, + { + "epoch": 0.97, + "learning_rate": 0.000705191969301872, + "loss": 0.6384, + "step": 22860 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007051495685725795, + "loss": 0.5484, + "step": 22870 + }, + { + "epoch": 0.97, + "learning_rate": 0.000705107167843287, + "loss": 0.6678, + "step": 22880 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007050647671139943, + "loss": 0.598, + "step": 22890 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007050223663847019, + "loss": 0.5829, + "step": 22900 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007049799656554094, + "loss": 0.5523, + "step": 22910 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007049375649261167, + "loss": 0.7557, + "step": 22920 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007048951641968242, + "loss": 0.6861, + "step": 22930 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007048527634675317, + "loss": 0.6454, + "step": 22940 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007048103627382391, + "loss": 0.6413, + "step": 22950 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007047679620089466, + "loss": 0.709, + "step": 22960 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007047255612796541, + "loss": 0.6126, + "step": 22970 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007046831605503615, + "loss": 0.5864, + "step": 22980 + }, + { + "epoch": 0.97, + "learning_rate": 0.000704640759821069, + "loss": 0.5941, + "step": 22990 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007045983590917765, + "loss": 0.666, + "step": 23000 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007045559583624838, + "loss": 0.5371, + "step": 23010 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007045135576331913, + "loss": 0.5663, + "step": 23020 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007044711569038989, + "loss": 0.6226, + "step": 23030 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007044287561746062, + "loss": 0.6017, + "step": 23040 + }, + { + "epoch": 0.97, + "learning_rate": 0.0007043863554453137, + "loss": 0.6189, + "step": 23050 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007043439547160212, + "loss": 0.6393, + "step": 23060 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007043015539867286, + "loss": 0.6345, + "step": 23070 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007042591532574361, + "loss": 0.6487, + "step": 23080 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007042167525281434, + "loss": 0.5666, + "step": 23090 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007041743517988509, + "loss": 0.6711, + "step": 23100 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007041319510695585, + "loss": 0.5355, + "step": 23110 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007040895503402658, + "loss": 0.6456, + "step": 23120 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007040471496109733, + "loss": 0.6645, + "step": 23130 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007040047488816808, + "loss": 0.7074, + "step": 23140 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007039623481523882, + "loss": 0.6012, + "step": 23150 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007039199474230957, + "loss": 0.7067, + "step": 23160 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007038775466938032, + "loss": 0.6977, + "step": 23170 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007038351459645105, + "loss": 0.6459, + "step": 23180 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007037927452352181, + "loss": 0.6519, + "step": 23190 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007037503445059256, + "loss": 0.6257, + "step": 23200 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007037079437766329, + "loss": 0.6431, + "step": 23210 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007036655430473404, + "loss": 0.6272, + "step": 23220 + }, + { + "epoch": 0.98, + "learning_rate": 0.000703623142318048, + "loss": 0.653, + "step": 23230 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007035807415887553, + "loss": 0.5919, + "step": 23240 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007035383408594628, + "loss": 0.5866, + "step": 23250 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007034959401301703, + "loss": 0.6435, + "step": 23260 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007034535394008777, + "loss": 0.6517, + "step": 23270 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007034111386715852, + "loss": 0.6048, + "step": 23280 + }, + { + "epoch": 0.98, + "learning_rate": 0.0007033687379422927, + "loss": 0.593, + "step": 23290 + }, + { + "epoch": 0.99, + "learning_rate": 0.000703326337213, + "loss": 0.6295, + "step": 23300 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007032839364837076, + "loss": 0.6414, + "step": 23310 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007032415357544151, + "loss": 0.6102, + "step": 23320 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007031991350251224, + "loss": 0.6823, + "step": 23330 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007031567342958299, + "loss": 0.5635, + "step": 23340 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007031143335665375, + "loss": 0.691, + "step": 23350 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007030719328372448, + "loss": 0.6135, + "step": 23360 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007030295321079523, + "loss": 0.6744, + "step": 23370 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007029871313786597, + "loss": 0.5247, + "step": 23380 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007029447306493672, + "loss": 0.6435, + "step": 23390 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007029023299200747, + "loss": 0.6787, + "step": 23400 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007028599291907821, + "loss": 0.6846, + "step": 23410 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007028175284614895, + "loss": 0.7162, + "step": 23420 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007027751277321971, + "loss": 0.6054, + "step": 23430 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007027327270029045, + "loss": 0.652, + "step": 23440 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007026903262736119, + "loss": 0.5605, + "step": 23450 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007026479255443194, + "loss": 0.6684, + "step": 23460 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007026055248150269, + "loss": 0.637, + "step": 23470 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007025631240857343, + "loss": 0.5821, + "step": 23480 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007025207233564418, + "loss": 0.5681, + "step": 23490 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007024783226271491, + "loss": 0.5773, + "step": 23500 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007024359218978567, + "loss": 0.5677, + "step": 23510 + }, + { + "epoch": 0.99, + "learning_rate": 0.0007023935211685642, + "loss": 0.6691, + "step": 23520 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007023511204392716, + "loss": 0.6414, + "step": 23530 + }, + { + "epoch": 1.0, + "learning_rate": 0.000702308719709979, + "loss": 0.6296, + "step": 23540 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007022663189806865, + "loss": 0.4861, + "step": 23550 + }, + { + "epoch": 1.0, + "learning_rate": 0.000702223918251394, + "loss": 0.5755, + "step": 23560 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007021815175221014, + "loss": 0.6427, + "step": 23570 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007021391167928089, + "loss": 0.527, + "step": 23580 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007020967160635164, + "loss": 0.6524, + "step": 23590 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007020543153342238, + "loss": 0.781, + "step": 23600 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007020119146049313, + "loss": 0.5479, + "step": 23610 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007019695138756386, + "loss": 0.5733, + "step": 23620 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007019271131463461, + "loss": 0.6175, + "step": 23630 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007018847124170537, + "loss": 0.5407, + "step": 23640 + }, + { + "epoch": 1.0, + "learning_rate": 0.000701842311687761, + "loss": 0.646, + "step": 23650 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007017999109584685, + "loss": 0.5768, + "step": 23660 + }, + { + "epoch": 1.0, + "learning_rate": 0.000701757510229176, + "loss": 0.5123, + "step": 23670 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007017151094998834, + "loss": 0.5601, + "step": 23680 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007016727087705909, + "loss": 0.6888, + "step": 23690 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007016303080412983, + "loss": 0.518, + "step": 23700 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007015879073120057, + "loss": 0.4827, + "step": 23710 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007015455065827133, + "loss": 0.6067, + "step": 23720 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007015031058534207, + "loss": 0.5729, + "step": 23730 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007014607051241281, + "loss": 0.5277, + "step": 23740 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007014183043948356, + "loss": 0.6001, + "step": 23750 + }, + { + "epoch": 1.0, + "learning_rate": 0.0007013759036655431, + "loss": 0.5276, + "step": 23760 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007013335029362505, + "loss": 0.6003, + "step": 23770 + }, + { + "epoch": 1.01, + "learning_rate": 0.000701291102206958, + "loss": 0.469, + "step": 23780 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007012487014776654, + "loss": 0.6175, + "step": 23790 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007012063007483729, + "loss": 0.6383, + "step": 23800 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007011639000190804, + "loss": 0.6678, + "step": 23810 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007011214992897878, + "loss": 0.5253, + "step": 23820 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007010790985604952, + "loss": 0.5248, + "step": 23830 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007010366978312028, + "loss": 0.5087, + "step": 23840 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007009942971019102, + "loss": 0.5807, + "step": 23850 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007009518963726176, + "loss": 0.5219, + "step": 23860 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007009094956433251, + "loss": 0.5977, + "step": 23870 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007008670949140326, + "loss": 0.5786, + "step": 23880 + }, + { + "epoch": 1.01, + "learning_rate": 0.00070082469418474, + "loss": 0.5504, + "step": 23890 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007007822934554475, + "loss": 0.6085, + "step": 23900 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007007398927261549, + "loss": 0.5364, + "step": 23910 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007006974919968624, + "loss": 0.5677, + "step": 23920 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007006550912675699, + "loss": 0.6672, + "step": 23930 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007006126905382773, + "loss": 0.514, + "step": 23940 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007005702898089847, + "loss": 0.4747, + "step": 23950 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007005278890796923, + "loss": 0.5778, + "step": 23960 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007004854883503997, + "loss": 0.5654, + "step": 23970 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007004430876211071, + "loss": 0.5595, + "step": 23980 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007004006868918146, + "loss": 0.618, + "step": 23990 + }, + { + "epoch": 1.01, + "learning_rate": 0.0007003582861625221, + "loss": 0.6047, + "step": 24000 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007003158854332295, + "loss": 0.5487, + "step": 24010 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007002734847039369, + "loss": 0.5975, + "step": 24020 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007002310839746443, + "loss": 0.6538, + "step": 24030 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007001886832453519, + "loss": 0.5883, + "step": 24040 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007001462825160593, + "loss": 0.5701, + "step": 24050 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007001038817867667, + "loss": 0.7498, + "step": 24060 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007000614810574742, + "loss": 0.5149, + "step": 24070 + }, + { + "epoch": 1.02, + "learning_rate": 0.0007000190803281817, + "loss": 0.5168, + "step": 24080 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006999766795988891, + "loss": 0.556, + "step": 24090 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006999342788695966, + "loss": 0.5341, + "step": 24100 + }, + { + "epoch": 1.02, + "learning_rate": 0.000699891878140304, + "loss": 0.5227, + "step": 24110 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006998494774110115, + "loss": 0.556, + "step": 24120 + }, + { + "epoch": 1.02, + "learning_rate": 0.000699807076681719, + "loss": 0.5245, + "step": 24130 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006997646759524264, + "loss": 0.635, + "step": 24140 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006997222752231338, + "loss": 0.535, + "step": 24150 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006996798744938413, + "loss": 0.5778, + "step": 24160 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006996374737645488, + "loss": 0.5199, + "step": 24170 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006995950730352562, + "loss": 0.6748, + "step": 24180 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006995526723059637, + "loss": 0.5759, + "step": 24190 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006995102715766712, + "loss": 0.5207, + "step": 24200 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006994678708473786, + "loss": 0.6965, + "step": 24210 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006994254701180861, + "loss": 0.5454, + "step": 24220 + }, + { + "epoch": 1.02, + "learning_rate": 0.0006993830693887935, + "loss": 0.5924, + "step": 24230 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006993406686595009, + "loss": 0.622, + "step": 24240 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006992982679302085, + "loss": 0.6326, + "step": 24250 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006992558672009159, + "loss": 0.6048, + "step": 24260 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006992134664716233, + "loss": 0.5181, + "step": 24270 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006991710657423308, + "loss": 0.6171, + "step": 24280 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006991286650130383, + "loss": 0.5832, + "step": 24290 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006990862642837457, + "loss": 0.5336, + "step": 24300 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006990438635544531, + "loss": 0.5098, + "step": 24310 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006990014628251606, + "loss": 0.5386, + "step": 24320 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006989590620958681, + "loss": 0.5651, + "step": 24330 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006989166613665755, + "loss": 0.5576, + "step": 24340 + }, + { + "epoch": 1.03, + "learning_rate": 0.000698874260637283, + "loss": 0.577, + "step": 24350 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006988318599079904, + "loss": 0.5504, + "step": 24360 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006987894591786979, + "loss": 0.5514, + "step": 24370 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006987470584494054, + "loss": 0.5736, + "step": 24380 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006987046577201128, + "loss": 0.5235, + "step": 24390 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006986622569908203, + "loss": 0.5624, + "step": 24400 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006986198562615278, + "loss": 0.5762, + "step": 24410 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006985774555322352, + "loss": 0.552, + "step": 24420 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006985350548029426, + "loss": 0.5433, + "step": 24430 + }, + { + "epoch": 1.03, + "learning_rate": 0.00069849265407365, + "loss": 0.6427, + "step": 24440 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006984502533443576, + "loss": 0.5829, + "step": 24450 + }, + { + "epoch": 1.03, + "learning_rate": 0.000698407852615065, + "loss": 0.546, + "step": 24460 + }, + { + "epoch": 1.03, + "learning_rate": 0.0006983654518857725, + "loss": 0.6099, + "step": 24470 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006983230511564799, + "loss": 0.5575, + "step": 24480 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006982806504271874, + "loss": 0.5227, + "step": 24490 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006982382496978949, + "loss": 0.605, + "step": 24500 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006981958489686023, + "loss": 0.5974, + "step": 24510 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006981534482393097, + "loss": 0.5444, + "step": 24520 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006981110475100173, + "loss": 0.5775, + "step": 24530 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006980686467807247, + "loss": 0.5505, + "step": 24540 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006980262460514321, + "loss": 0.5253, + "step": 24550 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006979838453221395, + "loss": 0.4977, + "step": 24560 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006979414445928471, + "loss": 0.5343, + "step": 24570 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006978990438635545, + "loss": 0.5587, + "step": 24580 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006978566431342619, + "loss": 0.5814, + "step": 24590 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006978142424049694, + "loss": 0.5183, + "step": 24600 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006977718416756769, + "loss": 0.5714, + "step": 24610 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006977294409463843, + "loss": 0.5093, + "step": 24620 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006976870402170917, + "loss": 0.5629, + "step": 24630 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006976446394877992, + "loss": 0.6162, + "step": 24640 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006976022387585067, + "loss": 0.5614, + "step": 24650 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006975598380292141, + "loss": 0.6527, + "step": 24660 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006975174372999216, + "loss": 0.6788, + "step": 24670 + }, + { + "epoch": 1.04, + "learning_rate": 0.000697475036570629, + "loss": 0.5523, + "step": 24680 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006974326358413365, + "loss": 0.6047, + "step": 24690 + }, + { + "epoch": 1.04, + "learning_rate": 0.000697390235112044, + "loss": 0.5696, + "step": 24700 + }, + { + "epoch": 1.04, + "learning_rate": 0.0006973478343827514, + "loss": 0.6413, + "step": 24710 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006973054336534588, + "loss": 0.5421, + "step": 24720 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006972630329241664, + "loss": 0.5973, + "step": 24730 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006972206321948738, + "loss": 0.6337, + "step": 24740 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006971782314655812, + "loss": 0.5656, + "step": 24750 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006971358307362887, + "loss": 0.5187, + "step": 24760 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006970934300069961, + "loss": 0.5453, + "step": 24770 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006970510292777036, + "loss": 0.5258, + "step": 24780 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006970086285484111, + "loss": 0.5629, + "step": 24790 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006969662278191185, + "loss": 0.5342, + "step": 24800 + }, + { + "epoch": 1.05, + "learning_rate": 0.000696923827089826, + "loss": 0.6197, + "step": 24810 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006968814263605335, + "loss": 0.6046, + "step": 24820 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006968390256312409, + "loss": 0.4763, + "step": 24830 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006967966249019483, + "loss": 0.6106, + "step": 24840 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006967542241726559, + "loss": 0.6816, + "step": 24850 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006967118234433633, + "loss": 0.5438, + "step": 24860 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006966694227140707, + "loss": 0.6479, + "step": 24870 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006966270219847782, + "loss": 0.5023, + "step": 24880 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006965846212554856, + "loss": 0.5059, + "step": 24890 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006965422205261931, + "loss": 0.5901, + "step": 24900 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006964998197969006, + "loss": 0.6659, + "step": 24910 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006964574190676079, + "loss": 0.6221, + "step": 24920 + }, + { + "epoch": 1.05, + "learning_rate": 0.0006964150183383155, + "loss": 0.6237, + "step": 24930 + }, + { + "epoch": 1.05, + "learning_rate": 0.000696372617609023, + "loss": 0.6555, + "step": 24940 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006963302168797303, + "loss": 0.7002, + "step": 24950 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006962878161504378, + "loss": 0.649, + "step": 24960 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006962454154211453, + "loss": 0.6351, + "step": 24970 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006962030146918527, + "loss": 0.5112, + "step": 24980 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006961606139625602, + "loss": 0.5888, + "step": 24990 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006961182132332677, + "loss": 0.6111, + "step": 25000 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006960758125039751, + "loss": 0.5891, + "step": 25010 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006960334117746826, + "loss": 0.6024, + "step": 25020 + }, + { + "epoch": 1.06, + "learning_rate": 0.00069599101104539, + "loss": 0.5441, + "step": 25030 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006959486103160974, + "loss": 0.5834, + "step": 25040 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006959062095868049, + "loss": 0.5977, + "step": 25050 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006958638088575125, + "loss": 0.5637, + "step": 25060 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006958214081282198, + "loss": 0.6074, + "step": 25070 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006957790073989273, + "loss": 0.6314, + "step": 25080 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006957366066696347, + "loss": 0.6009, + "step": 25090 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006956942059403422, + "loss": 0.6261, + "step": 25100 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006956518052110497, + "loss": 0.6088, + "step": 25110 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006956094044817571, + "loss": 0.529, + "step": 25120 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006955670037524645, + "loss": 0.5781, + "step": 25130 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006955246030231721, + "loss": 0.5641, + "step": 25140 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006954822022938795, + "loss": 0.6374, + "step": 25150 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006954398015645869, + "loss": 0.6331, + "step": 25160 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006953974008352944, + "loss": 0.5078, + "step": 25170 + }, + { + "epoch": 1.06, + "learning_rate": 0.0006953550001060019, + "loss": 0.6404, + "step": 25180 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006953125993767093, + "loss": 0.5963, + "step": 25190 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006952701986474168, + "loss": 0.5921, + "step": 25200 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006952277979181242, + "loss": 0.6606, + "step": 25210 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006951853971888317, + "loss": 0.6495, + "step": 25220 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006951429964595392, + "loss": 0.5645, + "step": 25230 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006951005957302465, + "loss": 0.5183, + "step": 25240 + }, + { + "epoch": 1.07, + "learning_rate": 0.000695058195000954, + "loss": 0.5287, + "step": 25250 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006950157942716616, + "loss": 0.5061, + "step": 25260 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006949733935423689, + "loss": 0.6056, + "step": 25270 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006949309928130764, + "loss": 0.6307, + "step": 25280 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006948885920837839, + "loss": 0.5398, + "step": 25290 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006948461913544913, + "loss": 0.5338, + "step": 25300 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006948037906251988, + "loss": 0.5989, + "step": 25310 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006947613898959063, + "loss": 0.58, + "step": 25320 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006947189891666136, + "loss": 0.6035, + "step": 25330 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006946765884373212, + "loss": 0.5323, + "step": 25340 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006946341877080287, + "loss": 0.5299, + "step": 25350 + }, + { + "epoch": 1.07, + "learning_rate": 0.000694591786978736, + "loss": 0.5456, + "step": 25360 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006945493862494435, + "loss": 0.5729, + "step": 25370 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006945069855201511, + "loss": 0.5174, + "step": 25380 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006944645847908584, + "loss": 0.6261, + "step": 25390 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006944221840615659, + "loss": 0.7251, + "step": 25400 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006943797833322734, + "loss": 0.5504, + "step": 25410 + }, + { + "epoch": 1.07, + "learning_rate": 0.0006943373826029808, + "loss": 0.5738, + "step": 25420 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006942949818736883, + "loss": 0.5357, + "step": 25430 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006942525811443958, + "loss": 0.6319, + "step": 25440 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006942101804151031, + "loss": 0.5715, + "step": 25450 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006941677796858107, + "loss": 0.6966, + "step": 25460 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006941253789565182, + "loss": 0.6355, + "step": 25470 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006940829782272255, + "loss": 0.5878, + "step": 25480 + }, + { + "epoch": 1.08, + "learning_rate": 0.000694040577497933, + "loss": 0.5854, + "step": 25490 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006939981767686404, + "loss": 0.552, + "step": 25500 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006939557760393479, + "loss": 0.5348, + "step": 25510 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006939133753100554, + "loss": 0.5801, + "step": 25520 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006938709745807627, + "loss": 0.4255, + "step": 25530 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006938285738514703, + "loss": 0.535, + "step": 25540 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006937861731221778, + "loss": 0.5846, + "step": 25550 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006937437723928851, + "loss": 0.5823, + "step": 25560 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006937013716635926, + "loss": 0.671, + "step": 25570 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006936589709343001, + "loss": 0.574, + "step": 25580 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006936165702050075, + "loss": 0.6175, + "step": 25590 + }, + { + "epoch": 1.08, + "learning_rate": 0.000693574169475715, + "loss": 0.5127, + "step": 25600 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006935317687464225, + "loss": 0.5691, + "step": 25610 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006934893680171299, + "loss": 0.6279, + "step": 25620 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006934469672878374, + "loss": 0.5667, + "step": 25630 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006934045665585449, + "loss": 0.5849, + "step": 25640 + }, + { + "epoch": 1.08, + "learning_rate": 0.0006933621658292522, + "loss": 0.5885, + "step": 25650 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006933197650999597, + "loss": 0.6158, + "step": 25660 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006932773643706673, + "loss": 0.5853, + "step": 25670 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006932349636413746, + "loss": 0.5683, + "step": 25680 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006931925629120821, + "loss": 0.6243, + "step": 25690 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006931501621827896, + "loss": 0.6842, + "step": 25700 + }, + { + "epoch": 1.09, + "learning_rate": 0.000693107761453497, + "loss": 0.5601, + "step": 25710 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006930653607242045, + "loss": 0.5747, + "step": 25720 + }, + { + "epoch": 1.09, + "learning_rate": 0.000693022959994912, + "loss": 0.4747, + "step": 25730 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006929805592656193, + "loss": 0.6375, + "step": 25740 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006929381585363269, + "loss": 0.4729, + "step": 25750 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006928957578070344, + "loss": 0.5623, + "step": 25760 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006928533570777417, + "loss": 0.5665, + "step": 25770 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006928109563484492, + "loss": 0.4765, + "step": 25780 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006927685556191568, + "loss": 0.584, + "step": 25790 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006927261548898641, + "loss": 0.548, + "step": 25800 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006926837541605716, + "loss": 0.5884, + "step": 25810 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006926413534312791, + "loss": 0.6063, + "step": 25820 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006925989527019865, + "loss": 0.5335, + "step": 25830 + }, + { + "epoch": 1.09, + "learning_rate": 0.000692556551972694, + "loss": 0.5116, + "step": 25840 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006925141512434014, + "loss": 0.5986, + "step": 25850 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006924717505141088, + "loss": 0.5328, + "step": 25860 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006924293497848164, + "loss": 0.5848, + "step": 25870 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006923869490555238, + "loss": 0.6491, + "step": 25880 + }, + { + "epoch": 1.09, + "learning_rate": 0.0006923445483262312, + "loss": 0.5726, + "step": 25890 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006923021475969387, + "loss": 0.5517, + "step": 25900 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006922597468676462, + "loss": 0.5889, + "step": 25910 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006922173461383536, + "loss": 0.6953, + "step": 25920 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006921749454090611, + "loss": 0.5754, + "step": 25930 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006921325446797684, + "loss": 0.5994, + "step": 25940 + }, + { + "epoch": 1.1, + "learning_rate": 0.000692090143950476, + "loss": 0.4993, + "step": 25950 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006920477432211835, + "loss": 0.5768, + "step": 25960 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006920053424918908, + "loss": 0.5483, + "step": 25970 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006919629417625983, + "loss": 0.697, + "step": 25980 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006919205410333059, + "loss": 0.6415, + "step": 25990 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006918781403040132, + "loss": 0.52, + "step": 26000 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006918357395747207, + "loss": 0.6452, + "step": 26010 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006917933388454282, + "loss": 0.6071, + "step": 26020 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006917509381161356, + "loss": 0.5752, + "step": 26030 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006917085373868431, + "loss": 0.5113, + "step": 26040 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006916661366575506, + "loss": 0.6, + "step": 26050 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006916237359282579, + "loss": 0.5362, + "step": 26060 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006915813351989655, + "loss": 0.6591, + "step": 26070 + }, + { + "epoch": 1.1, + "learning_rate": 0.000691538934469673, + "loss": 0.5323, + "step": 26080 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006914965337403803, + "loss": 0.5769, + "step": 26090 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006914541330110878, + "loss": 0.5578, + "step": 26100 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006914117322817953, + "loss": 0.5462, + "step": 26110 + }, + { + "epoch": 1.1, + "learning_rate": 0.0006913693315525027, + "loss": 0.6396, + "step": 26120 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006913269308232102, + "loss": 0.6143, + "step": 26130 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006912845300939177, + "loss": 0.5494, + "step": 26140 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006912421293646251, + "loss": 0.6032, + "step": 26150 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006911997286353326, + "loss": 0.6202, + "step": 26160 + }, + { + "epoch": 1.11, + "learning_rate": 0.00069115732790604, + "loss": 0.6222, + "step": 26170 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006911149271767474, + "loss": 0.5147, + "step": 26180 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006910725264474549, + "loss": 0.5999, + "step": 26190 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006910301257181624, + "loss": 0.6171, + "step": 26200 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006909877249888698, + "loss": 0.5012, + "step": 26210 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006909453242595773, + "loss": 0.5372, + "step": 26220 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006909029235302848, + "loss": 0.5925, + "step": 26230 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006908605228009922, + "loss": 0.5717, + "step": 26240 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006908181220716997, + "loss": 0.5678, + "step": 26250 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006907757213424071, + "loss": 0.5222, + "step": 26260 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006907333206131145, + "loss": 0.6088, + "step": 26270 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006906909198838221, + "loss": 0.5573, + "step": 26280 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006906485191545295, + "loss": 0.6007, + "step": 26290 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006906061184252369, + "loss": 0.606, + "step": 26300 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006905637176959444, + "loss": 0.4931, + "step": 26310 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006905213169666519, + "loss": 0.5421, + "step": 26320 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006904789162373593, + "loss": 0.6568, + "step": 26330 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006904365155080668, + "loss": 0.4719, + "step": 26340 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006903941147787743, + "loss": 0.5117, + "step": 26350 + }, + { + "epoch": 1.11, + "learning_rate": 0.0006903517140494817, + "loss": 0.5725, + "step": 26360 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006903093133201892, + "loss": 0.6667, + "step": 26370 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006902669125908965, + "loss": 0.6083, + "step": 26380 + }, + { + "epoch": 1.12, + "learning_rate": 0.000690224511861604, + "loss": 0.5744, + "step": 26390 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006901821111323116, + "loss": 0.5125, + "step": 26400 + }, + { + "epoch": 1.12, + "learning_rate": 0.000690139710403019, + "loss": 0.565, + "step": 26410 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006900973096737264, + "loss": 0.6076, + "step": 26420 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006900549089444339, + "loss": 0.6309, + "step": 26430 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006900125082151414, + "loss": 0.5827, + "step": 26440 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006899701074858488, + "loss": 0.641, + "step": 26450 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006899277067565562, + "loss": 0.4923, + "step": 26460 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006898853060272636, + "loss": 0.4364, + "step": 26470 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006898429052979712, + "loss": 0.5288, + "step": 26480 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006898005045686786, + "loss": 0.519, + "step": 26490 + }, + { + "epoch": 1.12, + "learning_rate": 0.000689758103839386, + "loss": 0.5937, + "step": 26500 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006897157031100935, + "loss": 0.6305, + "step": 26510 + }, + { + "epoch": 1.12, + "learning_rate": 0.000689673302380801, + "loss": 0.649, + "step": 26520 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006896309016515084, + "loss": 0.5124, + "step": 26530 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006895885009222159, + "loss": 0.4449, + "step": 26540 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006895461001929233, + "loss": 0.5322, + "step": 26550 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006895036994636308, + "loss": 0.5477, + "step": 26560 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006894612987343383, + "loss": 0.598, + "step": 26570 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006894188980050457, + "loss": 0.5701, + "step": 26580 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006893764972757531, + "loss": 0.5802, + "step": 26590 + }, + { + "epoch": 1.12, + "learning_rate": 0.0006893340965464607, + "loss": 0.5165, + "step": 26600 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006892916958171681, + "loss": 0.5221, + "step": 26610 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006892492950878755, + "loss": 0.6323, + "step": 26620 + }, + { + "epoch": 1.13, + "learning_rate": 0.000689206894358583, + "loss": 0.6157, + "step": 26630 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006891644936292905, + "loss": 0.5799, + "step": 26640 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006891220928999979, + "loss": 0.5535, + "step": 26650 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006890796921707054, + "loss": 0.7069, + "step": 26660 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006890372914414128, + "loss": 0.6223, + "step": 26670 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006889948907121203, + "loss": 0.4631, + "step": 26680 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006889524899828278, + "loss": 0.5508, + "step": 26690 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006889100892535352, + "loss": 0.6021, + "step": 26700 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006888676885242426, + "loss": 0.479, + "step": 26710 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006888252877949501, + "loss": 0.5727, + "step": 26720 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006887828870656576, + "loss": 0.5909, + "step": 26730 + }, + { + "epoch": 1.13, + "learning_rate": 0.000688740486336365, + "loss": 0.4967, + "step": 26740 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006886980856070725, + "loss": 0.6219, + "step": 26750 + }, + { + "epoch": 1.13, + "learning_rate": 0.00068865568487778, + "loss": 0.6883, + "step": 26760 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006886132841484874, + "loss": 0.6034, + "step": 26770 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006885708834191948, + "loss": 0.5622, + "step": 26780 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006885284826899023, + "loss": 0.5401, + "step": 26790 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006884860819606097, + "loss": 0.6162, + "step": 26800 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006884436812313172, + "loss": 0.6447, + "step": 26810 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006884012805020247, + "loss": 0.5589, + "step": 26820 + }, + { + "epoch": 1.13, + "learning_rate": 0.0006883588797727321, + "loss": 0.6537, + "step": 26830 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006883164790434396, + "loss": 0.6269, + "step": 26840 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006882740783141471, + "loss": 0.531, + "step": 26850 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006882316775848545, + "loss": 0.5767, + "step": 26860 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006881892768555619, + "loss": 0.6455, + "step": 26870 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006881468761262695, + "loss": 0.5292, + "step": 26880 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006881044753969769, + "loss": 0.5397, + "step": 26890 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006880620746676843, + "loss": 0.494, + "step": 26900 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006880196739383917, + "loss": 0.4578, + "step": 26910 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006879772732090992, + "loss": 0.5656, + "step": 26920 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006879348724798067, + "loss": 0.5726, + "step": 26930 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006878924717505141, + "loss": 0.612, + "step": 26940 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006878500710212216, + "loss": 0.5796, + "step": 26950 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006878076702919291, + "loss": 0.5246, + "step": 26960 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006877652695626365, + "loss": 0.5426, + "step": 26970 + }, + { + "epoch": 1.14, + "learning_rate": 0.000687722868833344, + "loss": 0.5193, + "step": 26980 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006876804681040514, + "loss": 0.5662, + "step": 26990 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006876380673747588, + "loss": 0.5151, + "step": 27000 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006875956666454664, + "loss": 0.5513, + "step": 27010 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006875532659161738, + "loss": 0.5754, + "step": 27020 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006875108651868812, + "loss": 0.5476, + "step": 27030 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006874684644575887, + "loss": 0.6118, + "step": 27040 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006874260637282962, + "loss": 0.6001, + "step": 27050 + }, + { + "epoch": 1.14, + "learning_rate": 0.0006873836629990036, + "loss": 0.6224, + "step": 27060 + }, + { + "epoch": 1.14, + "learning_rate": 0.000687341262269711, + "loss": 0.6493, + "step": 27070 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006872988615404185, + "loss": 0.5517, + "step": 27080 + }, + { + "epoch": 1.15, + "learning_rate": 0.000687256460811126, + "loss": 0.4907, + "step": 27090 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006872140600818334, + "loss": 0.5035, + "step": 27100 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006871716593525409, + "loss": 0.5734, + "step": 27110 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006871292586232483, + "loss": 0.5609, + "step": 27120 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006870868578939558, + "loss": 0.5619, + "step": 27130 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006870444571646633, + "loss": 0.6514, + "step": 27140 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006870020564353707, + "loss": 0.5676, + "step": 27150 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006869596557060781, + "loss": 0.5087, + "step": 27160 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006869172549767857, + "loss": 0.5278, + "step": 27170 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006868748542474931, + "loss": 0.5596, + "step": 27180 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006868324535182005, + "loss": 0.4936, + "step": 27190 + }, + { + "epoch": 1.15, + "learning_rate": 0.000686790052788908, + "loss": 0.5692, + "step": 27200 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006867476520596155, + "loss": 0.6747, + "step": 27210 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006867052513303229, + "loss": 0.6599, + "step": 27220 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006866628506010304, + "loss": 0.6341, + "step": 27230 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006866204498717378, + "loss": 0.5822, + "step": 27240 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006865780491424453, + "loss": 0.5721, + "step": 27250 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006865356484131528, + "loss": 0.6001, + "step": 27260 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006864932476838602, + "loss": 0.6228, + "step": 27270 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006864508469545676, + "loss": 0.5955, + "step": 27280 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006864084462252752, + "loss": 0.4978, + "step": 27290 + }, + { + "epoch": 1.15, + "learning_rate": 0.0006863660454959826, + "loss": 0.5665, + "step": 27300 + }, + { + "epoch": 1.15, + "learning_rate": 0.00068632364476669, + "loss": 0.7129, + "step": 27310 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006862812440373975, + "loss": 0.5458, + "step": 27320 + }, + { + "epoch": 1.16, + "learning_rate": 0.000686238843308105, + "loss": 0.5993, + "step": 27330 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006861964425788124, + "loss": 0.5737, + "step": 27340 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006861540418495199, + "loss": 0.6236, + "step": 27350 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006861116411202273, + "loss": 0.5938, + "step": 27360 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006860692403909348, + "loss": 0.6408, + "step": 27370 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006860268396616423, + "loss": 0.5185, + "step": 27380 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006859844389323496, + "loss": 0.5327, + "step": 27390 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006859420382030571, + "loss": 0.6654, + "step": 27400 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006858996374737647, + "loss": 0.5416, + "step": 27410 + }, + { + "epoch": 1.16, + "learning_rate": 0.000685857236744472, + "loss": 0.6141, + "step": 27420 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006858148360151795, + "loss": 0.5844, + "step": 27430 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006857724352858869, + "loss": 0.6177, + "step": 27440 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006857300345565944, + "loss": 0.5986, + "step": 27450 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006856876338273019, + "loss": 0.576, + "step": 27460 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006856452330980093, + "loss": 0.5784, + "step": 27470 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006856028323687167, + "loss": 0.6798, + "step": 27480 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006855604316394243, + "loss": 0.5377, + "step": 27490 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006855180309101317, + "loss": 0.5427, + "step": 27500 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006854756301808391, + "loss": 0.6156, + "step": 27510 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006854332294515466, + "loss": 0.5572, + "step": 27520 + }, + { + "epoch": 1.16, + "learning_rate": 0.000685390828722254, + "loss": 0.6011, + "step": 27530 + }, + { + "epoch": 1.16, + "learning_rate": 0.0006853484279929615, + "loss": 0.5765, + "step": 27540 + }, + { + "epoch": 1.17, + "learning_rate": 0.000685306027263669, + "loss": 0.5492, + "step": 27550 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006852636265343764, + "loss": 0.7331, + "step": 27560 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006852212258050839, + "loss": 0.608, + "step": 27570 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006851788250757914, + "loss": 0.5258, + "step": 27580 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006851364243464988, + "loss": 0.5885, + "step": 27590 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006850940236172062, + "loss": 0.5075, + "step": 27600 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006850516228879137, + "loss": 0.5575, + "step": 27610 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006850092221586212, + "loss": 0.5651, + "step": 27620 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006849668214293286, + "loss": 0.5876, + "step": 27630 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006849244207000361, + "loss": 0.5443, + "step": 27640 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006848820199707435, + "loss": 0.7017, + "step": 27650 + }, + { + "epoch": 1.17, + "learning_rate": 0.000684839619241451, + "loss": 0.5877, + "step": 27660 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006847972185121585, + "loss": 0.5473, + "step": 27670 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006847548177828658, + "loss": 0.6034, + "step": 27680 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006847124170535733, + "loss": 0.5962, + "step": 27690 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006846700163242809, + "loss": 0.5006, + "step": 27700 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006846276155949882, + "loss": 0.5988, + "step": 27710 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006845852148656957, + "loss": 0.5941, + "step": 27720 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006845428141364032, + "loss": 0.5514, + "step": 27730 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006845004134071106, + "loss": 0.5456, + "step": 27740 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006844580126778181, + "loss": 0.6653, + "step": 27750 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006844156119485256, + "loss": 0.634, + "step": 27760 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006843732112192329, + "loss": 0.5701, + "step": 27770 + }, + { + "epoch": 1.17, + "learning_rate": 0.0006843308104899405, + "loss": 0.6192, + "step": 27780 + }, + { + "epoch": 1.18, + "learning_rate": 0.000684288409760648, + "loss": 0.6494, + "step": 27790 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006842460090313553, + "loss": 0.5872, + "step": 27800 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006842036083020628, + "loss": 0.5553, + "step": 27810 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006841612075727704, + "loss": 0.5927, + "step": 27820 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006841188068434777, + "loss": 0.551, + "step": 27830 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006840764061141852, + "loss": 0.6772, + "step": 27840 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006840340053848926, + "loss": 0.6099, + "step": 27850 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006839916046556001, + "loss": 0.5538, + "step": 27860 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006839492039263076, + "loss": 0.7256, + "step": 27870 + }, + { + "epoch": 1.18, + "learning_rate": 0.000683906803197015, + "loss": 0.6736, + "step": 27880 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006838644024677224, + "loss": 0.5215, + "step": 27890 + }, + { + "epoch": 1.18, + "learning_rate": 0.00068382200173843, + "loss": 0.5873, + "step": 27900 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006837796010091375, + "loss": 0.546, + "step": 27910 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006837372002798448, + "loss": 0.5603, + "step": 27920 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006836947995505523, + "loss": 0.5531, + "step": 27930 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006836523988212599, + "loss": 0.6051, + "step": 27940 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006836099980919672, + "loss": 0.5002, + "step": 27950 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006835675973626747, + "loss": 0.5597, + "step": 27960 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006835251966333821, + "loss": 0.5233, + "step": 27970 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006834827959040896, + "loss": 0.5973, + "step": 27980 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006834403951747971, + "loss": 0.5774, + "step": 27990 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006833979944455044, + "loss": 0.6091, + "step": 28000 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006833555937162119, + "loss": 0.5433, + "step": 28010 + }, + { + "epoch": 1.18, + "learning_rate": 0.0006833131929869195, + "loss": 0.6489, + "step": 28020 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006832707922576268, + "loss": 0.568, + "step": 28030 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006832283915283343, + "loss": 0.6453, + "step": 28040 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006831859907990418, + "loss": 0.5244, + "step": 28050 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006831435900697492, + "loss": 0.5397, + "step": 28060 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006831011893404567, + "loss": 0.5682, + "step": 28070 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006830587886111642, + "loss": 0.5399, + "step": 28080 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006830163878818715, + "loss": 0.5871, + "step": 28090 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006829739871525791, + "loss": 0.5482, + "step": 28100 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006829315864232866, + "loss": 0.5687, + "step": 28110 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006828891856939939, + "loss": 0.5135, + "step": 28120 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006828467849647014, + "loss": 0.568, + "step": 28130 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006828043842354089, + "loss": 0.6056, + "step": 28140 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006827619835061163, + "loss": 0.5911, + "step": 28150 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006827195827768238, + "loss": 0.6205, + "step": 28160 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006826771820475313, + "loss": 0.5102, + "step": 28170 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006826347813182387, + "loss": 0.5685, + "step": 28180 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006825923805889462, + "loss": 0.545, + "step": 28190 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006825499798596537, + "loss": 0.5157, + "step": 28200 + }, + { + "epoch": 1.19, + "learning_rate": 0.000682507579130361, + "loss": 0.6216, + "step": 28210 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006824651784010685, + "loss": 0.6402, + "step": 28220 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006824227776717761, + "loss": 0.5277, + "step": 28230 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006823803769424834, + "loss": 0.569, + "step": 28240 + }, + { + "epoch": 1.19, + "learning_rate": 0.0006823379762131909, + "loss": 0.5537, + "step": 28250 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006822955754838984, + "loss": 0.4878, + "step": 28260 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006822531747546058, + "loss": 0.6577, + "step": 28270 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006822107740253133, + "loss": 0.6072, + "step": 28280 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006821683732960208, + "loss": 0.6501, + "step": 28290 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006821259725667281, + "loss": 0.5199, + "step": 28300 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006820835718374357, + "loss": 0.5334, + "step": 28310 + }, + { + "epoch": 1.2, + "learning_rate": 0.000682041171108143, + "loss": 0.5634, + "step": 28320 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006819987703788505, + "loss": 0.5156, + "step": 28330 + }, + { + "epoch": 1.2, + "learning_rate": 0.000681956369649558, + "loss": 0.5616, + "step": 28340 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006819139689202654, + "loss": 0.5901, + "step": 28350 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006818715681909729, + "loss": 0.5634, + "step": 28360 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006818291674616804, + "loss": 0.6075, + "step": 28370 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006817867667323878, + "loss": 0.5313, + "step": 28380 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006817443660030953, + "loss": 0.5094, + "step": 28390 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006817019652738028, + "loss": 0.591, + "step": 28400 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006816595645445101, + "loss": 0.6013, + "step": 28410 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006816171638152176, + "loss": 0.6337, + "step": 28420 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006815747630859252, + "loss": 0.5155, + "step": 28430 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006815323623566325, + "loss": 0.5974, + "step": 28440 + }, + { + "epoch": 1.2, + "learning_rate": 0.00068148996162734, + "loss": 0.5978, + "step": 28450 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006814475608980475, + "loss": 0.4894, + "step": 28460 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006814051601687549, + "loss": 0.5808, + "step": 28470 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006813627594394624, + "loss": 0.519, + "step": 28480 + }, + { + "epoch": 1.2, + "learning_rate": 0.0006813203587101699, + "loss": 0.5275, + "step": 28490 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006812779579808772, + "loss": 0.6131, + "step": 28500 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006812355572515848, + "loss": 0.5598, + "step": 28510 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006811931565222923, + "loss": 0.5844, + "step": 28520 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006811507557929996, + "loss": 0.6436, + "step": 28530 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006811083550637071, + "loss": 0.6174, + "step": 28540 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006810659543344147, + "loss": 0.6006, + "step": 28550 + }, + { + "epoch": 1.21, + "learning_rate": 0.000681023553605122, + "loss": 0.5411, + "step": 28560 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006809811528758295, + "loss": 0.6515, + "step": 28570 + }, + { + "epoch": 1.21, + "learning_rate": 0.000680938752146537, + "loss": 0.5411, + "step": 28580 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006808963514172444, + "loss": 0.5201, + "step": 28590 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006808539506879519, + "loss": 0.5766, + "step": 28600 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006808115499586593, + "loss": 0.5849, + "step": 28610 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006807691492293667, + "loss": 0.5649, + "step": 28620 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006807267485000743, + "loss": 0.6626, + "step": 28630 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006806843477707817, + "loss": 0.6439, + "step": 28640 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006806419470414891, + "loss": 0.5206, + "step": 28650 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006805995463121966, + "loss": 0.623, + "step": 28660 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006805571455829041, + "loss": 0.4991, + "step": 28670 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006805147448536115, + "loss": 0.5428, + "step": 28680 + }, + { + "epoch": 1.21, + "learning_rate": 0.000680472344124319, + "loss": 0.5681, + "step": 28690 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006804299433950264, + "loss": 0.5937, + "step": 28700 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006803875426657339, + "loss": 0.5214, + "step": 28710 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006803451419364414, + "loss": 0.5706, + "step": 28720 + }, + { + "epoch": 1.21, + "learning_rate": 0.0006803027412071488, + "loss": 0.5806, + "step": 28730 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006802603404778562, + "loss": 0.6901, + "step": 28740 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006802179397485637, + "loss": 0.6234, + "step": 28750 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006801755390192712, + "loss": 0.5528, + "step": 28760 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006801331382899786, + "loss": 0.6514, + "step": 28770 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006800907375606861, + "loss": 0.6532, + "step": 28780 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006800483368313936, + "loss": 0.5345, + "step": 28790 + }, + { + "epoch": 1.22, + "learning_rate": 0.000680005936102101, + "loss": 0.5273, + "step": 28800 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006799635353728085, + "loss": 0.5781, + "step": 28810 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006799211346435158, + "loss": 0.5454, + "step": 28820 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006798787339142233, + "loss": 0.5455, + "step": 28830 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006798363331849309, + "loss": 0.5811, + "step": 28840 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006797939324556382, + "loss": 0.6328, + "step": 28850 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006797515317263457, + "loss": 0.5401, + "step": 28860 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006797091309970532, + "loss": 0.5971, + "step": 28870 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006796667302677606, + "loss": 0.5965, + "step": 28880 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006796243295384681, + "loss": 0.6427, + "step": 28890 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006795819288091756, + "loss": 0.6014, + "step": 28900 + }, + { + "epoch": 1.22, + "learning_rate": 0.000679539528079883, + "loss": 0.5325, + "step": 28910 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006794971273505905, + "loss": 0.5478, + "step": 28920 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006794547266212979, + "loss": 0.5691, + "step": 28930 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006794123258920053, + "loss": 0.6676, + "step": 28940 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006793699251627128, + "loss": 0.5983, + "step": 28950 + }, + { + "epoch": 1.22, + "learning_rate": 0.0006793275244334203, + "loss": 0.6161, + "step": 28960 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006792851237041277, + "loss": 0.6049, + "step": 28970 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006792427229748352, + "loss": 0.6736, + "step": 28980 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006792003222455427, + "loss": 0.5515, + "step": 28990 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006791579215162501, + "loss": 0.456, + "step": 29000 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006791155207869576, + "loss": 0.6256, + "step": 29010 + }, + { + "epoch": 1.23, + "learning_rate": 0.000679073120057665, + "loss": 0.6029, + "step": 29020 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006790307193283724, + "loss": 0.4936, + "step": 29030 + }, + { + "epoch": 1.23, + "learning_rate": 0.00067898831859908, + "loss": 0.6513, + "step": 29040 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006789459178697874, + "loss": 0.6708, + "step": 29050 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006789035171404948, + "loss": 0.6428, + "step": 29060 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006788611164112023, + "loss": 0.6065, + "step": 29070 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006788187156819098, + "loss": 0.704, + "step": 29080 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006787763149526172, + "loss": 0.6592, + "step": 29090 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006787339142233247, + "loss": 0.5423, + "step": 29100 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006786915134940321, + "loss": 0.6338, + "step": 29110 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006786491127647396, + "loss": 0.6561, + "step": 29120 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006786067120354471, + "loss": 0.4925, + "step": 29130 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006785643113061545, + "loss": 0.5727, + "step": 29140 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006785219105768619, + "loss": 0.6433, + "step": 29150 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006784795098475695, + "loss": 0.5899, + "step": 29160 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006784371091182769, + "loss": 0.5404, + "step": 29170 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006783947083889843, + "loss": 0.6078, + "step": 29180 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006783523076596918, + "loss": 0.6414, + "step": 29190 + }, + { + "epoch": 1.23, + "learning_rate": 0.0006783099069303993, + "loss": 0.5501, + "step": 29200 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006782675062011067, + "loss": 0.5376, + "step": 29210 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006782251054718141, + "loss": 0.5393, + "step": 29220 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006781827047425215, + "loss": 0.5841, + "step": 29230 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006781403040132291, + "loss": 0.695, + "step": 29240 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006780979032839365, + "loss": 0.659, + "step": 29250 + }, + { + "epoch": 1.24, + "learning_rate": 0.000678055502554644, + "loss": 0.6178, + "step": 29260 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006780131018253514, + "loss": 0.7016, + "step": 29270 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006779707010960589, + "loss": 0.6255, + "step": 29280 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006779283003667663, + "loss": 0.6202, + "step": 29290 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006778858996374738, + "loss": 0.5437, + "step": 29300 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006778434989081812, + "loss": 0.4701, + "step": 29310 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006778010981788887, + "loss": 0.6011, + "step": 29320 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006777586974495962, + "loss": 0.5492, + "step": 29330 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006777162967203036, + "loss": 0.6659, + "step": 29340 + }, + { + "epoch": 1.24, + "learning_rate": 0.000677673895991011, + "loss": 0.5414, + "step": 29350 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006776314952617186, + "loss": 0.6549, + "step": 29360 + }, + { + "epoch": 1.24, + "learning_rate": 0.000677589094532426, + "loss": 0.5967, + "step": 29370 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006775466938031334, + "loss": 0.5793, + "step": 29380 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006775042930738409, + "loss": 0.6028, + "step": 29390 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006774618923445484, + "loss": 0.4739, + "step": 29400 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006774194916152558, + "loss": 0.5812, + "step": 29410 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006773770908859633, + "loss": 0.5955, + "step": 29420 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006773346901566707, + "loss": 0.6288, + "step": 29430 + }, + { + "epoch": 1.24, + "learning_rate": 0.0006772922894273782, + "loss": 0.7082, + "step": 29440 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006772498886980857, + "loss": 0.5245, + "step": 29450 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006772074879687931, + "loss": 0.7395, + "step": 29460 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006771650872395005, + "loss": 0.5862, + "step": 29470 + }, + { + "epoch": 1.25, + "learning_rate": 0.000677122686510208, + "loss": 0.5887, + "step": 29480 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006770802857809155, + "loss": 0.5407, + "step": 29490 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006770378850516229, + "loss": 0.6337, + "step": 29500 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006769954843223304, + "loss": 0.6177, + "step": 29510 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006769530835930379, + "loss": 0.6543, + "step": 29520 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006769106828637453, + "loss": 0.5974, + "step": 29530 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006768682821344527, + "loss": 0.5944, + "step": 29540 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006768258814051602, + "loss": 0.5679, + "step": 29550 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006767834806758676, + "loss": 0.6102, + "step": 29560 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006767410799465751, + "loss": 0.5444, + "step": 29570 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006766986792172826, + "loss": 0.4687, + "step": 29580 + }, + { + "epoch": 1.25, + "learning_rate": 0.00067665627848799, + "loss": 0.5924, + "step": 29590 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006766138777586975, + "loss": 0.5215, + "step": 29600 + }, + { + "epoch": 1.25, + "learning_rate": 0.000676571477029405, + "loss": 0.5816, + "step": 29610 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006765290763001124, + "loss": 0.5781, + "step": 29620 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006764866755708198, + "loss": 0.5977, + "step": 29630 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006764442748415273, + "loss": 0.4882, + "step": 29640 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006764018741122348, + "loss": 0.6981, + "step": 29650 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006763594733829422, + "loss": 0.6863, + "step": 29660 + }, + { + "epoch": 1.25, + "learning_rate": 0.0006763170726536497, + "loss": 0.541, + "step": 29670 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006762746719243571, + "loss": 0.498, + "step": 29680 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006762322711950646, + "loss": 0.5225, + "step": 29690 + }, + { + "epoch": 1.26, + "learning_rate": 0.000676189870465772, + "loss": 0.5959, + "step": 29700 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006761474697364795, + "loss": 0.5704, + "step": 29710 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006761050690071869, + "loss": 0.6006, + "step": 29720 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006760626682778945, + "loss": 0.6235, + "step": 29730 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006760202675486019, + "loss": 0.7029, + "step": 29740 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006759778668193093, + "loss": 0.5663, + "step": 29750 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006759354660900167, + "loss": 0.5109, + "step": 29760 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006758930653607243, + "loss": 0.5139, + "step": 29770 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006758506646314317, + "loss": 0.539, + "step": 29780 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006758082639021391, + "loss": 0.5511, + "step": 29790 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006757658631728466, + "loss": 0.5445, + "step": 29800 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006757234624435541, + "loss": 0.5365, + "step": 29810 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006756810617142615, + "loss": 0.6377, + "step": 29820 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006756386609849689, + "loss": 0.5776, + "step": 29830 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006755962602556764, + "loss": 0.5897, + "step": 29840 + }, + { + "epoch": 1.26, + "learning_rate": 0.000675553859526384, + "loss": 0.5625, + "step": 29850 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006755114587970913, + "loss": 0.5558, + "step": 29860 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006754690580677988, + "loss": 0.5291, + "step": 29870 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006754266573385062, + "loss": 0.5913, + "step": 29880 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006753842566092137, + "loss": 0.5776, + "step": 29890 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006753418558799212, + "loss": 0.6106, + "step": 29900 + }, + { + "epoch": 1.26, + "learning_rate": 0.0006752994551506286, + "loss": 0.6628, + "step": 29910 + }, + { + "epoch": 1.27, + "learning_rate": 0.000675257054421336, + "loss": 0.583, + "step": 29920 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006752146536920436, + "loss": 0.5745, + "step": 29930 + }, + { + "epoch": 1.27, + "learning_rate": 0.000675172252962751, + "loss": 0.5825, + "step": 29940 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006751298522334584, + "loss": 0.5568, + "step": 29950 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006750874515041659, + "loss": 0.5764, + "step": 29960 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006750450507748734, + "loss": 0.5142, + "step": 29970 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006750026500455808, + "loss": 0.4487, + "step": 29980 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006749602493162883, + "loss": 0.6577, + "step": 29990 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006749178485869957, + "loss": 0.5821, + "step": 30000 + }, + { + "epoch": 1.27, + "eval_loss": 0.6254510283470154, + "eval_runtime": 337.5949, + "eval_samples_per_second": 15.566, + "eval_steps_per_second": 3.892, + "step": 30000 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006748754478577032, + "loss": 0.5532, + "step": 30010 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006748330471284107, + "loss": 0.5679, + "step": 30020 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006747906463991181, + "loss": 0.5665, + "step": 30030 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006747482456698255, + "loss": 0.5408, + "step": 30040 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006747058449405331, + "loss": 0.5008, + "step": 30050 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006746634442112405, + "loss": 0.6044, + "step": 30060 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006746210434819479, + "loss": 0.5533, + "step": 30070 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006745786427526554, + "loss": 0.5834, + "step": 30080 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006745362420233628, + "loss": 0.5195, + "step": 30090 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006744938412940703, + "loss": 0.5288, + "step": 30100 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006744514405647778, + "loss": 0.5713, + "step": 30110 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006744090398354852, + "loss": 0.6587, + "step": 30120 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006743666391061927, + "loss": 0.5719, + "step": 30130 + }, + { + "epoch": 1.27, + "learning_rate": 0.0006743242383769002, + "loss": 0.6385, + "step": 30140 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006742818376476075, + "loss": 0.604, + "step": 30150 + }, + { + "epoch": 1.28, + "learning_rate": 0.000674239436918315, + "loss": 0.5908, + "step": 30160 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006741970361890225, + "loss": 0.5629, + "step": 30170 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006741546354597299, + "loss": 0.603, + "step": 30180 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006741122347304374, + "loss": 0.4915, + "step": 30190 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006740698340011449, + "loss": 0.5905, + "step": 30200 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006740274332718523, + "loss": 0.5249, + "step": 30210 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006739850325425598, + "loss": 0.6394, + "step": 30220 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006739426318132673, + "loss": 0.6733, + "step": 30230 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006739002310839746, + "loss": 0.5909, + "step": 30240 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006738578303546821, + "loss": 0.6294, + "step": 30250 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006738154296253897, + "loss": 0.6335, + "step": 30260 + }, + { + "epoch": 1.28, + "learning_rate": 0.000673773028896097, + "loss": 0.605, + "step": 30270 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006737306281668045, + "loss": 0.5363, + "step": 30280 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006736882274375119, + "loss": 0.6013, + "step": 30290 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006736458267082194, + "loss": 0.5636, + "step": 30300 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006736034259789269, + "loss": 0.5715, + "step": 30310 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006735610252496343, + "loss": 0.4594, + "step": 30320 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006735186245203417, + "loss": 0.5405, + "step": 30330 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006734762237910493, + "loss": 0.5305, + "step": 30340 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006734338230617567, + "loss": 0.609, + "step": 30350 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006733914223324641, + "loss": 0.5529, + "step": 30360 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006733490216031716, + "loss": 0.6016, + "step": 30370 + }, + { + "epoch": 1.28, + "learning_rate": 0.0006733066208738791, + "loss": 0.6497, + "step": 30380 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006732642201445865, + "loss": 0.5799, + "step": 30390 + }, + { + "epoch": 1.29, + "learning_rate": 0.000673221819415294, + "loss": 0.6471, + "step": 30400 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006731794186860014, + "loss": 0.6029, + "step": 30410 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006731370179567089, + "loss": 0.5474, + "step": 30420 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006730946172274164, + "loss": 0.5511, + "step": 30430 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006730522164981238, + "loss": 0.5881, + "step": 30440 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006730098157688312, + "loss": 0.5267, + "step": 30450 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006729674150395388, + "loss": 0.6721, + "step": 30460 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006729250143102461, + "loss": 0.5739, + "step": 30470 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006728826135809536, + "loss": 0.4932, + "step": 30480 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006728402128516611, + "loss": 0.6823, + "step": 30490 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006727978121223685, + "loss": 0.4928, + "step": 30500 + }, + { + "epoch": 1.29, + "learning_rate": 0.000672755411393076, + "loss": 0.4665, + "step": 30510 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006727130106637835, + "loss": 0.5201, + "step": 30520 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006726706099344908, + "loss": 0.578, + "step": 30530 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006726282092051984, + "loss": 0.6476, + "step": 30540 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006725858084759059, + "loss": 0.5621, + "step": 30550 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006725434077466132, + "loss": 0.5384, + "step": 30560 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006725010070173207, + "loss": 0.518, + "step": 30570 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006724586062880283, + "loss": 0.7168, + "step": 30580 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006724162055587356, + "loss": 0.5477, + "step": 30590 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006723738048294431, + "loss": 0.5453, + "step": 30600 + }, + { + "epoch": 1.29, + "learning_rate": 0.0006723314041001506, + "loss": 0.5937, + "step": 30610 + }, + { + "epoch": 1.29, + "learning_rate": 0.000672289003370858, + "loss": 0.6398, + "step": 30620 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006722466026415655, + "loss": 0.5308, + "step": 30630 + }, + { + "epoch": 1.3, + "learning_rate": 0.000672204201912273, + "loss": 0.5712, + "step": 30640 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006721618011829803, + "loss": 0.5579, + "step": 30650 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006721194004536879, + "loss": 0.5448, + "step": 30660 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006720769997243954, + "loss": 0.5702, + "step": 30670 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006720345989951027, + "loss": 0.6084, + "step": 30680 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006719921982658102, + "loss": 0.5092, + "step": 30690 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006719497975365176, + "loss": 0.5488, + "step": 30700 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006719073968072251, + "loss": 0.6385, + "step": 30710 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006718649960779326, + "loss": 0.6713, + "step": 30720 + }, + { + "epoch": 1.3, + "learning_rate": 0.00067182259534864, + "loss": 0.5464, + "step": 30730 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006717801946193475, + "loss": 0.5958, + "step": 30740 + }, + { + "epoch": 1.3, + "learning_rate": 0.000671737793890055, + "loss": 0.6336, + "step": 30750 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006716953931607623, + "loss": 0.6284, + "step": 30760 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006716529924314698, + "loss": 0.6545, + "step": 30770 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006716105917021773, + "loss": 0.5968, + "step": 30780 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006715681909728847, + "loss": 0.5287, + "step": 30790 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006715257902435922, + "loss": 0.617, + "step": 30800 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006714833895142997, + "loss": 0.4703, + "step": 30810 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006714409887850071, + "loss": 0.586, + "step": 30820 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006713985880557146, + "loss": 0.5227, + "step": 30830 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006713561873264221, + "loss": 0.6134, + "step": 30840 + }, + { + "epoch": 1.3, + "learning_rate": 0.0006713137865971294, + "loss": 0.4928, + "step": 30850 + }, + { + "epoch": 1.31, + "learning_rate": 0.000671271385867837, + "loss": 0.6177, + "step": 30860 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006712289851385445, + "loss": 0.631, + "step": 30870 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006711865844092518, + "loss": 0.6482, + "step": 30880 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006711441836799593, + "loss": 0.5884, + "step": 30890 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006711017829506668, + "loss": 0.5557, + "step": 30900 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006710593822213742, + "loss": 0.6637, + "step": 30910 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006710169814920817, + "loss": 0.5333, + "step": 30920 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006709745807627892, + "loss": 0.5776, + "step": 30930 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006709321800334966, + "loss": 0.5177, + "step": 30940 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006708897793042041, + "loss": 0.5666, + "step": 30950 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006708473785749116, + "loss": 0.5767, + "step": 30960 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006708049778456189, + "loss": 0.6629, + "step": 30970 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006707625771163264, + "loss": 0.5344, + "step": 30980 + }, + { + "epoch": 1.31, + "learning_rate": 0.000670720176387034, + "loss": 0.6217, + "step": 30990 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006706777756577413, + "loss": 0.5567, + "step": 31000 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006706353749284488, + "loss": 0.5374, + "step": 31010 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006705929741991563, + "loss": 0.6178, + "step": 31020 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006705505734698637, + "loss": 0.5655, + "step": 31030 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006705081727405712, + "loss": 0.6168, + "step": 31040 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006704657720112787, + "loss": 0.69, + "step": 31050 + }, + { + "epoch": 1.31, + "learning_rate": 0.000670423371281986, + "loss": 0.5561, + "step": 31060 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006703809705526936, + "loss": 0.5657, + "step": 31070 + }, + { + "epoch": 1.31, + "learning_rate": 0.000670338569823401, + "loss": 0.6699, + "step": 31080 + }, + { + "epoch": 1.31, + "learning_rate": 0.0006702961690941084, + "loss": 0.6052, + "step": 31090 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006702537683648159, + "loss": 0.5972, + "step": 31100 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006702113676355234, + "loss": 0.531, + "step": 31110 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006701689669062308, + "loss": 0.6316, + "step": 31120 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006701265661769383, + "loss": 0.5442, + "step": 31130 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006700841654476456, + "loss": 0.5981, + "step": 31140 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006700417647183532, + "loss": 0.6147, + "step": 31150 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006699993639890607, + "loss": 0.5729, + "step": 31160 + }, + { + "epoch": 1.32, + "learning_rate": 0.000669956963259768, + "loss": 0.5123, + "step": 31170 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006699145625304755, + "loss": 0.6195, + "step": 31180 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006698721618011831, + "loss": 0.6114, + "step": 31190 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006698297610718904, + "loss": 0.5496, + "step": 31200 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006697873603425979, + "loss": 0.584, + "step": 31210 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006697449596133054, + "loss": 0.6061, + "step": 31220 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006697025588840128, + "loss": 0.5134, + "step": 31230 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006696601581547203, + "loss": 0.6406, + "step": 31240 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006696177574254278, + "loss": 0.5839, + "step": 31250 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006695753566961351, + "loss": 0.6163, + "step": 31260 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006695329559668427, + "loss": 0.6628, + "step": 31270 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006694905552375502, + "loss": 0.596, + "step": 31280 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006694481545082575, + "loss": 0.5939, + "step": 31290 + }, + { + "epoch": 1.32, + "learning_rate": 0.000669405753778965, + "loss": 0.513, + "step": 31300 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006693633530496725, + "loss": 0.5646, + "step": 31310 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006693209523203799, + "loss": 0.517, + "step": 31320 + }, + { + "epoch": 1.32, + "learning_rate": 0.0006692785515910874, + "loss": 0.6649, + "step": 31330 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006692361508617949, + "loss": 0.5832, + "step": 31340 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006691937501325023, + "loss": 0.637, + "step": 31350 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006691513494032098, + "loss": 0.5676, + "step": 31360 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006691089486739172, + "loss": 0.6194, + "step": 31370 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006690665479446246, + "loss": 0.6133, + "step": 31380 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006690241472153322, + "loss": 0.5792, + "step": 31390 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006689817464860396, + "loss": 0.5685, + "step": 31400 + }, + { + "epoch": 1.33, + "learning_rate": 0.000668939345756747, + "loss": 0.6092, + "step": 31410 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006688969450274545, + "loss": 0.5517, + "step": 31420 + }, + { + "epoch": 1.33, + "learning_rate": 0.000668854544298162, + "loss": 0.5929, + "step": 31430 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006688121435688694, + "loss": 0.5498, + "step": 31440 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006687697428395769, + "loss": 0.5424, + "step": 31450 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006687273421102843, + "loss": 0.562, + "step": 31460 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006686849413809918, + "loss": 0.5742, + "step": 31470 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006686425406516993, + "loss": 0.5556, + "step": 31480 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006686001399224067, + "loss": 0.636, + "step": 31490 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006685577391931141, + "loss": 0.5775, + "step": 31500 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006685153384638216, + "loss": 0.6124, + "step": 31510 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006684729377345291, + "loss": 0.5638, + "step": 31520 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006684305370052365, + "loss": 0.5982, + "step": 31530 + }, + { + "epoch": 1.33, + "learning_rate": 0.000668388136275944, + "loss": 0.6329, + "step": 31540 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006683457355466515, + "loss": 0.6211, + "step": 31550 + }, + { + "epoch": 1.33, + "learning_rate": 0.0006683033348173589, + "loss": 0.5607, + "step": 31560 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006682609340880664, + "loss": 0.5139, + "step": 31570 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006682185333587738, + "loss": 0.5795, + "step": 31580 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006681761326294812, + "loss": 0.5231, + "step": 31590 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006681337319001888, + "loss": 0.5728, + "step": 31600 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006680913311708962, + "loss": 0.5798, + "step": 31610 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006680489304416036, + "loss": 0.6287, + "step": 31620 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006680065297123111, + "loss": 0.6851, + "step": 31630 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006679641289830186, + "loss": 0.5691, + "step": 31640 + }, + { + "epoch": 1.34, + "learning_rate": 0.000667921728253726, + "loss": 0.5494, + "step": 31650 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006678793275244335, + "loss": 0.6589, + "step": 31660 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006678369267951408, + "loss": 0.548, + "step": 31670 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006677945260658484, + "loss": 0.585, + "step": 31680 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006677521253365558, + "loss": 0.6812, + "step": 31690 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006677097246072632, + "loss": 0.5725, + "step": 31700 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006676673238779707, + "loss": 0.5668, + "step": 31710 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006676249231486782, + "loss": 0.6998, + "step": 31720 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006675825224193856, + "loss": 0.4934, + "step": 31730 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006675401216900931, + "loss": 0.6001, + "step": 31740 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006674977209608005, + "loss": 0.5702, + "step": 31750 + }, + { + "epoch": 1.34, + "learning_rate": 0.000667455320231508, + "loss": 0.5654, + "step": 31760 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006674129195022155, + "loss": 0.6848, + "step": 31770 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006673705187729229, + "loss": 0.5054, + "step": 31780 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006673281180436303, + "loss": 0.6013, + "step": 31790 + }, + { + "epoch": 1.34, + "learning_rate": 0.0006672857173143379, + "loss": 0.5444, + "step": 31800 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006672433165850453, + "loss": 0.5099, + "step": 31810 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006672009158557527, + "loss": 0.5926, + "step": 31820 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006671585151264602, + "loss": 0.6552, + "step": 31830 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006671161143971677, + "loss": 0.5806, + "step": 31840 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006670737136678751, + "loss": 0.623, + "step": 31850 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006670313129385826, + "loss": 0.7061, + "step": 31860 + }, + { + "epoch": 1.35, + "learning_rate": 0.00066698891220929, + "loss": 0.6776, + "step": 31870 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006669465114799975, + "loss": 0.526, + "step": 31880 + }, + { + "epoch": 1.35, + "learning_rate": 0.000666904110750705, + "loss": 0.6113, + "step": 31890 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006668617100214124, + "loss": 0.5601, + "step": 31900 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006668193092921198, + "loss": 0.5326, + "step": 31910 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006667769085628274, + "loss": 0.4983, + "step": 31920 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006667345078335348, + "loss": 0.6145, + "step": 31930 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006666921071042422, + "loss": 0.5763, + "step": 31940 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006666497063749497, + "loss": 0.5647, + "step": 31950 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006666073056456572, + "loss": 0.5927, + "step": 31960 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006665649049163646, + "loss": 0.6277, + "step": 31970 + }, + { + "epoch": 1.35, + "learning_rate": 0.000666522504187072, + "loss": 0.6477, + "step": 31980 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006664801034577795, + "loss": 0.5098, + "step": 31990 + }, + { + "epoch": 1.35, + "learning_rate": 0.000666437702728487, + "loss": 0.5947, + "step": 32000 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006663953019991944, + "loss": 0.6288, + "step": 32010 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006663529012699019, + "loss": 0.6199, + "step": 32020 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006663105005406093, + "loss": 0.5647, + "step": 32030 + }, + { + "epoch": 1.35, + "learning_rate": 0.0006662680998113168, + "loss": 0.6075, + "step": 32040 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006662256990820243, + "loss": 0.5576, + "step": 32050 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006661832983527317, + "loss": 0.5963, + "step": 32060 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006661408976234391, + "loss": 0.6591, + "step": 32070 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006660984968941467, + "loss": 0.5288, + "step": 32080 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006660560961648541, + "loss": 0.6203, + "step": 32090 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006660136954355615, + "loss": 0.5845, + "step": 32100 + }, + { + "epoch": 1.36, + "learning_rate": 0.000665971294706269, + "loss": 0.6671, + "step": 32110 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006659288939769764, + "loss": 0.7039, + "step": 32120 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006658864932476839, + "loss": 0.6239, + "step": 32130 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006658440925183913, + "loss": 0.5408, + "step": 32140 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006658016917890988, + "loss": 0.6777, + "step": 32150 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006657592910598063, + "loss": 0.6263, + "step": 32160 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006657168903305137, + "loss": 0.5272, + "step": 32170 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006656744896012212, + "loss": 0.6719, + "step": 32180 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006656320888719286, + "loss": 0.5613, + "step": 32190 + }, + { + "epoch": 1.36, + "learning_rate": 0.000665589688142636, + "loss": 0.6035, + "step": 32200 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006655472874133436, + "loss": 0.6037, + "step": 32210 + }, + { + "epoch": 1.36, + "learning_rate": 0.000665504886684051, + "loss": 0.6595, + "step": 32220 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006654624859547584, + "loss": 0.6295, + "step": 32230 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006654200852254659, + "loss": 0.6422, + "step": 32240 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006653776844961734, + "loss": 0.5268, + "step": 32250 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006653352837668808, + "loss": 0.5394, + "step": 32260 + }, + { + "epoch": 1.36, + "learning_rate": 0.0006652928830375883, + "loss": 0.5376, + "step": 32270 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006652504823082957, + "loss": 0.6102, + "step": 32280 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006652080815790032, + "loss": 0.6374, + "step": 32290 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006651656808497106, + "loss": 0.5256, + "step": 32300 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006651232801204181, + "loss": 0.5949, + "step": 32310 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006650808793911255, + "loss": 0.6553, + "step": 32320 + }, + { + "epoch": 1.37, + "learning_rate": 0.000665038478661833, + "loss": 0.4782, + "step": 32330 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006649960779325405, + "loss": 0.5803, + "step": 32340 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006649536772032479, + "loss": 0.532, + "step": 32350 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006649112764739554, + "loss": 0.6171, + "step": 32360 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006648688757446629, + "loss": 0.6078, + "step": 32370 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006648264750153703, + "loss": 0.6035, + "step": 32380 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006647840742860777, + "loss": 0.5346, + "step": 32390 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006647416735567852, + "loss": 0.6346, + "step": 32400 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006646992728274927, + "loss": 0.5507, + "step": 32410 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006646568720982001, + "loss": 0.5847, + "step": 32420 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006646144713689076, + "loss": 0.7278, + "step": 32430 + }, + { + "epoch": 1.37, + "learning_rate": 0.000664572070639615, + "loss": 0.6262, + "step": 32440 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006645296699103225, + "loss": 0.5835, + "step": 32450 + }, + { + "epoch": 1.37, + "learning_rate": 0.00066448726918103, + "loss": 0.6199, + "step": 32460 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006644448684517374, + "loss": 0.6838, + "step": 32470 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006644024677224448, + "loss": 0.6497, + "step": 32480 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006643600669931524, + "loss": 0.5752, + "step": 32490 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006643176662638598, + "loss": 0.6802, + "step": 32500 + }, + { + "epoch": 1.37, + "learning_rate": 0.0006642752655345672, + "loss": 0.4919, + "step": 32510 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006642328648052747, + "loss": 0.5996, + "step": 32520 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006641904640759822, + "loss": 0.5516, + "step": 32530 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006641480633466896, + "loss": 0.6427, + "step": 32540 + }, + { + "epoch": 1.38, + "learning_rate": 0.000664105662617397, + "loss": 0.6397, + "step": 32550 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006640632618881045, + "loss": 0.5461, + "step": 32560 + }, + { + "epoch": 1.38, + "learning_rate": 0.000664020861158812, + "loss": 0.6651, + "step": 32570 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006639784604295195, + "loss": 0.5433, + "step": 32580 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006639360597002269, + "loss": 0.5176, + "step": 32590 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006638936589709343, + "loss": 0.6062, + "step": 32600 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006638512582416419, + "loss": 0.5838, + "step": 32610 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006638088575123492, + "loss": 0.624, + "step": 32620 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006637664567830567, + "loss": 0.5744, + "step": 32630 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006637240560537641, + "loss": 0.5272, + "step": 32640 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006636816553244716, + "loss": 0.6506, + "step": 32650 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006636392545951791, + "loss": 0.5812, + "step": 32660 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006635968538658865, + "loss": 0.6021, + "step": 32670 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006635544531365939, + "loss": 0.7001, + "step": 32680 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006635120524073015, + "loss": 0.5969, + "step": 32690 + }, + { + "epoch": 1.38, + "learning_rate": 0.000663469651678009, + "loss": 0.6279, + "step": 32700 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006634272509487163, + "loss": 0.6545, + "step": 32710 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006633848502194238, + "loss": 0.5827, + "step": 32720 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006633424494901312, + "loss": 0.551, + "step": 32730 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006633000487608387, + "loss": 0.4983, + "step": 32740 + }, + { + "epoch": 1.38, + "learning_rate": 0.0006632576480315462, + "loss": 0.6099, + "step": 32750 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006632152473022536, + "loss": 0.5467, + "step": 32760 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006631728465729611, + "loss": 0.5463, + "step": 32770 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006631304458436686, + "loss": 0.5434, + "step": 32780 + }, + { + "epoch": 1.39, + "learning_rate": 0.000663088045114376, + "loss": 0.669, + "step": 32790 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006630456443850834, + "loss": 0.5524, + "step": 32800 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006630032436557909, + "loss": 0.4795, + "step": 32810 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006629608429264984, + "loss": 0.5896, + "step": 32820 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006629184421972058, + "loss": 0.5922, + "step": 32830 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006628760414679133, + "loss": 0.5733, + "step": 32840 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006628336407386207, + "loss": 0.609, + "step": 32850 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006627912400093282, + "loss": 0.6344, + "step": 32860 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006627488392800357, + "loss": 0.6187, + "step": 32870 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006627064385507431, + "loss": 0.5814, + "step": 32880 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006626640378214506, + "loss": 0.6508, + "step": 32890 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006626216370921581, + "loss": 0.5435, + "step": 32900 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006625792363628654, + "loss": 0.5739, + "step": 32910 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006625368356335729, + "loss": 0.6312, + "step": 32920 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006624944349042804, + "loss": 0.6776, + "step": 32930 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006624520341749878, + "loss": 0.5323, + "step": 32940 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006624096334456953, + "loss": 0.6112, + "step": 32950 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006623672327164028, + "loss": 0.6221, + "step": 32960 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006623248319871102, + "loss": 0.4679, + "step": 32970 + }, + { + "epoch": 1.39, + "learning_rate": 0.0006622824312578177, + "loss": 0.6121, + "step": 32980 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006622400305285252, + "loss": 0.5472, + "step": 32990 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006621976297992325, + "loss": 0.5169, + "step": 33000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00066215522906994, + "loss": 0.5561, + "step": 33010 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006621128283406476, + "loss": 0.5663, + "step": 33020 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006620704276113549, + "loss": 0.678, + "step": 33030 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006620280268820624, + "loss": 0.5931, + "step": 33040 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006619856261527699, + "loss": 0.5811, + "step": 33050 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006619432254234773, + "loss": 0.6967, + "step": 33060 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006619008246941848, + "loss": 0.5198, + "step": 33070 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006618584239648923, + "loss": 0.6195, + "step": 33080 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006618160232355996, + "loss": 0.6173, + "step": 33090 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006617736225063072, + "loss": 0.6146, + "step": 33100 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006617312217770147, + "loss": 0.5114, + "step": 33110 + }, + { + "epoch": 1.4, + "learning_rate": 0.000661688821047722, + "loss": 0.6521, + "step": 33120 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006616464203184295, + "loss": 0.4649, + "step": 33130 + }, + { + "epoch": 1.4, + "learning_rate": 0.000661604019589137, + "loss": 0.5698, + "step": 33140 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006615616188598444, + "loss": 0.53, + "step": 33150 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006615192181305519, + "loss": 0.4872, + "step": 33160 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006614768174012593, + "loss": 0.5327, + "step": 33170 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006614344166719668, + "loss": 0.5487, + "step": 33180 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006613920159426743, + "loss": 0.6574, + "step": 33190 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006613496152133817, + "loss": 0.6345, + "step": 33200 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006613072144840891, + "loss": 0.5783, + "step": 33210 + }, + { + "epoch": 1.4, + "learning_rate": 0.0006612648137547967, + "loss": 0.6401, + "step": 33220 + }, + { + "epoch": 1.41, + "learning_rate": 0.000661222413025504, + "loss": 0.5805, + "step": 33230 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006611800122962115, + "loss": 0.5164, + "step": 33240 + }, + { + "epoch": 1.41, + "learning_rate": 0.000661137611566919, + "loss": 0.6184, + "step": 33250 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006610952108376264, + "loss": 0.6018, + "step": 33260 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006610528101083339, + "loss": 0.6152, + "step": 33270 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006610104093790414, + "loss": 0.5837, + "step": 33280 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006609680086497487, + "loss": 0.6302, + "step": 33290 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006609256079204563, + "loss": 0.5672, + "step": 33300 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006608832071911638, + "loss": 0.5776, + "step": 33310 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006608408064618711, + "loss": 0.5595, + "step": 33320 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006607984057325786, + "loss": 0.6399, + "step": 33330 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006607560050032861, + "loss": 0.5944, + "step": 33340 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006607136042739935, + "loss": 0.5115, + "step": 33350 + }, + { + "epoch": 1.41, + "learning_rate": 0.000660671203544701, + "loss": 0.6287, + "step": 33360 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006606288028154085, + "loss": 0.5536, + "step": 33370 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006605864020861159, + "loss": 0.5395, + "step": 33380 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006605440013568234, + "loss": 0.5011, + "step": 33390 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006605016006275309, + "loss": 0.6054, + "step": 33400 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006604591998982382, + "loss": 0.5526, + "step": 33410 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006604167991689458, + "loss": 0.6049, + "step": 33420 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006603743984396533, + "loss": 0.5933, + "step": 33430 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006603319977103606, + "loss": 0.5941, + "step": 33440 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006602895969810681, + "loss": 0.5708, + "step": 33450 + }, + { + "epoch": 1.41, + "learning_rate": 0.0006602471962517756, + "loss": 0.6385, + "step": 33460 + }, + { + "epoch": 1.42, + "learning_rate": 0.000660204795522483, + "loss": 0.5586, + "step": 33470 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006601623947931905, + "loss": 0.5506, + "step": 33480 + }, + { + "epoch": 1.42, + "learning_rate": 0.000660119994063898, + "loss": 0.5724, + "step": 33490 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006600775933346054, + "loss": 0.6068, + "step": 33500 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006600351926053129, + "loss": 0.5173, + "step": 33510 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006599927918760202, + "loss": 0.5249, + "step": 33520 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006599503911467277, + "loss": 0.5404, + "step": 33530 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006599079904174352, + "loss": 0.6615, + "step": 33540 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006598655896881426, + "loss": 0.5503, + "step": 33550 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006598231889588501, + "loss": 0.6833, + "step": 33560 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006597807882295576, + "loss": 0.6727, + "step": 33570 + }, + { + "epoch": 1.42, + "learning_rate": 0.000659738387500265, + "loss": 0.7116, + "step": 33580 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006596959867709725, + "loss": 0.5573, + "step": 33590 + }, + { + "epoch": 1.42, + "learning_rate": 0.00065965358604168, + "loss": 0.5533, + "step": 33600 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006596111853123873, + "loss": 0.4972, + "step": 33610 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006595687845830948, + "loss": 0.5929, + "step": 33620 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006595263838538024, + "loss": 0.6794, + "step": 33630 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006594839831245097, + "loss": 0.5427, + "step": 33640 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006594415823952172, + "loss": 0.5489, + "step": 33650 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006593991816659247, + "loss": 0.5622, + "step": 33660 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006593567809366321, + "loss": 0.6172, + "step": 33670 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006593143802073396, + "loss": 0.6136, + "step": 33680 + }, + { + "epoch": 1.42, + "learning_rate": 0.0006592719794780471, + "loss": 0.6229, + "step": 33690 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006592295787487544, + "loss": 0.489, + "step": 33700 + }, + { + "epoch": 1.43, + "learning_rate": 0.000659187178019462, + "loss": 0.5492, + "step": 33710 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006591447772901695, + "loss": 0.5326, + "step": 33720 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006591023765608768, + "loss": 0.5783, + "step": 33730 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006590599758315843, + "loss": 0.5461, + "step": 33740 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006590175751022919, + "loss": 0.6071, + "step": 33750 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006589751743729992, + "loss": 0.5792, + "step": 33760 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006589327736437067, + "loss": 0.5692, + "step": 33770 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006588903729144142, + "loss": 0.5619, + "step": 33780 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006588479721851216, + "loss": 0.6287, + "step": 33790 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006588055714558291, + "loss": 0.5472, + "step": 33800 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006587631707265366, + "loss": 0.6087, + "step": 33810 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006587207699972439, + "loss": 0.5904, + "step": 33820 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006586783692679515, + "loss": 0.5144, + "step": 33830 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006586359685386589, + "loss": 0.5891, + "step": 33840 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006585935678093663, + "loss": 0.5351, + "step": 33850 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006585511670800738, + "loss": 0.5895, + "step": 33860 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006585087663507813, + "loss": 0.6109, + "step": 33870 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006584663656214887, + "loss": 0.5469, + "step": 33880 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006584239648921962, + "loss": 0.6339, + "step": 33890 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006583815641629036, + "loss": 0.6026, + "step": 33900 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006583391634336111, + "loss": 0.5529, + "step": 33910 + }, + { + "epoch": 1.43, + "learning_rate": 0.0006582967627043186, + "loss": 0.6255, + "step": 33920 + }, + { + "epoch": 1.43, + "learning_rate": 0.000658254361975026, + "loss": 0.5655, + "step": 33930 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006582119612457334, + "loss": 0.5899, + "step": 33940 + }, + { + "epoch": 1.44, + "learning_rate": 0.000658169560516441, + "loss": 0.5772, + "step": 33950 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006581271597871484, + "loss": 0.6523, + "step": 33960 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006580847590578558, + "loss": 0.5286, + "step": 33970 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006580423583285633, + "loss": 0.579, + "step": 33980 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006579999575992708, + "loss": 0.5713, + "step": 33990 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006579575568699782, + "loss": 0.6089, + "step": 34000 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006579151561406857, + "loss": 0.6204, + "step": 34010 + }, + { + "epoch": 1.44, + "learning_rate": 0.000657872755411393, + "loss": 0.5458, + "step": 34020 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006578303546821006, + "loss": 0.591, + "step": 34030 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006577879539528081, + "loss": 0.5084, + "step": 34040 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006577455532235154, + "loss": 0.5959, + "step": 34050 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006577031524942229, + "loss": 0.7334, + "step": 34060 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006576607517649304, + "loss": 0.6088, + "step": 34070 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006576183510356378, + "loss": 0.5507, + "step": 34080 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006575759503063453, + "loss": 0.6238, + "step": 34090 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006575335495770528, + "loss": 0.5001, + "step": 34100 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006574911488477602, + "loss": 0.5522, + "step": 34110 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006574487481184677, + "loss": 0.5583, + "step": 34120 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006574063473891751, + "loss": 0.5908, + "step": 34130 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006573639466598825, + "loss": 0.592, + "step": 34140 + }, + { + "epoch": 1.44, + "learning_rate": 0.00065732154593059, + "loss": 0.5751, + "step": 34150 + }, + { + "epoch": 1.44, + "learning_rate": 0.0006572791452012975, + "loss": 0.5885, + "step": 34160 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006572367444720049, + "loss": 0.4917, + "step": 34170 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006571943437427124, + "loss": 0.5177, + "step": 34180 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006571519430134199, + "loss": 0.5418, + "step": 34190 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006571095422841273, + "loss": 0.6658, + "step": 34200 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006570671415548348, + "loss": 0.5959, + "step": 34210 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006570247408255422, + "loss": 0.5428, + "step": 34220 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006569823400962496, + "loss": 0.5095, + "step": 34230 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006569399393669572, + "loss": 0.4962, + "step": 34240 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006568975386376646, + "loss": 0.5608, + "step": 34250 + }, + { + "epoch": 1.45, + "learning_rate": 0.000656855137908372, + "loss": 0.5392, + "step": 34260 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006568127371790795, + "loss": 0.6249, + "step": 34270 + }, + { + "epoch": 1.45, + "learning_rate": 0.000656770336449787, + "loss": 0.5231, + "step": 34280 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006567279357204944, + "loss": 0.6333, + "step": 34290 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006566855349912019, + "loss": 0.595, + "step": 34300 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006566431342619093, + "loss": 0.5062, + "step": 34310 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006566007335326168, + "loss": 0.5551, + "step": 34320 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006565583328033243, + "loss": 0.6041, + "step": 34330 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006565159320740317, + "loss": 0.6024, + "step": 34340 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006564735313447391, + "loss": 0.5634, + "step": 34350 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006564311306154467, + "loss": 0.5655, + "step": 34360 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006563887298861541, + "loss": 0.5576, + "step": 34370 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006563463291568615, + "loss": 0.6054, + "step": 34380 + }, + { + "epoch": 1.45, + "learning_rate": 0.000656303928427569, + "loss": 0.5784, + "step": 34390 + }, + { + "epoch": 1.45, + "learning_rate": 0.0006562615276982765, + "loss": 0.6068, + "step": 34400 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006562191269689839, + "loss": 0.6692, + "step": 34410 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006561767262396914, + "loss": 0.5653, + "step": 34420 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006561343255103987, + "loss": 0.6411, + "step": 34430 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006560919247811063, + "loss": 0.6501, + "step": 34440 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006560495240518137, + "loss": 0.5875, + "step": 34450 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006560071233225211, + "loss": 0.5994, + "step": 34460 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006559647225932286, + "loss": 0.606, + "step": 34470 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006559223218639361, + "loss": 0.593, + "step": 34480 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006558799211346436, + "loss": 0.5689, + "step": 34490 + }, + { + "epoch": 1.46, + "learning_rate": 0.000655837520405351, + "loss": 0.4565, + "step": 34500 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006557951196760584, + "loss": 0.6361, + "step": 34510 + }, + { + "epoch": 1.46, + "learning_rate": 0.000655752718946766, + "loss": 0.5607, + "step": 34520 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006557103182174734, + "loss": 0.5451, + "step": 34530 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006556679174881808, + "loss": 0.6119, + "step": 34540 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006556255167588882, + "loss": 0.5535, + "step": 34550 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006555831160295958, + "loss": 0.582, + "step": 34560 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006555407153003032, + "loss": 0.5903, + "step": 34570 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006554983145710106, + "loss": 0.699, + "step": 34580 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006554559138417181, + "loss": 0.5786, + "step": 34590 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006554135131124256, + "loss": 0.5899, + "step": 34600 + }, + { + "epoch": 1.46, + "learning_rate": 0.000655371112383133, + "loss": 0.5541, + "step": 34610 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006553287116538405, + "loss": 0.5996, + "step": 34620 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006552863109245479, + "loss": 0.5115, + "step": 34630 + }, + { + "epoch": 1.46, + "learning_rate": 0.0006552439101952554, + "loss": 0.6655, + "step": 34640 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006552015094659629, + "loss": 0.6737, + "step": 34650 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006551591087366703, + "loss": 0.6093, + "step": 34660 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006551167080073777, + "loss": 0.5247, + "step": 34670 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006550743072780852, + "loss": 0.5155, + "step": 34680 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006550319065487927, + "loss": 0.575, + "step": 34690 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006549895058195001, + "loss": 0.5058, + "step": 34700 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006549471050902076, + "loss": 0.5735, + "step": 34710 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006549047043609151, + "loss": 0.5428, + "step": 34720 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006548623036316225, + "loss": 0.4919, + "step": 34730 + }, + { + "epoch": 1.47, + "learning_rate": 0.00065481990290233, + "loss": 0.5371, + "step": 34740 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006547775021730374, + "loss": 0.6579, + "step": 34750 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006547351014437448, + "loss": 0.6301, + "step": 34760 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006546927007144523, + "loss": 0.6454, + "step": 34770 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006546502999851598, + "loss": 0.6279, + "step": 34780 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006546078992558672, + "loss": 0.5818, + "step": 34790 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006545654985265747, + "loss": 0.6159, + "step": 34800 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006545230977972822, + "loss": 0.5559, + "step": 34810 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006544806970679896, + "loss": 0.5666, + "step": 34820 + }, + { + "epoch": 1.47, + "learning_rate": 0.000654438296338697, + "loss": 0.531, + "step": 34830 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006543958956094045, + "loss": 0.5311, + "step": 34840 + }, + { + "epoch": 1.47, + "learning_rate": 0.000654353494880112, + "loss": 0.6246, + "step": 34850 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006543110941508194, + "loss": 0.5468, + "step": 34860 + }, + { + "epoch": 1.47, + "learning_rate": 0.0006542686934215269, + "loss": 0.5602, + "step": 34870 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006542262926922343, + "loss": 0.6101, + "step": 34880 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006541838919629418, + "loss": 0.6997, + "step": 34890 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006541414912336493, + "loss": 0.5446, + "step": 34900 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006540990905043567, + "loss": 0.657, + "step": 34910 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006540566897750642, + "loss": 0.6019, + "step": 34920 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006540142890457717, + "loss": 0.5414, + "step": 34930 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006539718883164791, + "loss": 0.5609, + "step": 34940 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006539294875871865, + "loss": 0.5155, + "step": 34950 + }, + { + "epoch": 1.48, + "learning_rate": 0.000653887086857894, + "loss": 0.645, + "step": 34960 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006538446861286015, + "loss": 0.6204, + "step": 34970 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006538022853993089, + "loss": 0.6152, + "step": 34980 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006537598846700163, + "loss": 0.5709, + "step": 34990 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006537174839407238, + "loss": 0.5639, + "step": 35000 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006536750832114313, + "loss": 0.6408, + "step": 35010 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006536326824821387, + "loss": 0.6894, + "step": 35020 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006535902817528462, + "loss": 0.6108, + "step": 35030 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006535478810235536, + "loss": 0.5345, + "step": 35040 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006535054802942611, + "loss": 0.5109, + "step": 35050 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006534630795649685, + "loss": 0.6259, + "step": 35060 + }, + { + "epoch": 1.48, + "learning_rate": 0.000653420678835676, + "loss": 0.5285, + "step": 35070 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006533782781063834, + "loss": 0.6326, + "step": 35080 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006533358773770909, + "loss": 0.5406, + "step": 35090 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006532934766477984, + "loss": 0.5619, + "step": 35100 + }, + { + "epoch": 1.48, + "learning_rate": 0.0006532510759185058, + "loss": 0.6635, + "step": 35110 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006532086751892132, + "loss": 0.5535, + "step": 35120 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006531662744599208, + "loss": 0.5916, + "step": 35130 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006531238737306282, + "loss": 0.5474, + "step": 35140 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006530814730013356, + "loss": 0.5703, + "step": 35150 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006530390722720431, + "loss": 0.6214, + "step": 35160 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006529966715427506, + "loss": 0.5701, + "step": 35170 + }, + { + "epoch": 1.49, + "learning_rate": 0.000652954270813458, + "loss": 0.5929, + "step": 35180 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006529118700841655, + "loss": 0.5987, + "step": 35190 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006528694693548729, + "loss": 0.5978, + "step": 35200 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006528270686255804, + "loss": 0.4837, + "step": 35210 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006527846678962879, + "loss": 0.4946, + "step": 35220 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006527422671669953, + "loss": 0.5093, + "step": 35230 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006526998664377027, + "loss": 0.6668, + "step": 35240 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006526574657084103, + "loss": 0.6552, + "step": 35250 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006526150649791177, + "loss": 0.5461, + "step": 35260 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006525726642498251, + "loss": 0.6849, + "step": 35270 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006525302635205326, + "loss": 0.6009, + "step": 35280 + }, + { + "epoch": 1.49, + "learning_rate": 0.00065248786279124, + "loss": 0.6449, + "step": 35290 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006524454620619475, + "loss": 0.6361, + "step": 35300 + }, + { + "epoch": 1.49, + "learning_rate": 0.000652403061332655, + "loss": 0.4585, + "step": 35310 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006523606606033624, + "loss": 0.5605, + "step": 35320 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006523182598740699, + "loss": 0.7019, + "step": 35330 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006522758591447774, + "loss": 0.6024, + "step": 35340 + }, + { + "epoch": 1.49, + "learning_rate": 0.0006522334584154848, + "loss": 0.5308, + "step": 35350 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006521910576861922, + "loss": 0.5911, + "step": 35360 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006521486569568998, + "loss": 0.6132, + "step": 35370 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006521062562276071, + "loss": 0.5152, + "step": 35380 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006520638554983146, + "loss": 0.5995, + "step": 35390 + }, + { + "epoch": 1.5, + "learning_rate": 0.000652021454769022, + "loss": 0.6342, + "step": 35400 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006519790540397295, + "loss": 0.6711, + "step": 35410 + }, + { + "epoch": 1.5, + "learning_rate": 0.000651936653310437, + "loss": 0.7022, + "step": 35420 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006518942525811445, + "loss": 0.5101, + "step": 35430 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006518518518518518, + "loss": 0.4465, + "step": 35440 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006518094511225594, + "loss": 0.6372, + "step": 35450 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006517670503932669, + "loss": 0.5903, + "step": 35460 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006517246496639742, + "loss": 0.5248, + "step": 35470 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006516822489346817, + "loss": 0.6212, + "step": 35480 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006516398482053891, + "loss": 0.5091, + "step": 35490 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006515974474760966, + "loss": 0.5802, + "step": 35500 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006515550467468041, + "loss": 0.5759, + "step": 35510 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006515126460175115, + "loss": 0.6052, + "step": 35520 + }, + { + "epoch": 1.5, + "learning_rate": 0.000651470245288219, + "loss": 0.5493, + "step": 35530 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006514278445589265, + "loss": 0.6218, + "step": 35540 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006513854438296339, + "loss": 0.6072, + "step": 35550 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006513430431003413, + "loss": 0.5502, + "step": 35560 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006513006423710488, + "loss": 0.5589, + "step": 35570 + }, + { + "epoch": 1.5, + "learning_rate": 0.0006512582416417563, + "loss": 0.6258, + "step": 35580 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006512158409124637, + "loss": 0.6379, + "step": 35590 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006511734401831712, + "loss": 0.5588, + "step": 35600 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006511310394538786, + "loss": 0.5298, + "step": 35610 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006510886387245861, + "loss": 0.667, + "step": 35620 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006510462379952936, + "loss": 0.5768, + "step": 35630 + }, + { + "epoch": 1.51, + "learning_rate": 0.000651003837266001, + "loss": 0.6077, + "step": 35640 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006509614365367084, + "loss": 0.55, + "step": 35650 + }, + { + "epoch": 1.51, + "learning_rate": 0.000650919035807416, + "loss": 0.6988, + "step": 35660 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006508766350781233, + "loss": 0.5932, + "step": 35670 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006508342343488308, + "loss": 0.6253, + "step": 35680 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006507918336195383, + "loss": 0.5712, + "step": 35690 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006507494328902457, + "loss": 0.6432, + "step": 35700 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006507070321609532, + "loss": 0.7242, + "step": 35710 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006506646314316607, + "loss": 0.628, + "step": 35720 + }, + { + "epoch": 1.51, + "learning_rate": 0.000650622230702368, + "loss": 0.5913, + "step": 35730 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006505798299730756, + "loss": 0.4931, + "step": 35740 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006505374292437831, + "loss": 0.6097, + "step": 35750 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006504950285144904, + "loss": 0.5587, + "step": 35760 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006504526277851979, + "loss": 0.5166, + "step": 35770 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006504102270559055, + "loss": 0.4948, + "step": 35780 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006503678263266128, + "loss": 0.5993, + "step": 35790 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006503254255973203, + "loss": 0.5252, + "step": 35800 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006502830248680278, + "loss": 0.6534, + "step": 35810 + }, + { + "epoch": 1.51, + "learning_rate": 0.0006502406241387352, + "loss": 0.5852, + "step": 35820 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006501982234094427, + "loss": 0.631, + "step": 35830 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006501558226801502, + "loss": 0.5765, + "step": 35840 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006501134219508575, + "loss": 0.6836, + "step": 35850 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006500710212215651, + "loss": 0.5529, + "step": 35860 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006500286204922726, + "loss": 0.6735, + "step": 35870 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006499862197629799, + "loss": 0.5714, + "step": 35880 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006499438190336874, + "loss": 0.562, + "step": 35890 + }, + { + "epoch": 1.52, + "learning_rate": 0.000649901418304395, + "loss": 0.4197, + "step": 35900 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006498590175751023, + "loss": 0.5941, + "step": 35910 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006498166168458098, + "loss": 0.6129, + "step": 35920 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006497742161165172, + "loss": 0.6396, + "step": 35930 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006497318153872247, + "loss": 0.6277, + "step": 35940 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006496894146579322, + "loss": 0.5461, + "step": 35950 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006496470139286397, + "loss": 0.5818, + "step": 35960 + }, + { + "epoch": 1.52, + "learning_rate": 0.000649604613199347, + "loss": 0.5532, + "step": 35970 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006495622124700546, + "loss": 0.5413, + "step": 35980 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006495198117407619, + "loss": 0.5896, + "step": 35990 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006494774110114694, + "loss": 0.6515, + "step": 36000 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006494350102821769, + "loss": 0.547, + "step": 36010 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006493926095528843, + "loss": 0.549, + "step": 36020 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006493502088235918, + "loss": 0.5615, + "step": 36030 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006493078080942993, + "loss": 0.5395, + "step": 36040 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006492654073650066, + "loss": 0.7291, + "step": 36050 + }, + { + "epoch": 1.52, + "learning_rate": 0.0006492230066357142, + "loss": 0.6571, + "step": 36060 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006491806059064217, + "loss": 0.64, + "step": 36070 + }, + { + "epoch": 1.53, + "learning_rate": 0.000649138205177129, + "loss": 0.5687, + "step": 36080 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006490958044478365, + "loss": 0.5841, + "step": 36090 + }, + { + "epoch": 1.53, + "learning_rate": 0.000649053403718544, + "loss": 0.6434, + "step": 36100 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006490110029892514, + "loss": 0.6252, + "step": 36110 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006489686022599589, + "loss": 0.5223, + "step": 36120 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006489262015306664, + "loss": 0.6125, + "step": 36130 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006488838008013738, + "loss": 0.5846, + "step": 36140 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006488414000720813, + "loss": 0.561, + "step": 36150 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006487989993427888, + "loss": 0.5889, + "step": 36160 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006487565986134961, + "loss": 0.5742, + "step": 36170 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006487141978842036, + "loss": 0.642, + "step": 36180 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006486717971549112, + "loss": 0.5811, + "step": 36190 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006486293964256185, + "loss": 0.6441, + "step": 36200 + }, + { + "epoch": 1.53, + "learning_rate": 0.000648586995696326, + "loss": 0.6345, + "step": 36210 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006485445949670335, + "loss": 0.6463, + "step": 36220 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006485021942377409, + "loss": 0.5278, + "step": 36230 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006484597935084484, + "loss": 0.5412, + "step": 36240 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006484173927791559, + "loss": 0.5616, + "step": 36250 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006483749920498632, + "loss": 0.5041, + "step": 36260 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006483325913205708, + "loss": 0.5646, + "step": 36270 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006482901905912782, + "loss": 0.593, + "step": 36280 + }, + { + "epoch": 1.53, + "learning_rate": 0.0006482477898619856, + "loss": 0.608, + "step": 36290 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006482053891326931, + "loss": 0.5979, + "step": 36300 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006481629884034006, + "loss": 0.6315, + "step": 36310 + }, + { + "epoch": 1.54, + "learning_rate": 0.000648120587674108, + "loss": 0.5372, + "step": 36320 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006480781869448155, + "loss": 0.5394, + "step": 36330 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006480357862155228, + "loss": 0.6276, + "step": 36340 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006479933854862304, + "loss": 0.5604, + "step": 36350 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006479509847569379, + "loss": 0.7263, + "step": 36360 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006479085840276452, + "loss": 0.6035, + "step": 36370 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006478661832983527, + "loss": 0.5122, + "step": 36380 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006478237825690603, + "loss": 0.552, + "step": 36390 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006477813818397676, + "loss": 0.6376, + "step": 36400 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006477389811104751, + "loss": 0.5471, + "step": 36410 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006476965803811826, + "loss": 0.5282, + "step": 36420 + }, + { + "epoch": 1.54, + "learning_rate": 0.00064765417965189, + "loss": 0.5356, + "step": 36430 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006476117789225975, + "loss": 0.5602, + "step": 36440 + }, + { + "epoch": 1.54, + "learning_rate": 0.000647569378193305, + "loss": 0.6486, + "step": 36450 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006475269774640123, + "loss": 0.6088, + "step": 36460 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006474845767347199, + "loss": 0.6552, + "step": 36470 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006474421760054274, + "loss": 0.4906, + "step": 36480 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006473997752761347, + "loss": 0.5401, + "step": 36490 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006473573745468422, + "loss": 0.6328, + "step": 36500 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006473149738175498, + "loss": 0.5935, + "step": 36510 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006472725730882571, + "loss": 0.5979, + "step": 36520 + }, + { + "epoch": 1.54, + "learning_rate": 0.0006472301723589646, + "loss": 0.6404, + "step": 36530 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006471877716296721, + "loss": 0.4864, + "step": 36540 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006471453709003795, + "loss": 0.5975, + "step": 36550 + }, + { + "epoch": 1.55, + "learning_rate": 0.000647102970171087, + "loss": 0.5982, + "step": 36560 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006470605694417945, + "loss": 0.6079, + "step": 36570 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006470181687125018, + "loss": 0.5871, + "step": 36580 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006469757679832094, + "loss": 0.6382, + "step": 36590 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006469333672539168, + "loss": 0.6443, + "step": 36600 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006468909665246242, + "loss": 0.5298, + "step": 36610 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006468485657953317, + "loss": 0.5477, + "step": 36620 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006468061650660392, + "loss": 0.7045, + "step": 36630 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006467637643367466, + "loss": 0.5301, + "step": 36640 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006467213636074541, + "loss": 0.7244, + "step": 36650 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006466789628781615, + "loss": 0.5855, + "step": 36660 + }, + { + "epoch": 1.55, + "learning_rate": 0.000646636562148869, + "loss": 0.6063, + "step": 36670 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006465941614195765, + "loss": 0.6183, + "step": 36680 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006465517606902839, + "loss": 0.6987, + "step": 36690 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006465093599609913, + "loss": 0.5941, + "step": 36700 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006464669592316988, + "loss": 0.5842, + "step": 36710 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006464245585024063, + "loss": 0.5101, + "step": 36720 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006463821577731137, + "loss": 0.6404, + "step": 36730 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006463397570438212, + "loss": 0.5867, + "step": 36740 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006462973563145287, + "loss": 0.5186, + "step": 36750 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006462549555852361, + "loss": 0.5866, + "step": 36760 + }, + { + "epoch": 1.55, + "learning_rate": 0.0006462125548559436, + "loss": 0.6136, + "step": 36770 + }, + { + "epoch": 1.56, + "learning_rate": 0.000646170154126651, + "loss": 0.5117, + "step": 36780 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006461277533973584, + "loss": 0.6658, + "step": 36790 + }, + { + "epoch": 1.56, + "learning_rate": 0.000646085352668066, + "loss": 0.5488, + "step": 36800 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006460429519387734, + "loss": 0.6615, + "step": 36810 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006460005512094808, + "loss": 0.6085, + "step": 36820 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006459581504801883, + "loss": 0.5563, + "step": 36830 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006459157497508958, + "loss": 0.6335, + "step": 36840 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006458733490216032, + "loss": 0.5782, + "step": 36850 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006458309482923107, + "loss": 0.531, + "step": 36860 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006457885475630182, + "loss": 0.5525, + "step": 36870 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006457461468337256, + "loss": 0.5856, + "step": 36880 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006457037461044331, + "loss": 0.5965, + "step": 36890 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006456613453751404, + "loss": 0.6823, + "step": 36900 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006456189446458479, + "loss": 0.5349, + "step": 36910 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006455765439165554, + "loss": 0.6979, + "step": 36920 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006455341431872628, + "loss": 0.5549, + "step": 36930 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006454917424579703, + "loss": 0.6653, + "step": 36940 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006454493417286778, + "loss": 0.5427, + "step": 36950 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006454069409993852, + "loss": 0.5699, + "step": 36960 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006453645402700927, + "loss": 0.6264, + "step": 36970 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006453221395408001, + "loss": 0.4644, + "step": 36980 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006452797388115075, + "loss": 0.5186, + "step": 36990 + }, + { + "epoch": 1.56, + "learning_rate": 0.0006452373380822151, + "loss": 0.6045, + "step": 37000 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006451949373529225, + "loss": 0.5357, + "step": 37010 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006451525366236299, + "loss": 0.661, + "step": 37020 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006451101358943374, + "loss": 0.4918, + "step": 37030 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006450677351650449, + "loss": 0.585, + "step": 37040 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006450253344357523, + "loss": 0.4625, + "step": 37050 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006449829337064598, + "loss": 0.6295, + "step": 37060 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006449405329771672, + "loss": 0.5401, + "step": 37070 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006448981322478747, + "loss": 0.5094, + "step": 37080 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006448557315185822, + "loss": 0.6904, + "step": 37090 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006448133307892896, + "loss": 0.7571, + "step": 37100 + }, + { + "epoch": 1.57, + "learning_rate": 0.000644770930059997, + "loss": 0.6165, + "step": 37110 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006447285293307046, + "loss": 0.5996, + "step": 37120 + }, + { + "epoch": 1.57, + "learning_rate": 0.000644686128601412, + "loss": 0.5219, + "step": 37130 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006446437278721194, + "loss": 0.5385, + "step": 37140 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006446013271428269, + "loss": 0.5843, + "step": 37150 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006445589264135344, + "loss": 0.4785, + "step": 37160 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006445165256842418, + "loss": 0.5496, + "step": 37170 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006444741249549493, + "loss": 0.5914, + "step": 37180 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006444317242256567, + "loss": 0.6184, + "step": 37190 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006443893234963642, + "loss": 0.5834, + "step": 37200 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006443469227670716, + "loss": 0.5847, + "step": 37210 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006443045220377791, + "loss": 0.7343, + "step": 37220 + }, + { + "epoch": 1.57, + "learning_rate": 0.0006442621213084865, + "loss": 0.578, + "step": 37230 + }, + { + "epoch": 1.57, + "learning_rate": 0.000644219720579194, + "loss": 0.6081, + "step": 37240 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006441773198499015, + "loss": 0.6648, + "step": 37250 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006441349191206089, + "loss": 0.6322, + "step": 37260 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006440925183913163, + "loss": 0.644, + "step": 37270 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006440501176620239, + "loss": 0.6735, + "step": 37280 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006440077169327313, + "loss": 0.522, + "step": 37290 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006439653162034387, + "loss": 0.5467, + "step": 37300 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006439229154741461, + "loss": 0.5468, + "step": 37310 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006438805147448536, + "loss": 0.7204, + "step": 37320 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006438381140155611, + "loss": 0.6305, + "step": 37330 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006437957132862685, + "loss": 0.6388, + "step": 37340 + }, + { + "epoch": 1.58, + "learning_rate": 0.000643753312556976, + "loss": 0.6814, + "step": 37350 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006437109118276835, + "loss": 0.5988, + "step": 37360 + }, + { + "epoch": 1.58, + "learning_rate": 0.000643668511098391, + "loss": 0.6133, + "step": 37370 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006436261103690984, + "loss": 0.625, + "step": 37380 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006435837096398058, + "loss": 0.7304, + "step": 37390 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006435413089105133, + "loss": 0.644, + "step": 37400 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006434989081812208, + "loss": 0.5484, + "step": 37410 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006434565074519282, + "loss": 0.5207, + "step": 37420 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006434141067226356, + "loss": 0.6061, + "step": 37430 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006433717059933431, + "loss": 0.5283, + "step": 37440 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006433293052640506, + "loss": 0.5624, + "step": 37450 + }, + { + "epoch": 1.58, + "learning_rate": 0.000643286904534758, + "loss": 0.6262, + "step": 37460 + }, + { + "epoch": 1.58, + "learning_rate": 0.0006432445038054655, + "loss": 0.6179, + "step": 37470 + }, + { + "epoch": 1.58, + "learning_rate": 0.000643202103076173, + "loss": 0.5793, + "step": 37480 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006431597023468804, + "loss": 0.6772, + "step": 37490 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006431173016175879, + "loss": 0.6382, + "step": 37500 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006430749008882953, + "loss": 0.6591, + "step": 37510 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006430325001590027, + "loss": 0.5974, + "step": 37520 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006429900994297102, + "loss": 0.5558, + "step": 37530 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006429476987004177, + "loss": 0.5412, + "step": 37540 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006429052979711251, + "loss": 0.5325, + "step": 37550 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006428628972418326, + "loss": 0.5871, + "step": 37560 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006428204965125401, + "loss": 0.679, + "step": 37570 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006427780957832475, + "loss": 0.6163, + "step": 37580 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006427356950539549, + "loss": 0.5485, + "step": 37590 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006426932943246624, + "loss": 0.512, + "step": 37600 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006426508935953699, + "loss": 0.6242, + "step": 37610 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006426084928660773, + "loss": 0.5117, + "step": 37620 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006425660921367848, + "loss": 0.5884, + "step": 37630 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006425236914074922, + "loss": 0.6016, + "step": 37640 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006424812906781997, + "loss": 0.6501, + "step": 37650 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006424388899489072, + "loss": 0.5921, + "step": 37660 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006423964892196146, + "loss": 0.5737, + "step": 37670 + }, + { + "epoch": 1.59, + "learning_rate": 0.000642354088490322, + "loss": 0.6607, + "step": 37680 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006423116877610296, + "loss": 0.5341, + "step": 37690 + }, + { + "epoch": 1.59, + "learning_rate": 0.000642269287031737, + "loss": 0.6577, + "step": 37700 + }, + { + "epoch": 1.59, + "learning_rate": 0.0006422268863024444, + "loss": 0.5865, + "step": 37710 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006421844855731519, + "loss": 0.4806, + "step": 37720 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006421420848438594, + "loss": 0.573, + "step": 37730 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006420996841145668, + "loss": 0.6048, + "step": 37740 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006420572833852743, + "loss": 0.5695, + "step": 37750 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006420148826559817, + "loss": 0.582, + "step": 37760 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006419724819266892, + "loss": 0.5926, + "step": 37770 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006419300811973967, + "loss": 0.5994, + "step": 37780 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006418876804681041, + "loss": 0.5516, + "step": 37790 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006418452797388115, + "loss": 0.5916, + "step": 37800 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006418028790095191, + "loss": 0.6777, + "step": 37810 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006417604782802264, + "loss": 0.5148, + "step": 37820 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006417180775509339, + "loss": 0.6273, + "step": 37830 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006416756768216413, + "loss": 0.5604, + "step": 37840 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006416332760923488, + "loss": 0.624, + "step": 37850 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006415908753630563, + "loss": 0.5423, + "step": 37860 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006415484746337637, + "loss": 0.5332, + "step": 37870 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006415060739044711, + "loss": 0.5563, + "step": 37880 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006414636731751787, + "loss": 0.4979, + "step": 37890 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006414212724458861, + "loss": 0.6359, + "step": 37900 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006413788717165935, + "loss": 0.6516, + "step": 37910 + }, + { + "epoch": 1.6, + "learning_rate": 0.000641336470987301, + "loss": 0.5788, + "step": 37920 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006412940702580085, + "loss": 0.5746, + "step": 37930 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006412516695287159, + "loss": 0.6057, + "step": 37940 + }, + { + "epoch": 1.6, + "learning_rate": 0.0006412092687994234, + "loss": 0.6027, + "step": 37950 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006411668680701308, + "loss": 0.533, + "step": 37960 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006411244673408383, + "loss": 0.522, + "step": 37970 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006410820666115458, + "loss": 0.5735, + "step": 37980 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006410396658822532, + "loss": 0.6731, + "step": 37990 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006409972651529606, + "loss": 0.5508, + "step": 38000 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006409548644236682, + "loss": 0.6606, + "step": 38010 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006409124636943756, + "loss": 0.6639, + "step": 38020 + }, + { + "epoch": 1.61, + "learning_rate": 0.000640870062965083, + "loss": 0.6482, + "step": 38030 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006408276622357905, + "loss": 0.7, + "step": 38040 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006407852615064979, + "loss": 0.6405, + "step": 38050 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006407428607772054, + "loss": 0.58, + "step": 38060 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006407004600479129, + "loss": 0.6648, + "step": 38070 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006406580593186203, + "loss": 0.6335, + "step": 38080 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006406156585893278, + "loss": 0.6199, + "step": 38090 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006405732578600353, + "loss": 0.5507, + "step": 38100 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006405308571307427, + "loss": 0.5812, + "step": 38110 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006404884564014501, + "loss": 0.5797, + "step": 38120 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006404460556721576, + "loss": 0.5626, + "step": 38130 + }, + { + "epoch": 1.61, + "learning_rate": 0.000640403654942865, + "loss": 0.5758, + "step": 38140 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006403612542135725, + "loss": 0.636, + "step": 38150 + }, + { + "epoch": 1.61, + "learning_rate": 0.00064031885348428, + "loss": 0.5421, + "step": 38160 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006402764527549874, + "loss": 0.6274, + "step": 38170 + }, + { + "epoch": 1.61, + "learning_rate": 0.0006402340520256949, + "loss": 1.0423, + "step": 38180 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006401916512964024, + "loss": 0.5282, + "step": 38190 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006401492505671097, + "loss": 0.5791, + "step": 38200 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006401068498378172, + "loss": 0.6264, + "step": 38210 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006400644491085248, + "loss": 0.6134, + "step": 38220 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006400220483792321, + "loss": 0.595, + "step": 38230 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006399796476499396, + "loss": 0.6046, + "step": 38240 + }, + { + "epoch": 1.62, + "learning_rate": 0.000639937246920647, + "loss": 0.4572, + "step": 38250 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006398948461913545, + "loss": 0.5985, + "step": 38260 + }, + { + "epoch": 1.62, + "learning_rate": 0.000639852445462062, + "loss": 0.462, + "step": 38270 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006398100447327695, + "loss": 0.5743, + "step": 38280 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006397676440034768, + "loss": 0.5352, + "step": 38290 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006397252432741844, + "loss": 0.5709, + "step": 38300 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006396828425448919, + "loss": 0.5659, + "step": 38310 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006396404418155992, + "loss": 0.6955, + "step": 38320 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006395980410863067, + "loss": 0.5623, + "step": 38330 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006395556403570143, + "loss": 0.593, + "step": 38340 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006395132396277216, + "loss": 0.5411, + "step": 38350 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006394708388984291, + "loss": 0.6041, + "step": 38360 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006394284381691365, + "loss": 0.5911, + "step": 38370 + }, + { + "epoch": 1.62, + "learning_rate": 0.000639386037439844, + "loss": 0.6456, + "step": 38380 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006393436367105515, + "loss": 0.5982, + "step": 38390 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006393012359812589, + "loss": 0.567, + "step": 38400 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006392588352519663, + "loss": 0.5701, + "step": 38410 + }, + { + "epoch": 1.62, + "learning_rate": 0.0006392164345226739, + "loss": 0.6319, + "step": 38420 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006391740337933812, + "loss": 0.5555, + "step": 38430 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006391316330640887, + "loss": 0.5087, + "step": 38440 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006390892323347962, + "loss": 0.5948, + "step": 38450 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006390468316055036, + "loss": 0.5538, + "step": 38460 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006390044308762111, + "loss": 0.503, + "step": 38470 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006389620301469186, + "loss": 0.5129, + "step": 38480 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006389196294176259, + "loss": 0.6116, + "step": 38490 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006388772286883335, + "loss": 0.5761, + "step": 38500 + }, + { + "epoch": 1.63, + "learning_rate": 0.000638834827959041, + "loss": 0.5907, + "step": 38510 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006387924272297483, + "loss": 0.5914, + "step": 38520 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006387500265004558, + "loss": 0.5859, + "step": 38530 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006387076257711634, + "loss": 0.5369, + "step": 38540 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006386652250418707, + "loss": 0.6555, + "step": 38550 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006386228243125782, + "loss": 0.6267, + "step": 38560 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006385804235832857, + "loss": 0.6781, + "step": 38570 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006385380228539931, + "loss": 0.5366, + "step": 38580 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006384956221247006, + "loss": 0.5004, + "step": 38590 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006384532213954081, + "loss": 0.5173, + "step": 38600 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006384108206661154, + "loss": 0.5856, + "step": 38610 + }, + { + "epoch": 1.63, + "learning_rate": 0.000638368419936823, + "loss": 0.5892, + "step": 38620 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006383260192075305, + "loss": 0.5863, + "step": 38630 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006382836184782378, + "loss": 0.7465, + "step": 38640 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006382412177489453, + "loss": 0.5423, + "step": 38650 + }, + { + "epoch": 1.63, + "learning_rate": 0.0006381988170196528, + "loss": 0.5473, + "step": 38660 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006381564162903602, + "loss": 0.6195, + "step": 38670 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006381140155610677, + "loss": 0.5709, + "step": 38680 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006380716148317752, + "loss": 0.5441, + "step": 38690 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006380292141024826, + "loss": 0.573, + "step": 38700 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006379868133731901, + "loss": 0.6984, + "step": 38710 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006379444126438976, + "loss": 0.497, + "step": 38720 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006379020119146049, + "loss": 0.6734, + "step": 38730 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006378596111853124, + "loss": 0.6099, + "step": 38740 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006378172104560198, + "loss": 0.5798, + "step": 38750 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006377748097267273, + "loss": 0.5783, + "step": 38760 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006377324089974348, + "loss": 0.5854, + "step": 38770 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006376900082681422, + "loss": 0.5601, + "step": 38780 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006376476075388497, + "loss": 0.5597, + "step": 38790 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006376052068095572, + "loss": 0.643, + "step": 38800 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006375628060802645, + "loss": 0.6061, + "step": 38810 + }, + { + "epoch": 1.64, + "learning_rate": 0.000637520405350972, + "loss": 0.614, + "step": 38820 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006374780046216796, + "loss": 0.6285, + "step": 38830 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006374356038923869, + "loss": 0.5379, + "step": 38840 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006373932031630944, + "loss": 0.6096, + "step": 38850 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006373508024338019, + "loss": 0.4671, + "step": 38860 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006373084017045093, + "loss": 0.4919, + "step": 38870 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006372660009752168, + "loss": 0.5478, + "step": 38880 + }, + { + "epoch": 1.64, + "learning_rate": 0.0006372236002459243, + "loss": 0.5716, + "step": 38890 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006371811995166317, + "loss": 0.6693, + "step": 38900 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006371387987873392, + "loss": 0.6354, + "step": 38910 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006370963980580467, + "loss": 0.4859, + "step": 38920 + }, + { + "epoch": 1.65, + "learning_rate": 0.000637053997328754, + "loss": 0.6553, + "step": 38930 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006370115965994615, + "loss": 0.6254, + "step": 38940 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006369691958701691, + "loss": 0.5227, + "step": 38950 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006369267951408764, + "loss": 0.6704, + "step": 38960 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006368843944115839, + "loss": 0.539, + "step": 38970 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006368419936822914, + "loss": 0.5756, + "step": 38980 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006367995929529988, + "loss": 0.6459, + "step": 38990 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006367571922237063, + "loss": 0.574, + "step": 39000 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006367147914944138, + "loss": 0.6317, + "step": 39010 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006366723907651211, + "loss": 0.5662, + "step": 39020 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006366299900358287, + "loss": 0.6288, + "step": 39030 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006365875893065361, + "loss": 0.6249, + "step": 39040 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006365451885772435, + "loss": 0.5671, + "step": 39050 + }, + { + "epoch": 1.65, + "learning_rate": 0.000636502787847951, + "loss": 0.5595, + "step": 39060 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006364603871186585, + "loss": 0.5487, + "step": 39070 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006364179863893659, + "loss": 0.6679, + "step": 39080 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006363755856600734, + "loss": 0.669, + "step": 39090 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006363331849307808, + "loss": 0.6747, + "step": 39100 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006362907842014883, + "loss": 0.5714, + "step": 39110 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006362483834721958, + "loss": 0.5654, + "step": 39120 + }, + { + "epoch": 1.65, + "learning_rate": 0.0006362059827429032, + "loss": 0.5714, + "step": 39130 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006361635820136106, + "loss": 0.5507, + "step": 39140 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006361211812843182, + "loss": 0.6204, + "step": 39150 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006360787805550256, + "loss": 0.5869, + "step": 39160 + }, + { + "epoch": 1.66, + "learning_rate": 0.000636036379825733, + "loss": 0.5017, + "step": 39170 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006359939790964405, + "loss": 0.6093, + "step": 39180 + }, + { + "epoch": 1.66, + "learning_rate": 0.000635951578367148, + "loss": 0.6214, + "step": 39190 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006359091776378554, + "loss": 0.6087, + "step": 39200 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006358667769085629, + "loss": 0.4818, + "step": 39210 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006358243761792702, + "loss": 0.5813, + "step": 39220 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006357819754499778, + "loss": 0.6087, + "step": 39230 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006357395747206853, + "loss": 0.6092, + "step": 39240 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006356971739913926, + "loss": 0.6679, + "step": 39250 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006356547732621001, + "loss": 0.6649, + "step": 39260 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006356123725328076, + "loss": 0.5302, + "step": 39270 + }, + { + "epoch": 1.66, + "learning_rate": 0.000635569971803515, + "loss": 0.5459, + "step": 39280 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006355275710742225, + "loss": 0.5934, + "step": 39290 + }, + { + "epoch": 1.66, + "learning_rate": 0.00063548517034493, + "loss": 0.6161, + "step": 39300 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006354427696156374, + "loss": 0.5832, + "step": 39310 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006354003688863449, + "loss": 0.5874, + "step": 39320 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006353579681570524, + "loss": 0.5814, + "step": 39330 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006353155674277597, + "loss": 0.6918, + "step": 39340 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006352731666984672, + "loss": 0.6171, + "step": 39350 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006352307659691747, + "loss": 0.4692, + "step": 39360 + }, + { + "epoch": 1.66, + "learning_rate": 0.0006351883652398821, + "loss": 0.6155, + "step": 39370 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006351459645105896, + "loss": 0.7185, + "step": 39380 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006351035637812971, + "loss": 0.4758, + "step": 39390 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006350611630520045, + "loss": 0.5741, + "step": 39400 + }, + { + "epoch": 1.67, + "learning_rate": 0.000635018762322712, + "loss": 0.6034, + "step": 39410 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006349763615934194, + "loss": 0.4682, + "step": 39420 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006349339608641269, + "loss": 0.6522, + "step": 39430 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006348915601348344, + "loss": 0.5565, + "step": 39440 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006348491594055418, + "loss": 0.603, + "step": 39450 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006348067586762492, + "loss": 0.5157, + "step": 39460 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006347643579469567, + "loss": 0.5789, + "step": 39470 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006347219572176642, + "loss": 0.541, + "step": 39480 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006346795564883716, + "loss": 0.6552, + "step": 39490 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006346371557590791, + "loss": 0.5832, + "step": 39500 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006345947550297866, + "loss": 0.5883, + "step": 39510 + }, + { + "epoch": 1.67, + "learning_rate": 0.000634552354300494, + "loss": 0.602, + "step": 39520 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006345099535712015, + "loss": 0.5786, + "step": 39530 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006344675528419089, + "loss": 0.5217, + "step": 39540 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006344251521126163, + "loss": 0.5893, + "step": 39550 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006343827513833239, + "loss": 0.5391, + "step": 39560 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006343403506540313, + "loss": 0.6379, + "step": 39570 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006342979499247387, + "loss": 0.6073, + "step": 39580 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006342555491954462, + "loss": 0.5449, + "step": 39590 + }, + { + "epoch": 1.67, + "learning_rate": 0.0006342131484661537, + "loss": 0.6176, + "step": 39600 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006341707477368611, + "loss": 0.5854, + "step": 39610 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006341283470075686, + "loss": 0.6026, + "step": 39620 + }, + { + "epoch": 1.68, + "learning_rate": 0.000634085946278276, + "loss": 0.7012, + "step": 39630 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006340435455489835, + "loss": 0.493, + "step": 39640 + }, + { + "epoch": 1.68, + "learning_rate": 0.000634001144819691, + "loss": 0.565, + "step": 39650 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006339587440903984, + "loss": 0.5109, + "step": 39660 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006339163433611058, + "loss": 0.5588, + "step": 39670 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006338739426318133, + "loss": 0.6806, + "step": 39680 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006338315419025208, + "loss": 0.6096, + "step": 39690 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006337891411732282, + "loss": 0.6131, + "step": 39700 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006337467404439356, + "loss": 0.5603, + "step": 39710 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006337043397146432, + "loss": 0.6375, + "step": 39720 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006336619389853506, + "loss": 0.6107, + "step": 39730 + }, + { + "epoch": 1.68, + "learning_rate": 0.000633619538256058, + "loss": 0.5562, + "step": 39740 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006335771375267654, + "loss": 0.5609, + "step": 39750 + }, + { + "epoch": 1.68, + "learning_rate": 0.000633534736797473, + "loss": 0.6522, + "step": 39760 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006334923360681804, + "loss": 0.5204, + "step": 39770 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006334499353388878, + "loss": 0.6555, + "step": 39780 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006334075346095953, + "loss": 0.5546, + "step": 39790 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006333651338803028, + "loss": 0.5227, + "step": 39800 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006333227331510102, + "loss": 0.5434, + "step": 39810 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006332803324217177, + "loss": 0.5781, + "step": 39820 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006332379316924251, + "loss": 0.5375, + "step": 39830 + }, + { + "epoch": 1.68, + "learning_rate": 0.0006331955309631326, + "loss": 0.5492, + "step": 39840 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006331531302338401, + "loss": 0.6527, + "step": 39850 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006331107295045475, + "loss": 0.6247, + "step": 39860 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006330683287752549, + "loss": 0.5187, + "step": 39870 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006330259280459625, + "loss": 0.5244, + "step": 39880 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006329835273166699, + "loss": 0.6077, + "step": 39890 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006329411265873773, + "loss": 0.5265, + "step": 39900 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006328987258580848, + "loss": 0.574, + "step": 39910 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006328563251287923, + "loss": 0.5568, + "step": 39920 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006328139243994997, + "loss": 0.7126, + "step": 39930 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006327715236702072, + "loss": 0.5745, + "step": 39940 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006327291229409146, + "loss": 0.5836, + "step": 39950 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006326867222116221, + "loss": 0.5586, + "step": 39960 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006326443214823295, + "loss": 0.6035, + "step": 39970 + }, + { + "epoch": 1.69, + "learning_rate": 0.000632601920753037, + "loss": 0.5863, + "step": 39980 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006325595200237444, + "loss": 0.5918, + "step": 39990 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006325171192944519, + "loss": 0.585, + "step": 40000 + }, + { + "epoch": 1.69, + "eval_loss": 0.6182475090026855, + "eval_runtime": 337.5574, + "eval_samples_per_second": 15.568, + "eval_steps_per_second": 3.893, + "step": 40000 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006324747185651594, + "loss": 0.5426, + "step": 40010 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006324323178358668, + "loss": 0.6274, + "step": 40020 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006323899171065742, + "loss": 0.5879, + "step": 40030 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006323475163772818, + "loss": 0.5892, + "step": 40040 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006323051156479892, + "loss": 0.5792, + "step": 40050 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006322627149186966, + "loss": 0.6454, + "step": 40060 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006322203141894041, + "loss": 0.532, + "step": 40070 + }, + { + "epoch": 1.69, + "learning_rate": 0.0006321779134601115, + "loss": 0.678, + "step": 40080 + }, + { + "epoch": 1.7, + "learning_rate": 0.000632135512730819, + "loss": 0.5556, + "step": 40090 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006320931120015265, + "loss": 0.5789, + "step": 40100 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006320507112722339, + "loss": 0.7067, + "step": 40110 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006320083105429414, + "loss": 0.5701, + "step": 40120 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006319659098136489, + "loss": 0.6824, + "step": 40130 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006319235090843563, + "loss": 0.6875, + "step": 40140 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006318811083550637, + "loss": 0.6386, + "step": 40150 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006318387076257711, + "loss": 0.5898, + "step": 40160 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006317963068964787, + "loss": 0.5183, + "step": 40170 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006317539061671861, + "loss": 0.4507, + "step": 40180 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006317115054378935, + "loss": 0.6666, + "step": 40190 + }, + { + "epoch": 1.7, + "learning_rate": 0.000631669104708601, + "loss": 0.508, + "step": 40200 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006316267039793085, + "loss": 0.5691, + "step": 40210 + }, + { + "epoch": 1.7, + "learning_rate": 0.000631584303250016, + "loss": 0.6355, + "step": 40220 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006315419025207234, + "loss": 0.592, + "step": 40230 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006314995017914308, + "loss": 0.6097, + "step": 40240 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006314571010621383, + "loss": 0.5983, + "step": 40250 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006314147003328458, + "loss": 0.673, + "step": 40260 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006313722996035532, + "loss": 0.6025, + "step": 40270 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006313298988742606, + "loss": 0.6121, + "step": 40280 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006312874981449681, + "loss": 0.6074, + "step": 40290 + }, + { + "epoch": 1.7, + "learning_rate": 0.0006312450974156756, + "loss": 0.5227, + "step": 40300 + }, + { + "epoch": 1.7, + "learning_rate": 0.000631202696686383, + "loss": 0.5474, + "step": 40310 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006311602959570904, + "loss": 0.4988, + "step": 40320 + }, + { + "epoch": 1.71, + "learning_rate": 0.000631117895227798, + "loss": 0.6253, + "step": 40330 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006310754944985054, + "loss": 0.551, + "step": 40340 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006310330937692128, + "loss": 0.5844, + "step": 40350 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006309906930399203, + "loss": 0.6082, + "step": 40360 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006309482923106278, + "loss": 0.5127, + "step": 40370 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006309058915813352, + "loss": 0.5563, + "step": 40380 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006308634908520427, + "loss": 0.536, + "step": 40390 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006308210901227501, + "loss": 0.4597, + "step": 40400 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006307786893934576, + "loss": 0.5677, + "step": 40410 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006307362886641651, + "loss": 0.5443, + "step": 40420 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006306938879348725, + "loss": 0.5492, + "step": 40430 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006306514872055799, + "loss": 0.4875, + "step": 40440 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006306090864762875, + "loss": 0.5447, + "step": 40450 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006305666857469949, + "loss": 0.5852, + "step": 40460 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006305242850177023, + "loss": 0.5139, + "step": 40470 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006304818842884098, + "loss": 0.6172, + "step": 40480 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006304394835591173, + "loss": 0.5326, + "step": 40490 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006303970828298247, + "loss": 0.6218, + "step": 40500 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006303546821005322, + "loss": 0.5461, + "step": 40510 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006303122813712396, + "loss": 0.5841, + "step": 40520 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006302698806419471, + "loss": 0.5323, + "step": 40530 + }, + { + "epoch": 1.71, + "learning_rate": 0.0006302274799126546, + "loss": 0.5947, + "step": 40540 + }, + { + "epoch": 1.71, + "learning_rate": 0.000630185079183362, + "loss": 0.5795, + "step": 40550 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006301426784540694, + "loss": 0.5301, + "step": 40560 + }, + { + "epoch": 1.72, + "learning_rate": 0.000630100277724777, + "loss": 0.5347, + "step": 40570 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006300578769954843, + "loss": 0.5948, + "step": 40580 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006300154762661918, + "loss": 0.5919, + "step": 40590 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006299730755368993, + "loss": 0.5644, + "step": 40600 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006299306748076067, + "loss": 0.6178, + "step": 40610 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006298882740783142, + "loss": 0.6663, + "step": 40620 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006298458733490217, + "loss": 0.4909, + "step": 40630 + }, + { + "epoch": 1.72, + "learning_rate": 0.000629803472619729, + "loss": 0.4903, + "step": 40640 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006297610718904366, + "loss": 0.5632, + "step": 40650 + }, + { + "epoch": 1.72, + "learning_rate": 0.000629718671161144, + "loss": 0.5755, + "step": 40660 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006296762704318514, + "loss": 0.5456, + "step": 40670 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006296338697025589, + "loss": 0.4702, + "step": 40680 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006295914689732663, + "loss": 0.6645, + "step": 40690 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006295490682439738, + "loss": 0.571, + "step": 40700 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006295066675146813, + "loss": 0.6265, + "step": 40710 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006294642667853887, + "loss": 0.5373, + "step": 40720 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006294218660560962, + "loss": 0.5615, + "step": 40730 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006293794653268037, + "loss": 0.6148, + "step": 40740 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006293370645975111, + "loss": 0.5863, + "step": 40750 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006292946638682185, + "loss": 0.6081, + "step": 40760 + }, + { + "epoch": 1.72, + "learning_rate": 0.000629252263138926, + "loss": 0.5869, + "step": 40770 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006292098624096335, + "loss": 0.5189, + "step": 40780 + }, + { + "epoch": 1.72, + "learning_rate": 0.0006291674616803409, + "loss": 0.5895, + "step": 40790 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006291250609510484, + "loss": 0.5804, + "step": 40800 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006290826602217558, + "loss": 0.652, + "step": 40810 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006290402594924633, + "loss": 0.6245, + "step": 40820 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006289978587631708, + "loss": 0.5259, + "step": 40830 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006289554580338782, + "loss": 0.6479, + "step": 40840 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006289130573045856, + "loss": 0.6396, + "step": 40850 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006288706565752932, + "loss": 0.6402, + "step": 40860 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006288282558460006, + "loss": 0.5821, + "step": 40870 + }, + { + "epoch": 1.73, + "learning_rate": 0.000628785855116708, + "loss": 0.5575, + "step": 40880 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006287434543874155, + "loss": 0.5848, + "step": 40890 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006287010536581229, + "loss": 0.5589, + "step": 40900 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006286586529288304, + "loss": 0.5364, + "step": 40910 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006286162521995379, + "loss": 0.5688, + "step": 40920 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006285738514702453, + "loss": 0.4957, + "step": 40930 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006285314507409528, + "loss": 0.6339, + "step": 40940 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006284890500116603, + "loss": 0.6041, + "step": 40950 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006284466492823676, + "loss": 0.6032, + "step": 40960 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006284042485530751, + "loss": 0.581, + "step": 40970 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006283618478237827, + "loss": 0.6911, + "step": 40980 + }, + { + "epoch": 1.73, + "learning_rate": 0.00062831944709449, + "loss": 0.6301, + "step": 40990 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006282770463651975, + "loss": 0.6087, + "step": 41000 + }, + { + "epoch": 1.73, + "learning_rate": 0.000628234645635905, + "loss": 0.5661, + "step": 41010 + }, + { + "epoch": 1.73, + "learning_rate": 0.0006281922449066124, + "loss": 0.6667, + "step": 41020 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006281498441773199, + "loss": 0.655, + "step": 41030 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006281074434480274, + "loss": 0.5581, + "step": 41040 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006280650427187347, + "loss": 0.6045, + "step": 41050 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006280226419894423, + "loss": 0.5529, + "step": 41060 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006279802412601498, + "loss": 0.6128, + "step": 41070 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006279378405308571, + "loss": 0.5946, + "step": 41080 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006278954398015646, + "loss": 0.5492, + "step": 41090 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006278530390722722, + "loss": 0.5525, + "step": 41100 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006278106383429795, + "loss": 0.522, + "step": 41110 + }, + { + "epoch": 1.74, + "learning_rate": 0.000627768237613687, + "loss": 0.6424, + "step": 41120 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006277258368843945, + "loss": 0.5707, + "step": 41130 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006276834361551019, + "loss": 0.6055, + "step": 41140 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006276410354258094, + "loss": 0.5524, + "step": 41150 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006275986346965169, + "loss": 0.6014, + "step": 41160 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006275562339672242, + "loss": 0.5136, + "step": 41170 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006275138332379318, + "loss": 0.5601, + "step": 41180 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006274714325086391, + "loss": 0.649, + "step": 41190 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006274290317793466, + "loss": 0.5739, + "step": 41200 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006273866310500541, + "loss": 0.4601, + "step": 41210 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006273442303207615, + "loss": 0.6124, + "step": 41220 + }, + { + "epoch": 1.74, + "learning_rate": 0.000627301829591469, + "loss": 0.5127, + "step": 41230 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006272594288621765, + "loss": 0.6041, + "step": 41240 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006272170281328838, + "loss": 0.5507, + "step": 41250 + }, + { + "epoch": 1.74, + "learning_rate": 0.0006271746274035914, + "loss": 0.6215, + "step": 41260 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006271322266742989, + "loss": 0.5771, + "step": 41270 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006270898259450062, + "loss": 0.5565, + "step": 41280 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006270474252157137, + "loss": 0.5613, + "step": 41290 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006270050244864212, + "loss": 0.5661, + "step": 41300 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006269626237571286, + "loss": 0.5549, + "step": 41310 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006269202230278361, + "loss": 0.5698, + "step": 41320 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006268778222985436, + "loss": 0.6032, + "step": 41330 + }, + { + "epoch": 1.75, + "learning_rate": 0.000626835421569251, + "loss": 0.5016, + "step": 41340 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006267930208399585, + "loss": 0.65, + "step": 41350 + }, + { + "epoch": 1.75, + "learning_rate": 0.000626750620110666, + "loss": 0.5221, + "step": 41360 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006267082193813733, + "loss": 0.595, + "step": 41370 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006266658186520809, + "loss": 0.4469, + "step": 41380 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006266234179227884, + "loss": 0.4944, + "step": 41390 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006265810171934957, + "loss": 0.5513, + "step": 41400 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006265386164642032, + "loss": 0.6646, + "step": 41410 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006264962157349107, + "loss": 0.5601, + "step": 41420 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006264538150056181, + "loss": 0.5495, + "step": 41430 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006264114142763256, + "loss": 0.5021, + "step": 41440 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006263690135470331, + "loss": 0.589, + "step": 41450 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006263266128177405, + "loss": 0.6017, + "step": 41460 + }, + { + "epoch": 1.75, + "learning_rate": 0.000626284212088448, + "loss": 0.6146, + "step": 41470 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006262418113591555, + "loss": 0.6958, + "step": 41480 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006261994106298628, + "loss": 0.6282, + "step": 41490 + }, + { + "epoch": 1.75, + "learning_rate": 0.0006261570099005703, + "loss": 0.5522, + "step": 41500 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006261146091712778, + "loss": 0.6089, + "step": 41510 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006260722084419852, + "loss": 0.565, + "step": 41520 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006260298077126927, + "loss": 0.5942, + "step": 41530 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006259874069834002, + "loss": 0.5994, + "step": 41540 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006259450062541076, + "loss": 0.5144, + "step": 41550 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006259026055248151, + "loss": 0.5488, + "step": 41560 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006258602047955224, + "loss": 0.5206, + "step": 41570 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006258178040662299, + "loss": 0.672, + "step": 41580 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006257754033369375, + "loss": 0.5341, + "step": 41590 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006257330026076448, + "loss": 0.5835, + "step": 41600 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006256906018783523, + "loss": 0.6827, + "step": 41610 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006256482011490598, + "loss": 0.5773, + "step": 41620 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006256058004197672, + "loss": 0.563, + "step": 41630 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006255633996904747, + "loss": 0.4976, + "step": 41640 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006255209989611822, + "loss": 0.534, + "step": 41650 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006254785982318895, + "loss": 0.5606, + "step": 41660 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006254361975025971, + "loss": 0.5169, + "step": 41670 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006253937967733046, + "loss": 0.549, + "step": 41680 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006253513960440119, + "loss": 0.7435, + "step": 41690 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006253089953147194, + "loss": 0.5547, + "step": 41700 + }, + { + "epoch": 1.76, + "learning_rate": 0.000625266594585427, + "loss": 0.6174, + "step": 41710 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006252241938561343, + "loss": 0.5857, + "step": 41720 + }, + { + "epoch": 1.76, + "learning_rate": 0.0006251817931268418, + "loss": 0.5375, + "step": 41730 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006251393923975493, + "loss": 0.6071, + "step": 41740 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006250969916682567, + "loss": 0.5182, + "step": 41750 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006250545909389642, + "loss": 0.6664, + "step": 41760 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006250121902096717, + "loss": 0.6367, + "step": 41770 + }, + { + "epoch": 1.77, + "learning_rate": 0.000624969789480379, + "loss": 0.5673, + "step": 41780 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006249273887510866, + "loss": 0.7235, + "step": 41790 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006248849880217941, + "loss": 0.5942, + "step": 41800 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006248425872925014, + "loss": 0.641, + "step": 41810 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006248001865632089, + "loss": 0.6033, + "step": 41820 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006247577858339164, + "loss": 0.6109, + "step": 41830 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006247153851046238, + "loss": 0.6247, + "step": 41840 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006246729843753313, + "loss": 0.5922, + "step": 41850 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006246305836460387, + "loss": 0.6587, + "step": 41860 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006245881829167462, + "loss": 0.5594, + "step": 41870 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006245457821874537, + "loss": 0.7009, + "step": 41880 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006245033814581611, + "loss": 0.6113, + "step": 41890 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006244609807288685, + "loss": 0.4689, + "step": 41900 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006244185799995761, + "loss": 0.4873, + "step": 41910 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006243761792702835, + "loss": 0.5575, + "step": 41920 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006243337785409909, + "loss": 0.5808, + "step": 41930 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006242913778116984, + "loss": 0.5909, + "step": 41940 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006242489770824059, + "loss": 0.5974, + "step": 41950 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006242065763531133, + "loss": 0.6776, + "step": 41960 + }, + { + "epoch": 1.77, + "learning_rate": 0.0006241641756238208, + "loss": 0.5409, + "step": 41970 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006241217748945282, + "loss": 0.5944, + "step": 41980 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006240793741652357, + "loss": 0.5664, + "step": 41990 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006240369734359432, + "loss": 0.621, + "step": 42000 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006239945727066506, + "loss": 0.6263, + "step": 42010 + }, + { + "epoch": 1.78, + "learning_rate": 0.000623952171977358, + "loss": 0.5701, + "step": 42020 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006239097712480655, + "loss": 0.5263, + "step": 42030 + }, + { + "epoch": 1.78, + "learning_rate": 0.000623867370518773, + "loss": 0.5343, + "step": 42040 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006238249697894804, + "loss": 0.7297, + "step": 42050 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006237825690601879, + "loss": 0.6225, + "step": 42060 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006237401683308954, + "loss": 0.6182, + "step": 42070 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006236977676016028, + "loss": 0.6286, + "step": 42080 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006236553668723103, + "loss": 0.5204, + "step": 42090 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006236129661430176, + "loss": 0.5539, + "step": 42100 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006235705654137251, + "loss": 0.6387, + "step": 42110 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006235281646844326, + "loss": 0.6853, + "step": 42120 + }, + { + "epoch": 1.78, + "learning_rate": 0.00062348576395514, + "loss": 0.5343, + "step": 42130 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006234433632258475, + "loss": 0.5768, + "step": 42140 + }, + { + "epoch": 1.78, + "learning_rate": 0.000623400962496555, + "loss": 0.682, + "step": 42150 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006233585617672624, + "loss": 0.6244, + "step": 42160 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006233161610379699, + "loss": 0.5599, + "step": 42170 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006232737603086773, + "loss": 0.5796, + "step": 42180 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006232313595793847, + "loss": 0.5682, + "step": 42190 + }, + { + "epoch": 1.78, + "learning_rate": 0.0006231889588500923, + "loss": 0.5569, + "step": 42200 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006231465581207997, + "loss": 0.5363, + "step": 42210 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006231041573915071, + "loss": 0.6217, + "step": 42220 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006230617566622146, + "loss": 0.5763, + "step": 42230 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006230193559329221, + "loss": 0.6068, + "step": 42240 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006229769552036295, + "loss": 0.5501, + "step": 42250 + }, + { + "epoch": 1.79, + "learning_rate": 0.000622934554474337, + "loss": 0.6336, + "step": 42260 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006228921537450444, + "loss": 0.5064, + "step": 42270 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006228497530157519, + "loss": 0.535, + "step": 42280 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006228073522864594, + "loss": 0.6124, + "step": 42290 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006227649515571668, + "loss": 0.5892, + "step": 42300 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006227225508278742, + "loss": 0.5643, + "step": 42310 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006226801500985818, + "loss": 0.628, + "step": 42320 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006226377493692892, + "loss": 0.6752, + "step": 42330 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006225953486399966, + "loss": 0.6427, + "step": 42340 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006225529479107041, + "loss": 0.6457, + "step": 42350 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006225105471814116, + "loss": 0.6105, + "step": 42360 + }, + { + "epoch": 1.79, + "learning_rate": 0.000622468146452119, + "loss": 0.6257, + "step": 42370 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006224257457228265, + "loss": 0.5828, + "step": 42380 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006223833449935339, + "loss": 0.6686, + "step": 42390 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006223409442642414, + "loss": 0.5731, + "step": 42400 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006222985435349489, + "loss": 0.6268, + "step": 42410 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006222561428056563, + "loss": 0.5006, + "step": 42420 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006222137420763637, + "loss": 0.5582, + "step": 42430 + }, + { + "epoch": 1.79, + "learning_rate": 0.0006221713413470712, + "loss": 0.4704, + "step": 42440 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006221289406177787, + "loss": 0.6218, + "step": 42450 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006220865398884861, + "loss": 0.6605, + "step": 42460 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006220441391591935, + "loss": 0.5671, + "step": 42470 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006220017384299011, + "loss": 0.6484, + "step": 42480 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006219593377006085, + "loss": 0.503, + "step": 42490 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006219169369713159, + "loss": 0.5798, + "step": 42500 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006218745362420233, + "loss": 0.5666, + "step": 42510 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006218321355127309, + "loss": 0.618, + "step": 42520 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006217897347834383, + "loss": 0.7225, + "step": 42530 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006217473340541457, + "loss": 0.5401, + "step": 42540 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006217049333248532, + "loss": 0.6292, + "step": 42550 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006216625325955607, + "loss": 0.5774, + "step": 42560 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006216201318662682, + "loss": 0.5775, + "step": 42570 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006215777311369756, + "loss": 0.6216, + "step": 42580 + }, + { + "epoch": 1.8, + "learning_rate": 0.000621535330407683, + "loss": 0.6002, + "step": 42590 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006214929296783906, + "loss": 0.6121, + "step": 42600 + }, + { + "epoch": 1.8, + "learning_rate": 0.000621450528949098, + "loss": 0.6587, + "step": 42610 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006214081282198054, + "loss": 0.4774, + "step": 42620 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006213657274905128, + "loss": 0.5954, + "step": 42630 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006213233267612203, + "loss": 0.546, + "step": 42640 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006212809260319278, + "loss": 0.6274, + "step": 42650 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006212385253026352, + "loss": 0.549, + "step": 42660 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006211961245733427, + "loss": 0.5217, + "step": 42670 + }, + { + "epoch": 1.8, + "learning_rate": 0.0006211537238440502, + "loss": 0.6003, + "step": 42680 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006211113231147576, + "loss": 0.5325, + "step": 42690 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006210689223854651, + "loss": 0.5656, + "step": 42700 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006210265216561725, + "loss": 0.5843, + "step": 42710 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006209841209268799, + "loss": 0.5326, + "step": 42720 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006209417201975874, + "loss": 0.6718, + "step": 42730 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006208993194682949, + "loss": 0.5997, + "step": 42740 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006208569187390023, + "loss": 0.555, + "step": 42750 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006208145180097098, + "loss": 0.5723, + "step": 42760 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006207721172804173, + "loss": 0.7162, + "step": 42770 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006207297165511247, + "loss": 0.6034, + "step": 42780 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006206873158218321, + "loss": 0.4909, + "step": 42790 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006206449150925396, + "loss": 0.6001, + "step": 42800 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006206025143632471, + "loss": 0.5577, + "step": 42810 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006205601136339545, + "loss": 0.5302, + "step": 42820 + }, + { + "epoch": 1.81, + "learning_rate": 0.000620517712904662, + "loss": 0.7032, + "step": 42830 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006204753121753694, + "loss": 0.6105, + "step": 42840 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006204329114460769, + "loss": 0.5439, + "step": 42850 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006203905107167844, + "loss": 0.6147, + "step": 42860 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006203481099874918, + "loss": 0.6507, + "step": 42870 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006203057092581992, + "loss": 0.537, + "step": 42880 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006202633085289068, + "loss": 0.5432, + "step": 42890 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006202209077996142, + "loss": 0.565, + "step": 42900 + }, + { + "epoch": 1.81, + "learning_rate": 0.0006201785070703216, + "loss": 0.5427, + "step": 42910 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006201361063410291, + "loss": 0.5936, + "step": 42920 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006200937056117366, + "loss": 0.64, + "step": 42930 + }, + { + "epoch": 1.82, + "learning_rate": 0.000620051304882444, + "loss": 0.5198, + "step": 42940 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006200089041531515, + "loss": 0.5847, + "step": 42950 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006199665034238589, + "loss": 0.5382, + "step": 42960 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006199241026945664, + "loss": 0.5916, + "step": 42970 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006198817019652739, + "loss": 0.609, + "step": 42980 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006198393012359813, + "loss": 0.6113, + "step": 42990 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006197969005066887, + "loss": 0.6072, + "step": 43000 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006197544997773963, + "loss": 0.6521, + "step": 43010 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006197120990481037, + "loss": 0.6341, + "step": 43020 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006196696983188111, + "loss": 0.5327, + "step": 43030 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006196272975895185, + "loss": 0.5515, + "step": 43040 + }, + { + "epoch": 1.82, + "learning_rate": 0.000619584896860226, + "loss": 0.536, + "step": 43050 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006195424961309335, + "loss": 0.6058, + "step": 43060 + }, + { + "epoch": 1.82, + "learning_rate": 0.000619500095401641, + "loss": 0.6576, + "step": 43070 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006194576946723483, + "loss": 0.6169, + "step": 43080 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006194152939430559, + "loss": 0.5565, + "step": 43090 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006193728932137633, + "loss": 0.5909, + "step": 43100 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006193304924844707, + "loss": 0.6015, + "step": 43110 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006192880917551782, + "loss": 0.5745, + "step": 43120 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006192456910258857, + "loss": 0.6315, + "step": 43130 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006192032902965931, + "loss": 0.5972, + "step": 43140 + }, + { + "epoch": 1.82, + "learning_rate": 0.0006191608895673006, + "loss": 0.5302, + "step": 43150 + }, + { + "epoch": 1.83, + "learning_rate": 0.000619118488838008, + "loss": 0.5799, + "step": 43160 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006190760881087155, + "loss": 0.5787, + "step": 43170 + }, + { + "epoch": 1.83, + "learning_rate": 0.000619033687379423, + "loss": 0.6096, + "step": 43180 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006189912866501304, + "loss": 0.6091, + "step": 43190 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006189488859208378, + "loss": 0.4776, + "step": 43200 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006189064851915454, + "loss": 0.647, + "step": 43210 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006188640844622528, + "loss": 0.5232, + "step": 43220 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006188216837329602, + "loss": 0.5694, + "step": 43230 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006187792830036677, + "loss": 0.7137, + "step": 43240 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006187368822743751, + "loss": 0.6293, + "step": 43250 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006186944815450826, + "loss": 0.5722, + "step": 43260 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006186520808157901, + "loss": 0.5468, + "step": 43270 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006186096800864975, + "loss": 0.6743, + "step": 43280 + }, + { + "epoch": 1.83, + "learning_rate": 0.000618567279357205, + "loss": 0.5774, + "step": 43290 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006185248786279125, + "loss": 0.665, + "step": 43300 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006184824778986199, + "loss": 0.6315, + "step": 43310 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006184400771693273, + "loss": 0.4686, + "step": 43320 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006183976764400348, + "loss": 0.6186, + "step": 43330 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006183552757107422, + "loss": 0.5621, + "step": 43340 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006183128749814497, + "loss": 0.5727, + "step": 43350 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006182704742521572, + "loss": 0.5476, + "step": 43360 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006182280735228646, + "loss": 0.5919, + "step": 43370 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006181856727935721, + "loss": 0.6658, + "step": 43380 + }, + { + "epoch": 1.83, + "learning_rate": 0.0006181432720642796, + "loss": 0.5833, + "step": 43390 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006181008713349869, + "loss": 0.5359, + "step": 43400 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006180584706056945, + "loss": 0.6815, + "step": 43410 + }, + { + "epoch": 1.84, + "learning_rate": 0.000618016069876402, + "loss": 0.6158, + "step": 43420 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006179736691471093, + "loss": 0.5931, + "step": 43430 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006179312684178168, + "loss": 0.5563, + "step": 43440 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006178888676885243, + "loss": 0.6955, + "step": 43450 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006178464669592317, + "loss": 0.5506, + "step": 43460 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006178040662299392, + "loss": 0.5916, + "step": 43470 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006177616655006467, + "loss": 0.5318, + "step": 43480 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006177192647713541, + "loss": 0.5677, + "step": 43490 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006176768640420616, + "loss": 0.5753, + "step": 43500 + }, + { + "epoch": 1.84, + "learning_rate": 0.000617634463312769, + "loss": 0.4892, + "step": 43510 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006175920625834764, + "loss": 0.5645, + "step": 43520 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006175496618541839, + "loss": 0.6074, + "step": 43530 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006175072611248915, + "loss": 0.5384, + "step": 43540 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006174648603955988, + "loss": 0.5266, + "step": 43550 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006174224596663063, + "loss": 0.5903, + "step": 43560 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006173800589370137, + "loss": 0.6302, + "step": 43570 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006173376582077212, + "loss": 0.667, + "step": 43580 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006172952574784287, + "loss": 0.6498, + "step": 43590 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006172528567491361, + "loss": 0.5569, + "step": 43600 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006172104560198435, + "loss": 0.5859, + "step": 43610 + }, + { + "epoch": 1.84, + "learning_rate": 0.0006171680552905511, + "loss": 0.5957, + "step": 43620 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006171256545612585, + "loss": 0.5008, + "step": 43630 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006170832538319659, + "loss": 0.5327, + "step": 43640 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006170408531026734, + "loss": 0.7035, + "step": 43650 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006169984523733808, + "loss": 0.7324, + "step": 43660 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006169560516440883, + "loss": 0.6243, + "step": 43670 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006169136509147958, + "loss": 0.7245, + "step": 43680 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006168712501855031, + "loss": 0.6368, + "step": 43690 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006168288494562107, + "loss": 0.5457, + "step": 43700 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006167864487269182, + "loss": 0.5525, + "step": 43710 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006167440479976255, + "loss": 0.5622, + "step": 43720 + }, + { + "epoch": 1.85, + "learning_rate": 0.000616701647268333, + "loss": 0.5707, + "step": 43730 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006166592465390406, + "loss": 0.6482, + "step": 43740 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006166168458097479, + "loss": 0.5924, + "step": 43750 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006165744450804554, + "loss": 0.6356, + "step": 43760 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006165320443511629, + "loss": 0.7152, + "step": 43770 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006164896436218703, + "loss": 0.5268, + "step": 43780 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006164472428925778, + "loss": 0.5331, + "step": 43790 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006164048421632853, + "loss": 0.6067, + "step": 43800 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006163624414339926, + "loss": 0.5553, + "step": 43810 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006163200407047002, + "loss": 0.5467, + "step": 43820 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006162776399754077, + "loss": 0.4813, + "step": 43830 + }, + { + "epoch": 1.85, + "learning_rate": 0.000616235239246115, + "loss": 0.5494, + "step": 43840 + }, + { + "epoch": 1.85, + "learning_rate": 0.0006161928385168225, + "loss": 0.533, + "step": 43850 + }, + { + "epoch": 1.85, + "learning_rate": 0.00061615043778753, + "loss": 0.6369, + "step": 43860 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006161080370582374, + "loss": 0.7125, + "step": 43870 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006160656363289449, + "loss": 0.6046, + "step": 43880 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006160232355996524, + "loss": 0.5251, + "step": 43890 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006159808348703598, + "loss": 0.5718, + "step": 43900 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006159384341410673, + "loss": 0.6478, + "step": 43910 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006158960334117748, + "loss": 0.5402, + "step": 43920 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006158536326824821, + "loss": 0.4754, + "step": 43930 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006158112319531897, + "loss": 0.6246, + "step": 43940 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006157688312238972, + "loss": 0.5531, + "step": 43950 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006157264304946045, + "loss": 0.6153, + "step": 43960 + }, + { + "epoch": 1.86, + "learning_rate": 0.000615684029765312, + "loss": 0.5588, + "step": 43970 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006156416290360194, + "loss": 0.5548, + "step": 43980 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006155992283067269, + "loss": 0.6271, + "step": 43990 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006155568275774344, + "loss": 0.5514, + "step": 44000 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006155144268481417, + "loss": 0.5239, + "step": 44010 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006154720261188493, + "loss": 0.6776, + "step": 44020 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006154296253895568, + "loss": 0.6317, + "step": 44030 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006153872246602641, + "loss": 0.6495, + "step": 44040 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006153448239309716, + "loss": 0.5355, + "step": 44050 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006153024232016791, + "loss": 0.5914, + "step": 44060 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006152600224723865, + "loss": 0.5448, + "step": 44070 + }, + { + "epoch": 1.86, + "learning_rate": 0.000615217621743094, + "loss": 0.5764, + "step": 44080 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006151752210138015, + "loss": 0.5872, + "step": 44090 + }, + { + "epoch": 1.86, + "learning_rate": 0.0006151328202845089, + "loss": 0.4692, + "step": 44100 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006150904195552164, + "loss": 0.6471, + "step": 44110 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006150480188259239, + "loss": 0.5143, + "step": 44120 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006150056180966312, + "loss": 0.6618, + "step": 44130 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006149632173673387, + "loss": 0.501, + "step": 44140 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006149208166380463, + "loss": 0.6278, + "step": 44150 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006148784159087536, + "loss": 0.5065, + "step": 44160 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006148360151794611, + "loss": 0.6516, + "step": 44170 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006147936144501686, + "loss": 0.7077, + "step": 44180 + }, + { + "epoch": 1.87, + "learning_rate": 0.000614751213720876, + "loss": 0.6638, + "step": 44190 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006147088129915835, + "loss": 0.5935, + "step": 44200 + }, + { + "epoch": 1.87, + "learning_rate": 0.000614666412262291, + "loss": 0.5317, + "step": 44210 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006146240115329983, + "loss": 0.591, + "step": 44220 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006145816108037059, + "loss": 0.5809, + "step": 44230 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006145392100744134, + "loss": 0.5758, + "step": 44240 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006144968093451207, + "loss": 0.6067, + "step": 44250 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006144544086158282, + "loss": 0.6172, + "step": 44260 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006144120078865357, + "loss": 0.6034, + "step": 44270 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006143696071572431, + "loss": 0.5916, + "step": 44280 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006143272064279506, + "loss": 0.523, + "step": 44290 + }, + { + "epoch": 1.87, + "learning_rate": 0.000614284805698658, + "loss": 0.5579, + "step": 44300 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006142424049693655, + "loss": 0.6438, + "step": 44310 + }, + { + "epoch": 1.87, + "learning_rate": 0.000614200004240073, + "loss": 0.6097, + "step": 44320 + }, + { + "epoch": 1.87, + "learning_rate": 0.0006141576035107804, + "loss": 0.5954, + "step": 44330 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006141152027814878, + "loss": 0.5392, + "step": 44340 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006140728020521954, + "loss": 0.6803, + "step": 44350 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006140304013229028, + "loss": 0.5811, + "step": 44360 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006139880005936102, + "loss": 0.6774, + "step": 44370 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006139455998643177, + "loss": 0.6091, + "step": 44380 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006139031991350252, + "loss": 0.4999, + "step": 44390 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006138607984057326, + "loss": 0.5748, + "step": 44400 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006138183976764401, + "loss": 0.5628, + "step": 44410 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006137759969471474, + "loss": 0.5733, + "step": 44420 + }, + { + "epoch": 1.88, + "learning_rate": 0.000613733596217855, + "loss": 0.5116, + "step": 44430 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006136911954885625, + "loss": 0.596, + "step": 44440 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006136487947592698, + "loss": 0.6044, + "step": 44450 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006136063940299773, + "loss": 0.6229, + "step": 44460 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006135639933006849, + "loss": 0.652, + "step": 44470 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006135215925713922, + "loss": 0.5762, + "step": 44480 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006134791918420997, + "loss": 0.5425, + "step": 44490 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006134367911128072, + "loss": 0.6174, + "step": 44500 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006133943903835146, + "loss": 0.5959, + "step": 44510 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006133519896542221, + "loss": 0.558, + "step": 44520 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006133095889249296, + "loss": 0.571, + "step": 44530 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006132671881956369, + "loss": 0.492, + "step": 44540 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006132247874663445, + "loss": 0.7502, + "step": 44550 + }, + { + "epoch": 1.88, + "learning_rate": 0.000613182386737052, + "loss": 0.6024, + "step": 44560 + }, + { + "epoch": 1.88, + "learning_rate": 0.0006131399860077593, + "loss": 0.5883, + "step": 44570 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006130975852784668, + "loss": 0.5787, + "step": 44580 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006130551845491743, + "loss": 0.5087, + "step": 44590 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006130127838198817, + "loss": 0.5823, + "step": 44600 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006129703830905892, + "loss": 0.6235, + "step": 44610 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006129279823612966, + "loss": 0.5018, + "step": 44620 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006128855816320041, + "loss": 0.625, + "step": 44630 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006128431809027116, + "loss": 0.553, + "step": 44640 + }, + { + "epoch": 1.89, + "learning_rate": 0.000612800780173419, + "loss": 0.4955, + "step": 44650 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006127583794441264, + "loss": 0.5309, + "step": 44660 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006127159787148339, + "loss": 0.6306, + "step": 44670 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006126735779855414, + "loss": 0.5178, + "step": 44680 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006126311772562488, + "loss": 0.5506, + "step": 44690 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006125887765269563, + "loss": 0.5605, + "step": 44700 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006125463757976638, + "loss": 0.5941, + "step": 44710 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006125039750683712, + "loss": 0.493, + "step": 44720 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006124615743390787, + "loss": 0.6731, + "step": 44730 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006124191736097861, + "loss": 0.5892, + "step": 44740 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006123767728804935, + "loss": 0.5891, + "step": 44750 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006123343721512011, + "loss": 0.5475, + "step": 44760 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006122919714219085, + "loss": 0.5666, + "step": 44770 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006122495706926159, + "loss": 0.5683, + "step": 44780 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006122071699633234, + "loss": 0.5549, + "step": 44790 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006121647692340309, + "loss": 0.4885, + "step": 44800 + }, + { + "epoch": 1.89, + "learning_rate": 0.0006121223685047383, + "loss": 0.5739, + "step": 44810 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006120799677754458, + "loss": 0.697, + "step": 44820 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006120375670461532, + "loss": 0.5882, + "step": 44830 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006119951663168607, + "loss": 0.6313, + "step": 44840 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006119527655875682, + "loss": 0.5344, + "step": 44850 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006119103648582756, + "loss": 0.54, + "step": 44860 + }, + { + "epoch": 1.9, + "learning_rate": 0.000611867964128983, + "loss": 0.6136, + "step": 44870 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006118255633996905, + "loss": 0.6768, + "step": 44880 + }, + { + "epoch": 1.9, + "learning_rate": 0.000611783162670398, + "loss": 0.5452, + "step": 44890 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006117407619411054, + "loss": 0.5857, + "step": 44900 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006116983612118129, + "loss": 0.653, + "step": 44910 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006116559604825204, + "loss": 0.5315, + "step": 44920 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006116135597532278, + "loss": 0.4933, + "step": 44930 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006115711590239352, + "loss": 0.7106, + "step": 44940 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006115287582946426, + "loss": 0.6066, + "step": 44950 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006114863575653502, + "loss": 0.5959, + "step": 44960 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006114439568360576, + "loss": 0.5163, + "step": 44970 + }, + { + "epoch": 1.9, + "learning_rate": 0.000611401556106765, + "loss": 0.5967, + "step": 44980 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006113591553774725, + "loss": 0.5914, + "step": 44990 + }, + { + "epoch": 1.9, + "learning_rate": 0.00061131675464818, + "loss": 0.5842, + "step": 45000 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006112743539188874, + "loss": 0.4903, + "step": 45010 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006112319531895949, + "loss": 0.5452, + "step": 45020 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006111895524603023, + "loss": 0.6404, + "step": 45030 + }, + { + "epoch": 1.9, + "learning_rate": 0.0006111471517310098, + "loss": 0.5661, + "step": 45040 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006111047510017173, + "loss": 0.6001, + "step": 45050 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006110623502724247, + "loss": 0.5376, + "step": 45060 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006110199495431321, + "loss": 0.526, + "step": 45070 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006109775488138397, + "loss": 0.5801, + "step": 45080 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006109351480845471, + "loss": 0.5569, + "step": 45090 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006108927473552545, + "loss": 0.5212, + "step": 45100 + }, + { + "epoch": 1.91, + "learning_rate": 0.000610850346625962, + "loss": 0.5371, + "step": 45110 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006108079458966695, + "loss": 0.7621, + "step": 45120 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006107655451673769, + "loss": 0.6012, + "step": 45130 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006107231444380844, + "loss": 0.6583, + "step": 45140 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006106807437087918, + "loss": 0.5542, + "step": 45150 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006106383429794993, + "loss": 0.5806, + "step": 45160 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006105959422502068, + "loss": 0.6262, + "step": 45170 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006105535415209142, + "loss": 0.472, + "step": 45180 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006105111407916216, + "loss": 0.59, + "step": 45190 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006104687400623291, + "loss": 0.6389, + "step": 45200 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006104263393330366, + "loss": 0.6566, + "step": 45210 + }, + { + "epoch": 1.91, + "learning_rate": 0.000610383938603744, + "loss": 0.6652, + "step": 45220 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006103415378744514, + "loss": 0.6456, + "step": 45230 + }, + { + "epoch": 1.91, + "learning_rate": 0.000610299137145159, + "loss": 0.5463, + "step": 45240 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006102567364158664, + "loss": 0.618, + "step": 45250 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006102143356865738, + "loss": 0.5365, + "step": 45260 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006101719349572813, + "loss": 0.6094, + "step": 45270 + }, + { + "epoch": 1.91, + "learning_rate": 0.0006101295342279887, + "loss": 0.5744, + "step": 45280 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006100871334986962, + "loss": 0.6096, + "step": 45290 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006100447327694037, + "loss": 0.7585, + "step": 45300 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006100023320401111, + "loss": 0.5543, + "step": 45310 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006099599313108186, + "loss": 0.5147, + "step": 45320 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006099175305815261, + "loss": 0.6975, + "step": 45330 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006098751298522335, + "loss": 0.6398, + "step": 45340 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006098327291229409, + "loss": 0.5749, + "step": 45350 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006097903283936483, + "loss": 0.5769, + "step": 45360 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006097479276643559, + "loss": 0.74, + "step": 45370 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006097055269350633, + "loss": 0.5703, + "step": 45380 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006096631262057707, + "loss": 0.5404, + "step": 45390 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006096207254764782, + "loss": 0.6391, + "step": 45400 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006095783247471857, + "loss": 0.6135, + "step": 45410 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006095359240178931, + "loss": 0.6167, + "step": 45420 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006094935232886006, + "loss": 0.603, + "step": 45430 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006094511225593081, + "loss": 0.6398, + "step": 45440 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006094087218300155, + "loss": 0.6446, + "step": 45450 + }, + { + "epoch": 1.92, + "learning_rate": 0.000609366321100723, + "loss": 0.637, + "step": 45460 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006093239203714304, + "loss": 0.6142, + "step": 45470 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006092815196421378, + "loss": 0.5809, + "step": 45480 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006092391189128453, + "loss": 0.5511, + "step": 45490 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006091967181835528, + "loss": 0.5633, + "step": 45500 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006091543174542602, + "loss": 0.5239, + "step": 45510 + }, + { + "epoch": 1.92, + "learning_rate": 0.0006091119167249677, + "loss": 0.5826, + "step": 45520 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006090695159956752, + "loss": 0.6117, + "step": 45530 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006090271152663826, + "loss": 0.6223, + "step": 45540 + }, + { + "epoch": 1.93, + "learning_rate": 0.00060898471453709, + "loss": 0.6524, + "step": 45550 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006089423138077975, + "loss": 0.6316, + "step": 45560 + }, + { + "epoch": 1.93, + "learning_rate": 0.000608899913078505, + "loss": 0.5877, + "step": 45570 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006088575123492124, + "loss": 0.5368, + "step": 45580 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006088151116199199, + "loss": 0.5482, + "step": 45590 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006087727108906273, + "loss": 0.5637, + "step": 45600 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006087303101613348, + "loss": 0.6886, + "step": 45610 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006086879094320423, + "loss": 0.6657, + "step": 45620 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006086455087027497, + "loss": 0.5644, + "step": 45630 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006086031079734571, + "loss": 0.5414, + "step": 45640 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006085607072441647, + "loss": 0.6004, + "step": 45650 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006085183065148721, + "loss": 0.6727, + "step": 45660 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006084759057855795, + "loss": 0.5909, + "step": 45670 + }, + { + "epoch": 1.93, + "learning_rate": 0.000608433505056287, + "loss": 0.68, + "step": 45680 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006083911043269945, + "loss": 0.6293, + "step": 45690 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006083487035977019, + "loss": 0.5979, + "step": 45700 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006083063028684094, + "loss": 0.5941, + "step": 45710 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006082639021391168, + "loss": 0.5237, + "step": 45720 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006082215014098243, + "loss": 0.5852, + "step": 45730 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006081791006805318, + "loss": 0.4999, + "step": 45740 + }, + { + "epoch": 1.93, + "learning_rate": 0.0006081366999512392, + "loss": 0.632, + "step": 45750 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006080942992219466, + "loss": 0.5527, + "step": 45760 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006080518984926542, + "loss": 0.5195, + "step": 45770 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006080094977633616, + "loss": 0.538, + "step": 45780 + }, + { + "epoch": 1.94, + "learning_rate": 0.000607967097034069, + "loss": 0.5172, + "step": 45790 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006079246963047765, + "loss": 0.6341, + "step": 45800 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006078822955754839, + "loss": 0.6048, + "step": 45810 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006078398948461914, + "loss": 0.5927, + "step": 45820 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006077974941168989, + "loss": 0.5849, + "step": 45830 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006077550933876062, + "loss": 0.5208, + "step": 45840 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006077126926583138, + "loss": 0.5738, + "step": 45850 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006076702919290213, + "loss": 0.5654, + "step": 45860 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006076278911997286, + "loss": 0.6391, + "step": 45870 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006075854904704361, + "loss": 0.6384, + "step": 45880 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006075430897411437, + "loss": 0.6442, + "step": 45890 + }, + { + "epoch": 1.94, + "learning_rate": 0.000607500689011851, + "loss": 0.595, + "step": 45900 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006074582882825585, + "loss": 0.5227, + "step": 45910 + }, + { + "epoch": 1.94, + "learning_rate": 0.000607415887553266, + "loss": 0.4981, + "step": 45920 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006073734868239734, + "loss": 0.6192, + "step": 45930 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006073310860946809, + "loss": 0.5633, + "step": 45940 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006072886853653883, + "loss": 0.611, + "step": 45950 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006072462846360957, + "loss": 0.6079, + "step": 45960 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006072038839068033, + "loss": 0.6901, + "step": 45970 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006071614831775107, + "loss": 0.582, + "step": 45980 + }, + { + "epoch": 1.94, + "learning_rate": 0.0006071190824482181, + "loss": 0.6609, + "step": 45990 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006070766817189256, + "loss": 0.7101, + "step": 46000 + }, + { + "epoch": 1.95, + "learning_rate": 0.000607034280989633, + "loss": 0.6436, + "step": 46010 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006069918802603405, + "loss": 0.5068, + "step": 46020 + }, + { + "epoch": 1.95, + "learning_rate": 0.000606949479531048, + "loss": 0.6014, + "step": 46030 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006069070788017554, + "loss": 0.6152, + "step": 46040 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006068646780724629, + "loss": 0.6296, + "step": 46050 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006068222773431704, + "loss": 0.599, + "step": 46060 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006067798766138778, + "loss": 0.5763, + "step": 46070 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006067374758845852, + "loss": 0.5048, + "step": 46080 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006066950751552927, + "loss": 0.6124, + "step": 46090 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006066526744260002, + "loss": 0.5962, + "step": 46100 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006066102736967076, + "loss": 0.5553, + "step": 46110 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006065678729674151, + "loss": 0.6346, + "step": 46120 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006065254722381225, + "loss": 0.5596, + "step": 46130 + }, + { + "epoch": 1.95, + "learning_rate": 0.00060648307150883, + "loss": 0.508, + "step": 46140 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006064406707795375, + "loss": 0.5891, + "step": 46150 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006063982700502448, + "loss": 0.5864, + "step": 46160 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006063558693209523, + "loss": 0.5487, + "step": 46170 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006063134685916599, + "loss": 0.687, + "step": 46180 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006062710678623672, + "loss": 0.572, + "step": 46190 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006062286671330747, + "loss": 0.5618, + "step": 46200 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006061862664037822, + "loss": 0.5253, + "step": 46210 + }, + { + "epoch": 1.95, + "learning_rate": 0.0006061438656744896, + "loss": 0.5966, + "step": 46220 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006061014649451971, + "loss": 0.6183, + "step": 46230 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006060590642159046, + "loss": 0.6367, + "step": 46240 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006060166634866119, + "loss": 0.4946, + "step": 46250 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006059742627573195, + "loss": 0.597, + "step": 46260 + }, + { + "epoch": 1.96, + "learning_rate": 0.000605931862028027, + "loss": 0.545, + "step": 46270 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006058894612987343, + "loss": 0.5803, + "step": 46280 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006058470605694418, + "loss": 0.5621, + "step": 46290 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006058046598401494, + "loss": 0.5714, + "step": 46300 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006057622591108567, + "loss": 0.6039, + "step": 46310 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006057198583815642, + "loss": 0.6685, + "step": 46320 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006056774576522717, + "loss": 0.6506, + "step": 46330 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006056350569229791, + "loss": 0.5803, + "step": 46340 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006055926561936866, + "loss": 0.5297, + "step": 46350 + }, + { + "epoch": 1.96, + "learning_rate": 0.000605550255464394, + "loss": 0.5265, + "step": 46360 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006055078547351014, + "loss": 0.5858, + "step": 46370 + }, + { + "epoch": 1.96, + "learning_rate": 0.000605465454005809, + "loss": 0.6351, + "step": 46380 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006054230532765165, + "loss": 0.5843, + "step": 46390 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006053806525472238, + "loss": 0.6335, + "step": 46400 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006053382518179313, + "loss": 0.6042, + "step": 46410 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006052958510886387, + "loss": 0.6457, + "step": 46420 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006052534503593462, + "loss": 0.4788, + "step": 46430 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006052110496300537, + "loss": 0.6689, + "step": 46440 + }, + { + "epoch": 1.96, + "learning_rate": 0.000605168648900761, + "loss": 0.5651, + "step": 46450 + }, + { + "epoch": 1.96, + "learning_rate": 0.0006051262481714686, + "loss": 0.5655, + "step": 46460 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006050838474421761, + "loss": 0.5944, + "step": 46470 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006050414467128834, + "loss": 0.5554, + "step": 46480 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006049990459835909, + "loss": 0.5536, + "step": 46490 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006049566452542985, + "loss": 0.4895, + "step": 46500 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006049142445250058, + "loss": 0.7098, + "step": 46510 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006048718437957133, + "loss": 0.6085, + "step": 46520 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006048294430664208, + "loss": 0.544, + "step": 46530 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006047870423371282, + "loss": 0.5347, + "step": 46540 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006047446416078357, + "loss": 0.536, + "step": 46550 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006047022408785432, + "loss": 0.6088, + "step": 46560 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006046598401492505, + "loss": 0.6528, + "step": 46570 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006046174394199581, + "loss": 0.5187, + "step": 46580 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006045750386906656, + "loss": 0.5515, + "step": 46590 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006045326379613729, + "loss": 0.5586, + "step": 46600 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006044902372320804, + "loss": 0.5781, + "step": 46610 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006044478365027879, + "loss": 0.4801, + "step": 46620 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006044054357734953, + "loss": 0.5935, + "step": 46630 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006043630350442028, + "loss": 0.6245, + "step": 46640 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006043206343149103, + "loss": 0.6431, + "step": 46650 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006042782335856177, + "loss": 0.4822, + "step": 46660 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006042358328563252, + "loss": 0.568, + "step": 46670 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006041934321270327, + "loss": 0.6676, + "step": 46680 + }, + { + "epoch": 1.97, + "learning_rate": 0.00060415103139774, + "loss": 0.6664, + "step": 46690 + }, + { + "epoch": 1.97, + "learning_rate": 0.0006041086306684475, + "loss": 0.6917, + "step": 46700 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006040662299391551, + "loss": 0.5591, + "step": 46710 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006040238292098624, + "loss": 0.6007, + "step": 46720 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006039814284805699, + "loss": 0.6154, + "step": 46730 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006039390277512774, + "loss": 0.5721, + "step": 46740 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006038966270219848, + "loss": 0.5543, + "step": 46750 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006038542262926923, + "loss": 0.597, + "step": 46760 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006038118255633996, + "loss": 0.594, + "step": 46770 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006037694248341071, + "loss": 0.6628, + "step": 46780 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006037270241048147, + "loss": 0.6152, + "step": 46790 + }, + { + "epoch": 1.98, + "learning_rate": 0.000603684623375522, + "loss": 0.5721, + "step": 46800 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006036422226462295, + "loss": 0.6436, + "step": 46810 + }, + { + "epoch": 1.98, + "learning_rate": 0.000603599821916937, + "loss": 0.5414, + "step": 46820 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006035574211876444, + "loss": 0.5524, + "step": 46830 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006035150204583519, + "loss": 0.5498, + "step": 46840 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006034726197290594, + "loss": 0.6065, + "step": 46850 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006034302189997667, + "loss": 0.5742, + "step": 46860 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006033878182704743, + "loss": 0.6128, + "step": 46870 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006033454175411818, + "loss": 0.5571, + "step": 46880 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006033030168118891, + "loss": 0.6009, + "step": 46890 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006032606160825966, + "loss": 0.552, + "step": 46900 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006032182153533042, + "loss": 0.6414, + "step": 46910 + }, + { + "epoch": 1.98, + "learning_rate": 0.0006031758146240115, + "loss": 0.5441, + "step": 46920 + }, + { + "epoch": 1.98, + "learning_rate": 0.000603133413894719, + "loss": 0.6564, + "step": 46930 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006030910131654265, + "loss": 0.5307, + "step": 46940 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006030486124361339, + "loss": 0.6455, + "step": 46950 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006030062117068414, + "loss": 0.5962, + "step": 46960 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006029638109775489, + "loss": 0.6292, + "step": 46970 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006029214102482562, + "loss": 0.6203, + "step": 46980 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006028790095189638, + "loss": 0.5581, + "step": 46990 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006028366087896713, + "loss": 0.62, + "step": 47000 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006027942080603786, + "loss": 0.7055, + "step": 47010 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006027518073310861, + "loss": 0.5764, + "step": 47020 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006027094066017936, + "loss": 0.6051, + "step": 47030 + }, + { + "epoch": 1.99, + "learning_rate": 0.000602667005872501, + "loss": 0.5505, + "step": 47040 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006026246051432085, + "loss": 0.5565, + "step": 47050 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006025822044139159, + "loss": 0.6092, + "step": 47060 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006025398036846234, + "loss": 0.5324, + "step": 47070 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006024974029553309, + "loss": 0.589, + "step": 47080 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006024550022260383, + "loss": 0.5959, + "step": 47090 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006024126014967457, + "loss": 0.5362, + "step": 47100 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006023702007674533, + "loss": 0.6606, + "step": 47110 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006023278000381607, + "loss": 0.5745, + "step": 47120 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006022853993088681, + "loss": 0.4986, + "step": 47130 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006022429985795756, + "loss": 0.5306, + "step": 47140 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006022005978502831, + "loss": 0.5907, + "step": 47150 + }, + { + "epoch": 1.99, + "learning_rate": 0.0006021581971209905, + "loss": 0.6354, + "step": 47160 + }, + { + "epoch": 1.99, + "learning_rate": 0.000602115796391698, + "loss": 0.5049, + "step": 47170 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006020733956624054, + "loss": 0.683, + "step": 47180 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006020309949331129, + "loss": 0.6151, + "step": 47190 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006019885942038204, + "loss": 0.5382, + "step": 47200 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006019461934745278, + "loss": 0.6837, + "step": 47210 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006019037927452352, + "loss": 0.5442, + "step": 47220 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006018613920159427, + "loss": 0.5025, + "step": 47230 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006018189912866502, + "loss": 0.6, + "step": 47240 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006017765905573576, + "loss": 0.6435, + "step": 47250 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006017341898280651, + "loss": 0.4832, + "step": 47260 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006016917890987726, + "loss": 0.6172, + "step": 47270 + }, + { + "epoch": 2.0, + "learning_rate": 0.00060164938836948, + "loss": 0.6005, + "step": 47280 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006016069876401875, + "loss": 0.4877, + "step": 47290 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006015645869108948, + "loss": 0.4214, + "step": 47300 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006015221861816023, + "loss": 0.4822, + "step": 47310 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006014797854523099, + "loss": 0.556, + "step": 47320 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006014373847230172, + "loss": 0.5535, + "step": 47330 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006013949839937247, + "loss": 0.5547, + "step": 47340 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006013525832644322, + "loss": 0.5328, + "step": 47350 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006013101825351396, + "loss": 0.5801, + "step": 47360 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006012677818058471, + "loss": 0.646, + "step": 47370 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006012253810765545, + "loss": 0.4888, + "step": 47380 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006011829803472619, + "loss": 0.5579, + "step": 47390 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006011405796179695, + "loss": 0.4464, + "step": 47400 + }, + { + "epoch": 2.0, + "learning_rate": 0.0006010981788886769, + "loss": 0.4954, + "step": 47410 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006010557781593843, + "loss": 0.5144, + "step": 47420 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006010133774300918, + "loss": 0.5419, + "step": 47430 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006009709767007993, + "loss": 0.4752, + "step": 47440 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006009285759715067, + "loss": 0.4848, + "step": 47450 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006008861752422142, + "loss": 0.6621, + "step": 47460 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006008437745129217, + "loss": 0.5249, + "step": 47470 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006008013737836291, + "loss": 0.5356, + "step": 47480 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006007589730543366, + "loss": 0.5444, + "step": 47490 + }, + { + "epoch": 2.01, + "learning_rate": 0.000600716572325044, + "loss": 0.5465, + "step": 47500 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006006741715957514, + "loss": 0.5447, + "step": 47510 + }, + { + "epoch": 2.01, + "learning_rate": 0.000600631770866459, + "loss": 0.5371, + "step": 47520 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006005893701371664, + "loss": 0.5976, + "step": 47530 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006005469694078738, + "loss": 0.7002, + "step": 47540 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006005045686785813, + "loss": 0.5911, + "step": 47550 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006004621679492888, + "loss": 0.523, + "step": 47560 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006004197672199962, + "loss": 0.5444, + "step": 47570 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006003773664907037, + "loss": 0.5343, + "step": 47580 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006003349657614111, + "loss": 0.5129, + "step": 47590 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006002925650321186, + "loss": 0.5073, + "step": 47600 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006002501643028261, + "loss": 0.4659, + "step": 47610 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006002077635735335, + "loss": 0.486, + "step": 47620 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006001653628442409, + "loss": 0.5903, + "step": 47630 + }, + { + "epoch": 2.01, + "learning_rate": 0.0006001229621149484, + "loss": 0.5711, + "step": 47640 + }, + { + "epoch": 2.02, + "learning_rate": 0.0006000805613856559, + "loss": 0.5755, + "step": 47650 + }, + { + "epoch": 2.02, + "learning_rate": 0.0006000381606563633, + "loss": 0.5813, + "step": 47660 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005999957599270707, + "loss": 0.5408, + "step": 47670 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005999533591977783, + "loss": 0.5102, + "step": 47680 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005999109584684857, + "loss": 0.4828, + "step": 47690 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005998685577391931, + "loss": 0.6098, + "step": 47700 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005998261570099006, + "loss": 0.4746, + "step": 47710 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005997837562806081, + "loss": 0.5494, + "step": 47720 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005997413555513155, + "loss": 0.4538, + "step": 47730 + }, + { + "epoch": 2.02, + "learning_rate": 0.000599698954822023, + "loss": 0.5321, + "step": 47740 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005996565540927304, + "loss": 0.4866, + "step": 47750 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005996141533634379, + "loss": 0.629, + "step": 47760 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005995717526341454, + "loss": 0.5901, + "step": 47770 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005995293519048528, + "loss": 0.529, + "step": 47780 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005994869511755602, + "loss": 0.6794, + "step": 47790 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005994445504462678, + "loss": 0.5362, + "step": 47800 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005994021497169752, + "loss": 0.584, + "step": 47810 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005993597489876826, + "loss": 0.6118, + "step": 47820 + }, + { + "epoch": 2.02, + "learning_rate": 0.00059931734825839, + "loss": 0.547, + "step": 47830 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005992749475290975, + "loss": 0.5298, + "step": 47840 + }, + { + "epoch": 2.02, + "learning_rate": 0.000599232546799805, + "loss": 0.5472, + "step": 47850 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005991901460705124, + "loss": 0.585, + "step": 47860 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005991477453412199, + "loss": 0.5333, + "step": 47870 + }, + { + "epoch": 2.02, + "learning_rate": 0.0005991053446119274, + "loss": 0.541, + "step": 47880 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005990629438826348, + "loss": 0.4582, + "step": 47890 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005990205431533423, + "loss": 0.5284, + "step": 47900 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005989781424240497, + "loss": 0.5652, + "step": 47910 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005989357416947572, + "loss": 0.5085, + "step": 47920 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005988933409654647, + "loss": 0.5001, + "step": 47930 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005988509402361721, + "loss": 0.5362, + "step": 47940 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005988085395068795, + "loss": 0.5168, + "step": 47950 + }, + { + "epoch": 2.03, + "learning_rate": 0.000598766138777587, + "loss": 0.5041, + "step": 47960 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005987237380482945, + "loss": 0.4952, + "step": 47970 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005986813373190019, + "loss": 0.5274, + "step": 47980 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005986389365897093, + "loss": 0.5558, + "step": 47990 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005985965358604169, + "loss": 0.5881, + "step": 48000 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005985541351311243, + "loss": 0.6355, + "step": 48010 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005985117344018317, + "loss": 0.5972, + "step": 48020 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005984693336725392, + "loss": 0.5286, + "step": 48030 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005984269329432466, + "loss": 0.5369, + "step": 48040 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005983845322139541, + "loss": 0.588, + "step": 48050 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005983421314846616, + "loss": 0.4542, + "step": 48060 + }, + { + "epoch": 2.03, + "learning_rate": 0.000598299730755369, + "loss": 0.5687, + "step": 48070 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005982573300260765, + "loss": 0.6087, + "step": 48080 + }, + { + "epoch": 2.03, + "learning_rate": 0.000598214929296784, + "loss": 0.4903, + "step": 48090 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005981725285674914, + "loss": 0.6037, + "step": 48100 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005981301278381988, + "loss": 0.4358, + "step": 48110 + }, + { + "epoch": 2.03, + "learning_rate": 0.0005980877271089063, + "loss": 0.5041, + "step": 48120 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005980453263796138, + "loss": 0.492, + "step": 48130 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005980029256503212, + "loss": 0.5596, + "step": 48140 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005979605249210287, + "loss": 0.5638, + "step": 48150 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005979181241917361, + "loss": 0.5343, + "step": 48160 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005978757234624436, + "loss": 0.5111, + "step": 48170 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005978333227331511, + "loss": 0.5832, + "step": 48180 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005977909220038585, + "loss": 0.5941, + "step": 48190 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005977485212745659, + "loss": 0.5591, + "step": 48200 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005977061205452735, + "loss": 0.5082, + "step": 48210 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005976637198159809, + "loss": 0.6203, + "step": 48220 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005976213190866883, + "loss": 0.5419, + "step": 48230 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005975789183573957, + "loss": 0.6102, + "step": 48240 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005975365176281033, + "loss": 0.5592, + "step": 48250 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005974941168988107, + "loss": 0.5151, + "step": 48260 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005974517161695181, + "loss": 0.5853, + "step": 48270 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005974093154402255, + "loss": 0.5653, + "step": 48280 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005973669147109331, + "loss": 0.4786, + "step": 48290 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005973245139816405, + "loss": 0.5053, + "step": 48300 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005972821132523479, + "loss": 0.539, + "step": 48310 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005972397125230554, + "loss": 0.4795, + "step": 48320 + }, + { + "epoch": 2.04, + "learning_rate": 0.000597197311793763, + "loss": 0.535, + "step": 48330 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005971549110644703, + "loss": 0.5899, + "step": 48340 + }, + { + "epoch": 2.04, + "learning_rate": 0.0005971125103351778, + "loss": 0.4316, + "step": 48350 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005970701096058852, + "loss": 0.5189, + "step": 48360 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005970277088765927, + "loss": 0.4826, + "step": 48370 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005969853081473002, + "loss": 0.5916, + "step": 48380 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005969429074180076, + "loss": 0.6229, + "step": 48390 + }, + { + "epoch": 2.05, + "learning_rate": 0.000596900506688715, + "loss": 0.479, + "step": 48400 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005968581059594226, + "loss": 0.4506, + "step": 48410 + }, + { + "epoch": 2.05, + "learning_rate": 0.00059681570523013, + "loss": 0.5624, + "step": 48420 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005967733045008374, + "loss": 0.5045, + "step": 48430 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005967309037715449, + "loss": 0.5681, + "step": 48440 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005966885030422524, + "loss": 0.4812, + "step": 48450 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005966461023129598, + "loss": 0.5523, + "step": 48460 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005966037015836673, + "loss": 0.5555, + "step": 48470 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005965613008543747, + "loss": 0.6421, + "step": 48480 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005965189001250822, + "loss": 0.5489, + "step": 48490 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005964764993957897, + "loss": 0.5237, + "step": 48500 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005964340986664971, + "loss": 0.5918, + "step": 48510 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005963916979372045, + "loss": 0.5362, + "step": 48520 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005963492972079121, + "loss": 0.6179, + "step": 48530 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005963068964786195, + "loss": 0.5668, + "step": 48540 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005962644957493269, + "loss": 0.5857, + "step": 48550 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005962220950200344, + "loss": 0.5496, + "step": 48560 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005961796942907418, + "loss": 0.603, + "step": 48570 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005961372935614493, + "loss": 0.5505, + "step": 48580 + }, + { + "epoch": 2.05, + "learning_rate": 0.0005960948928321568, + "loss": 0.5916, + "step": 48590 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005960524921028641, + "loss": 0.5834, + "step": 48600 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005960100913735717, + "loss": 0.5816, + "step": 48610 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005959676906442792, + "loss": 0.4856, + "step": 48620 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005959252899149865, + "loss": 0.5361, + "step": 48630 + }, + { + "epoch": 2.06, + "learning_rate": 0.000595882889185694, + "loss": 0.5317, + "step": 48640 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005958404884564015, + "loss": 0.5581, + "step": 48650 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005957980877271089, + "loss": 0.4612, + "step": 48660 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005957556869978164, + "loss": 0.5933, + "step": 48670 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005957132862685239, + "loss": 0.5767, + "step": 48680 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005956708855392313, + "loss": 0.5853, + "step": 48690 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005956284848099388, + "loss": 0.5457, + "step": 48700 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005955860840806463, + "loss": 0.5193, + "step": 48710 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005955436833513536, + "loss": 0.4677, + "step": 48720 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005955012826220611, + "loss": 0.5892, + "step": 48730 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005954588818927687, + "loss": 0.5073, + "step": 48740 + }, + { + "epoch": 2.06, + "learning_rate": 0.000595416481163476, + "loss": 0.4742, + "step": 48750 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005953740804341835, + "loss": 0.6194, + "step": 48760 + }, + { + "epoch": 2.06, + "learning_rate": 0.000595331679704891, + "loss": 0.5076, + "step": 48770 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005952892789755984, + "loss": 0.5721, + "step": 48780 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005952468782463059, + "loss": 0.5754, + "step": 48790 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005952044775170133, + "loss": 0.4978, + "step": 48800 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005951620767877207, + "loss": 0.5219, + "step": 48810 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005951196760584283, + "loss": 0.5751, + "step": 48820 + }, + { + "epoch": 2.06, + "learning_rate": 0.0005950772753291357, + "loss": 0.5683, + "step": 48830 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005950348745998431, + "loss": 0.5748, + "step": 48840 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005949924738705506, + "loss": 0.5972, + "step": 48850 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005949500731412581, + "loss": 0.5945, + "step": 48860 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005949076724119655, + "loss": 0.4958, + "step": 48870 + }, + { + "epoch": 2.07, + "learning_rate": 0.000594865271682673, + "loss": 0.4413, + "step": 48880 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005948228709533803, + "loss": 0.549, + "step": 48890 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005947804702240879, + "loss": 0.4655, + "step": 48900 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005947380694947954, + "loss": 0.5296, + "step": 48910 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005946956687655027, + "loss": 0.6241, + "step": 48920 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005946532680362102, + "loss": 0.6697, + "step": 48930 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005946108673069178, + "loss": 0.5372, + "step": 48940 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005945684665776251, + "loss": 0.4896, + "step": 48950 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005945260658483326, + "loss": 0.4592, + "step": 48960 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005944836651190401, + "loss": 0.6048, + "step": 48970 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005944412643897475, + "loss": 0.5882, + "step": 48980 + }, + { + "epoch": 2.07, + "learning_rate": 0.000594398863660455, + "loss": 0.5275, + "step": 48990 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005943564629311625, + "loss": 0.5742, + "step": 49000 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005943140622018698, + "loss": 0.4468, + "step": 49010 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005942716614725774, + "loss": 0.4737, + "step": 49020 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005942292607432849, + "loss": 0.4391, + "step": 49030 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005941868600139922, + "loss": 0.5189, + "step": 49040 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005941444592846997, + "loss": 0.5509, + "step": 49050 + }, + { + "epoch": 2.07, + "learning_rate": 0.0005941020585554073, + "loss": 0.5452, + "step": 49060 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005940596578261146, + "loss": 0.6074, + "step": 49070 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005940172570968221, + "loss": 0.5489, + "step": 49080 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005939748563675296, + "loss": 0.6784, + "step": 49090 + }, + { + "epoch": 2.08, + "learning_rate": 0.000593932455638237, + "loss": 0.5531, + "step": 49100 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005938900549089445, + "loss": 0.5241, + "step": 49110 + }, + { + "epoch": 2.08, + "learning_rate": 0.000593847654179652, + "loss": 0.5231, + "step": 49120 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005938052534503593, + "loss": 0.4759, + "step": 49130 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005937628527210669, + "loss": 0.518, + "step": 49140 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005937204519917744, + "loss": 0.5035, + "step": 49150 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005936780512624817, + "loss": 0.549, + "step": 49160 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005936356505331892, + "loss": 0.4949, + "step": 49170 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005935932498038967, + "loss": 0.5347, + "step": 49180 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005935508490746041, + "loss": 0.5604, + "step": 49190 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005935084483453116, + "loss": 0.5155, + "step": 49200 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005934660476160189, + "loss": 0.4977, + "step": 49210 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005934236468867265, + "loss": 0.5182, + "step": 49220 + }, + { + "epoch": 2.08, + "learning_rate": 0.000593381246157434, + "loss": 0.4931, + "step": 49230 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005933388454281413, + "loss": 0.4678, + "step": 49240 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005932964446988488, + "loss": 0.5558, + "step": 49250 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005932540439695563, + "loss": 0.4478, + "step": 49260 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005932116432402637, + "loss": 0.4686, + "step": 49270 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005931692425109712, + "loss": 0.5382, + "step": 49280 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005931268417816787, + "loss": 0.5689, + "step": 49290 + }, + { + "epoch": 2.08, + "learning_rate": 0.0005930844410523861, + "loss": 0.5655, + "step": 49300 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005930420403230936, + "loss": 0.5342, + "step": 49310 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005929996395938011, + "loss": 0.4755, + "step": 49320 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005929572388645084, + "loss": 0.5873, + "step": 49330 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005929148381352159, + "loss": 0.5306, + "step": 49340 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005928724374059235, + "loss": 0.4782, + "step": 49350 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005928300366766308, + "loss": 0.5349, + "step": 49360 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005927876359473383, + "loss": 0.5029, + "step": 49370 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005927452352180458, + "loss": 0.5368, + "step": 49380 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005927028344887532, + "loss": 0.5253, + "step": 49390 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005926604337594607, + "loss": 0.5828, + "step": 49400 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005926180330301682, + "loss": 0.6064, + "step": 49410 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005925756323008756, + "loss": 0.4694, + "step": 49420 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005925332315715831, + "loss": 0.565, + "step": 49430 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005924908308422906, + "loss": 0.5276, + "step": 49440 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005924484301129979, + "loss": 0.5381, + "step": 49450 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005924060293837054, + "loss": 0.6455, + "step": 49460 + }, + { + "epoch": 2.09, + "learning_rate": 0.000592363628654413, + "loss": 0.507, + "step": 49470 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005923212279251203, + "loss": 0.5315, + "step": 49480 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005922788271958278, + "loss": 0.5264, + "step": 49490 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005922364264665353, + "loss": 0.5586, + "step": 49500 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005921940257372427, + "loss": 0.5342, + "step": 49510 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005921516250079502, + "loss": 0.5159, + "step": 49520 + }, + { + "epoch": 2.09, + "learning_rate": 0.0005921092242786576, + "loss": 0.6701, + "step": 49530 + }, + { + "epoch": 2.09, + "learning_rate": 0.000592066823549365, + "loss": 0.6391, + "step": 49540 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005920244228200726, + "loss": 0.5247, + "step": 49550 + }, + { + "epoch": 2.1, + "learning_rate": 0.00059198202209078, + "loss": 0.5875, + "step": 49560 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005919396213614874, + "loss": 0.618, + "step": 49570 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005918972206321949, + "loss": 0.6312, + "step": 49580 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005918548199029024, + "loss": 0.5089, + "step": 49590 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005918124191736098, + "loss": 0.632, + "step": 49600 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005917700184443173, + "loss": 0.4787, + "step": 49610 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005917276177150246, + "loss": 0.4687, + "step": 49620 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005916852169857322, + "loss": 0.5677, + "step": 49630 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005916428162564397, + "loss": 0.556, + "step": 49640 + }, + { + "epoch": 2.1, + "learning_rate": 0.000591600415527147, + "loss": 0.5876, + "step": 49650 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005915580147978545, + "loss": 0.4774, + "step": 49660 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005915156140685621, + "loss": 0.576, + "step": 49670 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005914732133392694, + "loss": 0.5923, + "step": 49680 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005914308126099769, + "loss": 0.4974, + "step": 49690 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005913884118806844, + "loss": 0.5085, + "step": 49700 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005913460111513918, + "loss": 0.5723, + "step": 49710 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005913036104220993, + "loss": 0.4923, + "step": 49720 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005912612096928068, + "loss": 0.5908, + "step": 49730 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005912188089635141, + "loss": 0.5213, + "step": 49740 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005911764082342217, + "loss": 0.5104, + "step": 49750 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005911340075049292, + "loss": 0.4723, + "step": 49760 + }, + { + "epoch": 2.1, + "learning_rate": 0.0005910916067756365, + "loss": 0.4554, + "step": 49770 + }, + { + "epoch": 2.11, + "learning_rate": 0.000591049206046344, + "loss": 0.6073, + "step": 49780 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005910068053170515, + "loss": 0.5642, + "step": 49790 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005909644045877589, + "loss": 0.5887, + "step": 49800 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005909220038584664, + "loss": 0.5477, + "step": 49810 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005908796031291738, + "loss": 0.5425, + "step": 49820 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005908372023998813, + "loss": 0.615, + "step": 49830 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005907948016705888, + "loss": 0.6014, + "step": 49840 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005907524009412962, + "loss": 0.419, + "step": 49850 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005907100002120036, + "loss": 0.5163, + "step": 49860 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005906675994827111, + "loss": 0.5545, + "step": 49870 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005906251987534186, + "loss": 0.547, + "step": 49880 + }, + { + "epoch": 2.11, + "learning_rate": 0.000590582798024126, + "loss": 0.5646, + "step": 49890 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005905403972948335, + "loss": 0.5943, + "step": 49900 + }, + { + "epoch": 2.11, + "learning_rate": 0.000590497996565541, + "loss": 0.5962, + "step": 49910 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005904555958362484, + "loss": 0.5951, + "step": 49920 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005904131951069559, + "loss": 0.625, + "step": 49930 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005903707943776633, + "loss": 0.5408, + "step": 49940 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005903283936483708, + "loss": 0.5986, + "step": 49950 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005902859929190783, + "loss": 0.5495, + "step": 49960 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005902435921897857, + "loss": 0.5875, + "step": 49970 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005902011914604931, + "loss": 0.5626, + "step": 49980 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005901587907312006, + "loss": 0.4972, + "step": 49990 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005901163900019081, + "loss": 0.5752, + "step": 50000 + }, + { + "epoch": 2.11, + "eval_loss": 0.6209712624549866, + "eval_runtime": 337.6425, + "eval_samples_per_second": 15.564, + "eval_steps_per_second": 3.892, + "step": 50000 + }, + { + "epoch": 2.11, + "learning_rate": 0.0005900739892726155, + "loss": 0.6667, + "step": 50010 + }, + { + "epoch": 2.12, + "learning_rate": 0.000590031588543323, + "loss": 0.4868, + "step": 50020 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005899891878140305, + "loss": 0.4711, + "step": 50030 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005899467870847379, + "loss": 0.7268, + "step": 50040 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005899043863554454, + "loss": 0.556, + "step": 50050 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005898619856261528, + "loss": 0.6284, + "step": 50060 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005898195848968602, + "loss": 0.6613, + "step": 50070 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005897771841675678, + "loss": 0.5903, + "step": 50080 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005897347834382752, + "loss": 0.5359, + "step": 50090 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005896923827089826, + "loss": 0.5745, + "step": 50100 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005896499819796901, + "loss": 0.5602, + "step": 50110 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005896075812503976, + "loss": 0.5169, + "step": 50120 + }, + { + "epoch": 2.12, + "learning_rate": 0.000589565180521105, + "loss": 0.5398, + "step": 50130 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005895227797918124, + "loss": 0.4734, + "step": 50140 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005894803790625198, + "loss": 0.52, + "step": 50150 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005894379783332274, + "loss": 0.566, + "step": 50160 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005893955776039348, + "loss": 0.5178, + "step": 50170 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005893531768746422, + "loss": 0.5283, + "step": 50180 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005893107761453497, + "loss": 0.556, + "step": 50190 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005892683754160572, + "loss": 0.6206, + "step": 50200 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005892259746867646, + "loss": 0.5014, + "step": 50210 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005891835739574721, + "loss": 0.5912, + "step": 50220 + }, + { + "epoch": 2.12, + "learning_rate": 0.0005891411732281795, + "loss": 0.5519, + "step": 50230 + }, + { + "epoch": 2.12, + "learning_rate": 0.000589098772498887, + "loss": 0.6274, + "step": 50240 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005890563717695945, + "loss": 0.5774, + "step": 50250 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005890139710403019, + "loss": 0.5574, + "step": 50260 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005889715703110093, + "loss": 0.5891, + "step": 50270 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005889291695817169, + "loss": 0.5965, + "step": 50280 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005888867688524243, + "loss": 0.5582, + "step": 50290 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005888443681231317, + "loss": 0.609, + "step": 50300 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005888019673938392, + "loss": 0.5217, + "step": 50310 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005887595666645467, + "loss": 0.6075, + "step": 50320 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005887171659352541, + "loss": 0.5001, + "step": 50330 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005886747652059616, + "loss": 0.5654, + "step": 50340 + }, + { + "epoch": 2.13, + "learning_rate": 0.000588632364476669, + "loss": 0.5833, + "step": 50350 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005885899637473765, + "loss": 0.5751, + "step": 50360 + }, + { + "epoch": 2.13, + "learning_rate": 0.000588547563018084, + "loss": 0.4976, + "step": 50370 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005885051622887914, + "loss": 0.5851, + "step": 50380 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005884627615594988, + "loss": 0.5588, + "step": 50390 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005884203608302064, + "loss": 0.5136, + "step": 50400 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005883779601009138, + "loss": 0.4693, + "step": 50410 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005883355593716212, + "loss": 0.546, + "step": 50420 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005882931586423286, + "loss": 0.5373, + "step": 50430 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005882507579130362, + "loss": 0.5914, + "step": 50440 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005882083571837436, + "loss": 0.5186, + "step": 50450 + }, + { + "epoch": 2.13, + "learning_rate": 0.000588165956454451, + "loss": 0.5005, + "step": 50460 + }, + { + "epoch": 2.13, + "learning_rate": 0.0005881235557251585, + "loss": 0.4518, + "step": 50470 + }, + { + "epoch": 2.13, + "learning_rate": 0.000588081154995866, + "loss": 0.5228, + "step": 50480 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005880387542665734, + "loss": 0.6138, + "step": 50490 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005879963535372809, + "loss": 0.5852, + "step": 50500 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005879539528079883, + "loss": 0.505, + "step": 50510 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005879115520786958, + "loss": 0.5563, + "step": 50520 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005878691513494033, + "loss": 0.6413, + "step": 50530 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005878267506201107, + "loss": 0.5169, + "step": 50540 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005877843498908181, + "loss": 0.6066, + "step": 50550 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005877419491615257, + "loss": 0.6438, + "step": 50560 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005876995484322331, + "loss": 0.6042, + "step": 50570 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005876571477029405, + "loss": 0.6053, + "step": 50580 + }, + { + "epoch": 2.14, + "learning_rate": 0.000587614746973648, + "loss": 0.5252, + "step": 50590 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005875723462443554, + "loss": 0.4536, + "step": 50600 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005875299455150629, + "loss": 0.6157, + "step": 50610 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005874875447857704, + "loss": 0.5278, + "step": 50620 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005874451440564778, + "loss": 0.6014, + "step": 50630 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005874027433271853, + "loss": 0.4818, + "step": 50640 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005873603425978928, + "loss": 0.5893, + "step": 50650 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005873179418686002, + "loss": 0.5342, + "step": 50660 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005872755411393076, + "loss": 0.4604, + "step": 50670 + }, + { + "epoch": 2.14, + "learning_rate": 0.000587233140410015, + "loss": 0.5309, + "step": 50680 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005871907396807226, + "loss": 0.5289, + "step": 50690 + }, + { + "epoch": 2.14, + "learning_rate": 0.00058714833895143, + "loss": 0.5739, + "step": 50700 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005871059382221374, + "loss": 0.5249, + "step": 50710 + }, + { + "epoch": 2.14, + "learning_rate": 0.0005870635374928449, + "loss": 0.5144, + "step": 50720 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005870211367635524, + "loss": 0.6828, + "step": 50730 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005869787360342598, + "loss": 0.6959, + "step": 50740 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005869363353049672, + "loss": 0.5785, + "step": 50750 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005868939345756747, + "loss": 0.5282, + "step": 50760 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005868515338463822, + "loss": 0.65, + "step": 50770 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005868091331170896, + "loss": 0.4919, + "step": 50780 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005867667323877971, + "loss": 0.5421, + "step": 50790 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005867243316585045, + "loss": 0.6123, + "step": 50800 + }, + { + "epoch": 2.15, + "learning_rate": 0.000586681930929212, + "loss": 0.4924, + "step": 50810 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005866395301999195, + "loss": 0.5152, + "step": 50820 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005865971294706269, + "loss": 0.5472, + "step": 50830 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005865547287413343, + "loss": 0.5255, + "step": 50840 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005865123280120419, + "loss": 0.4997, + "step": 50850 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005864699272827493, + "loss": 0.5561, + "step": 50860 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005864275265534567, + "loss": 0.5544, + "step": 50870 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005863851258241642, + "loss": 0.5322, + "step": 50880 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005863427250948717, + "loss": 0.5739, + "step": 50890 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005863003243655791, + "loss": 0.5599, + "step": 50900 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005862579236362866, + "loss": 0.5736, + "step": 50910 + }, + { + "epoch": 2.15, + "learning_rate": 0.000586215522906994, + "loss": 0.5404, + "step": 50920 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005861731221777015, + "loss": 0.471, + "step": 50930 + }, + { + "epoch": 2.15, + "learning_rate": 0.000586130721448409, + "loss": 0.433, + "step": 50940 + }, + { + "epoch": 2.15, + "learning_rate": 0.0005860883207191164, + "loss": 0.7258, + "step": 50950 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005860459199898238, + "loss": 0.601, + "step": 50960 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005860035192605314, + "loss": 0.6164, + "step": 50970 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005859611185312388, + "loss": 0.4885, + "step": 50980 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005859187178019462, + "loss": 0.6148, + "step": 50990 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005858763170726537, + "loss": 0.5312, + "step": 51000 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005858339163433612, + "loss": 0.5341, + "step": 51010 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005857915156140686, + "loss": 0.5922, + "step": 51020 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005857491148847761, + "loss": 0.562, + "step": 51030 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005857067141554834, + "loss": 0.4545, + "step": 51040 + }, + { + "epoch": 2.16, + "learning_rate": 0.000585664313426191, + "loss": 0.6225, + "step": 51050 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005856219126968985, + "loss": 0.5042, + "step": 51060 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005855795119676058, + "loss": 0.5098, + "step": 51070 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005855371112383133, + "loss": 0.5776, + "step": 51080 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005854947105090209, + "loss": 0.5457, + "step": 51090 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005854523097797282, + "loss": 0.4899, + "step": 51100 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005854099090504357, + "loss": 0.4404, + "step": 51110 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005853675083211431, + "loss": 0.5087, + "step": 51120 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005853251075918506, + "loss": 0.5631, + "step": 51130 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005852827068625581, + "loss": 0.559, + "step": 51140 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005852403061332655, + "loss": 0.5756, + "step": 51150 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005851979054039729, + "loss": 0.5653, + "step": 51160 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005851555046746805, + "loss": 0.5888, + "step": 51170 + }, + { + "epoch": 2.16, + "learning_rate": 0.000585113103945388, + "loss": 0.5178, + "step": 51180 + }, + { + "epoch": 2.16, + "learning_rate": 0.0005850707032160953, + "loss": 0.4952, + "step": 51190 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005850283024868028, + "loss": 0.4933, + "step": 51200 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005849859017575102, + "loss": 0.5579, + "step": 51210 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005849435010282177, + "loss": 0.5654, + "step": 51220 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005849011002989252, + "loss": 0.5135, + "step": 51230 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005848586995696326, + "loss": 0.5242, + "step": 51240 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005848162988403401, + "loss": 0.4766, + "step": 51250 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005847738981110476, + "loss": 0.4537, + "step": 51260 + }, + { + "epoch": 2.17, + "learning_rate": 0.000584731497381755, + "loss": 0.5505, + "step": 51270 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005846890966524624, + "loss": 0.5223, + "step": 51280 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005846466959231699, + "loss": 0.4849, + "step": 51290 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005846042951938774, + "loss": 0.4304, + "step": 51300 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005845618944645848, + "loss": 0.5402, + "step": 51310 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005845194937352923, + "loss": 0.5595, + "step": 51320 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005844770930059997, + "loss": 0.6415, + "step": 51330 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005844346922767072, + "loss": 0.6371, + "step": 51340 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005843922915474147, + "loss": 0.6055, + "step": 51350 + }, + { + "epoch": 2.17, + "learning_rate": 0.000584349890818122, + "loss": 0.5656, + "step": 51360 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005843074900888295, + "loss": 0.4312, + "step": 51370 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005842650893595371, + "loss": 0.6341, + "step": 51380 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005842226886302444, + "loss": 0.5313, + "step": 51390 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005841802879009519, + "loss": 0.6417, + "step": 51400 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005841378871716594, + "loss": 0.4814, + "step": 51410 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005840954864423668, + "loss": 0.4796, + "step": 51420 + }, + { + "epoch": 2.17, + "learning_rate": 0.0005840530857130743, + "loss": 0.6121, + "step": 51430 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005840106849837818, + "loss": 0.6449, + "step": 51440 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005839682842544892, + "loss": 0.4985, + "step": 51450 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005839258835251967, + "loss": 0.68, + "step": 51460 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005838834827959042, + "loss": 0.5731, + "step": 51470 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005838410820666115, + "loss": 0.5508, + "step": 51480 + }, + { + "epoch": 2.18, + "learning_rate": 0.000583798681337319, + "loss": 0.5573, + "step": 51490 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005837562806080266, + "loss": 0.4559, + "step": 51500 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005837138798787339, + "loss": 0.5225, + "step": 51510 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005836714791494414, + "loss": 0.5508, + "step": 51520 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005836290784201489, + "loss": 0.5588, + "step": 51530 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005835866776908563, + "loss": 0.5248, + "step": 51540 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005835442769615638, + "loss": 0.5573, + "step": 51550 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005835018762322713, + "loss": 0.5023, + "step": 51560 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005834594755029786, + "loss": 0.5796, + "step": 51570 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005834170747736862, + "loss": 0.5753, + "step": 51580 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005833746740443937, + "loss": 0.5658, + "step": 51590 + }, + { + "epoch": 2.18, + "learning_rate": 0.000583332273315101, + "loss": 0.5473, + "step": 51600 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005832898725858085, + "loss": 0.6368, + "step": 51610 + }, + { + "epoch": 2.18, + "learning_rate": 0.000583247471856516, + "loss": 0.5308, + "step": 51620 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005832050711272234, + "loss": 0.4662, + "step": 51630 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005831626703979309, + "loss": 0.4769, + "step": 51640 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005831202696686382, + "loss": 0.5762, + "step": 51650 + }, + { + "epoch": 2.18, + "learning_rate": 0.0005830778689393458, + "loss": 0.5619, + "step": 51660 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005830354682100533, + "loss": 0.4849, + "step": 51670 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005829930674807606, + "loss": 0.573, + "step": 51680 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005829506667514681, + "loss": 0.5153, + "step": 51690 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005829082660221757, + "loss": 0.5465, + "step": 51700 + }, + { + "epoch": 2.19, + "learning_rate": 0.000582865865292883, + "loss": 0.5136, + "step": 51710 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005828234645635905, + "loss": 0.5402, + "step": 51720 + }, + { + "epoch": 2.19, + "learning_rate": 0.000582781063834298, + "loss": 0.5092, + "step": 51730 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005827386631050054, + "loss": 0.6069, + "step": 51740 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005826962623757129, + "loss": 0.5257, + "step": 51750 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005826538616464204, + "loss": 0.4903, + "step": 51760 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005826114609171277, + "loss": 0.5139, + "step": 51770 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005825690601878353, + "loss": 0.531, + "step": 51780 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005825266594585428, + "loss": 0.5382, + "step": 51790 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005824842587292501, + "loss": 0.5554, + "step": 51800 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005824418579999576, + "loss": 0.5095, + "step": 51810 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005823994572706651, + "loss": 0.5218, + "step": 51820 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005823570565413725, + "loss": 0.6137, + "step": 51830 + }, + { + "epoch": 2.19, + "learning_rate": 0.00058231465581208, + "loss": 0.5847, + "step": 51840 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005822722550827875, + "loss": 0.438, + "step": 51850 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005822298543534949, + "loss": 0.4941, + "step": 51860 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005821874536242024, + "loss": 0.5508, + "step": 51870 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005821450528949099, + "loss": 0.5063, + "step": 51880 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005821026521656172, + "loss": 0.5294, + "step": 51890 + }, + { + "epoch": 2.19, + "learning_rate": 0.0005820602514363247, + "loss": 0.6089, + "step": 51900 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005820178507070323, + "loss": 0.5715, + "step": 51910 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005819754499777396, + "loss": 0.5184, + "step": 51920 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005819330492484471, + "loss": 0.4328, + "step": 51930 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005818906485191546, + "loss": 0.5033, + "step": 51940 + }, + { + "epoch": 2.2, + "learning_rate": 0.000581848247789862, + "loss": 0.5208, + "step": 51950 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005818058470605695, + "loss": 0.6041, + "step": 51960 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005817634463312768, + "loss": 0.5607, + "step": 51970 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005817210456019844, + "loss": 0.5245, + "step": 51980 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005816786448726919, + "loss": 0.4966, + "step": 51990 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005816362441433992, + "loss": 0.6623, + "step": 52000 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005815938434141067, + "loss": 0.6049, + "step": 52010 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005815514426848142, + "loss": 0.5516, + "step": 52020 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005815090419555216, + "loss": 0.578, + "step": 52030 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005814666412262291, + "loss": 0.5082, + "step": 52040 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005814242404969366, + "loss": 0.6198, + "step": 52050 + }, + { + "epoch": 2.2, + "learning_rate": 0.000581381839767644, + "loss": 0.5347, + "step": 52060 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005813394390383515, + "loss": 0.4767, + "step": 52070 + }, + { + "epoch": 2.2, + "learning_rate": 0.000581297038309059, + "loss": 0.6466, + "step": 52080 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005812546375797663, + "loss": 0.5615, + "step": 52090 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005812122368504738, + "loss": 0.4641, + "step": 52100 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005811698361211814, + "loss": 0.648, + "step": 52110 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005811274353918887, + "loss": 0.6095, + "step": 52120 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005810850346625962, + "loss": 0.5041, + "step": 52130 + }, + { + "epoch": 2.2, + "learning_rate": 0.0005810426339333037, + "loss": 0.5076, + "step": 52140 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005810002332040111, + "loss": 0.6962, + "step": 52150 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005809578324747186, + "loss": 0.4983, + "step": 52160 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005809154317454261, + "loss": 0.5715, + "step": 52170 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005808730310161334, + "loss": 0.6177, + "step": 52180 + }, + { + "epoch": 2.21, + "learning_rate": 0.000580830630286841, + "loss": 0.6148, + "step": 52190 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005807882295575485, + "loss": 0.5601, + "step": 52200 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005807458288282558, + "loss": 0.6229, + "step": 52210 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005807034280989633, + "loss": 0.5258, + "step": 52220 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005806610273696709, + "loss": 0.6037, + "step": 52230 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005806186266403782, + "loss": 0.4773, + "step": 52240 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005805762259110857, + "loss": 0.5346, + "step": 52250 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005805338251817931, + "loss": 0.5886, + "step": 52260 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005804914244525006, + "loss": 0.6226, + "step": 52270 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005804490237232081, + "loss": 0.5693, + "step": 52280 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005804066229939155, + "loss": 0.5343, + "step": 52290 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005803642222646229, + "loss": 0.5677, + "step": 52300 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005803218215353305, + "loss": 0.4871, + "step": 52310 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005802794208060379, + "loss": 0.4957, + "step": 52320 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005802370200767453, + "loss": 0.579, + "step": 52330 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005801946193474528, + "loss": 0.5916, + "step": 52340 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005801522186181603, + "loss": 0.5858, + "step": 52350 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005801098178888677, + "loss": 0.5902, + "step": 52360 + }, + { + "epoch": 2.21, + "learning_rate": 0.0005800674171595752, + "loss": 0.5167, + "step": 52370 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005800250164302826, + "loss": 0.5289, + "step": 52380 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005799826157009901, + "loss": 0.5725, + "step": 52390 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005799402149716976, + "loss": 0.528, + "step": 52400 + }, + { + "epoch": 2.22, + "learning_rate": 0.000579897814242405, + "loss": 0.6092, + "step": 52410 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005798554135131124, + "loss": 0.5771, + "step": 52420 + }, + { + "epoch": 2.22, + "learning_rate": 0.00057981301278382, + "loss": 0.6738, + "step": 52430 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005797706120545274, + "loss": 0.5114, + "step": 52440 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005797282113252348, + "loss": 0.4938, + "step": 52450 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005796858105959423, + "loss": 0.5146, + "step": 52460 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005796434098666498, + "loss": 0.5492, + "step": 52470 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005796010091373572, + "loss": 0.5664, + "step": 52480 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005795586084080647, + "loss": 0.5716, + "step": 52490 + }, + { + "epoch": 2.22, + "learning_rate": 0.000579516207678772, + "loss": 0.5549, + "step": 52500 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005794738069494796, + "loss": 0.6391, + "step": 52510 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005794314062201871, + "loss": 0.5824, + "step": 52520 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005793890054908944, + "loss": 0.6416, + "step": 52530 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005793466047616019, + "loss": 0.6526, + "step": 52540 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005793042040323094, + "loss": 0.5576, + "step": 52550 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005792618033030168, + "loss": 0.5571, + "step": 52560 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005792194025737243, + "loss": 0.6332, + "step": 52570 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005791770018444317, + "loss": 0.5914, + "step": 52580 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005791346011151392, + "loss": 0.4489, + "step": 52590 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005790922003858467, + "loss": 0.56, + "step": 52600 + }, + { + "epoch": 2.22, + "learning_rate": 0.0005790497996565541, + "loss": 0.532, + "step": 52610 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005790073989272615, + "loss": 0.5385, + "step": 52620 + }, + { + "epoch": 2.23, + "learning_rate": 0.000578964998197969, + "loss": 0.6132, + "step": 52630 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005789225974686765, + "loss": 0.5939, + "step": 52640 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005788801967393839, + "loss": 0.5292, + "step": 52650 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005788377960100914, + "loss": 0.4504, + "step": 52660 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005787953952807989, + "loss": 0.5257, + "step": 52670 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005787529945515063, + "loss": 0.6144, + "step": 52680 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005787105938222138, + "loss": 0.5127, + "step": 52690 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005786681930929212, + "loss": 0.4926, + "step": 52700 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005786257923636286, + "loss": 0.5985, + "step": 52710 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005785833916343362, + "loss": 0.5277, + "step": 52720 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005785409909050436, + "loss": 0.5498, + "step": 52730 + }, + { + "epoch": 2.23, + "learning_rate": 0.000578498590175751, + "loss": 0.5169, + "step": 52740 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005784561894464585, + "loss": 0.5062, + "step": 52750 + }, + { + "epoch": 2.23, + "learning_rate": 0.000578413788717166, + "loss": 0.5106, + "step": 52760 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005783713879878734, + "loss": 0.5832, + "step": 52770 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005783289872585809, + "loss": 0.4333, + "step": 52780 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005782865865292883, + "loss": 0.5144, + "step": 52790 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005782441857999958, + "loss": 0.5038, + "step": 52800 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005782017850707033, + "loss": 0.5306, + "step": 52810 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005781593843414107, + "loss": 0.5725, + "step": 52820 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005781169836121181, + "loss": 0.588, + "step": 52830 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005780745828828257, + "loss": 0.6158, + "step": 52840 + }, + { + "epoch": 2.23, + "learning_rate": 0.0005780321821535331, + "loss": 0.5921, + "step": 52850 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005779897814242405, + "loss": 0.4961, + "step": 52860 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005779473806949479, + "loss": 0.5434, + "step": 52870 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005779049799656555, + "loss": 0.5944, + "step": 52880 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005778625792363629, + "loss": 0.6043, + "step": 52890 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005778201785070703, + "loss": 0.5002, + "step": 52900 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005777777777777778, + "loss": 0.5842, + "step": 52910 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005777353770484853, + "loss": 0.568, + "step": 52920 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005776929763191927, + "loss": 0.5699, + "step": 52930 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005776505755899002, + "loss": 0.5143, + "step": 52940 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005776081748606076, + "loss": 0.5317, + "step": 52950 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005775657741313151, + "loss": 0.5807, + "step": 52960 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005775233734020226, + "loss": 0.5213, + "step": 52970 + }, + { + "epoch": 2.24, + "learning_rate": 0.00057748097267273, + "loss": 0.4169, + "step": 52980 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005774385719434374, + "loss": 0.5842, + "step": 52990 + }, + { + "epoch": 2.24, + "learning_rate": 0.000577396171214145, + "loss": 0.631, + "step": 53000 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005773537704848524, + "loss": 0.5452, + "step": 53010 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005773113697555598, + "loss": 0.5233, + "step": 53020 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005772689690262672, + "loss": 0.5356, + "step": 53030 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005772265682969748, + "loss": 0.584, + "step": 53040 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005771841675676822, + "loss": 0.5422, + "step": 53050 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005771417668383896, + "loss": 0.6475, + "step": 53060 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005770993661090971, + "loss": 0.5606, + "step": 53070 + }, + { + "epoch": 2.24, + "learning_rate": 0.0005770569653798046, + "loss": 0.5272, + "step": 53080 + }, + { + "epoch": 2.25, + "learning_rate": 0.000577014564650512, + "loss": 0.632, + "step": 53090 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005769721639212195, + "loss": 0.5727, + "step": 53100 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005769297631919269, + "loss": 0.6079, + "step": 53110 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005768873624626344, + "loss": 0.6643, + "step": 53120 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005768449617333419, + "loss": 0.616, + "step": 53130 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005768025610040493, + "loss": 0.497, + "step": 53140 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005767601602747567, + "loss": 0.4824, + "step": 53150 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005767177595454642, + "loss": 0.5245, + "step": 53160 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005766753588161717, + "loss": 0.4854, + "step": 53170 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005766329580868791, + "loss": 0.6134, + "step": 53180 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005765905573575865, + "loss": 0.5409, + "step": 53190 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005765481566282941, + "loss": 0.5154, + "step": 53200 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005765057558990015, + "loss": 0.5471, + "step": 53210 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005764633551697089, + "loss": 0.4887, + "step": 53220 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005764209544404164, + "loss": 0.562, + "step": 53230 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005763785537111238, + "loss": 0.5026, + "step": 53240 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005763361529818313, + "loss": 0.5624, + "step": 53250 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005762937522525388, + "loss": 0.5862, + "step": 53260 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005762513515232462, + "loss": 0.5449, + "step": 53270 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005762089507939537, + "loss": 0.485, + "step": 53280 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005761665500646612, + "loss": 0.65, + "step": 53290 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005761241493353686, + "loss": 0.6087, + "step": 53300 + }, + { + "epoch": 2.25, + "learning_rate": 0.000576081748606076, + "loss": 0.6243, + "step": 53310 + }, + { + "epoch": 2.25, + "learning_rate": 0.0005760393478767835, + "loss": 0.5997, + "step": 53320 + }, + { + "epoch": 2.26, + "learning_rate": 0.000575996947147491, + "loss": 0.5928, + "step": 53330 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005759545464181984, + "loss": 0.6114, + "step": 53340 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005759121456889059, + "loss": 0.6406, + "step": 53350 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005758697449596133, + "loss": 0.6383, + "step": 53360 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005758273442303208, + "loss": 0.5361, + "step": 53370 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005757849435010283, + "loss": 0.752, + "step": 53380 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005757425427717357, + "loss": 0.5543, + "step": 53390 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005757001420424431, + "loss": 0.5028, + "step": 53400 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005756577413131507, + "loss": 0.5278, + "step": 53410 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005756153405838581, + "loss": 0.5393, + "step": 53420 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005755729398545655, + "loss": 0.4457, + "step": 53430 + }, + { + "epoch": 2.26, + "learning_rate": 0.000575530539125273, + "loss": 0.5571, + "step": 53440 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005754881383959805, + "loss": 0.5333, + "step": 53450 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005754457376666879, + "loss": 0.5601, + "step": 53460 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005754033369373953, + "loss": 0.5598, + "step": 53470 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005753609362081028, + "loss": 0.6427, + "step": 53480 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005753185354788103, + "loss": 0.4862, + "step": 53490 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005752761347495177, + "loss": 0.5087, + "step": 53500 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005752337340202251, + "loss": 0.7063, + "step": 53510 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005751913332909326, + "loss": 0.5382, + "step": 53520 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005751489325616402, + "loss": 0.5725, + "step": 53530 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005751065318323475, + "loss": 0.5567, + "step": 53540 + }, + { + "epoch": 2.26, + "learning_rate": 0.000575064131103055, + "loss": 0.5427, + "step": 53550 + }, + { + "epoch": 2.26, + "learning_rate": 0.0005750217303737624, + "loss": 0.5625, + "step": 53560 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005749793296444699, + "loss": 0.6265, + "step": 53570 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005749369289151774, + "loss": 0.4798, + "step": 53580 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005748945281858848, + "loss": 0.5168, + "step": 53590 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005748521274565922, + "loss": 0.6712, + "step": 53600 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005748097267272998, + "loss": 0.6749, + "step": 53610 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005747673259980072, + "loss": 0.6262, + "step": 53620 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005747249252687146, + "loss": 0.5397, + "step": 53630 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005746825245394221, + "loss": 0.487, + "step": 53640 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005746401238101296, + "loss": 0.5717, + "step": 53650 + }, + { + "epoch": 2.27, + "learning_rate": 0.000574597723080837, + "loss": 0.4783, + "step": 53660 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005745553223515445, + "loss": 0.602, + "step": 53670 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005745129216222519, + "loss": 0.5467, + "step": 53680 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005744705208929594, + "loss": 0.4987, + "step": 53690 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005744281201636669, + "loss": 0.4431, + "step": 53700 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005743857194343743, + "loss": 0.4971, + "step": 53710 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005743433187050817, + "loss": 0.5843, + "step": 53720 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005743009179757893, + "loss": 0.548, + "step": 53730 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005742585172464967, + "loss": 0.5955, + "step": 53740 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005742161165172041, + "loss": 0.5533, + "step": 53750 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005741737157879116, + "loss": 0.4899, + "step": 53760 + }, + { + "epoch": 2.27, + "learning_rate": 0.000574131315058619, + "loss": 0.5893, + "step": 53770 + }, + { + "epoch": 2.27, + "learning_rate": 0.0005740889143293265, + "loss": 0.583, + "step": 53780 + }, + { + "epoch": 2.27, + "learning_rate": 0.000574046513600034, + "loss": 0.5442, + "step": 53790 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005740041128707413, + "loss": 0.5675, + "step": 53800 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005739617121414489, + "loss": 0.5417, + "step": 53810 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005739193114121564, + "loss": 0.4939, + "step": 53820 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005738769106828637, + "loss": 0.5553, + "step": 53830 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005738345099535712, + "loss": 0.5126, + "step": 53840 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005737921092242787, + "loss": 0.5282, + "step": 53850 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005737497084949861, + "loss": 0.4711, + "step": 53860 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005737073077656936, + "loss": 0.5113, + "step": 53870 + }, + { + "epoch": 2.28, + "learning_rate": 0.000573664907036401, + "loss": 0.5746, + "step": 53880 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005736225063071085, + "loss": 0.566, + "step": 53890 + }, + { + "epoch": 2.28, + "learning_rate": 0.000573580105577816, + "loss": 0.433, + "step": 53900 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005735377048485235, + "loss": 0.5095, + "step": 53910 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005734953041192308, + "loss": 0.4827, + "step": 53920 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005734529033899384, + "loss": 0.5784, + "step": 53930 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005734105026606459, + "loss": 0.5683, + "step": 53940 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005733681019313532, + "loss": 0.5308, + "step": 53950 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005733257012020607, + "loss": 0.5355, + "step": 53960 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005732833004727681, + "loss": 0.5426, + "step": 53970 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005732408997434756, + "loss": 0.5047, + "step": 53980 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005731984990141831, + "loss": 0.4786, + "step": 53990 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005731560982848905, + "loss": 0.5134, + "step": 54000 + }, + { + "epoch": 2.28, + "learning_rate": 0.000573113697555598, + "loss": 0.4157, + "step": 54010 + }, + { + "epoch": 2.28, + "learning_rate": 0.0005730712968263055, + "loss": 0.5882, + "step": 54020 + }, + { + "epoch": 2.28, + "learning_rate": 0.000573028896097013, + "loss": 0.5142, + "step": 54030 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005729864953677203, + "loss": 0.5637, + "step": 54040 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005729440946384278, + "loss": 0.6372, + "step": 54050 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005729016939091353, + "loss": 0.5748, + "step": 54060 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005728592931798427, + "loss": 0.4832, + "step": 54070 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005728168924505502, + "loss": 0.5169, + "step": 54080 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005727744917212576, + "loss": 0.5686, + "step": 54090 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005727320909919651, + "loss": 0.4788, + "step": 54100 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005726896902626726, + "loss": 0.5093, + "step": 54110 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005726472895333799, + "loss": 0.5584, + "step": 54120 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005726048888040874, + "loss": 0.5349, + "step": 54130 + }, + { + "epoch": 2.29, + "learning_rate": 0.000572562488074795, + "loss": 0.5658, + "step": 54140 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005725200873455023, + "loss": 0.5673, + "step": 54150 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005724776866162098, + "loss": 0.5128, + "step": 54160 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005724352858869173, + "loss": 0.5241, + "step": 54170 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005723928851576247, + "loss": 0.5904, + "step": 54180 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005723504844283322, + "loss": 0.5631, + "step": 54190 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005723080836990397, + "loss": 0.5808, + "step": 54200 + }, + { + "epoch": 2.29, + "learning_rate": 0.000572265682969747, + "loss": 0.5334, + "step": 54210 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005722232822404546, + "loss": 0.5958, + "step": 54220 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005721808815111621, + "loss": 0.5251, + "step": 54230 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005721384807818694, + "loss": 0.4831, + "step": 54240 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005720960800525769, + "loss": 0.5635, + "step": 54250 + }, + { + "epoch": 2.29, + "learning_rate": 0.0005720536793232845, + "loss": 0.5387, + "step": 54260 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005720112785939918, + "loss": 0.6658, + "step": 54270 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005719688778646993, + "loss": 0.5417, + "step": 54280 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005719264771354068, + "loss": 0.7009, + "step": 54290 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005718840764061142, + "loss": 0.6154, + "step": 54300 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005718416756768217, + "loss": 0.6059, + "step": 54310 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005717992749475292, + "loss": 0.606, + "step": 54320 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005717568742182365, + "loss": 0.5046, + "step": 54330 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005717144734889441, + "loss": 0.6098, + "step": 54340 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005716720727596516, + "loss": 0.4544, + "step": 54350 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005716296720303589, + "loss": 0.618, + "step": 54360 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005715872713010664, + "loss": 0.5662, + "step": 54370 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005715448705717739, + "loss": 0.5281, + "step": 54380 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005715024698424813, + "loss": 0.5062, + "step": 54390 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005714600691131888, + "loss": 0.5025, + "step": 54400 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005714176683838961, + "loss": 0.597, + "step": 54410 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005713752676546037, + "loss": 0.4972, + "step": 54420 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005713328669253112, + "loss": 0.5571, + "step": 54430 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005712904661960185, + "loss": 0.5924, + "step": 54440 + }, + { + "epoch": 2.3, + "learning_rate": 0.000571248065466726, + "loss": 0.5336, + "step": 54450 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005712056647374336, + "loss": 0.5648, + "step": 54460 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005711632640081409, + "loss": 0.5268, + "step": 54470 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005711208632788484, + "loss": 0.633, + "step": 54480 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005710784625495559, + "loss": 0.5307, + "step": 54490 + }, + { + "epoch": 2.3, + "learning_rate": 0.0005710360618202633, + "loss": 0.5918, + "step": 54500 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005709936610909708, + "loss": 0.4573, + "step": 54510 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005709512603616783, + "loss": 0.5096, + "step": 54520 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005709088596323856, + "loss": 0.5082, + "step": 54530 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005708664589030932, + "loss": 0.511, + "step": 54540 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005708240581738007, + "loss": 0.5666, + "step": 54550 + }, + { + "epoch": 2.31, + "learning_rate": 0.000570781657444508, + "loss": 0.6517, + "step": 54560 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005707392567152155, + "loss": 0.5056, + "step": 54570 + }, + { + "epoch": 2.31, + "learning_rate": 0.000570696855985923, + "loss": 0.5474, + "step": 54580 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005706544552566304, + "loss": 0.606, + "step": 54590 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005706120545273379, + "loss": 0.5325, + "step": 54600 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005705696537980454, + "loss": 0.55, + "step": 54610 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005705272530687528, + "loss": 0.4731, + "step": 54620 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005704848523394603, + "loss": 0.4946, + "step": 54630 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005704424516101678, + "loss": 0.6169, + "step": 54640 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005704000508808751, + "loss": 0.5656, + "step": 54650 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005703576501515826, + "loss": 0.4863, + "step": 54660 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005703152494222902, + "loss": 0.5154, + "step": 54670 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005702728486929975, + "loss": 0.5406, + "step": 54680 + }, + { + "epoch": 2.31, + "learning_rate": 0.000570230447963705, + "loss": 0.5439, + "step": 54690 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005701880472344125, + "loss": 0.4997, + "step": 54700 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005701456465051199, + "loss": 0.5168, + "step": 54710 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005701032457758274, + "loss": 0.547, + "step": 54720 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005700608450465348, + "loss": 0.582, + "step": 54730 + }, + { + "epoch": 2.31, + "learning_rate": 0.0005700184443172422, + "loss": 0.6018, + "step": 54740 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005699760435879498, + "loss": 0.6147, + "step": 54750 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005699336428586572, + "loss": 0.4906, + "step": 54760 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005698912421293646, + "loss": 0.5004, + "step": 54770 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005698488414000721, + "loss": 0.5135, + "step": 54780 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005698064406707796, + "loss": 0.5753, + "step": 54790 + }, + { + "epoch": 2.32, + "learning_rate": 0.000569764039941487, + "loss": 0.5334, + "step": 54800 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005697216392121945, + "loss": 0.5567, + "step": 54810 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005696792384829018, + "loss": 0.4885, + "step": 54820 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005696368377536094, + "loss": 0.4899, + "step": 54830 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005695944370243169, + "loss": 0.4705, + "step": 54840 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005695520362950242, + "loss": 0.6041, + "step": 54850 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005695096355657317, + "loss": 0.6187, + "step": 54860 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005694672348364393, + "loss": 0.5327, + "step": 54870 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005694248341071466, + "loss": 0.5895, + "step": 54880 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005693824333778541, + "loss": 0.5707, + "step": 54890 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005693400326485616, + "loss": 0.554, + "step": 54900 + }, + { + "epoch": 2.32, + "learning_rate": 0.000569297631919269, + "loss": 0.5775, + "step": 54910 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005692552311899765, + "loss": 0.5496, + "step": 54920 + }, + { + "epoch": 2.32, + "learning_rate": 0.000569212830460684, + "loss": 0.5226, + "step": 54930 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005691704297313913, + "loss": 0.546, + "step": 54940 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005691280290020989, + "loss": 0.5302, + "step": 54950 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005690856282728064, + "loss": 0.5663, + "step": 54960 + }, + { + "epoch": 2.32, + "learning_rate": 0.0005690432275435137, + "loss": 0.6079, + "step": 54970 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005690008268142212, + "loss": 0.5562, + "step": 54980 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005689584260849288, + "loss": 0.667, + "step": 54990 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005689160253556361, + "loss": 0.5346, + "step": 55000 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005688736246263436, + "loss": 0.5153, + "step": 55010 + }, + { + "epoch": 2.33, + "learning_rate": 0.000568831223897051, + "loss": 0.5897, + "step": 55020 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005687888231677585, + "loss": 0.5558, + "step": 55030 + }, + { + "epoch": 2.33, + "learning_rate": 0.000568746422438466, + "loss": 0.65, + "step": 55040 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005687040217091734, + "loss": 0.5143, + "step": 55050 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005686616209798808, + "loss": 0.6068, + "step": 55060 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005686192202505884, + "loss": 0.5345, + "step": 55070 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005685768195212958, + "loss": 0.5821, + "step": 55080 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005685344187920032, + "loss": 0.5377, + "step": 55090 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005684920180627107, + "loss": 0.5496, + "step": 55100 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005684496173334182, + "loss": 0.6047, + "step": 55110 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005684072166041256, + "loss": 0.544, + "step": 55120 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005683648158748331, + "loss": 0.5076, + "step": 55130 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005683224151455405, + "loss": 0.4631, + "step": 55140 + }, + { + "epoch": 2.33, + "learning_rate": 0.000568280014416248, + "loss": 0.5981, + "step": 55150 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005682376136869555, + "loss": 0.556, + "step": 55160 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005681952129576629, + "loss": 0.5135, + "step": 55170 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005681528122283703, + "loss": 0.58, + "step": 55180 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005681104114990778, + "loss": 0.6407, + "step": 55190 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005680680107697853, + "loss": 0.5997, + "step": 55200 + }, + { + "epoch": 2.33, + "learning_rate": 0.0005680256100404927, + "loss": 0.5152, + "step": 55210 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005679832093112002, + "loss": 0.5646, + "step": 55220 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005679408085819077, + "loss": 0.5291, + "step": 55230 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005678984078526151, + "loss": 0.6682, + "step": 55240 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005678560071233226, + "loss": 0.5439, + "step": 55250 + }, + { + "epoch": 2.34, + "learning_rate": 0.00056781360639403, + "loss": 0.5261, + "step": 55260 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005677712056647374, + "loss": 0.5792, + "step": 55270 + }, + { + "epoch": 2.34, + "learning_rate": 0.000567728804935445, + "loss": 0.5517, + "step": 55280 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005676864042061524, + "loss": 0.5743, + "step": 55290 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005676440034768598, + "loss": 0.5488, + "step": 55300 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005676016027475673, + "loss": 0.4516, + "step": 55310 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005675592020182748, + "loss": 0.5266, + "step": 55320 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005675168012889822, + "loss": 0.5802, + "step": 55330 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005674744005596896, + "loss": 0.4998, + "step": 55340 + }, + { + "epoch": 2.34, + "learning_rate": 0.000567431999830397, + "loss": 0.6404, + "step": 55350 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005673895991011046, + "loss": 0.6038, + "step": 55360 + }, + { + "epoch": 2.34, + "learning_rate": 0.000567347198371812, + "loss": 0.5937, + "step": 55370 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005673047976425194, + "loss": 0.589, + "step": 55380 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005672623969132269, + "loss": 0.5304, + "step": 55390 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005672199961839344, + "loss": 0.5797, + "step": 55400 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005671775954546418, + "loss": 0.5344, + "step": 55410 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005671351947253493, + "loss": 0.5789, + "step": 55420 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005670927939960568, + "loss": 0.6419, + "step": 55430 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005670503932667642, + "loss": 0.5965, + "step": 55440 + }, + { + "epoch": 2.34, + "learning_rate": 0.0005670079925374717, + "loss": 0.5232, + "step": 55450 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005669655918081791, + "loss": 0.6434, + "step": 55460 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005669231910788865, + "loss": 0.6434, + "step": 55470 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005668807903495941, + "loss": 0.5476, + "step": 55480 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005668383896203015, + "loss": 0.5635, + "step": 55490 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005667959888910089, + "loss": 0.4776, + "step": 55500 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005667535881617164, + "loss": 0.5336, + "step": 55510 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005667111874324239, + "loss": 0.7213, + "step": 55520 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005666687867031313, + "loss": 0.5582, + "step": 55530 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005666263859738388, + "loss": 0.5497, + "step": 55540 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005665839852445462, + "loss": 0.5166, + "step": 55550 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005665415845152537, + "loss": 0.5266, + "step": 55560 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005664991837859612, + "loss": 0.5959, + "step": 55570 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005664567830566686, + "loss": 0.5151, + "step": 55580 + }, + { + "epoch": 2.35, + "learning_rate": 0.000566414382327376, + "loss": 0.6289, + "step": 55590 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005663719815980836, + "loss": 0.6132, + "step": 55600 + }, + { + "epoch": 2.35, + "learning_rate": 0.000566329580868791, + "loss": 0.7453, + "step": 55610 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005662871801394984, + "loss": 0.5524, + "step": 55620 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005662447794102058, + "loss": 0.5405, + "step": 55630 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005662023786809134, + "loss": 0.5286, + "step": 55640 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005661599779516208, + "loss": 0.5308, + "step": 55650 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005661175772223282, + "loss": 0.4727, + "step": 55660 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005660751764930357, + "loss": 0.7251, + "step": 55670 + }, + { + "epoch": 2.35, + "learning_rate": 0.0005660327757637432, + "loss": 0.5711, + "step": 55680 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005659903750344506, + "loss": 0.5241, + "step": 55690 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005659479743051581, + "loss": 0.5588, + "step": 55700 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005659055735758655, + "loss": 0.5789, + "step": 55710 + }, + { + "epoch": 2.36, + "learning_rate": 0.000565863172846573, + "loss": 0.6088, + "step": 55720 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005658207721172805, + "loss": 0.588, + "step": 55730 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005657783713879879, + "loss": 0.5436, + "step": 55740 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005657359706586953, + "loss": 0.5744, + "step": 55750 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005656935699294029, + "loss": 0.5552, + "step": 55760 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005656511692001103, + "loss": 0.4999, + "step": 55770 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005656087684708177, + "loss": 0.5099, + "step": 55780 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005655663677415252, + "loss": 0.6363, + "step": 55790 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005655239670122326, + "loss": 0.5673, + "step": 55800 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005654815662829401, + "loss": 0.5138, + "step": 55810 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005654391655536476, + "loss": 0.6276, + "step": 55820 + }, + { + "epoch": 2.36, + "learning_rate": 0.000565396764824355, + "loss": 0.5727, + "step": 55830 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005653543640950625, + "loss": 0.4723, + "step": 55840 + }, + { + "epoch": 2.36, + "learning_rate": 0.00056531196336577, + "loss": 0.5745, + "step": 55850 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005652695626364774, + "loss": 0.5802, + "step": 55860 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005652271619071848, + "loss": 0.5624, + "step": 55870 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005651847611778922, + "loss": 0.5577, + "step": 55880 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005651423604485998, + "loss": 0.5549, + "step": 55890 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005650999597193072, + "loss": 0.61, + "step": 55900 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005650575589900146, + "loss": 0.5965, + "step": 55910 + }, + { + "epoch": 2.36, + "learning_rate": 0.0005650151582607221, + "loss": 0.4937, + "step": 55920 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005649727575314296, + "loss": 0.6106, + "step": 55930 + }, + { + "epoch": 2.37, + "learning_rate": 0.000564930356802137, + "loss": 0.6339, + "step": 55940 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005648879560728444, + "loss": 0.5432, + "step": 55950 + }, + { + "epoch": 2.37, + "learning_rate": 0.000564845555343552, + "loss": 0.5575, + "step": 55960 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005648031546142594, + "loss": 0.5714, + "step": 55970 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005647607538849668, + "loss": 0.6011, + "step": 55980 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005647183531556743, + "loss": 0.5142, + "step": 55990 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005646759524263817, + "loss": 0.601, + "step": 56000 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005646335516970892, + "loss": 0.62, + "step": 56010 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005645911509677967, + "loss": 0.5306, + "step": 56020 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005645487502385041, + "loss": 0.547, + "step": 56030 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005645063495092116, + "loss": 0.5176, + "step": 56040 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005644639487799191, + "loss": 0.4862, + "step": 56050 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005644215480506265, + "loss": 0.626, + "step": 56060 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005643791473213339, + "loss": 0.5088, + "step": 56070 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005643367465920414, + "loss": 0.5485, + "step": 56080 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005642943458627489, + "loss": 0.5697, + "step": 56090 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005642519451334563, + "loss": 0.4896, + "step": 56100 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005642095444041638, + "loss": 0.5423, + "step": 56110 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005641671436748712, + "loss": 0.5308, + "step": 56120 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005641247429455787, + "loss": 0.5562, + "step": 56130 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005640823422162862, + "loss": 0.5598, + "step": 56140 + }, + { + "epoch": 2.37, + "learning_rate": 0.0005640399414869936, + "loss": 0.5479, + "step": 56150 + }, + { + "epoch": 2.37, + "learning_rate": 0.000563997540757701, + "loss": 0.5503, + "step": 56160 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005639551400284086, + "loss": 0.5565, + "step": 56170 + }, + { + "epoch": 2.38, + "learning_rate": 0.000563912739299116, + "loss": 0.4933, + "step": 56180 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005638703385698234, + "loss": 0.5328, + "step": 56190 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005638279378405309, + "loss": 0.6017, + "step": 56200 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005637855371112384, + "loss": 0.52, + "step": 56210 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005637431363819458, + "loss": 0.5333, + "step": 56220 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005637007356526533, + "loss": 0.626, + "step": 56230 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005636583349233606, + "loss": 0.6135, + "step": 56240 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005636159341940682, + "loss": 0.6053, + "step": 56250 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005635735334647757, + "loss": 0.5556, + "step": 56260 + }, + { + "epoch": 2.38, + "learning_rate": 0.000563531132735483, + "loss": 0.4902, + "step": 56270 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005634887320061905, + "loss": 0.5244, + "step": 56280 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005634463312768981, + "loss": 0.4365, + "step": 56290 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005634039305476054, + "loss": 0.554, + "step": 56300 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005633615298183129, + "loss": 0.5877, + "step": 56310 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005633191290890203, + "loss": 0.5552, + "step": 56320 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005632767283597278, + "loss": 0.5941, + "step": 56330 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005632343276304353, + "loss": 0.5789, + "step": 56340 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005631919269011427, + "loss": 0.5444, + "step": 56350 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005631495261718501, + "loss": 0.6905, + "step": 56360 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005631071254425577, + "loss": 0.5926, + "step": 56370 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005630647247132651, + "loss": 0.52, + "step": 56380 + }, + { + "epoch": 2.38, + "learning_rate": 0.0005630223239839725, + "loss": 0.4549, + "step": 56390 + }, + { + "epoch": 2.39, + "learning_rate": 0.00056297992325468, + "loss": 0.5238, + "step": 56400 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005629375225253874, + "loss": 0.5092, + "step": 56410 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005628951217960949, + "loss": 0.6304, + "step": 56420 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005628527210668024, + "loss": 0.4976, + "step": 56430 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005628103203375098, + "loss": 0.5495, + "step": 56440 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005627679196082173, + "loss": 0.5275, + "step": 56450 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005627255188789248, + "loss": 0.5516, + "step": 56460 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005626831181496322, + "loss": 0.4995, + "step": 56470 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005626407174203396, + "loss": 0.589, + "step": 56480 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005625983166910472, + "loss": 0.5293, + "step": 56490 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005625559159617546, + "loss": 0.4986, + "step": 56500 + }, + { + "epoch": 2.39, + "learning_rate": 0.000562513515232462, + "loss": 0.5304, + "step": 56510 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005624711145031695, + "loss": 0.5625, + "step": 56520 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005624287137738769, + "loss": 0.5416, + "step": 56530 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005623863130445844, + "loss": 0.5932, + "step": 56540 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005623439123152919, + "loss": 0.5493, + "step": 56550 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005623015115859992, + "loss": 0.5432, + "step": 56560 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005622591108567068, + "loss": 0.5103, + "step": 56570 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005622167101274143, + "loss": 0.5168, + "step": 56580 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005621743093981216, + "loss": 0.5633, + "step": 56590 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005621319086688291, + "loss": 0.5274, + "step": 56600 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005620895079395366, + "loss": 0.4679, + "step": 56610 + }, + { + "epoch": 2.39, + "learning_rate": 0.000562047107210244, + "loss": 0.5114, + "step": 56620 + }, + { + "epoch": 2.39, + "learning_rate": 0.0005620047064809515, + "loss": 0.5912, + "step": 56630 + }, + { + "epoch": 2.4, + "learning_rate": 0.000561962305751659, + "loss": 0.553, + "step": 56640 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005619199050223664, + "loss": 0.4378, + "step": 56650 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005618775042930739, + "loss": 0.4732, + "step": 56660 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005618351035637814, + "loss": 0.5626, + "step": 56670 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005617927028344887, + "loss": 0.5785, + "step": 56680 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005617503021051962, + "loss": 0.5616, + "step": 56690 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005617079013759038, + "loss": 0.6061, + "step": 56700 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005616655006466111, + "loss": 0.589, + "step": 56710 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005616230999173186, + "loss": 0.5287, + "step": 56720 + }, + { + "epoch": 2.4, + "learning_rate": 0.000561580699188026, + "loss": 0.5607, + "step": 56730 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005615382984587335, + "loss": 0.5583, + "step": 56740 + }, + { + "epoch": 2.4, + "learning_rate": 0.000561495897729441, + "loss": 0.5545, + "step": 56750 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005614534970001485, + "loss": 0.6284, + "step": 56760 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005614110962708558, + "loss": 0.5135, + "step": 56770 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005613686955415634, + "loss": 0.5386, + "step": 56780 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005613262948122709, + "loss": 0.5378, + "step": 56790 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005612838940829782, + "loss": 0.5683, + "step": 56800 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005612414933536857, + "loss": 0.5048, + "step": 56810 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005611990926243933, + "loss": 0.5823, + "step": 56820 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005611566918951006, + "loss": 0.5877, + "step": 56830 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005611142911658081, + "loss": 0.4757, + "step": 56840 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005610718904365155, + "loss": 0.5431, + "step": 56850 + }, + { + "epoch": 2.4, + "learning_rate": 0.000561029489707223, + "loss": 0.6014, + "step": 56860 + }, + { + "epoch": 2.4, + "learning_rate": 0.0005609870889779305, + "loss": 0.5819, + "step": 56870 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005609446882486378, + "loss": 0.4981, + "step": 56880 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005609022875193453, + "loss": 0.5791, + "step": 56890 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005608598867900529, + "loss": 0.5418, + "step": 56900 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005608174860607602, + "loss": 0.6301, + "step": 56910 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005607750853314677, + "loss": 0.5327, + "step": 56920 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005607326846021752, + "loss": 0.5229, + "step": 56930 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005606902838728826, + "loss": 0.4874, + "step": 56940 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005606478831435901, + "loss": 0.6183, + "step": 56950 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005606054824142976, + "loss": 0.6354, + "step": 56960 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005605630816850049, + "loss": 0.6076, + "step": 56970 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005605206809557125, + "loss": 0.559, + "step": 56980 + }, + { + "epoch": 2.41, + "learning_rate": 0.00056047828022642, + "loss": 0.4822, + "step": 56990 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005604358794971273, + "loss": 0.5019, + "step": 57000 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005603934787678348, + "loss": 0.526, + "step": 57010 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005603510780385424, + "loss": 0.5834, + "step": 57020 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005603086773092497, + "loss": 0.517, + "step": 57030 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005602662765799572, + "loss": 0.5447, + "step": 57040 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005602238758506647, + "loss": 0.5892, + "step": 57050 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005601814751213721, + "loss": 0.6021, + "step": 57060 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005601390743920796, + "loss": 0.597, + "step": 57070 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005600966736627871, + "loss": 0.6265, + "step": 57080 + }, + { + "epoch": 2.41, + "learning_rate": 0.0005600542729334944, + "loss": 0.6016, + "step": 57090 + }, + { + "epoch": 2.41, + "learning_rate": 0.000560011872204202, + "loss": 0.612, + "step": 57100 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005599694714749095, + "loss": 0.4695, + "step": 57110 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005599270707456168, + "loss": 0.5754, + "step": 57120 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005598846700163243, + "loss": 0.5522, + "step": 57130 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005598422692870318, + "loss": 0.5973, + "step": 57140 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005597998685577392, + "loss": 0.5335, + "step": 57150 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005597574678284467, + "loss": 0.5432, + "step": 57160 + }, + { + "epoch": 2.42, + "learning_rate": 0.000559715067099154, + "loss": 0.5078, + "step": 57170 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005596726663698616, + "loss": 0.5523, + "step": 57180 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005596302656405691, + "loss": 0.6013, + "step": 57190 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005595878649112765, + "loss": 0.5683, + "step": 57200 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005595454641819839, + "loss": 0.5164, + "step": 57210 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005595030634526914, + "loss": 0.6015, + "step": 57220 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005594606627233989, + "loss": 0.4152, + "step": 57230 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005594182619941063, + "loss": 0.5756, + "step": 57240 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005593758612648138, + "loss": 0.6152, + "step": 57250 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005593334605355213, + "loss": 0.5326, + "step": 57260 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005592910598062287, + "loss": 0.517, + "step": 57270 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005592486590769362, + "loss": 0.6328, + "step": 57280 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005592062583476435, + "loss": 0.6094, + "step": 57290 + }, + { + "epoch": 2.42, + "learning_rate": 0.000559163857618351, + "loss": 0.5494, + "step": 57300 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005591214568890586, + "loss": 0.5149, + "step": 57310 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005590790561597659, + "loss": 0.5713, + "step": 57320 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005590366554304734, + "loss": 0.5139, + "step": 57330 + }, + { + "epoch": 2.42, + "learning_rate": 0.0005589942547011809, + "loss": 0.5915, + "step": 57340 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005589518539718883, + "loss": 0.5676, + "step": 57350 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005589094532425958, + "loss": 0.5523, + "step": 57360 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005588670525133033, + "loss": 0.5143, + "step": 57370 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005588246517840106, + "loss": 0.5109, + "step": 57380 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005587822510547182, + "loss": 0.652, + "step": 57390 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005587398503254257, + "loss": 0.6057, + "step": 57400 + }, + { + "epoch": 2.43, + "learning_rate": 0.000558697449596133, + "loss": 0.5545, + "step": 57410 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005586550488668405, + "loss": 0.515, + "step": 57420 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005586126481375481, + "loss": 0.5767, + "step": 57430 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005585702474082554, + "loss": 0.6014, + "step": 57440 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005585278466789629, + "loss": 0.571, + "step": 57450 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005584854459496704, + "loss": 0.5803, + "step": 57460 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005584430452203778, + "loss": 0.5304, + "step": 57470 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005584006444910853, + "loss": 0.5906, + "step": 57480 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005583582437617927, + "loss": 0.7207, + "step": 57490 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005583158430325001, + "loss": 0.5742, + "step": 57500 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005582734423032077, + "loss": 0.4658, + "step": 57510 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005582310415739151, + "loss": 0.5384, + "step": 57520 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005581886408446225, + "loss": 0.5018, + "step": 57530 + }, + { + "epoch": 2.43, + "learning_rate": 0.00055814624011533, + "loss": 0.5508, + "step": 57540 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005581038393860375, + "loss": 0.5737, + "step": 57550 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005580614386567449, + "loss": 0.5859, + "step": 57560 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005580190379274524, + "loss": 0.5289, + "step": 57570 + }, + { + "epoch": 2.43, + "learning_rate": 0.0005579766371981598, + "loss": 0.5235, + "step": 57580 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005579342364688673, + "loss": 0.5173, + "step": 57590 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005578918357395748, + "loss": 0.5387, + "step": 57600 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005578494350102822, + "loss": 0.5929, + "step": 57610 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005578070342809896, + "loss": 0.5984, + "step": 57620 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005577646335516972, + "loss": 0.5678, + "step": 57630 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005577222328224046, + "loss": 0.6249, + "step": 57640 + }, + { + "epoch": 2.44, + "learning_rate": 0.000557679832093112, + "loss": 0.5558, + "step": 57650 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005576374313638195, + "loss": 0.5805, + "step": 57660 + }, + { + "epoch": 2.44, + "learning_rate": 0.000557595030634527, + "loss": 0.5677, + "step": 57670 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005575526299052344, + "loss": 0.5469, + "step": 57680 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005575102291759419, + "loss": 0.604, + "step": 57690 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005574678284466492, + "loss": 0.5407, + "step": 57700 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005574254277173568, + "loss": 0.5309, + "step": 57710 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005573830269880643, + "loss": 0.5719, + "step": 57720 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005573406262587716, + "loss": 0.5927, + "step": 57730 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005572982255294791, + "loss": 0.5026, + "step": 57740 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005572558248001866, + "loss": 0.5562, + "step": 57750 + }, + { + "epoch": 2.44, + "learning_rate": 0.000557213424070894, + "loss": 0.5561, + "step": 57760 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005571710233416015, + "loss": 0.4991, + "step": 57770 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005571286226123089, + "loss": 0.5123, + "step": 57780 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005570862218830164, + "loss": 0.5004, + "step": 57790 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005570438211537239, + "loss": 0.4923, + "step": 57800 + }, + { + "epoch": 2.44, + "learning_rate": 0.0005570014204244313, + "loss": 0.5622, + "step": 57810 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005569590196951387, + "loss": 0.5475, + "step": 57820 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005569166189658462, + "loss": 0.559, + "step": 57830 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005568742182365537, + "loss": 0.5514, + "step": 57840 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005568318175072611, + "loss": 0.6223, + "step": 57850 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005567894167779686, + "loss": 0.5041, + "step": 57860 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005567470160486761, + "loss": 0.6847, + "step": 57870 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005567046153193835, + "loss": 0.5, + "step": 57880 + }, + { + "epoch": 2.45, + "learning_rate": 0.000556662214590091, + "loss": 0.6563, + "step": 57890 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005566198138607984, + "loss": 0.5911, + "step": 57900 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005565774131315058, + "loss": 0.5629, + "step": 57910 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005565350124022134, + "loss": 0.6458, + "step": 57920 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005564926116729208, + "loss": 0.6508, + "step": 57930 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005564502109436282, + "loss": 0.5123, + "step": 57940 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005564078102143357, + "loss": 0.5002, + "step": 57950 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005563654094850432, + "loss": 0.5616, + "step": 57960 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005563230087557506, + "loss": 0.498, + "step": 57970 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005562806080264581, + "loss": 0.486, + "step": 57980 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005562382072971656, + "loss": 0.5186, + "step": 57990 + }, + { + "epoch": 2.45, + "learning_rate": 0.000556195806567873, + "loss": 0.5373, + "step": 58000 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005561534058385805, + "loss": 0.5718, + "step": 58010 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005561110051092879, + "loss": 0.5649, + "step": 58020 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005560686043799953, + "loss": 0.5045, + "step": 58030 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005560262036507029, + "loss": 0.5473, + "step": 58040 + }, + { + "epoch": 2.45, + "learning_rate": 0.0005559838029214103, + "loss": 0.559, + "step": 58050 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005559414021921177, + "loss": 0.4806, + "step": 58060 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005558990014628252, + "loss": 0.5223, + "step": 58070 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005558566007335327, + "loss": 0.5027, + "step": 58080 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005558142000042401, + "loss": 0.5001, + "step": 58090 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005557717992749475, + "loss": 0.6493, + "step": 58100 + }, + { + "epoch": 2.46, + "learning_rate": 0.000555729398545655, + "loss": 0.6827, + "step": 58110 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005556869978163625, + "loss": 0.549, + "step": 58120 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005556445970870699, + "loss": 0.5848, + "step": 58130 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005556021963577774, + "loss": 0.6631, + "step": 58140 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005555597956284848, + "loss": 0.4865, + "step": 58150 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005555173948991923, + "loss": 0.5454, + "step": 58160 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005554749941698998, + "loss": 0.6111, + "step": 58170 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005554325934406072, + "loss": 0.5929, + "step": 58180 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005553901927113146, + "loss": 0.518, + "step": 58190 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005553477919820222, + "loss": 0.5622, + "step": 58200 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005553053912527296, + "loss": 0.6257, + "step": 58210 + }, + { + "epoch": 2.46, + "learning_rate": 0.000555262990523437, + "loss": 0.5763, + "step": 58220 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005552205897941444, + "loss": 0.5826, + "step": 58230 + }, + { + "epoch": 2.46, + "learning_rate": 0.000555178189064852, + "loss": 0.5492, + "step": 58240 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005551357883355594, + "loss": 0.6057, + "step": 58250 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005550933876062668, + "loss": 0.5343, + "step": 58260 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005550509868769743, + "loss": 0.5451, + "step": 58270 + }, + { + "epoch": 2.46, + "learning_rate": 0.0005550085861476818, + "loss": 0.4964, + "step": 58280 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005549661854183892, + "loss": 0.5515, + "step": 58290 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005549237846890967, + "loss": 0.5157, + "step": 58300 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005548813839598041, + "loss": 0.5318, + "step": 58310 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005548389832305116, + "loss": 0.489, + "step": 58320 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005547965825012191, + "loss": 0.4614, + "step": 58330 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005547541817719265, + "loss": 0.6319, + "step": 58340 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005547117810426339, + "loss": 0.622, + "step": 58350 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005546693803133414, + "loss": 0.5256, + "step": 58360 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005546269795840489, + "loss": 0.5038, + "step": 58370 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005545845788547563, + "loss": 0.5473, + "step": 58380 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005545421781254637, + "loss": 0.5692, + "step": 58390 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005544997773961713, + "loss": 0.536, + "step": 58400 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005544573766668787, + "loss": 0.5273, + "step": 58410 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005544149759375861, + "loss": 0.5627, + "step": 58420 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005543725752082936, + "loss": 0.4799, + "step": 58430 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005543301744790011, + "loss": 0.6208, + "step": 58440 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005542877737497085, + "loss": 0.5923, + "step": 58450 + }, + { + "epoch": 2.47, + "learning_rate": 0.000554245373020416, + "loss": 0.492, + "step": 58460 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005542029722911234, + "loss": 0.6026, + "step": 58470 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005541605715618309, + "loss": 0.5129, + "step": 58480 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005541181708325384, + "loss": 0.5782, + "step": 58490 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005540757701032458, + "loss": 0.5227, + "step": 58500 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005540333693739532, + "loss": 0.5224, + "step": 58510 + }, + { + "epoch": 2.47, + "learning_rate": 0.0005539909686446608, + "loss": 0.534, + "step": 58520 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005539485679153682, + "loss": 0.5077, + "step": 58530 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005539061671860756, + "loss": 0.6322, + "step": 58540 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005538637664567831, + "loss": 0.5487, + "step": 58550 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005538213657274905, + "loss": 0.5015, + "step": 58560 + }, + { + "epoch": 2.48, + "learning_rate": 0.000553778964998198, + "loss": 0.6675, + "step": 58570 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005537365642689055, + "loss": 0.4788, + "step": 58580 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005536941635396129, + "loss": 0.4767, + "step": 58590 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005536517628103204, + "loss": 0.5272, + "step": 58600 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005536093620810279, + "loss": 0.5105, + "step": 58610 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005535669613517353, + "loss": 0.5572, + "step": 58620 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005535245606224427, + "loss": 0.5376, + "step": 58630 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005534821598931501, + "loss": 0.5343, + "step": 58640 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005534397591638577, + "loss": 0.6082, + "step": 58650 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005533973584345651, + "loss": 0.5494, + "step": 58660 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005533549577052726, + "loss": 0.512, + "step": 58670 + }, + { + "epoch": 2.48, + "learning_rate": 0.00055331255697598, + "loss": 0.583, + "step": 58680 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005532701562466875, + "loss": 0.6124, + "step": 58690 + }, + { + "epoch": 2.48, + "learning_rate": 0.000553227755517395, + "loss": 0.6056, + "step": 58700 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005531853547881023, + "loss": 0.5013, + "step": 58710 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005531429540588098, + "loss": 0.5512, + "step": 58720 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005531005533295174, + "loss": 0.6035, + "step": 58730 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005530581526002247, + "loss": 0.5119, + "step": 58740 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005530157518709322, + "loss": 0.5042, + "step": 58750 + }, + { + "epoch": 2.48, + "learning_rate": 0.0005529733511416396, + "loss": 0.5363, + "step": 58760 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005529309504123471, + "loss": 0.4638, + "step": 58770 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005528885496830546, + "loss": 0.6105, + "step": 58780 + }, + { + "epoch": 2.49, + "learning_rate": 0.000552846148953762, + "loss": 0.4961, + "step": 58790 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005528037482244694, + "loss": 0.5692, + "step": 58800 + }, + { + "epoch": 2.49, + "learning_rate": 0.000552761347495177, + "loss": 0.5878, + "step": 58810 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005527189467658844, + "loss": 0.5867, + "step": 58820 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005526765460365918, + "loss": 0.5736, + "step": 58830 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005526341453072993, + "loss": 0.677, + "step": 58840 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005525917445780068, + "loss": 0.5693, + "step": 58850 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005525493438487142, + "loss": 0.6159, + "step": 58860 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005525069431194217, + "loss": 0.5254, + "step": 58870 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005524645423901291, + "loss": 0.5877, + "step": 58880 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005524221416608366, + "loss": 0.4555, + "step": 58890 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005523797409315441, + "loss": 0.6234, + "step": 58900 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005523373402022515, + "loss": 0.4738, + "step": 58910 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005522949394729589, + "loss": 0.4954, + "step": 58920 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005522525387436665, + "loss": 0.5612, + "step": 58930 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005522101380143739, + "loss": 0.6385, + "step": 58940 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005521677372850813, + "loss": 0.5604, + "step": 58950 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005521253365557888, + "loss": 0.6475, + "step": 58960 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005520829358264963, + "loss": 0.5871, + "step": 58970 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005520405350972037, + "loss": 0.6235, + "step": 58980 + }, + { + "epoch": 2.49, + "learning_rate": 0.0005519981343679112, + "loss": 0.5908, + "step": 58990 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005519557336386186, + "loss": 0.5087, + "step": 59000 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005519133329093261, + "loss": 0.5674, + "step": 59010 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005518709321800336, + "loss": 0.6272, + "step": 59020 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005518285314507409, + "loss": 0.59, + "step": 59030 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005517861307214484, + "loss": 0.5812, + "step": 59040 + }, + { + "epoch": 2.5, + "learning_rate": 0.000551743729992156, + "loss": 0.5477, + "step": 59050 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005517013292628633, + "loss": 0.5762, + "step": 59060 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005516589285335708, + "loss": 0.5223, + "step": 59070 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005516165278042783, + "loss": 0.5378, + "step": 59080 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005515741270749857, + "loss": 0.5626, + "step": 59090 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005515317263456932, + "loss": 0.596, + "step": 59100 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005514893256164007, + "loss": 0.6499, + "step": 59110 + }, + { + "epoch": 2.5, + "learning_rate": 0.000551446924887108, + "loss": 0.6724, + "step": 59120 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005514045241578156, + "loss": 0.6093, + "step": 59130 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005513621234285231, + "loss": 0.574, + "step": 59140 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005513197226992304, + "loss": 0.5883, + "step": 59150 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005512773219699379, + "loss": 0.5662, + "step": 59160 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005512349212406453, + "loss": 0.5895, + "step": 59170 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005511925205113528, + "loss": 0.5683, + "step": 59180 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005511501197820603, + "loss": 0.5199, + "step": 59190 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005511077190527677, + "loss": 0.5616, + "step": 59200 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005510653183234752, + "loss": 0.5707, + "step": 59210 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005510229175941827, + "loss": 0.5316, + "step": 59220 + }, + { + "epoch": 2.5, + "learning_rate": 0.0005509805168648901, + "loss": 0.5373, + "step": 59230 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005509381161355975, + "loss": 0.5941, + "step": 59240 + }, + { + "epoch": 2.51, + "learning_rate": 0.000550895715406305, + "loss": 0.5494, + "step": 59250 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005508533146770125, + "loss": 0.5411, + "step": 59260 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005508109139477199, + "loss": 0.5132, + "step": 59270 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005507685132184274, + "loss": 0.6374, + "step": 59280 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005507261124891348, + "loss": 0.592, + "step": 59290 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005506837117598423, + "loss": 0.5795, + "step": 59300 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005506413110305498, + "loss": 0.5276, + "step": 59310 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005505989103012571, + "loss": 0.5402, + "step": 59320 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005505565095719646, + "loss": 0.5217, + "step": 59330 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005505141088426722, + "loss": 0.667, + "step": 59340 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005504717081133795, + "loss": 0.6444, + "step": 59350 + }, + { + "epoch": 2.51, + "learning_rate": 0.000550429307384087, + "loss": 0.5273, + "step": 59360 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005503869066547945, + "loss": 0.5754, + "step": 59370 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005503445059255019, + "loss": 0.5935, + "step": 59380 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005503021051962094, + "loss": 0.5677, + "step": 59390 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005502597044669169, + "loss": 0.5145, + "step": 59400 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005502173037376242, + "loss": 0.5718, + "step": 59410 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005501749030083318, + "loss": 0.4751, + "step": 59420 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005501325022790393, + "loss": 0.5026, + "step": 59430 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005500901015497466, + "loss": 0.4761, + "step": 59440 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005500477008204541, + "loss": 0.4975, + "step": 59450 + }, + { + "epoch": 2.51, + "learning_rate": 0.0005500053000911617, + "loss": 0.5866, + "step": 59460 + }, + { + "epoch": 2.51, + "learning_rate": 0.000549962899361869, + "loss": 0.4963, + "step": 59470 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005499204986325765, + "loss": 0.534, + "step": 59480 + }, + { + "epoch": 2.52, + "learning_rate": 0.000549878097903284, + "loss": 0.579, + "step": 59490 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005498356971739914, + "loss": 0.5819, + "step": 59500 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005497932964446989, + "loss": 0.5844, + "step": 59510 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005497508957154064, + "loss": 0.5217, + "step": 59520 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005497084949861137, + "loss": 0.5529, + "step": 59530 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005496660942568213, + "loss": 0.6351, + "step": 59540 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005496236935275288, + "loss": 0.4911, + "step": 59550 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005495812927982361, + "loss": 0.5049, + "step": 59560 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005495388920689436, + "loss": 0.5857, + "step": 59570 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005494964913396512, + "loss": 0.5859, + "step": 59580 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005494540906103585, + "loss": 0.5133, + "step": 59590 + }, + { + "epoch": 2.52, + "learning_rate": 0.000549411689881066, + "loss": 0.556, + "step": 59600 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005493692891517735, + "loss": 0.5246, + "step": 59610 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005493268884224809, + "loss": 0.6271, + "step": 59620 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005492844876931884, + "loss": 0.5647, + "step": 59630 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005492420869638957, + "loss": 0.6235, + "step": 59640 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005491996862346032, + "loss": 0.5607, + "step": 59650 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005491572855053108, + "loss": 0.5519, + "step": 59660 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005491148847760181, + "loss": 0.4775, + "step": 59670 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005490724840467256, + "loss": 0.531, + "step": 59680 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005490300833174331, + "loss": 0.5525, + "step": 59690 + }, + { + "epoch": 2.52, + "learning_rate": 0.0005489876825881405, + "loss": 0.4457, + "step": 59700 + }, + { + "epoch": 2.53, + "learning_rate": 0.000548945281858848, + "loss": 0.5814, + "step": 59710 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005489028811295555, + "loss": 0.6167, + "step": 59720 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005488604804002628, + "loss": 0.5607, + "step": 59730 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005488180796709704, + "loss": 0.5838, + "step": 59740 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005487756789416779, + "loss": 0.567, + "step": 59750 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005487332782123852, + "loss": 0.5789, + "step": 59760 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005486908774830927, + "loss": 0.5177, + "step": 59770 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005486484767538002, + "loss": 0.6863, + "step": 59780 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005486060760245076, + "loss": 0.5987, + "step": 59790 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005485636752952151, + "loss": 0.6308, + "step": 59800 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005485212745659226, + "loss": 0.484, + "step": 59810 + }, + { + "epoch": 2.53, + "learning_rate": 0.00054847887383663, + "loss": 0.495, + "step": 59820 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005484364731073375, + "loss": 0.5858, + "step": 59830 + }, + { + "epoch": 2.53, + "learning_rate": 0.000548394072378045, + "loss": 0.5158, + "step": 59840 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005483516716487523, + "loss": 0.5626, + "step": 59850 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005483092709194598, + "loss": 0.6279, + "step": 59860 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005482668701901674, + "loss": 0.5963, + "step": 59870 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005482244694608747, + "loss": 0.722, + "step": 59880 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005481820687315822, + "loss": 0.5392, + "step": 59890 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005481396680022897, + "loss": 0.5556, + "step": 59900 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005480972672729971, + "loss": 0.5845, + "step": 59910 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005480548665437046, + "loss": 0.6377, + "step": 59920 + }, + { + "epoch": 2.53, + "learning_rate": 0.000548012465814412, + "loss": 0.6205, + "step": 59930 + }, + { + "epoch": 2.53, + "learning_rate": 0.0005479700650851195, + "loss": 0.4524, + "step": 59940 + }, + { + "epoch": 2.54, + "learning_rate": 0.000547927664355827, + "loss": 0.4696, + "step": 59950 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005478852636265344, + "loss": 0.5068, + "step": 59960 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005478428628972418, + "loss": 0.5286, + "step": 59970 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005478004621679493, + "loss": 0.5013, + "step": 59980 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005477580614386568, + "loss": 0.576, + "step": 59990 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005477156607093642, + "loss": 0.597, + "step": 60000 + }, + { + "epoch": 2.54, + "eval_loss": 0.614812970161438, + "eval_runtime": 337.7266, + "eval_samples_per_second": 15.56, + "eval_steps_per_second": 3.891, + "step": 60000 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005476732599800717, + "loss": 0.5573, + "step": 60010 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005476308592507792, + "loss": 0.5024, + "step": 60020 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005475884585214866, + "loss": 0.5397, + "step": 60030 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005475460577921941, + "loss": 0.5716, + "step": 60040 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005475036570629014, + "loss": 0.5294, + "step": 60050 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005474612563336089, + "loss": 0.5466, + "step": 60060 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005474188556043165, + "loss": 0.5732, + "step": 60070 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005473764548750238, + "loss": 0.578, + "step": 60080 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005473340541457313, + "loss": 0.515, + "step": 60090 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005472916534164388, + "loss": 0.5342, + "step": 60100 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005472492526871462, + "loss": 0.5687, + "step": 60110 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005472068519578537, + "loss": 0.5727, + "step": 60120 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005471644512285612, + "loss": 0.5957, + "step": 60130 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005471220504992685, + "loss": 0.5407, + "step": 60140 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005470796497699761, + "loss": 0.6212, + "step": 60150 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005470372490406836, + "loss": 0.4535, + "step": 60160 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005469948483113909, + "loss": 0.5337, + "step": 60170 + }, + { + "epoch": 2.54, + "learning_rate": 0.0005469524475820984, + "loss": 0.5265, + "step": 60180 + }, + { + "epoch": 2.55, + "learning_rate": 0.000546910046852806, + "loss": 0.5844, + "step": 60190 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005468676461235133, + "loss": 0.4787, + "step": 60200 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005468252453942208, + "loss": 0.5991, + "step": 60210 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005467828446649283, + "loss": 0.5654, + "step": 60220 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005467404439356357, + "loss": 0.526, + "step": 60230 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005466980432063432, + "loss": 0.5136, + "step": 60240 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005466556424770506, + "loss": 0.5956, + "step": 60250 + }, + { + "epoch": 2.55, + "learning_rate": 0.000546613241747758, + "loss": 0.5169, + "step": 60260 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005465708410184656, + "loss": 0.5977, + "step": 60270 + }, + { + "epoch": 2.55, + "learning_rate": 0.000546528440289173, + "loss": 0.5924, + "step": 60280 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005464860395598804, + "loss": 0.5212, + "step": 60290 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005464436388305879, + "loss": 0.5982, + "step": 60300 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005464012381012954, + "loss": 0.6596, + "step": 60310 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005463588373720028, + "loss": 0.4693, + "step": 60320 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005463164366427103, + "loss": 0.4414, + "step": 60330 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005462740359134177, + "loss": 0.5957, + "step": 60340 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005462316351841252, + "loss": 0.6169, + "step": 60350 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005461892344548327, + "loss": 0.49, + "step": 60360 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005461468337255401, + "loss": 0.5926, + "step": 60370 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005461044329962475, + "loss": 0.5905, + "step": 60380 + }, + { + "epoch": 2.55, + "learning_rate": 0.000546062032266955, + "loss": 0.4116, + "step": 60390 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005460196315376625, + "loss": 0.5826, + "step": 60400 + }, + { + "epoch": 2.55, + "learning_rate": 0.0005459772308083699, + "loss": 0.5376, + "step": 60410 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005459348300790774, + "loss": 0.4114, + "step": 60420 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005458924293497849, + "loss": 0.5173, + "step": 60430 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005458500286204923, + "loss": 0.504, + "step": 60440 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005458076278911998, + "loss": 0.4993, + "step": 60450 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005457652271619072, + "loss": 0.5309, + "step": 60460 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005457228264326147, + "loss": 0.4482, + "step": 60470 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005456804257033222, + "loss": 0.5491, + "step": 60480 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005456380249740296, + "loss": 0.5161, + "step": 60490 + }, + { + "epoch": 2.56, + "learning_rate": 0.000545595624244737, + "loss": 0.5631, + "step": 60500 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005455532235154445, + "loss": 0.5399, + "step": 60510 + }, + { + "epoch": 2.56, + "learning_rate": 0.000545510822786152, + "loss": 0.5324, + "step": 60520 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005454684220568594, + "loss": 0.6212, + "step": 60530 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005454260213275668, + "loss": 0.4993, + "step": 60540 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005453836205982744, + "loss": 0.5176, + "step": 60550 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005453412198689818, + "loss": 0.6088, + "step": 60560 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005452988191396892, + "loss": 0.5554, + "step": 60570 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005452564184103966, + "loss": 0.5203, + "step": 60580 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005452140176811041, + "loss": 0.5476, + "step": 60590 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005451716169518116, + "loss": 0.5418, + "step": 60600 + }, + { + "epoch": 2.56, + "learning_rate": 0.000545129216222519, + "loss": 0.509, + "step": 60610 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005450868154932265, + "loss": 0.5772, + "step": 60620 + }, + { + "epoch": 2.56, + "learning_rate": 0.000545044414763934, + "loss": 0.5575, + "step": 60630 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005450020140346414, + "loss": 0.5033, + "step": 60640 + }, + { + "epoch": 2.56, + "learning_rate": 0.0005449596133053489, + "loss": 0.6112, + "step": 60650 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005449172125760563, + "loss": 0.5542, + "step": 60660 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005448748118467637, + "loss": 0.5451, + "step": 60670 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005448324111174713, + "loss": 0.4488, + "step": 60680 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005447900103881787, + "loss": 0.5016, + "step": 60690 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005447476096588861, + "loss": 0.4687, + "step": 60700 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005447052089295936, + "loss": 0.5622, + "step": 60710 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005446628082003011, + "loss": 0.514, + "step": 60720 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005446204074710085, + "loss": 0.6491, + "step": 60730 + }, + { + "epoch": 2.57, + "learning_rate": 0.000544578006741716, + "loss": 0.6118, + "step": 60740 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005445356060124234, + "loss": 0.5092, + "step": 60750 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005444932052831309, + "loss": 0.5162, + "step": 60760 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005444508045538384, + "loss": 0.4835, + "step": 60770 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005444084038245458, + "loss": 0.5225, + "step": 60780 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005443660030952532, + "loss": 0.5072, + "step": 60790 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005443236023659608, + "loss": 0.5549, + "step": 60800 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005442812016366682, + "loss": 0.5776, + "step": 60810 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005442388009073756, + "loss": 0.5537, + "step": 60820 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005441964001780831, + "loss": 0.4869, + "step": 60830 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005441539994487906, + "loss": 0.6895, + "step": 60840 + }, + { + "epoch": 2.57, + "learning_rate": 0.000544111598719498, + "loss": 0.5598, + "step": 60850 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005440691979902054, + "loss": 0.4915, + "step": 60860 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005440267972609129, + "loss": 0.505, + "step": 60870 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005439843965316204, + "loss": 0.6275, + "step": 60880 + }, + { + "epoch": 2.57, + "learning_rate": 0.0005439419958023278, + "loss": 0.4553, + "step": 60890 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005438995950730353, + "loss": 0.5269, + "step": 60900 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005438571943437427, + "loss": 0.6661, + "step": 60910 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005438147936144502, + "loss": 0.5319, + "step": 60920 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005437723928851577, + "loss": 0.5353, + "step": 60930 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005437299921558651, + "loss": 0.5864, + "step": 60940 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005436875914265725, + "loss": 0.678, + "step": 60950 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005436451906972801, + "loss": 0.4873, + "step": 60960 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005436027899679875, + "loss": 0.5471, + "step": 60970 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005435603892386949, + "loss": 0.5628, + "step": 60980 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005435179885094024, + "loss": 0.5451, + "step": 60990 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005434755877801099, + "loss": 0.5602, + "step": 61000 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005434331870508173, + "loss": 0.5452, + "step": 61010 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005433907863215248, + "loss": 0.4742, + "step": 61020 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005433483855922322, + "loss": 0.5716, + "step": 61030 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005433059848629397, + "loss": 0.5707, + "step": 61040 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005432635841336472, + "loss": 0.468, + "step": 61050 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005432211834043546, + "loss": 0.5106, + "step": 61060 + }, + { + "epoch": 2.58, + "learning_rate": 0.000543178782675062, + "loss": 0.5111, + "step": 61070 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005431363819457696, + "loss": 0.4967, + "step": 61080 + }, + { + "epoch": 2.58, + "learning_rate": 0.000543093981216477, + "loss": 0.4863, + "step": 61090 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005430515804871844, + "loss": 0.6062, + "step": 61100 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005430091797578918, + "loss": 0.5237, + "step": 61110 + }, + { + "epoch": 2.58, + "learning_rate": 0.0005429667790285993, + "loss": 0.4689, + "step": 61120 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005429243782993068, + "loss": 0.5734, + "step": 61130 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005428819775700142, + "loss": 0.4921, + "step": 61140 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005428395768407217, + "loss": 0.5534, + "step": 61150 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005427971761114292, + "loss": 0.6353, + "step": 61160 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005427547753821366, + "loss": 0.645, + "step": 61170 + }, + { + "epoch": 2.59, + "learning_rate": 0.000542712374652844, + "loss": 0.6017, + "step": 61180 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005426699739235515, + "loss": 0.5586, + "step": 61190 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005426275731942589, + "loss": 0.5983, + "step": 61200 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005425851724649664, + "loss": 0.5314, + "step": 61210 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005425427717356739, + "loss": 0.5566, + "step": 61220 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005425003710063813, + "loss": 0.5726, + "step": 61230 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005424579702770888, + "loss": 0.5729, + "step": 61240 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005424155695477963, + "loss": 0.6439, + "step": 61250 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005423731688185037, + "loss": 0.4948, + "step": 61260 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005423307680892111, + "loss": 0.6874, + "step": 61270 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005422883673599186, + "loss": 0.5299, + "step": 61280 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005422459666306261, + "loss": 0.4886, + "step": 61290 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005422035659013335, + "loss": 0.6245, + "step": 61300 + }, + { + "epoch": 2.59, + "learning_rate": 0.000542161165172041, + "loss": 0.5564, + "step": 61310 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005421187644427484, + "loss": 0.6703, + "step": 61320 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005420763637134559, + "loss": 0.6186, + "step": 61330 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005420339629841634, + "loss": 0.6104, + "step": 61340 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005419915622548708, + "loss": 0.5586, + "step": 61350 + }, + { + "epoch": 2.59, + "learning_rate": 0.0005419491615255782, + "loss": 0.5316, + "step": 61360 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005419067607962858, + "loss": 0.4221, + "step": 61370 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005418643600669932, + "loss": 0.5421, + "step": 61380 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005418219593377006, + "loss": 0.5428, + "step": 61390 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005417795586084081, + "loss": 0.5993, + "step": 61400 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005417371578791156, + "loss": 0.5323, + "step": 61410 + }, + { + "epoch": 2.6, + "learning_rate": 0.000541694757149823, + "loss": 0.5041, + "step": 61420 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005416523564205305, + "loss": 0.5253, + "step": 61430 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005416099556912379, + "loss": 0.5677, + "step": 61440 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005415675549619454, + "loss": 0.641, + "step": 61450 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005415251542326529, + "loss": 0.6027, + "step": 61460 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005414827535033602, + "loss": 0.5493, + "step": 61470 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005414403527740677, + "loss": 0.4953, + "step": 61480 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005413979520447753, + "loss": 0.5974, + "step": 61490 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005413555513154826, + "loss": 0.5783, + "step": 61500 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005413131505861901, + "loss": 0.558, + "step": 61510 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005412707498568975, + "loss": 0.5972, + "step": 61520 + }, + { + "epoch": 2.6, + "learning_rate": 0.000541228349127605, + "loss": 0.5828, + "step": 61530 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005411859483983125, + "loss": 0.5844, + "step": 61540 + }, + { + "epoch": 2.6, + "learning_rate": 0.00054114354766902, + "loss": 0.5492, + "step": 61550 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005411011469397273, + "loss": 0.5726, + "step": 61560 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005410587462104349, + "loss": 0.6693, + "step": 61570 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005410163454811423, + "loss": 0.5503, + "step": 61580 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005409739447518497, + "loss": 0.5151, + "step": 61590 + }, + { + "epoch": 2.6, + "learning_rate": 0.0005409315440225572, + "loss": 0.5346, + "step": 61600 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005408891432932648, + "loss": 0.4459, + "step": 61610 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005408467425639721, + "loss": 0.4908, + "step": 61620 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005408043418346796, + "loss": 0.5772, + "step": 61630 + }, + { + "epoch": 2.61, + "learning_rate": 0.000540761941105387, + "loss": 0.5853, + "step": 61640 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005407195403760945, + "loss": 0.5015, + "step": 61650 + }, + { + "epoch": 2.61, + "learning_rate": 0.000540677139646802, + "loss": 0.5526, + "step": 61660 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005406347389175094, + "loss": 0.5633, + "step": 61670 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005405923381882168, + "loss": 0.5037, + "step": 61680 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005405499374589244, + "loss": 0.5331, + "step": 61690 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005405075367296318, + "loss": 0.5116, + "step": 61700 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005404651360003392, + "loss": 0.5214, + "step": 61710 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005404227352710467, + "loss": 0.5925, + "step": 61720 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005403803345417541, + "loss": 0.5107, + "step": 61730 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005403379338124616, + "loss": 0.6, + "step": 61740 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005402955330831691, + "loss": 0.5692, + "step": 61750 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005402531323538765, + "loss": 0.6205, + "step": 61760 + }, + { + "epoch": 2.61, + "learning_rate": 0.000540210731624584, + "loss": 0.551, + "step": 61770 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005401683308952915, + "loss": 0.6074, + "step": 61780 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005401259301659988, + "loss": 0.5455, + "step": 61790 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005400835294367063, + "loss": 0.5138, + "step": 61800 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005400411287074138, + "loss": 0.498, + "step": 61810 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005399987279781212, + "loss": 0.6401, + "step": 61820 + }, + { + "epoch": 2.61, + "learning_rate": 0.0005399563272488287, + "loss": 0.5126, + "step": 61830 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005399139265195362, + "loss": 0.5412, + "step": 61840 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005398715257902436, + "loss": 0.5083, + "step": 61850 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005398291250609511, + "loss": 0.5974, + "step": 61860 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005397867243316586, + "loss": 0.628, + "step": 61870 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005397443236023659, + "loss": 0.5436, + "step": 61880 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005397019228730734, + "loss": 0.5576, + "step": 61890 + }, + { + "epoch": 2.62, + "learning_rate": 0.000539659522143781, + "loss": 0.5335, + "step": 61900 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005396171214144883, + "loss": 0.6635, + "step": 61910 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005395747206851958, + "loss": 0.5945, + "step": 61920 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005395323199559033, + "loss": 0.5894, + "step": 61930 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005394899192266107, + "loss": 0.5478, + "step": 61940 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005394475184973182, + "loss": 0.5898, + "step": 61950 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005394051177680257, + "loss": 0.4998, + "step": 61960 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005393627170387331, + "loss": 0.5604, + "step": 61970 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005393203163094406, + "loss": 0.5079, + "step": 61980 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005392779155801481, + "loss": 0.624, + "step": 61990 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005392355148508554, + "loss": 0.6419, + "step": 62000 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005391931141215629, + "loss": 0.5636, + "step": 62010 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005391507133922705, + "loss": 0.5604, + "step": 62020 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005391083126629778, + "loss": 0.4566, + "step": 62030 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005390659119336853, + "loss": 0.6616, + "step": 62040 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005390235112043927, + "loss": 0.5795, + "step": 62050 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005389811104751002, + "loss": 0.6493, + "step": 62060 + }, + { + "epoch": 2.62, + "learning_rate": 0.0005389387097458077, + "loss": 0.4821, + "step": 62070 + }, + { + "epoch": 2.63, + "learning_rate": 0.000538896309016515, + "loss": 0.5762, + "step": 62080 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005388539082872225, + "loss": 0.5914, + "step": 62090 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005388115075579301, + "loss": 0.4733, + "step": 62100 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005387691068286374, + "loss": 0.5435, + "step": 62110 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005387267060993449, + "loss": 0.5099, + "step": 62120 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005386843053700524, + "loss": 0.514, + "step": 62130 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005386419046407598, + "loss": 0.6153, + "step": 62140 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005385995039114673, + "loss": 0.557, + "step": 62150 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005385571031821748, + "loss": 0.5452, + "step": 62160 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005385147024528821, + "loss": 0.6296, + "step": 62170 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005384723017235897, + "loss": 0.5771, + "step": 62180 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005384299009942972, + "loss": 0.522, + "step": 62190 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005383875002650045, + "loss": 0.5393, + "step": 62200 + }, + { + "epoch": 2.63, + "learning_rate": 0.000538345099535712, + "loss": 0.6076, + "step": 62210 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005383026988064196, + "loss": 0.6673, + "step": 62220 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005382602980771269, + "loss": 0.6234, + "step": 62230 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005382178973478344, + "loss": 0.5682, + "step": 62240 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005381754966185419, + "loss": 0.585, + "step": 62250 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005381330958892493, + "loss": 0.5313, + "step": 62260 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005380906951599568, + "loss": 0.5163, + "step": 62270 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005380482944306643, + "loss": 0.5368, + "step": 62280 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005380058937013716, + "loss": 0.5773, + "step": 62290 + }, + { + "epoch": 2.63, + "learning_rate": 0.0005379634929720792, + "loss": 0.4948, + "step": 62300 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005379210922427867, + "loss": 0.5364, + "step": 62310 + }, + { + "epoch": 2.64, + "learning_rate": 0.000537878691513494, + "loss": 0.5809, + "step": 62320 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005378362907842015, + "loss": 0.6497, + "step": 62330 + }, + { + "epoch": 2.64, + "learning_rate": 0.000537793890054909, + "loss": 0.5266, + "step": 62340 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005377514893256164, + "loss": 0.5228, + "step": 62350 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005377090885963239, + "loss": 0.5735, + "step": 62360 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005376666878670314, + "loss": 0.5673, + "step": 62370 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005376242871377388, + "loss": 0.5818, + "step": 62380 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005375818864084463, + "loss": 0.5311, + "step": 62390 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005375394856791537, + "loss": 0.568, + "step": 62400 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005374970849498611, + "loss": 0.5736, + "step": 62410 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005374546842205686, + "loss": 0.4852, + "step": 62420 + }, + { + "epoch": 2.64, + "learning_rate": 0.000537412283491276, + "loss": 0.5895, + "step": 62430 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005373698827619835, + "loss": 0.4906, + "step": 62440 + }, + { + "epoch": 2.64, + "learning_rate": 0.000537327482032691, + "loss": 0.505, + "step": 62450 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005372850813033985, + "loss": 0.5143, + "step": 62460 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005372426805741059, + "loss": 0.5496, + "step": 62470 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005372002798448134, + "loss": 0.5921, + "step": 62480 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005371578791155207, + "loss": 0.5165, + "step": 62490 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005371154783862283, + "loss": 0.568, + "step": 62500 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005370730776569358, + "loss": 0.613, + "step": 62510 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005370306769276431, + "loss": 0.5791, + "step": 62520 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005369882761983506, + "loss": 0.5979, + "step": 62530 + }, + { + "epoch": 2.64, + "learning_rate": 0.0005369458754690581, + "loss": 0.5731, + "step": 62540 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005369034747397655, + "loss": 0.5789, + "step": 62550 + }, + { + "epoch": 2.65, + "learning_rate": 0.000536861074010473, + "loss": 0.5927, + "step": 62560 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005368186732811805, + "loss": 0.6206, + "step": 62570 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005367762725518879, + "loss": 0.4981, + "step": 62580 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005367338718225954, + "loss": 0.4971, + "step": 62590 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005366914710933029, + "loss": 0.5686, + "step": 62600 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005366490703640102, + "loss": 0.5007, + "step": 62610 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005366066696347177, + "loss": 0.6311, + "step": 62620 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005365642689054253, + "loss": 0.5435, + "step": 62630 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005365218681761326, + "loss": 0.5352, + "step": 62640 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005364794674468401, + "loss": 0.5109, + "step": 62650 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005364370667175476, + "loss": 0.5277, + "step": 62660 + }, + { + "epoch": 2.65, + "learning_rate": 0.000536394665988255, + "loss": 0.6674, + "step": 62670 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005363522652589625, + "loss": 0.4522, + "step": 62680 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005363098645296699, + "loss": 0.5409, + "step": 62690 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005362674638003773, + "loss": 0.4913, + "step": 62700 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005362250630710849, + "loss": 0.6608, + "step": 62710 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005361826623417923, + "loss": 0.4964, + "step": 62720 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005361402616124997, + "loss": 0.5443, + "step": 62730 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005360978608832072, + "loss": 0.627, + "step": 62740 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005360554601539147, + "loss": 0.5027, + "step": 62750 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005360130594246221, + "loss": 0.553, + "step": 62760 + }, + { + "epoch": 2.65, + "learning_rate": 0.0005359706586953296, + "loss": 0.612, + "step": 62770 + }, + { + "epoch": 2.65, + "learning_rate": 0.000535928257966037, + "loss": 0.4994, + "step": 62780 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005358858572367445, + "loss": 0.4563, + "step": 62790 + }, + { + "epoch": 2.66, + "learning_rate": 0.000535843456507452, + "loss": 0.513, + "step": 62800 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005358010557781594, + "loss": 0.5919, + "step": 62810 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005357586550488668, + "loss": 0.478, + "step": 62820 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005357162543195744, + "loss": 0.5127, + "step": 62830 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005356738535902818, + "loss": 0.5484, + "step": 62840 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005356314528609892, + "loss": 0.6286, + "step": 62850 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005355890521316967, + "loss": 0.5532, + "step": 62860 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005355466514024042, + "loss": 0.5017, + "step": 62870 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005355042506731116, + "loss": 0.5736, + "step": 62880 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005354618499438191, + "loss": 0.5613, + "step": 62890 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005354194492145264, + "loss": 0.5709, + "step": 62900 + }, + { + "epoch": 2.66, + "learning_rate": 0.000535377048485234, + "loss": 0.6232, + "step": 62910 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005353346477559415, + "loss": 0.4514, + "step": 62920 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005352922470266488, + "loss": 0.5809, + "step": 62930 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005352498462973563, + "loss": 0.5765, + "step": 62940 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005352074455680639, + "loss": 0.5277, + "step": 62950 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005351650448387712, + "loss": 0.7146, + "step": 62960 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005351226441094787, + "loss": 0.5914, + "step": 62970 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005350802433801862, + "loss": 0.5379, + "step": 62980 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005350378426508936, + "loss": 0.6106, + "step": 62990 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005349954419216011, + "loss": 0.4814, + "step": 63000 + }, + { + "epoch": 2.66, + "learning_rate": 0.0005349530411923085, + "loss": 0.4736, + "step": 63010 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005349106404630159, + "loss": 0.6082, + "step": 63020 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005348682397337235, + "loss": 0.5278, + "step": 63030 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005348258390044309, + "loss": 0.5325, + "step": 63040 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005347834382751383, + "loss": 0.4819, + "step": 63050 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005347410375458458, + "loss": 0.491, + "step": 63060 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005346986368165533, + "loss": 0.6142, + "step": 63070 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005346562360872607, + "loss": 0.4459, + "step": 63080 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005346138353579682, + "loss": 0.6237, + "step": 63090 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005345714346286756, + "loss": 0.5468, + "step": 63100 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005345290338993831, + "loss": 0.5413, + "step": 63110 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005344866331700906, + "loss": 0.5509, + "step": 63120 + }, + { + "epoch": 2.67, + "learning_rate": 0.000534444232440798, + "loss": 0.5504, + "step": 63130 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005344018317115054, + "loss": 0.5245, + "step": 63140 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005343594309822129, + "loss": 0.4605, + "step": 63150 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005343170302529204, + "loss": 0.5651, + "step": 63160 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005342746295236278, + "loss": 0.5003, + "step": 63170 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005342322287943353, + "loss": 0.5763, + "step": 63180 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005341898280650428, + "loss": 0.5379, + "step": 63190 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005341474273357502, + "loss": 0.5683, + "step": 63200 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005341050266064577, + "loss": 0.6308, + "step": 63210 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005340626258771651, + "loss": 0.6991, + "step": 63220 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005340202251478725, + "loss": 0.5527, + "step": 63230 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005339778244185801, + "loss": 0.5219, + "step": 63240 + }, + { + "epoch": 2.67, + "learning_rate": 0.0005339354236892875, + "loss": 0.486, + "step": 63250 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005338930229599949, + "loss": 0.4883, + "step": 63260 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005338506222307024, + "loss": 0.6231, + "step": 63270 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005338082215014099, + "loss": 0.4818, + "step": 63280 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005337658207721173, + "loss": 0.502, + "step": 63290 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005337234200428247, + "loss": 0.4635, + "step": 63300 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005336810193135322, + "loss": 0.6835, + "step": 63310 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005336386185842397, + "loss": 0.5488, + "step": 63320 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005335962178549471, + "loss": 0.4601, + "step": 63330 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005335538171256546, + "loss": 0.5, + "step": 63340 + }, + { + "epoch": 2.68, + "learning_rate": 0.000533511416396362, + "loss": 0.5787, + "step": 63350 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005334690156670695, + "loss": 0.5159, + "step": 63360 + }, + { + "epoch": 2.68, + "learning_rate": 0.000533426614937777, + "loss": 0.6051, + "step": 63370 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005333842142084844, + "loss": 0.5425, + "step": 63380 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005333418134791918, + "loss": 0.4451, + "step": 63390 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005332994127498994, + "loss": 0.6021, + "step": 63400 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005332570120206068, + "loss": 0.5112, + "step": 63410 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005332146112913142, + "loss": 0.5167, + "step": 63420 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005331722105620216, + "loss": 0.5179, + "step": 63430 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005331298098327292, + "loss": 0.5562, + "step": 63440 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005330874091034366, + "loss": 0.4454, + "step": 63450 + }, + { + "epoch": 2.68, + "learning_rate": 0.000533045008374144, + "loss": 0.6274, + "step": 63460 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005330026076448515, + "loss": 0.542, + "step": 63470 + }, + { + "epoch": 2.68, + "learning_rate": 0.000532960206915559, + "loss": 0.4905, + "step": 63480 + }, + { + "epoch": 2.68, + "learning_rate": 0.0005329178061862664, + "loss": 0.7345, + "step": 63490 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005328754054569739, + "loss": 0.519, + "step": 63500 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005328330047276813, + "loss": 0.5283, + "step": 63510 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005327906039983888, + "loss": 0.506, + "step": 63520 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005327482032690963, + "loss": 0.6433, + "step": 63530 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005327058025398037, + "loss": 0.5913, + "step": 63540 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005326634018105111, + "loss": 0.5734, + "step": 63550 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005326210010812187, + "loss": 0.5057, + "step": 63560 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005325786003519261, + "loss": 0.5879, + "step": 63570 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005325361996226335, + "loss": 0.5597, + "step": 63580 + }, + { + "epoch": 2.69, + "learning_rate": 0.000532493798893341, + "loss": 0.4856, + "step": 63590 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005324513981640485, + "loss": 0.5882, + "step": 63600 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005324089974347559, + "loss": 0.5134, + "step": 63610 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005323665967054633, + "loss": 0.6733, + "step": 63620 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005323241959761708, + "loss": 0.5876, + "step": 63630 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005322817952468783, + "loss": 0.6421, + "step": 63640 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005322393945175857, + "loss": 0.4651, + "step": 63650 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005321969937882932, + "loss": 0.5966, + "step": 63660 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005321545930590006, + "loss": 0.4412, + "step": 63670 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005321121923297081, + "loss": 0.5575, + "step": 63680 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005320697916004156, + "loss": 0.5714, + "step": 63690 + }, + { + "epoch": 2.69, + "learning_rate": 0.000532027390871123, + "loss": 0.6065, + "step": 63700 + }, + { + "epoch": 2.69, + "learning_rate": 0.0005319849901418304, + "loss": 0.5747, + "step": 63710 + }, + { + "epoch": 2.69, + "learning_rate": 0.000531942589412538, + "loss": 0.5839, + "step": 63720 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005319001886832454, + "loss": 0.6187, + "step": 63730 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005318577879539528, + "loss": 0.6066, + "step": 63740 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005318153872246603, + "loss": 0.5528, + "step": 63750 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005317729864953677, + "loss": 0.5183, + "step": 63760 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005317305857660752, + "loss": 0.5429, + "step": 63770 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005316881850367827, + "loss": 0.5855, + "step": 63780 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005316457843074901, + "loss": 0.6049, + "step": 63790 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005316033835781976, + "loss": 0.5627, + "step": 63800 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005315609828489051, + "loss": 0.5055, + "step": 63810 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005315185821196125, + "loss": 0.5854, + "step": 63820 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005314761813903199, + "loss": 0.5973, + "step": 63830 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005314337806610274, + "loss": 0.5174, + "step": 63840 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005313913799317349, + "loss": 0.6365, + "step": 63850 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005313489792024423, + "loss": 0.5359, + "step": 63860 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005313065784731498, + "loss": 0.6015, + "step": 63870 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005312641777438572, + "loss": 0.4582, + "step": 63880 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005312217770145647, + "loss": 0.4968, + "step": 63890 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005311793762852722, + "loss": 0.6134, + "step": 63900 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005311369755559796, + "loss": 0.5145, + "step": 63910 + }, + { + "epoch": 2.7, + "learning_rate": 0.000531094574826687, + "loss": 0.5066, + "step": 63920 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005310521740973946, + "loss": 0.5612, + "step": 63930 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005310097733681019, + "loss": 0.4812, + "step": 63940 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005309673726388094, + "loss": 0.5963, + "step": 63950 + }, + { + "epoch": 2.7, + "learning_rate": 0.0005309249719095168, + "loss": 0.6209, + "step": 63960 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005308825711802243, + "loss": 0.577, + "step": 63970 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005308401704509318, + "loss": 0.5613, + "step": 63980 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005307977697216392, + "loss": 0.5283, + "step": 63990 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005307553689923467, + "loss": 0.592, + "step": 64000 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005307129682630542, + "loss": 0.4972, + "step": 64010 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005306705675337616, + "loss": 0.501, + "step": 64020 + }, + { + "epoch": 2.71, + "learning_rate": 0.000530628166804469, + "loss": 0.6224, + "step": 64030 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005305857660751765, + "loss": 0.641, + "step": 64040 + }, + { + "epoch": 2.71, + "learning_rate": 0.000530543365345884, + "loss": 0.5952, + "step": 64050 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005305009646165914, + "loss": 0.6351, + "step": 64060 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005304585638872989, + "loss": 0.641, + "step": 64070 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005304161631580063, + "loss": 0.5535, + "step": 64080 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005303737624287138, + "loss": 0.4981, + "step": 64090 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005303313616994213, + "loss": 0.5695, + "step": 64100 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005302889609701287, + "loss": 0.5256, + "step": 64110 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005302465602408361, + "loss": 0.565, + "step": 64120 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005302041595115437, + "loss": 0.5556, + "step": 64130 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005301617587822511, + "loss": 0.489, + "step": 64140 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005301193580529585, + "loss": 0.5536, + "step": 64150 + }, + { + "epoch": 2.71, + "learning_rate": 0.000530076957323666, + "loss": 0.6385, + "step": 64160 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005300345565943735, + "loss": 0.5813, + "step": 64170 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005299921558650809, + "loss": 0.559, + "step": 64180 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005299497551357884, + "loss": 0.5044, + "step": 64190 + }, + { + "epoch": 2.71, + "learning_rate": 0.0005299073544064958, + "loss": 0.5908, + "step": 64200 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005298649536772033, + "loss": 0.5873, + "step": 64210 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005298225529479108, + "loss": 0.5167, + "step": 64220 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005297801522186181, + "loss": 0.5785, + "step": 64230 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005297377514893256, + "loss": 0.6001, + "step": 64240 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005296953507600332, + "loss": 0.5348, + "step": 64250 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005296529500307405, + "loss": 0.5322, + "step": 64260 + }, + { + "epoch": 2.72, + "learning_rate": 0.000529610549301448, + "loss": 0.6614, + "step": 64270 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005295681485721555, + "loss": 0.5957, + "step": 64280 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005295257478428629, + "loss": 0.5569, + "step": 64290 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005294833471135704, + "loss": 0.4545, + "step": 64300 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005294409463842779, + "loss": 0.5741, + "step": 64310 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005293985456549852, + "loss": 0.5769, + "step": 64320 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005293561449256928, + "loss": 0.5855, + "step": 64330 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005293137441964003, + "loss": 0.5467, + "step": 64340 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005292713434671076, + "loss": 0.5051, + "step": 64350 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005292289427378151, + "loss": 0.5, + "step": 64360 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005291865420085225, + "loss": 0.6119, + "step": 64370 + }, + { + "epoch": 2.72, + "learning_rate": 0.00052914414127923, + "loss": 0.5282, + "step": 64380 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005291017405499375, + "loss": 0.6177, + "step": 64390 + }, + { + "epoch": 2.72, + "learning_rate": 0.000529059339820645, + "loss": 0.5095, + "step": 64400 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005290169390913524, + "loss": 0.5487, + "step": 64410 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005289745383620599, + "loss": 0.59, + "step": 64420 + }, + { + "epoch": 2.72, + "learning_rate": 0.0005289321376327673, + "loss": 0.5499, + "step": 64430 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005288897369034747, + "loss": 0.5957, + "step": 64440 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005288473361741823, + "loss": 0.6216, + "step": 64450 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005288049354448897, + "loss": 0.5405, + "step": 64460 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005287625347155971, + "loss": 0.5275, + "step": 64470 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005287201339863046, + "loss": 0.5763, + "step": 64480 + }, + { + "epoch": 2.73, + "learning_rate": 0.000528677733257012, + "loss": 0.6097, + "step": 64490 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005286353325277195, + "loss": 0.4259, + "step": 64500 + }, + { + "epoch": 2.73, + "learning_rate": 0.000528592931798427, + "loss": 0.5273, + "step": 64510 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005285505310691344, + "loss": 0.5317, + "step": 64520 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005285081303398419, + "loss": 0.591, + "step": 64530 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005284657296105494, + "loss": 0.5253, + "step": 64540 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005284233288812567, + "loss": 0.618, + "step": 64550 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005283809281519642, + "loss": 0.638, + "step": 64560 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005283385274226717, + "loss": 0.5409, + "step": 64570 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005282961266933791, + "loss": 0.5381, + "step": 64580 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005282537259640866, + "loss": 0.5577, + "step": 64590 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005282113252347941, + "loss": 0.527, + "step": 64600 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005281689245055015, + "loss": 0.6125, + "step": 64610 + }, + { + "epoch": 2.73, + "learning_rate": 0.000528126523776209, + "loss": 0.4781, + "step": 64620 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005280841230469165, + "loss": 0.5359, + "step": 64630 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005280417223176238, + "loss": 0.5531, + "step": 64640 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005279993215883313, + "loss": 0.5424, + "step": 64650 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005279569208590389, + "loss": 0.5875, + "step": 64660 + }, + { + "epoch": 2.73, + "learning_rate": 0.0005279145201297462, + "loss": 0.6403, + "step": 64670 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005278721194004537, + "loss": 0.543, + "step": 64680 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005278297186711612, + "loss": 0.5608, + "step": 64690 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005277873179418686, + "loss": 0.4619, + "step": 64700 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005277449172125761, + "loss": 0.543, + "step": 64710 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005277025164832836, + "loss": 0.586, + "step": 64720 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005276601157539909, + "loss": 0.5554, + "step": 64730 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005276177150246985, + "loss": 0.6111, + "step": 64740 + }, + { + "epoch": 2.74, + "learning_rate": 0.000527575314295406, + "loss": 0.5757, + "step": 64750 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005275329135661133, + "loss": 0.5989, + "step": 64760 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005274905128368208, + "loss": 0.6962, + "step": 64770 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005274481121075284, + "loss": 0.5257, + "step": 64780 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005274057113782357, + "loss": 0.6447, + "step": 64790 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005273633106489432, + "loss": 0.4914, + "step": 64800 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005273209099196507, + "loss": 0.535, + "step": 64810 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005272785091903581, + "loss": 0.4964, + "step": 64820 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005272361084610656, + "loss": 0.6069, + "step": 64830 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005271937077317729, + "loss": 0.5717, + "step": 64840 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005271513070024804, + "loss": 0.6049, + "step": 64850 + }, + { + "epoch": 2.74, + "learning_rate": 0.000527108906273188, + "loss": 0.4927, + "step": 64860 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005270665055438953, + "loss": 0.5566, + "step": 64870 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005270241048146028, + "loss": 0.6662, + "step": 64880 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005269817040853103, + "loss": 0.5116, + "step": 64890 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005269393033560177, + "loss": 0.5188, + "step": 64900 + }, + { + "epoch": 2.74, + "learning_rate": 0.0005268969026267252, + "loss": 0.4953, + "step": 64910 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005268545018974327, + "loss": 0.6639, + "step": 64920 + }, + { + "epoch": 2.75, + "learning_rate": 0.00052681210116814, + "loss": 0.573, + "step": 64930 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005267697004388476, + "loss": 0.6061, + "step": 64940 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005267272997095551, + "loss": 0.6073, + "step": 64950 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005266848989802624, + "loss": 0.5409, + "step": 64960 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005266424982509699, + "loss": 0.6675, + "step": 64970 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005266000975216775, + "loss": 0.5805, + "step": 64980 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005265576967923848, + "loss": 0.5722, + "step": 64990 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005265152960630923, + "loss": 0.5811, + "step": 65000 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005264728953337998, + "loss": 0.5526, + "step": 65010 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005264304946045072, + "loss": 0.5199, + "step": 65020 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005263880938752147, + "loss": 0.5603, + "step": 65030 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005263456931459222, + "loss": 0.5314, + "step": 65040 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005263032924166295, + "loss": 0.5364, + "step": 65050 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005262608916873371, + "loss": 0.519, + "step": 65060 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005262184909580446, + "loss": 0.5056, + "step": 65070 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005261760902287519, + "loss": 0.5263, + "step": 65080 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005261336894994594, + "loss": 0.5893, + "step": 65090 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005260912887701669, + "loss": 0.5459, + "step": 65100 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005260488880408743, + "loss": 0.549, + "step": 65110 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005260064873115818, + "loss": 0.5111, + "step": 65120 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005259640865822893, + "loss": 0.4562, + "step": 65130 + }, + { + "epoch": 2.75, + "learning_rate": 0.0005259216858529967, + "loss": 0.5349, + "step": 65140 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005258792851237042, + "loss": 0.5737, + "step": 65150 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005258368843944116, + "loss": 0.5936, + "step": 65160 + }, + { + "epoch": 2.76, + "learning_rate": 0.000525794483665119, + "loss": 0.5425, + "step": 65170 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005257520829358265, + "loss": 0.533, + "step": 65180 + }, + { + "epoch": 2.76, + "learning_rate": 0.000525709682206534, + "loss": 0.5776, + "step": 65190 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005256672814772414, + "loss": 0.5567, + "step": 65200 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005256248807479489, + "loss": 0.5869, + "step": 65210 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005255824800186564, + "loss": 0.5645, + "step": 65220 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005255400792893638, + "loss": 0.5315, + "step": 65230 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005254976785600713, + "loss": 0.5717, + "step": 65240 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005254552778307786, + "loss": 0.5497, + "step": 65250 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005254128771014861, + "loss": 0.6518, + "step": 65260 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005253704763721937, + "loss": 0.5005, + "step": 65270 + }, + { + "epoch": 2.76, + "learning_rate": 0.000525328075642901, + "loss": 0.5506, + "step": 65280 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005252856749136085, + "loss": 0.5421, + "step": 65290 + }, + { + "epoch": 2.76, + "learning_rate": 0.000525243274184316, + "loss": 0.4679, + "step": 65300 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005252008734550235, + "loss": 0.5018, + "step": 65310 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005251584727257309, + "loss": 0.6565, + "step": 65320 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005251160719964384, + "loss": 0.5195, + "step": 65330 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005250736712671457, + "loss": 0.4982, + "step": 65340 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005250312705378533, + "loss": 0.5682, + "step": 65350 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005249888698085608, + "loss": 0.6093, + "step": 65360 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005249464690792681, + "loss": 0.6474, + "step": 65370 + }, + { + "epoch": 2.76, + "learning_rate": 0.0005249040683499756, + "loss": 0.5667, + "step": 65380 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005248616676206832, + "loss": 0.6015, + "step": 65390 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005248192668913905, + "loss": 0.4893, + "step": 65400 + }, + { + "epoch": 2.77, + "learning_rate": 0.000524776866162098, + "loss": 0.535, + "step": 65410 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005247344654328055, + "loss": 0.6234, + "step": 65420 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005246920647035129, + "loss": 0.4796, + "step": 65430 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005246496639742204, + "loss": 0.6031, + "step": 65440 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005246072632449278, + "loss": 0.5497, + "step": 65450 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005245648625156352, + "loss": 0.459, + "step": 65460 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005245224617863428, + "loss": 0.5264, + "step": 65470 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005244800610570502, + "loss": 0.6038, + "step": 65480 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005244376603277576, + "loss": 0.5438, + "step": 65490 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005243952595984651, + "loss": 0.6504, + "step": 65500 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005243528588691726, + "loss": 0.5372, + "step": 65510 + }, + { + "epoch": 2.77, + "learning_rate": 0.00052431045813988, + "loss": 0.6083, + "step": 65520 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005242680574105875, + "loss": 0.5688, + "step": 65530 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005242256566812949, + "loss": 0.5366, + "step": 65540 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005241832559520024, + "loss": 0.5622, + "step": 65550 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005241408552227099, + "loss": 0.4758, + "step": 65560 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005240984544934173, + "loss": 0.6, + "step": 65570 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005240560537641247, + "loss": 0.4609, + "step": 65580 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005240136530348323, + "loss": 0.5552, + "step": 65590 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005239712523055397, + "loss": 0.5454, + "step": 65600 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005239288515762471, + "loss": 0.5821, + "step": 65610 + }, + { + "epoch": 2.77, + "learning_rate": 0.0005238864508469546, + "loss": 0.6406, + "step": 65620 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005238440501176621, + "loss": 0.54, + "step": 65630 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005238016493883695, + "loss": 0.5965, + "step": 65640 + }, + { + "epoch": 2.78, + "learning_rate": 0.000523759248659077, + "loss": 0.5455, + "step": 65650 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005237168479297844, + "loss": 0.6076, + "step": 65660 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005236744472004919, + "loss": 0.5878, + "step": 65670 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005236320464711994, + "loss": 0.5691, + "step": 65680 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005235896457419068, + "loss": 0.461, + "step": 65690 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005235472450126142, + "loss": 0.4622, + "step": 65700 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005235048442833217, + "loss": 0.5654, + "step": 65710 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005234624435540292, + "loss": 0.6422, + "step": 65720 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005234200428247366, + "loss": 0.583, + "step": 65730 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005233776420954441, + "loss": 0.5096, + "step": 65740 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005233352413661516, + "loss": 0.5181, + "step": 65750 + }, + { + "epoch": 2.78, + "learning_rate": 0.000523292840636859, + "loss": 0.4637, + "step": 65760 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005232504399075664, + "loss": 0.5984, + "step": 65770 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005232080391782738, + "loss": 0.5771, + "step": 65780 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005231656384489813, + "loss": 0.5944, + "step": 65790 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005231232377196888, + "loss": 0.7141, + "step": 65800 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005230808369903962, + "loss": 0.5445, + "step": 65810 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005230384362611037, + "loss": 0.5941, + "step": 65820 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005229960355318112, + "loss": 0.6274, + "step": 65830 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005229536348025186, + "loss": 0.5122, + "step": 65840 + }, + { + "epoch": 2.78, + "learning_rate": 0.0005229112340732261, + "loss": 0.4749, + "step": 65850 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005228688333439335, + "loss": 0.5781, + "step": 65860 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005228264326146409, + "loss": 0.6577, + "step": 65870 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005227840318853485, + "loss": 0.6542, + "step": 65880 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005227416311560559, + "loss": 0.4606, + "step": 65890 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005226992304267633, + "loss": 0.5439, + "step": 65900 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005226568296974708, + "loss": 0.5499, + "step": 65910 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005226144289681783, + "loss": 0.6014, + "step": 65920 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005225720282388857, + "loss": 0.5778, + "step": 65930 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005225296275095932, + "loss": 0.5052, + "step": 65940 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005224872267803006, + "loss": 0.5712, + "step": 65950 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005224448260510081, + "loss": 0.5753, + "step": 65960 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005224024253217156, + "loss": 0.5409, + "step": 65970 + }, + { + "epoch": 2.79, + "learning_rate": 0.000522360024592423, + "loss": 0.4796, + "step": 65980 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005223176238631304, + "loss": 0.5941, + "step": 65990 + }, + { + "epoch": 2.79, + "learning_rate": 0.000522275223133838, + "loss": 0.6004, + "step": 66000 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005222328224045454, + "loss": 0.5999, + "step": 66010 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005221904216752528, + "loss": 0.4649, + "step": 66020 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005221480209459603, + "loss": 0.6109, + "step": 66030 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005221056202166678, + "loss": 0.4958, + "step": 66040 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005220632194873752, + "loss": 0.5757, + "step": 66050 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005220208187580827, + "loss": 0.556, + "step": 66060 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005219784180287901, + "loss": 0.6853, + "step": 66070 + }, + { + "epoch": 2.79, + "learning_rate": 0.0005219360172994976, + "loss": 0.5885, + "step": 66080 + }, + { + "epoch": 2.79, + "learning_rate": 0.000521893616570205, + "loss": 0.5715, + "step": 66090 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005218512158409125, + "loss": 0.6034, + "step": 66100 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005218088151116199, + "loss": 0.4779, + "step": 66110 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005217664143823274, + "loss": 0.5997, + "step": 66120 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005217240136530349, + "loss": 0.5409, + "step": 66130 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005216816129237423, + "loss": 0.5031, + "step": 66140 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005216392121944497, + "loss": 0.5848, + "step": 66150 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005215968114651573, + "loss": 0.5144, + "step": 66160 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005215544107358647, + "loss": 0.5486, + "step": 66170 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005215120100065721, + "loss": 0.5871, + "step": 66180 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005214696092772796, + "loss": 0.561, + "step": 66190 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005214272085479871, + "loss": 0.5522, + "step": 66200 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005213848078186945, + "loss": 0.5706, + "step": 66210 + }, + { + "epoch": 2.8, + "learning_rate": 0.000521342407089402, + "loss": 0.5238, + "step": 66220 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005213000063601094, + "loss": 0.5392, + "step": 66230 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005212576056308169, + "loss": 0.5042, + "step": 66240 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005212152049015244, + "loss": 0.6747, + "step": 66250 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005211728041722318, + "loss": 0.4271, + "step": 66260 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005211304034429392, + "loss": 0.6523, + "step": 66270 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005210880027136468, + "loss": 0.5325, + "step": 66280 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005210456019843542, + "loss": 0.6221, + "step": 66290 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005210032012550616, + "loss": 0.5425, + "step": 66300 + }, + { + "epoch": 2.8, + "learning_rate": 0.000520960800525769, + "loss": 0.6491, + "step": 66310 + }, + { + "epoch": 2.8, + "learning_rate": 0.0005209183997964765, + "loss": 0.658, + "step": 66320 + }, + { + "epoch": 2.81, + "learning_rate": 0.000520875999067184, + "loss": 0.6298, + "step": 66330 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005208335983378914, + "loss": 0.6005, + "step": 66340 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005207911976085989, + "loss": 0.5716, + "step": 66350 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005207487968793064, + "loss": 0.6544, + "step": 66360 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005207063961500138, + "loss": 0.6155, + "step": 66370 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005206639954207212, + "loss": 0.5099, + "step": 66380 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005206215946914287, + "loss": 0.5366, + "step": 66390 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005205791939621361, + "loss": 0.4547, + "step": 66400 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005205367932328436, + "loss": 0.5339, + "step": 66410 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005204943925035511, + "loss": 0.5762, + "step": 66420 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005204519917742585, + "loss": 0.4792, + "step": 66430 + }, + { + "epoch": 2.81, + "learning_rate": 0.000520409591044966, + "loss": 0.4539, + "step": 66440 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005203671903156735, + "loss": 0.5326, + "step": 66450 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005203247895863809, + "loss": 0.534, + "step": 66460 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005202823888570883, + "loss": 0.6447, + "step": 66470 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005202399881277959, + "loss": 0.5231, + "step": 66480 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005201975873985033, + "loss": 0.5308, + "step": 66490 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005201551866692107, + "loss": 0.6164, + "step": 66500 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005201127859399182, + "loss": 0.6078, + "step": 66510 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005200703852106256, + "loss": 0.5353, + "step": 66520 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005200279844813331, + "loss": 0.5898, + "step": 66530 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005199855837520406, + "loss": 0.5199, + "step": 66540 + }, + { + "epoch": 2.81, + "learning_rate": 0.000519943183022748, + "loss": 0.5812, + "step": 66550 + }, + { + "epoch": 2.81, + "learning_rate": 0.0005199007822934555, + "loss": 0.5647, + "step": 66560 + }, + { + "epoch": 2.82, + "learning_rate": 0.000519858381564163, + "loss": 0.563, + "step": 66570 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005198159808348704, + "loss": 0.4807, + "step": 66580 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005197735801055778, + "loss": 0.4918, + "step": 66590 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005197311793762853, + "loss": 0.5777, + "step": 66600 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005196887786469928, + "loss": 0.6448, + "step": 66610 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005196463779177002, + "loss": 0.5003, + "step": 66620 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005196039771884077, + "loss": 0.6408, + "step": 66630 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005195615764591151, + "loss": 0.5625, + "step": 66640 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005195191757298226, + "loss": 0.4986, + "step": 66650 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005194767750005301, + "loss": 0.5285, + "step": 66660 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005194343742712375, + "loss": 0.6886, + "step": 66670 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005193919735419449, + "loss": 0.6146, + "step": 66680 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005193495728126525, + "loss": 0.5014, + "step": 66690 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005193071720833598, + "loss": 0.5533, + "step": 66700 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005192647713540673, + "loss": 0.5377, + "step": 66710 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005192223706247748, + "loss": 0.5072, + "step": 66720 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005191799698954822, + "loss": 0.646, + "step": 66730 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005191375691661897, + "loss": 0.606, + "step": 66740 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005190951684368972, + "loss": 0.5916, + "step": 66750 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005190527677076045, + "loss": 0.5245, + "step": 66760 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005190103669783121, + "loss": 0.5324, + "step": 66770 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005189679662490196, + "loss": 0.5351, + "step": 66780 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005189255655197269, + "loss": 0.6992, + "step": 66790 + }, + { + "epoch": 2.82, + "learning_rate": 0.0005188831647904344, + "loss": 0.526, + "step": 66800 + }, + { + "epoch": 2.83, + "learning_rate": 0.000518840764061142, + "loss": 0.4734, + "step": 66810 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005187983633318493, + "loss": 0.5802, + "step": 66820 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005187559626025568, + "loss": 0.5292, + "step": 66830 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005187135618732642, + "loss": 0.562, + "step": 66840 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005186711611439717, + "loss": 0.5512, + "step": 66850 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005186287604146792, + "loss": 0.6065, + "step": 66860 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005185863596853866, + "loss": 0.6211, + "step": 66870 + }, + { + "epoch": 2.83, + "learning_rate": 0.000518543958956094, + "loss": 0.5817, + "step": 66880 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005185015582268016, + "loss": 0.5376, + "step": 66890 + }, + { + "epoch": 2.83, + "learning_rate": 0.000518459157497509, + "loss": 0.6077, + "step": 66900 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005184167567682164, + "loss": 0.5089, + "step": 66910 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005183743560389239, + "loss": 0.6108, + "step": 66920 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005183319553096313, + "loss": 0.5093, + "step": 66930 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005182895545803388, + "loss": 0.5961, + "step": 66940 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005182471538510463, + "loss": 0.5747, + "step": 66950 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005182047531217537, + "loss": 0.6478, + "step": 66960 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005181623523924612, + "loss": 0.4955, + "step": 66970 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005181199516631687, + "loss": 0.6236, + "step": 66980 + }, + { + "epoch": 2.83, + "learning_rate": 0.000518077550933876, + "loss": 0.456, + "step": 66990 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005180351502045835, + "loss": 0.5693, + "step": 67000 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005179927494752911, + "loss": 0.4763, + "step": 67010 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005179503487459984, + "loss": 0.552, + "step": 67020 + }, + { + "epoch": 2.83, + "learning_rate": 0.0005179079480167059, + "loss": 0.5903, + "step": 67030 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005178655472874134, + "loss": 0.5042, + "step": 67040 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005178231465581208, + "loss": 0.5885, + "step": 67050 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005177807458288283, + "loss": 0.5115, + "step": 67060 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005177383450995358, + "loss": 0.6079, + "step": 67070 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005176959443702431, + "loss": 0.5623, + "step": 67080 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005176535436409507, + "loss": 0.4898, + "step": 67090 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005176111429116582, + "loss": 0.5075, + "step": 67100 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005175687421823655, + "loss": 0.5514, + "step": 67110 + }, + { + "epoch": 2.84, + "learning_rate": 0.000517526341453073, + "loss": 0.5684, + "step": 67120 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005174839407237805, + "loss": 0.5448, + "step": 67130 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005174415399944879, + "loss": 0.5326, + "step": 67140 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005173991392651954, + "loss": 0.5841, + "step": 67150 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005173567385359029, + "loss": 0.6066, + "step": 67160 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005173143378066103, + "loss": 0.5663, + "step": 67170 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005172719370773178, + "loss": 0.5904, + "step": 67180 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005172295363480253, + "loss": 0.4561, + "step": 67190 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005171871356187326, + "loss": 0.509, + "step": 67200 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005171447348894401, + "loss": 0.5424, + "step": 67210 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005171023341601477, + "loss": 0.6063, + "step": 67220 + }, + { + "epoch": 2.84, + "learning_rate": 0.000517059933430855, + "loss": 0.5457, + "step": 67230 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005170175327015625, + "loss": 0.5951, + "step": 67240 + }, + { + "epoch": 2.84, + "learning_rate": 0.00051697513197227, + "loss": 0.5199, + "step": 67250 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005169327312429774, + "loss": 0.624, + "step": 67260 + }, + { + "epoch": 2.84, + "learning_rate": 0.0005168903305136849, + "loss": 0.5208, + "step": 67270 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005168479297843923, + "loss": 0.5034, + "step": 67280 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005168055290550997, + "loss": 0.5193, + "step": 67290 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005167631283258073, + "loss": 0.5758, + "step": 67300 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005167207275965146, + "loss": 0.4725, + "step": 67310 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005166783268672221, + "loss": 0.5471, + "step": 67320 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005166359261379296, + "loss": 0.5486, + "step": 67330 + }, + { + "epoch": 2.85, + "learning_rate": 0.000516593525408637, + "loss": 0.5003, + "step": 67340 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005165511246793445, + "loss": 0.6004, + "step": 67350 + }, + { + "epoch": 2.85, + "learning_rate": 0.000516508723950052, + "loss": 0.5643, + "step": 67360 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005164663232207593, + "loss": 0.5329, + "step": 67370 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005164239224914669, + "loss": 0.6411, + "step": 67380 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005163815217621744, + "loss": 0.5444, + "step": 67390 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005163391210328817, + "loss": 0.5373, + "step": 67400 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005162967203035892, + "loss": 0.5599, + "step": 67410 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005162543195742968, + "loss": 0.6055, + "step": 67420 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005162119188450041, + "loss": 0.4969, + "step": 67430 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005161695181157116, + "loss": 0.5403, + "step": 67440 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005161271173864191, + "loss": 0.5353, + "step": 67450 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005160847166571265, + "loss": 0.5192, + "step": 67460 + }, + { + "epoch": 2.85, + "learning_rate": 0.000516042315927834, + "loss": 0.6583, + "step": 67470 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005159999151985415, + "loss": 0.5567, + "step": 67480 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005159575144692488, + "loss": 0.5318, + "step": 67490 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005159151137399564, + "loss": 0.5619, + "step": 67500 + }, + { + "epoch": 2.85, + "learning_rate": 0.0005158727130106639, + "loss": 0.4847, + "step": 67510 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005158303122813712, + "loss": 0.5692, + "step": 67520 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005157879115520787, + "loss": 0.5499, + "step": 67530 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005157455108227863, + "loss": 0.4291, + "step": 67540 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005157031100934936, + "loss": 0.6016, + "step": 67550 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005156607093642011, + "loss": 0.5498, + "step": 67560 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005156183086349086, + "loss": 0.5082, + "step": 67570 + }, + { + "epoch": 2.86, + "learning_rate": 0.000515575907905616, + "loss": 0.6166, + "step": 67580 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005155335071763235, + "loss": 0.5829, + "step": 67590 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005154911064470309, + "loss": 0.5501, + "step": 67600 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005154487057177383, + "loss": 0.5623, + "step": 67610 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005154063049884459, + "loss": 0.4963, + "step": 67620 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005153639042591533, + "loss": 0.4252, + "step": 67630 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005153215035298607, + "loss": 0.4916, + "step": 67640 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005152791028005682, + "loss": 0.6202, + "step": 67650 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005152367020712757, + "loss": 0.4733, + "step": 67660 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005151943013419831, + "loss": 0.5469, + "step": 67670 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005151519006126906, + "loss": 0.5301, + "step": 67680 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005151094998833979, + "loss": 0.5802, + "step": 67690 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005150670991541055, + "loss": 0.5928, + "step": 67700 + }, + { + "epoch": 2.86, + "learning_rate": 0.000515024698424813, + "loss": 0.5171, + "step": 67710 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005149822976955203, + "loss": 0.5104, + "step": 67720 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005149398969662278, + "loss": 0.4601, + "step": 67730 + }, + { + "epoch": 2.86, + "learning_rate": 0.0005148974962369353, + "loss": 0.5395, + "step": 67740 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005148550955076427, + "loss": 0.5057, + "step": 67750 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005148126947783502, + "loss": 0.5907, + "step": 67760 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005147702940490577, + "loss": 0.5525, + "step": 67770 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005147278933197651, + "loss": 0.5272, + "step": 67780 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005146854925904726, + "loss": 0.6282, + "step": 67790 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005146430918611801, + "loss": 0.5243, + "step": 67800 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005146006911318874, + "loss": 0.5145, + "step": 67810 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005145582904025949, + "loss": 0.5837, + "step": 67820 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005145158896733025, + "loss": 0.5913, + "step": 67830 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005144734889440098, + "loss": 0.5579, + "step": 67840 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005144310882147173, + "loss": 0.5843, + "step": 67850 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005143886874854248, + "loss": 0.6117, + "step": 67860 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005143462867561322, + "loss": 0.5865, + "step": 67870 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005143038860268397, + "loss": 0.6213, + "step": 67880 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005142614852975472, + "loss": 0.5766, + "step": 67890 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005142190845682545, + "loss": 0.5452, + "step": 67900 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005141766838389621, + "loss": 0.4795, + "step": 67910 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005141342831096695, + "loss": 0.5963, + "step": 67920 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005140918823803769, + "loss": 0.6336, + "step": 67930 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005140494816510844, + "loss": 0.5375, + "step": 67940 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005140070809217919, + "loss": 0.4748, + "step": 67950 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005139646801924993, + "loss": 0.4954, + "step": 67960 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005139222794632068, + "loss": 0.5391, + "step": 67970 + }, + { + "epoch": 2.87, + "learning_rate": 0.0005138798787339143, + "loss": 0.5234, + "step": 67980 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005138374780046217, + "loss": 0.5253, + "step": 67990 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005137950772753292, + "loss": 0.5545, + "step": 68000 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005137526765460366, + "loss": 0.5537, + "step": 68010 + }, + { + "epoch": 2.88, + "learning_rate": 0.000513710275816744, + "loss": 0.5281, + "step": 68020 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005136678750874516, + "loss": 0.5674, + "step": 68030 + }, + { + "epoch": 2.88, + "learning_rate": 0.000513625474358159, + "loss": 0.4665, + "step": 68040 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005135830736288664, + "loss": 0.5962, + "step": 68050 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005135406728995739, + "loss": 0.539, + "step": 68060 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005134982721702814, + "loss": 0.4893, + "step": 68070 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005134558714409888, + "loss": 0.6401, + "step": 68080 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005134134707116963, + "loss": 0.6025, + "step": 68090 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005133710699824036, + "loss": 0.5638, + "step": 68100 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005133286692531112, + "loss": 0.6303, + "step": 68110 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005132862685238187, + "loss": 0.4704, + "step": 68120 + }, + { + "epoch": 2.88, + "learning_rate": 0.000513243867794526, + "loss": 0.5518, + "step": 68130 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005132014670652335, + "loss": 0.5557, + "step": 68140 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005131590663359411, + "loss": 0.5744, + "step": 68150 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005131166656066484, + "loss": 0.5317, + "step": 68160 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005130742648773559, + "loss": 0.5243, + "step": 68170 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005130318641480634, + "loss": 0.6434, + "step": 68180 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005129894634187709, + "loss": 0.6225, + "step": 68190 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005129470626894783, + "loss": 0.4556, + "step": 68200 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005129046619601858, + "loss": 0.553, + "step": 68210 + }, + { + "epoch": 2.88, + "learning_rate": 0.0005128622612308931, + "loss": 0.5699, + "step": 68220 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005128198605016007, + "loss": 0.5654, + "step": 68230 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005127774597723081, + "loss": 0.57, + "step": 68240 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005127350590430155, + "loss": 0.5919, + "step": 68250 + }, + { + "epoch": 2.89, + "learning_rate": 0.000512692658313723, + "loss": 0.5484, + "step": 68260 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005126502575844305, + "loss": 0.5569, + "step": 68270 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005126078568551379, + "loss": 0.5497, + "step": 68280 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005125654561258454, + "loss": 0.5142, + "step": 68290 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005125230553965528, + "loss": 0.535, + "step": 68300 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005124806546672603, + "loss": 0.6414, + "step": 68310 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005124382539379678, + "loss": 0.5935, + "step": 68320 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005123958532086752, + "loss": 0.5595, + "step": 68330 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005123534524793826, + "loss": 0.5338, + "step": 68340 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005123110517500901, + "loss": 0.6319, + "step": 68350 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005122686510207976, + "loss": 0.592, + "step": 68360 + }, + { + "epoch": 2.89, + "learning_rate": 0.000512226250291505, + "loss": 0.5381, + "step": 68370 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005121838495622125, + "loss": 0.5511, + "step": 68380 + }, + { + "epoch": 2.89, + "learning_rate": 0.00051214144883292, + "loss": 0.6523, + "step": 68390 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005120990481036274, + "loss": 0.5386, + "step": 68400 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005120566473743349, + "loss": 0.5265, + "step": 68410 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005120142466450423, + "loss": 0.5652, + "step": 68420 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005119718459157497, + "loss": 0.5222, + "step": 68430 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005119294451864573, + "loss": 0.5012, + "step": 68440 + }, + { + "epoch": 2.89, + "learning_rate": 0.0005118870444571647, + "loss": 0.6505, + "step": 68450 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005118446437278721, + "loss": 0.6113, + "step": 68460 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005118022429985796, + "loss": 0.5286, + "step": 68470 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005117598422692871, + "loss": 0.5892, + "step": 68480 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005117174415399945, + "loss": 0.5588, + "step": 68490 + }, + { + "epoch": 2.9, + "learning_rate": 0.000511675040810702, + "loss": 0.6046, + "step": 68500 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005116326400814095, + "loss": 0.5091, + "step": 68510 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005115902393521169, + "loss": 0.5746, + "step": 68520 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005115478386228243, + "loss": 0.6003, + "step": 68530 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005115054378935318, + "loss": 0.5846, + "step": 68540 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005114630371642392, + "loss": 0.4922, + "step": 68550 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005114206364349467, + "loss": 0.6708, + "step": 68560 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005113782357056542, + "loss": 0.5909, + "step": 68570 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005113358349763616, + "loss": 0.5713, + "step": 68580 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005112934342470691, + "loss": 0.5949, + "step": 68590 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005112510335177766, + "loss": 0.6228, + "step": 68600 + }, + { + "epoch": 2.9, + "learning_rate": 0.000511208632788484, + "loss": 0.4823, + "step": 68610 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005111662320591914, + "loss": 0.5546, + "step": 68620 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005111238313298988, + "loss": 0.5948, + "step": 68630 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005110814306006064, + "loss": 0.6143, + "step": 68640 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005110390298713138, + "loss": 0.4605, + "step": 68650 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005109966291420212, + "loss": 0.5536, + "step": 68660 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005109542284127287, + "loss": 0.6364, + "step": 68670 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005109118276834362, + "loss": 0.4947, + "step": 68680 + }, + { + "epoch": 2.9, + "learning_rate": 0.0005108694269541436, + "loss": 0.6201, + "step": 68690 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005108270262248511, + "loss": 0.4569, + "step": 68700 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005107846254955585, + "loss": 0.5128, + "step": 68710 + }, + { + "epoch": 2.91, + "learning_rate": 0.000510742224766266, + "loss": 0.4574, + "step": 68720 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005106998240369735, + "loss": 0.5286, + "step": 68730 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005106574233076809, + "loss": 0.6162, + "step": 68740 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005106150225783883, + "loss": 0.5695, + "step": 68750 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005105726218490959, + "loss": 0.5295, + "step": 68760 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005105302211198033, + "loss": 0.5546, + "step": 68770 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005104878203905107, + "loss": 0.5193, + "step": 68780 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005104454196612182, + "loss": 0.5785, + "step": 68790 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005104030189319257, + "loss": 0.5567, + "step": 68800 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005103606182026331, + "loss": 0.5293, + "step": 68810 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005103182174733406, + "loss": 0.6194, + "step": 68820 + }, + { + "epoch": 2.91, + "learning_rate": 0.000510275816744048, + "loss": 0.5798, + "step": 68830 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005102334160147555, + "loss": 0.4691, + "step": 68840 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005101910152854629, + "loss": 0.5981, + "step": 68850 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005101486145561704, + "loss": 0.5088, + "step": 68860 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005101062138268778, + "loss": 0.4538, + "step": 68870 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005100638130975853, + "loss": 0.5106, + "step": 68880 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005100214123682928, + "loss": 0.616, + "step": 68890 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005099790116390002, + "loss": 0.6236, + "step": 68900 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005099366109097076, + "loss": 0.5243, + "step": 68910 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005098942101804152, + "loss": 0.4851, + "step": 68920 + }, + { + "epoch": 2.91, + "learning_rate": 0.0005098518094511226, + "loss": 0.6389, + "step": 68930 + }, + { + "epoch": 2.92, + "learning_rate": 0.00050980940872183, + "loss": 0.6198, + "step": 68940 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005097670079925375, + "loss": 0.6395, + "step": 68950 + }, + { + "epoch": 2.92, + "learning_rate": 0.000509724607263245, + "loss": 0.5233, + "step": 68960 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005096822065339524, + "loss": 0.5446, + "step": 68970 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005096398058046599, + "loss": 0.608, + "step": 68980 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005095974050753673, + "loss": 0.4683, + "step": 68990 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005095550043460748, + "loss": 0.5657, + "step": 69000 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005095126036167823, + "loss": 0.5576, + "step": 69010 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005094702028874897, + "loss": 0.6137, + "step": 69020 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005094278021581971, + "loss": 0.5629, + "step": 69030 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005093854014289047, + "loss": 0.5643, + "step": 69040 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005093430006996121, + "loss": 0.6008, + "step": 69050 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005093005999703195, + "loss": 0.565, + "step": 69060 + }, + { + "epoch": 2.92, + "learning_rate": 0.000509258199241027, + "loss": 0.5376, + "step": 69070 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005092157985117344, + "loss": 0.6281, + "step": 69080 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005091733977824419, + "loss": 0.5168, + "step": 69090 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005091309970531494, + "loss": 0.5619, + "step": 69100 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005090885963238568, + "loss": 0.6353, + "step": 69110 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005090461955945643, + "loss": 0.5902, + "step": 69120 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005090037948652718, + "loss": 0.6062, + "step": 69130 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005089613941359791, + "loss": 0.5282, + "step": 69140 + }, + { + "epoch": 2.92, + "learning_rate": 0.0005089189934066866, + "loss": 0.5772, + "step": 69150 + }, + { + "epoch": 2.92, + "learning_rate": 0.000508876592677394, + "loss": 0.569, + "step": 69160 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005088341919481015, + "loss": 0.4475, + "step": 69170 + }, + { + "epoch": 2.93, + "learning_rate": 0.000508791791218809, + "loss": 0.6095, + "step": 69180 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005087493904895164, + "loss": 0.5579, + "step": 69190 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005087069897602239, + "loss": 0.6069, + "step": 69200 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005086645890309314, + "loss": 0.5828, + "step": 69210 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005086221883016388, + "loss": 0.5043, + "step": 69220 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005085797875723462, + "loss": 0.4889, + "step": 69230 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005085373868430537, + "loss": 0.5201, + "step": 69240 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005084949861137612, + "loss": 0.476, + "step": 69250 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005084525853844686, + "loss": 0.7554, + "step": 69260 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005084101846551761, + "loss": 0.5318, + "step": 69270 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005083677839258835, + "loss": 0.5711, + "step": 69280 + }, + { + "epoch": 2.93, + "learning_rate": 0.000508325383196591, + "loss": 0.6282, + "step": 69290 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005082829824672985, + "loss": 0.5627, + "step": 69300 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005082405817380059, + "loss": 0.5577, + "step": 69310 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005081981810087133, + "loss": 0.564, + "step": 69320 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005081557802794209, + "loss": 0.5482, + "step": 69330 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005081133795501283, + "loss": 0.5424, + "step": 69340 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005080709788208357, + "loss": 0.6234, + "step": 69350 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005080285780915432, + "loss": 0.5924, + "step": 69360 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005079861773622507, + "loss": 0.5763, + "step": 69370 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005079437766329581, + "loss": 0.5542, + "step": 69380 + }, + { + "epoch": 2.93, + "learning_rate": 0.0005079013759036656, + "loss": 0.5198, + "step": 69390 + }, + { + "epoch": 2.93, + "learning_rate": 0.000507858975174373, + "loss": 0.5589, + "step": 69400 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005078165744450805, + "loss": 0.521, + "step": 69410 + }, + { + "epoch": 2.94, + "learning_rate": 0.000507774173715788, + "loss": 0.5909, + "step": 69420 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005077317729864954, + "loss": 0.6408, + "step": 69430 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005076893722572028, + "loss": 0.543, + "step": 69440 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005076469715279104, + "loss": 0.5368, + "step": 69450 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005076045707986177, + "loss": 0.5345, + "step": 69460 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005075621700693252, + "loss": 0.547, + "step": 69470 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005075197693400327, + "loss": 0.6726, + "step": 69480 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005074773686107401, + "loss": 0.5795, + "step": 69490 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005074349678814476, + "loss": 0.5378, + "step": 69500 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005073925671521551, + "loss": 0.5856, + "step": 69510 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005073501664228624, + "loss": 0.5097, + "step": 69520 + }, + { + "epoch": 2.94, + "learning_rate": 0.00050730776569357, + "loss": 0.5611, + "step": 69530 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005072653649642775, + "loss": 0.5751, + "step": 69540 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005072229642349848, + "loss": 0.6031, + "step": 69550 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005071805635056923, + "loss": 0.4923, + "step": 69560 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005071381627763999, + "loss": 0.5571, + "step": 69570 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005070957620471072, + "loss": 0.5097, + "step": 69580 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005070533613178147, + "loss": 0.6107, + "step": 69590 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005070109605885221, + "loss": 0.556, + "step": 69600 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005069685598592296, + "loss": 0.5025, + "step": 69610 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005069261591299371, + "loss": 0.5669, + "step": 69620 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005068837584006445, + "loss": 0.645, + "step": 69630 + }, + { + "epoch": 2.94, + "learning_rate": 0.0005068413576713519, + "loss": 0.5799, + "step": 69640 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005067989569420595, + "loss": 0.5412, + "step": 69650 + }, + { + "epoch": 2.95, + "learning_rate": 0.000506756556212767, + "loss": 0.5652, + "step": 69660 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005067141554834743, + "loss": 0.586, + "step": 69670 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005066717547541818, + "loss": 0.6181, + "step": 69680 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005066293540248892, + "loss": 0.5531, + "step": 69690 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005065869532955967, + "loss": 0.5551, + "step": 69700 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005065445525663042, + "loss": 0.586, + "step": 69710 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005065021518370116, + "loss": 0.5598, + "step": 69720 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005064597511077191, + "loss": 0.575, + "step": 69730 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005064173503784266, + "loss": 0.4883, + "step": 69740 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005063749496491339, + "loss": 0.593, + "step": 69750 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005063325489198414, + "loss": 0.527, + "step": 69760 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005062901481905489, + "loss": 0.5661, + "step": 69770 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005062477474612563, + "loss": 0.522, + "step": 69780 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005062053467319638, + "loss": 0.5918, + "step": 69790 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005061629460026713, + "loss": 0.5937, + "step": 69800 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005061205452733787, + "loss": 0.4222, + "step": 69810 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005060781445440862, + "loss": 0.5006, + "step": 69820 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005060357438147937, + "loss": 0.5573, + "step": 69830 + }, + { + "epoch": 2.95, + "learning_rate": 0.000505993343085501, + "loss": 0.5724, + "step": 69840 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005059509423562085, + "loss": 0.4367, + "step": 69850 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005059085416269161, + "loss": 0.5215, + "step": 69860 + }, + { + "epoch": 2.95, + "learning_rate": 0.0005058661408976234, + "loss": 0.4401, + "step": 69870 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005058237401683309, + "loss": 0.4991, + "step": 69880 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005057813394390384, + "loss": 0.5107, + "step": 69890 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005057389387097458, + "loss": 0.5603, + "step": 69900 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005056965379804533, + "loss": 0.5796, + "step": 69910 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005056541372511608, + "loss": 0.4495, + "step": 69920 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005056117365218681, + "loss": 0.6021, + "step": 69930 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005055693357925757, + "loss": 0.5264, + "step": 69940 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005055269350632832, + "loss": 0.6888, + "step": 69950 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005054845343339905, + "loss": 0.5547, + "step": 69960 + }, + { + "epoch": 2.96, + "learning_rate": 0.000505442133604698, + "loss": 0.5121, + "step": 69970 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005053997328754056, + "loss": 0.582, + "step": 69980 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005053573321461129, + "loss": 0.5956, + "step": 69990 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005053149314168204, + "loss": 0.5408, + "step": 70000 + }, + { + "epoch": 2.96, + "eval_loss": 0.6053586602210999, + "eval_runtime": 337.729, + "eval_samples_per_second": 15.56, + "eval_steps_per_second": 3.891, + "step": 70000 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005052725306875279, + "loss": 0.5125, + "step": 70010 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005052301299582353, + "loss": 0.5051, + "step": 70020 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005051877292289428, + "loss": 0.5172, + "step": 70030 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005051453284996503, + "loss": 0.5293, + "step": 70040 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005051029277703576, + "loss": 0.6299, + "step": 70050 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005050605270410652, + "loss": 0.5143, + "step": 70060 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005050181263117725, + "loss": 0.5827, + "step": 70070 + }, + { + "epoch": 2.96, + "learning_rate": 0.00050497572558248, + "loss": 0.5883, + "step": 70080 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005049333248531875, + "loss": 0.5293, + "step": 70090 + }, + { + "epoch": 2.96, + "learning_rate": 0.000504890924123895, + "loss": 0.5464, + "step": 70100 + }, + { + "epoch": 2.96, + "learning_rate": 0.0005048485233946024, + "loss": 0.6002, + "step": 70110 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005048061226653099, + "loss": 0.7026, + "step": 70120 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005047637219360172, + "loss": 0.4571, + "step": 70130 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005047213212067248, + "loss": 0.6722, + "step": 70140 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005046789204774323, + "loss": 0.6154, + "step": 70150 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005046365197481396, + "loss": 0.5586, + "step": 70160 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005045941190188471, + "loss": 0.5023, + "step": 70170 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005045517182895547, + "loss": 0.5799, + "step": 70180 + }, + { + "epoch": 2.97, + "learning_rate": 0.000504509317560262, + "loss": 0.5724, + "step": 70190 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005044669168309695, + "loss": 0.514, + "step": 70200 + }, + { + "epoch": 2.97, + "learning_rate": 0.000504424516101677, + "loss": 0.5404, + "step": 70210 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005043821153723844, + "loss": 0.5795, + "step": 70220 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005043397146430919, + "loss": 0.6498, + "step": 70230 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005042973139137994, + "loss": 0.5169, + "step": 70240 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005042549131845067, + "loss": 0.6216, + "step": 70250 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005042125124552143, + "loss": 0.5882, + "step": 70260 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005041701117259218, + "loss": 0.5458, + "step": 70270 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005041277109966291, + "loss": 0.5344, + "step": 70280 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005040853102673366, + "loss": 0.5116, + "step": 70290 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005040429095380441, + "loss": 0.6302, + "step": 70300 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005040005088087515, + "loss": 0.5824, + "step": 70310 + }, + { + "epoch": 2.97, + "learning_rate": 0.000503958108079459, + "loss": 0.6912, + "step": 70320 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005039157073501665, + "loss": 0.4882, + "step": 70330 + }, + { + "epoch": 2.97, + "learning_rate": 0.0005038733066208739, + "loss": 0.4924, + "step": 70340 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005038309058915814, + "loss": 0.4844, + "step": 70350 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005037885051622889, + "loss": 0.5262, + "step": 70360 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005037461044329962, + "loss": 0.6292, + "step": 70370 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005037037037037037, + "loss": 0.5322, + "step": 70380 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005036613029744112, + "loss": 0.5653, + "step": 70390 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005036189022451186, + "loss": 0.6209, + "step": 70400 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005035765015158261, + "loss": 0.5085, + "step": 70410 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005035341007865336, + "loss": 0.6068, + "step": 70420 + }, + { + "epoch": 2.98, + "learning_rate": 0.000503491700057241, + "loss": 0.5698, + "step": 70430 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005034492993279485, + "loss": 0.502, + "step": 70440 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005034068985986559, + "loss": 0.5874, + "step": 70450 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005033644978693633, + "loss": 0.5782, + "step": 70460 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005033220971400709, + "loss": 0.5394, + "step": 70470 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005032796964107783, + "loss": 0.5976, + "step": 70480 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005032372956814857, + "loss": 0.5624, + "step": 70490 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005031948949521932, + "loss": 0.4806, + "step": 70500 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005031524942229007, + "loss": 0.5768, + "step": 70510 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005031100934936081, + "loss": 0.6531, + "step": 70520 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005030676927643156, + "loss": 0.5966, + "step": 70530 + }, + { + "epoch": 2.98, + "learning_rate": 0.000503025292035023, + "loss": 0.5612, + "step": 70540 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005029828913057305, + "loss": 0.5579, + "step": 70550 + }, + { + "epoch": 2.98, + "learning_rate": 0.000502940490576438, + "loss": 0.5137, + "step": 70560 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005028980898471453, + "loss": 0.633, + "step": 70570 + }, + { + "epoch": 2.98, + "learning_rate": 0.0005028556891178528, + "loss": 0.6308, + "step": 70580 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005028132883885604, + "loss": 0.6313, + "step": 70590 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005027708876592677, + "loss": 0.5713, + "step": 70600 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005027284869299752, + "loss": 0.6351, + "step": 70610 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005026860862006827, + "loss": 0.5522, + "step": 70620 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005026436854713901, + "loss": 0.4856, + "step": 70630 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005026012847420976, + "loss": 0.5759, + "step": 70640 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005025588840128051, + "loss": 0.5238, + "step": 70650 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005025164832835124, + "loss": 0.5602, + "step": 70660 + }, + { + "epoch": 2.99, + "learning_rate": 0.00050247408255422, + "loss": 0.5609, + "step": 70670 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005024316818249274, + "loss": 0.591, + "step": 70680 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005023892810956348, + "loss": 0.5414, + "step": 70690 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005023468803663423, + "loss": 0.6054, + "step": 70700 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005023044796370498, + "loss": 0.6063, + "step": 70710 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005022620789077572, + "loss": 0.5206, + "step": 70720 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005022196781784647, + "loss": 0.6796, + "step": 70730 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005021772774491721, + "loss": 0.5158, + "step": 70740 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005021348767198796, + "loss": 0.5476, + "step": 70750 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005020924759905871, + "loss": 0.5877, + "step": 70760 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005020500752612945, + "loss": 0.4583, + "step": 70770 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005020076745320019, + "loss": 0.5495, + "step": 70780 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005019652738027095, + "loss": 0.5862, + "step": 70790 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005019228730734169, + "loss": 0.4068, + "step": 70800 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005018804723441243, + "loss": 0.5669, + "step": 70810 + }, + { + "epoch": 2.99, + "learning_rate": 0.0005018380716148318, + "loss": 0.6162, + "step": 70820 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005017956708855393, + "loss": 0.5455, + "step": 70830 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005017532701562467, + "loss": 0.5478, + "step": 70840 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005017108694269542, + "loss": 0.448, + "step": 70850 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005016684686976616, + "loss": 0.5944, + "step": 70860 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005016260679683691, + "loss": 0.5631, + "step": 70870 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005015836672390766, + "loss": 0.5651, + "step": 70880 + }, + { + "epoch": 3.0, + "learning_rate": 0.000501541266509784, + "loss": 0.4939, + "step": 70890 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005014988657804914, + "loss": 0.6093, + "step": 70900 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005014564650511989, + "loss": 0.4765, + "step": 70910 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005014140643219064, + "loss": 0.596, + "step": 70920 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005013716635926138, + "loss": 0.5989, + "step": 70930 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005013292628633213, + "loss": 0.5595, + "step": 70940 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005012868621340288, + "loss": 0.6006, + "step": 70950 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005012444614047362, + "loss": 0.5156, + "step": 70960 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005012020606754437, + "loss": 0.4382, + "step": 70970 + }, + { + "epoch": 3.0, + "learning_rate": 0.000501159659946151, + "loss": 0.5785, + "step": 70980 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005011172592168586, + "loss": 0.5198, + "step": 70990 + }, + { + "epoch": 3.0, + "learning_rate": 0.000501074858487566, + "loss": 0.4833, + "step": 71000 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005010324577582734, + "loss": 0.5431, + "step": 71010 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005009900570289809, + "loss": 0.4276, + "step": 71020 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005009476562996884, + "loss": 0.519, + "step": 71030 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005009052555703958, + "loss": 0.4612, + "step": 71040 + }, + { + "epoch": 3.0, + "learning_rate": 0.0005008628548411033, + "loss": 0.4552, + "step": 71050 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005008204541118107, + "loss": 0.5332, + "step": 71060 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005007780533825182, + "loss": 0.4765, + "step": 71070 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005007356526532257, + "loss": 0.5225, + "step": 71080 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005006932519239331, + "loss": 0.5441, + "step": 71090 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005006508511946405, + "loss": 0.4793, + "step": 71100 + }, + { + "epoch": 3.01, + "learning_rate": 0.000500608450465348, + "loss": 0.4695, + "step": 71110 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005005660497360555, + "loss": 0.5591, + "step": 71120 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005005236490067629, + "loss": 0.4518, + "step": 71130 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005004812482774704, + "loss": 0.4745, + "step": 71140 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005004388475481779, + "loss": 0.5148, + "step": 71150 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005003964468188853, + "loss": 0.4996, + "step": 71160 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005003540460895928, + "loss": 0.4106, + "step": 71170 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005003116453603002, + "loss": 0.5095, + "step": 71180 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005002692446310076, + "loss": 0.4849, + "step": 71190 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005002268439017152, + "loss": 0.4828, + "step": 71200 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005001844431724226, + "loss": 0.518, + "step": 71210 + }, + { + "epoch": 3.01, + "learning_rate": 0.00050014204244313, + "loss": 0.4613, + "step": 71220 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005000996417138375, + "loss": 0.5248, + "step": 71230 + }, + { + "epoch": 3.01, + "learning_rate": 0.000500057240984545, + "loss": 0.4476, + "step": 71240 + }, + { + "epoch": 3.01, + "learning_rate": 0.0005000148402552524, + "loss": 0.4928, + "step": 71250 + }, + { + "epoch": 3.01, + "learning_rate": 0.0004999724395259599, + "loss": 0.5271, + "step": 71260 + }, + { + "epoch": 3.01, + "learning_rate": 0.0004999300387966673, + "loss": 0.5018, + "step": 71270 + }, + { + "epoch": 3.01, + "learning_rate": 0.0004998876380673748, + "loss": 0.4535, + "step": 71280 + }, + { + "epoch": 3.01, + "learning_rate": 0.0004998452373380822, + "loss": 0.4703, + "step": 71290 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004998028366087897, + "loss": 0.5359, + "step": 71300 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004997604358794971, + "loss": 0.5612, + "step": 71310 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004997180351502046, + "loss": 0.5504, + "step": 71320 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004996756344209121, + "loss": 0.533, + "step": 71330 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004996332336916195, + "loss": 0.4805, + "step": 71340 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004995908329623269, + "loss": 0.4741, + "step": 71350 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004995484322330345, + "loss": 0.4634, + "step": 71360 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004995060315037419, + "loss": 0.4991, + "step": 71370 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004994636307744493, + "loss": 0.5552, + "step": 71380 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004994212300451568, + "loss": 0.5193, + "step": 71390 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004993788293158643, + "loss": 0.5232, + "step": 71400 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004993364285865717, + "loss": 0.5304, + "step": 71410 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004992940278572792, + "loss": 0.6105, + "step": 71420 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004992516271279866, + "loss": 0.5924, + "step": 71430 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004992092263986941, + "loss": 0.5286, + "step": 71440 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004991668256694016, + "loss": 0.5514, + "step": 71450 + }, + { + "epoch": 3.02, + "learning_rate": 0.000499124424940109, + "loss": 0.4388, + "step": 71460 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004990820242108164, + "loss": 0.4027, + "step": 71470 + }, + { + "epoch": 3.02, + "learning_rate": 0.000499039623481524, + "loss": 0.5186, + "step": 71480 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004989972227522314, + "loss": 0.487, + "step": 71490 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004989548220229388, + "loss": 0.4596, + "step": 71500 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004989124212936462, + "loss": 0.4839, + "step": 71510 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004988700205643538, + "loss": 0.558, + "step": 71520 + }, + { + "epoch": 3.02, + "learning_rate": 0.0004988276198350612, + "loss": 0.5125, + "step": 71530 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004987852191057686, + "loss": 0.515, + "step": 71540 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004987428183764761, + "loss": 0.5388, + "step": 71550 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004987004176471836, + "loss": 0.5043, + "step": 71560 + }, + { + "epoch": 3.03, + "learning_rate": 0.000498658016917891, + "loss": 0.5008, + "step": 71570 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004986156161885985, + "loss": 0.507, + "step": 71580 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004985732154593059, + "loss": 0.5735, + "step": 71590 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004985308147300134, + "loss": 0.4633, + "step": 71600 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004984884140007208, + "loss": 0.5328, + "step": 71610 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004984460132714283, + "loss": 0.5205, + "step": 71620 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004984036125421357, + "loss": 0.5621, + "step": 71630 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004983612118128432, + "loss": 0.5216, + "step": 71640 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004983188110835507, + "loss": 0.5304, + "step": 71650 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004982764103542581, + "loss": 0.4998, + "step": 71660 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004982340096249655, + "loss": 0.5101, + "step": 71670 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004981916088956731, + "loss": 0.5165, + "step": 71680 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004981492081663805, + "loss": 0.592, + "step": 71690 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004981068074370879, + "loss": 0.5027, + "step": 71700 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004980644067077954, + "loss": 0.4887, + "step": 71710 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004980220059785028, + "loss": 0.5141, + "step": 71720 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004979796052492103, + "loss": 0.4989, + "step": 71730 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004979372045199178, + "loss": 0.5166, + "step": 71740 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004978948037906252, + "loss": 0.4397, + "step": 71750 + }, + { + "epoch": 3.03, + "learning_rate": 0.0004978524030613327, + "loss": 0.5845, + "step": 71760 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004978100023320402, + "loss": 0.5411, + "step": 71770 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004977676016027476, + "loss": 0.4676, + "step": 71780 + }, + { + "epoch": 3.04, + "learning_rate": 0.000497725200873455, + "loss": 0.4935, + "step": 71790 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004976828001441625, + "loss": 0.5317, + "step": 71800 + }, + { + "epoch": 3.04, + "learning_rate": 0.00049764039941487, + "loss": 0.5395, + "step": 71810 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004975979986855774, + "loss": 0.4697, + "step": 71820 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004975555979562849, + "loss": 0.4977, + "step": 71830 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004975131972269923, + "loss": 0.588, + "step": 71840 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004974707964976998, + "loss": 0.6394, + "step": 71850 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004974283957684073, + "loss": 0.5246, + "step": 71860 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004973859950391147, + "loss": 0.5227, + "step": 71870 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004973435943098221, + "loss": 0.5361, + "step": 71880 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004973011935805297, + "loss": 0.4055, + "step": 71890 + }, + { + "epoch": 3.04, + "learning_rate": 0.000497258792851237, + "loss": 0.5008, + "step": 71900 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004972163921219445, + "loss": 0.535, + "step": 71910 + }, + { + "epoch": 3.04, + "learning_rate": 0.000497173991392652, + "loss": 0.5468, + "step": 71920 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004971315906633594, + "loss": 0.5504, + "step": 71930 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004970891899340669, + "loss": 0.5302, + "step": 71940 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004970467892047744, + "loss": 0.5478, + "step": 71950 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004970043884754817, + "loss": 0.514, + "step": 71960 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004969619877461893, + "loss": 0.4169, + "step": 71970 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004969195870168968, + "loss": 0.5202, + "step": 71980 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004968771862876041, + "loss": 0.5228, + "step": 71990 + }, + { + "epoch": 3.04, + "learning_rate": 0.0004968347855583116, + "loss": 0.5436, + "step": 72000 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004967923848290192, + "loss": 0.54, + "step": 72010 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004967499840997265, + "loss": 0.461, + "step": 72020 + }, + { + "epoch": 3.05, + "learning_rate": 0.000496707583370434, + "loss": 0.4488, + "step": 72030 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004966651826411414, + "loss": 0.5339, + "step": 72040 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004966227819118489, + "loss": 0.4892, + "step": 72050 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004965803811825564, + "loss": 0.5274, + "step": 72060 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004965379804532638, + "loss": 0.5486, + "step": 72070 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004964955797239712, + "loss": 0.5255, + "step": 72080 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004964531789946788, + "loss": 0.6218, + "step": 72090 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004964107782653862, + "loss": 0.5992, + "step": 72100 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004963683775360936, + "loss": 0.5378, + "step": 72110 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004963259768068011, + "loss": 0.611, + "step": 72120 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004962835760775086, + "loss": 0.4515, + "step": 72130 + }, + { + "epoch": 3.05, + "learning_rate": 0.000496241175348216, + "loss": 0.5273, + "step": 72140 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004961987746189235, + "loss": 0.4744, + "step": 72150 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004961563738896309, + "loss": 0.5466, + "step": 72160 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004961139731603384, + "loss": 0.5004, + "step": 72170 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004960715724310459, + "loss": 0.5751, + "step": 72180 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004960291717017533, + "loss": 0.4606, + "step": 72190 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004959867709724607, + "loss": 0.4958, + "step": 72200 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004959443702431683, + "loss": 0.4351, + "step": 72210 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004959019695138756, + "loss": 0.5332, + "step": 72220 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004958595687845831, + "loss": 0.5246, + "step": 72230 + }, + { + "epoch": 3.05, + "learning_rate": 0.0004958171680552906, + "loss": 0.5564, + "step": 72240 + }, + { + "epoch": 3.06, + "learning_rate": 0.000495774767325998, + "loss": 0.4552, + "step": 72250 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004957323665967055, + "loss": 0.5377, + "step": 72260 + }, + { + "epoch": 3.06, + "learning_rate": 0.000495689965867413, + "loss": 0.4634, + "step": 72270 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004956475651381203, + "loss": 0.5248, + "step": 72280 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004956051644088279, + "loss": 0.5232, + "step": 72290 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004955627636795354, + "loss": 0.5317, + "step": 72300 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004955203629502427, + "loss": 0.5773, + "step": 72310 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004954779622209502, + "loss": 0.494, + "step": 72320 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004954355614916577, + "loss": 0.5459, + "step": 72330 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004953931607623651, + "loss": 0.5463, + "step": 72340 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004953507600330726, + "loss": 0.4873, + "step": 72350 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004953083593037801, + "loss": 0.4884, + "step": 72360 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004952659585744875, + "loss": 0.536, + "step": 72370 + }, + { + "epoch": 3.06, + "learning_rate": 0.000495223557845195, + "loss": 0.5463, + "step": 72380 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004951811571159025, + "loss": 0.4813, + "step": 72390 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004951387563866098, + "loss": 0.6031, + "step": 72400 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004950963556573173, + "loss": 0.4803, + "step": 72410 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004950539549280249, + "loss": 0.506, + "step": 72420 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004950115541987322, + "loss": 0.4757, + "step": 72430 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004949691534694397, + "loss": 0.4803, + "step": 72440 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004949267527401471, + "loss": 0.5146, + "step": 72450 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004948843520108546, + "loss": 0.5381, + "step": 72460 + }, + { + "epoch": 3.06, + "learning_rate": 0.0004948419512815621, + "loss": 0.5242, + "step": 72470 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004947995505522695, + "loss": 0.5278, + "step": 72480 + }, + { + "epoch": 3.07, + "learning_rate": 0.000494757149822977, + "loss": 0.5766, + "step": 72490 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004947147490936845, + "loss": 0.5403, + "step": 72500 + }, + { + "epoch": 3.07, + "learning_rate": 0.000494672348364392, + "loss": 0.4668, + "step": 72510 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004946299476350993, + "loss": 0.541, + "step": 72520 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004945875469058068, + "loss": 0.5967, + "step": 72530 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004945451461765142, + "loss": 0.5478, + "step": 72540 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004945027454472217, + "loss": 0.5164, + "step": 72550 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004944603447179292, + "loss": 0.5291, + "step": 72560 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004944179439886366, + "loss": 0.5039, + "step": 72570 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004943755432593441, + "loss": 0.4855, + "step": 72580 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004943331425300516, + "loss": 0.4835, + "step": 72590 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004942907418007589, + "loss": 0.6097, + "step": 72600 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004942483410714664, + "loss": 0.5133, + "step": 72610 + }, + { + "epoch": 3.07, + "learning_rate": 0.000494205940342174, + "loss": 0.5457, + "step": 72620 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004941635396128813, + "loss": 0.5608, + "step": 72630 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004941211388835888, + "loss": 0.5323, + "step": 72640 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004940787381542963, + "loss": 0.5183, + "step": 72650 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004940363374250037, + "loss": 0.5324, + "step": 72660 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004939939366957112, + "loss": 0.5807, + "step": 72670 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004939515359664187, + "loss": 0.4917, + "step": 72680 + }, + { + "epoch": 3.07, + "learning_rate": 0.000493909135237126, + "loss": 0.5183, + "step": 72690 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004938667345078336, + "loss": 0.5434, + "step": 72700 + }, + { + "epoch": 3.07, + "learning_rate": 0.0004938243337785411, + "loss": 0.5461, + "step": 72710 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004937819330492484, + "loss": 0.5218, + "step": 72720 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004937395323199559, + "loss": 0.5265, + "step": 72730 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004936971315906635, + "loss": 0.5529, + "step": 72740 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004936547308613708, + "loss": 0.4382, + "step": 72750 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004936123301320783, + "loss": 0.4791, + "step": 72760 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004935699294027858, + "loss": 0.4422, + "step": 72770 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004935275286734932, + "loss": 0.5278, + "step": 72780 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004934851279442007, + "loss": 0.5135, + "step": 72790 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004934427272149082, + "loss": 0.5586, + "step": 72800 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004934003264856155, + "loss": 0.5076, + "step": 72810 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004933579257563231, + "loss": 0.4554, + "step": 72820 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004933155250270305, + "loss": 0.5352, + "step": 72830 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004932731242977379, + "loss": 0.5573, + "step": 72840 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004932307235684454, + "loss": 0.4671, + "step": 72850 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004931883228391529, + "loss": 0.4761, + "step": 72860 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004931459221098603, + "loss": 0.5503, + "step": 72870 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004931035213805678, + "loss": 0.4144, + "step": 72880 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004930611206512751, + "loss": 0.4416, + "step": 72890 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004930187199219827, + "loss": 0.494, + "step": 72900 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004929763191926902, + "loss": 0.4925, + "step": 72910 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004929339184633975, + "loss": 0.4544, + "step": 72920 + }, + { + "epoch": 3.08, + "learning_rate": 0.000492891517734105, + "loss": 0.5669, + "step": 72930 + }, + { + "epoch": 3.08, + "learning_rate": 0.0004928491170048125, + "loss": 0.5084, + "step": 72940 + }, + { + "epoch": 3.08, + "learning_rate": 0.00049280671627552, + "loss": 0.5562, + "step": 72950 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004927643155462274, + "loss": 0.5731, + "step": 72960 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004927219148169349, + "loss": 0.5005, + "step": 72970 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004926795140876423, + "loss": 0.563, + "step": 72980 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004926371133583498, + "loss": 0.5228, + "step": 72990 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004925947126290573, + "loss": 0.4966, + "step": 73000 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004925523118997646, + "loss": 0.6024, + "step": 73010 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004925099111704722, + "loss": 0.4786, + "step": 73020 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004924675104411797, + "loss": 0.4879, + "step": 73030 + }, + { + "epoch": 3.09, + "learning_rate": 0.000492425109711887, + "loss": 0.4488, + "step": 73040 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004923827089825945, + "loss": 0.4996, + "step": 73050 + }, + { + "epoch": 3.09, + "learning_rate": 0.000492340308253302, + "loss": 0.4363, + "step": 73060 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004922979075240094, + "loss": 0.5704, + "step": 73070 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004922555067947169, + "loss": 0.6253, + "step": 73080 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004922131060654244, + "loss": 0.5385, + "step": 73090 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004921707053361318, + "loss": 0.5292, + "step": 73100 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004921283046068393, + "loss": 0.5467, + "step": 73110 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004920859038775468, + "loss": 0.5959, + "step": 73120 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004920435031482541, + "loss": 0.4598, + "step": 73130 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004920011024189616, + "loss": 0.5015, + "step": 73140 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004919587016896691, + "loss": 0.5917, + "step": 73150 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004919163009603765, + "loss": 0.5123, + "step": 73160 + }, + { + "epoch": 3.09, + "learning_rate": 0.000491873900231084, + "loss": 0.5387, + "step": 73170 + }, + { + "epoch": 3.09, + "learning_rate": 0.0004918314995017915, + "loss": 0.5092, + "step": 73180 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004917890987724989, + "loss": 0.5036, + "step": 73190 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004917466980432064, + "loss": 0.4785, + "step": 73200 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004917042973139138, + "loss": 0.4546, + "step": 73210 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004916618965846212, + "loss": 0.5189, + "step": 73220 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004916194958553288, + "loss": 0.4696, + "step": 73230 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004915770951260362, + "loss": 0.4645, + "step": 73240 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004915346943967436, + "loss": 0.501, + "step": 73250 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004914922936674511, + "loss": 0.4955, + "step": 73260 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004914498929381586, + "loss": 0.4628, + "step": 73270 + }, + { + "epoch": 3.1, + "learning_rate": 0.000491407492208866, + "loss": 0.4787, + "step": 73280 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004913650914795735, + "loss": 0.4952, + "step": 73290 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004913226907502808, + "loss": 0.4324, + "step": 73300 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004912802900209884, + "loss": 0.5118, + "step": 73310 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004912378892916959, + "loss": 0.5121, + "step": 73320 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004911954885624033, + "loss": 0.5002, + "step": 73330 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004911530878331107, + "loss": 0.4463, + "step": 73340 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004911106871038183, + "loss": 0.4805, + "step": 73350 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004910682863745257, + "loss": 0.5899, + "step": 73360 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004910258856452331, + "loss": 0.5429, + "step": 73370 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004909834849159406, + "loss": 0.4607, + "step": 73380 + }, + { + "epoch": 3.1, + "learning_rate": 0.000490941084186648, + "loss": 0.5488, + "step": 73390 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004908986834573555, + "loss": 0.4573, + "step": 73400 + }, + { + "epoch": 3.1, + "learning_rate": 0.000490856282728063, + "loss": 0.4594, + "step": 73410 + }, + { + "epoch": 3.1, + "learning_rate": 0.0004908138819987703, + "loss": 0.5379, + "step": 73420 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004907714812694779, + "loss": 0.5829, + "step": 73430 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004907290805401853, + "loss": 0.4431, + "step": 73440 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004906866798108927, + "loss": 0.4935, + "step": 73450 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004906442790816002, + "loss": 0.5333, + "step": 73460 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004906018783523077, + "loss": 0.4704, + "step": 73470 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004905594776230151, + "loss": 0.5163, + "step": 73480 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004905170768937226, + "loss": 0.4549, + "step": 73490 + }, + { + "epoch": 3.11, + "learning_rate": 0.00049047467616443, + "loss": 0.4823, + "step": 73500 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004904322754351375, + "loss": 0.5041, + "step": 73510 + }, + { + "epoch": 3.11, + "learning_rate": 0.000490389874705845, + "loss": 0.5117, + "step": 73520 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004903474739765524, + "loss": 0.4273, + "step": 73530 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004903050732472598, + "loss": 0.5989, + "step": 73540 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004902626725179674, + "loss": 0.4742, + "step": 73550 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004902202717886748, + "loss": 0.5185, + "step": 73560 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004901778710593822, + "loss": 0.5201, + "step": 73570 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004901354703300897, + "loss": 0.5478, + "step": 73580 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004900930696007972, + "loss": 0.4528, + "step": 73590 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004900506688715046, + "loss": 0.5149, + "step": 73600 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004900082681422121, + "loss": 0.5134, + "step": 73610 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004899658674129195, + "loss": 0.5406, + "step": 73620 + }, + { + "epoch": 3.11, + "learning_rate": 0.000489923466683627, + "loss": 0.5407, + "step": 73630 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004898810659543345, + "loss": 0.6125, + "step": 73640 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004898386652250419, + "loss": 0.4722, + "step": 73650 + }, + { + "epoch": 3.11, + "learning_rate": 0.0004897962644957493, + "loss": 0.471, + "step": 73660 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004897538637664568, + "loss": 0.4476, + "step": 73670 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004897114630371643, + "loss": 0.5193, + "step": 73680 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004896690623078717, + "loss": 0.5127, + "step": 73690 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004896266615785792, + "loss": 0.4781, + "step": 73700 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004895842608492867, + "loss": 0.4825, + "step": 73710 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004895418601199941, + "loss": 0.5058, + "step": 73720 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004894994593907016, + "loss": 0.5268, + "step": 73730 + }, + { + "epoch": 3.12, + "learning_rate": 0.000489457058661409, + "loss": 0.5138, + "step": 73740 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004894146579321164, + "loss": 0.4676, + "step": 73750 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004893722572028239, + "loss": 0.4773, + "step": 73760 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004893298564735314, + "loss": 0.4538, + "step": 73770 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004892874557442388, + "loss": 0.4822, + "step": 73780 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004892450550149463, + "loss": 0.4474, + "step": 73790 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004892026542856538, + "loss": 0.4985, + "step": 73800 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004891602535563612, + "loss": 0.521, + "step": 73810 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004891178528270686, + "loss": 0.5127, + "step": 73820 + }, + { + "epoch": 3.12, + "learning_rate": 0.000489075452097776, + "loss": 0.4665, + "step": 73830 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004890330513684836, + "loss": 0.5128, + "step": 73840 + }, + { + "epoch": 3.12, + "learning_rate": 0.000488990650639191, + "loss": 0.5272, + "step": 73850 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004889482499098984, + "loss": 0.423, + "step": 73860 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004889058491806059, + "loss": 0.6059, + "step": 73870 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004888634484513134, + "loss": 0.5059, + "step": 73880 + }, + { + "epoch": 3.12, + "learning_rate": 0.0004888210477220208, + "loss": 0.4635, + "step": 73890 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004887786469927283, + "loss": 0.4782, + "step": 73900 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004887362462634357, + "loss": 0.5022, + "step": 73910 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004886938455341432, + "loss": 0.4395, + "step": 73920 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004886514448048507, + "loss": 0.4682, + "step": 73930 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004886090440755581, + "loss": 0.5286, + "step": 73940 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004885666433462655, + "loss": 0.5875, + "step": 73950 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004885242426169731, + "loss": 0.5407, + "step": 73960 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004884818418876805, + "loss": 0.6945, + "step": 73970 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004884394411583879, + "loss": 0.4727, + "step": 73980 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004883970404290954, + "loss": 0.4344, + "step": 73990 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004883546396998029, + "loss": 0.556, + "step": 74000 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004883122389705103, + "loss": 0.582, + "step": 74010 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004882698382412178, + "loss": 0.5813, + "step": 74020 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004882274375119252, + "loss": 0.5199, + "step": 74030 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004881850367826327, + "loss": 0.5599, + "step": 74040 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004881426360533401, + "loss": 0.6021, + "step": 74050 + }, + { + "epoch": 3.13, + "learning_rate": 0.00048810023532404757, + "loss": 0.5208, + "step": 74060 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004880578345947551, + "loss": 0.456, + "step": 74070 + }, + { + "epoch": 3.13, + "learning_rate": 0.00048801543386546245, + "loss": 0.496, + "step": 74080 + }, + { + "epoch": 3.13, + "learning_rate": 0.00048797303313616997, + "loss": 0.5465, + "step": 74090 + }, + { + "epoch": 3.13, + "learning_rate": 0.00048793063240687743, + "loss": 0.5174, + "step": 74100 + }, + { + "epoch": 3.13, + "learning_rate": 0.00048788823167758485, + "loss": 0.5012, + "step": 74110 + }, + { + "epoch": 3.13, + "learning_rate": 0.0004878458309482923, + "loss": 0.5082, + "step": 74120 + }, + { + "epoch": 3.13, + "learning_rate": 0.00048780343021899983, + "loss": 0.5289, + "step": 74130 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004877610294897072, + "loss": 0.5438, + "step": 74140 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004877186287604147, + "loss": 0.529, + "step": 74150 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004876762280311222, + "loss": 0.5028, + "step": 74160 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004876338273018296, + "loss": 0.5482, + "step": 74170 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048759142657253706, + "loss": 0.4759, + "step": 74180 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004875490258432446, + "loss": 0.5872, + "step": 74190 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048750662511395194, + "loss": 0.5212, + "step": 74200 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048746422438465946, + "loss": 0.4995, + "step": 74210 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048742182365536693, + "loss": 0.543, + "step": 74220 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048737942292607434, + "loss": 0.5652, + "step": 74230 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004873370221967818, + "loss": 0.4894, + "step": 74240 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004872946214674893, + "loss": 0.4258, + "step": 74250 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004872522207381967, + "loss": 0.5535, + "step": 74260 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004872098200089042, + "loss": 0.5712, + "step": 74270 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004871674192796117, + "loss": 0.5439, + "step": 74280 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004871250185503191, + "loss": 0.5796, + "step": 74290 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048708261782102655, + "loss": 0.4422, + "step": 74300 + }, + { + "epoch": 3.14, + "learning_rate": 0.000487040217091734, + "loss": 0.5219, + "step": 74310 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048699781636244143, + "loss": 0.5136, + "step": 74320 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004869554156331489, + "loss": 0.5618, + "step": 74330 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004869130149038564, + "loss": 0.5964, + "step": 74340 + }, + { + "epoch": 3.14, + "learning_rate": 0.00048687061417456383, + "loss": 0.4471, + "step": 74350 + }, + { + "epoch": 3.14, + "learning_rate": 0.0004868282134452713, + "loss": 0.5655, + "step": 74360 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004867858127159787, + "loss": 0.5107, + "step": 74370 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004867434119866862, + "loss": 0.5547, + "step": 74380 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048670101125739365, + "loss": 0.5588, + "step": 74390 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048665861052810106, + "loss": 0.5385, + "step": 74400 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004866162097988086, + "loss": 0.4729, + "step": 74410 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048657380906951605, + "loss": 0.472, + "step": 74420 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048653140834022346, + "loss": 0.4596, + "step": 74430 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004864890076109309, + "loss": 0.517, + "step": 74440 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004864466068816384, + "loss": 0.5981, + "step": 74450 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004864042061523458, + "loss": 0.5713, + "step": 74460 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048636180542305327, + "loss": 0.4909, + "step": 74470 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004863194046937608, + "loss": 0.5372, + "step": 74480 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004862770039644682, + "loss": 0.528, + "step": 74490 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048623460323517567, + "loss": 0.4746, + "step": 74500 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048619220250588314, + "loss": 0.5438, + "step": 74510 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048614980177659055, + "loss": 0.5697, + "step": 74520 + }, + { + "epoch": 3.15, + "learning_rate": 0.000486107401047298, + "loss": 0.4827, + "step": 74530 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048606500031800554, + "loss": 0.5983, + "step": 74540 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004860225995887129, + "loss": 0.6071, + "step": 74550 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004859801988594204, + "loss": 0.5053, + "step": 74560 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004859377981301279, + "loss": 0.6286, + "step": 74570 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004858953974008353, + "loss": 0.4561, + "step": 74580 + }, + { + "epoch": 3.15, + "learning_rate": 0.00048585299667154276, + "loss": 0.5321, + "step": 74590 + }, + { + "epoch": 3.15, + "learning_rate": 0.0004858105959422503, + "loss": 0.5497, + "step": 74600 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048576819521295764, + "loss": 0.4808, + "step": 74610 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048572579448366516, + "loss": 0.5256, + "step": 74620 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048568339375437263, + "loss": 0.535, + "step": 74630 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048564099302508004, + "loss": 0.4878, + "step": 74640 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004855985922957875, + "loss": 0.5045, + "step": 74650 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048555619156649503, + "loss": 0.5911, + "step": 74660 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004855137908372024, + "loss": 0.4426, + "step": 74670 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004854713901079099, + "loss": 0.4799, + "step": 74680 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048542898937861727, + "loss": 0.5388, + "step": 74690 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004853865886493248, + "loss": 0.4935, + "step": 74700 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048534418792003226, + "loss": 0.4264, + "step": 74710 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048530178719073967, + "loss": 0.5304, + "step": 74720 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048525938646144714, + "loss": 0.4217, + "step": 74730 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048521698573215466, + "loss": 0.5422, + "step": 74740 + }, + { + "epoch": 3.16, + "learning_rate": 0.000485174585002862, + "loss": 0.4844, + "step": 74750 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048513218427356954, + "loss": 0.4889, + "step": 74760 + }, + { + "epoch": 3.16, + "learning_rate": 0.000485089783544277, + "loss": 0.4848, + "step": 74770 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004850473828149844, + "loss": 0.5355, + "step": 74780 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004850049820856919, + "loss": 0.5225, + "step": 74790 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004849625813563994, + "loss": 0.4961, + "step": 74800 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048492018062710676, + "loss": 0.5388, + "step": 74810 + }, + { + "epoch": 3.16, + "learning_rate": 0.0004848777798978143, + "loss": 0.4639, + "step": 74820 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048483537916852175, + "loss": 0.5121, + "step": 74830 + }, + { + "epoch": 3.16, + "learning_rate": 0.00048479297843922916, + "loss": 0.518, + "step": 74840 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048475057770993663, + "loss": 0.4298, + "step": 74850 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048470817698064415, + "loss": 0.4589, + "step": 74860 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004846657762513515, + "loss": 0.4803, + "step": 74870 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048462337552205903, + "loss": 0.4676, + "step": 74880 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004845809747927665, + "loss": 0.5189, + "step": 74890 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004845385740634739, + "loss": 0.4613, + "step": 74900 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004844961733341814, + "loss": 0.4898, + "step": 74910 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048445377260488884, + "loss": 0.4845, + "step": 74920 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048441137187559626, + "loss": 0.4895, + "step": 74930 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004843689711463038, + "loss": 0.5075, + "step": 74940 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048432657041701124, + "loss": 0.4986, + "step": 74950 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048428416968771866, + "loss": 0.5302, + "step": 74960 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004842417689584261, + "loss": 0.539, + "step": 74970 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048419936822913354, + "loss": 0.6087, + "step": 74980 + }, + { + "epoch": 3.17, + "learning_rate": 0.000484156967499841, + "loss": 0.5667, + "step": 74990 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048411456677054847, + "loss": 0.5784, + "step": 75000 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004840721660412559, + "loss": 0.486, + "step": 75010 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004840297653119634, + "loss": 0.5271, + "step": 75020 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048398736458267087, + "loss": 0.4607, + "step": 75030 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004839449638533783, + "loss": 0.5144, + "step": 75040 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048390256312408575, + "loss": 0.4709, + "step": 75050 + }, + { + "epoch": 3.17, + "learning_rate": 0.0004838601623947932, + "loss": 0.5076, + "step": 75060 + }, + { + "epoch": 3.17, + "learning_rate": 0.00048381776166550063, + "loss": 0.476, + "step": 75070 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004837753609362081, + "loss": 0.4717, + "step": 75080 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004837329602069156, + "loss": 0.472, + "step": 75090 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048369055947762303, + "loss": 0.5083, + "step": 75100 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004836481587483305, + "loss": 0.4591, + "step": 75110 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048360575801903796, + "loss": 0.5677, + "step": 75120 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004835633572897454, + "loss": 0.5859, + "step": 75130 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048352095656045284, + "loss": 0.553, + "step": 75140 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048347855583116036, + "loss": 0.5696, + "step": 75150 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004834361551018678, + "loss": 0.5231, + "step": 75160 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048339375437257524, + "loss": 0.5804, + "step": 75170 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004833513536432827, + "loss": 0.5019, + "step": 75180 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004833089529139901, + "loss": 0.5516, + "step": 75190 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004832665521846976, + "loss": 0.528, + "step": 75200 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004832241514554051, + "loss": 0.4757, + "step": 75210 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048318175072611247, + "loss": 0.5318, + "step": 75220 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048313934999682, + "loss": 0.5559, + "step": 75230 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048309694926752746, + "loss": 0.633, + "step": 75240 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048305454853823487, + "loss": 0.5745, + "step": 75250 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048301214780894233, + "loss": 0.4618, + "step": 75260 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048296974707964986, + "loss": 0.4105, + "step": 75270 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004829273463503572, + "loss": 0.552, + "step": 75280 + }, + { + "epoch": 3.18, + "learning_rate": 0.00048288494562106474, + "loss": 0.63, + "step": 75290 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004828425448917721, + "loss": 0.5927, + "step": 75300 + }, + { + "epoch": 3.18, + "learning_rate": 0.0004828001441624796, + "loss": 0.5353, + "step": 75310 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004827577434331871, + "loss": 0.4562, + "step": 75320 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004827153427038945, + "loss": 0.4668, + "step": 75330 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048267294197460196, + "loss": 0.4971, + "step": 75340 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004826305412453095, + "loss": 0.5424, + "step": 75350 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048258814051601684, + "loss": 0.5036, + "step": 75360 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048254573978672436, + "loss": 0.53, + "step": 75370 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048250333905743183, + "loss": 0.5401, + "step": 75380 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048246093832813924, + "loss": 0.5064, + "step": 75390 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004824185375988467, + "loss": 0.5401, + "step": 75400 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048237613686955423, + "loss": 0.5111, + "step": 75410 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004823337361402616, + "loss": 0.543, + "step": 75420 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004822913354109691, + "loss": 0.5015, + "step": 75430 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004822489346816766, + "loss": 0.4696, + "step": 75440 + }, + { + "epoch": 3.19, + "learning_rate": 0.000482206533952384, + "loss": 0.5638, + "step": 75450 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048216413322309145, + "loss": 0.5414, + "step": 75460 + }, + { + "epoch": 3.19, + "learning_rate": 0.000482121732493799, + "loss": 0.4752, + "step": 75470 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048207933176450633, + "loss": 0.5112, + "step": 75480 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048203693103521385, + "loss": 0.5937, + "step": 75490 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004819945303059213, + "loss": 0.5636, + "step": 75500 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048195212957662873, + "loss": 0.4162, + "step": 75510 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004819097288473362, + "loss": 0.5701, + "step": 75520 + }, + { + "epoch": 3.19, + "learning_rate": 0.00048186732811804367, + "loss": 0.5312, + "step": 75530 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004818249273887511, + "loss": 0.4948, + "step": 75540 + }, + { + "epoch": 3.19, + "learning_rate": 0.0004817825266594586, + "loss": 0.5552, + "step": 75550 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048174012593016607, + "loss": 0.5866, + "step": 75560 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004816977252008735, + "loss": 0.5222, + "step": 75570 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048165532447158095, + "loss": 0.5167, + "step": 75580 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048161292374228836, + "loss": 0.5507, + "step": 75590 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004815705230129958, + "loss": 0.5202, + "step": 75600 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048152812228370335, + "loss": 0.4746, + "step": 75610 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004814857215544107, + "loss": 0.5439, + "step": 75620 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004814433208251182, + "loss": 0.5818, + "step": 75630 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004814009200958257, + "loss": 0.4889, + "step": 75640 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004813585193665331, + "loss": 0.549, + "step": 75650 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048131611863724057, + "loss": 0.4992, + "step": 75660 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048127371790794804, + "loss": 0.4875, + "step": 75670 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048123131717865545, + "loss": 0.54, + "step": 75680 + }, + { + "epoch": 3.2, + "learning_rate": 0.000481188916449363, + "loss": 0.5764, + "step": 75690 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048114651572007044, + "loss": 0.4574, + "step": 75700 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048110411499077785, + "loss": 0.4398, + "step": 75710 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004810617142614853, + "loss": 0.5258, + "step": 75720 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004810193135321928, + "loss": 0.5225, + "step": 75730 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004809769128029002, + "loss": 0.5353, + "step": 75740 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048093451207360767, + "loss": 0.4473, + "step": 75750 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004808921113443152, + "loss": 0.4724, + "step": 75760 + }, + { + "epoch": 3.2, + "learning_rate": 0.0004808497106150226, + "loss": 0.5771, + "step": 75770 + }, + { + "epoch": 3.2, + "learning_rate": 0.00048080730988573007, + "loss": 0.585, + "step": 75780 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048076490915643753, + "loss": 0.6031, + "step": 75790 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048072250842714495, + "loss": 0.6068, + "step": 75800 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004806801076978524, + "loss": 0.5503, + "step": 75810 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048063770696855993, + "loss": 0.4862, + "step": 75820 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004805953062392673, + "loss": 0.5818, + "step": 75830 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004805529055099748, + "loss": 0.4202, + "step": 75840 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004805105047806823, + "loss": 0.5186, + "step": 75850 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004804681040513897, + "loss": 0.498, + "step": 75860 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048042570332209716, + "loss": 0.6796, + "step": 75870 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004803833025928047, + "loss": 0.5047, + "step": 75880 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048034090186351204, + "loss": 0.5118, + "step": 75890 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048029850113421956, + "loss": 0.5146, + "step": 75900 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048025610040492697, + "loss": 0.5572, + "step": 75910 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048021369967563444, + "loss": 0.4548, + "step": 75920 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004801712989463419, + "loss": 0.375, + "step": 75930 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004801288982170493, + "loss": 0.4809, + "step": 75940 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004800864974877568, + "loss": 0.5234, + "step": 75950 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004800440967584643, + "loss": 0.433, + "step": 75960 + }, + { + "epoch": 3.21, + "learning_rate": 0.00048000169602917166, + "loss": 0.5337, + "step": 75970 + }, + { + "epoch": 3.21, + "learning_rate": 0.0004799592952998792, + "loss": 0.5185, + "step": 75980 + }, + { + "epoch": 3.21, + "learning_rate": 0.00047991689457058665, + "loss": 0.5264, + "step": 75990 + }, + { + "epoch": 3.21, + "learning_rate": 0.00047987449384129406, + "loss": 0.51, + "step": 76000 + }, + { + "epoch": 3.21, + "learning_rate": 0.00047983209311200153, + "loss": 0.4856, + "step": 76010 + }, + { + "epoch": 3.21, + "learning_rate": 0.00047978969238270905, + "loss": 0.515, + "step": 76020 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004797472916534164, + "loss": 0.5146, + "step": 76030 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047970489092412393, + "loss": 0.5528, + "step": 76040 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004796624901948314, + "loss": 0.528, + "step": 76050 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004796200894655388, + "loss": 0.476, + "step": 76060 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004795776887362463, + "loss": 0.5561, + "step": 76070 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004795352880069538, + "loss": 0.538, + "step": 76080 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047949288727766116, + "loss": 0.4592, + "step": 76090 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004794504865483687, + "loss": 0.4291, + "step": 76100 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047940808581907614, + "loss": 0.5123, + "step": 76110 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047936568508978356, + "loss": 0.5434, + "step": 76120 + }, + { + "epoch": 3.22, + "learning_rate": 0.000479323284360491, + "loss": 0.4747, + "step": 76130 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047928088363119854, + "loss": 0.4645, + "step": 76140 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004792384829019059, + "loss": 0.5325, + "step": 76150 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004791960821726134, + "loss": 0.6031, + "step": 76160 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004791536814433209, + "loss": 0.4842, + "step": 76170 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004791112807140283, + "loss": 0.4916, + "step": 76180 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047906887998473577, + "loss": 0.6312, + "step": 76190 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004790264792554432, + "loss": 0.4754, + "step": 76200 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047898407852615065, + "loss": 0.4842, + "step": 76210 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047894167779685817, + "loss": 0.4404, + "step": 76220 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047889927706756553, + "loss": 0.4689, + "step": 76230 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047885687633827305, + "loss": 0.4999, + "step": 76240 + }, + { + "epoch": 3.22, + "learning_rate": 0.0004788144756089805, + "loss": 0.4727, + "step": 76250 + }, + { + "epoch": 3.22, + "learning_rate": 0.00047877207487968793, + "loss": 0.4853, + "step": 76260 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004787296741503954, + "loss": 0.528, + "step": 76270 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047868727342110286, + "loss": 0.5291, + "step": 76280 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004786448726918103, + "loss": 0.5253, + "step": 76290 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004786024719625178, + "loss": 0.412, + "step": 76300 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047856007123322526, + "loss": 0.5087, + "step": 76310 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004785176705039327, + "loss": 0.4515, + "step": 76320 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047847526977464014, + "loss": 0.5723, + "step": 76330 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004784328690453476, + "loss": 0.601, + "step": 76340 + }, + { + "epoch": 3.23, + "learning_rate": 0.000478390468316055, + "loss": 0.5171, + "step": 76350 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047834806758676254, + "loss": 0.4414, + "step": 76360 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047830566685747, + "loss": 0.5525, + "step": 76370 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004782632661281774, + "loss": 0.5213, + "step": 76380 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004782208653988849, + "loss": 0.5814, + "step": 76390 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047817846466959236, + "loss": 0.4739, + "step": 76400 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047813606394029977, + "loss": 0.5884, + "step": 76410 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047809366321100724, + "loss": 0.5032, + "step": 76420 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047805126248171476, + "loss": 0.4679, + "step": 76430 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047800886175242217, + "loss": 0.3853, + "step": 76440 + }, + { + "epoch": 3.23, + "learning_rate": 0.00047796646102312964, + "loss": 0.5355, + "step": 76450 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004779240602938371, + "loss": 0.5662, + "step": 76460 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004778816595645445, + "loss": 0.4942, + "step": 76470 + }, + { + "epoch": 3.23, + "learning_rate": 0.000477839258835252, + "loss": 0.4424, + "step": 76480 + }, + { + "epoch": 3.23, + "learning_rate": 0.0004777968581059595, + "loss": 0.4922, + "step": 76490 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047775445737666686, + "loss": 0.5414, + "step": 76500 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004777120566473744, + "loss": 0.5683, + "step": 76510 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004776696559180818, + "loss": 0.5588, + "step": 76520 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047762725518878926, + "loss": 0.4974, + "step": 76530 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047758485445949673, + "loss": 0.5487, + "step": 76540 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047754245373020414, + "loss": 0.421, + "step": 76550 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004775000530009116, + "loss": 0.5916, + "step": 76560 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047745765227161913, + "loss": 0.4629, + "step": 76570 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004774152515423265, + "loss": 0.5329, + "step": 76580 + }, + { + "epoch": 3.24, + "learning_rate": 0.000477372850813034, + "loss": 0.5355, + "step": 76590 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004773304500837415, + "loss": 0.572, + "step": 76600 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004772880493544489, + "loss": 0.5435, + "step": 76610 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047724564862515635, + "loss": 0.5154, + "step": 76620 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004772032478958639, + "loss": 0.5073, + "step": 76630 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047716084716657123, + "loss": 0.509, + "step": 76640 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047711844643727875, + "loss": 0.5813, + "step": 76650 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004770760457079862, + "loss": 0.5699, + "step": 76660 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047703364497869363, + "loss": 0.5242, + "step": 76670 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004769912442494011, + "loss": 0.5007, + "step": 76680 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004769488435201086, + "loss": 0.4746, + "step": 76690 + }, + { + "epoch": 3.24, + "learning_rate": 0.000476906442790816, + "loss": 0.43, + "step": 76700 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004768640420615235, + "loss": 0.5566, + "step": 76710 + }, + { + "epoch": 3.24, + "learning_rate": 0.00047682164133223097, + "loss": 0.5325, + "step": 76720 + }, + { + "epoch": 3.24, + "learning_rate": 0.0004767792406029384, + "loss": 0.4324, + "step": 76730 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047673683987364585, + "loss": 0.4257, + "step": 76740 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047669443914435337, + "loss": 0.5847, + "step": 76750 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004766520384150607, + "loss": 0.5264, + "step": 76760 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047660963768576825, + "loss": 0.4188, + "step": 76770 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004765672369564757, + "loss": 0.5866, + "step": 76780 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047652483622718313, + "loss": 0.5252, + "step": 76790 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004764824354978906, + "loss": 0.5626, + "step": 76800 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047644003476859806, + "loss": 0.5342, + "step": 76810 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004763976340393055, + "loss": 0.4544, + "step": 76820 + }, + { + "epoch": 3.25, + "learning_rate": 0.000476355233310013, + "loss": 0.5763, + "step": 76830 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047631283258072035, + "loss": 0.5174, + "step": 76840 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004762704318514279, + "loss": 0.4455, + "step": 76850 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047622803112213534, + "loss": 0.4155, + "step": 76860 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047618563039284275, + "loss": 0.5356, + "step": 76870 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004761432296635502, + "loss": 0.5383, + "step": 76880 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047610082893425774, + "loss": 0.6553, + "step": 76890 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004760584282049651, + "loss": 0.4864, + "step": 76900 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004760160274756726, + "loss": 0.509, + "step": 76910 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004759736267463801, + "loss": 0.4601, + "step": 76920 + }, + { + "epoch": 3.25, + "learning_rate": 0.0004759312260170875, + "loss": 0.5248, + "step": 76930 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047588882528779497, + "loss": 0.5178, + "step": 76940 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047584642455850243, + "loss": 0.6241, + "step": 76950 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047580402382920985, + "loss": 0.5301, + "step": 76960 + }, + { + "epoch": 3.25, + "learning_rate": 0.00047576162309991737, + "loss": 0.4639, + "step": 76970 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047571922237062483, + "loss": 0.4565, + "step": 76980 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047567682164133225, + "loss": 0.4754, + "step": 76990 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004756344209120397, + "loss": 0.515, + "step": 77000 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004755920201827472, + "loss": 0.4733, + "step": 77010 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004755496194534546, + "loss": 0.5727, + "step": 77020 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047550721872416206, + "loss": 0.471, + "step": 77030 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004754648179948696, + "loss": 0.5049, + "step": 77040 + }, + { + "epoch": 3.26, + "learning_rate": 0.000475422417265577, + "loss": 0.5597, + "step": 77050 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047538001653628446, + "loss": 0.579, + "step": 77060 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004753376158069919, + "loss": 0.4981, + "step": 77070 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047529521507769934, + "loss": 0.458, + "step": 77080 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004752528143484068, + "loss": 0.4263, + "step": 77090 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004752104136191143, + "loss": 0.519, + "step": 77100 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047516801288982174, + "loss": 0.4756, + "step": 77110 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004751256121605292, + "loss": 0.6127, + "step": 77120 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004750832114312366, + "loss": 0.5791, + "step": 77130 + }, + { + "epoch": 3.26, + "learning_rate": 0.0004750408107019441, + "loss": 0.4921, + "step": 77140 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047499840997265155, + "loss": 0.5414, + "step": 77150 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047495600924335896, + "loss": 0.4507, + "step": 77160 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047491360851406643, + "loss": 0.4666, + "step": 77170 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047487120778477395, + "loss": 0.4686, + "step": 77180 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047482880705548137, + "loss": 0.589, + "step": 77190 + }, + { + "epoch": 3.26, + "learning_rate": 0.00047478640632618883, + "loss": 0.4556, + "step": 77200 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004747440055968963, + "loss": 0.4177, + "step": 77210 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004747016048676037, + "loss": 0.526, + "step": 77220 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004746592041383112, + "loss": 0.5222, + "step": 77230 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004746168034090187, + "loss": 0.5068, + "step": 77240 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047457440267972606, + "loss": 0.4607, + "step": 77250 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004745320019504336, + "loss": 0.5532, + "step": 77260 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047448960122114105, + "loss": 0.5266, + "step": 77270 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047444720049184846, + "loss": 0.57, + "step": 77280 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004744047997625559, + "loss": 0.4789, + "step": 77290 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047436239903326345, + "loss": 0.445, + "step": 77300 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004743199983039708, + "loss": 0.6136, + "step": 77310 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004742775975746783, + "loss": 0.46, + "step": 77320 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004742351968453858, + "loss": 0.5109, + "step": 77330 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004741927961160932, + "loss": 0.5145, + "step": 77340 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047415039538680067, + "loss": 0.5738, + "step": 77350 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004741079946575082, + "loss": 0.487, + "step": 77360 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047406559392821555, + "loss": 0.587, + "step": 77370 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047402319319892307, + "loss": 0.4403, + "step": 77380 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047398079246963054, + "loss": 0.5683, + "step": 77390 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047393839174033795, + "loss": 0.441, + "step": 77400 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004738959910110454, + "loss": 0.5224, + "step": 77410 + }, + { + "epoch": 3.27, + "learning_rate": 0.00047385359028175294, + "loss": 0.5017, + "step": 77420 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004738111895524603, + "loss": 0.487, + "step": 77430 + }, + { + "epoch": 3.27, + "learning_rate": 0.0004737687888231678, + "loss": 0.5515, + "step": 77440 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004737263880938752, + "loss": 0.4546, + "step": 77450 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004736839873645827, + "loss": 0.5731, + "step": 77460 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047364158663529016, + "loss": 0.5447, + "step": 77470 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004735991859059976, + "loss": 0.4933, + "step": 77480 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047355678517670504, + "loss": 0.5239, + "step": 77490 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047351438444741256, + "loss": 0.4848, + "step": 77500 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004734719837181199, + "loss": 0.5548, + "step": 77510 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047342958298882744, + "loss": 0.5022, + "step": 77520 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004733871822595349, + "loss": 0.513, + "step": 77530 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004733447815302423, + "loss": 0.4325, + "step": 77540 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004733023808009498, + "loss": 0.5866, + "step": 77550 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047325998007165726, + "loss": 0.4742, + "step": 77560 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047321757934236467, + "loss": 0.5336, + "step": 77570 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004731751786130722, + "loss": 0.4936, + "step": 77580 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047313277788377966, + "loss": 0.6111, + "step": 77590 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047309037715448707, + "loss": 0.5541, + "step": 77600 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047304797642519454, + "loss": 0.5036, + "step": 77610 + }, + { + "epoch": 3.28, + "learning_rate": 0.000473005575695902, + "loss": 0.5936, + "step": 77620 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004729631749666094, + "loss": 0.4725, + "step": 77630 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047292077423731694, + "loss": 0.4948, + "step": 77640 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004728783735080244, + "loss": 0.5842, + "step": 77650 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004728359727787318, + "loss": 0.57, + "step": 77660 + }, + { + "epoch": 3.28, + "learning_rate": 0.0004727935720494393, + "loss": 0.4679, + "step": 77670 + }, + { + "epoch": 3.28, + "learning_rate": 0.00047275117132014675, + "loss": 0.6122, + "step": 77680 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047270877059085416, + "loss": 0.5156, + "step": 77690 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047266636986156163, + "loss": 0.5896, + "step": 77700 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047262396913226915, + "loss": 0.5119, + "step": 77710 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047258156840297656, + "loss": 0.5107, + "step": 77720 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047253916767368403, + "loss": 0.4566, + "step": 77730 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047249676694439144, + "loss": 0.618, + "step": 77740 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004724543662150989, + "loss": 0.5318, + "step": 77750 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004724119654858064, + "loss": 0.4899, + "step": 77760 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004723695647565138, + "loss": 0.5115, + "step": 77770 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047232716402722126, + "loss": 0.5572, + "step": 77780 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004722847632979288, + "loss": 0.556, + "step": 77790 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004722423625686362, + "loss": 0.5518, + "step": 77800 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047219996183934366, + "loss": 0.4718, + "step": 77810 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004721575611100511, + "loss": 0.5253, + "step": 77820 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047211516038075853, + "loss": 0.5141, + "step": 77830 + }, + { + "epoch": 3.29, + "learning_rate": 0.000472072759651466, + "loss": 0.5777, + "step": 77840 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004720303589221735, + "loss": 0.5581, + "step": 77850 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004719879581928809, + "loss": 0.5388, + "step": 77860 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004719455574635884, + "loss": 0.5617, + "step": 77870 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047190315673429587, + "loss": 0.5255, + "step": 77880 + }, + { + "epoch": 3.29, + "learning_rate": 0.0004718607560050033, + "loss": 0.5058, + "step": 77890 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047181835527571075, + "loss": 0.4948, + "step": 77900 + }, + { + "epoch": 3.29, + "learning_rate": 0.00047177595454641827, + "loss": 0.494, + "step": 77910 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047173355381712563, + "loss": 0.5289, + "step": 77920 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047169115308783315, + "loss": 0.4526, + "step": 77930 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004716487523585406, + "loss": 0.512, + "step": 77940 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047160635162924803, + "loss": 0.4611, + "step": 77950 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004715639508999555, + "loss": 0.5745, + "step": 77960 + }, + { + "epoch": 3.3, + "learning_rate": 0.000471521550170663, + "loss": 0.6043, + "step": 77970 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004714791494413704, + "loss": 0.45, + "step": 77980 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004714367487120779, + "loss": 0.6834, + "step": 77990 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047139434798278536, + "loss": 0.4226, + "step": 78000 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004713519472534928, + "loss": 0.5423, + "step": 78010 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047130954652420024, + "loss": 0.5776, + "step": 78020 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047126714579490776, + "loss": 0.4903, + "step": 78030 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004712247450656151, + "loss": 0.5277, + "step": 78040 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047118234433632264, + "loss": 0.4924, + "step": 78050 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047113994360703, + "loss": 0.5796, + "step": 78060 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004710975428777375, + "loss": 0.5438, + "step": 78070 + }, + { + "epoch": 3.3, + "learning_rate": 0.000471055142148445, + "loss": 0.6223, + "step": 78080 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004710127414191524, + "loss": 0.5058, + "step": 78090 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047097034068985987, + "loss": 0.4797, + "step": 78100 + }, + { + "epoch": 3.3, + "learning_rate": 0.0004709279399605674, + "loss": 0.52, + "step": 78110 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047088553923127475, + "loss": 0.5706, + "step": 78120 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047084313850198227, + "loss": 0.4606, + "step": 78130 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047080073777268973, + "loss": 0.4405, + "step": 78140 + }, + { + "epoch": 3.3, + "learning_rate": 0.00047075833704339715, + "loss": 0.4801, + "step": 78150 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004707159363141046, + "loss": 0.6313, + "step": 78160 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047067353558481213, + "loss": 0.4597, + "step": 78170 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004706311348555195, + "loss": 0.5006, + "step": 78180 + }, + { + "epoch": 3.31, + "learning_rate": 0.000470588734126227, + "loss": 0.4894, + "step": 78190 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004705463333969345, + "loss": 0.5283, + "step": 78200 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004705039326676419, + "loss": 0.5121, + "step": 78210 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047046153193834936, + "loss": 0.6172, + "step": 78220 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004704191312090568, + "loss": 0.5118, + "step": 78230 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047037673047976424, + "loss": 0.493, + "step": 78240 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047033432975047176, + "loss": 0.5701, + "step": 78250 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047029192902117923, + "loss": 0.5941, + "step": 78260 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047024952829188664, + "loss": 0.5032, + "step": 78270 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004702071275625941, + "loss": 0.4835, + "step": 78280 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004701647268333016, + "loss": 0.5355, + "step": 78290 + }, + { + "epoch": 3.31, + "learning_rate": 0.000470122326104009, + "loss": 0.4992, + "step": 78300 + }, + { + "epoch": 3.31, + "learning_rate": 0.00047007992537471645, + "loss": 0.427, + "step": 78310 + }, + { + "epoch": 3.31, + "learning_rate": 0.000470037524645424, + "loss": 0.5313, + "step": 78320 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004699951239161314, + "loss": 0.5094, + "step": 78330 + }, + { + "epoch": 3.31, + "learning_rate": 0.00046995272318683885, + "loss": 0.5913, + "step": 78340 + }, + { + "epoch": 3.31, + "learning_rate": 0.00046991032245754627, + "loss": 0.5217, + "step": 78350 + }, + { + "epoch": 3.31, + "learning_rate": 0.00046986792172825373, + "loss": 0.5733, + "step": 78360 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004698255209989612, + "loss": 0.4892, + "step": 78370 + }, + { + "epoch": 3.31, + "learning_rate": 0.0004697831202696686, + "loss": 0.5264, + "step": 78380 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046974071954037613, + "loss": 0.5522, + "step": 78390 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004696983188110836, + "loss": 0.5134, + "step": 78400 + }, + { + "epoch": 3.32, + "learning_rate": 0.000469655918081791, + "loss": 0.5031, + "step": 78410 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004696135173524985, + "loss": 0.4955, + "step": 78420 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046957111662320595, + "loss": 0.6239, + "step": 78430 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046952871589391336, + "loss": 0.4901, + "step": 78440 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004694863151646208, + "loss": 0.5437, + "step": 78450 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046944391443532835, + "loss": 0.5054, + "step": 78460 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046940151370603576, + "loss": 0.5168, + "step": 78470 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004693591129767432, + "loss": 0.5396, + "step": 78480 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004693167122474507, + "loss": 0.5613, + "step": 78490 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004692743115181581, + "loss": 0.5335, + "step": 78500 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046923191078886557, + "loss": 0.4796, + "step": 78510 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004691895100595731, + "loss": 0.503, + "step": 78520 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046914710933028045, + "loss": 0.4648, + "step": 78530 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046910470860098797, + "loss": 0.5999, + "step": 78540 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046906230787169544, + "loss": 0.5072, + "step": 78550 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046901990714240285, + "loss": 0.5321, + "step": 78560 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004689775064131103, + "loss": 0.5419, + "step": 78570 + }, + { + "epoch": 3.32, + "learning_rate": 0.00046893510568381784, + "loss": 0.4863, + "step": 78580 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004688927049545252, + "loss": 0.6213, + "step": 78590 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004688503042252327, + "loss": 0.5466, + "step": 78600 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004688079034959402, + "loss": 0.5072, + "step": 78610 + }, + { + "epoch": 3.32, + "learning_rate": 0.0004687655027666476, + "loss": 0.5147, + "step": 78620 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046872310203735506, + "loss": 0.5214, + "step": 78630 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004686807013080626, + "loss": 0.5302, + "step": 78640 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046863830057876994, + "loss": 0.5548, + "step": 78650 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046859589984947747, + "loss": 0.5373, + "step": 78660 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004685534991201848, + "loss": 0.512, + "step": 78670 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046851109839089234, + "loss": 0.6199, + "step": 78680 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004684686976615998, + "loss": 0.5322, + "step": 78690 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004684262969323072, + "loss": 0.4938, + "step": 78700 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004683838962030147, + "loss": 0.5166, + "step": 78710 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004683414954737222, + "loss": 0.5319, + "step": 78720 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046829909474442957, + "loss": 0.6023, + "step": 78730 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004682566940151371, + "loss": 0.5089, + "step": 78740 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046821429328584456, + "loss": 0.5156, + "step": 78750 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046817189255655197, + "loss": 0.519, + "step": 78760 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046812949182725944, + "loss": 0.4357, + "step": 78770 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046808709109796696, + "loss": 0.5004, + "step": 78780 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004680446903686743, + "loss": 0.5748, + "step": 78790 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046800228963938184, + "loss": 0.575, + "step": 78800 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004679598889100893, + "loss": 0.5357, + "step": 78810 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004679174881807967, + "loss": 0.5592, + "step": 78820 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004678750874515042, + "loss": 0.5436, + "step": 78830 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046783268672221165, + "loss": 0.5701, + "step": 78840 + }, + { + "epoch": 3.33, + "learning_rate": 0.00046779028599291906, + "loss": 0.4055, + "step": 78850 + }, + { + "epoch": 3.33, + "learning_rate": 0.0004677478852636266, + "loss": 0.5461, + "step": 78860 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046770548453433405, + "loss": 0.5097, + "step": 78870 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046766308380504146, + "loss": 0.5031, + "step": 78880 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046762068307574893, + "loss": 0.4804, + "step": 78890 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004675782823464564, + "loss": 0.4972, + "step": 78900 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004675358816171638, + "loss": 0.5617, + "step": 78910 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046749348088787133, + "loss": 0.5363, + "step": 78920 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004674510801585788, + "loss": 0.5407, + "step": 78930 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004674086794292862, + "loss": 0.5001, + "step": 78940 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004673662786999937, + "loss": 0.4758, + "step": 78950 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046732387797070114, + "loss": 0.5059, + "step": 78960 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046728147724140856, + "loss": 0.524, + "step": 78970 + }, + { + "epoch": 3.34, + "learning_rate": 0.000467239076512116, + "loss": 0.5423, + "step": 78980 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046719667578282344, + "loss": 0.6003, + "step": 78990 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046715427505353096, + "loss": 0.5398, + "step": 79000 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004671118743242384, + "loss": 0.5538, + "step": 79010 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046706947359494584, + "loss": 0.5319, + "step": 79020 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004670270728656533, + "loss": 0.4928, + "step": 79030 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046698467213636077, + "loss": 0.499, + "step": 79040 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004669422714070682, + "loss": 0.4768, + "step": 79050 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046689987067777565, + "loss": 0.4737, + "step": 79060 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046685746994848317, + "loss": 0.4453, + "step": 79070 + }, + { + "epoch": 3.34, + "learning_rate": 0.0004668150692191906, + "loss": 0.4771, + "step": 79080 + }, + { + "epoch": 3.34, + "learning_rate": 0.00046677266848989805, + "loss": 0.5497, + "step": 79090 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004667302677606055, + "loss": 0.4014, + "step": 79100 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046668786703131293, + "loss": 0.5063, + "step": 79110 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004666454663020204, + "loss": 0.4955, + "step": 79120 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004666030655727279, + "loss": 0.5, + "step": 79130 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046656066484343533, + "loss": 0.5109, + "step": 79140 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004665182641141428, + "loss": 0.5023, + "step": 79150 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046647586338485026, + "loss": 0.5738, + "step": 79160 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004664334626555577, + "loss": 0.5026, + "step": 79170 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046639106192626514, + "loss": 0.5959, + "step": 79180 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046634866119697266, + "loss": 0.5153, + "step": 79190 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046630626046768, + "loss": 0.5151, + "step": 79200 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046626385973838754, + "loss": 0.4512, + "step": 79210 + }, + { + "epoch": 3.35, + "learning_rate": 0.000466221459009095, + "loss": 0.6424, + "step": 79220 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004661790582798024, + "loss": 0.4702, + "step": 79230 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004661366575505099, + "loss": 0.5134, + "step": 79240 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004660942568212174, + "loss": 0.451, + "step": 79250 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046605185609192477, + "loss": 0.5722, + "step": 79260 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004660094553626323, + "loss": 0.5236, + "step": 79270 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046596705463333965, + "loss": 0.4958, + "step": 79280 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046592465390404717, + "loss": 0.6337, + "step": 79290 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046588225317475463, + "loss": 0.5613, + "step": 79300 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046583985244546205, + "loss": 0.4438, + "step": 79310 + }, + { + "epoch": 3.35, + "learning_rate": 0.0004657974517161695, + "loss": 0.5383, + "step": 79320 + }, + { + "epoch": 3.35, + "learning_rate": 0.00046575505098687704, + "loss": 0.5035, + "step": 79330 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004657126502575844, + "loss": 0.6884, + "step": 79340 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004656702495282919, + "loss": 0.5175, + "step": 79350 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004656278487989994, + "loss": 0.5082, + "step": 79360 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004655854480697068, + "loss": 0.5844, + "step": 79370 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046554304734041426, + "loss": 0.4739, + "step": 79380 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004655006466111218, + "loss": 0.4622, + "step": 79390 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046545824588182914, + "loss": 0.6094, + "step": 79400 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046541584515253666, + "loss": 0.4682, + "step": 79410 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046537344442324413, + "loss": 0.5531, + "step": 79420 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046533104369395154, + "loss": 0.4334, + "step": 79430 + }, + { + "epoch": 3.36, + "learning_rate": 0.000465288642964659, + "loss": 0.5724, + "step": 79440 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046524624223536653, + "loss": 0.527, + "step": 79450 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004652038415060739, + "loss": 0.5527, + "step": 79460 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004651614407767814, + "loss": 0.5686, + "step": 79470 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004651190400474889, + "loss": 0.5562, + "step": 79480 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004650766393181963, + "loss": 0.523, + "step": 79490 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046503423858890375, + "loss": 0.5057, + "step": 79500 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004649918378596112, + "loss": 0.4205, + "step": 79510 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046494943713031863, + "loss": 0.5258, + "step": 79520 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046490703640102615, + "loss": 0.4094, + "step": 79530 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004648646356717336, + "loss": 0.5119, + "step": 79540 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046482223494244103, + "loss": 0.5036, + "step": 79550 + }, + { + "epoch": 3.36, + "learning_rate": 0.0004647798342131485, + "loss": 0.5161, + "step": 79560 + }, + { + "epoch": 3.36, + "learning_rate": 0.00046473743348385597, + "loss": 0.5624, + "step": 79570 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004646950327545634, + "loss": 0.5417, + "step": 79580 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046465263202527085, + "loss": 0.4978, + "step": 79590 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046461023129597826, + "loss": 0.6289, + "step": 79600 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004645678305666858, + "loss": 0.5259, + "step": 79610 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046452542983739325, + "loss": 0.4834, + "step": 79620 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046448302910810066, + "loss": 0.4916, + "step": 79630 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004644406283788081, + "loss": 0.5284, + "step": 79640 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004643982276495156, + "loss": 0.5404, + "step": 79650 + }, + { + "epoch": 3.37, + "learning_rate": 0.000464355826920223, + "loss": 0.5326, + "step": 79660 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004643134261909305, + "loss": 0.5076, + "step": 79670 + }, + { + "epoch": 3.37, + "learning_rate": 0.000464271025461638, + "loss": 0.4532, + "step": 79680 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004642286247323454, + "loss": 0.5021, + "step": 79690 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046418622400305287, + "loss": 0.5198, + "step": 79700 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046414382327376034, + "loss": 0.6371, + "step": 79710 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046410142254446775, + "loss": 0.546, + "step": 79720 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004640590218151752, + "loss": 0.5171, + "step": 79730 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046401662108588274, + "loss": 0.5401, + "step": 79740 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046397422035659015, + "loss": 0.4441, + "step": 79750 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004639318196272976, + "loss": 0.5267, + "step": 79760 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004638894188980051, + "loss": 0.5362, + "step": 79770 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004638470181687125, + "loss": 0.6054, + "step": 79780 + }, + { + "epoch": 3.37, + "learning_rate": 0.00046380461743941997, + "loss": 0.5124, + "step": 79790 + }, + { + "epoch": 3.37, + "learning_rate": 0.0004637622167101275, + "loss": 0.4606, + "step": 79800 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046371981598083484, + "loss": 0.4983, + "step": 79810 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046367741525154237, + "loss": 0.4681, + "step": 79820 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046363501452224983, + "loss": 0.4496, + "step": 79830 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046359261379295725, + "loss": 0.4359, + "step": 79840 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004635502130636647, + "loss": 0.5247, + "step": 79850 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046350781233437223, + "loss": 0.3983, + "step": 79860 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004634654116050796, + "loss": 0.5179, + "step": 79870 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004634230108757871, + "loss": 0.507, + "step": 79880 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004633806101464945, + "loss": 0.4863, + "step": 79890 + }, + { + "epoch": 3.38, + "learning_rate": 0.000463338209417202, + "loss": 0.4881, + "step": 79900 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046329580868790946, + "loss": 0.566, + "step": 79910 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046325340795861687, + "loss": 0.5129, + "step": 79920 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046321100722932434, + "loss": 0.5945, + "step": 79930 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046316860650003186, + "loss": 0.4884, + "step": 79940 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004631262057707392, + "loss": 0.528, + "step": 79950 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046308380504144674, + "loss": 0.477, + "step": 79960 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004630414043121542, + "loss": 0.4978, + "step": 79970 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004629990035828616, + "loss": 0.475, + "step": 79980 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004629566028535691, + "loss": 0.5057, + "step": 79990 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004629142021242766, + "loss": 0.4942, + "step": 80000 + }, + { + "epoch": 3.38, + "eval_loss": 0.6122093796730042, + "eval_runtime": 338.4, + "eval_samples_per_second": 15.529, + "eval_steps_per_second": 3.883, + "step": 80000 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046287180139498396, + "loss": 0.4082, + "step": 80010 + }, + { + "epoch": 3.38, + "learning_rate": 0.0004628294006656915, + "loss": 0.5666, + "step": 80020 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046278699993639895, + "loss": 0.4753, + "step": 80030 + }, + { + "epoch": 3.38, + "learning_rate": 0.00046274459920710636, + "loss": 0.5011, + "step": 80040 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046270219847781383, + "loss": 0.4901, + "step": 80050 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046265979774852135, + "loss": 0.5085, + "step": 80060 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004626173970192287, + "loss": 0.4822, + "step": 80070 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046257499628993623, + "loss": 0.5356, + "step": 80080 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004625325955606437, + "loss": 0.6019, + "step": 80090 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004624901948313511, + "loss": 0.5721, + "step": 80100 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004624477941020586, + "loss": 0.4752, + "step": 80110 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004624053933727661, + "loss": 0.5088, + "step": 80120 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046236299264347346, + "loss": 0.5359, + "step": 80130 + }, + { + "epoch": 3.39, + "learning_rate": 0.000462320591914181, + "loss": 0.5309, + "step": 80140 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046227819118488844, + "loss": 0.5341, + "step": 80150 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046223579045559586, + "loss": 0.5814, + "step": 80160 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004621933897263033, + "loss": 0.6042, + "step": 80170 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004621509889970108, + "loss": 0.5697, + "step": 80180 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004621085882677182, + "loss": 0.5131, + "step": 80190 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004620661875384257, + "loss": 0.5079, + "step": 80200 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004620237868091331, + "loss": 0.5079, + "step": 80210 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004619813860798406, + "loss": 0.5032, + "step": 80220 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046193898535054807, + "loss": 0.4044, + "step": 80230 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004618965846212555, + "loss": 0.5015, + "step": 80240 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046185418389196295, + "loss": 0.5037, + "step": 80250 + }, + { + "epoch": 3.39, + "learning_rate": 0.0004618117831626704, + "loss": 0.5532, + "step": 80260 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046176938243337783, + "loss": 0.5879, + "step": 80270 + }, + { + "epoch": 3.39, + "learning_rate": 0.00046172698170408535, + "loss": 0.5359, + "step": 80280 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004616845809747928, + "loss": 0.5016, + "step": 80290 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046164218024550023, + "loss": 0.6164, + "step": 80300 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004615997795162077, + "loss": 0.5227, + "step": 80310 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046155737878691516, + "loss": 0.5624, + "step": 80320 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004615149780576226, + "loss": 0.4982, + "step": 80330 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046147257732833004, + "loss": 0.522, + "step": 80340 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046143017659903756, + "loss": 0.5027, + "step": 80350 + }, + { + "epoch": 3.4, + "learning_rate": 0.000461387775869745, + "loss": 0.5325, + "step": 80360 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046134537514045244, + "loss": 0.4683, + "step": 80370 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004613029744111599, + "loss": 0.5384, + "step": 80380 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004612605736818673, + "loss": 0.4719, + "step": 80390 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004612181729525748, + "loss": 0.512, + "step": 80400 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004611757722232823, + "loss": 0.5083, + "step": 80410 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004611333714939897, + "loss": 0.4943, + "step": 80420 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004610909707646972, + "loss": 0.5388, + "step": 80430 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046104857003540466, + "loss": 0.4786, + "step": 80440 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046100616930611207, + "loss": 0.5195, + "step": 80450 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046096376857681954, + "loss": 0.4849, + "step": 80460 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046092136784752706, + "loss": 0.4619, + "step": 80470 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004608789671182344, + "loss": 0.4356, + "step": 80480 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046083656638894194, + "loss": 0.4706, + "step": 80490 + }, + { + "epoch": 3.4, + "learning_rate": 0.00046079416565964935, + "loss": 0.4908, + "step": 80500 + }, + { + "epoch": 3.4, + "learning_rate": 0.0004607517649303568, + "loss": 0.4665, + "step": 80510 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004607093642010643, + "loss": 0.5556, + "step": 80520 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004606669634717717, + "loss": 0.5441, + "step": 80530 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046062456274247916, + "loss": 0.4858, + "step": 80540 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004605821620131867, + "loss": 0.5106, + "step": 80550 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046053976128389404, + "loss": 0.5486, + "step": 80560 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046049736055460156, + "loss": 0.5059, + "step": 80570 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046045495982530903, + "loss": 0.3893, + "step": 80580 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046041255909601644, + "loss": 0.5148, + "step": 80590 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004603701583667239, + "loss": 0.5648, + "step": 80600 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046032775763743143, + "loss": 0.436, + "step": 80610 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004602853569081388, + "loss": 0.6451, + "step": 80620 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004602429561788463, + "loss": 0.5732, + "step": 80630 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004602005554495538, + "loss": 0.4868, + "step": 80640 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004601581547202612, + "loss": 0.5091, + "step": 80650 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046011575399096865, + "loss": 0.4533, + "step": 80660 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004600733532616762, + "loss": 0.4441, + "step": 80670 + }, + { + "epoch": 3.41, + "learning_rate": 0.00046003095253238353, + "loss": 0.5597, + "step": 80680 + }, + { + "epoch": 3.41, + "learning_rate": 0.00045998855180309105, + "loss": 0.5628, + "step": 80690 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004599461510737985, + "loss": 0.5432, + "step": 80700 + }, + { + "epoch": 3.41, + "learning_rate": 0.00045990375034450593, + "loss": 0.5332, + "step": 80710 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004598613496152134, + "loss": 0.516, + "step": 80720 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004598189488859209, + "loss": 0.5998, + "step": 80730 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004597765481566283, + "loss": 0.5582, + "step": 80740 + }, + { + "epoch": 3.41, + "learning_rate": 0.0004597341474273358, + "loss": 0.5704, + "step": 80750 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045969174669804327, + "loss": 0.6038, + "step": 80760 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004596493459687507, + "loss": 0.4745, + "step": 80770 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045960694523945815, + "loss": 0.4955, + "step": 80780 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004595645445101656, + "loss": 0.5732, + "step": 80790 + }, + { + "epoch": 3.42, + "learning_rate": 0.000459522143780873, + "loss": 0.5424, + "step": 80800 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045947974305158055, + "loss": 0.4858, + "step": 80810 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004594373423222879, + "loss": 0.5128, + "step": 80820 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045939494159299543, + "loss": 0.5234, + "step": 80830 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004593525408637029, + "loss": 0.4326, + "step": 80840 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004593101401344103, + "loss": 0.496, + "step": 80850 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004592677394051178, + "loss": 0.5151, + "step": 80860 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004592253386758253, + "loss": 0.5356, + "step": 80870 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045918293794653265, + "loss": 0.5105, + "step": 80880 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004591405372172402, + "loss": 0.4664, + "step": 80890 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045909813648794764, + "loss": 0.4944, + "step": 80900 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045905573575865505, + "loss": 0.4627, + "step": 80910 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004590133350293625, + "loss": 0.4559, + "step": 80920 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045897093430007, + "loss": 0.5585, + "step": 80930 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004589285335707774, + "loss": 0.4767, + "step": 80940 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004588861328414849, + "loss": 0.5086, + "step": 80950 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004588437321121924, + "loss": 0.4738, + "step": 80960 + }, + { + "epoch": 3.42, + "learning_rate": 0.0004588013313828998, + "loss": 0.553, + "step": 80970 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045875893065360727, + "loss": 0.5742, + "step": 80980 + }, + { + "epoch": 3.42, + "learning_rate": 0.00045871652992431473, + "loss": 0.5178, + "step": 80990 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045867412919502215, + "loss": 0.4982, + "step": 81000 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004586317284657296, + "loss": 0.4358, + "step": 81010 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045858932773643713, + "loss": 0.5442, + "step": 81020 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045854692700714455, + "loss": 0.5227, + "step": 81030 + }, + { + "epoch": 3.43, + "learning_rate": 0.000458504526277852, + "loss": 0.4837, + "step": 81040 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004584621255485595, + "loss": 0.5415, + "step": 81050 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004584197248192669, + "loss": 0.5626, + "step": 81060 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045837732408997436, + "loss": 0.6067, + "step": 81070 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004583349233606819, + "loss": 0.4479, + "step": 81080 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045829252263138924, + "loss": 0.5558, + "step": 81090 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045825012190209676, + "loss": 0.6024, + "step": 81100 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004582077211728042, + "loss": 0.4793, + "step": 81110 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045816532044351164, + "loss": 0.4667, + "step": 81120 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004581229197142191, + "loss": 0.522, + "step": 81130 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004580805189849265, + "loss": 0.5487, + "step": 81140 + }, + { + "epoch": 3.43, + "learning_rate": 0.000458038118255634, + "loss": 0.5486, + "step": 81150 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004579957175263415, + "loss": 0.5599, + "step": 81160 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004579533167970489, + "loss": 0.5028, + "step": 81170 + }, + { + "epoch": 3.43, + "learning_rate": 0.0004579109160677564, + "loss": 0.4728, + "step": 81180 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045786851533846385, + "loss": 0.4658, + "step": 81190 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045782611460917126, + "loss": 0.5624, + "step": 81200 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045778371387987873, + "loss": 0.5132, + "step": 81210 + }, + { + "epoch": 3.43, + "learning_rate": 0.00045774131315058625, + "loss": 0.5158, + "step": 81220 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004576989124212936, + "loss": 0.4703, + "step": 81230 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045765651169200113, + "loss": 0.5479, + "step": 81240 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004576141109627086, + "loss": 0.5464, + "step": 81250 + }, + { + "epoch": 3.44, + "learning_rate": 0.000457571710233416, + "loss": 0.457, + "step": 81260 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004575293095041235, + "loss": 0.5089, + "step": 81270 + }, + { + "epoch": 3.44, + "learning_rate": 0.000457486908774831, + "loss": 0.5214, + "step": 81280 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045744450804553836, + "loss": 0.5, + "step": 81290 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004574021073162459, + "loss": 0.4528, + "step": 81300 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045735970658695335, + "loss": 0.474, + "step": 81310 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045731730585766076, + "loss": 0.4784, + "step": 81320 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004572749051283682, + "loss": 0.5163, + "step": 81330 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045723250439907575, + "loss": 0.4624, + "step": 81340 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004571901036697831, + "loss": 0.549, + "step": 81350 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004571477029404906, + "loss": 0.5126, + "step": 81360 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004571053022111981, + "loss": 0.5391, + "step": 81370 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004570629014819055, + "loss": 0.5185, + "step": 81380 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045702050075261297, + "loss": 0.5017, + "step": 81390 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004569781000233205, + "loss": 0.5432, + "step": 81400 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045693569929402785, + "loss": 0.5678, + "step": 81410 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045689329856473537, + "loss": 0.5031, + "step": 81420 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045685089783544273, + "loss": 0.4915, + "step": 81430 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045680849710615025, + "loss": 0.5898, + "step": 81440 + }, + { + "epoch": 3.44, + "learning_rate": 0.0004567660963768577, + "loss": 0.5806, + "step": 81450 + }, + { + "epoch": 3.44, + "learning_rate": 0.00045672369564756513, + "loss": 0.5183, + "step": 81460 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004566812949182726, + "loss": 0.5584, + "step": 81470 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004566388941889801, + "loss": 0.5242, + "step": 81480 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004565964934596875, + "loss": 0.5042, + "step": 81490 + }, + { + "epoch": 3.45, + "learning_rate": 0.000456554092730395, + "loss": 0.5844, + "step": 81500 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045651169200110246, + "loss": 0.5123, + "step": 81510 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004564692912718099, + "loss": 0.5625, + "step": 81520 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045642689054251734, + "loss": 0.4909, + "step": 81530 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004563844898132248, + "loss": 0.5326, + "step": 81540 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004563420890839322, + "loss": 0.5314, + "step": 81550 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045629968835463974, + "loss": 0.5879, + "step": 81560 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004562572876253472, + "loss": 0.5022, + "step": 81570 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004562148868960546, + "loss": 0.5684, + "step": 81580 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004561724861667621, + "loss": 0.4716, + "step": 81590 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045613008543746956, + "loss": 0.4866, + "step": 81600 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045608768470817697, + "loss": 0.528, + "step": 81610 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045604528397888444, + "loss": 0.5541, + "step": 81620 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045600288324959196, + "loss": 0.5, + "step": 81630 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045596048252029937, + "loss": 0.5643, + "step": 81640 + }, + { + "epoch": 3.45, + "learning_rate": 0.00045591808179100684, + "loss": 0.5087, + "step": 81650 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004558756810617143, + "loss": 0.5993, + "step": 81660 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004558332803324217, + "loss": 0.52, + "step": 81670 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004557908796031292, + "loss": 0.4685, + "step": 81680 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004557484788738367, + "loss": 0.4724, + "step": 81690 + }, + { + "epoch": 3.45, + "learning_rate": 0.0004557060781445441, + "loss": 0.5762, + "step": 81700 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004556636774152516, + "loss": 0.495, + "step": 81710 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045562127668595905, + "loss": 0.4559, + "step": 81720 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045557887595666646, + "loss": 0.5523, + "step": 81730 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045553647522737393, + "loss": 0.5143, + "step": 81740 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045549407449808134, + "loss": 0.504, + "step": 81750 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004554516737687888, + "loss": 0.5626, + "step": 81760 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045540927303949633, + "loss": 0.4716, + "step": 81770 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045536687231020374, + "loss": 0.63, + "step": 81780 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004553244715809112, + "loss": 0.539, + "step": 81790 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004552820708516187, + "loss": 0.4813, + "step": 81800 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004552396701223261, + "loss": 0.5638, + "step": 81810 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045519726939303356, + "loss": 0.5177, + "step": 81820 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004551548686637411, + "loss": 0.5292, + "step": 81830 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045511246793444843, + "loss": 0.6428, + "step": 81840 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045507006720515596, + "loss": 0.5405, + "step": 81850 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004550276664758634, + "loss": 0.5132, + "step": 81860 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045498526574657083, + "loss": 0.5864, + "step": 81870 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004549428650172783, + "loss": 0.4627, + "step": 81880 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004549004642879858, + "loss": 0.6501, + "step": 81890 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004548580635586932, + "loss": 0.4923, + "step": 81900 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004548156628294007, + "loss": 0.5172, + "step": 81910 + }, + { + "epoch": 3.46, + "learning_rate": 0.00045477326210010817, + "loss": 0.5591, + "step": 81920 + }, + { + "epoch": 3.46, + "learning_rate": 0.0004547308613708156, + "loss": 0.577, + "step": 81930 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045468846064152305, + "loss": 0.6231, + "step": 81940 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045464605991223057, + "loss": 0.5523, + "step": 81950 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045460365918293793, + "loss": 0.5714, + "step": 81960 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045456125845364545, + "loss": 0.4711, + "step": 81970 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004545188577243529, + "loss": 0.5696, + "step": 81980 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045447645699506033, + "loss": 0.476, + "step": 81990 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004544340562657678, + "loss": 0.5441, + "step": 82000 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004543916555364753, + "loss": 0.5386, + "step": 82010 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004543492548071827, + "loss": 0.4634, + "step": 82020 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004543068540778902, + "loss": 0.5125, + "step": 82030 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045426445334859755, + "loss": 0.4412, + "step": 82040 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004542220526193051, + "loss": 0.5387, + "step": 82050 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045417965189001254, + "loss": 0.5798, + "step": 82060 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045413725116071995, + "loss": 0.4738, + "step": 82070 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004540948504314274, + "loss": 0.4823, + "step": 82080 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045405244970213494, + "loss": 0.5516, + "step": 82090 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004540100489728423, + "loss": 0.5518, + "step": 82100 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004539676482435498, + "loss": 0.4681, + "step": 82110 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004539252475142573, + "loss": 0.5436, + "step": 82120 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004538828467849647, + "loss": 0.5578, + "step": 82130 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045384044605567217, + "loss": 0.4621, + "step": 82140 + }, + { + "epoch": 3.47, + "learning_rate": 0.0004537980453263797, + "loss": 0.5388, + "step": 82150 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045375564459708705, + "loss": 0.5444, + "step": 82160 + }, + { + "epoch": 3.47, + "learning_rate": 0.00045371324386779457, + "loss": 0.4769, + "step": 82170 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045367084313850203, + "loss": 0.5739, + "step": 82180 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045362844240920945, + "loss": 0.5447, + "step": 82190 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004535860416799169, + "loss": 0.5437, + "step": 82200 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004535436409506244, + "loss": 0.555, + "step": 82210 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004535012402213318, + "loss": 0.4813, + "step": 82220 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004534588394920393, + "loss": 0.532, + "step": 82230 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004534164387627468, + "loss": 0.5235, + "step": 82240 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004533740380334542, + "loss": 0.5056, + "step": 82250 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045333163730416166, + "loss": 0.5727, + "step": 82260 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004532892365748691, + "loss": 0.5287, + "step": 82270 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045324683584557654, + "loss": 0.4592, + "step": 82280 + }, + { + "epoch": 3.48, + "learning_rate": 0.000453204435116284, + "loss": 0.6155, + "step": 82290 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045316203438699153, + "loss": 0.5061, + "step": 82300 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045311963365769894, + "loss": 0.5835, + "step": 82310 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004530772329284064, + "loss": 0.5108, + "step": 82320 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004530348321991139, + "loss": 0.5279, + "step": 82330 + }, + { + "epoch": 3.48, + "learning_rate": 0.0004529924314698213, + "loss": 0.4711, + "step": 82340 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045295003074052875, + "loss": 0.4996, + "step": 82350 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045290763001123617, + "loss": 0.5757, + "step": 82360 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045286522928194363, + "loss": 0.4761, + "step": 82370 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045282282855265115, + "loss": 0.5496, + "step": 82380 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045278042782335857, + "loss": 0.4844, + "step": 82390 + }, + { + "epoch": 3.48, + "learning_rate": 0.00045273802709406603, + "loss": 0.5401, + "step": 82400 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004526956263647735, + "loss": 0.5302, + "step": 82410 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004526532256354809, + "loss": 0.5183, + "step": 82420 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004526108249061884, + "loss": 0.5641, + "step": 82430 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004525684241768959, + "loss": 0.4539, + "step": 82440 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004525260234476033, + "loss": 0.5785, + "step": 82450 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004524836227183108, + "loss": 0.5916, + "step": 82460 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045244122198901825, + "loss": 0.5579, + "step": 82470 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045239882125972566, + "loss": 0.5276, + "step": 82480 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004523564205304331, + "loss": 0.4613, + "step": 82490 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045231401980114065, + "loss": 0.5174, + "step": 82500 + }, + { + "epoch": 3.49, + "learning_rate": 0.000452271619071848, + "loss": 0.5421, + "step": 82510 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004522292183425555, + "loss": 0.5778, + "step": 82520 + }, + { + "epoch": 3.49, + "learning_rate": 0.000452186817613263, + "loss": 0.4922, + "step": 82530 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004521444168839704, + "loss": 0.5214, + "step": 82540 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045210201615467787, + "loss": 0.5612, + "step": 82550 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004520596154253854, + "loss": 0.5043, + "step": 82560 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045201721469609275, + "loss": 0.6314, + "step": 82570 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045197481396680027, + "loss": 0.6151, + "step": 82580 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045193241323750774, + "loss": 0.5662, + "step": 82590 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045189001250821515, + "loss": 0.5599, + "step": 82600 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004518476117789226, + "loss": 0.4748, + "step": 82610 + }, + { + "epoch": 3.49, + "learning_rate": 0.00045180521104963014, + "loss": 0.5501, + "step": 82620 + }, + { + "epoch": 3.49, + "learning_rate": 0.0004517628103203375, + "loss": 0.58, + "step": 82630 + }, + { + "epoch": 3.49, + "learning_rate": 0.000451720409591045, + "loss": 0.514, + "step": 82640 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004516780088617524, + "loss": 0.5529, + "step": 82650 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004516356081324599, + "loss": 0.5298, + "step": 82660 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045159320740316736, + "loss": 0.5562, + "step": 82670 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004515508066738748, + "loss": 0.5103, + "step": 82680 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045150840594458224, + "loss": 0.5277, + "step": 82690 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045146600521528977, + "loss": 0.5476, + "step": 82700 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004514236044859971, + "loss": 0.4783, + "step": 82710 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045138120375670464, + "loss": 0.5444, + "step": 82720 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004513388030274121, + "loss": 0.5361, + "step": 82730 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004512964022981195, + "loss": 0.6381, + "step": 82740 + }, + { + "epoch": 3.5, + "learning_rate": 0.000451254001568827, + "loss": 0.5046, + "step": 82750 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004512116008395345, + "loss": 0.4788, + "step": 82760 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045116920011024187, + "loss": 0.4343, + "step": 82770 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004511267993809494, + "loss": 0.5258, + "step": 82780 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045108439865165686, + "loss": 0.5609, + "step": 82790 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045104199792236427, + "loss": 0.4592, + "step": 82800 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045099959719307174, + "loss": 0.5579, + "step": 82810 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004509571964637792, + "loss": 0.5347, + "step": 82820 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004509147957344866, + "loss": 0.5186, + "step": 82830 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045087239500519414, + "loss": 0.5301, + "step": 82840 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004508299942759016, + "loss": 0.5299, + "step": 82850 + }, + { + "epoch": 3.5, + "learning_rate": 0.000450787593546609, + "loss": 0.5089, + "step": 82860 + }, + { + "epoch": 3.5, + "learning_rate": 0.0004507451928173165, + "loss": 0.4958, + "step": 82870 + }, + { + "epoch": 3.5, + "learning_rate": 0.00045070279208802395, + "loss": 0.5328, + "step": 82880 + }, + { + "epoch": 3.51, + "learning_rate": 0.00045066039135873136, + "loss": 0.4385, + "step": 82890 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004506179906294389, + "loss": 0.5842, + "step": 82900 + }, + { + "epoch": 3.51, + "learning_rate": 0.00045057558990014635, + "loss": 0.4933, + "step": 82910 + }, + { + "epoch": 3.51, + "learning_rate": 0.00045053318917085376, + "loss": 0.5542, + "step": 82920 + }, + { + "epoch": 3.51, + "learning_rate": 0.00045049078844156123, + "loss": 0.6322, + "step": 82930 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004504483877122687, + "loss": 0.6365, + "step": 82940 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004504059869829761, + "loss": 0.5494, + "step": 82950 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004503635862536836, + "loss": 0.501, + "step": 82960 + }, + { + "epoch": 3.51, + "learning_rate": 0.000450321185524391, + "loss": 0.6392, + "step": 82970 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004502787847950985, + "loss": 0.5053, + "step": 82980 + }, + { + "epoch": 3.51, + "learning_rate": 0.000450236384065806, + "loss": 0.5539, + "step": 82990 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004501939833365134, + "loss": 0.5205, + "step": 83000 + }, + { + "epoch": 3.51, + "learning_rate": 0.00045015158260722086, + "loss": 0.6119, + "step": 83010 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004501091818779283, + "loss": 0.5717, + "step": 83020 + }, + { + "epoch": 3.51, + "learning_rate": 0.00045006678114863574, + "loss": 0.5324, + "step": 83030 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004500243804193432, + "loss": 0.4644, + "step": 83040 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004499819796900507, + "loss": 0.5668, + "step": 83050 + }, + { + "epoch": 3.51, + "learning_rate": 0.00044993957896075814, + "loss": 0.5465, + "step": 83060 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004498971782314656, + "loss": 0.5275, + "step": 83070 + }, + { + "epoch": 3.51, + "learning_rate": 0.00044985477750217307, + "loss": 0.4499, + "step": 83080 + }, + { + "epoch": 3.51, + "learning_rate": 0.0004498123767728805, + "loss": 0.5026, + "step": 83090 + }, + { + "epoch": 3.51, + "learning_rate": 0.00044976997604358795, + "loss": 0.5261, + "step": 83100 + }, + { + "epoch": 3.51, + "learning_rate": 0.00044972757531429547, + "loss": 0.4873, + "step": 83110 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044968517458500283, + "loss": 0.5636, + "step": 83120 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044964277385571035, + "loss": 0.4283, + "step": 83130 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004496003731264178, + "loss": 0.4693, + "step": 83140 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044955797239712523, + "loss": 0.5161, + "step": 83150 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004495155716678327, + "loss": 0.6137, + "step": 83160 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004494731709385402, + "loss": 0.5253, + "step": 83170 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004494307702092476, + "loss": 0.5435, + "step": 83180 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004493883694799551, + "loss": 0.5428, + "step": 83190 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044934596875066256, + "loss": 0.5358, + "step": 83200 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044930356802137, + "loss": 0.5268, + "step": 83210 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044926116729207744, + "loss": 0.5296, + "step": 83220 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044921876656278496, + "loss": 0.5871, + "step": 83230 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004491763658334923, + "loss": 0.4401, + "step": 83240 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044913396510419984, + "loss": 0.5203, + "step": 83250 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004490915643749073, + "loss": 0.56, + "step": 83260 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004490491636456147, + "loss": 0.4519, + "step": 83270 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004490067629163222, + "loss": 0.5871, + "step": 83280 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004489643621870296, + "loss": 0.4771, + "step": 83290 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044892196145773707, + "loss": 0.4497, + "step": 83300 + }, + { + "epoch": 3.52, + "learning_rate": 0.0004488795607284446, + "loss": 0.5287, + "step": 83310 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044883715999915195, + "loss": 0.5091, + "step": 83320 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044879475926985947, + "loss": 0.5507, + "step": 83330 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044875235854056694, + "loss": 0.5586, + "step": 83340 + }, + { + "epoch": 3.52, + "learning_rate": 0.00044870995781127435, + "loss": 0.6183, + "step": 83350 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004486675570819818, + "loss": 0.4532, + "step": 83360 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044862515635268934, + "loss": 0.6164, + "step": 83370 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004485827556233967, + "loss": 0.5126, + "step": 83380 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004485403548941042, + "loss": 0.5413, + "step": 83390 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004484979541648117, + "loss": 0.4547, + "step": 83400 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004484555534355191, + "loss": 0.4671, + "step": 83410 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044841315270622656, + "loss": 0.505, + "step": 83420 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004483707519769341, + "loss": 0.5613, + "step": 83430 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044832835124764144, + "loss": 0.5112, + "step": 83440 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044828595051834896, + "loss": 0.5131, + "step": 83450 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044824354978905643, + "loss": 0.5919, + "step": 83460 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044820114905976384, + "loss": 0.6508, + "step": 83470 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004481587483304713, + "loss": 0.5672, + "step": 83480 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004481163476011788, + "loss": 0.5671, + "step": 83490 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004480739468718862, + "loss": 0.5119, + "step": 83500 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004480315461425937, + "loss": 0.53, + "step": 83510 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004479891454133012, + "loss": 0.4594, + "step": 83520 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004479467446840086, + "loss": 0.5048, + "step": 83530 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044790434395471605, + "loss": 0.5263, + "step": 83540 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004478619432254235, + "loss": 0.5455, + "step": 83550 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044781954249613093, + "loss": 0.5206, + "step": 83560 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004477771417668384, + "loss": 0.4713, + "step": 83570 + }, + { + "epoch": 3.53, + "learning_rate": 0.0004477347410375458, + "loss": 0.4864, + "step": 83580 + }, + { + "epoch": 3.53, + "learning_rate": 0.00044769234030825333, + "loss": 0.6233, + "step": 83590 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004476499395789608, + "loss": 0.6024, + "step": 83600 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004476075388496682, + "loss": 0.6078, + "step": 83610 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004475651381203757, + "loss": 0.5134, + "step": 83620 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044752273739108315, + "loss": 0.5147, + "step": 83630 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044748033666179056, + "loss": 0.5741, + "step": 83640 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004474379359324981, + "loss": 0.4602, + "step": 83650 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044739553520320555, + "loss": 0.5221, + "step": 83660 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044735313447391296, + "loss": 0.4531, + "step": 83670 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004473107337446204, + "loss": 0.4976, + "step": 83680 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004472683330153279, + "loss": 0.5993, + "step": 83690 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004472259322860353, + "loss": 0.4825, + "step": 83700 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044718353155674277, + "loss": 0.6312, + "step": 83710 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004471411308274503, + "loss": 0.5747, + "step": 83720 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004470987300981577, + "loss": 0.5228, + "step": 83730 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004470563293688652, + "loss": 0.5486, + "step": 83740 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044701392863957264, + "loss": 0.5338, + "step": 83750 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044697152791028005, + "loss": 0.4703, + "step": 83760 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004469291271809875, + "loss": 0.5432, + "step": 83770 + }, + { + "epoch": 3.54, + "learning_rate": 0.00044688672645169504, + "loss": 0.542, + "step": 83780 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004468443257224024, + "loss": 0.5322, + "step": 83790 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004468019249931099, + "loss": 0.4817, + "step": 83800 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004467595242638174, + "loss": 0.4988, + "step": 83810 + }, + { + "epoch": 3.54, + "learning_rate": 0.0004467171235345248, + "loss": 0.4825, + "step": 83820 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044667472280523227, + "loss": 0.4934, + "step": 83830 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004466323220759398, + "loss": 0.5376, + "step": 83840 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044658992134664714, + "loss": 0.5794, + "step": 83850 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044654752061735467, + "loss": 0.5744, + "step": 83860 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044650511988806213, + "loss": 0.5188, + "step": 83870 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044646271915876955, + "loss": 0.5872, + "step": 83880 + }, + { + "epoch": 3.55, + "learning_rate": 0.000446420318429477, + "loss": 0.5117, + "step": 83890 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004463779177001844, + "loss": 0.5524, + "step": 83900 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004463355169708919, + "loss": 0.5982, + "step": 83910 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004462931162415994, + "loss": 0.5329, + "step": 83920 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044625071551230677, + "loss": 0.443, + "step": 83930 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004462083147830143, + "loss": 0.5514, + "step": 83940 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044616591405372176, + "loss": 0.5228, + "step": 83950 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044612351332442917, + "loss": 0.5166, + "step": 83960 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044608111259513664, + "loss": 0.505, + "step": 83970 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044603871186584416, + "loss": 0.4763, + "step": 83980 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004459963111365515, + "loss": 0.4607, + "step": 83990 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044595391040725904, + "loss": 0.5326, + "step": 84000 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004459115096779665, + "loss": 0.5692, + "step": 84010 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004458691089486739, + "loss": 0.4286, + "step": 84020 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004458267082193814, + "loss": 0.4385, + "step": 84030 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004457843074900889, + "loss": 0.4507, + "step": 84040 + }, + { + "epoch": 3.55, + "learning_rate": 0.00044574190676079626, + "loss": 0.4707, + "step": 84050 + }, + { + "epoch": 3.55, + "learning_rate": 0.0004456995060315038, + "loss": 0.3809, + "step": 84060 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044565710530221125, + "loss": 0.5548, + "step": 84070 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044561470457291866, + "loss": 0.5125, + "step": 84080 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044557230384362613, + "loss": 0.5263, + "step": 84090 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004455299031143336, + "loss": 0.5338, + "step": 84100 + }, + { + "epoch": 3.56, + "learning_rate": 0.000445487502385041, + "loss": 0.5481, + "step": 84110 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044544510165574853, + "loss": 0.5074, + "step": 84120 + }, + { + "epoch": 3.56, + "learning_rate": 0.000445402700926456, + "loss": 0.5417, + "step": 84130 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004453603001971634, + "loss": 0.5721, + "step": 84140 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004453178994678709, + "loss": 0.5455, + "step": 84150 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044527549873857834, + "loss": 0.5335, + "step": 84160 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044523309800928576, + "loss": 0.5326, + "step": 84170 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004451906972799933, + "loss": 0.5758, + "step": 84180 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044514829655070064, + "loss": 0.4561, + "step": 84190 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044510589582140816, + "loss": 0.575, + "step": 84200 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004450634950921156, + "loss": 0.4696, + "step": 84210 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044502109436282304, + "loss": 0.5284, + "step": 84220 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004449786936335305, + "loss": 0.4735, + "step": 84230 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044493629290423797, + "loss": 0.5021, + "step": 84240 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004448938921749454, + "loss": 0.542, + "step": 84250 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004448514914456529, + "loss": 0.5449, + "step": 84260 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044480909071636037, + "loss": 0.6105, + "step": 84270 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004447666899870678, + "loss": 0.4518, + "step": 84280 + }, + { + "epoch": 3.56, + "learning_rate": 0.00044472428925777525, + "loss": 0.5018, + "step": 84290 + }, + { + "epoch": 3.56, + "learning_rate": 0.0004446818885284827, + "loss": 0.4776, + "step": 84300 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044463948779919013, + "loss": 0.522, + "step": 84310 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004445970870698976, + "loss": 0.5755, + "step": 84320 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004445546863406051, + "loss": 0.493, + "step": 84330 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044451228561131253, + "loss": 0.5082, + "step": 84340 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044446988488202, + "loss": 0.4678, + "step": 84350 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044442748415272746, + "loss": 0.5638, + "step": 84360 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004443850834234349, + "loss": 0.5563, + "step": 84370 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044434268269414234, + "loss": 0.4749, + "step": 84380 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044430028196484986, + "loss": 0.558, + "step": 84390 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004442578812355573, + "loss": 0.5229, + "step": 84400 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044421548050626474, + "loss": 0.679, + "step": 84410 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004441730797769722, + "loss": 0.6506, + "step": 84420 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004441306790476796, + "loss": 0.5528, + "step": 84430 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004440882783183871, + "loss": 0.5434, + "step": 84440 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004440458775890946, + "loss": 0.4858, + "step": 84450 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044400347685980197, + "loss": 0.4966, + "step": 84460 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004439610761305095, + "loss": 0.5439, + "step": 84470 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044391867540121696, + "loss": 0.441, + "step": 84480 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044387627467192437, + "loss": 0.5547, + "step": 84490 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044383387394263184, + "loss": 0.5049, + "step": 84500 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044379147321333925, + "loss": 0.5213, + "step": 84510 + }, + { + "epoch": 3.57, + "learning_rate": 0.0004437490724840467, + "loss": 0.5028, + "step": 84520 + }, + { + "epoch": 3.57, + "learning_rate": 0.00044370667175475424, + "loss": 0.5814, + "step": 84530 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004436642710254616, + "loss": 0.5443, + "step": 84540 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004436218702961691, + "loss": 0.4903, + "step": 84550 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004435794695668766, + "loss": 0.5407, + "step": 84560 + }, + { + "epoch": 3.58, + "learning_rate": 0.000443537068837584, + "loss": 0.4883, + "step": 84570 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044349466810829146, + "loss": 0.5595, + "step": 84580 + }, + { + "epoch": 3.58, + "learning_rate": 0.000443452267378999, + "loss": 0.4469, + "step": 84590 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044340986664970634, + "loss": 0.4961, + "step": 84600 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044336746592041386, + "loss": 0.5048, + "step": 84610 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044332506519112133, + "loss": 0.5406, + "step": 84620 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044328266446182874, + "loss": 0.4807, + "step": 84630 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004432402637325362, + "loss": 0.4999, + "step": 84640 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044319786300324373, + "loss": 0.5355, + "step": 84650 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004431554622739511, + "loss": 0.5461, + "step": 84660 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004431130615446586, + "loss": 0.5225, + "step": 84670 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004430706608153661, + "loss": 0.6021, + "step": 84680 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004430282600860735, + "loss": 0.5528, + "step": 84690 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044298585935678095, + "loss": 0.6018, + "step": 84700 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004429434586274885, + "loss": 0.4394, + "step": 84710 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044290105789819583, + "loss": 0.5551, + "step": 84720 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044285865716890336, + "loss": 0.4662, + "step": 84730 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004428162564396108, + "loss": 0.5123, + "step": 84740 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044277385571031823, + "loss": 0.4977, + "step": 84750 + }, + { + "epoch": 3.58, + "learning_rate": 0.0004427314549810257, + "loss": 0.6125, + "step": 84760 + }, + { + "epoch": 3.58, + "learning_rate": 0.00044268905425173317, + "loss": 0.5126, + "step": 84770 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004426466535224406, + "loss": 0.5547, + "step": 84780 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004426042527931481, + "loss": 0.577, + "step": 84790 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044256185206385546, + "loss": 0.6022, + "step": 84800 + }, + { + "epoch": 3.59, + "learning_rate": 0.000442519451334563, + "loss": 0.4909, + "step": 84810 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044247705060527045, + "loss": 0.5884, + "step": 84820 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044243464987597786, + "loss": 0.5631, + "step": 84830 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004423922491466853, + "loss": 0.4358, + "step": 84840 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004423498484173928, + "loss": 0.4794, + "step": 84850 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004423074476881002, + "loss": 0.6476, + "step": 84860 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044226504695880773, + "loss": 0.414, + "step": 84870 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004422226462295152, + "loss": 0.6074, + "step": 84880 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004421802455002226, + "loss": 0.542, + "step": 84890 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004421378447709301, + "loss": 0.5114, + "step": 84900 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044209544404163754, + "loss": 0.5425, + "step": 84910 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044205304331234495, + "loss": 0.4501, + "step": 84920 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004420106425830525, + "loss": 0.474, + "step": 84930 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044196824185375994, + "loss": 0.4763, + "step": 84940 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044192584112446735, + "loss": 0.5761, + "step": 84950 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004418834403951748, + "loss": 0.6129, + "step": 84960 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004418410396658823, + "loss": 0.581, + "step": 84970 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004417986389365897, + "loss": 0.5368, + "step": 84980 + }, + { + "epoch": 3.59, + "learning_rate": 0.00044175623820729717, + "loss": 0.5618, + "step": 84990 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004417138374780047, + "loss": 0.6195, + "step": 85000 + }, + { + "epoch": 3.59, + "learning_rate": 0.0004416714367487121, + "loss": 0.5983, + "step": 85010 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044162903601941957, + "loss": 0.516, + "step": 85020 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044158663529012703, + "loss": 0.4342, + "step": 85030 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044154423456083445, + "loss": 0.5415, + "step": 85040 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004415018338315419, + "loss": 0.5459, + "step": 85050 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044145943310224943, + "loss": 0.616, + "step": 85060 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004414170323729568, + "loss": 0.4723, + "step": 85070 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004413746316436643, + "loss": 0.4865, + "step": 85080 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004413322309143718, + "loss": 0.4965, + "step": 85090 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004412898301850792, + "loss": 0.497, + "step": 85100 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044124742945578666, + "loss": 0.5738, + "step": 85110 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044120502872649407, + "loss": 0.4881, + "step": 85120 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044116262799720154, + "loss": 0.4178, + "step": 85130 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044112022726790906, + "loss": 0.5749, + "step": 85140 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044107782653861647, + "loss": 0.5208, + "step": 85150 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044103542580932394, + "loss": 0.5684, + "step": 85160 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004409930250800314, + "loss": 0.4905, + "step": 85170 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004409506243507388, + "loss": 0.5708, + "step": 85180 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004409082236214463, + "loss": 0.5222, + "step": 85190 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004408658228921538, + "loss": 0.5718, + "step": 85200 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044082342216286116, + "loss": 0.5746, + "step": 85210 + }, + { + "epoch": 3.6, + "learning_rate": 0.0004407810214335687, + "loss": 0.5455, + "step": 85220 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044073862070427615, + "loss": 0.5344, + "step": 85230 + }, + { + "epoch": 3.6, + "learning_rate": 0.00044069621997498356, + "loss": 0.5264, + "step": 85240 + }, + { + "epoch": 3.61, + "learning_rate": 0.00044065381924569103, + "loss": 0.4885, + "step": 85250 + }, + { + "epoch": 3.61, + "learning_rate": 0.00044061141851639855, + "loss": 0.4787, + "step": 85260 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004405690177871059, + "loss": 0.4727, + "step": 85270 + }, + { + "epoch": 3.61, + "learning_rate": 0.00044052661705781343, + "loss": 0.5517, + "step": 85280 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004404842163285209, + "loss": 0.4539, + "step": 85290 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004404418155992283, + "loss": 0.5866, + "step": 85300 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004403994148699358, + "loss": 0.5412, + "step": 85310 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004403570141406433, + "loss": 0.5663, + "step": 85320 + }, + { + "epoch": 3.61, + "learning_rate": 0.00044031461341135066, + "loss": 0.5476, + "step": 85330 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004402722126820582, + "loss": 0.4708, + "step": 85340 + }, + { + "epoch": 3.61, + "learning_rate": 0.00044022981195276565, + "loss": 0.5706, + "step": 85350 + }, + { + "epoch": 3.61, + "learning_rate": 0.00044018741122347306, + "loss": 0.4997, + "step": 85360 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004401450104941805, + "loss": 0.5027, + "step": 85370 + }, + { + "epoch": 3.61, + "learning_rate": 0.000440102609764888, + "loss": 0.5442, + "step": 85380 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004400602090355954, + "loss": 0.604, + "step": 85390 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004400178083063029, + "loss": 0.5525, + "step": 85400 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004399754075770104, + "loss": 0.6223, + "step": 85410 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004399330068477178, + "loss": 0.4008, + "step": 85420 + }, + { + "epoch": 3.61, + "learning_rate": 0.00043989060611842527, + "loss": 0.4679, + "step": 85430 + }, + { + "epoch": 3.61, + "learning_rate": 0.0004398482053891327, + "loss": 0.5575, + "step": 85440 + }, + { + "epoch": 3.61, + "learning_rate": 0.00043980580465984015, + "loss": 0.4792, + "step": 85450 + }, + { + "epoch": 3.61, + "learning_rate": 0.00043976340393054767, + "loss": 0.4965, + "step": 85460 + }, + { + "epoch": 3.61, + "learning_rate": 0.00043972100320125503, + "loss": 0.5742, + "step": 85470 + }, + { + "epoch": 3.61, + "learning_rate": 0.00043967860247196255, + "loss": 0.504, + "step": 85480 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043963620174267, + "loss": 0.5266, + "step": 85490 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043959380101337743, + "loss": 0.5285, + "step": 85500 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004395514002840849, + "loss": 0.495, + "step": 85510 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043950899955479236, + "loss": 0.5104, + "step": 85520 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004394665988254998, + "loss": 0.5056, + "step": 85530 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004394241980962073, + "loss": 0.5206, + "step": 85540 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043938179736691476, + "loss": 0.4619, + "step": 85550 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004393393966376222, + "loss": 0.5217, + "step": 85560 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043929699590832964, + "loss": 0.535, + "step": 85570 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004392545951790371, + "loss": 0.5163, + "step": 85580 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004392121944497445, + "loss": 0.5574, + "step": 85590 + }, + { + "epoch": 3.62, + "learning_rate": 0.000439169793720452, + "loss": 0.5208, + "step": 85600 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004391273929911595, + "loss": 0.5403, + "step": 85610 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004390849922618669, + "loss": 0.5184, + "step": 85620 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004390425915325744, + "loss": 0.5457, + "step": 85630 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043900019080328186, + "loss": 0.5083, + "step": 85640 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043895779007398927, + "loss": 0.6399, + "step": 85650 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043891538934469674, + "loss": 0.5405, + "step": 85660 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043887298861540426, + "loss": 0.5363, + "step": 85670 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043883058788611167, + "loss": 0.5443, + "step": 85680 + }, + { + "epoch": 3.62, + "learning_rate": 0.00043878818715681914, + "loss": 0.5858, + "step": 85690 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004387457864275266, + "loss": 0.4625, + "step": 85700 + }, + { + "epoch": 3.62, + "learning_rate": 0.000438703385698234, + "loss": 0.5875, + "step": 85710 + }, + { + "epoch": 3.62, + "learning_rate": 0.0004386609849689415, + "loss": 0.5467, + "step": 85720 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004386185842396489, + "loss": 0.5551, + "step": 85730 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043857618351035636, + "loss": 0.4893, + "step": 85740 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004385337827810639, + "loss": 0.5175, + "step": 85750 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004384913820517713, + "loss": 0.5652, + "step": 85760 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043844898132247876, + "loss": 0.4485, + "step": 85770 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043840658059318623, + "loss": 0.5182, + "step": 85780 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043836417986389364, + "loss": 0.6477, + "step": 85790 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004383217791346011, + "loss": 0.5421, + "step": 85800 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043827937840530863, + "loss": 0.5363, + "step": 85810 + }, + { + "epoch": 3.63, + "learning_rate": 0.000438236977676016, + "loss": 0.5127, + "step": 85820 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004381945769467235, + "loss": 0.4745, + "step": 85830 + }, + { + "epoch": 3.63, + "learning_rate": 0.000438152176217431, + "loss": 0.5126, + "step": 85840 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004381097754881384, + "loss": 0.5022, + "step": 85850 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043806737475884586, + "loss": 0.5648, + "step": 85860 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004380249740295534, + "loss": 0.567, + "step": 85870 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043798257330026073, + "loss": 0.5832, + "step": 85880 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043794017257096826, + "loss": 0.5353, + "step": 85890 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004378977718416757, + "loss": 0.512, + "step": 85900 + }, + { + "epoch": 3.63, + "learning_rate": 0.00043785537111238314, + "loss": 0.5716, + "step": 85910 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004378129703830906, + "loss": 0.5152, + "step": 85920 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004377705696537981, + "loss": 0.5015, + "step": 85930 + }, + { + "epoch": 3.63, + "learning_rate": 0.0004377281689245055, + "loss": 0.4709, + "step": 85940 + }, + { + "epoch": 3.63, + "learning_rate": 0.000437685768195213, + "loss": 0.4795, + "step": 85950 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043764336746592047, + "loss": 0.5041, + "step": 85960 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004376009667366279, + "loss": 0.4605, + "step": 85970 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043755856600733535, + "loss": 0.5105, + "step": 85980 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043751616527804287, + "loss": 0.5354, + "step": 85990 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043747376454875023, + "loss": 0.5571, + "step": 86000 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043743136381945775, + "loss": 0.5551, + "step": 86010 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004373889630901652, + "loss": 0.4443, + "step": 86020 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043734656236087263, + "loss": 0.5295, + "step": 86030 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004373041616315801, + "loss": 0.5739, + "step": 86040 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004372617609022875, + "loss": 0.5418, + "step": 86050 + }, + { + "epoch": 3.64, + "learning_rate": 0.000437219360172995, + "loss": 0.4685, + "step": 86060 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004371769594437025, + "loss": 0.4347, + "step": 86070 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043713455871440985, + "loss": 0.528, + "step": 86080 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004370921579851174, + "loss": 0.5047, + "step": 86090 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043704975725582484, + "loss": 0.5114, + "step": 86100 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043700735652653225, + "loss": 0.5152, + "step": 86110 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004369649557972397, + "loss": 0.6092, + "step": 86120 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004369225550679472, + "loss": 0.5384, + "step": 86130 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004368801543386546, + "loss": 0.5526, + "step": 86140 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004368377536093621, + "loss": 0.5762, + "step": 86150 + }, + { + "epoch": 3.64, + "learning_rate": 0.0004367953528800696, + "loss": 0.5755, + "step": 86160 + }, + { + "epoch": 3.64, + "learning_rate": 0.000436752952150777, + "loss": 0.4948, + "step": 86170 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043671055142148447, + "loss": 0.6771, + "step": 86180 + }, + { + "epoch": 3.64, + "learning_rate": 0.00043666815069219193, + "loss": 0.5445, + "step": 86190 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043662574996289935, + "loss": 0.5199, + "step": 86200 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043658334923360687, + "loss": 0.5837, + "step": 86210 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043654094850431433, + "loss": 0.5741, + "step": 86220 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043649854777502175, + "loss": 0.5084, + "step": 86230 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004364561470457292, + "loss": 0.568, + "step": 86240 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004364137463164367, + "loss": 0.5, + "step": 86250 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004363713455871441, + "loss": 0.5629, + "step": 86260 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043632894485785156, + "loss": 0.534, + "step": 86270 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004362865441285591, + "loss": 0.4145, + "step": 86280 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004362441433992665, + "loss": 0.4546, + "step": 86290 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043620174266997396, + "loss": 0.5297, + "step": 86300 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043615934194068143, + "loss": 0.5635, + "step": 86310 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043611694121138884, + "loss": 0.6177, + "step": 86320 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004360745404820963, + "loss": 0.536, + "step": 86330 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004360321397528037, + "loss": 0.5687, + "step": 86340 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004359897390235112, + "loss": 0.4659, + "step": 86350 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004359473382942187, + "loss": 0.4922, + "step": 86360 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004359049375649261, + "loss": 0.5146, + "step": 86370 + }, + { + "epoch": 3.65, + "learning_rate": 0.0004358625368356336, + "loss": 0.5641, + "step": 86380 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043582013610634105, + "loss": 0.6432, + "step": 86390 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043577773537704847, + "loss": 0.5069, + "step": 86400 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043573533464775593, + "loss": 0.4869, + "step": 86410 + }, + { + "epoch": 3.65, + "learning_rate": 0.00043569293391846345, + "loss": 0.62, + "step": 86420 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043565053318917087, + "loss": 0.5567, + "step": 86430 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043560813245987833, + "loss": 0.5288, + "step": 86440 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004355657317305858, + "loss": 0.5456, + "step": 86450 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004355233310012932, + "loss": 0.553, + "step": 86460 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004354809302720007, + "loss": 0.4192, + "step": 86470 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004354385295427082, + "loss": 0.5029, + "step": 86480 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043539612881341556, + "loss": 0.6469, + "step": 86490 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004353537280841231, + "loss": 0.5495, + "step": 86500 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043531132735483055, + "loss": 0.5194, + "step": 86510 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043526892662553796, + "loss": 0.527, + "step": 86520 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004352265258962454, + "loss": 0.4841, + "step": 86530 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043518412516695295, + "loss": 0.6061, + "step": 86540 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004351417244376603, + "loss": 0.5062, + "step": 86550 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004350993237083678, + "loss": 0.5021, + "step": 86560 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004350569229790753, + "loss": 0.5191, + "step": 86570 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004350145222497827, + "loss": 0.6089, + "step": 86580 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043497212152049017, + "loss": 0.5568, + "step": 86590 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004349297207911977, + "loss": 0.5194, + "step": 86600 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043488732006190505, + "loss": 0.5036, + "step": 86610 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043484491933261257, + "loss": 0.4623, + "step": 86620 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043480251860332004, + "loss": 0.5337, + "step": 86630 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043476011787402745, + "loss": 0.4612, + "step": 86640 + }, + { + "epoch": 3.66, + "learning_rate": 0.0004347177171447349, + "loss": 0.5264, + "step": 86650 + }, + { + "epoch": 3.66, + "learning_rate": 0.00043467531641544233, + "loss": 0.4986, + "step": 86660 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004346329156861498, + "loss": 0.5437, + "step": 86670 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004345905149568573, + "loss": 0.5328, + "step": 86680 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004345481142275647, + "loss": 0.4962, + "step": 86690 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004345057134982722, + "loss": 0.51, + "step": 86700 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043446331276897967, + "loss": 0.5283, + "step": 86710 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004344209120396871, + "loss": 0.5473, + "step": 86720 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043437851131039454, + "loss": 0.6265, + "step": 86730 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043433611058110207, + "loss": 0.5634, + "step": 86740 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004342937098518094, + "loss": 0.5212, + "step": 86750 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043425130912251694, + "loss": 0.4861, + "step": 86760 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004342089083932244, + "loss": 0.5387, + "step": 86770 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004341665076639318, + "loss": 0.493, + "step": 86780 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004341241069346393, + "loss": 0.5704, + "step": 86790 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043408170620534676, + "loss": 0.4432, + "step": 86800 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043403930547605417, + "loss": 0.5499, + "step": 86810 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004339969047467617, + "loss": 0.5931, + "step": 86820 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043395450401746916, + "loss": 0.589, + "step": 86830 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043391210328817657, + "loss": 0.5723, + "step": 86840 + }, + { + "epoch": 3.67, + "learning_rate": 0.00043386970255888404, + "loss": 0.5657, + "step": 86850 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004338273018295915, + "loss": 0.5261, + "step": 86860 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004337849011002989, + "loss": 0.4944, + "step": 86870 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004337425003710064, + "loss": 0.4734, + "step": 86880 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004337000996417139, + "loss": 0.5, + "step": 86890 + }, + { + "epoch": 3.67, + "learning_rate": 0.0004336576989124213, + "loss": 0.5582, + "step": 86900 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004336152981831288, + "loss": 0.463, + "step": 86910 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043357289745383625, + "loss": 0.5304, + "step": 86920 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043353049672454366, + "loss": 0.4997, + "step": 86930 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043348809599525113, + "loss": 0.4673, + "step": 86940 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043344569526595854, + "loss": 0.6519, + "step": 86950 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043340329453666606, + "loss": 0.5253, + "step": 86960 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043336089380737353, + "loss": 0.636, + "step": 86970 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043331849307808094, + "loss": 0.4972, + "step": 86980 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004332760923487884, + "loss": 0.6198, + "step": 86990 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004332336916194959, + "loss": 0.469, + "step": 87000 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004331912908902033, + "loss": 0.5797, + "step": 87010 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043314889016091076, + "loss": 0.6235, + "step": 87020 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004331064894316183, + "loss": 0.5785, + "step": 87030 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004330640887023257, + "loss": 0.4992, + "step": 87040 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043302168797303316, + "loss": 0.5376, + "step": 87050 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004329792872437406, + "loss": 0.4156, + "step": 87060 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043293688651444804, + "loss": 0.4991, + "step": 87070 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004328944857851555, + "loss": 0.5344, + "step": 87080 + }, + { + "epoch": 3.68, + "learning_rate": 0.000432852085055863, + "loss": 0.5204, + "step": 87090 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004328096843265704, + "loss": 0.6039, + "step": 87100 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004327672835972779, + "loss": 0.5539, + "step": 87110 + }, + { + "epoch": 3.68, + "learning_rate": 0.00043272488286798537, + "loss": 0.5106, + "step": 87120 + }, + { + "epoch": 3.68, + "learning_rate": 0.0004326824821386928, + "loss": 0.5626, + "step": 87130 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043264008140940025, + "loss": 0.5268, + "step": 87140 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043259768068010777, + "loss": 0.5176, + "step": 87150 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043255527995081513, + "loss": 0.4934, + "step": 87160 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043251287922152265, + "loss": 0.5697, + "step": 87170 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004324704784922301, + "loss": 0.5499, + "step": 87180 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043242807776293753, + "loss": 0.5673, + "step": 87190 + }, + { + "epoch": 3.69, + "learning_rate": 0.000432385677033645, + "loss": 0.532, + "step": 87200 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004323432763043525, + "loss": 0.4873, + "step": 87210 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004323008755750599, + "loss": 0.5136, + "step": 87220 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004322584748457674, + "loss": 0.4783, + "step": 87230 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043221607411647486, + "loss": 0.5454, + "step": 87240 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004321736733871823, + "loss": 0.4978, + "step": 87250 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043213127265788974, + "loss": 0.4931, + "step": 87260 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043208887192859715, + "loss": 0.5452, + "step": 87270 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004320464711993046, + "loss": 0.4454, + "step": 87280 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043200407047001214, + "loss": 0.4802, + "step": 87290 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004319616697407195, + "loss": 0.5758, + "step": 87300 + }, + { + "epoch": 3.69, + "learning_rate": 0.000431919269011427, + "loss": 0.443, + "step": 87310 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004318768682821345, + "loss": 0.5306, + "step": 87320 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004318344675528419, + "loss": 0.4744, + "step": 87330 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043179206682354937, + "loss": 0.5922, + "step": 87340 + }, + { + "epoch": 3.69, + "learning_rate": 0.0004317496660942569, + "loss": 0.5659, + "step": 87350 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043170726536496425, + "loss": 0.5585, + "step": 87360 + }, + { + "epoch": 3.69, + "learning_rate": 0.00043166486463567177, + "loss": 0.7003, + "step": 87370 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043162246390637924, + "loss": 0.5822, + "step": 87380 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043158006317708665, + "loss": 0.5574, + "step": 87390 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004315376624477941, + "loss": 0.5228, + "step": 87400 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043149526171850164, + "loss": 0.4817, + "step": 87410 + }, + { + "epoch": 3.7, + "learning_rate": 0.000431452860989209, + "loss": 0.5107, + "step": 87420 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004314104602599165, + "loss": 0.4782, + "step": 87430 + }, + { + "epoch": 3.7, + "learning_rate": 0.000431368059530624, + "loss": 0.4693, + "step": 87440 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004313256588013314, + "loss": 0.6263, + "step": 87450 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043128325807203886, + "loss": 0.4928, + "step": 87460 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043124085734274633, + "loss": 0.5142, + "step": 87470 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043119845661345374, + "loss": 0.5517, + "step": 87480 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043115605588416126, + "loss": 0.5393, + "step": 87490 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043111365515486873, + "loss": 0.5983, + "step": 87500 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043107125442557614, + "loss": 0.5266, + "step": 87510 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004310288536962836, + "loss": 0.5685, + "step": 87520 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004309864529669911, + "loss": 0.5181, + "step": 87530 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004309440522376985, + "loss": 0.4949, + "step": 87540 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043090165150840595, + "loss": 0.4883, + "step": 87550 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043085925077911337, + "loss": 0.4847, + "step": 87560 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004308168500498209, + "loss": 0.5835, + "step": 87570 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043077444932052835, + "loss": 0.5219, + "step": 87580 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043073204859123577, + "loss": 0.5542, + "step": 87590 + }, + { + "epoch": 3.7, + "learning_rate": 0.00043068964786194323, + "loss": 0.6945, + "step": 87600 + }, + { + "epoch": 3.7, + "learning_rate": 0.0004306472471326507, + "loss": 0.5206, + "step": 87610 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004306048464033581, + "loss": 0.5347, + "step": 87620 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004305624456740656, + "loss": 0.543, + "step": 87630 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004305200449447731, + "loss": 0.576, + "step": 87640 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004304776442154805, + "loss": 0.5513, + "step": 87650 + }, + { + "epoch": 3.71, + "learning_rate": 0.000430435243486188, + "loss": 0.5384, + "step": 87660 + }, + { + "epoch": 3.71, + "learning_rate": 0.00043039284275689545, + "loss": 0.5489, + "step": 87670 + }, + { + "epoch": 3.71, + "learning_rate": 0.00043035044202760286, + "loss": 0.4677, + "step": 87680 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004303080412983103, + "loss": 0.5045, + "step": 87690 + }, + { + "epoch": 3.71, + "learning_rate": 0.00043026564056901785, + "loss": 0.5755, + "step": 87700 + }, + { + "epoch": 3.71, + "learning_rate": 0.00043022323983972526, + "loss": 0.4964, + "step": 87710 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004301808391104327, + "loss": 0.5189, + "step": 87720 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004301384383811402, + "loss": 0.4733, + "step": 87730 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004300960376518476, + "loss": 0.5589, + "step": 87740 + }, + { + "epoch": 3.71, + "learning_rate": 0.00043005363692255507, + "loss": 0.6515, + "step": 87750 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004300112361932626, + "loss": 0.587, + "step": 87760 + }, + { + "epoch": 3.71, + "learning_rate": 0.00042996883546396995, + "loss": 0.5701, + "step": 87770 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004299264347346775, + "loss": 0.6282, + "step": 87780 + }, + { + "epoch": 3.71, + "learning_rate": 0.00042988403400538494, + "loss": 0.5699, + "step": 87790 + }, + { + "epoch": 3.71, + "learning_rate": 0.00042984163327609235, + "loss": 0.5541, + "step": 87800 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004297992325467998, + "loss": 0.5345, + "step": 87810 + }, + { + "epoch": 3.71, + "learning_rate": 0.00042975683181750734, + "loss": 0.4805, + "step": 87820 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004297144310882147, + "loss": 0.5942, + "step": 87830 + }, + { + "epoch": 3.71, + "learning_rate": 0.0004296720303589222, + "loss": 0.56, + "step": 87840 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004296296296296297, + "loss": 0.6051, + "step": 87850 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004295872289003371, + "loss": 0.5569, + "step": 87860 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042954482817104457, + "loss": 0.4933, + "step": 87870 + }, + { + "epoch": 3.72, + "learning_rate": 0.000429502427441752, + "loss": 0.5274, + "step": 87880 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042946002671245945, + "loss": 0.5127, + "step": 87890 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042941762598316697, + "loss": 0.5668, + "step": 87900 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004293752252538743, + "loss": 0.532, + "step": 87910 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042933282452458185, + "loss": 0.525, + "step": 87920 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004292904237952893, + "loss": 0.5724, + "step": 87930 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004292480230659967, + "loss": 0.6056, + "step": 87940 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004292056223367042, + "loss": 0.4738, + "step": 87950 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004291632216074117, + "loss": 0.5088, + "step": 87960 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042912082087811907, + "loss": 0.5548, + "step": 87970 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004290784201488266, + "loss": 0.5236, + "step": 87980 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042903601941953406, + "loss": 0.5458, + "step": 87990 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042899361869024147, + "loss": 0.6178, + "step": 88000 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042895121796094894, + "loss": 0.4888, + "step": 88010 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042890881723165646, + "loss": 0.4738, + "step": 88020 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004288664165023638, + "loss": 0.4946, + "step": 88030 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042882401577307134, + "loss": 0.5707, + "step": 88040 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004287816150437788, + "loss": 0.5212, + "step": 88050 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004287392143144862, + "loss": 0.5221, + "step": 88060 + }, + { + "epoch": 3.72, + "learning_rate": 0.0004286968135851937, + "loss": 0.4305, + "step": 88070 + }, + { + "epoch": 3.72, + "learning_rate": 0.00042865441285590115, + "loss": 0.5432, + "step": 88080 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042861201212660856, + "loss": 0.4819, + "step": 88090 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004285696113973161, + "loss": 0.5526, + "step": 88100 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042852721066802355, + "loss": 0.5476, + "step": 88110 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042848480993873096, + "loss": 0.4591, + "step": 88120 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042844240920943843, + "loss": 0.5158, + "step": 88130 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004284000084801459, + "loss": 0.5396, + "step": 88140 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004283576077508533, + "loss": 0.5455, + "step": 88150 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042831520702156083, + "loss": 0.4556, + "step": 88160 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004282728062922683, + "loss": 0.5008, + "step": 88170 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004282304055629757, + "loss": 0.6215, + "step": 88180 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004281880048336832, + "loss": 0.6145, + "step": 88190 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004281456041043906, + "loss": 0.5144, + "step": 88200 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042810320337509806, + "loss": 0.512, + "step": 88210 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004280608026458055, + "loss": 0.4794, + "step": 88220 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042801840191651294, + "loss": 0.4936, + "step": 88230 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042797600118722046, + "loss": 0.5949, + "step": 88240 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004279336004579279, + "loss": 0.6048, + "step": 88250 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042789119972863534, + "loss": 0.4564, + "step": 88260 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004278487989993428, + "loss": 0.5551, + "step": 88270 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042780639827005027, + "loss": 0.5587, + "step": 88280 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004277639975407577, + "loss": 0.4728, + "step": 88290 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042772159681146515, + "loss": 0.4974, + "step": 88300 + }, + { + "epoch": 3.73, + "learning_rate": 0.00042767919608217267, + "loss": 0.4846, + "step": 88310 + }, + { + "epoch": 3.73, + "learning_rate": 0.0004276367953528801, + "loss": 0.4857, + "step": 88320 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042759439462358755, + "loss": 0.5705, + "step": 88330 + }, + { + "epoch": 3.74, + "learning_rate": 0.000427551993894295, + "loss": 0.6223, + "step": 88340 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042750959316500243, + "loss": 0.6948, + "step": 88350 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004274671924357099, + "loss": 0.5185, + "step": 88360 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004274247917064174, + "loss": 0.5263, + "step": 88370 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004273823909771248, + "loss": 0.6081, + "step": 88380 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004273399902478323, + "loss": 0.6076, + "step": 88390 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042729758951853976, + "loss": 0.5677, + "step": 88400 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004272551887892472, + "loss": 0.5123, + "step": 88410 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042721278805995464, + "loss": 0.5541, + "step": 88420 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042717038733066216, + "loss": 0.4296, + "step": 88430 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004271279866013695, + "loss": 0.51, + "step": 88440 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042708558587207704, + "loss": 0.5002, + "step": 88450 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004270431851427845, + "loss": 0.5832, + "step": 88460 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004270007844134919, + "loss": 0.5247, + "step": 88470 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004269583836841994, + "loss": 0.4818, + "step": 88480 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004269159829549068, + "loss": 0.5523, + "step": 88490 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042687358222561427, + "loss": 0.5616, + "step": 88500 + }, + { + "epoch": 3.74, + "learning_rate": 0.0004268311814963218, + "loss": 0.4886, + "step": 88510 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042678878076702915, + "loss": 0.5465, + "step": 88520 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042674638003773667, + "loss": 0.506, + "step": 88530 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042670397930844414, + "loss": 0.5303, + "step": 88540 + }, + { + "epoch": 3.74, + "learning_rate": 0.00042666157857915155, + "loss": 0.522, + "step": 88550 + }, + { + "epoch": 3.75, + "learning_rate": 0.000426619177849859, + "loss": 0.5048, + "step": 88560 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042657677712056654, + "loss": 0.4647, + "step": 88570 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004265343763912739, + "loss": 0.4578, + "step": 88580 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004264919756619814, + "loss": 0.4945, + "step": 88590 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004264495749326889, + "loss": 0.5246, + "step": 88600 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004264071742033963, + "loss": 0.4359, + "step": 88610 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042636477347410376, + "loss": 0.478, + "step": 88620 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004263223727448113, + "loss": 0.4938, + "step": 88630 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042627997201551864, + "loss": 0.5392, + "step": 88640 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042623757128622616, + "loss": 0.519, + "step": 88650 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042619517055693363, + "loss": 0.4912, + "step": 88660 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042615276982764104, + "loss": 0.4366, + "step": 88670 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004261103690983485, + "loss": 0.48, + "step": 88680 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042606796836905603, + "loss": 0.4271, + "step": 88690 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004260255676397634, + "loss": 0.4868, + "step": 88700 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004259831669104709, + "loss": 0.5431, + "step": 88710 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004259407661811784, + "loss": 0.5644, + "step": 88720 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004258983654518858, + "loss": 0.5712, + "step": 88730 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042585596472259325, + "loss": 0.5712, + "step": 88740 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004258135639933007, + "loss": 0.4472, + "step": 88750 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042577116326400813, + "loss": 0.5373, + "step": 88760 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042572876253471566, + "loss": 0.4917, + "step": 88770 + }, + { + "epoch": 3.75, + "learning_rate": 0.0004256863618054231, + "loss": 0.5566, + "step": 88780 + }, + { + "epoch": 3.75, + "learning_rate": 0.00042564396107613053, + "loss": 0.5335, + "step": 88790 + }, + { + "epoch": 3.76, + "learning_rate": 0.000425601560346838, + "loss": 0.468, + "step": 88800 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004255591596175454, + "loss": 0.5526, + "step": 88810 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004255167588882529, + "loss": 0.5053, + "step": 88820 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042547435815896035, + "loss": 0.4864, + "step": 88830 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042543195742966776, + "loss": 0.6192, + "step": 88840 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004253895567003753, + "loss": 0.5106, + "step": 88850 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042534715597108275, + "loss": 0.566, + "step": 88860 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042530475524179016, + "loss": 0.5262, + "step": 88870 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042526235451249763, + "loss": 0.5293, + "step": 88880 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004252199537832051, + "loss": 0.5709, + "step": 88890 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004251775530539125, + "loss": 0.5345, + "step": 88900 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042513515232462003, + "loss": 0.5218, + "step": 88910 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004250927515953275, + "loss": 0.5793, + "step": 88920 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004250503508660349, + "loss": 0.4565, + "step": 88930 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004250079501367424, + "loss": 0.4838, + "step": 88940 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042496554940744984, + "loss": 0.5819, + "step": 88950 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042492314867815725, + "loss": 0.5683, + "step": 88960 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004248807479488647, + "loss": 0.5328, + "step": 88970 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042483834721957224, + "loss": 0.5135, + "step": 88980 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042479594649027965, + "loss": 0.5597, + "step": 88990 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004247535457609871, + "loss": 0.4936, + "step": 89000 + }, + { + "epoch": 3.76, + "learning_rate": 0.0004247111450316946, + "loss": 0.5465, + "step": 89010 + }, + { + "epoch": 3.76, + "learning_rate": 0.000424668744302402, + "loss": 0.5741, + "step": 89020 + }, + { + "epoch": 3.76, + "learning_rate": 0.00042462634357310947, + "loss": 0.5071, + "step": 89030 + }, + { + "epoch": 3.77, + "learning_rate": 0.000424583942843817, + "loss": 0.5346, + "step": 89040 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042454154211452435, + "loss": 0.5355, + "step": 89050 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042449914138523187, + "loss": 0.5484, + "step": 89060 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042445674065593933, + "loss": 0.6231, + "step": 89070 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042441433992664675, + "loss": 0.5035, + "step": 89080 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004243719391973542, + "loss": 0.5163, + "step": 89090 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004243295384680616, + "loss": 0.5299, + "step": 89100 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004242871377387691, + "loss": 0.4932, + "step": 89110 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004242447370094766, + "loss": 0.5707, + "step": 89120 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042420233628018397, + "loss": 0.5435, + "step": 89130 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004241599355508915, + "loss": 0.5846, + "step": 89140 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042411753482159896, + "loss": 0.4945, + "step": 89150 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042407513409230637, + "loss": 0.5759, + "step": 89160 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042403273336301384, + "loss": 0.5365, + "step": 89170 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042399033263372136, + "loss": 0.5395, + "step": 89180 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004239479319044287, + "loss": 0.535, + "step": 89190 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042390553117513624, + "loss": 0.5079, + "step": 89200 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004238631304458437, + "loss": 0.5702, + "step": 89210 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004238207297165511, + "loss": 0.4987, + "step": 89220 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004237783289872586, + "loss": 0.5177, + "step": 89230 + }, + { + "epoch": 3.77, + "learning_rate": 0.0004237359282579661, + "loss": 0.4967, + "step": 89240 + }, + { + "epoch": 3.77, + "learning_rate": 0.00042369352752867346, + "loss": 0.5135, + "step": 89250 + }, + { + "epoch": 3.77, + "learning_rate": 0.000423651126799381, + "loss": 0.5129, + "step": 89260 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042360872607008845, + "loss": 0.5459, + "step": 89270 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042356632534079587, + "loss": 0.4796, + "step": 89280 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042352392461150333, + "loss": 0.5611, + "step": 89290 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042348152388221085, + "loss": 0.5478, + "step": 89300 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004234391231529182, + "loss": 0.579, + "step": 89310 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042339672242362573, + "loss": 0.5116, + "step": 89320 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004233543216943332, + "loss": 0.5585, + "step": 89330 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004233119209650406, + "loss": 0.4798, + "step": 89340 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004232695202357481, + "loss": 0.5564, + "step": 89350 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042322711950645555, + "loss": 0.5102, + "step": 89360 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042318471877716296, + "loss": 0.4969, + "step": 89370 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004231423180478705, + "loss": 0.495, + "step": 89380 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042309991731857795, + "loss": 0.5318, + "step": 89390 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042305751658928536, + "loss": 0.5625, + "step": 89400 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004230151158599928, + "loss": 0.5348, + "step": 89410 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042297271513070024, + "loss": 0.5264, + "step": 89420 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004229303144014077, + "loss": 0.5032, + "step": 89430 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004228879136721152, + "loss": 0.5668, + "step": 89440 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004228455129428226, + "loss": 0.5475, + "step": 89450 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004228031122135301, + "loss": 0.531, + "step": 89460 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042276071148423757, + "loss": 0.4393, + "step": 89470 + }, + { + "epoch": 3.78, + "learning_rate": 0.000422718310754945, + "loss": 0.4252, + "step": 89480 + }, + { + "epoch": 3.78, + "learning_rate": 0.00042267591002565245, + "loss": 0.5174, + "step": 89490 + }, + { + "epoch": 3.78, + "learning_rate": 0.0004226335092963599, + "loss": 0.5479, + "step": 89500 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042259110856706733, + "loss": 0.5303, + "step": 89510 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042254870783777485, + "loss": 0.5193, + "step": 89520 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004225063071084823, + "loss": 0.5488, + "step": 89530 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042246390637918973, + "loss": 0.5263, + "step": 89540 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004224215056498972, + "loss": 0.4405, + "step": 89550 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042237910492060466, + "loss": 0.4911, + "step": 89560 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004223367041913121, + "loss": 0.5193, + "step": 89570 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042229430346201954, + "loss": 0.5234, + "step": 89580 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042225190273272706, + "loss": 0.4931, + "step": 89590 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004222095020034345, + "loss": 0.4988, + "step": 89600 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042216710127414194, + "loss": 0.566, + "step": 89610 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004221247005448494, + "loss": 0.5661, + "step": 89620 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004220822998155568, + "loss": 0.5533, + "step": 89630 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004220398990862643, + "loss": 0.5427, + "step": 89640 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004219974983569718, + "loss": 0.5255, + "step": 89650 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004219550976276792, + "loss": 0.5646, + "step": 89660 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004219126968983867, + "loss": 0.4642, + "step": 89670 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042187029616909416, + "loss": 0.5141, + "step": 89680 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042182789543980157, + "loss": 0.5128, + "step": 89690 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042178549471050904, + "loss": 0.5598, + "step": 89700 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042174309398121645, + "loss": 0.5743, + "step": 89710 + }, + { + "epoch": 3.79, + "learning_rate": 0.0004217006932519239, + "loss": 0.5676, + "step": 89720 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042165829252263144, + "loss": 0.5346, + "step": 89730 + }, + { + "epoch": 3.79, + "learning_rate": 0.00042161589179333885, + "loss": 0.606, + "step": 89740 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004215734910640463, + "loss": 0.5293, + "step": 89750 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004215310903347538, + "loss": 0.4518, + "step": 89760 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004214886896054612, + "loss": 0.6161, + "step": 89770 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042144628887616866, + "loss": 0.5517, + "step": 89780 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004214038881468762, + "loss": 0.4909, + "step": 89790 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042136148741758354, + "loss": 0.5326, + "step": 89800 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042131908668829106, + "loss": 0.5167, + "step": 89810 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042127668595899853, + "loss": 0.4947, + "step": 89820 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042123428522970594, + "loss": 0.545, + "step": 89830 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004211918845004134, + "loss": 0.5648, + "step": 89840 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042114948377112093, + "loss": 0.5963, + "step": 89850 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004211070830418283, + "loss": 0.5431, + "step": 89860 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004210646823125358, + "loss": 0.543, + "step": 89870 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004210222815832433, + "loss": 0.4565, + "step": 89880 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004209798808539507, + "loss": 0.5885, + "step": 89890 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042093748012465816, + "loss": 0.5322, + "step": 89900 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004208950793953657, + "loss": 0.6068, + "step": 89910 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042085267866607303, + "loss": 0.5339, + "step": 89920 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042081027793678056, + "loss": 0.467, + "step": 89930 + }, + { + "epoch": 3.8, + "learning_rate": 0.000420767877207488, + "loss": 0.5088, + "step": 89940 + }, + { + "epoch": 3.8, + "learning_rate": 0.00042072547647819544, + "loss": 0.5071, + "step": 89950 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004206830757489029, + "loss": 0.5425, + "step": 89960 + }, + { + "epoch": 3.8, + "learning_rate": 0.0004206406750196104, + "loss": 0.5028, + "step": 89970 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004205982742903178, + "loss": 0.5451, + "step": 89980 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004205558735610253, + "loss": 0.5287, + "step": 89990 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042051347283173277, + "loss": 0.5184, + "step": 90000 + }, + { + "epoch": 3.81, + "eval_loss": 0.6043498516082764, + "eval_runtime": 337.771, + "eval_samples_per_second": 15.558, + "eval_steps_per_second": 3.89, + "step": 90000 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004204710721024402, + "loss": 0.4972, + "step": 90010 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042042867137314765, + "loss": 0.4611, + "step": 90020 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042038627064385506, + "loss": 0.4679, + "step": 90030 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042034386991456253, + "loss": 0.5363, + "step": 90040 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042030146918527005, + "loss": 0.5741, + "step": 90050 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004202590684559774, + "loss": 0.4266, + "step": 90060 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042021666772668493, + "loss": 0.5124, + "step": 90070 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004201742669973924, + "loss": 0.535, + "step": 90080 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004201318662680998, + "loss": 0.5862, + "step": 90090 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004200894655388073, + "loss": 0.5156, + "step": 90100 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042004706480951474, + "loss": 0.4629, + "step": 90110 + }, + { + "epoch": 3.81, + "learning_rate": 0.00042000466408022215, + "loss": 0.484, + "step": 90120 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004199622633509297, + "loss": 0.5924, + "step": 90130 + }, + { + "epoch": 3.81, + "learning_rate": 0.00041991986262163714, + "loss": 0.5571, + "step": 90140 + }, + { + "epoch": 3.81, + "learning_rate": 0.00041987746189234455, + "loss": 0.6235, + "step": 90150 + }, + { + "epoch": 3.81, + "learning_rate": 0.000419835061163052, + "loss": 0.4898, + "step": 90160 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004197926604337595, + "loss": 0.4908, + "step": 90170 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004197502597044669, + "loss": 0.5333, + "step": 90180 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004197078589751744, + "loss": 0.5706, + "step": 90190 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004196654582458819, + "loss": 0.503, + "step": 90200 + }, + { + "epoch": 3.81, + "learning_rate": 0.0004196230575165893, + "loss": 0.4491, + "step": 90210 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041958065678729677, + "loss": 0.5667, + "step": 90220 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041953825605800423, + "loss": 0.5221, + "step": 90230 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041949585532871165, + "loss": 0.4915, + "step": 90240 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004194534545994191, + "loss": 0.4662, + "step": 90250 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041941105387012663, + "loss": 0.6016, + "step": 90260 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041936865314083405, + "loss": 0.4769, + "step": 90270 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004193262524115415, + "loss": 0.5131, + "step": 90280 + }, + { + "epoch": 3.82, + "learning_rate": 0.000419283851682249, + "loss": 0.539, + "step": 90290 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004192414509529564, + "loss": 0.5891, + "step": 90300 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041919905022366386, + "loss": 0.5043, + "step": 90310 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004191566494943714, + "loss": 0.5528, + "step": 90320 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041911424876507874, + "loss": 0.5008, + "step": 90330 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041907184803578626, + "loss": 0.5357, + "step": 90340 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004190294473064937, + "loss": 0.5288, + "step": 90350 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041898704657720114, + "loss": 0.4776, + "step": 90360 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004189446458479086, + "loss": 0.5549, + "step": 90370 + }, + { + "epoch": 3.82, + "learning_rate": 0.000418902245118616, + "loss": 0.5446, + "step": 90380 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004188598443893235, + "loss": 0.5027, + "step": 90390 + }, + { + "epoch": 3.82, + "learning_rate": 0.000418817443660031, + "loss": 0.4826, + "step": 90400 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004187750429307384, + "loss": 0.3647, + "step": 90410 + }, + { + "epoch": 3.82, + "learning_rate": 0.0004187326422014459, + "loss": 0.4977, + "step": 90420 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041869024147215335, + "loss": 0.5878, + "step": 90430 + }, + { + "epoch": 3.82, + "learning_rate": 0.00041864784074286077, + "loss": 0.4731, + "step": 90440 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041860544001356823, + "loss": 0.478, + "step": 90450 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041856303928427575, + "loss": 0.5701, + "step": 90460 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004185206385549831, + "loss": 0.5017, + "step": 90470 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041847823782569063, + "loss": 0.4694, + "step": 90480 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004184358370963981, + "loss": 0.5958, + "step": 90490 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004183934363671055, + "loss": 0.6148, + "step": 90500 + }, + { + "epoch": 3.83, + "learning_rate": 0.000418351035637813, + "loss": 0.4949, + "step": 90510 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004183086349085205, + "loss": 0.5254, + "step": 90520 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041826623417922786, + "loss": 0.5135, + "step": 90530 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004182238334499354, + "loss": 0.6176, + "step": 90540 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041818143272064285, + "loss": 0.5088, + "step": 90550 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041813903199135026, + "loss": 0.4523, + "step": 90560 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004180966312620577, + "loss": 0.5226, + "step": 90570 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041805423053276525, + "loss": 0.4966, + "step": 90580 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004180118298034726, + "loss": 0.5714, + "step": 90590 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004179694290741801, + "loss": 0.4944, + "step": 90600 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004179270283448876, + "loss": 0.5635, + "step": 90610 + }, + { + "epoch": 3.83, + "learning_rate": 0.000417884627615595, + "loss": 0.5306, + "step": 90620 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041784222688630247, + "loss": 0.6078, + "step": 90630 + }, + { + "epoch": 3.83, + "learning_rate": 0.0004177998261570099, + "loss": 0.502, + "step": 90640 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041775742542771735, + "loss": 0.6534, + "step": 90650 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041771502469842487, + "loss": 0.5158, + "step": 90660 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041767262396913223, + "loss": 0.685, + "step": 90670 + }, + { + "epoch": 3.83, + "learning_rate": 0.00041763022323983975, + "loss": 0.5393, + "step": 90680 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004175878225105472, + "loss": 0.5298, + "step": 90690 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041754542178125463, + "loss": 0.5022, + "step": 90700 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004175030210519621, + "loss": 0.49, + "step": 90710 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004174606203226696, + "loss": 0.5174, + "step": 90720 + }, + { + "epoch": 3.84, + "learning_rate": 0.000417418219593377, + "loss": 0.4317, + "step": 90730 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004173758188640845, + "loss": 0.5103, + "step": 90740 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041733341813479197, + "loss": 0.5383, + "step": 90750 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004172910174054994, + "loss": 0.4348, + "step": 90760 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041724861667620684, + "loss": 0.5221, + "step": 90770 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004172062159469143, + "loss": 0.5611, + "step": 90780 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004171638152176217, + "loss": 0.5257, + "step": 90790 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041712141448832924, + "loss": 0.5956, + "step": 90800 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004170790137590367, + "loss": 0.5082, + "step": 90810 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004170366130297441, + "loss": 0.5808, + "step": 90820 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004169942123004516, + "loss": 0.5975, + "step": 90830 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041695181157115906, + "loss": 0.5242, + "step": 90840 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041690941084186647, + "loss": 0.554, + "step": 90850 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041686701011257394, + "loss": 0.5624, + "step": 90860 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041682460938328146, + "loss": 0.4407, + "step": 90870 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041678220865398887, + "loss": 0.5498, + "step": 90880 + }, + { + "epoch": 3.84, + "learning_rate": 0.00041673980792469634, + "loss": 0.5634, + "step": 90890 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004166974071954038, + "loss": 0.5022, + "step": 90900 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004166550064661112, + "loss": 0.5721, + "step": 90910 + }, + { + "epoch": 3.84, + "learning_rate": 0.0004166126057368187, + "loss": 0.4812, + "step": 90920 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004165702050075262, + "loss": 0.5363, + "step": 90930 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004165278042782336, + "loss": 0.4668, + "step": 90940 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004164854035489411, + "loss": 0.59, + "step": 90950 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004164430028196485, + "loss": 0.5673, + "step": 90960 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041640060209035596, + "loss": 0.4765, + "step": 90970 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041635820136106343, + "loss": 0.5011, + "step": 90980 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041631580063177084, + "loss": 0.5694, + "step": 90990 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004162733999024783, + "loss": 0.5061, + "step": 91000 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041623099917318583, + "loss": 0.5409, + "step": 91010 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041618859844389324, + "loss": 0.4512, + "step": 91020 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004161461977146007, + "loss": 0.4962, + "step": 91030 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004161037969853082, + "loss": 0.4985, + "step": 91040 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004160613962560156, + "loss": 0.4893, + "step": 91050 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041601899552672306, + "loss": 0.5919, + "step": 91060 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004159765947974306, + "loss": 0.629, + "step": 91070 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041593419406813794, + "loss": 0.4808, + "step": 91080 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041589179333884546, + "loss": 0.5395, + "step": 91090 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004158493926095529, + "loss": 0.4962, + "step": 91100 + }, + { + "epoch": 3.85, + "learning_rate": 0.00041580699188026034, + "loss": 0.4942, + "step": 91110 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004157645911509678, + "loss": 0.5828, + "step": 91120 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004157221904216753, + "loss": 0.4904, + "step": 91130 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004156797896923827, + "loss": 0.5556, + "step": 91140 + }, + { + "epoch": 3.85, + "learning_rate": 0.0004156373889630902, + "loss": 0.5048, + "step": 91150 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041559498823379767, + "loss": 0.4993, + "step": 91160 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004155525875045051, + "loss": 0.5936, + "step": 91170 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041551018677521255, + "loss": 0.5172, + "step": 91180 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041546778604592007, + "loss": 0.5227, + "step": 91190 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041542538531662743, + "loss": 0.4739, + "step": 91200 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041538298458733495, + "loss": 0.5624, + "step": 91210 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004153405838580424, + "loss": 0.5553, + "step": 91220 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041529818312874983, + "loss": 0.4594, + "step": 91230 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004152557823994573, + "loss": 0.4687, + "step": 91240 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004152133816701647, + "loss": 0.4591, + "step": 91250 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004151709809408722, + "loss": 0.5428, + "step": 91260 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004151285802115797, + "loss": 0.5919, + "step": 91270 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041508617948228705, + "loss": 0.4907, + "step": 91280 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004150437787529946, + "loss": 0.5052, + "step": 91290 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041500137802370204, + "loss": 0.4809, + "step": 91300 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041495897729440945, + "loss": 0.5812, + "step": 91310 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004149165765651169, + "loss": 0.571, + "step": 91320 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041487417583582444, + "loss": 0.5233, + "step": 91330 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004148317751065318, + "loss": 0.4688, + "step": 91340 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004147893743772393, + "loss": 0.4674, + "step": 91350 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004147469736479468, + "loss": 0.5323, + "step": 91360 + }, + { + "epoch": 3.86, + "learning_rate": 0.0004147045729186542, + "loss": 0.4165, + "step": 91370 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041466217218936167, + "loss": 0.52, + "step": 91380 + }, + { + "epoch": 3.86, + "learning_rate": 0.00041461977146006913, + "loss": 0.4655, + "step": 91390 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041457737073077655, + "loss": 0.5733, + "step": 91400 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041453497000148407, + "loss": 0.4595, + "step": 91410 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041449256927219154, + "loss": 0.5277, + "step": 91420 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041445016854289895, + "loss": 0.5252, + "step": 91430 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004144077678136064, + "loss": 0.4969, + "step": 91440 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004143653670843139, + "loss": 0.5613, + "step": 91450 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004143229663550213, + "loss": 0.5067, + "step": 91460 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004142805656257288, + "loss": 0.6234, + "step": 91470 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004142381648964363, + "loss": 0.5241, + "step": 91480 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004141957641671437, + "loss": 0.569, + "step": 91490 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041415336343785116, + "loss": 0.5263, + "step": 91500 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041411096270855863, + "loss": 0.4858, + "step": 91510 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041406856197926604, + "loss": 0.5499, + "step": 91520 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004140261612499735, + "loss": 0.5708, + "step": 91530 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041398376052068103, + "loss": 0.5226, + "step": 91540 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041394135979138844, + "loss": 0.5039, + "step": 91550 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004138989590620959, + "loss": 0.4153, + "step": 91560 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004138565583328033, + "loss": 0.5966, + "step": 91570 + }, + { + "epoch": 3.87, + "learning_rate": 0.0004138141576035108, + "loss": 0.5321, + "step": 91580 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041377175687421825, + "loss": 0.4228, + "step": 91590 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041372935614492567, + "loss": 0.568, + "step": 91600 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041368695541563313, + "loss": 0.5032, + "step": 91610 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041364455468634065, + "loss": 0.5123, + "step": 91620 + }, + { + "epoch": 3.87, + "learning_rate": 0.00041360215395704807, + "loss": 0.5763, + "step": 91630 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041355975322775553, + "loss": 0.4659, + "step": 91640 + }, + { + "epoch": 3.88, + "learning_rate": 0.000413517352498463, + "loss": 0.5479, + "step": 91650 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004134749517691704, + "loss": 0.5027, + "step": 91660 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004134325510398779, + "loss": 0.5568, + "step": 91670 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004133901503105854, + "loss": 0.5918, + "step": 91680 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004133477495812928, + "loss": 0.5031, + "step": 91690 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004133053488520003, + "loss": 0.5864, + "step": 91700 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041326294812270775, + "loss": 0.4803, + "step": 91710 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041322054739341516, + "loss": 0.5605, + "step": 91720 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004131781466641226, + "loss": 0.5164, + "step": 91730 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041313574593483015, + "loss": 0.4507, + "step": 91740 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004130933452055375, + "loss": 0.5121, + "step": 91750 + }, + { + "epoch": 3.88, + "learning_rate": 0.000413050944476245, + "loss": 0.6755, + "step": 91760 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004130085437469525, + "loss": 0.6269, + "step": 91770 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004129661430176599, + "loss": 0.5626, + "step": 91780 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041292374228836737, + "loss": 0.5334, + "step": 91790 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004128813415590749, + "loss": 0.5431, + "step": 91800 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041283894082978225, + "loss": 0.5615, + "step": 91810 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004127965401004898, + "loss": 0.6489, + "step": 91820 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041275413937119724, + "loss": 0.5208, + "step": 91830 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041271173864190465, + "loss": 0.5385, + "step": 91840 + }, + { + "epoch": 3.88, + "learning_rate": 0.0004126693379126121, + "loss": 0.5467, + "step": 91850 + }, + { + "epoch": 3.88, + "learning_rate": 0.00041262693718331953, + "loss": 0.5053, + "step": 91860 + }, + { + "epoch": 3.89, + "learning_rate": 0.000412584536454027, + "loss": 0.4759, + "step": 91870 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004125421357247345, + "loss": 0.5273, + "step": 91880 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004124997349954419, + "loss": 0.5672, + "step": 91890 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004124573342661494, + "loss": 0.4577, + "step": 91900 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041241493353685687, + "loss": 0.448, + "step": 91910 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004123725328075643, + "loss": 0.5076, + "step": 91920 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041233013207827175, + "loss": 0.5238, + "step": 91930 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041228773134897927, + "loss": 0.5415, + "step": 91940 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004122453306196866, + "loss": 0.496, + "step": 91950 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041220292989039415, + "loss": 0.4662, + "step": 91960 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004121605291611016, + "loss": 0.4986, + "step": 91970 + }, + { + "epoch": 3.89, + "learning_rate": 0.000412118128431809, + "loss": 0.4392, + "step": 91980 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004120757277025165, + "loss": 0.5562, + "step": 91990 + }, + { + "epoch": 3.89, + "learning_rate": 0.000412033326973224, + "loss": 0.5829, + "step": 92000 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041199092624393137, + "loss": 0.533, + "step": 92010 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004119485255146389, + "loss": 0.5527, + "step": 92020 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041190612478534636, + "loss": 0.523, + "step": 92030 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041186372405605377, + "loss": 0.5578, + "step": 92040 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041182132332676124, + "loss": 0.4742, + "step": 92050 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004117789225974687, + "loss": 0.5103, + "step": 92060 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004117365218681761, + "loss": 0.5207, + "step": 92070 + }, + { + "epoch": 3.89, + "learning_rate": 0.00041169412113888364, + "loss": 0.5057, + "step": 92080 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004116517204095911, + "loss": 0.4989, + "step": 92090 + }, + { + "epoch": 3.89, + "learning_rate": 0.0004116093196802985, + "loss": 0.4893, + "step": 92100 + }, + { + "epoch": 3.9, + "learning_rate": 0.000411566918951006, + "loss": 0.4941, + "step": 92110 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041152451822171345, + "loss": 0.5881, + "step": 92120 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041148211749242086, + "loss": 0.5462, + "step": 92130 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041143971676312833, + "loss": 0.5012, + "step": 92140 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041139731603383585, + "loss": 0.4595, + "step": 92150 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041135491530454326, + "loss": 0.5015, + "step": 92160 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041131251457525073, + "loss": 0.5667, + "step": 92170 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041127011384595814, + "loss": 0.5645, + "step": 92180 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004112277131166656, + "loss": 0.4931, + "step": 92190 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004111853123873731, + "loss": 0.5266, + "step": 92200 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004111429116580805, + "loss": 0.5136, + "step": 92210 + }, + { + "epoch": 3.9, + "learning_rate": 0.000411100510928788, + "loss": 0.4746, + "step": 92220 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004110581101994955, + "loss": 0.4926, + "step": 92230 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004110157094702029, + "loss": 0.5606, + "step": 92240 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041097330874091036, + "loss": 0.583, + "step": 92250 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004109309080116178, + "loss": 0.5829, + "step": 92260 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041088850728232524, + "loss": 0.5161, + "step": 92270 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004108461065530327, + "loss": 0.517, + "step": 92280 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004108037058237402, + "loss": 0.5864, + "step": 92290 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041076130509444764, + "loss": 0.5353, + "step": 92300 + }, + { + "epoch": 3.9, + "learning_rate": 0.0004107189043651551, + "loss": 0.521, + "step": 92310 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041067650363586257, + "loss": 0.4913, + "step": 92320 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041063410290657, + "loss": 0.5151, + "step": 92330 + }, + { + "epoch": 3.9, + "learning_rate": 0.00041059170217727745, + "loss": 0.6511, + "step": 92340 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041054930144798497, + "loss": 0.5362, + "step": 92350 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041050690071869233, + "loss": 0.6393, + "step": 92360 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041046449998939985, + "loss": 0.6401, + "step": 92370 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004104220992601073, + "loss": 0.4962, + "step": 92380 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041037969853081473, + "loss": 0.5931, + "step": 92390 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004103372978015222, + "loss": 0.5502, + "step": 92400 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004102948970722297, + "loss": 0.6143, + "step": 92410 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004102524963429371, + "loss": 0.4851, + "step": 92420 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004102100956136446, + "loss": 0.509, + "step": 92430 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041016769488435206, + "loss": 0.5259, + "step": 92440 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004101252941550595, + "loss": 0.6084, + "step": 92450 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041008289342576694, + "loss": 0.5895, + "step": 92460 + }, + { + "epoch": 3.91, + "learning_rate": 0.00041004049269647446, + "loss": 0.5308, + "step": 92470 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004099980919671818, + "loss": 0.5035, + "step": 92480 + }, + { + "epoch": 3.91, + "learning_rate": 0.00040995569123788934, + "loss": 0.5169, + "step": 92490 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004099132905085967, + "loss": 0.5239, + "step": 92500 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004098708897793042, + "loss": 0.5971, + "step": 92510 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004098284890500117, + "loss": 0.515, + "step": 92520 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004097860883207191, + "loss": 0.5071, + "step": 92530 + }, + { + "epoch": 3.91, + "learning_rate": 0.00040974368759142657, + "loss": 0.5389, + "step": 92540 + }, + { + "epoch": 3.91, + "learning_rate": 0.0004097012868621341, + "loss": 0.5107, + "step": 92550 + }, + { + "epoch": 3.91, + "learning_rate": 0.00040965888613284145, + "loss": 0.571, + "step": 92560 + }, + { + "epoch": 3.91, + "learning_rate": 0.00040961648540354897, + "loss": 0.5012, + "step": 92570 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040957408467425644, + "loss": 0.5825, + "step": 92580 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040953168394496385, + "loss": 0.5112, + "step": 92590 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004094892832156713, + "loss": 0.4767, + "step": 92600 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040944688248637884, + "loss": 0.5808, + "step": 92610 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004094044817570862, + "loss": 0.5246, + "step": 92620 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004093620810277937, + "loss": 0.5582, + "step": 92630 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004093196802985012, + "loss": 0.4842, + "step": 92640 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004092772795692086, + "loss": 0.67, + "step": 92650 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040923487883991606, + "loss": 0.479, + "step": 92660 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004091924781106236, + "loss": 0.5505, + "step": 92670 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040915007738133094, + "loss": 0.4715, + "step": 92680 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040910767665203846, + "loss": 0.4846, + "step": 92690 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040906527592274593, + "loss": 0.5285, + "step": 92700 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040902287519345334, + "loss": 0.4925, + "step": 92710 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004089804744641608, + "loss": 0.5246, + "step": 92720 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004089380737348683, + "loss": 0.5018, + "step": 92730 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004088956730055757, + "loss": 0.4529, + "step": 92740 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004088532722762832, + "loss": 0.6586, + "step": 92750 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004088108715469907, + "loss": 0.5618, + "step": 92760 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004087684708176981, + "loss": 0.5165, + "step": 92770 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040872607008840555, + "loss": 0.5486, + "step": 92780 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040868366935911297, + "loss": 0.4767, + "step": 92790 + }, + { + "epoch": 3.92, + "learning_rate": 0.00040864126862982043, + "loss": 0.4931, + "step": 92800 + }, + { + "epoch": 3.92, + "learning_rate": 0.0004085988679005279, + "loss": 0.5714, + "step": 92810 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004085564671712353, + "loss": 0.5342, + "step": 92820 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040851406644194283, + "loss": 0.5561, + "step": 92830 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004084716657126503, + "loss": 0.5219, + "step": 92840 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004084292649833577, + "loss": 0.4911, + "step": 92850 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004083868642540652, + "loss": 0.5457, + "step": 92860 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040834446352477265, + "loss": 0.5283, + "step": 92870 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040830206279548006, + "loss": 0.5557, + "step": 92880 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004082596620661875, + "loss": 0.558, + "step": 92890 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040821726133689505, + "loss": 0.5103, + "step": 92900 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040817486060760246, + "loss": 0.5264, + "step": 92910 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040813245987830993, + "loss": 0.5268, + "step": 92920 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004080900591490174, + "loss": 0.5532, + "step": 92930 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004080476584197248, + "loss": 0.4587, + "step": 92940 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004080052576904323, + "loss": 0.5493, + "step": 92950 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004079628569611398, + "loss": 0.5359, + "step": 92960 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004079204562318472, + "loss": 0.6037, + "step": 92970 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004078780555025547, + "loss": 0.5949, + "step": 92980 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040783565477326214, + "loss": 0.5163, + "step": 92990 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040779325404396955, + "loss": 0.5138, + "step": 93000 + }, + { + "epoch": 3.93, + "learning_rate": 0.000407750853314677, + "loss": 0.6217, + "step": 93010 + }, + { + "epoch": 3.93, + "learning_rate": 0.00040770845258538454, + "loss": 0.5473, + "step": 93020 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004076660518560919, + "loss": 0.5168, + "step": 93030 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004076236511267994, + "loss": 0.4693, + "step": 93040 + }, + { + "epoch": 3.93, + "learning_rate": 0.0004075812503975069, + "loss": 0.5514, + "step": 93050 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004075388496682143, + "loss": 0.5046, + "step": 93060 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040749644893892177, + "loss": 0.5455, + "step": 93070 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004074540482096293, + "loss": 0.5842, + "step": 93080 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040741164748033665, + "loss": 0.5222, + "step": 93090 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040736924675104417, + "loss": 0.5646, + "step": 93100 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004073268460217515, + "loss": 0.5892, + "step": 93110 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040728444529245905, + "loss": 0.5407, + "step": 93120 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004072420445631665, + "loss": 0.6352, + "step": 93130 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004071996438338739, + "loss": 0.5534, + "step": 93140 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004071572431045814, + "loss": 0.577, + "step": 93150 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004071148423752889, + "loss": 0.4931, + "step": 93160 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040707244164599627, + "loss": 0.4565, + "step": 93170 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004070300409167038, + "loss": 0.4646, + "step": 93180 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040698764018741126, + "loss": 0.5615, + "step": 93190 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040694523945811867, + "loss": 0.595, + "step": 93200 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040690283872882614, + "loss": 0.5281, + "step": 93210 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040686043799953366, + "loss": 0.6543, + "step": 93220 + }, + { + "epoch": 3.94, + "learning_rate": 0.000406818037270241, + "loss": 0.4985, + "step": 93230 + }, + { + "epoch": 3.94, + "learning_rate": 0.00040677563654094854, + "loss": 0.4817, + "step": 93240 + }, + { + "epoch": 3.94, + "learning_rate": 0.000406733235811656, + "loss": 0.4952, + "step": 93250 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004066908350823634, + "loss": 0.5003, + "step": 93260 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004066484343530709, + "loss": 0.6419, + "step": 93270 + }, + { + "epoch": 3.94, + "learning_rate": 0.0004066060336237784, + "loss": 0.5162, + "step": 93280 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040656363289448576, + "loss": 0.489, + "step": 93290 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004065212321651933, + "loss": 0.5209, + "step": 93300 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040647883143590075, + "loss": 0.5228, + "step": 93310 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040643643070660817, + "loss": 0.5394, + "step": 93320 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040639402997731563, + "loss": 0.4801, + "step": 93330 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004063516292480231, + "loss": 0.4803, + "step": 93340 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004063092285187305, + "loss": 0.521, + "step": 93350 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040626682778943803, + "loss": 0.5539, + "step": 93360 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004062244270601455, + "loss": 0.5832, + "step": 93370 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004061820263308529, + "loss": 0.5612, + "step": 93380 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004061396256015604, + "loss": 0.512, + "step": 93390 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004060972248722678, + "loss": 0.536, + "step": 93400 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040605482414297526, + "loss": 0.5223, + "step": 93410 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004060124234136828, + "loss": 0.5754, + "step": 93420 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040597002268439014, + "loss": 0.5612, + "step": 93430 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040592762195509766, + "loss": 0.4364, + "step": 93440 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004058852212258051, + "loss": 0.5766, + "step": 93450 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040584282049651254, + "loss": 0.5721, + "step": 93460 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040580041976722, + "loss": 0.5364, + "step": 93470 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040575801903792747, + "loss": 0.6067, + "step": 93480 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004057156183086349, + "loss": 0.631, + "step": 93490 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004056732175793424, + "loss": 0.5805, + "step": 93500 + }, + { + "epoch": 3.95, + "learning_rate": 0.00040563081685004987, + "loss": 0.5113, + "step": 93510 + }, + { + "epoch": 3.95, + "learning_rate": 0.0004055884161207573, + "loss": 0.5727, + "step": 93520 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040554601539146475, + "loss": 0.472, + "step": 93530 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004055036146621722, + "loss": 0.4793, + "step": 93540 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040546121393287963, + "loss": 0.5495, + "step": 93550 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004054188132035871, + "loss": 0.4998, + "step": 93560 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004053764124742946, + "loss": 0.5746, + "step": 93570 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040533401174500203, + "loss": 0.55, + "step": 93580 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004052916110157095, + "loss": 0.6305, + "step": 93590 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040524921028641696, + "loss": 0.5137, + "step": 93600 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004052068095571244, + "loss": 0.5035, + "step": 93610 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040516440882783184, + "loss": 0.5134, + "step": 93620 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040512200809853936, + "loss": 0.5068, + "step": 93630 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004050796073692467, + "loss": 0.5489, + "step": 93640 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040503720663995424, + "loss": 0.5548, + "step": 93650 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004049948059106617, + "loss": 0.5947, + "step": 93660 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004049524051813691, + "loss": 0.501, + "step": 93670 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004049100044520766, + "loss": 0.5387, + "step": 93680 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004048676037227841, + "loss": 0.6137, + "step": 93690 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040482520299349147, + "loss": 0.5107, + "step": 93700 + }, + { + "epoch": 3.96, + "learning_rate": 0.000404782802264199, + "loss": 0.5551, + "step": 93710 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004047404015349064, + "loss": 0.5107, + "step": 93720 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040469800080561387, + "loss": 0.4905, + "step": 93730 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040465560007632134, + "loss": 0.5181, + "step": 93740 + }, + { + "epoch": 3.96, + "learning_rate": 0.00040461319934702875, + "loss": 0.5367, + "step": 93750 + }, + { + "epoch": 3.96, + "learning_rate": 0.0004045707986177362, + "loss": 0.4941, + "step": 93760 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040452839788844374, + "loss": 0.5366, + "step": 93770 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004044859971591511, + "loss": 0.4319, + "step": 93780 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004044435964298586, + "loss": 0.6072, + "step": 93790 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004044011957005661, + "loss": 0.5317, + "step": 93800 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004043587949712735, + "loss": 0.7465, + "step": 93810 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040431639424198096, + "loss": 0.5089, + "step": 93820 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004042739935126885, + "loss": 0.5102, + "step": 93830 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040423159278339584, + "loss": 0.4869, + "step": 93840 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040418919205410336, + "loss": 0.4816, + "step": 93850 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040414679132481083, + "loss": 0.5329, + "step": 93860 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040410439059551824, + "loss": 0.7165, + "step": 93870 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004040619898662257, + "loss": 0.5129, + "step": 93880 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040401958913693323, + "loss": 0.5452, + "step": 93890 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004039771884076406, + "loss": 0.5669, + "step": 93900 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004039347876783481, + "loss": 0.462, + "step": 93910 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004038923869490556, + "loss": 0.5452, + "step": 93920 + }, + { + "epoch": 3.97, + "learning_rate": 0.000403849986219763, + "loss": 0.5644, + "step": 93930 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040380758549047046, + "loss": 0.5424, + "step": 93940 + }, + { + "epoch": 3.97, + "learning_rate": 0.000403765184761178, + "loss": 0.524, + "step": 93950 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040372278403188533, + "loss": 0.5129, + "step": 93960 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040368038330259286, + "loss": 0.4136, + "step": 93970 + }, + { + "epoch": 3.97, + "learning_rate": 0.0004036379825733003, + "loss": 0.556, + "step": 93980 + }, + { + "epoch": 3.97, + "learning_rate": 0.00040359558184400774, + "loss": 0.4477, + "step": 93990 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004035531811147152, + "loss": 0.4738, + "step": 94000 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004035107803854226, + "loss": 0.5402, + "step": 94010 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004034683796561301, + "loss": 0.5599, + "step": 94020 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004034259789268376, + "loss": 0.5062, + "step": 94030 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040338357819754496, + "loss": 0.4681, + "step": 94040 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004033411774682525, + "loss": 0.5212, + "step": 94050 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040329877673895995, + "loss": 0.569, + "step": 94060 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040325637600966736, + "loss": 0.4875, + "step": 94070 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040321397528037483, + "loss": 0.4897, + "step": 94080 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004031715745510823, + "loss": 0.5268, + "step": 94090 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004031291738217897, + "loss": 0.4776, + "step": 94100 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040308677309249723, + "loss": 0.539, + "step": 94110 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004030443723632047, + "loss": 0.6051, + "step": 94120 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004030019716339121, + "loss": 0.5553, + "step": 94130 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004029595709046196, + "loss": 0.4881, + "step": 94140 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040291717017532704, + "loss": 0.5244, + "step": 94150 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040287476944603445, + "loss": 0.4655, + "step": 94160 + }, + { + "epoch": 3.98, + "learning_rate": 0.000402832368716742, + "loss": 0.4705, + "step": 94170 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040278996798744944, + "loss": 0.4914, + "step": 94180 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040274756725815685, + "loss": 0.5742, + "step": 94190 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004027051665288643, + "loss": 0.5076, + "step": 94200 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004026627657995718, + "loss": 0.5307, + "step": 94210 + }, + { + "epoch": 3.98, + "learning_rate": 0.0004026203650702792, + "loss": 0.5031, + "step": 94220 + }, + { + "epoch": 3.98, + "learning_rate": 0.00040257796434098667, + "loss": 0.5419, + "step": 94230 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004025355636116942, + "loss": 0.5102, + "step": 94240 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004024931628824016, + "loss": 0.5271, + "step": 94250 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040245076215310907, + "loss": 0.5472, + "step": 94260 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040240836142381653, + "loss": 0.5157, + "step": 94270 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040236596069452395, + "loss": 0.5035, + "step": 94280 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004023235599652314, + "loss": 0.4929, + "step": 94290 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040228115923593893, + "loss": 0.4663, + "step": 94300 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004022387585066463, + "loss": 0.5273, + "step": 94310 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004021963577773538, + "loss": 0.499, + "step": 94320 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004021539570480612, + "loss": 0.562, + "step": 94330 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004021115563187687, + "loss": 0.5678, + "step": 94340 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040206915558947616, + "loss": 0.5679, + "step": 94350 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040202675486018357, + "loss": 0.4466, + "step": 94360 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040198435413089104, + "loss": 0.4638, + "step": 94370 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040194195340159856, + "loss": 0.4542, + "step": 94380 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004018995526723059, + "loss": 0.5485, + "step": 94390 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040185715194301344, + "loss": 0.6233, + "step": 94400 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004018147512137209, + "loss": 0.4927, + "step": 94410 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004017723504844283, + "loss": 0.48, + "step": 94420 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004017299497551358, + "loss": 0.6609, + "step": 94430 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004016875490258433, + "loss": 0.5263, + "step": 94440 + }, + { + "epoch": 3.99, + "learning_rate": 0.00040164514829655067, + "loss": 0.5226, + "step": 94450 + }, + { + "epoch": 3.99, + "learning_rate": 0.0004016027475672582, + "loss": 0.6089, + "step": 94460 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040156034683796565, + "loss": 0.5618, + "step": 94470 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040151794610867307, + "loss": 0.5268, + "step": 94480 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040147554537938053, + "loss": 0.5104, + "step": 94490 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040143314465008805, + "loss": 0.5089, + "step": 94500 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004013907439207954, + "loss": 0.4649, + "step": 94510 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040134834319150293, + "loss": 0.5386, + "step": 94520 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004013059424622104, + "loss": 0.6264, + "step": 94530 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004012635417329178, + "loss": 0.5977, + "step": 94540 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004012211410036253, + "loss": 0.4853, + "step": 94550 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004011787402743328, + "loss": 0.4641, + "step": 94560 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040113633954504016, + "loss": 0.5532, + "step": 94570 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004010939388157477, + "loss": 0.5826, + "step": 94580 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040105153808645515, + "loss": 0.573, + "step": 94590 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040100913735716256, + "loss": 0.5345, + "step": 94600 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040096673662787, + "loss": 0.4824, + "step": 94610 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004009243358985775, + "loss": 0.5414, + "step": 94620 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004008819351692849, + "loss": 0.4844, + "step": 94630 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004008395344399924, + "loss": 0.4921, + "step": 94640 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004007971337106998, + "loss": 0.4602, + "step": 94650 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004007547329814073, + "loss": 0.4741, + "step": 94660 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040071233225211477, + "loss": 0.4545, + "step": 94670 + }, + { + "epoch": 4.0, + "learning_rate": 0.0004006699315228222, + "loss": 0.4483, + "step": 94680 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040062753079352965, + "loss": 0.4483, + "step": 94690 + }, + { + "epoch": 4.0, + "learning_rate": 0.00040058513006423717, + "loss": 0.4667, + "step": 94700 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040054272933494453, + "loss": 0.4741, + "step": 94710 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040050032860565205, + "loss": 0.4716, + "step": 94720 + }, + { + "epoch": 4.01, + "learning_rate": 0.0004004579278763595, + "loss": 0.4809, + "step": 94730 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040041552714706693, + "loss": 0.3972, + "step": 94740 + }, + { + "epoch": 4.01, + "learning_rate": 0.0004003731264177744, + "loss": 0.4351, + "step": 94750 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040033072568848186, + "loss": 0.3956, + "step": 94760 + }, + { + "epoch": 4.01, + "learning_rate": 0.0004002883249591893, + "loss": 0.5638, + "step": 94770 + }, + { + "epoch": 4.01, + "learning_rate": 0.0004002459242298968, + "loss": 0.4518, + "step": 94780 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040020352350060427, + "loss": 0.4385, + "step": 94790 + }, + { + "epoch": 4.01, + "learning_rate": 0.0004001611227713117, + "loss": 0.5426, + "step": 94800 + }, + { + "epoch": 4.01, + "learning_rate": 0.00040011872204201914, + "loss": 0.4678, + "step": 94810 + }, + { + "epoch": 4.01, + "learning_rate": 0.0004000763213127266, + "loss": 0.496, + "step": 94820 + }, + { + "epoch": 4.01, + "learning_rate": 0.000400033920583434, + "loss": 0.4843, + "step": 94830 + }, + { + "epoch": 4.01, + "learning_rate": 0.0003999915198541415, + "loss": 0.4491, + "step": 94840 + }, + { + "epoch": 4.01, + "learning_rate": 0.00039994911912484896, + "loss": 0.4477, + "step": 94850 + }, + { + "epoch": 4.01, + "learning_rate": 0.0003999067183955564, + "loss": 0.4479, + "step": 94860 + }, + { + "epoch": 4.01, + "learning_rate": 0.0003998643176662639, + "loss": 0.4569, + "step": 94870 + }, + { + "epoch": 4.01, + "learning_rate": 0.0003998219169369713, + "loss": 0.5829, + "step": 94880 + }, + { + "epoch": 4.01, + "learning_rate": 0.00039977951620767877, + "loss": 0.444, + "step": 94890 + }, + { + "epoch": 4.01, + "learning_rate": 0.00039973711547838624, + "loss": 0.4692, + "step": 94900 + }, + { + "epoch": 4.01, + "learning_rate": 0.0003996947147490937, + "loss": 0.4274, + "step": 94910 + }, + { + "epoch": 4.01, + "learning_rate": 0.00039965231401980117, + "loss": 0.4928, + "step": 94920 + }, + { + "epoch": 4.01, + "learning_rate": 0.00039960991329050864, + "loss": 0.596, + "step": 94930 + }, + { + "epoch": 4.01, + "learning_rate": 0.00039956751256121605, + "loss": 0.4284, + "step": 94940 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003995251118319235, + "loss": 0.4383, + "step": 94950 + }, + { + "epoch": 4.02, + "learning_rate": 0.000399482711102631, + "loss": 0.4631, + "step": 94960 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039944031037333845, + "loss": 0.4764, + "step": 94970 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039939790964404586, + "loss": 0.5072, + "step": 94980 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003993555089147534, + "loss": 0.4626, + "step": 94990 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003993131081854608, + "loss": 0.4766, + "step": 95000 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039927070745616826, + "loss": 0.4916, + "step": 95010 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003992283067268757, + "loss": 0.4339, + "step": 95020 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003991859059975832, + "loss": 0.5471, + "step": 95030 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003991435052682906, + "loss": 0.4572, + "step": 95040 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003991011045389981, + "loss": 0.485, + "step": 95050 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039905870380970554, + "loss": 0.4821, + "step": 95060 + }, + { + "epoch": 4.02, + "learning_rate": 0.000399016303080413, + "loss": 0.5379, + "step": 95070 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003989739023511204, + "loss": 0.4164, + "step": 95080 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039893150162182794, + "loss": 0.3894, + "step": 95090 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039888910089253536, + "loss": 0.4528, + "step": 95100 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003988467001632428, + "loss": 0.5317, + "step": 95110 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003988042994339503, + "loss": 0.4585, + "step": 95120 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039876189870465776, + "loss": 0.528, + "step": 95130 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039871949797536517, + "loss": 0.4344, + "step": 95140 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003986770972460727, + "loss": 0.584, + "step": 95150 + }, + { + "epoch": 4.02, + "learning_rate": 0.0003986346965167801, + "loss": 0.4631, + "step": 95160 + }, + { + "epoch": 4.02, + "learning_rate": 0.00039859229578748757, + "loss": 0.4432, + "step": 95170 + }, + { + "epoch": 4.03, + "learning_rate": 0.000398549895058195, + "loss": 0.5725, + "step": 95180 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003985074943289025, + "loss": 0.4526, + "step": 95190 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003984650935996099, + "loss": 0.5108, + "step": 95200 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003984226928703174, + "loss": 0.4305, + "step": 95210 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039838029214102485, + "loss": 0.4318, + "step": 95220 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003983378914117323, + "loss": 0.5558, + "step": 95230 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039829549068243973, + "loss": 0.4631, + "step": 95240 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039825308995314725, + "loss": 0.5002, + "step": 95250 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039821068922385466, + "loss": 0.4408, + "step": 95260 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039816828849456213, + "loss": 0.4102, + "step": 95270 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003981258877652696, + "loss": 0.5139, + "step": 95280 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039808348703597706, + "loss": 0.4366, + "step": 95290 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003980410863066845, + "loss": 0.4692, + "step": 95300 + }, + { + "epoch": 4.03, + "learning_rate": 0.000397998685577392, + "loss": 0.4697, + "step": 95310 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003979562848480994, + "loss": 0.3841, + "step": 95320 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003979138841188069, + "loss": 0.4393, + "step": 95330 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003978714833895143, + "loss": 0.4713, + "step": 95340 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003978290826602218, + "loss": 0.4351, + "step": 95350 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003977866819309292, + "loss": 0.4879, + "step": 95360 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003977442812016367, + "loss": 0.5067, + "step": 95370 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039770188047234416, + "loss": 0.4959, + "step": 95380 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003976594797430516, + "loss": 0.3938, + "step": 95390 + }, + { + "epoch": 4.03, + "learning_rate": 0.00039761707901375903, + "loss": 0.452, + "step": 95400 + }, + { + "epoch": 4.03, + "learning_rate": 0.0003975746782844665, + "loss": 0.4825, + "step": 95410 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039753227755517397, + "loss": 0.3996, + "step": 95420 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039748987682588144, + "loss": 0.5005, + "step": 95430 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003974474760965889, + "loss": 0.4829, + "step": 95440 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039740507536729637, + "loss": 0.5097, + "step": 95450 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003973626746380038, + "loss": 0.4815, + "step": 95460 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039732027390871125, + "loss": 0.442, + "step": 95470 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003972778731794187, + "loss": 0.4451, + "step": 95480 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003972354724501262, + "loss": 0.4121, + "step": 95490 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003971930717208336, + "loss": 0.4242, + "step": 95500 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039715067099154106, + "loss": 0.5157, + "step": 95510 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039710827026224853, + "loss": 0.4918, + "step": 95520 + }, + { + "epoch": 4.04, + "learning_rate": 0.000397065869532956, + "loss": 0.5394, + "step": 95530 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039702346880366346, + "loss": 0.4983, + "step": 95540 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003969810680743709, + "loss": 0.5035, + "step": 95550 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039693866734507834, + "loss": 0.4643, + "step": 95560 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003968962666157858, + "loss": 0.422, + "step": 95570 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003968538658864933, + "loss": 0.421, + "step": 95580 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003968114651572007, + "loss": 0.4223, + "step": 95590 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003967690644279082, + "loss": 0.5198, + "step": 95600 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003967266636986156, + "loss": 0.5642, + "step": 95610 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003966842629693231, + "loss": 0.4865, + "step": 95620 + }, + { + "epoch": 4.04, + "learning_rate": 0.0003966418622400305, + "loss": 0.5249, + "step": 95630 + }, + { + "epoch": 4.04, + "learning_rate": 0.000396599461510738, + "loss": 0.4655, + "step": 95640 + }, + { + "epoch": 4.04, + "learning_rate": 0.00039655706078144543, + "loss": 0.4124, + "step": 95650 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003965146600521529, + "loss": 0.4791, + "step": 95660 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039647225932286037, + "loss": 0.3911, + "step": 95670 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039642985859356783, + "loss": 0.5073, + "step": 95680 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039638745786427525, + "loss": 0.4179, + "step": 95690 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039634505713498277, + "loss": 0.5227, + "step": 95700 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003963026564056902, + "loss": 0.4904, + "step": 95710 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039626025567639765, + "loss": 0.47, + "step": 95720 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003962178549471051, + "loss": 0.5209, + "step": 95730 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003961754542178126, + "loss": 0.4696, + "step": 95740 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039613305348852, + "loss": 0.5272, + "step": 95750 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003960906527592275, + "loss": 0.4536, + "step": 95760 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003960482520299349, + "loss": 0.415, + "step": 95770 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003960058513006424, + "loss": 0.424, + "step": 95780 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003959634505713498, + "loss": 0.464, + "step": 95790 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003959210498420573, + "loss": 0.4969, + "step": 95800 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039587864911276474, + "loss": 0.5042, + "step": 95810 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003958362483834722, + "loss": 0.4704, + "step": 95820 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003957938476541797, + "loss": 0.4999, + "step": 95830 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039575144692488714, + "loss": 0.4309, + "step": 95840 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039570904619559455, + "loss": 0.4319, + "step": 95850 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003956666454663021, + "loss": 0.482, + "step": 95860 + }, + { + "epoch": 4.05, + "learning_rate": 0.0003956242447370095, + "loss": 0.4202, + "step": 95870 + }, + { + "epoch": 4.05, + "learning_rate": 0.00039558184400771695, + "loss": 0.4843, + "step": 95880 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003955394432784244, + "loss": 0.5194, + "step": 95890 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003954970425491319, + "loss": 0.5083, + "step": 95900 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003954546418198393, + "loss": 0.3892, + "step": 95910 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003954122410905468, + "loss": 0.449, + "step": 95920 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039536984036125423, + "loss": 0.4672, + "step": 95930 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003953274396319617, + "loss": 0.5553, + "step": 95940 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003952850389026691, + "loss": 0.4667, + "step": 95950 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039524263817337663, + "loss": 0.4841, + "step": 95960 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039520023744408405, + "loss": 0.5358, + "step": 95970 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003951578367147915, + "loss": 0.4394, + "step": 95980 + }, + { + "epoch": 4.06, + "learning_rate": 0.000395115435985499, + "loss": 0.5158, + "step": 95990 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039507303525620645, + "loss": 0.4772, + "step": 96000 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039503063452691386, + "loss": 0.4481, + "step": 96010 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003949882337976214, + "loss": 0.479, + "step": 96020 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003949458330683288, + "loss": 0.3874, + "step": 96030 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039490343233903626, + "loss": 0.4316, + "step": 96040 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003948610316097437, + "loss": 0.4906, + "step": 96050 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003948186308804512, + "loss": 0.597, + "step": 96060 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003947762301511586, + "loss": 0.5307, + "step": 96070 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039473382942186607, + "loss": 0.3736, + "step": 96080 + }, + { + "epoch": 4.06, + "learning_rate": 0.00039469142869257354, + "loss": 0.4193, + "step": 96090 + }, + { + "epoch": 4.06, + "learning_rate": 0.000394649027963281, + "loss": 0.3996, + "step": 96100 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003946066272339884, + "loss": 0.5047, + "step": 96110 + }, + { + "epoch": 4.06, + "learning_rate": 0.0003945642265046959, + "loss": 0.5089, + "step": 96120 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039452182577540335, + "loss": 0.4591, + "step": 96130 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003944794250461108, + "loss": 0.4939, + "step": 96140 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003944370243168183, + "loss": 0.4817, + "step": 96150 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003943946235875257, + "loss": 0.5551, + "step": 96160 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039435222285823316, + "loss": 0.502, + "step": 96170 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039430982212894063, + "loss": 0.5783, + "step": 96180 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003942674213996481, + "loss": 0.5376, + "step": 96190 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039422502067035556, + "loss": 0.5237, + "step": 96200 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039418261994106303, + "loss": 0.4875, + "step": 96210 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039414021921177044, + "loss": 0.4719, + "step": 96220 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003940978184824779, + "loss": 0.4378, + "step": 96230 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003940554177531854, + "loss": 0.4231, + "step": 96240 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039401301702389284, + "loss": 0.4636, + "step": 96250 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039397061629460026, + "loss": 0.474, + "step": 96260 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003939282155653077, + "loss": 0.467, + "step": 96270 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003938858148360152, + "loss": 0.4729, + "step": 96280 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039384341410672266, + "loss": 0.4401, + "step": 96290 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039380101337743007, + "loss": 0.5589, + "step": 96300 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003937586126481376, + "loss": 0.4501, + "step": 96310 + }, + { + "epoch": 4.07, + "learning_rate": 0.000393716211918845, + "loss": 0.4337, + "step": 96320 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039367381118955247, + "loss": 0.4523, + "step": 96330 + }, + { + "epoch": 4.07, + "learning_rate": 0.00039363141046025994, + "loss": 0.5298, + "step": 96340 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003935890097309674, + "loss": 0.5092, + "step": 96350 + }, + { + "epoch": 4.07, + "learning_rate": 0.0003935466090016748, + "loss": 0.4508, + "step": 96360 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039350420827238234, + "loss": 0.4336, + "step": 96370 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039346180754308975, + "loss": 0.4628, + "step": 96380 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003934194068137972, + "loss": 0.4938, + "step": 96390 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039337700608450463, + "loss": 0.547, + "step": 96400 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039333460535521215, + "loss": 0.4567, + "step": 96410 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039329220462591956, + "loss": 0.4711, + "step": 96420 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039324980389662703, + "loss": 0.4571, + "step": 96430 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003932074031673345, + "loss": 0.5278, + "step": 96440 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039316500243804196, + "loss": 0.4761, + "step": 96450 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003931226017087494, + "loss": 0.4586, + "step": 96460 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003930802009794569, + "loss": 0.5102, + "step": 96470 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003930378002501643, + "loss": 0.4727, + "step": 96480 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003929953995208718, + "loss": 0.5176, + "step": 96490 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039295299879157924, + "loss": 0.4586, + "step": 96500 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003929105980622867, + "loss": 0.5411, + "step": 96510 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003928681973329941, + "loss": 0.5188, + "step": 96520 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039282579660370164, + "loss": 0.4543, + "step": 96530 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039278339587440906, + "loss": 0.4383, + "step": 96540 + }, + { + "epoch": 4.08, + "learning_rate": 0.0003927409951451165, + "loss": 0.5262, + "step": 96550 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039269859441582394, + "loss": 0.4256, + "step": 96560 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039265619368653146, + "loss": 0.4418, + "step": 96570 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039261379295723887, + "loss": 0.4116, + "step": 96580 + }, + { + "epoch": 4.08, + "learning_rate": 0.00039257139222794634, + "loss": 0.489, + "step": 96590 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003925289914986538, + "loss": 0.4934, + "step": 96600 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039248659076936127, + "loss": 0.4959, + "step": 96610 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003924441900400687, + "loss": 0.518, + "step": 96620 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003924017893107762, + "loss": 0.417, + "step": 96630 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003923593885814836, + "loss": 0.4185, + "step": 96640 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003923169878521911, + "loss": 0.4338, + "step": 96650 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039227458712289855, + "loss": 0.528, + "step": 96660 + }, + { + "epoch": 4.09, + "learning_rate": 0.000392232186393606, + "loss": 0.5685, + "step": 96670 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039218978566431343, + "loss": 0.4494, + "step": 96680 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039214738493502095, + "loss": 0.4542, + "step": 96690 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039210498420572836, + "loss": 0.4849, + "step": 96700 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039206258347643583, + "loss": 0.5136, + "step": 96710 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039202018274714324, + "loss": 0.5586, + "step": 96720 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039197778201785076, + "loss": 0.4868, + "step": 96730 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003919353812885582, + "loss": 0.5016, + "step": 96740 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039189298055926564, + "loss": 0.4546, + "step": 96750 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003918505798299731, + "loss": 0.4951, + "step": 96760 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003918081791006806, + "loss": 0.4818, + "step": 96770 + }, + { + "epoch": 4.09, + "learning_rate": 0.000391765778371388, + "loss": 0.4039, + "step": 96780 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039172337764209545, + "loss": 0.3697, + "step": 96790 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003916809769128029, + "loss": 0.4654, + "step": 96800 + }, + { + "epoch": 4.09, + "learning_rate": 0.0003916385761835104, + "loss": 0.5017, + "step": 96810 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039159617545421786, + "loss": 0.4317, + "step": 96820 + }, + { + "epoch": 4.09, + "learning_rate": 0.00039155377472492527, + "loss": 0.4901, + "step": 96830 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039151137399563273, + "loss": 0.4857, + "step": 96840 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003914689732663402, + "loss": 0.5235, + "step": 96850 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039142657253704767, + "loss": 0.4437, + "step": 96860 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003913841718077551, + "loss": 0.4663, + "step": 96870 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039134177107846255, + "loss": 0.4361, + "step": 96880 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039129937034917, + "loss": 0.4142, + "step": 96890 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003912569696198775, + "loss": 0.4221, + "step": 96900 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003912145688905849, + "loss": 0.5427, + "step": 96910 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003911721681612924, + "loss": 0.5554, + "step": 96920 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039112976743199983, + "loss": 0.5151, + "step": 96930 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003910873667027073, + "loss": 0.4496, + "step": 96940 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039104496597341476, + "loss": 0.4385, + "step": 96950 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039100256524412223, + "loss": 0.4683, + "step": 96960 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039096016451482964, + "loss": 0.4876, + "step": 96970 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039091776378553716, + "loss": 0.4694, + "step": 96980 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003908753630562446, + "loss": 0.4382, + "step": 96990 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039083296232695204, + "loss": 0.5246, + "step": 97000 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039079056159765945, + "loss": 0.4855, + "step": 97010 + }, + { + "epoch": 4.1, + "learning_rate": 0.000390748160868367, + "loss": 0.5445, + "step": 97020 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003907057601390744, + "loss": 0.4906, + "step": 97030 + }, + { + "epoch": 4.1, + "learning_rate": 0.00039066335940978185, + "loss": 0.5156, + "step": 97040 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003906209586804893, + "loss": 0.5039, + "step": 97050 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003905785579511968, + "loss": 0.3811, + "step": 97060 + }, + { + "epoch": 4.1, + "learning_rate": 0.0003905361572219042, + "loss": 0.4462, + "step": 97070 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003904937564926117, + "loss": 0.5163, + "step": 97080 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039045135576331913, + "loss": 0.4384, + "step": 97090 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003904089550340266, + "loss": 0.4726, + "step": 97100 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039036655430473407, + "loss": 0.4689, + "step": 97110 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039032415357544153, + "loss": 0.4517, + "step": 97120 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039028175284614895, + "loss": 0.4629, + "step": 97130 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039023935211685647, + "loss": 0.5007, + "step": 97140 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003901969513875639, + "loss": 0.4392, + "step": 97150 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039015455065827135, + "loss": 0.543, + "step": 97160 + }, + { + "epoch": 4.11, + "learning_rate": 0.00039011214992897876, + "loss": 0.5684, + "step": 97170 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003900697491996863, + "loss": 0.5694, + "step": 97180 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003900273484703937, + "loss": 0.521, + "step": 97190 + }, + { + "epoch": 4.11, + "learning_rate": 0.00038998494774110116, + "loss": 0.4695, + "step": 97200 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003899425470118086, + "loss": 0.403, + "step": 97210 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003899001462825161, + "loss": 0.4519, + "step": 97220 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003898577455532235, + "loss": 0.4644, + "step": 97230 + }, + { + "epoch": 4.11, + "learning_rate": 0.000389815344823931, + "loss": 0.4333, + "step": 97240 + }, + { + "epoch": 4.11, + "learning_rate": 0.00038977294409463844, + "loss": 0.5098, + "step": 97250 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003897305433653459, + "loss": 0.4364, + "step": 97260 + }, + { + "epoch": 4.11, + "learning_rate": 0.00038968814263605337, + "loss": 0.5358, + "step": 97270 + }, + { + "epoch": 4.11, + "learning_rate": 0.00038964574190676084, + "loss": 0.4475, + "step": 97280 + }, + { + "epoch": 4.11, + "learning_rate": 0.00038960334117746825, + "loss": 0.4379, + "step": 97290 + }, + { + "epoch": 4.11, + "learning_rate": 0.0003895609404481758, + "loss": 0.5201, + "step": 97300 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003895185397188832, + "loss": 0.3804, + "step": 97310 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038947613898959065, + "loss": 0.4451, + "step": 97320 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038943373826029806, + "loss": 0.5364, + "step": 97330 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003893913375310056, + "loss": 0.4478, + "step": 97340 + }, + { + "epoch": 4.12, + "learning_rate": 0.000389348936801713, + "loss": 0.4783, + "step": 97350 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038930653607242047, + "loss": 0.4111, + "step": 97360 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038926413534312793, + "loss": 0.485, + "step": 97370 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003892217346138354, + "loss": 0.5128, + "step": 97380 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003891793338845428, + "loss": 0.4971, + "step": 97390 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003891369331552503, + "loss": 0.4828, + "step": 97400 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038909453242595775, + "loss": 0.5476, + "step": 97410 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003890521316966652, + "loss": 0.5443, + "step": 97420 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003890097309673727, + "loss": 0.4828, + "step": 97430 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038896733023808015, + "loss": 0.4526, + "step": 97440 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038892492950878756, + "loss": 0.5007, + "step": 97450 + }, + { + "epoch": 4.12, + "learning_rate": 0.000388882528779495, + "loss": 0.4236, + "step": 97460 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003888401280502025, + "loss": 0.4574, + "step": 97470 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038879772732090996, + "loss": 0.5109, + "step": 97480 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038875532659161737, + "loss": 0.4673, + "step": 97490 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038871292586232484, + "loss": 0.4868, + "step": 97500 + }, + { + "epoch": 4.12, + "learning_rate": 0.0003886705251330323, + "loss": 0.3506, + "step": 97510 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038862812440373977, + "loss": 0.4401, + "step": 97520 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038858572367444724, + "loss": 0.4874, + "step": 97530 + }, + { + "epoch": 4.12, + "learning_rate": 0.00038854332294515465, + "loss": 0.4959, + "step": 97540 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003885009222158621, + "loss": 0.4939, + "step": 97550 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003884585214865696, + "loss": 0.4449, + "step": 97560 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038841612075727705, + "loss": 0.4768, + "step": 97570 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038837372002798446, + "loss": 0.5877, + "step": 97580 + }, + { + "epoch": 4.13, + "learning_rate": 0.000388331319298692, + "loss": 0.4701, + "step": 97590 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003882889185693994, + "loss": 0.4629, + "step": 97600 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038824651784010686, + "loss": 0.4641, + "step": 97610 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003882041171108143, + "loss": 0.4669, + "step": 97620 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003881617163815218, + "loss": 0.4725, + "step": 97630 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003881193156522292, + "loss": 0.5087, + "step": 97640 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003880769149229367, + "loss": 0.418, + "step": 97650 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038803451419364414, + "loss": 0.5191, + "step": 97660 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003879921134643516, + "loss": 0.4028, + "step": 97670 + }, + { + "epoch": 4.13, + "learning_rate": 0.000387949712735059, + "loss": 0.4205, + "step": 97680 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038790731200576654, + "loss": 0.4399, + "step": 97690 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038786491127647396, + "loss": 0.4639, + "step": 97700 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003878225105471814, + "loss": 0.5034, + "step": 97710 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003877801098178889, + "loss": 0.4934, + "step": 97720 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038773770908859636, + "loss": 0.492, + "step": 97730 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038769530835930377, + "loss": 0.4656, + "step": 97740 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003876529076300113, + "loss": 0.4129, + "step": 97750 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003876105069007187, + "loss": 0.4729, + "step": 97760 + }, + { + "epoch": 4.13, + "learning_rate": 0.00038756810617142617, + "loss": 0.5426, + "step": 97770 + }, + { + "epoch": 4.13, + "learning_rate": 0.0003875257054421336, + "loss": 0.4186, + "step": 97780 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003874833047128411, + "loss": 0.4532, + "step": 97790 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003874409039835485, + "loss": 0.4787, + "step": 97800 + }, + { + "epoch": 4.14, + "learning_rate": 0.000387398503254256, + "loss": 0.4714, + "step": 97810 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038735610252496345, + "loss": 0.4903, + "step": 97820 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003873137017956709, + "loss": 0.4519, + "step": 97830 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038727130106637833, + "loss": 0.5078, + "step": 97840 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038722890033708585, + "loss": 0.4832, + "step": 97850 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038718649960779326, + "loss": 0.5123, + "step": 97860 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038714409887850073, + "loss": 0.4526, + "step": 97870 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003871016981492082, + "loss": 0.5277, + "step": 97880 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038705929741991566, + "loss": 0.4701, + "step": 97890 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003870168966906231, + "loss": 0.4577, + "step": 97900 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003869744959613306, + "loss": 0.4158, + "step": 97910 + }, + { + "epoch": 4.14, + "learning_rate": 0.000386932095232038, + "loss": 0.4899, + "step": 97920 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003868896945027455, + "loss": 0.601, + "step": 97930 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003868472937734529, + "loss": 0.4681, + "step": 97940 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003868048930441604, + "loss": 0.4641, + "step": 97950 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003867624923148678, + "loss": 0.5471, + "step": 97960 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003867200915855753, + "loss": 0.5078, + "step": 97970 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038667769085628276, + "loss": 0.4574, + "step": 97980 + }, + { + "epoch": 4.14, + "learning_rate": 0.0003866352901269902, + "loss": 0.4623, + "step": 97990 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038659288939769764, + "loss": 0.518, + "step": 98000 + }, + { + "epoch": 4.14, + "learning_rate": 0.00038655048866840516, + "loss": 0.5162, + "step": 98010 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038650808793911257, + "loss": 0.4825, + "step": 98020 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038646568720982004, + "loss": 0.5356, + "step": 98030 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003864232864805275, + "loss": 0.386, + "step": 98040 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038638088575123497, + "loss": 0.4012, + "step": 98050 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003863384850219424, + "loss": 0.4765, + "step": 98060 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038629608429264985, + "loss": 0.4969, + "step": 98070 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003862536835633573, + "loss": 0.5055, + "step": 98080 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003862112828340648, + "loss": 0.4125, + "step": 98090 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003861688821047722, + "loss": 0.5299, + "step": 98100 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038612648137547966, + "loss": 0.4013, + "step": 98110 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038608408064618713, + "loss": 0.5099, + "step": 98120 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003860416799168946, + "loss": 0.3924, + "step": 98130 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038599927918760206, + "loss": 0.4424, + "step": 98140 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003859568784583095, + "loss": 0.495, + "step": 98150 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038591447772901694, + "loss": 0.4864, + "step": 98160 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003858720769997244, + "loss": 0.458, + "step": 98170 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003858296762704319, + "loss": 0.4734, + "step": 98180 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038578727554113934, + "loss": 0.5201, + "step": 98190 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003857448748118468, + "loss": 0.5129, + "step": 98200 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003857024740825542, + "loss": 0.4544, + "step": 98210 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003856600733532617, + "loss": 0.4701, + "step": 98220 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038561767262396915, + "loss": 0.4781, + "step": 98230 + }, + { + "epoch": 4.15, + "learning_rate": 0.0003855752718946766, + "loss": 0.4573, + "step": 98240 + }, + { + "epoch": 4.15, + "learning_rate": 0.00038553287116538403, + "loss": 0.462, + "step": 98250 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003854904704360915, + "loss": 0.5033, + "step": 98260 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038544806970679897, + "loss": 0.4977, + "step": 98270 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038540566897750643, + "loss": 0.5035, + "step": 98280 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038536326824821385, + "loss": 0.4727, + "step": 98290 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038532086751892137, + "loss": 0.4455, + "step": 98300 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003852784667896288, + "loss": 0.5257, + "step": 98310 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038523606606033625, + "loss": 0.5465, + "step": 98320 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003851936653310437, + "loss": 0.4572, + "step": 98330 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003851512646017512, + "loss": 0.4427, + "step": 98340 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003851088638724586, + "loss": 0.5449, + "step": 98350 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003850664631431661, + "loss": 0.4233, + "step": 98360 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003850240624138735, + "loss": 0.4447, + "step": 98370 + }, + { + "epoch": 4.16, + "learning_rate": 0.000384981661684581, + "loss": 0.4358, + "step": 98380 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003849392609552884, + "loss": 0.4116, + "step": 98390 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038489686022599593, + "loss": 0.4757, + "step": 98400 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038485445949670334, + "loss": 0.4725, + "step": 98410 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003848120587674108, + "loss": 0.4837, + "step": 98420 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003847696580381183, + "loss": 0.4455, + "step": 98430 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038472725730882574, + "loss": 0.4123, + "step": 98440 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038468485657953315, + "loss": 0.4927, + "step": 98450 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003846424558502407, + "loss": 0.4349, + "step": 98460 + }, + { + "epoch": 4.16, + "learning_rate": 0.0003846000551209481, + "loss": 0.5418, + "step": 98470 + }, + { + "epoch": 4.16, + "learning_rate": 0.00038455765439165555, + "loss": 0.5195, + "step": 98480 + }, + { + "epoch": 4.17, + "learning_rate": 0.000384515253662363, + "loss": 0.4384, + "step": 98490 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003844728529330705, + "loss": 0.3954, + "step": 98500 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003844304522037779, + "loss": 0.4554, + "step": 98510 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003843880514744854, + "loss": 0.4049, + "step": 98520 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038434565074519283, + "loss": 0.4664, + "step": 98530 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003843032500159003, + "loss": 0.4463, + "step": 98540 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003842608492866077, + "loss": 0.4708, + "step": 98550 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038421844855731523, + "loss": 0.5715, + "step": 98560 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038417604782802265, + "loss": 0.5209, + "step": 98570 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003841336470987301, + "loss": 0.4115, + "step": 98580 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003840912463694376, + "loss": 0.4621, + "step": 98590 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038404884564014505, + "loss": 0.4495, + "step": 98600 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038400644491085246, + "loss": 0.4998, + "step": 98610 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038396404418156, + "loss": 0.4962, + "step": 98620 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003839216434522674, + "loss": 0.4991, + "step": 98630 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038387924272297486, + "loss": 0.4656, + "step": 98640 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003838368419936823, + "loss": 0.4955, + "step": 98650 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003837944412643898, + "loss": 0.5307, + "step": 98660 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003837520405350972, + "loss": 0.5815, + "step": 98670 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003837096398058047, + "loss": 0.4562, + "step": 98680 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038366723907651214, + "loss": 0.5252, + "step": 98690 + }, + { + "epoch": 4.17, + "learning_rate": 0.0003836248383472196, + "loss": 0.5637, + "step": 98700 + }, + { + "epoch": 4.17, + "learning_rate": 0.000383582437617927, + "loss": 0.5278, + "step": 98710 + }, + { + "epoch": 4.17, + "learning_rate": 0.00038354003688863454, + "loss": 0.522, + "step": 98720 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038349763615934195, + "loss": 0.588, + "step": 98730 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003834552354300494, + "loss": 0.4258, + "step": 98740 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003834128347007569, + "loss": 0.4393, + "step": 98750 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038337043397146435, + "loss": 0.4584, + "step": 98760 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038332803324217176, + "loss": 0.4852, + "step": 98770 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038328563251287923, + "loss": 0.5235, + "step": 98780 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003832432317835867, + "loss": 0.4592, + "step": 98790 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038320083105429417, + "loss": 0.5108, + "step": 98800 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038315843032500163, + "loss": 0.4966, + "step": 98810 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038311602959570904, + "loss": 0.5211, + "step": 98820 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003830736288664165, + "loss": 0.5422, + "step": 98830 + }, + { + "epoch": 4.18, + "learning_rate": 0.000383031228137124, + "loss": 0.5236, + "step": 98840 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038298882740783144, + "loss": 0.4361, + "step": 98850 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038294642667853886, + "loss": 0.4658, + "step": 98860 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003829040259492463, + "loss": 0.4, + "step": 98870 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003828616252199538, + "loss": 0.3967, + "step": 98880 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038281922449066126, + "loss": 0.5085, + "step": 98890 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038277682376136867, + "loss": 0.4977, + "step": 98900 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003827344230320762, + "loss": 0.5347, + "step": 98910 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003826920223027836, + "loss": 0.5013, + "step": 98920 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038264962157349107, + "loss": 0.4075, + "step": 98930 + }, + { + "epoch": 4.18, + "learning_rate": 0.00038260722084419854, + "loss": 0.4998, + "step": 98940 + }, + { + "epoch": 4.18, + "learning_rate": 0.000382564820114906, + "loss": 0.4893, + "step": 98950 + }, + { + "epoch": 4.18, + "learning_rate": 0.0003825224193856134, + "loss": 0.4438, + "step": 98960 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038248001865632094, + "loss": 0.4782, + "step": 98970 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038243761792702835, + "loss": 0.4554, + "step": 98980 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003823952171977358, + "loss": 0.4415, + "step": 98990 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038235281646844323, + "loss": 0.5155, + "step": 99000 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038231041573915075, + "loss": 0.4368, + "step": 99010 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038226801500985816, + "loss": 0.5128, + "step": 99020 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038222561428056563, + "loss": 0.5842, + "step": 99030 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003821832135512731, + "loss": 0.5101, + "step": 99040 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038214081282198056, + "loss": 0.4631, + "step": 99050 + }, + { + "epoch": 4.19, + "learning_rate": 0.000382098412092688, + "loss": 0.4853, + "step": 99060 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003820560113633955, + "loss": 0.5459, + "step": 99070 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003820136106341029, + "loss": 0.4844, + "step": 99080 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003819712099048104, + "loss": 0.4817, + "step": 99090 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038192880917551784, + "loss": 0.4181, + "step": 99100 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003818864084462253, + "loss": 0.4381, + "step": 99110 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003818440077169327, + "loss": 0.4025, + "step": 99120 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038180160698764024, + "loss": 0.4582, + "step": 99130 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038175920625834766, + "loss": 0.4998, + "step": 99140 + }, + { + "epoch": 4.19, + "learning_rate": 0.0003817168055290551, + "loss": 0.417, + "step": 99150 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038167440479976254, + "loss": 0.4082, + "step": 99160 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038163200407047006, + "loss": 0.5306, + "step": 99170 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038158960334117747, + "loss": 0.4372, + "step": 99180 + }, + { + "epoch": 4.19, + "learning_rate": 0.00038154720261188494, + "loss": 0.5193, + "step": 99190 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003815048018825924, + "loss": 0.5284, + "step": 99200 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038146240115329987, + "loss": 0.455, + "step": 99210 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003814200004240073, + "loss": 0.4665, + "step": 99220 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003813775996947148, + "loss": 0.5504, + "step": 99230 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003813351989654222, + "loss": 0.5824, + "step": 99240 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003812927982361297, + "loss": 0.4241, + "step": 99250 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038125039750683715, + "loss": 0.4434, + "step": 99260 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003812079967775446, + "loss": 0.4456, + "step": 99270 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038116559604825203, + "loss": 0.4553, + "step": 99280 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038112319531895955, + "loss": 0.494, + "step": 99290 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038108079458966696, + "loss": 0.571, + "step": 99300 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038103839386037443, + "loss": 0.4913, + "step": 99310 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038099599313108184, + "loss": 0.5229, + "step": 99320 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038095359240178936, + "loss": 0.5163, + "step": 99330 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003809111916724968, + "loss": 0.4645, + "step": 99340 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038086879094320424, + "loss": 0.5403, + "step": 99350 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003808263902139117, + "loss": 0.5003, + "step": 99360 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003807839894846192, + "loss": 0.4849, + "step": 99370 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003807415887553266, + "loss": 0.4452, + "step": 99380 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038069918802603406, + "loss": 0.4926, + "step": 99390 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003806567872967415, + "loss": 0.5515, + "step": 99400 + }, + { + "epoch": 4.2, + "learning_rate": 0.000380614386567449, + "loss": 0.4629, + "step": 99410 + }, + { + "epoch": 4.2, + "learning_rate": 0.00038057198583815646, + "loss": 0.5346, + "step": 99420 + }, + { + "epoch": 4.2, + "learning_rate": 0.0003805295851088639, + "loss": 0.4315, + "step": 99430 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038048718437957133, + "loss": 0.4702, + "step": 99440 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003804447836502788, + "loss": 0.5576, + "step": 99450 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038040238292098627, + "loss": 0.4938, + "step": 99460 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038035998219169374, + "loss": 0.4466, + "step": 99470 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038031758146240115, + "loss": 0.4295, + "step": 99480 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003802751807331086, + "loss": 0.5209, + "step": 99490 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003802327800038161, + "loss": 0.4908, + "step": 99500 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038019037927452355, + "loss": 0.4614, + "step": 99510 + }, + { + "epoch": 4.21, + "learning_rate": 0.000380147978545231, + "loss": 0.5161, + "step": 99520 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038010557781593843, + "loss": 0.6379, + "step": 99530 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003800631770866459, + "loss": 0.4807, + "step": 99540 + }, + { + "epoch": 4.21, + "learning_rate": 0.00038002077635735336, + "loss": 0.5525, + "step": 99550 + }, + { + "epoch": 4.21, + "learning_rate": 0.00037997837562806083, + "loss": 0.4906, + "step": 99560 + }, + { + "epoch": 4.21, + "learning_rate": 0.00037993597489876824, + "loss": 0.4352, + "step": 99570 + }, + { + "epoch": 4.21, + "learning_rate": 0.00037989357416947576, + "loss": 0.4641, + "step": 99580 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003798511734401832, + "loss": 0.5135, + "step": 99590 + }, + { + "epoch": 4.21, + "learning_rate": 0.00037980877271089064, + "loss": 0.4699, + "step": 99600 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003797663719815981, + "loss": 0.5312, + "step": 99610 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003797239712523056, + "loss": 0.4993, + "step": 99620 + }, + { + "epoch": 4.21, + "learning_rate": 0.000379681570523013, + "loss": 0.5444, + "step": 99630 + }, + { + "epoch": 4.21, + "learning_rate": 0.00037963916979372045, + "loss": 0.5327, + "step": 99640 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003795967690644279, + "loss": 0.4989, + "step": 99650 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003795543683351354, + "loss": 0.4361, + "step": 99660 + }, + { + "epoch": 4.21, + "learning_rate": 0.0003795119676058428, + "loss": 0.4836, + "step": 99670 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003794695668765503, + "loss": 0.547, + "step": 99680 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037942716614725773, + "loss": 0.4711, + "step": 99690 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003793847654179652, + "loss": 0.5111, + "step": 99700 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037934236468867267, + "loss": 0.5302, + "step": 99710 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037929996395938013, + "loss": 0.4733, + "step": 99720 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037925756323008755, + "loss": 0.5167, + "step": 99730 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037921516250079507, + "loss": 0.4723, + "step": 99740 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003791727617715025, + "loss": 0.4516, + "step": 99750 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037913036104220995, + "loss": 0.4104, + "step": 99760 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037908796031291736, + "loss": 0.4714, + "step": 99770 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003790455595836249, + "loss": 0.4702, + "step": 99780 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003790031588543323, + "loss": 0.5058, + "step": 99790 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037896075812503976, + "loss": 0.4766, + "step": 99800 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003789183573957472, + "loss": 0.476, + "step": 99810 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003788759566664547, + "loss": 0.4197, + "step": 99820 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003788335559371621, + "loss": 0.4561, + "step": 99830 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003787911552078696, + "loss": 0.495, + "step": 99840 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037874875447857704, + "loss": 0.4526, + "step": 99850 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003787063537492845, + "loss": 0.4722, + "step": 99860 + }, + { + "epoch": 4.22, + "learning_rate": 0.000378663953019992, + "loss": 0.5302, + "step": 99870 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037862155229069944, + "loss": 0.4386, + "step": 99880 + }, + { + "epoch": 4.22, + "learning_rate": 0.00037857915156140685, + "loss": 0.4804, + "step": 99890 + }, + { + "epoch": 4.22, + "learning_rate": 0.0003785367508321144, + "loss": 0.4799, + "step": 99900 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003784943501028218, + "loss": 0.4876, + "step": 99910 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037845194937352925, + "loss": 0.5028, + "step": 99920 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037840954864423667, + "loss": 0.4858, + "step": 99930 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003783671479149442, + "loss": 0.5198, + "step": 99940 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003783247471856516, + "loss": 0.5352, + "step": 99950 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037828234645635907, + "loss": 0.4749, + "step": 99960 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037823994572706653, + "loss": 0.5112, + "step": 99970 + }, + { + "epoch": 4.23, + "learning_rate": 0.000378197544997774, + "loss": 0.474, + "step": 99980 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003781551442684814, + "loss": 0.5256, + "step": 99990 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037811274353918893, + "loss": 0.5201, + "step": 100000 + }, + { + "epoch": 4.23, + "eval_loss": 0.6122934222221375, + "eval_runtime": 337.7306, + "eval_samples_per_second": 15.56, + "eval_steps_per_second": 3.891, + "step": 100000 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037807034280989635, + "loss": 0.5115, + "step": 100010 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003780279420806038, + "loss": 0.479, + "step": 100020 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003779855413513113, + "loss": 0.4572, + "step": 100030 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037794314062201875, + "loss": 0.5408, + "step": 100040 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037790073989272616, + "loss": 0.458, + "step": 100050 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003778583391634336, + "loss": 0.5142, + "step": 100060 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003778159384341411, + "loss": 0.4192, + "step": 100070 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037777353770484856, + "loss": 0.4939, + "step": 100080 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037773113697555597, + "loss": 0.5021, + "step": 100090 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037768873624626344, + "loss": 0.5499, + "step": 100100 + }, + { + "epoch": 4.23, + "learning_rate": 0.0003776463355169709, + "loss": 0.4566, + "step": 100110 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037760393478767837, + "loss": 0.4493, + "step": 100120 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037756153405838584, + "loss": 0.4751, + "step": 100130 + }, + { + "epoch": 4.23, + "learning_rate": 0.00037751913332909325, + "loss": 0.5277, + "step": 100140 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003774767325998007, + "loss": 0.5718, + "step": 100150 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003774343318705082, + "loss": 0.4864, + "step": 100160 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037739193114121565, + "loss": 0.4527, + "step": 100170 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037734953041192306, + "loss": 0.4825, + "step": 100180 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003773071296826306, + "loss": 0.3646, + "step": 100190 + }, + { + "epoch": 4.24, + "learning_rate": 0.000377264728953338, + "loss": 0.4667, + "step": 100200 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037722232822404546, + "loss": 0.4921, + "step": 100210 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037717992749475293, + "loss": 0.4794, + "step": 100220 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003771375267654604, + "loss": 0.5227, + "step": 100230 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003770951260361678, + "loss": 0.503, + "step": 100240 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003770527253068753, + "loss": 0.4732, + "step": 100250 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037701032457758274, + "loss": 0.4712, + "step": 100260 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003769679238482902, + "loss": 0.4727, + "step": 100270 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003769255231189976, + "loss": 0.3906, + "step": 100280 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037688312238970514, + "loss": 0.5432, + "step": 100290 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037684072166041256, + "loss": 0.464, + "step": 100300 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037679832093112, + "loss": 0.4892, + "step": 100310 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003767559202018275, + "loss": 0.4791, + "step": 100320 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037671351947253496, + "loss": 0.4881, + "step": 100330 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037667111874324237, + "loss": 0.4679, + "step": 100340 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003766287180139499, + "loss": 0.4985, + "step": 100350 + }, + { + "epoch": 4.24, + "learning_rate": 0.0003765863172846573, + "loss": 0.4248, + "step": 100360 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037654391655536477, + "loss": 0.5163, + "step": 100370 + }, + { + "epoch": 4.24, + "learning_rate": 0.00037650151582607224, + "loss": 0.4325, + "step": 100380 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003764591150967797, + "loss": 0.411, + "step": 100390 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003764167143674871, + "loss": 0.4943, + "step": 100400 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003763743136381946, + "loss": 0.4491, + "step": 100410 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037633191290890205, + "loss": 0.4697, + "step": 100420 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003762895121796095, + "loss": 0.5559, + "step": 100430 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037624711145031693, + "loss": 0.4303, + "step": 100440 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037620471072102445, + "loss": 0.6466, + "step": 100450 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037616230999173186, + "loss": 0.4771, + "step": 100460 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037611990926243933, + "loss": 0.4422, + "step": 100470 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003760775085331468, + "loss": 0.5213, + "step": 100480 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037603510780385426, + "loss": 0.4292, + "step": 100490 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003759927070745617, + "loss": 0.5017, + "step": 100500 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003759503063452692, + "loss": 0.4536, + "step": 100510 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003759079056159766, + "loss": 0.5045, + "step": 100520 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003758655048866841, + "loss": 0.5174, + "step": 100530 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003758231041573915, + "loss": 0.5312, + "step": 100540 + }, + { + "epoch": 4.25, + "learning_rate": 0.000375780703428099, + "loss": 0.4634, + "step": 100550 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003757383026988064, + "loss": 0.5226, + "step": 100560 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003756959019695139, + "loss": 0.4747, + "step": 100570 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037565350124022136, + "loss": 0.469, + "step": 100580 + }, + { + "epoch": 4.25, + "learning_rate": 0.0003756111005109288, + "loss": 0.4342, + "step": 100590 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037556869978163624, + "loss": 0.4665, + "step": 100600 + }, + { + "epoch": 4.25, + "learning_rate": 0.00037552629905234376, + "loss": 0.4794, + "step": 100610 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037548389832305117, + "loss": 0.4671, + "step": 100620 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037544149759375864, + "loss": 0.4681, + "step": 100630 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003753990968644661, + "loss": 0.5686, + "step": 100640 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037535669613517357, + "loss": 0.4833, + "step": 100650 + }, + { + "epoch": 4.26, + "learning_rate": 0.000375314295405881, + "loss": 0.4997, + "step": 100660 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037527189467658845, + "loss": 0.4928, + "step": 100670 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003752294939472959, + "loss": 0.4515, + "step": 100680 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003751870932180034, + "loss": 0.4354, + "step": 100690 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003751446924887108, + "loss": 0.4629, + "step": 100700 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003751022917594183, + "loss": 0.6308, + "step": 100710 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037505989103012573, + "loss": 0.5108, + "step": 100720 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003750174903008332, + "loss": 0.5576, + "step": 100730 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037497508957154066, + "loss": 0.457, + "step": 100740 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037493268884224813, + "loss": 0.4403, + "step": 100750 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037489028811295554, + "loss": 0.5121, + "step": 100760 + }, + { + "epoch": 4.26, + "learning_rate": 0.000374847887383663, + "loss": 0.4065, + "step": 100770 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003748054866543705, + "loss": 0.4476, + "step": 100780 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037476308592507794, + "loss": 0.5822, + "step": 100790 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003747206851957854, + "loss": 0.544, + "step": 100800 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003746782844664928, + "loss": 0.54, + "step": 100810 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003746358837372003, + "loss": 0.4811, + "step": 100820 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037459348300790775, + "loss": 0.4448, + "step": 100830 + }, + { + "epoch": 4.26, + "learning_rate": 0.0003745510822786152, + "loss": 0.5341, + "step": 100840 + }, + { + "epoch": 4.26, + "learning_rate": 0.00037450868154932263, + "loss": 0.5665, + "step": 100850 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003744662808200301, + "loss": 0.4939, + "step": 100860 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037442388009073757, + "loss": 0.4745, + "step": 100870 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037438147936144503, + "loss": 0.4471, + "step": 100880 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037433907863215245, + "loss": 0.5303, + "step": 100890 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037429667790285997, + "loss": 0.4711, + "step": 100900 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003742542771735674, + "loss": 0.4201, + "step": 100910 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037421187644427485, + "loss": 0.4695, + "step": 100920 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003741694757149823, + "loss": 0.4731, + "step": 100930 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003741270749856898, + "loss": 0.4791, + "step": 100940 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003740846742563972, + "loss": 0.4711, + "step": 100950 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003740422735271047, + "loss": 0.4971, + "step": 100960 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037399987279781213, + "loss": 0.4488, + "step": 100970 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003739574720685196, + "loss": 0.449, + "step": 100980 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037391507133922706, + "loss": 0.5899, + "step": 100990 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037387267060993453, + "loss": 0.4882, + "step": 101000 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037383026988064194, + "loss": 0.5395, + "step": 101010 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003737878691513494, + "loss": 0.618, + "step": 101020 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003737454684220569, + "loss": 0.4357, + "step": 101030 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037370306769276434, + "loss": 0.4591, + "step": 101040 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037366066696347175, + "loss": 0.4628, + "step": 101050 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003736182662341793, + "loss": 0.4982, + "step": 101060 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003735758655048867, + "loss": 0.4522, + "step": 101070 + }, + { + "epoch": 4.27, + "learning_rate": 0.00037353346477559415, + "loss": 0.456, + "step": 101080 + }, + { + "epoch": 4.27, + "learning_rate": 0.0003734910640463016, + "loss": 0.4378, + "step": 101090 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003734486633170091, + "loss": 0.4571, + "step": 101100 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003734062625877165, + "loss": 0.5577, + "step": 101110 + }, + { + "epoch": 4.28, + "learning_rate": 0.000373363861858424, + "loss": 0.4907, + "step": 101120 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037332146112913143, + "loss": 0.4421, + "step": 101130 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003732790603998389, + "loss": 0.4506, + "step": 101140 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003732366596705463, + "loss": 0.4444, + "step": 101150 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037319425894125383, + "loss": 0.4696, + "step": 101160 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037315185821196125, + "loss": 0.4072, + "step": 101170 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003731094574826687, + "loss": 0.3991, + "step": 101180 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003730670567533762, + "loss": 0.392, + "step": 101190 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037302465602408365, + "loss": 0.4471, + "step": 101200 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037298225529479106, + "loss": 0.5004, + "step": 101210 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003729398545654986, + "loss": 0.4755, + "step": 101220 + }, + { + "epoch": 4.28, + "learning_rate": 0.000372897453836206, + "loss": 0.4983, + "step": 101230 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037285505310691346, + "loss": 0.504, + "step": 101240 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003728126523776209, + "loss": 0.5152, + "step": 101250 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003727702516483284, + "loss": 0.5447, + "step": 101260 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003727278509190358, + "loss": 0.4825, + "step": 101270 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003726854501897433, + "loss": 0.5222, + "step": 101280 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037264304946045074, + "loss": 0.4397, + "step": 101290 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003726006487311582, + "loss": 0.5157, + "step": 101300 + }, + { + "epoch": 4.28, + "learning_rate": 0.0003725582480018656, + "loss": 0.4055, + "step": 101310 + }, + { + "epoch": 4.28, + "learning_rate": 0.00037251584727257314, + "loss": 0.4765, + "step": 101320 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037247344654328055, + "loss": 0.4089, + "step": 101330 + }, + { + "epoch": 4.29, + "learning_rate": 0.000372431045813988, + "loss": 0.469, + "step": 101340 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003723886450846955, + "loss": 0.539, + "step": 101350 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037234624435540295, + "loss": 0.5122, + "step": 101360 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037230384362611037, + "loss": 0.5208, + "step": 101370 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037226144289681783, + "loss": 0.6185, + "step": 101380 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003722190421675253, + "loss": 0.4555, + "step": 101390 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037217664143823277, + "loss": 0.47, + "step": 101400 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037213424070894023, + "loss": 0.4936, + "step": 101410 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037209183997964764, + "loss": 0.5135, + "step": 101420 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003720494392503551, + "loss": 0.462, + "step": 101430 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003720070385210626, + "loss": 0.4825, + "step": 101440 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037196463779177005, + "loss": 0.4943, + "step": 101450 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003719222370624775, + "loss": 0.4534, + "step": 101460 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003718798363331849, + "loss": 0.4931, + "step": 101470 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003718374356038924, + "loss": 0.4604, + "step": 101480 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037179503487459986, + "loss": 0.4067, + "step": 101490 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003717526341453073, + "loss": 0.4985, + "step": 101500 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003717102334160148, + "loss": 0.4664, + "step": 101510 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003716678326867222, + "loss": 0.4352, + "step": 101520 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037162543195742967, + "loss": 0.4618, + "step": 101530 + }, + { + "epoch": 4.29, + "learning_rate": 0.00037158303122813714, + "loss": 0.5526, + "step": 101540 + }, + { + "epoch": 4.29, + "learning_rate": 0.0003715406304988446, + "loss": 0.4533, + "step": 101550 + }, + { + "epoch": 4.29, + "learning_rate": 0.000371498229769552, + "loss": 0.5558, + "step": 101560 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037145582904025954, + "loss": 0.4319, + "step": 101570 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037141342831096695, + "loss": 0.5158, + "step": 101580 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003713710275816744, + "loss": 0.6012, + "step": 101590 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003713286268523819, + "loss": 0.5404, + "step": 101600 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037128622612308935, + "loss": 0.4806, + "step": 101610 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037124382539379676, + "loss": 0.4804, + "step": 101620 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037120142466450423, + "loss": 0.4985, + "step": 101630 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003711590239352117, + "loss": 0.4649, + "step": 101640 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037111662320591916, + "loss": 0.4198, + "step": 101650 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003710742224766266, + "loss": 0.5085, + "step": 101660 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003710318217473341, + "loss": 0.4929, + "step": 101670 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003709894210180415, + "loss": 0.5219, + "step": 101680 + }, + { + "epoch": 4.3, + "learning_rate": 0.000370947020288749, + "loss": 0.4805, + "step": 101690 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037090461955945644, + "loss": 0.3792, + "step": 101700 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003708622188301639, + "loss": 0.5078, + "step": 101710 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003708198181008713, + "loss": 0.5032, + "step": 101720 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037077741737157884, + "loss": 0.4606, + "step": 101730 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037073501664228626, + "loss": 0.5181, + "step": 101740 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003706926159129937, + "loss": 0.4921, + "step": 101750 + }, + { + "epoch": 4.3, + "learning_rate": 0.0003706502151837012, + "loss": 0.5124, + "step": 101760 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037060781445440866, + "loss": 0.4922, + "step": 101770 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037056541372511607, + "loss": 0.4976, + "step": 101780 + }, + { + "epoch": 4.3, + "learning_rate": 0.00037052301299582354, + "loss": 0.565, + "step": 101790 + }, + { + "epoch": 4.3, + "learning_rate": 0.000370480612266531, + "loss": 0.4461, + "step": 101800 + }, + { + "epoch": 4.31, + "learning_rate": 0.00037043821153723847, + "loss": 0.4763, + "step": 101810 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003703958108079459, + "loss": 0.4917, + "step": 101820 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003703534100786534, + "loss": 0.4658, + "step": 101830 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003703110093493608, + "loss": 0.5279, + "step": 101840 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003702686086200683, + "loss": 0.5185, + "step": 101850 + }, + { + "epoch": 4.31, + "learning_rate": 0.00037022620789077575, + "loss": 0.4976, + "step": 101860 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003701838071614832, + "loss": 0.4343, + "step": 101870 + }, + { + "epoch": 4.31, + "learning_rate": 0.00037014140643219063, + "loss": 0.5151, + "step": 101880 + }, + { + "epoch": 4.31, + "learning_rate": 0.00037009900570289815, + "loss": 0.4488, + "step": 101890 + }, + { + "epoch": 4.31, + "learning_rate": 0.00037005660497360556, + "loss": 0.5107, + "step": 101900 + }, + { + "epoch": 4.31, + "learning_rate": 0.00037001420424431303, + "loss": 0.414, + "step": 101910 + }, + { + "epoch": 4.31, + "learning_rate": 0.00036997180351502044, + "loss": 0.4319, + "step": 101920 + }, + { + "epoch": 4.31, + "learning_rate": 0.00036992940278572796, + "loss": 0.5021, + "step": 101930 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003698870020564354, + "loss": 0.4721, + "step": 101940 + }, + { + "epoch": 4.31, + "learning_rate": 0.00036984460132714284, + "loss": 0.4593, + "step": 101950 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003698022005978503, + "loss": 0.6094, + "step": 101960 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003697597998685578, + "loss": 0.5516, + "step": 101970 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003697173991392652, + "loss": 0.5456, + "step": 101980 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003696749984099727, + "loss": 0.5488, + "step": 101990 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003696325976806801, + "loss": 0.5308, + "step": 102000 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003695901969513876, + "loss": 0.4163, + "step": 102010 + }, + { + "epoch": 4.31, + "learning_rate": 0.00036954779622209506, + "loss": 0.5638, + "step": 102020 + }, + { + "epoch": 4.31, + "learning_rate": 0.0003695053954928025, + "loss": 0.5331, + "step": 102030 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036946299476350994, + "loss": 0.4481, + "step": 102040 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003694205940342174, + "loss": 0.4986, + "step": 102050 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036937819330492487, + "loss": 0.4702, + "step": 102060 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036933579257563234, + "loss": 0.4666, + "step": 102070 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036929339184633975, + "loss": 0.5633, + "step": 102080 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003692509911170472, + "loss": 0.4454, + "step": 102090 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003692085903877547, + "loss": 0.5684, + "step": 102100 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036916618965846215, + "loss": 0.5848, + "step": 102110 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003691237889291696, + "loss": 0.5835, + "step": 102120 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036908138819987703, + "loss": 0.4701, + "step": 102130 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003690389874705845, + "loss": 0.4939, + "step": 102140 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036899658674129196, + "loss": 0.5113, + "step": 102150 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036895418601199943, + "loss": 0.489, + "step": 102160 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036891178528270684, + "loss": 0.3875, + "step": 102170 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036886938455341436, + "loss": 0.5317, + "step": 102180 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003688269838241218, + "loss": 0.558, + "step": 102190 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036878458309482924, + "loss": 0.4438, + "step": 102200 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003687421823655367, + "loss": 0.486, + "step": 102210 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003686997816362442, + "loss": 0.4561, + "step": 102220 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003686573809069516, + "loss": 0.4441, + "step": 102230 + }, + { + "epoch": 4.32, + "learning_rate": 0.00036861498017765905, + "loss": 0.5718, + "step": 102240 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003685725794483665, + "loss": 0.4413, + "step": 102250 + }, + { + "epoch": 4.32, + "learning_rate": 0.000368530178719074, + "loss": 0.5337, + "step": 102260 + }, + { + "epoch": 4.32, + "learning_rate": 0.0003684877779897814, + "loss": 0.5053, + "step": 102270 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003684453772604889, + "loss": 0.523, + "step": 102280 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036840297653119633, + "loss": 0.5179, + "step": 102290 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003683605758019038, + "loss": 0.4743, + "step": 102300 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036831817507261127, + "loss": 0.4615, + "step": 102310 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036827577434331873, + "loss": 0.4905, + "step": 102320 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036823337361402615, + "loss": 0.4638, + "step": 102330 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036819097288473367, + "loss": 0.5252, + "step": 102340 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003681485721554411, + "loss": 0.4362, + "step": 102350 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036810617142614855, + "loss": 0.4802, + "step": 102360 + }, + { + "epoch": 4.33, + "learning_rate": 0.000368063770696856, + "loss": 0.5025, + "step": 102370 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003680213699675635, + "loss": 0.4221, + "step": 102380 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003679789692382709, + "loss": 0.5063, + "step": 102390 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036793656850897836, + "loss": 0.6129, + "step": 102400 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003678941677796858, + "loss": 0.4768, + "step": 102410 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003678517670503933, + "loss": 0.482, + "step": 102420 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003678093663211007, + "loss": 0.5466, + "step": 102430 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036776696559180823, + "loss": 0.5793, + "step": 102440 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036772456486251564, + "loss": 0.5644, + "step": 102450 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003676821641332231, + "loss": 0.4425, + "step": 102460 + }, + { + "epoch": 4.33, + "learning_rate": 0.0003676397634039306, + "loss": 0.4284, + "step": 102470 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036759736267463804, + "loss": 0.4455, + "step": 102480 + }, + { + "epoch": 4.33, + "learning_rate": 0.00036755496194534545, + "loss": 0.4762, + "step": 102490 + }, + { + "epoch": 4.33, + "learning_rate": 0.000367512561216053, + "loss": 0.5369, + "step": 102500 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003674701604867604, + "loss": 0.5148, + "step": 102510 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036742775975746785, + "loss": 0.4669, + "step": 102520 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003673853590281753, + "loss": 0.5429, + "step": 102530 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003673429582988828, + "loss": 0.5196, + "step": 102540 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003673005575695902, + "loss": 0.4884, + "step": 102550 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036725815684029767, + "loss": 0.4548, + "step": 102560 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036721575611100513, + "loss": 0.4906, + "step": 102570 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003671733553817126, + "loss": 0.4371, + "step": 102580 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036713095465242, + "loss": 0.4545, + "step": 102590 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036708855392312753, + "loss": 0.4472, + "step": 102600 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036704615319383495, + "loss": 0.4258, + "step": 102610 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003670037524645424, + "loss": 0.5266, + "step": 102620 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003669613517352499, + "loss": 0.4329, + "step": 102630 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036691895100595735, + "loss": 0.5059, + "step": 102640 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036687655027666476, + "loss": 0.4846, + "step": 102650 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003668341495473722, + "loss": 0.5789, + "step": 102660 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003667917488180797, + "loss": 0.4559, + "step": 102670 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036674934808878716, + "loss": 0.4843, + "step": 102680 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036670694735949457, + "loss": 0.4956, + "step": 102690 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003666645466302021, + "loss": 0.5291, + "step": 102700 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003666221459009095, + "loss": 0.5475, + "step": 102710 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036657974517161697, + "loss": 0.4643, + "step": 102720 + }, + { + "epoch": 4.34, + "learning_rate": 0.00036653734444232444, + "loss": 0.5001, + "step": 102730 + }, + { + "epoch": 4.34, + "learning_rate": 0.0003664949437130319, + "loss": 0.4551, + "step": 102740 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003664525429837393, + "loss": 0.5982, + "step": 102750 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003664101422544468, + "loss": 0.5116, + "step": 102760 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036636774152515425, + "loss": 0.4814, + "step": 102770 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003663253407958617, + "loss": 0.5746, + "step": 102780 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003662829400665692, + "loss": 0.3834, + "step": 102790 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003662405393372766, + "loss": 0.5123, + "step": 102800 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036619813860798406, + "loss": 0.4858, + "step": 102810 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036615573787869153, + "loss": 0.4419, + "step": 102820 + }, + { + "epoch": 4.35, + "learning_rate": 0.000366113337149399, + "loss": 0.4902, + "step": 102830 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003660709364201064, + "loss": 0.4806, + "step": 102840 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003660285356908139, + "loss": 0.494, + "step": 102850 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036598613496152134, + "loss": 0.513, + "step": 102860 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003659437342322288, + "loss": 0.5162, + "step": 102870 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003659013335029362, + "loss": 0.4529, + "step": 102880 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036585893277364374, + "loss": 0.531, + "step": 102890 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036581653204435116, + "loss": 0.4299, + "step": 102900 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003657741313150586, + "loss": 0.551, + "step": 102910 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003657317305857661, + "loss": 0.4751, + "step": 102920 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036568932985647356, + "loss": 0.4236, + "step": 102930 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036564692912718097, + "loss": 0.5195, + "step": 102940 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003656045283978885, + "loss": 0.501, + "step": 102950 + }, + { + "epoch": 4.35, + "learning_rate": 0.0003655621276685959, + "loss": 0.5366, + "step": 102960 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036551972693930337, + "loss": 0.4711, + "step": 102970 + }, + { + "epoch": 4.35, + "learning_rate": 0.00036547732621001084, + "loss": 0.4757, + "step": 102980 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003654349254807183, + "loss": 0.5217, + "step": 102990 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003653925247514257, + "loss": 0.4649, + "step": 103000 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003653501240221332, + "loss": 0.4923, + "step": 103010 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036530772329284065, + "loss": 0.5585, + "step": 103020 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003652653225635481, + "loss": 0.5184, + "step": 103030 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036522292183425553, + "loss": 0.5643, + "step": 103040 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036518052110496305, + "loss": 0.5209, + "step": 103050 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036513812037567046, + "loss": 0.4463, + "step": 103060 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036509571964637793, + "loss": 0.5097, + "step": 103070 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003650533189170854, + "loss": 0.4961, + "step": 103080 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036501091818779286, + "loss": 0.4846, + "step": 103090 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003649685174585003, + "loss": 0.5334, + "step": 103100 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003649261167292078, + "loss": 0.4753, + "step": 103110 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003648837159999152, + "loss": 0.5223, + "step": 103120 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003648413152706227, + "loss": 0.517, + "step": 103130 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036479891454133014, + "loss": 0.5345, + "step": 103140 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003647565138120376, + "loss": 0.5344, + "step": 103150 + }, + { + "epoch": 4.36, + "learning_rate": 0.000364714113082745, + "loss": 0.5162, + "step": 103160 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003646717123534525, + "loss": 0.553, + "step": 103170 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036462931162415996, + "loss": 0.5309, + "step": 103180 + }, + { + "epoch": 4.36, + "learning_rate": 0.0003645869108948674, + "loss": 0.5348, + "step": 103190 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036454451016557484, + "loss": 0.5191, + "step": 103200 + }, + { + "epoch": 4.36, + "learning_rate": 0.00036450210943628236, + "loss": 0.4434, + "step": 103210 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036445970870698977, + "loss": 0.5649, + "step": 103220 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036441730797769724, + "loss": 0.4572, + "step": 103230 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003643749072484047, + "loss": 0.4595, + "step": 103240 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036433250651911217, + "loss": 0.4614, + "step": 103250 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003642901057898196, + "loss": 0.5647, + "step": 103260 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003642477050605271, + "loss": 0.465, + "step": 103270 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003642053043312345, + "loss": 0.4129, + "step": 103280 + }, + { + "epoch": 4.37, + "learning_rate": 0.000364162903601942, + "loss": 0.4536, + "step": 103290 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003641205028726494, + "loss": 0.5146, + "step": 103300 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003640781021433569, + "loss": 0.5298, + "step": 103310 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036403570141406433, + "loss": 0.4482, + "step": 103320 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003639933006847718, + "loss": 0.5, + "step": 103330 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036395089995547926, + "loss": 0.4675, + "step": 103340 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036390849922618673, + "loss": 0.4835, + "step": 103350 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036386609849689414, + "loss": 0.5281, + "step": 103360 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003638236977676016, + "loss": 0.5208, + "step": 103370 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003637812970383091, + "loss": 0.5056, + "step": 103380 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036373889630901654, + "loss": 0.4169, + "step": 103390 + }, + { + "epoch": 4.37, + "learning_rate": 0.000363696495579724, + "loss": 0.4949, + "step": 103400 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003636540948504314, + "loss": 0.4416, + "step": 103410 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003636116941211389, + "loss": 0.5237, + "step": 103420 + }, + { + "epoch": 4.37, + "learning_rate": 0.00036356929339184636, + "loss": 0.5434, + "step": 103430 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003635268926625538, + "loss": 0.4383, + "step": 103440 + }, + { + "epoch": 4.37, + "learning_rate": 0.0003634844919332613, + "loss": 0.574, + "step": 103450 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003634420912039687, + "loss": 0.5021, + "step": 103460 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036339969047467617, + "loss": 0.5078, + "step": 103470 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036335728974538363, + "loss": 0.4568, + "step": 103480 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003633148890160911, + "loss": 0.4943, + "step": 103490 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036327248828679857, + "loss": 0.4891, + "step": 103500 + }, + { + "epoch": 4.38, + "learning_rate": 0.000363230087557506, + "loss": 0.4637, + "step": 103510 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036318768682821345, + "loss": 0.4625, + "step": 103520 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003631452860989209, + "loss": 0.542, + "step": 103530 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003631028853696284, + "loss": 0.5357, + "step": 103540 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003630604846403358, + "loss": 0.521, + "step": 103550 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003630180839110433, + "loss": 0.4793, + "step": 103560 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036297568318175073, + "loss": 0.4963, + "step": 103570 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003629332824524582, + "loss": 0.4769, + "step": 103580 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036289088172316566, + "loss": 0.4633, + "step": 103590 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036284848099387313, + "loss": 0.5571, + "step": 103600 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036280608026458054, + "loss": 0.4937, + "step": 103610 + }, + { + "epoch": 4.38, + "learning_rate": 0.000362763679535288, + "loss": 0.4919, + "step": 103620 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003627212788059955, + "loss": 0.4584, + "step": 103630 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036267887807670294, + "loss": 0.5242, + "step": 103640 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036263647734741035, + "loss": 0.485, + "step": 103650 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003625940766181179, + "loss": 0.3861, + "step": 103660 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003625516758888253, + "loss": 0.5732, + "step": 103670 + }, + { + "epoch": 4.38, + "learning_rate": 0.00036250927515953275, + "loss": 0.4918, + "step": 103680 + }, + { + "epoch": 4.38, + "learning_rate": 0.0003624668744302402, + "loss": 0.5439, + "step": 103690 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003624244737009477, + "loss": 0.5105, + "step": 103700 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003623820729716551, + "loss": 0.5255, + "step": 103710 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003623396722423626, + "loss": 0.4703, + "step": 103720 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036229727151307003, + "loss": 0.539, + "step": 103730 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003622548707837775, + "loss": 0.5117, + "step": 103740 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036221247005448497, + "loss": 0.4562, + "step": 103750 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036217006932519243, + "loss": 0.5883, + "step": 103760 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036212766859589985, + "loss": 0.4963, + "step": 103770 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003620852678666073, + "loss": 0.5133, + "step": 103780 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003620428671373148, + "loss": 0.4832, + "step": 103790 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036200046640802225, + "loss": 0.4865, + "step": 103800 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036195806567872966, + "loss": 0.4634, + "step": 103810 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003619156649494372, + "loss": 0.4517, + "step": 103820 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003618732642201446, + "loss": 0.5837, + "step": 103830 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036183086349085206, + "loss": 0.5501, + "step": 103840 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003617884627615595, + "loss": 0.4845, + "step": 103850 + }, + { + "epoch": 4.39, + "learning_rate": 0.000361746062032267, + "loss": 0.4675, + "step": 103860 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003617036613029744, + "loss": 0.4699, + "step": 103870 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003616612605736819, + "loss": 0.4945, + "step": 103880 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036161885984438934, + "loss": 0.5303, + "step": 103890 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003615764591150968, + "loss": 0.4926, + "step": 103900 + }, + { + "epoch": 4.39, + "learning_rate": 0.0003615340583858043, + "loss": 0.4721, + "step": 103910 + }, + { + "epoch": 4.39, + "learning_rate": 0.00036149165765651174, + "loss": 0.4442, + "step": 103920 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036144925692721915, + "loss": 0.5286, + "step": 103930 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003614068561979266, + "loss": 0.5439, + "step": 103940 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003613644554686341, + "loss": 0.5191, + "step": 103950 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036132205473934155, + "loss": 0.5007, + "step": 103960 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036127965401004897, + "loss": 0.5406, + "step": 103970 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003612372532807565, + "loss": 0.5764, + "step": 103980 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003611948525514639, + "loss": 0.5082, + "step": 103990 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036115245182217137, + "loss": 0.5358, + "step": 104000 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036111005109287883, + "loss": 0.474, + "step": 104010 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003610676503635863, + "loss": 0.5554, + "step": 104020 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003610252496342937, + "loss": 0.5797, + "step": 104030 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003609828489050012, + "loss": 0.4829, + "step": 104040 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036094044817570865, + "loss": 0.3736, + "step": 104050 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003608980474464161, + "loss": 0.501, + "step": 104060 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003608556467171235, + "loss": 0.5699, + "step": 104070 + }, + { + "epoch": 4.4, + "learning_rate": 0.000360813245987831, + "loss": 0.4828, + "step": 104080 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036077084525853846, + "loss": 0.5193, + "step": 104090 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003607284445292459, + "loss": 0.5074, + "step": 104100 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003606860437999534, + "loss": 0.4394, + "step": 104110 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003606436430706608, + "loss": 0.5012, + "step": 104120 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036060124234136827, + "loss": 0.3702, + "step": 104130 + }, + { + "epoch": 4.4, + "learning_rate": 0.00036055884161207574, + "loss": 0.5003, + "step": 104140 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003605164408827832, + "loss": 0.4448, + "step": 104150 + }, + { + "epoch": 4.4, + "learning_rate": 0.0003604740401534906, + "loss": 0.5133, + "step": 104160 + }, + { + "epoch": 4.41, + "learning_rate": 0.00036043163942419814, + "loss": 0.4981, + "step": 104170 + }, + { + "epoch": 4.41, + "learning_rate": 0.00036038923869490555, + "loss": 0.4623, + "step": 104180 + }, + { + "epoch": 4.41, + "learning_rate": 0.000360346837965613, + "loss": 0.5479, + "step": 104190 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003603044372363205, + "loss": 0.6045, + "step": 104200 + }, + { + "epoch": 4.41, + "learning_rate": 0.00036026203650702795, + "loss": 0.423, + "step": 104210 + }, + { + "epoch": 4.41, + "learning_rate": 0.00036021963577773536, + "loss": 0.5066, + "step": 104220 + }, + { + "epoch": 4.41, + "learning_rate": 0.00036017723504844283, + "loss": 0.4476, + "step": 104230 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003601348343191503, + "loss": 0.4595, + "step": 104240 + }, + { + "epoch": 4.41, + "learning_rate": 0.00036009243358985776, + "loss": 0.4719, + "step": 104250 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003600500328605652, + "loss": 0.4919, + "step": 104260 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003600076321312727, + "loss": 0.5467, + "step": 104270 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003599652314019801, + "loss": 0.5131, + "step": 104280 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003599228306726876, + "loss": 0.5379, + "step": 104290 + }, + { + "epoch": 4.41, + "learning_rate": 0.00035988042994339504, + "loss": 0.4731, + "step": 104300 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003598380292141025, + "loss": 0.3841, + "step": 104310 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003597956284848099, + "loss": 0.5442, + "step": 104320 + }, + { + "epoch": 4.41, + "learning_rate": 0.00035975322775551744, + "loss": 0.6518, + "step": 104330 + }, + { + "epoch": 4.41, + "learning_rate": 0.00035971082702622486, + "loss": 0.491, + "step": 104340 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003596684262969323, + "loss": 0.4487, + "step": 104350 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003596260255676398, + "loss": 0.598, + "step": 104360 + }, + { + "epoch": 4.41, + "learning_rate": 0.00035958362483834726, + "loss": 0.4188, + "step": 104370 + }, + { + "epoch": 4.41, + "learning_rate": 0.00035954122410905467, + "loss": 0.4783, + "step": 104380 + }, + { + "epoch": 4.41, + "learning_rate": 0.00035949882337976214, + "loss": 0.485, + "step": 104390 + }, + { + "epoch": 4.41, + "learning_rate": 0.0003594564226504696, + "loss": 0.4888, + "step": 104400 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035941402192117707, + "loss": 0.5, + "step": 104410 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003593716211918845, + "loss": 0.4957, + "step": 104420 + }, + { + "epoch": 4.42, + "learning_rate": 0.000359329220462592, + "loss": 0.4225, + "step": 104430 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003592868197332994, + "loss": 0.5398, + "step": 104440 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003592444190040069, + "loss": 0.4807, + "step": 104450 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035920201827471435, + "loss": 0.4688, + "step": 104460 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003591596175454218, + "loss": 0.4567, + "step": 104470 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035911721681612923, + "loss": 0.5426, + "step": 104480 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035907481608683675, + "loss": 0.4507, + "step": 104490 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035903241535754416, + "loss": 0.477, + "step": 104500 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035899001462825163, + "loss": 0.4739, + "step": 104510 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003589476138989591, + "loss": 0.4973, + "step": 104520 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035890521316966656, + "loss": 0.6004, + "step": 104530 + }, + { + "epoch": 4.42, + "learning_rate": 0.000358862812440374, + "loss": 0.4649, + "step": 104540 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035882041171108144, + "loss": 0.4739, + "step": 104550 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003587780109817889, + "loss": 0.5195, + "step": 104560 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003587356102524964, + "loss": 0.4426, + "step": 104570 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003586932095232038, + "loss": 0.563, + "step": 104580 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003586508087939113, + "loss": 0.573, + "step": 104590 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003586084080646187, + "loss": 0.5555, + "step": 104600 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003585660073353262, + "loss": 0.4835, + "step": 104610 + }, + { + "epoch": 4.42, + "learning_rate": 0.00035852360660603366, + "loss": 0.4872, + "step": 104620 + }, + { + "epoch": 4.42, + "learning_rate": 0.0003584812058767411, + "loss": 0.4917, + "step": 104630 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035843880514744854, + "loss": 0.5178, + "step": 104640 + }, + { + "epoch": 4.43, + "learning_rate": 0.000358396404418156, + "loss": 0.4519, + "step": 104650 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035835400368886347, + "loss": 0.5319, + "step": 104660 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035831160295957094, + "loss": 0.4505, + "step": 104670 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003582692022302784, + "loss": 0.4873, + "step": 104680 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003582268015009858, + "loss": 0.4098, + "step": 104690 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003581844007716933, + "loss": 0.5111, + "step": 104700 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035814200004240075, + "loss": 0.5149, + "step": 104710 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003580995993131082, + "loss": 0.5276, + "step": 104720 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003580571985838157, + "loss": 0.4428, + "step": 104730 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003580147978545231, + "loss": 0.6269, + "step": 104740 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035797239712523056, + "loss": 0.5209, + "step": 104750 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035792999639593803, + "loss": 0.4949, + "step": 104760 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003578875956666455, + "loss": 0.4982, + "step": 104770 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035784519493735296, + "loss": 0.5091, + "step": 104780 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003578027942080604, + "loss": 0.5044, + "step": 104790 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035776039347876784, + "loss": 0.4891, + "step": 104800 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003577179927494753, + "loss": 0.4099, + "step": 104810 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003576755920201828, + "loss": 0.4263, + "step": 104820 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003576331912908902, + "loss": 0.4671, + "step": 104830 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035759079056159765, + "loss": 0.5133, + "step": 104840 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003575483898323051, + "loss": 0.5197, + "step": 104850 + }, + { + "epoch": 4.43, + "learning_rate": 0.0003575059891030126, + "loss": 0.5085, + "step": 104860 + }, + { + "epoch": 4.43, + "learning_rate": 0.00035746358837372, + "loss": 0.4961, + "step": 104870 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003574211876444275, + "loss": 0.4951, + "step": 104880 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035737878691513493, + "loss": 0.5375, + "step": 104890 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003573363861858424, + "loss": 0.498, + "step": 104900 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035729398545654987, + "loss": 0.5091, + "step": 104910 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035725158472725733, + "loss": 0.5227, + "step": 104920 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035720918399796475, + "loss": 0.4665, + "step": 104930 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035716678326867227, + "loss": 0.4684, + "step": 104940 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003571243825393797, + "loss": 0.5529, + "step": 104950 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035708198181008715, + "loss": 0.5686, + "step": 104960 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003570395810807946, + "loss": 0.5361, + "step": 104970 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003569971803515021, + "loss": 0.4467, + "step": 104980 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003569547796222095, + "loss": 0.4745, + "step": 104990 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035691237889291696, + "loss": 0.44, + "step": 105000 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035686997816362443, + "loss": 0.4929, + "step": 105010 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003568275774343319, + "loss": 0.5419, + "step": 105020 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003567851767050393, + "loss": 0.4348, + "step": 105030 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035674277597574683, + "loss": 0.618, + "step": 105040 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035670037524645424, + "loss": 0.5684, + "step": 105050 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003566579745171617, + "loss": 0.525, + "step": 105060 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003566155737878692, + "loss": 0.5373, + "step": 105070 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035657317305857664, + "loss": 0.5271, + "step": 105080 + }, + { + "epoch": 4.44, + "learning_rate": 0.00035653077232928405, + "loss": 0.5136, + "step": 105090 + }, + { + "epoch": 4.44, + "learning_rate": 0.0003564883715999916, + "loss": 0.444, + "step": 105100 + }, + { + "epoch": 4.44, + "learning_rate": 0.000356445970870699, + "loss": 0.5035, + "step": 105110 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035640357014140645, + "loss": 0.4363, + "step": 105120 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003563611694121139, + "loss": 0.5455, + "step": 105130 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003563187686828214, + "loss": 0.576, + "step": 105140 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003562763679535288, + "loss": 0.4509, + "step": 105150 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035623396722423627, + "loss": 0.4989, + "step": 105160 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035619156649494373, + "loss": 0.4125, + "step": 105170 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003561491657656512, + "loss": 0.519, + "step": 105180 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003561067650363586, + "loss": 0.484, + "step": 105190 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035606436430706613, + "loss": 0.4996, + "step": 105200 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035602196357777355, + "loss": 0.4741, + "step": 105210 + }, + { + "epoch": 4.45, + "learning_rate": 0.000355979562848481, + "loss": 0.4549, + "step": 105220 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003559371621191885, + "loss": 0.4845, + "step": 105230 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035589476138989595, + "loss": 0.4593, + "step": 105240 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035585236066060336, + "loss": 0.5409, + "step": 105250 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003558099599313109, + "loss": 0.4813, + "step": 105260 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003557675592020183, + "loss": 0.4658, + "step": 105270 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035572515847272576, + "loss": 0.4525, + "step": 105280 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003556827577434332, + "loss": 0.4674, + "step": 105290 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003556403570141407, + "loss": 0.4741, + "step": 105300 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003555979562848481, + "loss": 0.5298, + "step": 105310 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035555555555555557, + "loss": 0.4605, + "step": 105320 + }, + { + "epoch": 4.45, + "learning_rate": 0.00035551315482626304, + "loss": 0.5392, + "step": 105330 + }, + { + "epoch": 4.45, + "learning_rate": 0.0003554707540969705, + "loss": 0.51, + "step": 105340 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003554283533676779, + "loss": 0.4546, + "step": 105350 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003553859526383854, + "loss": 0.4861, + "step": 105360 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035534355190909285, + "loss": 0.608, + "step": 105370 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003553011511798003, + "loss": 0.5053, + "step": 105380 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003552587504505078, + "loss": 0.4714, + "step": 105390 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003552163497212152, + "loss": 0.504, + "step": 105400 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035517394899192267, + "loss": 0.5187, + "step": 105410 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035513154826263013, + "loss": 0.4688, + "step": 105420 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003550891475333376, + "loss": 0.5242, + "step": 105430 + }, + { + "epoch": 4.46, + "learning_rate": 0.000355046746804045, + "loss": 0.3869, + "step": 105440 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003550043460747525, + "loss": 0.4179, + "step": 105450 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035496194534545995, + "loss": 0.4486, + "step": 105460 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003549195446161674, + "loss": 0.4635, + "step": 105470 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003548771438868749, + "loss": 0.5008, + "step": 105480 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035483474315758235, + "loss": 0.4662, + "step": 105490 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035479234242828976, + "loss": 0.5475, + "step": 105500 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003547499416989972, + "loss": 0.5898, + "step": 105510 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003547075409697047, + "loss": 0.513, + "step": 105520 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035466514024041216, + "loss": 0.5331, + "step": 105530 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035462273951111957, + "loss": 0.4987, + "step": 105540 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003545803387818271, + "loss": 0.4708, + "step": 105550 + }, + { + "epoch": 4.46, + "learning_rate": 0.0003545379380525345, + "loss": 0.5129, + "step": 105560 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035449553732324197, + "loss": 0.501, + "step": 105570 + }, + { + "epoch": 4.46, + "learning_rate": 0.00035445313659394944, + "loss": 0.5049, + "step": 105580 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003544107358646569, + "loss": 0.5147, + "step": 105590 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003543683351353643, + "loss": 0.3935, + "step": 105600 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003543259344060718, + "loss": 0.5518, + "step": 105610 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035428353367677925, + "loss": 0.5809, + "step": 105620 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003542411329474867, + "loss": 0.4651, + "step": 105630 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035419873221819413, + "loss": 0.4918, + "step": 105640 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035415633148890165, + "loss": 0.5046, + "step": 105650 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035411393075960906, + "loss": 0.4106, + "step": 105660 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035407153003031653, + "loss": 0.5549, + "step": 105670 + }, + { + "epoch": 4.47, + "learning_rate": 0.000354029129301024, + "loss": 0.4425, + "step": 105680 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035398672857173146, + "loss": 0.5426, + "step": 105690 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003539443278424389, + "loss": 0.5294, + "step": 105700 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003539019271131464, + "loss": 0.48, + "step": 105710 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003538595263838538, + "loss": 0.5395, + "step": 105720 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003538171256545613, + "loss": 0.4375, + "step": 105730 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035377472492526874, + "loss": 0.5187, + "step": 105740 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003537323241959762, + "loss": 0.4333, + "step": 105750 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003536899234666836, + "loss": 0.4588, + "step": 105760 + }, + { + "epoch": 4.47, + "learning_rate": 0.0003536475227373911, + "loss": 0.4458, + "step": 105770 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035360512200809856, + "loss": 0.4292, + "step": 105780 + }, + { + "epoch": 4.47, + "learning_rate": 0.000353562721278806, + "loss": 0.5041, + "step": 105790 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035352032054951344, + "loss": 0.5579, + "step": 105800 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035347791982022096, + "loss": 0.4804, + "step": 105810 + }, + { + "epoch": 4.47, + "learning_rate": 0.00035343551909092837, + "loss": 0.468, + "step": 105820 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035339311836163584, + "loss": 0.4845, + "step": 105830 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003533507176323433, + "loss": 0.5108, + "step": 105840 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035330831690305077, + "loss": 0.5436, + "step": 105850 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003532659161737582, + "loss": 0.5103, + "step": 105860 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003532235154444657, + "loss": 0.528, + "step": 105870 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003531811147151731, + "loss": 0.535, + "step": 105880 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003531387139858806, + "loss": 0.5673, + "step": 105890 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035309631325658805, + "loss": 0.4695, + "step": 105900 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003530539125272955, + "loss": 0.5182, + "step": 105910 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035301151179800293, + "loss": 0.509, + "step": 105920 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003529691110687104, + "loss": 0.4662, + "step": 105930 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035292671033941786, + "loss": 0.5813, + "step": 105940 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035288430961012533, + "loss": 0.6149, + "step": 105950 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035284190888083274, + "loss": 0.49, + "step": 105960 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035279950815154026, + "loss": 0.4402, + "step": 105970 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003527571074222477, + "loss": 0.4106, + "step": 105980 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035271470669295514, + "loss": 0.5169, + "step": 105990 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003526723059636626, + "loss": 0.5891, + "step": 106000 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003526299052343701, + "loss": 0.571, + "step": 106010 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003525875045050775, + "loss": 0.5261, + "step": 106020 + }, + { + "epoch": 4.48, + "learning_rate": 0.00035254510377578496, + "loss": 0.513, + "step": 106030 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003525027030464924, + "loss": 0.47, + "step": 106040 + }, + { + "epoch": 4.48, + "learning_rate": 0.0003524603023171999, + "loss": 0.4799, + "step": 106050 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035241790158790736, + "loss": 0.5154, + "step": 106060 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035237550085861477, + "loss": 0.4341, + "step": 106070 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035233310012932224, + "loss": 0.5716, + "step": 106080 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003522906994000297, + "loss": 0.5518, + "step": 106090 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035224829867073717, + "loss": 0.4163, + "step": 106100 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003522058979414446, + "loss": 0.4499, + "step": 106110 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035216349721215205, + "loss": 0.5145, + "step": 106120 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003521210964828595, + "loss": 0.5179, + "step": 106130 + }, + { + "epoch": 4.49, + "learning_rate": 0.000352078695753567, + "loss": 0.474, + "step": 106140 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003520362950242744, + "loss": 0.4918, + "step": 106150 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003519938942949819, + "loss": 0.5022, + "step": 106160 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035195149356568933, + "loss": 0.5773, + "step": 106170 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003519090928363968, + "loss": 0.5664, + "step": 106180 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035186669210710426, + "loss": 0.4396, + "step": 106190 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035182429137781173, + "loss": 0.4456, + "step": 106200 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035178189064851914, + "loss": 0.4985, + "step": 106210 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003517394899192266, + "loss": 0.4263, + "step": 106220 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003516970891899341, + "loss": 0.413, + "step": 106230 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035165468846064154, + "loss": 0.4729, + "step": 106240 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035161228773134895, + "loss": 0.4792, + "step": 106250 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003515698870020565, + "loss": 0.5171, + "step": 106260 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003515274862727639, + "loss": 0.4817, + "step": 106270 + }, + { + "epoch": 4.49, + "learning_rate": 0.00035148508554347135, + "loss": 0.4817, + "step": 106280 + }, + { + "epoch": 4.49, + "learning_rate": 0.0003514426848141788, + "loss": 0.4149, + "step": 106290 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003514002840848863, + "loss": 0.4842, + "step": 106300 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003513578833555937, + "loss": 0.4949, + "step": 106310 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003513154826263012, + "loss": 0.567, + "step": 106320 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035127308189700863, + "loss": 0.4502, + "step": 106330 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003512306811677161, + "loss": 0.5403, + "step": 106340 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035118828043842357, + "loss": 0.472, + "step": 106350 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035114587970913103, + "loss": 0.4921, + "step": 106360 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035110347897983845, + "loss": 0.5264, + "step": 106370 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003510610782505459, + "loss": 0.4602, + "step": 106380 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003510186775212534, + "loss": 0.571, + "step": 106390 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035097627679196085, + "loss": 0.3894, + "step": 106400 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035093387606266826, + "loss": 0.4524, + "step": 106410 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003508914753333758, + "loss": 0.4882, + "step": 106420 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003508490746040832, + "loss": 0.4774, + "step": 106430 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035080667387479066, + "loss": 0.4718, + "step": 106440 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035076427314549813, + "loss": 0.3744, + "step": 106450 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003507218724162056, + "loss": 0.5668, + "step": 106460 + }, + { + "epoch": 4.5, + "learning_rate": 0.000350679471686913, + "loss": 0.6328, + "step": 106470 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035063707095762053, + "loss": 0.4928, + "step": 106480 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035059467022832794, + "loss": 0.5547, + "step": 106490 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003505522694990354, + "loss": 0.4658, + "step": 106500 + }, + { + "epoch": 4.5, + "learning_rate": 0.0003505098687697429, + "loss": 0.5365, + "step": 106510 + }, + { + "epoch": 4.5, + "learning_rate": 0.00035046746804045034, + "loss": 0.5803, + "step": 106520 + }, + { + "epoch": 4.51, + "learning_rate": 0.00035042506731115775, + "loss": 0.4629, + "step": 106530 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003503826665818652, + "loss": 0.5381, + "step": 106540 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003503402658525727, + "loss": 0.5841, + "step": 106550 + }, + { + "epoch": 4.51, + "learning_rate": 0.00035029786512328015, + "loss": 0.4277, + "step": 106560 + }, + { + "epoch": 4.51, + "learning_rate": 0.00035025546439398757, + "loss": 0.4285, + "step": 106570 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003502130636646951, + "loss": 0.529, + "step": 106580 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003501706629354025, + "loss": 0.4909, + "step": 106590 + }, + { + "epoch": 4.51, + "learning_rate": 0.00035012826220610997, + "loss": 0.4684, + "step": 106600 + }, + { + "epoch": 4.51, + "learning_rate": 0.00035008586147681743, + "loss": 0.534, + "step": 106610 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003500434607475249, + "loss": 0.4918, + "step": 106620 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003500010600182323, + "loss": 0.4488, + "step": 106630 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003499586592889398, + "loss": 0.4534, + "step": 106640 + }, + { + "epoch": 4.51, + "learning_rate": 0.00034991625855964725, + "loss": 0.5353, + "step": 106650 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003498738578303547, + "loss": 0.5378, + "step": 106660 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003498314571010622, + "loss": 0.5609, + "step": 106670 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003497890563717696, + "loss": 0.4754, + "step": 106680 + }, + { + "epoch": 4.51, + "learning_rate": 0.00034974665564247706, + "loss": 0.5381, + "step": 106690 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003497042549131845, + "loss": 0.4833, + "step": 106700 + }, + { + "epoch": 4.51, + "learning_rate": 0.000349661854183892, + "loss": 0.448, + "step": 106710 + }, + { + "epoch": 4.51, + "learning_rate": 0.00034961945345459946, + "loss": 0.4436, + "step": 106720 + }, + { + "epoch": 4.51, + "learning_rate": 0.00034957705272530687, + "loss": 0.4929, + "step": 106730 + }, + { + "epoch": 4.51, + "learning_rate": 0.00034953465199601434, + "loss": 0.6984, + "step": 106740 + }, + { + "epoch": 4.51, + "learning_rate": 0.0003494922512667218, + "loss": 0.4845, + "step": 106750 + }, + { + "epoch": 4.51, + "learning_rate": 0.00034944985053742927, + "loss": 0.4963, + "step": 106760 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034940744980813674, + "loss": 0.4141, + "step": 106770 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034936504907884415, + "loss": 0.4954, + "step": 106780 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003493226483495516, + "loss": 0.5065, + "step": 106790 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003492802476202591, + "loss": 0.5094, + "step": 106800 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034923784689096655, + "loss": 0.4835, + "step": 106810 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034919544616167396, + "loss": 0.5174, + "step": 106820 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034915304543238143, + "loss": 0.4896, + "step": 106830 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003491106447030889, + "loss": 0.4268, + "step": 106840 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034906824397379637, + "loss": 0.4797, + "step": 106850 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003490258432445038, + "loss": 0.4695, + "step": 106860 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003489834425152113, + "loss": 0.5712, + "step": 106870 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003489410417859187, + "loss": 0.49, + "step": 106880 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003488986410566262, + "loss": 0.499, + "step": 106890 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034885624032733364, + "loss": 0.5207, + "step": 106900 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003488138395980411, + "loss": 0.503, + "step": 106910 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003487714388687485, + "loss": 0.5505, + "step": 106920 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034872903813945605, + "loss": 0.5236, + "step": 106930 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034868663741016346, + "loss": 0.4945, + "step": 106940 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003486442366808709, + "loss": 0.5301, + "step": 106950 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003486018359515784, + "loss": 0.579, + "step": 106960 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034855943522228586, + "loss": 0.6279, + "step": 106970 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034851703449299327, + "loss": 0.4421, + "step": 106980 + }, + { + "epoch": 4.52, + "learning_rate": 0.00034847463376370074, + "loss": 0.5439, + "step": 106990 + }, + { + "epoch": 4.52, + "learning_rate": 0.0003484322330344082, + "loss": 0.5025, + "step": 107000 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034838983230511567, + "loss": 0.5406, + "step": 107010 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003483474315758231, + "loss": 0.4764, + "step": 107020 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003483050308465306, + "loss": 0.4957, + "step": 107030 + }, + { + "epoch": 4.53, + "learning_rate": 0.000348262630117238, + "loss": 0.3927, + "step": 107040 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003482202293879455, + "loss": 0.5063, + "step": 107050 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034817782865865295, + "loss": 0.4234, + "step": 107060 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003481354279293604, + "loss": 0.5742, + "step": 107070 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034809302720006783, + "loss": 0.4469, + "step": 107080 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034805062647077535, + "loss": 0.5323, + "step": 107090 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034800822574148276, + "loss": 0.4703, + "step": 107100 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034796582501219023, + "loss": 0.5174, + "step": 107110 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003479234242828977, + "loss": 0.435, + "step": 107120 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034788102355360516, + "loss": 0.5701, + "step": 107130 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003478386228243126, + "loss": 0.5459, + "step": 107140 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034779622209502004, + "loss": 0.4502, + "step": 107150 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003477538213657275, + "loss": 0.4554, + "step": 107160 + }, + { + "epoch": 4.53, + "learning_rate": 0.000347711420636435, + "loss": 0.4884, + "step": 107170 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003476690199071424, + "loss": 0.4633, + "step": 107180 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003476266191778499, + "loss": 0.4079, + "step": 107190 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003475842184485573, + "loss": 0.4479, + "step": 107200 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003475418177192648, + "loss": 0.4533, + "step": 107210 + }, + { + "epoch": 4.53, + "learning_rate": 0.00034749941698997226, + "loss": 0.4297, + "step": 107220 + }, + { + "epoch": 4.53, + "learning_rate": 0.0003474570162606797, + "loss": 0.4894, + "step": 107230 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034741461553138714, + "loss": 0.5471, + "step": 107240 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034737221480209466, + "loss": 0.4712, + "step": 107250 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034732981407280207, + "loss": 0.4601, + "step": 107260 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034728741334350954, + "loss": 0.4511, + "step": 107270 + }, + { + "epoch": 4.54, + "learning_rate": 0.000347245012614217, + "loss": 0.5375, + "step": 107280 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034720261188492447, + "loss": 0.5216, + "step": 107290 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003471602111556319, + "loss": 0.433, + "step": 107300 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034711781042633935, + "loss": 0.4578, + "step": 107310 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003470754096970468, + "loss": 0.5175, + "step": 107320 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003470330089677543, + "loss": 0.6046, + "step": 107330 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003469906082384617, + "loss": 0.5059, + "step": 107340 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034694820750916916, + "loss": 0.5152, + "step": 107350 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034690580677987663, + "loss": 0.4585, + "step": 107360 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003468634060505841, + "loss": 0.4774, + "step": 107370 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034682100532129156, + "loss": 0.4959, + "step": 107380 + }, + { + "epoch": 4.54, + "learning_rate": 0.000346778604591999, + "loss": 0.5307, + "step": 107390 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034673620386270644, + "loss": 0.5356, + "step": 107400 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003466938031334139, + "loss": 0.5499, + "step": 107410 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003466514024041214, + "loss": 0.5504, + "step": 107420 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003466090016748288, + "loss": 0.4537, + "step": 107430 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003465666009455363, + "loss": 0.4826, + "step": 107440 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003465242002162437, + "loss": 0.5716, + "step": 107450 + }, + { + "epoch": 4.54, + "learning_rate": 0.0003464817994869512, + "loss": 0.4838, + "step": 107460 + }, + { + "epoch": 4.54, + "learning_rate": 0.00034643939875765866, + "loss": 0.4609, + "step": 107470 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003463969980283661, + "loss": 0.4426, + "step": 107480 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034635459729907353, + "loss": 0.5005, + "step": 107490 + }, + { + "epoch": 4.55, + "learning_rate": 0.000346312196569781, + "loss": 0.5682, + "step": 107500 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034626979584048847, + "loss": 0.5103, + "step": 107510 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034622739511119594, + "loss": 0.4696, + "step": 107520 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034618499438190335, + "loss": 0.493, + "step": 107530 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034614259365261087, + "loss": 0.4887, + "step": 107540 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003461001929233183, + "loss": 0.5209, + "step": 107550 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034605779219402575, + "loss": 0.4904, + "step": 107560 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003460153914647332, + "loss": 0.3924, + "step": 107570 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003459729907354407, + "loss": 0.5073, + "step": 107580 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003459305900061481, + "loss": 0.5398, + "step": 107590 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034588818927685556, + "loss": 0.555, + "step": 107600 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034584578854756303, + "loss": 0.5702, + "step": 107610 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003458033878182705, + "loss": 0.5026, + "step": 107620 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003457609870889779, + "loss": 0.4956, + "step": 107630 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034571858635968543, + "loss": 0.4388, + "step": 107640 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034567618563039284, + "loss": 0.4928, + "step": 107650 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003456337849011003, + "loss": 0.4737, + "step": 107660 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003455913841718078, + "loss": 0.4968, + "step": 107670 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034554898344251524, + "loss": 0.457, + "step": 107680 + }, + { + "epoch": 4.55, + "learning_rate": 0.00034550658271322265, + "loss": 0.4246, + "step": 107690 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003454641819839302, + "loss": 0.4906, + "step": 107700 + }, + { + "epoch": 4.55, + "learning_rate": 0.0003454217812546376, + "loss": 0.5061, + "step": 107710 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034537938052534505, + "loss": 0.4846, + "step": 107720 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003453369797960525, + "loss": 0.5337, + "step": 107730 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034529457906676, + "loss": 0.548, + "step": 107740 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003452521783374674, + "loss": 0.4874, + "step": 107750 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034520977760817487, + "loss": 0.5021, + "step": 107760 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034516737687888233, + "loss": 0.4684, + "step": 107770 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003451249761495898, + "loss": 0.3846, + "step": 107780 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003450825754202972, + "loss": 0.4669, + "step": 107790 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034504017469100473, + "loss": 0.5252, + "step": 107800 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034499777396171215, + "loss": 0.4335, + "step": 107810 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003449553732324196, + "loss": 0.4736, + "step": 107820 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003449129725031271, + "loss": 0.4428, + "step": 107830 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034487057177383455, + "loss": 0.556, + "step": 107840 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034482817104454196, + "loss": 0.4922, + "step": 107850 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003447857703152495, + "loss": 0.4925, + "step": 107860 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003447433695859569, + "loss": 0.5169, + "step": 107870 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034470096885666436, + "loss": 0.5352, + "step": 107880 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003446585681273718, + "loss": 0.5075, + "step": 107890 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003446161673980793, + "loss": 0.4733, + "step": 107900 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003445737666687867, + "loss": 0.5317, + "step": 107910 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003445313659394942, + "loss": 0.5063, + "step": 107920 + }, + { + "epoch": 4.56, + "learning_rate": 0.00034448896521020164, + "loss": 0.5444, + "step": 107930 + }, + { + "epoch": 4.56, + "learning_rate": 0.0003444465644809091, + "loss": 0.5286, + "step": 107940 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003444041637516165, + "loss": 0.5924, + "step": 107950 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034436176302232404, + "loss": 0.4883, + "step": 107960 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034431936229303145, + "loss": 0.4851, + "step": 107970 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003442769615637389, + "loss": 0.6215, + "step": 107980 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003442345608344464, + "loss": 0.4841, + "step": 107990 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034419216010515385, + "loss": 0.5319, + "step": 108000 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034414975937586127, + "loss": 0.4775, + "step": 108010 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034410735864656873, + "loss": 0.4822, + "step": 108020 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003440649579172762, + "loss": 0.5886, + "step": 108030 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034402255718798367, + "loss": 0.5262, + "step": 108040 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034398015645869113, + "loss": 0.5194, + "step": 108050 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034393775572939855, + "loss": 0.534, + "step": 108060 + }, + { + "epoch": 4.57, + "learning_rate": 0.000343895355000106, + "loss": 0.4929, + "step": 108070 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003438529542708135, + "loss": 0.5975, + "step": 108080 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034381055354152095, + "loss": 0.5299, + "step": 108090 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034376815281222836, + "loss": 0.4774, + "step": 108100 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003437257520829358, + "loss": 0.5331, + "step": 108110 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003436833513536433, + "loss": 0.5488, + "step": 108120 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034364095062435076, + "loss": 0.4879, + "step": 108130 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034359854989505817, + "loss": 0.5142, + "step": 108140 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003435561491657657, + "loss": 0.5254, + "step": 108150 + }, + { + "epoch": 4.57, + "learning_rate": 0.0003435137484364731, + "loss": 0.5427, + "step": 108160 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034347134770718057, + "loss": 0.5496, + "step": 108170 + }, + { + "epoch": 4.57, + "learning_rate": 0.00034342894697788804, + "loss": 0.5269, + "step": 108180 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003433865462485955, + "loss": 0.4337, + "step": 108190 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003433441455193029, + "loss": 0.5217, + "step": 108200 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034330174479001044, + "loss": 0.4488, + "step": 108210 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034325934406071785, + "loss": 0.5362, + "step": 108220 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003432169433314253, + "loss": 0.4492, + "step": 108230 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034317454260213273, + "loss": 0.4603, + "step": 108240 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034313214187284025, + "loss": 0.4717, + "step": 108250 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034308974114354766, + "loss": 0.5156, + "step": 108260 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034304734041425513, + "loss": 0.4649, + "step": 108270 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003430049396849626, + "loss": 0.4972, + "step": 108280 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034296253895567006, + "loss": 0.5109, + "step": 108290 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003429201382263775, + "loss": 0.5172, + "step": 108300 + }, + { + "epoch": 4.58, + "learning_rate": 0.000342877737497085, + "loss": 0.5499, + "step": 108310 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003428353367677924, + "loss": 0.4737, + "step": 108320 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003427929360384999, + "loss": 0.5354, + "step": 108330 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034275053530920734, + "loss": 0.5004, + "step": 108340 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003427081345799148, + "loss": 0.4944, + "step": 108350 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003426657338506222, + "loss": 0.6272, + "step": 108360 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003426233331213297, + "loss": 0.5181, + "step": 108370 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034258093239203716, + "loss": 0.5306, + "step": 108380 + }, + { + "epoch": 4.58, + "learning_rate": 0.0003425385316627446, + "loss": 0.4948, + "step": 108390 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034249613093345204, + "loss": 0.5037, + "step": 108400 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034245373020415956, + "loss": 0.5172, + "step": 108410 + }, + { + "epoch": 4.58, + "learning_rate": 0.00034241132947486697, + "loss": 0.4617, + "step": 108420 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034236892874557444, + "loss": 0.5727, + "step": 108430 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003423265280162819, + "loss": 0.4726, + "step": 108440 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034228412728698937, + "loss": 0.4441, + "step": 108450 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003422417265576968, + "loss": 0.5064, + "step": 108460 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003421993258284043, + "loss": 0.497, + "step": 108470 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003421569250991117, + "loss": 0.553, + "step": 108480 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003421145243698192, + "loss": 0.4618, + "step": 108490 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034207212364052665, + "loss": 0.4039, + "step": 108500 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003420297229112341, + "loss": 0.4445, + "step": 108510 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034198732218194153, + "loss": 0.5413, + "step": 108520 + }, + { + "epoch": 4.59, + "learning_rate": 0.000341944921452649, + "loss": 0.5072, + "step": 108530 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034190252072335646, + "loss": 0.3748, + "step": 108540 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034186011999406393, + "loss": 0.499, + "step": 108550 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034181771926477134, + "loss": 0.5649, + "step": 108560 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034177531853547886, + "loss": 0.453, + "step": 108570 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003417329178061863, + "loss": 0.4569, + "step": 108580 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034169051707689374, + "loss": 0.5877, + "step": 108590 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003416481163476012, + "loss": 0.4822, + "step": 108600 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003416057156183087, + "loss": 0.5503, + "step": 108610 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003415633148890161, + "loss": 0.4607, + "step": 108620 + }, + { + "epoch": 4.59, + "learning_rate": 0.00034152091415972356, + "loss": 0.5399, + "step": 108630 + }, + { + "epoch": 4.59, + "learning_rate": 0.000341478513430431, + "loss": 0.4981, + "step": 108640 + }, + { + "epoch": 4.59, + "learning_rate": 0.0003414361127011385, + "loss": 0.4212, + "step": 108650 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034139371197184596, + "loss": 0.4988, + "step": 108660 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034135131124255337, + "loss": 0.4604, + "step": 108670 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034130891051326084, + "loss": 0.5038, + "step": 108680 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003412665097839683, + "loss": 0.4508, + "step": 108690 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034122410905467577, + "loss": 0.5555, + "step": 108700 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034118170832538324, + "loss": 0.5274, + "step": 108710 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034113930759609065, + "loss": 0.4886, + "step": 108720 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003410969068667981, + "loss": 0.4547, + "step": 108730 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003410545061375056, + "loss": 0.4666, + "step": 108740 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034101210540821305, + "loss": 0.4362, + "step": 108750 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003409697046789205, + "loss": 0.4944, + "step": 108760 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034092730394962793, + "loss": 0.5456, + "step": 108770 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003408849032203354, + "loss": 0.4279, + "step": 108780 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034084250249104286, + "loss": 0.4319, + "step": 108790 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034080010176175033, + "loss": 0.4287, + "step": 108800 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034075770103245774, + "loss": 0.375, + "step": 108810 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034071530030316526, + "loss": 0.4871, + "step": 108820 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003406728995738727, + "loss": 0.5187, + "step": 108830 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034063049884458014, + "loss": 0.5983, + "step": 108840 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034058809811528755, + "loss": 0.4669, + "step": 108850 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003405456973859951, + "loss": 0.4791, + "step": 108860 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003405032966567025, + "loss": 0.4765, + "step": 108870 + }, + { + "epoch": 4.6, + "learning_rate": 0.00034046089592740995, + "loss": 0.4531, + "step": 108880 + }, + { + "epoch": 4.6, + "learning_rate": 0.0003404184951981174, + "loss": 0.4705, + "step": 108890 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003403760944688249, + "loss": 0.5924, + "step": 108900 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003403336937395323, + "loss": 0.5683, + "step": 108910 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003402912930102398, + "loss": 0.4875, + "step": 108920 + }, + { + "epoch": 4.61, + "learning_rate": 0.00034024889228094723, + "loss": 0.4569, + "step": 108930 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003402064915516547, + "loss": 0.4816, + "step": 108940 + }, + { + "epoch": 4.61, + "learning_rate": 0.00034016409082236217, + "loss": 0.4478, + "step": 108950 + }, + { + "epoch": 4.61, + "learning_rate": 0.00034012169009306963, + "loss": 0.4939, + "step": 108960 + }, + { + "epoch": 4.61, + "learning_rate": 0.00034007928936377705, + "loss": 0.4594, + "step": 108970 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003400368886344845, + "loss": 0.4957, + "step": 108980 + }, + { + "epoch": 4.61, + "learning_rate": 0.000339994487905192, + "loss": 0.5479, + "step": 108990 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033995208717589945, + "loss": 0.4163, + "step": 109000 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033990968644660686, + "loss": 0.6043, + "step": 109010 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003398672857173144, + "loss": 0.4203, + "step": 109020 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003398248849880218, + "loss": 0.4971, + "step": 109030 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033978248425872926, + "loss": 0.5308, + "step": 109040 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033974008352943673, + "loss": 0.4764, + "step": 109050 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003396976828001442, + "loss": 0.4314, + "step": 109060 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003396552820708516, + "loss": 0.5048, + "step": 109070 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033961288134155913, + "loss": 0.5041, + "step": 109080 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033957048061226654, + "loss": 0.4423, + "step": 109090 + }, + { + "epoch": 4.61, + "learning_rate": 0.000339528079882974, + "loss": 0.515, + "step": 109100 + }, + { + "epoch": 4.61, + "learning_rate": 0.0003394856791536815, + "loss": 0.4034, + "step": 109110 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033944327842438894, + "loss": 0.5175, + "step": 109120 + }, + { + "epoch": 4.61, + "learning_rate": 0.00033940087769509635, + "loss": 0.4981, + "step": 109130 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003393584769658038, + "loss": 0.4485, + "step": 109140 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003393160762365113, + "loss": 0.5905, + "step": 109150 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033927367550721875, + "loss": 0.4875, + "step": 109160 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033923127477792617, + "loss": 0.449, + "step": 109170 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003391888740486337, + "loss": 0.5813, + "step": 109180 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003391464733193411, + "loss": 0.4833, + "step": 109190 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033910407259004857, + "loss": 0.4621, + "step": 109200 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033906167186075603, + "loss": 0.5292, + "step": 109210 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003390192711314635, + "loss": 0.5061, + "step": 109220 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003389768704021709, + "loss": 0.4953, + "step": 109230 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033893446967287843, + "loss": 0.5209, + "step": 109240 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033889206894358585, + "loss": 0.4883, + "step": 109250 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003388496682142933, + "loss": 0.5746, + "step": 109260 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003388072674850008, + "loss": 0.4708, + "step": 109270 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033876486675570825, + "loss": 0.4688, + "step": 109280 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033872246602641566, + "loss": 0.4718, + "step": 109290 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003386800652971231, + "loss": 0.4368, + "step": 109300 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003386376645678306, + "loss": 0.5894, + "step": 109310 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033859526383853806, + "loss": 0.462, + "step": 109320 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033855286310924547, + "loss": 0.4448, + "step": 109330 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033851046237995294, + "loss": 0.4837, + "step": 109340 + }, + { + "epoch": 4.62, + "learning_rate": 0.0003384680616506604, + "loss": 0.4997, + "step": 109350 + }, + { + "epoch": 4.62, + "learning_rate": 0.00033842566092136787, + "loss": 0.4402, + "step": 109360 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033838326019207534, + "loss": 0.4664, + "step": 109370 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033834085946278275, + "loss": 0.5278, + "step": 109380 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003382984587334902, + "loss": 0.5284, + "step": 109390 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003382560580041977, + "loss": 0.4765, + "step": 109400 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033821365727490515, + "loss": 0.5916, + "step": 109410 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033817125654561257, + "loss": 0.618, + "step": 109420 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003381288558163201, + "loss": 0.4823, + "step": 109430 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003380864550870275, + "loss": 0.4825, + "step": 109440 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033804405435773497, + "loss": 0.5848, + "step": 109450 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033800165362844243, + "loss": 0.465, + "step": 109460 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003379592528991499, + "loss": 0.4794, + "step": 109470 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003379168521698573, + "loss": 0.5311, + "step": 109480 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003378744514405648, + "loss": 0.4644, + "step": 109490 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033783205071127225, + "loss": 0.4925, + "step": 109500 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003377896499819797, + "loss": 0.4459, + "step": 109510 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003377472492526871, + "loss": 0.5077, + "step": 109520 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033770484852339465, + "loss": 0.5126, + "step": 109530 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033766244779410206, + "loss": 0.4515, + "step": 109540 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003376200470648095, + "loss": 0.472, + "step": 109550 + }, + { + "epoch": 4.63, + "learning_rate": 0.000337577646335517, + "loss": 0.4832, + "step": 109560 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033753524560622446, + "loss": 0.4661, + "step": 109570 + }, + { + "epoch": 4.63, + "learning_rate": 0.00033749284487693187, + "loss": 0.4482, + "step": 109580 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003374504441476394, + "loss": 0.5055, + "step": 109590 + }, + { + "epoch": 4.63, + "learning_rate": 0.0003374080434183468, + "loss": 0.4819, + "step": 109600 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033736564268905427, + "loss": 0.3648, + "step": 109610 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003373232419597617, + "loss": 0.4855, + "step": 109620 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003372808412304692, + "loss": 0.5572, + "step": 109630 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003372384405011766, + "loss": 0.482, + "step": 109640 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003371960397718841, + "loss": 0.5199, + "step": 109650 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033715363904259155, + "loss": 0.5626, + "step": 109660 + }, + { + "epoch": 4.64, + "learning_rate": 0.000337111238313299, + "loss": 0.5461, + "step": 109670 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033706883758400643, + "loss": 0.5249, + "step": 109680 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033702643685471395, + "loss": 0.4616, + "step": 109690 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033698403612542136, + "loss": 0.5248, + "step": 109700 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033694163539612883, + "loss": 0.4879, + "step": 109710 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003368992346668363, + "loss": 0.4985, + "step": 109720 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033685683393754376, + "loss": 0.5683, + "step": 109730 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003368144332082512, + "loss": 0.5311, + "step": 109740 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033677203247895864, + "loss": 0.4919, + "step": 109750 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003367296317496661, + "loss": 0.5906, + "step": 109760 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003366872310203736, + "loss": 0.503, + "step": 109770 + }, + { + "epoch": 4.64, + "learning_rate": 0.000336644830291081, + "loss": 0.5049, + "step": 109780 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003366024295617885, + "loss": 0.4431, + "step": 109790 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003365600288324959, + "loss": 0.3958, + "step": 109800 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003365176281032034, + "loss": 0.4799, + "step": 109810 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033647522737391086, + "loss": 0.3981, + "step": 109820 + }, + { + "epoch": 4.64, + "learning_rate": 0.0003364328266446183, + "loss": 0.6033, + "step": 109830 + }, + { + "epoch": 4.64, + "learning_rate": 0.00033639042591532574, + "loss": 0.6336, + "step": 109840 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033634802518603326, + "loss": 0.4697, + "step": 109850 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033630562445674067, + "loss": 0.4609, + "step": 109860 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033626322372744814, + "loss": 0.5227, + "step": 109870 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003362208229981556, + "loss": 0.4926, + "step": 109880 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033617842226886307, + "loss": 0.5061, + "step": 109890 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003361360215395705, + "loss": 0.5304, + "step": 109900 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033609362081027795, + "loss": 0.5917, + "step": 109910 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003360512200809854, + "loss": 0.491, + "step": 109920 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003360088193516929, + "loss": 0.4913, + "step": 109930 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003359664186224003, + "loss": 0.4361, + "step": 109940 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033592401789310776, + "loss": 0.518, + "step": 109950 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033588161716381523, + "loss": 0.4883, + "step": 109960 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003358392164345227, + "loss": 0.4736, + "step": 109970 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033579681570523016, + "loss": 0.42, + "step": 109980 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033575441497593763, + "loss": 0.498, + "step": 109990 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033571201424664504, + "loss": 0.4672, + "step": 110000 + }, + { + "epoch": 4.65, + "eval_loss": 0.6067126393318176, + "eval_runtime": 337.585, + "eval_samples_per_second": 15.566, + "eval_steps_per_second": 3.892, + "step": 110000 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003356696135173525, + "loss": 0.4274, + "step": 110010 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033562721278806, + "loss": 0.4238, + "step": 110020 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033558481205876744, + "loss": 0.4934, + "step": 110030 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003355424113294749, + "loss": 0.4414, + "step": 110040 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003355000106001823, + "loss": 0.5067, + "step": 110050 + }, + { + "epoch": 4.65, + "learning_rate": 0.0003354576098708898, + "loss": 0.5901, + "step": 110060 + }, + { + "epoch": 4.65, + "learning_rate": 0.00033541520914159726, + "loss": 0.4406, + "step": 110070 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003353728084123047, + "loss": 0.5329, + "step": 110080 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033533040768301214, + "loss": 0.5422, + "step": 110090 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003352880069537196, + "loss": 0.5314, + "step": 110100 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033524560622442707, + "loss": 0.556, + "step": 110110 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033520320549513454, + "loss": 0.5898, + "step": 110120 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033516080476584195, + "loss": 0.4611, + "step": 110130 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033511840403654947, + "loss": 0.5194, + "step": 110140 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003350760033072569, + "loss": 0.4905, + "step": 110150 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033503360257796435, + "loss": 0.4019, + "step": 110160 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003349912018486718, + "loss": 0.4521, + "step": 110170 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003349488011193793, + "loss": 0.4644, + "step": 110180 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003349064003900867, + "loss": 0.4901, + "step": 110190 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003348639996607942, + "loss": 0.4841, + "step": 110200 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033482159893150163, + "loss": 0.4383, + "step": 110210 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003347791982022091, + "loss": 0.5671, + "step": 110220 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003347367974729165, + "loss": 0.4979, + "step": 110230 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033469439674362403, + "loss": 0.5806, + "step": 110240 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033465199601433144, + "loss": 0.4674, + "step": 110250 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003346095952850389, + "loss": 0.4922, + "step": 110260 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003345671945557464, + "loss": 0.5915, + "step": 110270 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033452479382645384, + "loss": 0.5798, + "step": 110280 + }, + { + "epoch": 4.66, + "learning_rate": 0.00033448239309716125, + "loss": 0.5383, + "step": 110290 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003344399923678688, + "loss": 0.5043, + "step": 110300 + }, + { + "epoch": 4.66, + "learning_rate": 0.0003343975916385762, + "loss": 0.4659, + "step": 110310 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033435519090928365, + "loss": 0.5022, + "step": 110320 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003343127901799911, + "loss": 0.4892, + "step": 110330 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003342703894506986, + "loss": 0.4185, + "step": 110340 + }, + { + "epoch": 4.67, + "learning_rate": 0.000334227988721406, + "loss": 0.453, + "step": 110350 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003341855879921135, + "loss": 0.5077, + "step": 110360 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033414318726282093, + "loss": 0.5, + "step": 110370 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003341007865335284, + "loss": 0.5, + "step": 110380 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003340583858042358, + "loss": 0.5345, + "step": 110390 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033401598507494333, + "loss": 0.5014, + "step": 110400 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033397358434565075, + "loss": 0.4436, + "step": 110410 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003339311836163582, + "loss": 0.6237, + "step": 110420 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003338887828870657, + "loss": 0.5859, + "step": 110430 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033384638215777315, + "loss": 0.5128, + "step": 110440 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033380398142848056, + "loss": 0.4639, + "step": 110450 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003337615806991881, + "loss": 0.5145, + "step": 110460 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003337191799698955, + "loss": 0.5606, + "step": 110470 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033367677924060296, + "loss": 0.4392, + "step": 110480 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033363437851131043, + "loss": 0.3992, + "step": 110490 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003335919777820179, + "loss": 0.4788, + "step": 110500 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003335495770527253, + "loss": 0.4611, + "step": 110510 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003335071763234328, + "loss": 0.5545, + "step": 110520 + }, + { + "epoch": 4.67, + "learning_rate": 0.00033346477559414024, + "loss": 0.6042, + "step": 110530 + }, + { + "epoch": 4.67, + "learning_rate": 0.0003334223748648477, + "loss": 0.3606, + "step": 110540 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003333799741355551, + "loss": 0.4672, + "step": 110550 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033333757340626264, + "loss": 0.4926, + "step": 110560 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033329517267697005, + "loss": 0.4707, + "step": 110570 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003332527719476775, + "loss": 0.4684, + "step": 110580 + }, + { + "epoch": 4.68, + "learning_rate": 0.000333210371218385, + "loss": 0.4678, + "step": 110590 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033316797048909245, + "loss": 0.4138, + "step": 110600 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033312556975979987, + "loss": 0.4968, + "step": 110610 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033308316903050733, + "loss": 0.4683, + "step": 110620 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003330407683012148, + "loss": 0.4177, + "step": 110630 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033299836757192227, + "loss": 0.5396, + "step": 110640 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033295596684262973, + "loss": 0.4403, + "step": 110650 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033291356611333715, + "loss": 0.4717, + "step": 110660 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003328711653840446, + "loss": 0.4798, + "step": 110670 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003328287646547521, + "loss": 0.4852, + "step": 110680 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033278636392545955, + "loss": 0.4967, + "step": 110690 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033274396319616696, + "loss": 0.4354, + "step": 110700 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003327015624668744, + "loss": 0.3833, + "step": 110710 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003326591617375819, + "loss": 0.6045, + "step": 110720 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033261676100828936, + "loss": 0.551, + "step": 110730 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003325743602789968, + "loss": 0.511, + "step": 110740 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003325319595497043, + "loss": 0.6171, + "step": 110750 + }, + { + "epoch": 4.68, + "learning_rate": 0.0003324895588204117, + "loss": 0.4, + "step": 110760 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033244715809111917, + "loss": 0.5761, + "step": 110770 + }, + { + "epoch": 4.68, + "learning_rate": 0.00033240475736182664, + "loss": 0.4585, + "step": 110780 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003323623566325341, + "loss": 0.5371, + "step": 110790 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003323199559032415, + "loss": 0.4973, + "step": 110800 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033227755517394904, + "loss": 0.4346, + "step": 110810 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033223515444465645, + "loss": 0.4422, + "step": 110820 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003321927537153639, + "loss": 0.4622, + "step": 110830 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033215035298607133, + "loss": 0.4472, + "step": 110840 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033210795225677885, + "loss": 0.4683, + "step": 110850 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033206555152748626, + "loss": 0.4559, + "step": 110860 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033202315079819373, + "loss": 0.5344, + "step": 110870 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003319807500689012, + "loss": 0.4956, + "step": 110880 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033193834933960867, + "loss": 0.4332, + "step": 110890 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003318959486103161, + "loss": 0.5205, + "step": 110900 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003318535478810236, + "loss": 0.4883, + "step": 110910 + }, + { + "epoch": 4.69, + "learning_rate": 0.000331811147151731, + "loss": 0.4676, + "step": 110920 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003317687464224385, + "loss": 0.448, + "step": 110930 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033172634569314594, + "loss": 0.4734, + "step": 110940 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003316839449638534, + "loss": 0.4144, + "step": 110950 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003316415442345608, + "loss": 0.423, + "step": 110960 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033159914350526835, + "loss": 0.4711, + "step": 110970 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033155674277597576, + "loss": 0.5471, + "step": 110980 + }, + { + "epoch": 4.69, + "learning_rate": 0.0003315143420466832, + "loss": 0.5033, + "step": 110990 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033147194131739064, + "loss": 0.5116, + "step": 111000 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033142954058809816, + "loss": 0.4365, + "step": 111010 + }, + { + "epoch": 4.69, + "learning_rate": 0.00033138713985880557, + "loss": 0.4849, + "step": 111020 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033134473912951304, + "loss": 0.5259, + "step": 111030 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003313023384002205, + "loss": 0.5282, + "step": 111040 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033125993767092797, + "loss": 0.5133, + "step": 111050 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003312175369416354, + "loss": 0.4718, + "step": 111060 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003311751362123429, + "loss": 0.5402, + "step": 111070 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003311327354830503, + "loss": 0.4577, + "step": 111080 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003310903347537578, + "loss": 0.4029, + "step": 111090 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033104793402446525, + "loss": 0.4293, + "step": 111100 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003310055332951727, + "loss": 0.44, + "step": 111110 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033096313256588013, + "loss": 0.533, + "step": 111120 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003309207318365876, + "loss": 0.4127, + "step": 111130 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033087833110729506, + "loss": 0.5537, + "step": 111140 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033083593037800253, + "loss": 0.4938, + "step": 111150 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033079352964870994, + "loss": 0.4938, + "step": 111160 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033075112891941746, + "loss": 0.4769, + "step": 111170 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003307087281901249, + "loss": 0.5031, + "step": 111180 + }, + { + "epoch": 4.7, + "learning_rate": 0.00033066632746083234, + "loss": 0.4395, + "step": 111190 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003306239267315398, + "loss": 0.4828, + "step": 111200 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003305815260022473, + "loss": 0.5023, + "step": 111210 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003305391252729547, + "loss": 0.5633, + "step": 111220 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003304967245436622, + "loss": 0.508, + "step": 111230 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003304543238143696, + "loss": 0.4762, + "step": 111240 + }, + { + "epoch": 4.7, + "learning_rate": 0.0003304119230850771, + "loss": 0.4741, + "step": 111250 + }, + { + "epoch": 4.71, + "learning_rate": 0.00033036952235578456, + "loss": 0.5344, + "step": 111260 + }, + { + "epoch": 4.71, + "learning_rate": 0.000330327121626492, + "loss": 0.475, + "step": 111270 + }, + { + "epoch": 4.71, + "learning_rate": 0.00033028472089719944, + "loss": 0.4422, + "step": 111280 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003302423201679069, + "loss": 0.4617, + "step": 111290 + }, + { + "epoch": 4.71, + "learning_rate": 0.00033019991943861437, + "loss": 0.5689, + "step": 111300 + }, + { + "epoch": 4.71, + "learning_rate": 0.00033015751870932184, + "loss": 0.4594, + "step": 111310 + }, + { + "epoch": 4.71, + "learning_rate": 0.00033011511798002925, + "loss": 0.5374, + "step": 111320 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003300727172507367, + "loss": 0.4532, + "step": 111330 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003300303165214442, + "loss": 0.4427, + "step": 111340 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032998791579215165, + "loss": 0.4226, + "step": 111350 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003299455150628591, + "loss": 0.5266, + "step": 111360 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032990311433356653, + "loss": 0.541, + "step": 111370 + }, + { + "epoch": 4.71, + "learning_rate": 0.000329860713604274, + "loss": 0.4821, + "step": 111380 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032981831287498146, + "loss": 0.5041, + "step": 111390 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032977591214568893, + "loss": 0.5455, + "step": 111400 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032973351141639634, + "loss": 0.4832, + "step": 111410 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032969111068710386, + "loss": 0.526, + "step": 111420 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003296487099578113, + "loss": 0.4763, + "step": 111430 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032960630922851874, + "loss": 0.521, + "step": 111440 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032956390849922615, + "loss": 0.4367, + "step": 111450 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003295215077699337, + "loss": 0.4031, + "step": 111460 + }, + { + "epoch": 4.71, + "learning_rate": 0.0003294791070406411, + "loss": 0.3974, + "step": 111470 + }, + { + "epoch": 4.71, + "learning_rate": 0.00032943670631134856, + "loss": 0.4428, + "step": 111480 + }, + { + "epoch": 4.71, + "learning_rate": 0.000329394305582056, + "loss": 0.4851, + "step": 111490 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003293519048527635, + "loss": 0.4517, + "step": 111500 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003293095041234709, + "loss": 0.542, + "step": 111510 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003292671033941784, + "loss": 0.5398, + "step": 111520 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032922470266488583, + "loss": 0.5576, + "step": 111530 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003291823019355933, + "loss": 0.4784, + "step": 111540 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032913990120630077, + "loss": 0.5218, + "step": 111550 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032909750047700824, + "loss": 0.5257, + "step": 111560 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032905509974771565, + "loss": 0.4893, + "step": 111570 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032901269901842317, + "loss": 0.5859, + "step": 111580 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003289702982891306, + "loss": 0.4928, + "step": 111590 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032892789755983805, + "loss": 0.4575, + "step": 111600 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032888549683054546, + "loss": 0.3963, + "step": 111610 + }, + { + "epoch": 4.72, + "learning_rate": 0.000328843096101253, + "loss": 0.4918, + "step": 111620 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003288006953719604, + "loss": 0.5402, + "step": 111630 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032875829464266786, + "loss": 0.5089, + "step": 111640 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032871589391337533, + "loss": 0.4593, + "step": 111650 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003286734931840828, + "loss": 0.4544, + "step": 111660 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003286310924547902, + "loss": 0.5428, + "step": 111670 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032858869172549773, + "loss": 0.4695, + "step": 111680 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032854629099620514, + "loss": 0.5507, + "step": 111690 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003285038902669126, + "loss": 0.6213, + "step": 111700 + }, + { + "epoch": 4.72, + "learning_rate": 0.0003284614895376201, + "loss": 0.5477, + "step": 111710 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032841908880832754, + "loss": 0.5225, + "step": 111720 + }, + { + "epoch": 4.72, + "learning_rate": 0.00032837668807903495, + "loss": 0.4647, + "step": 111730 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003283342873497425, + "loss": 0.4722, + "step": 111740 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003282918866204499, + "loss": 0.5223, + "step": 111750 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032824948589115735, + "loss": 0.4762, + "step": 111760 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032820708516186477, + "loss": 0.4761, + "step": 111770 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003281646844325723, + "loss": 0.4674, + "step": 111780 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003281222837032797, + "loss": 0.4865, + "step": 111790 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032807988297398717, + "loss": 0.5534, + "step": 111800 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032803748224469463, + "loss": 0.504, + "step": 111810 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003279950815154021, + "loss": 0.5923, + "step": 111820 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003279526807861095, + "loss": 0.5002, + "step": 111830 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032791028005681703, + "loss": 0.4922, + "step": 111840 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032786787932752445, + "loss": 0.4166, + "step": 111850 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003278254785982319, + "loss": 0.4206, + "step": 111860 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003277830778689394, + "loss": 0.4511, + "step": 111870 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032774067713964685, + "loss": 0.413, + "step": 111880 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032769827641035426, + "loss": 0.5162, + "step": 111890 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003276558756810617, + "loss": 0.4729, + "step": 111900 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003276134749517692, + "loss": 0.5134, + "step": 111910 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032757107422247666, + "loss": 0.4872, + "step": 111920 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032752867349318407, + "loss": 0.496, + "step": 111930 + }, + { + "epoch": 4.73, + "learning_rate": 0.00032748627276389154, + "loss": 0.5904, + "step": 111940 + }, + { + "epoch": 4.73, + "learning_rate": 0.000327443872034599, + "loss": 0.4329, + "step": 111950 + }, + { + "epoch": 4.73, + "learning_rate": 0.0003274014713053065, + "loss": 0.4553, + "step": 111960 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032735907057601394, + "loss": 0.4838, + "step": 111970 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003273166698467214, + "loss": 0.4876, + "step": 111980 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003272742691174288, + "loss": 0.4853, + "step": 111990 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003272318683881363, + "loss": 0.4446, + "step": 112000 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032718946765884375, + "loss": 0.5232, + "step": 112010 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003271470669295512, + "loss": 0.5934, + "step": 112020 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003271046662002587, + "loss": 0.5275, + "step": 112030 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003270622654709661, + "loss": 0.4315, + "step": 112040 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032701986474167357, + "loss": 0.5369, + "step": 112050 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032697746401238103, + "loss": 0.3887, + "step": 112060 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003269350632830885, + "loss": 0.4569, + "step": 112070 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003268926625537959, + "loss": 0.4865, + "step": 112080 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003268502618245034, + "loss": 0.5158, + "step": 112090 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032680786109521085, + "loss": 0.4464, + "step": 112100 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003267654603659183, + "loss": 0.4816, + "step": 112110 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003267230596366257, + "loss": 0.544, + "step": 112120 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032668065890733325, + "loss": 0.4313, + "step": 112130 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032663825817804066, + "loss": 0.4999, + "step": 112140 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003265958574487481, + "loss": 0.4829, + "step": 112150 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003265534567194556, + "loss": 0.5368, + "step": 112160 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032651105599016306, + "loss": 0.5355, + "step": 112170 + }, + { + "epoch": 4.74, + "learning_rate": 0.00032646865526087047, + "loss": 0.5052, + "step": 112180 + }, + { + "epoch": 4.74, + "learning_rate": 0.000326426254531578, + "loss": 0.4438, + "step": 112190 + }, + { + "epoch": 4.74, + "learning_rate": 0.0003263838538022854, + "loss": 0.5208, + "step": 112200 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032634145307299287, + "loss": 0.417, + "step": 112210 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003262990523437003, + "loss": 0.6108, + "step": 112220 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003262566516144078, + "loss": 0.4536, + "step": 112230 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003262142508851152, + "loss": 0.4932, + "step": 112240 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003261718501558227, + "loss": 0.4616, + "step": 112250 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032612944942653015, + "loss": 0.5858, + "step": 112260 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003260870486972376, + "loss": 0.4744, + "step": 112270 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032604464796794503, + "loss": 0.5593, + "step": 112280 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032600224723865255, + "loss": 0.4765, + "step": 112290 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032595984650935996, + "loss": 0.5703, + "step": 112300 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032591744578006743, + "loss": 0.4299, + "step": 112310 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003258750450507749, + "loss": 0.5629, + "step": 112320 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032583264432148236, + "loss": 0.4857, + "step": 112330 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003257902435921898, + "loss": 0.4966, + "step": 112340 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003257478428628973, + "loss": 0.5129, + "step": 112350 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003257054421336047, + "loss": 0.5088, + "step": 112360 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003256630414043122, + "loss": 0.4964, + "step": 112370 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003256206406750196, + "loss": 0.4798, + "step": 112380 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003255782399457271, + "loss": 0.437, + "step": 112390 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003255358392164345, + "loss": 0.4466, + "step": 112400 + }, + { + "epoch": 4.75, + "learning_rate": 0.000325493438487142, + "loss": 0.552, + "step": 112410 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032545103775784946, + "loss": 0.4826, + "step": 112420 + }, + { + "epoch": 4.75, + "learning_rate": 0.0003254086370285569, + "loss": 0.4499, + "step": 112430 + }, + { + "epoch": 4.75, + "learning_rate": 0.00032536623629926434, + "loss": 0.4893, + "step": 112440 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032532383556997186, + "loss": 0.6257, + "step": 112450 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032528143484067927, + "loss": 0.4995, + "step": 112460 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032523903411138674, + "loss": 0.4074, + "step": 112470 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003251966333820942, + "loss": 0.5132, + "step": 112480 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032515423265280167, + "loss": 0.4563, + "step": 112490 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003251118319235091, + "loss": 0.427, + "step": 112500 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003250694311942166, + "loss": 0.441, + "step": 112510 + }, + { + "epoch": 4.76, + "learning_rate": 0.000325027030464924, + "loss": 0.542, + "step": 112520 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003249846297356315, + "loss": 0.4457, + "step": 112530 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003249422290063389, + "loss": 0.5106, + "step": 112540 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003248998282770464, + "loss": 0.539, + "step": 112550 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032485742754775383, + "loss": 0.4972, + "step": 112560 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003248150268184613, + "loss": 0.4669, + "step": 112570 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032477262608916876, + "loss": 0.477, + "step": 112580 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032473022535987623, + "loss": 0.5037, + "step": 112590 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032468782463058364, + "loss": 0.4545, + "step": 112600 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003246454239012911, + "loss": 0.4606, + "step": 112610 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003246030231719986, + "loss": 0.5194, + "step": 112620 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032456062244270604, + "loss": 0.4526, + "step": 112630 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003245182217134135, + "loss": 0.4414, + "step": 112640 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003244758209841209, + "loss": 0.5027, + "step": 112650 + }, + { + "epoch": 4.76, + "learning_rate": 0.0003244334202548284, + "loss": 0.5223, + "step": 112660 + }, + { + "epoch": 4.76, + "learning_rate": 0.00032439101952553586, + "loss": 0.4685, + "step": 112670 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003243486187962433, + "loss": 0.4877, + "step": 112680 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032430621806695074, + "loss": 0.4397, + "step": 112690 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003242638173376582, + "loss": 0.4917, + "step": 112700 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032422141660836567, + "loss": 0.5463, + "step": 112710 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032417901587907314, + "loss": 0.4873, + "step": 112720 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003241366151497806, + "loss": 0.4686, + "step": 112730 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032409421442048807, + "loss": 0.5318, + "step": 112740 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003240518136911955, + "loss": 0.426, + "step": 112750 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032400941296190295, + "loss": 0.5139, + "step": 112760 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003239670122326104, + "loss": 0.4774, + "step": 112770 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003239246115033179, + "loss": 0.5127, + "step": 112780 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003238822107740253, + "loss": 0.5013, + "step": 112790 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003238398100447328, + "loss": 0.4923, + "step": 112800 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032379740931544023, + "loss": 0.5179, + "step": 112810 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003237550085861477, + "loss": 0.4463, + "step": 112820 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003237126078568551, + "loss": 0.4778, + "step": 112830 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032367020712756263, + "loss": 0.4798, + "step": 112840 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032362780639827004, + "loss": 0.4113, + "step": 112850 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003235854056689775, + "loss": 0.4777, + "step": 112860 + }, + { + "epoch": 4.77, + "learning_rate": 0.000323543004939685, + "loss": 0.5544, + "step": 112870 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032350060421039244, + "loss": 0.3591, + "step": 112880 + }, + { + "epoch": 4.77, + "learning_rate": 0.00032345820348109985, + "loss": 0.5048, + "step": 112890 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003234158027518074, + "loss": 0.4915, + "step": 112900 + }, + { + "epoch": 4.77, + "learning_rate": 0.0003233734020225148, + "loss": 0.4465, + "step": 112910 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032333100129322225, + "loss": 0.4395, + "step": 112920 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003232886005639297, + "loss": 0.4859, + "step": 112930 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003232461998346372, + "loss": 0.4694, + "step": 112940 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003232037991053446, + "loss": 0.4408, + "step": 112950 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003231613983760521, + "loss": 0.5286, + "step": 112960 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032311899764675953, + "loss": 0.5261, + "step": 112970 + }, + { + "epoch": 4.78, + "learning_rate": 0.000323076596917467, + "loss": 0.5615, + "step": 112980 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003230341961881744, + "loss": 0.5196, + "step": 112990 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032299179545888193, + "loss": 0.4922, + "step": 113000 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032294939472958935, + "loss": 0.5118, + "step": 113010 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003229069940002968, + "loss": 0.5086, + "step": 113020 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003228645932710043, + "loss": 0.555, + "step": 113030 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032282219254171175, + "loss": 0.485, + "step": 113040 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032277979181241916, + "loss": 0.5838, + "step": 113050 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003227373910831267, + "loss": 0.5133, + "step": 113060 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003226949903538341, + "loss": 0.5983, + "step": 113070 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032265258962454156, + "loss": 0.4837, + "step": 113080 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032261018889524903, + "loss": 0.4898, + "step": 113090 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003225677881659565, + "loss": 0.4557, + "step": 113100 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003225253874366639, + "loss": 0.4503, + "step": 113110 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032248298670737143, + "loss": 0.5404, + "step": 113120 + }, + { + "epoch": 4.78, + "learning_rate": 0.00032244058597807884, + "loss": 0.4508, + "step": 113130 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003223981852487863, + "loss": 0.5207, + "step": 113140 + }, + { + "epoch": 4.78, + "learning_rate": 0.0003223557845194937, + "loss": 0.5299, + "step": 113150 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032231338379020124, + "loss": 0.529, + "step": 113160 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032227098306090865, + "loss": 0.5357, + "step": 113170 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003222285823316161, + "loss": 0.499, + "step": 113180 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003221861816023236, + "loss": 0.4575, + "step": 113190 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032214378087303105, + "loss": 0.5238, + "step": 113200 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032210138014373847, + "loss": 0.5197, + "step": 113210 + }, + { + "epoch": 4.79, + "learning_rate": 0.000322058979414446, + "loss": 0.5704, + "step": 113220 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003220165786851534, + "loss": 0.5029, + "step": 113230 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032197417795586087, + "loss": 0.5265, + "step": 113240 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032193177722656833, + "loss": 0.4628, + "step": 113250 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003218893764972758, + "loss": 0.4838, + "step": 113260 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003218469757679832, + "loss": 0.3985, + "step": 113270 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003218045750386907, + "loss": 0.5372, + "step": 113280 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032176217430939815, + "loss": 0.4852, + "step": 113290 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003217197735801056, + "loss": 0.4497, + "step": 113300 + }, + { + "epoch": 4.79, + "learning_rate": 0.000321677372850813, + "loss": 0.5343, + "step": 113310 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003216349721215205, + "loss": 0.345, + "step": 113320 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032159257139222796, + "loss": 0.4662, + "step": 113330 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003215501706629354, + "loss": 0.5635, + "step": 113340 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003215077699336429, + "loss": 0.4866, + "step": 113350 + }, + { + "epoch": 4.79, + "learning_rate": 0.0003214653692043503, + "loss": 0.5475, + "step": 113360 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032142296847505777, + "loss": 0.5321, + "step": 113370 + }, + { + "epoch": 4.79, + "learning_rate": 0.00032138056774576524, + "loss": 0.4454, + "step": 113380 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003213381670164727, + "loss": 0.4072, + "step": 113390 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003212957662871801, + "loss": 0.5987, + "step": 113400 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032125336555788764, + "loss": 0.5297, + "step": 113410 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032121096482859505, + "loss": 0.4405, + "step": 113420 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003211685640993025, + "loss": 0.4695, + "step": 113430 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032112616337000993, + "loss": 0.5223, + "step": 113440 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032108376264071745, + "loss": 0.5386, + "step": 113450 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032104136191142487, + "loss": 0.5565, + "step": 113460 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032099896118213233, + "loss": 0.5017, + "step": 113470 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003209565604528398, + "loss": 0.5408, + "step": 113480 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032091415972354727, + "loss": 0.4998, + "step": 113490 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003208717589942547, + "loss": 0.4352, + "step": 113500 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003208293582649622, + "loss": 0.5374, + "step": 113510 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003207869575356696, + "loss": 0.5198, + "step": 113520 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003207445568063771, + "loss": 0.5777, + "step": 113530 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032070215607708455, + "loss": 0.5467, + "step": 113540 + }, + { + "epoch": 4.8, + "learning_rate": 0.000320659755347792, + "loss": 0.4996, + "step": 113550 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003206173546184994, + "loss": 0.4581, + "step": 113560 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032057495388920695, + "loss": 0.5455, + "step": 113570 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032053255315991436, + "loss": 0.4507, + "step": 113580 + }, + { + "epoch": 4.8, + "learning_rate": 0.0003204901524306218, + "loss": 0.4605, + "step": 113590 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032044775170132924, + "loss": 0.5062, + "step": 113600 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032040535097203676, + "loss": 0.5319, + "step": 113610 + }, + { + "epoch": 4.8, + "learning_rate": 0.00032036295024274417, + "loss": 0.4596, + "step": 113620 + }, + { + "epoch": 4.81, + "learning_rate": 0.00032032054951345164, + "loss": 0.4611, + "step": 113630 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003202781487841591, + "loss": 0.4532, + "step": 113640 + }, + { + "epoch": 4.81, + "learning_rate": 0.00032023574805486657, + "loss": 0.5782, + "step": 113650 + }, + { + "epoch": 4.81, + "learning_rate": 0.000320193347325574, + "loss": 0.5156, + "step": 113660 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003201509465962815, + "loss": 0.4811, + "step": 113670 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003201085458669889, + "loss": 0.3971, + "step": 113680 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003200661451376964, + "loss": 0.5171, + "step": 113690 + }, + { + "epoch": 4.81, + "learning_rate": 0.00032002374440840385, + "loss": 0.5804, + "step": 113700 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003199813436791113, + "loss": 0.499, + "step": 113710 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031993894294981873, + "loss": 0.42, + "step": 113720 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031989654222052625, + "loss": 0.4609, + "step": 113730 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031985414149123366, + "loss": 0.5767, + "step": 113740 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031981174076194113, + "loss": 0.4071, + "step": 113750 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031976934003264854, + "loss": 0.5568, + "step": 113760 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031972693930335606, + "loss": 0.4933, + "step": 113770 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003196845385740635, + "loss": 0.4858, + "step": 113780 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031964213784477094, + "loss": 0.4596, + "step": 113790 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003195997371154784, + "loss": 0.4825, + "step": 113800 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003195573363861859, + "loss": 0.4869, + "step": 113810 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003195149356568933, + "loss": 0.5184, + "step": 113820 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003194725349276008, + "loss": 0.5375, + "step": 113830 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003194301341983082, + "loss": 0.4961, + "step": 113840 + }, + { + "epoch": 4.81, + "learning_rate": 0.0003193877334690157, + "loss": 0.5145, + "step": 113850 + }, + { + "epoch": 4.81, + "learning_rate": 0.00031934533273972316, + "loss": 0.4791, + "step": 113860 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003193029320104306, + "loss": 0.4572, + "step": 113870 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031926053128113804, + "loss": 0.4635, + "step": 113880 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003192181305518455, + "loss": 0.5268, + "step": 113890 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031917572982255297, + "loss": 0.4826, + "step": 113900 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031913332909326044, + "loss": 0.6019, + "step": 113910 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031909092836396785, + "loss": 0.5503, + "step": 113920 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003190485276346753, + "loss": 0.4099, + "step": 113930 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003190061269053828, + "loss": 0.3949, + "step": 113940 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031896372617609025, + "loss": 0.4721, + "step": 113950 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003189213254467977, + "loss": 0.4481, + "step": 113960 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031887892471750513, + "loss": 0.5375, + "step": 113970 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003188365239882126, + "loss": 0.4827, + "step": 113980 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031879412325892006, + "loss": 0.5709, + "step": 113990 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031875172252962753, + "loss": 0.4472, + "step": 114000 + }, + { + "epoch": 4.82, + "learning_rate": 0.000318709321800335, + "loss": 0.5047, + "step": 114010 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031866692107104246, + "loss": 0.5252, + "step": 114020 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003186245203417499, + "loss": 0.5519, + "step": 114030 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031858211961245734, + "loss": 0.5879, + "step": 114040 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003185397188831648, + "loss": 0.4675, + "step": 114050 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003184973181538723, + "loss": 0.5109, + "step": 114060 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003184549174245797, + "loss": 0.4334, + "step": 114070 + }, + { + "epoch": 4.82, + "learning_rate": 0.00031841251669528716, + "loss": 0.5317, + "step": 114080 + }, + { + "epoch": 4.82, + "learning_rate": 0.0003183701159659946, + "loss": 0.4365, + "step": 114090 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003183277152367021, + "loss": 0.4841, + "step": 114100 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003182853145074095, + "loss": 0.5572, + "step": 114110 + }, + { + "epoch": 4.83, + "learning_rate": 0.000318242913778117, + "loss": 0.485, + "step": 114120 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031820051304882444, + "loss": 0.462, + "step": 114130 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003181581123195319, + "loss": 0.4468, + "step": 114140 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031811571159023937, + "loss": 0.4678, + "step": 114150 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031807331086094684, + "loss": 0.498, + "step": 114160 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031803091013165425, + "loss": 0.5165, + "step": 114170 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031798850940236177, + "loss": 0.5056, + "step": 114180 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003179461086730692, + "loss": 0.5435, + "step": 114190 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031790370794377665, + "loss": 0.4972, + "step": 114200 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031786130721448406, + "loss": 0.4603, + "step": 114210 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003178189064851916, + "loss": 0.5162, + "step": 114220 + }, + { + "epoch": 4.83, + "learning_rate": 0.000317776505755899, + "loss": 0.4765, + "step": 114230 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031773410502660646, + "loss": 0.4669, + "step": 114240 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031769170429731393, + "loss": 0.5141, + "step": 114250 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003176493035680214, + "loss": 0.4732, + "step": 114260 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003176069028387288, + "loss": 0.468, + "step": 114270 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031756450210943633, + "loss": 0.594, + "step": 114280 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031752210138014374, + "loss": 0.5564, + "step": 114290 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003174797006508512, + "loss": 0.5708, + "step": 114300 + }, + { + "epoch": 4.83, + "learning_rate": 0.0003174372999215587, + "loss": 0.4275, + "step": 114310 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031739489919226614, + "loss": 0.5683, + "step": 114320 + }, + { + "epoch": 4.83, + "learning_rate": 0.00031735249846297355, + "loss": 0.4596, + "step": 114330 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003173100977336811, + "loss": 0.5099, + "step": 114340 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003172676970043885, + "loss": 0.5392, + "step": 114350 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031722529627509595, + "loss": 0.6219, + "step": 114360 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031718289554580337, + "loss": 0.4468, + "step": 114370 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003171404948165109, + "loss": 0.4923, + "step": 114380 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003170980940872183, + "loss": 0.4412, + "step": 114390 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031705569335792577, + "loss": 0.5173, + "step": 114400 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031701329262863323, + "loss": 0.476, + "step": 114410 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003169708918993407, + "loss": 0.4938, + "step": 114420 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003169284911700481, + "loss": 0.4378, + "step": 114430 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031688609044075563, + "loss": 0.4715, + "step": 114440 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031684368971146305, + "loss": 0.5672, + "step": 114450 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003168012889821705, + "loss": 0.5265, + "step": 114460 + }, + { + "epoch": 4.84, + "learning_rate": 0.000316758888252878, + "loss": 0.4606, + "step": 114470 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031671648752358545, + "loss": 0.6111, + "step": 114480 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031667408679429286, + "loss": 0.5009, + "step": 114490 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003166316860650004, + "loss": 0.5492, + "step": 114500 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003165892853357078, + "loss": 0.5697, + "step": 114510 + }, + { + "epoch": 4.84, + "learning_rate": 0.00031654688460641526, + "loss": 0.5448, + "step": 114520 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003165044838771227, + "loss": 0.6029, + "step": 114530 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003164620831478302, + "loss": 0.4192, + "step": 114540 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003164196824185376, + "loss": 0.4536, + "step": 114550 + }, + { + "epoch": 4.84, + "learning_rate": 0.0003163772816892451, + "loss": 0.4399, + "step": 114560 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031633488095995254, + "loss": 0.4574, + "step": 114570 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031629248023066, + "loss": 0.4795, + "step": 114580 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003162500795013674, + "loss": 0.3926, + "step": 114590 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003162076787720749, + "loss": 0.5373, + "step": 114600 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031616527804278235, + "loss": 0.4229, + "step": 114610 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003161228773134898, + "loss": 0.4483, + "step": 114620 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003160804765841973, + "loss": 0.4951, + "step": 114630 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003160380758549047, + "loss": 0.471, + "step": 114640 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031599567512561217, + "loss": 0.4534, + "step": 114650 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031595327439631963, + "loss": 0.5066, + "step": 114660 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003159108736670271, + "loss": 0.5055, + "step": 114670 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003158684729377345, + "loss": 0.5275, + "step": 114680 + }, + { + "epoch": 4.85, + "learning_rate": 0.000315826072208442, + "loss": 0.5081, + "step": 114690 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031578367147914945, + "loss": 0.5429, + "step": 114700 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003157412707498569, + "loss": 0.4962, + "step": 114710 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003156988700205643, + "loss": 0.5458, + "step": 114720 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031565646929127185, + "loss": 0.509, + "step": 114730 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031561406856197926, + "loss": 0.518, + "step": 114740 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003155716678326867, + "loss": 0.5391, + "step": 114750 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003155292671033942, + "loss": 0.5587, + "step": 114760 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031548686637410166, + "loss": 0.5339, + "step": 114770 + }, + { + "epoch": 4.85, + "learning_rate": 0.00031544446564480907, + "loss": 0.5164, + "step": 114780 + }, + { + "epoch": 4.85, + "learning_rate": 0.0003154020649155166, + "loss": 0.4865, + "step": 114790 + }, + { + "epoch": 4.85, + "learning_rate": 0.000315359664186224, + "loss": 0.4402, + "step": 114800 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031531726345693147, + "loss": 0.4831, + "step": 114810 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003152748627276389, + "loss": 0.5722, + "step": 114820 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003152324619983464, + "loss": 0.4701, + "step": 114830 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003151900612690538, + "loss": 0.5209, + "step": 114840 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003151476605397613, + "loss": 0.4506, + "step": 114850 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031510525981046875, + "loss": 0.487, + "step": 114860 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003150628590811762, + "loss": 0.4443, + "step": 114870 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031502045835188363, + "loss": 0.4866, + "step": 114880 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031497805762259115, + "loss": 0.5079, + "step": 114890 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031493565689329856, + "loss": 0.4953, + "step": 114900 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031489325616400603, + "loss": 0.563, + "step": 114910 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003148508554347135, + "loss": 0.5369, + "step": 114920 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031480845470542097, + "loss": 0.4302, + "step": 114930 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003147660539761284, + "loss": 0.4927, + "step": 114940 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003147236532468359, + "loss": 0.4455, + "step": 114950 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003146812525175433, + "loss": 0.5113, + "step": 114960 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003146388517882508, + "loss": 0.5523, + "step": 114970 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003145964510589582, + "loss": 0.4765, + "step": 114980 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003145540503296657, + "loss": 0.5701, + "step": 114990 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003145116496003731, + "loss": 0.4651, + "step": 115000 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003144692488710806, + "loss": 0.5096, + "step": 115010 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031442684814178806, + "loss": 0.4172, + "step": 115020 + }, + { + "epoch": 4.86, + "learning_rate": 0.0003143844474124955, + "loss": 0.5277, + "step": 115030 + }, + { + "epoch": 4.86, + "learning_rate": 0.00031434204668320294, + "loss": 0.4566, + "step": 115040 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031429964595391046, + "loss": 0.4837, + "step": 115050 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031425724522461787, + "loss": 0.4456, + "step": 115060 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031421484449532534, + "loss": 0.4734, + "step": 115070 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003141724437660328, + "loss": 0.4867, + "step": 115080 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031413004303674027, + "loss": 0.4671, + "step": 115090 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003140876423074477, + "loss": 0.4839, + "step": 115100 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003140452415781552, + "loss": 0.546, + "step": 115110 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003140028408488626, + "loss": 0.4234, + "step": 115120 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003139604401195701, + "loss": 0.5237, + "step": 115130 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003139180393902775, + "loss": 0.4662, + "step": 115140 + }, + { + "epoch": 4.87, + "learning_rate": 0.000313875638660985, + "loss": 0.5599, + "step": 115150 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031383323793169243, + "loss": 0.4377, + "step": 115160 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003137908372023999, + "loss": 0.427, + "step": 115170 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031374843647310736, + "loss": 0.5629, + "step": 115180 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031370603574381483, + "loss": 0.493, + "step": 115190 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031366363501452224, + "loss": 0.548, + "step": 115200 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003136212342852297, + "loss": 0.5536, + "step": 115210 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003135788335559372, + "loss": 0.4575, + "step": 115220 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031353643282664464, + "loss": 0.4625, + "step": 115230 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003134940320973521, + "loss": 0.4658, + "step": 115240 + }, + { + "epoch": 4.87, + "learning_rate": 0.0003134516313680596, + "loss": 0.4834, + "step": 115250 + }, + { + "epoch": 4.87, + "learning_rate": 0.000313409230638767, + "loss": 0.5417, + "step": 115260 + }, + { + "epoch": 4.87, + "learning_rate": 0.00031336682990947446, + "loss": 0.4556, + "step": 115270 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003133244291801819, + "loss": 0.5109, + "step": 115280 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003132820284508894, + "loss": 0.5083, + "step": 115290 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003132396277215968, + "loss": 0.4481, + "step": 115300 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031319722699230427, + "loss": 0.447, + "step": 115310 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031315482626301174, + "loss": 0.4037, + "step": 115320 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003131124255337192, + "loss": 0.5308, + "step": 115330 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031307002480442667, + "loss": 0.4922, + "step": 115340 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003130276240751341, + "loss": 0.5148, + "step": 115350 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031298522334584155, + "loss": 0.5091, + "step": 115360 + }, + { + "epoch": 4.88, + "learning_rate": 0.000312942822616549, + "loss": 0.4997, + "step": 115370 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003129004218872565, + "loss": 0.4475, + "step": 115380 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003128580211579639, + "loss": 0.5875, + "step": 115390 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003128156204286714, + "loss": 0.4616, + "step": 115400 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031277321969937883, + "loss": 0.4852, + "step": 115410 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003127308189700863, + "loss": 0.4862, + "step": 115420 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003126884182407937, + "loss": 0.4595, + "step": 115430 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031264601751150123, + "loss": 0.4773, + "step": 115440 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031260361678220864, + "loss": 0.4843, + "step": 115450 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003125612160529161, + "loss": 0.5217, + "step": 115460 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003125188153236236, + "loss": 0.4963, + "step": 115470 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031247641459433104, + "loss": 0.4834, + "step": 115480 + }, + { + "epoch": 4.88, + "learning_rate": 0.00031243401386503845, + "loss": 0.4112, + "step": 115490 + }, + { + "epoch": 4.88, + "learning_rate": 0.000312391613135746, + "loss": 0.4696, + "step": 115500 + }, + { + "epoch": 4.88, + "learning_rate": 0.0003123492124064534, + "loss": 0.5304, + "step": 115510 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031230681167716086, + "loss": 0.4048, + "step": 115520 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003122644109478683, + "loss": 0.5304, + "step": 115530 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003122220102185758, + "loss": 0.5368, + "step": 115540 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003121796094892832, + "loss": 0.5394, + "step": 115550 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003121372087599907, + "loss": 0.3897, + "step": 115560 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031209480803069814, + "loss": 0.585, + "step": 115570 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003120524073014056, + "loss": 0.4862, + "step": 115580 + }, + { + "epoch": 4.89, + "learning_rate": 0.000312010006572113, + "loss": 0.5455, + "step": 115590 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031196760584282054, + "loss": 0.4814, + "step": 115600 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031192520511352795, + "loss": 0.5085, + "step": 115610 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003118828043842354, + "loss": 0.526, + "step": 115620 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003118404036549429, + "loss": 0.4809, + "step": 115630 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031179800292565035, + "loss": 0.4463, + "step": 115640 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031175560219635776, + "loss": 0.4926, + "step": 115650 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003117132014670653, + "loss": 0.4983, + "step": 115660 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003116708007377727, + "loss": 0.6062, + "step": 115670 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031162840000848016, + "loss": 0.4649, + "step": 115680 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031158599927918763, + "loss": 0.418, + "step": 115690 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003115435985498951, + "loss": 0.4652, + "step": 115700 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003115011978206025, + "loss": 0.4635, + "step": 115710 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031145879709131003, + "loss": 0.5309, + "step": 115720 + }, + { + "epoch": 4.89, + "learning_rate": 0.00031141639636201744, + "loss": 0.5159, + "step": 115730 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003113739956327249, + "loss": 0.5028, + "step": 115740 + }, + { + "epoch": 4.89, + "learning_rate": 0.0003113315949034323, + "loss": 0.4386, + "step": 115750 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031128919417413984, + "loss": 0.494, + "step": 115760 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031124679344484725, + "loss": 0.4982, + "step": 115770 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003112043927155547, + "loss": 0.4231, + "step": 115780 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003111619919862622, + "loss": 0.5679, + "step": 115790 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031111959125696965, + "loss": 0.5091, + "step": 115800 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031107719052767707, + "loss": 0.5177, + "step": 115810 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003110347897983846, + "loss": 0.4842, + "step": 115820 + }, + { + "epoch": 4.9, + "learning_rate": 0.000310992389069092, + "loss": 0.4924, + "step": 115830 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031094998833979947, + "loss": 0.4781, + "step": 115840 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031090758761050693, + "loss": 0.5058, + "step": 115850 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003108651868812144, + "loss": 0.508, + "step": 115860 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003108227861519218, + "loss": 0.5045, + "step": 115870 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003107803854226293, + "loss": 0.4434, + "step": 115880 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031073798469333675, + "loss": 0.4744, + "step": 115890 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003106955839640442, + "loss": 0.5654, + "step": 115900 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003106531832347516, + "loss": 0.4187, + "step": 115910 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003106107825054591, + "loss": 0.4239, + "step": 115920 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031056838177616656, + "loss": 0.5591, + "step": 115930 + }, + { + "epoch": 4.9, + "learning_rate": 0.000310525981046874, + "loss": 0.4386, + "step": 115940 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003104835803175815, + "loss": 0.4639, + "step": 115950 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003104411795882889, + "loss": 0.4677, + "step": 115960 + }, + { + "epoch": 4.9, + "learning_rate": 0.0003103987788589964, + "loss": 0.5596, + "step": 115970 + }, + { + "epoch": 4.9, + "learning_rate": 0.00031035637812970384, + "loss": 0.4698, + "step": 115980 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003103139774004113, + "loss": 0.5218, + "step": 115990 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003102715766711188, + "loss": 0.4619, + "step": 116000 + }, + { + "epoch": 4.91, + "learning_rate": 0.00031022917594182624, + "loss": 0.545, + "step": 116010 + }, + { + "epoch": 4.91, + "learning_rate": 0.00031018677521253365, + "loss": 0.5864, + "step": 116020 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003101443744832411, + "loss": 0.5749, + "step": 116030 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003101019737539486, + "loss": 0.3888, + "step": 116040 + }, + { + "epoch": 4.91, + "learning_rate": 0.00031005957302465605, + "loss": 0.4275, + "step": 116050 + }, + { + "epoch": 4.91, + "learning_rate": 0.00031001717229536347, + "loss": 0.494, + "step": 116060 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030997477156607093, + "loss": 0.475, + "step": 116070 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003099323708367784, + "loss": 0.4768, + "step": 116080 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030988997010748587, + "loss": 0.4983, + "step": 116090 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003098475693781933, + "loss": 0.4784, + "step": 116100 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003098051686489008, + "loss": 0.5156, + "step": 116110 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003097627679196082, + "loss": 0.4454, + "step": 116120 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003097203671903157, + "loss": 0.4646, + "step": 116130 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030967796646102315, + "loss": 0.5723, + "step": 116140 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003096355657317306, + "loss": 0.481, + "step": 116150 + }, + { + "epoch": 4.91, + "learning_rate": 0.000309593165002438, + "loss": 0.516, + "step": 116160 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030955076427314555, + "loss": 0.5028, + "step": 116170 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030950836354385296, + "loss": 0.5142, + "step": 116180 + }, + { + "epoch": 4.91, + "learning_rate": 0.0003094659628145604, + "loss": 0.5335, + "step": 116190 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030942356208526784, + "loss": 0.5553, + "step": 116200 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030938116135597536, + "loss": 0.5141, + "step": 116210 + }, + { + "epoch": 4.91, + "learning_rate": 0.00030933876062668277, + "loss": 0.4998, + "step": 116220 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030929635989739024, + "loss": 0.5709, + "step": 116230 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003092539591680977, + "loss": 0.4947, + "step": 116240 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030921155843880517, + "loss": 0.5475, + "step": 116250 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003091691577095126, + "loss": 0.5793, + "step": 116260 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003091267569802201, + "loss": 0.5252, + "step": 116270 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003090843562509275, + "loss": 0.5052, + "step": 116280 + }, + { + "epoch": 4.92, + "learning_rate": 0.000309041955521635, + "loss": 0.4926, + "step": 116290 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030899955479234245, + "loss": 0.51, + "step": 116300 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003089571540630499, + "loss": 0.5582, + "step": 116310 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030891475333375733, + "loss": 0.5628, + "step": 116320 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030887235260446485, + "loss": 0.4366, + "step": 116330 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030882995187517226, + "loss": 0.505, + "step": 116340 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030878755114587973, + "loss": 0.4806, + "step": 116350 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030874515041658714, + "loss": 0.4482, + "step": 116360 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030870274968729467, + "loss": 0.5082, + "step": 116370 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003086603489580021, + "loss": 0.5105, + "step": 116380 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030861794822870954, + "loss": 0.4213, + "step": 116390 + }, + { + "epoch": 4.92, + "learning_rate": 0.000308575547499417, + "loss": 0.5633, + "step": 116400 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003085331467701245, + "loss": 0.4861, + "step": 116410 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003084907460408319, + "loss": 0.5051, + "step": 116420 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003084483453115394, + "loss": 0.4623, + "step": 116430 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003084059445822468, + "loss": 0.5376, + "step": 116440 + }, + { + "epoch": 4.92, + "learning_rate": 0.0003083635438529543, + "loss": 0.563, + "step": 116450 + }, + { + "epoch": 4.92, + "learning_rate": 0.00030832114312366176, + "loss": 0.4177, + "step": 116460 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003082787423943692, + "loss": 0.4607, + "step": 116470 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030823634166507664, + "loss": 0.6, + "step": 116480 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030819394093578416, + "loss": 0.4971, + "step": 116490 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030815154020649157, + "loss": 0.4944, + "step": 116500 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030810913947719904, + "loss": 0.3751, + "step": 116510 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030806673874790645, + "loss": 0.4569, + "step": 116520 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030802433801861397, + "loss": 0.4815, + "step": 116530 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003079819372893214, + "loss": 0.5019, + "step": 116540 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030793953656002885, + "loss": 0.4659, + "step": 116550 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003078971358307363, + "loss": 0.5544, + "step": 116560 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003078547351014438, + "loss": 0.4565, + "step": 116570 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003078123343721512, + "loss": 0.5113, + "step": 116580 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030776993364285866, + "loss": 0.4616, + "step": 116590 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030772753291356613, + "loss": 0.4732, + "step": 116600 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003076851321842736, + "loss": 0.5328, + "step": 116610 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030764273145498106, + "loss": 0.5333, + "step": 116620 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003076003307256885, + "loss": 0.5194, + "step": 116630 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030755792999639594, + "loss": 0.5152, + "step": 116640 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003075155292671034, + "loss": 0.4664, + "step": 116650 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003074731285378109, + "loss": 0.504, + "step": 116660 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003074307278085183, + "loss": 0.46, + "step": 116670 + }, + { + "epoch": 4.93, + "learning_rate": 0.00030738832707922576, + "loss": 0.4192, + "step": 116680 + }, + { + "epoch": 4.93, + "learning_rate": 0.0003073459263499332, + "loss": 0.4158, + "step": 116690 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003073035256206407, + "loss": 0.5494, + "step": 116700 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003072611248913481, + "loss": 0.5004, + "step": 116710 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003072187241620556, + "loss": 0.4949, + "step": 116720 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030717632343276304, + "loss": 0.4833, + "step": 116730 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003071339227034705, + "loss": 0.5164, + "step": 116740 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030709152197417797, + "loss": 0.5109, + "step": 116750 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030704912124488544, + "loss": 0.5301, + "step": 116760 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030700672051559285, + "loss": 0.4843, + "step": 116770 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030696431978630037, + "loss": 0.4544, + "step": 116780 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003069219190570078, + "loss": 0.5131, + "step": 116790 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030687951832771525, + "loss": 0.4595, + "step": 116800 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003068371175984227, + "loss": 0.4926, + "step": 116810 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003067947168691302, + "loss": 0.4475, + "step": 116820 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003067523161398376, + "loss": 0.4522, + "step": 116830 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030670991541054506, + "loss": 0.4752, + "step": 116840 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030666751468125253, + "loss": 0.4368, + "step": 116850 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030662511395196, + "loss": 0.4989, + "step": 116860 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003065827132226674, + "loss": 0.5151, + "step": 116870 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030654031249337493, + "loss": 0.5249, + "step": 116880 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030649791176408234, + "loss": 0.4855, + "step": 116890 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003064555110347898, + "loss": 0.5098, + "step": 116900 + }, + { + "epoch": 4.94, + "learning_rate": 0.0003064131103054973, + "loss": 0.4798, + "step": 116910 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030637070957620474, + "loss": 0.4192, + "step": 116920 + }, + { + "epoch": 4.94, + "learning_rate": 0.00030632830884691215, + "loss": 0.4261, + "step": 116930 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003062859081176197, + "loss": 0.5457, + "step": 116940 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003062435073883271, + "loss": 0.4903, + "step": 116950 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030620110665903456, + "loss": 0.5544, + "step": 116960 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030615870592974197, + "loss": 0.5157, + "step": 116970 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003061163052004495, + "loss": 0.4232, + "step": 116980 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003060739044711569, + "loss": 0.5069, + "step": 116990 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030603150374186437, + "loss": 0.5024, + "step": 117000 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030598910301257183, + "loss": 0.5531, + "step": 117010 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003059467022832793, + "loss": 0.4584, + "step": 117020 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003059043015539867, + "loss": 0.4407, + "step": 117030 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030586190082469424, + "loss": 0.4469, + "step": 117040 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030581950009540165, + "loss": 0.5155, + "step": 117050 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003057770993661091, + "loss": 0.5061, + "step": 117060 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003057346986368166, + "loss": 0.5654, + "step": 117070 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030569229790752405, + "loss": 0.4628, + "step": 117080 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030564989717823146, + "loss": 0.5373, + "step": 117090 + }, + { + "epoch": 4.95, + "learning_rate": 0.000305607496448939, + "loss": 0.5112, + "step": 117100 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003055650957196464, + "loss": 0.5358, + "step": 117110 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030552269499035386, + "loss": 0.5419, + "step": 117120 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003054802942610613, + "loss": 0.4735, + "step": 117130 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003054378935317688, + "loss": 0.4375, + "step": 117140 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003053954928024762, + "loss": 0.5052, + "step": 117150 + }, + { + "epoch": 4.95, + "learning_rate": 0.0003053530920731837, + "loss": 0.5533, + "step": 117160 + }, + { + "epoch": 4.95, + "learning_rate": 0.00030531069134389114, + "loss": 0.522, + "step": 117170 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003052682906145986, + "loss": 0.4975, + "step": 117180 + }, + { + "epoch": 4.96, + "learning_rate": 0.000305225889885306, + "loss": 0.5206, + "step": 117190 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003051834891560135, + "loss": 0.56, + "step": 117200 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030514108842672095, + "loss": 0.6001, + "step": 117210 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003050986876974284, + "loss": 0.5088, + "step": 117220 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003050562869681359, + "loss": 0.4581, + "step": 117230 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030501388623884335, + "loss": 0.5225, + "step": 117240 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030497148550955077, + "loss": 0.456, + "step": 117250 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030492908478025823, + "loss": 0.4829, + "step": 117260 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003048866840509657, + "loss": 0.4498, + "step": 117270 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030484428332167317, + "loss": 0.5406, + "step": 117280 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003048018825923806, + "loss": 0.448, + "step": 117290 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030475948186308805, + "loss": 0.4367, + "step": 117300 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003047170811337955, + "loss": 0.3891, + "step": 117310 + }, + { + "epoch": 4.96, + "learning_rate": 0.000304674680404503, + "loss": 0.4346, + "step": 117320 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030463227967521045, + "loss": 0.455, + "step": 117330 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030458987894591786, + "loss": 0.5004, + "step": 117340 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003045474782166253, + "loss": 0.5391, + "step": 117350 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003045050774873328, + "loss": 0.4236, + "step": 117360 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030446267675804026, + "loss": 0.5025, + "step": 117370 + }, + { + "epoch": 4.96, + "learning_rate": 0.00030442027602874767, + "loss": 0.5744, + "step": 117380 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003043778752994552, + "loss": 0.5166, + "step": 117390 + }, + { + "epoch": 4.96, + "learning_rate": 0.0003043354745701626, + "loss": 0.5652, + "step": 117400 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030429307384087007, + "loss": 0.485, + "step": 117410 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030425067311157754, + "loss": 0.4593, + "step": 117420 + }, + { + "epoch": 4.97, + "learning_rate": 0.000304208272382285, + "loss": 0.5312, + "step": 117430 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003041658716529924, + "loss": 0.4736, + "step": 117440 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003041234709236999, + "loss": 0.5684, + "step": 117450 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030408107019440735, + "loss": 0.492, + "step": 117460 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003040386694651148, + "loss": 0.5304, + "step": 117470 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030399626873582223, + "loss": 0.4837, + "step": 117480 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030395386800652975, + "loss": 0.4962, + "step": 117490 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030391146727723717, + "loss": 0.4648, + "step": 117500 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030386906654794463, + "loss": 0.5192, + "step": 117510 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003038266658186521, + "loss": 0.4705, + "step": 117520 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030378426508935957, + "loss": 0.5472, + "step": 117530 + }, + { + "epoch": 4.97, + "learning_rate": 0.000303741864360067, + "loss": 0.499, + "step": 117540 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003036994636307745, + "loss": 0.5548, + "step": 117550 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003036570629014819, + "loss": 0.3976, + "step": 117560 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003036146621721894, + "loss": 0.4767, + "step": 117570 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003035722614428968, + "loss": 0.4652, + "step": 117580 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003035298607136043, + "loss": 0.5103, + "step": 117590 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003034874599843117, + "loss": 0.538, + "step": 117600 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003034450592550192, + "loss": 0.4392, + "step": 117610 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030340265852572666, + "loss": 0.4984, + "step": 117620 + }, + { + "epoch": 4.97, + "learning_rate": 0.0003033602577964341, + "loss": 0.5534, + "step": 117630 + }, + { + "epoch": 4.97, + "learning_rate": 0.00030331785706714154, + "loss": 0.5756, + "step": 117640 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030327545633784906, + "loss": 0.5292, + "step": 117650 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030323305560855647, + "loss": 0.482, + "step": 117660 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030319065487926394, + "loss": 0.4755, + "step": 117670 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003031482541499714, + "loss": 0.401, + "step": 117680 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030310585342067887, + "loss": 0.3924, + "step": 117690 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003030634526913863, + "loss": 0.6677, + "step": 117700 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003030210519620938, + "loss": 0.5163, + "step": 117710 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003029786512328012, + "loss": 0.5435, + "step": 117720 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003029362505035087, + "loss": 0.5297, + "step": 117730 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003028938497742161, + "loss": 0.4901, + "step": 117740 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003028514490449236, + "loss": 0.494, + "step": 117750 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030280904831563103, + "loss": 0.5158, + "step": 117760 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003027666475863385, + "loss": 0.4836, + "step": 117770 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030272424685704596, + "loss": 0.5246, + "step": 117780 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030268184612775343, + "loss": 0.5265, + "step": 117790 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030263944539846084, + "loss": 0.4729, + "step": 117800 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030259704466916836, + "loss": 0.4861, + "step": 117810 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003025546439398758, + "loss": 0.4759, + "step": 117820 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030251224321058324, + "loss": 0.5824, + "step": 117830 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003024698424812907, + "loss": 0.5232, + "step": 117840 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003024274417519982, + "loss": 0.4556, + "step": 117850 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003023850410227056, + "loss": 0.4461, + "step": 117860 + }, + { + "epoch": 4.98, + "learning_rate": 0.00030234264029341306, + "loss": 0.506, + "step": 117870 + }, + { + "epoch": 4.98, + "learning_rate": 0.0003023002395641205, + "loss": 0.433, + "step": 117880 + }, + { + "epoch": 4.99, + "learning_rate": 0.000302257838834828, + "loss": 0.4924, + "step": 117890 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003022154381055354, + "loss": 0.4476, + "step": 117900 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030217303737624287, + "loss": 0.5044, + "step": 117910 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030213063664695034, + "loss": 0.5141, + "step": 117920 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003020882359176578, + "loss": 0.4987, + "step": 117930 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030204583518836527, + "loss": 0.5272, + "step": 117940 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003020034344590727, + "loss": 0.4535, + "step": 117950 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030196103372978015, + "loss": 0.5096, + "step": 117960 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003019186330004876, + "loss": 0.4555, + "step": 117970 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003018762322711951, + "loss": 0.4815, + "step": 117980 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030183383154190255, + "loss": 0.4404, + "step": 117990 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030179143081261, + "loss": 0.4458, + "step": 118000 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030174903008331743, + "loss": 0.4571, + "step": 118010 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003017066293540249, + "loss": 0.4371, + "step": 118020 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030166422862473236, + "loss": 0.4887, + "step": 118030 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030162182789543983, + "loss": 0.5693, + "step": 118040 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030157942716614724, + "loss": 0.5188, + "step": 118050 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003015370264368547, + "loss": 0.4771, + "step": 118060 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003014946257075622, + "loss": 0.4985, + "step": 118070 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030145222497826964, + "loss": 0.5457, + "step": 118080 + }, + { + "epoch": 4.99, + "learning_rate": 0.00030140982424897706, + "loss": 0.5065, + "step": 118090 + }, + { + "epoch": 4.99, + "learning_rate": 0.0003013674235196846, + "loss": 0.4892, + "step": 118100 + }, + { + "epoch": 4.99, + "learning_rate": 0.000301325022790392, + "loss": 0.424, + "step": 118110 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030128262206109946, + "loss": 0.4578, + "step": 118120 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003012402213318069, + "loss": 0.5222, + "step": 118130 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003011978206025144, + "loss": 0.5164, + "step": 118140 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003011554198732218, + "loss": 0.4121, + "step": 118150 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003011130191439293, + "loss": 0.5155, + "step": 118160 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030107061841463674, + "loss": 0.573, + "step": 118170 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003010282176853442, + "loss": 0.5218, + "step": 118180 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030098581695605167, + "loss": 0.5038, + "step": 118190 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030094341622675914, + "loss": 0.4662, + "step": 118200 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030090101549746655, + "loss": 0.5325, + "step": 118210 + }, + { + "epoch": 5.0, + "learning_rate": 0.000300858614768174, + "loss": 0.5972, + "step": 118220 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003008162140388815, + "loss": 0.4889, + "step": 118230 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030077381330958895, + "loss": 0.4908, + "step": 118240 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030073141258029636, + "loss": 0.4795, + "step": 118250 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003006890118510039, + "loss": 0.3743, + "step": 118260 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003006466111217113, + "loss": 0.3919, + "step": 118270 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030060421039241876, + "loss": 0.4698, + "step": 118280 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030056180966312623, + "loss": 0.4007, + "step": 118290 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003005194089338337, + "loss": 0.4578, + "step": 118300 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003004770082045411, + "loss": 0.49, + "step": 118310 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030043460747524863, + "loss": 0.4356, + "step": 118320 + }, + { + "epoch": 5.0, + "learning_rate": 0.00030039220674595604, + "loss": 0.4511, + "step": 118330 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003003498060166635, + "loss": 0.4216, + "step": 118340 + }, + { + "epoch": 5.0, + "learning_rate": 0.0003003074052873709, + "loss": 0.4491, + "step": 118350 + }, + { + "epoch": 5.01, + "learning_rate": 0.00030026500455807844, + "loss": 0.5323, + "step": 118360 + }, + { + "epoch": 5.01, + "learning_rate": 0.00030022260382878585, + "loss": 0.4616, + "step": 118370 + }, + { + "epoch": 5.01, + "learning_rate": 0.0003001802030994933, + "loss": 0.3807, + "step": 118380 + }, + { + "epoch": 5.01, + "learning_rate": 0.0003001378023702008, + "loss": 0.4736, + "step": 118390 + }, + { + "epoch": 5.01, + "learning_rate": 0.00030009540164090825, + "loss": 0.4643, + "step": 118400 + }, + { + "epoch": 5.01, + "learning_rate": 0.00030005300091161567, + "loss": 0.4294, + "step": 118410 + }, + { + "epoch": 5.01, + "learning_rate": 0.0003000106001823232, + "loss": 0.4354, + "step": 118420 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002999681994530306, + "loss": 0.3918, + "step": 118430 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029992579872373807, + "loss": 0.4468, + "step": 118440 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029988339799444553, + "loss": 0.4394, + "step": 118450 + }, + { + "epoch": 5.01, + "learning_rate": 0.000299840997265153, + "loss": 0.3717, + "step": 118460 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002997985965358604, + "loss": 0.4624, + "step": 118470 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002997561958065679, + "loss": 0.4499, + "step": 118480 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029971379507727535, + "loss": 0.4401, + "step": 118490 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002996713943479828, + "loss": 0.433, + "step": 118500 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002996289936186902, + "loss": 0.4414, + "step": 118510 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029958659288939775, + "loss": 0.4363, + "step": 118520 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029954419216010516, + "loss": 0.3571, + "step": 118530 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029950179143081263, + "loss": 0.4909, + "step": 118540 + }, + { + "epoch": 5.01, + "learning_rate": 0.0002994593907015201, + "loss": 0.4166, + "step": 118550 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029941698997222756, + "loss": 0.3843, + "step": 118560 + }, + { + "epoch": 5.01, + "learning_rate": 0.000299374589242935, + "loss": 0.4603, + "step": 118570 + }, + { + "epoch": 5.01, + "learning_rate": 0.00029933218851364244, + "loss": 0.479, + "step": 118580 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002992897877843499, + "loss": 0.3815, + "step": 118590 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002992473870550574, + "loss": 0.4867, + "step": 118600 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029920498632576484, + "loss": 0.4444, + "step": 118610 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029916258559647225, + "loss": 0.4278, + "step": 118620 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002991201848671797, + "loss": 0.441, + "step": 118630 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002990777841378872, + "loss": 0.4316, + "step": 118640 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029903538340859465, + "loss": 0.4239, + "step": 118650 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029899298267930207, + "loss": 0.4164, + "step": 118660 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029895058195000953, + "loss": 0.5171, + "step": 118670 + }, + { + "epoch": 5.02, + "learning_rate": 0.000298908181220717, + "loss": 0.3567, + "step": 118680 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029886578049142447, + "loss": 0.4179, + "step": 118690 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002988233797621319, + "loss": 0.407, + "step": 118700 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002987809790328394, + "loss": 0.4484, + "step": 118710 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002987385783035468, + "loss": 0.3892, + "step": 118720 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002986961775742543, + "loss": 0.3902, + "step": 118730 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029865377684496175, + "loss": 0.407, + "step": 118740 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002986113761156692, + "loss": 0.4316, + "step": 118750 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002985689753863766, + "loss": 0.4246, + "step": 118760 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029852657465708415, + "loss": 0.386, + "step": 118770 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029848417392779156, + "loss": 0.5008, + "step": 118780 + }, + { + "epoch": 5.02, + "learning_rate": 0.000298441773198499, + "loss": 0.3723, + "step": 118790 + }, + { + "epoch": 5.02, + "learning_rate": 0.0002983993724692065, + "loss": 0.4671, + "step": 118800 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029835697173991396, + "loss": 0.4579, + "step": 118810 + }, + { + "epoch": 5.02, + "learning_rate": 0.00029831457101062137, + "loss": 0.508, + "step": 118820 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029827217028132884, + "loss": 0.5177, + "step": 118830 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002982297695520363, + "loss": 0.4905, + "step": 118840 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029818736882274377, + "loss": 0.509, + "step": 118850 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002981449680934512, + "loss": 0.4522, + "step": 118860 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002981025673641587, + "loss": 0.4388, + "step": 118870 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002980601666348661, + "loss": 0.4781, + "step": 118880 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002980177659055736, + "loss": 0.4532, + "step": 118890 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029797536517628105, + "loss": 0.4153, + "step": 118900 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002979329644469885, + "loss": 0.4216, + "step": 118910 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029789056371769593, + "loss": 0.4545, + "step": 118920 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029784816298840345, + "loss": 0.3802, + "step": 118930 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029780576225911087, + "loss": 0.4638, + "step": 118940 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029776336152981833, + "loss": 0.4715, + "step": 118950 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029772096080052574, + "loss": 0.4514, + "step": 118960 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029767856007123327, + "loss": 0.4674, + "step": 118970 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002976361593419407, + "loss": 0.3853, + "step": 118980 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029759375861264814, + "loss": 0.4092, + "step": 118990 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002975513578833556, + "loss": 0.3981, + "step": 119000 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002975089571540631, + "loss": 0.4984, + "step": 119010 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002974665564247705, + "loss": 0.4744, + "step": 119020 + }, + { + "epoch": 5.03, + "learning_rate": 0.000297424155695478, + "loss": 0.4588, + "step": 119030 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002973817549661854, + "loss": 0.4485, + "step": 119040 + }, + { + "epoch": 5.03, + "learning_rate": 0.0002973393542368929, + "loss": 0.4269, + "step": 119050 + }, + { + "epoch": 5.03, + "learning_rate": 0.00029729695350760036, + "loss": 0.4061, + "step": 119060 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002972545527783078, + "loss": 0.3342, + "step": 119070 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029721215204901524, + "loss": 0.396, + "step": 119080 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029716975131972276, + "loss": 0.4375, + "step": 119090 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029712735059043017, + "loss": 0.4536, + "step": 119100 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029708494986113764, + "loss": 0.5258, + "step": 119110 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029704254913184505, + "loss": 0.5019, + "step": 119120 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029700014840255257, + "loss": 0.4427, + "step": 119130 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029695774767326, + "loss": 0.4917, + "step": 119140 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029691534694396745, + "loss": 0.476, + "step": 119150 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002968729462146749, + "loss": 0.4614, + "step": 119160 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002968305454853824, + "loss": 0.4864, + "step": 119170 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002967881447560898, + "loss": 0.4864, + "step": 119180 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029674574402679726, + "loss": 0.4458, + "step": 119190 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029670334329750473, + "loss": 0.4074, + "step": 119200 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002966609425682122, + "loss": 0.4787, + "step": 119210 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029661854183891966, + "loss": 0.4457, + "step": 119220 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002965761411096271, + "loss": 0.4461, + "step": 119230 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029653374038033454, + "loss": 0.4378, + "step": 119240 + }, + { + "epoch": 5.04, + "learning_rate": 0.000296491339651042, + "loss": 0.4709, + "step": 119250 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002964489389217495, + "loss": 0.4948, + "step": 119260 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029640653819245694, + "loss": 0.4355, + "step": 119270 + }, + { + "epoch": 5.04, + "learning_rate": 0.00029636413746316436, + "loss": 0.4555, + "step": 119280 + }, + { + "epoch": 5.04, + "learning_rate": 0.0002963217367338718, + "loss": 0.4187, + "step": 119290 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002962793360045793, + "loss": 0.4291, + "step": 119300 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029623693527528676, + "loss": 0.4265, + "step": 119310 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002961945345459942, + "loss": 0.452, + "step": 119320 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029615213381670164, + "loss": 0.3971, + "step": 119330 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002961097330874091, + "loss": 0.4395, + "step": 119340 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029606733235811657, + "loss": 0.4639, + "step": 119350 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029602493162882404, + "loss": 0.4533, + "step": 119360 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029598253089953145, + "loss": 0.4588, + "step": 119370 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029594013017023897, + "loss": 0.4844, + "step": 119380 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002958977294409464, + "loss": 0.3619, + "step": 119390 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029585532871165385, + "loss": 0.4382, + "step": 119400 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002958129279823613, + "loss": 0.4357, + "step": 119410 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002957705272530688, + "loss": 0.4684, + "step": 119420 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002957281265237762, + "loss": 0.4458, + "step": 119430 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029568572579448366, + "loss": 0.3695, + "step": 119440 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029564332506519113, + "loss": 0.4284, + "step": 119450 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002956009243358986, + "loss": 0.472, + "step": 119460 + }, + { + "epoch": 5.05, + "learning_rate": 0.000295558523606606, + "loss": 0.4735, + "step": 119470 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029551612287731353, + "loss": 0.481, + "step": 119480 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029547372214802094, + "loss": 0.4329, + "step": 119490 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002954313214187284, + "loss": 0.4135, + "step": 119500 + }, + { + "epoch": 5.05, + "learning_rate": 0.0002953889206894359, + "loss": 0.427, + "step": 119510 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029534651996014334, + "loss": 0.4082, + "step": 119520 + }, + { + "epoch": 5.05, + "learning_rate": 0.00029530411923085076, + "loss": 0.4557, + "step": 119530 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002952617185015583, + "loss": 0.454, + "step": 119540 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002952193177722657, + "loss": 0.4995, + "step": 119550 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029517691704297316, + "loss": 0.409, + "step": 119560 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002951345163136806, + "loss": 0.4714, + "step": 119570 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002950921155843881, + "loss": 0.4417, + "step": 119580 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002950497148550955, + "loss": 0.4236, + "step": 119590 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029500731412580297, + "loss": 0.4311, + "step": 119600 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029496491339651044, + "loss": 0.3869, + "step": 119610 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002949225126672179, + "loss": 0.3998, + "step": 119620 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002948801119379253, + "loss": 0.4858, + "step": 119630 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029483771120863284, + "loss": 0.5396, + "step": 119640 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029479531047934025, + "loss": 0.4521, + "step": 119650 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002947529097500477, + "loss": 0.3859, + "step": 119660 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002947105090207552, + "loss": 0.406, + "step": 119670 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029466810829146265, + "loss": 0.4799, + "step": 119680 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029462570756217006, + "loss": 0.4349, + "step": 119690 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002945833068328776, + "loss": 0.4563, + "step": 119700 + }, + { + "epoch": 5.06, + "learning_rate": 0.000294540906103585, + "loss": 0.3544, + "step": 119710 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029449850537429246, + "loss": 0.4368, + "step": 119720 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002944561046449999, + "loss": 0.4207, + "step": 119730 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002944137039157074, + "loss": 0.4692, + "step": 119740 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002943713031864148, + "loss": 0.4174, + "step": 119750 + }, + { + "epoch": 5.06, + "learning_rate": 0.0002943289024571223, + "loss": 0.4493, + "step": 119760 + }, + { + "epoch": 5.06, + "learning_rate": 0.00029428650172782974, + "loss": 0.4576, + "step": 119770 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002942441009985372, + "loss": 0.3887, + "step": 119780 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002942017002692446, + "loss": 0.4793, + "step": 119790 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029415929953995214, + "loss": 0.4482, + "step": 119800 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029411689881065955, + "loss": 0.4218, + "step": 119810 + }, + { + "epoch": 5.07, + "learning_rate": 0.000294074498081367, + "loss": 0.4867, + "step": 119820 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002940320973520745, + "loss": 0.442, + "step": 119830 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029398969662278195, + "loss": 0.5015, + "step": 119840 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029394729589348937, + "loss": 0.476, + "step": 119850 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029390489516419683, + "loss": 0.4008, + "step": 119860 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002938624944349043, + "loss": 0.4272, + "step": 119870 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029382009370561177, + "loss": 0.4004, + "step": 119880 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002937776929763192, + "loss": 0.4223, + "step": 119890 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029373529224702665, + "loss": 0.5161, + "step": 119900 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002936928915177341, + "loss": 0.4125, + "step": 119910 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002936504907884416, + "loss": 0.3728, + "step": 119920 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029360809005914905, + "loss": 0.4935, + "step": 119930 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029356568932985646, + "loss": 0.3668, + "step": 119940 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002935232886005639, + "loss": 0.5353, + "step": 119950 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002934808878712714, + "loss": 0.406, + "step": 119960 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029343848714197886, + "loss": 0.3757, + "step": 119970 + }, + { + "epoch": 5.07, + "learning_rate": 0.00029339608641268627, + "loss": 0.49, + "step": 119980 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002933536856833938, + "loss": 0.4489, + "step": 119990 + }, + { + "epoch": 5.07, + "learning_rate": 0.0002933112849541012, + "loss": 0.4493, + "step": 120000 + }, + { + "epoch": 5.07, + "eval_loss": 0.6166871190071106, + "eval_runtime": 337.5835, + "eval_samples_per_second": 15.567, + "eval_steps_per_second": 3.892, + "step": 120000 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002932688842248087, + "loss": 0.4113, + "step": 120010 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029322648349551614, + "loss": 0.378, + "step": 120020 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002931840827662236, + "loss": 0.428, + "step": 120030 + }, + { + "epoch": 5.08, + "learning_rate": 0.000293141682036931, + "loss": 0.4916, + "step": 120040 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002930992813076385, + "loss": 0.4533, + "step": 120050 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029305688057834595, + "loss": 0.4051, + "step": 120060 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002930144798490534, + "loss": 0.5, + "step": 120070 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029297207911976083, + "loss": 0.4556, + "step": 120080 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029292967839046835, + "loss": 0.5262, + "step": 120090 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029288727766117577, + "loss": 0.4996, + "step": 120100 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029284487693188323, + "loss": 0.3569, + "step": 120110 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002928024762025907, + "loss": 0.4744, + "step": 120120 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029276007547329817, + "loss": 0.4336, + "step": 120130 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002927176747440056, + "loss": 0.4766, + "step": 120140 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002926752740147131, + "loss": 0.5059, + "step": 120150 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002926328732854205, + "loss": 0.5101, + "step": 120160 + }, + { + "epoch": 5.08, + "learning_rate": 0.000292590472556128, + "loss": 0.4061, + "step": 120170 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029254807182683545, + "loss": 0.4054, + "step": 120180 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002925056710975429, + "loss": 0.4351, + "step": 120190 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002924632703682503, + "loss": 0.4343, + "step": 120200 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002924208696389578, + "loss": 0.3402, + "step": 120210 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029237846890966526, + "loss": 0.3841, + "step": 120220 + }, + { + "epoch": 5.08, + "learning_rate": 0.0002923360681803727, + "loss": 0.4945, + "step": 120230 + }, + { + "epoch": 5.08, + "learning_rate": 0.00029229366745108014, + "loss": 0.439, + "step": 120240 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029225126672178766, + "loss": 0.4465, + "step": 120250 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029220886599249507, + "loss": 0.4644, + "step": 120260 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029216646526320254, + "loss": 0.4964, + "step": 120270 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029212406453391, + "loss": 0.3841, + "step": 120280 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029208166380461747, + "loss": 0.5011, + "step": 120290 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002920392630753249, + "loss": 0.4438, + "step": 120300 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002919968623460324, + "loss": 0.3761, + "step": 120310 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002919544616167398, + "loss": 0.404, + "step": 120320 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002919120608874473, + "loss": 0.4051, + "step": 120330 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029186966015815475, + "loss": 0.3904, + "step": 120340 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002918272594288622, + "loss": 0.453, + "step": 120350 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029178485869956963, + "loss": 0.4404, + "step": 120360 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002917424579702771, + "loss": 0.4397, + "step": 120370 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029170005724098456, + "loss": 0.3858, + "step": 120380 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029165765651169203, + "loss": 0.5937, + "step": 120390 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029161525578239944, + "loss": 0.5642, + "step": 120400 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029157285505310697, + "loss": 0.4428, + "step": 120410 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002915304543238144, + "loss": 0.4162, + "step": 120420 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029148805359452184, + "loss": 0.4062, + "step": 120430 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002914456528652293, + "loss": 0.3962, + "step": 120440 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002914032521359368, + "loss": 0.4585, + "step": 120450 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002913608514066442, + "loss": 0.3695, + "step": 120460 + }, + { + "epoch": 5.09, + "learning_rate": 0.00029131845067735166, + "loss": 0.5408, + "step": 120470 + }, + { + "epoch": 5.09, + "learning_rate": 0.0002912760499480591, + "loss": 0.4901, + "step": 120480 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002912336492187666, + "loss": 0.4666, + "step": 120490 + }, + { + "epoch": 5.1, + "learning_rate": 0.000291191248489474, + "loss": 0.4244, + "step": 120500 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002911488477601815, + "loss": 0.4949, + "step": 120510 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029110644703088894, + "loss": 0.3635, + "step": 120520 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002910640463015964, + "loss": 0.4143, + "step": 120530 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029102164557230387, + "loss": 0.4768, + "step": 120540 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029097924484301134, + "loss": 0.4946, + "step": 120550 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029093684411371875, + "loss": 0.5449, + "step": 120560 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002908944433844262, + "loss": 0.4687, + "step": 120570 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002908520426551337, + "loss": 0.4103, + "step": 120580 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029080964192584115, + "loss": 0.4336, + "step": 120590 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002907672411965486, + "loss": 0.4058, + "step": 120600 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029072484046725603, + "loss": 0.4794, + "step": 120610 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002906824397379635, + "loss": 0.439, + "step": 120620 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029064003900867096, + "loss": 0.4371, + "step": 120630 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029059763827937843, + "loss": 0.5636, + "step": 120640 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029055523755008584, + "loss": 0.473, + "step": 120650 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002905128368207933, + "loss": 0.5103, + "step": 120660 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002904704360915008, + "loss": 0.4398, + "step": 120670 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029042803536220824, + "loss": 0.4569, + "step": 120680 + }, + { + "epoch": 5.1, + "learning_rate": 0.00029038563463291566, + "loss": 0.4471, + "step": 120690 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002903432339036232, + "loss": 0.4572, + "step": 120700 + }, + { + "epoch": 5.1, + "learning_rate": 0.0002903008331743306, + "loss": 0.44, + "step": 120710 + }, + { + "epoch": 5.11, + "learning_rate": 0.00029025843244503806, + "loss": 0.4542, + "step": 120720 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002902160317157455, + "loss": 0.4567, + "step": 120730 + }, + { + "epoch": 5.11, + "learning_rate": 0.000290173630986453, + "loss": 0.5224, + "step": 120740 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002901312302571604, + "loss": 0.5153, + "step": 120750 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002900888295278679, + "loss": 0.4475, + "step": 120760 + }, + { + "epoch": 5.11, + "learning_rate": 0.00029004642879857534, + "loss": 0.4322, + "step": 120770 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002900040280692828, + "loss": 0.3882, + "step": 120780 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028996162733999027, + "loss": 0.521, + "step": 120790 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028991922661069774, + "loss": 0.5127, + "step": 120800 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028987682588140515, + "loss": 0.4666, + "step": 120810 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002898344251521126, + "loss": 0.4165, + "step": 120820 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002897920244228201, + "loss": 0.5306, + "step": 120830 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028974962369352755, + "loss": 0.437, + "step": 120840 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028970722296423496, + "loss": 0.3806, + "step": 120850 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002896648222349425, + "loss": 0.3951, + "step": 120860 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002896224215056499, + "loss": 0.4742, + "step": 120870 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028958002077635736, + "loss": 0.4839, + "step": 120880 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028953762004706483, + "loss": 0.4528, + "step": 120890 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002894952193177723, + "loss": 0.5627, + "step": 120900 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002894528185884797, + "loss": 0.3479, + "step": 120910 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028941041785918723, + "loss": 0.5019, + "step": 120920 + }, + { + "epoch": 5.11, + "learning_rate": 0.00028936801712989464, + "loss": 0.4358, + "step": 120930 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002893256164006021, + "loss": 0.3892, + "step": 120940 + }, + { + "epoch": 5.11, + "learning_rate": 0.0002892832156713096, + "loss": 0.4206, + "step": 120950 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028924081494201704, + "loss": 0.4978, + "step": 120960 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028919841421272445, + "loss": 0.3915, + "step": 120970 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002891560134834319, + "loss": 0.4202, + "step": 120980 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002891136127541394, + "loss": 0.4517, + "step": 120990 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028907121202484686, + "loss": 0.4558, + "step": 121000 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028902881129555427, + "loss": 0.4595, + "step": 121010 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002889864105662618, + "loss": 0.4181, + "step": 121020 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002889440098369692, + "loss": 0.5052, + "step": 121030 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028890160910767667, + "loss": 0.5398, + "step": 121040 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028885920837838413, + "loss": 0.566, + "step": 121050 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002888168076490916, + "loss": 0.5004, + "step": 121060 + }, + { + "epoch": 5.12, + "learning_rate": 0.000288774406919799, + "loss": 0.4986, + "step": 121070 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028873200619050654, + "loss": 0.4359, + "step": 121080 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028868960546121395, + "loss": 0.5071, + "step": 121090 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002886472047319214, + "loss": 0.4633, + "step": 121100 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028860480400262883, + "loss": 0.5563, + "step": 121110 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028856240327333635, + "loss": 0.4044, + "step": 121120 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028852000254404376, + "loss": 0.4218, + "step": 121130 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028847760181475123, + "loss": 0.4054, + "step": 121140 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002884352010854587, + "loss": 0.3974, + "step": 121150 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028839280035616616, + "loss": 0.4067, + "step": 121160 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002883503996268736, + "loss": 0.4957, + "step": 121170 + }, + { + "epoch": 5.12, + "learning_rate": 0.00028830799889758104, + "loss": 0.4782, + "step": 121180 + }, + { + "epoch": 5.12, + "learning_rate": 0.0002882655981682885, + "loss": 0.4268, + "step": 121190 + }, + { + "epoch": 5.13, + "learning_rate": 0.000288223197438996, + "loss": 0.5038, + "step": 121200 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028818079670970344, + "loss": 0.4437, + "step": 121210 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028813839598041085, + "loss": 0.4872, + "step": 121220 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002880959952511183, + "loss": 0.4921, + "step": 121230 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002880535945218258, + "loss": 0.4166, + "step": 121240 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028801119379253325, + "loss": 0.4543, + "step": 121250 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002879687930632407, + "loss": 0.4418, + "step": 121260 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028792639233394813, + "loss": 0.4807, + "step": 121270 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002878839916046556, + "loss": 0.4386, + "step": 121280 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028784159087536307, + "loss": 0.4184, + "step": 121290 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028779919014607053, + "loss": 0.3907, + "step": 121300 + }, + { + "epoch": 5.13, + "learning_rate": 0.000287756789416778, + "loss": 0.4035, + "step": 121310 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002877143886874854, + "loss": 0.4284, + "step": 121320 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002876719879581929, + "loss": 0.4882, + "step": 121330 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028762958722890035, + "loss": 0.4511, + "step": 121340 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002875871864996078, + "loss": 0.4439, + "step": 121350 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002875447857703152, + "loss": 0.4574, + "step": 121360 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028750238504102275, + "loss": 0.4437, + "step": 121370 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028745998431173016, + "loss": 0.448, + "step": 121380 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002874175835824376, + "loss": 0.5163, + "step": 121390 + }, + { + "epoch": 5.13, + "learning_rate": 0.0002873751828531451, + "loss": 0.5089, + "step": 121400 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028733278212385256, + "loss": 0.4501, + "step": 121410 + }, + { + "epoch": 5.13, + "learning_rate": 0.00028729038139455997, + "loss": 0.4568, + "step": 121420 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028724798066526744, + "loss": 0.497, + "step": 121430 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002872055799359749, + "loss": 0.4212, + "step": 121440 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028716317920668237, + "loss": 0.4151, + "step": 121450 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002871207784773898, + "loss": 0.4422, + "step": 121460 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002870783777480973, + "loss": 0.5071, + "step": 121470 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002870359770188047, + "loss": 0.4737, + "step": 121480 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002869935762895122, + "loss": 0.4624, + "step": 121490 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028695117556021965, + "loss": 0.4551, + "step": 121500 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002869087748309271, + "loss": 0.4175, + "step": 121510 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028686637410163453, + "loss": 0.5211, + "step": 121520 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028682397337234205, + "loss": 0.4422, + "step": 121530 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028678157264304947, + "loss": 0.5069, + "step": 121540 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028673917191375693, + "loss": 0.421, + "step": 121550 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002866967711844644, + "loss": 0.4379, + "step": 121560 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028665437045517187, + "loss": 0.3807, + "step": 121570 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002866119697258793, + "loss": 0.4492, + "step": 121580 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028656956899658675, + "loss": 0.4565, + "step": 121590 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002865271682672942, + "loss": 0.438, + "step": 121600 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002864847675380017, + "loss": 0.4804, + "step": 121610 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002864423668087091, + "loss": 0.4862, + "step": 121620 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002863999660794166, + "loss": 0.392, + "step": 121630 + }, + { + "epoch": 5.14, + "learning_rate": 0.000286357565350124, + "loss": 0.4048, + "step": 121640 + }, + { + "epoch": 5.14, + "learning_rate": 0.0002863151646208315, + "loss": 0.4648, + "step": 121650 + }, + { + "epoch": 5.14, + "learning_rate": 0.00028627276389153896, + "loss": 0.4442, + "step": 121660 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002862303631622464, + "loss": 0.4459, + "step": 121670 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028618796243295384, + "loss": 0.4546, + "step": 121680 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028614556170366136, + "loss": 0.4769, + "step": 121690 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028610316097436877, + "loss": 0.4888, + "step": 121700 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028606076024507624, + "loss": 0.4308, + "step": 121710 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002860183595157837, + "loss": 0.4324, + "step": 121720 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028597595878649117, + "loss": 0.4718, + "step": 121730 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002859335580571986, + "loss": 0.4581, + "step": 121740 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028589115732790605, + "loss": 0.4734, + "step": 121750 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002858487565986135, + "loss": 0.4611, + "step": 121760 + }, + { + "epoch": 5.15, + "learning_rate": 0.000285806355869321, + "loss": 0.4264, + "step": 121770 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002857639551400284, + "loss": 0.3944, + "step": 121780 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002857215544107359, + "loss": 0.387, + "step": 121790 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028567915368144333, + "loss": 0.4801, + "step": 121800 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002856367529521508, + "loss": 0.4465, + "step": 121810 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028559435222285826, + "loss": 0.4145, + "step": 121820 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028555195149356573, + "loss": 0.4269, + "step": 121830 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028550955076427314, + "loss": 0.4888, + "step": 121840 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002854671500349806, + "loss": 0.4442, + "step": 121850 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002854247493056881, + "loss": 0.4209, + "step": 121860 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028538234857639554, + "loss": 0.3957, + "step": 121870 + }, + { + "epoch": 5.15, + "learning_rate": 0.00028533994784710296, + "loss": 0.4653, + "step": 121880 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002852975471178104, + "loss": 0.4413, + "step": 121890 + }, + { + "epoch": 5.15, + "learning_rate": 0.0002852551463885179, + "loss": 0.4254, + "step": 121900 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028521274565922536, + "loss": 0.4015, + "step": 121910 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002851703449299328, + "loss": 0.459, + "step": 121920 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028512794420064024, + "loss": 0.448, + "step": 121930 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002850855434713477, + "loss": 0.4354, + "step": 121940 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028504314274205517, + "loss": 0.4227, + "step": 121950 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028500074201276264, + "loss": 0.4349, + "step": 121960 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028495834128347005, + "loss": 0.3619, + "step": 121970 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028491594055417757, + "loss": 0.3729, + "step": 121980 + }, + { + "epoch": 5.16, + "learning_rate": 0.000284873539824885, + "loss": 0.4761, + "step": 121990 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028483113909559245, + "loss": 0.3983, + "step": 122000 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002847887383662999, + "loss": 0.4287, + "step": 122010 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002847463376370074, + "loss": 0.5014, + "step": 122020 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002847039369077148, + "loss": 0.4474, + "step": 122030 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028466153617842226, + "loss": 0.4679, + "step": 122040 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028461913544912973, + "loss": 0.4194, + "step": 122050 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002845767347198372, + "loss": 0.479, + "step": 122060 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002845343339905446, + "loss": 0.4271, + "step": 122070 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028449193326125213, + "loss": 0.4465, + "step": 122080 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028444953253195954, + "loss": 0.4853, + "step": 122090 + }, + { + "epoch": 5.16, + "learning_rate": 0.000284407131802667, + "loss": 0.4676, + "step": 122100 + }, + { + "epoch": 5.16, + "learning_rate": 0.0002843647310733745, + "loss": 0.4061, + "step": 122110 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028432233034408194, + "loss": 0.4212, + "step": 122120 + }, + { + "epoch": 5.16, + "learning_rate": 0.00028427992961478936, + "loss": 0.4431, + "step": 122130 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002842375288854969, + "loss": 0.4238, + "step": 122140 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002841951281562043, + "loss": 0.3818, + "step": 122150 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028415272742691176, + "loss": 0.4852, + "step": 122160 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002841103266976192, + "loss": 0.4497, + "step": 122170 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002840679259683267, + "loss": 0.4292, + "step": 122180 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002840255252390341, + "loss": 0.4241, + "step": 122190 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028398312450974157, + "loss": 0.4711, + "step": 122200 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028394072378044904, + "loss": 0.4403, + "step": 122210 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002838983230511565, + "loss": 0.5022, + "step": 122220 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002838559223218639, + "loss": 0.4162, + "step": 122230 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028381352159257144, + "loss": 0.3993, + "step": 122240 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028377112086327885, + "loss": 0.4317, + "step": 122250 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002837287201339863, + "loss": 0.4381, + "step": 122260 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002836863194046938, + "loss": 0.4401, + "step": 122270 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028364391867540125, + "loss": 0.4768, + "step": 122280 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028360151794610866, + "loss": 0.4961, + "step": 122290 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002835591172168162, + "loss": 0.4615, + "step": 122300 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002835167164875236, + "loss": 0.4244, + "step": 122310 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028347431575823106, + "loss": 0.4404, + "step": 122320 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028343191502893853, + "loss": 0.405, + "step": 122330 + }, + { + "epoch": 5.17, + "learning_rate": 0.000283389514299646, + "loss": 0.4865, + "step": 122340 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002833471135703534, + "loss": 0.4853, + "step": 122350 + }, + { + "epoch": 5.17, + "learning_rate": 0.0002833047128410609, + "loss": 0.4307, + "step": 122360 + }, + { + "epoch": 5.17, + "learning_rate": 0.00028326231211176834, + "loss": 0.4931, + "step": 122370 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002832199113824758, + "loss": 0.4183, + "step": 122380 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002831775106531832, + "loss": 0.4616, + "step": 122390 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028313510992389074, + "loss": 0.4826, + "step": 122400 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028309270919459815, + "loss": 0.4128, + "step": 122410 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002830503084653056, + "loss": 0.4757, + "step": 122420 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002830079077360131, + "loss": 0.4858, + "step": 122430 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028296550700672055, + "loss": 0.4586, + "step": 122440 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028292310627742797, + "loss": 0.5077, + "step": 122450 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028288070554813543, + "loss": 0.3923, + "step": 122460 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002828383048188429, + "loss": 0.4457, + "step": 122470 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028279590408955037, + "loss": 0.4135, + "step": 122480 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028275350336025783, + "loss": 0.4265, + "step": 122490 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002827111026309653, + "loss": 0.4152, + "step": 122500 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002826687019016727, + "loss": 0.5188, + "step": 122510 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002826263011723802, + "loss": 0.4373, + "step": 122520 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028258390044308765, + "loss": 0.4712, + "step": 122530 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002825414997137951, + "loss": 0.5437, + "step": 122540 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002824990989845025, + "loss": 0.4356, + "step": 122550 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028245669825521, + "loss": 0.357, + "step": 122560 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028241429752591746, + "loss": 0.3949, + "step": 122570 + }, + { + "epoch": 5.18, + "learning_rate": 0.00028237189679662493, + "loss": 0.4009, + "step": 122580 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002823294960673324, + "loss": 0.4419, + "step": 122590 + }, + { + "epoch": 5.18, + "learning_rate": 0.0002822870953380398, + "loss": 0.4019, + "step": 122600 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002822446946087473, + "loss": 0.4354, + "step": 122610 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028220229387945474, + "loss": 0.4039, + "step": 122620 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002821598931501622, + "loss": 0.3838, + "step": 122630 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002821174924208696, + "loss": 0.4611, + "step": 122640 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002820750916915771, + "loss": 0.4749, + "step": 122650 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028203269096228455, + "loss": 0.4462, + "step": 122660 + }, + { + "epoch": 5.19, + "learning_rate": 0.000281990290232992, + "loss": 0.4242, + "step": 122670 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028194788950369943, + "loss": 0.3983, + "step": 122680 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028190548877440695, + "loss": 0.4835, + "step": 122690 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028186308804511437, + "loss": 0.4844, + "step": 122700 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028182068731582183, + "loss": 0.4815, + "step": 122710 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002817782865865293, + "loss": 0.4569, + "step": 122720 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028173588585723677, + "loss": 0.5103, + "step": 122730 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002816934851279442, + "loss": 0.3717, + "step": 122740 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002816510843986517, + "loss": 0.3608, + "step": 122750 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002816086836693591, + "loss": 0.4852, + "step": 122760 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002815662829400666, + "loss": 0.4328, + "step": 122770 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028152388221077405, + "loss": 0.4375, + "step": 122780 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002814814814814815, + "loss": 0.4582, + "step": 122790 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002814390807521889, + "loss": 0.4179, + "step": 122800 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002813966800228964, + "loss": 0.4329, + "step": 122810 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028135427929360386, + "loss": 0.5125, + "step": 122820 + }, + { + "epoch": 5.19, + "learning_rate": 0.0002813118785643113, + "loss": 0.528, + "step": 122830 + }, + { + "epoch": 5.19, + "learning_rate": 0.00028126947783501874, + "loss": 0.4979, + "step": 122840 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028122707710572626, + "loss": 0.5133, + "step": 122850 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028118467637643367, + "loss": 0.4794, + "step": 122860 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028114227564714114, + "loss": 0.5203, + "step": 122870 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002810998749178486, + "loss": 0.4147, + "step": 122880 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028105747418855607, + "loss": 0.448, + "step": 122890 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002810150734592635, + "loss": 0.4728, + "step": 122900 + }, + { + "epoch": 5.2, + "learning_rate": 0.000280972672729971, + "loss": 0.4005, + "step": 122910 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002809302720006784, + "loss": 0.3939, + "step": 122920 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002808878712713859, + "loss": 0.4079, + "step": 122930 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028084547054209335, + "loss": 0.398, + "step": 122940 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002808030698128008, + "loss": 0.4103, + "step": 122950 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028076066908350823, + "loss": 0.4133, + "step": 122960 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002807182683542157, + "loss": 0.4136, + "step": 122970 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028067586762492317, + "loss": 0.4932, + "step": 122980 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028063346689563063, + "loss": 0.4305, + "step": 122990 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028059106616633804, + "loss": 0.4023, + "step": 123000 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028054866543704557, + "loss": 0.4691, + "step": 123010 + }, + { + "epoch": 5.2, + "learning_rate": 0.000280506264707753, + "loss": 0.5049, + "step": 123020 + }, + { + "epoch": 5.2, + "learning_rate": 0.00028046386397846044, + "loss": 0.3833, + "step": 123030 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002804214632491679, + "loss": 0.3954, + "step": 123040 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002803790625198754, + "loss": 0.4383, + "step": 123050 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002803366617905828, + "loss": 0.4803, + "step": 123060 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002802942610612903, + "loss": 0.4313, + "step": 123070 + }, + { + "epoch": 5.2, + "learning_rate": 0.0002802518603319977, + "loss": 0.3932, + "step": 123080 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002802094596027052, + "loss": 0.4287, + "step": 123090 + }, + { + "epoch": 5.21, + "learning_rate": 0.00028016705887341266, + "loss": 0.4515, + "step": 123100 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002801246581441201, + "loss": 0.4065, + "step": 123110 + }, + { + "epoch": 5.21, + "learning_rate": 0.00028008225741482754, + "loss": 0.4627, + "step": 123120 + }, + { + "epoch": 5.21, + "learning_rate": 0.000280039856685535, + "loss": 0.3884, + "step": 123130 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027999745595624247, + "loss": 0.4532, + "step": 123140 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027995505522694994, + "loss": 0.4971, + "step": 123150 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027991265449765735, + "loss": 0.3962, + "step": 123160 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002798702537683648, + "loss": 0.5079, + "step": 123170 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002798278530390723, + "loss": 0.4666, + "step": 123180 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027978545230977975, + "loss": 0.429, + "step": 123190 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002797430515804872, + "loss": 0.4091, + "step": 123200 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027970065085119463, + "loss": 0.4609, + "step": 123210 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002796582501219021, + "loss": 0.4191, + "step": 123220 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027961584939260956, + "loss": 0.4297, + "step": 123230 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027957344866331703, + "loss": 0.4071, + "step": 123240 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027953104793402444, + "loss": 0.5149, + "step": 123250 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002794886472047319, + "loss": 0.4985, + "step": 123260 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002794462464754394, + "loss": 0.4279, + "step": 123270 + }, + { + "epoch": 5.21, + "learning_rate": 0.00027940384574614684, + "loss": 0.4312, + "step": 123280 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002793614450168543, + "loss": 0.5846, + "step": 123290 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002793190442875618, + "loss": 0.4303, + "step": 123300 + }, + { + "epoch": 5.21, + "learning_rate": 0.0002792766435582692, + "loss": 0.3558, + "step": 123310 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027923424282897666, + "loss": 0.4156, + "step": 123320 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002791918420996841, + "loss": 0.5243, + "step": 123330 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002791494413703916, + "loss": 0.3582, + "step": 123340 + }, + { + "epoch": 5.22, + "learning_rate": 0.000279107040641099, + "loss": 0.4567, + "step": 123350 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002790646399118065, + "loss": 0.5024, + "step": 123360 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027902223918251394, + "loss": 0.5132, + "step": 123370 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002789798384532214, + "loss": 0.4317, + "step": 123380 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027893743772392887, + "loss": 0.4379, + "step": 123390 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027889503699463634, + "loss": 0.4067, + "step": 123400 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027885263626534375, + "loss": 0.4338, + "step": 123410 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002788102355360512, + "loss": 0.3822, + "step": 123420 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002787678348067587, + "loss": 0.4605, + "step": 123430 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027872543407746615, + "loss": 0.415, + "step": 123440 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027868303334817356, + "loss": 0.4131, + "step": 123450 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002786406326188811, + "loss": 0.4123, + "step": 123460 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002785982318895885, + "loss": 0.4769, + "step": 123470 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027855583116029596, + "loss": 0.4737, + "step": 123480 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027851343043100343, + "loss": 0.3637, + "step": 123490 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002784710297017109, + "loss": 0.4735, + "step": 123500 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002784286289724183, + "loss": 0.456, + "step": 123510 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027838622824312583, + "loss": 0.4262, + "step": 123520 + }, + { + "epoch": 5.22, + "learning_rate": 0.00027834382751383324, + "loss": 0.513, + "step": 123530 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002783014267845407, + "loss": 0.4886, + "step": 123540 + }, + { + "epoch": 5.22, + "learning_rate": 0.0002782590260552482, + "loss": 0.5833, + "step": 123550 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027821662532595564, + "loss": 0.4753, + "step": 123560 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027817422459666306, + "loss": 0.4114, + "step": 123570 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002781318238673705, + "loss": 0.4289, + "step": 123580 + }, + { + "epoch": 5.23, + "learning_rate": 0.000278089423138078, + "loss": 0.4628, + "step": 123590 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027804702240878546, + "loss": 0.4134, + "step": 123600 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027800462167949287, + "loss": 0.4371, + "step": 123610 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002779622209502004, + "loss": 0.4074, + "step": 123620 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002779198202209078, + "loss": 0.44, + "step": 123630 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027787741949161527, + "loss": 0.4103, + "step": 123640 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027783501876232274, + "loss": 0.4108, + "step": 123650 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002777926180330302, + "loss": 0.4713, + "step": 123660 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002777502173037376, + "loss": 0.3928, + "step": 123670 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027770781657444514, + "loss": 0.4973, + "step": 123680 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027766541584515255, + "loss": 0.454, + "step": 123690 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027762301511586, + "loss": 0.488, + "step": 123700 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002775806143865675, + "loss": 0.4375, + "step": 123710 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027753821365727495, + "loss": 0.4224, + "step": 123720 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027749581292798236, + "loss": 0.3993, + "step": 123730 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027745341219868983, + "loss": 0.4775, + "step": 123740 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002774110114693973, + "loss": 0.4208, + "step": 123750 + }, + { + "epoch": 5.23, + "learning_rate": 0.00027736861074010476, + "loss": 0.4917, + "step": 123760 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002773262100108122, + "loss": 0.4971, + "step": 123770 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002772838092815197, + "loss": 0.509, + "step": 123780 + }, + { + "epoch": 5.23, + "learning_rate": 0.0002772414085522271, + "loss": 0.4591, + "step": 123790 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002771990078229346, + "loss": 0.449, + "step": 123800 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027715660709364204, + "loss": 0.4147, + "step": 123810 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002771142063643495, + "loss": 0.4262, + "step": 123820 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002770718056350569, + "loss": 0.4361, + "step": 123830 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002770294049057644, + "loss": 0.4402, + "step": 123840 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027698700417647185, + "loss": 0.3919, + "step": 123850 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002769446034471793, + "loss": 0.4073, + "step": 123860 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002769022027178868, + "loss": 0.4591, + "step": 123870 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002768598019885942, + "loss": 0.4773, + "step": 123880 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027681740125930167, + "loss": 0.5069, + "step": 123890 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027677500053000913, + "loss": 0.4302, + "step": 123900 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002767325998007166, + "loss": 0.5579, + "step": 123910 + }, + { + "epoch": 5.24, + "learning_rate": 0.000276690199071424, + "loss": 0.4282, + "step": 123920 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002766477983421315, + "loss": 0.4541, + "step": 123930 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027660539761283895, + "loss": 0.472, + "step": 123940 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002765629968835464, + "loss": 0.4392, + "step": 123950 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002765205961542538, + "loss": 0.426, + "step": 123960 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027647819542496135, + "loss": 0.4761, + "step": 123970 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027643579469566876, + "loss": 0.4681, + "step": 123980 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002763933939663762, + "loss": 0.4696, + "step": 123990 + }, + { + "epoch": 5.24, + "learning_rate": 0.0002763509932370837, + "loss": 0.4728, + "step": 124000 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027630859250779116, + "loss": 0.4599, + "step": 124010 + }, + { + "epoch": 5.24, + "learning_rate": 0.00027626619177849857, + "loss": 0.4041, + "step": 124020 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027622379104920604, + "loss": 0.3679, + "step": 124030 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002761813903199135, + "loss": 0.4134, + "step": 124040 + }, + { + "epoch": 5.25, + "learning_rate": 0.000276138989590621, + "loss": 0.5854, + "step": 124050 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002760965888613284, + "loss": 0.5159, + "step": 124060 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002760541881320359, + "loss": 0.464, + "step": 124070 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002760117874027433, + "loss": 0.4533, + "step": 124080 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002759693866734508, + "loss": 0.4036, + "step": 124090 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027592698594415825, + "loss": 0.4771, + "step": 124100 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002758845852148657, + "loss": 0.4632, + "step": 124110 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027584218448557313, + "loss": 0.3907, + "step": 124120 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027579978375628065, + "loss": 0.326, + "step": 124130 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027575738302698807, + "loss": 0.4869, + "step": 124140 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027571498229769553, + "loss": 0.4004, + "step": 124150 + }, + { + "epoch": 5.25, + "learning_rate": 0.000275672581568403, + "loss": 0.4434, + "step": 124160 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027563018083911047, + "loss": 0.4289, + "step": 124170 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002755877801098179, + "loss": 0.4901, + "step": 124180 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027554537938052535, + "loss": 0.4187, + "step": 124190 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002755029786512328, + "loss": 0.439, + "step": 124200 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002754605779219403, + "loss": 0.4481, + "step": 124210 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002754181771926477, + "loss": 0.4766, + "step": 124220 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002753757764633552, + "loss": 0.4589, + "step": 124230 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002753333757340626, + "loss": 0.4011, + "step": 124240 + }, + { + "epoch": 5.25, + "learning_rate": 0.0002752909750047701, + "loss": 0.4234, + "step": 124250 + }, + { + "epoch": 5.25, + "learning_rate": 0.00027524857427547756, + "loss": 0.473, + "step": 124260 + }, + { + "epoch": 5.26, + "learning_rate": 0.000275206173546185, + "loss": 0.4088, + "step": 124270 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027516377281689244, + "loss": 0.4403, + "step": 124280 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027512137208759996, + "loss": 0.4461, + "step": 124290 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027507897135830737, + "loss": 0.4381, + "step": 124300 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027503657062901484, + "loss": 0.4959, + "step": 124310 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002749941698997223, + "loss": 0.4104, + "step": 124320 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027495176917042977, + "loss": 0.4855, + "step": 124330 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002749093684411372, + "loss": 0.3838, + "step": 124340 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027486696771184465, + "loss": 0.4665, + "step": 124350 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002748245669825521, + "loss": 0.4892, + "step": 124360 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002747821662532596, + "loss": 0.4536, + "step": 124370 + }, + { + "epoch": 5.26, + "learning_rate": 0.000274739765523967, + "loss": 0.4188, + "step": 124380 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002746973647946745, + "loss": 0.4857, + "step": 124390 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027465496406538193, + "loss": 0.4115, + "step": 124400 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002746125633360894, + "loss": 0.4585, + "step": 124410 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027457016260679686, + "loss": 0.4539, + "step": 124420 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027452776187750433, + "loss": 0.4067, + "step": 124430 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027448536114821174, + "loss": 0.4479, + "step": 124440 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002744429604189192, + "loss": 0.5221, + "step": 124450 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002744005596896267, + "loss": 0.4624, + "step": 124460 + }, + { + "epoch": 5.26, + "learning_rate": 0.00027435815896033414, + "loss": 0.4487, + "step": 124470 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002743157582310416, + "loss": 0.5287, + "step": 124480 + }, + { + "epoch": 5.26, + "learning_rate": 0.000274273357501749, + "loss": 0.4204, + "step": 124490 + }, + { + "epoch": 5.26, + "learning_rate": 0.0002742309567724565, + "loss": 0.3844, + "step": 124500 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027418855604316396, + "loss": 0.484, + "step": 124510 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002741461553138714, + "loss": 0.4057, + "step": 124520 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002741037545845789, + "loss": 0.5035, + "step": 124530 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002740613538552863, + "loss": 0.4473, + "step": 124540 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027401895312599377, + "loss": 0.4374, + "step": 124550 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027397655239670124, + "loss": 0.4469, + "step": 124560 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002739341516674087, + "loss": 0.4383, + "step": 124570 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027389175093811617, + "loss": 0.4482, + "step": 124580 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002738493502088236, + "loss": 0.4425, + "step": 124590 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027380694947953105, + "loss": 0.4729, + "step": 124600 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002737645487502385, + "loss": 0.4852, + "step": 124610 + }, + { + "epoch": 5.27, + "learning_rate": 0.000273722148020946, + "loss": 0.461, + "step": 124620 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002736797472916534, + "loss": 0.4916, + "step": 124630 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002736373465623609, + "loss": 0.4519, + "step": 124640 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027359494583306833, + "loss": 0.5399, + "step": 124650 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002735525451037758, + "loss": 0.4566, + "step": 124660 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002735101443744832, + "loss": 0.4663, + "step": 124670 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027346774364519073, + "loss": 0.513, + "step": 124680 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027342534291589814, + "loss": 0.4532, + "step": 124690 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002733829421866056, + "loss": 0.4733, + "step": 124700 + }, + { + "epoch": 5.27, + "learning_rate": 0.0002733405414573131, + "loss": 0.4797, + "step": 124710 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027329814072802054, + "loss": 0.4835, + "step": 124720 + }, + { + "epoch": 5.27, + "learning_rate": 0.00027325573999872796, + "loss": 0.3423, + "step": 124730 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002732133392694355, + "loss": 0.5036, + "step": 124740 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002731709385401429, + "loss": 0.5015, + "step": 124750 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027312853781085036, + "loss": 0.429, + "step": 124760 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002730861370815578, + "loss": 0.502, + "step": 124770 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002730437363522653, + "loss": 0.4075, + "step": 124780 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002730013356229727, + "loss": 0.4783, + "step": 124790 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027295893489368017, + "loss": 0.478, + "step": 124800 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027291653416438764, + "loss": 0.4389, + "step": 124810 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002728741334350951, + "loss": 0.5079, + "step": 124820 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002728317327058025, + "loss": 0.4433, + "step": 124830 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027278933197651004, + "loss": 0.4955, + "step": 124840 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027274693124721745, + "loss": 0.5201, + "step": 124850 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002727045305179249, + "loss": 0.4307, + "step": 124860 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002726621297886324, + "loss": 0.4115, + "step": 124870 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027261972905933985, + "loss": 0.3758, + "step": 124880 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027257732833004726, + "loss": 0.4072, + "step": 124890 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002725349276007548, + "loss": 0.4617, + "step": 124900 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002724925268714622, + "loss": 0.477, + "step": 124910 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027245012614216966, + "loss": 0.517, + "step": 124920 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027240772541287713, + "loss": 0.452, + "step": 124930 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002723653246835846, + "loss": 0.419, + "step": 124940 + }, + { + "epoch": 5.28, + "learning_rate": 0.000272322923954292, + "loss": 0.5059, + "step": 124950 + }, + { + "epoch": 5.28, + "learning_rate": 0.0002722805232249995, + "loss": 0.4757, + "step": 124960 + }, + { + "epoch": 5.28, + "learning_rate": 0.00027223812249570694, + "loss": 0.449, + "step": 124970 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002721957217664144, + "loss": 0.5168, + "step": 124980 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002721533210371218, + "loss": 0.4956, + "step": 124990 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027211092030782934, + "loss": 0.3737, + "step": 125000 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027206851957853675, + "loss": 0.397, + "step": 125010 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002720261188492442, + "loss": 0.3932, + "step": 125020 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002719837181199517, + "loss": 0.4864, + "step": 125030 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027194131739065916, + "loss": 0.4049, + "step": 125040 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027189891666136657, + "loss": 0.4584, + "step": 125050 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002718565159320741, + "loss": 0.5397, + "step": 125060 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002718141152027815, + "loss": 0.4496, + "step": 125070 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027177171447348897, + "loss": 0.496, + "step": 125080 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027172931374419644, + "loss": 0.3277, + "step": 125090 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002716869130149039, + "loss": 0.5019, + "step": 125100 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002716445122856113, + "loss": 0.4245, + "step": 125110 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002716021115563188, + "loss": 0.4305, + "step": 125120 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027155971082702625, + "loss": 0.4861, + "step": 125130 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002715173100977337, + "loss": 0.501, + "step": 125140 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027147490936844113, + "loss": 0.4242, + "step": 125150 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002714325086391486, + "loss": 0.4801, + "step": 125160 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027139010790985606, + "loss": 0.4285, + "step": 125170 + }, + { + "epoch": 5.29, + "learning_rate": 0.00027134770718056353, + "loss": 0.4781, + "step": 125180 + }, + { + "epoch": 5.29, + "learning_rate": 0.000271305306451271, + "loss": 0.5648, + "step": 125190 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002712629057219784, + "loss": 0.5845, + "step": 125200 + }, + { + "epoch": 5.29, + "learning_rate": 0.0002712205049926859, + "loss": 0.4731, + "step": 125210 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027117810426339334, + "loss": 0.4691, + "step": 125220 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002711357035341008, + "loss": 0.4813, + "step": 125230 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002710933028048082, + "loss": 0.4072, + "step": 125240 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027105090207551574, + "loss": 0.4433, + "step": 125250 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027100850134622315, + "loss": 0.4147, + "step": 125260 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002709661006169306, + "loss": 0.5477, + "step": 125270 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002709236998876381, + "loss": 0.4633, + "step": 125280 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027088129915834555, + "loss": 0.5101, + "step": 125290 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027083889842905297, + "loss": 0.4536, + "step": 125300 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027079649769976043, + "loss": 0.4244, + "step": 125310 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002707540969704679, + "loss": 0.4287, + "step": 125320 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027071169624117537, + "loss": 0.5009, + "step": 125330 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002706692955118828, + "loss": 0.4533, + "step": 125340 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002706268947825903, + "loss": 0.4841, + "step": 125350 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002705844940532977, + "loss": 0.3431, + "step": 125360 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002705420933240052, + "loss": 0.4898, + "step": 125370 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027049969259471265, + "loss": 0.4859, + "step": 125380 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002704572918654201, + "loss": 0.5032, + "step": 125390 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002704148911361275, + "loss": 0.4704, + "step": 125400 + }, + { + "epoch": 5.3, + "learning_rate": 0.000270372490406835, + "loss": 0.472, + "step": 125410 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027033008967754246, + "loss": 0.4939, + "step": 125420 + }, + { + "epoch": 5.3, + "learning_rate": 0.0002702876889482499, + "loss": 0.3689, + "step": 125430 + }, + { + "epoch": 5.3, + "learning_rate": 0.00027024528821895734, + "loss": 0.4865, + "step": 125440 + }, + { + "epoch": 5.31, + "learning_rate": 0.00027020288748966486, + "loss": 0.4662, + "step": 125450 + }, + { + "epoch": 5.31, + "learning_rate": 0.00027016048676037227, + "loss": 0.5151, + "step": 125460 + }, + { + "epoch": 5.31, + "learning_rate": 0.00027011808603107974, + "loss": 0.475, + "step": 125470 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002700756853017872, + "loss": 0.4379, + "step": 125480 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002700332845724947, + "loss": 0.5399, + "step": 125490 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002699908838432021, + "loss": 0.4878, + "step": 125500 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002699484831139096, + "loss": 0.4415, + "step": 125510 + }, + { + "epoch": 5.31, + "learning_rate": 0.000269906082384617, + "loss": 0.475, + "step": 125520 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002698636816553245, + "loss": 0.5434, + "step": 125530 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026982128092603195, + "loss": 0.4781, + "step": 125540 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002697788801967394, + "loss": 0.4766, + "step": 125550 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026973647946744683, + "loss": 0.4626, + "step": 125560 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002696940787381543, + "loss": 0.4806, + "step": 125570 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026965167800886177, + "loss": 0.4597, + "step": 125580 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026960927727956923, + "loss": 0.4634, + "step": 125590 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026956687655027664, + "loss": 0.4088, + "step": 125600 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026952447582098417, + "loss": 0.4631, + "step": 125610 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002694820750916916, + "loss": 0.415, + "step": 125620 + }, + { + "epoch": 5.31, + "learning_rate": 0.00026943967436239905, + "loss": 0.4693, + "step": 125630 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002693972736331065, + "loss": 0.4571, + "step": 125640 + }, + { + "epoch": 5.31, + "learning_rate": 0.000269354872903814, + "loss": 0.4095, + "step": 125650 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002693124721745214, + "loss": 0.502, + "step": 125660 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002692700714452289, + "loss": 0.4733, + "step": 125670 + }, + { + "epoch": 5.31, + "learning_rate": 0.0002692276707159363, + "loss": 0.4514, + "step": 125680 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002691852699866438, + "loss": 0.4264, + "step": 125690 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026914286925735126, + "loss": 0.4115, + "step": 125700 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002691004685280587, + "loss": 0.4922, + "step": 125710 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026905806779876614, + "loss": 0.5101, + "step": 125720 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002690156670694736, + "loss": 0.4739, + "step": 125730 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026897326634018107, + "loss": 0.5404, + "step": 125740 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026893086561088854, + "loss": 0.4954, + "step": 125750 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026888846488159595, + "loss": 0.4753, + "step": 125760 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026884606415230347, + "loss": 0.4005, + "step": 125770 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002688036634230109, + "loss": 0.3893, + "step": 125780 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026876126269371835, + "loss": 0.4146, + "step": 125790 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002687188619644258, + "loss": 0.3445, + "step": 125800 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002686764612351333, + "loss": 0.4199, + "step": 125810 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002686340605058407, + "loss": 0.4469, + "step": 125820 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026859165977654816, + "loss": 0.4694, + "step": 125830 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026854925904725563, + "loss": 0.4978, + "step": 125840 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002685068583179631, + "loss": 0.4897, + "step": 125850 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026846445758867056, + "loss": 0.5421, + "step": 125860 + }, + { + "epoch": 5.32, + "learning_rate": 0.000268422056859378, + "loss": 0.3836, + "step": 125870 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026837965613008544, + "loss": 0.4193, + "step": 125880 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002683372554007929, + "loss": 0.4615, + "step": 125890 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002682948546715004, + "loss": 0.3863, + "step": 125900 + }, + { + "epoch": 5.32, + "learning_rate": 0.0002682524539422078, + "loss": 0.441, + "step": 125910 + }, + { + "epoch": 5.32, + "learning_rate": 0.00026821005321291526, + "loss": 0.4514, + "step": 125920 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002681676524836227, + "loss": 0.5002, + "step": 125930 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002681252517543302, + "loss": 0.5125, + "step": 125940 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002680828510250376, + "loss": 0.4177, + "step": 125950 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002680404502957451, + "loss": 0.5146, + "step": 125960 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026799804956645254, + "loss": 0.5002, + "step": 125970 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026795564883716, + "loss": 0.3729, + "step": 125980 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026791324810786747, + "loss": 0.4739, + "step": 125990 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026787084737857494, + "loss": 0.4981, + "step": 126000 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026782844664928235, + "loss": 0.4225, + "step": 126010 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026778604591998987, + "loss": 0.4554, + "step": 126020 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002677436451906973, + "loss": 0.4431, + "step": 126030 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026770124446140475, + "loss": 0.4996, + "step": 126040 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026765884373211216, + "loss": 0.4845, + "step": 126050 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002676164430028197, + "loss": 0.4286, + "step": 126060 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002675740422735271, + "loss": 0.4594, + "step": 126070 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026753164154423456, + "loss": 0.5286, + "step": 126080 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026748924081494203, + "loss": 0.4677, + "step": 126090 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002674468400856495, + "loss": 0.3942, + "step": 126100 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002674044393563569, + "loss": 0.4611, + "step": 126110 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026736203862706443, + "loss": 0.4316, + "step": 126120 + }, + { + "epoch": 5.33, + "learning_rate": 0.00026731963789777184, + "loss": 0.4033, + "step": 126130 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002672772371684793, + "loss": 0.5023, + "step": 126140 + }, + { + "epoch": 5.33, + "learning_rate": 0.0002672348364391868, + "loss": 0.4793, + "step": 126150 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026719243570989424, + "loss": 0.3651, + "step": 126160 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026715003498060166, + "loss": 0.4234, + "step": 126170 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002671076342513091, + "loss": 0.4448, + "step": 126180 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002670652335220166, + "loss": 0.5347, + "step": 126190 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026702283279272406, + "loss": 0.4153, + "step": 126200 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026698043206343147, + "loss": 0.536, + "step": 126210 + }, + { + "epoch": 5.34, + "learning_rate": 0.000266938031334139, + "loss": 0.4909, + "step": 126220 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002668956306048464, + "loss": 0.5215, + "step": 126230 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026685322987555387, + "loss": 0.3809, + "step": 126240 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026681082914626134, + "loss": 0.4875, + "step": 126250 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002667684284169688, + "loss": 0.4381, + "step": 126260 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002667260276876762, + "loss": 0.4149, + "step": 126270 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026668362695838374, + "loss": 0.5262, + "step": 126280 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026664122622909115, + "loss": 0.3535, + "step": 126290 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002665988254997986, + "loss": 0.4472, + "step": 126300 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002665564247705061, + "loss": 0.4604, + "step": 126310 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026651402404121355, + "loss": 0.4296, + "step": 126320 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026647162331192096, + "loss": 0.5606, + "step": 126330 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026642922258262843, + "loss": 0.4629, + "step": 126340 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002663868218533359, + "loss": 0.5076, + "step": 126350 + }, + { + "epoch": 5.34, + "learning_rate": 0.00026634442112404336, + "loss": 0.4692, + "step": 126360 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002663020203947508, + "loss": 0.5232, + "step": 126370 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002662596196654583, + "loss": 0.4123, + "step": 126380 + }, + { + "epoch": 5.34, + "learning_rate": 0.0002662172189361657, + "loss": 0.3984, + "step": 126390 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002661748182068732, + "loss": 0.4017, + "step": 126400 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026613241747758064, + "loss": 0.4516, + "step": 126410 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002660900167482881, + "loss": 0.4836, + "step": 126420 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002660476160189955, + "loss": 0.4305, + "step": 126430 + }, + { + "epoch": 5.35, + "learning_rate": 0.000266005215289703, + "loss": 0.4709, + "step": 126440 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026596281456041045, + "loss": 0.4887, + "step": 126450 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002659204138311179, + "loss": 0.3958, + "step": 126460 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002658780131018254, + "loss": 0.3957, + "step": 126470 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002658356123725328, + "loss": 0.3739, + "step": 126480 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026579321164324027, + "loss": 0.4739, + "step": 126490 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026575081091394773, + "loss": 0.4495, + "step": 126500 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002657084101846552, + "loss": 0.4273, + "step": 126510 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026566600945536267, + "loss": 0.4736, + "step": 126520 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002656236087260701, + "loss": 0.4723, + "step": 126530 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026558120799677755, + "loss": 0.4537, + "step": 126540 + }, + { + "epoch": 5.35, + "learning_rate": 0.000265538807267485, + "loss": 0.4917, + "step": 126550 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002654964065381925, + "loss": 0.5481, + "step": 126560 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026545400580889995, + "loss": 0.5047, + "step": 126570 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026541160507960736, + "loss": 0.419, + "step": 126580 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002653692043503148, + "loss": 0.4154, + "step": 126590 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002653268036210223, + "loss": 0.4481, + "step": 126600 + }, + { + "epoch": 5.35, + "learning_rate": 0.00026528440289172976, + "loss": 0.4738, + "step": 126610 + }, + { + "epoch": 5.35, + "learning_rate": 0.0002652420021624372, + "loss": 0.378, + "step": 126620 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002651996014331447, + "loss": 0.3775, + "step": 126630 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002651572007038521, + "loss": 0.4244, + "step": 126640 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002651147999745596, + "loss": 0.5271, + "step": 126650 + }, + { + "epoch": 5.36, + "learning_rate": 0.000265072399245267, + "loss": 0.3789, + "step": 126660 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002650299985159745, + "loss": 0.4433, + "step": 126670 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002649875977866819, + "loss": 0.4257, + "step": 126680 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002649451970573894, + "loss": 0.4554, + "step": 126690 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026490279632809685, + "loss": 0.3907, + "step": 126700 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002648603955988043, + "loss": 0.4481, + "step": 126710 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026481799486951173, + "loss": 0.452, + "step": 126720 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026477559414021925, + "loss": 0.4116, + "step": 126730 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026473319341092667, + "loss": 0.4295, + "step": 126740 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026469079268163413, + "loss": 0.4852, + "step": 126750 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002646483919523416, + "loss": 0.4668, + "step": 126760 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026460599122304907, + "loss": 0.4951, + "step": 126770 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002645635904937565, + "loss": 0.4702, + "step": 126780 + }, + { + "epoch": 5.36, + "learning_rate": 0.000264521189764464, + "loss": 0.3579, + "step": 126790 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002644787890351714, + "loss": 0.3863, + "step": 126800 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002644363883058789, + "loss": 0.6278, + "step": 126810 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002643939875765863, + "loss": 0.4689, + "step": 126820 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002643515868472938, + "loss": 0.4741, + "step": 126830 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002643091861180012, + "loss": 0.5173, + "step": 126840 + }, + { + "epoch": 5.36, + "learning_rate": 0.0002642667853887087, + "loss": 0.4582, + "step": 126850 + }, + { + "epoch": 5.36, + "learning_rate": 0.00026422438465941616, + "loss": 0.4374, + "step": 126860 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002641819839301236, + "loss": 0.5076, + "step": 126870 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026413958320083104, + "loss": 0.5059, + "step": 126880 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026409718247153856, + "loss": 0.4431, + "step": 126890 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026405478174224597, + "loss": 0.4762, + "step": 126900 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026401238101295344, + "loss": 0.4177, + "step": 126910 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002639699802836609, + "loss": 0.4356, + "step": 126920 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026392757955436837, + "loss": 0.5245, + "step": 126930 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002638851788250758, + "loss": 0.4502, + "step": 126940 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026384277809578325, + "loss": 0.464, + "step": 126950 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002638003773664907, + "loss": 0.4875, + "step": 126960 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002637579766371982, + "loss": 0.398, + "step": 126970 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002637155759079056, + "loss": 0.5378, + "step": 126980 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002636731751786131, + "loss": 0.4744, + "step": 126990 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026363077444932053, + "loss": 0.4561, + "step": 127000 + }, + { + "epoch": 5.37, + "learning_rate": 0.000263588373720028, + "loss": 0.5089, + "step": 127010 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026354597299073547, + "loss": 0.4472, + "step": 127020 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026350357226144293, + "loss": 0.394, + "step": 127030 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026346117153215034, + "loss": 0.4603, + "step": 127040 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026341877080285787, + "loss": 0.4426, + "step": 127050 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002633763700735653, + "loss": 0.4221, + "step": 127060 + }, + { + "epoch": 5.37, + "learning_rate": 0.00026333396934427275, + "loss": 0.4806, + "step": 127070 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002632915686149802, + "loss": 0.5636, + "step": 127080 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002632491678856877, + "loss": 0.4188, + "step": 127090 + }, + { + "epoch": 5.37, + "learning_rate": 0.0002632067671563951, + "loss": 0.5022, + "step": 127100 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026316436642710256, + "loss": 0.5236, + "step": 127110 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026312196569781, + "loss": 0.4529, + "step": 127120 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002630795649685175, + "loss": 0.3406, + "step": 127130 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002630371642392249, + "loss": 0.417, + "step": 127140 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026299476350993237, + "loss": 0.3906, + "step": 127150 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026295236278063984, + "loss": 0.4312, + "step": 127160 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002629099620513473, + "loss": 0.5238, + "step": 127170 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026286756132205477, + "loss": 0.4229, + "step": 127180 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002628251605927622, + "loss": 0.4509, + "step": 127190 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026278275986346965, + "loss": 0.5057, + "step": 127200 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002627403591341771, + "loss": 0.4427, + "step": 127210 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002626979584048846, + "loss": 0.4981, + "step": 127220 + }, + { + "epoch": 5.38, + "learning_rate": 0.000262655557675592, + "loss": 0.3992, + "step": 127230 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002626131569462995, + "loss": 0.4517, + "step": 127240 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026257075621700693, + "loss": 0.5138, + "step": 127250 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002625283554877144, + "loss": 0.4821, + "step": 127260 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026248595475842186, + "loss": 0.4541, + "step": 127270 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026244355402912933, + "loss": 0.4519, + "step": 127280 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026240115329983674, + "loss": 0.3729, + "step": 127290 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002623587525705442, + "loss": 0.4479, + "step": 127300 + }, + { + "epoch": 5.38, + "learning_rate": 0.0002623163518412517, + "loss": 0.4601, + "step": 127310 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026227395111195914, + "loss": 0.529, + "step": 127320 + }, + { + "epoch": 5.38, + "learning_rate": 0.00026223155038266656, + "loss": 0.5133, + "step": 127330 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002621891496533741, + "loss": 0.4805, + "step": 127340 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002621467489240815, + "loss": 0.4923, + "step": 127350 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026210434819478896, + "loss": 0.4865, + "step": 127360 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002620619474654964, + "loss": 0.4604, + "step": 127370 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002620195467362039, + "loss": 0.3934, + "step": 127380 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002619771460069113, + "loss": 0.4701, + "step": 127390 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002619347452776188, + "loss": 0.4038, + "step": 127400 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026189234454832624, + "loss": 0.4181, + "step": 127410 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002618499438190337, + "loss": 0.4998, + "step": 127420 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002618075430897411, + "loss": 0.5698, + "step": 127430 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026176514236044864, + "loss": 0.4473, + "step": 127440 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026172274163115605, + "loss": 0.456, + "step": 127450 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002616803409018635, + "loss": 0.4943, + "step": 127460 + }, + { + "epoch": 5.39, + "learning_rate": 0.000261637940172571, + "loss": 0.4144, + "step": 127470 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026159553944327845, + "loss": 0.4639, + "step": 127480 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026155313871398586, + "loss": 0.5062, + "step": 127490 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002615107379846934, + "loss": 0.393, + "step": 127500 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002614683372554008, + "loss": 0.5046, + "step": 127510 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026142593652610826, + "loss": 0.4413, + "step": 127520 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026138353579681573, + "loss": 0.4411, + "step": 127530 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002613411350675232, + "loss": 0.3945, + "step": 127540 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002612987343382306, + "loss": 0.4287, + "step": 127550 + }, + { + "epoch": 5.39, + "learning_rate": 0.0002612563336089381, + "loss": 0.5042, + "step": 127560 + }, + { + "epoch": 5.39, + "learning_rate": 0.00026121393287964554, + "loss": 0.446, + "step": 127570 + }, + { + "epoch": 5.4, + "learning_rate": 0.000261171532150353, + "loss": 0.4095, + "step": 127580 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002611291314210604, + "loss": 0.3991, + "step": 127590 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026108673069176794, + "loss": 0.4187, + "step": 127600 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026104432996247536, + "loss": 0.4352, + "step": 127610 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002610019292331828, + "loss": 0.4709, + "step": 127620 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002609595285038903, + "loss": 0.3773, + "step": 127630 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026091712777459776, + "loss": 0.4728, + "step": 127640 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026087472704530517, + "loss": 0.3815, + "step": 127650 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002608323263160127, + "loss": 0.4875, + "step": 127660 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002607899255867201, + "loss": 0.4882, + "step": 127670 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026074752485742757, + "loss": 0.494, + "step": 127680 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026070512412813504, + "loss": 0.4077, + "step": 127690 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002606627233988425, + "loss": 0.4379, + "step": 127700 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002606203226695499, + "loss": 0.526, + "step": 127710 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002605779219402574, + "loss": 0.4622, + "step": 127720 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026053552121096485, + "loss": 0.4302, + "step": 127730 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002604931204816723, + "loss": 0.4636, + "step": 127740 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026045071975237973, + "loss": 0.4224, + "step": 127750 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002604083190230872, + "loss": 0.499, + "step": 127760 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026036591829379466, + "loss": 0.5035, + "step": 127770 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026032351756450213, + "loss": 0.4358, + "step": 127780 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002602811168352096, + "loss": 0.4894, + "step": 127790 + }, + { + "epoch": 5.4, + "learning_rate": 0.00026023871610591706, + "loss": 0.589, + "step": 127800 + }, + { + "epoch": 5.4, + "learning_rate": 0.0002601963153766245, + "loss": 0.478, + "step": 127810 + }, + { + "epoch": 5.41, + "learning_rate": 0.00026015391464733194, + "loss": 0.3481, + "step": 127820 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002601115139180394, + "loss": 0.5171, + "step": 127830 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002600691131887469, + "loss": 0.4682, + "step": 127840 + }, + { + "epoch": 5.41, + "learning_rate": 0.00026002671245945434, + "loss": 0.5093, + "step": 127850 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025998431173016175, + "loss": 0.4245, + "step": 127860 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002599419110008692, + "loss": 0.4671, + "step": 127870 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002598995102715767, + "loss": 0.4454, + "step": 127880 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025985710954228415, + "loss": 0.4112, + "step": 127890 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025981470881299157, + "loss": 0.3858, + "step": 127900 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025977230808369903, + "loss": 0.5184, + "step": 127910 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002597299073544065, + "loss": 0.4784, + "step": 127920 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025968750662511397, + "loss": 0.3822, + "step": 127930 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002596451058958214, + "loss": 0.363, + "step": 127940 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002596027051665289, + "loss": 0.445, + "step": 127950 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002595603044372363, + "loss": 0.4612, + "step": 127960 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002595179037079438, + "loss": 0.4759, + "step": 127970 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025947550297865125, + "loss": 0.4113, + "step": 127980 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002594331022493587, + "loss": 0.5499, + "step": 127990 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002593907015200661, + "loss": 0.4574, + "step": 128000 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025934830079077365, + "loss": 0.3969, + "step": 128010 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025930590006148106, + "loss": 0.4673, + "step": 128020 + }, + { + "epoch": 5.41, + "learning_rate": 0.0002592634993321885, + "loss": 0.433, + "step": 128030 + }, + { + "epoch": 5.41, + "learning_rate": 0.00025922109860289594, + "loss": 0.47, + "step": 128040 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025917869787360346, + "loss": 0.4405, + "step": 128050 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002591362971443109, + "loss": 0.4268, + "step": 128060 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025909389641501834, + "loss": 0.4932, + "step": 128070 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002590514956857258, + "loss": 0.4787, + "step": 128080 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002590090949564333, + "loss": 0.4181, + "step": 128090 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002589666942271407, + "loss": 0.3713, + "step": 128100 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002589242934978482, + "loss": 0.4096, + "step": 128110 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002588818927685556, + "loss": 0.4297, + "step": 128120 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002588394920392631, + "loss": 0.5462, + "step": 128130 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025879709130997055, + "loss": 0.5192, + "step": 128140 + }, + { + "epoch": 5.42, + "learning_rate": 0.000258754690580678, + "loss": 0.4362, + "step": 128150 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025871228985138543, + "loss": 0.3699, + "step": 128160 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025866988912209295, + "loss": 0.43, + "step": 128170 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025862748839280037, + "loss": 0.393, + "step": 128180 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025858508766350783, + "loss": 0.4189, + "step": 128190 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025854268693421525, + "loss": 0.413, + "step": 128200 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025850028620492277, + "loss": 0.5135, + "step": 128210 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002584578854756302, + "loss": 0.4858, + "step": 128220 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025841548474633765, + "loss": 0.4561, + "step": 128230 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002583730840170451, + "loss": 0.4726, + "step": 128240 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002583306832877526, + "loss": 0.4825, + "step": 128250 + }, + { + "epoch": 5.42, + "learning_rate": 0.00025828828255846, + "loss": 0.5216, + "step": 128260 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002582458818291675, + "loss": 0.5788, + "step": 128270 + }, + { + "epoch": 5.42, + "learning_rate": 0.0002582034810998749, + "loss": 0.4245, + "step": 128280 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002581610803705824, + "loss": 0.4402, + "step": 128290 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025811867964128986, + "loss": 0.4615, + "step": 128300 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002580762789119973, + "loss": 0.4182, + "step": 128310 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025803387818270474, + "loss": 0.4437, + "step": 128320 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002579914774534122, + "loss": 0.4477, + "step": 128330 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025794907672411967, + "loss": 0.4113, + "step": 128340 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025790667599482714, + "loss": 0.4197, + "step": 128350 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025786427526553455, + "loss": 0.3795, + "step": 128360 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025782187453624207, + "loss": 0.4479, + "step": 128370 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002577794738069495, + "loss": 0.419, + "step": 128380 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025773707307765695, + "loss": 0.4112, + "step": 128390 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002576946723483644, + "loss": 0.4903, + "step": 128400 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002576522716190719, + "loss": 0.4794, + "step": 128410 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002576098708897793, + "loss": 0.4308, + "step": 128420 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025756747016048676, + "loss": 0.3622, + "step": 128430 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025752506943119423, + "loss": 0.4067, + "step": 128440 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002574826687019017, + "loss": 0.3668, + "step": 128450 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025744026797260917, + "loss": 0.4697, + "step": 128460 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002573978672433166, + "loss": 0.4828, + "step": 128470 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025735546651402404, + "loss": 0.4945, + "step": 128480 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002573130657847315, + "loss": 0.421, + "step": 128490 + }, + { + "epoch": 5.43, + "learning_rate": 0.000257270665055439, + "loss": 0.505, + "step": 128500 + }, + { + "epoch": 5.43, + "learning_rate": 0.0002572282643261464, + "loss": 0.4459, + "step": 128510 + }, + { + "epoch": 5.43, + "learning_rate": 0.00025718586359685386, + "loss": 0.5361, + "step": 128520 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002571434628675613, + "loss": 0.4394, + "step": 128530 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002571010621382688, + "loss": 0.4506, + "step": 128540 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025705866140897626, + "loss": 0.4121, + "step": 128550 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002570162606796837, + "loss": 0.4098, + "step": 128560 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025697385995039114, + "loss": 0.4772, + "step": 128570 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002569314592210986, + "loss": 0.4739, + "step": 128580 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025688905849180607, + "loss": 0.4526, + "step": 128590 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025684665776251354, + "loss": 0.42, + "step": 128600 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025680425703322095, + "loss": 0.4367, + "step": 128610 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025676185630392847, + "loss": 0.5034, + "step": 128620 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002567194555746359, + "loss": 0.4062, + "step": 128630 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025667705484534335, + "loss": 0.4423, + "step": 128640 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025663465411605076, + "loss": 0.474, + "step": 128650 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002565922533867583, + "loss": 0.4532, + "step": 128660 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002565498526574657, + "loss": 0.4389, + "step": 128670 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025650745192817316, + "loss": 0.462, + "step": 128680 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025646505119888063, + "loss": 0.4802, + "step": 128690 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002564226504695881, + "loss": 0.4346, + "step": 128700 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002563802497402955, + "loss": 0.4848, + "step": 128710 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025633784901100303, + "loss": 0.4709, + "step": 128720 + }, + { + "epoch": 5.44, + "learning_rate": 0.00025629544828171044, + "loss": 0.4429, + "step": 128730 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002562530475524179, + "loss": 0.4682, + "step": 128740 + }, + { + "epoch": 5.44, + "learning_rate": 0.0002562106468231254, + "loss": 0.4954, + "step": 128750 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025616824609383284, + "loss": 0.5419, + "step": 128760 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025612584536454026, + "loss": 0.4956, + "step": 128770 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002560834446352478, + "loss": 0.3776, + "step": 128780 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002560410439059552, + "loss": 0.4733, + "step": 128790 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025599864317666266, + "loss": 0.5033, + "step": 128800 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025595624244737007, + "loss": 0.4125, + "step": 128810 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002559138417180776, + "loss": 0.4626, + "step": 128820 + }, + { + "epoch": 5.45, + "learning_rate": 0.000255871440988785, + "loss": 0.4539, + "step": 128830 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025582904025949247, + "loss": 0.3871, + "step": 128840 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025578663953019994, + "loss": 0.4886, + "step": 128850 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002557442388009074, + "loss": 0.4688, + "step": 128860 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002557018380716148, + "loss": 0.3917, + "step": 128870 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025565943734232234, + "loss": 0.4683, + "step": 128880 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025561703661302975, + "loss": 0.5167, + "step": 128890 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002555746358837372, + "loss": 0.4346, + "step": 128900 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002555322351544447, + "loss": 0.4964, + "step": 128910 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025548983442515215, + "loss": 0.4955, + "step": 128920 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025544743369585956, + "loss": 0.4628, + "step": 128930 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002554050329665671, + "loss": 0.5195, + "step": 128940 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002553626322372745, + "loss": 0.4455, + "step": 128950 + }, + { + "epoch": 5.45, + "learning_rate": 0.00025532023150798196, + "loss": 0.4941, + "step": 128960 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002552778307786894, + "loss": 0.3866, + "step": 128970 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002552354300493969, + "loss": 0.4085, + "step": 128980 + }, + { + "epoch": 5.45, + "learning_rate": 0.0002551930293201043, + "loss": 0.426, + "step": 128990 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002551506285908118, + "loss": 0.4419, + "step": 129000 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025510822786151924, + "loss": 0.4158, + "step": 129010 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002550658271322267, + "loss": 0.5339, + "step": 129020 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002550234264029341, + "loss": 0.5044, + "step": 129030 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025498102567364164, + "loss": 0.4911, + "step": 129040 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025493862494434906, + "loss": 0.417, + "step": 129050 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002548962242150565, + "loss": 0.4684, + "step": 129060 + }, + { + "epoch": 5.46, + "learning_rate": 0.000254853823485764, + "loss": 0.3659, + "step": 129070 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025481142275647146, + "loss": 0.4303, + "step": 129080 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025476902202717887, + "loss": 0.4247, + "step": 129090 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025472662129788633, + "loss": 0.3772, + "step": 129100 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002546842205685938, + "loss": 0.4825, + "step": 129110 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025464181983930127, + "loss": 0.49, + "step": 129120 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002545994191100087, + "loss": 0.4754, + "step": 129130 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025455701838071615, + "loss": 0.3475, + "step": 129140 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002545146176514236, + "loss": 0.4699, + "step": 129150 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002544722169221311, + "loss": 0.4739, + "step": 129160 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025442981619283855, + "loss": 0.4621, + "step": 129170 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025438741546354596, + "loss": 0.5021, + "step": 129180 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025434501473425343, + "loss": 0.4638, + "step": 129190 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002543026140049609, + "loss": 0.4506, + "step": 129200 + }, + { + "epoch": 5.46, + "learning_rate": 0.00025426021327566836, + "loss": 0.4732, + "step": 129210 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002542178125463758, + "loss": 0.4113, + "step": 129220 + }, + { + "epoch": 5.46, + "learning_rate": 0.0002541754118170833, + "loss": 0.4781, + "step": 129230 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002541330110877907, + "loss": 0.4179, + "step": 129240 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002540906103584982, + "loss": 0.4411, + "step": 129250 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002540482096292056, + "loss": 0.5754, + "step": 129260 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002540058088999131, + "loss": 0.3932, + "step": 129270 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002539634081706205, + "loss": 0.3936, + "step": 129280 + }, + { + "epoch": 5.47, + "learning_rate": 0.000253921007441328, + "loss": 0.429, + "step": 129290 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025387860671203545, + "loss": 0.4513, + "step": 129300 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002538362059827429, + "loss": 0.3776, + "step": 129310 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025379380525345033, + "loss": 0.3979, + "step": 129320 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025375140452415785, + "loss": 0.4457, + "step": 129330 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025370900379486527, + "loss": 0.426, + "step": 129340 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025366660306557273, + "loss": 0.4459, + "step": 129350 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002536242023362802, + "loss": 0.388, + "step": 129360 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025358180160698767, + "loss": 0.4172, + "step": 129370 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002535394008776951, + "loss": 0.487, + "step": 129380 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002534970001484026, + "loss": 0.4597, + "step": 129390 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025345459941911, + "loss": 0.3926, + "step": 129400 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002534121986898175, + "loss": 0.4983, + "step": 129410 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002533697979605249, + "loss": 0.5037, + "step": 129420 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002533273972312324, + "loss": 0.448, + "step": 129430 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002532849965019398, + "loss": 0.4672, + "step": 129440 + }, + { + "epoch": 5.47, + "learning_rate": 0.0002532425957726473, + "loss": 0.5306, + "step": 129450 + }, + { + "epoch": 5.47, + "learning_rate": 0.00025320019504335476, + "loss": 0.5274, + "step": 129460 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002531577943140622, + "loss": 0.4508, + "step": 129470 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025311539358476964, + "loss": 0.4033, + "step": 129480 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025307299285547716, + "loss": 0.4455, + "step": 129490 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025303059212618457, + "loss": 0.4352, + "step": 129500 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025298819139689204, + "loss": 0.4052, + "step": 129510 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002529457906675995, + "loss": 0.466, + "step": 129520 + }, + { + "epoch": 5.48, + "learning_rate": 0.000252903389938307, + "loss": 0.42, + "step": 129530 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002528609892090144, + "loss": 0.507, + "step": 129540 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002528185884797219, + "loss": 0.4606, + "step": 129550 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002527761877504293, + "loss": 0.4265, + "step": 129560 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002527337870211368, + "loss": 0.419, + "step": 129570 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002526913862918442, + "loss": 0.4458, + "step": 129580 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002526489855625517, + "loss": 0.4814, + "step": 129590 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025260658483325913, + "loss": 0.453, + "step": 129600 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002525641841039666, + "loss": 0.3841, + "step": 129610 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025252178337467407, + "loss": 0.43, + "step": 129620 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025247938264538153, + "loss": 0.4828, + "step": 129630 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025243698191608895, + "loss": 0.4429, + "step": 129640 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025239458118679647, + "loss": 0.4417, + "step": 129650 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002523521804575039, + "loss": 0.4788, + "step": 129660 + }, + { + "epoch": 5.48, + "learning_rate": 0.00025230977972821135, + "loss": 0.4976, + "step": 129670 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002522673789989188, + "loss": 0.4409, + "step": 129680 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002522249782696263, + "loss": 0.3278, + "step": 129690 + }, + { + "epoch": 5.48, + "learning_rate": 0.0002521825775403337, + "loss": 0.4725, + "step": 129700 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025214017681104116, + "loss": 0.5112, + "step": 129710 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002520977760817486, + "loss": 0.4201, + "step": 129720 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002520553753524561, + "loss": 0.5525, + "step": 129730 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002520129746231635, + "loss": 0.4309, + "step": 129740 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025197057389387097, + "loss": 0.4863, + "step": 129750 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025192817316457844, + "loss": 0.4536, + "step": 129760 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002518857724352859, + "loss": 0.4793, + "step": 129770 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025184337170599337, + "loss": 0.4943, + "step": 129780 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025180097097670084, + "loss": 0.5061, + "step": 129790 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025175857024740825, + "loss": 0.4825, + "step": 129800 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002517161695181157, + "loss": 0.469, + "step": 129810 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002516737687888232, + "loss": 0.5403, + "step": 129820 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025163136805953065, + "loss": 0.4106, + "step": 129830 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002515889673302381, + "loss": 0.4106, + "step": 129840 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025154656660094553, + "loss": 0.4373, + "step": 129850 + }, + { + "epoch": 5.49, + "learning_rate": 0.000251504165871653, + "loss": 0.3759, + "step": 129860 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025146176514236046, + "loss": 0.4268, + "step": 129870 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025141936441306793, + "loss": 0.4225, + "step": 129880 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025137696368377534, + "loss": 0.4157, + "step": 129890 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002513345629544828, + "loss": 0.5216, + "step": 129900 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002512921622251903, + "loss": 0.4665, + "step": 129910 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025124976149589774, + "loss": 0.5125, + "step": 129920 + }, + { + "epoch": 5.49, + "learning_rate": 0.00025120736076660516, + "loss": 0.5122, + "step": 129930 + }, + { + "epoch": 5.49, + "learning_rate": 0.0002511649600373127, + "loss": 0.3717, + "step": 129940 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002511225593080201, + "loss": 0.4593, + "step": 129950 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025108015857872756, + "loss": 0.4757, + "step": 129960 + }, + { + "epoch": 5.5, + "learning_rate": 0.000251037757849435, + "loss": 0.4289, + "step": 129970 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002509953571201425, + "loss": 0.4425, + "step": 129980 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002509529563908499, + "loss": 0.4299, + "step": 129990 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002509105556615574, + "loss": 0.4679, + "step": 130000 + }, + { + "epoch": 5.5, + "eval_loss": 0.6127136945724487, + "eval_runtime": 337.643, + "eval_samples_per_second": 15.564, + "eval_steps_per_second": 3.892, + "step": 130000 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025086815493226484, + "loss": 0.4309, + "step": 130010 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002508257542029723, + "loss": 0.4469, + "step": 130020 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002507833534736797, + "loss": 0.3861, + "step": 130030 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025074095274438724, + "loss": 0.3999, + "step": 130040 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025069855201509465, + "loss": 0.4197, + "step": 130050 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002506561512858021, + "loss": 0.4518, + "step": 130060 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002506137505565096, + "loss": 0.4336, + "step": 130070 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025057134982721705, + "loss": 0.4566, + "step": 130080 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025052894909792446, + "loss": 0.4498, + "step": 130090 + }, + { + "epoch": 5.5, + "learning_rate": 0.000250486548368632, + "loss": 0.4629, + "step": 130100 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002504441476393394, + "loss": 0.4943, + "step": 130110 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025040174691004686, + "loss": 0.4331, + "step": 130120 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025035934618075433, + "loss": 0.4425, + "step": 130130 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002503169454514618, + "loss": 0.4086, + "step": 130140 + }, + { + "epoch": 5.5, + "learning_rate": 0.0002502745447221692, + "loss": 0.4575, + "step": 130150 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025023214399287673, + "loss": 0.5048, + "step": 130160 + }, + { + "epoch": 5.5, + "learning_rate": 0.00025018974326358414, + "loss": 0.4861, + "step": 130170 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002501473425342916, + "loss": 0.4411, + "step": 130180 + }, + { + "epoch": 5.51, + "learning_rate": 0.000250104941804999, + "loss": 0.508, + "step": 130190 + }, + { + "epoch": 5.51, + "learning_rate": 0.00025006254107570654, + "loss": 0.4431, + "step": 130200 + }, + { + "epoch": 5.51, + "learning_rate": 0.00025002014034641396, + "loss": 0.458, + "step": 130210 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002499777396171214, + "loss": 0.4728, + "step": 130220 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002499353388878289, + "loss": 0.4582, + "step": 130230 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024989293815853636, + "loss": 0.4168, + "step": 130240 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024985053742924377, + "loss": 0.4125, + "step": 130250 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002498081366999513, + "loss": 0.4813, + "step": 130260 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002497657359706587, + "loss": 0.5304, + "step": 130270 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024972333524136617, + "loss": 0.3963, + "step": 130280 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024968093451207364, + "loss": 0.4248, + "step": 130290 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002496385337827811, + "loss": 0.409, + "step": 130300 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002495961330534885, + "loss": 0.4499, + "step": 130310 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024955373232419604, + "loss": 0.479, + "step": 130320 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024951133159490345, + "loss": 0.4674, + "step": 130330 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002494689308656109, + "loss": 0.451, + "step": 130340 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024942653013631833, + "loss": 0.4361, + "step": 130350 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024938412940702585, + "loss": 0.4576, + "step": 130360 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024934172867773326, + "loss": 0.478, + "step": 130370 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024929932794844073, + "loss": 0.5265, + "step": 130380 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002492569272191482, + "loss": 0.4826, + "step": 130390 + }, + { + "epoch": 5.51, + "learning_rate": 0.00024921452648985566, + "loss": 0.4756, + "step": 130400 + }, + { + "epoch": 5.51, + "learning_rate": 0.0002491721257605631, + "loss": 0.3666, + "step": 130410 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024912972503127054, + "loss": 0.5519, + "step": 130420 + }, + { + "epoch": 5.52, + "learning_rate": 0.000249087324301978, + "loss": 0.4781, + "step": 130430 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002490449235726855, + "loss": 0.3722, + "step": 130440 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024900252284339294, + "loss": 0.4238, + "step": 130450 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024896012211410035, + "loss": 0.4656, + "step": 130460 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002489177213848078, + "loss": 0.5112, + "step": 130470 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002488753206555153, + "loss": 0.439, + "step": 130480 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024883291992622275, + "loss": 0.4698, + "step": 130490 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024879051919693017, + "loss": 0.4497, + "step": 130500 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024874811846763763, + "loss": 0.437, + "step": 130510 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002487057177383451, + "loss": 0.4478, + "step": 130520 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024866331700905257, + "loss": 0.4288, + "step": 130530 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024862091627976003, + "loss": 0.3795, + "step": 130540 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002485785155504675, + "loss": 0.4048, + "step": 130550 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002485361148211749, + "loss": 0.4414, + "step": 130560 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002484937140918824, + "loss": 0.5314, + "step": 130570 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024845131336258985, + "loss": 0.4457, + "step": 130580 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002484089126332973, + "loss": 0.5407, + "step": 130590 + }, + { + "epoch": 5.52, + "learning_rate": 0.0002483665119040047, + "loss": 0.5084, + "step": 130600 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024832411117471225, + "loss": 0.524, + "step": 130610 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024828171044541966, + "loss": 0.4464, + "step": 130620 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024823930971612713, + "loss": 0.4625, + "step": 130630 + }, + { + "epoch": 5.52, + "learning_rate": 0.00024819690898683454, + "loss": 0.4718, + "step": 130640 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024815450825754206, + "loss": 0.4774, + "step": 130650 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002481121075282495, + "loss": 0.4865, + "step": 130660 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024806970679895694, + "loss": 0.3866, + "step": 130670 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002480273060696644, + "loss": 0.4968, + "step": 130680 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002479849053403719, + "loss": 0.4642, + "step": 130690 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002479425046110793, + "loss": 0.5064, + "step": 130700 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002479001038817868, + "loss": 0.55, + "step": 130710 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002478577031524942, + "loss": 0.4496, + "step": 130720 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002478153024232017, + "loss": 0.4034, + "step": 130730 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024777290169390915, + "loss": 0.6204, + "step": 130740 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002477305009646166, + "loss": 0.4856, + "step": 130750 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024768810023532403, + "loss": 0.4269, + "step": 130760 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024764569950603155, + "loss": 0.4328, + "step": 130770 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024760329877673897, + "loss": 0.4681, + "step": 130780 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024756089804744643, + "loss": 0.4792, + "step": 130790 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024751849731815385, + "loss": 0.3939, + "step": 130800 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024747609658886137, + "loss": 0.4916, + "step": 130810 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002474336958595688, + "loss": 0.4871, + "step": 130820 + }, + { + "epoch": 5.53, + "learning_rate": 0.00024739129513027625, + "loss": 0.3534, + "step": 130830 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002473488944009837, + "loss": 0.4695, + "step": 130840 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002473064936716912, + "loss": 0.4645, + "step": 130850 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002472640929423986, + "loss": 0.4462, + "step": 130860 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002472216922131061, + "loss": 0.4224, + "step": 130870 + }, + { + "epoch": 5.53, + "learning_rate": 0.0002471792914838135, + "loss": 0.4561, + "step": 130880 + }, + { + "epoch": 5.54, + "learning_rate": 0.000247136890754521, + "loss": 0.4656, + "step": 130890 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024709449002522846, + "loss": 0.4898, + "step": 130900 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002470520892959359, + "loss": 0.4614, + "step": 130910 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024700968856664334, + "loss": 0.4656, + "step": 130920 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024696728783735086, + "loss": 0.3796, + "step": 130930 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024692488710805827, + "loss": 0.4683, + "step": 130940 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024688248637876574, + "loss": 0.5279, + "step": 130950 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024684008564947315, + "loss": 0.4602, + "step": 130960 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024679768492018067, + "loss": 0.4597, + "step": 130970 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002467552841908881, + "loss": 0.462, + "step": 130980 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024671288346159555, + "loss": 0.4378, + "step": 130990 + }, + { + "epoch": 5.54, + "learning_rate": 0.000246670482732303, + "loss": 0.4747, + "step": 131000 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002466280820030105, + "loss": 0.4743, + "step": 131010 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002465856812737179, + "loss": 0.4269, + "step": 131020 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002465432805444254, + "loss": 0.4573, + "step": 131030 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024650087981513283, + "loss": 0.4598, + "step": 131040 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002464584790858403, + "loss": 0.4531, + "step": 131050 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024641607835654777, + "loss": 0.4261, + "step": 131060 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024637367762725523, + "loss": 0.4044, + "step": 131070 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024633127689796264, + "loss": 0.4225, + "step": 131080 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002462888761686701, + "loss": 0.4355, + "step": 131090 + }, + { + "epoch": 5.54, + "learning_rate": 0.0002462464754393776, + "loss": 0.5009, + "step": 131100 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024620407471008505, + "loss": 0.4637, + "step": 131110 + }, + { + "epoch": 5.54, + "learning_rate": 0.00024616167398079246, + "loss": 0.4586, + "step": 131120 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002461192732514999, + "loss": 0.5421, + "step": 131130 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002460768725222074, + "loss": 0.5207, + "step": 131140 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024603447179291486, + "loss": 0.4894, + "step": 131150 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002459920710636223, + "loss": 0.4054, + "step": 131160 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024594967033432974, + "loss": 0.4834, + "step": 131170 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002459072696050372, + "loss": 0.4408, + "step": 131180 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024586486887574467, + "loss": 0.4787, + "step": 131190 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024582246814645214, + "loss": 0.3763, + "step": 131200 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024578006741715955, + "loss": 0.4691, + "step": 131210 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024573766668786707, + "loss": 0.4857, + "step": 131220 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002456952659585745, + "loss": 0.4401, + "step": 131230 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024565286522928195, + "loss": 0.455, + "step": 131240 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024561046449998936, + "loss": 0.386, + "step": 131250 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002455680637706969, + "loss": 0.428, + "step": 131260 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002455256630414043, + "loss": 0.5376, + "step": 131270 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024548326231211176, + "loss": 0.47, + "step": 131280 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024544086158281923, + "loss": 0.4121, + "step": 131290 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002453984608535267, + "loss": 0.4817, + "step": 131300 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002453560601242341, + "loss": 0.4511, + "step": 131310 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024531365939494163, + "loss": 0.5175, + "step": 131320 + }, + { + "epoch": 5.55, + "learning_rate": 0.00024527125866564904, + "loss": 0.4745, + "step": 131330 + }, + { + "epoch": 5.55, + "learning_rate": 0.0002452288579363565, + "loss": 0.5107, + "step": 131340 + }, + { + "epoch": 5.55, + "learning_rate": 0.000245186457207064, + "loss": 0.3634, + "step": 131350 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024514405647777144, + "loss": 0.4309, + "step": 131360 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024510165574847886, + "loss": 0.4568, + "step": 131370 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002450592550191864, + "loss": 0.4735, + "step": 131380 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002450168542898938, + "loss": 0.4354, + "step": 131390 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024497445356060126, + "loss": 0.5315, + "step": 131400 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024493205283130867, + "loss": 0.468, + "step": 131410 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002448896521020162, + "loss": 0.4249, + "step": 131420 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002448472513727236, + "loss": 0.485, + "step": 131430 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024480485064343107, + "loss": 0.5003, + "step": 131440 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024476244991413854, + "loss": 0.4565, + "step": 131450 + }, + { + "epoch": 5.56, + "learning_rate": 0.000244720049184846, + "loss": 0.4301, + "step": 131460 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002446776484555534, + "loss": 0.4722, + "step": 131470 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024463524772626094, + "loss": 0.486, + "step": 131480 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024459284699696835, + "loss": 0.4708, + "step": 131490 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002445504462676758, + "loss": 0.4265, + "step": 131500 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002445080455383833, + "loss": 0.4149, + "step": 131510 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024446564480909075, + "loss": 0.5085, + "step": 131520 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024442324407979816, + "loss": 0.4489, + "step": 131530 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002443808433505057, + "loss": 0.3855, + "step": 131540 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002443384426212131, + "loss": 0.4765, + "step": 131550 + }, + { + "epoch": 5.56, + "learning_rate": 0.00024429604189192056, + "loss": 0.4888, + "step": 131560 + }, + { + "epoch": 5.56, + "learning_rate": 0.000244253641162628, + "loss": 0.4107, + "step": 131570 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002442112404333355, + "loss": 0.5054, + "step": 131580 + }, + { + "epoch": 5.56, + "learning_rate": 0.0002441688397040429, + "loss": 0.4437, + "step": 131590 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024412643897475035, + "loss": 0.4619, + "step": 131600 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024408403824545784, + "loss": 0.3901, + "step": 131610 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024404163751616528, + "loss": 0.4833, + "step": 131620 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024399923678687272, + "loss": 0.4509, + "step": 131630 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024395683605758022, + "loss": 0.3997, + "step": 131640 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024391443532828766, + "loss": 0.4734, + "step": 131650 + }, + { + "epoch": 5.57, + "learning_rate": 0.0002438720345989951, + "loss": 0.4813, + "step": 131660 + }, + { + "epoch": 5.57, + "learning_rate": 0.0002438296338697026, + "loss": 0.4079, + "step": 131670 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024378723314041003, + "loss": 0.443, + "step": 131680 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024374483241111747, + "loss": 0.4718, + "step": 131690 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024370243168182496, + "loss": 0.3811, + "step": 131700 + }, + { + "epoch": 5.57, + "learning_rate": 0.0002436600309525324, + "loss": 0.6331, + "step": 131710 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024361763022323984, + "loss": 0.4738, + "step": 131720 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024357522949394728, + "loss": 0.4939, + "step": 131730 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024353282876465478, + "loss": 0.3902, + "step": 131740 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024349042803536221, + "loss": 0.4817, + "step": 131750 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024344802730606965, + "loss": 0.4279, + "step": 131760 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024340562657677715, + "loss": 0.4698, + "step": 131770 + }, + { + "epoch": 5.57, + "learning_rate": 0.0002433632258474846, + "loss": 0.4042, + "step": 131780 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024332082511819203, + "loss": 0.4371, + "step": 131790 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024327842438889952, + "loss": 0.4104, + "step": 131800 + }, + { + "epoch": 5.57, + "learning_rate": 0.00024323602365960696, + "loss": 0.495, + "step": 131810 + }, + { + "epoch": 5.57, + "learning_rate": 0.0002431936229303144, + "loss": 0.4523, + "step": 131820 + }, + { + "epoch": 5.57, + "learning_rate": 0.0002431512222010219, + "loss": 0.4635, + "step": 131830 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024310882147172933, + "loss": 0.4205, + "step": 131840 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024306642074243677, + "loss": 0.4654, + "step": 131850 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024302402001314421, + "loss": 0.3801, + "step": 131860 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002429816192838517, + "loss": 0.428, + "step": 131870 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024293921855455915, + "loss": 0.5411, + "step": 131880 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002428968178252666, + "loss": 0.4418, + "step": 131890 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024285441709597408, + "loss": 0.4106, + "step": 131900 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024281201636668152, + "loss": 0.5252, + "step": 131910 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024276961563738896, + "loss": 0.3991, + "step": 131920 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024272721490809645, + "loss": 0.4314, + "step": 131930 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002426848141788039, + "loss": 0.516, + "step": 131940 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024264241344951133, + "loss": 0.4007, + "step": 131950 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024260001272021883, + "loss": 0.4274, + "step": 131960 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024255761199092627, + "loss": 0.4225, + "step": 131970 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002425152112616337, + "loss": 0.4819, + "step": 131980 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002424728105323412, + "loss": 0.5004, + "step": 131990 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024243040980304864, + "loss": 0.4286, + "step": 132000 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024238800907375608, + "loss": 0.4781, + "step": 132010 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024234560834446352, + "loss": 0.5058, + "step": 132020 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024230320761517101, + "loss": 0.4995, + "step": 132030 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024226080688587845, + "loss": 0.4377, + "step": 132040 + }, + { + "epoch": 5.58, + "learning_rate": 0.0002422184061565859, + "loss": 0.4288, + "step": 132050 + }, + { + "epoch": 5.58, + "learning_rate": 0.00024217600542729336, + "loss": 0.4574, + "step": 132060 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024213360469800083, + "loss": 0.4114, + "step": 132070 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024209120396870827, + "loss": 0.4623, + "step": 132080 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024204880323941573, + "loss": 0.4163, + "step": 132090 + }, + { + "epoch": 5.59, + "learning_rate": 0.0002420064025101232, + "loss": 0.3597, + "step": 132100 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024196400178083064, + "loss": 0.5151, + "step": 132110 + }, + { + "epoch": 5.59, + "learning_rate": 0.0002419216010515381, + "loss": 0.4841, + "step": 132120 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024187920032224555, + "loss": 0.4358, + "step": 132130 + }, + { + "epoch": 5.59, + "learning_rate": 0.000241836799592953, + "loss": 0.4854, + "step": 132140 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024179439886366048, + "loss": 0.503, + "step": 132150 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024175199813436792, + "loss": 0.4871, + "step": 132160 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024170959740507536, + "loss": 0.5192, + "step": 132170 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024166719667578283, + "loss": 0.5609, + "step": 132180 + }, + { + "epoch": 5.59, + "learning_rate": 0.0002416247959464903, + "loss": 0.4858, + "step": 132190 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024158239521719773, + "loss": 0.4977, + "step": 132200 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024153999448790517, + "loss": 0.4674, + "step": 132210 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024149759375861267, + "loss": 0.4462, + "step": 132220 + }, + { + "epoch": 5.59, + "learning_rate": 0.0002414551930293201, + "loss": 0.4921, + "step": 132230 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024141279230002755, + "loss": 0.4351, + "step": 132240 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024137039157073504, + "loss": 0.4831, + "step": 132250 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024132799084144248, + "loss": 0.4457, + "step": 132260 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024128559011214992, + "loss": 0.4741, + "step": 132270 + }, + { + "epoch": 5.59, + "learning_rate": 0.0002412431893828574, + "loss": 0.4757, + "step": 132280 + }, + { + "epoch": 5.59, + "learning_rate": 0.00024120078865356485, + "loss": 0.4567, + "step": 132290 + }, + { + "epoch": 5.59, + "learning_rate": 0.0002411583879242723, + "loss": 0.4034, + "step": 132300 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024111598719497979, + "loss": 0.4515, + "step": 132310 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024107358646568723, + "loss": 0.4608, + "step": 132320 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024103118573639467, + "loss": 0.4141, + "step": 132330 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002409887850071021, + "loss": 0.5332, + "step": 132340 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002409463842778096, + "loss": 0.4427, + "step": 132350 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024090398354851704, + "loss": 0.4188, + "step": 132360 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024086158281922448, + "loss": 0.4277, + "step": 132370 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024081918208993197, + "loss": 0.4432, + "step": 132380 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002407767813606394, + "loss": 0.443, + "step": 132390 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024073438063134685, + "loss": 0.453, + "step": 132400 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024069197990205435, + "loss": 0.4597, + "step": 132410 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024064957917276179, + "loss": 0.4516, + "step": 132420 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024060717844346922, + "loss": 0.4831, + "step": 132430 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024056477771417672, + "loss": 0.4021, + "step": 132440 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024052237698488416, + "loss": 0.4796, + "step": 132450 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002404799762555916, + "loss": 0.4955, + "step": 132460 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002404375755262991, + "loss": 0.4238, + "step": 132470 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024039517479700653, + "loss": 0.5683, + "step": 132480 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024035277406771397, + "loss": 0.377, + "step": 132490 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002403103733384214, + "loss": 0.4218, + "step": 132500 + }, + { + "epoch": 5.6, + "learning_rate": 0.0002402679726091289, + "loss": 0.4739, + "step": 132510 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024022557187983634, + "loss": 0.4906, + "step": 132520 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024018317115054378, + "loss": 0.4706, + "step": 132530 + }, + { + "epoch": 5.6, + "learning_rate": 0.00024014077042125128, + "loss": 0.4664, + "step": 132540 + }, + { + "epoch": 5.61, + "learning_rate": 0.00024009836969195872, + "loss": 0.4498, + "step": 132550 + }, + { + "epoch": 5.61, + "learning_rate": 0.00024005596896266616, + "loss": 0.426, + "step": 132560 + }, + { + "epoch": 5.61, + "learning_rate": 0.00024001356823337365, + "loss": 0.5092, + "step": 132570 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002399711675040811, + "loss": 0.4337, + "step": 132580 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023992876677478853, + "loss": 0.4767, + "step": 132590 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023988636604549602, + "loss": 0.4226, + "step": 132600 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023984396531620346, + "loss": 0.4798, + "step": 132610 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002398015645869109, + "loss": 0.4762, + "step": 132620 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023975916385761834, + "loss": 0.5711, + "step": 132630 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023971676312832584, + "loss": 0.4756, + "step": 132640 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023967436239903328, + "loss": 0.4509, + "step": 132650 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023963196166974072, + "loss": 0.4357, + "step": 132660 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002395895609404482, + "loss": 0.3958, + "step": 132670 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023954716021115565, + "loss": 0.4342, + "step": 132680 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002395047594818631, + "loss": 0.4805, + "step": 132690 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023946235875257056, + "loss": 0.4059, + "step": 132700 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023941995802327802, + "loss": 0.4255, + "step": 132710 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023937755729398546, + "loss": 0.4999, + "step": 132720 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023933515656469293, + "loss": 0.4911, + "step": 132730 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002392927558354004, + "loss": 0.4289, + "step": 132740 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023925035510610784, + "loss": 0.4462, + "step": 132750 + }, + { + "epoch": 5.61, + "learning_rate": 0.0002392079543768153, + "loss": 0.5012, + "step": 132760 + }, + { + "epoch": 5.61, + "learning_rate": 0.00023916555364752274, + "loss": 0.424, + "step": 132770 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002391231529182302, + "loss": 0.3998, + "step": 132780 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023908075218893765, + "loss": 0.4083, + "step": 132790 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023903835145964512, + "loss": 0.4099, + "step": 132800 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023899595073035256, + "loss": 0.4527, + "step": 132810 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023895355000106002, + "loss": 0.4954, + "step": 132820 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002389111492717675, + "loss": 0.461, + "step": 132830 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023886874854247493, + "loss": 0.4651, + "step": 132840 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002388263478131824, + "loss": 0.4762, + "step": 132850 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023878394708388986, + "loss": 0.457, + "step": 132860 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002387415463545973, + "loss": 0.4222, + "step": 132870 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023869914562530474, + "loss": 0.4352, + "step": 132880 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023865674489601224, + "loss": 0.4315, + "step": 132890 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023861434416671968, + "loss": 0.4231, + "step": 132900 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023857194343742712, + "loss": 0.4835, + "step": 132910 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002385295427081346, + "loss": 0.4479, + "step": 132920 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023848714197884205, + "loss": 0.3953, + "step": 132930 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002384447412495495, + "loss": 0.4449, + "step": 132940 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023840234052025693, + "loss": 0.4107, + "step": 132950 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023835993979096442, + "loss": 0.4342, + "step": 132960 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023831753906167186, + "loss": 0.4559, + "step": 132970 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002382751383323793, + "loss": 0.4886, + "step": 132980 + }, + { + "epoch": 5.62, + "learning_rate": 0.0002382327376030868, + "loss": 0.4393, + "step": 132990 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023819033687379424, + "loss": 0.4781, + "step": 133000 + }, + { + "epoch": 5.62, + "learning_rate": 0.00023814793614450168, + "loss": 0.4899, + "step": 133010 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023810553541520917, + "loss": 0.432, + "step": 133020 + }, + { + "epoch": 5.63, + "learning_rate": 0.0002380631346859166, + "loss": 0.4553, + "step": 133030 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023802073395662405, + "loss": 0.4564, + "step": 133040 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023797833322733154, + "loss": 0.5364, + "step": 133050 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023793593249803898, + "loss": 0.4775, + "step": 133060 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023789353176874642, + "loss": 0.5024, + "step": 133070 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023785113103945392, + "loss": 0.4937, + "step": 133080 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023780873031016136, + "loss": 0.4167, + "step": 133090 + }, + { + "epoch": 5.63, + "learning_rate": 0.0002377663295808688, + "loss": 0.4475, + "step": 133100 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023772392885157623, + "loss": 0.4744, + "step": 133110 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023768152812228373, + "loss": 0.4594, + "step": 133120 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023763912739299117, + "loss": 0.5758, + "step": 133130 + }, + { + "epoch": 5.63, + "learning_rate": 0.0002375967266636986, + "loss": 0.3897, + "step": 133140 + }, + { + "epoch": 5.63, + "learning_rate": 0.0002375543259344061, + "loss": 0.491, + "step": 133150 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023751192520511354, + "loss": 0.4022, + "step": 133160 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023746952447582098, + "loss": 0.4621, + "step": 133170 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023742712374652848, + "loss": 0.3787, + "step": 133180 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023738472301723591, + "loss": 0.4708, + "step": 133190 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023734232228794335, + "loss": 0.4214, + "step": 133200 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023729992155865085, + "loss": 0.3633, + "step": 133210 + }, + { + "epoch": 5.63, + "learning_rate": 0.0002372575208293583, + "loss": 0.4476, + "step": 133220 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023721512010006573, + "loss": 0.4499, + "step": 133230 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023717271937077317, + "loss": 0.4802, + "step": 133240 + }, + { + "epoch": 5.63, + "learning_rate": 0.00023713031864148066, + "loss": 0.5186, + "step": 133250 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002370879179121881, + "loss": 0.4282, + "step": 133260 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023704551718289554, + "loss": 0.4147, + "step": 133270 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023700311645360303, + "loss": 0.4768, + "step": 133280 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023696071572431047, + "loss": 0.4444, + "step": 133290 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023691831499501791, + "loss": 0.4792, + "step": 133300 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002368759142657254, + "loss": 0.4684, + "step": 133310 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023683351353643285, + "loss": 0.5071, + "step": 133320 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002367911128071403, + "loss": 0.4814, + "step": 133330 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023674871207784775, + "loss": 0.4781, + "step": 133340 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023670631134855522, + "loss": 0.452, + "step": 133350 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023666391061926266, + "loss": 0.4028, + "step": 133360 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023662150988997013, + "loss": 0.4842, + "step": 133370 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002365791091606776, + "loss": 0.5428, + "step": 133380 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023653670843138503, + "loss": 0.5021, + "step": 133390 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023649430770209247, + "loss": 0.4718, + "step": 133400 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023645190697279994, + "loss": 0.4061, + "step": 133410 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002364095062435074, + "loss": 0.4146, + "step": 133420 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023636710551421485, + "loss": 0.5243, + "step": 133430 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002363247047849223, + "loss": 0.4355, + "step": 133440 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023628230405562975, + "loss": 0.3948, + "step": 133450 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023623990332633722, + "loss": 0.4378, + "step": 133460 + }, + { + "epoch": 5.64, + "learning_rate": 0.0002361975025970447, + "loss": 0.4562, + "step": 133470 + }, + { + "epoch": 5.64, + "learning_rate": 0.00023615510186775213, + "loss": 0.4271, + "step": 133480 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002361127011384596, + "loss": 0.4621, + "step": 133490 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023607030040916706, + "loss": 0.4219, + "step": 133500 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002360278996798745, + "loss": 0.4507, + "step": 133510 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023598549895058194, + "loss": 0.4482, + "step": 133520 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023594309822128943, + "loss": 0.4889, + "step": 133530 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023590069749199687, + "loss": 0.4564, + "step": 133540 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002358582967627043, + "loss": 0.4001, + "step": 133550 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023581589603341175, + "loss": 0.4209, + "step": 133560 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023577349530411925, + "loss": 0.4345, + "step": 133570 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023573109457482669, + "loss": 0.506, + "step": 133580 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023568869384553413, + "loss": 0.4244, + "step": 133590 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023564629311624162, + "loss": 0.3714, + "step": 133600 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023560389238694906, + "loss": 0.4901, + "step": 133610 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002355614916576565, + "loss": 0.3869, + "step": 133620 + }, + { + "epoch": 5.65, + "learning_rate": 0.000235519090928364, + "loss": 0.4765, + "step": 133630 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023547669019907143, + "loss": 0.4975, + "step": 133640 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023543428946977887, + "loss": 0.4557, + "step": 133650 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023539188874048637, + "loss": 0.5623, + "step": 133660 + }, + { + "epoch": 5.65, + "learning_rate": 0.0002353494880111938, + "loss": 0.5288, + "step": 133670 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023530708728190125, + "loss": 0.4574, + "step": 133680 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023526468655260874, + "loss": 0.389, + "step": 133690 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023522228582331618, + "loss": 0.4098, + "step": 133700 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023517988509402362, + "loss": 0.4338, + "step": 133710 + }, + { + "epoch": 5.65, + "learning_rate": 0.00023513748436473106, + "loss": 0.4924, + "step": 133720 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023509508363543855, + "loss": 0.3825, + "step": 133730 + }, + { + "epoch": 5.66, + "learning_rate": 0.000235052682906146, + "loss": 0.5269, + "step": 133740 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023501028217685343, + "loss": 0.5303, + "step": 133750 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023496788144756093, + "loss": 0.4723, + "step": 133760 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023492548071826837, + "loss": 0.4592, + "step": 133770 + }, + { + "epoch": 5.66, + "learning_rate": 0.0002348830799889758, + "loss": 0.5037, + "step": 133780 + }, + { + "epoch": 5.66, + "learning_rate": 0.0002348406792596833, + "loss": 0.4459, + "step": 133790 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023479827853039074, + "loss": 0.5153, + "step": 133800 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023475587780109818, + "loss": 0.4893, + "step": 133810 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023471347707180567, + "loss": 0.407, + "step": 133820 + }, + { + "epoch": 5.66, + "learning_rate": 0.0002346710763425131, + "loss": 0.4, + "step": 133830 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023462867561322055, + "loss": 0.47, + "step": 133840 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023458627488392805, + "loss": 0.42, + "step": 133850 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023454387415463548, + "loss": 0.4781, + "step": 133860 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023450147342534292, + "loss": 0.5447, + "step": 133870 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023445907269605036, + "loss": 0.4471, + "step": 133880 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023441667196675786, + "loss": 0.4492, + "step": 133890 + }, + { + "epoch": 5.66, + "learning_rate": 0.0002343742712374653, + "loss": 0.4352, + "step": 133900 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023433187050817274, + "loss": 0.3919, + "step": 133910 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023428946977888023, + "loss": 0.5184, + "step": 133920 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023424706904958767, + "loss": 0.479, + "step": 133930 + }, + { + "epoch": 5.66, + "learning_rate": 0.0002342046683202951, + "loss": 0.5686, + "step": 133940 + }, + { + "epoch": 5.66, + "learning_rate": 0.0002341622675910026, + "loss": 0.6078, + "step": 133950 + }, + { + "epoch": 5.66, + "learning_rate": 0.00023411986686171004, + "loss": 0.4787, + "step": 133960 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023407746613241748, + "loss": 0.5169, + "step": 133970 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023403506540312498, + "loss": 0.4539, + "step": 133980 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023399266467383242, + "loss": 0.406, + "step": 133990 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023395026394453986, + "loss": 0.5042, + "step": 134000 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002339078632152473, + "loss": 0.4391, + "step": 134010 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002338654624859548, + "loss": 0.4467, + "step": 134020 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023382306175666223, + "loss": 0.3878, + "step": 134030 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023378066102736967, + "loss": 0.4474, + "step": 134040 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023373826029807714, + "loss": 0.4788, + "step": 134050 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002336958595687846, + "loss": 0.4587, + "step": 134060 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023365345883949204, + "loss": 0.5032, + "step": 134070 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002336110581101995, + "loss": 0.5117, + "step": 134080 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023356865738090695, + "loss": 0.3946, + "step": 134090 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023352625665161442, + "loss": 0.4741, + "step": 134100 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023348385592232188, + "loss": 0.5057, + "step": 134110 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023344145519302932, + "loss": 0.5208, + "step": 134120 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002333990544637368, + "loss": 0.4992, + "step": 134130 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023335665373444426, + "loss": 0.4902, + "step": 134140 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002333142530051517, + "loss": 0.4812, + "step": 134150 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023327185227585914, + "loss": 0.43, + "step": 134160 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002332294515465666, + "loss": 0.3769, + "step": 134170 + }, + { + "epoch": 5.67, + "learning_rate": 0.00023318705081727407, + "loss": 0.4265, + "step": 134180 + }, + { + "epoch": 5.67, + "learning_rate": 0.0002331446500879815, + "loss": 0.4796, + "step": 134190 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023310224935868895, + "loss": 0.4562, + "step": 134200 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023305984862939644, + "loss": 0.4559, + "step": 134210 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023301744790010388, + "loss": 0.4339, + "step": 134220 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023297504717081132, + "loss": 0.4498, + "step": 134230 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023293264644151882, + "loss": 0.3999, + "step": 134240 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023289024571222626, + "loss": 0.4686, + "step": 134250 + }, + { + "epoch": 5.68, + "learning_rate": 0.0002328478449829337, + "loss": 0.4313, + "step": 134260 + }, + { + "epoch": 5.68, + "learning_rate": 0.0002328054442536412, + "loss": 0.4808, + "step": 134270 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023276304352434863, + "loss": 0.4263, + "step": 134280 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023272064279505607, + "loss": 0.4085, + "step": 134290 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023267824206576356, + "loss": 0.4119, + "step": 134300 + }, + { + "epoch": 5.68, + "learning_rate": 0.000232635841336471, + "loss": 0.4545, + "step": 134310 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023259344060717844, + "loss": 0.3983, + "step": 134320 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023255103987788588, + "loss": 0.5322, + "step": 134330 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023250863914859338, + "loss": 0.4843, + "step": 134340 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023246623841930082, + "loss": 0.3851, + "step": 134350 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023242383769000826, + "loss": 0.4214, + "step": 134360 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023238143696071575, + "loss": 0.4989, + "step": 134370 + }, + { + "epoch": 5.68, + "learning_rate": 0.0002323390362314232, + "loss": 0.4967, + "step": 134380 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023229663550213063, + "loss": 0.5017, + "step": 134390 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023225423477283812, + "loss": 0.5039, + "step": 134400 + }, + { + "epoch": 5.68, + "learning_rate": 0.00023221183404354556, + "loss": 0.4413, + "step": 134410 + }, + { + "epoch": 5.68, + "learning_rate": 0.000232169433314253, + "loss": 0.4313, + "step": 134420 + }, + { + "epoch": 5.68, + "learning_rate": 0.0002321270325849605, + "loss": 0.4582, + "step": 134430 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023208463185566794, + "loss": 0.4248, + "step": 134440 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023204223112637537, + "loss": 0.5257, + "step": 134450 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023199983039708287, + "loss": 0.4554, + "step": 134460 + }, + { + "epoch": 5.69, + "learning_rate": 0.0002319574296677903, + "loss": 0.4613, + "step": 134470 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023191502893849775, + "loss": 0.3785, + "step": 134480 + }, + { + "epoch": 5.69, + "learning_rate": 0.0002318726282092052, + "loss": 0.3934, + "step": 134490 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023183022747991268, + "loss": 0.513, + "step": 134500 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023178782675062012, + "loss": 0.5099, + "step": 134510 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023174542602132756, + "loss": 0.4316, + "step": 134520 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023170302529203505, + "loss": 0.437, + "step": 134530 + }, + { + "epoch": 5.69, + "learning_rate": 0.0002316606245627425, + "loss": 0.4267, + "step": 134540 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023161822383344993, + "loss": 0.4991, + "step": 134550 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023157582310415743, + "loss": 0.4881, + "step": 134560 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023153342237486487, + "loss": 0.4727, + "step": 134570 + }, + { + "epoch": 5.69, + "learning_rate": 0.0002314910216455723, + "loss": 0.5469, + "step": 134580 + }, + { + "epoch": 5.69, + "learning_rate": 0.0002314486209162798, + "loss": 0.5538, + "step": 134590 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023140622018698724, + "loss": 0.4631, + "step": 134600 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023136381945769468, + "loss": 0.4615, + "step": 134610 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023132141872840217, + "loss": 0.3812, + "step": 134620 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023127901799910961, + "loss": 0.4773, + "step": 134630 + }, + { + "epoch": 5.69, + "learning_rate": 0.00023123661726981705, + "loss": 0.4497, + "step": 134640 + }, + { + "epoch": 5.69, + "learning_rate": 0.0002311942165405245, + "loss": 0.4854, + "step": 134650 + }, + { + "epoch": 5.69, + "learning_rate": 0.000231151815811232, + "loss": 0.4168, + "step": 134660 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023110941508193943, + "loss": 0.4889, + "step": 134670 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023106701435264687, + "loss": 0.4084, + "step": 134680 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023102461362335433, + "loss": 0.4693, + "step": 134690 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002309822128940618, + "loss": 0.5205, + "step": 134700 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023093981216476924, + "loss": 0.4976, + "step": 134710 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002308974114354767, + "loss": 0.4541, + "step": 134720 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023085501070618417, + "loss": 0.4239, + "step": 134730 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023081260997689161, + "loss": 0.4331, + "step": 134740 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023077020924759908, + "loss": 0.4411, + "step": 134750 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023072780851830652, + "loss": 0.4922, + "step": 134760 + }, + { + "epoch": 5.7, + "learning_rate": 0.000230685407789014, + "loss": 0.5046, + "step": 134770 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023064300705972143, + "loss": 0.4949, + "step": 134780 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002306006063304289, + "loss": 0.5441, + "step": 134790 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023055820560113633, + "loss": 0.4763, + "step": 134800 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002305158048718438, + "loss": 0.4624, + "step": 134810 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023047340414255127, + "loss": 0.5307, + "step": 134820 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002304310034132587, + "loss": 0.4778, + "step": 134830 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023038860268396615, + "loss": 0.6543, + "step": 134840 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023034620195467364, + "loss": 0.4539, + "step": 134850 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023030380122538108, + "loss": 0.4644, + "step": 134860 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023026140049608852, + "loss": 0.4047, + "step": 134870 + }, + { + "epoch": 5.7, + "learning_rate": 0.000230218999766796, + "loss": 0.4068, + "step": 134880 + }, + { + "epoch": 5.7, + "learning_rate": 0.00023017659903750345, + "loss": 0.4481, + "step": 134890 + }, + { + "epoch": 5.7, + "learning_rate": 0.0002301341983082109, + "loss": 0.4719, + "step": 134900 + }, + { + "epoch": 5.71, + "learning_rate": 0.00023009179757891839, + "loss": 0.458, + "step": 134910 + }, + { + "epoch": 5.71, + "learning_rate": 0.00023004939684962583, + "loss": 0.4449, + "step": 134920 + }, + { + "epoch": 5.71, + "learning_rate": 0.00023000699612033327, + "loss": 0.4538, + "step": 134930 + }, + { + "epoch": 5.71, + "learning_rate": 0.0002299645953910407, + "loss": 0.4706, + "step": 134940 + }, + { + "epoch": 5.71, + "learning_rate": 0.0002299221946617482, + "loss": 0.4486, + "step": 134950 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022987979393245564, + "loss": 0.4703, + "step": 134960 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022983739320316308, + "loss": 0.5689, + "step": 134970 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022979499247387057, + "loss": 0.4425, + "step": 134980 + }, + { + "epoch": 5.71, + "learning_rate": 0.000229752591744578, + "loss": 0.439, + "step": 134990 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022971019101528545, + "loss": 0.4384, + "step": 135000 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022966779028599295, + "loss": 0.4576, + "step": 135010 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022962538955670039, + "loss": 0.5138, + "step": 135020 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022958298882740783, + "loss": 0.4155, + "step": 135030 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022954058809811532, + "loss": 0.5247, + "step": 135040 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022949818736882276, + "loss": 0.4185, + "step": 135050 + }, + { + "epoch": 5.71, + "learning_rate": 0.0002294557866395302, + "loss": 0.444, + "step": 135060 + }, + { + "epoch": 5.71, + "learning_rate": 0.0002294133859102377, + "loss": 0.435, + "step": 135070 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022937098518094513, + "loss": 0.436, + "step": 135080 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022932858445165257, + "loss": 0.4329, + "step": 135090 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022928618372236, + "loss": 0.474, + "step": 135100 + }, + { + "epoch": 5.71, + "learning_rate": 0.0002292437829930675, + "loss": 0.475, + "step": 135110 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022920138226377494, + "loss": 0.4254, + "step": 135120 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022915898153448238, + "loss": 0.4233, + "step": 135130 + }, + { + "epoch": 5.71, + "learning_rate": 0.00022911658080518988, + "loss": 0.4445, + "step": 135140 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022907418007589732, + "loss": 0.4277, + "step": 135150 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022903177934660476, + "loss": 0.4113, + "step": 135160 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022898937861731225, + "loss": 0.4712, + "step": 135170 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002289469778880197, + "loss": 0.4392, + "step": 135180 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022890457715872713, + "loss": 0.589, + "step": 135190 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022886217642943463, + "loss": 0.4625, + "step": 135200 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022881977570014206, + "loss": 0.4077, + "step": 135210 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002287773749708495, + "loss": 0.4711, + "step": 135220 + }, + { + "epoch": 5.72, + "learning_rate": 0.000228734974241557, + "loss": 0.5163, + "step": 135230 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022869257351226444, + "loss": 0.4653, + "step": 135240 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022865017278297188, + "loss": 0.4647, + "step": 135250 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022860777205367932, + "loss": 0.4066, + "step": 135260 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002285653713243868, + "loss": 0.4173, + "step": 135270 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022852297059509425, + "loss": 0.403, + "step": 135280 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002284805698658017, + "loss": 0.4775, + "step": 135290 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022843816913650918, + "loss": 0.4165, + "step": 135300 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022839576840721662, + "loss": 0.509, + "step": 135310 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022835336767792406, + "loss": 0.4771, + "step": 135320 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022831096694863153, + "loss": 0.5289, + "step": 135330 + }, + { + "epoch": 5.72, + "learning_rate": 0.000228268566219339, + "loss": 0.4683, + "step": 135340 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022822616549004644, + "loss": 0.4737, + "step": 135350 + }, + { + "epoch": 5.72, + "learning_rate": 0.0002281837647607539, + "loss": 0.4956, + "step": 135360 + }, + { + "epoch": 5.72, + "learning_rate": 0.00022814136403146137, + "loss": 0.4924, + "step": 135370 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002280989633021688, + "loss": 0.4808, + "step": 135380 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022805656257287625, + "loss": 0.5183, + "step": 135390 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022801416184358372, + "loss": 0.5302, + "step": 135400 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022797176111429118, + "loss": 0.4821, + "step": 135410 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022792936038499862, + "loss": 0.4643, + "step": 135420 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002278869596557061, + "loss": 0.4666, + "step": 135430 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022784455892641353, + "loss": 0.4447, + "step": 135440 + }, + { + "epoch": 5.73, + "learning_rate": 0.000227802158197121, + "loss": 0.4139, + "step": 135450 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022775975746782846, + "loss": 0.4212, + "step": 135460 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002277173567385359, + "loss": 0.4663, + "step": 135470 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022767495600924337, + "loss": 0.5104, + "step": 135480 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022763255527995084, + "loss": 0.4212, + "step": 135490 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022759015455065828, + "loss": 0.4767, + "step": 135500 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022754775382136572, + "loss": 0.4249, + "step": 135510 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002275053530920732, + "loss": 0.4835, + "step": 135520 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022746295236278065, + "loss": 0.4983, + "step": 135530 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002274205516334881, + "loss": 0.4985, + "step": 135540 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022737815090419553, + "loss": 0.4076, + "step": 135550 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022733575017490302, + "loss": 0.5724, + "step": 135560 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022729334944561046, + "loss": 0.4971, + "step": 135570 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002272509487163179, + "loss": 0.3866, + "step": 135580 + }, + { + "epoch": 5.73, + "learning_rate": 0.0002272085479870254, + "loss": 0.4149, + "step": 135590 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022716614725773284, + "loss": 0.4221, + "step": 135600 + }, + { + "epoch": 5.73, + "learning_rate": 0.00022712374652844028, + "loss": 0.492, + "step": 135610 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022708134579914777, + "loss": 0.4722, + "step": 135620 + }, + { + "epoch": 5.74, + "learning_rate": 0.0002270389450698552, + "loss": 0.5396, + "step": 135630 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022699654434056265, + "loss": 0.4661, + "step": 135640 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022695414361127014, + "loss": 0.505, + "step": 135650 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022691174288197758, + "loss": 0.4467, + "step": 135660 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022686934215268502, + "loss": 0.4268, + "step": 135670 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022682694142339252, + "loss": 0.4382, + "step": 135680 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022678454069409996, + "loss": 0.4664, + "step": 135690 + }, + { + "epoch": 5.74, + "learning_rate": 0.0002267421399648074, + "loss": 0.5045, + "step": 135700 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022669973923551484, + "loss": 0.4181, + "step": 135710 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022665733850622233, + "loss": 0.4074, + "step": 135720 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022661493777692977, + "loss": 0.4894, + "step": 135730 + }, + { + "epoch": 5.74, + "learning_rate": 0.0002265725370476372, + "loss": 0.397, + "step": 135740 + }, + { + "epoch": 5.74, + "learning_rate": 0.0002265301363183447, + "loss": 0.4717, + "step": 135750 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022648773558905214, + "loss": 0.506, + "step": 135760 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022644533485975958, + "loss": 0.4887, + "step": 135770 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022640293413046708, + "loss": 0.4002, + "step": 135780 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022636053340117452, + "loss": 0.4637, + "step": 135790 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022631813267188195, + "loss": 0.4777, + "step": 135800 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022627573194258945, + "loss": 0.4355, + "step": 135810 + }, + { + "epoch": 5.74, + "learning_rate": 0.0002262333312132969, + "loss": 0.4474, + "step": 135820 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022619093048400433, + "loss": 0.4377, + "step": 135830 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022614852975471182, + "loss": 0.4261, + "step": 135840 + }, + { + "epoch": 5.74, + "learning_rate": 0.00022610612902541926, + "loss": 0.5369, + "step": 135850 + }, + { + "epoch": 5.75, + "learning_rate": 0.0002260637282961267, + "loss": 0.3658, + "step": 135860 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022602132756683414, + "loss": 0.4833, + "step": 135870 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022597892683754163, + "loss": 0.43, + "step": 135880 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022593652610824907, + "loss": 0.4585, + "step": 135890 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022589412537895651, + "loss": 0.4613, + "step": 135900 + }, + { + "epoch": 5.75, + "learning_rate": 0.000225851724649664, + "loss": 0.4877, + "step": 135910 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022580932392037145, + "loss": 0.3974, + "step": 135920 + }, + { + "epoch": 5.75, + "learning_rate": 0.0002257669231910789, + "loss": 0.4574, + "step": 135930 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022572452246178638, + "loss": 0.463, + "step": 135940 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022568212173249382, + "loss": 0.4485, + "step": 135950 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022563972100320126, + "loss": 0.4282, + "step": 135960 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022559732027390873, + "loss": 0.4636, + "step": 135970 + }, + { + "epoch": 5.75, + "learning_rate": 0.0002255549195446162, + "loss": 0.413, + "step": 135980 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022551251881532363, + "loss": 0.4829, + "step": 135990 + }, + { + "epoch": 5.75, + "learning_rate": 0.0002254701180860311, + "loss": 0.4773, + "step": 136000 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022542771735673857, + "loss": 0.4684, + "step": 136010 + }, + { + "epoch": 5.75, + "learning_rate": 0.000225385316627446, + "loss": 0.4196, + "step": 136020 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022534291589815345, + "loss": 0.4823, + "step": 136030 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022530051516886091, + "loss": 0.4753, + "step": 136040 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022525811443956838, + "loss": 0.4455, + "step": 136050 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022521571371027582, + "loss": 0.4214, + "step": 136060 + }, + { + "epoch": 5.75, + "learning_rate": 0.0002251733129809833, + "loss": 0.5117, + "step": 136070 + }, + { + "epoch": 5.75, + "learning_rate": 0.00022513091225169073, + "loss": 0.4816, + "step": 136080 + }, + { + "epoch": 5.76, + "learning_rate": 0.0002250885115223982, + "loss": 0.5513, + "step": 136090 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022504611079310566, + "loss": 0.523, + "step": 136100 + }, + { + "epoch": 5.76, + "learning_rate": 0.0002250037100638131, + "loss": 0.3967, + "step": 136110 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022496130933452057, + "loss": 0.4359, + "step": 136120 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022491890860522803, + "loss": 0.4531, + "step": 136130 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022487650787593547, + "loss": 0.4137, + "step": 136140 + }, + { + "epoch": 5.76, + "learning_rate": 0.0002248341071466429, + "loss": 0.4645, + "step": 136150 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022479170641735038, + "loss": 0.4884, + "step": 136160 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022474930568805785, + "loss": 0.4337, + "step": 136170 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022470690495876529, + "loss": 0.3971, + "step": 136180 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022466450422947273, + "loss": 0.5368, + "step": 136190 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022462210350018022, + "loss": 0.5003, + "step": 136200 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022457970277088766, + "loss": 0.503, + "step": 136210 + }, + { + "epoch": 5.76, + "learning_rate": 0.0002245373020415951, + "loss": 0.4171, + "step": 136220 + }, + { + "epoch": 5.76, + "learning_rate": 0.0002244949013123026, + "loss": 0.4444, + "step": 136230 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022445250058301003, + "loss": 0.4972, + "step": 136240 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022441009985371747, + "loss": 0.4084, + "step": 136250 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022436769912442497, + "loss": 0.4414, + "step": 136260 + }, + { + "epoch": 5.76, + "learning_rate": 0.0002243252983951324, + "loss": 0.4563, + "step": 136270 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022428289766583985, + "loss": 0.4281, + "step": 136280 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022424049693654734, + "loss": 0.4561, + "step": 136290 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022419809620725478, + "loss": 0.4146, + "step": 136300 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022415569547796222, + "loss": 0.4908, + "step": 136310 + }, + { + "epoch": 5.76, + "learning_rate": 0.00022411329474866966, + "loss": 0.4676, + "step": 136320 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022407089401937715, + "loss": 0.4885, + "step": 136330 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002240284932900846, + "loss": 0.5372, + "step": 136340 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022398609256079203, + "loss": 0.4775, + "step": 136350 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022394369183149953, + "loss": 0.3942, + "step": 136360 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022390129110220697, + "loss": 0.4442, + "step": 136370 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002238588903729144, + "loss": 0.4898, + "step": 136380 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002238164896436219, + "loss": 0.4483, + "step": 136390 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022377408891432934, + "loss": 0.4744, + "step": 136400 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022373168818503678, + "loss": 0.4319, + "step": 136410 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022368928745574427, + "loss": 0.4583, + "step": 136420 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002236468867264517, + "loss": 0.5123, + "step": 136430 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022360448599715915, + "loss": 0.4642, + "step": 136440 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022356208526786665, + "loss": 0.4512, + "step": 136450 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022351968453857409, + "loss": 0.4798, + "step": 136460 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022347728380928152, + "loss": 0.4894, + "step": 136470 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022343488307998896, + "loss": 0.3926, + "step": 136480 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022339248235069646, + "loss": 0.4291, + "step": 136490 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002233500816214039, + "loss": 0.4697, + "step": 136500 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022330768089211134, + "loss": 0.5094, + "step": 136510 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022326528016281883, + "loss": 0.3909, + "step": 136520 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022322287943352627, + "loss": 0.4375, + "step": 136530 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002231804787042337, + "loss": 0.4491, + "step": 136540 + }, + { + "epoch": 5.77, + "learning_rate": 0.0002231380779749412, + "loss": 0.4028, + "step": 136550 + }, + { + "epoch": 5.77, + "learning_rate": 0.00022309567724564864, + "loss": 0.378, + "step": 136560 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022305327651635608, + "loss": 0.4358, + "step": 136570 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022301087578706358, + "loss": 0.4733, + "step": 136580 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022296847505777102, + "loss": 0.4274, + "step": 136590 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022292607432847846, + "loss": 0.4752, + "step": 136600 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022288367359918595, + "loss": 0.3996, + "step": 136610 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002228412728698934, + "loss": 0.4567, + "step": 136620 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022279887214060083, + "loss": 0.3825, + "step": 136630 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022275647141130827, + "loss": 0.4471, + "step": 136640 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022271407068201576, + "loss": 0.4492, + "step": 136650 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002226716699527232, + "loss": 0.5035, + "step": 136660 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022262926922343064, + "loss": 0.4498, + "step": 136670 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002225868684941381, + "loss": 0.5016, + "step": 136680 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022254446776484558, + "loss": 0.4204, + "step": 136690 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022250206703555302, + "loss": 0.4726, + "step": 136700 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022245966630626048, + "loss": 0.4334, + "step": 136710 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022241726557696792, + "loss": 0.5198, + "step": 136720 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002223748648476754, + "loss": 0.4089, + "step": 136730 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022233246411838286, + "loss": 0.4129, + "step": 136740 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002222900633890903, + "loss": 0.4392, + "step": 136750 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022224766265979776, + "loss": 0.4083, + "step": 136760 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022220526193050523, + "loss": 0.5089, + "step": 136770 + }, + { + "epoch": 5.78, + "learning_rate": 0.00022216286120121267, + "loss": 0.4183, + "step": 136780 + }, + { + "epoch": 5.78, + "learning_rate": 0.0002221204604719201, + "loss": 0.4547, + "step": 136790 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022207805974262758, + "loss": 0.449, + "step": 136800 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022203565901333504, + "loss": 0.4407, + "step": 136810 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022199325828404248, + "loss": 0.5582, + "step": 136820 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022195085755474992, + "loss": 0.4349, + "step": 136830 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022190845682545742, + "loss": 0.4728, + "step": 136840 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022186605609616486, + "loss": 0.561, + "step": 136850 + }, + { + "epoch": 5.79, + "learning_rate": 0.0002218236553668723, + "loss": 0.5399, + "step": 136860 + }, + { + "epoch": 5.79, + "learning_rate": 0.0002217812546375798, + "loss": 0.3749, + "step": 136870 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022173885390828723, + "loss": 0.4991, + "step": 136880 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022169645317899467, + "loss": 0.4673, + "step": 136890 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022165405244970216, + "loss": 0.4365, + "step": 136900 + }, + { + "epoch": 5.79, + "learning_rate": 0.0002216116517204096, + "loss": 0.4103, + "step": 136910 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022156925099111704, + "loss": 0.4266, + "step": 136920 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022152685026182448, + "loss": 0.4594, + "step": 136930 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022148444953253198, + "loss": 0.5715, + "step": 136940 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022144204880323942, + "loss": 0.4494, + "step": 136950 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022139964807394686, + "loss": 0.5257, + "step": 136960 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022135724734465435, + "loss": 0.3912, + "step": 136970 + }, + { + "epoch": 5.79, + "learning_rate": 0.0002213148466153618, + "loss": 0.4761, + "step": 136980 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022127244588606923, + "loss": 0.4266, + "step": 136990 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022123004515677672, + "loss": 0.5021, + "step": 137000 + }, + { + "epoch": 5.79, + "learning_rate": 0.00022118764442748416, + "loss": 0.3853, + "step": 137010 + }, + { + "epoch": 5.79, + "learning_rate": 0.0002211452436981916, + "loss": 0.4511, + "step": 137020 + }, + { + "epoch": 5.79, + "learning_rate": 0.0002211028429688991, + "loss": 0.5454, + "step": 137030 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022106044223960654, + "loss": 0.4441, + "step": 137040 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022101804151031398, + "loss": 0.5351, + "step": 137050 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022097564078102147, + "loss": 0.5318, + "step": 137060 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002209332400517289, + "loss": 0.4721, + "step": 137070 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022089083932243635, + "loss": 0.5089, + "step": 137080 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002208484385931438, + "loss": 0.4651, + "step": 137090 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022080603786385128, + "loss": 0.5013, + "step": 137100 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022076363713455872, + "loss": 0.4365, + "step": 137110 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022072123640526616, + "loss": 0.4402, + "step": 137120 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022067883567597366, + "loss": 0.4227, + "step": 137130 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002206364349466811, + "loss": 0.4916, + "step": 137140 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022059403421738853, + "loss": 0.461, + "step": 137150 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022055163348809603, + "loss": 0.4272, + "step": 137160 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022050923275880347, + "loss": 0.4432, + "step": 137170 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002204668320295109, + "loss": 0.4281, + "step": 137180 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002204244313002184, + "loss": 0.4541, + "step": 137190 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022038203057092584, + "loss": 0.4509, + "step": 137200 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022033962984163328, + "loss": 0.429, + "step": 137210 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022029722911234078, + "loss": 0.444, + "step": 137220 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022025482838304821, + "loss": 0.4205, + "step": 137230 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022021242765375565, + "loss": 0.5531, + "step": 137240 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002201700269244631, + "loss": 0.3977, + "step": 137250 + }, + { + "epoch": 5.8, + "learning_rate": 0.0002201276261951706, + "loss": 0.4263, + "step": 137260 + }, + { + "epoch": 5.8, + "learning_rate": 0.00022008522546587803, + "loss": 0.4455, + "step": 137270 + }, + { + "epoch": 5.81, + "learning_rate": 0.00022004282473658547, + "loss": 0.4875, + "step": 137280 + }, + { + "epoch": 5.81, + "learning_rate": 0.00022000042400729296, + "loss": 0.4483, + "step": 137290 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002199580232780004, + "loss": 0.3466, + "step": 137300 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021991562254870784, + "loss": 0.4759, + "step": 137310 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002198732218194153, + "loss": 0.4656, + "step": 137320 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021983082109012277, + "loss": 0.5198, + "step": 137330 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021978842036083021, + "loss": 0.3796, + "step": 137340 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021974601963153768, + "loss": 0.4897, + "step": 137350 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021970361890224515, + "loss": 0.472, + "step": 137360 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002196612181729526, + "loss": 0.4385, + "step": 137370 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021961881744366005, + "loss": 0.4271, + "step": 137380 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002195764167143675, + "loss": 0.3951, + "step": 137390 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021953401598507496, + "loss": 0.4445, + "step": 137400 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002194916152557824, + "loss": 0.4876, + "step": 137410 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021944921452648987, + "loss": 0.4143, + "step": 137420 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002194068137971973, + "loss": 0.4934, + "step": 137430 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021936441306790477, + "loss": 0.368, + "step": 137440 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021932201233861224, + "loss": 0.4454, + "step": 137450 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021927961160931968, + "loss": 0.4123, + "step": 137460 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021923721088002712, + "loss": 0.5073, + "step": 137470 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002191948101507346, + "loss": 0.4567, + "step": 137480 + }, + { + "epoch": 5.81, + "learning_rate": 0.00021915240942144205, + "loss": 0.4996, + "step": 137490 + }, + { + "epoch": 5.81, + "learning_rate": 0.0002191100086921495, + "loss": 0.5326, + "step": 137500 + }, + { + "epoch": 5.82, + "learning_rate": 0.000219067607962857, + "loss": 0.48, + "step": 137510 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021902520723356443, + "loss": 0.4651, + "step": 137520 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021898280650427187, + "loss": 0.4009, + "step": 137530 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002189404057749793, + "loss": 0.4262, + "step": 137540 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002188980050456868, + "loss": 0.4265, + "step": 137550 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021885560431639424, + "loss": 0.4692, + "step": 137560 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021881320358710168, + "loss": 0.4746, + "step": 137570 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021877080285780917, + "loss": 0.4032, + "step": 137580 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002187284021285166, + "loss": 0.4417, + "step": 137590 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021868600139922405, + "loss": 0.4932, + "step": 137600 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021864360066993155, + "loss": 0.4553, + "step": 137610 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021860119994063899, + "loss": 0.5052, + "step": 137620 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021855879921134643, + "loss": 0.4811, + "step": 137630 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021851639848205392, + "loss": 0.4964, + "step": 137640 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021847399775276136, + "loss": 0.4497, + "step": 137650 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002184315970234688, + "loss": 0.4663, + "step": 137660 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002183891962941763, + "loss": 0.4391, + "step": 137670 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021834679556488373, + "loss": 0.4489, + "step": 137680 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021830439483559117, + "loss": 0.4504, + "step": 137690 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002182619941062986, + "loss": 0.4701, + "step": 137700 + }, + { + "epoch": 5.82, + "learning_rate": 0.0002182195933770061, + "loss": 0.4326, + "step": 137710 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021817719264771355, + "loss": 0.4555, + "step": 137720 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021813479191842099, + "loss": 0.3884, + "step": 137730 + }, + { + "epoch": 5.82, + "learning_rate": 0.00021809239118912848, + "loss": 0.5089, + "step": 137740 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021804999045983592, + "loss": 0.4026, + "step": 137750 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021800758973054336, + "loss": 0.48, + "step": 137760 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021796518900125085, + "loss": 0.485, + "step": 137770 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002179227882719583, + "loss": 0.4663, + "step": 137780 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021788038754266573, + "loss": 0.4366, + "step": 137790 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021783798681337323, + "loss": 0.425, + "step": 137800 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021779558608408067, + "loss": 0.4206, + "step": 137810 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002177531853547881, + "loss": 0.4518, + "step": 137820 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002177107846254956, + "loss": 0.4394, + "step": 137830 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021766838389620304, + "loss": 0.5056, + "step": 137840 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021762598316691048, + "loss": 0.4918, + "step": 137850 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021758358243761792, + "loss": 0.4682, + "step": 137860 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002175411817083254, + "loss": 0.3859, + "step": 137870 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021749878097903285, + "loss": 0.4939, + "step": 137880 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002174563802497403, + "loss": 0.4555, + "step": 137890 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021741397952044778, + "loss": 0.4779, + "step": 137900 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021737157879115522, + "loss": 0.4317, + "step": 137910 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021732917806186266, + "loss": 0.5262, + "step": 137920 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021728677733257016, + "loss": 0.4191, + "step": 137930 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002172443766032776, + "loss": 0.3928, + "step": 137940 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021720197587398504, + "loss": 0.5087, + "step": 137950 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002171595751446925, + "loss": 0.4621, + "step": 137960 + }, + { + "epoch": 5.83, + "learning_rate": 0.00021711717441539997, + "loss": 0.4279, + "step": 137970 + }, + { + "epoch": 5.83, + "learning_rate": 0.0002170747736861074, + "loss": 0.4447, + "step": 137980 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021703237295681488, + "loss": 0.4773, + "step": 137990 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021698997222752234, + "loss": 0.4155, + "step": 138000 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021694757149822978, + "loss": 0.4336, + "step": 138010 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021690517076893722, + "loss": 0.4095, + "step": 138020 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002168627700396447, + "loss": 0.4773, + "step": 138030 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021682036931035216, + "loss": 0.5362, + "step": 138040 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002167779685810596, + "loss": 0.4438, + "step": 138050 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021673556785176706, + "loss": 0.3999, + "step": 138060 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002166931671224745, + "loss": 0.399, + "step": 138070 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021665076639318197, + "loss": 0.4866, + "step": 138080 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021660836566388944, + "loss": 0.3908, + "step": 138090 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021656596493459688, + "loss": 0.4284, + "step": 138100 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021652356420530434, + "loss": 0.4429, + "step": 138110 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002164811634760118, + "loss": 0.4407, + "step": 138120 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021643876274671925, + "loss": 0.449, + "step": 138130 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002163963620174267, + "loss": 0.4316, + "step": 138140 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021635396128813418, + "loss": 0.5016, + "step": 138150 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021631156055884162, + "loss": 0.4551, + "step": 138160 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021626915982954906, + "loss": 0.503, + "step": 138170 + }, + { + "epoch": 5.84, + "learning_rate": 0.0002162267591002565, + "loss": 0.3877, + "step": 138180 + }, + { + "epoch": 5.84, + "learning_rate": 0.000216184358370964, + "loss": 0.5389, + "step": 138190 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021614195764167144, + "loss": 0.4412, + "step": 138200 + }, + { + "epoch": 5.84, + "learning_rate": 0.00021609955691237888, + "loss": 0.4659, + "step": 138210 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021605715618308637, + "loss": 0.4343, + "step": 138220 + }, + { + "epoch": 5.85, + "learning_rate": 0.0002160147554537938, + "loss": 0.3774, + "step": 138230 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021597235472450125, + "loss": 0.418, + "step": 138240 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021592995399520874, + "loss": 0.4399, + "step": 138250 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021588755326591618, + "loss": 0.5241, + "step": 138260 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021584515253662362, + "loss": 0.471, + "step": 138270 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021580275180733112, + "loss": 0.4928, + "step": 138280 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021576035107803856, + "loss": 0.3881, + "step": 138290 + }, + { + "epoch": 5.85, + "learning_rate": 0.000215717950348746, + "loss": 0.4516, + "step": 138300 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021567554961945344, + "loss": 0.4564, + "step": 138310 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021563314889016093, + "loss": 0.4703, + "step": 138320 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021559074816086837, + "loss": 0.4971, + "step": 138330 + }, + { + "epoch": 5.85, + "learning_rate": 0.0002155483474315758, + "loss": 0.4078, + "step": 138340 + }, + { + "epoch": 5.85, + "learning_rate": 0.0002155059467022833, + "loss": 0.4606, + "step": 138350 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021546354597299074, + "loss": 0.5039, + "step": 138360 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021542114524369818, + "loss": 0.4673, + "step": 138370 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021537874451440568, + "loss": 0.4584, + "step": 138380 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021533634378511312, + "loss": 0.3842, + "step": 138390 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021529394305582056, + "loss": 0.468, + "step": 138400 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021525154232652805, + "loss": 0.4422, + "step": 138410 + }, + { + "epoch": 5.85, + "learning_rate": 0.0002152091415972355, + "loss": 0.4624, + "step": 138420 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021516674086794293, + "loss": 0.4506, + "step": 138430 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021512434013865042, + "loss": 0.3714, + "step": 138440 + }, + { + "epoch": 5.85, + "learning_rate": 0.00021508193940935786, + "loss": 0.4725, + "step": 138450 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002150395386800653, + "loss": 0.5363, + "step": 138460 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021499713795077274, + "loss": 0.4032, + "step": 138470 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021495473722148024, + "loss": 0.4893, + "step": 138480 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021491233649218767, + "loss": 0.4209, + "step": 138490 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021486993576289511, + "loss": 0.4426, + "step": 138500 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002148275350336026, + "loss": 0.4634, + "step": 138510 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021478513430431005, + "loss": 0.4702, + "step": 138520 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002147427335750175, + "loss": 0.4486, + "step": 138530 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021470033284572498, + "loss": 0.4841, + "step": 138540 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021465793211643242, + "loss": 0.4767, + "step": 138550 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021461553138713986, + "loss": 0.5451, + "step": 138560 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021457313065784736, + "loss": 0.4826, + "step": 138570 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002145307299285548, + "loss": 0.3677, + "step": 138580 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021448832919926223, + "loss": 0.4388, + "step": 138590 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002144459284699697, + "loss": 0.4761, + "step": 138600 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021440352774067717, + "loss": 0.4595, + "step": 138610 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002143611270113846, + "loss": 0.4504, + "step": 138620 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021431872628209205, + "loss": 0.4318, + "step": 138630 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021427632555279954, + "loss": 0.4357, + "step": 138640 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021423392482350698, + "loss": 0.4682, + "step": 138650 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021419152409421442, + "loss": 0.4502, + "step": 138660 + }, + { + "epoch": 5.86, + "learning_rate": 0.0002141491233649219, + "loss": 0.4606, + "step": 138670 + }, + { + "epoch": 5.86, + "learning_rate": 0.00021410672263562935, + "loss": 0.5044, + "step": 138680 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002140643219063368, + "loss": 0.4462, + "step": 138690 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021402192117704426, + "loss": 0.4724, + "step": 138700 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002139795204477517, + "loss": 0.484, + "step": 138710 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021393711971845917, + "loss": 0.3903, + "step": 138720 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021389471898916663, + "loss": 0.4734, + "step": 138730 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021385231825987407, + "loss": 0.5154, + "step": 138740 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021380991753058154, + "loss": 0.506, + "step": 138750 + }, + { + "epoch": 5.87, + "learning_rate": 0.000213767516801289, + "loss": 0.4108, + "step": 138760 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021372511607199645, + "loss": 0.4009, + "step": 138770 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002136827153427039, + "loss": 0.4162, + "step": 138780 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021364031461341135, + "loss": 0.4591, + "step": 138790 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021359791388411882, + "loss": 0.443, + "step": 138800 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021355551315482626, + "loss": 0.4277, + "step": 138810 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002135131124255337, + "loss": 0.6059, + "step": 138820 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002134707116962412, + "loss": 0.436, + "step": 138830 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021342831096694863, + "loss": 0.5304, + "step": 138840 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021338591023765607, + "loss": 0.5081, + "step": 138850 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021334350950836357, + "loss": 0.4352, + "step": 138860 + }, + { + "epoch": 5.87, + "learning_rate": 0.000213301108779071, + "loss": 0.4444, + "step": 138870 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021325870804977845, + "loss": 0.4739, + "step": 138880 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021321630732048594, + "loss": 0.5099, + "step": 138890 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021317390659119338, + "loss": 0.4564, + "step": 138900 + }, + { + "epoch": 5.87, + "learning_rate": 0.00021313150586190082, + "loss": 0.3596, + "step": 138910 + }, + { + "epoch": 5.87, + "learning_rate": 0.0002130891051326083, + "loss": 0.4201, + "step": 138920 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021304670440331575, + "loss": 0.458, + "step": 138930 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002130043036740232, + "loss": 0.429, + "step": 138940 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021296190294473063, + "loss": 0.4224, + "step": 138950 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021291950221543813, + "loss": 0.4913, + "step": 138960 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021287710148614557, + "loss": 0.4379, + "step": 138970 + }, + { + "epoch": 5.88, + "learning_rate": 0.000212834700756853, + "loss": 0.3837, + "step": 138980 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002127923000275605, + "loss": 0.4204, + "step": 138990 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021274989929826794, + "loss": 0.4901, + "step": 139000 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021270749856897538, + "loss": 0.4159, + "step": 139010 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021266509783968287, + "loss": 0.4275, + "step": 139020 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002126226971103903, + "loss": 0.4891, + "step": 139030 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021258029638109775, + "loss": 0.4351, + "step": 139040 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021253789565180525, + "loss": 0.4155, + "step": 139050 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021249549492251269, + "loss": 0.4034, + "step": 139060 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021245309419322013, + "loss": 0.5147, + "step": 139070 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021241069346392757, + "loss": 0.4261, + "step": 139080 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021236829273463506, + "loss": 0.4618, + "step": 139090 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002123258920053425, + "loss": 0.4811, + "step": 139100 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021228349127604994, + "loss": 0.4579, + "step": 139110 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021224109054675743, + "loss": 0.4624, + "step": 139120 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021219868981746487, + "loss": 0.4563, + "step": 139130 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002121562890881723, + "loss": 0.4369, + "step": 139140 + }, + { + "epoch": 5.88, + "learning_rate": 0.0002121138883588798, + "loss": 0.3875, + "step": 139150 + }, + { + "epoch": 5.88, + "learning_rate": 0.00021207148762958725, + "loss": 0.4522, + "step": 139160 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021202908690029468, + "loss": 0.4307, + "step": 139170 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021198668617100218, + "loss": 0.5078, + "step": 139180 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021194428544170962, + "loss": 0.4428, + "step": 139190 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021190188471241706, + "loss": 0.4614, + "step": 139200 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021185948398312455, + "loss": 0.433, + "step": 139210 + }, + { + "epoch": 5.89, + "learning_rate": 0.000211817083253832, + "loss": 0.4316, + "step": 139220 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021177468252453943, + "loss": 0.4697, + "step": 139230 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021173228179524687, + "loss": 0.5082, + "step": 139240 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021168988106595436, + "loss": 0.4202, + "step": 139250 + }, + { + "epoch": 5.89, + "learning_rate": 0.0002116474803366618, + "loss": 0.3984, + "step": 139260 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021160507960736924, + "loss": 0.4396, + "step": 139270 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021156267887807674, + "loss": 0.386, + "step": 139280 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021152027814878418, + "loss": 0.4641, + "step": 139290 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021147787741949162, + "loss": 0.393, + "step": 139300 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021143547669019908, + "loss": 0.4663, + "step": 139310 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021139307596090655, + "loss": 0.4419, + "step": 139320 + }, + { + "epoch": 5.89, + "learning_rate": 0.000211350675231614, + "loss": 0.4864, + "step": 139330 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021130827450232146, + "loss": 0.4792, + "step": 139340 + }, + { + "epoch": 5.89, + "learning_rate": 0.0002112658737730289, + "loss": 0.4481, + "step": 139350 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021122347304373636, + "loss": 0.3896, + "step": 139360 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021118107231444383, + "loss": 0.539, + "step": 139370 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021113867158515127, + "loss": 0.408, + "step": 139380 + }, + { + "epoch": 5.89, + "learning_rate": 0.00021109627085585874, + "loss": 0.4089, + "step": 139390 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021105387012656618, + "loss": 0.4153, + "step": 139400 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021101146939727364, + "loss": 0.4764, + "step": 139410 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021096906866798108, + "loss": 0.5316, + "step": 139420 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021092666793868855, + "loss": 0.4748, + "step": 139430 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021088426720939602, + "loss": 0.431, + "step": 139440 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021084186648010346, + "loss": 0.488, + "step": 139450 + }, + { + "epoch": 5.9, + "learning_rate": 0.0002107994657508109, + "loss": 0.518, + "step": 139460 + }, + { + "epoch": 5.9, + "learning_rate": 0.0002107570650215184, + "loss": 0.4565, + "step": 139470 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021071466429222583, + "loss": 0.411, + "step": 139480 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021067226356293327, + "loss": 0.439, + "step": 139490 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021062986283364076, + "loss": 0.4509, + "step": 139500 + }, + { + "epoch": 5.9, + "learning_rate": 0.0002105874621043482, + "loss": 0.4913, + "step": 139510 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021054506137505564, + "loss": 0.4745, + "step": 139520 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021050266064576314, + "loss": 0.424, + "step": 139530 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021046025991647058, + "loss": 0.4331, + "step": 139540 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021041785918717802, + "loss": 0.4473, + "step": 139550 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021037545845788546, + "loss": 0.4625, + "step": 139560 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021033305772859295, + "loss": 0.5133, + "step": 139570 + }, + { + "epoch": 5.9, + "learning_rate": 0.0002102906569993004, + "loss": 0.4238, + "step": 139580 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021024825627000783, + "loss": 0.4161, + "step": 139590 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021020585554071532, + "loss": 0.5078, + "step": 139600 + }, + { + "epoch": 5.9, + "learning_rate": 0.00021016345481142276, + "loss": 0.496, + "step": 139610 + }, + { + "epoch": 5.9, + "learning_rate": 0.0002101210540821302, + "loss": 0.5024, + "step": 139620 + }, + { + "epoch": 5.9, + "learning_rate": 0.0002100786533528377, + "loss": 0.4132, + "step": 139630 + }, + { + "epoch": 5.91, + "learning_rate": 0.00021003625262354514, + "loss": 0.5155, + "step": 139640 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020999385189425258, + "loss": 0.482, + "step": 139650 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020995145116496007, + "loss": 0.4365, + "step": 139660 + }, + { + "epoch": 5.91, + "learning_rate": 0.0002099090504356675, + "loss": 0.5303, + "step": 139670 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020986664970637495, + "loss": 0.4323, + "step": 139680 + }, + { + "epoch": 5.91, + "learning_rate": 0.0002098242489770824, + "loss": 0.5299, + "step": 139690 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020978184824778988, + "loss": 0.4978, + "step": 139700 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020973944751849732, + "loss": 0.4797, + "step": 139710 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020969704678920476, + "loss": 0.4894, + "step": 139720 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020965464605991226, + "loss": 0.4908, + "step": 139730 + }, + { + "epoch": 5.91, + "learning_rate": 0.0002096122453306197, + "loss": 0.5069, + "step": 139740 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020956984460132714, + "loss": 0.445, + "step": 139750 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020952744387203463, + "loss": 0.4705, + "step": 139760 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020948504314274207, + "loss": 0.5959, + "step": 139770 + }, + { + "epoch": 5.91, + "learning_rate": 0.0002094426424134495, + "loss": 0.4569, + "step": 139780 + }, + { + "epoch": 5.91, + "learning_rate": 0.000209400241684157, + "loss": 0.4059, + "step": 139790 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020935784095486444, + "loss": 0.4391, + "step": 139800 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020931544022557188, + "loss": 0.4742, + "step": 139810 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020927303949627938, + "loss": 0.4306, + "step": 139820 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020923063876698682, + "loss": 0.47, + "step": 139830 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020918823803769425, + "loss": 0.4165, + "step": 139840 + }, + { + "epoch": 5.91, + "learning_rate": 0.0002091458373084017, + "loss": 0.4242, + "step": 139850 + }, + { + "epoch": 5.91, + "learning_rate": 0.0002091034365791092, + "loss": 0.462, + "step": 139860 + }, + { + "epoch": 5.91, + "learning_rate": 0.00020906103584981663, + "loss": 0.4482, + "step": 139870 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020901863512052407, + "loss": 0.4302, + "step": 139880 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020897623439123156, + "loss": 0.3772, + "step": 139890 + }, + { + "epoch": 5.92, + "learning_rate": 0.000208933833661939, + "loss": 0.527, + "step": 139900 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020889143293264644, + "loss": 0.3955, + "step": 139910 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020884903220335394, + "loss": 0.4213, + "step": 139920 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020880663147406137, + "loss": 0.4523, + "step": 139930 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020876423074476881, + "loss": 0.3461, + "step": 139940 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020872183001547628, + "loss": 0.4427, + "step": 139950 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020867942928618375, + "loss": 0.4866, + "step": 139960 + }, + { + "epoch": 5.92, + "learning_rate": 0.0002086370285568912, + "loss": 0.5519, + "step": 139970 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020859462782759865, + "loss": 0.4811, + "step": 139980 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020855222709830612, + "loss": 0.4446, + "step": 139990 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020850982636901356, + "loss": 0.5399, + "step": 140000 + }, + { + "epoch": 5.92, + "eval_loss": 0.6056107878684998, + "eval_runtime": 337.6283, + "eval_samples_per_second": 15.564, + "eval_steps_per_second": 3.892, + "step": 140000 + }, + { + "epoch": 5.92, + "learning_rate": 0.000208467425639721, + "loss": 0.4435, + "step": 140010 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020842502491042847, + "loss": 0.3995, + "step": 140020 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020838262418113593, + "loss": 0.4419, + "step": 140030 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020834022345184337, + "loss": 0.3923, + "step": 140040 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020829782272255084, + "loss": 0.4462, + "step": 140050 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020825542199325828, + "loss": 0.5182, + "step": 140060 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020821302126396575, + "loss": 0.4705, + "step": 140070 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020817062053467321, + "loss": 0.4274, + "step": 140080 + }, + { + "epoch": 5.92, + "learning_rate": 0.00020812821980538065, + "loss": 0.4781, + "step": 140090 + }, + { + "epoch": 5.92, + "learning_rate": 0.0002080858190760881, + "loss": 0.4013, + "step": 140100 + }, + { + "epoch": 5.93, + "learning_rate": 0.0002080434183467956, + "loss": 0.4162, + "step": 140110 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020800101761750303, + "loss": 0.4701, + "step": 140120 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020795861688821047, + "loss": 0.4789, + "step": 140130 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020791621615891796, + "loss": 0.4661, + "step": 140140 + }, + { + "epoch": 5.93, + "learning_rate": 0.0002078738154296254, + "loss": 0.4929, + "step": 140150 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020783141470033284, + "loss": 0.4461, + "step": 140160 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020778901397104028, + "loss": 0.3702, + "step": 140170 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020774661324174777, + "loss": 0.4704, + "step": 140180 + }, + { + "epoch": 5.93, + "learning_rate": 0.0002077042125124552, + "loss": 0.4845, + "step": 140190 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020766181178316265, + "loss": 0.5168, + "step": 140200 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020761941105387015, + "loss": 0.4443, + "step": 140210 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020757701032457759, + "loss": 0.4931, + "step": 140220 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020753460959528503, + "loss": 0.4857, + "step": 140230 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020749220886599252, + "loss": 0.455, + "step": 140240 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020744980813669996, + "loss": 0.4857, + "step": 140250 + }, + { + "epoch": 5.93, + "learning_rate": 0.0002074074074074074, + "loss": 0.4848, + "step": 140260 + }, + { + "epoch": 5.93, + "learning_rate": 0.0002073650066781149, + "loss": 0.4913, + "step": 140270 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020732260594882233, + "loss": 0.4615, + "step": 140280 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020728020521952977, + "loss": 0.3982, + "step": 140290 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020723780449023727, + "loss": 0.3955, + "step": 140300 + }, + { + "epoch": 5.93, + "learning_rate": 0.0002071954037609447, + "loss": 0.4264, + "step": 140310 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020715300303165215, + "loss": 0.4605, + "step": 140320 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020711060230235959, + "loss": 0.4678, + "step": 140330 + }, + { + "epoch": 5.93, + "learning_rate": 0.00020706820157306708, + "loss": 0.4913, + "step": 140340 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020702580084377452, + "loss": 0.4678, + "step": 140350 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020698340011448196, + "loss": 0.4012, + "step": 140360 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020694099938518945, + "loss": 0.5381, + "step": 140370 + }, + { + "epoch": 5.94, + "learning_rate": 0.0002068985986558969, + "loss": 0.4243, + "step": 140380 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020685619792660433, + "loss": 0.4447, + "step": 140390 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020681379719731183, + "loss": 0.4233, + "step": 140400 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020677139646801927, + "loss": 0.4134, + "step": 140410 + }, + { + "epoch": 5.94, + "learning_rate": 0.0002067289957387267, + "loss": 0.4155, + "step": 140420 + }, + { + "epoch": 5.94, + "learning_rate": 0.0002066865950094342, + "loss": 0.5145, + "step": 140430 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020664419428014164, + "loss": 0.4915, + "step": 140440 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020660179355084908, + "loss": 0.411, + "step": 140450 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020655939282155652, + "loss": 0.4694, + "step": 140460 + }, + { + "epoch": 5.94, + "learning_rate": 0.000206516992092264, + "loss": 0.4513, + "step": 140470 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020647459136297145, + "loss": 0.5017, + "step": 140480 + }, + { + "epoch": 5.94, + "learning_rate": 0.0002064321906336789, + "loss": 0.5045, + "step": 140490 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020638978990438639, + "loss": 0.4968, + "step": 140500 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020634738917509383, + "loss": 0.4893, + "step": 140510 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020630498844580126, + "loss": 0.4275, + "step": 140520 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020626258771650876, + "loss": 0.4521, + "step": 140530 + }, + { + "epoch": 5.94, + "learning_rate": 0.0002062201869872162, + "loss": 0.4474, + "step": 140540 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020617778625792364, + "loss": 0.4842, + "step": 140550 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020613538552863113, + "loss": 0.4281, + "step": 140560 + }, + { + "epoch": 5.94, + "learning_rate": 0.00020609298479933857, + "loss": 0.4758, + "step": 140570 + }, + { + "epoch": 5.94, + "learning_rate": 0.000206050584070046, + "loss": 0.4596, + "step": 140580 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020600818334075348, + "loss": 0.4603, + "step": 140590 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020596578261146094, + "loss": 0.4749, + "step": 140600 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020592338188216838, + "loss": 0.4512, + "step": 140610 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020588098115287582, + "loss": 0.5089, + "step": 140620 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020583858042358332, + "loss": 0.4436, + "step": 140630 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020579617969429076, + "loss": 0.4434, + "step": 140640 + }, + { + "epoch": 5.95, + "learning_rate": 0.0002057537789649982, + "loss": 0.5214, + "step": 140650 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020571137823570566, + "loss": 0.4603, + "step": 140660 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020566897750641313, + "loss": 0.3975, + "step": 140670 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020562657677712057, + "loss": 0.4745, + "step": 140680 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020558417604782804, + "loss": 0.4703, + "step": 140690 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020554177531853548, + "loss": 0.5482, + "step": 140700 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020549937458924294, + "loss": 0.5312, + "step": 140710 + }, + { + "epoch": 5.95, + "learning_rate": 0.0002054569738599504, + "loss": 0.5067, + "step": 140720 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020541457313065785, + "loss": 0.5307, + "step": 140730 + }, + { + "epoch": 5.95, + "learning_rate": 0.0002053721724013653, + "loss": 0.499, + "step": 140740 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020532977167207278, + "loss": 0.4767, + "step": 140750 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020528737094278022, + "loss": 0.4559, + "step": 140760 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020524497021348766, + "loss": 0.5038, + "step": 140770 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020520256948419513, + "loss": 0.444, + "step": 140780 + }, + { + "epoch": 5.95, + "learning_rate": 0.0002051601687549026, + "loss": 0.4647, + "step": 140790 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020511776802561004, + "loss": 0.4331, + "step": 140800 + }, + { + "epoch": 5.95, + "learning_rate": 0.00020507536729631748, + "loss": 0.542, + "step": 140810 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020503296656702497, + "loss": 0.4823, + "step": 140820 + }, + { + "epoch": 5.96, + "learning_rate": 0.0002049905658377324, + "loss": 0.5285, + "step": 140830 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020494816510843985, + "loss": 0.471, + "step": 140840 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020490576437914734, + "loss": 0.4496, + "step": 140850 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020486336364985478, + "loss": 0.4394, + "step": 140860 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020482096292056222, + "loss": 0.5092, + "step": 140870 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020477856219126972, + "loss": 0.4786, + "step": 140880 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020473616146197716, + "loss": 0.4327, + "step": 140890 + }, + { + "epoch": 5.96, + "learning_rate": 0.0002046937607326846, + "loss": 0.4458, + "step": 140900 + }, + { + "epoch": 5.96, + "learning_rate": 0.0002046513600033921, + "loss": 0.4785, + "step": 140910 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020460895927409953, + "loss": 0.4165, + "step": 140920 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020456655854480697, + "loss": 0.4503, + "step": 140930 + }, + { + "epoch": 5.96, + "learning_rate": 0.0002045241578155144, + "loss": 0.4904, + "step": 140940 + }, + { + "epoch": 5.96, + "learning_rate": 0.0002044817570862219, + "loss": 0.4138, + "step": 140950 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020443935635692934, + "loss": 0.4491, + "step": 140960 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020439695562763678, + "loss": 0.401, + "step": 140970 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020435455489834428, + "loss": 0.435, + "step": 140980 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020431215416905172, + "loss": 0.4205, + "step": 140990 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020426975343975916, + "loss": 0.4694, + "step": 141000 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020422735271046665, + "loss": 0.4325, + "step": 141010 + }, + { + "epoch": 5.96, + "learning_rate": 0.0002041849519811741, + "loss": 0.4604, + "step": 141020 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020414255125188153, + "loss": 0.4837, + "step": 141030 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020410015052258902, + "loss": 0.4719, + "step": 141040 + }, + { + "epoch": 5.96, + "learning_rate": 0.00020405774979329646, + "loss": 0.4859, + "step": 141050 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002040153490640039, + "loss": 0.4369, + "step": 141060 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002039729483347114, + "loss": 0.4064, + "step": 141070 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020393054760541884, + "loss": 0.5766, + "step": 141080 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020388814687612628, + "loss": 0.431, + "step": 141090 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020384574614683372, + "loss": 0.5034, + "step": 141100 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002038033454175412, + "loss": 0.4028, + "step": 141110 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020376094468824865, + "loss": 0.4808, + "step": 141120 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002037185439589561, + "loss": 0.4421, + "step": 141130 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020367614322966358, + "loss": 0.4982, + "step": 141140 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020363374250037102, + "loss": 0.4477, + "step": 141150 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020359134177107846, + "loss": 0.4531, + "step": 141160 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020354894104178596, + "loss": 0.4731, + "step": 141170 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002035065403124934, + "loss": 0.4431, + "step": 141180 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020346413958320083, + "loss": 0.4777, + "step": 141190 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020342173885390833, + "loss": 0.4413, + "step": 141200 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020337933812461577, + "loss": 0.4524, + "step": 141210 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002033369373953232, + "loss": 0.4095, + "step": 141220 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020329453666603065, + "loss": 0.46, + "step": 141230 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020325213593673814, + "loss": 0.4905, + "step": 141240 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020320973520744558, + "loss": 0.4158, + "step": 141250 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020316733447815302, + "loss": 0.4762, + "step": 141260 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020312493374886051, + "loss": 0.4825, + "step": 141270 + }, + { + "epoch": 5.97, + "learning_rate": 0.00020308253301956795, + "loss": 0.4171, + "step": 141280 + }, + { + "epoch": 5.97, + "learning_rate": 0.0002030401322902754, + "loss": 0.4303, + "step": 141290 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020299773156098286, + "loss": 0.4421, + "step": 141300 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020295533083169033, + "loss": 0.4495, + "step": 141310 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020291293010239777, + "loss": 0.4548, + "step": 141320 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020287052937310523, + "loss": 0.4955, + "step": 141330 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020282812864381267, + "loss": 0.4924, + "step": 141340 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020278572791452014, + "loss": 0.4803, + "step": 141350 + }, + { + "epoch": 5.98, + "learning_rate": 0.0002027433271852276, + "loss": 0.4846, + "step": 141360 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020270092645593505, + "loss": 0.539, + "step": 141370 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020265852572664251, + "loss": 0.4754, + "step": 141380 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020261612499734995, + "loss": 0.3854, + "step": 141390 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020257372426805742, + "loss": 0.4309, + "step": 141400 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020253132353876486, + "loss": 0.5001, + "step": 141410 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020248892280947233, + "loss": 0.4951, + "step": 141420 + }, + { + "epoch": 5.98, + "learning_rate": 0.0002024465220801798, + "loss": 0.4696, + "step": 141430 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020240412135088723, + "loss": 0.427, + "step": 141440 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020236172062159467, + "loss": 0.4574, + "step": 141450 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020231931989230217, + "loss": 0.382, + "step": 141460 + }, + { + "epoch": 5.98, + "learning_rate": 0.0002022769191630096, + "loss": 0.4455, + "step": 141470 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020223451843371705, + "loss": 0.5587, + "step": 141480 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020219211770442454, + "loss": 0.5636, + "step": 141490 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020214971697513198, + "loss": 0.4108, + "step": 141500 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020210731624583942, + "loss": 0.4447, + "step": 141510 + }, + { + "epoch": 5.98, + "learning_rate": 0.00020206491551654691, + "loss": 0.5121, + "step": 141520 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020202251478725435, + "loss": 0.3978, + "step": 141530 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002019801140579618, + "loss": 0.4973, + "step": 141540 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020193771332866923, + "loss": 0.4394, + "step": 141550 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020189531259937673, + "loss": 0.4478, + "step": 141560 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020185291187008417, + "loss": 0.4161, + "step": 141570 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002018105111407916, + "loss": 0.3954, + "step": 141580 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002017681104114991, + "loss": 0.4452, + "step": 141590 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020172570968220654, + "loss": 0.4731, + "step": 141600 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020168330895291398, + "loss": 0.4756, + "step": 141610 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020164090822362147, + "loss": 0.4408, + "step": 141620 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002015985074943289, + "loss": 0.443, + "step": 141630 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020155610676503635, + "loss": 0.4205, + "step": 141640 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020151370603574385, + "loss": 0.4268, + "step": 141650 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020147130530645129, + "loss": 0.3971, + "step": 141660 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020142890457715873, + "loss": 0.5806, + "step": 141670 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020138650384786622, + "loss": 0.5053, + "step": 141680 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020134410311857366, + "loss": 0.4947, + "step": 141690 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002013017023892811, + "loss": 0.4715, + "step": 141700 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020125930165998854, + "loss": 0.5126, + "step": 141710 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020121690093069603, + "loss": 0.4659, + "step": 141720 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020117450020140347, + "loss": 0.5113, + "step": 141730 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002011320994721109, + "loss": 0.4932, + "step": 141740 + }, + { + "epoch": 5.99, + "learning_rate": 0.0002010896987428184, + "loss": 0.4056, + "step": 141750 + }, + { + "epoch": 5.99, + "learning_rate": 0.00020104729801352585, + "loss": 0.5303, + "step": 141760 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020100489728423329, + "loss": 0.4841, + "step": 141770 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020096249655494078, + "loss": 0.5178, + "step": 141780 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020092009582564822, + "loss": 0.4291, + "step": 141790 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020087769509635566, + "loss": 0.483, + "step": 141800 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020083529436706315, + "loss": 0.5078, + "step": 141810 + }, + { + "epoch": 6.0, + "learning_rate": 0.0002007928936377706, + "loss": 0.5337, + "step": 141820 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020075049290847803, + "loss": 0.442, + "step": 141830 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020070809217918547, + "loss": 0.4391, + "step": 141840 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020066569144989297, + "loss": 0.5012, + "step": 141850 + }, + { + "epoch": 6.0, + "learning_rate": 0.0002006232907206004, + "loss": 0.5113, + "step": 141860 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020058088999130784, + "loss": 0.4494, + "step": 141870 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020053848926201534, + "loss": 0.4982, + "step": 141880 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020049608853272278, + "loss": 0.3851, + "step": 141890 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020045368780343022, + "loss": 0.3998, + "step": 141900 + }, + { + "epoch": 6.0, + "learning_rate": 0.0002004112870741377, + "loss": 0.4652, + "step": 141910 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020036888634484515, + "loss": 0.4644, + "step": 141920 + }, + { + "epoch": 6.0, + "learning_rate": 0.0002003264856155526, + "loss": 0.4457, + "step": 141930 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020028408488626006, + "loss": 0.4183, + "step": 141940 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020024168415696752, + "loss": 0.3659, + "step": 141950 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020019928342767496, + "loss": 0.408, + "step": 141960 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020015688269838243, + "loss": 0.3959, + "step": 141970 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020011448196908987, + "loss": 0.3814, + "step": 141980 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020007208123979734, + "loss": 0.3989, + "step": 141990 + }, + { + "epoch": 6.0, + "learning_rate": 0.00020002968051050478, + "loss": 0.3814, + "step": 142000 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019998727978121224, + "loss": 0.417, + "step": 142010 + }, + { + "epoch": 6.01, + "learning_rate": 0.0001999448790519197, + "loss": 0.3782, + "step": 142020 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019990247832262715, + "loss": 0.4113, + "step": 142030 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019986007759333462, + "loss": 0.4304, + "step": 142040 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019981767686404206, + "loss": 0.3995, + "step": 142050 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019977527613474952, + "loss": 0.3925, + "step": 142060 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019973287540545696, + "loss": 0.3916, + "step": 142070 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019969047467616443, + "loss": 0.3546, + "step": 142080 + }, + { + "epoch": 6.01, + "learning_rate": 0.0001996480739468719, + "loss": 0.4423, + "step": 142090 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019960567321757934, + "loss": 0.4288, + "step": 142100 + }, + { + "epoch": 6.01, + "learning_rate": 0.0001995632724882868, + "loss": 0.4947, + "step": 142110 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019952087175899424, + "loss": 0.3974, + "step": 142120 + }, + { + "epoch": 6.01, + "learning_rate": 0.0001994784710297017, + "loss": 0.4346, + "step": 142130 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019943607030040918, + "loss": 0.4068, + "step": 142140 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019939366957111662, + "loss": 0.4233, + "step": 142150 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019935126884182408, + "loss": 0.4621, + "step": 142160 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019930886811253155, + "loss": 0.415, + "step": 142170 + }, + { + "epoch": 6.01, + "learning_rate": 0.000199266467383239, + "loss": 0.4705, + "step": 142180 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019922406665394646, + "loss": 0.3809, + "step": 142190 + }, + { + "epoch": 6.01, + "learning_rate": 0.0001991816659246539, + "loss": 0.3868, + "step": 142200 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019913926519536136, + "loss": 0.4102, + "step": 142210 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019909686446606883, + "loss": 0.4405, + "step": 142220 + }, + { + "epoch": 6.01, + "learning_rate": 0.00019905446373677627, + "loss": 0.3791, + "step": 142230 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019901206300748374, + "loss": 0.3668, + "step": 142240 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001989696622781912, + "loss": 0.3944, + "step": 142250 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019892726154889864, + "loss": 0.44, + "step": 142260 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001988848608196061, + "loss": 0.3905, + "step": 142270 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019884246009031355, + "loss": 0.3644, + "step": 142280 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019880005936102102, + "loss": 0.4325, + "step": 142290 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019875765863172848, + "loss": 0.447, + "step": 142300 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019871525790243592, + "loss": 0.4266, + "step": 142310 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001986728571731434, + "loss": 0.3616, + "step": 142320 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019863045644385086, + "loss": 0.3751, + "step": 142330 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001985880557145583, + "loss": 0.424, + "step": 142340 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019854565498526576, + "loss": 0.4086, + "step": 142350 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001985032542559732, + "loss": 0.324, + "step": 142360 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019846085352668067, + "loss": 0.5179, + "step": 142370 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019841845279738814, + "loss": 0.376, + "step": 142380 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019837605206809558, + "loss": 0.4242, + "step": 142390 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019833365133880304, + "loss": 0.4544, + "step": 142400 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001982912506095105, + "loss": 0.313, + "step": 142410 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019824884988021795, + "loss": 0.4081, + "step": 142420 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019820644915092542, + "loss": 0.3852, + "step": 142430 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019816404842163286, + "loss": 0.3994, + "step": 142440 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019812164769234032, + "loss": 0.3987, + "step": 142450 + }, + { + "epoch": 6.02, + "learning_rate": 0.0001980792469630478, + "loss": 0.3493, + "step": 142460 + }, + { + "epoch": 6.02, + "learning_rate": 0.00019803684623375523, + "loss": 0.4797, + "step": 142470 + }, + { + "epoch": 6.03, + "learning_rate": 0.0001979944455044627, + "loss": 0.4488, + "step": 142480 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019795204477517016, + "loss": 0.4633, + "step": 142490 + }, + { + "epoch": 6.03, + "learning_rate": 0.0001979096440458776, + "loss": 0.3701, + "step": 142500 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019786724331658507, + "loss": 0.337, + "step": 142510 + }, + { + "epoch": 6.03, + "learning_rate": 0.0001978248425872925, + "loss": 0.3821, + "step": 142520 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019778244185799998, + "loss": 0.4256, + "step": 142530 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019774004112870744, + "loss": 0.4003, + "step": 142540 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019769764039941488, + "loss": 0.4219, + "step": 142550 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019765523967012235, + "loss": 0.4388, + "step": 142560 + }, + { + "epoch": 6.03, + "learning_rate": 0.0001976128389408298, + "loss": 0.4373, + "step": 142570 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019757043821153725, + "loss": 0.4111, + "step": 142580 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019752803748224472, + "loss": 0.3655, + "step": 142590 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019748563675295216, + "loss": 0.3782, + "step": 142600 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019744323602365963, + "loss": 0.4445, + "step": 142610 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019740083529436707, + "loss": 0.4083, + "step": 142620 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019735843456507453, + "loss": 0.4513, + "step": 142630 + }, + { + "epoch": 6.03, + "learning_rate": 0.000197316033835782, + "loss": 0.4055, + "step": 142640 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019727363310648944, + "loss": 0.3697, + "step": 142650 + }, + { + "epoch": 6.03, + "learning_rate": 0.0001972312323771969, + "loss": 0.35, + "step": 142660 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019718883164790435, + "loss": 0.3951, + "step": 142670 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019714643091861181, + "loss": 0.3938, + "step": 142680 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019710403018931925, + "loss": 0.3893, + "step": 142690 + }, + { + "epoch": 6.03, + "learning_rate": 0.00019706162946002672, + "loss": 0.409, + "step": 142700 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019701922873073416, + "loss": 0.3647, + "step": 142710 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019697682800144163, + "loss": 0.3969, + "step": 142720 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019693442727214907, + "loss": 0.3918, + "step": 142730 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019689202654285653, + "loss": 0.453, + "step": 142740 + }, + { + "epoch": 6.04, + "learning_rate": 0.000196849625813564, + "loss": 0.4115, + "step": 142750 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019680722508427144, + "loss": 0.4046, + "step": 142760 + }, + { + "epoch": 6.04, + "learning_rate": 0.0001967648243549789, + "loss": 0.3763, + "step": 142770 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019672242362568637, + "loss": 0.3966, + "step": 142780 + }, + { + "epoch": 6.04, + "learning_rate": 0.0001966800228963938, + "loss": 0.3863, + "step": 142790 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019663762216710128, + "loss": 0.3506, + "step": 142800 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019659522143780872, + "loss": 0.3985, + "step": 142810 + }, + { + "epoch": 6.04, + "learning_rate": 0.0001965528207085162, + "loss": 0.3652, + "step": 142820 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019651041997922365, + "loss": 0.4266, + "step": 142830 + }, + { + "epoch": 6.04, + "learning_rate": 0.0001964680192499311, + "loss": 0.4275, + "step": 142840 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019642561852063856, + "loss": 0.385, + "step": 142850 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019638321779134603, + "loss": 0.4651, + "step": 142860 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019634081706205347, + "loss": 0.4654, + "step": 142870 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019629841633276093, + "loss": 0.3457, + "step": 142880 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019625601560346837, + "loss": 0.3197, + "step": 142890 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019621361487417584, + "loss": 0.3503, + "step": 142900 + }, + { + "epoch": 6.04, + "learning_rate": 0.0001961712141448833, + "loss": 0.3799, + "step": 142910 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019612881341559075, + "loss": 0.3325, + "step": 142920 + }, + { + "epoch": 6.04, + "learning_rate": 0.0001960864126862982, + "loss": 0.4206, + "step": 142930 + }, + { + "epoch": 6.04, + "learning_rate": 0.00019604401195700568, + "loss": 0.4224, + "step": 142940 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019600161122771312, + "loss": 0.4441, + "step": 142950 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019595921049842059, + "loss": 0.3402, + "step": 142960 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019591680976912803, + "loss": 0.383, + "step": 142970 + }, + { + "epoch": 6.05, + "learning_rate": 0.0001958744090398355, + "loss": 0.3584, + "step": 142980 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019583200831054296, + "loss": 0.4178, + "step": 142990 + }, + { + "epoch": 6.05, + "learning_rate": 0.0001957896075812504, + "loss": 0.3716, + "step": 143000 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019574720685195787, + "loss": 0.4914, + "step": 143010 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019570480612266533, + "loss": 0.406, + "step": 143020 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019566240539337277, + "loss": 0.4118, + "step": 143030 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019562000466408024, + "loss": 0.4163, + "step": 143040 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019557760393478768, + "loss": 0.3787, + "step": 143050 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019553520320549515, + "loss": 0.49, + "step": 143060 + }, + { + "epoch": 6.05, + "learning_rate": 0.0001954928024762026, + "loss": 0.4417, + "step": 143070 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019545040174691005, + "loss": 0.425, + "step": 143080 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019540800101761752, + "loss": 0.374, + "step": 143090 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019536560028832499, + "loss": 0.4207, + "step": 143100 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019532319955903243, + "loss": 0.4257, + "step": 143110 + }, + { + "epoch": 6.05, + "learning_rate": 0.0001952807988297399, + "loss": 0.3861, + "step": 143120 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019523839810044733, + "loss": 0.3486, + "step": 143130 + }, + { + "epoch": 6.05, + "learning_rate": 0.0001951959973711548, + "loss": 0.4175, + "step": 143140 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019515359664186227, + "loss": 0.4193, + "step": 143150 + }, + { + "epoch": 6.05, + "learning_rate": 0.0001951111959125697, + "loss": 0.4292, + "step": 143160 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019506879518327717, + "loss": 0.3514, + "step": 143170 + }, + { + "epoch": 6.05, + "learning_rate": 0.00019502639445398464, + "loss": 0.3567, + "step": 143180 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019498399372469208, + "loss": 0.4268, + "step": 143190 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019494159299539955, + "loss": 0.3809, + "step": 143200 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019489919226610698, + "loss": 0.3889, + "step": 143210 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019485679153681445, + "loss": 0.4764, + "step": 143220 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019481439080752192, + "loss": 0.3665, + "step": 143230 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019477199007822936, + "loss": 0.3566, + "step": 143240 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019472958934893682, + "loss": 0.3788, + "step": 143250 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019468718861964426, + "loss": 0.4452, + "step": 143260 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019464478789035173, + "loss": 0.3491, + "step": 143270 + }, + { + "epoch": 6.06, + "learning_rate": 0.0001946023871610592, + "loss": 0.4318, + "step": 143280 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019455998643176664, + "loss": 0.3821, + "step": 143290 + }, + { + "epoch": 6.06, + "learning_rate": 0.0001945175857024741, + "loss": 0.3312, + "step": 143300 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019447518497318154, + "loss": 0.3606, + "step": 143310 + }, + { + "epoch": 6.06, + "learning_rate": 0.000194432784243889, + "loss": 0.3916, + "step": 143320 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019439038351459645, + "loss": 0.3984, + "step": 143330 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019434798278530392, + "loss": 0.4117, + "step": 143340 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019430558205601136, + "loss": 0.3753, + "step": 143350 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019426318132671882, + "loss": 0.465, + "step": 143360 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019422078059742626, + "loss": 0.4029, + "step": 143370 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019417837986813373, + "loss": 0.4144, + "step": 143380 + }, + { + "epoch": 6.06, + "learning_rate": 0.0001941359791388412, + "loss": 0.3753, + "step": 143390 + }, + { + "epoch": 6.06, + "learning_rate": 0.00019409357840954864, + "loss": 0.355, + "step": 143400 + }, + { + "epoch": 6.06, + "learning_rate": 0.0001940511776802561, + "loss": 0.3914, + "step": 143410 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019400877695096354, + "loss": 0.4783, + "step": 143420 + }, + { + "epoch": 6.07, + "learning_rate": 0.000193966376221671, + "loss": 0.3892, + "step": 143430 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019392397549237848, + "loss": 0.4223, + "step": 143440 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019388157476308592, + "loss": 0.4712, + "step": 143450 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019383917403379338, + "loss": 0.3834, + "step": 143460 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019379677330450085, + "loss": 0.4228, + "step": 143470 + }, + { + "epoch": 6.07, + "learning_rate": 0.0001937543725752083, + "loss": 0.4347, + "step": 143480 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019371197184591576, + "loss": 0.395, + "step": 143490 + }, + { + "epoch": 6.07, + "learning_rate": 0.0001936695711166232, + "loss": 0.4071, + "step": 143500 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019362717038733066, + "loss": 0.4367, + "step": 143510 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019358476965803813, + "loss": 0.3883, + "step": 143520 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019354236892874557, + "loss": 0.4616, + "step": 143530 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019349996819945304, + "loss": 0.393, + "step": 143540 + }, + { + "epoch": 6.07, + "learning_rate": 0.0001934575674701605, + "loss": 0.3386, + "step": 143550 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019341516674086794, + "loss": 0.45, + "step": 143560 + }, + { + "epoch": 6.07, + "learning_rate": 0.0001933727660115754, + "loss": 0.478, + "step": 143570 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019333036528228285, + "loss": 0.3925, + "step": 143580 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019328796455299032, + "loss": 0.3823, + "step": 143590 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019324556382369778, + "loss": 0.3828, + "step": 143600 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019320316309440522, + "loss": 0.4873, + "step": 143610 + }, + { + "epoch": 6.07, + "learning_rate": 0.0001931607623651127, + "loss": 0.4831, + "step": 143620 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019311836163582016, + "loss": 0.3488, + "step": 143630 + }, + { + "epoch": 6.07, + "learning_rate": 0.0001930759609065276, + "loss": 0.3522, + "step": 143640 + }, + { + "epoch": 6.07, + "learning_rate": 0.00019303356017723506, + "loss": 0.4414, + "step": 143650 + }, + { + "epoch": 6.08, + "learning_rate": 0.0001929911594479425, + "loss": 0.4216, + "step": 143660 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019294875871864997, + "loss": 0.4154, + "step": 143670 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019290635798935744, + "loss": 0.4816, + "step": 143680 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019286395726006488, + "loss": 0.4875, + "step": 143690 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019282155653077234, + "loss": 0.4278, + "step": 143700 + }, + { + "epoch": 6.08, + "learning_rate": 0.0001927791558014798, + "loss": 0.4589, + "step": 143710 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019273675507218725, + "loss": 0.4597, + "step": 143720 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019269435434289472, + "loss": 0.3599, + "step": 143730 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019265195361360216, + "loss": 0.3937, + "step": 143740 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019260955288430962, + "loss": 0.3842, + "step": 143750 + }, + { + "epoch": 6.08, + "learning_rate": 0.0001925671521550171, + "loss": 0.3533, + "step": 143760 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019252475142572453, + "loss": 0.4035, + "step": 143770 + }, + { + "epoch": 6.08, + "learning_rate": 0.000192482350696432, + "loss": 0.377, + "step": 143780 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019243994996713946, + "loss": 0.4049, + "step": 143790 + }, + { + "epoch": 6.08, + "learning_rate": 0.0001923975492378469, + "loss": 0.3367, + "step": 143800 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019235514850855437, + "loss": 0.3727, + "step": 143810 + }, + { + "epoch": 6.08, + "learning_rate": 0.0001923127477792618, + "loss": 0.4536, + "step": 143820 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019227034704996928, + "loss": 0.4978, + "step": 143830 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019222794632067674, + "loss": 0.3582, + "step": 143840 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019218554559138418, + "loss": 0.3485, + "step": 143850 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019214314486209165, + "loss": 0.4139, + "step": 143860 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019210074413279912, + "loss": 0.4689, + "step": 143870 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019205834340350656, + "loss": 0.4615, + "step": 143880 + }, + { + "epoch": 6.08, + "learning_rate": 0.00019201594267421402, + "loss": 0.3773, + "step": 143890 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019197354194492146, + "loss": 0.4557, + "step": 143900 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019193114121562893, + "loss": 0.4496, + "step": 143910 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001918887404863364, + "loss": 0.3796, + "step": 143920 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019184633975704383, + "loss": 0.4902, + "step": 143930 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001918039390277513, + "loss": 0.4156, + "step": 143940 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019176153829845874, + "loss": 0.4085, + "step": 143950 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001917191375691662, + "loss": 0.3926, + "step": 143960 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019167673683987365, + "loss": 0.4005, + "step": 143970 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019163433611058111, + "loss": 0.3337, + "step": 143980 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019159193538128855, + "loss": 0.4086, + "step": 143990 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019154953465199602, + "loss": 0.3994, + "step": 144000 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001915071339227035, + "loss": 0.4026, + "step": 144010 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019146473319341093, + "loss": 0.3769, + "step": 144020 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001914223324641184, + "loss": 0.4473, + "step": 144030 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019137993173482583, + "loss": 0.4743, + "step": 144040 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001913375310055333, + "loss": 0.4121, + "step": 144050 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019129513027624074, + "loss": 0.395, + "step": 144060 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001912527295469482, + "loss": 0.4219, + "step": 144070 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019121032881765567, + "loss": 0.3769, + "step": 144080 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019116792808836311, + "loss": 0.4029, + "step": 144090 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019112552735907058, + "loss": 0.4074, + "step": 144100 + }, + { + "epoch": 6.09, + "learning_rate": 0.00019108312662977802, + "loss": 0.3609, + "step": 144110 + }, + { + "epoch": 6.09, + "learning_rate": 0.0001910407259004855, + "loss": 0.4157, + "step": 144120 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019099832517119295, + "loss": 0.3623, + "step": 144130 + }, + { + "epoch": 6.1, + "learning_rate": 0.0001909559244419004, + "loss": 0.3637, + "step": 144140 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019091352371260786, + "loss": 0.4978, + "step": 144150 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019087112298331533, + "loss": 0.4247, + "step": 144160 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019082872225402277, + "loss": 0.4548, + "step": 144170 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019078632152473023, + "loss": 0.4758, + "step": 144180 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019074392079543767, + "loss": 0.4737, + "step": 144190 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019070152006614514, + "loss": 0.3734, + "step": 144200 + }, + { + "epoch": 6.1, + "learning_rate": 0.0001906591193368526, + "loss": 0.4585, + "step": 144210 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019061671860756005, + "loss": 0.4382, + "step": 144220 + }, + { + "epoch": 6.1, + "learning_rate": 0.0001905743178782675, + "loss": 0.3986, + "step": 144230 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019053191714897498, + "loss": 0.408, + "step": 144240 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019048951641968242, + "loss": 0.4543, + "step": 144250 + }, + { + "epoch": 6.1, + "learning_rate": 0.0001904471156903899, + "loss": 0.4397, + "step": 144260 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019040471496109733, + "loss": 0.4121, + "step": 144270 + }, + { + "epoch": 6.1, + "learning_rate": 0.0001903623142318048, + "loss": 0.4448, + "step": 144280 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019031991350251226, + "loss": 0.3833, + "step": 144290 + }, + { + "epoch": 6.1, + "learning_rate": 0.0001902775127732197, + "loss": 0.3725, + "step": 144300 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019023511204392717, + "loss": 0.3544, + "step": 144310 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019019271131463463, + "loss": 0.3441, + "step": 144320 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019015031058534207, + "loss": 0.3425, + "step": 144330 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019010790985604954, + "loss": 0.4337, + "step": 144340 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019006550912675698, + "loss": 0.3828, + "step": 144350 + }, + { + "epoch": 6.1, + "learning_rate": 0.00019002310839746445, + "loss": 0.4379, + "step": 144360 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001899807076681719, + "loss": 0.4271, + "step": 144370 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018993830693887935, + "loss": 0.3987, + "step": 144380 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018989590620958682, + "loss": 0.4131, + "step": 144390 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018985350548029429, + "loss": 0.3887, + "step": 144400 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018981110475100173, + "loss": 0.4537, + "step": 144410 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001897687040217092, + "loss": 0.4587, + "step": 144420 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018972630329241663, + "loss": 0.3811, + "step": 144430 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001896839025631241, + "loss": 0.4048, + "step": 144440 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018964150183383157, + "loss": 0.3693, + "step": 144450 + }, + { + "epoch": 6.11, + "learning_rate": 0.000189599101104539, + "loss": 0.3903, + "step": 144460 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018955670037524647, + "loss": 0.3941, + "step": 144470 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018951429964595394, + "loss": 0.5154, + "step": 144480 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018947189891666138, + "loss": 0.3756, + "step": 144490 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018942949818736885, + "loss": 0.4005, + "step": 144500 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018938709745807629, + "loss": 0.4222, + "step": 144510 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018934469672878375, + "loss": 0.4088, + "step": 144520 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018930229599949122, + "loss": 0.3773, + "step": 144530 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018925989527019866, + "loss": 0.36, + "step": 144540 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018921749454090613, + "loss": 0.3481, + "step": 144550 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001891750938116136, + "loss": 0.4042, + "step": 144560 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018913269308232103, + "loss": 0.419, + "step": 144570 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001890902923530285, + "loss": 0.412, + "step": 144580 + }, + { + "epoch": 6.11, + "learning_rate": 0.00018904789162373594, + "loss": 0.413, + "step": 144590 + }, + { + "epoch": 6.11, + "learning_rate": 0.0001890054908944434, + "loss": 0.3914, + "step": 144600 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018896309016515084, + "loss": 0.457, + "step": 144610 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001889206894358583, + "loss": 0.4394, + "step": 144620 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018887828870656578, + "loss": 0.4843, + "step": 144630 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018883588797727322, + "loss": 0.3387, + "step": 144640 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018879348724798068, + "loss": 0.3744, + "step": 144650 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018875108651868812, + "loss": 0.4289, + "step": 144660 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001887086857893956, + "loss": 0.3899, + "step": 144670 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018866628506010303, + "loss": 0.4657, + "step": 144680 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001886238843308105, + "loss": 0.4419, + "step": 144690 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018858148360151794, + "loss": 0.4014, + "step": 144700 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001885390828722254, + "loss": 0.4231, + "step": 144710 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018849668214293284, + "loss": 0.372, + "step": 144720 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001884542814136403, + "loss": 0.4059, + "step": 144730 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018841188068434778, + "loss": 0.3875, + "step": 144740 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018836947995505522, + "loss": 0.3717, + "step": 144750 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018832707922576268, + "loss": 0.4321, + "step": 144760 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018828467849647015, + "loss": 0.3869, + "step": 144770 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001882422777671776, + "loss": 0.4149, + "step": 144780 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018819987703788506, + "loss": 0.4253, + "step": 144790 + }, + { + "epoch": 6.12, + "learning_rate": 0.0001881574763085925, + "loss": 0.376, + "step": 144800 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018811507557929996, + "loss": 0.3834, + "step": 144810 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018807267485000743, + "loss": 0.3722, + "step": 144820 + }, + { + "epoch": 6.12, + "learning_rate": 0.00018803027412071487, + "loss": 0.4209, + "step": 144830 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018798787339142234, + "loss": 0.4268, + "step": 144840 + }, + { + "epoch": 6.13, + "learning_rate": 0.0001879454726621298, + "loss": 0.4235, + "step": 144850 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018790307193283724, + "loss": 0.3711, + "step": 144860 + }, + { + "epoch": 6.13, + "learning_rate": 0.0001878606712035447, + "loss": 0.3582, + "step": 144870 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018781827047425215, + "loss": 0.4053, + "step": 144880 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018777586974495962, + "loss": 0.4469, + "step": 144890 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018773346901566708, + "loss": 0.4452, + "step": 144900 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018769106828637452, + "loss": 0.4117, + "step": 144910 + }, + { + "epoch": 6.13, + "learning_rate": 0.000187648667557082, + "loss": 0.4189, + "step": 144920 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018760626682778946, + "loss": 0.3463, + "step": 144930 + }, + { + "epoch": 6.13, + "learning_rate": 0.0001875638660984969, + "loss": 0.3739, + "step": 144940 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018752146536920436, + "loss": 0.4439, + "step": 144950 + }, + { + "epoch": 6.13, + "learning_rate": 0.0001874790646399118, + "loss": 0.4493, + "step": 144960 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018743666391061927, + "loss": 0.4878, + "step": 144970 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018739426318132674, + "loss": 0.4139, + "step": 144980 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018735186245203418, + "loss": 0.3811, + "step": 144990 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018730946172274164, + "loss": 0.3376, + "step": 145000 + }, + { + "epoch": 6.13, + "learning_rate": 0.0001872670609934491, + "loss": 0.4493, + "step": 145010 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018722466026415655, + "loss": 0.4143, + "step": 145020 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018718225953486402, + "loss": 0.4372, + "step": 145030 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018713985880557146, + "loss": 0.3637, + "step": 145040 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018709745807627892, + "loss": 0.3817, + "step": 145050 + }, + { + "epoch": 6.13, + "learning_rate": 0.0001870550573469864, + "loss": 0.4344, + "step": 145060 + }, + { + "epoch": 6.13, + "learning_rate": 0.00018701265661769383, + "loss": 0.3871, + "step": 145070 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001869702558884013, + "loss": 0.4058, + "step": 145080 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018692785515910876, + "loss": 0.3962, + "step": 145090 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001868854544298162, + "loss": 0.4249, + "step": 145100 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018684305370052367, + "loss": 0.4257, + "step": 145110 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001868006529712311, + "loss": 0.366, + "step": 145120 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018675825224193858, + "loss": 0.38, + "step": 145130 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018671585151264604, + "loss": 0.3706, + "step": 145140 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018667345078335348, + "loss": 0.4063, + "step": 145150 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018663105005406095, + "loss": 0.354, + "step": 145160 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018658864932476842, + "loss": 0.4309, + "step": 145170 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018654624859547586, + "loss": 0.4472, + "step": 145180 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018650384786618332, + "loss": 0.3967, + "step": 145190 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018646144713689076, + "loss": 0.4212, + "step": 145200 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018641904640759823, + "loss": 0.4234, + "step": 145210 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001863766456783057, + "loss": 0.3429, + "step": 145220 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018633424494901314, + "loss": 0.3706, + "step": 145230 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001862918442197206, + "loss": 0.3842, + "step": 145240 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018624944349042804, + "loss": 0.4368, + "step": 145250 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001862070427611355, + "loss": 0.445, + "step": 145260 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018616464203184298, + "loss": 0.4927, + "step": 145270 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018612224130255041, + "loss": 0.4809, + "step": 145280 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018607984057325788, + "loss": 0.4594, + "step": 145290 + }, + { + "epoch": 6.14, + "learning_rate": 0.00018603743984396532, + "loss": 0.4302, + "step": 145300 + }, + { + "epoch": 6.14, + "learning_rate": 0.0001859950391146728, + "loss": 0.4424, + "step": 145310 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018595263838538023, + "loss": 0.4369, + "step": 145320 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001859102376560877, + "loss": 0.3561, + "step": 145330 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018586783692679513, + "loss": 0.436, + "step": 145340 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001858254361975026, + "loss": 0.4107, + "step": 145350 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018578303546821004, + "loss": 0.4635, + "step": 145360 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001857406347389175, + "loss": 0.3527, + "step": 145370 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018569823400962497, + "loss": 0.4151, + "step": 145380 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018565583328033241, + "loss": 0.3704, + "step": 145390 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018561343255103988, + "loss": 0.3829, + "step": 145400 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018557103182174732, + "loss": 0.4721, + "step": 145410 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001855286310924548, + "loss": 0.3691, + "step": 145420 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018548623036316225, + "loss": 0.4487, + "step": 145430 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001854438296338697, + "loss": 0.466, + "step": 145440 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018540142890457716, + "loss": 0.3853, + "step": 145450 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018535902817528463, + "loss": 0.3458, + "step": 145460 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018531662744599207, + "loss": 0.3893, + "step": 145470 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018527422671669953, + "loss": 0.4818, + "step": 145480 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018523182598740697, + "loss": 0.4613, + "step": 145490 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018518942525811444, + "loss": 0.3547, + "step": 145500 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001851470245288219, + "loss": 0.4367, + "step": 145510 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018510462379952935, + "loss": 0.4658, + "step": 145520 + }, + { + "epoch": 6.15, + "learning_rate": 0.0001850622230702368, + "loss": 0.4301, + "step": 145530 + }, + { + "epoch": 6.15, + "learning_rate": 0.00018501982234094428, + "loss": 0.3794, + "step": 145540 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018497742161165172, + "loss": 0.4776, + "step": 145550 + }, + { + "epoch": 6.16, + "learning_rate": 0.0001849350208823592, + "loss": 0.431, + "step": 145560 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018489262015306663, + "loss": 0.3359, + "step": 145570 + }, + { + "epoch": 6.16, + "learning_rate": 0.0001848502194237741, + "loss": 0.3601, + "step": 145580 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018480781869448156, + "loss": 0.4329, + "step": 145590 + }, + { + "epoch": 6.16, + "learning_rate": 0.000184765417965189, + "loss": 0.4361, + "step": 145600 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018472301723589647, + "loss": 0.4023, + "step": 145610 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018468061650660393, + "loss": 0.4225, + "step": 145620 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018463821577731137, + "loss": 0.3832, + "step": 145630 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018459581504801884, + "loss": 0.4427, + "step": 145640 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018455341431872628, + "loss": 0.4582, + "step": 145650 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018451101358943375, + "loss": 0.514, + "step": 145660 + }, + { + "epoch": 6.16, + "learning_rate": 0.0001844686128601412, + "loss": 0.3773, + "step": 145670 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018442621213084865, + "loss": 0.3924, + "step": 145680 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018438381140155612, + "loss": 0.394, + "step": 145690 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018434141067226359, + "loss": 0.4187, + "step": 145700 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018429900994297103, + "loss": 0.4234, + "step": 145710 + }, + { + "epoch": 6.16, + "learning_rate": 0.0001842566092136785, + "loss": 0.3529, + "step": 145720 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018421420848438593, + "loss": 0.4189, + "step": 145730 + }, + { + "epoch": 6.16, + "learning_rate": 0.0001841718077550934, + "loss": 0.4345, + "step": 145740 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018412940702580087, + "loss": 0.3774, + "step": 145750 + }, + { + "epoch": 6.16, + "learning_rate": 0.0001840870062965083, + "loss": 0.454, + "step": 145760 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018404460556721577, + "loss": 0.418, + "step": 145770 + }, + { + "epoch": 6.16, + "learning_rate": 0.00018400220483792324, + "loss": 0.4786, + "step": 145780 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018395980410863068, + "loss": 0.399, + "step": 145790 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018391740337933815, + "loss": 0.4947, + "step": 145800 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018387500265004559, + "loss": 0.4316, + "step": 145810 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018383260192075305, + "loss": 0.4373, + "step": 145820 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018379020119146052, + "loss": 0.4448, + "step": 145830 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018374780046216796, + "loss": 0.4345, + "step": 145840 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018370539973287543, + "loss": 0.4362, + "step": 145850 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001836629990035829, + "loss": 0.3256, + "step": 145860 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018362059827429033, + "loss": 0.461, + "step": 145870 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001835781975449978, + "loss": 0.4742, + "step": 145880 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018353579681570524, + "loss": 0.4306, + "step": 145890 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001834933960864127, + "loss": 0.4, + "step": 145900 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018345099535712017, + "loss": 0.4743, + "step": 145910 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001834085946278276, + "loss": 0.3929, + "step": 145920 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018336619389853508, + "loss": 0.3555, + "step": 145930 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018332379316924252, + "loss": 0.4241, + "step": 145940 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018328139243994998, + "loss": 0.4161, + "step": 145950 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018323899171065742, + "loss": 0.4161, + "step": 145960 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001831965909813649, + "loss": 0.4022, + "step": 145970 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018315419025207233, + "loss": 0.4496, + "step": 145980 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001831117895227798, + "loss": 0.4749, + "step": 145990 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018306938879348724, + "loss": 0.4791, + "step": 146000 + }, + { + "epoch": 6.17, + "learning_rate": 0.0001830269880641947, + "loss": 0.4379, + "step": 146010 + }, + { + "epoch": 6.17, + "learning_rate": 0.00018298458733490217, + "loss": 0.4554, + "step": 146020 + }, + { + "epoch": 6.18, + "learning_rate": 0.0001829421866056096, + "loss": 0.3981, + "step": 146030 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018289978587631708, + "loss": 0.3735, + "step": 146040 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018285738514702452, + "loss": 0.479, + "step": 146050 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018281498441773198, + "loss": 0.4228, + "step": 146060 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018277258368843945, + "loss": 0.4178, + "step": 146070 + }, + { + "epoch": 6.18, + "learning_rate": 0.0001827301829591469, + "loss": 0.3783, + "step": 146080 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018268778222985436, + "loss": 0.4075, + "step": 146090 + }, + { + "epoch": 6.18, + "learning_rate": 0.0001826453815005618, + "loss": 0.384, + "step": 146100 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018260298077126926, + "loss": 0.3865, + "step": 146110 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018256058004197673, + "loss": 0.4648, + "step": 146120 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018251817931268417, + "loss": 0.4227, + "step": 146130 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018247577858339164, + "loss": 0.5446, + "step": 146140 + }, + { + "epoch": 6.18, + "learning_rate": 0.0001824333778540991, + "loss": 0.4242, + "step": 146150 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018239097712480654, + "loss": 0.3752, + "step": 146160 + }, + { + "epoch": 6.18, + "learning_rate": 0.000182348576395514, + "loss": 0.419, + "step": 146170 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018230617566622145, + "loss": 0.378, + "step": 146180 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018226377493692892, + "loss": 0.3827, + "step": 146190 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018222137420763638, + "loss": 0.4113, + "step": 146200 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018217897347834382, + "loss": 0.4134, + "step": 146210 + }, + { + "epoch": 6.18, + "learning_rate": 0.0001821365727490513, + "loss": 0.3815, + "step": 146220 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018209417201975876, + "loss": 0.3556, + "step": 146230 + }, + { + "epoch": 6.18, + "learning_rate": 0.0001820517712904662, + "loss": 0.3789, + "step": 146240 + }, + { + "epoch": 6.18, + "learning_rate": 0.00018200937056117366, + "loss": 0.3808, + "step": 146250 + }, + { + "epoch": 6.19, + "learning_rate": 0.0001819669698318811, + "loss": 0.4059, + "step": 146260 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018192456910258857, + "loss": 0.4246, + "step": 146270 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018188216837329604, + "loss": 0.405, + "step": 146280 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018183976764400348, + "loss": 0.4559, + "step": 146290 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018179736691471094, + "loss": 0.4096, + "step": 146300 + }, + { + "epoch": 6.19, + "learning_rate": 0.0001817549661854184, + "loss": 0.3847, + "step": 146310 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018171256545612585, + "loss": 0.412, + "step": 146320 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018167016472683332, + "loss": 0.4103, + "step": 146330 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018162776399754076, + "loss": 0.4903, + "step": 146340 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018158536326824822, + "loss": 0.4608, + "step": 146350 + }, + { + "epoch": 6.19, + "learning_rate": 0.0001815429625389557, + "loss": 0.3919, + "step": 146360 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018150056180966313, + "loss": 0.4385, + "step": 146370 + }, + { + "epoch": 6.19, + "learning_rate": 0.0001814581610803706, + "loss": 0.4011, + "step": 146380 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018141576035107806, + "loss": 0.3867, + "step": 146390 + }, + { + "epoch": 6.19, + "learning_rate": 0.0001813733596217855, + "loss": 0.4377, + "step": 146400 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018133095889249297, + "loss": 0.4357, + "step": 146410 + }, + { + "epoch": 6.19, + "learning_rate": 0.0001812885581632004, + "loss": 0.4293, + "step": 146420 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018124615743390788, + "loss": 0.4687, + "step": 146430 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018120375670461534, + "loss": 0.3649, + "step": 146440 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018116135597532278, + "loss": 0.3803, + "step": 146450 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018111895524603025, + "loss": 0.3431, + "step": 146460 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018107655451673772, + "loss": 0.3762, + "step": 146470 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018103415378744516, + "loss": 0.4243, + "step": 146480 + }, + { + "epoch": 6.19, + "learning_rate": 0.00018099175305815262, + "loss": 0.451, + "step": 146490 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018094935232886006, + "loss": 0.468, + "step": 146500 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018090695159956753, + "loss": 0.4076, + "step": 146510 + }, + { + "epoch": 6.2, + "learning_rate": 0.000180864550870275, + "loss": 0.4111, + "step": 146520 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018082215014098244, + "loss": 0.4225, + "step": 146530 + }, + { + "epoch": 6.2, + "learning_rate": 0.0001807797494116899, + "loss": 0.489, + "step": 146540 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018073734868239737, + "loss": 0.4969, + "step": 146550 + }, + { + "epoch": 6.2, + "learning_rate": 0.0001806949479531048, + "loss": 0.4233, + "step": 146560 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018065254722381228, + "loss": 0.4511, + "step": 146570 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018061014649451971, + "loss": 0.3949, + "step": 146580 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018056774576522718, + "loss": 0.3591, + "step": 146590 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018052534503593462, + "loss": 0.4517, + "step": 146600 + }, + { + "epoch": 6.2, + "learning_rate": 0.0001804829443066421, + "loss": 0.5151, + "step": 146610 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018044054357734953, + "loss": 0.3692, + "step": 146620 + }, + { + "epoch": 6.2, + "learning_rate": 0.000180398142848057, + "loss": 0.4301, + "step": 146630 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018035574211876446, + "loss": 0.397, + "step": 146640 + }, + { + "epoch": 6.2, + "learning_rate": 0.0001803133413894719, + "loss": 0.4201, + "step": 146650 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018027094066017937, + "loss": 0.4148, + "step": 146660 + }, + { + "epoch": 6.2, + "learning_rate": 0.0001802285399308868, + "loss": 0.3897, + "step": 146670 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018018613920159427, + "loss": 0.44, + "step": 146680 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018014373847230171, + "loss": 0.4989, + "step": 146690 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018010133774300918, + "loss": 0.4121, + "step": 146700 + }, + { + "epoch": 6.2, + "learning_rate": 0.00018005893701371665, + "loss": 0.3807, + "step": 146710 + }, + { + "epoch": 6.2, + "learning_rate": 0.0001800165362844241, + "loss": 0.4286, + "step": 146720 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017997413555513155, + "loss": 0.3665, + "step": 146730 + }, + { + "epoch": 6.21, + "learning_rate": 0.000179931734825839, + "loss": 0.4794, + "step": 146740 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017988933409654646, + "loss": 0.5037, + "step": 146750 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017984693336725393, + "loss": 0.4559, + "step": 146760 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017980453263796137, + "loss": 0.4002, + "step": 146770 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017976213190866883, + "loss": 0.3469, + "step": 146780 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017971973117937627, + "loss": 0.5095, + "step": 146790 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017967733045008374, + "loss": 0.4599, + "step": 146800 + }, + { + "epoch": 6.21, + "learning_rate": 0.0001796349297207912, + "loss": 0.3891, + "step": 146810 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017959252899149865, + "loss": 0.3963, + "step": 146820 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017955012826220611, + "loss": 0.4786, + "step": 146830 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017950772753291358, + "loss": 0.3538, + "step": 146840 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017946532680362102, + "loss": 0.4592, + "step": 146850 + }, + { + "epoch": 6.21, + "learning_rate": 0.0001794229260743285, + "loss": 0.3861, + "step": 146860 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017938052534503593, + "loss": 0.4106, + "step": 146870 + }, + { + "epoch": 6.21, + "learning_rate": 0.0001793381246157434, + "loss": 0.4359, + "step": 146880 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017929572388645086, + "loss": 0.4042, + "step": 146890 + }, + { + "epoch": 6.21, + "learning_rate": 0.0001792533231571583, + "loss": 0.3885, + "step": 146900 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017921092242786577, + "loss": 0.382, + "step": 146910 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017916852169857323, + "loss": 0.4324, + "step": 146920 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017912612096928067, + "loss": 0.4234, + "step": 146930 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017908372023998814, + "loss": 0.3727, + "step": 146940 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017904131951069558, + "loss": 0.4776, + "step": 146950 + }, + { + "epoch": 6.21, + "learning_rate": 0.00017899891878140305, + "loss": 0.414, + "step": 146960 + }, + { + "epoch": 6.22, + "learning_rate": 0.0001789565180521105, + "loss": 0.3763, + "step": 146970 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017891411732281795, + "loss": 0.3977, + "step": 146980 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017887171659352542, + "loss": 0.5221, + "step": 146990 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017882931586423289, + "loss": 0.4471, + "step": 147000 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017878691513494033, + "loss": 0.4043, + "step": 147010 + }, + { + "epoch": 6.22, + "learning_rate": 0.0001787445144056478, + "loss": 0.3915, + "step": 147020 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017870211367635523, + "loss": 0.4404, + "step": 147030 + }, + { + "epoch": 6.22, + "learning_rate": 0.0001786597129470627, + "loss": 0.3471, + "step": 147040 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017861731221777017, + "loss": 0.3604, + "step": 147050 + }, + { + "epoch": 6.22, + "learning_rate": 0.0001785749114884776, + "loss": 0.406, + "step": 147060 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017853251075918507, + "loss": 0.3686, + "step": 147070 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017849011002989254, + "loss": 0.3571, + "step": 147080 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017844770930059998, + "loss": 0.3328, + "step": 147090 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017840530857130745, + "loss": 0.4292, + "step": 147100 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017836290784201489, + "loss": 0.4613, + "step": 147110 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017832050711272235, + "loss": 0.4661, + "step": 147120 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017827810638342982, + "loss": 0.4361, + "step": 147130 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017823570565413726, + "loss": 0.3788, + "step": 147140 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017819330492484473, + "loss": 0.4478, + "step": 147150 + }, + { + "epoch": 6.22, + "learning_rate": 0.0001781509041955522, + "loss": 0.4474, + "step": 147160 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017810850346625963, + "loss": 0.383, + "step": 147170 + }, + { + "epoch": 6.22, + "learning_rate": 0.0001780661027369671, + "loss": 0.424, + "step": 147180 + }, + { + "epoch": 6.22, + "learning_rate": 0.00017802370200767454, + "loss": 0.3824, + "step": 147190 + }, + { + "epoch": 6.22, + "learning_rate": 0.000177981301278382, + "loss": 0.4404, + "step": 147200 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017793890054908947, + "loss": 0.4427, + "step": 147210 + }, + { + "epoch": 6.23, + "learning_rate": 0.0001778964998197969, + "loss": 0.3679, + "step": 147220 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017785409909050438, + "loss": 0.3997, + "step": 147230 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017781169836121182, + "loss": 0.3983, + "step": 147240 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017776929763191929, + "loss": 0.4177, + "step": 147250 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017772689690262672, + "loss": 0.4338, + "step": 147260 + }, + { + "epoch": 6.23, + "learning_rate": 0.0001776844961733342, + "loss": 0.3515, + "step": 147270 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017764209544404166, + "loss": 0.4394, + "step": 147280 + }, + { + "epoch": 6.23, + "learning_rate": 0.0001775996947147491, + "loss": 0.3203, + "step": 147290 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017755729398545656, + "loss": 0.4528, + "step": 147300 + }, + { + "epoch": 6.23, + "learning_rate": 0.000177514893256164, + "loss": 0.4052, + "step": 147310 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017747249252687147, + "loss": 0.3966, + "step": 147320 + }, + { + "epoch": 6.23, + "learning_rate": 0.0001774300917975789, + "loss": 0.4124, + "step": 147330 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017738769106828638, + "loss": 0.4337, + "step": 147340 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017734529033899382, + "loss": 0.4638, + "step": 147350 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017730288960970128, + "loss": 0.3944, + "step": 147360 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017726048888040875, + "loss": 0.4178, + "step": 147370 + }, + { + "epoch": 6.23, + "learning_rate": 0.0001772180881511162, + "loss": 0.4081, + "step": 147380 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017717568742182366, + "loss": 0.3966, + "step": 147390 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017713328669253112, + "loss": 0.3427, + "step": 147400 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017709088596323856, + "loss": 0.4021, + "step": 147410 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017704848523394603, + "loss": 0.3807, + "step": 147420 + }, + { + "epoch": 6.23, + "learning_rate": 0.00017700608450465347, + "loss": 0.359, + "step": 147430 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017696368377536094, + "loss": 0.4458, + "step": 147440 + }, + { + "epoch": 6.24, + "learning_rate": 0.0001769212830460684, + "loss": 0.4334, + "step": 147450 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017687888231677584, + "loss": 0.4663, + "step": 147460 + }, + { + "epoch": 6.24, + "learning_rate": 0.0001768364815874833, + "loss": 0.3677, + "step": 147470 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017679408085819078, + "loss": 0.4231, + "step": 147480 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017675168012889822, + "loss": 0.4202, + "step": 147490 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017670927939960568, + "loss": 0.3875, + "step": 147500 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017666687867031312, + "loss": 0.4612, + "step": 147510 + }, + { + "epoch": 6.24, + "learning_rate": 0.0001766244779410206, + "loss": 0.4056, + "step": 147520 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017658207721172806, + "loss": 0.407, + "step": 147530 + }, + { + "epoch": 6.24, + "learning_rate": 0.0001765396764824355, + "loss": 0.4166, + "step": 147540 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017649727575314296, + "loss": 0.4638, + "step": 147550 + }, + { + "epoch": 6.24, + "learning_rate": 0.0001764548750238504, + "loss": 0.447, + "step": 147560 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017641247429455787, + "loss": 0.466, + "step": 147570 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017637007356526534, + "loss": 0.437, + "step": 147580 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017632767283597278, + "loss": 0.4592, + "step": 147590 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017628527210668024, + "loss": 0.3855, + "step": 147600 + }, + { + "epoch": 6.24, + "learning_rate": 0.0001762428713773877, + "loss": 0.3937, + "step": 147610 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017620047064809515, + "loss": 0.397, + "step": 147620 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017615806991880262, + "loss": 0.4127, + "step": 147630 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017611566918951006, + "loss": 0.4594, + "step": 147640 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017607326846021752, + "loss": 0.4462, + "step": 147650 + }, + { + "epoch": 6.24, + "learning_rate": 0.000176030867730925, + "loss": 0.414, + "step": 147660 + }, + { + "epoch": 6.24, + "learning_rate": 0.00017598846700163243, + "loss": 0.4581, + "step": 147670 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001759460662723399, + "loss": 0.4493, + "step": 147680 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017590366554304736, + "loss": 0.3802, + "step": 147690 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001758612648137548, + "loss": 0.4374, + "step": 147700 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017581886408446227, + "loss": 0.3717, + "step": 147710 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001757764633551697, + "loss": 0.4221, + "step": 147720 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017573406262587718, + "loss": 0.4211, + "step": 147730 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017569166189658464, + "loss": 0.4184, + "step": 147740 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017564926116729208, + "loss": 0.4402, + "step": 147750 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017560686043799955, + "loss": 0.3711, + "step": 147760 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017556445970870702, + "loss": 0.3952, + "step": 147770 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017552205897941446, + "loss": 0.4248, + "step": 147780 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017547965825012192, + "loss": 0.4354, + "step": 147790 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017543725752082936, + "loss": 0.4048, + "step": 147800 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017539485679153683, + "loss": 0.3771, + "step": 147810 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001753524560622443, + "loss": 0.3701, + "step": 147820 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017531005533295174, + "loss": 0.4507, + "step": 147830 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001752676546036592, + "loss": 0.3975, + "step": 147840 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017522525387436667, + "loss": 0.4382, + "step": 147850 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001751828531450741, + "loss": 0.4444, + "step": 147860 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017514045241578158, + "loss": 0.3519, + "step": 147870 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017509805168648902, + "loss": 0.4855, + "step": 147880 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017505565095719648, + "loss": 0.3703, + "step": 147890 + }, + { + "epoch": 6.25, + "learning_rate": 0.00017501325022790395, + "loss": 0.4397, + "step": 147900 + }, + { + "epoch": 6.25, + "learning_rate": 0.0001749708494986114, + "loss": 0.4141, + "step": 147910 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017492844876931886, + "loss": 0.3538, + "step": 147920 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001748860480400263, + "loss": 0.4333, + "step": 147930 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017484364731073376, + "loss": 0.4269, + "step": 147940 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001748012465814412, + "loss": 0.3769, + "step": 147950 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017475884585214867, + "loss": 0.4156, + "step": 147960 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001747164451228561, + "loss": 0.3675, + "step": 147970 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017467404439356357, + "loss": 0.381, + "step": 147980 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017463164366427101, + "loss": 0.4458, + "step": 147990 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017458924293497848, + "loss": 0.3588, + "step": 148000 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017454684220568595, + "loss": 0.4374, + "step": 148010 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001745044414763934, + "loss": 0.4285, + "step": 148020 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017446204074710085, + "loss": 0.3682, + "step": 148030 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001744196400178083, + "loss": 0.4618, + "step": 148040 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017437723928851576, + "loss": 0.4327, + "step": 148050 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017433483855922323, + "loss": 0.3827, + "step": 148060 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017429243782993067, + "loss": 0.4408, + "step": 148070 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017425003710063813, + "loss": 0.353, + "step": 148080 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001742076363713456, + "loss": 0.3618, + "step": 148090 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017416523564205304, + "loss": 0.4135, + "step": 148100 + }, + { + "epoch": 6.26, + "learning_rate": 0.0001741228349127605, + "loss": 0.3933, + "step": 148110 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017408043418346795, + "loss": 0.3555, + "step": 148120 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017403803345417541, + "loss": 0.4045, + "step": 148130 + }, + { + "epoch": 6.26, + "learning_rate": 0.00017399563272488288, + "loss": 0.3698, + "step": 148140 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017395323199559032, + "loss": 0.6143, + "step": 148150 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001739108312662978, + "loss": 0.4554, + "step": 148160 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017386843053700525, + "loss": 0.4295, + "step": 148170 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001738260298077127, + "loss": 0.4077, + "step": 148180 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017378362907842016, + "loss": 0.4251, + "step": 148190 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001737412283491276, + "loss": 0.4576, + "step": 148200 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017369882761983507, + "loss": 0.4749, + "step": 148210 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017365642689054253, + "loss": 0.4167, + "step": 148220 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017361402616124997, + "loss": 0.4246, + "step": 148230 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017357162543195744, + "loss": 0.4588, + "step": 148240 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017352922470266488, + "loss": 0.4145, + "step": 148250 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017348682397337235, + "loss": 0.3726, + "step": 148260 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001734444232440798, + "loss": 0.3717, + "step": 148270 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017340202251478725, + "loss": 0.3899, + "step": 148280 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017335962178549472, + "loss": 0.3918, + "step": 148290 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001733172210562022, + "loss": 0.4908, + "step": 148300 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017327482032690963, + "loss": 0.4318, + "step": 148310 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001732324195976171, + "loss": 0.3898, + "step": 148320 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017319001886832453, + "loss": 0.4109, + "step": 148330 + }, + { + "epoch": 6.27, + "learning_rate": 0.000173147618139032, + "loss": 0.4344, + "step": 148340 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017310521740973947, + "loss": 0.4131, + "step": 148350 + }, + { + "epoch": 6.27, + "learning_rate": 0.0001730628166804469, + "loss": 0.4608, + "step": 148360 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017302041595115437, + "loss": 0.4144, + "step": 148370 + }, + { + "epoch": 6.27, + "learning_rate": 0.00017297801522186184, + "loss": 0.3894, + "step": 148380 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017293561449256928, + "loss": 0.3999, + "step": 148390 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017289321376327675, + "loss": 0.4242, + "step": 148400 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017285081303398419, + "loss": 0.4314, + "step": 148410 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017280841230469165, + "loss": 0.4684, + "step": 148420 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017276601157539912, + "loss": 0.2868, + "step": 148430 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017272361084610656, + "loss": 0.4535, + "step": 148440 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017268121011681403, + "loss": 0.4571, + "step": 148450 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001726388093875215, + "loss": 0.3412, + "step": 148460 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017259640865822893, + "loss": 0.5075, + "step": 148470 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001725540079289364, + "loss": 0.4627, + "step": 148480 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017251160719964384, + "loss": 0.3632, + "step": 148490 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001724692064703513, + "loss": 0.3812, + "step": 148500 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017242680574105877, + "loss": 0.3858, + "step": 148510 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001723844050117662, + "loss": 0.391, + "step": 148520 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017234200428247368, + "loss": 0.3904, + "step": 148530 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017229960355318115, + "loss": 0.4135, + "step": 148540 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017225720282388859, + "loss": 0.4198, + "step": 148550 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017221480209459605, + "loss": 0.3907, + "step": 148560 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001721724013653035, + "loss": 0.3899, + "step": 148570 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017213000063601096, + "loss": 0.4637, + "step": 148580 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001720875999067184, + "loss": 0.375, + "step": 148590 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017204519917742587, + "loss": 0.4699, + "step": 148600 + }, + { + "epoch": 6.28, + "learning_rate": 0.0001720027984481333, + "loss": 0.376, + "step": 148610 + }, + { + "epoch": 6.28, + "learning_rate": 0.00017196039771884077, + "loss": 0.4271, + "step": 148620 + }, + { + "epoch": 6.29, + "learning_rate": 0.0001719179969895482, + "loss": 0.408, + "step": 148630 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017187559626025568, + "loss": 0.4013, + "step": 148640 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017183319553096314, + "loss": 0.392, + "step": 148650 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017179079480167058, + "loss": 0.4231, + "step": 148660 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017174839407237805, + "loss": 0.4223, + "step": 148670 + }, + { + "epoch": 6.29, + "learning_rate": 0.0001717059933430855, + "loss": 0.4981, + "step": 148680 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017166359261379296, + "loss": 0.3927, + "step": 148690 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017162119188450042, + "loss": 0.406, + "step": 148700 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017157879115520786, + "loss": 0.4278, + "step": 148710 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017153639042591533, + "loss": 0.433, + "step": 148720 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017149398969662277, + "loss": 0.385, + "step": 148730 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017145158896733024, + "loss": 0.4408, + "step": 148740 + }, + { + "epoch": 6.29, + "learning_rate": 0.0001714091882380377, + "loss": 0.4873, + "step": 148750 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017136678750874514, + "loss": 0.4124, + "step": 148760 + }, + { + "epoch": 6.29, + "learning_rate": 0.0001713243867794526, + "loss": 0.4302, + "step": 148770 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017128198605016008, + "loss": 0.4301, + "step": 148780 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017123958532086752, + "loss": 0.4528, + "step": 148790 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017119718459157498, + "loss": 0.3964, + "step": 148800 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017115478386228242, + "loss": 0.4126, + "step": 148810 + }, + { + "epoch": 6.29, + "learning_rate": 0.0001711123831329899, + "loss": 0.363, + "step": 148820 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017106998240369736, + "loss": 0.3766, + "step": 148830 + }, + { + "epoch": 6.29, + "learning_rate": 0.0001710275816744048, + "loss": 0.4382, + "step": 148840 + }, + { + "epoch": 6.29, + "learning_rate": 0.00017098518094511226, + "loss": 0.4468, + "step": 148850 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017094278021581973, + "loss": 0.4049, + "step": 148860 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017090037948652717, + "loss": 0.4063, + "step": 148870 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017085797875723464, + "loss": 0.3596, + "step": 148880 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017081557802794208, + "loss": 0.4508, + "step": 148890 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017077317729864954, + "loss": 0.4823, + "step": 148900 + }, + { + "epoch": 6.3, + "learning_rate": 0.000170730776569357, + "loss": 0.3928, + "step": 148910 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017068837584006445, + "loss": 0.4226, + "step": 148920 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017064597511077192, + "loss": 0.4728, + "step": 148930 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017060357438147936, + "loss": 0.3924, + "step": 148940 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017056117365218682, + "loss": 0.3808, + "step": 148950 + }, + { + "epoch": 6.3, + "learning_rate": 0.0001705187729228943, + "loss": 0.4142, + "step": 148960 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017047637219360173, + "loss": 0.4363, + "step": 148970 + }, + { + "epoch": 6.3, + "learning_rate": 0.0001704339714643092, + "loss": 0.5135, + "step": 148980 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017039157073501666, + "loss": 0.3809, + "step": 148990 + }, + { + "epoch": 6.3, + "learning_rate": 0.0001703491700057241, + "loss": 0.4514, + "step": 149000 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017030676927643157, + "loss": 0.4754, + "step": 149010 + }, + { + "epoch": 6.3, + "learning_rate": 0.000170264368547139, + "loss": 0.4643, + "step": 149020 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017022196781784648, + "loss": 0.4273, + "step": 149030 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017017956708855394, + "loss": 0.403, + "step": 149040 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017013716635926138, + "loss": 0.392, + "step": 149050 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017009476562996885, + "loss": 0.4083, + "step": 149060 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017005236490067632, + "loss": 0.4095, + "step": 149070 + }, + { + "epoch": 6.3, + "learning_rate": 0.00017000996417138376, + "loss": 0.4127, + "step": 149080 + }, + { + "epoch": 6.3, + "learning_rate": 0.00016996756344209122, + "loss": 0.4037, + "step": 149090 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016992516271279866, + "loss": 0.403, + "step": 149100 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016988276198350613, + "loss": 0.3797, + "step": 149110 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001698403612542136, + "loss": 0.4228, + "step": 149120 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016979796052492104, + "loss": 0.3669, + "step": 149130 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001697555597956285, + "loss": 0.403, + "step": 149140 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016971315906633597, + "loss": 0.3899, + "step": 149150 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001696707583370434, + "loss": 0.4175, + "step": 149160 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016962835760775088, + "loss": 0.4095, + "step": 149170 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016958595687845832, + "loss": 0.4742, + "step": 149180 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016954355614916578, + "loss": 0.4137, + "step": 149190 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016950115541987325, + "loss": 0.4807, + "step": 149200 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001694587546905807, + "loss": 0.4503, + "step": 149210 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016941635396128816, + "loss": 0.3842, + "step": 149220 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001693739532319956, + "loss": 0.3936, + "step": 149230 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016933155250270306, + "loss": 0.3672, + "step": 149240 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001692891517734105, + "loss": 0.3701, + "step": 149250 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016924675104411797, + "loss": 0.4404, + "step": 149260 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016920435031482544, + "loss": 0.4373, + "step": 149270 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016916194958553287, + "loss": 0.479, + "step": 149280 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016911954885624034, + "loss": 0.419, + "step": 149290 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016907714812694778, + "loss": 0.3587, + "step": 149300 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016903474739765525, + "loss": 0.4173, + "step": 149310 + }, + { + "epoch": 6.31, + "learning_rate": 0.0001689923466683627, + "loss": 0.3782, + "step": 149320 + }, + { + "epoch": 6.31, + "learning_rate": 0.00016894994593907015, + "loss": 0.4247, + "step": 149330 + }, + { + "epoch": 6.32, + "learning_rate": 0.0001689075452097776, + "loss": 0.6197, + "step": 149340 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016886514448048506, + "loss": 0.3552, + "step": 149350 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016882274375119253, + "loss": 0.4788, + "step": 149360 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016878034302189997, + "loss": 0.4158, + "step": 149370 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016873794229260743, + "loss": 0.4653, + "step": 149380 + }, + { + "epoch": 6.32, + "learning_rate": 0.0001686955415633149, + "loss": 0.4088, + "step": 149390 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016865314083402234, + "loss": 0.4587, + "step": 149400 + }, + { + "epoch": 6.32, + "learning_rate": 0.0001686107401047298, + "loss": 0.4681, + "step": 149410 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016856833937543725, + "loss": 0.3771, + "step": 149420 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016852593864614471, + "loss": 0.4414, + "step": 149430 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016848353791685218, + "loss": 0.4895, + "step": 149440 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016844113718755962, + "loss": 0.4233, + "step": 149450 + }, + { + "epoch": 6.32, + "learning_rate": 0.0001683987364582671, + "loss": 0.3459, + "step": 149460 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016835633572897455, + "loss": 0.3918, + "step": 149470 + }, + { + "epoch": 6.32, + "learning_rate": 0.000168313934999682, + "loss": 0.4583, + "step": 149480 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016827153427038946, + "loss": 0.3916, + "step": 149490 + }, + { + "epoch": 6.32, + "learning_rate": 0.0001682291335410969, + "loss": 0.3955, + "step": 149500 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016818673281180437, + "loss": 0.4913, + "step": 149510 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016814433208251183, + "loss": 0.4011, + "step": 149520 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016810193135321927, + "loss": 0.4563, + "step": 149530 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016805953062392674, + "loss": 0.4679, + "step": 149540 + }, + { + "epoch": 6.32, + "learning_rate": 0.0001680171298946342, + "loss": 0.4781, + "step": 149550 + }, + { + "epoch": 6.32, + "learning_rate": 0.00016797472916534165, + "loss": 0.4308, + "step": 149560 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016793232843604911, + "loss": 0.4228, + "step": 149570 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016788992770675655, + "loss": 0.4473, + "step": 149580 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016784752697746402, + "loss": 0.4784, + "step": 149590 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001678051262481715, + "loss": 0.4267, + "step": 149600 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016776272551887893, + "loss": 0.4034, + "step": 149610 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001677203247895864, + "loss": 0.3861, + "step": 149620 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016767792406029386, + "loss": 0.479, + "step": 149630 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001676355233310013, + "loss": 0.391, + "step": 149640 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016759312260170877, + "loss": 0.4248, + "step": 149650 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001675507218724162, + "loss": 0.333, + "step": 149660 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016750832114312367, + "loss": 0.4662, + "step": 149670 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016746592041383114, + "loss": 0.3612, + "step": 149680 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016742351968453858, + "loss": 0.43, + "step": 149690 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016738111895524605, + "loss": 0.3623, + "step": 149700 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016733871822595349, + "loss": 0.4173, + "step": 149710 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016729631749666095, + "loss": 0.3944, + "step": 149720 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016725391676736842, + "loss": 0.4291, + "step": 149730 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016721151603807586, + "loss": 0.4213, + "step": 149740 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016716911530878333, + "loss": 0.4218, + "step": 149750 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001671267145794908, + "loss": 0.3202, + "step": 149760 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016708431385019823, + "loss": 0.3829, + "step": 149770 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001670419131209057, + "loss": 0.4195, + "step": 149780 + }, + { + "epoch": 6.33, + "learning_rate": 0.00016699951239161314, + "loss": 0.4307, + "step": 149790 + }, + { + "epoch": 6.33, + "learning_rate": 0.0001669571116623206, + "loss": 0.3513, + "step": 149800 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016691471093302807, + "loss": 0.3721, + "step": 149810 + }, + { + "epoch": 6.34, + "learning_rate": 0.0001668723102037355, + "loss": 0.4445, + "step": 149820 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016682990947444298, + "loss": 0.4334, + "step": 149830 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016678750874515045, + "loss": 0.4046, + "step": 149840 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016674510801585789, + "loss": 0.4297, + "step": 149850 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016670270728656535, + "loss": 0.3865, + "step": 149860 + }, + { + "epoch": 6.34, + "learning_rate": 0.0001666603065572728, + "loss": 0.4855, + "step": 149870 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016661790582798026, + "loss": 0.3764, + "step": 149880 + }, + { + "epoch": 6.34, + "learning_rate": 0.0001665755050986877, + "loss": 0.3701, + "step": 149890 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016653310436939517, + "loss": 0.4425, + "step": 149900 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016649070364010263, + "loss": 0.4233, + "step": 149910 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016644830291081007, + "loss": 0.4632, + "step": 149920 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016640590218151754, + "loss": 0.383, + "step": 149930 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016636350145222498, + "loss": 0.3903, + "step": 149940 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016632110072293244, + "loss": 0.4477, + "step": 149950 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016627869999363988, + "loss": 0.3624, + "step": 149960 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016623629926434735, + "loss": 0.3793, + "step": 149970 + }, + { + "epoch": 6.34, + "learning_rate": 0.0001661938985350548, + "loss": 0.4434, + "step": 149980 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016615149780576226, + "loss": 0.4944, + "step": 149990 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016610909707646972, + "loss": 0.4031, + "step": 150000 + }, + { + "epoch": 6.34, + "eval_loss": 0.6249366998672485, + "eval_runtime": 337.5836, + "eval_samples_per_second": 15.567, + "eval_steps_per_second": 3.892, + "step": 150000 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016606669634717716, + "loss": 0.3895, + "step": 150010 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016602429561788463, + "loss": 0.4483, + "step": 150020 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016598189488859207, + "loss": 0.3734, + "step": 150030 + }, + { + "epoch": 6.34, + "learning_rate": 0.00016593949415929954, + "loss": 0.4779, + "step": 150040 + }, + { + "epoch": 6.35, + "learning_rate": 0.000165897093430007, + "loss": 0.4544, + "step": 150050 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016585469270071444, + "loss": 0.4851, + "step": 150060 + }, + { + "epoch": 6.35, + "learning_rate": 0.0001658122919714219, + "loss": 0.4594, + "step": 150070 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016576989124212938, + "loss": 0.359, + "step": 150080 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016572749051283682, + "loss": 0.4762, + "step": 150090 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016568508978354428, + "loss": 0.4376, + "step": 150100 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016564268905425172, + "loss": 0.3709, + "step": 150110 + }, + { + "epoch": 6.35, + "learning_rate": 0.0001656002883249592, + "loss": 0.4145, + "step": 150120 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016555788759566666, + "loss": 0.364, + "step": 150130 + }, + { + "epoch": 6.35, + "learning_rate": 0.0001655154868663741, + "loss": 0.4267, + "step": 150140 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016547308613708156, + "loss": 0.4239, + "step": 150150 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016543068540778903, + "loss": 0.3614, + "step": 150160 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016538828467849647, + "loss": 0.3792, + "step": 150170 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016534588394920394, + "loss": 0.4179, + "step": 150180 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016530348321991138, + "loss": 0.3971, + "step": 150190 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016526108249061884, + "loss": 0.4234, + "step": 150200 + }, + { + "epoch": 6.35, + "learning_rate": 0.0001652186817613263, + "loss": 0.3982, + "step": 150210 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016517628103203375, + "loss": 0.4357, + "step": 150220 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016513388030274122, + "loss": 0.3829, + "step": 150230 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016509147957344868, + "loss": 0.3748, + "step": 150240 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016504907884415612, + "loss": 0.4179, + "step": 150250 + }, + { + "epoch": 6.35, + "learning_rate": 0.0001650066781148636, + "loss": 0.4115, + "step": 150260 + }, + { + "epoch": 6.35, + "learning_rate": 0.00016496427738557103, + "loss": 0.3821, + "step": 150270 + }, + { + "epoch": 6.36, + "learning_rate": 0.0001649218766562785, + "loss": 0.399, + "step": 150280 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016487947592698596, + "loss": 0.3966, + "step": 150290 + }, + { + "epoch": 6.36, + "learning_rate": 0.0001648370751976934, + "loss": 0.4367, + "step": 150300 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016479467446840087, + "loss": 0.4425, + "step": 150310 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016475227373910834, + "loss": 0.4246, + "step": 150320 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016470987300981578, + "loss": 0.3847, + "step": 150330 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016466747228052324, + "loss": 0.3874, + "step": 150340 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016462507155123068, + "loss": 0.4303, + "step": 150350 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016458267082193815, + "loss": 0.48, + "step": 150360 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016454027009264562, + "loss": 0.4324, + "step": 150370 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016449786936335306, + "loss": 0.373, + "step": 150380 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016445546863406052, + "loss": 0.4895, + "step": 150390 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016441306790476796, + "loss": 0.4179, + "step": 150400 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016437066717547543, + "loss": 0.4882, + "step": 150410 + }, + { + "epoch": 6.36, + "learning_rate": 0.0001643282664461829, + "loss": 0.3627, + "step": 150420 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016428586571689034, + "loss": 0.4683, + "step": 150430 + }, + { + "epoch": 6.36, + "learning_rate": 0.0001642434649875978, + "loss": 0.49, + "step": 150440 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016420106425830527, + "loss": 0.4712, + "step": 150450 + }, + { + "epoch": 6.36, + "learning_rate": 0.0001641586635290127, + "loss": 0.4057, + "step": 150460 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016411626279972018, + "loss": 0.424, + "step": 150470 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016407386207042762, + "loss": 0.3612, + "step": 150480 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016403146134113508, + "loss": 0.4253, + "step": 150490 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016398906061184255, + "loss": 0.3882, + "step": 150500 + }, + { + "epoch": 6.36, + "learning_rate": 0.00016394665988255, + "loss": 0.4262, + "step": 150510 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016390425915325746, + "loss": 0.3863, + "step": 150520 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016386185842396492, + "loss": 0.4432, + "step": 150530 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016381945769467236, + "loss": 0.4083, + "step": 150540 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016377705696537983, + "loss": 0.4122, + "step": 150550 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016373465623608727, + "loss": 0.4523, + "step": 150560 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016369225550679474, + "loss": 0.3884, + "step": 150570 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016364985477750218, + "loss": 0.3428, + "step": 150580 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016360745404820964, + "loss": 0.421, + "step": 150590 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016356505331891708, + "loss": 0.4618, + "step": 150600 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016352265258962455, + "loss": 0.4178, + "step": 150610 + }, + { + "epoch": 6.37, + "learning_rate": 0.000163480251860332, + "loss": 0.3935, + "step": 150620 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016343785113103945, + "loss": 0.3658, + "step": 150630 + }, + { + "epoch": 6.37, + "learning_rate": 0.0001633954504017469, + "loss": 0.4078, + "step": 150640 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016335304967245436, + "loss": 0.3357, + "step": 150650 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016331064894316183, + "loss": 0.4275, + "step": 150660 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016326824821386927, + "loss": 0.427, + "step": 150670 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016322584748457673, + "loss": 0.3687, + "step": 150680 + }, + { + "epoch": 6.37, + "learning_rate": 0.0001631834467552842, + "loss": 0.4328, + "step": 150690 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016314104602599164, + "loss": 0.4064, + "step": 150700 + }, + { + "epoch": 6.37, + "learning_rate": 0.0001630986452966991, + "loss": 0.4135, + "step": 150710 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016305624456740655, + "loss": 0.3168, + "step": 150720 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016301384383811401, + "loss": 0.3937, + "step": 150730 + }, + { + "epoch": 6.37, + "learning_rate": 0.00016297144310882148, + "loss": 0.3963, + "step": 150740 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016292904237952892, + "loss": 0.4083, + "step": 150750 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001628866416502364, + "loss": 0.3808, + "step": 150760 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016284424092094385, + "loss": 0.4177, + "step": 150770 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001628018401916513, + "loss": 0.4028, + "step": 150780 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016275943946235876, + "loss": 0.433, + "step": 150790 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001627170387330662, + "loss": 0.467, + "step": 150800 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016267463800377367, + "loss": 0.4543, + "step": 150810 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016263223727448113, + "loss": 0.3426, + "step": 150820 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016258983654518857, + "loss": 0.3924, + "step": 150830 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016254743581589604, + "loss": 0.4029, + "step": 150840 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001625050350866035, + "loss": 0.3973, + "step": 150850 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016246263435731095, + "loss": 0.3794, + "step": 150860 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016242023362801841, + "loss": 0.4174, + "step": 150870 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016237783289872585, + "loss": 0.3387, + "step": 150880 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016233543216943332, + "loss": 0.3269, + "step": 150890 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001622930314401408, + "loss": 0.359, + "step": 150900 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016225063071084823, + "loss": 0.4964, + "step": 150910 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001622082299815557, + "loss": 0.3951, + "step": 150920 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016216582925226316, + "loss": 0.3614, + "step": 150930 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001621234285229706, + "loss": 0.3896, + "step": 150940 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016208102779367807, + "loss": 0.4188, + "step": 150950 + }, + { + "epoch": 6.38, + "learning_rate": 0.0001620386270643855, + "loss": 0.4068, + "step": 150960 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016199622633509297, + "loss": 0.355, + "step": 150970 + }, + { + "epoch": 6.38, + "learning_rate": 0.00016195382560580044, + "loss": 0.385, + "step": 150980 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016191142487650788, + "loss": 0.3681, + "step": 150990 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016186902414721535, + "loss": 0.3817, + "step": 151000 + }, + { + "epoch": 6.39, + "learning_rate": 0.0001618266234179228, + "loss": 0.4426, + "step": 151010 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016178422268863025, + "loss": 0.4564, + "step": 151020 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016174182195933772, + "loss": 0.3323, + "step": 151030 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016169942123004516, + "loss": 0.3175, + "step": 151040 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016165702050075263, + "loss": 0.4285, + "step": 151050 + }, + { + "epoch": 6.39, + "learning_rate": 0.0001616146197714601, + "loss": 0.5147, + "step": 151060 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016157221904216753, + "loss": 0.4106, + "step": 151070 + }, + { + "epoch": 6.39, + "learning_rate": 0.000161529818312875, + "loss": 0.4123, + "step": 151080 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016148741758358244, + "loss": 0.4185, + "step": 151090 + }, + { + "epoch": 6.39, + "learning_rate": 0.0001614450168542899, + "loss": 0.3875, + "step": 151100 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016140261612499737, + "loss": 0.4155, + "step": 151110 + }, + { + "epoch": 6.39, + "learning_rate": 0.0001613602153957048, + "loss": 0.4242, + "step": 151120 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016131781466641228, + "loss": 0.4106, + "step": 151130 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016127541393711975, + "loss": 0.4172, + "step": 151140 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016123301320782719, + "loss": 0.4805, + "step": 151150 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016119061247853465, + "loss": 0.4552, + "step": 151160 + }, + { + "epoch": 6.39, + "learning_rate": 0.0001611482117492421, + "loss": 0.4333, + "step": 151170 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016110581101994956, + "loss": 0.3812, + "step": 151180 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016106341029065703, + "loss": 0.4355, + "step": 151190 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016102100956136447, + "loss": 0.4061, + "step": 151200 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016097860883207193, + "loss": 0.4372, + "step": 151210 + }, + { + "epoch": 6.39, + "learning_rate": 0.00016093620810277937, + "loss": 0.4157, + "step": 151220 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016089380737348684, + "loss": 0.4504, + "step": 151230 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016085140664419428, + "loss": 0.4392, + "step": 151240 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016080900591490175, + "loss": 0.3766, + "step": 151250 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016076660518560918, + "loss": 0.3798, + "step": 151260 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016072420445631665, + "loss": 0.4652, + "step": 151270 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016068180372702412, + "loss": 0.4565, + "step": 151280 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016063940299773156, + "loss": 0.3943, + "step": 151290 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016059700226843902, + "loss": 0.4209, + "step": 151300 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016055460153914646, + "loss": 0.3977, + "step": 151310 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016051220080985393, + "loss": 0.4083, + "step": 151320 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016046980008056137, + "loss": 0.4728, + "step": 151330 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016042739935126884, + "loss": 0.4214, + "step": 151340 + }, + { + "epoch": 6.4, + "learning_rate": 0.0001603849986219763, + "loss": 0.4493, + "step": 151350 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016034259789268374, + "loss": 0.4123, + "step": 151360 + }, + { + "epoch": 6.4, + "learning_rate": 0.0001603001971633912, + "loss": 0.4344, + "step": 151370 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016025779643409868, + "loss": 0.3829, + "step": 151380 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016021539570480612, + "loss": 0.4158, + "step": 151390 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016017299497551358, + "loss": 0.4484, + "step": 151400 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016013059424622102, + "loss": 0.4172, + "step": 151410 + }, + { + "epoch": 6.4, + "learning_rate": 0.0001600881935169285, + "loss": 0.3934, + "step": 151420 + }, + { + "epoch": 6.4, + "learning_rate": 0.00016004579278763596, + "loss": 0.4016, + "step": 151430 + }, + { + "epoch": 6.4, + "learning_rate": 0.0001600033920583434, + "loss": 0.4995, + "step": 151440 + }, + { + "epoch": 6.4, + "learning_rate": 0.00015996099132905086, + "loss": 0.4238, + "step": 151450 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015991859059975833, + "loss": 0.4599, + "step": 151460 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015987618987046577, + "loss": 0.4421, + "step": 151470 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015983378914117324, + "loss": 0.3996, + "step": 151480 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015979138841188068, + "loss": 0.4511, + "step": 151490 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015974898768258814, + "loss": 0.4413, + "step": 151500 + }, + { + "epoch": 6.41, + "learning_rate": 0.0001597065869532956, + "loss": 0.344, + "step": 151510 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015966418622400305, + "loss": 0.4413, + "step": 151520 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015962178549471052, + "loss": 0.4279, + "step": 151530 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015957938476541798, + "loss": 0.4211, + "step": 151540 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015953698403612542, + "loss": 0.44, + "step": 151550 + }, + { + "epoch": 6.41, + "learning_rate": 0.0001594945833068329, + "loss": 0.3862, + "step": 151560 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015945218257754033, + "loss": 0.4193, + "step": 151570 + }, + { + "epoch": 6.41, + "learning_rate": 0.0001594097818482478, + "loss": 0.4102, + "step": 151580 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015936738111895526, + "loss": 0.3941, + "step": 151590 + }, + { + "epoch": 6.41, + "learning_rate": 0.0001593249803896627, + "loss": 0.4784, + "step": 151600 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015928257966037017, + "loss": 0.393, + "step": 151610 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015924017893107764, + "loss": 0.3827, + "step": 151620 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015919777820178508, + "loss": 0.4172, + "step": 151630 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015915537747249254, + "loss": 0.4122, + "step": 151640 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015911297674319998, + "loss": 0.4817, + "step": 151650 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015907057601390745, + "loss": 0.4364, + "step": 151660 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015902817528461492, + "loss": 0.4187, + "step": 151670 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015898577455532236, + "loss": 0.4517, + "step": 151680 + }, + { + "epoch": 6.41, + "learning_rate": 0.00015894337382602982, + "loss": 0.4456, + "step": 151690 + }, + { + "epoch": 6.42, + "learning_rate": 0.0001589009730967373, + "loss": 0.3248, + "step": 151700 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015885857236744473, + "loss": 0.3664, + "step": 151710 + }, + { + "epoch": 6.42, + "learning_rate": 0.0001588161716381522, + "loss": 0.5148, + "step": 151720 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015877377090885964, + "loss": 0.437, + "step": 151730 + }, + { + "epoch": 6.42, + "learning_rate": 0.0001587313701795671, + "loss": 0.4816, + "step": 151740 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015868896945027457, + "loss": 0.3516, + "step": 151750 + }, + { + "epoch": 6.42, + "learning_rate": 0.000158646568720982, + "loss": 0.4266, + "step": 151760 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015860416799168948, + "loss": 0.3786, + "step": 151770 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015856176726239694, + "loss": 0.4289, + "step": 151780 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015851936653310438, + "loss": 0.3995, + "step": 151790 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015847696580381185, + "loss": 0.3583, + "step": 151800 + }, + { + "epoch": 6.42, + "learning_rate": 0.0001584345650745193, + "loss": 0.3835, + "step": 151810 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015839216434522676, + "loss": 0.3831, + "step": 151820 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015834976361593422, + "loss": 0.4797, + "step": 151830 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015830736288664166, + "loss": 0.386, + "step": 151840 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015826496215734913, + "loss": 0.4323, + "step": 151850 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015822256142805657, + "loss": 0.4556, + "step": 151860 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015818016069876404, + "loss": 0.4268, + "step": 151870 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015813775996947148, + "loss": 0.3987, + "step": 151880 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015809535924017894, + "loss": 0.3079, + "step": 151890 + }, + { + "epoch": 6.42, + "learning_rate": 0.0001580529585108864, + "loss": 0.4228, + "step": 151900 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015801055778159385, + "loss": 0.3922, + "step": 151910 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015796815705230132, + "loss": 0.4522, + "step": 151920 + }, + { + "epoch": 6.42, + "learning_rate": 0.00015792575632300875, + "loss": 0.4056, + "step": 151930 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015788335559371622, + "loss": 0.4161, + "step": 151940 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015784095486442366, + "loss": 0.404, + "step": 151950 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015779855413513113, + "loss": 0.3697, + "step": 151960 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015775615340583857, + "loss": 0.357, + "step": 151970 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015771375267654603, + "loss": 0.439, + "step": 151980 + }, + { + "epoch": 6.43, + "learning_rate": 0.0001576713519472535, + "loss": 0.4395, + "step": 151990 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015762895121796094, + "loss": 0.4022, + "step": 152000 + }, + { + "epoch": 6.43, + "learning_rate": 0.0001575865504886684, + "loss": 0.3744, + "step": 152010 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015754414975937585, + "loss": 0.4094, + "step": 152020 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015750174903008331, + "loss": 0.3225, + "step": 152030 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015745934830079078, + "loss": 0.4295, + "step": 152040 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015741694757149822, + "loss": 0.452, + "step": 152050 + }, + { + "epoch": 6.43, + "learning_rate": 0.0001573745468422057, + "loss": 0.4311, + "step": 152060 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015733214611291315, + "loss": 0.3545, + "step": 152070 + }, + { + "epoch": 6.43, + "learning_rate": 0.0001572897453836206, + "loss": 0.4389, + "step": 152080 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015724734465432806, + "loss": 0.3916, + "step": 152090 + }, + { + "epoch": 6.43, + "learning_rate": 0.0001572049439250355, + "loss": 0.3855, + "step": 152100 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015716254319574297, + "loss": 0.4006, + "step": 152110 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015712014246645043, + "loss": 0.4898, + "step": 152120 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015707774173715787, + "loss": 0.3661, + "step": 152130 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015703534100786534, + "loss": 0.3761, + "step": 152140 + }, + { + "epoch": 6.43, + "learning_rate": 0.0001569929402785728, + "loss": 0.4819, + "step": 152150 + }, + { + "epoch": 6.43, + "learning_rate": 0.00015695053954928025, + "loss": 0.3641, + "step": 152160 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015690813881998771, + "loss": 0.3792, + "step": 152170 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015686573809069515, + "loss": 0.4497, + "step": 152180 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015682333736140262, + "loss": 0.4825, + "step": 152190 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001567809366321101, + "loss": 0.3937, + "step": 152200 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015673853590281753, + "loss": 0.4413, + "step": 152210 + }, + { + "epoch": 6.44, + "learning_rate": 0.000156696135173525, + "loss": 0.4402, + "step": 152220 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015665373444423246, + "loss": 0.4194, + "step": 152230 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001566113337149399, + "loss": 0.4641, + "step": 152240 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015656893298564737, + "loss": 0.3776, + "step": 152250 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001565265322563548, + "loss": 0.4253, + "step": 152260 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015648413152706227, + "loss": 0.4108, + "step": 152270 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015644173079776974, + "loss": 0.4138, + "step": 152280 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015639933006847718, + "loss": 0.4931, + "step": 152290 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015635692933918465, + "loss": 0.3982, + "step": 152300 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001563145286098921, + "loss": 0.4446, + "step": 152310 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015627212788059955, + "loss": 0.4441, + "step": 152320 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015622972715130702, + "loss": 0.3652, + "step": 152330 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015618732642201446, + "loss": 0.4904, + "step": 152340 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015614492569272193, + "loss": 0.4766, + "step": 152350 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001561025249634294, + "loss": 0.431, + "step": 152360 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015606012423413683, + "loss": 0.4174, + "step": 152370 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001560177235048443, + "loss": 0.4113, + "step": 152380 + }, + { + "epoch": 6.44, + "learning_rate": 0.00015597532277555177, + "loss": 0.4291, + "step": 152390 + }, + { + "epoch": 6.44, + "learning_rate": 0.0001559329220462592, + "loss": 0.3874, + "step": 152400 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015589052131696667, + "loss": 0.3958, + "step": 152410 + }, + { + "epoch": 6.45, + "learning_rate": 0.0001558481205876741, + "loss": 0.405, + "step": 152420 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015580571985838158, + "loss": 0.4053, + "step": 152430 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015576331912908905, + "loss": 0.3916, + "step": 152440 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015572091839979649, + "loss": 0.3717, + "step": 152450 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015567851767050395, + "loss": 0.4219, + "step": 152460 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015563611694121142, + "loss": 0.3797, + "step": 152470 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015559371621191886, + "loss": 0.4713, + "step": 152480 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015555131548262633, + "loss": 0.4502, + "step": 152490 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015550891475333377, + "loss": 0.4485, + "step": 152500 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015546651402404123, + "loss": 0.4677, + "step": 152510 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015542411329474867, + "loss": 0.4576, + "step": 152520 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015538171256545614, + "loss": 0.4313, + "step": 152530 + }, + { + "epoch": 6.45, + "learning_rate": 0.0001553393118361636, + "loss": 0.4679, + "step": 152540 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015529691110687105, + "loss": 0.4352, + "step": 152550 + }, + { + "epoch": 6.45, + "learning_rate": 0.0001552545103775785, + "loss": 0.4051, + "step": 152560 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015521210964828595, + "loss": 0.4146, + "step": 152570 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015516970891899342, + "loss": 0.3984, + "step": 152580 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015512730818970086, + "loss": 0.3957, + "step": 152590 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015508490746040833, + "loss": 0.4286, + "step": 152600 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015504250673111576, + "loss": 0.4235, + "step": 152610 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015500010600182323, + "loss": 0.3969, + "step": 152620 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015495770527253067, + "loss": 0.3559, + "step": 152630 + }, + { + "epoch": 6.45, + "learning_rate": 0.00015491530454323814, + "loss": 0.4208, + "step": 152640 + }, + { + "epoch": 6.46, + "learning_rate": 0.0001548729038139456, + "loss": 0.4163, + "step": 152650 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015483050308465304, + "loss": 0.4257, + "step": 152660 + }, + { + "epoch": 6.46, + "learning_rate": 0.0001547881023553605, + "loss": 0.3815, + "step": 152670 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015474570162606798, + "loss": 0.3603, + "step": 152680 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015470330089677542, + "loss": 0.4414, + "step": 152690 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015466090016748288, + "loss": 0.3837, + "step": 152700 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015461849943819032, + "loss": 0.3884, + "step": 152710 + }, + { + "epoch": 6.46, + "learning_rate": 0.0001545760987088978, + "loss": 0.4546, + "step": 152720 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015453369797960526, + "loss": 0.3745, + "step": 152730 + }, + { + "epoch": 6.46, + "learning_rate": 0.0001544912972503127, + "loss": 0.4314, + "step": 152740 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015444889652102016, + "loss": 0.3314, + "step": 152750 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015440649579172763, + "loss": 0.3408, + "step": 152760 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015436409506243507, + "loss": 0.4093, + "step": 152770 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015432169433314254, + "loss": 0.4076, + "step": 152780 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015427929360384998, + "loss": 0.4339, + "step": 152790 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015423689287455744, + "loss": 0.4486, + "step": 152800 + }, + { + "epoch": 6.46, + "learning_rate": 0.0001541944921452649, + "loss": 0.457, + "step": 152810 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015415209141597235, + "loss": 0.4782, + "step": 152820 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015410969068667982, + "loss": 0.3989, + "step": 152830 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015406728995738728, + "loss": 0.3932, + "step": 152840 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015402488922809472, + "loss": 0.3803, + "step": 152850 + }, + { + "epoch": 6.46, + "learning_rate": 0.0001539824884988022, + "loss": 0.4187, + "step": 152860 + }, + { + "epoch": 6.46, + "learning_rate": 0.00015394008776950963, + "loss": 0.4, + "step": 152870 + }, + { + "epoch": 6.47, + "learning_rate": 0.0001538976870402171, + "loss": 0.428, + "step": 152880 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015385528631092456, + "loss": 0.4025, + "step": 152890 + }, + { + "epoch": 6.47, + "learning_rate": 0.000153812885581632, + "loss": 0.3999, + "step": 152900 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015377048485233947, + "loss": 0.3863, + "step": 152910 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015372808412304694, + "loss": 0.4502, + "step": 152920 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015368568339375438, + "loss": 0.4458, + "step": 152930 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015364328266446184, + "loss": 0.3726, + "step": 152940 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015360088193516928, + "loss": 0.4704, + "step": 152950 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015355848120587675, + "loss": 0.414, + "step": 152960 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015351608047658422, + "loss": 0.4238, + "step": 152970 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015347367974729166, + "loss": 0.4284, + "step": 152980 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015343127901799912, + "loss": 0.3967, + "step": 152990 + }, + { + "epoch": 6.47, + "learning_rate": 0.0001533888782887066, + "loss": 0.4244, + "step": 153000 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015334647755941403, + "loss": 0.4053, + "step": 153010 + }, + { + "epoch": 6.47, + "learning_rate": 0.0001533040768301215, + "loss": 0.4067, + "step": 153020 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015326167610082894, + "loss": 0.4316, + "step": 153030 + }, + { + "epoch": 6.47, + "learning_rate": 0.0001532192753715364, + "loss": 0.4119, + "step": 153040 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015317687464224387, + "loss": 0.3908, + "step": 153050 + }, + { + "epoch": 6.47, + "learning_rate": 0.0001531344739129513, + "loss": 0.4314, + "step": 153060 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015309207318365878, + "loss": 0.4131, + "step": 153070 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015304967245436624, + "loss": 0.4003, + "step": 153080 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015300727172507368, + "loss": 0.3409, + "step": 153090 + }, + { + "epoch": 6.47, + "learning_rate": 0.00015296487099578115, + "loss": 0.3564, + "step": 153100 + }, + { + "epoch": 6.47, + "learning_rate": 0.0001529224702664886, + "loss": 0.4007, + "step": 153110 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015288006953719606, + "loss": 0.3842, + "step": 153120 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015283766880790352, + "loss": 0.3858, + "step": 153130 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015279526807861096, + "loss": 0.4296, + "step": 153140 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015275286734931843, + "loss": 0.4134, + "step": 153150 + }, + { + "epoch": 6.48, + "learning_rate": 0.0001527104666200259, + "loss": 0.4054, + "step": 153160 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015266806589073334, + "loss": 0.4091, + "step": 153170 + }, + { + "epoch": 6.48, + "learning_rate": 0.0001526256651614408, + "loss": 0.4443, + "step": 153180 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015258326443214824, + "loss": 0.411, + "step": 153190 + }, + { + "epoch": 6.48, + "learning_rate": 0.0001525408637028557, + "loss": 0.4238, + "step": 153200 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015249846297356315, + "loss": 0.4557, + "step": 153210 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015245606224427062, + "loss": 0.4489, + "step": 153220 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015241366151497806, + "loss": 0.4473, + "step": 153230 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015237126078568552, + "loss": 0.4224, + "step": 153240 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015232886005639296, + "loss": 0.3966, + "step": 153250 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015228645932710043, + "loss": 0.3376, + "step": 153260 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015224405859780787, + "loss": 0.4348, + "step": 153270 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015220165786851533, + "loss": 0.4279, + "step": 153280 + }, + { + "epoch": 6.48, + "learning_rate": 0.0001521592571392228, + "loss": 0.3613, + "step": 153290 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015211685640993024, + "loss": 0.4158, + "step": 153300 + }, + { + "epoch": 6.48, + "learning_rate": 0.0001520744556806377, + "loss": 0.4067, + "step": 153310 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015203205495134515, + "loss": 0.3876, + "step": 153320 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015198965422205261, + "loss": 0.458, + "step": 153330 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015194725349276008, + "loss": 0.3794, + "step": 153340 + }, + { + "epoch": 6.48, + "learning_rate": 0.00015190485276346752, + "loss": 0.4486, + "step": 153350 + }, + { + "epoch": 6.49, + "learning_rate": 0.000151862452034175, + "loss": 0.4532, + "step": 153360 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015182005130488245, + "loss": 0.4171, + "step": 153370 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001517776505755899, + "loss": 0.4, + "step": 153380 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015173524984629736, + "loss": 0.4297, + "step": 153390 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001516928491170048, + "loss": 0.4979, + "step": 153400 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015165044838771227, + "loss": 0.4537, + "step": 153410 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015160804765841973, + "loss": 0.381, + "step": 153420 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015156564692912717, + "loss": 0.4284, + "step": 153430 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015152324619983464, + "loss": 0.3918, + "step": 153440 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001514808454705421, + "loss": 0.3888, + "step": 153450 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015143844474124955, + "loss": 0.4466, + "step": 153460 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015139604401195701, + "loss": 0.4598, + "step": 153470 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015135364328266445, + "loss": 0.3538, + "step": 153480 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015131124255337192, + "loss": 0.4292, + "step": 153490 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001512688418240794, + "loss": 0.4457, + "step": 153500 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015122644109478683, + "loss": 0.3925, + "step": 153510 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001511840403654943, + "loss": 0.3724, + "step": 153520 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015114163963620176, + "loss": 0.4726, + "step": 153530 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001510992389069092, + "loss": 0.3939, + "step": 153540 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015105683817761667, + "loss": 0.3785, + "step": 153550 + }, + { + "epoch": 6.49, + "learning_rate": 0.0001510144374483241, + "loss": 0.3633, + "step": 153560 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015097203671903157, + "loss": 0.4695, + "step": 153570 + }, + { + "epoch": 6.49, + "learning_rate": 0.00015092963598973904, + "loss": 0.5212, + "step": 153580 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015088723526044648, + "loss": 0.4202, + "step": 153590 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015084483453115395, + "loss": 0.3625, + "step": 153600 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015080243380186141, + "loss": 0.3812, + "step": 153610 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015076003307256885, + "loss": 0.393, + "step": 153620 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015071763234327632, + "loss": 0.4378, + "step": 153630 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015067523161398376, + "loss": 0.3793, + "step": 153640 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015063283088469123, + "loss": 0.4934, + "step": 153650 + }, + { + "epoch": 6.5, + "learning_rate": 0.0001505904301553987, + "loss": 0.361, + "step": 153660 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015054802942610613, + "loss": 0.4316, + "step": 153670 + }, + { + "epoch": 6.5, + "learning_rate": 0.0001505056286968136, + "loss": 0.4247, + "step": 153680 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015046322796752107, + "loss": 0.396, + "step": 153690 + }, + { + "epoch": 6.5, + "learning_rate": 0.0001504208272382285, + "loss": 0.4327, + "step": 153700 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015037842650893597, + "loss": 0.3994, + "step": 153710 + }, + { + "epoch": 6.5, + "learning_rate": 0.0001503360257796434, + "loss": 0.414, + "step": 153720 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015029362505035088, + "loss": 0.4266, + "step": 153730 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015025122432105835, + "loss": 0.3973, + "step": 153740 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015020882359176579, + "loss": 0.4018, + "step": 153750 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015016642286247325, + "loss": 0.4847, + "step": 153760 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015012402213318072, + "loss": 0.4152, + "step": 153770 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015008162140388816, + "loss": 0.4426, + "step": 153780 + }, + { + "epoch": 6.5, + "learning_rate": 0.00015003922067459563, + "loss": 0.3902, + "step": 153790 + }, + { + "epoch": 6.5, + "learning_rate": 0.00014999681994530307, + "loss": 0.4551, + "step": 153800 + }, + { + "epoch": 6.5, + "learning_rate": 0.00014995441921601053, + "loss": 0.3904, + "step": 153810 + }, + { + "epoch": 6.5, + "learning_rate": 0.000149912018486718, + "loss": 0.4214, + "step": 153820 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014986961775742544, + "loss": 0.4568, + "step": 153830 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001498272170281329, + "loss": 0.4283, + "step": 153840 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014978481629884035, + "loss": 0.4529, + "step": 153850 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001497424155695478, + "loss": 0.3253, + "step": 153860 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014970001484025525, + "loss": 0.4483, + "step": 153870 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014965761411096272, + "loss": 0.3925, + "step": 153880 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014961521338167016, + "loss": 0.4263, + "step": 153890 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014957281265237763, + "loss": 0.3742, + "step": 153900 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001495304119230851, + "loss": 0.426, + "step": 153910 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014948801119379253, + "loss": 0.3853, + "step": 153920 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001494456104645, + "loss": 0.3763, + "step": 153930 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014940320973520744, + "loss": 0.391, + "step": 153940 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001493608090059149, + "loss": 0.4035, + "step": 153950 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014931840827662234, + "loss": 0.4044, + "step": 153960 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001492760075473298, + "loss": 0.3674, + "step": 153970 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014923360681803728, + "loss": 0.5458, + "step": 153980 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014919120608874472, + "loss": 0.4002, + "step": 153990 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014914880535945218, + "loss": 0.5244, + "step": 154000 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014910640463015962, + "loss": 0.4389, + "step": 154010 + }, + { + "epoch": 6.51, + "learning_rate": 0.0001490640039008671, + "loss": 0.4293, + "step": 154020 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014902160317157456, + "loss": 0.4055, + "step": 154030 + }, + { + "epoch": 6.51, + "learning_rate": 0.000148979202442282, + "loss": 0.4399, + "step": 154040 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014893680171298946, + "loss": 0.4213, + "step": 154050 + }, + { + "epoch": 6.51, + "learning_rate": 0.00014889440098369693, + "loss": 0.3833, + "step": 154060 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014885200025440437, + "loss": 0.371, + "step": 154070 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014880959952511184, + "loss": 0.4711, + "step": 154080 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014876719879581928, + "loss": 0.3545, + "step": 154090 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014872479806652674, + "loss": 0.4106, + "step": 154100 + }, + { + "epoch": 6.52, + "learning_rate": 0.0001486823973372342, + "loss": 0.4528, + "step": 154110 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014863999660794165, + "loss": 0.4228, + "step": 154120 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014859759587864912, + "loss": 0.3715, + "step": 154130 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014855519514935658, + "loss": 0.418, + "step": 154140 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014851279442006402, + "loss": 0.355, + "step": 154150 + }, + { + "epoch": 6.52, + "learning_rate": 0.0001484703936907715, + "loss": 0.3995, + "step": 154160 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014842799296147893, + "loss": 0.4289, + "step": 154170 + }, + { + "epoch": 6.52, + "learning_rate": 0.0001483855922321864, + "loss": 0.4067, + "step": 154180 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014834319150289386, + "loss": 0.3546, + "step": 154190 + }, + { + "epoch": 6.52, + "learning_rate": 0.0001483007907736013, + "loss": 0.4044, + "step": 154200 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014825839004430877, + "loss": 0.4268, + "step": 154210 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014821598931501624, + "loss": 0.4267, + "step": 154220 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014817358858572368, + "loss": 0.455, + "step": 154230 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014813118785643114, + "loss": 0.434, + "step": 154240 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014808878712713858, + "loss": 0.3877, + "step": 154250 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014804638639784605, + "loss": 0.3736, + "step": 154260 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014800398566855352, + "loss": 0.4241, + "step": 154270 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014796158493926096, + "loss": 0.4276, + "step": 154280 + }, + { + "epoch": 6.52, + "learning_rate": 0.00014791918420996842, + "loss": 0.3964, + "step": 154290 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001478767834806759, + "loss": 0.4713, + "step": 154300 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014783438275138333, + "loss": 0.5215, + "step": 154310 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001477919820220908, + "loss": 0.3523, + "step": 154320 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014774958129279824, + "loss": 0.4087, + "step": 154330 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001477071805635057, + "loss": 0.3954, + "step": 154340 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014766477983421317, + "loss": 0.3639, + "step": 154350 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001476223791049206, + "loss": 0.4001, + "step": 154360 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014757997837562808, + "loss": 0.4388, + "step": 154370 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014753757764633554, + "loss": 0.4551, + "step": 154380 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014749517691704298, + "loss": 0.389, + "step": 154390 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014745277618775045, + "loss": 0.4852, + "step": 154400 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001474103754584579, + "loss": 0.3706, + "step": 154410 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014736797472916536, + "loss": 0.4199, + "step": 154420 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014732557399987282, + "loss": 0.436, + "step": 154430 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014728317327058026, + "loss": 0.4347, + "step": 154440 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014724077254128773, + "loss": 0.3641, + "step": 154450 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001471983718119952, + "loss": 0.3587, + "step": 154460 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014715597108270264, + "loss": 0.4337, + "step": 154470 + }, + { + "epoch": 6.53, + "learning_rate": 0.0001471135703534101, + "loss": 0.3921, + "step": 154480 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014707116962411754, + "loss": 0.3821, + "step": 154490 + }, + { + "epoch": 6.53, + "learning_rate": 0.000147028768894825, + "loss": 0.429, + "step": 154500 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014698636816553245, + "loss": 0.3824, + "step": 154510 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014694396743623992, + "loss": 0.4578, + "step": 154520 + }, + { + "epoch": 6.53, + "learning_rate": 0.00014690156670694736, + "loss": 0.3159, + "step": 154530 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014685916597765482, + "loss": 0.3999, + "step": 154540 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001468167652483623, + "loss": 0.4923, + "step": 154550 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014677436451906973, + "loss": 0.3966, + "step": 154560 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001467319637897772, + "loss": 0.3552, + "step": 154570 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014668956306048464, + "loss": 0.4286, + "step": 154580 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001466471623311921, + "loss": 0.4697, + "step": 154590 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014660476160189954, + "loss": 0.4442, + "step": 154600 + }, + { + "epoch": 6.54, + "learning_rate": 0.000146562360872607, + "loss": 0.4347, + "step": 154610 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014651996014331448, + "loss": 0.3979, + "step": 154620 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014647755941402191, + "loss": 0.4005, + "step": 154630 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014643515868472938, + "loss": 0.3388, + "step": 154640 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014639275795543682, + "loss": 0.3966, + "step": 154650 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001463503572261443, + "loss": 0.3923, + "step": 154660 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014630795649685175, + "loss": 0.4557, + "step": 154670 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001462655557675592, + "loss": 0.454, + "step": 154680 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014622315503826666, + "loss": 0.4217, + "step": 154690 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001461807543089741, + "loss": 0.4072, + "step": 154700 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014613835357968157, + "loss": 0.4379, + "step": 154710 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014609595285038903, + "loss": 0.3844, + "step": 154720 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014605355212109647, + "loss": 0.4306, + "step": 154730 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014601115139180394, + "loss": 0.4822, + "step": 154740 + }, + { + "epoch": 6.54, + "learning_rate": 0.0001459687506625114, + "loss": 0.4605, + "step": 154750 + }, + { + "epoch": 6.54, + "learning_rate": 0.00014592634993321885, + "loss": 0.3928, + "step": 154760 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014588394920392631, + "loss": 0.3901, + "step": 154770 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014584154847463375, + "loss": 0.4102, + "step": 154780 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014579914774534122, + "loss": 0.4955, + "step": 154790 + }, + { + "epoch": 6.55, + "learning_rate": 0.0001457567470160487, + "loss": 0.4518, + "step": 154800 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014571434628675613, + "loss": 0.5036, + "step": 154810 + }, + { + "epoch": 6.55, + "learning_rate": 0.0001456719455574636, + "loss": 0.4731, + "step": 154820 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014562954482817106, + "loss": 0.4293, + "step": 154830 + }, + { + "epoch": 6.55, + "learning_rate": 0.0001455871440988785, + "loss": 0.4143, + "step": 154840 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014554474336958597, + "loss": 0.4466, + "step": 154850 + }, + { + "epoch": 6.55, + "learning_rate": 0.0001455023426402934, + "loss": 0.4447, + "step": 154860 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014545994191100087, + "loss": 0.4101, + "step": 154870 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014541754118170834, + "loss": 0.3885, + "step": 154880 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014537514045241578, + "loss": 0.4497, + "step": 154890 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014533273972312325, + "loss": 0.4248, + "step": 154900 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014529033899383071, + "loss": 0.415, + "step": 154910 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014524793826453815, + "loss": 0.4431, + "step": 154920 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014520553753524562, + "loss": 0.3836, + "step": 154930 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014516313680595306, + "loss": 0.4579, + "step": 154940 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014512073607666053, + "loss": 0.4084, + "step": 154950 + }, + { + "epoch": 6.55, + "learning_rate": 0.000145078335347368, + "loss": 0.4567, + "step": 154960 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014503593461807543, + "loss": 0.3963, + "step": 154970 + }, + { + "epoch": 6.55, + "learning_rate": 0.0001449935338887829, + "loss": 0.5191, + "step": 154980 + }, + { + "epoch": 6.55, + "learning_rate": 0.00014495113315949037, + "loss": 0.4591, + "step": 154990 + }, + { + "epoch": 6.55, + "learning_rate": 0.0001449087324301978, + "loss": 0.3978, + "step": 155000 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014486633170090527, + "loss": 0.3645, + "step": 155010 + }, + { + "epoch": 6.56, + "learning_rate": 0.0001448239309716127, + "loss": 0.4246, + "step": 155020 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014478153024232018, + "loss": 0.4509, + "step": 155030 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014473912951302765, + "loss": 0.3781, + "step": 155040 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014469672878373509, + "loss": 0.3703, + "step": 155050 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014465432805444255, + "loss": 0.3775, + "step": 155060 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014461192732515002, + "loss": 0.4367, + "step": 155070 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014456952659585746, + "loss": 0.3645, + "step": 155080 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014452712586656493, + "loss": 0.3628, + "step": 155090 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014448472513727237, + "loss": 0.468, + "step": 155100 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014444232440797983, + "loss": 0.3547, + "step": 155110 + }, + { + "epoch": 6.56, + "learning_rate": 0.0001443999236786873, + "loss": 0.3932, + "step": 155120 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014435752294939474, + "loss": 0.4054, + "step": 155130 + }, + { + "epoch": 6.56, + "learning_rate": 0.0001443151222201022, + "loss": 0.3988, + "step": 155140 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014427272149080965, + "loss": 0.4163, + "step": 155150 + }, + { + "epoch": 6.56, + "learning_rate": 0.0001442303207615171, + "loss": 0.3936, + "step": 155160 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014418792003222458, + "loss": 0.3676, + "step": 155170 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014414551930293202, + "loss": 0.4842, + "step": 155180 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014410311857363949, + "loss": 0.3682, + "step": 155190 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014406071784434693, + "loss": 0.4326, + "step": 155200 + }, + { + "epoch": 6.56, + "learning_rate": 0.0001440183171150544, + "loss": 0.516, + "step": 155210 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014397591638576183, + "loss": 0.4506, + "step": 155220 + }, + { + "epoch": 6.56, + "learning_rate": 0.0001439335156564693, + "loss": 0.4902, + "step": 155230 + }, + { + "epoch": 6.56, + "learning_rate": 0.00014389111492717674, + "loss": 0.3754, + "step": 155240 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001438487141978842, + "loss": 0.4511, + "step": 155250 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014380631346859164, + "loss": 0.4753, + "step": 155260 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001437639127392991, + "loss": 0.385, + "step": 155270 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014372151201000658, + "loss": 0.4015, + "step": 155280 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014367911128071402, + "loss": 0.403, + "step": 155290 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014363671055142148, + "loss": 0.3932, + "step": 155300 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014359430982212895, + "loss": 0.3865, + "step": 155310 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001435519090928364, + "loss": 0.3902, + "step": 155320 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014350950836354386, + "loss": 0.4137, + "step": 155330 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001434671076342513, + "loss": 0.4117, + "step": 155340 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014342470690495876, + "loss": 0.4472, + "step": 155350 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014338230617566623, + "loss": 0.4437, + "step": 155360 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014333990544637367, + "loss": 0.3608, + "step": 155370 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014329750471708114, + "loss": 0.3841, + "step": 155380 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014325510398778858, + "loss": 0.375, + "step": 155390 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014321270325849604, + "loss": 0.4609, + "step": 155400 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001431703025292035, + "loss": 0.4521, + "step": 155410 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014312790179991095, + "loss": 0.4505, + "step": 155420 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014308550107061842, + "loss": 0.4108, + "step": 155430 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014304310034132588, + "loss": 0.4156, + "step": 155440 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014300069961203332, + "loss": 0.4207, + "step": 155450 + }, + { + "epoch": 6.57, + "learning_rate": 0.0001429582988827408, + "loss": 0.41, + "step": 155460 + }, + { + "epoch": 6.57, + "learning_rate": 0.00014291589815344823, + "loss": 0.4404, + "step": 155470 + }, + { + "epoch": 6.58, + "learning_rate": 0.0001428734974241557, + "loss": 0.4142, + "step": 155480 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014283109669486316, + "loss": 0.445, + "step": 155490 + }, + { + "epoch": 6.58, + "learning_rate": 0.0001427886959655706, + "loss": 0.3727, + "step": 155500 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014274629523627807, + "loss": 0.3979, + "step": 155510 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014270389450698554, + "loss": 0.497, + "step": 155520 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014266149377769298, + "loss": 0.4644, + "step": 155530 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014261909304840044, + "loss": 0.4732, + "step": 155540 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014257669231910788, + "loss": 0.4872, + "step": 155550 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014253429158981535, + "loss": 0.3759, + "step": 155560 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014249189086052282, + "loss": 0.4376, + "step": 155570 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014244949013123026, + "loss": 0.4252, + "step": 155580 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014240708940193772, + "loss": 0.384, + "step": 155590 + }, + { + "epoch": 6.58, + "learning_rate": 0.0001423646886726452, + "loss": 0.3531, + "step": 155600 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014232228794335263, + "loss": 0.3937, + "step": 155610 + }, + { + "epoch": 6.58, + "learning_rate": 0.0001422798872140601, + "loss": 0.4507, + "step": 155620 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014223748648476754, + "loss": 0.3388, + "step": 155630 + }, + { + "epoch": 6.58, + "learning_rate": 0.000142195085755475, + "loss": 0.4306, + "step": 155640 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014215268502618247, + "loss": 0.379, + "step": 155650 + }, + { + "epoch": 6.58, + "learning_rate": 0.0001421102842968899, + "loss": 0.4185, + "step": 155660 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014206788356759738, + "loss": 0.3971, + "step": 155670 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014202548283830484, + "loss": 0.4256, + "step": 155680 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014198308210901228, + "loss": 0.4495, + "step": 155690 + }, + { + "epoch": 6.58, + "learning_rate": 0.00014194068137971975, + "loss": 0.3932, + "step": 155700 + }, + { + "epoch": 6.58, + "learning_rate": 0.0001418982806504272, + "loss": 0.3911, + "step": 155710 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014185587992113466, + "loss": 0.4569, + "step": 155720 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014181347919184212, + "loss": 0.3629, + "step": 155730 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014177107846254956, + "loss": 0.4549, + "step": 155740 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014172867773325703, + "loss": 0.3505, + "step": 155750 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001416862770039645, + "loss": 0.4087, + "step": 155760 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014164387627467194, + "loss": 0.4262, + "step": 155770 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001416014755453794, + "loss": 0.446, + "step": 155780 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014155907481608684, + "loss": 0.3926, + "step": 155790 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001415166740867943, + "loss": 0.3797, + "step": 155800 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014147427335750178, + "loss": 0.3803, + "step": 155810 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014143187262820922, + "loss": 0.4817, + "step": 155820 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014138947189891668, + "loss": 0.4268, + "step": 155830 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014134707116962412, + "loss": 0.438, + "step": 155840 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001413046704403316, + "loss": 0.4293, + "step": 155850 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014126226971103903, + "loss": 0.417, + "step": 155860 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001412198689817465, + "loss": 0.4005, + "step": 155870 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014117746825245394, + "loss": 0.365, + "step": 155880 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001411350675231614, + "loss": 0.397, + "step": 155890 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014109266679386884, + "loss": 0.4522, + "step": 155900 + }, + { + "epoch": 6.59, + "learning_rate": 0.0001410502660645763, + "loss": 0.3719, + "step": 155910 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014100786533528378, + "loss": 0.4545, + "step": 155920 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014096546460599122, + "loss": 0.4243, + "step": 155930 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014092306387669868, + "loss": 0.3802, + "step": 155940 + }, + { + "epoch": 6.59, + "learning_rate": 0.00014088066314740612, + "loss": 0.3465, + "step": 155950 + }, + { + "epoch": 6.6, + "learning_rate": 0.0001408382624181136, + "loss": 0.386, + "step": 155960 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014079586168882106, + "loss": 0.4301, + "step": 155970 + }, + { + "epoch": 6.6, + "learning_rate": 0.0001407534609595285, + "loss": 0.4452, + "step": 155980 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014071106023023596, + "loss": 0.4097, + "step": 155990 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014066865950094343, + "loss": 0.3449, + "step": 156000 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014062625877165087, + "loss": 0.4236, + "step": 156010 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014058385804235833, + "loss": 0.4394, + "step": 156020 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014054145731306577, + "loss": 0.4083, + "step": 156030 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014049905658377324, + "loss": 0.4244, + "step": 156040 + }, + { + "epoch": 6.6, + "learning_rate": 0.0001404566558544807, + "loss": 0.4855, + "step": 156050 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014041425512518815, + "loss": 0.4104, + "step": 156060 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014037185439589561, + "loss": 0.3873, + "step": 156070 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014032945366660305, + "loss": 0.3976, + "step": 156080 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014028705293731052, + "loss": 0.3742, + "step": 156090 + }, + { + "epoch": 6.6, + "learning_rate": 0.000140244652208018, + "loss": 0.4321, + "step": 156100 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014020225147872543, + "loss": 0.3523, + "step": 156110 + }, + { + "epoch": 6.6, + "learning_rate": 0.0001401598507494329, + "loss": 0.3928, + "step": 156120 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014011745002014036, + "loss": 0.4285, + "step": 156130 + }, + { + "epoch": 6.6, + "learning_rate": 0.0001400750492908478, + "loss": 0.4159, + "step": 156140 + }, + { + "epoch": 6.6, + "learning_rate": 0.00014003264856155527, + "loss": 0.3922, + "step": 156150 + }, + { + "epoch": 6.6, + "learning_rate": 0.0001399902478322627, + "loss": 0.4255, + "step": 156160 + }, + { + "epoch": 6.6, + "learning_rate": 0.00013994784710297017, + "loss": 0.3468, + "step": 156170 + }, + { + "epoch": 6.6, + "learning_rate": 0.00013990544637367764, + "loss": 0.3697, + "step": 156180 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013986304564438508, + "loss": 0.4387, + "step": 156190 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013982064491509255, + "loss": 0.5188, + "step": 156200 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013977824418580001, + "loss": 0.3661, + "step": 156210 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013973584345650745, + "loss": 0.3888, + "step": 156220 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013969344272721492, + "loss": 0.3922, + "step": 156230 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013965104199792236, + "loss": 0.3986, + "step": 156240 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013960864126862983, + "loss": 0.4038, + "step": 156250 + }, + { + "epoch": 6.61, + "learning_rate": 0.0001395662405393373, + "loss": 0.417, + "step": 156260 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013952383981004473, + "loss": 0.3803, + "step": 156270 + }, + { + "epoch": 6.61, + "learning_rate": 0.0001394814390807522, + "loss": 0.4705, + "step": 156280 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013943903835145967, + "loss": 0.4111, + "step": 156290 + }, + { + "epoch": 6.61, + "learning_rate": 0.0001393966376221671, + "loss": 0.381, + "step": 156300 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013935423689287457, + "loss": 0.4999, + "step": 156310 + }, + { + "epoch": 6.61, + "learning_rate": 0.000139311836163582, + "loss": 0.3993, + "step": 156320 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013926943543428948, + "loss": 0.4185, + "step": 156330 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013922703470499695, + "loss": 0.4119, + "step": 156340 + }, + { + "epoch": 6.61, + "learning_rate": 0.0001391846339757044, + "loss": 0.4426, + "step": 156350 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013914223324641185, + "loss": 0.4188, + "step": 156360 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013909983251711932, + "loss": 0.392, + "step": 156370 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013905743178782676, + "loss": 0.4337, + "step": 156380 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013901503105853423, + "loss": 0.375, + "step": 156390 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013897263032924167, + "loss": 0.4631, + "step": 156400 + }, + { + "epoch": 6.61, + "learning_rate": 0.00013893022959994913, + "loss": 0.3621, + "step": 156410 + }, + { + "epoch": 6.61, + "learning_rate": 0.0001388878288706566, + "loss": 0.3422, + "step": 156420 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013884542814136404, + "loss": 0.3714, + "step": 156430 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001388030274120715, + "loss": 0.4051, + "step": 156440 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013876062668277897, + "loss": 0.3895, + "step": 156450 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001387182259534864, + "loss": 0.4554, + "step": 156460 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013867582522419388, + "loss": 0.4039, + "step": 156470 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013863342449490132, + "loss": 0.4298, + "step": 156480 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013859102376560879, + "loss": 0.4213, + "step": 156490 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013854862303631623, + "loss": 0.4452, + "step": 156500 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001385062223070237, + "loss": 0.3859, + "step": 156510 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013846382157773113, + "loss": 0.3814, + "step": 156520 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001384214208484386, + "loss": 0.4438, + "step": 156530 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013837902011914607, + "loss": 0.3728, + "step": 156540 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001383366193898535, + "loss": 0.416, + "step": 156550 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013829421866056097, + "loss": 0.4377, + "step": 156560 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001382518179312684, + "loss": 0.4262, + "step": 156570 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013820941720197588, + "loss": 0.3679, + "step": 156580 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013816701647268332, + "loss": 0.4706, + "step": 156590 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013812461574339079, + "loss": 0.4175, + "step": 156600 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013808221501409825, + "loss": 0.3575, + "step": 156610 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001380398142848057, + "loss": 0.382, + "step": 156620 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013799741355551316, + "loss": 0.4101, + "step": 156630 + }, + { + "epoch": 6.62, + "learning_rate": 0.0001379550128262206, + "loss": 0.4137, + "step": 156640 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013791261209692806, + "loss": 0.3276, + "step": 156650 + }, + { + "epoch": 6.62, + "learning_rate": 0.00013787021136763553, + "loss": 0.4347, + "step": 156660 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013782781063834297, + "loss": 0.4109, + "step": 156670 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013778540990905044, + "loss": 0.425, + "step": 156680 + }, + { + "epoch": 6.63, + "learning_rate": 0.0001377430091797579, + "loss": 0.3872, + "step": 156690 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013770060845046534, + "loss": 0.4144, + "step": 156700 + }, + { + "epoch": 6.63, + "learning_rate": 0.0001376582077211728, + "loss": 0.4498, + "step": 156710 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013761580699188025, + "loss": 0.4489, + "step": 156720 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013757340626258772, + "loss": 0.4037, + "step": 156730 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013753100553329518, + "loss": 0.4566, + "step": 156740 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013748860480400262, + "loss": 0.4779, + "step": 156750 + }, + { + "epoch": 6.63, + "learning_rate": 0.0001374462040747101, + "loss": 0.4171, + "step": 156760 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013740380334541756, + "loss": 0.4269, + "step": 156770 + }, + { + "epoch": 6.63, + "learning_rate": 0.000137361402616125, + "loss": 0.4155, + "step": 156780 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013731900188683246, + "loss": 0.396, + "step": 156790 + }, + { + "epoch": 6.63, + "learning_rate": 0.0001372766011575399, + "loss": 0.4153, + "step": 156800 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013723420042824737, + "loss": 0.4422, + "step": 156810 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013719179969895484, + "loss": 0.3292, + "step": 156820 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013714939896966228, + "loss": 0.4496, + "step": 156830 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013710699824036974, + "loss": 0.4354, + "step": 156840 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013706459751107718, + "loss": 0.4034, + "step": 156850 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013702219678178465, + "loss": 0.3553, + "step": 156860 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013697979605249212, + "loss": 0.4574, + "step": 156870 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013693739532319956, + "loss": 0.3534, + "step": 156880 + }, + { + "epoch": 6.63, + "learning_rate": 0.00013689499459390702, + "loss": 0.3818, + "step": 156890 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001368525938646145, + "loss": 0.3899, + "step": 156900 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013681019313532193, + "loss": 0.4219, + "step": 156910 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001367677924060294, + "loss": 0.4276, + "step": 156920 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013672539167673684, + "loss": 0.431, + "step": 156930 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001366829909474443, + "loss": 0.4465, + "step": 156940 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013664059021815177, + "loss": 0.5243, + "step": 156950 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001365981894888592, + "loss": 0.4504, + "step": 156960 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013655578875956668, + "loss": 0.3496, + "step": 156970 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013651338803027414, + "loss": 0.3399, + "step": 156980 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013647098730098158, + "loss": 0.4531, + "step": 156990 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013642858657168905, + "loss": 0.4082, + "step": 157000 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001363861858423965, + "loss": 0.3763, + "step": 157010 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013634378511310396, + "loss": 0.4638, + "step": 157020 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013630138438381142, + "loss": 0.3962, + "step": 157030 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013625898365451886, + "loss": 0.4232, + "step": 157040 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013621658292522633, + "loss": 0.432, + "step": 157050 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001361741821959338, + "loss": 0.3658, + "step": 157060 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013613178146664124, + "loss": 0.3691, + "step": 157070 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001360893807373487, + "loss": 0.465, + "step": 157080 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013604698000805614, + "loss": 0.449, + "step": 157090 + }, + { + "epoch": 6.64, + "learning_rate": 0.0001360045792787636, + "loss": 0.3939, + "step": 157100 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013596217854947108, + "loss": 0.3934, + "step": 157110 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013591977782017852, + "loss": 0.4863, + "step": 157120 + }, + { + "epoch": 6.64, + "learning_rate": 0.00013587737709088598, + "loss": 0.3602, + "step": 157130 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013583497636159342, + "loss": 0.4319, + "step": 157140 + }, + { + "epoch": 6.65, + "learning_rate": 0.0001357925756323009, + "loss": 0.4623, + "step": 157150 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013575017490300833, + "loss": 0.4165, + "step": 157160 + }, + { + "epoch": 6.65, + "learning_rate": 0.0001357077741737158, + "loss": 0.3893, + "step": 157170 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013566537344442326, + "loss": 0.3902, + "step": 157180 + }, + { + "epoch": 6.65, + "learning_rate": 0.0001356229727151307, + "loss": 0.4786, + "step": 157190 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013558057198583817, + "loss": 0.4449, + "step": 157200 + }, + { + "epoch": 6.65, + "learning_rate": 0.0001355381712565456, + "loss": 0.3804, + "step": 157210 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013549577052725308, + "loss": 0.4625, + "step": 157220 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013545336979796052, + "loss": 0.4768, + "step": 157230 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013541096906866798, + "loss": 0.4556, + "step": 157240 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013536856833937542, + "loss": 0.3901, + "step": 157250 + }, + { + "epoch": 6.65, + "learning_rate": 0.0001353261676100829, + "loss": 0.4641, + "step": 157260 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013528376688079036, + "loss": 0.4532, + "step": 157270 + }, + { + "epoch": 6.65, + "learning_rate": 0.0001352413661514978, + "loss": 0.4577, + "step": 157280 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013519896542220526, + "loss": 0.4378, + "step": 157290 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013515656469291273, + "loss": 0.4267, + "step": 157300 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013511416396362017, + "loss": 0.3574, + "step": 157310 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013507176323432764, + "loss": 0.4278, + "step": 157320 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013502936250503507, + "loss": 0.4179, + "step": 157330 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013498696177574254, + "loss": 0.4536, + "step": 157340 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013494456104645, + "loss": 0.3985, + "step": 157350 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013490216031715745, + "loss": 0.3994, + "step": 157360 + }, + { + "epoch": 6.65, + "learning_rate": 0.00013485975958786491, + "loss": 0.3287, + "step": 157370 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013481735885857238, + "loss": 0.3811, + "step": 157380 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013477495812927982, + "loss": 0.4357, + "step": 157390 + }, + { + "epoch": 6.66, + "learning_rate": 0.0001347325573999873, + "loss": 0.4021, + "step": 157400 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013469015667069473, + "loss": 0.4149, + "step": 157410 + }, + { + "epoch": 6.66, + "learning_rate": 0.0001346477559414022, + "loss": 0.4983, + "step": 157420 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013460535521210966, + "loss": 0.3803, + "step": 157430 + }, + { + "epoch": 6.66, + "learning_rate": 0.0001345629544828171, + "loss": 0.3834, + "step": 157440 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013452055375352457, + "loss": 0.414, + "step": 157450 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013447815302423203, + "loss": 0.4495, + "step": 157460 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013443575229493947, + "loss": 0.3874, + "step": 157470 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013439335156564694, + "loss": 0.4013, + "step": 157480 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013435095083635438, + "loss": 0.41, + "step": 157490 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013430855010706185, + "loss": 0.4104, + "step": 157500 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013426614937776931, + "loss": 0.4052, + "step": 157510 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013422374864847675, + "loss": 0.3576, + "step": 157520 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013418134791918422, + "loss": 0.4453, + "step": 157530 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013413894718989166, + "loss": 0.4459, + "step": 157540 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013409654646059913, + "loss": 0.4347, + "step": 157550 + }, + { + "epoch": 6.66, + "learning_rate": 0.0001340541457313066, + "loss": 0.4077, + "step": 157560 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013401174500201403, + "loss": 0.3983, + "step": 157570 + }, + { + "epoch": 6.66, + "learning_rate": 0.0001339693442727215, + "loss": 0.4659, + "step": 157580 + }, + { + "epoch": 6.66, + "learning_rate": 0.00013392694354342897, + "loss": 0.4566, + "step": 157590 + }, + { + "epoch": 6.66, + "learning_rate": 0.0001338845428141364, + "loss": 0.4533, + "step": 157600 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013384214208484387, + "loss": 0.4147, + "step": 157610 + }, + { + "epoch": 6.67, + "learning_rate": 0.0001337997413555513, + "loss": 0.4302, + "step": 157620 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013375734062625878, + "loss": 0.4155, + "step": 157630 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013371493989696625, + "loss": 0.4178, + "step": 157640 + }, + { + "epoch": 6.67, + "learning_rate": 0.0001336725391676737, + "loss": 0.4108, + "step": 157650 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013363013843838115, + "loss": 0.4093, + "step": 157660 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013358773770908862, + "loss": 0.3979, + "step": 157670 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013354533697979606, + "loss": 0.4611, + "step": 157680 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013350293625050353, + "loss": 0.4468, + "step": 157690 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013346053552121097, + "loss": 0.3595, + "step": 157700 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013341813479191843, + "loss": 0.4491, + "step": 157710 + }, + { + "epoch": 6.67, + "learning_rate": 0.0001333757340626259, + "loss": 0.4017, + "step": 157720 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013333333333333334, + "loss": 0.3983, + "step": 157730 + }, + { + "epoch": 6.67, + "learning_rate": 0.0001332909326040408, + "loss": 0.451, + "step": 157740 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013324853187474827, + "loss": 0.4129, + "step": 157750 + }, + { + "epoch": 6.67, + "learning_rate": 0.0001332061311454557, + "loss": 0.4005, + "step": 157760 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013316373041616318, + "loss": 0.3446, + "step": 157770 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013312132968687062, + "loss": 0.4444, + "step": 157780 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013307892895757809, + "loss": 0.465, + "step": 157790 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013303652822828555, + "loss": 0.4865, + "step": 157800 + }, + { + "epoch": 6.67, + "learning_rate": 0.000132994127498993, + "loss": 0.3962, + "step": 157810 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013295172676970046, + "loss": 0.3968, + "step": 157820 + }, + { + "epoch": 6.67, + "learning_rate": 0.0001329093260404079, + "loss": 0.4275, + "step": 157830 + }, + { + "epoch": 6.67, + "learning_rate": 0.00013286692531111537, + "loss": 0.4522, + "step": 157840 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001328245245818228, + "loss": 0.4037, + "step": 157850 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013278212385253027, + "loss": 0.3622, + "step": 157860 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001327397231232377, + "loss": 0.378, + "step": 157870 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013269732239394518, + "loss": 0.3909, + "step": 157880 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013265492166465262, + "loss": 0.4807, + "step": 157890 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013261252093536009, + "loss": 0.4552, + "step": 157900 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013257012020606755, + "loss": 0.416, + "step": 157910 + }, + { + "epoch": 6.68, + "learning_rate": 0.000132527719476775, + "loss": 0.418, + "step": 157920 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013248531874748246, + "loss": 0.4083, + "step": 157930 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001324429180181899, + "loss": 0.4571, + "step": 157940 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013240051728889737, + "loss": 0.4415, + "step": 157950 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013235811655960483, + "loss": 0.4318, + "step": 157960 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013231571583031227, + "loss": 0.4797, + "step": 157970 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013227331510101974, + "loss": 0.3786, + "step": 157980 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001322309143717272, + "loss": 0.4037, + "step": 157990 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013218851364243464, + "loss": 0.4288, + "step": 158000 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001321461129131421, + "loss": 0.3957, + "step": 158010 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013210371218384955, + "loss": 0.4093, + "step": 158020 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013206131145455702, + "loss": 0.3776, + "step": 158030 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013201891072526448, + "loss": 0.3682, + "step": 158040 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013197650999597192, + "loss": 0.3536, + "step": 158050 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001319341092666794, + "loss": 0.4011, + "step": 158060 + }, + { + "epoch": 6.68, + "learning_rate": 0.00013189170853738686, + "loss": 0.3939, + "step": 158070 + }, + { + "epoch": 6.68, + "learning_rate": 0.0001318493078080943, + "loss": 0.3925, + "step": 158080 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013180690707880176, + "loss": 0.4163, + "step": 158090 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001317645063495092, + "loss": 0.407, + "step": 158100 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013172210562021667, + "loss": 0.3928, + "step": 158110 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013167970489092414, + "loss": 0.4379, + "step": 158120 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013163730416163158, + "loss": 0.3936, + "step": 158130 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013159490343233904, + "loss": 0.4168, + "step": 158140 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001315525027030465, + "loss": 0.3904, + "step": 158150 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013151010197375395, + "loss": 0.3685, + "step": 158160 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013146770124446142, + "loss": 0.5109, + "step": 158170 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013142530051516886, + "loss": 0.3505, + "step": 158180 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013138289978587632, + "loss": 0.4148, + "step": 158190 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001313404990565838, + "loss": 0.4117, + "step": 158200 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013129809832729123, + "loss": 0.3992, + "step": 158210 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001312556975979987, + "loss": 0.4807, + "step": 158220 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013121329686870614, + "loss": 0.4373, + "step": 158230 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001311708961394136, + "loss": 0.3259, + "step": 158240 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013112849541012107, + "loss": 0.3954, + "step": 158250 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001310860946808285, + "loss": 0.4347, + "step": 158260 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013104369395153598, + "loss": 0.4509, + "step": 158270 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013100129322224344, + "loss": 0.4512, + "step": 158280 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013095889249295088, + "loss": 0.435, + "step": 158290 + }, + { + "epoch": 6.69, + "learning_rate": 0.00013091649176365835, + "loss": 0.4352, + "step": 158300 + }, + { + "epoch": 6.69, + "learning_rate": 0.0001308740910343658, + "loss": 0.4161, + "step": 158310 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013083169030507326, + "loss": 0.3599, + "step": 158320 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013078928957578072, + "loss": 0.4158, + "step": 158330 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013074688884648816, + "loss": 0.4042, + "step": 158340 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013070448811719563, + "loss": 0.3857, + "step": 158350 + }, + { + "epoch": 6.7, + "learning_rate": 0.0001306620873879031, + "loss": 0.4633, + "step": 158360 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013061968665861054, + "loss": 0.4538, + "step": 158370 + }, + { + "epoch": 6.7, + "learning_rate": 0.000130577285929318, + "loss": 0.4081, + "step": 158380 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013053488520002544, + "loss": 0.4473, + "step": 158390 + }, + { + "epoch": 6.7, + "learning_rate": 0.0001304924844707329, + "loss": 0.4251, + "step": 158400 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013045008374144038, + "loss": 0.4632, + "step": 158410 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013040768301214782, + "loss": 0.4083, + "step": 158420 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013036528228285528, + "loss": 0.4386, + "step": 158430 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013032288155356275, + "loss": 0.4343, + "step": 158440 + }, + { + "epoch": 6.7, + "learning_rate": 0.0001302804808242702, + "loss": 0.3543, + "step": 158450 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013023808009497766, + "loss": 0.4665, + "step": 158460 + }, + { + "epoch": 6.7, + "learning_rate": 0.0001301956793656851, + "loss": 0.452, + "step": 158470 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013015327863639256, + "loss": 0.3792, + "step": 158480 + }, + { + "epoch": 6.7, + "learning_rate": 0.0001301108779071, + "loss": 0.4421, + "step": 158490 + }, + { + "epoch": 6.7, + "learning_rate": 0.00013006847717780747, + "loss": 0.412, + "step": 158500 + }, + { + "epoch": 6.7, + "learning_rate": 0.0001300260764485149, + "loss": 0.3561, + "step": 158510 + }, + { + "epoch": 6.7, + "learning_rate": 0.00012998367571922238, + "loss": 0.4143, + "step": 158520 + }, + { + "epoch": 6.7, + "learning_rate": 0.00012994127498992982, + "loss": 0.4288, + "step": 158530 + }, + { + "epoch": 6.7, + "learning_rate": 0.00012989887426063728, + "loss": 0.4681, + "step": 158540 + }, + { + "epoch": 6.7, + "learning_rate": 0.00012985647353134475, + "loss": 0.4353, + "step": 158550 + }, + { + "epoch": 6.71, + "learning_rate": 0.0001298140728020522, + "loss": 0.4049, + "step": 158560 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012977167207275966, + "loss": 0.4624, + "step": 158570 + }, + { + "epoch": 6.71, + "learning_rate": 0.0001297292713434671, + "loss": 0.3986, + "step": 158580 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012968687061417456, + "loss": 0.4676, + "step": 158590 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012964446988488203, + "loss": 0.4308, + "step": 158600 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012960206915558947, + "loss": 0.4365, + "step": 158610 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012955966842629694, + "loss": 0.4432, + "step": 158620 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012951726769700437, + "loss": 0.4392, + "step": 158630 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012947486696771184, + "loss": 0.3623, + "step": 158640 + }, + { + "epoch": 6.71, + "learning_rate": 0.0001294324662384193, + "loss": 0.4553, + "step": 158650 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012939006550912675, + "loss": 0.3848, + "step": 158660 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012934766477983421, + "loss": 0.3446, + "step": 158670 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012930526405054168, + "loss": 0.4102, + "step": 158680 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012926286332124912, + "loss": 0.4517, + "step": 158690 + }, + { + "epoch": 6.71, + "learning_rate": 0.0001292204625919566, + "loss": 0.3269, + "step": 158700 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012917806186266403, + "loss": 0.4255, + "step": 158710 + }, + { + "epoch": 6.71, + "learning_rate": 0.0001291356611333715, + "loss": 0.4326, + "step": 158720 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012909326040407896, + "loss": 0.4231, + "step": 158730 + }, + { + "epoch": 6.71, + "learning_rate": 0.0001290508596747864, + "loss": 0.4145, + "step": 158740 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012900845894549387, + "loss": 0.4713, + "step": 158750 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012896605821620133, + "loss": 0.4295, + "step": 158760 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012892365748690877, + "loss": 0.3763, + "step": 158770 + }, + { + "epoch": 6.71, + "learning_rate": 0.00012888125675761624, + "loss": 0.3811, + "step": 158780 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012883885602832368, + "loss": 0.4104, + "step": 158790 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012879645529903115, + "loss": 0.4254, + "step": 158800 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012875405456973861, + "loss": 0.3958, + "step": 158810 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012871165384044605, + "loss": 0.4489, + "step": 158820 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012866925311115352, + "loss": 0.3966, + "step": 158830 + }, + { + "epoch": 6.72, + "learning_rate": 0.000128626852381861, + "loss": 0.4117, + "step": 158840 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012858445165256843, + "loss": 0.4345, + "step": 158850 + }, + { + "epoch": 6.72, + "learning_rate": 0.0001285420509232759, + "loss": 0.3889, + "step": 158860 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012849965019398333, + "loss": 0.4425, + "step": 158870 + }, + { + "epoch": 6.72, + "learning_rate": 0.0001284572494646908, + "loss": 0.4487, + "step": 158880 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012841484873539827, + "loss": 0.4026, + "step": 158890 + }, + { + "epoch": 6.72, + "learning_rate": 0.0001283724480061057, + "loss": 0.4299, + "step": 158900 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012833004727681317, + "loss": 0.3529, + "step": 158910 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012828764654752061, + "loss": 0.4214, + "step": 158920 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012824524581822808, + "loss": 0.4637, + "step": 158930 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012820284508893555, + "loss": 0.396, + "step": 158940 + }, + { + "epoch": 6.72, + "learning_rate": 0.000128160444359643, + "loss": 0.4552, + "step": 158950 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012811804363035045, + "loss": 0.4131, + "step": 158960 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012807564290105792, + "loss": 0.42, + "step": 158970 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012803324217176536, + "loss": 0.3663, + "step": 158980 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012799084144247283, + "loss": 0.4124, + "step": 158990 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012794844071318027, + "loss": 0.4342, + "step": 159000 + }, + { + "epoch": 6.72, + "learning_rate": 0.00012790603998388773, + "loss": 0.3737, + "step": 159010 + }, + { + "epoch": 6.72, + "learning_rate": 0.0001278636392545952, + "loss": 0.4225, + "step": 159020 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012782123852530264, + "loss": 0.4392, + "step": 159030 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001277788377960101, + "loss": 0.4131, + "step": 159040 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012773643706671757, + "loss": 0.4228, + "step": 159050 + }, + { + "epoch": 6.73, + "learning_rate": 0.000127694036337425, + "loss": 0.4758, + "step": 159060 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012765163560813248, + "loss": 0.3479, + "step": 159070 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012760923487883992, + "loss": 0.4733, + "step": 159080 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001275668341495474, + "loss": 0.4288, + "step": 159090 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012752443342025485, + "loss": 0.4685, + "step": 159100 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001274820326909623, + "loss": 0.4455, + "step": 159110 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012743963196166976, + "loss": 0.4536, + "step": 159120 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001273972312323772, + "loss": 0.3953, + "step": 159130 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012735483050308467, + "loss": 0.4363, + "step": 159140 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001273124297737921, + "loss": 0.3678, + "step": 159150 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012727002904449957, + "loss": 0.4257, + "step": 159160 + }, + { + "epoch": 6.73, + "learning_rate": 0.000127227628315207, + "loss": 0.4002, + "step": 159170 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012718522758591448, + "loss": 0.4228, + "step": 159180 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012714282685662195, + "loss": 0.3618, + "step": 159190 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012710042612732939, + "loss": 0.411, + "step": 159200 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012705802539803685, + "loss": 0.3611, + "step": 159210 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001270156246687443, + "loss": 0.4122, + "step": 159220 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012697322393945176, + "loss": 0.4197, + "step": 159230 + }, + { + "epoch": 6.73, + "learning_rate": 0.0001269308232101592, + "loss": 0.4736, + "step": 159240 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012688842248086667, + "loss": 0.4356, + "step": 159250 + }, + { + "epoch": 6.73, + "learning_rate": 0.00012684602175157413, + "loss": 0.342, + "step": 159260 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012680362102228157, + "loss": 0.3973, + "step": 159270 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012676122029298904, + "loss": 0.325, + "step": 159280 + }, + { + "epoch": 6.74, + "learning_rate": 0.0001267188195636965, + "loss": 0.429, + "step": 159290 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012667641883440395, + "loss": 0.4751, + "step": 159300 + }, + { + "epoch": 6.74, + "learning_rate": 0.0001266340181051114, + "loss": 0.4077, + "step": 159310 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012659161737581885, + "loss": 0.4727, + "step": 159320 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012654921664652632, + "loss": 0.4407, + "step": 159330 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012650681591723379, + "loss": 0.4166, + "step": 159340 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012646441518794122, + "loss": 0.4489, + "step": 159350 + }, + { + "epoch": 6.74, + "learning_rate": 0.0001264220144586487, + "loss": 0.4584, + "step": 159360 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012637961372935616, + "loss": 0.4244, + "step": 159370 + }, + { + "epoch": 6.74, + "learning_rate": 0.0001263372130000636, + "loss": 0.4072, + "step": 159380 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012629481227077106, + "loss": 0.4198, + "step": 159390 + }, + { + "epoch": 6.74, + "learning_rate": 0.0001262524115414785, + "loss": 0.4104, + "step": 159400 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012621001081218597, + "loss": 0.4027, + "step": 159410 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012616761008289344, + "loss": 0.3846, + "step": 159420 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012612520935360088, + "loss": 0.3633, + "step": 159430 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012608280862430834, + "loss": 0.457, + "step": 159440 + }, + { + "epoch": 6.74, + "learning_rate": 0.0001260404078950158, + "loss": 0.3168, + "step": 159450 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012599800716572325, + "loss": 0.4067, + "step": 159460 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012595560643643072, + "loss": 0.4226, + "step": 159470 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012591320570713816, + "loss": 0.4864, + "step": 159480 + }, + { + "epoch": 6.74, + "learning_rate": 0.00012587080497784562, + "loss": 0.4166, + "step": 159490 + }, + { + "epoch": 6.75, + "learning_rate": 0.0001258284042485531, + "loss": 0.3989, + "step": 159500 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012578600351926053, + "loss": 0.4028, + "step": 159510 + }, + { + "epoch": 6.75, + "learning_rate": 0.000125743602789968, + "loss": 0.3738, + "step": 159520 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012570120206067546, + "loss": 0.424, + "step": 159530 + }, + { + "epoch": 6.75, + "learning_rate": 0.0001256588013313829, + "loss": 0.4032, + "step": 159540 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012561640060209037, + "loss": 0.3911, + "step": 159550 + }, + { + "epoch": 6.75, + "learning_rate": 0.0001255739998727978, + "loss": 0.4763, + "step": 159560 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012553159914350528, + "loss": 0.3946, + "step": 159570 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012548919841421274, + "loss": 0.4117, + "step": 159580 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012544679768492018, + "loss": 0.442, + "step": 159590 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012540439695562765, + "loss": 0.4974, + "step": 159600 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012536199622633512, + "loss": 0.3765, + "step": 159610 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012531959549704256, + "loss": 0.4534, + "step": 159620 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012527719476775002, + "loss": 0.3846, + "step": 159630 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012523479403845746, + "loss": 0.3766, + "step": 159640 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012519239330916493, + "loss": 0.4488, + "step": 159650 + }, + { + "epoch": 6.75, + "learning_rate": 0.0001251499925798724, + "loss": 0.3595, + "step": 159660 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012510759185057984, + "loss": 0.4384, + "step": 159670 + }, + { + "epoch": 6.75, + "learning_rate": 0.0001250651911212873, + "loss": 0.4275, + "step": 159680 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012502279039199474, + "loss": 0.3955, + "step": 159690 + }, + { + "epoch": 6.75, + "learning_rate": 0.0001249803896627022, + "loss": 0.4127, + "step": 159700 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012493798893340968, + "loss": 0.4505, + "step": 159710 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012489558820411712, + "loss": 0.448, + "step": 159720 + }, + { + "epoch": 6.75, + "learning_rate": 0.00012485318747482458, + "loss": 0.434, + "step": 159730 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012481078674553205, + "loss": 0.4885, + "step": 159740 + }, + { + "epoch": 6.76, + "learning_rate": 0.0001247683860162395, + "loss": 0.4095, + "step": 159750 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012472598528694696, + "loss": 0.418, + "step": 159760 + }, + { + "epoch": 6.76, + "learning_rate": 0.0001246835845576544, + "loss": 0.4174, + "step": 159770 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012464118382836186, + "loss": 0.4013, + "step": 159780 + }, + { + "epoch": 6.76, + "learning_rate": 0.0001245987830990693, + "loss": 0.3361, + "step": 159790 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012455638236977677, + "loss": 0.3713, + "step": 159800 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012451398164048424, + "loss": 0.4141, + "step": 159810 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012447158091119168, + "loss": 0.5273, + "step": 159820 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012442918018189914, + "loss": 0.395, + "step": 159830 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012438677945260658, + "loss": 0.4621, + "step": 159840 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012434437872331405, + "loss": 0.3713, + "step": 159850 + }, + { + "epoch": 6.76, + "learning_rate": 0.0001243019779940215, + "loss": 0.452, + "step": 159860 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012425957726472896, + "loss": 0.4523, + "step": 159870 + }, + { + "epoch": 6.76, + "learning_rate": 0.0001242171765354364, + "loss": 0.3627, + "step": 159880 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012417477580614386, + "loss": 0.3808, + "step": 159890 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012413237507685133, + "loss": 0.4035, + "step": 159900 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012408997434755877, + "loss": 0.3686, + "step": 159910 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012404757361826624, + "loss": 0.3698, + "step": 159920 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012400517288897368, + "loss": 0.414, + "step": 159930 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012396277215968114, + "loss": 0.3679, + "step": 159940 + }, + { + "epoch": 6.76, + "learning_rate": 0.0001239203714303886, + "loss": 0.3984, + "step": 159950 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012387797070109605, + "loss": 0.4771, + "step": 159960 + }, + { + "epoch": 6.76, + "learning_rate": 0.00012383556997180352, + "loss": 0.3626, + "step": 159970 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012379316924251098, + "loss": 0.4223, + "step": 159980 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012375076851321842, + "loss": 0.3674, + "step": 159990 + }, + { + "epoch": 6.77, + "learning_rate": 0.0001237083677839259, + "loss": 0.4429, + "step": 160000 + }, + { + "epoch": 6.77, + "eval_loss": 0.6193305253982544, + "eval_runtime": 337.814, + "eval_samples_per_second": 15.556, + "eval_steps_per_second": 3.89, + "step": 160000 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012366596705463333, + "loss": 0.3586, + "step": 160010 + }, + { + "epoch": 6.77, + "learning_rate": 0.0001236235663253408, + "loss": 0.4581, + "step": 160020 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012358116559604826, + "loss": 0.4582, + "step": 160030 + }, + { + "epoch": 6.77, + "learning_rate": 0.0001235387648667557, + "loss": 0.4384, + "step": 160040 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012349636413746317, + "loss": 0.4175, + "step": 160050 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012345396340817063, + "loss": 0.4511, + "step": 160060 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012341156267887807, + "loss": 0.4035, + "step": 160070 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012336916194958554, + "loss": 0.3402, + "step": 160080 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012332676122029298, + "loss": 0.4397, + "step": 160090 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012328436049100045, + "loss": 0.4377, + "step": 160100 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012324195976170791, + "loss": 0.3871, + "step": 160110 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012319955903241535, + "loss": 0.4212, + "step": 160120 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012315715830312282, + "loss": 0.407, + "step": 160130 + }, + { + "epoch": 6.77, + "learning_rate": 0.0001231147575738303, + "loss": 0.4141, + "step": 160140 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012307235684453773, + "loss": 0.3954, + "step": 160150 + }, + { + "epoch": 6.77, + "learning_rate": 0.0001230299561152452, + "loss": 0.448, + "step": 160160 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012298755538595263, + "loss": 0.4154, + "step": 160170 + }, + { + "epoch": 6.77, + "learning_rate": 0.0001229451546566601, + "loss": 0.4147, + "step": 160180 + }, + { + "epoch": 6.77, + "learning_rate": 0.00012290275392736757, + "loss": 0.3833, + "step": 160190 + }, + { + "epoch": 6.77, + "learning_rate": 0.000122860353198075, + "loss": 0.4365, + "step": 160200 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012281795246878247, + "loss": 0.4368, + "step": 160210 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012277555173948994, + "loss": 0.4194, + "step": 160220 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012273315101019738, + "loss": 0.432, + "step": 160230 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012269075028090485, + "loss": 0.4702, + "step": 160240 + }, + { + "epoch": 6.78, + "learning_rate": 0.0001226483495516123, + "loss": 0.3777, + "step": 160250 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012260594882231975, + "loss": 0.4414, + "step": 160260 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012256354809302722, + "loss": 0.3737, + "step": 160270 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012252114736373466, + "loss": 0.5201, + "step": 160280 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012247874663444213, + "loss": 0.4046, + "step": 160290 + }, + { + "epoch": 6.78, + "learning_rate": 0.0001224363459051496, + "loss": 0.4428, + "step": 160300 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012239394517585703, + "loss": 0.4452, + "step": 160310 + }, + { + "epoch": 6.78, + "learning_rate": 0.0001223515444465645, + "loss": 0.4505, + "step": 160320 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012230914371727194, + "loss": 0.355, + "step": 160330 + }, + { + "epoch": 6.78, + "learning_rate": 0.0001222667429879794, + "loss": 0.3732, + "step": 160340 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012222434225868687, + "loss": 0.4491, + "step": 160350 + }, + { + "epoch": 6.78, + "learning_rate": 0.0001221819415293943, + "loss": 0.4507, + "step": 160360 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012213954080010178, + "loss": 0.4283, + "step": 160370 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012209714007080922, + "loss": 0.4181, + "step": 160380 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012205473934151667, + "loss": 0.415, + "step": 160390 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012201233861222414, + "loss": 0.4026, + "step": 160400 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012196993788293158, + "loss": 0.5036, + "step": 160410 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012192753715363905, + "loss": 0.4076, + "step": 160420 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012188513642434651, + "loss": 0.3875, + "step": 160430 + }, + { + "epoch": 6.78, + "learning_rate": 0.00012184273569505395, + "loss": 0.4771, + "step": 160440 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012180033496576142, + "loss": 0.4242, + "step": 160450 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012175793423646886, + "loss": 0.367, + "step": 160460 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012171553350717633, + "loss": 0.4615, + "step": 160470 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012167313277788379, + "loss": 0.4065, + "step": 160480 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012163073204859123, + "loss": 0.4618, + "step": 160490 + }, + { + "epoch": 6.79, + "learning_rate": 0.0001215883313192987, + "loss": 0.3895, + "step": 160500 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012154593059000617, + "loss": 0.3979, + "step": 160510 + }, + { + "epoch": 6.79, + "learning_rate": 0.0001215035298607136, + "loss": 0.4776, + "step": 160520 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012146112913142107, + "loss": 0.4243, + "step": 160530 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012141872840212851, + "loss": 0.3659, + "step": 160540 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012137632767283598, + "loss": 0.3813, + "step": 160550 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012133392694354345, + "loss": 0.3998, + "step": 160560 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012129152621425089, + "loss": 0.3773, + "step": 160570 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012124912548495835, + "loss": 0.3941, + "step": 160580 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012120672475566582, + "loss": 0.4783, + "step": 160590 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012116432402637326, + "loss": 0.4185, + "step": 160600 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012112192329708073, + "loss": 0.3467, + "step": 160610 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012107952256778817, + "loss": 0.4063, + "step": 160620 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012103712183849563, + "loss": 0.3242, + "step": 160630 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012099472110920309, + "loss": 0.4444, + "step": 160640 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012095232037991054, + "loss": 0.3551, + "step": 160650 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012090991965061799, + "loss": 0.392, + "step": 160660 + }, + { + "epoch": 6.79, + "learning_rate": 0.00012086751892132546, + "loss": 0.3785, + "step": 160670 + }, + { + "epoch": 6.79, + "learning_rate": 0.0001208251181920329, + "loss": 0.486, + "step": 160680 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012078271746274037, + "loss": 0.4681, + "step": 160690 + }, + { + "epoch": 6.8, + "learning_rate": 0.0001207403167334478, + "loss": 0.4396, + "step": 160700 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012069791600415527, + "loss": 0.3874, + "step": 160710 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012065551527486274, + "loss": 0.4349, + "step": 160720 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012061311454557018, + "loss": 0.4447, + "step": 160730 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012057071381627764, + "loss": 0.435, + "step": 160740 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012052831308698511, + "loss": 0.4351, + "step": 160750 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012048591235769255, + "loss": 0.4847, + "step": 160760 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012044351162840002, + "loss": 0.4171, + "step": 160770 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012040111089910746, + "loss": 0.458, + "step": 160780 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012035871016981492, + "loss": 0.4465, + "step": 160790 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012031630944052239, + "loss": 0.4071, + "step": 160800 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012027390871122983, + "loss": 0.4501, + "step": 160810 + }, + { + "epoch": 6.8, + "learning_rate": 0.0001202315079819373, + "loss": 0.4112, + "step": 160820 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012018910725264476, + "loss": 0.3814, + "step": 160830 + }, + { + "epoch": 6.8, + "learning_rate": 0.0001201467065233522, + "loss": 0.4318, + "step": 160840 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012010430579405967, + "loss": 0.4259, + "step": 160850 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012006190506476711, + "loss": 0.4464, + "step": 160860 + }, + { + "epoch": 6.8, + "learning_rate": 0.00012001950433547458, + "loss": 0.4427, + "step": 160870 + }, + { + "epoch": 6.8, + "learning_rate": 0.00011997710360618204, + "loss": 0.4449, + "step": 160880 + }, + { + "epoch": 6.8, + "learning_rate": 0.00011993470287688948, + "loss": 0.4828, + "step": 160890 + }, + { + "epoch": 6.8, + "learning_rate": 0.00011989230214759695, + "loss": 0.4887, + "step": 160900 + }, + { + "epoch": 6.8, + "learning_rate": 0.00011984990141830442, + "loss": 0.4053, + "step": 160910 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011980750068901186, + "loss": 0.4456, + "step": 160920 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011976509995971932, + "loss": 0.534, + "step": 160930 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011972269923042676, + "loss": 0.4499, + "step": 160940 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011968029850113423, + "loss": 0.4008, + "step": 160950 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011963789777184168, + "loss": 0.4225, + "step": 160960 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011959549704254914, + "loss": 0.3679, + "step": 160970 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011955309631325659, + "loss": 0.3583, + "step": 160980 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011951069558396406, + "loss": 0.4077, + "step": 160990 + }, + { + "epoch": 6.81, + "learning_rate": 0.0001194682948546715, + "loss": 0.4013, + "step": 161000 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011942589412537896, + "loss": 0.4033, + "step": 161010 + }, + { + "epoch": 6.81, + "learning_rate": 0.0001193834933960864, + "loss": 0.3432, + "step": 161020 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011934109266679387, + "loss": 0.5049, + "step": 161030 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011929869193750134, + "loss": 0.4097, + "step": 161040 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011925629120820878, + "loss": 0.4791, + "step": 161050 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011921389047891624, + "loss": 0.3714, + "step": 161060 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011917148974962368, + "loss": 0.3731, + "step": 161070 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011912908902033115, + "loss": 0.3103, + "step": 161080 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011908668829103862, + "loss": 0.4273, + "step": 161090 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011904428756174606, + "loss": 0.3363, + "step": 161100 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011900188683245352, + "loss": 0.3838, + "step": 161110 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011895948610316099, + "loss": 0.4587, + "step": 161120 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011891708537386843, + "loss": 0.4411, + "step": 161130 + }, + { + "epoch": 6.81, + "learning_rate": 0.0001188746846445759, + "loss": 0.3964, + "step": 161140 + }, + { + "epoch": 6.81, + "learning_rate": 0.00011883228391528334, + "loss": 0.3941, + "step": 161150 + }, + { + "epoch": 6.82, + "learning_rate": 0.0001187898831859908, + "loss": 0.3977, + "step": 161160 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011874748245669827, + "loss": 0.3744, + "step": 161170 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011870508172740571, + "loss": 0.3732, + "step": 161180 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011866268099811318, + "loss": 0.3863, + "step": 161190 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011862028026882064, + "loss": 0.4746, + "step": 161200 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011857787953952808, + "loss": 0.4259, + "step": 161210 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011853547881023555, + "loss": 0.4235, + "step": 161220 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011849307808094299, + "loss": 0.4105, + "step": 161230 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011845067735165046, + "loss": 0.4454, + "step": 161240 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011840827662235792, + "loss": 0.3912, + "step": 161250 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011836587589306536, + "loss": 0.4487, + "step": 161260 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011832347516377283, + "loss": 0.4294, + "step": 161270 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011828107443448028, + "loss": 0.4226, + "step": 161280 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011823867370518774, + "loss": 0.4034, + "step": 161290 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011819627297589519, + "loss": 0.4225, + "step": 161300 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011815387224660264, + "loss": 0.4603, + "step": 161310 + }, + { + "epoch": 6.82, + "learning_rate": 0.0001181114715173101, + "loss": 0.4602, + "step": 161320 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011806907078801756, + "loss": 0.3774, + "step": 161330 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011802667005872502, + "loss": 0.3671, + "step": 161340 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011798426932943247, + "loss": 0.3734, + "step": 161350 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011794186860013994, + "loss": 0.4843, + "step": 161360 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011789946787084737, + "loss": 0.4343, + "step": 161370 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011785706714155484, + "loss": 0.4857, + "step": 161380 + }, + { + "epoch": 6.82, + "learning_rate": 0.00011781466641226228, + "loss": 0.4503, + "step": 161390 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011777226568296975, + "loss": 0.4431, + "step": 161400 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011772986495367721, + "loss": 0.3792, + "step": 161410 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011768746422438465, + "loss": 0.4144, + "step": 161420 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011764506349509212, + "loss": 0.4078, + "step": 161430 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011760266276579959, + "loss": 0.459, + "step": 161440 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011756026203650703, + "loss": 0.4673, + "step": 161450 + }, + { + "epoch": 6.83, + "learning_rate": 0.0001175178613072145, + "loss": 0.4341, + "step": 161460 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011747546057792193, + "loss": 0.4034, + "step": 161470 + }, + { + "epoch": 6.83, + "learning_rate": 0.0001174330598486294, + "loss": 0.4057, + "step": 161480 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011739065911933687, + "loss": 0.4452, + "step": 161490 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011734825839004431, + "loss": 0.3836, + "step": 161500 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011730585766075177, + "loss": 0.4964, + "step": 161510 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011726345693145924, + "loss": 0.436, + "step": 161520 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011722105620216668, + "loss": 0.4359, + "step": 161530 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011717865547287415, + "loss": 0.4204, + "step": 161540 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011713625474358159, + "loss": 0.4058, + "step": 161550 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011709385401428905, + "loss": 0.3547, + "step": 161560 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011705145328499652, + "loss": 0.3769, + "step": 161570 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011700905255570396, + "loss": 0.3594, + "step": 161580 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011696665182641143, + "loss": 0.3718, + "step": 161590 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011692425109711888, + "loss": 0.4571, + "step": 161600 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011688185036782633, + "loss": 0.4376, + "step": 161610 + }, + { + "epoch": 6.83, + "learning_rate": 0.00011683944963853379, + "loss": 0.4902, + "step": 161620 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011679704890924124, + "loss": 0.4363, + "step": 161630 + }, + { + "epoch": 6.84, + "learning_rate": 0.0001167546481799487, + "loss": 0.4485, + "step": 161640 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011671224745065616, + "loss": 0.4024, + "step": 161650 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011666984672136361, + "loss": 0.376, + "step": 161660 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011662744599207107, + "loss": 0.4457, + "step": 161670 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011658504526277853, + "loss": 0.4057, + "step": 161680 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011654264453348597, + "loss": 0.3863, + "step": 161690 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011650024380419344, + "loss": 0.3155, + "step": 161700 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011645784307490088, + "loss": 0.4464, + "step": 161710 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011641544234560835, + "loss": 0.3907, + "step": 161720 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011637304161631581, + "loss": 0.4546, + "step": 161730 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011633064088702325, + "loss": 0.4365, + "step": 161740 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011628824015773072, + "loss": 0.4225, + "step": 161750 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011624583942843819, + "loss": 0.4376, + "step": 161760 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011620343869914563, + "loss": 0.4492, + "step": 161770 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011616103796985309, + "loss": 0.374, + "step": 161780 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011611863724056053, + "loss": 0.381, + "step": 161790 + }, + { + "epoch": 6.84, + "learning_rate": 0.000116076236511268, + "loss": 0.4006, + "step": 161800 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011603383578197547, + "loss": 0.3646, + "step": 161810 + }, + { + "epoch": 6.84, + "learning_rate": 0.0001159914350526829, + "loss": 0.4144, + "step": 161820 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011594903432339037, + "loss": 0.5415, + "step": 161830 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011590663359409781, + "loss": 0.4381, + "step": 161840 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011586423286480528, + "loss": 0.3854, + "step": 161850 + }, + { + "epoch": 6.84, + "learning_rate": 0.00011582183213551275, + "loss": 0.3917, + "step": 161860 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011577943140622019, + "loss": 0.423, + "step": 161870 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011573703067692765, + "loss": 0.3615, + "step": 161880 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011569462994763512, + "loss": 0.4412, + "step": 161890 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011565222921834256, + "loss": 0.4433, + "step": 161900 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011560982848905003, + "loss": 0.3654, + "step": 161910 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011556742775975747, + "loss": 0.3885, + "step": 161920 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011552502703046493, + "loss": 0.4587, + "step": 161930 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011548262630117239, + "loss": 0.4118, + "step": 161940 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011544022557187984, + "loss": 0.3761, + "step": 161950 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011539782484258729, + "loss": 0.4227, + "step": 161960 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011535542411329476, + "loss": 0.4481, + "step": 161970 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011531302338400221, + "loss": 0.4033, + "step": 161980 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011527062265470967, + "loss": 0.4975, + "step": 161990 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011522822192541712, + "loss": 0.4327, + "step": 162000 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011518582119612457, + "loss": 0.4076, + "step": 162010 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011514342046683204, + "loss": 0.4186, + "step": 162020 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011510101973753948, + "loss": 0.3624, + "step": 162030 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011505861900824695, + "loss": 0.3757, + "step": 162040 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011501621827895441, + "loss": 0.4108, + "step": 162050 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011497381754966185, + "loss": 0.397, + "step": 162060 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011493141682036932, + "loss": 0.4245, + "step": 162070 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011488901609107676, + "loss": 0.4892, + "step": 162080 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011484661536178422, + "loss": 0.4784, + "step": 162090 + }, + { + "epoch": 6.85, + "learning_rate": 0.00011480421463249169, + "loss": 0.3936, + "step": 162100 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011476181390319913, + "loss": 0.3964, + "step": 162110 + }, + { + "epoch": 6.86, + "learning_rate": 0.0001147194131739066, + "loss": 0.2985, + "step": 162120 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011467701244461406, + "loss": 0.3979, + "step": 162130 + }, + { + "epoch": 6.86, + "learning_rate": 0.0001146346117153215, + "loss": 0.4363, + "step": 162140 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011459221098602897, + "loss": 0.4486, + "step": 162150 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011454981025673641, + "loss": 0.3953, + "step": 162160 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011450740952744388, + "loss": 0.3528, + "step": 162170 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011446500879815134, + "loss": 0.3992, + "step": 162180 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011442260806885878, + "loss": 0.4795, + "step": 162190 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011438020733956625, + "loss": 0.3902, + "step": 162200 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011433780661027372, + "loss": 0.394, + "step": 162210 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011429540588098116, + "loss": 0.3972, + "step": 162220 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011425300515168862, + "loss": 0.3891, + "step": 162230 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011421060442239606, + "loss": 0.4387, + "step": 162240 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011416820369310353, + "loss": 0.4759, + "step": 162250 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011412580296381098, + "loss": 0.4014, + "step": 162260 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011408340223451844, + "loss": 0.4274, + "step": 162270 + }, + { + "epoch": 6.86, + "learning_rate": 0.0001140410015052259, + "loss": 0.4149, + "step": 162280 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011399860077593336, + "loss": 0.4046, + "step": 162290 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011395620004664081, + "loss": 0.4047, + "step": 162300 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011391379931734826, + "loss": 0.4413, + "step": 162310 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011387139858805572, + "loss": 0.433, + "step": 162320 + }, + { + "epoch": 6.86, + "learning_rate": 0.00011382899785876317, + "loss": 0.4586, + "step": 162330 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011378659712947064, + "loss": 0.4153, + "step": 162340 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011374419640017808, + "loss": 0.4736, + "step": 162350 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011370179567088554, + "loss": 0.4401, + "step": 162360 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011365939494159301, + "loss": 0.4231, + "step": 162370 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011361699421230045, + "loss": 0.4602, + "step": 162380 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011357459348300792, + "loss": 0.3744, + "step": 162390 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011353219275371536, + "loss": 0.441, + "step": 162400 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011348979202442282, + "loss": 0.466, + "step": 162410 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011344739129513029, + "loss": 0.3235, + "step": 162420 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011340499056583773, + "loss": 0.3576, + "step": 162430 + }, + { + "epoch": 6.87, + "learning_rate": 0.0001133625898365452, + "loss": 0.4398, + "step": 162440 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011332018910725266, + "loss": 0.4316, + "step": 162450 + }, + { + "epoch": 6.87, + "learning_rate": 0.0001132777883779601, + "loss": 0.3896, + "step": 162460 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011323538764866757, + "loss": 0.387, + "step": 162470 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011319298691937501, + "loss": 0.3852, + "step": 162480 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011315058619008248, + "loss": 0.3789, + "step": 162490 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011310818546078994, + "loss": 0.3927, + "step": 162500 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011306578473149738, + "loss": 0.3967, + "step": 162510 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011302338400220485, + "loss": 0.4019, + "step": 162520 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011298098327291229, + "loss": 0.4174, + "step": 162530 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011293858254361976, + "loss": 0.4147, + "step": 162540 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011289618181432722, + "loss": 0.3213, + "step": 162550 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011285378108503466, + "loss": 0.4078, + "step": 162560 + }, + { + "epoch": 6.87, + "learning_rate": 0.00011281138035574213, + "loss": 0.3978, + "step": 162570 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011276897962644958, + "loss": 0.4493, + "step": 162580 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011272657889715704, + "loss": 0.3932, + "step": 162590 + }, + { + "epoch": 6.88, + "learning_rate": 0.0001126841781678645, + "loss": 0.3711, + "step": 162600 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011264177743857194, + "loss": 0.4203, + "step": 162610 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011259937670927941, + "loss": 0.4329, + "step": 162620 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011255697597998686, + "loss": 0.3939, + "step": 162630 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011251457525069432, + "loss": 0.4899, + "step": 162640 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011247217452140177, + "loss": 0.4876, + "step": 162650 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011242977379210924, + "loss": 0.4664, + "step": 162660 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011238737306281668, + "loss": 0.4562, + "step": 162670 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011234497233352414, + "loss": 0.3758, + "step": 162680 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011230257160423158, + "loss": 0.4114, + "step": 162690 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011226017087493905, + "loss": 0.4234, + "step": 162700 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011221777014564652, + "loss": 0.4023, + "step": 162710 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011217536941635395, + "loss": 0.3744, + "step": 162720 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011213296868706142, + "loss": 0.4321, + "step": 162730 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011209056795776889, + "loss": 0.4304, + "step": 162740 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011204816722847633, + "loss": 0.4506, + "step": 162750 + }, + { + "epoch": 6.88, + "learning_rate": 0.0001120057664991838, + "loss": 0.4174, + "step": 162760 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011196336576989123, + "loss": 0.4715, + "step": 162770 + }, + { + "epoch": 6.88, + "learning_rate": 0.0001119209650405987, + "loss": 0.4083, + "step": 162780 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011187856431130617, + "loss": 0.4788, + "step": 162790 + }, + { + "epoch": 6.88, + "learning_rate": 0.00011183616358201361, + "loss": 0.4185, + "step": 162800 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011179376285272107, + "loss": 0.4064, + "step": 162810 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011175136212342854, + "loss": 0.3524, + "step": 162820 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011170896139413598, + "loss": 0.3894, + "step": 162830 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011166656066484345, + "loss": 0.4624, + "step": 162840 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011162415993555089, + "loss": 0.3803, + "step": 162850 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011158175920625835, + "loss": 0.452, + "step": 162860 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011153935847696582, + "loss": 0.456, + "step": 162870 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011149695774767326, + "loss": 0.3846, + "step": 162880 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011145455701838073, + "loss": 0.4696, + "step": 162890 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011141215628908818, + "loss": 0.3727, + "step": 162900 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011136975555979563, + "loss": 0.3912, + "step": 162910 + }, + { + "epoch": 6.89, + "learning_rate": 0.0001113273548305031, + "loss": 0.3799, + "step": 162920 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011128495410121054, + "loss": 0.3391, + "step": 162930 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011124255337191801, + "loss": 0.381, + "step": 162940 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011120015264262546, + "loss": 0.4316, + "step": 162950 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011115775191333291, + "loss": 0.4006, + "step": 162960 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011111535118404037, + "loss": 0.4757, + "step": 162970 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011107295045474783, + "loss": 0.4355, + "step": 162980 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011103054972545527, + "loss": 0.4215, + "step": 162990 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011098814899616274, + "loss": 0.4629, + "step": 163000 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011094574826687018, + "loss": 0.4878, + "step": 163010 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011090334753757765, + "loss": 0.4175, + "step": 163020 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011086094680828511, + "loss": 0.3979, + "step": 163030 + }, + { + "epoch": 6.89, + "learning_rate": 0.00011081854607899255, + "loss": 0.3632, + "step": 163040 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011077614534970002, + "loss": 0.411, + "step": 163050 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011073374462040749, + "loss": 0.4222, + "step": 163060 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011069134389111493, + "loss": 0.4223, + "step": 163070 + }, + { + "epoch": 6.9, + "learning_rate": 0.0001106489431618224, + "loss": 0.4233, + "step": 163080 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011060654243252983, + "loss": 0.3864, + "step": 163090 + }, + { + "epoch": 6.9, + "learning_rate": 0.0001105641417032373, + "loss": 0.3348, + "step": 163100 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011052174097394477, + "loss": 0.4167, + "step": 163110 + }, + { + "epoch": 6.9, + "learning_rate": 0.0001104793402446522, + "loss": 0.4564, + "step": 163120 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011043693951535967, + "loss": 0.4776, + "step": 163130 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011039453878606714, + "loss": 0.4068, + "step": 163140 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011035213805677458, + "loss": 0.4089, + "step": 163150 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011030973732748205, + "loss": 0.3361, + "step": 163160 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011026733659818949, + "loss": 0.4712, + "step": 163170 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011022493586889695, + "loss": 0.4372, + "step": 163180 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011018253513960442, + "loss": 0.4144, + "step": 163190 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011014013441031186, + "loss": 0.4042, + "step": 163200 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011009773368101933, + "loss": 0.4799, + "step": 163210 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011005533295172677, + "loss": 0.4231, + "step": 163220 + }, + { + "epoch": 6.9, + "learning_rate": 0.00011001293222243423, + "loss": 0.2971, + "step": 163230 + }, + { + "epoch": 6.9, + "learning_rate": 0.0001099705314931417, + "loss": 0.4183, + "step": 163240 + }, + { + "epoch": 6.9, + "learning_rate": 0.00010992813076384914, + "loss": 0.4522, + "step": 163250 + }, + { + "epoch": 6.9, + "learning_rate": 0.0001098857300345566, + "loss": 0.4334, + "step": 163260 + }, + { + "epoch": 6.9, + "learning_rate": 0.00010984332930526406, + "loss": 0.3781, + "step": 163270 + }, + { + "epoch": 6.9, + "learning_rate": 0.00010980092857597151, + "loss": 0.3935, + "step": 163280 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010975852784667897, + "loss": 0.4039, + "step": 163290 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010971612711738642, + "loss": 0.4406, + "step": 163300 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010967372638809387, + "loss": 0.4856, + "step": 163310 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010963132565880134, + "loss": 0.4007, + "step": 163320 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010958892492950878, + "loss": 0.408, + "step": 163330 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010954652420021625, + "loss": 0.455, + "step": 163340 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010950412347092371, + "loss": 0.4144, + "step": 163350 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010946172274163115, + "loss": 0.4386, + "step": 163360 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010941932201233862, + "loss": 0.4312, + "step": 163370 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010937692128304606, + "loss": 0.381, + "step": 163380 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010933452055375352, + "loss": 0.3705, + "step": 163390 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010929211982446099, + "loss": 0.3503, + "step": 163400 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010924971909516843, + "loss": 0.4344, + "step": 163410 + }, + { + "epoch": 6.91, + "learning_rate": 0.0001092073183658759, + "loss": 0.4151, + "step": 163420 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010916491763658336, + "loss": 0.3877, + "step": 163430 + }, + { + "epoch": 6.91, + "learning_rate": 0.0001091225169072908, + "loss": 0.4096, + "step": 163440 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010908011617799827, + "loss": 0.3702, + "step": 163450 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010903771544870571, + "loss": 0.3954, + "step": 163460 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010899531471941318, + "loss": 0.4581, + "step": 163470 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010895291399012064, + "loss": 0.3823, + "step": 163480 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010891051326082808, + "loss": 0.4085, + "step": 163490 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010886811253153555, + "loss": 0.4403, + "step": 163500 + }, + { + "epoch": 6.91, + "learning_rate": 0.00010882571180224302, + "loss": 0.4557, + "step": 163510 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010878331107295046, + "loss": 0.3916, + "step": 163520 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010874091034365792, + "loss": 0.3904, + "step": 163530 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010869850961436536, + "loss": 0.4107, + "step": 163540 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010865610888507283, + "loss": 0.3764, + "step": 163550 + }, + { + "epoch": 6.92, + "learning_rate": 0.0001086137081557803, + "loss": 0.4031, + "step": 163560 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010857130742648774, + "loss": 0.4384, + "step": 163570 + }, + { + "epoch": 6.92, + "learning_rate": 0.0001085289066971952, + "loss": 0.4087, + "step": 163580 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010848650596790266, + "loss": 0.4064, + "step": 163590 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010844410523861011, + "loss": 0.4826, + "step": 163600 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010840170450931756, + "loss": 0.4048, + "step": 163610 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010835930378002502, + "loss": 0.4183, + "step": 163620 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010831690305073247, + "loss": 0.4324, + "step": 163630 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010827450232143994, + "loss": 0.3959, + "step": 163640 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010823210159214738, + "loss": 0.3588, + "step": 163650 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010818970086285484, + "loss": 0.4751, + "step": 163660 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010814730013356231, + "loss": 0.3934, + "step": 163670 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010810489940426975, + "loss": 0.4515, + "step": 163680 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010806249867497722, + "loss": 0.4145, + "step": 163690 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010802009794568466, + "loss": 0.3632, + "step": 163700 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010797769721639212, + "loss": 0.3606, + "step": 163710 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010793529648709959, + "loss": 0.4412, + "step": 163720 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010789289575780703, + "loss": 0.3875, + "step": 163730 + }, + { + "epoch": 6.92, + "learning_rate": 0.0001078504950285145, + "loss": 0.4931, + "step": 163740 + }, + { + "epoch": 6.92, + "learning_rate": 0.00010780809429922196, + "loss": 0.4473, + "step": 163750 + }, + { + "epoch": 6.93, + "learning_rate": 0.0001077656935699294, + "loss": 0.3961, + "step": 163760 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010772329284063687, + "loss": 0.4438, + "step": 163770 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010768089211134431, + "loss": 0.4414, + "step": 163780 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010763849138205178, + "loss": 0.4168, + "step": 163790 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010759609065275924, + "loss": 0.4724, + "step": 163800 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010755368992346668, + "loss": 0.364, + "step": 163810 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010751128919417415, + "loss": 0.3987, + "step": 163820 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010746888846488162, + "loss": 0.3828, + "step": 163830 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010742648773558906, + "loss": 0.4406, + "step": 163840 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010738408700629652, + "loss": 0.4265, + "step": 163850 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010734168627700396, + "loss": 0.414, + "step": 163860 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010729928554771143, + "loss": 0.4672, + "step": 163870 + }, + { + "epoch": 6.93, + "learning_rate": 0.0001072568848184189, + "loss": 0.3902, + "step": 163880 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010721448408912634, + "loss": 0.4526, + "step": 163890 + }, + { + "epoch": 6.93, + "learning_rate": 0.0001071720833598338, + "loss": 0.4193, + "step": 163900 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010712968263054126, + "loss": 0.4014, + "step": 163910 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010708728190124871, + "loss": 0.4199, + "step": 163920 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010704488117195616, + "loss": 0.4005, + "step": 163930 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010700248044266362, + "loss": 0.4984, + "step": 163940 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010696007971337107, + "loss": 0.3383, + "step": 163950 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010691767898407854, + "loss": 0.4898, + "step": 163960 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010687527825478599, + "loss": 0.4532, + "step": 163970 + }, + { + "epoch": 6.93, + "learning_rate": 0.00010683287752549344, + "loss": 0.4047, + "step": 163980 + }, + { + "epoch": 6.93, + "learning_rate": 0.0001067904767962009, + "loss": 0.4131, + "step": 163990 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010674807606690835, + "loss": 0.3795, + "step": 164000 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010670567533761582, + "loss": 0.3664, + "step": 164010 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010666327460832326, + "loss": 0.3825, + "step": 164020 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010662087387903072, + "loss": 0.4579, + "step": 164030 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010657847314973819, + "loss": 0.4279, + "step": 164040 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010653607242044563, + "loss": 0.423, + "step": 164050 + }, + { + "epoch": 6.94, + "learning_rate": 0.0001064936716911531, + "loss": 0.4133, + "step": 164060 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010645127096186053, + "loss": 0.3689, + "step": 164070 + }, + { + "epoch": 6.94, + "learning_rate": 0.000106408870232568, + "loss": 0.4616, + "step": 164080 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010636646950327547, + "loss": 0.3983, + "step": 164090 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010632406877398291, + "loss": 0.4006, + "step": 164100 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010628166804469037, + "loss": 0.3724, + "step": 164110 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010623926731539784, + "loss": 0.354, + "step": 164120 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010619686658610528, + "loss": 0.4484, + "step": 164130 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010615446585681275, + "loss": 0.4153, + "step": 164140 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010611206512752019, + "loss": 0.4233, + "step": 164150 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010606966439822765, + "loss": 0.4312, + "step": 164160 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010602726366893512, + "loss": 0.4936, + "step": 164170 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010598486293964256, + "loss": 0.3604, + "step": 164180 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010594246221035003, + "loss": 0.4277, + "step": 164190 + }, + { + "epoch": 6.94, + "learning_rate": 0.0001059000614810575, + "loss": 0.3834, + "step": 164200 + }, + { + "epoch": 6.94, + "learning_rate": 0.00010585766075176493, + "loss": 0.3535, + "step": 164210 + }, + { + "epoch": 6.94, + "learning_rate": 0.0001058152600224724, + "loss": 0.4771, + "step": 164220 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010577285929317984, + "loss": 0.4385, + "step": 164230 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010573045856388731, + "loss": 0.4364, + "step": 164240 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010568805783459476, + "loss": 0.41, + "step": 164250 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010564565710530221, + "loss": 0.3948, + "step": 164260 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010560325637600967, + "loss": 0.486, + "step": 164270 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010556085564671713, + "loss": 0.5081, + "step": 164280 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010551845491742459, + "loss": 0.4028, + "step": 164290 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010547605418813204, + "loss": 0.4751, + "step": 164300 + }, + { + "epoch": 6.95, + "learning_rate": 0.0001054336534588395, + "loss": 0.4355, + "step": 164310 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010539125272954695, + "loss": 0.3917, + "step": 164320 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010534885200025441, + "loss": 0.3667, + "step": 164330 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010530645127096185, + "loss": 0.4028, + "step": 164340 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010526405054166932, + "loss": 0.3937, + "step": 164350 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010522164981237679, + "loss": 0.4871, + "step": 164360 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010517924908308423, + "loss": 0.43, + "step": 164370 + }, + { + "epoch": 6.95, + "learning_rate": 0.0001051368483537917, + "loss": 0.4569, + "step": 164380 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010509444762449913, + "loss": 0.4184, + "step": 164390 + }, + { + "epoch": 6.95, + "learning_rate": 0.0001050520468952066, + "loss": 0.4332, + "step": 164400 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010500964616591407, + "loss": 0.3968, + "step": 164410 + }, + { + "epoch": 6.95, + "learning_rate": 0.0001049672454366215, + "loss": 0.3462, + "step": 164420 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010492484470732897, + "loss": 0.4177, + "step": 164430 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010488244397803644, + "loss": 0.4121, + "step": 164440 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010484004324874388, + "loss": 0.4459, + "step": 164450 + }, + { + "epoch": 6.95, + "learning_rate": 0.00010479764251945135, + "loss": 0.42, + "step": 164460 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010475524179015879, + "loss": 0.5086, + "step": 164470 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010471284106086625, + "loss": 0.4314, + "step": 164480 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010467044033157372, + "loss": 0.3588, + "step": 164490 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010462803960228116, + "loss": 0.4637, + "step": 164500 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010458563887298863, + "loss": 0.4369, + "step": 164510 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010454323814369609, + "loss": 0.4329, + "step": 164520 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010450083741440353, + "loss": 0.4363, + "step": 164530 + }, + { + "epoch": 6.96, + "learning_rate": 0.000104458436685111, + "loss": 0.3831, + "step": 164540 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010441603595581844, + "loss": 0.4413, + "step": 164550 + }, + { + "epoch": 6.96, + "learning_rate": 0.0001043736352265259, + "loss": 0.4121, + "step": 164560 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010433123449723336, + "loss": 0.4134, + "step": 164570 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010428883376794081, + "loss": 0.4358, + "step": 164580 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010424643303864827, + "loss": 0.4656, + "step": 164590 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010420403230935573, + "loss": 0.4453, + "step": 164600 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010416163158006319, + "loss": 0.3493, + "step": 164610 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010411923085077064, + "loss": 0.426, + "step": 164620 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010407683012147809, + "loss": 0.4198, + "step": 164630 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010403442939218555, + "loss": 0.4355, + "step": 164640 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010399202866289301, + "loss": 0.4037, + "step": 164650 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010394962793360045, + "loss": 0.5075, + "step": 164660 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010390722720430792, + "loss": 0.3866, + "step": 164670 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010386482647501536, + "loss": 0.3899, + "step": 164680 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010382242574572283, + "loss": 0.4065, + "step": 164690 + }, + { + "epoch": 6.96, + "learning_rate": 0.00010378002501643029, + "loss": 0.3803, + "step": 164700 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010373762428713773, + "loss": 0.4706, + "step": 164710 + }, + { + "epoch": 6.97, + "learning_rate": 0.0001036952235578452, + "loss": 0.5217, + "step": 164720 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010365282282855267, + "loss": 0.3844, + "step": 164730 + }, + { + "epoch": 6.97, + "learning_rate": 0.0001036104220992601, + "loss": 0.4103, + "step": 164740 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010356802136996757, + "loss": 0.4562, + "step": 164750 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010352562064067501, + "loss": 0.447, + "step": 164760 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010348321991138248, + "loss": 0.4476, + "step": 164770 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010344081918208994, + "loss": 0.496, + "step": 164780 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010339841845279738, + "loss": 0.3871, + "step": 164790 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010335601772350485, + "loss": 0.3957, + "step": 164800 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010331361699421232, + "loss": 0.4218, + "step": 164810 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010327121626491976, + "loss": 0.4064, + "step": 164820 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010322881553562722, + "loss": 0.3502, + "step": 164830 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010318641480633466, + "loss": 0.4369, + "step": 164840 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010314401407704213, + "loss": 0.4025, + "step": 164850 + }, + { + "epoch": 6.97, + "learning_rate": 0.0001031016133477496, + "loss": 0.3998, + "step": 164860 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010305921261845704, + "loss": 0.3435, + "step": 164870 + }, + { + "epoch": 6.97, + "learning_rate": 0.0001030168118891645, + "loss": 0.4234, + "step": 164880 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010297441115987196, + "loss": 0.4373, + "step": 164890 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010293201043057941, + "loss": 0.4138, + "step": 164900 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010288960970128688, + "loss": 0.4081, + "step": 164910 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010284720897199432, + "loss": 0.4506, + "step": 164920 + }, + { + "epoch": 6.97, + "learning_rate": 0.00010280480824270178, + "loss": 0.4169, + "step": 164930 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010276240751340924, + "loss": 0.4653, + "step": 164940 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010272000678411669, + "loss": 0.4019, + "step": 164950 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010267760605482414, + "loss": 0.5007, + "step": 164960 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010263520532553161, + "loss": 0.4669, + "step": 164970 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010259280459623905, + "loss": 0.4867, + "step": 164980 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010255040386694652, + "loss": 0.3927, + "step": 164990 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010250800313765396, + "loss": 0.3938, + "step": 165000 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010246560240836142, + "loss": 0.4583, + "step": 165010 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010242320167906889, + "loss": 0.3938, + "step": 165020 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010238080094977633, + "loss": 0.4441, + "step": 165030 + }, + { + "epoch": 6.98, + "learning_rate": 0.0001023384002204838, + "loss": 0.4073, + "step": 165040 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010229599949119126, + "loss": 0.3919, + "step": 165050 + }, + { + "epoch": 6.98, + "learning_rate": 0.0001022535987618987, + "loss": 0.4077, + "step": 165060 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010221119803260617, + "loss": 0.3734, + "step": 165070 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010216879730331361, + "loss": 0.3839, + "step": 165080 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010212639657402108, + "loss": 0.4407, + "step": 165090 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010208399584472854, + "loss": 0.4247, + "step": 165100 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010204159511543598, + "loss": 0.4197, + "step": 165110 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010199919438614345, + "loss": 0.3818, + "step": 165120 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010195679365685092, + "loss": 0.3594, + "step": 165130 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010191439292755836, + "loss": 0.4552, + "step": 165140 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010187199219826582, + "loss": 0.4619, + "step": 165150 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010182959146897326, + "loss": 0.4532, + "step": 165160 + }, + { + "epoch": 6.98, + "learning_rate": 0.00010178719073968073, + "loss": 0.4466, + "step": 165170 + }, + { + "epoch": 6.99, + "learning_rate": 0.0001017447900103882, + "loss": 0.4273, + "step": 165180 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010170238928109564, + "loss": 0.3453, + "step": 165190 + }, + { + "epoch": 6.99, + "learning_rate": 0.0001016599885518031, + "loss": 0.4605, + "step": 165200 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010161758782251056, + "loss": 0.396, + "step": 165210 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010157518709321801, + "loss": 0.419, + "step": 165220 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010153278636392548, + "loss": 0.4148, + "step": 165230 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010149038563463292, + "loss": 0.3956, + "step": 165240 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010144798490534038, + "loss": 0.3899, + "step": 165250 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010140558417604784, + "loss": 0.4139, + "step": 165260 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010136318344675529, + "loss": 0.4622, + "step": 165270 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010132078271746274, + "loss": 0.3957, + "step": 165280 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010127838198817021, + "loss": 0.3525, + "step": 165290 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010123598125887765, + "loss": 0.337, + "step": 165300 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010119358052958512, + "loss": 0.4205, + "step": 165310 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010115117980029256, + "loss": 0.4639, + "step": 165320 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010110877907100002, + "loss": 0.397, + "step": 165330 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010106637834170749, + "loss": 0.4336, + "step": 165340 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010102397761241493, + "loss": 0.4232, + "step": 165350 + }, + { + "epoch": 6.99, + "learning_rate": 0.0001009815768831224, + "loss": 0.3539, + "step": 165360 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010093917615382983, + "loss": 0.3643, + "step": 165370 + }, + { + "epoch": 6.99, + "learning_rate": 0.0001008967754245373, + "loss": 0.4238, + "step": 165380 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010085437469524477, + "loss": 0.408, + "step": 165390 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010081197396595221, + "loss": 0.4204, + "step": 165400 + }, + { + "epoch": 6.99, + "learning_rate": 0.00010076957323665968, + "loss": 0.519, + "step": 165410 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010072717250736714, + "loss": 0.3761, + "step": 165420 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010068477177807458, + "loss": 0.3484, + "step": 165430 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010064237104878205, + "loss": 0.4608, + "step": 165440 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010059997031948949, + "loss": 0.4223, + "step": 165450 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010055756959019695, + "loss": 0.372, + "step": 165460 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010051516886090442, + "loss": 0.4247, + "step": 165470 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010047276813161186, + "loss": 0.41, + "step": 165480 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010043036740231933, + "loss": 0.3412, + "step": 165490 + }, + { + "epoch": 7.0, + "learning_rate": 0.0001003879666730268, + "loss": 0.4307, + "step": 165500 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010034556594373423, + "loss": 0.413, + "step": 165510 + }, + { + "epoch": 7.0, + "learning_rate": 0.0001003031652144417, + "loss": 0.3909, + "step": 165520 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010026076448514914, + "loss": 0.4391, + "step": 165530 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010021836375585661, + "loss": 0.4065, + "step": 165540 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010017596302656407, + "loss": 0.4402, + "step": 165550 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010013356229727151, + "loss": 0.3629, + "step": 165560 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010009116156797898, + "loss": 0.4048, + "step": 165570 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010004876083868643, + "loss": 0.2991, + "step": 165580 + }, + { + "epoch": 7.0, + "learning_rate": 0.00010000636010939389, + "loss": 0.3781, + "step": 165590 + }, + { + "epoch": 7.0, + "learning_rate": 9.996395938010134e-05, + "loss": 0.3547, + "step": 165600 + }, + { + "epoch": 7.0, + "learning_rate": 9.99215586508088e-05, + "loss": 0.3889, + "step": 165610 + }, + { + "epoch": 7.0, + "learning_rate": 9.987915792151625e-05, + "loss": 0.3725, + "step": 165620 + }, + { + "epoch": 7.0, + "learning_rate": 9.98367571922237e-05, + "loss": 0.374, + "step": 165630 + }, + { + "epoch": 7.0, + "learning_rate": 9.979435646293117e-05, + "loss": 0.3733, + "step": 165640 + }, + { + "epoch": 7.01, + "learning_rate": 9.975195573363862e-05, + "loss": 0.344, + "step": 165650 + }, + { + "epoch": 7.01, + "learning_rate": 9.970955500434607e-05, + "loss": 0.4012, + "step": 165660 + }, + { + "epoch": 7.01, + "learning_rate": 9.966715427505353e-05, + "loss": 0.3826, + "step": 165670 + }, + { + "epoch": 7.01, + "learning_rate": 9.9624753545761e-05, + "loss": 0.3475, + "step": 165680 + }, + { + "epoch": 7.01, + "learning_rate": 9.958235281646845e-05, + "loss": 0.3278, + "step": 165690 + }, + { + "epoch": 7.01, + "learning_rate": 9.95399520871759e-05, + "loss": 0.3751, + "step": 165700 + }, + { + "epoch": 7.01, + "learning_rate": 9.949755135788335e-05, + "loss": 0.3684, + "step": 165710 + }, + { + "epoch": 7.01, + "learning_rate": 9.945515062859082e-05, + "loss": 0.3428, + "step": 165720 + }, + { + "epoch": 7.01, + "learning_rate": 9.941274989929827e-05, + "loss": 0.3092, + "step": 165730 + }, + { + "epoch": 7.01, + "learning_rate": 9.937034917000573e-05, + "loss": 0.4498, + "step": 165740 + }, + { + "epoch": 7.01, + "learning_rate": 9.932794844071318e-05, + "loss": 0.395, + "step": 165750 + }, + { + "epoch": 7.01, + "learning_rate": 9.928554771142065e-05, + "loss": 0.4298, + "step": 165760 + }, + { + "epoch": 7.01, + "learning_rate": 9.92431469821281e-05, + "loss": 0.3868, + "step": 165770 + }, + { + "epoch": 7.01, + "learning_rate": 9.920074625283555e-05, + "loss": 0.3774, + "step": 165780 + }, + { + "epoch": 7.01, + "learning_rate": 9.9158345523543e-05, + "loss": 0.4206, + "step": 165790 + }, + { + "epoch": 7.01, + "learning_rate": 9.911594479425047e-05, + "loss": 0.3816, + "step": 165800 + }, + { + "epoch": 7.01, + "learning_rate": 9.907354406495793e-05, + "loss": 0.3771, + "step": 165810 + }, + { + "epoch": 7.01, + "learning_rate": 9.903114333566538e-05, + "loss": 0.3843, + "step": 165820 + }, + { + "epoch": 7.01, + "learning_rate": 9.898874260637283e-05, + "loss": 0.3739, + "step": 165830 + }, + { + "epoch": 7.01, + "learning_rate": 9.89463418770803e-05, + "loss": 0.353, + "step": 165840 + }, + { + "epoch": 7.01, + "learning_rate": 9.890394114778775e-05, + "loss": 0.4054, + "step": 165850 + }, + { + "epoch": 7.01, + "learning_rate": 9.88615404184952e-05, + "loss": 0.332, + "step": 165860 + }, + { + "epoch": 7.01, + "learning_rate": 9.881913968920266e-05, + "loss": 0.3153, + "step": 165870 + }, + { + "epoch": 7.01, + "learning_rate": 9.877673895991013e-05, + "loss": 0.341, + "step": 165880 + }, + { + "epoch": 7.02, + "learning_rate": 9.873433823061758e-05, + "loss": 0.3992, + "step": 165890 + }, + { + "epoch": 7.02, + "learning_rate": 9.869193750132503e-05, + "loss": 0.3512, + "step": 165900 + }, + { + "epoch": 7.02, + "learning_rate": 9.864953677203249e-05, + "loss": 0.351, + "step": 165910 + }, + { + "epoch": 7.02, + "learning_rate": 9.860713604273994e-05, + "loss": 0.4287, + "step": 165920 + }, + { + "epoch": 7.02, + "learning_rate": 9.856473531344739e-05, + "loss": 0.3909, + "step": 165930 + }, + { + "epoch": 7.02, + "learning_rate": 9.852233458415485e-05, + "loss": 0.4037, + "step": 165940 + }, + { + "epoch": 7.02, + "learning_rate": 9.84799338548623e-05, + "loss": 0.3327, + "step": 165950 + }, + { + "epoch": 7.02, + "learning_rate": 9.843753312556977e-05, + "loss": 0.3735, + "step": 165960 + }, + { + "epoch": 7.02, + "learning_rate": 9.839513239627722e-05, + "loss": 0.3675, + "step": 165970 + }, + { + "epoch": 7.02, + "learning_rate": 9.835273166698467e-05, + "loss": 0.3917, + "step": 165980 + }, + { + "epoch": 7.02, + "learning_rate": 9.831033093769213e-05, + "loss": 0.3654, + "step": 165990 + }, + { + "epoch": 7.02, + "learning_rate": 9.826793020839959e-05, + "loss": 0.3569, + "step": 166000 + }, + { + "epoch": 7.02, + "learning_rate": 9.822552947910705e-05, + "loss": 0.3674, + "step": 166010 + }, + { + "epoch": 7.02, + "learning_rate": 9.81831287498145e-05, + "loss": 0.4262, + "step": 166020 + }, + { + "epoch": 7.02, + "learning_rate": 9.814072802052195e-05, + "loss": 0.3666, + "step": 166030 + }, + { + "epoch": 7.02, + "learning_rate": 9.809832729122942e-05, + "loss": 0.3668, + "step": 166040 + }, + { + "epoch": 7.02, + "learning_rate": 9.805592656193687e-05, + "loss": 0.3908, + "step": 166050 + }, + { + "epoch": 7.02, + "learning_rate": 9.801352583264433e-05, + "loss": 0.3611, + "step": 166060 + }, + { + "epoch": 7.02, + "learning_rate": 9.797112510335178e-05, + "loss": 0.3639, + "step": 166070 + }, + { + "epoch": 7.02, + "learning_rate": 9.792872437405923e-05, + "loss": 0.3405, + "step": 166080 + }, + { + "epoch": 7.02, + "learning_rate": 9.78863236447667e-05, + "loss": 0.4063, + "step": 166090 + }, + { + "epoch": 7.02, + "learning_rate": 9.784392291547415e-05, + "loss": 0.3968, + "step": 166100 + }, + { + "epoch": 7.02, + "learning_rate": 9.78015221861816e-05, + "loss": 0.3375, + "step": 166110 + }, + { + "epoch": 7.02, + "learning_rate": 9.775912145688906e-05, + "loss": 0.3439, + "step": 166120 + }, + { + "epoch": 7.03, + "learning_rate": 9.771672072759652e-05, + "loss": 0.3975, + "step": 166130 + }, + { + "epoch": 7.03, + "learning_rate": 9.767431999830398e-05, + "loss": 0.353, + "step": 166140 + }, + { + "epoch": 7.03, + "learning_rate": 9.763191926901143e-05, + "loss": 0.3907, + "step": 166150 + }, + { + "epoch": 7.03, + "learning_rate": 9.758951853971888e-05, + "loss": 0.3345, + "step": 166160 + }, + { + "epoch": 7.03, + "learning_rate": 9.754711781042635e-05, + "loss": 0.3562, + "step": 166170 + }, + { + "epoch": 7.03, + "learning_rate": 9.75047170811338e-05, + "loss": 0.3899, + "step": 166180 + }, + { + "epoch": 7.03, + "learning_rate": 9.746231635184126e-05, + "loss": 0.4327, + "step": 166190 + }, + { + "epoch": 7.03, + "learning_rate": 9.741991562254871e-05, + "loss": 0.3636, + "step": 166200 + }, + { + "epoch": 7.03, + "learning_rate": 9.737751489325618e-05, + "loss": 0.3481, + "step": 166210 + }, + { + "epoch": 7.03, + "learning_rate": 9.733511416396363e-05, + "loss": 0.3534, + "step": 166220 + }, + { + "epoch": 7.03, + "learning_rate": 9.729271343467108e-05, + "loss": 0.3908, + "step": 166230 + }, + { + "epoch": 7.03, + "learning_rate": 9.725031270537854e-05, + "loss": 0.3566, + "step": 166240 + }, + { + "epoch": 7.03, + "learning_rate": 9.720791197608599e-05, + "loss": 0.3557, + "step": 166250 + }, + { + "epoch": 7.03, + "learning_rate": 9.716551124679344e-05, + "loss": 0.3554, + "step": 166260 + }, + { + "epoch": 7.03, + "learning_rate": 9.71231105175009e-05, + "loss": 0.3487, + "step": 166270 + }, + { + "epoch": 7.03, + "learning_rate": 9.708070978820835e-05, + "loss": 0.3908, + "step": 166280 + }, + { + "epoch": 7.03, + "learning_rate": 9.703830905891582e-05, + "loss": 0.3243, + "step": 166290 + }, + { + "epoch": 7.03, + "learning_rate": 9.699590832962327e-05, + "loss": 0.4481, + "step": 166300 + }, + { + "epoch": 7.03, + "learning_rate": 9.695350760033072e-05, + "loss": 0.3963, + "step": 166310 + }, + { + "epoch": 7.03, + "learning_rate": 9.691110687103818e-05, + "loss": 0.3565, + "step": 166320 + }, + { + "epoch": 7.03, + "learning_rate": 9.686870614174564e-05, + "loss": 0.4374, + "step": 166330 + }, + { + "epoch": 7.03, + "learning_rate": 9.68263054124531e-05, + "loss": 0.3882, + "step": 166340 + }, + { + "epoch": 7.03, + "learning_rate": 9.678390468316055e-05, + "loss": 0.4405, + "step": 166350 + }, + { + "epoch": 7.04, + "learning_rate": 9.6741503953868e-05, + "loss": 0.3459, + "step": 166360 + }, + { + "epoch": 7.04, + "learning_rate": 9.669910322457547e-05, + "loss": 0.4236, + "step": 166370 + }, + { + "epoch": 7.04, + "learning_rate": 9.665670249528292e-05, + "loss": 0.3867, + "step": 166380 + }, + { + "epoch": 7.04, + "learning_rate": 9.661430176599038e-05, + "loss": 0.3402, + "step": 166390 + }, + { + "epoch": 7.04, + "learning_rate": 9.657190103669783e-05, + "loss": 0.3498, + "step": 166400 + }, + { + "epoch": 7.04, + "learning_rate": 9.65295003074053e-05, + "loss": 0.405, + "step": 166410 + }, + { + "epoch": 7.04, + "learning_rate": 9.648709957811275e-05, + "loss": 0.3577, + "step": 166420 + }, + { + "epoch": 7.04, + "learning_rate": 9.64446988488202e-05, + "loss": 0.3593, + "step": 166430 + }, + { + "epoch": 7.04, + "learning_rate": 9.640229811952766e-05, + "loss": 0.3405, + "step": 166440 + }, + { + "epoch": 7.04, + "learning_rate": 9.635989739023512e-05, + "loss": 0.3189, + "step": 166450 + }, + { + "epoch": 7.04, + "learning_rate": 9.631749666094258e-05, + "loss": 0.3951, + "step": 166460 + }, + { + "epoch": 7.04, + "learning_rate": 9.627509593165003e-05, + "loss": 0.4063, + "step": 166470 + }, + { + "epoch": 7.04, + "learning_rate": 9.623269520235748e-05, + "loss": 0.3434, + "step": 166480 + }, + { + "epoch": 7.04, + "learning_rate": 9.619029447306495e-05, + "loss": 0.2953, + "step": 166490 + }, + { + "epoch": 7.04, + "learning_rate": 9.61478937437724e-05, + "loss": 0.4415, + "step": 166500 + }, + { + "epoch": 7.04, + "learning_rate": 9.610549301447986e-05, + "loss": 0.3631, + "step": 166510 + }, + { + "epoch": 7.04, + "learning_rate": 9.606309228518731e-05, + "loss": 0.3715, + "step": 166520 + }, + { + "epoch": 7.04, + "learning_rate": 9.602069155589478e-05, + "loss": 0.3486, + "step": 166530 + }, + { + "epoch": 7.04, + "learning_rate": 9.597829082660223e-05, + "loss": 0.3978, + "step": 166540 + }, + { + "epoch": 7.04, + "learning_rate": 9.593589009730968e-05, + "loss": 0.3321, + "step": 166550 + }, + { + "epoch": 7.04, + "learning_rate": 9.589348936801714e-05, + "loss": 0.3787, + "step": 166560 + }, + { + "epoch": 7.04, + "learning_rate": 9.585108863872459e-05, + "loss": 0.3611, + "step": 166570 + }, + { + "epoch": 7.04, + "learning_rate": 9.580868790943204e-05, + "loss": 0.3458, + "step": 166580 + }, + { + "epoch": 7.04, + "learning_rate": 9.57662871801395e-05, + "loss": 0.4109, + "step": 166590 + }, + { + "epoch": 7.05, + "learning_rate": 9.572388645084695e-05, + "loss": 0.3457, + "step": 166600 + }, + { + "epoch": 7.05, + "learning_rate": 9.568148572155442e-05, + "loss": 0.3636, + "step": 166610 + }, + { + "epoch": 7.05, + "learning_rate": 9.563908499226187e-05, + "loss": 0.3644, + "step": 166620 + }, + { + "epoch": 7.05, + "learning_rate": 9.559668426296932e-05, + "loss": 0.3363, + "step": 166630 + }, + { + "epoch": 7.05, + "learning_rate": 9.555428353367678e-05, + "loss": 0.3285, + "step": 166640 + }, + { + "epoch": 7.05, + "learning_rate": 9.551188280438424e-05, + "loss": 0.4555, + "step": 166650 + }, + { + "epoch": 7.05, + "learning_rate": 9.54694820750917e-05, + "loss": 0.3886, + "step": 166660 + }, + { + "epoch": 7.05, + "learning_rate": 9.542708134579915e-05, + "loss": 0.3483, + "step": 166670 + }, + { + "epoch": 7.05, + "learning_rate": 9.53846806165066e-05, + "loss": 0.3525, + "step": 166680 + }, + { + "epoch": 7.05, + "learning_rate": 9.534227988721407e-05, + "loss": 0.3553, + "step": 166690 + }, + { + "epoch": 7.05, + "learning_rate": 9.529987915792152e-05, + "loss": 0.3999, + "step": 166700 + }, + { + "epoch": 7.05, + "learning_rate": 9.525747842862898e-05, + "loss": 0.3618, + "step": 166710 + }, + { + "epoch": 7.05, + "learning_rate": 9.521507769933643e-05, + "loss": 0.325, + "step": 166720 + }, + { + "epoch": 7.05, + "learning_rate": 9.51726769700439e-05, + "loss": 0.3882, + "step": 166730 + }, + { + "epoch": 7.05, + "learning_rate": 9.513027624075135e-05, + "loss": 0.3906, + "step": 166740 + }, + { + "epoch": 7.05, + "learning_rate": 9.50878755114588e-05, + "loss": 0.4195, + "step": 166750 + }, + { + "epoch": 7.05, + "learning_rate": 9.504547478216625e-05, + "loss": 0.4358, + "step": 166760 + }, + { + "epoch": 7.05, + "learning_rate": 9.500307405287371e-05, + "loss": 0.3617, + "step": 166770 + }, + { + "epoch": 7.05, + "learning_rate": 9.496067332358117e-05, + "loss": 0.465, + "step": 166780 + }, + { + "epoch": 7.05, + "learning_rate": 9.491827259428863e-05, + "loss": 0.338, + "step": 166790 + }, + { + "epoch": 7.05, + "learning_rate": 9.487587186499608e-05, + "loss": 0.3767, + "step": 166800 + }, + { + "epoch": 7.05, + "learning_rate": 9.483347113570353e-05, + "loss": 0.3984, + "step": 166810 + }, + { + "epoch": 7.05, + "learning_rate": 9.4791070406411e-05, + "loss": 0.3353, + "step": 166820 + }, + { + "epoch": 7.06, + "learning_rate": 9.474866967711845e-05, + "loss": 0.412, + "step": 166830 + }, + { + "epoch": 7.06, + "learning_rate": 9.470626894782591e-05, + "loss": 0.3567, + "step": 166840 + }, + { + "epoch": 7.06, + "learning_rate": 9.466386821853336e-05, + "loss": 0.3249, + "step": 166850 + }, + { + "epoch": 7.06, + "learning_rate": 9.462146748924083e-05, + "loss": 0.3803, + "step": 166860 + }, + { + "epoch": 7.06, + "learning_rate": 9.457906675994828e-05, + "loss": 0.4376, + "step": 166870 + }, + { + "epoch": 7.06, + "learning_rate": 9.453666603065573e-05, + "loss": 0.3578, + "step": 166880 + }, + { + "epoch": 7.06, + "learning_rate": 9.449426530136319e-05, + "loss": 0.4502, + "step": 166890 + }, + { + "epoch": 7.06, + "learning_rate": 9.445186457207064e-05, + "loss": 0.3847, + "step": 166900 + }, + { + "epoch": 7.06, + "learning_rate": 9.44094638427781e-05, + "loss": 0.374, + "step": 166910 + }, + { + "epoch": 7.06, + "learning_rate": 9.436706311348556e-05, + "loss": 0.3878, + "step": 166920 + }, + { + "epoch": 7.06, + "learning_rate": 9.432466238419301e-05, + "loss": 0.3617, + "step": 166930 + }, + { + "epoch": 7.06, + "learning_rate": 9.428226165490047e-05, + "loss": 0.3852, + "step": 166940 + }, + { + "epoch": 7.06, + "learning_rate": 9.423986092560792e-05, + "loss": 0.3922, + "step": 166950 + }, + { + "epoch": 7.06, + "learning_rate": 9.419746019631537e-05, + "loss": 0.4076, + "step": 166960 + }, + { + "epoch": 7.06, + "learning_rate": 9.415505946702283e-05, + "loss": 0.4278, + "step": 166970 + }, + { + "epoch": 7.06, + "learning_rate": 9.41126587377303e-05, + "loss": 0.401, + "step": 166980 + }, + { + "epoch": 7.06, + "learning_rate": 9.407025800843775e-05, + "loss": 0.3484, + "step": 166990 + }, + { + "epoch": 7.06, + "learning_rate": 9.40278572791452e-05, + "loss": 0.4709, + "step": 167000 + }, + { + "epoch": 7.06, + "learning_rate": 9.398545654985265e-05, + "loss": 0.4524, + "step": 167010 + }, + { + "epoch": 7.06, + "learning_rate": 9.394305582056012e-05, + "loss": 0.3449, + "step": 167020 + }, + { + "epoch": 7.06, + "learning_rate": 9.390065509126757e-05, + "loss": 0.3999, + "step": 167030 + }, + { + "epoch": 7.06, + "learning_rate": 9.385825436197503e-05, + "loss": 0.3916, + "step": 167040 + }, + { + "epoch": 7.06, + "learning_rate": 9.381585363268248e-05, + "loss": 0.4179, + "step": 167050 + }, + { + "epoch": 7.06, + "learning_rate": 9.377345290338995e-05, + "loss": 0.3997, + "step": 167060 + }, + { + "epoch": 7.07, + "learning_rate": 9.37310521740974e-05, + "loss": 0.3252, + "step": 167070 + }, + { + "epoch": 7.07, + "learning_rate": 9.368865144480485e-05, + "loss": 0.5115, + "step": 167080 + }, + { + "epoch": 7.07, + "learning_rate": 9.36462507155123e-05, + "loss": 0.3878, + "step": 167090 + }, + { + "epoch": 7.07, + "learning_rate": 9.360384998621977e-05, + "loss": 0.4041, + "step": 167100 + }, + { + "epoch": 7.07, + "learning_rate": 9.356144925692723e-05, + "loss": 0.3912, + "step": 167110 + }, + { + "epoch": 7.07, + "learning_rate": 9.351904852763468e-05, + "loss": 0.3961, + "step": 167120 + }, + { + "epoch": 7.07, + "learning_rate": 9.347664779834213e-05, + "loss": 0.3886, + "step": 167130 + }, + { + "epoch": 7.07, + "learning_rate": 9.34342470690496e-05, + "loss": 0.3309, + "step": 167140 + }, + { + "epoch": 7.07, + "learning_rate": 9.339184633975705e-05, + "loss": 0.4054, + "step": 167150 + }, + { + "epoch": 7.07, + "learning_rate": 9.33494456104645e-05, + "loss": 0.3756, + "step": 167160 + }, + { + "epoch": 7.07, + "learning_rate": 9.330704488117196e-05, + "loss": 0.3525, + "step": 167170 + }, + { + "epoch": 7.07, + "learning_rate": 9.326464415187943e-05, + "loss": 0.4158, + "step": 167180 + }, + { + "epoch": 7.07, + "learning_rate": 9.322224342258688e-05, + "loss": 0.4022, + "step": 167190 + }, + { + "epoch": 7.07, + "learning_rate": 9.317984269329433e-05, + "loss": 0.3686, + "step": 167200 + }, + { + "epoch": 7.07, + "learning_rate": 9.313744196400179e-05, + "loss": 0.3599, + "step": 167210 + }, + { + "epoch": 7.07, + "learning_rate": 9.309504123470924e-05, + "loss": 0.3984, + "step": 167220 + }, + { + "epoch": 7.07, + "learning_rate": 9.30526405054167e-05, + "loss": 0.4048, + "step": 167230 + }, + { + "epoch": 7.07, + "learning_rate": 9.301023977612416e-05, + "loss": 0.4043, + "step": 167240 + }, + { + "epoch": 7.07, + "learning_rate": 9.296783904683161e-05, + "loss": 0.3575, + "step": 167250 + }, + { + "epoch": 7.07, + "learning_rate": 9.292543831753907e-05, + "loss": 0.3955, + "step": 167260 + }, + { + "epoch": 7.07, + "learning_rate": 9.288303758824652e-05, + "loss": 0.3579, + "step": 167270 + }, + { + "epoch": 7.07, + "learning_rate": 9.284063685895397e-05, + "loss": 0.3982, + "step": 167280 + }, + { + "epoch": 7.07, + "learning_rate": 9.279823612966143e-05, + "loss": 0.405, + "step": 167290 + }, + { + "epoch": 7.07, + "learning_rate": 9.275583540036889e-05, + "loss": 0.3027, + "step": 167300 + }, + { + "epoch": 7.08, + "learning_rate": 9.271343467107635e-05, + "loss": 0.3691, + "step": 167310 + }, + { + "epoch": 7.08, + "learning_rate": 9.26710339417838e-05, + "loss": 0.3347, + "step": 167320 + }, + { + "epoch": 7.08, + "learning_rate": 9.262863321249125e-05, + "loss": 0.3663, + "step": 167330 + }, + { + "epoch": 7.08, + "learning_rate": 9.258623248319872e-05, + "loss": 0.3532, + "step": 167340 + }, + { + "epoch": 7.08, + "learning_rate": 9.254383175390617e-05, + "loss": 0.3686, + "step": 167350 + }, + { + "epoch": 7.08, + "learning_rate": 9.250143102461363e-05, + "loss": 0.3612, + "step": 167360 + }, + { + "epoch": 7.08, + "learning_rate": 9.245903029532108e-05, + "loss": 0.4094, + "step": 167370 + }, + { + "epoch": 7.08, + "learning_rate": 9.241662956602855e-05, + "loss": 0.4256, + "step": 167380 + }, + { + "epoch": 7.08, + "learning_rate": 9.2374228836736e-05, + "loss": 0.3562, + "step": 167390 + }, + { + "epoch": 7.08, + "learning_rate": 9.233182810744345e-05, + "loss": 0.3761, + "step": 167400 + }, + { + "epoch": 7.08, + "learning_rate": 9.22894273781509e-05, + "loss": 0.3234, + "step": 167410 + }, + { + "epoch": 7.08, + "learning_rate": 9.224702664885837e-05, + "loss": 0.3154, + "step": 167420 + }, + { + "epoch": 7.08, + "learning_rate": 9.220462591956583e-05, + "loss": 0.3702, + "step": 167430 + }, + { + "epoch": 7.08, + "learning_rate": 9.216222519027328e-05, + "loss": 0.3451, + "step": 167440 + }, + { + "epoch": 7.08, + "learning_rate": 9.211982446098073e-05, + "loss": 0.3864, + "step": 167450 + }, + { + "epoch": 7.08, + "learning_rate": 9.20774237316882e-05, + "loss": 0.3759, + "step": 167460 + }, + { + "epoch": 7.08, + "learning_rate": 9.203502300239565e-05, + "loss": 0.4643, + "step": 167470 + }, + { + "epoch": 7.08, + "learning_rate": 9.19926222731031e-05, + "loss": 0.3862, + "step": 167480 + }, + { + "epoch": 7.08, + "learning_rate": 9.195022154381056e-05, + "loss": 0.3753, + "step": 167490 + }, + { + "epoch": 7.08, + "learning_rate": 9.190782081451801e-05, + "loss": 0.3451, + "step": 167500 + }, + { + "epoch": 7.08, + "learning_rate": 9.186542008522548e-05, + "loss": 0.3264, + "step": 167510 + }, + { + "epoch": 7.08, + "learning_rate": 9.182301935593293e-05, + "loss": 0.3922, + "step": 167520 + }, + { + "epoch": 7.08, + "learning_rate": 9.178061862664038e-05, + "loss": 0.3313, + "step": 167530 + }, + { + "epoch": 7.09, + "learning_rate": 9.173821789734784e-05, + "loss": 0.3375, + "step": 167540 + }, + { + "epoch": 7.09, + "learning_rate": 9.16958171680553e-05, + "loss": 0.3776, + "step": 167550 + }, + { + "epoch": 7.09, + "learning_rate": 9.165341643876276e-05, + "loss": 0.4089, + "step": 167560 + }, + { + "epoch": 7.09, + "learning_rate": 9.161101570947021e-05, + "loss": 0.37, + "step": 167570 + }, + { + "epoch": 7.09, + "learning_rate": 9.156861498017766e-05, + "loss": 0.3863, + "step": 167580 + }, + { + "epoch": 7.09, + "learning_rate": 9.152621425088512e-05, + "loss": 0.3978, + "step": 167590 + }, + { + "epoch": 7.09, + "learning_rate": 9.148381352159257e-05, + "loss": 0.3614, + "step": 167600 + }, + { + "epoch": 7.09, + "learning_rate": 9.144141279230002e-05, + "loss": 0.4322, + "step": 167610 + }, + { + "epoch": 7.09, + "learning_rate": 9.139901206300748e-05, + "loss": 0.4248, + "step": 167620 + }, + { + "epoch": 7.09, + "learning_rate": 9.135661133371494e-05, + "loss": 0.3555, + "step": 167630 + }, + { + "epoch": 7.09, + "learning_rate": 9.13142106044224e-05, + "loss": 0.3591, + "step": 167640 + }, + { + "epoch": 7.09, + "learning_rate": 9.127180987512985e-05, + "loss": 0.3873, + "step": 167650 + }, + { + "epoch": 7.09, + "learning_rate": 9.12294091458373e-05, + "loss": 0.4038, + "step": 167660 + }, + { + "epoch": 7.09, + "learning_rate": 9.118700841654477e-05, + "loss": 0.3806, + "step": 167670 + }, + { + "epoch": 7.09, + "learning_rate": 9.114460768725222e-05, + "loss": 0.3879, + "step": 167680 + }, + { + "epoch": 7.09, + "learning_rate": 9.110220695795968e-05, + "loss": 0.2949, + "step": 167690 + }, + { + "epoch": 7.09, + "learning_rate": 9.105980622866713e-05, + "loss": 0.3481, + "step": 167700 + }, + { + "epoch": 7.09, + "learning_rate": 9.10174054993746e-05, + "loss": 0.3394, + "step": 167710 + }, + { + "epoch": 7.09, + "learning_rate": 9.097500477008205e-05, + "loss": 0.3667, + "step": 167720 + }, + { + "epoch": 7.09, + "learning_rate": 9.09326040407895e-05, + "loss": 0.3549, + "step": 167730 + }, + { + "epoch": 7.09, + "learning_rate": 9.089020331149696e-05, + "loss": 0.4024, + "step": 167740 + }, + { + "epoch": 7.09, + "learning_rate": 9.084780258220442e-05, + "loss": 0.3958, + "step": 167750 + }, + { + "epoch": 7.09, + "learning_rate": 9.080540185291188e-05, + "loss": 0.3945, + "step": 167760 + }, + { + "epoch": 7.09, + "learning_rate": 9.076300112361933e-05, + "loss": 0.3769, + "step": 167770 + }, + { + "epoch": 7.1, + "learning_rate": 9.072060039432678e-05, + "loss": 0.3694, + "step": 167780 + }, + { + "epoch": 7.1, + "learning_rate": 9.067819966503425e-05, + "loss": 0.3555, + "step": 167790 + }, + { + "epoch": 7.1, + "learning_rate": 9.06357989357417e-05, + "loss": 0.4114, + "step": 167800 + }, + { + "epoch": 7.1, + "learning_rate": 9.059339820644916e-05, + "loss": 0.3526, + "step": 167810 + }, + { + "epoch": 7.1, + "learning_rate": 9.055099747715661e-05, + "loss": 0.3704, + "step": 167820 + }, + { + "epoch": 7.1, + "learning_rate": 9.050859674786408e-05, + "loss": 0.3461, + "step": 167830 + }, + { + "epoch": 7.1, + "learning_rate": 9.046619601857153e-05, + "loss": 0.3496, + "step": 167840 + }, + { + "epoch": 7.1, + "learning_rate": 9.042379528927898e-05, + "loss": 0.3703, + "step": 167850 + }, + { + "epoch": 7.1, + "learning_rate": 9.038139455998644e-05, + "loss": 0.3709, + "step": 167860 + }, + { + "epoch": 7.1, + "learning_rate": 9.03389938306939e-05, + "loss": 0.3827, + "step": 167870 + }, + { + "epoch": 7.1, + "learning_rate": 9.029659310140136e-05, + "loss": 0.3801, + "step": 167880 + }, + { + "epoch": 7.1, + "learning_rate": 9.025419237210881e-05, + "loss": 0.4944, + "step": 167890 + }, + { + "epoch": 7.1, + "learning_rate": 9.021179164281626e-05, + "loss": 0.3096, + "step": 167900 + }, + { + "epoch": 7.1, + "learning_rate": 9.016939091352372e-05, + "loss": 0.2842, + "step": 167910 + }, + { + "epoch": 7.1, + "learning_rate": 9.012699018423117e-05, + "loss": 0.3721, + "step": 167920 + }, + { + "epoch": 7.1, + "learning_rate": 9.008458945493862e-05, + "loss": 0.3918, + "step": 167930 + }, + { + "epoch": 7.1, + "learning_rate": 9.004218872564608e-05, + "loss": 0.3498, + "step": 167940 + }, + { + "epoch": 7.1, + "learning_rate": 8.999978799635354e-05, + "loss": 0.3286, + "step": 167950 + }, + { + "epoch": 7.1, + "learning_rate": 8.9957387267061e-05, + "loss": 0.4289, + "step": 167960 + }, + { + "epoch": 7.1, + "learning_rate": 8.991498653776845e-05, + "loss": 0.3721, + "step": 167970 + }, + { + "epoch": 7.1, + "learning_rate": 8.98725858084759e-05, + "loss": 0.3721, + "step": 167980 + }, + { + "epoch": 7.1, + "learning_rate": 8.983018507918337e-05, + "loss": 0.4148, + "step": 167990 + }, + { + "epoch": 7.1, + "learning_rate": 8.978778434989082e-05, + "loss": 0.396, + "step": 168000 + }, + { + "epoch": 7.1, + "learning_rate": 8.974538362059828e-05, + "loss": 0.3891, + "step": 168010 + }, + { + "epoch": 7.11, + "learning_rate": 8.970298289130573e-05, + "loss": 0.395, + "step": 168020 + }, + { + "epoch": 7.11, + "learning_rate": 8.96605821620132e-05, + "loss": 0.3075, + "step": 168030 + }, + { + "epoch": 7.11, + "learning_rate": 8.961818143272065e-05, + "loss": 0.3434, + "step": 168040 + }, + { + "epoch": 7.11, + "learning_rate": 8.95757807034281e-05, + "loss": 0.3561, + "step": 168050 + }, + { + "epoch": 7.11, + "learning_rate": 8.953337997413556e-05, + "loss": 0.4042, + "step": 168060 + }, + { + "epoch": 7.11, + "learning_rate": 8.949097924484302e-05, + "loss": 0.3442, + "step": 168070 + }, + { + "epoch": 7.11, + "learning_rate": 8.944857851555048e-05, + "loss": 0.327, + "step": 168080 + }, + { + "epoch": 7.11, + "learning_rate": 8.940617778625793e-05, + "loss": 0.391, + "step": 168090 + }, + { + "epoch": 7.11, + "learning_rate": 8.936377705696538e-05, + "loss": 0.409, + "step": 168100 + }, + { + "epoch": 7.11, + "learning_rate": 8.932137632767285e-05, + "loss": 0.3363, + "step": 168110 + }, + { + "epoch": 7.11, + "learning_rate": 8.92789755983803e-05, + "loss": 0.4125, + "step": 168120 + }, + { + "epoch": 7.11, + "learning_rate": 8.923657486908775e-05, + "loss": 0.4537, + "step": 168130 + }, + { + "epoch": 7.11, + "learning_rate": 8.919417413979521e-05, + "loss": 0.3947, + "step": 168140 + }, + { + "epoch": 7.11, + "learning_rate": 8.915177341050267e-05, + "loss": 0.3773, + "step": 168150 + }, + { + "epoch": 7.11, + "learning_rate": 8.910937268121013e-05, + "loss": 0.4202, + "step": 168160 + }, + { + "epoch": 7.11, + "learning_rate": 8.906697195191758e-05, + "loss": 0.403, + "step": 168170 + }, + { + "epoch": 7.11, + "learning_rate": 8.902457122262503e-05, + "loss": 0.3263, + "step": 168180 + }, + { + "epoch": 7.11, + "learning_rate": 8.89821704933325e-05, + "loss": 0.3624, + "step": 168190 + }, + { + "epoch": 7.11, + "learning_rate": 8.893976976403995e-05, + "loss": 0.4079, + "step": 168200 + }, + { + "epoch": 7.11, + "learning_rate": 8.889736903474741e-05, + "loss": 0.4057, + "step": 168210 + }, + { + "epoch": 7.11, + "learning_rate": 8.885496830545486e-05, + "loss": 0.4123, + "step": 168220 + }, + { + "epoch": 7.11, + "learning_rate": 8.881256757616231e-05, + "loss": 0.391, + "step": 168230 + }, + { + "epoch": 7.11, + "learning_rate": 8.877016684686977e-05, + "loss": 0.3936, + "step": 168240 + }, + { + "epoch": 7.12, + "learning_rate": 8.872776611757722e-05, + "loss": 0.3781, + "step": 168250 + }, + { + "epoch": 7.12, + "learning_rate": 8.868536538828467e-05, + "loss": 0.4725, + "step": 168260 + }, + { + "epoch": 7.12, + "learning_rate": 8.864296465899213e-05, + "loss": 0.3861, + "step": 168270 + }, + { + "epoch": 7.12, + "learning_rate": 8.86005639296996e-05, + "loss": 0.4029, + "step": 168280 + }, + { + "epoch": 7.12, + "learning_rate": 8.855816320040705e-05, + "loss": 0.397, + "step": 168290 + }, + { + "epoch": 7.12, + "learning_rate": 8.85157624711145e-05, + "loss": 0.3692, + "step": 168300 + }, + { + "epoch": 7.12, + "learning_rate": 8.847336174182195e-05, + "loss": 0.3412, + "step": 168310 + }, + { + "epoch": 7.12, + "learning_rate": 8.843096101252942e-05, + "loss": 0.4182, + "step": 168320 + }, + { + "epoch": 7.12, + "learning_rate": 8.838856028323687e-05, + "loss": 0.4095, + "step": 168330 + }, + { + "epoch": 7.12, + "learning_rate": 8.834615955394433e-05, + "loss": 0.3638, + "step": 168340 + }, + { + "epoch": 7.12, + "learning_rate": 8.830375882465178e-05, + "loss": 0.3586, + "step": 168350 + }, + { + "epoch": 7.12, + "learning_rate": 8.826135809535925e-05, + "loss": 0.3838, + "step": 168360 + }, + { + "epoch": 7.12, + "learning_rate": 8.82189573660667e-05, + "loss": 0.4291, + "step": 168370 + }, + { + "epoch": 7.12, + "learning_rate": 8.817655663677415e-05, + "loss": 0.3544, + "step": 168380 + }, + { + "epoch": 7.12, + "learning_rate": 8.813415590748161e-05, + "loss": 0.4241, + "step": 168390 + }, + { + "epoch": 7.12, + "learning_rate": 8.809175517818907e-05, + "loss": 0.3191, + "step": 168400 + }, + { + "epoch": 7.12, + "learning_rate": 8.804935444889653e-05, + "loss": 0.3628, + "step": 168410 + }, + { + "epoch": 7.12, + "learning_rate": 8.800695371960398e-05, + "loss": 0.4156, + "step": 168420 + }, + { + "epoch": 7.12, + "learning_rate": 8.796455299031143e-05, + "loss": 0.3337, + "step": 168430 + }, + { + "epoch": 7.12, + "learning_rate": 8.79221522610189e-05, + "loss": 0.3319, + "step": 168440 + }, + { + "epoch": 7.12, + "learning_rate": 8.787975153172635e-05, + "loss": 0.4399, + "step": 168450 + }, + { + "epoch": 7.12, + "learning_rate": 8.78373508024338e-05, + "loss": 0.3778, + "step": 168460 + }, + { + "epoch": 7.12, + "learning_rate": 8.779495007314126e-05, + "loss": 0.3469, + "step": 168470 + }, + { + "epoch": 7.12, + "learning_rate": 8.775254934384873e-05, + "loss": 0.3435, + "step": 168480 + }, + { + "epoch": 7.13, + "learning_rate": 8.771014861455618e-05, + "loss": 0.3623, + "step": 168490 + }, + { + "epoch": 7.13, + "learning_rate": 8.766774788526363e-05, + "loss": 0.3656, + "step": 168500 + }, + { + "epoch": 7.13, + "learning_rate": 8.762534715597109e-05, + "loss": 0.4326, + "step": 168510 + }, + { + "epoch": 7.13, + "learning_rate": 8.758294642667855e-05, + "loss": 0.361, + "step": 168520 + }, + { + "epoch": 7.13, + "learning_rate": 8.7540545697386e-05, + "loss": 0.3599, + "step": 168530 + }, + { + "epoch": 7.13, + "learning_rate": 8.749814496809346e-05, + "loss": 0.3826, + "step": 168540 + }, + { + "epoch": 7.13, + "learning_rate": 8.745574423880091e-05, + "loss": 0.4171, + "step": 168550 + }, + { + "epoch": 7.13, + "learning_rate": 8.741334350950837e-05, + "loss": 0.4546, + "step": 168560 + }, + { + "epoch": 7.13, + "learning_rate": 8.737094278021582e-05, + "loss": 0.3432, + "step": 168570 + }, + { + "epoch": 7.13, + "learning_rate": 8.732854205092327e-05, + "loss": 0.3323, + "step": 168580 + }, + { + "epoch": 7.13, + "learning_rate": 8.728614132163073e-05, + "loss": 0.3177, + "step": 168590 + }, + { + "epoch": 7.13, + "learning_rate": 8.724374059233819e-05, + "loss": 0.3889, + "step": 168600 + }, + { + "epoch": 7.13, + "learning_rate": 8.720133986304565e-05, + "loss": 0.3158, + "step": 168610 + }, + { + "epoch": 7.13, + "learning_rate": 8.71589391337531e-05, + "loss": 0.4235, + "step": 168620 + }, + { + "epoch": 7.13, + "learning_rate": 8.711653840446055e-05, + "loss": 0.3629, + "step": 168630 + }, + { + "epoch": 7.13, + "learning_rate": 8.707413767516802e-05, + "loss": 0.4256, + "step": 168640 + }, + { + "epoch": 7.13, + "learning_rate": 8.703173694587547e-05, + "loss": 0.3781, + "step": 168650 + }, + { + "epoch": 7.13, + "learning_rate": 8.698933621658293e-05, + "loss": 0.3412, + "step": 168660 + }, + { + "epoch": 7.13, + "learning_rate": 8.694693548729038e-05, + "loss": 0.3698, + "step": 168670 + }, + { + "epoch": 7.13, + "learning_rate": 8.690453475799785e-05, + "loss": 0.4038, + "step": 168680 + }, + { + "epoch": 7.13, + "learning_rate": 8.68621340287053e-05, + "loss": 0.4435, + "step": 168690 + }, + { + "epoch": 7.13, + "learning_rate": 8.681973329941275e-05, + "loss": 0.3654, + "step": 168700 + }, + { + "epoch": 7.13, + "learning_rate": 8.67773325701202e-05, + "loss": 0.4056, + "step": 168710 + }, + { + "epoch": 7.13, + "learning_rate": 8.673493184082767e-05, + "loss": 0.404, + "step": 168720 + }, + { + "epoch": 7.14, + "learning_rate": 8.669253111153513e-05, + "loss": 0.3634, + "step": 168730 + }, + { + "epoch": 7.14, + "learning_rate": 8.665013038224258e-05, + "loss": 0.3846, + "step": 168740 + }, + { + "epoch": 7.14, + "learning_rate": 8.660772965295003e-05, + "loss": 0.3637, + "step": 168750 + }, + { + "epoch": 7.14, + "learning_rate": 8.65653289236575e-05, + "loss": 0.3653, + "step": 168760 + }, + { + "epoch": 7.14, + "learning_rate": 8.652292819436495e-05, + "loss": 0.3549, + "step": 168770 + }, + { + "epoch": 7.14, + "learning_rate": 8.64805274650724e-05, + "loss": 0.3724, + "step": 168780 + }, + { + "epoch": 7.14, + "learning_rate": 8.643812673577986e-05, + "loss": 0.4418, + "step": 168790 + }, + { + "epoch": 7.14, + "learning_rate": 8.639572600648733e-05, + "loss": 0.3779, + "step": 168800 + }, + { + "epoch": 7.14, + "learning_rate": 8.635332527719478e-05, + "loss": 0.3678, + "step": 168810 + }, + { + "epoch": 7.14, + "learning_rate": 8.631092454790223e-05, + "loss": 0.4291, + "step": 168820 + }, + { + "epoch": 7.14, + "learning_rate": 8.626852381860968e-05, + "loss": 0.3617, + "step": 168830 + }, + { + "epoch": 7.14, + "learning_rate": 8.622612308931715e-05, + "loss": 0.4082, + "step": 168840 + }, + { + "epoch": 7.14, + "learning_rate": 8.61837223600246e-05, + "loss": 0.3544, + "step": 168850 + }, + { + "epoch": 7.14, + "learning_rate": 8.614132163073206e-05, + "loss": 0.3772, + "step": 168860 + }, + { + "epoch": 7.14, + "learning_rate": 8.609892090143951e-05, + "loss": 0.3742, + "step": 168870 + }, + { + "epoch": 7.14, + "learning_rate": 8.605652017214696e-05, + "loss": 0.3768, + "step": 168880 + }, + { + "epoch": 7.14, + "learning_rate": 8.601411944285442e-05, + "loss": 0.3792, + "step": 168890 + }, + { + "epoch": 7.14, + "learning_rate": 8.597171871356187e-05, + "loss": 0.3988, + "step": 168900 + }, + { + "epoch": 7.14, + "learning_rate": 8.592931798426932e-05, + "loss": 0.3788, + "step": 168910 + }, + { + "epoch": 7.14, + "learning_rate": 8.588691725497678e-05, + "loss": 0.3726, + "step": 168920 + }, + { + "epoch": 7.14, + "learning_rate": 8.584451652568424e-05, + "loss": 0.3605, + "step": 168930 + }, + { + "epoch": 7.14, + "learning_rate": 8.58021157963917e-05, + "loss": 0.3591, + "step": 168940 + }, + { + "epoch": 7.14, + "learning_rate": 8.575971506709915e-05, + "loss": 0.3571, + "step": 168950 + }, + { + "epoch": 7.15, + "learning_rate": 8.57173143378066e-05, + "loss": 0.3068, + "step": 168960 + }, + { + "epoch": 7.15, + "learning_rate": 8.567491360851407e-05, + "loss": 0.3259, + "step": 168970 + }, + { + "epoch": 7.15, + "learning_rate": 8.563251287922152e-05, + "loss": 0.4198, + "step": 168980 + }, + { + "epoch": 7.15, + "learning_rate": 8.559011214992898e-05, + "loss": 0.3973, + "step": 168990 + }, + { + "epoch": 7.15, + "learning_rate": 8.554771142063643e-05, + "loss": 0.3951, + "step": 169000 + }, + { + "epoch": 7.15, + "learning_rate": 8.55053106913439e-05, + "loss": 0.4613, + "step": 169010 + }, + { + "epoch": 7.15, + "learning_rate": 8.546290996205135e-05, + "loss": 0.3566, + "step": 169020 + }, + { + "epoch": 7.15, + "learning_rate": 8.54205092327588e-05, + "loss": 0.3906, + "step": 169030 + }, + { + "epoch": 7.15, + "learning_rate": 8.537810850346626e-05, + "loss": 0.3853, + "step": 169040 + }, + { + "epoch": 7.15, + "learning_rate": 8.533570777417372e-05, + "loss": 0.3528, + "step": 169050 + }, + { + "epoch": 7.15, + "learning_rate": 8.529330704488118e-05, + "loss": 0.3884, + "step": 169060 + }, + { + "epoch": 7.15, + "learning_rate": 8.525090631558863e-05, + "loss": 0.4508, + "step": 169070 + }, + { + "epoch": 7.15, + "learning_rate": 8.520850558629608e-05, + "loss": 0.3553, + "step": 169080 + }, + { + "epoch": 7.15, + "learning_rate": 8.516610485700355e-05, + "loss": 0.4123, + "step": 169090 + }, + { + "epoch": 7.15, + "learning_rate": 8.5123704127711e-05, + "loss": 0.3679, + "step": 169100 + }, + { + "epoch": 7.15, + "learning_rate": 8.508130339841846e-05, + "loss": 0.4319, + "step": 169110 + }, + { + "epoch": 7.15, + "learning_rate": 8.503890266912591e-05, + "loss": 0.3538, + "step": 169120 + }, + { + "epoch": 7.15, + "learning_rate": 8.499650193983338e-05, + "loss": 0.3503, + "step": 169130 + }, + { + "epoch": 7.15, + "learning_rate": 8.495410121054083e-05, + "loss": 0.4049, + "step": 169140 + }, + { + "epoch": 7.15, + "learning_rate": 8.491170048124828e-05, + "loss": 0.3766, + "step": 169150 + }, + { + "epoch": 7.15, + "learning_rate": 8.486929975195574e-05, + "loss": 0.3404, + "step": 169160 + }, + { + "epoch": 7.15, + "learning_rate": 8.48268990226632e-05, + "loss": 0.4344, + "step": 169170 + }, + { + "epoch": 7.15, + "learning_rate": 8.478449829337066e-05, + "loss": 0.3354, + "step": 169180 + }, + { + "epoch": 7.15, + "learning_rate": 8.474209756407811e-05, + "loss": 0.3328, + "step": 169190 + }, + { + "epoch": 7.16, + "learning_rate": 8.469969683478556e-05, + "loss": 0.3987, + "step": 169200 + }, + { + "epoch": 7.16, + "learning_rate": 8.465729610549302e-05, + "loss": 0.3279, + "step": 169210 + }, + { + "epoch": 7.16, + "learning_rate": 8.461489537620047e-05, + "loss": 0.4133, + "step": 169220 + }, + { + "epoch": 7.16, + "learning_rate": 8.457249464690792e-05, + "loss": 0.3863, + "step": 169230 + }, + { + "epoch": 7.16, + "learning_rate": 8.453009391761539e-05, + "loss": 0.4029, + "step": 169240 + }, + { + "epoch": 7.16, + "learning_rate": 8.448769318832284e-05, + "loss": 0.3473, + "step": 169250 + }, + { + "epoch": 7.16, + "learning_rate": 8.44452924590303e-05, + "loss": 0.3531, + "step": 169260 + }, + { + "epoch": 7.16, + "learning_rate": 8.440289172973775e-05, + "loss": 0.379, + "step": 169270 + }, + { + "epoch": 7.16, + "learning_rate": 8.43604910004452e-05, + "loss": 0.3671, + "step": 169280 + }, + { + "epoch": 7.16, + "learning_rate": 8.431809027115267e-05, + "loss": 0.3688, + "step": 169290 + }, + { + "epoch": 7.16, + "learning_rate": 8.427568954186012e-05, + "loss": 0.404, + "step": 169300 + }, + { + "epoch": 7.16, + "learning_rate": 8.423328881256758e-05, + "loss": 0.352, + "step": 169310 + }, + { + "epoch": 7.16, + "learning_rate": 8.419088808327503e-05, + "loss": 0.3647, + "step": 169320 + }, + { + "epoch": 7.16, + "learning_rate": 8.41484873539825e-05, + "loss": 0.3826, + "step": 169330 + }, + { + "epoch": 7.16, + "learning_rate": 8.410608662468995e-05, + "loss": 0.3898, + "step": 169340 + }, + { + "epoch": 7.16, + "learning_rate": 8.40636858953974e-05, + "loss": 0.3766, + "step": 169350 + }, + { + "epoch": 7.16, + "learning_rate": 8.402128516610486e-05, + "loss": 0.4087, + "step": 169360 + }, + { + "epoch": 7.16, + "learning_rate": 8.397888443681232e-05, + "loss": 0.3444, + "step": 169370 + }, + { + "epoch": 7.16, + "learning_rate": 8.393648370751978e-05, + "loss": 0.3529, + "step": 169380 + }, + { + "epoch": 7.16, + "learning_rate": 8.389408297822723e-05, + "loss": 0.3495, + "step": 169390 + }, + { + "epoch": 7.16, + "learning_rate": 8.385168224893468e-05, + "loss": 0.3483, + "step": 169400 + }, + { + "epoch": 7.16, + "learning_rate": 8.380928151964215e-05, + "loss": 0.3632, + "step": 169410 + }, + { + "epoch": 7.16, + "learning_rate": 8.37668807903496e-05, + "loss": 0.3955, + "step": 169420 + }, + { + "epoch": 7.16, + "learning_rate": 8.372448006105706e-05, + "loss": 0.4494, + "step": 169430 + }, + { + "epoch": 7.17, + "learning_rate": 8.368207933176451e-05, + "loss": 0.3963, + "step": 169440 + }, + { + "epoch": 7.17, + "learning_rate": 8.363967860247198e-05, + "loss": 0.3791, + "step": 169450 + }, + { + "epoch": 7.17, + "learning_rate": 8.359727787317943e-05, + "loss": 0.4079, + "step": 169460 + }, + { + "epoch": 7.17, + "learning_rate": 8.355487714388688e-05, + "loss": 0.4172, + "step": 169470 + }, + { + "epoch": 7.17, + "learning_rate": 8.351247641459433e-05, + "loss": 0.4115, + "step": 169480 + }, + { + "epoch": 7.17, + "learning_rate": 8.34700756853018e-05, + "loss": 0.3809, + "step": 169490 + }, + { + "epoch": 7.17, + "learning_rate": 8.342767495600925e-05, + "loss": 0.4046, + "step": 169500 + }, + { + "epoch": 7.17, + "learning_rate": 8.338527422671671e-05, + "loss": 0.3889, + "step": 169510 + }, + { + "epoch": 7.17, + "learning_rate": 8.334287349742416e-05, + "loss": 0.4262, + "step": 169520 + }, + { + "epoch": 7.17, + "learning_rate": 8.330047276813161e-05, + "loss": 0.3818, + "step": 169530 + }, + { + "epoch": 7.17, + "learning_rate": 8.325807203883907e-05, + "loss": 0.455, + "step": 169540 + }, + { + "epoch": 7.17, + "learning_rate": 8.321567130954653e-05, + "loss": 0.4179, + "step": 169550 + }, + { + "epoch": 7.17, + "learning_rate": 8.317327058025399e-05, + "loss": 0.4042, + "step": 169560 + }, + { + "epoch": 7.17, + "learning_rate": 8.313086985096144e-05, + "loss": 0.3782, + "step": 169570 + }, + { + "epoch": 7.17, + "learning_rate": 8.30884691216689e-05, + "loss": 0.3297, + "step": 169580 + }, + { + "epoch": 7.17, + "learning_rate": 8.304606839237635e-05, + "loss": 0.3988, + "step": 169590 + }, + { + "epoch": 7.17, + "learning_rate": 8.30036676630838e-05, + "loss": 0.3993, + "step": 169600 + }, + { + "epoch": 7.17, + "learning_rate": 8.296126693379127e-05, + "loss": 0.3092, + "step": 169610 + }, + { + "epoch": 7.17, + "learning_rate": 8.291886620449872e-05, + "loss": 0.3741, + "step": 169620 + }, + { + "epoch": 7.17, + "learning_rate": 8.287646547520617e-05, + "loss": 0.3291, + "step": 169630 + }, + { + "epoch": 7.17, + "learning_rate": 8.283406474591363e-05, + "loss": 0.3862, + "step": 169640 + }, + { + "epoch": 7.17, + "learning_rate": 8.279166401662108e-05, + "loss": 0.3641, + "step": 169650 + }, + { + "epoch": 7.17, + "learning_rate": 8.274926328732855e-05, + "loss": 0.3589, + "step": 169660 + }, + { + "epoch": 7.18, + "learning_rate": 8.2706862558036e-05, + "loss": 0.451, + "step": 169670 + }, + { + "epoch": 7.18, + "learning_rate": 8.266446182874345e-05, + "loss": 0.4004, + "step": 169680 + }, + { + "epoch": 7.18, + "learning_rate": 8.262206109945091e-05, + "loss": 0.3653, + "step": 169690 + }, + { + "epoch": 7.18, + "learning_rate": 8.257966037015837e-05, + "loss": 0.3105, + "step": 169700 + }, + { + "epoch": 7.18, + "learning_rate": 8.253725964086583e-05, + "loss": 0.378, + "step": 169710 + }, + { + "epoch": 7.18, + "learning_rate": 8.249485891157328e-05, + "loss": 0.3644, + "step": 169720 + }, + { + "epoch": 7.18, + "learning_rate": 8.245245818228073e-05, + "loss": 0.3662, + "step": 169730 + }, + { + "epoch": 7.18, + "learning_rate": 8.24100574529882e-05, + "loss": 0.386, + "step": 169740 + }, + { + "epoch": 7.18, + "learning_rate": 8.236765672369565e-05, + "loss": 0.3114, + "step": 169750 + }, + { + "epoch": 7.18, + "learning_rate": 8.232525599440311e-05, + "loss": 0.3947, + "step": 169760 + }, + { + "epoch": 7.18, + "learning_rate": 8.228285526511056e-05, + "loss": 0.3149, + "step": 169770 + }, + { + "epoch": 7.18, + "learning_rate": 8.224045453581803e-05, + "loss": 0.3703, + "step": 169780 + }, + { + "epoch": 7.18, + "learning_rate": 8.219805380652548e-05, + "loss": 0.3626, + "step": 169790 + }, + { + "epoch": 7.18, + "learning_rate": 8.215565307723293e-05, + "loss": 0.4188, + "step": 169800 + }, + { + "epoch": 7.18, + "learning_rate": 8.211325234794039e-05, + "loss": 0.4035, + "step": 169810 + }, + { + "epoch": 7.18, + "learning_rate": 8.207085161864785e-05, + "loss": 0.417, + "step": 169820 + }, + { + "epoch": 7.18, + "learning_rate": 8.20284508893553e-05, + "loss": 0.3851, + "step": 169830 + }, + { + "epoch": 7.18, + "learning_rate": 8.198605016006276e-05, + "loss": 0.3381, + "step": 169840 + }, + { + "epoch": 7.18, + "learning_rate": 8.194364943077021e-05, + "loss": 0.3568, + "step": 169850 + }, + { + "epoch": 7.18, + "learning_rate": 8.190124870147767e-05, + "loss": 0.3612, + "step": 169860 + }, + { + "epoch": 7.18, + "learning_rate": 8.185884797218513e-05, + "loss": 0.4116, + "step": 169870 + }, + { + "epoch": 7.18, + "learning_rate": 8.181644724289259e-05, + "loss": 0.3835, + "step": 169880 + }, + { + "epoch": 7.18, + "learning_rate": 8.177404651360004e-05, + "loss": 0.4632, + "step": 169890 + }, + { + "epoch": 7.18, + "learning_rate": 8.173164578430749e-05, + "loss": 0.368, + "step": 169900 + }, + { + "epoch": 7.19, + "learning_rate": 8.168924505501495e-05, + "loss": 0.3429, + "step": 169910 + }, + { + "epoch": 7.19, + "learning_rate": 8.16468443257224e-05, + "loss": 0.436, + "step": 169920 + }, + { + "epoch": 7.19, + "learning_rate": 8.160444359642985e-05, + "loss": 0.3992, + "step": 169930 + }, + { + "epoch": 7.19, + "learning_rate": 8.156204286713732e-05, + "loss": 0.3543, + "step": 169940 + }, + { + "epoch": 7.19, + "learning_rate": 8.151964213784477e-05, + "loss": 0.3592, + "step": 169950 + }, + { + "epoch": 7.19, + "learning_rate": 8.147724140855223e-05, + "loss": 0.2943, + "step": 169960 + }, + { + "epoch": 7.19, + "learning_rate": 8.143484067925968e-05, + "loss": 0.3595, + "step": 169970 + }, + { + "epoch": 7.19, + "learning_rate": 8.139243994996715e-05, + "loss": 0.4172, + "step": 169980 + }, + { + "epoch": 7.19, + "learning_rate": 8.13500392206746e-05, + "loss": 0.3513, + "step": 169990 + }, + { + "epoch": 7.19, + "learning_rate": 8.130763849138205e-05, + "loss": 0.4688, + "step": 170000 + }, + { + "epoch": 7.19, + "eval_loss": 0.6409904956817627, + "eval_runtime": 337.7795, + "eval_samples_per_second": 15.557, + "eval_steps_per_second": 3.89, + "step": 170000 + }, + { + "epoch": 7.19, + "learning_rate": 8.12652377620895e-05, + "loss": 0.3743, + "step": 170010 + }, + { + "epoch": 7.19, + "learning_rate": 8.122283703279697e-05, + "loss": 0.3994, + "step": 170020 + }, + { + "epoch": 7.19, + "learning_rate": 8.118043630350443e-05, + "loss": 0.4012, + "step": 170030 + }, + { + "epoch": 7.19, + "learning_rate": 8.113803557421188e-05, + "loss": 0.4289, + "step": 170040 + }, + { + "epoch": 7.19, + "learning_rate": 8.109563484491933e-05, + "loss": 0.4133, + "step": 170050 + }, + { + "epoch": 7.19, + "learning_rate": 8.10532341156268e-05, + "loss": 0.4181, + "step": 170060 + }, + { + "epoch": 7.19, + "learning_rate": 8.101083338633425e-05, + "loss": 0.4194, + "step": 170070 + }, + { + "epoch": 7.19, + "learning_rate": 8.09684326570417e-05, + "loss": 0.3584, + "step": 170080 + }, + { + "epoch": 7.19, + "learning_rate": 8.092603192774916e-05, + "loss": 0.4197, + "step": 170090 + }, + { + "epoch": 7.19, + "learning_rate": 8.088363119845663e-05, + "loss": 0.3714, + "step": 170100 + }, + { + "epoch": 7.19, + "learning_rate": 8.084123046916408e-05, + "loss": 0.3981, + "step": 170110 + }, + { + "epoch": 7.19, + "learning_rate": 8.079882973987153e-05, + "loss": 0.3119, + "step": 170120 + }, + { + "epoch": 7.19, + "learning_rate": 8.075642901057898e-05, + "loss": 0.433, + "step": 170130 + }, + { + "epoch": 7.19, + "learning_rate": 8.071402828128645e-05, + "loss": 0.4269, + "step": 170140 + }, + { + "epoch": 7.2, + "learning_rate": 8.06716275519939e-05, + "loss": 0.409, + "step": 170150 + }, + { + "epoch": 7.2, + "learning_rate": 8.062922682270136e-05, + "loss": 0.4083, + "step": 170160 + }, + { + "epoch": 7.2, + "learning_rate": 8.058682609340881e-05, + "loss": 0.2996, + "step": 170170 + }, + { + "epoch": 7.2, + "learning_rate": 8.054442536411628e-05, + "loss": 0.3121, + "step": 170180 + }, + { + "epoch": 7.2, + "learning_rate": 8.050202463482373e-05, + "loss": 0.3983, + "step": 170190 + }, + { + "epoch": 7.2, + "learning_rate": 8.045962390553118e-05, + "loss": 0.3894, + "step": 170200 + }, + { + "epoch": 7.2, + "learning_rate": 8.041722317623864e-05, + "loss": 0.3806, + "step": 170210 + }, + { + "epoch": 7.2, + "learning_rate": 8.037482244694609e-05, + "loss": 0.3836, + "step": 170220 + }, + { + "epoch": 7.2, + "learning_rate": 8.033242171765354e-05, + "loss": 0.3592, + "step": 170230 + }, + { + "epoch": 7.2, + "learning_rate": 8.0290020988361e-05, + "loss": 0.4927, + "step": 170240 + }, + { + "epoch": 7.2, + "learning_rate": 8.024762025906845e-05, + "loss": 0.4103, + "step": 170250 + }, + { + "epoch": 7.2, + "learning_rate": 8.020521952977592e-05, + "loss": 0.372, + "step": 170260 + }, + { + "epoch": 7.2, + "learning_rate": 8.016281880048337e-05, + "loss": 0.3087, + "step": 170270 + }, + { + "epoch": 7.2, + "learning_rate": 8.012041807119082e-05, + "loss": 0.3686, + "step": 170280 + }, + { + "epoch": 7.2, + "learning_rate": 8.007801734189828e-05, + "loss": 0.4004, + "step": 170290 + }, + { + "epoch": 7.2, + "learning_rate": 8.003561661260574e-05, + "loss": 0.3754, + "step": 170300 + }, + { + "epoch": 7.2, + "learning_rate": 7.99932158833132e-05, + "loss": 0.3689, + "step": 170310 + }, + { + "epoch": 7.2, + "learning_rate": 7.995081515402065e-05, + "loss": 0.3984, + "step": 170320 + }, + { + "epoch": 7.2, + "learning_rate": 7.99084144247281e-05, + "loss": 0.3923, + "step": 170330 + }, + { + "epoch": 7.2, + "learning_rate": 7.986601369543556e-05, + "loss": 0.3257, + "step": 170340 + }, + { + "epoch": 7.2, + "learning_rate": 7.982361296614302e-05, + "loss": 0.3839, + "step": 170350 + }, + { + "epoch": 7.2, + "learning_rate": 7.978121223685048e-05, + "loss": 0.3412, + "step": 170360 + }, + { + "epoch": 7.2, + "learning_rate": 7.973881150755793e-05, + "loss": 0.3416, + "step": 170370 + }, + { + "epoch": 7.21, + "learning_rate": 7.969641077826538e-05, + "loss": 0.4021, + "step": 170380 + }, + { + "epoch": 7.21, + "learning_rate": 7.965401004897285e-05, + "loss": 0.4112, + "step": 170390 + }, + { + "epoch": 7.21, + "learning_rate": 7.96116093196803e-05, + "loss": 0.3572, + "step": 170400 + }, + { + "epoch": 7.21, + "learning_rate": 7.956920859038776e-05, + "loss": 0.3205, + "step": 170410 + }, + { + "epoch": 7.21, + "learning_rate": 7.952680786109521e-05, + "loss": 0.3719, + "step": 170420 + }, + { + "epoch": 7.21, + "learning_rate": 7.948440713180268e-05, + "loss": 0.3753, + "step": 170430 + }, + { + "epoch": 7.21, + "learning_rate": 7.944200640251013e-05, + "loss": 0.2839, + "step": 170440 + }, + { + "epoch": 7.21, + "learning_rate": 7.939960567321758e-05, + "loss": 0.3807, + "step": 170450 + }, + { + "epoch": 7.21, + "learning_rate": 7.935720494392504e-05, + "loss": 0.3959, + "step": 170460 + }, + { + "epoch": 7.21, + "learning_rate": 7.93148042146325e-05, + "loss": 0.4145, + "step": 170470 + }, + { + "epoch": 7.21, + "learning_rate": 7.927240348533996e-05, + "loss": 0.4156, + "step": 170480 + }, + { + "epoch": 7.21, + "learning_rate": 7.923000275604741e-05, + "loss": 0.3069, + "step": 170490 + }, + { + "epoch": 7.21, + "learning_rate": 7.918760202675486e-05, + "loss": 0.416, + "step": 170500 + }, + { + "epoch": 7.21, + "learning_rate": 7.914520129746233e-05, + "loss": 0.419, + "step": 170510 + }, + { + "epoch": 7.21, + "learning_rate": 7.910280056816978e-05, + "loss": 0.3591, + "step": 170520 + }, + { + "epoch": 7.21, + "learning_rate": 7.906039983887724e-05, + "loss": 0.4125, + "step": 170530 + }, + { + "epoch": 7.21, + "learning_rate": 7.901799910958469e-05, + "loss": 0.3318, + "step": 170540 + }, + { + "epoch": 7.21, + "learning_rate": 7.897559838029214e-05, + "loss": 0.356, + "step": 170550 + }, + { + "epoch": 7.21, + "learning_rate": 7.89331976509996e-05, + "loss": 0.3338, + "step": 170560 + }, + { + "epoch": 7.21, + "learning_rate": 7.889079692170705e-05, + "loss": 0.4691, + "step": 170570 + }, + { + "epoch": 7.21, + "learning_rate": 7.88483961924145e-05, + "loss": 0.4429, + "step": 170580 + }, + { + "epoch": 7.21, + "learning_rate": 7.880599546312197e-05, + "loss": 0.4111, + "step": 170590 + }, + { + "epoch": 7.21, + "learning_rate": 7.876359473382942e-05, + "loss": 0.4285, + "step": 170600 + }, + { + "epoch": 7.21, + "learning_rate": 7.872119400453688e-05, + "loss": 0.3986, + "step": 170610 + }, + { + "epoch": 7.22, + "learning_rate": 7.867879327524433e-05, + "loss": 0.4314, + "step": 170620 + }, + { + "epoch": 7.22, + "learning_rate": 7.86363925459518e-05, + "loss": 0.3045, + "step": 170630 + }, + { + "epoch": 7.22, + "learning_rate": 7.859399181665925e-05, + "loss": 0.4099, + "step": 170640 + }, + { + "epoch": 7.22, + "learning_rate": 7.85515910873667e-05, + "loss": 0.387, + "step": 170650 + }, + { + "epoch": 7.22, + "learning_rate": 7.850919035807416e-05, + "loss": 0.3686, + "step": 170660 + }, + { + "epoch": 7.22, + "learning_rate": 7.846678962878162e-05, + "loss": 0.3773, + "step": 170670 + }, + { + "epoch": 7.22, + "learning_rate": 7.842438889948908e-05, + "loss": 0.411, + "step": 170680 + }, + { + "epoch": 7.22, + "learning_rate": 7.838198817019653e-05, + "loss": 0.3584, + "step": 170690 + }, + { + "epoch": 7.22, + "learning_rate": 7.833958744090398e-05, + "loss": 0.3684, + "step": 170700 + }, + { + "epoch": 7.22, + "learning_rate": 7.829718671161145e-05, + "loss": 0.3248, + "step": 170710 + }, + { + "epoch": 7.22, + "learning_rate": 7.82547859823189e-05, + "loss": 0.3286, + "step": 170720 + }, + { + "epoch": 7.22, + "learning_rate": 7.821238525302636e-05, + "loss": 0.3386, + "step": 170730 + }, + { + "epoch": 7.22, + "learning_rate": 7.816998452373381e-05, + "loss": 0.3854, + "step": 170740 + }, + { + "epoch": 7.22, + "learning_rate": 7.812758379444128e-05, + "loss": 0.3151, + "step": 170750 + }, + { + "epoch": 7.22, + "learning_rate": 7.808518306514873e-05, + "loss": 0.3341, + "step": 170760 + }, + { + "epoch": 7.22, + "learning_rate": 7.804278233585618e-05, + "loss": 0.3462, + "step": 170770 + }, + { + "epoch": 7.22, + "learning_rate": 7.800038160656364e-05, + "loss": 0.378, + "step": 170780 + }, + { + "epoch": 7.22, + "learning_rate": 7.79579808772711e-05, + "loss": 0.3625, + "step": 170790 + }, + { + "epoch": 7.22, + "learning_rate": 7.791558014797856e-05, + "loss": 0.4656, + "step": 170800 + }, + { + "epoch": 7.22, + "learning_rate": 7.787317941868601e-05, + "loss": 0.4071, + "step": 170810 + }, + { + "epoch": 7.22, + "learning_rate": 7.783077868939346e-05, + "loss": 0.3687, + "step": 170820 + }, + { + "epoch": 7.22, + "learning_rate": 7.778837796010093e-05, + "loss": 0.3901, + "step": 170830 + }, + { + "epoch": 7.22, + "learning_rate": 7.774597723080838e-05, + "loss": 0.3837, + "step": 170840 + }, + { + "epoch": 7.23, + "learning_rate": 7.770357650151583e-05, + "loss": 0.3887, + "step": 170850 + }, + { + "epoch": 7.23, + "learning_rate": 7.766117577222329e-05, + "loss": 0.4009, + "step": 170860 + }, + { + "epoch": 7.23, + "learning_rate": 7.761877504293074e-05, + "loss": 0.3983, + "step": 170870 + }, + { + "epoch": 7.23, + "learning_rate": 7.75763743136382e-05, + "loss": 0.3567, + "step": 170880 + }, + { + "epoch": 7.23, + "learning_rate": 7.753397358434565e-05, + "loss": 0.4064, + "step": 170890 + }, + { + "epoch": 7.23, + "learning_rate": 7.74915728550531e-05, + "loss": 0.434, + "step": 170900 + }, + { + "epoch": 7.23, + "learning_rate": 7.744917212576057e-05, + "loss": 0.3693, + "step": 170910 + }, + { + "epoch": 7.23, + "learning_rate": 7.740677139646802e-05, + "loss": 0.3687, + "step": 170920 + }, + { + "epoch": 7.23, + "learning_rate": 7.736437066717547e-05, + "loss": 0.3681, + "step": 170930 + }, + { + "epoch": 7.23, + "learning_rate": 7.732196993788293e-05, + "loss": 0.4438, + "step": 170940 + }, + { + "epoch": 7.23, + "learning_rate": 7.72795692085904e-05, + "loss": 0.3637, + "step": 170950 + }, + { + "epoch": 7.23, + "learning_rate": 7.723716847929785e-05, + "loss": 0.352, + "step": 170960 + }, + { + "epoch": 7.23, + "learning_rate": 7.71947677500053e-05, + "loss": 0.3578, + "step": 170970 + }, + { + "epoch": 7.23, + "learning_rate": 7.715236702071275e-05, + "loss": 0.368, + "step": 170980 + }, + { + "epoch": 7.23, + "learning_rate": 7.710996629142022e-05, + "loss": 0.3545, + "step": 170990 + }, + { + "epoch": 7.23, + "learning_rate": 7.706756556212767e-05, + "loss": 0.3276, + "step": 171000 + }, + { + "epoch": 7.23, + "learning_rate": 7.702516483283513e-05, + "loss": 0.4462, + "step": 171010 + }, + { + "epoch": 7.23, + "learning_rate": 7.698276410354258e-05, + "loss": 0.3985, + "step": 171020 + }, + { + "epoch": 7.23, + "learning_rate": 7.694036337425005e-05, + "loss": 0.3256, + "step": 171030 + }, + { + "epoch": 7.23, + "learning_rate": 7.68979626449575e-05, + "loss": 0.396, + "step": 171040 + }, + { + "epoch": 7.23, + "learning_rate": 7.685556191566495e-05, + "loss": 0.3943, + "step": 171050 + }, + { + "epoch": 7.23, + "learning_rate": 7.681316118637241e-05, + "loss": 0.3621, + "step": 171060 + }, + { + "epoch": 7.23, + "learning_rate": 7.677076045707986e-05, + "loss": 0.3588, + "step": 171070 + }, + { + "epoch": 7.23, + "learning_rate": 7.672835972778733e-05, + "loss": 0.3215, + "step": 171080 + }, + { + "epoch": 7.24, + "learning_rate": 7.668595899849478e-05, + "loss": 0.3224, + "step": 171090 + }, + { + "epoch": 7.24, + "learning_rate": 7.664355826920223e-05, + "loss": 0.3853, + "step": 171100 + }, + { + "epoch": 7.24, + "learning_rate": 7.660115753990969e-05, + "loss": 0.3481, + "step": 171110 + }, + { + "epoch": 7.24, + "learning_rate": 7.655875681061715e-05, + "loss": 0.419, + "step": 171120 + }, + { + "epoch": 7.24, + "learning_rate": 7.65163560813246e-05, + "loss": 0.4104, + "step": 171130 + }, + { + "epoch": 7.24, + "learning_rate": 7.647395535203206e-05, + "loss": 0.3409, + "step": 171140 + }, + { + "epoch": 7.24, + "learning_rate": 7.643155462273951e-05, + "loss": 0.329, + "step": 171150 + }, + { + "epoch": 7.24, + "learning_rate": 7.638915389344698e-05, + "loss": 0.3721, + "step": 171160 + }, + { + "epoch": 7.24, + "learning_rate": 7.634675316415443e-05, + "loss": 0.2699, + "step": 171170 + }, + { + "epoch": 7.24, + "learning_rate": 7.630435243486189e-05, + "loss": 0.3801, + "step": 171180 + }, + { + "epoch": 7.24, + "learning_rate": 7.626195170556934e-05, + "loss": 0.3383, + "step": 171190 + }, + { + "epoch": 7.24, + "learning_rate": 7.621955097627679e-05, + "loss": 0.465, + "step": 171200 + }, + { + "epoch": 7.24, + "learning_rate": 7.617715024698425e-05, + "loss": 0.4145, + "step": 171210 + }, + { + "epoch": 7.24, + "learning_rate": 7.61347495176917e-05, + "loss": 0.3599, + "step": 171220 + }, + { + "epoch": 7.24, + "learning_rate": 7.609234878839915e-05, + "loss": 0.392, + "step": 171230 + }, + { + "epoch": 7.24, + "learning_rate": 7.604994805910662e-05, + "loss": 0.3437, + "step": 171240 + }, + { + "epoch": 7.24, + "learning_rate": 7.600754732981407e-05, + "loss": 0.4092, + "step": 171250 + }, + { + "epoch": 7.24, + "learning_rate": 7.596514660052153e-05, + "loss": 0.3389, + "step": 171260 + }, + { + "epoch": 7.24, + "learning_rate": 7.592274587122898e-05, + "loss": 0.3992, + "step": 171270 + }, + { + "epoch": 7.24, + "learning_rate": 7.588034514193645e-05, + "loss": 0.361, + "step": 171280 + }, + { + "epoch": 7.24, + "learning_rate": 7.58379444126439e-05, + "loss": 0.3174, + "step": 171290 + }, + { + "epoch": 7.24, + "learning_rate": 7.579554368335135e-05, + "loss": 0.3782, + "step": 171300 + }, + { + "epoch": 7.24, + "learning_rate": 7.57531429540588e-05, + "loss": 0.3276, + "step": 171310 + }, + { + "epoch": 7.24, + "learning_rate": 7.571074222476627e-05, + "loss": 0.3997, + "step": 171320 + }, + { + "epoch": 7.25, + "learning_rate": 7.566834149547373e-05, + "loss": 0.4079, + "step": 171330 + }, + { + "epoch": 7.25, + "learning_rate": 7.562594076618118e-05, + "loss": 0.405, + "step": 171340 + }, + { + "epoch": 7.25, + "learning_rate": 7.558354003688863e-05, + "loss": 0.4079, + "step": 171350 + }, + { + "epoch": 7.25, + "learning_rate": 7.55411393075961e-05, + "loss": 0.3557, + "step": 171360 + }, + { + "epoch": 7.25, + "learning_rate": 7.549873857830355e-05, + "loss": 0.374, + "step": 171370 + }, + { + "epoch": 7.25, + "learning_rate": 7.5456337849011e-05, + "loss": 0.3766, + "step": 171380 + }, + { + "epoch": 7.25, + "learning_rate": 7.541393711971846e-05, + "loss": 0.4272, + "step": 171390 + }, + { + "epoch": 7.25, + "learning_rate": 7.537153639042593e-05, + "loss": 0.402, + "step": 171400 + }, + { + "epoch": 7.25, + "learning_rate": 7.532913566113338e-05, + "loss": 0.3716, + "step": 171410 + }, + { + "epoch": 7.25, + "learning_rate": 7.528673493184083e-05, + "loss": 0.3726, + "step": 171420 + }, + { + "epoch": 7.25, + "learning_rate": 7.524433420254829e-05, + "loss": 0.413, + "step": 171430 + }, + { + "epoch": 7.25, + "learning_rate": 7.520193347325575e-05, + "loss": 0.3826, + "step": 171440 + }, + { + "epoch": 7.25, + "learning_rate": 7.51595327439632e-05, + "loss": 0.3967, + "step": 171450 + }, + { + "epoch": 7.25, + "learning_rate": 7.511713201467066e-05, + "loss": 0.3142, + "step": 171460 + }, + { + "epoch": 7.25, + "learning_rate": 7.507473128537811e-05, + "loss": 0.3711, + "step": 171470 + }, + { + "epoch": 7.25, + "learning_rate": 7.503233055608558e-05, + "loss": 0.3387, + "step": 171480 + }, + { + "epoch": 7.25, + "learning_rate": 7.498992982679303e-05, + "loss": 0.3564, + "step": 171490 + }, + { + "epoch": 7.25, + "learning_rate": 7.494752909750048e-05, + "loss": 0.3998, + "step": 171500 + }, + { + "epoch": 7.25, + "learning_rate": 7.490512836820794e-05, + "loss": 0.3978, + "step": 171510 + }, + { + "epoch": 7.25, + "learning_rate": 7.486272763891539e-05, + "loss": 0.3881, + "step": 171520 + }, + { + "epoch": 7.25, + "learning_rate": 7.482032690962284e-05, + "loss": 0.4302, + "step": 171530 + }, + { + "epoch": 7.25, + "learning_rate": 7.47779261803303e-05, + "loss": 0.393, + "step": 171540 + }, + { + "epoch": 7.25, + "learning_rate": 7.473552545103775e-05, + "loss": 0.4053, + "step": 171550 + }, + { + "epoch": 7.26, + "learning_rate": 7.469312472174522e-05, + "loss": 0.4151, + "step": 171560 + }, + { + "epoch": 7.26, + "learning_rate": 7.465072399245267e-05, + "loss": 0.3574, + "step": 171570 + }, + { + "epoch": 7.26, + "learning_rate": 7.460832326316012e-05, + "loss": 0.3773, + "step": 171580 + }, + { + "epoch": 7.26, + "learning_rate": 7.456592253386758e-05, + "loss": 0.3778, + "step": 171590 + }, + { + "epoch": 7.26, + "learning_rate": 7.452352180457504e-05, + "loss": 0.3442, + "step": 171600 + }, + { + "epoch": 7.26, + "learning_rate": 7.44811210752825e-05, + "loss": 0.3802, + "step": 171610 + }, + { + "epoch": 7.26, + "learning_rate": 7.443872034598995e-05, + "loss": 0.3441, + "step": 171620 + }, + { + "epoch": 7.26, + "learning_rate": 7.43963196166974e-05, + "loss": 0.3487, + "step": 171630 + }, + { + "epoch": 7.26, + "learning_rate": 7.435391888740487e-05, + "loss": 0.3621, + "step": 171640 + }, + { + "epoch": 7.26, + "learning_rate": 7.431151815811232e-05, + "loss": 0.3403, + "step": 171650 + }, + { + "epoch": 7.26, + "learning_rate": 7.426911742881978e-05, + "loss": 0.3909, + "step": 171660 + }, + { + "epoch": 7.26, + "learning_rate": 7.422671669952723e-05, + "loss": 0.3029, + "step": 171670 + }, + { + "epoch": 7.26, + "learning_rate": 7.41843159702347e-05, + "loss": 0.382, + "step": 171680 + }, + { + "epoch": 7.26, + "learning_rate": 7.414191524094215e-05, + "loss": 0.383, + "step": 171690 + }, + { + "epoch": 7.26, + "learning_rate": 7.40995145116496e-05, + "loss": 0.3499, + "step": 171700 + }, + { + "epoch": 7.26, + "learning_rate": 7.405711378235706e-05, + "loss": 0.4027, + "step": 171710 + }, + { + "epoch": 7.26, + "learning_rate": 7.401471305306452e-05, + "loss": 0.3815, + "step": 171720 + }, + { + "epoch": 7.26, + "learning_rate": 7.397231232377198e-05, + "loss": 0.3601, + "step": 171730 + }, + { + "epoch": 7.26, + "learning_rate": 7.392991159447943e-05, + "loss": 0.3923, + "step": 171740 + }, + { + "epoch": 7.26, + "learning_rate": 7.388751086518688e-05, + "loss": 0.3103, + "step": 171750 + }, + { + "epoch": 7.26, + "learning_rate": 7.384511013589435e-05, + "loss": 0.3598, + "step": 171760 + }, + { + "epoch": 7.26, + "learning_rate": 7.38027094066018e-05, + "loss": 0.3936, + "step": 171770 + }, + { + "epoch": 7.26, + "learning_rate": 7.376030867730926e-05, + "loss": 0.3, + "step": 171780 + }, + { + "epoch": 7.26, + "learning_rate": 7.371790794801671e-05, + "loss": 0.3332, + "step": 171790 + }, + { + "epoch": 7.27, + "learning_rate": 7.367550721872416e-05, + "loss": 0.3595, + "step": 171800 + }, + { + "epoch": 7.27, + "learning_rate": 7.363310648943163e-05, + "loss": 0.3371, + "step": 171810 + }, + { + "epoch": 7.27, + "learning_rate": 7.359070576013908e-05, + "loss": 0.4136, + "step": 171820 + }, + { + "epoch": 7.27, + "learning_rate": 7.354830503084654e-05, + "loss": 0.3537, + "step": 171830 + }, + { + "epoch": 7.27, + "learning_rate": 7.350590430155399e-05, + "loss": 0.3894, + "step": 171840 + }, + { + "epoch": 7.27, + "learning_rate": 7.346350357226144e-05, + "loss": 0.3667, + "step": 171850 + }, + { + "epoch": 7.27, + "learning_rate": 7.34211028429689e-05, + "loss": 0.3133, + "step": 171860 + }, + { + "epoch": 7.27, + "learning_rate": 7.337870211367636e-05, + "loss": 0.4134, + "step": 171870 + }, + { + "epoch": 7.27, + "learning_rate": 7.333630138438382e-05, + "loss": 0.3117, + "step": 171880 + }, + { + "epoch": 7.27, + "learning_rate": 7.329390065509127e-05, + "loss": 0.3367, + "step": 171890 + }, + { + "epoch": 7.27, + "learning_rate": 7.325149992579872e-05, + "loss": 0.401, + "step": 171900 + }, + { + "epoch": 7.27, + "learning_rate": 7.320909919650618e-05, + "loss": 0.3853, + "step": 171910 + }, + { + "epoch": 7.27, + "learning_rate": 7.316669846721363e-05, + "loss": 0.3794, + "step": 171920 + }, + { + "epoch": 7.27, + "learning_rate": 7.31242977379211e-05, + "loss": 0.324, + "step": 171930 + }, + { + "epoch": 7.27, + "learning_rate": 7.308189700862855e-05, + "loss": 0.3347, + "step": 171940 + }, + { + "epoch": 7.27, + "learning_rate": 7.3039496279336e-05, + "loss": 0.3859, + "step": 171950 + }, + { + "epoch": 7.27, + "learning_rate": 7.299709555004346e-05, + "loss": 0.3902, + "step": 171960 + }, + { + "epoch": 7.27, + "learning_rate": 7.295469482075092e-05, + "loss": 0.3681, + "step": 171970 + }, + { + "epoch": 7.27, + "learning_rate": 7.291229409145838e-05, + "loss": 0.3484, + "step": 171980 + }, + { + "epoch": 7.27, + "learning_rate": 7.286989336216583e-05, + "loss": 0.3982, + "step": 171990 + }, + { + "epoch": 7.27, + "learning_rate": 7.282749263287328e-05, + "loss": 0.3683, + "step": 172000 + }, + { + "epoch": 7.27, + "learning_rate": 7.278509190358075e-05, + "loss": 0.3707, + "step": 172010 + }, + { + "epoch": 7.27, + "learning_rate": 7.27426911742882e-05, + "loss": 0.3684, + "step": 172020 + }, + { + "epoch": 7.27, + "learning_rate": 7.270029044499566e-05, + "loss": 0.4124, + "step": 172030 + }, + { + "epoch": 7.28, + "learning_rate": 7.265788971570311e-05, + "loss": 0.3506, + "step": 172040 + }, + { + "epoch": 7.28, + "learning_rate": 7.261548898641058e-05, + "loss": 0.4148, + "step": 172050 + }, + { + "epoch": 7.28, + "learning_rate": 7.257308825711803e-05, + "loss": 0.3499, + "step": 172060 + }, + { + "epoch": 7.28, + "learning_rate": 7.253068752782548e-05, + "loss": 0.3862, + "step": 172070 + }, + { + "epoch": 7.28, + "learning_rate": 7.248828679853294e-05, + "loss": 0.3426, + "step": 172080 + }, + { + "epoch": 7.28, + "learning_rate": 7.24458860692404e-05, + "loss": 0.4725, + "step": 172090 + }, + { + "epoch": 7.28, + "learning_rate": 7.240348533994786e-05, + "loss": 0.3597, + "step": 172100 + }, + { + "epoch": 7.28, + "learning_rate": 7.236108461065531e-05, + "loss": 0.363, + "step": 172110 + }, + { + "epoch": 7.28, + "learning_rate": 7.231868388136276e-05, + "loss": 0.4152, + "step": 172120 + }, + { + "epoch": 7.28, + "learning_rate": 7.227628315207023e-05, + "loss": 0.3483, + "step": 172130 + }, + { + "epoch": 7.28, + "learning_rate": 7.223388242277768e-05, + "loss": 0.3556, + "step": 172140 + }, + { + "epoch": 7.28, + "learning_rate": 7.219148169348514e-05, + "loss": 0.3508, + "step": 172150 + }, + { + "epoch": 7.28, + "learning_rate": 7.214908096419259e-05, + "loss": 0.42, + "step": 172160 + }, + { + "epoch": 7.28, + "learning_rate": 7.210668023490004e-05, + "loss": 0.3519, + "step": 172170 + }, + { + "epoch": 7.28, + "learning_rate": 7.20642795056075e-05, + "loss": 0.3296, + "step": 172180 + }, + { + "epoch": 7.28, + "learning_rate": 7.202187877631496e-05, + "loss": 0.378, + "step": 172190 + }, + { + "epoch": 7.28, + "learning_rate": 7.197947804702241e-05, + "loss": 0.3099, + "step": 172200 + }, + { + "epoch": 7.28, + "learning_rate": 7.193707731772987e-05, + "loss": 0.3531, + "step": 172210 + }, + { + "epoch": 7.28, + "learning_rate": 7.189467658843732e-05, + "loss": 0.3529, + "step": 172220 + }, + { + "epoch": 7.28, + "learning_rate": 7.185227585914477e-05, + "loss": 0.346, + "step": 172230 + }, + { + "epoch": 7.28, + "learning_rate": 7.180987512985223e-05, + "loss": 0.3837, + "step": 172240 + }, + { + "epoch": 7.28, + "learning_rate": 7.17674744005597e-05, + "loss": 0.4271, + "step": 172250 + }, + { + "epoch": 7.28, + "learning_rate": 7.172507367126715e-05, + "loss": 0.35, + "step": 172260 + }, + { + "epoch": 7.29, + "learning_rate": 7.16826729419746e-05, + "loss": 0.4909, + "step": 172270 + }, + { + "epoch": 7.29, + "learning_rate": 7.164027221268205e-05, + "loss": 0.3565, + "step": 172280 + }, + { + "epoch": 7.29, + "learning_rate": 7.159787148338952e-05, + "loss": 0.4062, + "step": 172290 + }, + { + "epoch": 7.29, + "learning_rate": 7.155547075409697e-05, + "loss": 0.4308, + "step": 172300 + }, + { + "epoch": 7.29, + "learning_rate": 7.151307002480443e-05, + "loss": 0.4223, + "step": 172310 + }, + { + "epoch": 7.29, + "learning_rate": 7.147066929551188e-05, + "loss": 0.3274, + "step": 172320 + }, + { + "epoch": 7.29, + "learning_rate": 7.142826856621935e-05, + "loss": 0.368, + "step": 172330 + }, + { + "epoch": 7.29, + "learning_rate": 7.13858678369268e-05, + "loss": 0.4376, + "step": 172340 + }, + { + "epoch": 7.29, + "learning_rate": 7.134346710763425e-05, + "loss": 0.4027, + "step": 172350 + }, + { + "epoch": 7.29, + "learning_rate": 7.130106637834171e-05, + "loss": 0.3553, + "step": 172360 + }, + { + "epoch": 7.29, + "learning_rate": 7.125866564904917e-05, + "loss": 0.3649, + "step": 172370 + }, + { + "epoch": 7.29, + "learning_rate": 7.121626491975663e-05, + "loss": 0.3078, + "step": 172380 + }, + { + "epoch": 7.29, + "learning_rate": 7.117386419046408e-05, + "loss": 0.4552, + "step": 172390 + }, + { + "epoch": 7.29, + "learning_rate": 7.113146346117153e-05, + "loss": 0.4395, + "step": 172400 + }, + { + "epoch": 7.29, + "learning_rate": 7.1089062731879e-05, + "loss": 0.4239, + "step": 172410 + }, + { + "epoch": 7.29, + "learning_rate": 7.104666200258645e-05, + "loss": 0.3768, + "step": 172420 + }, + { + "epoch": 7.29, + "learning_rate": 7.100426127329391e-05, + "loss": 0.3263, + "step": 172430 + }, + { + "epoch": 7.29, + "learning_rate": 7.096186054400136e-05, + "loss": 0.3895, + "step": 172440 + }, + { + "epoch": 7.29, + "learning_rate": 7.091945981470883e-05, + "loss": 0.3807, + "step": 172450 + }, + { + "epoch": 7.29, + "learning_rate": 7.087705908541628e-05, + "loss": 0.4476, + "step": 172460 + }, + { + "epoch": 7.29, + "learning_rate": 7.083465835612373e-05, + "loss": 0.3645, + "step": 172470 + }, + { + "epoch": 7.29, + "learning_rate": 7.079225762683119e-05, + "loss": 0.4035, + "step": 172480 + }, + { + "epoch": 7.29, + "learning_rate": 7.074985689753864e-05, + "loss": 0.3538, + "step": 172490 + }, + { + "epoch": 7.29, + "learning_rate": 7.07074561682461e-05, + "loss": 0.3487, + "step": 172500 + }, + { + "epoch": 7.3, + "learning_rate": 7.066505543895356e-05, + "loss": 0.3608, + "step": 172510 + }, + { + "epoch": 7.3, + "learning_rate": 7.062265470966101e-05, + "loss": 0.3379, + "step": 172520 + }, + { + "epoch": 7.3, + "learning_rate": 7.058025398036847e-05, + "loss": 0.4122, + "step": 172530 + }, + { + "epoch": 7.3, + "learning_rate": 7.053785325107592e-05, + "loss": 0.3434, + "step": 172540 + }, + { + "epoch": 7.3, + "learning_rate": 7.049545252178337e-05, + "loss": 0.4031, + "step": 172550 + }, + { + "epoch": 7.3, + "learning_rate": 7.045305179249083e-05, + "loss": 0.3508, + "step": 172560 + }, + { + "epoch": 7.3, + "learning_rate": 7.041065106319828e-05, + "loss": 0.4131, + "step": 172570 + }, + { + "epoch": 7.3, + "learning_rate": 7.036825033390575e-05, + "loss": 0.3949, + "step": 172580 + }, + { + "epoch": 7.3, + "learning_rate": 7.03258496046132e-05, + "loss": 0.3846, + "step": 172590 + }, + { + "epoch": 7.3, + "learning_rate": 7.028344887532065e-05, + "loss": 0.4409, + "step": 172600 + }, + { + "epoch": 7.3, + "learning_rate": 7.02410481460281e-05, + "loss": 0.3305, + "step": 172610 + }, + { + "epoch": 7.3, + "learning_rate": 7.019864741673557e-05, + "loss": 0.4115, + "step": 172620 + }, + { + "epoch": 7.3, + "learning_rate": 7.015624668744303e-05, + "loss": 0.3268, + "step": 172630 + }, + { + "epoch": 7.3, + "learning_rate": 7.011384595815048e-05, + "loss": 0.3466, + "step": 172640 + }, + { + "epoch": 7.3, + "learning_rate": 7.007144522885793e-05, + "loss": 0.352, + "step": 172650 + }, + { + "epoch": 7.3, + "learning_rate": 7.00290444995654e-05, + "loss": 0.3175, + "step": 172660 + }, + { + "epoch": 7.3, + "learning_rate": 6.998664377027285e-05, + "loss": 0.4346, + "step": 172670 + }, + { + "epoch": 7.3, + "learning_rate": 6.99442430409803e-05, + "loss": 0.358, + "step": 172680 + }, + { + "epoch": 7.3, + "learning_rate": 6.990184231168776e-05, + "loss": 0.3724, + "step": 172690 + }, + { + "epoch": 7.3, + "learning_rate": 6.985944158239523e-05, + "loss": 0.3692, + "step": 172700 + }, + { + "epoch": 7.3, + "learning_rate": 6.981704085310268e-05, + "loss": 0.3441, + "step": 172710 + }, + { + "epoch": 7.3, + "learning_rate": 6.977464012381013e-05, + "loss": 0.4359, + "step": 172720 + }, + { + "epoch": 7.3, + "learning_rate": 6.973223939451759e-05, + "loss": 0.315, + "step": 172730 + }, + { + "epoch": 7.3, + "learning_rate": 6.968983866522505e-05, + "loss": 0.4402, + "step": 172740 + }, + { + "epoch": 7.31, + "learning_rate": 6.96474379359325e-05, + "loss": 0.3975, + "step": 172750 + }, + { + "epoch": 7.31, + "learning_rate": 6.960503720663996e-05, + "loss": 0.3618, + "step": 172760 + }, + { + "epoch": 7.31, + "learning_rate": 6.956263647734741e-05, + "loss": 0.438, + "step": 172770 + }, + { + "epoch": 7.31, + "learning_rate": 6.952023574805488e-05, + "loss": 0.3574, + "step": 172780 + }, + { + "epoch": 7.31, + "learning_rate": 6.947783501876233e-05, + "loss": 0.3462, + "step": 172790 + }, + { + "epoch": 7.31, + "learning_rate": 6.943543428946979e-05, + "loss": 0.3501, + "step": 172800 + }, + { + "epoch": 7.31, + "learning_rate": 6.939303356017724e-05, + "loss": 0.3997, + "step": 172810 + }, + { + "epoch": 7.31, + "learning_rate": 6.93506328308847e-05, + "loss": 0.3539, + "step": 172820 + }, + { + "epoch": 7.31, + "learning_rate": 6.930823210159216e-05, + "loss": 0.3753, + "step": 172830 + }, + { + "epoch": 7.31, + "learning_rate": 6.926583137229961e-05, + "loss": 0.401, + "step": 172840 + }, + { + "epoch": 7.31, + "learning_rate": 6.922343064300706e-05, + "loss": 0.4588, + "step": 172850 + }, + { + "epoch": 7.31, + "learning_rate": 6.918102991371452e-05, + "loss": 0.3881, + "step": 172860 + }, + { + "epoch": 7.31, + "learning_rate": 6.913862918442197e-05, + "loss": 0.322, + "step": 172870 + }, + { + "epoch": 7.31, + "learning_rate": 6.909622845512942e-05, + "loss": 0.4331, + "step": 172880 + }, + { + "epoch": 7.31, + "learning_rate": 6.905382772583688e-05, + "loss": 0.3793, + "step": 172890 + }, + { + "epoch": 7.31, + "learning_rate": 6.901142699654434e-05, + "loss": 0.381, + "step": 172900 + }, + { + "epoch": 7.31, + "learning_rate": 6.89690262672518e-05, + "loss": 0.3423, + "step": 172910 + }, + { + "epoch": 7.31, + "learning_rate": 6.892662553795925e-05, + "loss": 0.3895, + "step": 172920 + }, + { + "epoch": 7.31, + "learning_rate": 6.88842248086667e-05, + "loss": 0.3675, + "step": 172930 + }, + { + "epoch": 7.31, + "learning_rate": 6.884182407937417e-05, + "loss": 0.4274, + "step": 172940 + }, + { + "epoch": 7.31, + "learning_rate": 6.879942335008162e-05, + "loss": 0.4417, + "step": 172950 + }, + { + "epoch": 7.31, + "learning_rate": 6.875702262078908e-05, + "loss": 0.4222, + "step": 172960 + }, + { + "epoch": 7.31, + "learning_rate": 6.871462189149653e-05, + "loss": 0.3852, + "step": 172970 + }, + { + "epoch": 7.32, + "learning_rate": 6.8672221162204e-05, + "loss": 0.2801, + "step": 172980 + }, + { + "epoch": 7.32, + "learning_rate": 6.862982043291145e-05, + "loss": 0.376, + "step": 172990 + }, + { + "epoch": 7.32, + "learning_rate": 6.85874197036189e-05, + "loss": 0.4015, + "step": 173000 + }, + { + "epoch": 7.32, + "learning_rate": 6.854501897432636e-05, + "loss": 0.3521, + "step": 173010 + }, + { + "epoch": 7.32, + "learning_rate": 6.850261824503382e-05, + "loss": 0.3918, + "step": 173020 + }, + { + "epoch": 7.32, + "learning_rate": 6.846021751574128e-05, + "loss": 0.4287, + "step": 173030 + }, + { + "epoch": 7.32, + "learning_rate": 6.841781678644873e-05, + "loss": 0.366, + "step": 173040 + }, + { + "epoch": 7.32, + "learning_rate": 6.837541605715618e-05, + "loss": 0.3727, + "step": 173050 + }, + { + "epoch": 7.32, + "learning_rate": 6.833301532786365e-05, + "loss": 0.363, + "step": 173060 + }, + { + "epoch": 7.32, + "learning_rate": 6.82906145985711e-05, + "loss": 0.3568, + "step": 173070 + }, + { + "epoch": 7.32, + "learning_rate": 6.824821386927856e-05, + "loss": 0.4068, + "step": 173080 + }, + { + "epoch": 7.32, + "learning_rate": 6.820581313998601e-05, + "loss": 0.3891, + "step": 173090 + }, + { + "epoch": 7.32, + "learning_rate": 6.816341241069348e-05, + "loss": 0.3769, + "step": 173100 + }, + { + "epoch": 7.32, + "learning_rate": 6.812101168140093e-05, + "loss": 0.3891, + "step": 173110 + }, + { + "epoch": 7.32, + "learning_rate": 6.807861095210838e-05, + "loss": 0.3451, + "step": 173120 + }, + { + "epoch": 7.32, + "learning_rate": 6.803621022281584e-05, + "loss": 0.3525, + "step": 173130 + }, + { + "epoch": 7.32, + "learning_rate": 6.79938094935233e-05, + "loss": 0.3684, + "step": 173140 + }, + { + "epoch": 7.32, + "learning_rate": 6.795140876423076e-05, + "loss": 0.4081, + "step": 173150 + }, + { + "epoch": 7.32, + "learning_rate": 6.790900803493821e-05, + "loss": 0.3493, + "step": 173160 + }, + { + "epoch": 7.32, + "learning_rate": 6.786660730564566e-05, + "loss": 0.3457, + "step": 173170 + }, + { + "epoch": 7.32, + "learning_rate": 6.782420657635312e-05, + "loss": 0.399, + "step": 173180 + }, + { + "epoch": 7.32, + "learning_rate": 6.778180584706057e-05, + "loss": 0.3855, + "step": 173190 + }, + { + "epoch": 7.32, + "learning_rate": 6.773940511776802e-05, + "loss": 0.3548, + "step": 173200 + }, + { + "epoch": 7.32, + "learning_rate": 6.769700438847548e-05, + "loss": 0.4051, + "step": 173210 + }, + { + "epoch": 7.33, + "learning_rate": 6.765460365918293e-05, + "loss": 0.3812, + "step": 173220 + }, + { + "epoch": 7.33, + "learning_rate": 6.76122029298904e-05, + "loss": 0.4145, + "step": 173230 + }, + { + "epoch": 7.33, + "learning_rate": 6.756980220059785e-05, + "loss": 0.4225, + "step": 173240 + }, + { + "epoch": 7.33, + "learning_rate": 6.75274014713053e-05, + "loss": 0.3765, + "step": 173250 + }, + { + "epoch": 7.33, + "learning_rate": 6.748500074201276e-05, + "loss": 0.3869, + "step": 173260 + }, + { + "epoch": 7.33, + "learning_rate": 6.744260001272022e-05, + "loss": 0.4071, + "step": 173270 + }, + { + "epoch": 7.33, + "learning_rate": 6.740019928342768e-05, + "loss": 0.3194, + "step": 173280 + }, + { + "epoch": 7.33, + "learning_rate": 6.735779855413513e-05, + "loss": 0.3864, + "step": 173290 + }, + { + "epoch": 7.33, + "learning_rate": 6.731539782484258e-05, + "loss": 0.3122, + "step": 173300 + }, + { + "epoch": 7.33, + "learning_rate": 6.727299709555005e-05, + "loss": 0.4194, + "step": 173310 + }, + { + "epoch": 7.33, + "learning_rate": 6.72305963662575e-05, + "loss": 0.3644, + "step": 173320 + }, + { + "epoch": 7.33, + "learning_rate": 6.718819563696496e-05, + "loss": 0.4265, + "step": 173330 + }, + { + "epoch": 7.33, + "learning_rate": 6.714579490767241e-05, + "loss": 0.386, + "step": 173340 + }, + { + "epoch": 7.33, + "learning_rate": 6.710339417837988e-05, + "loss": 0.4148, + "step": 173350 + }, + { + "epoch": 7.33, + "learning_rate": 6.706099344908733e-05, + "loss": 0.3463, + "step": 173360 + }, + { + "epoch": 7.33, + "learning_rate": 6.701859271979478e-05, + "loss": 0.3471, + "step": 173370 + }, + { + "epoch": 7.33, + "learning_rate": 6.697619199050224e-05, + "loss": 0.3535, + "step": 173380 + }, + { + "epoch": 7.33, + "learning_rate": 6.69337912612097e-05, + "loss": 0.3487, + "step": 173390 + }, + { + "epoch": 7.33, + "learning_rate": 6.689139053191716e-05, + "loss": 0.4342, + "step": 173400 + }, + { + "epoch": 7.33, + "learning_rate": 6.684898980262461e-05, + "loss": 0.4685, + "step": 173410 + }, + { + "epoch": 7.33, + "learning_rate": 6.680658907333206e-05, + "loss": 0.4091, + "step": 173420 + }, + { + "epoch": 7.33, + "learning_rate": 6.676418834403953e-05, + "loss": 0.4086, + "step": 173430 + }, + { + "epoch": 7.33, + "learning_rate": 6.672178761474698e-05, + "loss": 0.3555, + "step": 173440 + }, + { + "epoch": 7.33, + "learning_rate": 6.667938688545444e-05, + "loss": 0.367, + "step": 173450 + }, + { + "epoch": 7.34, + "learning_rate": 6.663698615616189e-05, + "loss": 0.4097, + "step": 173460 + }, + { + "epoch": 7.34, + "learning_rate": 6.659458542686936e-05, + "loss": 0.3858, + "step": 173470 + }, + { + "epoch": 7.34, + "learning_rate": 6.655218469757681e-05, + "loss": 0.3607, + "step": 173480 + }, + { + "epoch": 7.34, + "learning_rate": 6.650978396828426e-05, + "loss": 0.3548, + "step": 173490 + }, + { + "epoch": 7.34, + "learning_rate": 6.646738323899171e-05, + "loss": 0.3247, + "step": 173500 + }, + { + "epoch": 7.34, + "learning_rate": 6.642498250969917e-05, + "loss": 0.3973, + "step": 173510 + }, + { + "epoch": 7.34, + "learning_rate": 6.638258178040662e-05, + "loss": 0.3887, + "step": 173520 + }, + { + "epoch": 7.34, + "learning_rate": 6.634018105111407e-05, + "loss": 0.3555, + "step": 173530 + }, + { + "epoch": 7.34, + "learning_rate": 6.629778032182153e-05, + "loss": 0.3292, + "step": 173540 + }, + { + "epoch": 7.34, + "learning_rate": 6.6255379592529e-05, + "loss": 0.3494, + "step": 173550 + }, + { + "epoch": 7.34, + "learning_rate": 6.621297886323645e-05, + "loss": 0.3725, + "step": 173560 + }, + { + "epoch": 7.34, + "learning_rate": 6.61705781339439e-05, + "loss": 0.383, + "step": 173570 + }, + { + "epoch": 7.34, + "learning_rate": 6.612817740465135e-05, + "loss": 0.4238, + "step": 173580 + }, + { + "epoch": 7.34, + "learning_rate": 6.608577667535882e-05, + "loss": 0.3906, + "step": 173590 + }, + { + "epoch": 7.34, + "learning_rate": 6.604337594606627e-05, + "loss": 0.3269, + "step": 173600 + }, + { + "epoch": 7.34, + "learning_rate": 6.600097521677373e-05, + "loss": 0.3748, + "step": 173610 + }, + { + "epoch": 7.34, + "learning_rate": 6.595857448748118e-05, + "loss": 0.4058, + "step": 173620 + }, + { + "epoch": 7.34, + "learning_rate": 6.591617375818865e-05, + "loss": 0.4392, + "step": 173630 + }, + { + "epoch": 7.34, + "learning_rate": 6.58737730288961e-05, + "loss": 0.3932, + "step": 173640 + }, + { + "epoch": 7.34, + "learning_rate": 6.583137229960355e-05, + "loss": 0.3973, + "step": 173650 + }, + { + "epoch": 7.34, + "learning_rate": 6.578897157031101e-05, + "loss": 0.4611, + "step": 173660 + }, + { + "epoch": 7.34, + "learning_rate": 6.574657084101847e-05, + "loss": 0.4586, + "step": 173670 + }, + { + "epoch": 7.34, + "learning_rate": 6.570417011172593e-05, + "loss": 0.3624, + "step": 173680 + }, + { + "epoch": 7.35, + "learning_rate": 6.566176938243338e-05, + "loss": 0.3708, + "step": 173690 + }, + { + "epoch": 7.35, + "learning_rate": 6.561936865314083e-05, + "loss": 0.3922, + "step": 173700 + }, + { + "epoch": 7.35, + "learning_rate": 6.55769679238483e-05, + "loss": 0.3983, + "step": 173710 + }, + { + "epoch": 7.35, + "learning_rate": 6.553456719455575e-05, + "loss": 0.339, + "step": 173720 + }, + { + "epoch": 7.35, + "learning_rate": 6.549216646526321e-05, + "loss": 0.3458, + "step": 173730 + }, + { + "epoch": 7.35, + "learning_rate": 6.544976573597066e-05, + "loss": 0.4541, + "step": 173740 + }, + { + "epoch": 7.35, + "learning_rate": 6.540736500667813e-05, + "loss": 0.3575, + "step": 173750 + }, + { + "epoch": 7.35, + "learning_rate": 6.536496427738558e-05, + "loss": 0.3164, + "step": 173760 + }, + { + "epoch": 7.35, + "learning_rate": 6.532256354809303e-05, + "loss": 0.3596, + "step": 173770 + }, + { + "epoch": 7.35, + "learning_rate": 6.528016281880049e-05, + "loss": 0.3461, + "step": 173780 + }, + { + "epoch": 7.35, + "learning_rate": 6.523776208950795e-05, + "loss": 0.4001, + "step": 173790 + }, + { + "epoch": 7.35, + "learning_rate": 6.519536136021541e-05, + "loss": 0.4258, + "step": 173800 + }, + { + "epoch": 7.35, + "learning_rate": 6.515296063092286e-05, + "loss": 0.4026, + "step": 173810 + }, + { + "epoch": 7.35, + "learning_rate": 6.511055990163031e-05, + "loss": 0.4364, + "step": 173820 + }, + { + "epoch": 7.35, + "learning_rate": 6.506815917233777e-05, + "loss": 0.3625, + "step": 173830 + }, + { + "epoch": 7.35, + "learning_rate": 6.502575844304522e-05, + "loss": 0.299, + "step": 173840 + }, + { + "epoch": 7.35, + "learning_rate": 6.498335771375267e-05, + "loss": 0.4057, + "step": 173850 + }, + { + "epoch": 7.35, + "learning_rate": 6.494095698446013e-05, + "loss": 0.3528, + "step": 173860 + }, + { + "epoch": 7.35, + "learning_rate": 6.489855625516759e-05, + "loss": 0.3375, + "step": 173870 + }, + { + "epoch": 7.35, + "learning_rate": 6.485615552587505e-05, + "loss": 0.415, + "step": 173880 + }, + { + "epoch": 7.35, + "learning_rate": 6.48137547965825e-05, + "loss": 0.412, + "step": 173890 + }, + { + "epoch": 7.35, + "learning_rate": 6.477135406728995e-05, + "loss": 0.3714, + "step": 173900 + }, + { + "epoch": 7.35, + "learning_rate": 6.47289533379974e-05, + "loss": 0.3782, + "step": 173910 + }, + { + "epoch": 7.35, + "learning_rate": 6.468655260870487e-05, + "loss": 0.4251, + "step": 173920 + }, + { + "epoch": 7.36, + "learning_rate": 6.464415187941233e-05, + "loss": 0.4202, + "step": 173930 + }, + { + "epoch": 7.36, + "learning_rate": 6.460175115011978e-05, + "loss": 0.2971, + "step": 173940 + }, + { + "epoch": 7.36, + "learning_rate": 6.455935042082723e-05, + "loss": 0.3705, + "step": 173950 + }, + { + "epoch": 7.36, + "learning_rate": 6.45169496915347e-05, + "loss": 0.3384, + "step": 173960 + }, + { + "epoch": 7.36, + "learning_rate": 6.447454896224215e-05, + "loss": 0.3753, + "step": 173970 + }, + { + "epoch": 7.36, + "learning_rate": 6.44321482329496e-05, + "loss": 0.3352, + "step": 173980 + }, + { + "epoch": 7.36, + "learning_rate": 6.438974750365706e-05, + "loss": 0.336, + "step": 173990 + }, + { + "epoch": 7.36, + "learning_rate": 6.434734677436453e-05, + "loss": 0.3811, + "step": 174000 + }, + { + "epoch": 7.36, + "learning_rate": 6.430494604507198e-05, + "loss": 0.3349, + "step": 174010 + }, + { + "epoch": 7.36, + "learning_rate": 6.426254531577943e-05, + "loss": 0.4321, + "step": 174020 + }, + { + "epoch": 7.36, + "learning_rate": 6.422014458648689e-05, + "loss": 0.4625, + "step": 174030 + }, + { + "epoch": 7.36, + "learning_rate": 6.417774385719435e-05, + "loss": 0.383, + "step": 174040 + }, + { + "epoch": 7.36, + "learning_rate": 6.41353431279018e-05, + "loss": 0.3544, + "step": 174050 + }, + { + "epoch": 7.36, + "learning_rate": 6.409294239860926e-05, + "loss": 0.326, + "step": 174060 + }, + { + "epoch": 7.36, + "learning_rate": 6.405054166931671e-05, + "loss": 0.3408, + "step": 174070 + }, + { + "epoch": 7.36, + "learning_rate": 6.400814094002418e-05, + "loss": 0.421, + "step": 174080 + }, + { + "epoch": 7.36, + "learning_rate": 6.396574021073163e-05, + "loss": 0.3765, + "step": 174090 + }, + { + "epoch": 7.36, + "learning_rate": 6.392333948143909e-05, + "loss": 0.428, + "step": 174100 + }, + { + "epoch": 7.36, + "learning_rate": 6.388093875214654e-05, + "loss": 0.4037, + "step": 174110 + }, + { + "epoch": 7.36, + "learning_rate": 6.3838538022854e-05, + "loss": 0.4146, + "step": 174120 + }, + { + "epoch": 7.36, + "learning_rate": 6.379613729356146e-05, + "loss": 0.3864, + "step": 174130 + }, + { + "epoch": 7.36, + "learning_rate": 6.375373656426891e-05, + "loss": 0.409, + "step": 174140 + }, + { + "epoch": 7.36, + "learning_rate": 6.371133583497637e-05, + "loss": 0.4444, + "step": 174150 + }, + { + "epoch": 7.36, + "learning_rate": 6.366893510568382e-05, + "loss": 0.3463, + "step": 174160 + }, + { + "epoch": 7.37, + "learning_rate": 6.362653437639127e-05, + "loss": 0.4287, + "step": 174170 + }, + { + "epoch": 7.37, + "learning_rate": 6.358413364709872e-05, + "loss": 0.4066, + "step": 174180 + }, + { + "epoch": 7.37, + "learning_rate": 6.354173291780619e-05, + "loss": 0.3587, + "step": 174190 + }, + { + "epoch": 7.37, + "learning_rate": 6.349933218851364e-05, + "loss": 0.3499, + "step": 174200 + }, + { + "epoch": 7.37, + "learning_rate": 6.34569314592211e-05, + "loss": 0.3972, + "step": 174210 + }, + { + "epoch": 7.37, + "learning_rate": 6.341453072992855e-05, + "loss": 0.2979, + "step": 174220 + }, + { + "epoch": 7.37, + "learning_rate": 6.3372130000636e-05, + "loss": 0.4268, + "step": 174230 + }, + { + "epoch": 7.37, + "learning_rate": 6.332972927134347e-05, + "loss": 0.4125, + "step": 174240 + }, + { + "epoch": 7.37, + "learning_rate": 6.328732854205092e-05, + "loss": 0.4404, + "step": 174250 + }, + { + "epoch": 7.37, + "learning_rate": 6.324492781275838e-05, + "loss": 0.3742, + "step": 174260 + }, + { + "epoch": 7.37, + "learning_rate": 6.320252708346583e-05, + "loss": 0.3958, + "step": 174270 + }, + { + "epoch": 7.37, + "learning_rate": 6.31601263541733e-05, + "loss": 0.4461, + "step": 174280 + }, + { + "epoch": 7.37, + "learning_rate": 6.311772562488075e-05, + "loss": 0.3608, + "step": 174290 + }, + { + "epoch": 7.37, + "learning_rate": 6.30753248955882e-05, + "loss": 0.4369, + "step": 174300 + }, + { + "epoch": 7.37, + "learning_rate": 6.303292416629566e-05, + "loss": 0.3674, + "step": 174310 + }, + { + "epoch": 7.37, + "learning_rate": 6.299052343700312e-05, + "loss": 0.3832, + "step": 174320 + }, + { + "epoch": 7.37, + "learning_rate": 6.294812270771058e-05, + "loss": 0.3463, + "step": 174330 + }, + { + "epoch": 7.37, + "learning_rate": 6.290572197841803e-05, + "loss": 0.3489, + "step": 174340 + }, + { + "epoch": 7.37, + "learning_rate": 6.286332124912548e-05, + "loss": 0.3796, + "step": 174350 + }, + { + "epoch": 7.37, + "learning_rate": 6.282092051983295e-05, + "loss": 0.3685, + "step": 174360 + }, + { + "epoch": 7.37, + "learning_rate": 6.27785197905404e-05, + "loss": 0.3854, + "step": 174370 + }, + { + "epoch": 7.37, + "learning_rate": 6.273611906124786e-05, + "loss": 0.4135, + "step": 174380 + }, + { + "epoch": 7.37, + "learning_rate": 6.269371833195531e-05, + "loss": 0.3676, + "step": 174390 + }, + { + "epoch": 7.38, + "learning_rate": 6.265131760266278e-05, + "loss": 0.3841, + "step": 174400 + }, + { + "epoch": 7.38, + "learning_rate": 6.260891687337023e-05, + "loss": 0.3186, + "step": 174410 + }, + { + "epoch": 7.38, + "learning_rate": 6.256651614407768e-05, + "loss": 0.3675, + "step": 174420 + }, + { + "epoch": 7.38, + "learning_rate": 6.252411541478514e-05, + "loss": 0.3773, + "step": 174430 + }, + { + "epoch": 7.38, + "learning_rate": 6.24817146854926e-05, + "loss": 0.4095, + "step": 174440 + }, + { + "epoch": 7.38, + "learning_rate": 6.243931395620006e-05, + "loss": 0.3566, + "step": 174450 + }, + { + "epoch": 7.38, + "learning_rate": 6.239691322690751e-05, + "loss": 0.4223, + "step": 174460 + }, + { + "epoch": 7.38, + "learning_rate": 6.235451249761496e-05, + "loss": 0.3743, + "step": 174470 + }, + { + "epoch": 7.38, + "learning_rate": 6.231211176832242e-05, + "loss": 0.344, + "step": 174480 + }, + { + "epoch": 7.38, + "learning_rate": 6.226971103902987e-05, + "loss": 0.3455, + "step": 174490 + }, + { + "epoch": 7.38, + "learning_rate": 6.222731030973734e-05, + "loss": 0.3565, + "step": 174500 + }, + { + "epoch": 7.38, + "learning_rate": 6.218490958044479e-05, + "loss": 0.3965, + "step": 174510 + }, + { + "epoch": 7.38, + "learning_rate": 6.214250885115224e-05, + "loss": 0.3789, + "step": 174520 + }, + { + "epoch": 7.38, + "learning_rate": 6.21001081218597e-05, + "loss": 0.4274, + "step": 174530 + }, + { + "epoch": 7.38, + "learning_rate": 6.205770739256715e-05, + "loss": 0.3832, + "step": 174540 + }, + { + "epoch": 7.38, + "learning_rate": 6.20153066632746e-05, + "loss": 0.3845, + "step": 174550 + }, + { + "epoch": 7.38, + "learning_rate": 6.197290593398207e-05, + "loss": 0.3934, + "step": 174560 + }, + { + "epoch": 7.38, + "learning_rate": 6.193050520468952e-05, + "loss": 0.3591, + "step": 174570 + }, + { + "epoch": 7.38, + "learning_rate": 6.188810447539698e-05, + "loss": 0.4131, + "step": 174580 + }, + { + "epoch": 7.38, + "learning_rate": 6.184570374610443e-05, + "loss": 0.3047, + "step": 174590 + }, + { + "epoch": 7.38, + "learning_rate": 6.18033030168119e-05, + "loss": 0.381, + "step": 174600 + }, + { + "epoch": 7.38, + "learning_rate": 6.176090228751935e-05, + "loss": 0.338, + "step": 174610 + }, + { + "epoch": 7.38, + "learning_rate": 6.17185015582268e-05, + "loss": 0.3644, + "step": 174620 + }, + { + "epoch": 7.38, + "learning_rate": 6.167610082893426e-05, + "loss": 0.3396, + "step": 174630 + }, + { + "epoch": 7.39, + "learning_rate": 6.163370009964171e-05, + "loss": 0.3568, + "step": 174640 + }, + { + "epoch": 7.39, + "learning_rate": 6.159129937034918e-05, + "loss": 0.4121, + "step": 174650 + }, + { + "epoch": 7.39, + "learning_rate": 6.154889864105663e-05, + "loss": 0.3842, + "step": 174660 + }, + { + "epoch": 7.39, + "learning_rate": 6.150649791176408e-05, + "loss": 0.3603, + "step": 174670 + }, + { + "epoch": 7.39, + "learning_rate": 6.146409718247154e-05, + "loss": 0.2844, + "step": 174680 + }, + { + "epoch": 7.39, + "learning_rate": 6.1421696453179e-05, + "loss": 0.4306, + "step": 174690 + }, + { + "epoch": 7.39, + "learning_rate": 6.137929572388646e-05, + "loss": 0.3651, + "step": 174700 + }, + { + "epoch": 7.39, + "learning_rate": 6.133689499459391e-05, + "loss": 0.3826, + "step": 174710 + }, + { + "epoch": 7.39, + "learning_rate": 6.129449426530136e-05, + "loss": 0.3481, + "step": 174720 + }, + { + "epoch": 7.39, + "learning_rate": 6.125209353600883e-05, + "loss": 0.3317, + "step": 174730 + }, + { + "epoch": 7.39, + "learning_rate": 6.120969280671628e-05, + "loss": 0.3507, + "step": 174740 + }, + { + "epoch": 7.39, + "learning_rate": 6.116729207742374e-05, + "loss": 0.4062, + "step": 174750 + }, + { + "epoch": 7.39, + "learning_rate": 6.112489134813119e-05, + "loss": 0.313, + "step": 174760 + }, + { + "epoch": 7.39, + "learning_rate": 6.108249061883866e-05, + "loss": 0.3911, + "step": 174770 + }, + { + "epoch": 7.39, + "learning_rate": 6.104008988954611e-05, + "loss": 0.3823, + "step": 174780 + }, + { + "epoch": 7.39, + "learning_rate": 6.0997689160253555e-05, + "loss": 0.348, + "step": 174790 + }, + { + "epoch": 7.39, + "learning_rate": 6.095528843096101e-05, + "loss": 0.3332, + "step": 174800 + }, + { + "epoch": 7.39, + "learning_rate": 6.0912887701668475e-05, + "loss": 0.3292, + "step": 174810 + }, + { + "epoch": 7.39, + "learning_rate": 6.087048697237593e-05, + "loss": 0.3904, + "step": 174820 + }, + { + "epoch": 7.39, + "learning_rate": 6.082808624308338e-05, + "loss": 0.4173, + "step": 174830 + }, + { + "epoch": 7.39, + "learning_rate": 6.0785685513790835e-05, + "loss": 0.3525, + "step": 174840 + }, + { + "epoch": 7.39, + "learning_rate": 6.07432847844983e-05, + "loss": 0.3782, + "step": 174850 + }, + { + "epoch": 7.39, + "learning_rate": 6.0700884055205755e-05, + "loss": 0.3038, + "step": 174860 + }, + { + "epoch": 7.4, + "learning_rate": 6.065848332591321e-05, + "loss": 0.3905, + "step": 174870 + }, + { + "epoch": 7.4, + "learning_rate": 6.061608259662066e-05, + "loss": 0.349, + "step": 174880 + }, + { + "epoch": 7.4, + "learning_rate": 6.057368186732812e-05, + "loss": 0.3189, + "step": 174890 + }, + { + "epoch": 7.4, + "learning_rate": 6.053128113803558e-05, + "loss": 0.3492, + "step": 174900 + }, + { + "epoch": 7.4, + "learning_rate": 6.0488880408743035e-05, + "loss": 0.4132, + "step": 174910 + }, + { + "epoch": 7.4, + "learning_rate": 6.044647967945049e-05, + "loss": 0.3743, + "step": 174920 + }, + { + "epoch": 7.4, + "learning_rate": 6.040407895015795e-05, + "loss": 0.3453, + "step": 174930 + }, + { + "epoch": 7.4, + "learning_rate": 6.03616782208654e-05, + "loss": 0.3586, + "step": 174940 + }, + { + "epoch": 7.4, + "learning_rate": 6.0319277491572854e-05, + "loss": 0.3046, + "step": 174950 + }, + { + "epoch": 7.4, + "learning_rate": 6.027687676228031e-05, + "loss": 0.3693, + "step": 174960 + }, + { + "epoch": 7.4, + "learning_rate": 6.0234476032987774e-05, + "loss": 0.4343, + "step": 174970 + }, + { + "epoch": 7.4, + "learning_rate": 6.019207530369523e-05, + "loss": 0.3297, + "step": 174980 + }, + { + "epoch": 7.4, + "learning_rate": 6.014967457440268e-05, + "loss": 0.3643, + "step": 174990 + }, + { + "epoch": 7.4, + "learning_rate": 6.0107273845110134e-05, + "loss": 0.3618, + "step": 175000 + }, + { + "epoch": 7.4, + "learning_rate": 6.00648731158176e-05, + "loss": 0.3853, + "step": 175010 + }, + { + "epoch": 7.4, + "learning_rate": 6.0022472386525054e-05, + "loss": 0.3925, + "step": 175020 + }, + { + "epoch": 7.4, + "learning_rate": 5.998007165723251e-05, + "loss": 0.3966, + "step": 175030 + }, + { + "epoch": 7.4, + "learning_rate": 5.993767092793996e-05, + "loss": 0.4144, + "step": 175040 + }, + { + "epoch": 7.4, + "learning_rate": 5.989527019864743e-05, + "loss": 0.4148, + "step": 175050 + }, + { + "epoch": 7.4, + "learning_rate": 5.985286946935488e-05, + "loss": 0.3321, + "step": 175060 + }, + { + "epoch": 7.4, + "learning_rate": 5.9810468740062334e-05, + "loss": 0.4209, + "step": 175070 + }, + { + "epoch": 7.4, + "learning_rate": 5.976806801076979e-05, + "loss": 0.3828, + "step": 175080 + }, + { + "epoch": 7.4, + "learning_rate": 5.972566728147725e-05, + "loss": 0.415, + "step": 175090 + }, + { + "epoch": 7.4, + "learning_rate": 5.96832665521847e-05, + "loss": 0.3596, + "step": 175100 + }, + { + "epoch": 7.41, + "learning_rate": 5.9640865822892154e-05, + "loss": 0.332, + "step": 175110 + }, + { + "epoch": 7.41, + "learning_rate": 5.959846509359961e-05, + "loss": 0.3536, + "step": 175120 + }, + { + "epoch": 7.41, + "learning_rate": 5.9556064364307074e-05, + "loss": 0.3176, + "step": 175130 + }, + { + "epoch": 7.41, + "learning_rate": 5.951366363501453e-05, + "loss": 0.4183, + "step": 175140 + }, + { + "epoch": 7.41, + "learning_rate": 5.947126290572198e-05, + "loss": 0.3716, + "step": 175150 + }, + { + "epoch": 7.41, + "learning_rate": 5.942886217642943e-05, + "loss": 0.317, + "step": 175160 + }, + { + "epoch": 7.41, + "learning_rate": 5.93864614471369e-05, + "loss": 0.3348, + "step": 175170 + }, + { + "epoch": 7.41, + "learning_rate": 5.934406071784435e-05, + "loss": 0.4548, + "step": 175180 + }, + { + "epoch": 7.41, + "learning_rate": 5.9301659988551807e-05, + "loss": 0.3574, + "step": 175190 + }, + { + "epoch": 7.41, + "learning_rate": 5.925925925925926e-05, + "loss": 0.3478, + "step": 175200 + }, + { + "epoch": 7.41, + "learning_rate": 5.921685852996673e-05, + "loss": 0.3579, + "step": 175210 + }, + { + "epoch": 7.41, + "learning_rate": 5.917445780067418e-05, + "loss": 0.421, + "step": 175220 + }, + { + "epoch": 7.41, + "learning_rate": 5.913205707138163e-05, + "loss": 0.34, + "step": 175230 + }, + { + "epoch": 7.41, + "learning_rate": 5.9089656342089086e-05, + "loss": 0.3464, + "step": 175240 + }, + { + "epoch": 7.41, + "learning_rate": 5.9047255612796546e-05, + "loss": 0.3571, + "step": 175250 + }, + { + "epoch": 7.41, + "learning_rate": 5.9004854883504e-05, + "loss": 0.3104, + "step": 175260 + }, + { + "epoch": 7.41, + "learning_rate": 5.896245415421145e-05, + "loss": 0.3454, + "step": 175270 + }, + { + "epoch": 7.41, + "learning_rate": 5.8920053424918906e-05, + "loss": 0.3764, + "step": 175280 + }, + { + "epoch": 7.41, + "learning_rate": 5.887765269562637e-05, + "loss": 0.352, + "step": 175290 + }, + { + "epoch": 7.41, + "learning_rate": 5.8835251966333826e-05, + "loss": 0.384, + "step": 175300 + }, + { + "epoch": 7.41, + "learning_rate": 5.879285123704128e-05, + "loss": 0.3834, + "step": 175310 + }, + { + "epoch": 7.41, + "learning_rate": 5.875045050774873e-05, + "loss": 0.4073, + "step": 175320 + }, + { + "epoch": 7.41, + "learning_rate": 5.87080497784562e-05, + "loss": 0.3307, + "step": 175330 + }, + { + "epoch": 7.41, + "learning_rate": 5.866564904916365e-05, + "loss": 0.4457, + "step": 175340 + }, + { + "epoch": 7.42, + "learning_rate": 5.8623248319871106e-05, + "loss": 0.3902, + "step": 175350 + }, + { + "epoch": 7.42, + "learning_rate": 5.858084759057856e-05, + "loss": 0.3326, + "step": 175360 + }, + { + "epoch": 7.42, + "learning_rate": 5.853844686128601e-05, + "loss": 0.3518, + "step": 175370 + }, + { + "epoch": 7.42, + "learning_rate": 5.849604613199348e-05, + "loss": 0.4413, + "step": 175380 + }, + { + "epoch": 7.42, + "learning_rate": 5.845364540270093e-05, + "loss": 0.4172, + "step": 175390 + }, + { + "epoch": 7.42, + "learning_rate": 5.8411244673408386e-05, + "loss": 0.3694, + "step": 175400 + }, + { + "epoch": 7.42, + "learning_rate": 5.836884394411584e-05, + "loss": 0.3862, + "step": 175410 + }, + { + "epoch": 7.42, + "learning_rate": 5.83264432148233e-05, + "loss": 0.4081, + "step": 175420 + }, + { + "epoch": 7.42, + "learning_rate": 5.828404248553075e-05, + "loss": 0.3461, + "step": 175430 + }, + { + "epoch": 7.42, + "learning_rate": 5.8241641756238205e-05, + "loss": 0.3518, + "step": 175440 + }, + { + "epoch": 7.42, + "learning_rate": 5.819924102694566e-05, + "loss": 0.3562, + "step": 175450 + }, + { + "epoch": 7.42, + "learning_rate": 5.8156840297653125e-05, + "loss": 0.3649, + "step": 175460 + }, + { + "epoch": 7.42, + "learning_rate": 5.811443956836058e-05, + "loss": 0.2926, + "step": 175470 + }, + { + "epoch": 7.42, + "learning_rate": 5.807203883906803e-05, + "loss": 0.3788, + "step": 175480 + }, + { + "epoch": 7.42, + "learning_rate": 5.8029638109775485e-05, + "loss": 0.3695, + "step": 175490 + }, + { + "epoch": 7.42, + "learning_rate": 5.798723738048295e-05, + "loss": 0.3847, + "step": 175500 + }, + { + "epoch": 7.42, + "learning_rate": 5.7944836651190405e-05, + "loss": 0.3416, + "step": 175510 + }, + { + "epoch": 7.42, + "learning_rate": 5.790243592189786e-05, + "loss": 0.3611, + "step": 175520 + }, + { + "epoch": 7.42, + "learning_rate": 5.786003519260531e-05, + "loss": 0.3856, + "step": 175530 + }, + { + "epoch": 7.42, + "learning_rate": 5.781763446331278e-05, + "loss": 0.3495, + "step": 175540 + }, + { + "epoch": 7.42, + "learning_rate": 5.777523373402023e-05, + "loss": 0.3242, + "step": 175550 + }, + { + "epoch": 7.42, + "learning_rate": 5.7732833004727685e-05, + "loss": 0.4048, + "step": 175560 + }, + { + "epoch": 7.42, + "learning_rate": 5.769043227543514e-05, + "loss": 0.4478, + "step": 175570 + }, + { + "epoch": 7.43, + "learning_rate": 5.76480315461426e-05, + "loss": 0.3036, + "step": 175580 + }, + { + "epoch": 7.43, + "learning_rate": 5.760563081685005e-05, + "loss": 0.3859, + "step": 175590 + }, + { + "epoch": 7.43, + "learning_rate": 5.7563230087557504e-05, + "loss": 0.3352, + "step": 175600 + }, + { + "epoch": 7.43, + "learning_rate": 5.752082935826496e-05, + "loss": 0.2999, + "step": 175610 + }, + { + "epoch": 7.43, + "learning_rate": 5.7478428628972425e-05, + "loss": 0.2795, + "step": 175620 + }, + { + "epoch": 7.43, + "learning_rate": 5.743602789967988e-05, + "loss": 0.3459, + "step": 175630 + }, + { + "epoch": 7.43, + "learning_rate": 5.739362717038733e-05, + "loss": 0.3902, + "step": 175640 + }, + { + "epoch": 7.43, + "learning_rate": 5.7351226441094784e-05, + "loss": 0.3841, + "step": 175650 + }, + { + "epoch": 7.43, + "learning_rate": 5.730882571180225e-05, + "loss": 0.3245, + "step": 175660 + }, + { + "epoch": 7.43, + "learning_rate": 5.7266424982509704e-05, + "loss": 0.326, + "step": 175670 + }, + { + "epoch": 7.43, + "learning_rate": 5.722402425321716e-05, + "loss": 0.3596, + "step": 175680 + }, + { + "epoch": 7.43, + "learning_rate": 5.718162352392461e-05, + "loss": 0.3246, + "step": 175690 + }, + { + "epoch": 7.43, + "learning_rate": 5.713922279463208e-05, + "loss": 0.3547, + "step": 175700 + }, + { + "epoch": 7.43, + "learning_rate": 5.709682206533953e-05, + "loss": 0.3725, + "step": 175710 + }, + { + "epoch": 7.43, + "learning_rate": 5.7054421336046984e-05, + "loss": 0.4036, + "step": 175720 + }, + { + "epoch": 7.43, + "learning_rate": 5.701202060675444e-05, + "loss": 0.4171, + "step": 175730 + }, + { + "epoch": 7.43, + "learning_rate": 5.69696198774619e-05, + "loss": 0.4039, + "step": 175740 + }, + { + "epoch": 7.43, + "learning_rate": 5.692721914816935e-05, + "loss": 0.3858, + "step": 175750 + }, + { + "epoch": 7.43, + "learning_rate": 5.6884818418876804e-05, + "loss": 0.4296, + "step": 175760 + }, + { + "epoch": 7.43, + "learning_rate": 5.684241768958426e-05, + "loss": 0.3723, + "step": 175770 + }, + { + "epoch": 7.43, + "learning_rate": 5.6800016960291724e-05, + "loss": 0.3846, + "step": 175780 + }, + { + "epoch": 7.43, + "learning_rate": 5.675761623099918e-05, + "loss": 0.377, + "step": 175790 + }, + { + "epoch": 7.43, + "learning_rate": 5.671521550170663e-05, + "loss": 0.4501, + "step": 175800 + }, + { + "epoch": 7.43, + "learning_rate": 5.6672814772414083e-05, + "loss": 0.4178, + "step": 175810 + }, + { + "epoch": 7.44, + "learning_rate": 5.663041404312155e-05, + "loss": 0.3466, + "step": 175820 + }, + { + "epoch": 7.44, + "learning_rate": 5.6588013313829003e-05, + "loss": 0.3654, + "step": 175830 + }, + { + "epoch": 7.44, + "learning_rate": 5.654561258453646e-05, + "loss": 0.3272, + "step": 175840 + }, + { + "epoch": 7.44, + "learning_rate": 5.650321185524391e-05, + "loss": 0.3593, + "step": 175850 + }, + { + "epoch": 7.44, + "learning_rate": 5.646081112595138e-05, + "loss": 0.3666, + "step": 175860 + }, + { + "epoch": 7.44, + "learning_rate": 5.641841039665883e-05, + "loss": 0.386, + "step": 175870 + }, + { + "epoch": 7.44, + "learning_rate": 5.637600966736628e-05, + "loss": 0.4376, + "step": 175880 + }, + { + "epoch": 7.44, + "learning_rate": 5.6333608938073736e-05, + "loss": 0.3595, + "step": 175890 + }, + { + "epoch": 7.44, + "learning_rate": 5.6291208208781196e-05, + "loss": 0.3506, + "step": 175900 + }, + { + "epoch": 7.44, + "learning_rate": 5.624880747948865e-05, + "loss": 0.3578, + "step": 175910 + }, + { + "epoch": 7.44, + "learning_rate": 5.62064067501961e-05, + "loss": 0.4632, + "step": 175920 + }, + { + "epoch": 7.44, + "learning_rate": 5.6164006020903556e-05, + "loss": 0.3453, + "step": 175930 + }, + { + "epoch": 7.44, + "learning_rate": 5.612160529161102e-05, + "loss": 0.3645, + "step": 175940 + }, + { + "epoch": 7.44, + "learning_rate": 5.6079204562318476e-05, + "loss": 0.3817, + "step": 175950 + }, + { + "epoch": 7.44, + "learning_rate": 5.603680383302593e-05, + "loss": 0.3971, + "step": 175960 + }, + { + "epoch": 7.44, + "learning_rate": 5.599440310373338e-05, + "loss": 0.3397, + "step": 175970 + }, + { + "epoch": 7.44, + "learning_rate": 5.595200237444085e-05, + "loss": 0.3526, + "step": 175980 + }, + { + "epoch": 7.44, + "learning_rate": 5.59096016451483e-05, + "loss": 0.3404, + "step": 175990 + }, + { + "epoch": 7.44, + "learning_rate": 5.5867200915855756e-05, + "loss": 0.4214, + "step": 176000 + }, + { + "epoch": 7.44, + "learning_rate": 5.582480018656321e-05, + "loss": 0.3355, + "step": 176010 + }, + { + "epoch": 7.44, + "learning_rate": 5.5782399457270676e-05, + "loss": 0.4348, + "step": 176020 + }, + { + "epoch": 7.44, + "learning_rate": 5.573999872797813e-05, + "loss": 0.4481, + "step": 176030 + }, + { + "epoch": 7.44, + "learning_rate": 5.569759799868558e-05, + "loss": 0.3806, + "step": 176040 + }, + { + "epoch": 7.44, + "learning_rate": 5.5655197269393036e-05, + "loss": 0.3674, + "step": 176050 + }, + { + "epoch": 7.45, + "learning_rate": 5.561279654010049e-05, + "loss": 0.3548, + "step": 176060 + }, + { + "epoch": 7.45, + "learning_rate": 5.557039581080795e-05, + "loss": 0.3405, + "step": 176070 + }, + { + "epoch": 7.45, + "learning_rate": 5.55279950815154e-05, + "loss": 0.4247, + "step": 176080 + }, + { + "epoch": 7.45, + "learning_rate": 5.5485594352222855e-05, + "loss": 0.4044, + "step": 176090 + }, + { + "epoch": 7.45, + "learning_rate": 5.544319362293031e-05, + "loss": 0.3834, + "step": 176100 + }, + { + "epoch": 7.45, + "learning_rate": 5.5400792893637775e-05, + "loss": 0.4171, + "step": 176110 + }, + { + "epoch": 7.45, + "learning_rate": 5.535839216434523e-05, + "loss": 0.4269, + "step": 176120 + }, + { + "epoch": 7.45, + "learning_rate": 5.531599143505268e-05, + "loss": 0.3761, + "step": 176130 + }, + { + "epoch": 7.45, + "learning_rate": 5.5273590705760135e-05, + "loss": 0.319, + "step": 176140 + }, + { + "epoch": 7.45, + "learning_rate": 5.52311899764676e-05, + "loss": 0.3601, + "step": 176150 + }, + { + "epoch": 7.45, + "learning_rate": 5.5188789247175055e-05, + "loss": 0.3993, + "step": 176160 + }, + { + "epoch": 7.45, + "learning_rate": 5.514638851788251e-05, + "loss": 0.3788, + "step": 176170 + }, + { + "epoch": 7.45, + "learning_rate": 5.510398778858996e-05, + "loss": 0.377, + "step": 176180 + }, + { + "epoch": 7.45, + "learning_rate": 5.506158705929743e-05, + "loss": 0.3736, + "step": 176190 + }, + { + "epoch": 7.45, + "learning_rate": 5.501918633000488e-05, + "loss": 0.3471, + "step": 176200 + }, + { + "epoch": 7.45, + "learning_rate": 5.4976785600712335e-05, + "loss": 0.3423, + "step": 176210 + }, + { + "epoch": 7.45, + "learning_rate": 5.493438487141979e-05, + "loss": 0.3802, + "step": 176220 + }, + { + "epoch": 7.45, + "learning_rate": 5.489198414212725e-05, + "loss": 0.3765, + "step": 176230 + }, + { + "epoch": 7.45, + "learning_rate": 5.48495834128347e-05, + "loss": 0.3609, + "step": 176240 + }, + { + "epoch": 7.45, + "learning_rate": 5.4807182683542155e-05, + "loss": 0.3526, + "step": 176250 + }, + { + "epoch": 7.45, + "learning_rate": 5.476478195424961e-05, + "loss": 0.3622, + "step": 176260 + }, + { + "epoch": 7.45, + "learning_rate": 5.4722381224957075e-05, + "loss": 0.371, + "step": 176270 + }, + { + "epoch": 7.45, + "learning_rate": 5.467998049566453e-05, + "loss": 0.3725, + "step": 176280 + }, + { + "epoch": 7.46, + "learning_rate": 5.463757976637198e-05, + "loss": 0.3929, + "step": 176290 + }, + { + "epoch": 7.46, + "learning_rate": 5.4595179037079434e-05, + "loss": 0.3034, + "step": 176300 + }, + { + "epoch": 7.46, + "learning_rate": 5.45527783077869e-05, + "loss": 0.3299, + "step": 176310 + }, + { + "epoch": 7.46, + "learning_rate": 5.4510377578494354e-05, + "loss": 0.3503, + "step": 176320 + }, + { + "epoch": 7.46, + "learning_rate": 5.446797684920181e-05, + "loss": 0.4074, + "step": 176330 + }, + { + "epoch": 7.46, + "learning_rate": 5.442557611990926e-05, + "loss": 0.3763, + "step": 176340 + }, + { + "epoch": 7.46, + "learning_rate": 5.438317539061673e-05, + "loss": 0.3686, + "step": 176350 + }, + { + "epoch": 7.46, + "learning_rate": 5.434077466132418e-05, + "loss": 0.3052, + "step": 176360 + }, + { + "epoch": 7.46, + "learning_rate": 5.4298373932031634e-05, + "loss": 0.3402, + "step": 176370 + }, + { + "epoch": 7.46, + "learning_rate": 5.425597320273909e-05, + "loss": 0.3552, + "step": 176380 + }, + { + "epoch": 7.46, + "learning_rate": 5.421357247344655e-05, + "loss": 0.3635, + "step": 176390 + }, + { + "epoch": 7.46, + "learning_rate": 5.4171171744154e-05, + "loss": 0.3628, + "step": 176400 + }, + { + "epoch": 7.46, + "learning_rate": 5.4128771014861454e-05, + "loss": 0.411, + "step": 176410 + }, + { + "epoch": 7.46, + "learning_rate": 5.408637028556891e-05, + "loss": 0.3293, + "step": 176420 + }, + { + "epoch": 7.46, + "learning_rate": 5.4043969556276374e-05, + "loss": 0.3364, + "step": 176430 + }, + { + "epoch": 7.46, + "learning_rate": 5.400156882698383e-05, + "loss": 0.4364, + "step": 176440 + }, + { + "epoch": 7.46, + "learning_rate": 5.395916809769128e-05, + "loss": 0.4053, + "step": 176450 + }, + { + "epoch": 7.46, + "learning_rate": 5.3916767368398734e-05, + "loss": 0.3674, + "step": 176460 + }, + { + "epoch": 7.46, + "learning_rate": 5.38743666391062e-05, + "loss": 0.3821, + "step": 176470 + }, + { + "epoch": 7.46, + "learning_rate": 5.3831965909813654e-05, + "loss": 0.3585, + "step": 176480 + }, + { + "epoch": 7.46, + "learning_rate": 5.378956518052111e-05, + "loss": 0.4259, + "step": 176490 + }, + { + "epoch": 7.46, + "learning_rate": 5.374716445122856e-05, + "loss": 0.3391, + "step": 176500 + }, + { + "epoch": 7.46, + "learning_rate": 5.370476372193603e-05, + "loss": 0.3735, + "step": 176510 + }, + { + "epoch": 7.46, + "learning_rate": 5.366236299264348e-05, + "loss": 0.361, + "step": 176520 + }, + { + "epoch": 7.47, + "learning_rate": 5.361996226335093e-05, + "loss": 0.3635, + "step": 176530 + }, + { + "epoch": 7.47, + "learning_rate": 5.3577561534058387e-05, + "loss": 0.3883, + "step": 176540 + }, + { + "epoch": 7.47, + "learning_rate": 5.3535160804765847e-05, + "loss": 0.3756, + "step": 176550 + }, + { + "epoch": 7.47, + "learning_rate": 5.34927600754733e-05, + "loss": 0.3928, + "step": 176560 + }, + { + "epoch": 7.47, + "learning_rate": 5.345035934618075e-05, + "loss": 0.4315, + "step": 176570 + }, + { + "epoch": 7.47, + "learning_rate": 5.3407958616888206e-05, + "loss": 0.3122, + "step": 176580 + }, + { + "epoch": 7.47, + "learning_rate": 5.336555788759567e-05, + "loss": 0.3662, + "step": 176590 + }, + { + "epoch": 7.47, + "learning_rate": 5.3323157158303126e-05, + "loss": 0.3335, + "step": 176600 + }, + { + "epoch": 7.47, + "learning_rate": 5.328075642901058e-05, + "loss": 0.3843, + "step": 176610 + }, + { + "epoch": 7.47, + "learning_rate": 5.323835569971803e-05, + "loss": 0.3528, + "step": 176620 + }, + { + "epoch": 7.47, + "learning_rate": 5.31959549704255e-05, + "loss": 0.3934, + "step": 176630 + }, + { + "epoch": 7.47, + "learning_rate": 5.315355424113295e-05, + "loss": 0.3595, + "step": 176640 + }, + { + "epoch": 7.47, + "learning_rate": 5.3111153511840406e-05, + "loss": 0.3613, + "step": 176650 + }, + { + "epoch": 7.47, + "learning_rate": 5.306875278254786e-05, + "loss": 0.4442, + "step": 176660 + }, + { + "epoch": 7.47, + "learning_rate": 5.3026352053255326e-05, + "loss": 0.3459, + "step": 176670 + }, + { + "epoch": 7.47, + "learning_rate": 5.298395132396278e-05, + "loss": 0.3446, + "step": 176680 + }, + { + "epoch": 7.47, + "learning_rate": 5.294155059467023e-05, + "loss": 0.452, + "step": 176690 + }, + { + "epoch": 7.47, + "learning_rate": 5.2899149865377686e-05, + "loss": 0.3363, + "step": 176700 + }, + { + "epoch": 7.47, + "learning_rate": 5.2856749136085146e-05, + "loss": 0.3461, + "step": 176710 + }, + { + "epoch": 7.47, + "learning_rate": 5.28143484067926e-05, + "loss": 0.3985, + "step": 176720 + }, + { + "epoch": 7.47, + "learning_rate": 5.277194767750005e-05, + "loss": 0.3847, + "step": 176730 + }, + { + "epoch": 7.47, + "learning_rate": 5.2729546948207505e-05, + "loss": 0.4289, + "step": 176740 + }, + { + "epoch": 7.47, + "learning_rate": 5.268714621891497e-05, + "loss": 0.3596, + "step": 176750 + }, + { + "epoch": 7.47, + "learning_rate": 5.2644745489622426e-05, + "loss": 0.3391, + "step": 176760 + }, + { + "epoch": 7.48, + "learning_rate": 5.260234476032988e-05, + "loss": 0.4041, + "step": 176770 + }, + { + "epoch": 7.48, + "learning_rate": 5.255994403103733e-05, + "loss": 0.3534, + "step": 176780 + }, + { + "epoch": 7.48, + "learning_rate": 5.2517543301744785e-05, + "loss": 0.3483, + "step": 176790 + }, + { + "epoch": 7.48, + "learning_rate": 5.247514257245225e-05, + "loss": 0.3358, + "step": 176800 + }, + { + "epoch": 7.48, + "learning_rate": 5.2432741843159705e-05, + "loss": 0.3463, + "step": 176810 + }, + { + "epoch": 7.48, + "learning_rate": 5.239034111386716e-05, + "loss": 0.3787, + "step": 176820 + }, + { + "epoch": 7.48, + "learning_rate": 5.234794038457461e-05, + "loss": 0.3867, + "step": 176830 + }, + { + "epoch": 7.48, + "learning_rate": 5.230553965528208e-05, + "loss": 0.385, + "step": 176840 + }, + { + "epoch": 7.48, + "learning_rate": 5.226313892598953e-05, + "loss": 0.3834, + "step": 176850 + }, + { + "epoch": 7.48, + "learning_rate": 5.2220738196696985e-05, + "loss": 0.3864, + "step": 176860 + }, + { + "epoch": 7.48, + "learning_rate": 5.217833746740444e-05, + "loss": 0.4061, + "step": 176870 + }, + { + "epoch": 7.48, + "learning_rate": 5.21359367381119e-05, + "loss": 0.347, + "step": 176880 + }, + { + "epoch": 7.48, + "learning_rate": 5.209353600881935e-05, + "loss": 0.3571, + "step": 176890 + }, + { + "epoch": 7.48, + "learning_rate": 5.205113527952681e-05, + "loss": 0.4263, + "step": 176900 + }, + { + "epoch": 7.48, + "learning_rate": 5.2008734550234265e-05, + "loss": 0.4201, + "step": 176910 + }, + { + "epoch": 7.48, + "learning_rate": 5.1966333820941725e-05, + "loss": 0.3858, + "step": 176920 + }, + { + "epoch": 7.48, + "learning_rate": 5.192393309164918e-05, + "loss": 0.3914, + "step": 176930 + }, + { + "epoch": 7.48, + "learning_rate": 5.188153236235663e-05, + "loss": 0.3384, + "step": 176940 + }, + { + "epoch": 7.48, + "learning_rate": 5.1839131633064084e-05, + "loss": 0.3862, + "step": 176950 + }, + { + "epoch": 7.48, + "learning_rate": 5.179673090377155e-05, + "loss": 0.3574, + "step": 176960 + }, + { + "epoch": 7.48, + "learning_rate": 5.1754330174479004e-05, + "loss": 0.3561, + "step": 176970 + }, + { + "epoch": 7.48, + "learning_rate": 5.171192944518646e-05, + "loss": 0.3892, + "step": 176980 + }, + { + "epoch": 7.48, + "learning_rate": 5.166952871589391e-05, + "loss": 0.3934, + "step": 176990 + }, + { + "epoch": 7.49, + "learning_rate": 5.162712798660138e-05, + "loss": 0.4013, + "step": 177000 + }, + { + "epoch": 7.49, + "learning_rate": 5.158472725730883e-05, + "loss": 0.3351, + "step": 177010 + }, + { + "epoch": 7.49, + "learning_rate": 5.1542326528016284e-05, + "loss": 0.3116, + "step": 177020 + }, + { + "epoch": 7.49, + "learning_rate": 5.149992579872374e-05, + "loss": 0.3549, + "step": 177030 + }, + { + "epoch": 7.49, + "learning_rate": 5.14575250694312e-05, + "loss": 0.3996, + "step": 177040 + }, + { + "epoch": 7.49, + "learning_rate": 5.141512434013865e-05, + "loss": 0.3646, + "step": 177050 + }, + { + "epoch": 7.49, + "learning_rate": 5.137272361084611e-05, + "loss": 0.3568, + "step": 177060 + }, + { + "epoch": 7.49, + "learning_rate": 5.1330322881553564e-05, + "loss": 0.3754, + "step": 177070 + }, + { + "epoch": 7.49, + "learning_rate": 5.1287922152261024e-05, + "loss": 0.3218, + "step": 177080 + }, + { + "epoch": 7.49, + "learning_rate": 5.124552142296848e-05, + "loss": 0.3492, + "step": 177090 + }, + { + "epoch": 7.49, + "learning_rate": 5.120312069367593e-05, + "loss": 0.4075, + "step": 177100 + }, + { + "epoch": 7.49, + "learning_rate": 5.1160719964383384e-05, + "loss": 0.3514, + "step": 177110 + }, + { + "epoch": 7.49, + "learning_rate": 5.111831923509085e-05, + "loss": 0.3478, + "step": 177120 + }, + { + "epoch": 7.49, + "learning_rate": 5.1075918505798304e-05, + "loss": 0.3832, + "step": 177130 + }, + { + "epoch": 7.49, + "learning_rate": 5.103351777650576e-05, + "loss": 0.4611, + "step": 177140 + }, + { + "epoch": 7.49, + "learning_rate": 5.099111704721321e-05, + "loss": 0.3638, + "step": 177150 + }, + { + "epoch": 7.49, + "learning_rate": 5.094871631792068e-05, + "loss": 0.3891, + "step": 177160 + }, + { + "epoch": 7.49, + "learning_rate": 5.090631558862813e-05, + "loss": 0.3945, + "step": 177170 + }, + { + "epoch": 7.49, + "learning_rate": 5.0863914859335583e-05, + "loss": 0.3863, + "step": 177180 + }, + { + "epoch": 7.49, + "learning_rate": 5.082151413004304e-05, + "loss": 0.3595, + "step": 177190 + }, + { + "epoch": 7.49, + "learning_rate": 5.07791134007505e-05, + "loss": 0.3559, + "step": 177200 + }, + { + "epoch": 7.49, + "learning_rate": 5.073671267145795e-05, + "loss": 0.3648, + "step": 177210 + }, + { + "epoch": 7.49, + "learning_rate": 5.069431194216541e-05, + "loss": 0.362, + "step": 177220 + }, + { + "epoch": 7.49, + "learning_rate": 5.065191121287286e-05, + "loss": 0.3785, + "step": 177230 + }, + { + "epoch": 7.5, + "learning_rate": 5.060951048358032e-05, + "loss": 0.372, + "step": 177240 + }, + { + "epoch": 7.5, + "learning_rate": 5.0567109754287776e-05, + "loss": 0.4159, + "step": 177250 + }, + { + "epoch": 7.5, + "learning_rate": 5.052470902499523e-05, + "loss": 0.3685, + "step": 177260 + }, + { + "epoch": 7.5, + "learning_rate": 5.048230829570268e-05, + "loss": 0.3879, + "step": 177270 + }, + { + "epoch": 7.5, + "learning_rate": 5.043990756641015e-05, + "loss": 0.3307, + "step": 177280 + }, + { + "epoch": 7.5, + "learning_rate": 5.03975068371176e-05, + "loss": 0.3977, + "step": 177290 + }, + { + "epoch": 7.5, + "learning_rate": 5.0355106107825056e-05, + "loss": 0.4097, + "step": 177300 + }, + { + "epoch": 7.5, + "learning_rate": 5.031270537853251e-05, + "loss": 0.3374, + "step": 177310 + }, + { + "epoch": 7.5, + "learning_rate": 5.0270304649239976e-05, + "loss": 0.4126, + "step": 177320 + }, + { + "epoch": 7.5, + "learning_rate": 5.022790391994743e-05, + "loss": 0.4327, + "step": 177330 + }, + { + "epoch": 7.5, + "learning_rate": 5.018550319065488e-05, + "loss": 0.3817, + "step": 177340 + }, + { + "epoch": 7.5, + "learning_rate": 5.0143102461362336e-05, + "loss": 0.4152, + "step": 177350 + }, + { + "epoch": 7.5, + "learning_rate": 5.0100701732069796e-05, + "loss": 0.3585, + "step": 177360 + }, + { + "epoch": 7.5, + "learning_rate": 5.0058301002777256e-05, + "loss": 0.4104, + "step": 177370 + }, + { + "epoch": 7.5, + "learning_rate": 5.001590027348471e-05, + "loss": 0.3844, + "step": 177380 + }, + { + "epoch": 7.5, + "learning_rate": 4.997349954419216e-05, + "loss": 0.3476, + "step": 177390 + }, + { + "epoch": 7.5, + "learning_rate": 4.9931098814899616e-05, + "loss": 0.3839, + "step": 177400 + }, + { + "epoch": 7.5, + "learning_rate": 4.9888698085607076e-05, + "loss": 0.4208, + "step": 177410 + }, + { + "epoch": 7.5, + "learning_rate": 4.984629735631453e-05, + "loss": 0.3254, + "step": 177420 + }, + { + "epoch": 7.5, + "learning_rate": 4.980389662702199e-05, + "loss": 0.3538, + "step": 177430 + }, + { + "epoch": 7.5, + "learning_rate": 4.976149589772944e-05, + "loss": 0.3768, + "step": 177440 + }, + { + "epoch": 7.5, + "learning_rate": 4.97190951684369e-05, + "loss": 0.3438, + "step": 177450 + }, + { + "epoch": 7.5, + "learning_rate": 4.9676694439144355e-05, + "loss": 0.3636, + "step": 177460 + }, + { + "epoch": 7.5, + "learning_rate": 4.9634293709851815e-05, + "loss": 0.3079, + "step": 177470 + }, + { + "epoch": 7.51, + "learning_rate": 4.959189298055927e-05, + "loss": 0.3713, + "step": 177480 + }, + { + "epoch": 7.51, + "learning_rate": 4.954949225126672e-05, + "loss": 0.3978, + "step": 177490 + }, + { + "epoch": 7.51, + "learning_rate": 4.950709152197418e-05, + "loss": 0.3946, + "step": 177500 + }, + { + "epoch": 7.51, + "learning_rate": 4.9464690792681635e-05, + "loss": 0.4108, + "step": 177510 + }, + { + "epoch": 7.51, + "learning_rate": 4.9422290063389095e-05, + "loss": 0.3699, + "step": 177520 + }, + { + "epoch": 7.51, + "learning_rate": 4.937988933409655e-05, + "loss": 0.4176, + "step": 177530 + }, + { + "epoch": 7.51, + "learning_rate": 4.933748860480401e-05, + "loss": 0.3334, + "step": 177540 + }, + { + "epoch": 7.51, + "learning_rate": 4.929508787551146e-05, + "loss": 0.3863, + "step": 177550 + }, + { + "epoch": 7.51, + "learning_rate": 4.9252687146218915e-05, + "loss": 0.436, + "step": 177560 + }, + { + "epoch": 7.51, + "learning_rate": 4.921028641692637e-05, + "loss": 0.3383, + "step": 177570 + }, + { + "epoch": 7.51, + "learning_rate": 4.916788568763383e-05, + "loss": 0.3618, + "step": 177580 + }, + { + "epoch": 7.51, + "learning_rate": 4.912548495834128e-05, + "loss": 0.3666, + "step": 177590 + }, + { + "epoch": 7.51, + "learning_rate": 4.908308422904874e-05, + "loss": 0.4362, + "step": 177600 + }, + { + "epoch": 7.51, + "learning_rate": 4.9040683499756195e-05, + "loss": 0.39, + "step": 177610 + }, + { + "epoch": 7.51, + "learning_rate": 4.8998282770463655e-05, + "loss": 0.3935, + "step": 177620 + }, + { + "epoch": 7.51, + "learning_rate": 4.895588204117111e-05, + "loss": 0.4005, + "step": 177630 + }, + { + "epoch": 7.51, + "learning_rate": 4.891348131187857e-05, + "loss": 0.4094, + "step": 177640 + }, + { + "epoch": 7.51, + "learning_rate": 4.887108058258602e-05, + "loss": 0.3962, + "step": 177650 + }, + { + "epoch": 7.51, + "learning_rate": 4.882867985329348e-05, + "loss": 0.3866, + "step": 177660 + }, + { + "epoch": 7.51, + "learning_rate": 4.8786279124000934e-05, + "loss": 0.423, + "step": 177670 + }, + { + "epoch": 7.51, + "learning_rate": 4.8743878394708394e-05, + "loss": 0.3751, + "step": 177680 + }, + { + "epoch": 7.51, + "learning_rate": 4.870147766541585e-05, + "loss": 0.4473, + "step": 177690 + }, + { + "epoch": 7.51, + "learning_rate": 4.865907693612331e-05, + "loss": 0.4267, + "step": 177700 + }, + { + "epoch": 7.52, + "learning_rate": 4.861667620683076e-05, + "loss": 0.3404, + "step": 177710 + }, + { + "epoch": 7.52, + "learning_rate": 4.8574275477538214e-05, + "loss": 0.3593, + "step": 177720 + }, + { + "epoch": 7.52, + "learning_rate": 4.853187474824567e-05, + "loss": 0.3564, + "step": 177730 + }, + { + "epoch": 7.52, + "learning_rate": 4.848947401895313e-05, + "loss": 0.4448, + "step": 177740 + }, + { + "epoch": 7.52, + "learning_rate": 4.844707328966058e-05, + "loss": 0.3195, + "step": 177750 + }, + { + "epoch": 7.52, + "learning_rate": 4.840467256036804e-05, + "loss": 0.3305, + "step": 177760 + }, + { + "epoch": 7.52, + "learning_rate": 4.8362271831075494e-05, + "loss": 0.3378, + "step": 177770 + }, + { + "epoch": 7.52, + "learning_rate": 4.8319871101782954e-05, + "loss": 0.3551, + "step": 177780 + }, + { + "epoch": 7.52, + "learning_rate": 4.827747037249041e-05, + "loss": 0.4005, + "step": 177790 + }, + { + "epoch": 7.52, + "learning_rate": 4.823506964319787e-05, + "loss": 0.3447, + "step": 177800 + }, + { + "epoch": 7.52, + "learning_rate": 4.819266891390532e-05, + "loss": 0.3841, + "step": 177810 + }, + { + "epoch": 7.52, + "learning_rate": 4.815026818461278e-05, + "loss": 0.3985, + "step": 177820 + }, + { + "epoch": 7.52, + "learning_rate": 4.8107867455320234e-05, + "loss": 0.3118, + "step": 177830 + }, + { + "epoch": 7.52, + "learning_rate": 4.8065466726027694e-05, + "loss": 0.3412, + "step": 177840 + }, + { + "epoch": 7.52, + "learning_rate": 4.802306599673515e-05, + "loss": 0.4339, + "step": 177850 + }, + { + "epoch": 7.52, + "learning_rate": 4.798066526744261e-05, + "loss": 0.371, + "step": 177860 + }, + { + "epoch": 7.52, + "learning_rate": 4.793826453815006e-05, + "loss": 0.3907, + "step": 177870 + }, + { + "epoch": 7.52, + "learning_rate": 4.789586380885751e-05, + "loss": 0.3659, + "step": 177880 + }, + { + "epoch": 7.52, + "learning_rate": 4.7853463079564966e-05, + "loss": 0.354, + "step": 177890 + }, + { + "epoch": 7.52, + "learning_rate": 4.7811062350272426e-05, + "loss": 0.343, + "step": 177900 + }, + { + "epoch": 7.52, + "learning_rate": 4.776866162097988e-05, + "loss": 0.3732, + "step": 177910 + }, + { + "epoch": 7.52, + "learning_rate": 4.772626089168734e-05, + "loss": 0.3661, + "step": 177920 + }, + { + "epoch": 7.52, + "learning_rate": 4.768386016239479e-05, + "loss": 0.3951, + "step": 177930 + }, + { + "epoch": 7.52, + "learning_rate": 4.764145943310225e-05, + "loss": 0.379, + "step": 177940 + }, + { + "epoch": 7.53, + "learning_rate": 4.7599058703809706e-05, + "loss": 0.3893, + "step": 177950 + }, + { + "epoch": 7.53, + "learning_rate": 4.7556657974517166e-05, + "loss": 0.3487, + "step": 177960 + }, + { + "epoch": 7.53, + "learning_rate": 4.751425724522462e-05, + "loss": 0.3407, + "step": 177970 + }, + { + "epoch": 7.53, + "learning_rate": 4.747185651593208e-05, + "loss": 0.3892, + "step": 177980 + }, + { + "epoch": 7.53, + "learning_rate": 4.742945578663953e-05, + "loss": 0.3416, + "step": 177990 + }, + { + "epoch": 7.53, + "learning_rate": 4.738705505734699e-05, + "loss": 0.3175, + "step": 178000 + }, + { + "epoch": 7.53, + "learning_rate": 4.7344654328054446e-05, + "loss": 0.3724, + "step": 178010 + }, + { + "epoch": 7.53, + "learning_rate": 4.7302253598761906e-05, + "loss": 0.3358, + "step": 178020 + }, + { + "epoch": 7.53, + "learning_rate": 4.725985286946936e-05, + "loss": 0.4543, + "step": 178030 + }, + { + "epoch": 7.53, + "learning_rate": 4.721745214017681e-05, + "loss": 0.4339, + "step": 178040 + }, + { + "epoch": 7.53, + "learning_rate": 4.7175051410884266e-05, + "loss": 0.361, + "step": 178050 + }, + { + "epoch": 7.53, + "learning_rate": 4.7132650681591726e-05, + "loss": 0.402, + "step": 178060 + }, + { + "epoch": 7.53, + "learning_rate": 4.709024995229918e-05, + "loss": 0.3791, + "step": 178070 + }, + { + "epoch": 7.53, + "learning_rate": 4.704784922300664e-05, + "loss": 0.3572, + "step": 178080 + }, + { + "epoch": 7.53, + "learning_rate": 4.700544849371409e-05, + "loss": 0.4009, + "step": 178090 + }, + { + "epoch": 7.53, + "learning_rate": 4.696304776442155e-05, + "loss": 0.3716, + "step": 178100 + }, + { + "epoch": 7.53, + "learning_rate": 4.6920647035129005e-05, + "loss": 0.3875, + "step": 178110 + }, + { + "epoch": 7.53, + "learning_rate": 4.6878246305836465e-05, + "loss": 0.3876, + "step": 178120 + }, + { + "epoch": 7.53, + "learning_rate": 4.683584557654392e-05, + "loss": 0.3451, + "step": 178130 + }, + { + "epoch": 7.53, + "learning_rate": 4.679344484725138e-05, + "loss": 0.3944, + "step": 178140 + }, + { + "epoch": 7.53, + "learning_rate": 4.675104411795883e-05, + "loss": 0.3669, + "step": 178150 + }, + { + "epoch": 7.53, + "learning_rate": 4.670864338866629e-05, + "loss": 0.3545, + "step": 178160 + }, + { + "epoch": 7.53, + "learning_rate": 4.6666242659373745e-05, + "loss": 0.4213, + "step": 178170 + }, + { + "epoch": 7.53, + "learning_rate": 4.6623841930081205e-05, + "loss": 0.374, + "step": 178180 + }, + { + "epoch": 7.54, + "learning_rate": 4.658144120078866e-05, + "loss": 0.4139, + "step": 178190 + }, + { + "epoch": 7.54, + "learning_rate": 4.653904047149611e-05, + "loss": 0.3336, + "step": 178200 + }, + { + "epoch": 7.54, + "learning_rate": 4.6496639742203565e-05, + "loss": 0.3387, + "step": 178210 + }, + { + "epoch": 7.54, + "learning_rate": 4.6454239012911025e-05, + "loss": 0.3357, + "step": 178220 + }, + { + "epoch": 7.54, + "learning_rate": 4.641183828361848e-05, + "loss": 0.3914, + "step": 178230 + }, + { + "epoch": 7.54, + "learning_rate": 4.636943755432593e-05, + "loss": 0.3595, + "step": 178240 + }, + { + "epoch": 7.54, + "learning_rate": 4.632703682503339e-05, + "loss": 0.4255, + "step": 178250 + }, + { + "epoch": 7.54, + "learning_rate": 4.6284636095740845e-05, + "loss": 0.397, + "step": 178260 + }, + { + "epoch": 7.54, + "learning_rate": 4.6242235366448305e-05, + "loss": 0.4552, + "step": 178270 + }, + { + "epoch": 7.54, + "learning_rate": 4.619983463715576e-05, + "loss": 0.381, + "step": 178280 + }, + { + "epoch": 7.54, + "learning_rate": 4.615743390786322e-05, + "loss": 0.4404, + "step": 178290 + }, + { + "epoch": 7.54, + "learning_rate": 4.611503317857067e-05, + "loss": 0.4421, + "step": 178300 + }, + { + "epoch": 7.54, + "learning_rate": 4.607263244927813e-05, + "loss": 0.3205, + "step": 178310 + }, + { + "epoch": 7.54, + "learning_rate": 4.6030231719985584e-05, + "loss": 0.3904, + "step": 178320 + }, + { + "epoch": 7.54, + "learning_rate": 4.5987830990693044e-05, + "loss": 0.3961, + "step": 178330 + }, + { + "epoch": 7.54, + "learning_rate": 4.59454302614005e-05, + "loss": 0.3389, + "step": 178340 + }, + { + "epoch": 7.54, + "learning_rate": 4.590302953210796e-05, + "loss": 0.3585, + "step": 178350 + }, + { + "epoch": 7.54, + "learning_rate": 4.586062880281541e-05, + "loss": 0.3281, + "step": 178360 + }, + { + "epoch": 7.54, + "learning_rate": 4.5818228073522864e-05, + "loss": 0.4122, + "step": 178370 + }, + { + "epoch": 7.54, + "learning_rate": 4.5775827344230324e-05, + "loss": 0.3352, + "step": 178380 + }, + { + "epoch": 7.54, + "learning_rate": 4.573342661493778e-05, + "loss": 0.4145, + "step": 178390 + }, + { + "epoch": 7.54, + "learning_rate": 4.569102588564523e-05, + "loss": 0.4218, + "step": 178400 + }, + { + "epoch": 7.54, + "learning_rate": 4.564862515635269e-05, + "loss": 0.3875, + "step": 178410 + }, + { + "epoch": 7.55, + "learning_rate": 4.5606224427060144e-05, + "loss": 0.3892, + "step": 178420 + }, + { + "epoch": 7.55, + "learning_rate": 4.5563823697767604e-05, + "loss": 0.4021, + "step": 178430 + }, + { + "epoch": 7.55, + "learning_rate": 4.552142296847506e-05, + "loss": 0.3915, + "step": 178440 + }, + { + "epoch": 7.55, + "learning_rate": 4.547902223918252e-05, + "loss": 0.4285, + "step": 178450 + }, + { + "epoch": 7.55, + "learning_rate": 4.543662150988997e-05, + "loss": 0.3606, + "step": 178460 + }, + { + "epoch": 7.55, + "learning_rate": 4.539422078059743e-05, + "loss": 0.3625, + "step": 178470 + }, + { + "epoch": 7.55, + "learning_rate": 4.5351820051304884e-05, + "loss": 0.4176, + "step": 178480 + }, + { + "epoch": 7.55, + "learning_rate": 4.5309419322012344e-05, + "loss": 0.4132, + "step": 178490 + }, + { + "epoch": 7.55, + "learning_rate": 4.52670185927198e-05, + "loss": 0.4508, + "step": 178500 + }, + { + "epoch": 7.55, + "learning_rate": 4.522461786342726e-05, + "loss": 0.3895, + "step": 178510 + }, + { + "epoch": 7.55, + "learning_rate": 4.518221713413471e-05, + "loss": 0.3388, + "step": 178520 + }, + { + "epoch": 7.55, + "learning_rate": 4.513981640484217e-05, + "loss": 0.3944, + "step": 178530 + }, + { + "epoch": 7.55, + "learning_rate": 4.509741567554962e-05, + "loss": 0.3509, + "step": 178540 + }, + { + "epoch": 7.55, + "learning_rate": 4.5055014946257077e-05, + "loss": 0.3646, + "step": 178550 + }, + { + "epoch": 7.55, + "learning_rate": 4.501261421696453e-05, + "loss": 0.3751, + "step": 178560 + }, + { + "epoch": 7.55, + "learning_rate": 4.497021348767199e-05, + "loss": 0.3736, + "step": 178570 + }, + { + "epoch": 7.55, + "learning_rate": 4.492781275837944e-05, + "loss": 0.3627, + "step": 178580 + }, + { + "epoch": 7.55, + "learning_rate": 4.48854120290869e-05, + "loss": 0.4134, + "step": 178590 + }, + { + "epoch": 7.55, + "learning_rate": 4.4843011299794356e-05, + "loss": 0.3227, + "step": 178600 + }, + { + "epoch": 7.55, + "learning_rate": 4.4800610570501816e-05, + "loss": 0.3906, + "step": 178610 + }, + { + "epoch": 7.55, + "learning_rate": 4.475820984120927e-05, + "loss": 0.3399, + "step": 178620 + }, + { + "epoch": 7.55, + "learning_rate": 4.471580911191673e-05, + "loss": 0.425, + "step": 178630 + }, + { + "epoch": 7.55, + "learning_rate": 4.467340838262418e-05, + "loss": 0.3898, + "step": 178640 + }, + { + "epoch": 7.55, + "learning_rate": 4.463100765333164e-05, + "loss": 0.3421, + "step": 178650 + }, + { + "epoch": 7.56, + "learning_rate": 4.4588606924039096e-05, + "loss": 0.3503, + "step": 178660 + }, + { + "epoch": 7.56, + "learning_rate": 4.4546206194746556e-05, + "loss": 0.3146, + "step": 178670 + }, + { + "epoch": 7.56, + "learning_rate": 4.450380546545401e-05, + "loss": 0.3686, + "step": 178680 + }, + { + "epoch": 7.56, + "learning_rate": 4.446140473616147e-05, + "loss": 0.3639, + "step": 178690 + }, + { + "epoch": 7.56, + "learning_rate": 4.441900400686892e-05, + "loss": 0.3721, + "step": 178700 + }, + { + "epoch": 7.56, + "learning_rate": 4.4376603277576376e-05, + "loss": 0.39, + "step": 178710 + }, + { + "epoch": 7.56, + "learning_rate": 4.433420254828383e-05, + "loss": 0.4112, + "step": 178720 + }, + { + "epoch": 7.56, + "learning_rate": 4.429180181899129e-05, + "loss": 0.3495, + "step": 178730 + }, + { + "epoch": 7.56, + "learning_rate": 4.424940108969874e-05, + "loss": 0.4141, + "step": 178740 + }, + { + "epoch": 7.56, + "learning_rate": 4.42070003604062e-05, + "loss": 0.3076, + "step": 178750 + }, + { + "epoch": 7.56, + "learning_rate": 4.4164599631113656e-05, + "loss": 0.3934, + "step": 178760 + }, + { + "epoch": 7.56, + "learning_rate": 4.4122198901821116e-05, + "loss": 0.4204, + "step": 178770 + }, + { + "epoch": 7.56, + "learning_rate": 4.407979817252857e-05, + "loss": 0.3942, + "step": 178780 + }, + { + "epoch": 7.56, + "learning_rate": 4.403739744323603e-05, + "loss": 0.4555, + "step": 178790 + }, + { + "epoch": 7.56, + "learning_rate": 4.399499671394348e-05, + "loss": 0.2976, + "step": 178800 + }, + { + "epoch": 7.56, + "learning_rate": 4.395259598465094e-05, + "loss": 0.4542, + "step": 178810 + }, + { + "epoch": 7.56, + "learning_rate": 4.3910195255358395e-05, + "loss": 0.3407, + "step": 178820 + }, + { + "epoch": 7.56, + "learning_rate": 4.3867794526065855e-05, + "loss": 0.3681, + "step": 178830 + }, + { + "epoch": 7.56, + "learning_rate": 4.382539379677331e-05, + "loss": 0.3662, + "step": 178840 + }, + { + "epoch": 7.56, + "learning_rate": 4.378299306748077e-05, + "loss": 0.4305, + "step": 178850 + }, + { + "epoch": 7.56, + "learning_rate": 4.374059233818822e-05, + "loss": 0.4427, + "step": 178860 + }, + { + "epoch": 7.56, + "learning_rate": 4.3698191608895675e-05, + "loss": 0.3627, + "step": 178870 + }, + { + "epoch": 7.56, + "learning_rate": 4.365579087960313e-05, + "loss": 0.3664, + "step": 178880 + }, + { + "epoch": 7.57, + "learning_rate": 4.361339015031059e-05, + "loss": 0.3466, + "step": 178890 + }, + { + "epoch": 7.57, + "learning_rate": 4.357098942101804e-05, + "loss": 0.376, + "step": 178900 + }, + { + "epoch": 7.57, + "learning_rate": 4.3528588691725495e-05, + "loss": 0.3844, + "step": 178910 + }, + { + "epoch": 7.57, + "learning_rate": 4.3486187962432955e-05, + "loss": 0.3174, + "step": 178920 + }, + { + "epoch": 7.57, + "learning_rate": 4.344378723314041e-05, + "loss": 0.357, + "step": 178930 + }, + { + "epoch": 7.57, + "learning_rate": 4.340138650384787e-05, + "loss": 0.3408, + "step": 178940 + }, + { + "epoch": 7.57, + "learning_rate": 4.335898577455532e-05, + "loss": 0.3972, + "step": 178950 + }, + { + "epoch": 7.57, + "learning_rate": 4.331658504526278e-05, + "loss": 0.3124, + "step": 178960 + }, + { + "epoch": 7.57, + "learning_rate": 4.3274184315970235e-05, + "loss": 0.3292, + "step": 178970 + }, + { + "epoch": 7.57, + "learning_rate": 4.3231783586677695e-05, + "loss": 0.3594, + "step": 178980 + }, + { + "epoch": 7.57, + "learning_rate": 4.318938285738515e-05, + "loss": 0.3978, + "step": 178990 + }, + { + "epoch": 7.57, + "learning_rate": 4.314698212809261e-05, + "loss": 0.3818, + "step": 179000 + }, + { + "epoch": 7.57, + "learning_rate": 4.310458139880006e-05, + "loss": 0.3983, + "step": 179010 + }, + { + "epoch": 7.57, + "learning_rate": 4.306218066950752e-05, + "loss": 0.3273, + "step": 179020 + }, + { + "epoch": 7.57, + "learning_rate": 4.3019779940214974e-05, + "loss": 0.3672, + "step": 179030 + }, + { + "epoch": 7.57, + "learning_rate": 4.297737921092243e-05, + "loss": 0.4191, + "step": 179040 + }, + { + "epoch": 7.57, + "learning_rate": 4.293497848162988e-05, + "loss": 0.4039, + "step": 179050 + }, + { + "epoch": 7.57, + "learning_rate": 4.289257775233734e-05, + "loss": 0.4022, + "step": 179060 + }, + { + "epoch": 7.57, + "learning_rate": 4.2850177023044794e-05, + "loss": 0.3341, + "step": 179070 + }, + { + "epoch": 7.57, + "learning_rate": 4.2807776293752254e-05, + "loss": 0.4387, + "step": 179080 + }, + { + "epoch": 7.57, + "learning_rate": 4.276537556445971e-05, + "loss": 0.3251, + "step": 179090 + }, + { + "epoch": 7.57, + "learning_rate": 4.272297483516717e-05, + "loss": 0.3433, + "step": 179100 + }, + { + "epoch": 7.57, + "learning_rate": 4.268057410587462e-05, + "loss": 0.3918, + "step": 179110 + }, + { + "epoch": 7.57, + "learning_rate": 4.263817337658208e-05, + "loss": 0.3941, + "step": 179120 + }, + { + "epoch": 7.58, + "learning_rate": 4.2595772647289534e-05, + "loss": 0.4052, + "step": 179130 + }, + { + "epoch": 7.58, + "learning_rate": 4.2553371917996994e-05, + "loss": 0.3662, + "step": 179140 + }, + { + "epoch": 7.58, + "learning_rate": 4.251097118870445e-05, + "loss": 0.343, + "step": 179150 + }, + { + "epoch": 7.58, + "learning_rate": 4.246857045941191e-05, + "loss": 0.3992, + "step": 179160 + }, + { + "epoch": 7.58, + "learning_rate": 4.242616973011936e-05, + "loss": 0.3378, + "step": 179170 + }, + { + "epoch": 7.58, + "learning_rate": 4.238376900082682e-05, + "loss": 0.4396, + "step": 179180 + }, + { + "epoch": 7.58, + "learning_rate": 4.2341368271534273e-05, + "loss": 0.3548, + "step": 179190 + }, + { + "epoch": 7.58, + "learning_rate": 4.229896754224173e-05, + "loss": 0.4095, + "step": 179200 + }, + { + "epoch": 7.58, + "learning_rate": 4.225656681294918e-05, + "loss": 0.3603, + "step": 179210 + }, + { + "epoch": 7.58, + "learning_rate": 4.221416608365664e-05, + "loss": 0.3897, + "step": 179220 + }, + { + "epoch": 7.58, + "learning_rate": 4.217176535436409e-05, + "loss": 0.3906, + "step": 179230 + }, + { + "epoch": 7.58, + "learning_rate": 4.212936462507155e-05, + "loss": 0.4147, + "step": 179240 + }, + { + "epoch": 7.58, + "learning_rate": 4.2086963895779006e-05, + "loss": 0.3655, + "step": 179250 + }, + { + "epoch": 7.58, + "learning_rate": 4.2044563166486466e-05, + "loss": 0.3884, + "step": 179260 + }, + { + "epoch": 7.58, + "learning_rate": 4.200216243719392e-05, + "loss": 0.4055, + "step": 179270 + }, + { + "epoch": 7.58, + "learning_rate": 4.195976170790138e-05, + "loss": 0.4126, + "step": 179280 + }, + { + "epoch": 7.58, + "learning_rate": 4.191736097860883e-05, + "loss": 0.3597, + "step": 179290 + }, + { + "epoch": 7.58, + "learning_rate": 4.187496024931629e-05, + "loss": 0.3108, + "step": 179300 + }, + { + "epoch": 7.58, + "learning_rate": 4.1832559520023746e-05, + "loss": 0.38, + "step": 179310 + }, + { + "epoch": 7.58, + "learning_rate": 4.1790158790731206e-05, + "loss": 0.3434, + "step": 179320 + }, + { + "epoch": 7.58, + "learning_rate": 4.174775806143866e-05, + "loss": 0.3386, + "step": 179330 + }, + { + "epoch": 7.58, + "learning_rate": 4.170535733214612e-05, + "loss": 0.3938, + "step": 179340 + }, + { + "epoch": 7.58, + "learning_rate": 4.166295660285357e-05, + "loss": 0.3388, + "step": 179350 + }, + { + "epoch": 7.58, + "learning_rate": 4.1620555873561026e-05, + "loss": 0.4119, + "step": 179360 + }, + { + "epoch": 7.59, + "learning_rate": 4.157815514426848e-05, + "loss": 0.2934, + "step": 179370 + }, + { + "epoch": 7.59, + "learning_rate": 4.153575441497594e-05, + "loss": 0.2932, + "step": 179380 + }, + { + "epoch": 7.59, + "learning_rate": 4.149335368568339e-05, + "loss": 0.3877, + "step": 179390 + }, + { + "epoch": 7.59, + "learning_rate": 4.145095295639085e-05, + "loss": 0.3148, + "step": 179400 + }, + { + "epoch": 7.59, + "learning_rate": 4.1408552227098306e-05, + "loss": 0.3843, + "step": 179410 + }, + { + "epoch": 7.59, + "learning_rate": 4.1366151497805766e-05, + "loss": 0.387, + "step": 179420 + }, + { + "epoch": 7.59, + "learning_rate": 4.132375076851322e-05, + "loss": 0.3845, + "step": 179430 + }, + { + "epoch": 7.59, + "learning_rate": 4.128135003922068e-05, + "loss": 0.3918, + "step": 179440 + }, + { + "epoch": 7.59, + "learning_rate": 4.123894930992813e-05, + "loss": 0.3622, + "step": 179450 + }, + { + "epoch": 7.59, + "learning_rate": 4.119654858063559e-05, + "loss": 0.3652, + "step": 179460 + }, + { + "epoch": 7.59, + "learning_rate": 4.1154147851343045e-05, + "loss": 0.3982, + "step": 179470 + }, + { + "epoch": 7.59, + "learning_rate": 4.1111747122050505e-05, + "loss": 0.3583, + "step": 179480 + }, + { + "epoch": 7.59, + "learning_rate": 4.106934639275796e-05, + "loss": 0.3748, + "step": 179490 + }, + { + "epoch": 7.59, + "learning_rate": 4.102694566346542e-05, + "loss": 0.3401, + "step": 179500 + }, + { + "epoch": 7.59, + "learning_rate": 4.098454493417287e-05, + "loss": 0.3526, + "step": 179510 + }, + { + "epoch": 7.59, + "learning_rate": 4.0942144204880325e-05, + "loss": 0.4288, + "step": 179520 + }, + { + "epoch": 7.59, + "learning_rate": 4.089974347558778e-05, + "loss": 0.3416, + "step": 179530 + }, + { + "epoch": 7.59, + "learning_rate": 4.085734274629524e-05, + "loss": 0.3578, + "step": 179540 + }, + { + "epoch": 7.59, + "learning_rate": 4.081494201700269e-05, + "loss": 0.3617, + "step": 179550 + }, + { + "epoch": 7.59, + "learning_rate": 4.077254128771015e-05, + "loss": 0.3703, + "step": 179560 + }, + { + "epoch": 7.59, + "learning_rate": 4.0730140558417605e-05, + "loss": 0.3837, + "step": 179570 + }, + { + "epoch": 7.59, + "learning_rate": 4.0687739829125065e-05, + "loss": 0.3072, + "step": 179580 + }, + { + "epoch": 7.59, + "learning_rate": 4.064533909983252e-05, + "loss": 0.3505, + "step": 179590 + }, + { + "epoch": 7.6, + "learning_rate": 4.060293837053998e-05, + "loss": 0.408, + "step": 179600 + }, + { + "epoch": 7.6, + "learning_rate": 4.056053764124743e-05, + "loss": 0.3599, + "step": 179610 + }, + { + "epoch": 7.6, + "learning_rate": 4.0518136911954885e-05, + "loss": 0.3681, + "step": 179620 + }, + { + "epoch": 7.6, + "learning_rate": 4.0475736182662345e-05, + "loss": 0.3881, + "step": 179630 + }, + { + "epoch": 7.6, + "learning_rate": 4.04333354533698e-05, + "loss": 0.4266, + "step": 179640 + }, + { + "epoch": 7.6, + "learning_rate": 4.039093472407726e-05, + "loss": 0.342, + "step": 179650 + }, + { + "epoch": 7.6, + "learning_rate": 4.034853399478471e-05, + "loss": 0.3934, + "step": 179660 + }, + { + "epoch": 7.6, + "learning_rate": 4.030613326549217e-05, + "loss": 0.4083, + "step": 179670 + }, + { + "epoch": 7.6, + "learning_rate": 4.0263732536199624e-05, + "loss": 0.3617, + "step": 179680 + }, + { + "epoch": 7.6, + "learning_rate": 4.0221331806907084e-05, + "loss": 0.3543, + "step": 179690 + }, + { + "epoch": 7.6, + "learning_rate": 4.017893107761454e-05, + "loss": 0.4502, + "step": 179700 + }, + { + "epoch": 7.6, + "learning_rate": 4.013653034832199e-05, + "loss": 0.35, + "step": 179710 + }, + { + "epoch": 7.6, + "learning_rate": 4.0094129619029444e-05, + "loss": 0.3396, + "step": 179720 + }, + { + "epoch": 7.6, + "learning_rate": 4.0051728889736904e-05, + "loss": 0.351, + "step": 179730 + }, + { + "epoch": 7.6, + "learning_rate": 4.000932816044436e-05, + "loss": 0.3412, + "step": 179740 + }, + { + "epoch": 7.6, + "learning_rate": 3.996692743115182e-05, + "loss": 0.3846, + "step": 179750 + }, + { + "epoch": 7.6, + "learning_rate": 3.992452670185927e-05, + "loss": 0.4162, + "step": 179760 + }, + { + "epoch": 7.6, + "learning_rate": 3.988212597256673e-05, + "loss": 0.377, + "step": 179770 + }, + { + "epoch": 7.6, + "learning_rate": 3.9839725243274184e-05, + "loss": 0.3058, + "step": 179780 + }, + { + "epoch": 7.6, + "learning_rate": 3.9797324513981644e-05, + "loss": 0.3788, + "step": 179790 + }, + { + "epoch": 7.6, + "learning_rate": 3.97549237846891e-05, + "loss": 0.3497, + "step": 179800 + }, + { + "epoch": 7.6, + "learning_rate": 3.971252305539656e-05, + "loss": 0.4421, + "step": 179810 + }, + { + "epoch": 7.6, + "learning_rate": 3.967012232610401e-05, + "loss": 0.4008, + "step": 179820 + }, + { + "epoch": 7.6, + "learning_rate": 3.962772159681147e-05, + "loss": 0.3398, + "step": 179830 + }, + { + "epoch": 7.61, + "learning_rate": 3.9585320867518924e-05, + "loss": 0.3406, + "step": 179840 + }, + { + "epoch": 7.61, + "learning_rate": 3.9542920138226384e-05, + "loss": 0.4135, + "step": 179850 + }, + { + "epoch": 7.61, + "learning_rate": 3.950051940893384e-05, + "loss": 0.3637, + "step": 179860 + }, + { + "epoch": 7.61, + "learning_rate": 3.945811867964129e-05, + "loss": 0.3861, + "step": 179870 + }, + { + "epoch": 7.61, + "learning_rate": 3.941571795034874e-05, + "loss": 0.3647, + "step": 179880 + }, + { + "epoch": 7.61, + "learning_rate": 3.93733172210562e-05, + "loss": 0.4256, + "step": 179890 + }, + { + "epoch": 7.61, + "learning_rate": 3.9330916491763657e-05, + "loss": 0.355, + "step": 179900 + }, + { + "epoch": 7.61, + "learning_rate": 3.9288515762471117e-05, + "loss": 0.38, + "step": 179910 + }, + { + "epoch": 7.61, + "learning_rate": 3.924611503317857e-05, + "loss": 0.4408, + "step": 179920 + }, + { + "epoch": 7.61, + "learning_rate": 3.920371430388603e-05, + "loss": 0.4167, + "step": 179930 + }, + { + "epoch": 7.61, + "learning_rate": 3.916131357459348e-05, + "loss": 0.2952, + "step": 179940 + }, + { + "epoch": 7.61, + "learning_rate": 3.911891284530094e-05, + "loss": 0.3536, + "step": 179950 + }, + { + "epoch": 7.61, + "learning_rate": 3.9076512116008396e-05, + "loss": 0.3394, + "step": 179960 + }, + { + "epoch": 7.61, + "learning_rate": 3.9034111386715856e-05, + "loss": 0.4128, + "step": 179970 + }, + { + "epoch": 7.61, + "learning_rate": 3.899171065742331e-05, + "loss": 0.3366, + "step": 179980 + }, + { + "epoch": 7.61, + "learning_rate": 3.894930992813077e-05, + "loss": 0.4002, + "step": 179990 + }, + { + "epoch": 7.61, + "learning_rate": 3.890690919883822e-05, + "loss": 0.3933, + "step": 180000 + }, + { + "epoch": 7.61, + "eval_loss": 0.6400695443153381, + "eval_runtime": 337.7542, + "eval_samples_per_second": 15.559, + "eval_steps_per_second": 3.89, + "step": 180000 + }, + { + "epoch": 7.61, + "learning_rate": 3.886450846954568e-05, + "loss": 0.4208, + "step": 180010 + }, + { + "epoch": 7.61, + "learning_rate": 3.8822107740253136e-05, + "loss": 0.3394, + "step": 180020 + }, + { + "epoch": 7.61, + "learning_rate": 3.877970701096059e-05, + "loss": 0.3709, + "step": 180030 + }, + { + "epoch": 7.61, + "learning_rate": 3.873730628166804e-05, + "loss": 0.4343, + "step": 180040 + }, + { + "epoch": 7.61, + "learning_rate": 3.86949055523755e-05, + "loss": 0.3522, + "step": 180050 + }, + { + "epoch": 7.61, + "learning_rate": 3.8652504823082956e-05, + "loss": 0.3617, + "step": 180060 + }, + { + "epoch": 7.61, + "learning_rate": 3.8610104093790416e-05, + "loss": 0.365, + "step": 180070 + }, + { + "epoch": 7.62, + "learning_rate": 3.856770336449787e-05, + "loss": 0.3577, + "step": 180080 + }, + { + "epoch": 7.62, + "learning_rate": 3.852530263520533e-05, + "loss": 0.4694, + "step": 180090 + }, + { + "epoch": 7.62, + "learning_rate": 3.848290190591278e-05, + "loss": 0.3679, + "step": 180100 + }, + { + "epoch": 7.62, + "learning_rate": 3.844050117662024e-05, + "loss": 0.3613, + "step": 180110 + }, + { + "epoch": 7.62, + "learning_rate": 3.8398100447327696e-05, + "loss": 0.4121, + "step": 180120 + }, + { + "epoch": 7.62, + "learning_rate": 3.8355699718035156e-05, + "loss": 0.3746, + "step": 180130 + }, + { + "epoch": 7.62, + "learning_rate": 3.831329898874261e-05, + "loss": 0.4022, + "step": 180140 + }, + { + "epoch": 7.62, + "learning_rate": 3.827089825945007e-05, + "loss": 0.386, + "step": 180150 + }, + { + "epoch": 7.62, + "learning_rate": 3.822849753015752e-05, + "loss": 0.4199, + "step": 180160 + }, + { + "epoch": 7.62, + "learning_rate": 3.818609680086498e-05, + "loss": 0.4033, + "step": 180170 + }, + { + "epoch": 7.62, + "learning_rate": 3.8143696071572435e-05, + "loss": 0.3379, + "step": 180180 + }, + { + "epoch": 7.62, + "learning_rate": 3.810129534227989e-05, + "loss": 0.4852, + "step": 180190 + }, + { + "epoch": 7.62, + "learning_rate": 3.805889461298734e-05, + "loss": 0.3845, + "step": 180200 + }, + { + "epoch": 7.62, + "learning_rate": 3.80164938836948e-05, + "loss": 0.3895, + "step": 180210 + }, + { + "epoch": 7.62, + "learning_rate": 3.7974093154402255e-05, + "loss": 0.3831, + "step": 180220 + }, + { + "epoch": 7.62, + "learning_rate": 3.7931692425109715e-05, + "loss": 0.3262, + "step": 180230 + }, + { + "epoch": 7.62, + "learning_rate": 3.788929169581717e-05, + "loss": 0.3601, + "step": 180240 + }, + { + "epoch": 7.62, + "learning_rate": 3.784689096652463e-05, + "loss": 0.3536, + "step": 180250 + }, + { + "epoch": 7.62, + "learning_rate": 3.780449023723208e-05, + "loss": 0.2843, + "step": 180260 + }, + { + "epoch": 7.62, + "learning_rate": 3.776208950793954e-05, + "loss": 0.4315, + "step": 180270 + }, + { + "epoch": 7.62, + "learning_rate": 3.7719688778646995e-05, + "loss": 0.3922, + "step": 180280 + }, + { + "epoch": 7.62, + "learning_rate": 3.7677288049354455e-05, + "loss": 0.3889, + "step": 180290 + }, + { + "epoch": 7.62, + "learning_rate": 3.763488732006191e-05, + "loss": 0.3186, + "step": 180300 + }, + { + "epoch": 7.63, + "learning_rate": 3.759248659076937e-05, + "loss": 0.3759, + "step": 180310 + }, + { + "epoch": 7.63, + "learning_rate": 3.755008586147682e-05, + "loss": 0.3227, + "step": 180320 + }, + { + "epoch": 7.63, + "learning_rate": 3.7507685132184274e-05, + "loss": 0.4027, + "step": 180330 + }, + { + "epoch": 7.63, + "learning_rate": 3.7465284402891734e-05, + "loss": 0.4531, + "step": 180340 + }, + { + "epoch": 7.63, + "learning_rate": 3.742288367359919e-05, + "loss": 0.3726, + "step": 180350 + }, + { + "epoch": 7.63, + "learning_rate": 3.738048294430664e-05, + "loss": 0.3601, + "step": 180360 + }, + { + "epoch": 7.63, + "learning_rate": 3.7338082215014094e-05, + "loss": 0.4087, + "step": 180370 + }, + { + "epoch": 7.63, + "learning_rate": 3.7295681485721554e-05, + "loss": 0.3821, + "step": 180380 + }, + { + "epoch": 7.63, + "learning_rate": 3.725328075642901e-05, + "loss": 0.397, + "step": 180390 + }, + { + "epoch": 7.63, + "learning_rate": 3.721088002713647e-05, + "loss": 0.3892, + "step": 180400 + }, + { + "epoch": 7.63, + "learning_rate": 3.716847929784392e-05, + "loss": 0.3634, + "step": 180410 + }, + { + "epoch": 7.63, + "learning_rate": 3.712607856855138e-05, + "loss": 0.3577, + "step": 180420 + }, + { + "epoch": 7.63, + "learning_rate": 3.7083677839258834e-05, + "loss": 0.3735, + "step": 180430 + }, + { + "epoch": 7.63, + "learning_rate": 3.7041277109966294e-05, + "loss": 0.4313, + "step": 180440 + }, + { + "epoch": 7.63, + "learning_rate": 3.699887638067375e-05, + "loss": 0.3971, + "step": 180450 + }, + { + "epoch": 7.63, + "learning_rate": 3.695647565138121e-05, + "loss": 0.3582, + "step": 180460 + }, + { + "epoch": 7.63, + "learning_rate": 3.691407492208866e-05, + "loss": 0.4024, + "step": 180470 + }, + { + "epoch": 7.63, + "learning_rate": 3.687167419279612e-05, + "loss": 0.3732, + "step": 180480 + }, + { + "epoch": 7.63, + "learning_rate": 3.6829273463503574e-05, + "loss": 0.3377, + "step": 180490 + }, + { + "epoch": 7.63, + "learning_rate": 3.6786872734211034e-05, + "loss": 0.4495, + "step": 180500 + }, + { + "epoch": 7.63, + "learning_rate": 3.674447200491849e-05, + "loss": 0.3528, + "step": 180510 + }, + { + "epoch": 7.63, + "learning_rate": 3.670207127562594e-05, + "loss": 0.3054, + "step": 180520 + }, + { + "epoch": 7.63, + "learning_rate": 3.6659670546333393e-05, + "loss": 0.3942, + "step": 180530 + }, + { + "epoch": 7.63, + "learning_rate": 3.6617269817040853e-05, + "loss": 0.3301, + "step": 180540 + }, + { + "epoch": 7.64, + "learning_rate": 3.657486908774831e-05, + "loss": 0.3718, + "step": 180550 + }, + { + "epoch": 7.64, + "learning_rate": 3.653246835845577e-05, + "loss": 0.365, + "step": 180560 + }, + { + "epoch": 7.64, + "learning_rate": 3.649006762916322e-05, + "loss": 0.3947, + "step": 180570 + }, + { + "epoch": 7.64, + "learning_rate": 3.644766689987068e-05, + "loss": 0.4585, + "step": 180580 + }, + { + "epoch": 7.64, + "learning_rate": 3.640526617057813e-05, + "loss": 0.399, + "step": 180590 + }, + { + "epoch": 7.64, + "learning_rate": 3.636286544128559e-05, + "loss": 0.3233, + "step": 180600 + }, + { + "epoch": 7.64, + "learning_rate": 3.6320464711993046e-05, + "loss": 0.3761, + "step": 180610 + }, + { + "epoch": 7.64, + "learning_rate": 3.6278063982700506e-05, + "loss": 0.3624, + "step": 180620 + }, + { + "epoch": 7.64, + "learning_rate": 3.623566325340796e-05, + "loss": 0.3181, + "step": 180630 + }, + { + "epoch": 7.64, + "learning_rate": 3.619326252411542e-05, + "loss": 0.4137, + "step": 180640 + }, + { + "epoch": 7.64, + "learning_rate": 3.615086179482287e-05, + "loss": 0.3353, + "step": 180650 + }, + { + "epoch": 7.64, + "learning_rate": 3.610846106553033e-05, + "loss": 0.4408, + "step": 180660 + }, + { + "epoch": 7.64, + "learning_rate": 3.6066060336237786e-05, + "loss": 0.4455, + "step": 180670 + }, + { + "epoch": 7.64, + "learning_rate": 3.602365960694524e-05, + "loss": 0.385, + "step": 180680 + }, + { + "epoch": 7.64, + "learning_rate": 3.598125887765269e-05, + "loss": 0.4412, + "step": 180690 + }, + { + "epoch": 7.64, + "learning_rate": 3.593885814836015e-05, + "loss": 0.3809, + "step": 180700 + }, + { + "epoch": 7.64, + "learning_rate": 3.5896457419067606e-05, + "loss": 0.3556, + "step": 180710 + }, + { + "epoch": 7.64, + "learning_rate": 3.5854056689775066e-05, + "loss": 0.3696, + "step": 180720 + }, + { + "epoch": 7.64, + "learning_rate": 3.581165596048252e-05, + "loss": 0.3562, + "step": 180730 + }, + { + "epoch": 7.64, + "learning_rate": 3.576925523118998e-05, + "loss": 0.4093, + "step": 180740 + }, + { + "epoch": 7.64, + "learning_rate": 3.572685450189743e-05, + "loss": 0.3859, + "step": 180750 + }, + { + "epoch": 7.64, + "learning_rate": 3.568445377260489e-05, + "loss": 0.4329, + "step": 180760 + }, + { + "epoch": 7.64, + "learning_rate": 3.5642053043312346e-05, + "loss": 0.3776, + "step": 180770 + }, + { + "epoch": 7.64, + "learning_rate": 3.5599652314019806e-05, + "loss": 0.4454, + "step": 180780 + }, + { + "epoch": 7.65, + "learning_rate": 3.555725158472726e-05, + "loss": 0.3829, + "step": 180790 + }, + { + "epoch": 7.65, + "learning_rate": 3.551485085543472e-05, + "loss": 0.3394, + "step": 180800 + }, + { + "epoch": 7.65, + "learning_rate": 3.547245012614217e-05, + "loss": 0.3746, + "step": 180810 + }, + { + "epoch": 7.65, + "learning_rate": 3.543004939684963e-05, + "loss": 0.3673, + "step": 180820 + }, + { + "epoch": 7.65, + "learning_rate": 3.5387648667557085e-05, + "loss": 0.3854, + "step": 180830 + }, + { + "epoch": 7.65, + "learning_rate": 3.534524793826454e-05, + "loss": 0.3929, + "step": 180840 + }, + { + "epoch": 7.65, + "learning_rate": 3.5302847208972e-05, + "loss": 0.4062, + "step": 180850 + }, + { + "epoch": 7.65, + "learning_rate": 3.526044647967945e-05, + "loss": 0.3663, + "step": 180860 + }, + { + "epoch": 7.65, + "learning_rate": 3.5218045750386905e-05, + "loss": 0.3839, + "step": 180870 + }, + { + "epoch": 7.65, + "learning_rate": 3.5175645021094365e-05, + "loss": 0.3678, + "step": 180880 + }, + { + "epoch": 7.65, + "learning_rate": 3.513324429180182e-05, + "loss": 0.3919, + "step": 180890 + }, + { + "epoch": 7.65, + "learning_rate": 3.509084356250928e-05, + "loss": 0.3825, + "step": 180900 + }, + { + "epoch": 7.65, + "learning_rate": 3.504844283321673e-05, + "loss": 0.3428, + "step": 180910 + }, + { + "epoch": 7.65, + "learning_rate": 3.500604210392419e-05, + "loss": 0.396, + "step": 180920 + }, + { + "epoch": 7.65, + "learning_rate": 3.4963641374631645e-05, + "loss": 0.3458, + "step": 180930 + }, + { + "epoch": 7.65, + "learning_rate": 3.4921240645339105e-05, + "loss": 0.367, + "step": 180940 + }, + { + "epoch": 7.65, + "learning_rate": 3.487883991604656e-05, + "loss": 0.3458, + "step": 180950 + }, + { + "epoch": 7.65, + "learning_rate": 3.483643918675402e-05, + "loss": 0.3811, + "step": 180960 + }, + { + "epoch": 7.65, + "learning_rate": 3.479403845746147e-05, + "loss": 0.3465, + "step": 180970 + }, + { + "epoch": 7.65, + "learning_rate": 3.475163772816893e-05, + "loss": 0.3307, + "step": 180980 + }, + { + "epoch": 7.65, + "learning_rate": 3.4709236998876385e-05, + "loss": 0.3629, + "step": 180990 + }, + { + "epoch": 7.65, + "learning_rate": 3.466683626958384e-05, + "loss": 0.3769, + "step": 181000 + }, + { + "epoch": 7.65, + "learning_rate": 3.46244355402913e-05, + "loss": 0.3571, + "step": 181010 + }, + { + "epoch": 7.66, + "learning_rate": 3.458203481099875e-05, + "loss": 0.3609, + "step": 181020 + }, + { + "epoch": 7.66, + "learning_rate": 3.4539634081706204e-05, + "loss": 0.4149, + "step": 181030 + }, + { + "epoch": 7.66, + "learning_rate": 3.4497233352413664e-05, + "loss": 0.4836, + "step": 181040 + }, + { + "epoch": 7.66, + "learning_rate": 3.445483262312112e-05, + "loss": 0.3969, + "step": 181050 + }, + { + "epoch": 7.66, + "learning_rate": 3.441243189382857e-05, + "loss": 0.2939, + "step": 181060 + }, + { + "epoch": 7.66, + "learning_rate": 3.437003116453603e-05, + "loss": 0.3055, + "step": 181070 + }, + { + "epoch": 7.66, + "learning_rate": 3.4327630435243484e-05, + "loss": 0.4025, + "step": 181080 + }, + { + "epoch": 7.66, + "learning_rate": 3.4285229705950944e-05, + "loss": 0.3978, + "step": 181090 + }, + { + "epoch": 7.66, + "learning_rate": 3.42428289766584e-05, + "loss": 0.3, + "step": 181100 + }, + { + "epoch": 7.66, + "learning_rate": 3.420042824736586e-05, + "loss": 0.3492, + "step": 181110 + }, + { + "epoch": 7.66, + "learning_rate": 3.415802751807331e-05, + "loss": 0.3746, + "step": 181120 + }, + { + "epoch": 7.66, + "learning_rate": 3.411562678878077e-05, + "loss": 0.3734, + "step": 181130 + }, + { + "epoch": 7.66, + "learning_rate": 3.4073226059488224e-05, + "loss": 0.4154, + "step": 181140 + }, + { + "epoch": 7.66, + "learning_rate": 3.4030825330195684e-05, + "loss": 0.3692, + "step": 181150 + }, + { + "epoch": 7.66, + "learning_rate": 3.398842460090314e-05, + "loss": 0.3865, + "step": 181160 + }, + { + "epoch": 7.66, + "learning_rate": 3.39460238716106e-05, + "loss": 0.3091, + "step": 181170 + }, + { + "epoch": 7.66, + "learning_rate": 3.390362314231805e-05, + "loss": 0.3165, + "step": 181180 + }, + { + "epoch": 7.66, + "learning_rate": 3.3861222413025504e-05, + "loss": 0.3354, + "step": 181190 + }, + { + "epoch": 7.66, + "learning_rate": 3.381882168373296e-05, + "loss": 0.3883, + "step": 181200 + }, + { + "epoch": 7.66, + "learning_rate": 3.377642095444042e-05, + "loss": 0.4148, + "step": 181210 + }, + { + "epoch": 7.66, + "learning_rate": 3.373402022514787e-05, + "loss": 0.4053, + "step": 181220 + }, + { + "epoch": 7.66, + "learning_rate": 3.369161949585533e-05, + "loss": 0.3744, + "step": 181230 + }, + { + "epoch": 7.66, + "learning_rate": 3.364921876656278e-05, + "loss": 0.3724, + "step": 181240 + }, + { + "epoch": 7.66, + "learning_rate": 3.360681803727024e-05, + "loss": 0.4042, + "step": 181250 + }, + { + "epoch": 7.67, + "learning_rate": 3.3564417307977696e-05, + "loss": 0.3867, + "step": 181260 + }, + { + "epoch": 7.67, + "learning_rate": 3.3522016578685157e-05, + "loss": 0.3812, + "step": 181270 + }, + { + "epoch": 7.67, + "learning_rate": 3.347961584939261e-05, + "loss": 0.3069, + "step": 181280 + }, + { + "epoch": 7.67, + "learning_rate": 3.343721512010007e-05, + "loss": 0.3752, + "step": 181290 + }, + { + "epoch": 7.67, + "learning_rate": 3.339481439080752e-05, + "loss": 0.3266, + "step": 181300 + }, + { + "epoch": 7.67, + "learning_rate": 3.335241366151498e-05, + "loss": 0.2872, + "step": 181310 + }, + { + "epoch": 7.67, + "learning_rate": 3.3310012932222436e-05, + "loss": 0.3462, + "step": 181320 + }, + { + "epoch": 7.67, + "learning_rate": 3.3267612202929896e-05, + "loss": 0.3487, + "step": 181330 + }, + { + "epoch": 7.67, + "learning_rate": 3.322521147363735e-05, + "loss": 0.3298, + "step": 181340 + }, + { + "epoch": 7.67, + "learning_rate": 3.31828107443448e-05, + "loss": 0.346, + "step": 181350 + }, + { + "epoch": 7.67, + "learning_rate": 3.3140410015052256e-05, + "loss": 0.4122, + "step": 181360 + }, + { + "epoch": 7.67, + "learning_rate": 3.3098009285759716e-05, + "loss": 0.3593, + "step": 181370 + }, + { + "epoch": 7.67, + "learning_rate": 3.305560855646717e-05, + "loss": 0.3296, + "step": 181380 + }, + { + "epoch": 7.67, + "learning_rate": 3.301320782717463e-05, + "loss": 0.395, + "step": 181390 + }, + { + "epoch": 7.67, + "learning_rate": 3.297080709788208e-05, + "loss": 0.4058, + "step": 181400 + }, + { + "epoch": 7.67, + "learning_rate": 3.292840636858954e-05, + "loss": 0.4078, + "step": 181410 + }, + { + "epoch": 7.67, + "learning_rate": 3.2886005639296996e-05, + "loss": 0.4161, + "step": 181420 + }, + { + "epoch": 7.67, + "learning_rate": 3.2843604910004456e-05, + "loss": 0.3714, + "step": 181430 + }, + { + "epoch": 7.67, + "learning_rate": 3.280120418071191e-05, + "loss": 0.3779, + "step": 181440 + }, + { + "epoch": 7.67, + "learning_rate": 3.275880345141937e-05, + "loss": 0.3457, + "step": 181450 + }, + { + "epoch": 7.67, + "learning_rate": 3.271640272212682e-05, + "loss": 0.3727, + "step": 181460 + }, + { + "epoch": 7.67, + "learning_rate": 3.267400199283428e-05, + "loss": 0.311, + "step": 181470 + }, + { + "epoch": 7.67, + "learning_rate": 3.2631601263541735e-05, + "loss": 0.3624, + "step": 181480 + }, + { + "epoch": 7.67, + "learning_rate": 3.2589200534249195e-05, + "loss": 0.3822, + "step": 181490 + }, + { + "epoch": 7.68, + "learning_rate": 3.254679980495665e-05, + "loss": 0.3704, + "step": 181500 + }, + { + "epoch": 7.68, + "learning_rate": 3.25043990756641e-05, + "loss": 0.3609, + "step": 181510 + }, + { + "epoch": 7.68, + "learning_rate": 3.2461998346371555e-05, + "loss": 0.4514, + "step": 181520 + }, + { + "epoch": 7.68, + "learning_rate": 3.2419597617079015e-05, + "loss": 0.3456, + "step": 181530 + }, + { + "epoch": 7.68, + "learning_rate": 3.237719688778647e-05, + "loss": 0.3665, + "step": 181540 + }, + { + "epoch": 7.68, + "learning_rate": 3.233479615849393e-05, + "loss": 0.4226, + "step": 181550 + }, + { + "epoch": 7.68, + "learning_rate": 3.229239542920138e-05, + "loss": 0.3868, + "step": 181560 + }, + { + "epoch": 7.68, + "learning_rate": 3.224999469990884e-05, + "loss": 0.4243, + "step": 181570 + }, + { + "epoch": 7.68, + "learning_rate": 3.2207593970616295e-05, + "loss": 0.3688, + "step": 181580 + }, + { + "epoch": 7.68, + "learning_rate": 3.2165193241323755e-05, + "loss": 0.4084, + "step": 181590 + }, + { + "epoch": 7.68, + "learning_rate": 3.212279251203121e-05, + "loss": 0.4025, + "step": 181600 + }, + { + "epoch": 7.68, + "learning_rate": 3.208039178273867e-05, + "loss": 0.2797, + "step": 181610 + }, + { + "epoch": 7.68, + "learning_rate": 3.203799105344612e-05, + "loss": 0.3384, + "step": 181620 + }, + { + "epoch": 7.68, + "learning_rate": 3.199559032415358e-05, + "loss": 0.3842, + "step": 181630 + }, + { + "epoch": 7.68, + "learning_rate": 3.1953189594861035e-05, + "loss": 0.3377, + "step": 181640 + }, + { + "epoch": 7.68, + "learning_rate": 3.1910788865568495e-05, + "loss": 0.4121, + "step": 181650 + }, + { + "epoch": 7.68, + "learning_rate": 3.186838813627595e-05, + "loss": 0.4085, + "step": 181660 + }, + { + "epoch": 7.68, + "learning_rate": 3.18259874069834e-05, + "loss": 0.3427, + "step": 181670 + }, + { + "epoch": 7.68, + "learning_rate": 3.1783586677690854e-05, + "loss": 0.3017, + "step": 181680 + }, + { + "epoch": 7.68, + "learning_rate": 3.1741185948398314e-05, + "loss": 0.307, + "step": 181690 + }, + { + "epoch": 7.68, + "learning_rate": 3.169878521910577e-05, + "loss": 0.4326, + "step": 181700 + }, + { + "epoch": 7.68, + "learning_rate": 3.165638448981323e-05, + "loss": 0.3708, + "step": 181710 + }, + { + "epoch": 7.68, + "learning_rate": 3.161398376052068e-05, + "loss": 0.4085, + "step": 181720 + }, + { + "epoch": 7.69, + "learning_rate": 3.157158303122814e-05, + "loss": 0.3326, + "step": 181730 + }, + { + "epoch": 7.69, + "learning_rate": 3.1529182301935594e-05, + "loss": 0.304, + "step": 181740 + }, + { + "epoch": 7.69, + "learning_rate": 3.1486781572643054e-05, + "loss": 0.3431, + "step": 181750 + }, + { + "epoch": 7.69, + "learning_rate": 3.144438084335051e-05, + "loss": 0.3844, + "step": 181760 + }, + { + "epoch": 7.69, + "learning_rate": 3.140198011405796e-05, + "loss": 0.3735, + "step": 181770 + }, + { + "epoch": 7.69, + "learning_rate": 3.135957938476542e-05, + "loss": 0.3927, + "step": 181780 + }, + { + "epoch": 7.69, + "learning_rate": 3.1317178655472874e-05, + "loss": 0.3849, + "step": 181790 + }, + { + "epoch": 7.69, + "learning_rate": 3.1274777926180334e-05, + "loss": 0.4756, + "step": 181800 + }, + { + "epoch": 7.69, + "learning_rate": 3.123237719688779e-05, + "loss": 0.3566, + "step": 181810 + }, + { + "epoch": 7.69, + "learning_rate": 3.118997646759525e-05, + "loss": 0.3783, + "step": 181820 + }, + { + "epoch": 7.69, + "learning_rate": 3.11475757383027e-05, + "loss": 0.3641, + "step": 181830 + }, + { + "epoch": 7.69, + "learning_rate": 3.1105175009010154e-05, + "loss": 0.4167, + "step": 181840 + }, + { + "epoch": 7.69, + "learning_rate": 3.106277427971761e-05, + "loss": 0.4446, + "step": 181850 + }, + { + "epoch": 7.69, + "learning_rate": 3.102037355042507e-05, + "loss": 0.3383, + "step": 181860 + }, + { + "epoch": 7.69, + "learning_rate": 3.097797282113252e-05, + "loss": 0.4025, + "step": 181870 + }, + { + "epoch": 7.69, + "learning_rate": 3.093557209183998e-05, + "loss": 0.3333, + "step": 181880 + }, + { + "epoch": 7.69, + "learning_rate": 3.089317136254743e-05, + "loss": 0.3335, + "step": 181890 + }, + { + "epoch": 7.69, + "learning_rate": 3.085077063325489e-05, + "loss": 0.327, + "step": 181900 + }, + { + "epoch": 7.69, + "learning_rate": 3.0808369903962347e-05, + "loss": 0.3798, + "step": 181910 + }, + { + "epoch": 7.69, + "learning_rate": 3.076596917466981e-05, + "loss": 0.3591, + "step": 181920 + }, + { + "epoch": 7.69, + "learning_rate": 3.072356844537726e-05, + "loss": 0.3822, + "step": 181930 + }, + { + "epoch": 7.69, + "learning_rate": 3.068116771608472e-05, + "loss": 0.3592, + "step": 181940 + }, + { + "epoch": 7.69, + "learning_rate": 3.063876698679217e-05, + "loss": 0.3698, + "step": 181950 + }, + { + "epoch": 7.69, + "learning_rate": 3.059636625749963e-05, + "loss": 0.4058, + "step": 181960 + }, + { + "epoch": 7.7, + "learning_rate": 3.0553965528207086e-05, + "loss": 0.3501, + "step": 181970 + }, + { + "epoch": 7.7, + "learning_rate": 3.0511564798914543e-05, + "loss": 0.3263, + "step": 181980 + }, + { + "epoch": 7.7, + "learning_rate": 3.0469164069621996e-05, + "loss": 0.3873, + "step": 181990 + }, + { + "epoch": 7.7, + "learning_rate": 3.0426763340329456e-05, + "loss": 0.4083, + "step": 182000 + }, + { + "epoch": 7.7, + "learning_rate": 3.038436261103691e-05, + "loss": 0.3042, + "step": 182010 + }, + { + "epoch": 7.7, + "learning_rate": 3.034196188174437e-05, + "loss": 0.3949, + "step": 182020 + }, + { + "epoch": 7.7, + "learning_rate": 3.0299561152451823e-05, + "loss": 0.3562, + "step": 182030 + }, + { + "epoch": 7.7, + "learning_rate": 3.025716042315928e-05, + "loss": 0.3782, + "step": 182040 + }, + { + "epoch": 7.7, + "learning_rate": 3.0214759693866736e-05, + "loss": 0.4045, + "step": 182050 + }, + { + "epoch": 7.7, + "learning_rate": 3.0172358964574193e-05, + "loss": 0.3718, + "step": 182060 + }, + { + "epoch": 7.7, + "learning_rate": 3.0129958235281646e-05, + "loss": 0.3426, + "step": 182070 + }, + { + "epoch": 7.7, + "learning_rate": 3.0087557505989106e-05, + "loss": 0.3729, + "step": 182080 + }, + { + "epoch": 7.7, + "learning_rate": 3.004515677669656e-05, + "loss": 0.3507, + "step": 182090 + }, + { + "epoch": 7.7, + "learning_rate": 3.000275604740402e-05, + "loss": 0.3365, + "step": 182100 + }, + { + "epoch": 7.7, + "learning_rate": 2.9960355318111472e-05, + "loss": 0.3382, + "step": 182110 + }, + { + "epoch": 7.7, + "learning_rate": 2.991795458881893e-05, + "loss": 0.3762, + "step": 182120 + }, + { + "epoch": 7.7, + "learning_rate": 2.9875553859526386e-05, + "loss": 0.364, + "step": 182130 + }, + { + "epoch": 7.7, + "learning_rate": 2.9833153130233842e-05, + "loss": 0.4364, + "step": 182140 + }, + { + "epoch": 7.7, + "learning_rate": 2.9790752400941295e-05, + "loss": 0.424, + "step": 182150 + }, + { + "epoch": 7.7, + "learning_rate": 2.9748351671648755e-05, + "loss": 0.4052, + "step": 182160 + }, + { + "epoch": 7.7, + "learning_rate": 2.970595094235621e-05, + "loss": 0.4235, + "step": 182170 + }, + { + "epoch": 7.7, + "learning_rate": 2.966355021306367e-05, + "loss": 0.3885, + "step": 182180 + }, + { + "epoch": 7.7, + "learning_rate": 2.9621149483771122e-05, + "loss": 0.3433, + "step": 182190 + }, + { + "epoch": 7.7, + "learning_rate": 2.957874875447858e-05, + "loss": 0.3567, + "step": 182200 + }, + { + "epoch": 7.71, + "learning_rate": 2.9536348025186035e-05, + "loss": 0.3874, + "step": 182210 + }, + { + "epoch": 7.71, + "learning_rate": 2.9493947295893492e-05, + "loss": 0.3986, + "step": 182220 + }, + { + "epoch": 7.71, + "learning_rate": 2.9451546566600945e-05, + "loss": 0.3989, + "step": 182230 + }, + { + "epoch": 7.71, + "learning_rate": 2.9409145837308405e-05, + "loss": 0.3751, + "step": 182240 + }, + { + "epoch": 7.71, + "learning_rate": 2.9366745108015858e-05, + "loss": 0.3791, + "step": 182250 + }, + { + "epoch": 7.71, + "learning_rate": 2.9324344378723318e-05, + "loss": 0.3845, + "step": 182260 + }, + { + "epoch": 7.71, + "learning_rate": 2.928194364943077e-05, + "loss": 0.4138, + "step": 182270 + }, + { + "epoch": 7.71, + "learning_rate": 2.9239542920138228e-05, + "loss": 0.4169, + "step": 182280 + }, + { + "epoch": 7.71, + "learning_rate": 2.9197142190845685e-05, + "loss": 0.3585, + "step": 182290 + }, + { + "epoch": 7.71, + "learning_rate": 2.915474146155314e-05, + "loss": 0.3439, + "step": 182300 + }, + { + "epoch": 7.71, + "learning_rate": 2.9112340732260595e-05, + "loss": 0.385, + "step": 182310 + }, + { + "epoch": 7.71, + "learning_rate": 2.9069940002968055e-05, + "loss": 0.4092, + "step": 182320 + }, + { + "epoch": 7.71, + "learning_rate": 2.9027539273675508e-05, + "loss": 0.328, + "step": 182330 + }, + { + "epoch": 7.71, + "learning_rate": 2.8985138544382968e-05, + "loss": 0.3645, + "step": 182340 + }, + { + "epoch": 7.71, + "learning_rate": 2.894273781509042e-05, + "loss": 0.3626, + "step": 182350 + }, + { + "epoch": 7.71, + "learning_rate": 2.890033708579788e-05, + "loss": 0.4505, + "step": 182360 + }, + { + "epoch": 7.71, + "learning_rate": 2.8857936356505334e-05, + "loss": 0.3499, + "step": 182370 + }, + { + "epoch": 7.71, + "learning_rate": 2.881553562721279e-05, + "loss": 0.4688, + "step": 182380 + }, + { + "epoch": 7.71, + "learning_rate": 2.8773134897920244e-05, + "loss": 0.3437, + "step": 182390 + }, + { + "epoch": 7.71, + "learning_rate": 2.8730734168627704e-05, + "loss": 0.4439, + "step": 182400 + }, + { + "epoch": 7.71, + "learning_rate": 2.8688333439335157e-05, + "loss": 0.3735, + "step": 182410 + }, + { + "epoch": 7.71, + "learning_rate": 2.8645932710042618e-05, + "loss": 0.3934, + "step": 182420 + }, + { + "epoch": 7.71, + "learning_rate": 2.860353198075007e-05, + "loss": 0.364, + "step": 182430 + }, + { + "epoch": 7.72, + "learning_rate": 2.856113125145753e-05, + "loss": 0.2971, + "step": 182440 + }, + { + "epoch": 7.72, + "learning_rate": 2.8518730522164984e-05, + "loss": 0.4174, + "step": 182450 + }, + { + "epoch": 7.72, + "learning_rate": 2.847632979287244e-05, + "loss": 0.3494, + "step": 182460 + }, + { + "epoch": 7.72, + "learning_rate": 2.8433929063579894e-05, + "loss": 0.336, + "step": 182470 + }, + { + "epoch": 7.72, + "learning_rate": 2.8391528334287347e-05, + "loss": 0.3409, + "step": 182480 + }, + { + "epoch": 7.72, + "learning_rate": 2.8349127604994807e-05, + "loss": 0.3421, + "step": 182490 + }, + { + "epoch": 7.72, + "learning_rate": 2.830672687570226e-05, + "loss": 0.3638, + "step": 182500 + }, + { + "epoch": 7.72, + "learning_rate": 2.826432614640972e-05, + "loss": 0.4128, + "step": 182510 + }, + { + "epoch": 7.72, + "learning_rate": 2.8221925417117174e-05, + "loss": 0.2854, + "step": 182520 + }, + { + "epoch": 7.72, + "learning_rate": 2.8179524687824634e-05, + "loss": 0.3565, + "step": 182530 + }, + { + "epoch": 7.72, + "learning_rate": 2.8137123958532087e-05, + "loss": 0.3762, + "step": 182540 + }, + { + "epoch": 7.72, + "learning_rate": 2.8094723229239543e-05, + "loss": 0.3557, + "step": 182550 + }, + { + "epoch": 7.72, + "learning_rate": 2.8052322499946997e-05, + "loss": 0.3531, + "step": 182560 + }, + { + "epoch": 7.72, + "learning_rate": 2.8009921770654457e-05, + "loss": 0.3449, + "step": 182570 + }, + { + "epoch": 7.72, + "learning_rate": 2.796752104136191e-05, + "loss": 0.4061, + "step": 182580 + }, + { + "epoch": 7.72, + "learning_rate": 2.792512031206937e-05, + "loss": 0.4665, + "step": 182590 + }, + { + "epoch": 7.72, + "learning_rate": 2.7882719582776823e-05, + "loss": 0.3377, + "step": 182600 + }, + { + "epoch": 7.72, + "learning_rate": 2.7840318853484283e-05, + "loss": 0.4084, + "step": 182610 + }, + { + "epoch": 7.72, + "learning_rate": 2.7797918124191736e-05, + "loss": 0.4587, + "step": 182620 + }, + { + "epoch": 7.72, + "learning_rate": 2.7755517394899193e-05, + "loss": 0.3841, + "step": 182630 + }, + { + "epoch": 7.72, + "learning_rate": 2.7713116665606646e-05, + "loss": 0.2746, + "step": 182640 + }, + { + "epoch": 7.72, + "learning_rate": 2.7670715936314106e-05, + "loss": 0.3787, + "step": 182650 + }, + { + "epoch": 7.72, + "learning_rate": 2.762831520702156e-05, + "loss": 0.3524, + "step": 182660 + }, + { + "epoch": 7.72, + "learning_rate": 2.758591447772902e-05, + "loss": 0.3611, + "step": 182670 + }, + { + "epoch": 7.73, + "learning_rate": 2.7543513748436473e-05, + "loss": 0.3665, + "step": 182680 + }, + { + "epoch": 7.73, + "learning_rate": 2.7501113019143933e-05, + "loss": 0.3524, + "step": 182690 + }, + { + "epoch": 7.73, + "learning_rate": 2.7458712289851386e-05, + "loss": 0.3811, + "step": 182700 + }, + { + "epoch": 7.73, + "learning_rate": 2.7416311560558843e-05, + "loss": 0.4125, + "step": 182710 + }, + { + "epoch": 7.73, + "learning_rate": 2.7373910831266296e-05, + "loss": 0.4118, + "step": 182720 + }, + { + "epoch": 7.73, + "learning_rate": 2.7331510101973756e-05, + "loss": 0.3933, + "step": 182730 + }, + { + "epoch": 7.73, + "learning_rate": 2.728910937268121e-05, + "loss": 0.3537, + "step": 182740 + }, + { + "epoch": 7.73, + "learning_rate": 2.724670864338867e-05, + "loss": 0.3877, + "step": 182750 + }, + { + "epoch": 7.73, + "learning_rate": 2.7204307914096122e-05, + "loss": 0.4283, + "step": 182760 + }, + { + "epoch": 7.73, + "learning_rate": 2.7161907184803582e-05, + "loss": 0.339, + "step": 182770 + }, + { + "epoch": 7.73, + "learning_rate": 2.7119506455511036e-05, + "loss": 0.413, + "step": 182780 + }, + { + "epoch": 7.73, + "learning_rate": 2.7077105726218492e-05, + "loss": 0.3788, + "step": 182790 + }, + { + "epoch": 7.73, + "learning_rate": 2.7034704996925946e-05, + "loss": 0.3625, + "step": 182800 + }, + { + "epoch": 7.73, + "learning_rate": 2.6992304267633406e-05, + "loss": 0.3874, + "step": 182810 + }, + { + "epoch": 7.73, + "learning_rate": 2.694990353834086e-05, + "loss": 0.3317, + "step": 182820 + }, + { + "epoch": 7.73, + "learning_rate": 2.690750280904832e-05, + "loss": 0.3454, + "step": 182830 + }, + { + "epoch": 7.73, + "learning_rate": 2.6865102079755772e-05, + "loss": 0.3848, + "step": 182840 + }, + { + "epoch": 7.73, + "learning_rate": 2.6822701350463232e-05, + "loss": 0.3201, + "step": 182850 + }, + { + "epoch": 7.73, + "learning_rate": 2.6780300621170685e-05, + "loss": 0.4144, + "step": 182860 + }, + { + "epoch": 7.73, + "learning_rate": 2.6737899891878142e-05, + "loss": 0.3589, + "step": 182870 + }, + { + "epoch": 7.73, + "learning_rate": 2.6695499162585595e-05, + "loss": 0.3665, + "step": 182880 + }, + { + "epoch": 7.73, + "learning_rate": 2.6653098433293055e-05, + "loss": 0.4048, + "step": 182890 + }, + { + "epoch": 7.73, + "learning_rate": 2.661069770400051e-05, + "loss": 0.443, + "step": 182900 + }, + { + "epoch": 7.74, + "learning_rate": 2.656829697470797e-05, + "loss": 0.3874, + "step": 182910 + }, + { + "epoch": 7.74, + "learning_rate": 2.652589624541542e-05, + "loss": 0.4529, + "step": 182920 + }, + { + "epoch": 7.74, + "learning_rate": 2.648349551612288e-05, + "loss": 0.3819, + "step": 182930 + }, + { + "epoch": 7.74, + "learning_rate": 2.6441094786830335e-05, + "loss": 0.3623, + "step": 182940 + }, + { + "epoch": 7.74, + "learning_rate": 2.639869405753779e-05, + "loss": 0.3992, + "step": 182950 + }, + { + "epoch": 7.74, + "learning_rate": 2.6356293328245245e-05, + "loss": 0.404, + "step": 182960 + }, + { + "epoch": 7.74, + "learning_rate": 2.6313892598952705e-05, + "loss": 0.3934, + "step": 182970 + }, + { + "epoch": 7.74, + "learning_rate": 2.6271491869660158e-05, + "loss": 0.3584, + "step": 182980 + }, + { + "epoch": 7.74, + "learning_rate": 2.6229091140367618e-05, + "loss": 0.3628, + "step": 182990 + }, + { + "epoch": 7.74, + "learning_rate": 2.618669041107507e-05, + "loss": 0.4097, + "step": 183000 + }, + { + "epoch": 7.74, + "learning_rate": 2.614428968178253e-05, + "loss": 0.3376, + "step": 183010 + }, + { + "epoch": 7.74, + "learning_rate": 2.6101888952489984e-05, + "loss": 0.3635, + "step": 183020 + }, + { + "epoch": 7.74, + "learning_rate": 2.605948822319744e-05, + "loss": 0.3744, + "step": 183030 + }, + { + "epoch": 7.74, + "learning_rate": 2.6017087493904894e-05, + "loss": 0.386, + "step": 183040 + }, + { + "epoch": 7.74, + "learning_rate": 2.5974686764612354e-05, + "loss": 0.4421, + "step": 183050 + }, + { + "epoch": 7.74, + "learning_rate": 2.5932286035319808e-05, + "loss": 0.3538, + "step": 183060 + }, + { + "epoch": 7.74, + "learning_rate": 2.5889885306027268e-05, + "loss": 0.3459, + "step": 183070 + }, + { + "epoch": 7.74, + "learning_rate": 2.584748457673472e-05, + "loss": 0.3453, + "step": 183080 + }, + { + "epoch": 7.74, + "learning_rate": 2.580508384744218e-05, + "loss": 0.3149, + "step": 183090 + }, + { + "epoch": 7.74, + "learning_rate": 2.5762683118149634e-05, + "loss": 0.3306, + "step": 183100 + }, + { + "epoch": 7.74, + "learning_rate": 2.572028238885709e-05, + "loss": 0.3347, + "step": 183110 + }, + { + "epoch": 7.74, + "learning_rate": 2.5677881659564544e-05, + "loss": 0.3902, + "step": 183120 + }, + { + "epoch": 7.74, + "learning_rate": 2.5635480930272004e-05, + "loss": 0.3292, + "step": 183130 + }, + { + "epoch": 7.74, + "learning_rate": 2.5593080200979457e-05, + "loss": 0.3657, + "step": 183140 + }, + { + "epoch": 7.75, + "learning_rate": 2.5550679471686917e-05, + "loss": 0.3692, + "step": 183150 + }, + { + "epoch": 7.75, + "learning_rate": 2.550827874239437e-05, + "loss": 0.3675, + "step": 183160 + }, + { + "epoch": 7.75, + "learning_rate": 2.546587801310183e-05, + "loss": 0.285, + "step": 183170 + }, + { + "epoch": 7.75, + "learning_rate": 2.5423477283809284e-05, + "loss": 0.3751, + "step": 183180 + }, + { + "epoch": 7.75, + "learning_rate": 2.5381076554516737e-05, + "loss": 0.3342, + "step": 183190 + }, + { + "epoch": 7.75, + "learning_rate": 2.5338675825224194e-05, + "loss": 0.4006, + "step": 183200 + }, + { + "epoch": 7.75, + "learning_rate": 2.529627509593165e-05, + "loss": 0.3813, + "step": 183210 + }, + { + "epoch": 7.75, + "learning_rate": 2.5253874366639107e-05, + "loss": 0.3504, + "step": 183220 + }, + { + "epoch": 7.75, + "learning_rate": 2.521147363734656e-05, + "loss": 0.3442, + "step": 183230 + }, + { + "epoch": 7.75, + "learning_rate": 2.516907290805402e-05, + "loss": 0.3516, + "step": 183240 + }, + { + "epoch": 7.75, + "learning_rate": 2.5126672178761473e-05, + "loss": 0.4368, + "step": 183250 + }, + { + "epoch": 7.75, + "learning_rate": 2.5084271449468933e-05, + "loss": 0.3109, + "step": 183260 + }, + { + "epoch": 7.75, + "learning_rate": 2.5041870720176387e-05, + "loss": 0.3892, + "step": 183270 + }, + { + "epoch": 7.75, + "learning_rate": 2.4999469990883843e-05, + "loss": 0.3049, + "step": 183280 + }, + { + "epoch": 7.75, + "learning_rate": 2.49570692615913e-05, + "loss": 0.3884, + "step": 183290 + }, + { + "epoch": 7.75, + "learning_rate": 2.4914668532298756e-05, + "loss": 0.3338, + "step": 183300 + }, + { + "epoch": 7.75, + "learning_rate": 2.4872267803006213e-05, + "loss": 0.3555, + "step": 183310 + }, + { + "epoch": 7.75, + "learning_rate": 2.482986707371367e-05, + "loss": 0.3306, + "step": 183320 + }, + { + "epoch": 7.75, + "learning_rate": 2.4787466344421126e-05, + "loss": 0.4244, + "step": 183330 + }, + { + "epoch": 7.75, + "learning_rate": 2.4745065615128583e-05, + "loss": 0.3564, + "step": 183340 + }, + { + "epoch": 7.75, + "learning_rate": 2.470266488583604e-05, + "loss": 0.3806, + "step": 183350 + }, + { + "epoch": 7.75, + "learning_rate": 2.4660264156543493e-05, + "loss": 0.3458, + "step": 183360 + }, + { + "epoch": 7.75, + "learning_rate": 2.461786342725095e-05, + "loss": 0.3564, + "step": 183370 + }, + { + "epoch": 7.75, + "learning_rate": 2.4575462697958406e-05, + "loss": 0.432, + "step": 183380 + }, + { + "epoch": 7.76, + "learning_rate": 2.4533061968665863e-05, + "loss": 0.3419, + "step": 183390 + }, + { + "epoch": 7.76, + "learning_rate": 2.449066123937332e-05, + "loss": 0.2882, + "step": 183400 + }, + { + "epoch": 7.76, + "learning_rate": 2.4448260510080776e-05, + "loss": 0.3424, + "step": 183410 + }, + { + "epoch": 7.76, + "learning_rate": 2.4405859780788233e-05, + "loss": 0.4403, + "step": 183420 + }, + { + "epoch": 7.76, + "learning_rate": 2.436345905149569e-05, + "loss": 0.3997, + "step": 183430 + }, + { + "epoch": 7.76, + "learning_rate": 2.4321058322203142e-05, + "loss": 0.3191, + "step": 183440 + }, + { + "epoch": 7.76, + "learning_rate": 2.42786575929106e-05, + "loss": 0.3822, + "step": 183450 + }, + { + "epoch": 7.76, + "learning_rate": 2.4236256863618056e-05, + "loss": 0.3781, + "step": 183460 + }, + { + "epoch": 7.76, + "learning_rate": 2.4193856134325512e-05, + "loss": 0.4212, + "step": 183470 + }, + { + "epoch": 7.76, + "learning_rate": 2.415145540503297e-05, + "loss": 0.3141, + "step": 183480 + }, + { + "epoch": 7.76, + "learning_rate": 2.4109054675740426e-05, + "loss": 0.3492, + "step": 183490 + }, + { + "epoch": 7.76, + "learning_rate": 2.4066653946447882e-05, + "loss": 0.4218, + "step": 183500 + }, + { + "epoch": 7.76, + "learning_rate": 2.402425321715534e-05, + "loss": 0.4447, + "step": 183510 + }, + { + "epoch": 7.76, + "learning_rate": 2.3981852487862795e-05, + "loss": 0.3339, + "step": 183520 + }, + { + "epoch": 7.76, + "learning_rate": 2.393945175857025e-05, + "loss": 0.3729, + "step": 183530 + }, + { + "epoch": 7.76, + "learning_rate": 2.3897051029277702e-05, + "loss": 0.4137, + "step": 183540 + }, + { + "epoch": 7.76, + "learning_rate": 2.385465029998516e-05, + "loss": 0.4112, + "step": 183550 + }, + { + "epoch": 7.76, + "learning_rate": 2.3812249570692615e-05, + "loss": 0.4026, + "step": 183560 + }, + { + "epoch": 7.76, + "learning_rate": 2.3769848841400072e-05, + "loss": 0.4015, + "step": 183570 + }, + { + "epoch": 7.76, + "learning_rate": 2.372744811210753e-05, + "loss": 0.3402, + "step": 183580 + }, + { + "epoch": 7.76, + "learning_rate": 2.3685047382814985e-05, + "loss": 0.3767, + "step": 183590 + }, + { + "epoch": 7.76, + "learning_rate": 2.364264665352244e-05, + "loss": 0.3169, + "step": 183600 + }, + { + "epoch": 7.76, + "learning_rate": 2.3600245924229898e-05, + "loss": 0.4019, + "step": 183610 + }, + { + "epoch": 7.77, + "learning_rate": 2.355784519493735e-05, + "loss": 0.3686, + "step": 183620 + }, + { + "epoch": 7.77, + "learning_rate": 2.3515444465644808e-05, + "loss": 0.3945, + "step": 183630 + }, + { + "epoch": 7.77, + "learning_rate": 2.3473043736352265e-05, + "loss": 0.3539, + "step": 183640 + }, + { + "epoch": 7.77, + "learning_rate": 2.343064300705972e-05, + "loss": 0.4016, + "step": 183650 + }, + { + "epoch": 7.77, + "learning_rate": 2.3388242277767178e-05, + "loss": 0.3874, + "step": 183660 + }, + { + "epoch": 7.77, + "learning_rate": 2.3345841548474635e-05, + "loss": 0.4051, + "step": 183670 + }, + { + "epoch": 7.77, + "learning_rate": 2.330344081918209e-05, + "loss": 0.3978, + "step": 183680 + }, + { + "epoch": 7.77, + "learning_rate": 2.3261040089889548e-05, + "loss": 0.3903, + "step": 183690 + }, + { + "epoch": 7.77, + "learning_rate": 2.3218639360597e-05, + "loss": 0.3897, + "step": 183700 + }, + { + "epoch": 7.77, + "learning_rate": 2.3176238631304458e-05, + "loss": 0.3907, + "step": 183710 + }, + { + "epoch": 7.77, + "learning_rate": 2.3133837902011914e-05, + "loss": 0.3428, + "step": 183720 + }, + { + "epoch": 7.77, + "learning_rate": 2.309143717271937e-05, + "loss": 0.3443, + "step": 183730 + }, + { + "epoch": 7.77, + "learning_rate": 2.3049036443426828e-05, + "loss": 0.3903, + "step": 183740 + }, + { + "epoch": 7.77, + "learning_rate": 2.3006635714134284e-05, + "loss": 0.3485, + "step": 183750 + }, + { + "epoch": 7.77, + "learning_rate": 2.296423498484174e-05, + "loss": 0.3953, + "step": 183760 + }, + { + "epoch": 7.77, + "learning_rate": 2.2921834255549197e-05, + "loss": 0.3635, + "step": 183770 + }, + { + "epoch": 7.77, + "learning_rate": 2.287943352625665e-05, + "loss": 0.3239, + "step": 183780 + }, + { + "epoch": 7.77, + "learning_rate": 2.2837032796964107e-05, + "loss": 0.3929, + "step": 183790 + }, + { + "epoch": 7.77, + "learning_rate": 2.2794632067671564e-05, + "loss": 0.3812, + "step": 183800 + }, + { + "epoch": 7.77, + "learning_rate": 2.275223133837902e-05, + "loss": 0.3755, + "step": 183810 + }, + { + "epoch": 7.77, + "learning_rate": 2.2709830609086477e-05, + "loss": 0.3101, + "step": 183820 + }, + { + "epoch": 7.77, + "learning_rate": 2.2667429879793934e-05, + "loss": 0.4312, + "step": 183830 + }, + { + "epoch": 7.77, + "learning_rate": 2.262502915050139e-05, + "loss": 0.4142, + "step": 183840 + }, + { + "epoch": 7.77, + "learning_rate": 2.2582628421208847e-05, + "loss": 0.3778, + "step": 183850 + }, + { + "epoch": 7.78, + "learning_rate": 2.25402276919163e-05, + "loss": 0.3544, + "step": 183860 + }, + { + "epoch": 7.78, + "learning_rate": 2.2497826962623757e-05, + "loss": 0.3691, + "step": 183870 + }, + { + "epoch": 7.78, + "learning_rate": 2.2455426233331214e-05, + "loss": 0.464, + "step": 183880 + }, + { + "epoch": 7.78, + "learning_rate": 2.241302550403867e-05, + "loss": 0.4208, + "step": 183890 + }, + { + "epoch": 7.78, + "learning_rate": 2.2370624774746127e-05, + "loss": 0.3721, + "step": 183900 + }, + { + "epoch": 7.78, + "learning_rate": 2.2328224045453583e-05, + "loss": 0.3458, + "step": 183910 + }, + { + "epoch": 7.78, + "learning_rate": 2.228582331616104e-05, + "loss": 0.376, + "step": 183920 + }, + { + "epoch": 7.78, + "learning_rate": 2.2243422586868497e-05, + "loss": 0.3222, + "step": 183930 + }, + { + "epoch": 7.78, + "learning_rate": 2.220102185757595e-05, + "loss": 0.3822, + "step": 183940 + }, + { + "epoch": 7.78, + "learning_rate": 2.2158621128283407e-05, + "loss": 0.3683, + "step": 183950 + }, + { + "epoch": 7.78, + "learning_rate": 2.2116220398990863e-05, + "loss": 0.2676, + "step": 183960 + }, + { + "epoch": 7.78, + "learning_rate": 2.207381966969832e-05, + "loss": 0.369, + "step": 183970 + }, + { + "epoch": 7.78, + "learning_rate": 2.2031418940405776e-05, + "loss": 0.3758, + "step": 183980 + }, + { + "epoch": 7.78, + "learning_rate": 2.1989018211113233e-05, + "loss": 0.3598, + "step": 183990 + }, + { + "epoch": 7.78, + "learning_rate": 2.194661748182069e-05, + "loss": 0.3682, + "step": 184000 + }, + { + "epoch": 7.78, + "learning_rate": 2.1904216752528146e-05, + "loss": 0.3553, + "step": 184010 + }, + { + "epoch": 7.78, + "learning_rate": 2.18618160232356e-05, + "loss": 0.3728, + "step": 184020 + }, + { + "epoch": 7.78, + "learning_rate": 2.1819415293943056e-05, + "loss": 0.3714, + "step": 184030 + }, + { + "epoch": 7.78, + "learning_rate": 2.1777014564650513e-05, + "loss": 0.3882, + "step": 184040 + }, + { + "epoch": 7.78, + "learning_rate": 2.173461383535797e-05, + "loss": 0.294, + "step": 184050 + }, + { + "epoch": 7.78, + "learning_rate": 2.1692213106065426e-05, + "loss": 0.404, + "step": 184060 + }, + { + "epoch": 7.78, + "learning_rate": 2.1649812376772883e-05, + "loss": 0.3445, + "step": 184070 + }, + { + "epoch": 7.78, + "learning_rate": 2.160741164748034e-05, + "loss": 0.3456, + "step": 184080 + }, + { + "epoch": 7.78, + "learning_rate": 2.1565010918187796e-05, + "loss": 0.4402, + "step": 184090 + }, + { + "epoch": 7.79, + "learning_rate": 2.1522610188895253e-05, + "loss": 0.3456, + "step": 184100 + }, + { + "epoch": 7.79, + "learning_rate": 2.1480209459602706e-05, + "loss": 0.3621, + "step": 184110 + }, + { + "epoch": 7.79, + "learning_rate": 2.1437808730310162e-05, + "loss": 0.3822, + "step": 184120 + }, + { + "epoch": 7.79, + "learning_rate": 2.139540800101762e-05, + "loss": 0.4201, + "step": 184130 + }, + { + "epoch": 7.79, + "learning_rate": 2.1353007271725076e-05, + "loss": 0.4038, + "step": 184140 + }, + { + "epoch": 7.79, + "learning_rate": 2.1310606542432532e-05, + "loss": 0.326, + "step": 184150 + }, + { + "epoch": 7.79, + "learning_rate": 2.126820581313999e-05, + "loss": 0.357, + "step": 184160 + }, + { + "epoch": 7.79, + "learning_rate": 2.1225805083847445e-05, + "loss": 0.4149, + "step": 184170 + }, + { + "epoch": 7.79, + "learning_rate": 2.1183404354554902e-05, + "loss": 0.3658, + "step": 184180 + }, + { + "epoch": 7.79, + "learning_rate": 2.1141003625262355e-05, + "loss": 0.2931, + "step": 184190 + }, + { + "epoch": 7.79, + "learning_rate": 2.1098602895969812e-05, + "loss": 0.4045, + "step": 184200 + }, + { + "epoch": 7.79, + "learning_rate": 2.105620216667727e-05, + "loss": 0.3554, + "step": 184210 + }, + { + "epoch": 7.79, + "learning_rate": 2.1013801437384725e-05, + "loss": 0.375, + "step": 184220 + }, + { + "epoch": 7.79, + "learning_rate": 2.0971400708092182e-05, + "loss": 0.4195, + "step": 184230 + }, + { + "epoch": 7.79, + "learning_rate": 2.092899997879964e-05, + "loss": 0.3723, + "step": 184240 + }, + { + "epoch": 7.79, + "learning_rate": 2.0886599249507095e-05, + "loss": 0.2803, + "step": 184250 + }, + { + "epoch": 7.79, + "learning_rate": 2.084419852021455e-05, + "loss": 0.41, + "step": 184260 + }, + { + "epoch": 7.79, + "learning_rate": 2.0801797790922005e-05, + "loss": 0.3553, + "step": 184270 + }, + { + "epoch": 7.79, + "learning_rate": 2.0759397061629458e-05, + "loss": 0.3525, + "step": 184280 + }, + { + "epoch": 7.79, + "learning_rate": 2.0716996332336915e-05, + "loss": 0.3692, + "step": 184290 + }, + { + "epoch": 7.79, + "learning_rate": 2.067459560304437e-05, + "loss": 0.3556, + "step": 184300 + }, + { + "epoch": 7.79, + "learning_rate": 2.0632194873751828e-05, + "loss": 0.3521, + "step": 184310 + }, + { + "epoch": 7.79, + "learning_rate": 2.0589794144459285e-05, + "loss": 0.3495, + "step": 184320 + }, + { + "epoch": 7.8, + "learning_rate": 2.054739341516674e-05, + "loss": 0.3358, + "step": 184330 + }, + { + "epoch": 7.8, + "learning_rate": 2.0504992685874198e-05, + "loss": 0.3917, + "step": 184340 + }, + { + "epoch": 7.8, + "learning_rate": 2.0462591956581655e-05, + "loss": 0.3652, + "step": 184350 + }, + { + "epoch": 7.8, + "learning_rate": 2.0420191227289108e-05, + "loss": 0.3577, + "step": 184360 + }, + { + "epoch": 7.8, + "learning_rate": 2.0377790497996564e-05, + "loss": 0.4198, + "step": 184370 + }, + { + "epoch": 7.8, + "learning_rate": 2.033538976870402e-05, + "loss": 0.3253, + "step": 184380 + }, + { + "epoch": 7.8, + "learning_rate": 2.0292989039411478e-05, + "loss": 0.4207, + "step": 184390 + }, + { + "epoch": 7.8, + "learning_rate": 2.0250588310118934e-05, + "loss": 0.3946, + "step": 184400 + }, + { + "epoch": 7.8, + "learning_rate": 2.020818758082639e-05, + "loss": 0.3663, + "step": 184410 + }, + { + "epoch": 7.8, + "learning_rate": 2.0165786851533848e-05, + "loss": 0.3623, + "step": 184420 + }, + { + "epoch": 7.8, + "learning_rate": 2.0123386122241304e-05, + "loss": 0.41, + "step": 184430 + }, + { + "epoch": 7.8, + "learning_rate": 2.0080985392948757e-05, + "loss": 0.3845, + "step": 184440 + }, + { + "epoch": 7.8, + "learning_rate": 2.0038584663656214e-05, + "loss": 0.3386, + "step": 184450 + }, + { + "epoch": 7.8, + "learning_rate": 1.999618393436367e-05, + "loss": 0.3594, + "step": 184460 + }, + { + "epoch": 7.8, + "learning_rate": 1.9953783205071127e-05, + "loss": 0.3848, + "step": 184470 + }, + { + "epoch": 7.8, + "learning_rate": 1.9911382475778584e-05, + "loss": 0.3072, + "step": 184480 + }, + { + "epoch": 7.8, + "learning_rate": 1.986898174648604e-05, + "loss": 0.3652, + "step": 184490 + }, + { + "epoch": 7.8, + "learning_rate": 1.9826581017193497e-05, + "loss": 0.3684, + "step": 184500 + }, + { + "epoch": 7.8, + "learning_rate": 1.9784180287900954e-05, + "loss": 0.3812, + "step": 184510 + }, + { + "epoch": 7.8, + "learning_rate": 1.9741779558608407e-05, + "loss": 0.364, + "step": 184520 + }, + { + "epoch": 7.8, + "learning_rate": 1.9699378829315864e-05, + "loss": 0.3583, + "step": 184530 + }, + { + "epoch": 7.8, + "learning_rate": 1.965697810002332e-05, + "loss": 0.3665, + "step": 184540 + }, + { + "epoch": 7.8, + "learning_rate": 1.9614577370730777e-05, + "loss": 0.4021, + "step": 184550 + }, + { + "epoch": 7.8, + "learning_rate": 1.9572176641438234e-05, + "loss": 0.3641, + "step": 184560 + }, + { + "epoch": 7.81, + "learning_rate": 1.952977591214569e-05, + "loss": 0.3798, + "step": 184570 + }, + { + "epoch": 7.81, + "learning_rate": 1.9487375182853147e-05, + "loss": 0.4578, + "step": 184580 + }, + { + "epoch": 7.81, + "learning_rate": 1.9444974453560603e-05, + "loss": 0.4162, + "step": 184590 + }, + { + "epoch": 7.81, + "learning_rate": 1.9402573724268057e-05, + "loss": 0.3748, + "step": 184600 + }, + { + "epoch": 7.81, + "learning_rate": 1.9360172994975513e-05, + "loss": 0.3441, + "step": 184610 + }, + { + "epoch": 7.81, + "learning_rate": 1.931777226568297e-05, + "loss": 0.4127, + "step": 184620 + }, + { + "epoch": 7.81, + "learning_rate": 1.9275371536390427e-05, + "loss": 0.3466, + "step": 184630 + }, + { + "epoch": 7.81, + "learning_rate": 1.9232970807097883e-05, + "loss": 0.3582, + "step": 184640 + }, + { + "epoch": 7.81, + "learning_rate": 1.919057007780534e-05, + "loss": 0.3218, + "step": 184650 + }, + { + "epoch": 7.81, + "learning_rate": 1.9148169348512796e-05, + "loss": 0.3771, + "step": 184660 + }, + { + "epoch": 7.81, + "learning_rate": 1.9105768619220253e-05, + "loss": 0.3632, + "step": 184670 + }, + { + "epoch": 7.81, + "learning_rate": 1.906336788992771e-05, + "loss": 0.4084, + "step": 184680 + }, + { + "epoch": 7.81, + "learning_rate": 1.9020967160635163e-05, + "loss": 0.3025, + "step": 184690 + }, + { + "epoch": 7.81, + "learning_rate": 1.897856643134262e-05, + "loss": 0.4001, + "step": 184700 + }, + { + "epoch": 7.81, + "learning_rate": 1.8936165702050076e-05, + "loss": 0.3685, + "step": 184710 + }, + { + "epoch": 7.81, + "learning_rate": 1.8893764972757533e-05, + "loss": 0.3924, + "step": 184720 + }, + { + "epoch": 7.81, + "learning_rate": 1.885136424346499e-05, + "loss": 0.3888, + "step": 184730 + }, + { + "epoch": 7.81, + "learning_rate": 1.8808963514172446e-05, + "loss": 0.3532, + "step": 184740 + }, + { + "epoch": 7.81, + "learning_rate": 1.8766562784879903e-05, + "loss": 0.3608, + "step": 184750 + }, + { + "epoch": 7.81, + "learning_rate": 1.872416205558736e-05, + "loss": 0.3787, + "step": 184760 + }, + { + "epoch": 7.81, + "learning_rate": 1.8681761326294812e-05, + "loss": 0.4143, + "step": 184770 + }, + { + "epoch": 7.81, + "learning_rate": 1.863936059700227e-05, + "loss": 0.3548, + "step": 184780 + }, + { + "epoch": 7.81, + "learning_rate": 1.8596959867709726e-05, + "loss": 0.3699, + "step": 184790 + }, + { + "epoch": 7.81, + "learning_rate": 1.8554559138417182e-05, + "loss": 0.3992, + "step": 184800 + }, + { + "epoch": 7.82, + "learning_rate": 1.851215840912464e-05, + "loss": 0.4057, + "step": 184810 + }, + { + "epoch": 7.82, + "learning_rate": 1.8469757679832096e-05, + "loss": 0.4115, + "step": 184820 + }, + { + "epoch": 7.82, + "learning_rate": 1.8427356950539552e-05, + "loss": 0.37, + "step": 184830 + }, + { + "epoch": 7.82, + "learning_rate": 1.838495622124701e-05, + "loss": 0.36, + "step": 184840 + }, + { + "epoch": 7.82, + "learning_rate": 1.8342555491954462e-05, + "loss": 0.3133, + "step": 184850 + }, + { + "epoch": 7.82, + "learning_rate": 1.830015476266192e-05, + "loss": 0.4425, + "step": 184860 + }, + { + "epoch": 7.82, + "learning_rate": 1.8257754033369375e-05, + "loss": 0.3928, + "step": 184870 + }, + { + "epoch": 7.82, + "learning_rate": 1.8215353304076832e-05, + "loss": 0.3717, + "step": 184880 + }, + { + "epoch": 7.82, + "learning_rate": 1.817295257478429e-05, + "loss": 0.4305, + "step": 184890 + }, + { + "epoch": 7.82, + "learning_rate": 1.8130551845491745e-05, + "loss": 0.415, + "step": 184900 + }, + { + "epoch": 7.82, + "learning_rate": 1.8088151116199202e-05, + "loss": 0.3945, + "step": 184910 + }, + { + "epoch": 7.82, + "learning_rate": 1.804575038690666e-05, + "loss": 0.4029, + "step": 184920 + }, + { + "epoch": 7.82, + "learning_rate": 1.800334965761411e-05, + "loss": 0.3591, + "step": 184930 + }, + { + "epoch": 7.82, + "learning_rate": 1.796094892832157e-05, + "loss": 0.3365, + "step": 184940 + }, + { + "epoch": 7.82, + "learning_rate": 1.7918548199029025e-05, + "loss": 0.373, + "step": 184950 + }, + { + "epoch": 7.82, + "learning_rate": 1.787614746973648e-05, + "loss": 0.4612, + "step": 184960 + }, + { + "epoch": 7.82, + "learning_rate": 1.7833746740443935e-05, + "loss": 0.4433, + "step": 184970 + }, + { + "epoch": 7.82, + "learning_rate": 1.779134601115139e-05, + "loss": 0.3811, + "step": 184980 + }, + { + "epoch": 7.82, + "learning_rate": 1.7748945281858848e-05, + "loss": 0.4238, + "step": 184990 + }, + { + "epoch": 7.82, + "learning_rate": 1.7706544552566305e-05, + "loss": 0.3846, + "step": 185000 + }, + { + "epoch": 7.82, + "learning_rate": 1.766414382327376e-05, + "loss": 0.3644, + "step": 185010 + }, + { + "epoch": 7.82, + "learning_rate": 1.7621743093981215e-05, + "loss": 0.3907, + "step": 185020 + }, + { + "epoch": 7.82, + "learning_rate": 1.757934236468867e-05, + "loss": 0.3514, + "step": 185030 + }, + { + "epoch": 7.83, + "learning_rate": 1.7536941635396128e-05, + "loss": 0.4321, + "step": 185040 + }, + { + "epoch": 7.83, + "learning_rate": 1.7494540906103584e-05, + "loss": 0.4335, + "step": 185050 + }, + { + "epoch": 7.83, + "learning_rate": 1.745214017681104e-05, + "loss": 0.3566, + "step": 185060 + }, + { + "epoch": 7.83, + "learning_rate": 1.7409739447518498e-05, + "loss": 0.338, + "step": 185070 + }, + { + "epoch": 7.83, + "learning_rate": 1.7367338718225954e-05, + "loss": 0.3773, + "step": 185080 + }, + { + "epoch": 7.83, + "learning_rate": 1.732493798893341e-05, + "loss": 0.386, + "step": 185090 + }, + { + "epoch": 7.83, + "learning_rate": 1.7282537259640864e-05, + "loss": 0.393, + "step": 185100 + }, + { + "epoch": 7.83, + "learning_rate": 1.724013653034832e-05, + "loss": 0.3874, + "step": 185110 + }, + { + "epoch": 7.83, + "learning_rate": 1.7197735801055777e-05, + "loss": 0.3942, + "step": 185120 + }, + { + "epoch": 7.83, + "learning_rate": 1.7155335071763234e-05, + "loss": 0.4317, + "step": 185130 + }, + { + "epoch": 7.83, + "learning_rate": 1.711293434247069e-05, + "loss": 0.3955, + "step": 185140 + }, + { + "epoch": 7.83, + "learning_rate": 1.7070533613178147e-05, + "loss": 0.3254, + "step": 185150 + }, + { + "epoch": 7.83, + "learning_rate": 1.7028132883885604e-05, + "loss": 0.3536, + "step": 185160 + }, + { + "epoch": 7.83, + "learning_rate": 1.698573215459306e-05, + "loss": 0.4375, + "step": 185170 + }, + { + "epoch": 7.83, + "learning_rate": 1.6943331425300514e-05, + "loss": 0.3472, + "step": 185180 + }, + { + "epoch": 7.83, + "learning_rate": 1.690093069600797e-05, + "loss": 0.3742, + "step": 185190 + }, + { + "epoch": 7.83, + "learning_rate": 1.6858529966715427e-05, + "loss": 0.3856, + "step": 185200 + }, + { + "epoch": 7.83, + "learning_rate": 1.6816129237422884e-05, + "loss": 0.4201, + "step": 185210 + }, + { + "epoch": 7.83, + "learning_rate": 1.677372850813034e-05, + "loss": 0.4182, + "step": 185220 + }, + { + "epoch": 7.83, + "learning_rate": 1.6731327778837797e-05, + "loss": 0.3448, + "step": 185230 + }, + { + "epoch": 7.83, + "learning_rate": 1.6688927049545253e-05, + "loss": 0.3576, + "step": 185240 + }, + { + "epoch": 7.83, + "learning_rate": 1.664652632025271e-05, + "loss": 0.3505, + "step": 185250 + }, + { + "epoch": 7.83, + "learning_rate": 1.6604125590960167e-05, + "loss": 0.5023, + "step": 185260 + }, + { + "epoch": 7.83, + "learning_rate": 1.656172486166762e-05, + "loss": 0.3455, + "step": 185270 + }, + { + "epoch": 7.84, + "learning_rate": 1.6519324132375077e-05, + "loss": 0.3317, + "step": 185280 + }, + { + "epoch": 7.84, + "learning_rate": 1.6476923403082533e-05, + "loss": 0.4032, + "step": 185290 + }, + { + "epoch": 7.84, + "learning_rate": 1.643452267378999e-05, + "loss": 0.3757, + "step": 185300 + }, + { + "epoch": 7.84, + "learning_rate": 1.6392121944497446e-05, + "loss": 0.3658, + "step": 185310 + }, + { + "epoch": 7.84, + "learning_rate": 1.6349721215204903e-05, + "loss": 0.3617, + "step": 185320 + }, + { + "epoch": 7.84, + "learning_rate": 1.630732048591236e-05, + "loss": 0.3706, + "step": 185330 + }, + { + "epoch": 7.84, + "learning_rate": 1.6264919756619816e-05, + "loss": 0.4194, + "step": 185340 + }, + { + "epoch": 7.84, + "learning_rate": 1.622251902732727e-05, + "loss": 0.3574, + "step": 185350 + }, + { + "epoch": 7.84, + "learning_rate": 1.6180118298034726e-05, + "loss": 0.4101, + "step": 185360 + }, + { + "epoch": 7.84, + "learning_rate": 1.6137717568742183e-05, + "loss": 0.3865, + "step": 185370 + }, + { + "epoch": 7.84, + "learning_rate": 1.609531683944964e-05, + "loss": 0.3178, + "step": 185380 + }, + { + "epoch": 7.84, + "learning_rate": 1.6052916110157096e-05, + "loss": 0.3911, + "step": 185390 + }, + { + "epoch": 7.84, + "learning_rate": 1.6010515380864553e-05, + "loss": 0.3466, + "step": 185400 + }, + { + "epoch": 7.84, + "learning_rate": 1.596811465157201e-05, + "loss": 0.3802, + "step": 185410 + }, + { + "epoch": 7.84, + "learning_rate": 1.5925713922279466e-05, + "loss": 0.4018, + "step": 185420 + }, + { + "epoch": 7.84, + "learning_rate": 1.588331319298692e-05, + "loss": 0.3965, + "step": 185430 + }, + { + "epoch": 7.84, + "learning_rate": 1.5840912463694376e-05, + "loss": 0.4004, + "step": 185440 + }, + { + "epoch": 7.84, + "learning_rate": 1.5798511734401832e-05, + "loss": 0.3426, + "step": 185450 + }, + { + "epoch": 7.84, + "learning_rate": 1.575611100510929e-05, + "loss": 0.3918, + "step": 185460 + }, + { + "epoch": 7.84, + "learning_rate": 1.5713710275816746e-05, + "loss": 0.3796, + "step": 185470 + }, + { + "epoch": 7.84, + "learning_rate": 1.5671309546524202e-05, + "loss": 0.399, + "step": 185480 + }, + { + "epoch": 7.84, + "learning_rate": 1.562890881723166e-05, + "loss": 0.4123, + "step": 185490 + }, + { + "epoch": 7.84, + "learning_rate": 1.5586508087939116e-05, + "loss": 0.3148, + "step": 185500 + }, + { + "epoch": 7.84, + "learning_rate": 1.554410735864657e-05, + "loss": 0.3669, + "step": 185510 + }, + { + "epoch": 7.85, + "learning_rate": 1.5501706629354025e-05, + "loss": 0.4057, + "step": 185520 + }, + { + "epoch": 7.85, + "learning_rate": 1.5459305900061482e-05, + "loss": 0.4121, + "step": 185530 + }, + { + "epoch": 7.85, + "learning_rate": 1.541690517076894e-05, + "loss": 0.4053, + "step": 185540 + }, + { + "epoch": 7.85, + "learning_rate": 1.5374504441476395e-05, + "loss": 0.3636, + "step": 185550 + }, + { + "epoch": 7.85, + "learning_rate": 1.5332103712183852e-05, + "loss": 0.3092, + "step": 185560 + }, + { + "epoch": 7.85, + "learning_rate": 1.528970298289131e-05, + "loss": 0.3915, + "step": 185570 + }, + { + "epoch": 7.85, + "learning_rate": 1.5247302253598763e-05, + "loss": 0.3744, + "step": 185580 + }, + { + "epoch": 7.85, + "learning_rate": 1.520490152430622e-05, + "loss": 0.3883, + "step": 185590 + }, + { + "epoch": 7.85, + "learning_rate": 1.5162500795013677e-05, + "loss": 0.3589, + "step": 185600 + }, + { + "epoch": 7.85, + "learning_rate": 1.5120100065721132e-05, + "loss": 0.3103, + "step": 185610 + }, + { + "epoch": 7.85, + "learning_rate": 1.5077699336428588e-05, + "loss": 0.4398, + "step": 185620 + }, + { + "epoch": 7.85, + "learning_rate": 1.5035298607136045e-05, + "loss": 0.3487, + "step": 185630 + }, + { + "epoch": 7.85, + "learning_rate": 1.4992897877843502e-05, + "loss": 0.3662, + "step": 185640 + }, + { + "epoch": 7.85, + "learning_rate": 1.4950497148550956e-05, + "loss": 0.375, + "step": 185650 + }, + { + "epoch": 7.85, + "learning_rate": 1.4908096419258413e-05, + "loss": 0.3721, + "step": 185660 + }, + { + "epoch": 7.85, + "learning_rate": 1.486569568996587e-05, + "loss": 0.3647, + "step": 185670 + }, + { + "epoch": 7.85, + "learning_rate": 1.4823294960673323e-05, + "loss": 0.3318, + "step": 185680 + }, + { + "epoch": 7.85, + "learning_rate": 1.478089423138078e-05, + "loss": 0.4614, + "step": 185690 + }, + { + "epoch": 7.85, + "learning_rate": 1.4738493502088235e-05, + "loss": 0.3015, + "step": 185700 + }, + { + "epoch": 7.85, + "learning_rate": 1.4696092772795691e-05, + "loss": 0.4044, + "step": 185710 + }, + { + "epoch": 7.85, + "learning_rate": 1.4653692043503148e-05, + "loss": 0.4387, + "step": 185720 + }, + { + "epoch": 7.85, + "learning_rate": 1.4611291314210604e-05, + "loss": 0.3427, + "step": 185730 + }, + { + "epoch": 7.85, + "learning_rate": 1.456889058491806e-05, + "loss": 0.3152, + "step": 185740 + }, + { + "epoch": 7.86, + "learning_rate": 1.4526489855625516e-05, + "loss": 0.3476, + "step": 185750 + }, + { + "epoch": 7.86, + "learning_rate": 1.4484089126332973e-05, + "loss": 0.3812, + "step": 185760 + }, + { + "epoch": 7.86, + "learning_rate": 1.444168839704043e-05, + "loss": 0.3826, + "step": 185770 + }, + { + "epoch": 7.86, + "learning_rate": 1.4399287667747884e-05, + "loss": 0.4275, + "step": 185780 + }, + { + "epoch": 7.86, + "learning_rate": 1.435688693845534e-05, + "loss": 0.3901, + "step": 185790 + }, + { + "epoch": 7.86, + "learning_rate": 1.4314486209162797e-05, + "loss": 0.3319, + "step": 185800 + }, + { + "epoch": 7.86, + "learning_rate": 1.4272085479870254e-05, + "loss": 0.3753, + "step": 185810 + }, + { + "epoch": 7.86, + "learning_rate": 1.422968475057771e-05, + "loss": 0.4716, + "step": 185820 + }, + { + "epoch": 7.86, + "learning_rate": 1.4187284021285166e-05, + "loss": 0.4051, + "step": 185830 + }, + { + "epoch": 7.86, + "learning_rate": 1.4144883291992622e-05, + "loss": 0.3977, + "step": 185840 + }, + { + "epoch": 7.86, + "learning_rate": 1.4102482562700079e-05, + "loss": 0.4423, + "step": 185850 + }, + { + "epoch": 7.86, + "learning_rate": 1.4060081833407535e-05, + "loss": 0.3781, + "step": 185860 + }, + { + "epoch": 7.86, + "learning_rate": 1.401768110411499e-05, + "loss": 0.3043, + "step": 185870 + }, + { + "epoch": 7.86, + "learning_rate": 1.3975280374822447e-05, + "loss": 0.3833, + "step": 185880 + }, + { + "epoch": 7.86, + "learning_rate": 1.3932879645529904e-05, + "loss": 0.4245, + "step": 185890 + }, + { + "epoch": 7.86, + "learning_rate": 1.389047891623736e-05, + "loss": 0.4038, + "step": 185900 + }, + { + "epoch": 7.86, + "learning_rate": 1.3848078186944815e-05, + "loss": 0.4404, + "step": 185910 + }, + { + "epoch": 7.86, + "learning_rate": 1.3805677457652272e-05, + "loss": 0.3907, + "step": 185920 + }, + { + "epoch": 7.86, + "learning_rate": 1.3763276728359728e-05, + "loss": 0.4008, + "step": 185930 + }, + { + "epoch": 7.86, + "learning_rate": 1.3720875999067185e-05, + "loss": 0.3636, + "step": 185940 + }, + { + "epoch": 7.86, + "learning_rate": 1.367847526977464e-05, + "loss": 0.3097, + "step": 185950 + }, + { + "epoch": 7.86, + "learning_rate": 1.3636074540482097e-05, + "loss": 0.3898, + "step": 185960 + }, + { + "epoch": 7.86, + "learning_rate": 1.3593673811189553e-05, + "loss": 0.3788, + "step": 185970 + }, + { + "epoch": 7.86, + "learning_rate": 1.355127308189701e-05, + "loss": 0.3197, + "step": 185980 + }, + { + "epoch": 7.87, + "learning_rate": 1.3508872352604465e-05, + "loss": 0.329, + "step": 185990 + }, + { + "epoch": 7.87, + "learning_rate": 1.3466471623311921e-05, + "loss": 0.3515, + "step": 186000 + }, + { + "epoch": 7.87, + "learning_rate": 1.3424070894019378e-05, + "loss": 0.3704, + "step": 186010 + }, + { + "epoch": 7.87, + "learning_rate": 1.3381670164726835e-05, + "loss": 0.3703, + "step": 186020 + }, + { + "epoch": 7.87, + "learning_rate": 1.333926943543429e-05, + "loss": 0.3936, + "step": 186030 + }, + { + "epoch": 7.87, + "learning_rate": 1.3296868706141746e-05, + "loss": 0.3891, + "step": 186040 + }, + { + "epoch": 7.87, + "learning_rate": 1.3254467976849203e-05, + "loss": 0.337, + "step": 186050 + }, + { + "epoch": 7.87, + "learning_rate": 1.321206724755666e-05, + "loss": 0.4038, + "step": 186060 + }, + { + "epoch": 7.87, + "learning_rate": 1.3169666518264114e-05, + "loss": 0.4156, + "step": 186070 + }, + { + "epoch": 7.87, + "learning_rate": 1.3127265788971571e-05, + "loss": 0.4155, + "step": 186080 + }, + { + "epoch": 7.87, + "learning_rate": 1.3084865059679028e-05, + "loss": 0.3392, + "step": 186090 + }, + { + "epoch": 7.87, + "learning_rate": 1.3042464330386484e-05, + "loss": 0.4044, + "step": 186100 + }, + { + "epoch": 7.87, + "learning_rate": 1.300006360109394e-05, + "loss": 0.386, + "step": 186110 + }, + { + "epoch": 7.87, + "learning_rate": 1.2957662871801396e-05, + "loss": 0.3983, + "step": 186120 + }, + { + "epoch": 7.87, + "learning_rate": 1.2915262142508852e-05, + "loss": 0.4207, + "step": 186130 + }, + { + "epoch": 7.87, + "learning_rate": 1.2872861413216309e-05, + "loss": 0.4145, + "step": 186140 + }, + { + "epoch": 7.87, + "learning_rate": 1.2830460683923764e-05, + "loss": 0.4018, + "step": 186150 + }, + { + "epoch": 7.87, + "learning_rate": 1.278805995463122e-05, + "loss": 0.298, + "step": 186160 + }, + { + "epoch": 7.87, + "learning_rate": 1.2745659225338677e-05, + "loss": 0.328, + "step": 186170 + }, + { + "epoch": 7.87, + "learning_rate": 1.2703258496046134e-05, + "loss": 0.3534, + "step": 186180 + }, + { + "epoch": 7.87, + "learning_rate": 1.2660857766753589e-05, + "loss": 0.3538, + "step": 186190 + }, + { + "epoch": 7.87, + "learning_rate": 1.2618457037461045e-05, + "loss": 0.3785, + "step": 186200 + }, + { + "epoch": 7.87, + "learning_rate": 1.2576056308168502e-05, + "loss": 0.3375, + "step": 186210 + }, + { + "epoch": 7.87, + "learning_rate": 1.2533655578875959e-05, + "loss": 0.3753, + "step": 186220 + }, + { + "epoch": 7.88, + "learning_rate": 1.2491254849583414e-05, + "loss": 0.35, + "step": 186230 + }, + { + "epoch": 7.88, + "learning_rate": 1.2448854120290869e-05, + "loss": 0.4205, + "step": 186240 + }, + { + "epoch": 7.88, + "learning_rate": 1.2406453390998325e-05, + "loss": 0.3705, + "step": 186250 + }, + { + "epoch": 7.88, + "learning_rate": 1.2364052661705782e-05, + "loss": 0.3567, + "step": 186260 + }, + { + "epoch": 7.88, + "learning_rate": 1.2321651932413238e-05, + "loss": 0.3971, + "step": 186270 + }, + { + "epoch": 7.88, + "learning_rate": 1.2279251203120693e-05, + "loss": 0.384, + "step": 186280 + }, + { + "epoch": 7.88, + "learning_rate": 1.223685047382815e-05, + "loss": 0.4279, + "step": 186290 + }, + { + "epoch": 7.88, + "learning_rate": 1.2194449744535607e-05, + "loss": 0.4351, + "step": 186300 + }, + { + "epoch": 7.88, + "learning_rate": 1.2152049015243063e-05, + "loss": 0.4061, + "step": 186310 + }, + { + "epoch": 7.88, + "learning_rate": 1.2109648285950518e-05, + "loss": 0.3652, + "step": 186320 + }, + { + "epoch": 7.88, + "learning_rate": 1.2067247556657975e-05, + "loss": 0.3425, + "step": 186330 + }, + { + "epoch": 7.88, + "learning_rate": 1.2024846827365431e-05, + "loss": 0.3397, + "step": 186340 + }, + { + "epoch": 7.88, + "learning_rate": 1.1982446098072888e-05, + "loss": 0.3757, + "step": 186350 + }, + { + "epoch": 7.88, + "learning_rate": 1.1940045368780343e-05, + "loss": 0.4406, + "step": 186360 + }, + { + "epoch": 7.88, + "learning_rate": 1.18976446394878e-05, + "loss": 0.4133, + "step": 186370 + }, + { + "epoch": 7.88, + "learning_rate": 1.1855243910195256e-05, + "loss": 0.3724, + "step": 186380 + }, + { + "epoch": 7.88, + "learning_rate": 1.1812843180902713e-05, + "loss": 0.3731, + "step": 186390 + }, + { + "epoch": 7.88, + "learning_rate": 1.1770442451610168e-05, + "loss": 0.3044, + "step": 186400 + }, + { + "epoch": 7.88, + "learning_rate": 1.1728041722317624e-05, + "loss": 0.3422, + "step": 186410 + }, + { + "epoch": 7.88, + "learning_rate": 1.1685640993025081e-05, + "loss": 0.3862, + "step": 186420 + }, + { + "epoch": 7.88, + "learning_rate": 1.1643240263732538e-05, + "loss": 0.3238, + "step": 186430 + }, + { + "epoch": 7.88, + "learning_rate": 1.1600839534439993e-05, + "loss": 0.4351, + "step": 186440 + }, + { + "epoch": 7.88, + "learning_rate": 1.155843880514745e-05, + "loss": 0.3973, + "step": 186450 + }, + { + "epoch": 7.89, + "learning_rate": 1.1516038075854906e-05, + "loss": 0.3736, + "step": 186460 + }, + { + "epoch": 7.89, + "learning_rate": 1.1473637346562362e-05, + "loss": 0.3503, + "step": 186470 + }, + { + "epoch": 7.89, + "learning_rate": 1.1431236617269817e-05, + "loss": 0.3079, + "step": 186480 + }, + { + "epoch": 7.89, + "learning_rate": 1.1388835887977274e-05, + "loss": 0.3765, + "step": 186490 + }, + { + "epoch": 7.89, + "learning_rate": 1.134643515868473e-05, + "loss": 0.3824, + "step": 186500 + }, + { + "epoch": 7.89, + "learning_rate": 1.1304034429392187e-05, + "loss": 0.3941, + "step": 186510 + }, + { + "epoch": 7.89, + "learning_rate": 1.1261633700099642e-05, + "loss": 0.3593, + "step": 186520 + }, + { + "epoch": 7.89, + "learning_rate": 1.1219232970807099e-05, + "loss": 0.35, + "step": 186530 + }, + { + "epoch": 7.89, + "learning_rate": 1.1176832241514555e-05, + "loss": 0.2618, + "step": 186540 + }, + { + "epoch": 7.89, + "learning_rate": 1.1134431512222012e-05, + "loss": 0.3465, + "step": 186550 + }, + { + "epoch": 7.89, + "learning_rate": 1.1092030782929467e-05, + "loss": 0.4177, + "step": 186560 + }, + { + "epoch": 7.89, + "learning_rate": 1.1049630053636924e-05, + "loss": 0.3588, + "step": 186570 + }, + { + "epoch": 7.89, + "learning_rate": 1.1007229324344379e-05, + "loss": 0.3496, + "step": 186580 + }, + { + "epoch": 7.89, + "learning_rate": 1.0964828595051835e-05, + "loss": 0.3465, + "step": 186590 + }, + { + "epoch": 7.89, + "learning_rate": 1.0922427865759292e-05, + "loss": 0.3001, + "step": 186600 + }, + { + "epoch": 7.89, + "learning_rate": 1.0880027136466747e-05, + "loss": 0.4129, + "step": 186610 + }, + { + "epoch": 7.89, + "learning_rate": 1.0837626407174203e-05, + "loss": 0.3368, + "step": 186620 + }, + { + "epoch": 7.89, + "learning_rate": 1.079522567788166e-05, + "loss": 0.3641, + "step": 186630 + }, + { + "epoch": 7.89, + "learning_rate": 1.0752824948589117e-05, + "loss": 0.4608, + "step": 186640 + }, + { + "epoch": 7.89, + "learning_rate": 1.0710424219296571e-05, + "loss": 0.3792, + "step": 186650 + }, + { + "epoch": 7.89, + "learning_rate": 1.0668023490004028e-05, + "loss": 0.3593, + "step": 186660 + }, + { + "epoch": 7.89, + "learning_rate": 1.0625622760711485e-05, + "loss": 0.4129, + "step": 186670 + }, + { + "epoch": 7.89, + "learning_rate": 1.0583222031418941e-05, + "loss": 0.366, + "step": 186680 + }, + { + "epoch": 7.89, + "learning_rate": 1.0540821302126396e-05, + "loss": 0.3457, + "step": 186690 + }, + { + "epoch": 7.9, + "learning_rate": 1.0498420572833853e-05, + "loss": 0.3944, + "step": 186700 + }, + { + "epoch": 7.9, + "learning_rate": 1.045601984354131e-05, + "loss": 0.4162, + "step": 186710 + }, + { + "epoch": 7.9, + "learning_rate": 1.0413619114248766e-05, + "loss": 0.4099, + "step": 186720 + }, + { + "epoch": 7.9, + "learning_rate": 1.0371218384956221e-05, + "loss": 0.3969, + "step": 186730 + }, + { + "epoch": 7.9, + "learning_rate": 1.0328817655663678e-05, + "loss": 0.349, + "step": 186740 + }, + { + "epoch": 7.9, + "learning_rate": 1.0286416926371134e-05, + "loss": 0.346, + "step": 186750 + }, + { + "epoch": 7.9, + "learning_rate": 1.0244016197078591e-05, + "loss": 0.3775, + "step": 186760 + }, + { + "epoch": 7.9, + "learning_rate": 1.0201615467786046e-05, + "loss": 0.4394, + "step": 186770 + }, + { + "epoch": 7.9, + "learning_rate": 1.0159214738493503e-05, + "loss": 0.4033, + "step": 186780 + }, + { + "epoch": 7.9, + "learning_rate": 1.011681400920096e-05, + "loss": 0.4326, + "step": 186790 + }, + { + "epoch": 7.9, + "learning_rate": 1.0074413279908416e-05, + "loss": 0.376, + "step": 186800 + }, + { + "epoch": 7.9, + "learning_rate": 1.003201255061587e-05, + "loss": 0.4004, + "step": 186810 + }, + { + "epoch": 7.9, + "learning_rate": 9.989611821323327e-06, + "loss": 0.3771, + "step": 186820 + }, + { + "epoch": 7.9, + "learning_rate": 9.947211092030784e-06, + "loss": 0.3444, + "step": 186830 + }, + { + "epoch": 7.9, + "learning_rate": 9.90481036273824e-06, + "loss": 0.3267, + "step": 186840 + }, + { + "epoch": 7.9, + "learning_rate": 9.862409633445696e-06, + "loss": 0.3677, + "step": 186850 + }, + { + "epoch": 7.9, + "learning_rate": 9.820008904153152e-06, + "loss": 0.358, + "step": 186860 + }, + { + "epoch": 7.9, + "learning_rate": 9.777608174860609e-06, + "loss": 0.3816, + "step": 186870 + }, + { + "epoch": 7.9, + "learning_rate": 9.735207445568065e-06, + "loss": 0.326, + "step": 186880 + }, + { + "epoch": 7.9, + "learning_rate": 9.69280671627552e-06, + "loss": 0.3423, + "step": 186890 + }, + { + "epoch": 7.9, + "learning_rate": 9.650405986982977e-06, + "loss": 0.2798, + "step": 186900 + }, + { + "epoch": 7.9, + "learning_rate": 9.608005257690434e-06, + "loss": 0.3309, + "step": 186910 + }, + { + "epoch": 7.9, + "learning_rate": 9.56560452839789e-06, + "loss": 0.4486, + "step": 186920 + }, + { + "epoch": 7.91, + "learning_rate": 9.523203799105345e-06, + "loss": 0.3956, + "step": 186930 + }, + { + "epoch": 7.91, + "learning_rate": 9.4808030698128e-06, + "loss": 0.4326, + "step": 186940 + }, + { + "epoch": 7.91, + "learning_rate": 9.438402340520257e-06, + "loss": 0.3918, + "step": 186950 + }, + { + "epoch": 7.91, + "learning_rate": 9.396001611227713e-06, + "loss": 0.4103, + "step": 186960 + }, + { + "epoch": 7.91, + "learning_rate": 9.35360088193517e-06, + "loss": 0.3981, + "step": 186970 + }, + { + "epoch": 7.91, + "learning_rate": 9.311200152642625e-06, + "loss": 0.383, + "step": 186980 + }, + { + "epoch": 7.91, + "learning_rate": 9.268799423350081e-06, + "loss": 0.378, + "step": 186990 + }, + { + "epoch": 7.91, + "learning_rate": 9.226398694057538e-06, + "loss": 0.3184, + "step": 187000 + }, + { + "epoch": 7.91, + "learning_rate": 9.183997964764995e-06, + "loss": 0.3963, + "step": 187010 + }, + { + "epoch": 7.91, + "learning_rate": 9.14159723547245e-06, + "loss": 0.4113, + "step": 187020 + }, + { + "epoch": 7.91, + "learning_rate": 9.099196506179906e-06, + "loss": 0.3635, + "step": 187030 + }, + { + "epoch": 7.91, + "learning_rate": 9.056795776887363e-06, + "loss": 0.3341, + "step": 187040 + }, + { + "epoch": 7.91, + "learning_rate": 9.01439504759482e-06, + "loss": 0.3746, + "step": 187050 + }, + { + "epoch": 7.91, + "learning_rate": 8.971994318302274e-06, + "loss": 0.4123, + "step": 187060 + }, + { + "epoch": 7.91, + "learning_rate": 8.929593589009731e-06, + "loss": 0.3954, + "step": 187070 + }, + { + "epoch": 7.91, + "learning_rate": 8.887192859717188e-06, + "loss": 0.3778, + "step": 187080 + }, + { + "epoch": 7.91, + "learning_rate": 8.844792130424644e-06, + "loss": 0.3544, + "step": 187090 + }, + { + "epoch": 7.91, + "learning_rate": 8.8023914011321e-06, + "loss": 0.3541, + "step": 187100 + }, + { + "epoch": 7.91, + "learning_rate": 8.759990671839556e-06, + "loss": 0.4015, + "step": 187110 + }, + { + "epoch": 7.91, + "learning_rate": 8.717589942547013e-06, + "loss": 0.4043, + "step": 187120 + }, + { + "epoch": 7.91, + "learning_rate": 8.675189213254469e-06, + "loss": 0.3358, + "step": 187130 + }, + { + "epoch": 7.91, + "learning_rate": 8.632788483961924e-06, + "loss": 0.3883, + "step": 187140 + }, + { + "epoch": 7.91, + "learning_rate": 8.59038775466938e-06, + "loss": 0.3171, + "step": 187150 + }, + { + "epoch": 7.91, + "learning_rate": 8.547987025376837e-06, + "loss": 0.4211, + "step": 187160 + }, + { + "epoch": 7.92, + "learning_rate": 8.505586296084294e-06, + "loss": 0.3787, + "step": 187170 + }, + { + "epoch": 7.92, + "learning_rate": 8.463185566791749e-06, + "loss": 0.3884, + "step": 187180 + }, + { + "epoch": 7.92, + "learning_rate": 8.420784837499206e-06, + "loss": 0.3941, + "step": 187190 + }, + { + "epoch": 7.92, + "learning_rate": 8.378384108206662e-06, + "loss": 0.3932, + "step": 187200 + }, + { + "epoch": 7.92, + "learning_rate": 8.335983378914119e-06, + "loss": 0.3455, + "step": 187210 + }, + { + "epoch": 7.92, + "learning_rate": 8.293582649621574e-06, + "loss": 0.4081, + "step": 187220 + }, + { + "epoch": 7.92, + "learning_rate": 8.25118192032903e-06, + "loss": 0.4124, + "step": 187230 + }, + { + "epoch": 7.92, + "learning_rate": 8.208781191036487e-06, + "loss": 0.3422, + "step": 187240 + }, + { + "epoch": 7.92, + "learning_rate": 8.166380461743944e-06, + "loss": 0.3477, + "step": 187250 + }, + { + "epoch": 7.92, + "learning_rate": 8.123979732451398e-06, + "loss": 0.2841, + "step": 187260 + }, + { + "epoch": 7.92, + "learning_rate": 8.081579003158855e-06, + "loss": 0.4003, + "step": 187270 + }, + { + "epoch": 7.92, + "learning_rate": 8.039178273866312e-06, + "loss": 0.3828, + "step": 187280 + }, + { + "epoch": 7.92, + "learning_rate": 7.996777544573767e-06, + "loss": 0.4011, + "step": 187290 + }, + { + "epoch": 7.92, + "learning_rate": 7.954376815281223e-06, + "loss": 0.4213, + "step": 187300 + }, + { + "epoch": 7.92, + "learning_rate": 7.911976085988678e-06, + "loss": 0.3612, + "step": 187310 + }, + { + "epoch": 7.92, + "learning_rate": 7.869575356696135e-06, + "loss": 0.3605, + "step": 187320 + }, + { + "epoch": 7.92, + "learning_rate": 7.827174627403591e-06, + "loss": 0.4473, + "step": 187330 + }, + { + "epoch": 7.92, + "learning_rate": 7.784773898111048e-06, + "loss": 0.384, + "step": 187340 + }, + { + "epoch": 7.92, + "learning_rate": 7.742373168818503e-06, + "loss": 0.3554, + "step": 187350 + }, + { + "epoch": 7.92, + "learning_rate": 7.69997243952596e-06, + "loss": 0.3842, + "step": 187360 + }, + { + "epoch": 7.92, + "learning_rate": 7.657571710233416e-06, + "loss": 0.434, + "step": 187370 + }, + { + "epoch": 7.92, + "learning_rate": 7.615170980940872e-06, + "loss": 0.3086, + "step": 187380 + }, + { + "epoch": 7.92, + "learning_rate": 7.572770251648329e-06, + "loss": 0.3993, + "step": 187390 + }, + { + "epoch": 7.92, + "learning_rate": 7.5303695223557845e-06, + "loss": 0.3495, + "step": 187400 + }, + { + "epoch": 7.93, + "learning_rate": 7.487968793063241e-06, + "loss": 0.4184, + "step": 187410 + }, + { + "epoch": 7.93, + "learning_rate": 7.445568063770697e-06, + "loss": 0.3354, + "step": 187420 + }, + { + "epoch": 7.93, + "learning_rate": 7.4031673344781535e-06, + "loss": 0.3789, + "step": 187430 + }, + { + "epoch": 7.93, + "learning_rate": 7.360766605185609e-06, + "loss": 0.3266, + "step": 187440 + }, + { + "epoch": 7.93, + "learning_rate": 7.318365875893066e-06, + "loss": 0.3851, + "step": 187450 + }, + { + "epoch": 7.93, + "learning_rate": 7.275965146600522e-06, + "loss": 0.3263, + "step": 187460 + }, + { + "epoch": 7.93, + "learning_rate": 7.233564417307978e-06, + "loss": 0.3465, + "step": 187470 + }, + { + "epoch": 7.93, + "learning_rate": 7.191163688015434e-06, + "loss": 0.3828, + "step": 187480 + }, + { + "epoch": 7.93, + "learning_rate": 7.148762958722891e-06, + "loss": 0.3576, + "step": 187490 + }, + { + "epoch": 7.93, + "learning_rate": 7.1063622294303465e-06, + "loss": 0.3423, + "step": 187500 + }, + { + "epoch": 7.93, + "learning_rate": 7.063961500137803e-06, + "loss": 0.3403, + "step": 187510 + }, + { + "epoch": 7.93, + "learning_rate": 7.021560770845259e-06, + "loss": 0.3606, + "step": 187520 + }, + { + "epoch": 7.93, + "learning_rate": 6.9791600415527155e-06, + "loss": 0.3039, + "step": 187530 + }, + { + "epoch": 7.93, + "learning_rate": 6.936759312260171e-06, + "loss": 0.3124, + "step": 187540 + }, + { + "epoch": 7.93, + "learning_rate": 6.894358582967628e-06, + "loss": 0.3701, + "step": 187550 + }, + { + "epoch": 7.93, + "learning_rate": 6.851957853675084e-06, + "loss": 0.3779, + "step": 187560 + }, + { + "epoch": 7.93, + "learning_rate": 6.80955712438254e-06, + "loss": 0.4105, + "step": 187570 + }, + { + "epoch": 7.93, + "learning_rate": 6.767156395089996e-06, + "loss": 0.3491, + "step": 187580 + }, + { + "epoch": 7.93, + "learning_rate": 6.724755665797453e-06, + "loss": 0.3898, + "step": 187590 + }, + { + "epoch": 7.93, + "learning_rate": 6.6823549365049085e-06, + "loss": 0.341, + "step": 187600 + }, + { + "epoch": 7.93, + "learning_rate": 6.639954207212365e-06, + "loss": 0.352, + "step": 187610 + }, + { + "epoch": 7.93, + "learning_rate": 6.597553477919821e-06, + "loss": 0.4399, + "step": 187620 + }, + { + "epoch": 7.93, + "learning_rate": 6.5551527486272775e-06, + "loss": 0.362, + "step": 187630 + }, + { + "epoch": 7.94, + "learning_rate": 6.512752019334733e-06, + "loss": 0.3749, + "step": 187640 + }, + { + "epoch": 7.94, + "learning_rate": 6.470351290042188e-06, + "loss": 0.3234, + "step": 187650 + }, + { + "epoch": 7.94, + "learning_rate": 6.427950560749645e-06, + "loss": 0.3269, + "step": 187660 + }, + { + "epoch": 7.94, + "learning_rate": 6.385549831457101e-06, + "loss": 0.392, + "step": 187670 + }, + { + "epoch": 7.94, + "learning_rate": 6.343149102164557e-06, + "loss": 0.394, + "step": 187680 + }, + { + "epoch": 7.94, + "learning_rate": 6.300748372872013e-06, + "loss": 0.3628, + "step": 187690 + }, + { + "epoch": 7.94, + "learning_rate": 6.25834764357947e-06, + "loss": 0.2963, + "step": 187700 + }, + { + "epoch": 7.94, + "learning_rate": 6.215946914286926e-06, + "loss": 0.3753, + "step": 187710 + }, + { + "epoch": 7.94, + "learning_rate": 6.173546184994383e-06, + "loss": 0.46, + "step": 187720 + }, + { + "epoch": 7.94, + "learning_rate": 6.131145455701839e-06, + "loss": 0.3302, + "step": 187730 + }, + { + "epoch": 7.94, + "learning_rate": 6.0887447264092944e-06, + "loss": 0.3223, + "step": 187740 + }, + { + "epoch": 7.94, + "learning_rate": 6.04634399711675e-06, + "loss": 0.3791, + "step": 187750 + }, + { + "epoch": 7.94, + "learning_rate": 6.003943267824207e-06, + "loss": 0.3155, + "step": 187760 + }, + { + "epoch": 7.94, + "learning_rate": 5.961542538531663e-06, + "loss": 0.4258, + "step": 187770 + }, + { + "epoch": 7.94, + "learning_rate": 5.919141809239119e-06, + "loss": 0.4531, + "step": 187780 + }, + { + "epoch": 7.94, + "learning_rate": 5.876741079946575e-06, + "loss": 0.4223, + "step": 187790 + }, + { + "epoch": 7.94, + "learning_rate": 5.834340350654032e-06, + "loss": 0.3574, + "step": 187800 + }, + { + "epoch": 7.94, + "learning_rate": 5.7919396213614874e-06, + "loss": 0.3339, + "step": 187810 + }, + { + "epoch": 7.94, + "learning_rate": 5.749538892068944e-06, + "loss": 0.3279, + "step": 187820 + }, + { + "epoch": 7.94, + "learning_rate": 5.7071381627764e-06, + "loss": 0.318, + "step": 187830 + }, + { + "epoch": 7.94, + "learning_rate": 5.6647374334838565e-06, + "loss": 0.3657, + "step": 187840 + }, + { + "epoch": 7.94, + "learning_rate": 5.622336704191312e-06, + "loss": 0.388, + "step": 187850 + }, + { + "epoch": 7.94, + "learning_rate": 5.579935974898769e-06, + "loss": 0.3398, + "step": 187860 + }, + { + "epoch": 7.94, + "learning_rate": 5.537535245606225e-06, + "loss": 0.4048, + "step": 187870 + }, + { + "epoch": 7.95, + "learning_rate": 5.495134516313681e-06, + "loss": 0.354, + "step": 187880 + }, + { + "epoch": 7.95, + "learning_rate": 5.452733787021137e-06, + "loss": 0.3165, + "step": 187890 + }, + { + "epoch": 7.95, + "learning_rate": 5.410333057728594e-06, + "loss": 0.4625, + "step": 187900 + }, + { + "epoch": 7.95, + "learning_rate": 5.367932328436049e-06, + "loss": 0.332, + "step": 187910 + }, + { + "epoch": 7.95, + "learning_rate": 5.325531599143505e-06, + "loss": 0.3661, + "step": 187920 + }, + { + "epoch": 7.95, + "learning_rate": 5.283130869850962e-06, + "loss": 0.3643, + "step": 187930 + }, + { + "epoch": 7.95, + "learning_rate": 5.240730140558418e-06, + "loss": 0.3534, + "step": 187940 + }, + { + "epoch": 7.95, + "learning_rate": 5.198329411265874e-06, + "loss": 0.3512, + "step": 187950 + }, + { + "epoch": 7.95, + "learning_rate": 5.15592868197333e-06, + "loss": 0.4055, + "step": 187960 + }, + { + "epoch": 7.95, + "learning_rate": 5.113527952680787e-06, + "loss": 0.3783, + "step": 187970 + }, + { + "epoch": 7.95, + "learning_rate": 5.071127223388242e-06, + "loss": 0.3498, + "step": 187980 + }, + { + "epoch": 7.95, + "learning_rate": 5.028726494095699e-06, + "loss": 0.3665, + "step": 187990 + }, + { + "epoch": 7.95, + "learning_rate": 4.986325764803155e-06, + "loss": 0.4156, + "step": 188000 + }, + { + "epoch": 7.95, + "learning_rate": 4.9439250355106114e-06, + "loss": 0.3749, + "step": 188010 + }, + { + "epoch": 7.95, + "learning_rate": 4.901524306218067e-06, + "loss": 0.4014, + "step": 188020 + }, + { + "epoch": 7.95, + "learning_rate": 4.859123576925524e-06, + "loss": 0.4212, + "step": 188030 + }, + { + "epoch": 7.95, + "learning_rate": 4.81672284763298e-06, + "loss": 0.3886, + "step": 188040 + }, + { + "epoch": 7.95, + "learning_rate": 4.774322118340436e-06, + "loss": 0.363, + "step": 188050 + }, + { + "epoch": 7.95, + "learning_rate": 4.731921389047892e-06, + "loss": 0.3829, + "step": 188060 + }, + { + "epoch": 7.95, + "learning_rate": 4.689520659755349e-06, + "loss": 0.4149, + "step": 188070 + }, + { + "epoch": 7.95, + "learning_rate": 4.6471199304628044e-06, + "loss": 0.4315, + "step": 188080 + }, + { + "epoch": 7.95, + "learning_rate": 4.60471920117026e-06, + "loss": 0.3514, + "step": 188090 + }, + { + "epoch": 7.95, + "learning_rate": 4.562318471877716e-06, + "loss": 0.3601, + "step": 188100 + }, + { + "epoch": 7.95, + "learning_rate": 4.519917742585173e-06, + "loss": 0.3208, + "step": 188110 + }, + { + "epoch": 7.96, + "learning_rate": 4.477517013292628e-06, + "loss": 0.3671, + "step": 188120 + }, + { + "epoch": 7.96, + "learning_rate": 4.435116284000085e-06, + "loss": 0.347, + "step": 188130 + }, + { + "epoch": 7.96, + "learning_rate": 4.392715554707541e-06, + "loss": 0.3588, + "step": 188140 + }, + { + "epoch": 7.96, + "learning_rate": 4.350314825414997e-06, + "loss": 0.3675, + "step": 188150 + }, + { + "epoch": 7.96, + "learning_rate": 4.307914096122453e-06, + "loss": 0.362, + "step": 188160 + }, + { + "epoch": 7.96, + "learning_rate": 4.26551336682991e-06, + "loss": 0.3806, + "step": 188170 + }, + { + "epoch": 7.96, + "learning_rate": 4.223112637537366e-06, + "loss": 0.399, + "step": 188180 + }, + { + "epoch": 7.96, + "learning_rate": 4.180711908244822e-06, + "loss": 0.4158, + "step": 188190 + }, + { + "epoch": 7.96, + "learning_rate": 4.138311178952278e-06, + "loss": 0.4081, + "step": 188200 + }, + { + "epoch": 7.96, + "learning_rate": 4.095910449659735e-06, + "loss": 0.4062, + "step": 188210 + }, + { + "epoch": 7.96, + "learning_rate": 4.05350972036719e-06, + "loss": 0.4085, + "step": 188220 + }, + { + "epoch": 7.96, + "learning_rate": 4.011108991074647e-06, + "loss": 0.348, + "step": 188230 + }, + { + "epoch": 7.96, + "learning_rate": 3.968708261782103e-06, + "loss": 0.3183, + "step": 188240 + }, + { + "epoch": 7.96, + "learning_rate": 3.9263075324895594e-06, + "loss": 0.3781, + "step": 188250 + }, + { + "epoch": 7.96, + "learning_rate": 3.883906803197015e-06, + "loss": 0.3701, + "step": 188260 + }, + { + "epoch": 7.96, + "learning_rate": 3.841506073904471e-06, + "loss": 0.2834, + "step": 188270 + }, + { + "epoch": 7.96, + "learning_rate": 3.799105344611927e-06, + "loss": 0.3667, + "step": 188280 + }, + { + "epoch": 7.96, + "learning_rate": 3.7567046153193834e-06, + "loss": 0.3851, + "step": 188290 + }, + { + "epoch": 7.96, + "learning_rate": 3.7143038860268396e-06, + "loss": 0.3611, + "step": 188300 + }, + { + "epoch": 7.96, + "learning_rate": 3.6719031567342958e-06, + "loss": 0.3603, + "step": 188310 + }, + { + "epoch": 7.96, + "learning_rate": 3.629502427441752e-06, + "loss": 0.3429, + "step": 188320 + }, + { + "epoch": 7.96, + "learning_rate": 3.587101698149208e-06, + "loss": 0.3994, + "step": 188330 + }, + { + "epoch": 7.96, + "learning_rate": 3.5447009688566644e-06, + "loss": 0.4298, + "step": 188340 + }, + { + "epoch": 7.97, + "learning_rate": 3.5023002395641206e-06, + "loss": 0.3748, + "step": 188350 + }, + { + "epoch": 7.97, + "learning_rate": 3.459899510271577e-06, + "loss": 0.3241, + "step": 188360 + }, + { + "epoch": 7.97, + "learning_rate": 3.417498780979033e-06, + "loss": 0.3628, + "step": 188370 + }, + { + "epoch": 7.97, + "learning_rate": 3.375098051686489e-06, + "loss": 0.3873, + "step": 188380 + }, + { + "epoch": 7.97, + "learning_rate": 3.3326973223939454e-06, + "loss": 0.3908, + "step": 188390 + }, + { + "epoch": 7.97, + "learning_rate": 3.2902965931014016e-06, + "loss": 0.3557, + "step": 188400 + }, + { + "epoch": 7.97, + "learning_rate": 3.247895863808858e-06, + "loss": 0.3286, + "step": 188410 + }, + { + "epoch": 7.97, + "learning_rate": 3.205495134516314e-06, + "loss": 0.3564, + "step": 188420 + }, + { + "epoch": 7.97, + "learning_rate": 3.16309440522377e-06, + "loss": 0.3343, + "step": 188430 + }, + { + "epoch": 7.97, + "learning_rate": 3.120693675931226e-06, + "loss": 0.4041, + "step": 188440 + }, + { + "epoch": 7.97, + "learning_rate": 3.078292946638682e-06, + "loss": 0.3524, + "step": 188450 + }, + { + "epoch": 7.97, + "learning_rate": 3.0358922173461384e-06, + "loss": 0.3915, + "step": 188460 + }, + { + "epoch": 7.97, + "learning_rate": 2.9934914880535946e-06, + "loss": 0.3694, + "step": 188470 + }, + { + "epoch": 7.97, + "learning_rate": 2.9510907587610508e-06, + "loss": 0.3221, + "step": 188480 + }, + { + "epoch": 7.97, + "learning_rate": 2.908690029468507e-06, + "loss": 0.3741, + "step": 188490 + }, + { + "epoch": 7.97, + "learning_rate": 2.866289300175963e-06, + "loss": 0.3468, + "step": 188500 + }, + { + "epoch": 7.97, + "learning_rate": 2.8238885708834194e-06, + "loss": 0.3717, + "step": 188510 + }, + { + "epoch": 7.97, + "learning_rate": 2.7814878415908756e-06, + "loss": 0.3878, + "step": 188520 + }, + { + "epoch": 7.97, + "learning_rate": 2.7390871122983318e-06, + "loss": 0.3663, + "step": 188530 + }, + { + "epoch": 7.97, + "learning_rate": 2.696686383005788e-06, + "loss": 0.3775, + "step": 188540 + }, + { + "epoch": 7.97, + "learning_rate": 2.654285653713244e-06, + "loss": 0.3357, + "step": 188550 + }, + { + "epoch": 7.97, + "learning_rate": 2.6118849244207004e-06, + "loss": 0.3769, + "step": 188560 + }, + { + "epoch": 7.97, + "learning_rate": 2.5694841951281566e-06, + "loss": 0.3984, + "step": 188570 + }, + { + "epoch": 7.97, + "learning_rate": 2.5270834658356124e-06, + "loss": 0.3586, + "step": 188580 + }, + { + "epoch": 7.98, + "learning_rate": 2.4846827365430686e-06, + "loss": 0.3351, + "step": 188590 + }, + { + "epoch": 7.98, + "learning_rate": 2.4422820072505248e-06, + "loss": 0.3412, + "step": 188600 + }, + { + "epoch": 7.98, + "learning_rate": 2.399881277957981e-06, + "loss": 0.3288, + "step": 188610 + }, + { + "epoch": 7.98, + "learning_rate": 2.357480548665437e-06, + "loss": 0.3391, + "step": 188620 + }, + { + "epoch": 7.98, + "learning_rate": 2.3150798193728934e-06, + "loss": 0.3556, + "step": 188630 + }, + { + "epoch": 7.98, + "learning_rate": 2.2726790900803496e-06, + "loss": 0.3307, + "step": 188640 + }, + { + "epoch": 7.98, + "learning_rate": 2.2302783607878058e-06, + "loss": 0.3969, + "step": 188650 + }, + { + "epoch": 7.98, + "learning_rate": 2.187877631495262e-06, + "loss": 0.3858, + "step": 188660 + }, + { + "epoch": 7.98, + "learning_rate": 2.1454769022027178e-06, + "loss": 0.386, + "step": 188670 + }, + { + "epoch": 7.98, + "learning_rate": 2.103076172910174e-06, + "loss": 0.3921, + "step": 188680 + }, + { + "epoch": 7.98, + "learning_rate": 2.06067544361763e-06, + "loss": 0.4552, + "step": 188690 + }, + { + "epoch": 7.98, + "learning_rate": 2.0182747143250864e-06, + "loss": 0.4191, + "step": 188700 + }, + { + "epoch": 7.98, + "learning_rate": 1.9758739850325426e-06, + "loss": 0.4667, + "step": 188710 + }, + { + "epoch": 7.98, + "learning_rate": 1.9334732557399988e-06, + "loss": 0.4009, + "step": 188720 + }, + { + "epoch": 7.98, + "learning_rate": 1.891072526447455e-06, + "loss": 0.3635, + "step": 188730 + }, + { + "epoch": 7.98, + "learning_rate": 1.8486717971549112e-06, + "loss": 0.3378, + "step": 188740 + }, + { + "epoch": 7.98, + "learning_rate": 1.8062710678623676e-06, + "loss": 0.4412, + "step": 188750 + }, + { + "epoch": 7.98, + "learning_rate": 1.7638703385698233e-06, + "loss": 0.3824, + "step": 188760 + }, + { + "epoch": 7.98, + "learning_rate": 1.7214696092772796e-06, + "loss": 0.4426, + "step": 188770 + }, + { + "epoch": 7.98, + "learning_rate": 1.6790688799847358e-06, + "loss": 0.3922, + "step": 188780 + }, + { + "epoch": 7.98, + "learning_rate": 1.636668150692192e-06, + "loss": 0.292, + "step": 188790 + }, + { + "epoch": 7.98, + "learning_rate": 1.5942674213996482e-06, + "loss": 0.35, + "step": 188800 + }, + { + "epoch": 7.98, + "learning_rate": 1.5518666921071044e-06, + "loss": 0.346, + "step": 188810 + }, + { + "epoch": 7.98, + "learning_rate": 1.5094659628145606e-06, + "loss": 0.3384, + "step": 188820 + }, + { + "epoch": 7.99, + "learning_rate": 1.4670652335220165e-06, + "loss": 0.4101, + "step": 188830 + }, + { + "epoch": 7.99, + "learning_rate": 1.4246645042294727e-06, + "loss": 0.4087, + "step": 188840 + }, + { + "epoch": 7.99, + "learning_rate": 1.382263774936929e-06, + "loss": 0.3239, + "step": 188850 + }, + { + "epoch": 7.99, + "learning_rate": 1.3398630456443851e-06, + "loss": 0.36, + "step": 188860 + }, + { + "epoch": 7.99, + "learning_rate": 1.2974623163518411e-06, + "loss": 0.3982, + "step": 188870 + }, + { + "epoch": 7.99, + "learning_rate": 1.2550615870592973e-06, + "loss": 0.357, + "step": 188880 + }, + { + "epoch": 7.99, + "learning_rate": 1.2126608577667535e-06, + "loss": 0.3256, + "step": 188890 + }, + { + "epoch": 7.99, + "learning_rate": 1.17026012847421e-06, + "loss": 0.3523, + "step": 188900 + }, + { + "epoch": 7.99, + "learning_rate": 1.1278593991816662e-06, + "loss": 0.3405, + "step": 188910 + }, + { + "epoch": 7.99, + "learning_rate": 1.0854586698891221e-06, + "loss": 0.3408, + "step": 188920 + }, + { + "epoch": 7.99, + "learning_rate": 1.0430579405965783e-06, + "loss": 0.3336, + "step": 188930 + }, + { + "epoch": 7.99, + "learning_rate": 1.0006572113040345e-06, + "loss": 0.3523, + "step": 188940 + }, + { + "epoch": 7.99, + "learning_rate": 9.582564820114907e-07, + "loss": 0.4189, + "step": 188950 + }, + { + "epoch": 7.99, + "learning_rate": 9.158557527189467e-07, + "loss": 0.3673, + "step": 188960 + }, + { + "epoch": 7.99, + "learning_rate": 8.734550234264029e-07, + "loss": 0.3636, + "step": 188970 + }, + { + "epoch": 7.99, + "learning_rate": 8.310542941338591e-07, + "loss": 0.3977, + "step": 188980 + }, + { + "epoch": 7.99, + "learning_rate": 7.886535648413153e-07, + "loss": 0.3785, + "step": 188990 + }, + { + "epoch": 7.99, + "learning_rate": 7.462528355487714e-07, + "loss": 0.4021, + "step": 189000 + } + ], + "max_steps": 189176, + "num_train_epochs": 8, + "total_flos": 9.214706529557545e+17, + "trial_name": null, + "trial_params": null +}