{ "best_metric": null, "best_model_checkpoint": null, "epoch": 39.993726474278546, "global_step": 510000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0005, "loss": 2.9557, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 2.8658, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.0005, "loss": 2.8174, "step": 300 }, { "epoch": 0.03, "learning_rate": 0.0005, "loss": 2.7738, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.0005, "loss": 2.8055, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.7804, "step": 600 }, { "epoch": 0.05, "learning_rate": 0.0005, "loss": 2.7898, "step": 700 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 2.7531, "step": 800 }, { "epoch": 0.07, "learning_rate": 0.0005, "loss": 2.7081, "step": 900 }, { "epoch": 0.08, "learning_rate": 0.0005, "loss": 2.7108, "step": 1000 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.7372, "step": 1100 }, { "epoch": 0.09, "learning_rate": 0.0005, "loss": 2.7245, "step": 1200 }, { "epoch": 0.1, "learning_rate": 0.0005, "loss": 2.718, "step": 1300 }, { "epoch": 0.11, "learning_rate": 0.0005, "loss": 2.71, "step": 1400 }, { "epoch": 0.12, "learning_rate": 0.0005, "loss": 2.6816, "step": 1500 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.7044, "step": 1600 }, { "epoch": 0.13, "learning_rate": 0.0005, "loss": 2.6867, "step": 1700 }, { "epoch": 0.14, "learning_rate": 0.0005, "loss": 2.7035, "step": 1800 }, { "epoch": 0.15, "learning_rate": 0.0005, "loss": 2.6853, "step": 1900 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.6772, "step": 2000 }, { "epoch": 0.16, "learning_rate": 0.0005, "loss": 2.6647, "step": 2100 }, { "epoch": 0.17, "learning_rate": 0.0005, "loss": 2.6923, "step": 2200 }, { "epoch": 0.18, "learning_rate": 0.0005, "loss": 2.6514, "step": 2300 }, { "epoch": 0.19, "learning_rate": 0.0005, "loss": 2.6542, "step": 2400 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.6391, "step": 2500 }, { "epoch": 0.2, "learning_rate": 0.0005, "loss": 2.6527, "step": 2600 }, { "epoch": 0.21, "learning_rate": 0.0005, "loss": 2.6658, "step": 2700 }, { "epoch": 0.22, "learning_rate": 0.0005, "loss": 2.6537, "step": 2800 }, { "epoch": 0.23, "learning_rate": 0.0005, "loss": 2.67, "step": 2900 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.6605, "step": 3000 }, { "epoch": 0.24, "learning_rate": 0.0005, "loss": 2.6516, "step": 3100 }, { "epoch": 0.25, "learning_rate": 0.0005, "loss": 2.6358, "step": 3200 }, { "epoch": 0.26, "learning_rate": 0.0005, "loss": 2.6302, "step": 3300 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.6007, "step": 3400 }, { "epoch": 0.27, "learning_rate": 0.0005, "loss": 2.6063, "step": 3500 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 2.6016, "step": 3600 }, { "epoch": 0.29, "learning_rate": 0.0005, "loss": 2.6013, "step": 3700 }, { "epoch": 0.3, "learning_rate": 0.0005, "loss": 2.5946, "step": 3800 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.6054, "step": 3900 }, { "epoch": 0.31, "learning_rate": 0.0005, "loss": 2.6073, "step": 4000 }, { "epoch": 0.32, "learning_rate": 0.0005, "loss": 2.6101, "step": 4100 }, { "epoch": 0.33, "learning_rate": 0.0005, "loss": 2.6027, "step": 4200 }, { "epoch": 0.34, "learning_rate": 0.0005, "loss": 2.5753, "step": 4300 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.5974, "step": 4400 }, { "epoch": 0.35, "learning_rate": 0.0005, "loss": 2.6356, "step": 4500 }, { "epoch": 0.36, "learning_rate": 0.0005, "loss": 2.5751, "step": 4600 }, { "epoch": 0.37, "learning_rate": 0.0005, "loss": 2.5813, "step": 4700 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.5925, "step": 4800 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 2.5726, "step": 4900 }, { "epoch": 0.39, "learning_rate": 0.0005, "loss": 2.593, "step": 5000 }, { "epoch": 0.4, "learning_rate": 0.0005, "loss": 2.5702, "step": 5100 }, { "epoch": 0.41, "learning_rate": 0.0005, "loss": 2.5746, "step": 5200 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.5799, "step": 5300 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 2.5502, "step": 5400 }, { "epoch": 0.43, "learning_rate": 0.0005, "loss": 2.5812, "step": 5500 }, { "epoch": 0.44, "learning_rate": 0.0005, "loss": 2.5866, "step": 5600 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.5586, "step": 5700 }, { "epoch": 0.45, "learning_rate": 0.0005, "loss": 2.568, "step": 5800 }, { "epoch": 0.46, "learning_rate": 0.0005, "loss": 2.5671, "step": 5900 }, { "epoch": 0.47, "learning_rate": 0.0005, "loss": 2.5502, "step": 6000 }, { "epoch": 0.48, "learning_rate": 0.0005, "loss": 2.5526, "step": 6100 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.5421, "step": 6200 }, { "epoch": 0.49, "learning_rate": 0.0005, "loss": 2.5462, "step": 6300 }, { "epoch": 0.5, "learning_rate": 0.0005, "loss": 2.5394, "step": 6400 }, { "epoch": 0.51, "learning_rate": 0.0005, "loss": 2.5563, "step": 6500 }, { "epoch": 0.52, "learning_rate": 0.0005, "loss": 2.5762, "step": 6600 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.5232, "step": 6700 }, { "epoch": 0.53, "learning_rate": 0.0005, "loss": 2.5681, "step": 6800 }, { "epoch": 0.54, "learning_rate": 0.0005, "loss": 2.565, "step": 6900 }, { "epoch": 0.55, "learning_rate": 0.0005, "loss": 2.516, "step": 7000 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.5506, "step": 7100 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 2.5251, "step": 7200 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 2.4942, "step": 7300 }, { "epoch": 0.58, "learning_rate": 0.0005, "loss": 2.5034, "step": 7400 }, { "epoch": 0.59, "learning_rate": 0.0005, "loss": 2.5232, "step": 7500 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.523, "step": 7600 }, { "epoch": 0.6, "learning_rate": 0.0005, "loss": 2.5113, "step": 7700 }, { "epoch": 0.61, "learning_rate": 0.0005, "loss": 2.5103, "step": 7800 }, { "epoch": 0.62, "learning_rate": 0.0005, "loss": 2.512, "step": 7900 }, { "epoch": 0.63, "learning_rate": 0.0005, "loss": 2.5053, "step": 8000 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.5327, "step": 8100 }, { "epoch": 0.64, "learning_rate": 0.0005, "loss": 2.5199, "step": 8200 }, { "epoch": 0.65, "learning_rate": 0.0005, "loss": 2.5373, "step": 8300 }, { "epoch": 0.66, "learning_rate": 0.0005, "loss": 2.484, "step": 8400 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.5555, "step": 8500 }, { "epoch": 0.67, "learning_rate": 0.0005, "loss": 2.4938, "step": 8600 }, { "epoch": 0.68, "learning_rate": 0.0005, "loss": 2.5342, "step": 8700 }, { "epoch": 0.69, "learning_rate": 0.0005, "loss": 2.4963, "step": 8800 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 2.5022, "step": 8900 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.5201, "step": 9000 }, { "epoch": 0.71, "learning_rate": 0.0005, "loss": 2.4868, "step": 9100 }, { "epoch": 0.72, "learning_rate": 0.0005, "loss": 2.4804, "step": 9200 }, { "epoch": 0.73, "learning_rate": 0.0005, "loss": 2.4936, "step": 9300 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.5081, "step": 9400 }, { "epoch": 0.74, "learning_rate": 0.0005, "loss": 2.4851, "step": 9500 }, { "epoch": 0.75, "learning_rate": 0.0005, "loss": 2.498, "step": 9600 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 2.4868, "step": 9700 }, { "epoch": 0.77, "learning_rate": 0.0005, "loss": 2.5126, "step": 9800 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.5031, "step": 9900 }, { "epoch": 0.78, "learning_rate": 0.0005, "loss": 2.4989, "step": 10000 }, { "epoch": 0.78, "eval_gen_len": 18.83164121126512, "eval_loss": 2.2748022079467773, "eval_rouge1": 31.1136, "eval_rouge2": 10.0478, "eval_rougeL": 24.9492, "eval_rougeLsum": 24.9433, "eval_runtime": 361.4284, "eval_samples_per_second": 31.34, "eval_steps_per_second": 1.959, "step": 10000 }, { "epoch": 0.79, "learning_rate": 0.0005, "loss": 2.4972, "step": 10100 }, { "epoch": 0.8, "learning_rate": 0.0005, "loss": 2.4781, "step": 10200 }, { "epoch": 0.81, "learning_rate": 0.0005, "loss": 2.4716, "step": 10300 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.47, "step": 10400 }, { "epoch": 0.82, "learning_rate": 0.0005, "loss": 2.5001, "step": 10500 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 2.5084, "step": 10600 }, { "epoch": 0.84, "learning_rate": 0.0005, "loss": 2.4845, "step": 10700 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 2.469, "step": 10800 }, { "epoch": 0.85, "learning_rate": 0.0005, "loss": 2.4909, "step": 10900 }, { "epoch": 0.86, "learning_rate": 0.0005, "loss": 2.4785, "step": 11000 }, { "epoch": 0.87, "learning_rate": 0.0005, "loss": 2.4664, "step": 11100 }, { "epoch": 0.88, "learning_rate": 0.0005, "loss": 2.4913, "step": 11200 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 2.4588, "step": 11300 }, { "epoch": 0.89, "learning_rate": 0.0005, "loss": 2.4712, "step": 11400 }, { "epoch": 0.9, "learning_rate": 0.0005, "loss": 2.4857, "step": 11500 }, { "epoch": 0.91, "learning_rate": 0.0005, "loss": 2.4542, "step": 11600 }, { "epoch": 0.92, "learning_rate": 0.0005, "loss": 2.4724, "step": 11700 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 2.48, "step": 11800 }, { "epoch": 0.93, "learning_rate": 0.0005, "loss": 2.4975, "step": 11900 }, { "epoch": 0.94, "learning_rate": 0.0005, "loss": 2.4908, "step": 12000 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 2.4845, "step": 12100 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 2.474, "step": 12200 }, { "epoch": 0.96, "learning_rate": 0.0005, "loss": 2.4674, "step": 12300 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 2.4571, "step": 12400 }, { "epoch": 0.98, "learning_rate": 0.0005, "loss": 2.4618, "step": 12500 }, { "epoch": 0.99, "learning_rate": 0.0005, "loss": 2.5003, "step": 12600 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 2.4793, "step": 12700 }, { "epoch": 1.0, "learning_rate": 0.0005, "loss": 2.4271, "step": 12800 }, { "epoch": 1.01, "learning_rate": 0.0005, "loss": 2.372, "step": 12900 }, { "epoch": 1.02, "learning_rate": 0.0005, "loss": 2.3714, "step": 13000 }, { "epoch": 1.03, "learning_rate": 0.0005, "loss": 2.3498, "step": 13100 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 2.3581, "step": 13200 }, { "epoch": 1.04, "learning_rate": 0.0005, "loss": 2.3774, "step": 13300 }, { "epoch": 1.05, "learning_rate": 0.0005, "loss": 2.3658, "step": 13400 }, { "epoch": 1.06, "learning_rate": 0.0005, "loss": 2.4074, "step": 13500 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 2.4005, "step": 13600 }, { "epoch": 1.07, "learning_rate": 0.0005, "loss": 2.3915, "step": 13700 }, { "epoch": 1.08, "learning_rate": 0.0005, "loss": 2.3894, "step": 13800 }, { "epoch": 1.09, "learning_rate": 0.0005, "loss": 2.3981, "step": 13900 }, { "epoch": 1.1, "learning_rate": 0.0005, "loss": 2.3875, "step": 14000 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 2.3823, "step": 14100 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 2.3678, "step": 14200 }, { "epoch": 1.12, "learning_rate": 0.0005, "loss": 2.3891, "step": 14300 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 2.3688, "step": 14400 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 2.3524, "step": 14500 }, { "epoch": 1.14, "learning_rate": 0.0005, "loss": 2.3719, "step": 14600 }, { "epoch": 1.15, "learning_rate": 0.0005, "loss": 2.3646, "step": 14700 }, { "epoch": 1.16, "learning_rate": 0.0005, "loss": 2.3875, "step": 14800 }, { "epoch": 1.17, "learning_rate": 0.0005, "loss": 2.3717, "step": 14900 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 2.3585, "step": 15000 }, { "epoch": 1.18, "learning_rate": 0.0005, "loss": 2.348, "step": 15100 }, { "epoch": 1.19, "learning_rate": 0.0005, "loss": 2.3713, "step": 15200 }, { "epoch": 1.2, "learning_rate": 0.0005, "loss": 2.392, "step": 15300 }, { "epoch": 1.21, "learning_rate": 0.0005, "loss": 2.3611, "step": 15400 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 2.3781, "step": 15500 }, { "epoch": 1.22, "learning_rate": 0.0005, "loss": 2.3851, "step": 15600 }, { "epoch": 1.23, "learning_rate": 0.0005, "loss": 2.3868, "step": 15700 }, { "epoch": 1.24, "learning_rate": 0.0005, "loss": 2.3603, "step": 15800 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 2.3947, "step": 15900 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 2.3965, "step": 16000 }, { "epoch": 1.26, "learning_rate": 0.0005, "loss": 2.3836, "step": 16100 }, { "epoch": 1.27, "learning_rate": 0.0005, "loss": 2.3423, "step": 16200 }, { "epoch": 1.28, "learning_rate": 0.0005, "loss": 2.3669, "step": 16300 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 2.3748, "step": 16400 }, { "epoch": 1.29, "learning_rate": 0.0005, "loss": 2.3593, "step": 16500 }, { "epoch": 1.3, "learning_rate": 0.0005, "loss": 2.3559, "step": 16600 }, { "epoch": 1.31, "learning_rate": 0.0005, "loss": 2.3652, "step": 16700 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 2.3841, "step": 16800 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 2.3874, "step": 16900 }, { "epoch": 1.33, "learning_rate": 0.0005, "loss": 2.3607, "step": 17000 }, { "epoch": 1.34, "learning_rate": 0.0005, "loss": 2.3849, "step": 17100 }, { "epoch": 1.35, "learning_rate": 0.0005, "loss": 2.3809, "step": 17200 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 2.359, "step": 17300 }, { "epoch": 1.36, "learning_rate": 0.0005, "loss": 2.3618, "step": 17400 }, { "epoch": 1.37, "learning_rate": 0.0005, "loss": 2.3758, "step": 17500 }, { "epoch": 1.38, "learning_rate": 0.0005, "loss": 2.3538, "step": 17600 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 2.3733, "step": 17700 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 2.3735, "step": 17800 }, { "epoch": 1.4, "learning_rate": 0.0005, "loss": 2.3859, "step": 17900 }, { "epoch": 1.41, "learning_rate": 0.0005, "loss": 2.3508, "step": 18000 }, { "epoch": 1.42, "learning_rate": 0.0005, "loss": 2.3859, "step": 18100 }, { "epoch": 1.43, "learning_rate": 0.0005, "loss": 2.3567, "step": 18200 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 2.3292, "step": 18300 }, { "epoch": 1.44, "learning_rate": 0.0005, "loss": 2.3882, "step": 18400 }, { "epoch": 1.45, "learning_rate": 0.0005, "loss": 2.3592, "step": 18500 }, { "epoch": 1.46, "learning_rate": 0.0005, "loss": 2.3594, "step": 18600 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 2.359, "step": 18700 }, { "epoch": 1.47, "learning_rate": 0.0005, "loss": 2.3549, "step": 18800 }, { "epoch": 1.48, "learning_rate": 0.0005, "loss": 2.3962, "step": 18900 }, { "epoch": 1.49, "learning_rate": 0.0005, "loss": 2.3413, "step": 19000 }, { "epoch": 1.5, "learning_rate": 0.0005, "loss": 2.3636, "step": 19100 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 2.3381, "step": 19200 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 2.363, "step": 19300 }, { "epoch": 1.52, "learning_rate": 0.0005, "loss": 2.3687, "step": 19400 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 2.3359, "step": 19500 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 2.361, "step": 19600 }, { "epoch": 1.54, "learning_rate": 0.0005, "loss": 2.3808, "step": 19700 }, { "epoch": 1.55, "learning_rate": 0.0005, "loss": 2.347, "step": 19800 }, { "epoch": 1.56, "learning_rate": 0.0005, "loss": 2.3695, "step": 19900 }, { "epoch": 1.57, "learning_rate": 0.0005, "loss": 2.3456, "step": 20000 }, { "epoch": 1.57, "eval_gen_len": 18.73682351902534, "eval_loss": 2.202566385269165, "eval_rouge1": 31.9461, "eval_rouge2": 10.8723, "eval_rougeL": 25.7822, "eval_rougeLsum": 25.7746, "eval_runtime": 358.9411, "eval_samples_per_second": 31.557, "eval_steps_per_second": 1.972, "step": 20000 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 2.3445, "step": 20100 }, { "epoch": 1.58, "learning_rate": 0.0005, "loss": 2.36, "step": 20200 }, { "epoch": 1.59, "learning_rate": 0.0005, "loss": 2.3749, "step": 20300 }, { "epoch": 1.6, "learning_rate": 0.0005, "loss": 2.3606, "step": 20400 }, { "epoch": 1.61, "learning_rate": 0.0005, "loss": 2.3375, "step": 20500 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 2.3448, "step": 20600 }, { "epoch": 1.62, "learning_rate": 0.0005, "loss": 2.3698, "step": 20700 }, { "epoch": 1.63, "learning_rate": 0.0005, "loss": 2.3403, "step": 20800 }, { "epoch": 1.64, "learning_rate": 0.0005, "loss": 2.354, "step": 20900 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 2.3578, "step": 21000 }, { "epoch": 1.65, "learning_rate": 0.0005, "loss": 2.3598, "step": 21100 }, { "epoch": 1.66, "learning_rate": 0.0005, "loss": 2.3262, "step": 21200 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 2.3667, "step": 21300 }, { "epoch": 1.68, "learning_rate": 0.0005, "loss": 2.3801, "step": 21400 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 2.3638, "step": 21500 }, { "epoch": 1.69, "learning_rate": 0.0005, "loss": 2.3559, "step": 21600 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 2.3595, "step": 21700 }, { "epoch": 1.71, "learning_rate": 0.0005, "loss": 2.3472, "step": 21800 }, { "epoch": 1.72, "learning_rate": 0.0005, "loss": 2.3415, "step": 21900 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 2.3427, "step": 22000 }, { "epoch": 1.73, "learning_rate": 0.0005, "loss": 2.3306, "step": 22100 }, { "epoch": 1.74, "learning_rate": 0.0005, "loss": 2.3147, "step": 22200 }, { "epoch": 1.75, "learning_rate": 0.0005, "loss": 2.3402, "step": 22300 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 2.3294, "step": 22400 }, { "epoch": 1.76, "learning_rate": 0.0005, "loss": 2.383, "step": 22500 }, { "epoch": 1.77, "learning_rate": 0.0005, "loss": 2.3562, "step": 22600 }, { "epoch": 1.78, "learning_rate": 0.0005, "loss": 2.3583, "step": 22700 }, { "epoch": 1.79, "learning_rate": 0.0005, "loss": 2.353, "step": 22800 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 2.3428, "step": 22900 }, { "epoch": 1.8, "learning_rate": 0.0005, "loss": 2.3461, "step": 23000 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 2.3351, "step": 23100 }, { "epoch": 1.82, "learning_rate": 0.0005, "loss": 2.3735, "step": 23200 }, { "epoch": 1.83, "learning_rate": 0.0005, "loss": 2.3722, "step": 23300 }, { "epoch": 1.84, "learning_rate": 0.0005, "loss": 2.3445, "step": 23400 }, { "epoch": 1.84, "learning_rate": 0.0005, "loss": 2.2993, "step": 23500 }, { "epoch": 1.85, "learning_rate": 0.0005, "loss": 2.3267, "step": 23600 }, { "epoch": 1.86, "learning_rate": 0.0005, "loss": 2.3075, "step": 23700 }, { "epoch": 1.87, "learning_rate": 0.0005, "loss": 2.3993, "step": 23800 }, { "epoch": 1.87, "learning_rate": 0.0005, "loss": 2.3256, "step": 23900 }, { "epoch": 1.88, "learning_rate": 0.0005, "loss": 2.3395, "step": 24000 }, { "epoch": 1.89, "learning_rate": 0.0005, "loss": 2.3395, "step": 24100 }, { "epoch": 1.9, "learning_rate": 0.0005, "loss": 2.3619, "step": 24200 }, { "epoch": 1.91, "learning_rate": 0.0005, "loss": 2.3502, "step": 24300 }, { "epoch": 1.91, "learning_rate": 0.0005, "loss": 2.3278, "step": 24400 }, { "epoch": 1.92, "learning_rate": 0.0005, "loss": 2.3603, "step": 24500 }, { "epoch": 1.93, "learning_rate": 0.0005, "loss": 2.3323, "step": 24600 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 2.374, "step": 24700 }, { "epoch": 1.94, "learning_rate": 0.0005, "loss": 2.3298, "step": 24800 }, { "epoch": 1.95, "learning_rate": 0.0005, "loss": 2.3274, "step": 24900 }, { "epoch": 1.96, "learning_rate": 0.0005, "loss": 2.3451, "step": 25000 }, { "epoch": 1.97, "learning_rate": 0.0005, "loss": 2.2976, "step": 25100 }, { "epoch": 1.98, "learning_rate": 0.0005, "loss": 2.3222, "step": 25200 }, { "epoch": 1.98, "learning_rate": 0.0005, "loss": 2.3013, "step": 25300 }, { "epoch": 1.99, "learning_rate": 0.0005, "loss": 2.3352, "step": 25400 }, { "epoch": 2.0, "learning_rate": 0.0005, "loss": 2.3124, "step": 25500 }, { "epoch": 2.01, "learning_rate": 0.0005, "loss": 2.2476, "step": 25600 }, { "epoch": 2.02, "learning_rate": 0.0005, "loss": 2.2515, "step": 25700 }, { "epoch": 2.02, "learning_rate": 0.0005, "loss": 2.2521, "step": 25800 }, { "epoch": 2.03, "learning_rate": 0.0005, "loss": 2.2177, "step": 25900 }, { "epoch": 2.04, "learning_rate": 0.0005, "loss": 2.2554, "step": 26000 }, { "epoch": 2.05, "learning_rate": 0.0005, "loss": 2.2345, "step": 26100 }, { "epoch": 2.05, "learning_rate": 0.0005, "loss": 2.2548, "step": 26200 }, { "epoch": 2.06, "learning_rate": 0.0005, "loss": 2.2591, "step": 26300 }, { "epoch": 2.07, "learning_rate": 0.0005, "loss": 2.2414, "step": 26400 }, { "epoch": 2.08, "learning_rate": 0.0005, "loss": 2.2562, "step": 26500 }, { "epoch": 2.09, "learning_rate": 0.0005, "loss": 2.2722, "step": 26600 }, { "epoch": 2.09, "learning_rate": 0.0005, "loss": 2.2945, "step": 26700 }, { "epoch": 2.1, "learning_rate": 0.0005, "loss": 2.2473, "step": 26800 }, { "epoch": 2.11, "learning_rate": 0.0005, "loss": 2.2406, "step": 26900 }, { "epoch": 2.12, "learning_rate": 0.0005, "loss": 2.2683, "step": 27000 }, { "epoch": 2.13, "learning_rate": 0.0005, "loss": 2.2277, "step": 27100 }, { "epoch": 2.13, "learning_rate": 0.0005, "loss": 2.2773, "step": 27200 }, { "epoch": 2.14, "learning_rate": 0.0005, "loss": 2.2379, "step": 27300 }, { "epoch": 2.15, "learning_rate": 0.0005, "loss": 2.2632, "step": 27400 }, { "epoch": 2.16, "learning_rate": 0.0005, "loss": 2.2494, "step": 27500 }, { "epoch": 2.16, "learning_rate": 0.0005, "loss": 2.2149, "step": 27600 }, { "epoch": 2.17, "learning_rate": 0.0005, "loss": 2.2357, "step": 27700 }, { "epoch": 2.18, "learning_rate": 0.0005, "loss": 2.2573, "step": 27800 }, { "epoch": 2.19, "learning_rate": 0.0005, "loss": 2.2577, "step": 27900 }, { "epoch": 2.2, "learning_rate": 0.0005, "loss": 2.2526, "step": 28000 }, { "epoch": 2.2, "learning_rate": 0.0005, "loss": 2.2573, "step": 28100 }, { "epoch": 2.21, "learning_rate": 0.0005, "loss": 2.2741, "step": 28200 }, { "epoch": 2.22, "learning_rate": 0.0005, "loss": 2.2442, "step": 28300 }, { "epoch": 2.23, "learning_rate": 0.0005, "loss": 2.2483, "step": 28400 }, { "epoch": 2.23, "learning_rate": 0.0005, "loss": 2.2582, "step": 28500 }, { "epoch": 2.24, "learning_rate": 0.0005, "loss": 2.275, "step": 28600 }, { "epoch": 2.25, "learning_rate": 0.0005, "loss": 2.2972, "step": 28700 }, { "epoch": 2.26, "learning_rate": 0.0005, "loss": 2.2356, "step": 28800 }, { "epoch": 2.27, "learning_rate": 0.0005, "loss": 2.2517, "step": 28900 }, { "epoch": 2.27, "learning_rate": 0.0005, "loss": 2.3137, "step": 29000 }, { "epoch": 2.28, "learning_rate": 0.0005, "loss": 2.2788, "step": 29100 }, { "epoch": 2.29, "learning_rate": 0.0005, "loss": 2.2562, "step": 29200 }, { "epoch": 2.3, "learning_rate": 0.0005, "loss": 2.2825, "step": 29300 }, { "epoch": 2.31, "learning_rate": 0.0005, "loss": 2.2082, "step": 29400 }, { "epoch": 2.31, "learning_rate": 0.0005, "loss": 2.2491, "step": 29500 }, { "epoch": 2.32, "learning_rate": 0.0005, "loss": 2.2299, "step": 29600 }, { "epoch": 2.33, "learning_rate": 0.0005, "loss": 2.2678, "step": 29700 }, { "epoch": 2.34, "learning_rate": 0.0005, "loss": 2.2961, "step": 29800 }, { "epoch": 2.34, "learning_rate": 0.0005, "loss": 2.2751, "step": 29900 }, { "epoch": 2.35, "learning_rate": 0.0005, "loss": 2.2444, "step": 30000 }, { "epoch": 2.35, "eval_gen_len": 18.769223978105412, "eval_loss": 2.1571497917175293, "eval_rouge1": 32.5123, "eval_rouge2": 11.4387, "eval_rougeL": 26.3083, "eval_rougeLsum": 26.2953, "eval_runtime": 360.0544, "eval_samples_per_second": 31.459, "eval_steps_per_second": 1.966, "step": 30000 }, { "epoch": 2.36, "learning_rate": 0.0005, "loss": 2.2231, "step": 30100 }, { "epoch": 2.37, "learning_rate": 0.0005, "loss": 2.2138, "step": 30200 }, { "epoch": 2.38, "learning_rate": 0.0005, "loss": 2.2555, "step": 30300 }, { "epoch": 2.38, "learning_rate": 0.0005, "loss": 2.255, "step": 30400 }, { "epoch": 2.39, "learning_rate": 0.0005, "loss": 2.2482, "step": 30500 }, { "epoch": 2.4, "learning_rate": 0.0005, "loss": 2.2546, "step": 30600 }, { "epoch": 2.41, "learning_rate": 0.0005, "loss": 2.2404, "step": 30700 }, { "epoch": 2.42, "learning_rate": 0.0005, "loss": 2.2568, "step": 30800 }, { "epoch": 2.42, "learning_rate": 0.0005, "loss": 2.2331, "step": 30900 }, { "epoch": 2.43, "learning_rate": 0.0005, "loss": 2.2175, "step": 31000 }, { "epoch": 2.44, "learning_rate": 0.0005, "loss": 2.2343, "step": 31100 }, { "epoch": 2.45, "learning_rate": 0.0005, "loss": 2.2838, "step": 31200 }, { "epoch": 2.45, "learning_rate": 0.0005, "loss": 2.2531, "step": 31300 }, { "epoch": 2.46, "learning_rate": 0.0005, "loss": 2.2971, "step": 31400 }, { "epoch": 2.47, "learning_rate": 0.0005, "loss": 2.2527, "step": 31500 }, { "epoch": 2.48, "learning_rate": 0.0005, "loss": 2.2508, "step": 31600 }, { "epoch": 2.49, "learning_rate": 0.0005, "loss": 2.2392, "step": 31700 }, { "epoch": 2.49, "learning_rate": 0.0005, "loss": 2.2263, "step": 31800 }, { "epoch": 2.5, "learning_rate": 0.0005, "loss": 2.2048, "step": 31900 }, { "epoch": 2.51, "learning_rate": 0.0005, "loss": 2.2687, "step": 32000 }, { "epoch": 2.52, "learning_rate": 0.0005, "loss": 2.2202, "step": 32100 }, { "epoch": 2.53, "learning_rate": 0.0005, "loss": 2.2439, "step": 32200 }, { "epoch": 2.53, "learning_rate": 0.0005, "loss": 2.2705, "step": 32300 }, { "epoch": 2.54, "learning_rate": 0.0005, "loss": 2.2384, "step": 32400 }, { "epoch": 2.55, "learning_rate": 0.0005, "loss": 2.2517, "step": 32500 }, { "epoch": 2.56, "learning_rate": 0.0005, "loss": 2.2336, "step": 32600 }, { "epoch": 2.56, "learning_rate": 0.0005, "loss": 2.2587, "step": 32700 }, { "epoch": 2.57, "learning_rate": 0.0005, "loss": 2.2716, "step": 32800 }, { "epoch": 2.58, "learning_rate": 0.0005, "loss": 2.2294, "step": 32900 }, { "epoch": 2.59, "learning_rate": 0.0005, "loss": 2.2819, "step": 33000 }, { "epoch": 2.6, "learning_rate": 0.0005, "loss": 2.2461, "step": 33100 }, { "epoch": 2.6, "learning_rate": 0.0005, "loss": 2.2399, "step": 33200 }, { "epoch": 2.61, "learning_rate": 0.0005, "loss": 2.2518, "step": 33300 }, { "epoch": 2.62, "learning_rate": 0.0005, "loss": 2.2797, "step": 33400 }, { "epoch": 2.63, "learning_rate": 0.0005, "loss": 2.2575, "step": 33500 }, { "epoch": 2.63, "learning_rate": 0.0005, "loss": 2.2288, "step": 33600 }, { "epoch": 2.64, "learning_rate": 0.0005, "loss": 2.2504, "step": 33700 }, { "epoch": 2.65, "learning_rate": 0.0005, "loss": 2.229, "step": 33800 }, { "epoch": 2.66, "learning_rate": 0.0005, "loss": 2.2625, "step": 33900 }, { "epoch": 2.67, "learning_rate": 0.0005, "loss": 2.2648, "step": 34000 }, { "epoch": 2.67, "learning_rate": 0.0005, "loss": 2.2677, "step": 34100 }, { "epoch": 2.68, "learning_rate": 0.0005, "loss": 2.2825, "step": 34200 }, { "epoch": 2.69, "learning_rate": 0.0005, "loss": 2.3043, "step": 34300 }, { "epoch": 2.7, "learning_rate": 0.0005, "loss": 2.2503, "step": 34400 }, { "epoch": 2.71, "learning_rate": 0.0005, "loss": 2.2582, "step": 34500 }, { "epoch": 2.71, "learning_rate": 0.0005, "loss": 2.2516, "step": 34600 }, { "epoch": 2.72, "learning_rate": 0.0005, "loss": 2.2583, "step": 34700 }, { "epoch": 2.73, "learning_rate": 0.0005, "loss": 2.2899, "step": 34800 }, { "epoch": 2.74, "learning_rate": 0.0005, "loss": 2.2794, "step": 34900 }, { "epoch": 2.74, "learning_rate": 0.0005, "loss": 2.2469, "step": 35000 }, { "epoch": 2.75, "learning_rate": 0.0005, "loss": 2.2274, "step": 35100 }, { "epoch": 2.76, "learning_rate": 0.0005, "loss": 2.2566, "step": 35200 }, { "epoch": 2.77, "learning_rate": 0.0005, "loss": 2.2657, "step": 35300 }, { "epoch": 2.78, "learning_rate": 0.0005, "loss": 2.2718, "step": 35400 }, { "epoch": 2.78, "learning_rate": 0.0005, "loss": 2.2226, "step": 35500 }, { "epoch": 2.79, "learning_rate": 0.0005, "loss": 2.2495, "step": 35600 }, { "epoch": 2.8, "learning_rate": 0.0005, "loss": 2.2368, "step": 35700 }, { "epoch": 2.81, "learning_rate": 0.0005, "loss": 2.2553, "step": 35800 }, { "epoch": 2.82, "learning_rate": 0.0005, "loss": 2.2305, "step": 35900 }, { "epoch": 2.82, "learning_rate": 0.0005, "loss": 2.2716, "step": 36000 }, { "epoch": 2.83, "learning_rate": 0.0005, "loss": 2.2758, "step": 36100 }, { "epoch": 2.84, "learning_rate": 0.0005, "loss": 2.2743, "step": 36200 }, { "epoch": 2.85, "learning_rate": 0.0005, "loss": 2.2621, "step": 36300 }, { "epoch": 2.85, "learning_rate": 0.0005, "loss": 2.2489, "step": 36400 }, { "epoch": 2.86, "learning_rate": 0.0005, "loss": 2.2363, "step": 36500 }, { "epoch": 2.87, "learning_rate": 0.0005, "loss": 2.257, "step": 36600 }, { "epoch": 2.88, "learning_rate": 0.0005, "loss": 2.2738, "step": 36700 }, { "epoch": 2.89, "learning_rate": 0.0005, "loss": 2.2153, "step": 36800 }, { "epoch": 2.89, "learning_rate": 0.0005, "loss": 2.2587, "step": 36900 }, { "epoch": 2.9, "learning_rate": 0.0005, "loss": 2.2536, "step": 37000 }, { "epoch": 2.91, "learning_rate": 0.0005, "loss": 2.2509, "step": 37100 }, { "epoch": 2.92, "learning_rate": 0.0005, "loss": 2.2362, "step": 37200 }, { "epoch": 2.93, "learning_rate": 0.0005, "loss": 2.2259, "step": 37300 }, { "epoch": 2.93, "learning_rate": 0.0005, "loss": 2.2235, "step": 37400 }, { "epoch": 2.94, "learning_rate": 0.0005, "loss": 2.2439, "step": 37500 }, { "epoch": 2.95, "learning_rate": 0.0005, "loss": 2.2646, "step": 37600 }, { "epoch": 2.96, "learning_rate": 0.0005, "loss": 2.2301, "step": 37700 }, { "epoch": 2.96, "learning_rate": 0.0005, "loss": 2.2284, "step": 37800 }, { "epoch": 2.97, "learning_rate": 0.0005, "loss": 2.2703, "step": 37900 }, { "epoch": 2.98, "learning_rate": 0.0005, "loss": 2.2647, "step": 38000 }, { "epoch": 2.99, "learning_rate": 0.0005, "loss": 2.2456, "step": 38100 }, { "epoch": 3.0, "learning_rate": 0.0005, "loss": 2.2619, "step": 38200 }, { "epoch": 3.0, "learning_rate": 0.0005, "loss": 2.21, "step": 38300 }, { "epoch": 3.01, "learning_rate": 0.0005, "loss": 2.1604, "step": 38400 }, { "epoch": 3.02, "learning_rate": 0.0005, "loss": 2.165, "step": 38500 }, { "epoch": 3.03, "learning_rate": 0.0005, "loss": 2.1502, "step": 38600 }, { "epoch": 3.03, "learning_rate": 0.0005, "loss": 2.1564, "step": 38700 }, { "epoch": 3.04, "learning_rate": 0.0005, "loss": 2.147, "step": 38800 }, { "epoch": 3.05, "learning_rate": 0.0005, "loss": 2.171, "step": 38900 }, { "epoch": 3.06, "learning_rate": 0.0005, "loss": 2.1522, "step": 39000 }, { "epoch": 3.07, "learning_rate": 0.0005, "loss": 2.19, "step": 39100 }, { "epoch": 3.07, "learning_rate": 0.0005, "loss": 2.1632, "step": 39200 }, { "epoch": 3.08, "learning_rate": 0.0005, "loss": 2.1739, "step": 39300 }, { "epoch": 3.09, "learning_rate": 0.0005, "loss": 2.1466, "step": 39400 }, { "epoch": 3.1, "learning_rate": 0.0005, "loss": 2.1726, "step": 39500 }, { "epoch": 3.11, "learning_rate": 0.0005, "loss": 2.1659, "step": 39600 }, { "epoch": 3.11, "learning_rate": 0.0005, "loss": 2.1573, "step": 39700 }, { "epoch": 3.12, "learning_rate": 0.0005, "loss": 2.1853, "step": 39800 }, { "epoch": 3.13, "learning_rate": 0.0005, "loss": 2.1446, "step": 39900 }, { "epoch": 3.14, "learning_rate": 0.0005, "loss": 2.1901, "step": 40000 }, { "epoch": 3.14, "eval_gen_len": 18.773461640328417, "eval_loss": 2.1257381439208984, "eval_rouge1": 32.8553, "eval_rouge2": 11.7404, "eval_rougeL": 26.6114, "eval_rougeLsum": 26.6102, "eval_runtime": 359.175, "eval_samples_per_second": 31.536, "eval_steps_per_second": 1.971, "step": 40000 }, { "epoch": 3.14, "learning_rate": 0.0005, "loss": 2.1934, "step": 40100 }, { "epoch": 3.15, "learning_rate": 0.0005, "loss": 2.1582, "step": 40200 }, { "epoch": 3.16, "learning_rate": 0.0005, "loss": 2.1633, "step": 40300 }, { "epoch": 3.17, "learning_rate": 0.0005, "loss": 2.1623, "step": 40400 }, { "epoch": 3.18, "learning_rate": 0.0005, "loss": 2.1895, "step": 40500 }, { "epoch": 3.18, "learning_rate": 0.0005, "loss": 2.1656, "step": 40600 }, { "epoch": 3.19, "learning_rate": 0.0005, "loss": 2.1944, "step": 40700 }, { "epoch": 3.2, "learning_rate": 0.0005, "loss": 2.1575, "step": 40800 }, { "epoch": 3.21, "learning_rate": 0.0005, "loss": 2.1717, "step": 40900 }, { "epoch": 3.22, "learning_rate": 0.0005, "loss": 2.1541, "step": 41000 }, { "epoch": 3.22, "learning_rate": 0.0005, "loss": 2.1976, "step": 41100 }, { "epoch": 3.23, "learning_rate": 0.0005, "loss": 2.1578, "step": 41200 }, { "epoch": 3.24, "learning_rate": 0.0005, "loss": 2.1661, "step": 41300 }, { "epoch": 3.25, "learning_rate": 0.0005, "loss": 2.2012, "step": 41400 }, { "epoch": 3.25, "learning_rate": 0.0005, "loss": 2.1878, "step": 41500 }, { "epoch": 3.26, "learning_rate": 0.0005, "loss": 2.144, "step": 41600 }, { "epoch": 3.27, "learning_rate": 0.0005, "loss": 2.1595, "step": 41700 }, { "epoch": 3.28, "learning_rate": 0.0005, "loss": 2.1741, "step": 41800 }, { "epoch": 3.29, "learning_rate": 0.0005, "loss": 2.1908, "step": 41900 }, { "epoch": 3.29, "learning_rate": 0.0005, "loss": 2.1943, "step": 42000 }, { "epoch": 3.3, "learning_rate": 0.0005, "loss": 2.1714, "step": 42100 }, { "epoch": 3.31, "learning_rate": 0.0005, "loss": 2.1638, "step": 42200 }, { "epoch": 3.32, "learning_rate": 0.0005, "loss": 2.1751, "step": 42300 }, { "epoch": 3.32, "learning_rate": 0.0005, "loss": 2.1649, "step": 42400 }, { "epoch": 3.33, "learning_rate": 0.0005, "loss": 2.2036, "step": 42500 }, { "epoch": 3.34, "learning_rate": 0.0005, "loss": 2.1772, "step": 42600 }, { "epoch": 3.35, "learning_rate": 0.0005, "loss": 2.16, "step": 42700 }, { "epoch": 3.36, "learning_rate": 0.0005, "loss": 2.1918, "step": 42800 }, { "epoch": 3.36, "learning_rate": 0.0005, "loss": 2.1737, "step": 42900 }, { "epoch": 3.37, "learning_rate": 0.0005, "loss": 2.1684, "step": 43000 }, { "epoch": 3.38, "learning_rate": 0.0005, "loss": 2.1722, "step": 43100 }, { "epoch": 3.39, "learning_rate": 0.0005, "loss": 2.1881, "step": 43200 }, { "epoch": 3.4, "learning_rate": 0.0005, "loss": 2.1944, "step": 43300 }, { "epoch": 3.4, "learning_rate": 0.0005, "loss": 2.192, "step": 43400 }, { "epoch": 3.41, "learning_rate": 0.0005, "loss": 2.1617, "step": 43500 }, { "epoch": 3.42, "learning_rate": 0.0005, "loss": 2.2029, "step": 43600 }, { "epoch": 3.43, "learning_rate": 0.0005, "loss": 2.1596, "step": 43700 }, { "epoch": 3.43, "learning_rate": 0.0005, "loss": 2.1793, "step": 43800 }, { "epoch": 3.44, "learning_rate": 0.0005, "loss": 2.1792, "step": 43900 }, { "epoch": 3.45, "learning_rate": 0.0005, "loss": 2.1892, "step": 44000 }, { "epoch": 3.46, "learning_rate": 0.0005, "loss": 2.1759, "step": 44100 }, { "epoch": 3.47, "learning_rate": 0.0005, "loss": 2.1724, "step": 44200 }, { "epoch": 3.47, "learning_rate": 0.0005, "loss": 2.1689, "step": 44300 }, { "epoch": 3.48, "learning_rate": 0.0005, "loss": 2.1707, "step": 44400 }, { "epoch": 3.49, "learning_rate": 0.0005, "loss": 2.1845, "step": 44500 }, { "epoch": 3.5, "learning_rate": 0.0005, "loss": 2.1765, "step": 44600 }, { "epoch": 3.51, "learning_rate": 0.0005, "loss": 2.1788, "step": 44700 }, { "epoch": 3.51, "learning_rate": 0.0005, "loss": 2.1824, "step": 44800 }, { "epoch": 3.52, "learning_rate": 0.0005, "loss": 2.1826, "step": 44900 }, { "epoch": 3.53, "learning_rate": 0.0005, "loss": 2.1299, "step": 45000 }, { "epoch": 3.54, "learning_rate": 0.0005, "loss": 2.1819, "step": 45100 }, { "epoch": 3.54, "learning_rate": 0.0005, "loss": 2.1741, "step": 45200 }, { "epoch": 3.55, "learning_rate": 0.0005, "loss": 2.1896, "step": 45300 }, { "epoch": 3.56, "learning_rate": 0.0005, "loss": 2.1772, "step": 45400 }, { "epoch": 3.57, "learning_rate": 0.0005, "loss": 2.2326, "step": 45500 }, { "epoch": 3.58, "learning_rate": 0.0005, "loss": 2.1733, "step": 45600 }, { "epoch": 3.58, "learning_rate": 0.0005, "loss": 2.2147, "step": 45700 }, { "epoch": 3.59, "learning_rate": 0.0005, "loss": 2.1753, "step": 45800 }, { "epoch": 3.6, "learning_rate": 0.0005, "loss": 2.187, "step": 45900 }, { "epoch": 3.61, "learning_rate": 0.0005, "loss": 2.1863, "step": 46000 }, { "epoch": 3.62, "learning_rate": 0.0005, "loss": 2.1605, "step": 46100 }, { "epoch": 3.62, "learning_rate": 0.0005, "loss": 2.1484, "step": 46200 }, { "epoch": 3.63, "learning_rate": 0.0005, "loss": 2.2143, "step": 46300 }, { "epoch": 3.64, "learning_rate": 0.0005, "loss": 2.1812, "step": 46400 }, { "epoch": 3.65, "learning_rate": 0.0005, "loss": 2.1725, "step": 46500 }, { "epoch": 3.65, "learning_rate": 0.0005, "loss": 2.2017, "step": 46600 }, { "epoch": 3.66, "learning_rate": 0.0005, "loss": 2.225, "step": 46700 }, { "epoch": 3.67, "learning_rate": 0.0005, "loss": 2.1981, "step": 46800 }, { "epoch": 3.68, "learning_rate": 0.0005, "loss": 2.1845, "step": 46900 }, { "epoch": 3.69, "learning_rate": 0.0005, "loss": 2.1595, "step": 47000 }, { "epoch": 3.69, "learning_rate": 0.0005, "loss": 2.1557, "step": 47100 }, { "epoch": 3.7, "learning_rate": 0.0005, "loss": 2.1729, "step": 47200 }, { "epoch": 3.71, "learning_rate": 0.0005, "loss": 2.1649, "step": 47300 }, { "epoch": 3.72, "learning_rate": 0.0005, "loss": 2.1793, "step": 47400 }, { "epoch": 3.72, "learning_rate": 0.0005, "loss": 2.1388, "step": 47500 }, { "epoch": 3.73, "learning_rate": 0.0005, "loss": 2.1773, "step": 47600 }, { "epoch": 3.74, "learning_rate": 0.0005, "loss": 2.1915, "step": 47700 }, { "epoch": 3.75, "learning_rate": 0.0005, "loss": 2.1809, "step": 47800 }, { "epoch": 3.76, "learning_rate": 0.0005, "loss": 2.1909, "step": 47900 }, { "epoch": 3.76, "learning_rate": 0.0005, "loss": 2.153, "step": 48000 }, { "epoch": 3.77, "learning_rate": 0.0005, "loss": 2.1878, "step": 48100 }, { "epoch": 3.78, "learning_rate": 0.0005, "loss": 2.1892, "step": 48200 }, { "epoch": 3.79, "learning_rate": 0.0005, "loss": 2.1804, "step": 48300 }, { "epoch": 3.8, "learning_rate": 0.0005, "loss": 2.1694, "step": 48400 }, { "epoch": 3.8, "learning_rate": 0.0005, "loss": 2.208, "step": 48500 }, { "epoch": 3.81, "learning_rate": 0.0005, "loss": 2.153, "step": 48600 }, { "epoch": 3.82, "learning_rate": 0.0005, "loss": 2.215, "step": 48700 }, { "epoch": 3.83, "learning_rate": 0.0005, "loss": 2.1499, "step": 48800 }, { "epoch": 3.83, "learning_rate": 0.0005, "loss": 2.1766, "step": 48900 }, { "epoch": 3.84, "learning_rate": 0.0005, "loss": 2.1973, "step": 49000 }, { "epoch": 3.85, "learning_rate": 0.0005, "loss": 2.2039, "step": 49100 }, { "epoch": 3.86, "learning_rate": 0.0005, "loss": 2.1866, "step": 49200 }, { "epoch": 3.87, "learning_rate": 0.0005, "loss": 2.1763, "step": 49300 }, { "epoch": 3.87, "learning_rate": 0.0005, "loss": 2.1737, "step": 49400 }, { "epoch": 3.88, "learning_rate": 0.0005, "loss": 2.2036, "step": 49500 }, { "epoch": 3.89, "learning_rate": 0.0005, "loss": 2.21, "step": 49600 }, { "epoch": 3.9, "learning_rate": 0.0005, "loss": 2.137, "step": 49700 }, { "epoch": 3.91, "learning_rate": 0.0005, "loss": 2.1908, "step": 49800 }, { "epoch": 3.91, "learning_rate": 0.0005, "loss": 2.1764, "step": 49900 }, { "epoch": 3.92, "learning_rate": 0.0005, "loss": 2.2058, "step": 50000 }, { "epoch": 3.92, "eval_gen_len": 18.717577469762514, "eval_loss": 2.0990633964538574, "eval_rouge1": 33.2448, "eval_rouge2": 12.1995, "eval_rougeL": 27.0821, "eval_rougeLsum": 27.086, "eval_runtime": 360.4822, "eval_samples_per_second": 31.422, "eval_steps_per_second": 1.964, "step": 50000 }, { "epoch": 3.93, "learning_rate": 0.0005, "loss": 2.1585, "step": 50100 }, { "epoch": 3.94, "learning_rate": 0.0005, "loss": 2.1888, "step": 50200 }, { "epoch": 3.94, "learning_rate": 0.0005, "loss": 2.2, "step": 50300 }, { "epoch": 3.95, "learning_rate": 0.0005, "loss": 2.1772, "step": 50400 }, { "epoch": 3.96, "learning_rate": 0.0005, "loss": 2.2016, "step": 50500 }, { "epoch": 3.97, "learning_rate": 0.0005, "loss": 2.1783, "step": 50600 }, { "epoch": 3.98, "learning_rate": 0.0005, "loss": 2.1639, "step": 50700 }, { "epoch": 3.98, "learning_rate": 0.0005, "loss": 2.1585, "step": 50800 }, { "epoch": 3.99, "learning_rate": 0.0005, "loss": 2.1983, "step": 50900 }, { "epoch": 4.0, "learning_rate": 0.0005, "loss": 2.1695, "step": 51000 }, { "epoch": 4.01, "learning_rate": 0.0005, "loss": 2.0872, "step": 51100 }, { "epoch": 4.02, "learning_rate": 0.0005, "loss": 2.1085, "step": 51200 }, { "epoch": 4.02, "learning_rate": 0.0005, "loss": 2.1441, "step": 51300 }, { "epoch": 4.03, "learning_rate": 0.0005, "loss": 2.0954, "step": 51400 }, { "epoch": 4.04, "learning_rate": 0.0005, "loss": 2.1041, "step": 51500 }, { "epoch": 4.05, "learning_rate": 0.0005, "loss": 2.0745, "step": 51600 }, { "epoch": 4.05, "learning_rate": 0.0005, "loss": 2.097, "step": 51700 }, { "epoch": 4.06, "learning_rate": 0.0005, "loss": 2.119, "step": 51800 }, { "epoch": 4.07, "learning_rate": 0.0005, "loss": 2.0559, "step": 51900 }, { "epoch": 4.08, "learning_rate": 0.0005, "loss": 2.1543, "step": 52000 }, { "epoch": 4.09, "learning_rate": 0.0005, "loss": 2.1138, "step": 52100 }, { "epoch": 4.09, "learning_rate": 0.0005, "loss": 2.0916, "step": 52200 }, { "epoch": 4.1, "learning_rate": 0.0005, "loss": 2.1135, "step": 52300 }, { "epoch": 4.11, "learning_rate": 0.0005, "loss": 2.0917, "step": 52400 }, { "epoch": 4.12, "learning_rate": 0.0005, "loss": 2.1246, "step": 52500 }, { "epoch": 4.12, "learning_rate": 0.0005, "loss": 2.1137, "step": 52600 }, { "epoch": 4.13, "learning_rate": 0.0005, "loss": 2.1029, "step": 52700 }, { "epoch": 4.14, "learning_rate": 0.0005, "loss": 2.103, "step": 52800 }, { "epoch": 4.15, "learning_rate": 0.0005, "loss": 2.1063, "step": 52900 }, { "epoch": 4.16, "learning_rate": 0.0005, "loss": 2.1103, "step": 53000 }, { "epoch": 4.16, "learning_rate": 0.0005, "loss": 2.1083, "step": 53100 }, { "epoch": 4.17, "learning_rate": 0.0005, "loss": 2.1142, "step": 53200 }, { "epoch": 4.18, "learning_rate": 0.0005, "loss": 2.1066, "step": 53300 }, { "epoch": 4.19, "learning_rate": 0.0005, "loss": 2.1003, "step": 53400 }, { "epoch": 4.2, "learning_rate": 0.0005, "loss": 2.0934, "step": 53500 }, { "epoch": 4.2, "learning_rate": 0.0005, "loss": 2.0904, "step": 53600 }, { "epoch": 4.21, "learning_rate": 0.0005, "loss": 2.141, "step": 53700 }, { "epoch": 4.22, "learning_rate": 0.0005, "loss": 2.0869, "step": 53800 }, { "epoch": 4.23, "learning_rate": 0.0005, "loss": 2.1202, "step": 53900 }, { "epoch": 4.23, "learning_rate": 0.0005, "loss": 2.1131, "step": 54000 }, { "epoch": 4.24, "learning_rate": 0.0005, "loss": 2.1181, "step": 54100 }, { "epoch": 4.25, "learning_rate": 0.0005, "loss": 2.1484, "step": 54200 }, { "epoch": 4.26, "learning_rate": 0.0005, "loss": 2.1127, "step": 54300 }, { "epoch": 4.27, "learning_rate": 0.0005, "loss": 2.1079, "step": 54400 }, { "epoch": 4.27, "learning_rate": 0.0005, "loss": 2.0957, "step": 54500 }, { "epoch": 4.28, "learning_rate": 0.0005, "loss": 2.1231, "step": 54600 }, { "epoch": 4.29, "learning_rate": 0.0005, "loss": 2.1139, "step": 54700 }, { "epoch": 4.3, "learning_rate": 0.0005, "loss": 2.096, "step": 54800 }, { "epoch": 4.31, "learning_rate": 0.0005, "loss": 2.1174, "step": 54900 }, { "epoch": 4.31, "learning_rate": 0.0005, "loss": 2.1045, "step": 55000 }, { "epoch": 4.32, "learning_rate": 0.0005, "loss": 2.0988, "step": 55100 }, { "epoch": 4.33, "learning_rate": 0.0005, "loss": 2.0954, "step": 55200 }, { "epoch": 4.34, "learning_rate": 0.0005, "loss": 2.0964, "step": 55300 }, { "epoch": 4.34, "learning_rate": 0.0005, "loss": 2.1128, "step": 55400 }, { "epoch": 4.35, "learning_rate": 0.0005, "loss": 2.0874, "step": 55500 }, { "epoch": 4.36, "learning_rate": 0.0005, "loss": 2.1303, "step": 55600 }, { "epoch": 4.37, "learning_rate": 0.0005, "loss": 2.1261, "step": 55700 }, { "epoch": 4.38, "learning_rate": 0.0005, "loss": 2.0916, "step": 55800 }, { "epoch": 4.38, "learning_rate": 0.0005, "loss": 2.0894, "step": 55900 }, { "epoch": 4.39, "learning_rate": 0.0005, "loss": 2.1365, "step": 56000 }, { "epoch": 4.4, "learning_rate": 0.0005, "loss": 2.1081, "step": 56100 }, { "epoch": 4.41, "learning_rate": 0.0005, "loss": 2.143, "step": 56200 }, { "epoch": 4.41, "learning_rate": 0.0005, "loss": 2.1147, "step": 56300 }, { "epoch": 4.42, "learning_rate": 0.0005, "loss": 2.1072, "step": 56400 }, { "epoch": 4.43, "learning_rate": 0.0005, "loss": 2.1, "step": 56500 }, { "epoch": 4.44, "learning_rate": 0.0005, "loss": 2.1373, "step": 56600 }, { "epoch": 4.45, "learning_rate": 0.0005, "loss": 2.1385, "step": 56700 }, { "epoch": 4.45, "learning_rate": 0.0005, "loss": 2.1063, "step": 56800 }, { "epoch": 4.46, "learning_rate": 0.0005, "loss": 2.1563, "step": 56900 }, { "epoch": 4.47, "learning_rate": 0.0005, "loss": 2.1238, "step": 57000 }, { "epoch": 4.48, "learning_rate": 0.0005, "loss": 2.1087, "step": 57100 }, { "epoch": 4.49, "learning_rate": 0.0005, "loss": 2.1064, "step": 57200 }, { "epoch": 4.49, "learning_rate": 0.0005, "loss": 2.0979, "step": 57300 }, { "epoch": 4.5, "learning_rate": 0.0005, "loss": 2.0988, "step": 57400 }, { "epoch": 4.51, "learning_rate": 0.0005, "loss": 2.115, "step": 57500 }, { "epoch": 4.52, "learning_rate": 0.0005, "loss": 2.1191, "step": 57600 }, { "epoch": 4.52, "learning_rate": 0.0005, "loss": 2.098, "step": 57700 }, { "epoch": 4.53, "learning_rate": 0.0005, "loss": 2.1081, "step": 57800 }, { "epoch": 4.54, "learning_rate": 0.0005, "loss": 2.134, "step": 57900 }, { "epoch": 4.55, "learning_rate": 0.0005, "loss": 2.1103, "step": 58000 }, { "epoch": 4.56, "learning_rate": 0.0005, "loss": 2.1114, "step": 58100 }, { "epoch": 4.56, "learning_rate": 0.0005, "loss": 2.1201, "step": 58200 }, { "epoch": 4.57, "learning_rate": 0.0005, "loss": 2.1435, "step": 58300 }, { "epoch": 4.58, "learning_rate": 0.0005, "loss": 2.1254, "step": 58400 }, { "epoch": 4.59, "learning_rate": 0.0005, "loss": 2.1204, "step": 58500 }, { "epoch": 4.6, "learning_rate": 0.0005, "loss": 2.1461, "step": 58600 }, { "epoch": 4.6, "learning_rate": 0.0005, "loss": 2.1181, "step": 58700 }, { "epoch": 4.61, "learning_rate": 0.0005, "loss": 2.1405, "step": 58800 }, { "epoch": 4.62, "learning_rate": 0.0005, "loss": 2.1213, "step": 58900 }, { "epoch": 4.63, "learning_rate": 0.0005, "loss": 2.1087, "step": 59000 }, { "epoch": 4.63, "learning_rate": 0.0005, "loss": 2.1094, "step": 59100 }, { "epoch": 4.64, "learning_rate": 0.0005, "loss": 2.12, "step": 59200 }, { "epoch": 4.65, "learning_rate": 0.0005, "loss": 2.0994, "step": 59300 }, { "epoch": 4.66, "learning_rate": 0.0005, "loss": 2.1193, "step": 59400 }, { "epoch": 4.67, "learning_rate": 0.0005, "loss": 2.1288, "step": 59500 }, { "epoch": 4.67, "learning_rate": 0.0005, "loss": 2.1091, "step": 59600 }, { "epoch": 4.68, "learning_rate": 0.0005, "loss": 2.1191, "step": 59700 }, { "epoch": 4.69, "learning_rate": 0.0005, "loss": 2.1305, "step": 59800 }, { "epoch": 4.7, "learning_rate": 0.0005, "loss": 2.1271, "step": 59900 }, { "epoch": 4.71, "learning_rate": 0.0005, "loss": 2.1531, "step": 60000 }, { "epoch": 4.71, "eval_gen_len": 18.730025602542597, "eval_loss": 2.0838534832000732, "eval_rouge1": 33.6123, "eval_rouge2": 12.46, "eval_rougeL": 27.3966, "eval_rougeLsum": 27.3902, "eval_runtime": 366.4271, "eval_samples_per_second": 30.912, "eval_steps_per_second": 1.932, "step": 60000 }, { "epoch": 4.71, "learning_rate": 0.0005, "loss": 2.1047, "step": 60100 }, { "epoch": 4.72, "learning_rate": 0.0005, "loss": 2.1382, "step": 60200 }, { "epoch": 4.73, "learning_rate": 0.0005, "loss": 2.1807, "step": 60300 }, { "epoch": 4.74, "learning_rate": 0.0005, "loss": 2.1061, "step": 60400 }, { "epoch": 4.74, "learning_rate": 0.0005, "loss": 2.1272, "step": 60500 }, { "epoch": 4.75, "learning_rate": 0.0005, "loss": 2.1286, "step": 60600 }, { "epoch": 4.76, "learning_rate": 0.0005, "loss": 2.1149, "step": 60700 }, { "epoch": 4.77, "learning_rate": 0.0005, "loss": 2.1097, "step": 60800 }, { "epoch": 4.78, "learning_rate": 0.0005, "loss": 2.1103, "step": 60900 }, { "epoch": 4.78, "learning_rate": 0.0005, "loss": 2.121, "step": 61000 }, { "epoch": 4.79, "learning_rate": 0.0005, "loss": 2.1304, "step": 61100 }, { "epoch": 4.8, "learning_rate": 0.0005, "loss": 2.1219, "step": 61200 }, { "epoch": 4.81, "learning_rate": 0.0005, "loss": 2.1391, "step": 61300 }, { "epoch": 4.81, "learning_rate": 0.0005, "loss": 2.1386, "step": 61400 }, { "epoch": 4.82, "learning_rate": 0.0005, "loss": 2.1213, "step": 61500 }, { "epoch": 4.83, "learning_rate": 0.0005, "loss": 2.0743, "step": 61600 }, { "epoch": 4.84, "learning_rate": 0.0005, "loss": 2.101, "step": 61700 }, { "epoch": 4.85, "learning_rate": 0.0005, "loss": 2.1325, "step": 61800 }, { "epoch": 4.85, "learning_rate": 0.0005, "loss": 2.128, "step": 61900 }, { "epoch": 4.86, "learning_rate": 0.0005, "loss": 2.1176, "step": 62000 }, { "epoch": 4.87, "learning_rate": 0.0005, "loss": 2.1379, "step": 62100 }, { "epoch": 4.88, "learning_rate": 0.0005, "loss": 2.1102, "step": 62200 }, { "epoch": 4.89, "learning_rate": 0.0005, "loss": 2.149, "step": 62300 }, { "epoch": 4.89, "learning_rate": 0.0005, "loss": 2.1386, "step": 62400 }, { "epoch": 4.9, "learning_rate": 0.0005, "loss": 2.1165, "step": 62500 }, { "epoch": 4.91, "learning_rate": 0.0005, "loss": 2.1297, "step": 62600 }, { "epoch": 4.92, "learning_rate": 0.0005, "loss": 2.1164, "step": 62700 }, { "epoch": 4.92, "learning_rate": 0.0005, "loss": 2.1092, "step": 62800 }, { "epoch": 4.93, "learning_rate": 0.0005, "loss": 2.1188, "step": 62900 }, { "epoch": 4.94, "learning_rate": 0.0005, "loss": 2.128, "step": 63000 }, { "epoch": 4.95, "learning_rate": 0.0005, "loss": 2.1438, "step": 63100 }, { "epoch": 4.96, "learning_rate": 0.0005, "loss": 2.119, "step": 63200 }, { "epoch": 4.96, "learning_rate": 0.0005, "loss": 2.1456, "step": 63300 }, { "epoch": 4.97, "learning_rate": 0.0005, "loss": 2.1314, "step": 63400 }, { "epoch": 4.98, "learning_rate": 0.0005, "loss": 2.1293, "step": 63500 }, { "epoch": 4.99, "learning_rate": 0.0005, "loss": 2.1111, "step": 63600 }, { "epoch": 5.0, "learning_rate": 0.0005, "loss": 2.1195, "step": 63700 }, { "epoch": 5.0, "learning_rate": 0.0005, "loss": 2.1054, "step": 63800 }, { "epoch": 5.01, "learning_rate": 0.0005, "loss": 2.0384, "step": 63900 }, { "epoch": 5.02, "learning_rate": 0.0005, "loss": 2.034, "step": 64000 }, { "epoch": 5.03, "learning_rate": 0.0005, "loss": 2.0256, "step": 64100 }, { "epoch": 5.03, "learning_rate": 0.0005, "loss": 2.0358, "step": 64200 }, { "epoch": 5.04, "learning_rate": 0.0005, "loss": 2.0021, "step": 64300 }, { "epoch": 5.05, "learning_rate": 0.0005, "loss": 2.0582, "step": 64400 }, { "epoch": 5.06, "learning_rate": 0.0005, "loss": 2.0526, "step": 64500 }, { "epoch": 5.07, "learning_rate": 0.0005, "loss": 2.0456, "step": 64600 }, { "epoch": 5.07, "learning_rate": 0.0005, "loss": 2.0602, "step": 64700 }, { "epoch": 5.08, "learning_rate": 0.0005, "loss": 2.0504, "step": 64800 }, { "epoch": 5.09, "learning_rate": 0.0005, "loss": 2.0517, "step": 64900 }, { "epoch": 5.1, "learning_rate": 0.0005, "loss": 2.0499, "step": 65000 }, { "epoch": 5.11, "learning_rate": 0.0005, "loss": 2.0296, "step": 65100 }, { "epoch": 5.11, "learning_rate": 0.0005, "loss": 2.054, "step": 65200 }, { "epoch": 5.12, "learning_rate": 0.0005, "loss": 2.0454, "step": 65300 }, { "epoch": 5.13, "learning_rate": 0.0005, "loss": 2.0635, "step": 65400 }, { "epoch": 5.14, "learning_rate": 0.0005, "loss": 2.0474, "step": 65500 }, { "epoch": 5.14, "learning_rate": 0.0005, "loss": 2.0691, "step": 65600 }, { "epoch": 5.15, "learning_rate": 0.0005, "loss": 2.0333, "step": 65700 }, { "epoch": 5.16, "learning_rate": 0.0005, "loss": 2.0033, "step": 65800 }, { "epoch": 5.17, "learning_rate": 0.0005, "loss": 2.0756, "step": 65900 }, { "epoch": 5.18, "learning_rate": 0.0005, "loss": 2.0386, "step": 66000 }, { "epoch": 5.18, "learning_rate": 0.0005, "loss": 2.0326, "step": 66100 }, { "epoch": 5.19, "learning_rate": 0.0005, "loss": 2.0672, "step": 66200 }, { "epoch": 5.2, "learning_rate": 0.0005, "loss": 2.0822, "step": 66300 }, { "epoch": 5.21, "learning_rate": 0.0005, "loss": 2.0693, "step": 66400 }, { "epoch": 5.21, "learning_rate": 0.0005, "loss": 2.0563, "step": 66500 }, { "epoch": 5.22, "learning_rate": 0.0005, "loss": 2.0756, "step": 66600 }, { "epoch": 5.23, "learning_rate": 0.0005, "loss": 2.0132, "step": 66700 }, { "epoch": 5.24, "learning_rate": 0.0005, "loss": 2.0786, "step": 66800 }, { "epoch": 5.25, "learning_rate": 0.0005, "loss": 2.067, "step": 66900 }, { "epoch": 5.25, "learning_rate": 0.0005, "loss": 2.059, "step": 67000 }, { "epoch": 5.26, "learning_rate": 0.0005, "loss": 2.0199, "step": 67100 }, { "epoch": 5.27, "learning_rate": 0.0005, "loss": 2.0458, "step": 67200 }, { "epoch": 5.28, "learning_rate": 0.0005, "loss": 2.0783, "step": 67300 }, { "epoch": 5.29, "learning_rate": 0.0005, "loss": 2.076, "step": 67400 }, { "epoch": 5.29, "learning_rate": 0.0005, "loss": 2.0714, "step": 67500 }, { "epoch": 5.3, "learning_rate": 0.0005, "loss": 2.0723, "step": 67600 }, { "epoch": 5.31, "learning_rate": 0.0005, "loss": 2.0727, "step": 67700 }, { "epoch": 5.32, "learning_rate": 0.0005, "loss": 2.0356, "step": 67800 }, { "epoch": 5.32, "learning_rate": 0.0005, "loss": 2.0634, "step": 67900 }, { "epoch": 5.33, "learning_rate": 0.0005, "loss": 2.0605, "step": 68000 }, { "epoch": 5.34, "learning_rate": 0.0005, "loss": 2.0342, "step": 68100 }, { "epoch": 5.35, "learning_rate": 0.0005, "loss": 2.0354, "step": 68200 }, { "epoch": 5.36, "learning_rate": 0.0005, "loss": 2.0479, "step": 68300 }, { "epoch": 5.36, "learning_rate": 0.0005, "loss": 2.0752, "step": 68400 }, { "epoch": 5.37, "learning_rate": 0.0005, "loss": 2.0633, "step": 68500 }, { "epoch": 5.38, "learning_rate": 0.0005, "loss": 2.0621, "step": 68600 }, { "epoch": 5.39, "learning_rate": 0.0005, "loss": 2.0963, "step": 68700 }, { "epoch": 5.4, "learning_rate": 0.0005, "loss": 2.0484, "step": 68800 }, { "epoch": 5.4, "learning_rate": 0.0005, "loss": 2.0551, "step": 68900 }, { "epoch": 5.41, "learning_rate": 0.0005, "loss": 2.0968, "step": 69000 }, { "epoch": 5.42, "learning_rate": 0.0005, "loss": 2.0809, "step": 69100 }, { "epoch": 5.43, "learning_rate": 0.0005, "loss": 2.0783, "step": 69200 }, { "epoch": 5.43, "learning_rate": 0.0005, "loss": 2.0593, "step": 69300 }, { "epoch": 5.44, "learning_rate": 0.0005, "loss": 2.0443, "step": 69400 }, { "epoch": 5.45, "learning_rate": 0.0005, "loss": 2.0586, "step": 69500 }, { "epoch": 5.46, "learning_rate": 0.0005, "loss": 2.0708, "step": 69600 }, { "epoch": 5.47, "learning_rate": 0.0005, "loss": 2.079, "step": 69700 }, { "epoch": 5.47, "learning_rate": 0.0005, "loss": 2.0888, "step": 69800 }, { "epoch": 5.48, "learning_rate": 0.0005, "loss": 2.0972, "step": 69900 }, { "epoch": 5.49, "learning_rate": 0.0005, "loss": 2.063, "step": 70000 }, { "epoch": 5.49, "eval_gen_len": 18.784497219034165, "eval_loss": 2.068556785583496, "eval_rouge1": 33.6877, "eval_rouge2": 12.6196, "eval_rougeL": 27.5291, "eval_rougeLsum": 27.5307, "eval_runtime": 359.5783, "eval_samples_per_second": 31.501, "eval_steps_per_second": 1.969, "step": 70000 }, { "epoch": 5.5, "learning_rate": 0.0005, "loss": 2.0793, "step": 70100 }, { "epoch": 5.51, "learning_rate": 0.0005, "loss": 2.0636, "step": 70200 }, { "epoch": 5.51, "learning_rate": 0.0005, "loss": 2.0758, "step": 70300 }, { "epoch": 5.52, "learning_rate": 0.0005, "loss": 2.0628, "step": 70400 }, { "epoch": 5.53, "learning_rate": 0.0005, "loss": 2.0622, "step": 70500 }, { "epoch": 5.54, "learning_rate": 0.0005, "loss": 2.0957, "step": 70600 }, { "epoch": 5.54, "learning_rate": 0.0005, "loss": 2.096, "step": 70700 }, { "epoch": 5.55, "learning_rate": 0.0005, "loss": 2.0606, "step": 70800 }, { "epoch": 5.56, "learning_rate": 0.0005, "loss": 2.1034, "step": 70900 }, { "epoch": 5.57, "learning_rate": 0.0005, "loss": 2.0591, "step": 71000 }, { "epoch": 5.58, "learning_rate": 0.0005, "loss": 2.0884, "step": 71100 }, { "epoch": 5.58, "learning_rate": 0.0005, "loss": 2.0633, "step": 71200 }, { "epoch": 5.59, "learning_rate": 0.0005, "loss": 2.0562, "step": 71300 }, { "epoch": 5.6, "learning_rate": 0.0005, "loss": 2.0772, "step": 71400 }, { "epoch": 5.61, "learning_rate": 0.0005, "loss": 2.126, "step": 71500 }, { "epoch": 5.61, "learning_rate": 0.0005, "loss": 2.0717, "step": 71600 }, { "epoch": 5.62, "learning_rate": 0.0005, "loss": 2.081, "step": 71700 }, { "epoch": 5.63, "learning_rate": 0.0005, "loss": 2.0639, "step": 71800 }, { "epoch": 5.64, "learning_rate": 0.0005, "loss": 2.1004, "step": 71900 }, { "epoch": 5.65, "learning_rate": 0.0005, "loss": 2.0877, "step": 72000 }, { "epoch": 5.65, "learning_rate": 0.0005, "loss": 2.0836, "step": 72100 }, { "epoch": 5.66, "learning_rate": 0.0005, "loss": 2.0649, "step": 72200 }, { "epoch": 5.67, "learning_rate": 0.0005, "loss": 2.0571, "step": 72300 }, { "epoch": 5.68, "learning_rate": 0.0005, "loss": 2.0496, "step": 72400 }, { "epoch": 5.69, "learning_rate": 0.0005, "loss": 2.0761, "step": 72500 }, { "epoch": 5.69, "learning_rate": 0.0005, "loss": 2.0765, "step": 72600 }, { "epoch": 5.7, "learning_rate": 0.0005, "loss": 2.0764, "step": 72700 }, { "epoch": 5.71, "learning_rate": 0.0005, "loss": 2.0524, "step": 72800 }, { "epoch": 5.72, "learning_rate": 0.0005, "loss": 2.0802, "step": 72900 }, { "epoch": 5.72, "learning_rate": 0.0005, "loss": 2.0551, "step": 73000 }, { "epoch": 5.73, "learning_rate": 0.0005, "loss": 2.0552, "step": 73100 }, { "epoch": 5.74, "learning_rate": 0.0005, "loss": 2.0579, "step": 73200 }, { "epoch": 5.75, "learning_rate": 0.0005, "loss": 2.0506, "step": 73300 }, { "epoch": 5.76, "learning_rate": 0.0005, "loss": 2.0771, "step": 73400 }, { "epoch": 5.76, "learning_rate": 0.0005, "loss": 2.0851, "step": 73500 }, { "epoch": 5.77, "learning_rate": 0.0005, "loss": 2.0828, "step": 73600 }, { "epoch": 5.78, "learning_rate": 0.0005, "loss": 2.0987, "step": 73700 }, { "epoch": 5.79, "learning_rate": 0.0005, "loss": 2.1023, "step": 73800 }, { "epoch": 5.8, "learning_rate": 0.0005, "loss": 2.0703, "step": 73900 }, { "epoch": 5.8, "learning_rate": 0.0005, "loss": 2.0784, "step": 74000 }, { "epoch": 5.81, "learning_rate": 0.0005, "loss": 2.0518, "step": 74100 }, { "epoch": 5.82, "learning_rate": 0.0005, "loss": 2.0901, "step": 74200 }, { "epoch": 5.83, "learning_rate": 0.0005, "loss": 2.0442, "step": 74300 }, { "epoch": 5.83, "learning_rate": 0.0005, "loss": 2.0926, "step": 74400 }, { "epoch": 5.84, "learning_rate": 0.0005, "loss": 2.0789, "step": 74500 }, { "epoch": 5.85, "learning_rate": 0.0005, "loss": 2.0929, "step": 74600 }, { "epoch": 5.86, "learning_rate": 0.0005, "loss": 2.0689, "step": 74700 }, { "epoch": 5.87, "learning_rate": 0.0005, "loss": 2.1078, "step": 74800 }, { "epoch": 5.87, "learning_rate": 0.0005, "loss": 2.0821, "step": 74900 }, { "epoch": 5.88, "learning_rate": 0.0005, "loss": 2.0605, "step": 75000 }, { "epoch": 5.89, "learning_rate": 0.0005, "loss": 2.0762, "step": 75100 }, { "epoch": 5.9, "learning_rate": 0.0005, "loss": 2.0827, "step": 75200 }, { "epoch": 5.9, "learning_rate": 0.0005, "loss": 2.0828, "step": 75300 }, { "epoch": 5.91, "learning_rate": 0.0005, "loss": 2.0767, "step": 75400 }, { "epoch": 5.92, "learning_rate": 0.0005, "loss": 2.0732, "step": 75500 }, { "epoch": 5.93, "learning_rate": 0.0005, "loss": 2.0692, "step": 75600 }, { "epoch": 5.94, "learning_rate": 0.0005, "loss": 2.0544, "step": 75700 }, { "epoch": 5.94, "learning_rate": 0.0005, "loss": 2.0736, "step": 75800 }, { "epoch": 5.95, "learning_rate": 0.0005, "loss": 2.0803, "step": 75900 }, { "epoch": 5.96, "learning_rate": 0.0005, "loss": 2.0889, "step": 76000 }, { "epoch": 5.97, "learning_rate": 0.0005, "loss": 2.0841, "step": 76100 }, { "epoch": 5.98, "learning_rate": 0.0005, "loss": 2.064, "step": 76200 }, { "epoch": 5.98, "learning_rate": 0.0005, "loss": 2.0575, "step": 76300 }, { "epoch": 5.99, "learning_rate": 0.0005, "loss": 2.0763, "step": 76400 }, { "epoch": 6.0, "learning_rate": 0.0005, "loss": 2.0605, "step": 76500 }, { "epoch": 6.01, "learning_rate": 0.0005, "loss": 2.0113, "step": 76600 }, { "epoch": 6.01, "learning_rate": 0.0005, "loss": 1.9809, "step": 76700 }, { "epoch": 6.02, "learning_rate": 0.0005, "loss": 2.0006, "step": 76800 }, { "epoch": 6.03, "learning_rate": 0.0005, "loss": 1.9569, "step": 76900 }, { "epoch": 6.04, "learning_rate": 0.0005, "loss": 1.985, "step": 77000 }, { "epoch": 6.05, "learning_rate": 0.0005, "loss": 1.9736, "step": 77100 }, { "epoch": 6.05, "learning_rate": 0.0005, "loss": 1.9773, "step": 77200 }, { "epoch": 6.06, "learning_rate": 0.0005, "loss": 2.0067, "step": 77300 }, { "epoch": 6.07, "learning_rate": 0.0005, "loss": 2.0069, "step": 77400 }, { "epoch": 6.08, "learning_rate": 0.0005, "loss": 2.0018, "step": 77500 }, { "epoch": 6.09, "learning_rate": 0.0005, "loss": 1.9895, "step": 77600 }, { "epoch": 6.09, "learning_rate": 0.0005, "loss": 1.9927, "step": 77700 }, { "epoch": 6.1, "learning_rate": 0.0005, "loss": 1.9861, "step": 77800 }, { "epoch": 6.11, "learning_rate": 0.0005, "loss": 1.9965, "step": 77900 }, { "epoch": 6.12, "learning_rate": 0.0005, "loss": 2.0095, "step": 78000 }, { "epoch": 6.12, "learning_rate": 0.0005, "loss": 2.0137, "step": 78100 }, { "epoch": 6.13, "learning_rate": 0.0005, "loss": 1.9971, "step": 78200 }, { "epoch": 6.14, "learning_rate": 0.0005, "loss": 2.0021, "step": 78300 }, { "epoch": 6.15, "learning_rate": 0.0005, "loss": 2.0098, "step": 78400 }, { "epoch": 6.16, "learning_rate": 0.0005, "loss": 2.029, "step": 78500 }, { "epoch": 6.16, "learning_rate": 0.0005, "loss": 2.0294, "step": 78600 }, { "epoch": 6.17, "learning_rate": 0.0005, "loss": 2.0184, "step": 78700 }, { "epoch": 6.18, "learning_rate": 0.0005, "loss": 2.0426, "step": 78800 }, { "epoch": 6.19, "learning_rate": 0.0005, "loss": 2.0107, "step": 78900 }, { "epoch": 6.2, "learning_rate": 0.0005, "loss": 2.0161, "step": 79000 }, { "epoch": 6.2, "learning_rate": 0.0005, "loss": 2.0044, "step": 79100 }, { "epoch": 6.21, "learning_rate": 0.0005, "loss": 2.0025, "step": 79200 }, { "epoch": 6.22, "learning_rate": 0.0005, "loss": 2.0023, "step": 79300 }, { "epoch": 6.23, "learning_rate": 0.0005, "loss": 2.0455, "step": 79400 }, { "epoch": 6.23, "learning_rate": 0.0005, "loss": 2.0443, "step": 79500 }, { "epoch": 6.24, "learning_rate": 0.0005, "loss": 2.0365, "step": 79600 }, { "epoch": 6.25, "learning_rate": 0.0005, "loss": 2.0225, "step": 79700 }, { "epoch": 6.26, "learning_rate": 0.0005, "loss": 2.0021, "step": 79800 }, { "epoch": 6.27, "learning_rate": 0.0005, "loss": 2.0114, "step": 79900 }, { "epoch": 6.27, "learning_rate": 0.0005, "loss": 1.982, "step": 80000 }, { "epoch": 6.27, "eval_gen_len": 18.763926900326652, "eval_loss": 2.057493209838867, "eval_rouge1": 33.9585, "eval_rouge2": 12.853, "eval_rougeL": 27.7139, "eval_rougeLsum": 27.7071, "eval_runtime": 362.3051, "eval_samples_per_second": 31.264, "eval_steps_per_second": 1.954, "step": 80000 }, { "epoch": 6.28, "learning_rate": 0.0005, "loss": 2.0384, "step": 80100 }, { "epoch": 6.29, "learning_rate": 0.0005, "loss": 2.0101, "step": 80200 }, { "epoch": 6.3, "learning_rate": 0.0005, "loss": 1.9886, "step": 80300 }, { "epoch": 6.3, "learning_rate": 0.0005, "loss": 2.0217, "step": 80400 }, { "epoch": 6.31, "learning_rate": 0.0005, "loss": 1.9968, "step": 80500 }, { "epoch": 6.32, "learning_rate": 0.0005, "loss": 2.0091, "step": 80600 }, { "epoch": 6.33, "learning_rate": 0.0005, "loss": 2.0001, "step": 80700 }, { "epoch": 6.34, "learning_rate": 0.0005, "loss": 2.0355, "step": 80800 }, { "epoch": 6.34, "learning_rate": 0.0005, "loss": 2.0076, "step": 80900 }, { "epoch": 6.35, "learning_rate": 0.0005, "loss": 2.0297, "step": 81000 }, { "epoch": 6.36, "learning_rate": 0.0005, "loss": 1.9883, "step": 81100 }, { "epoch": 6.37, "learning_rate": 0.0005, "loss": 1.9958, "step": 81200 }, { "epoch": 6.38, "learning_rate": 0.0005, "loss": 2.0411, "step": 81300 }, { "epoch": 6.38, "learning_rate": 0.0005, "loss": 1.9781, "step": 81400 }, { "epoch": 6.39, "learning_rate": 0.0005, "loss": 2.047, "step": 81500 }, { "epoch": 6.4, "learning_rate": 0.0005, "loss": 2.0289, "step": 81600 }, { "epoch": 6.41, "learning_rate": 0.0005, "loss": 2.0189, "step": 81700 }, { "epoch": 6.41, "learning_rate": 0.0005, "loss": 2.086, "step": 81800 }, { "epoch": 6.42, "learning_rate": 0.0005, "loss": 2.0131, "step": 81900 }, { "epoch": 6.43, "learning_rate": 0.0005, "loss": 2.0036, "step": 82000 }, { "epoch": 6.44, "learning_rate": 0.0005, "loss": 2.0146, "step": 82100 }, { "epoch": 6.45, "learning_rate": 0.0005, "loss": 2.0362, "step": 82200 }, { "epoch": 6.45, "learning_rate": 0.0005, "loss": 2.0268, "step": 82300 }, { "epoch": 6.46, "learning_rate": 0.0005, "loss": 2.0321, "step": 82400 }, { "epoch": 6.47, "learning_rate": 0.0005, "loss": 2.0105, "step": 82500 }, { "epoch": 6.48, "learning_rate": 0.0005, "loss": 2.0522, "step": 82600 }, { "epoch": 6.49, "learning_rate": 0.0005, "loss": 2.0456, "step": 82700 }, { "epoch": 6.49, "learning_rate": 0.0005, "loss": 1.9935, "step": 82800 }, { "epoch": 6.5, "learning_rate": 0.0005, "loss": 2.041, "step": 82900 }, { "epoch": 6.51, "learning_rate": 0.0005, "loss": 1.9851, "step": 83000 }, { "epoch": 6.52, "learning_rate": 0.0005, "loss": 2.0574, "step": 83100 }, { "epoch": 6.52, "learning_rate": 0.0005, "loss": 2.0324, "step": 83200 }, { "epoch": 6.53, "learning_rate": 0.0005, "loss": 2.0367, "step": 83300 }, { "epoch": 6.54, "learning_rate": 0.0005, "loss": 2.0264, "step": 83400 }, { "epoch": 6.55, "learning_rate": 0.0005, "loss": 2.0314, "step": 83500 }, { "epoch": 6.56, "learning_rate": 0.0005, "loss": 1.9554, "step": 83600 }, { "epoch": 6.56, "learning_rate": 0.0005, "loss": 2.0183, "step": 83700 }, { "epoch": 6.57, "learning_rate": 0.0005, "loss": 2.0161, "step": 83800 }, { "epoch": 6.58, "learning_rate": 0.0005, "loss": 2.0337, "step": 83900 }, { "epoch": 6.59, "learning_rate": 0.0005, "loss": 2.0075, "step": 84000 }, { "epoch": 6.6, "learning_rate": 0.0005, "loss": 2.0191, "step": 84100 }, { "epoch": 6.6, "learning_rate": 0.0005, "loss": 2.0136, "step": 84200 }, { "epoch": 6.61, "learning_rate": 0.0005, "loss": 2.0535, "step": 84300 }, { "epoch": 6.62, "learning_rate": 0.0005, "loss": 2.0413, "step": 84400 }, { "epoch": 6.63, "learning_rate": 0.0005, "loss": 2.0472, "step": 84500 }, { "epoch": 6.63, "learning_rate": 0.0005, "loss": 2.0352, "step": 84600 }, { "epoch": 6.64, "learning_rate": 0.0005, "loss": 2.0219, "step": 84700 }, { "epoch": 6.65, "learning_rate": 0.0005, "loss": 2.0327, "step": 84800 }, { "epoch": 6.66, "learning_rate": 0.0005, "loss": 2.0169, "step": 84900 }, { "epoch": 6.67, "learning_rate": 0.0005, "loss": 2.0091, "step": 85000 }, { "epoch": 6.67, "learning_rate": 0.0005, "loss": 2.0307, "step": 85100 }, { "epoch": 6.68, "learning_rate": 0.0005, "loss": 2.0415, "step": 85200 }, { "epoch": 6.69, "learning_rate": 0.0005, "loss": 1.9875, "step": 85300 }, { "epoch": 6.7, "learning_rate": 0.0005, "loss": 2.0242, "step": 85400 }, { "epoch": 6.7, "learning_rate": 0.0005, "loss": 2.0333, "step": 85500 }, { "epoch": 6.71, "learning_rate": 0.0005, "loss": 2.0341, "step": 85600 }, { "epoch": 6.72, "learning_rate": 0.0005, "loss": 2.0467, "step": 85700 }, { "epoch": 6.73, "learning_rate": 0.0005, "loss": 2.0223, "step": 85800 }, { "epoch": 6.74, "learning_rate": 0.0005, "loss": 2.0483, "step": 85900 }, { "epoch": 6.74, "learning_rate": 0.0005, "loss": 2.0412, "step": 86000 }, { "epoch": 6.75, "learning_rate": 0.0005, "loss": 2.0299, "step": 86100 }, { "epoch": 6.76, "learning_rate": 0.0005, "loss": 2.0148, "step": 86200 }, { "epoch": 6.77, "learning_rate": 0.0005, "loss": 2.0199, "step": 86300 }, { "epoch": 6.78, "learning_rate": 0.0005, "loss": 2.0394, "step": 86400 }, { "epoch": 6.78, "learning_rate": 0.0005, "loss": 2.0118, "step": 86500 }, { "epoch": 6.79, "learning_rate": 0.0005, "loss": 2.0217, "step": 86600 }, { "epoch": 6.8, "learning_rate": 0.0005, "loss": 2.063, "step": 86700 }, { "epoch": 6.81, "learning_rate": 0.0005, "loss": 2.0358, "step": 86800 }, { "epoch": 6.81, "learning_rate": 0.0005, "loss": 2.0223, "step": 86900 }, { "epoch": 6.82, "learning_rate": 0.0005, "loss": 2.0308, "step": 87000 }, { "epoch": 6.83, "learning_rate": 0.0005, "loss": 2.0555, "step": 87100 }, { "epoch": 6.84, "learning_rate": 0.0005, "loss": 2.0664, "step": 87200 }, { "epoch": 6.85, "learning_rate": 0.0005, "loss": 2.0429, "step": 87300 }, { "epoch": 6.85, "learning_rate": 0.0005, "loss": 2.0329, "step": 87400 }, { "epoch": 6.86, "learning_rate": 0.0005, "loss": 2.0086, "step": 87500 }, { "epoch": 6.87, "learning_rate": 0.0005, "loss": 2.0284, "step": 87600 }, { "epoch": 6.88, "learning_rate": 0.0005, "loss": 2.0535, "step": 87700 }, { "epoch": 6.89, "learning_rate": 0.0005, "loss": 2.0376, "step": 87800 }, { "epoch": 6.89, "learning_rate": 0.0005, "loss": 2.0191, "step": 87900 }, { "epoch": 6.9, "learning_rate": 0.0005, "loss": 2.0417, "step": 88000 }, { "epoch": 6.91, "learning_rate": 0.0005, "loss": 2.0254, "step": 88100 }, { "epoch": 6.92, "learning_rate": 0.0005, "loss": 2.0469, "step": 88200 }, { "epoch": 6.92, "learning_rate": 0.0005, "loss": 2.0422, "step": 88300 }, { "epoch": 6.93, "learning_rate": 0.0005, "loss": 2.0291, "step": 88400 }, { "epoch": 6.94, "learning_rate": 0.0005, "loss": 2.0549, "step": 88500 }, { "epoch": 6.95, "learning_rate": 0.0005, "loss": 2.0494, "step": 88600 }, { "epoch": 6.96, "learning_rate": 0.0005, "loss": 2.0522, "step": 88700 }, { "epoch": 6.96, "learning_rate": 0.0005, "loss": 2.0315, "step": 88800 }, { "epoch": 6.97, "learning_rate": 0.0005, "loss": 2.0284, "step": 88900 }, { "epoch": 6.98, "learning_rate": 0.0005, "loss": 2.0619, "step": 89000 }, { "epoch": 6.99, "learning_rate": 0.0005, "loss": 2.0335, "step": 89100 }, { "epoch": 6.99, "learning_rate": 0.0005, "loss": 2.0259, "step": 89200 }, { "epoch": 7.0, "learning_rate": 0.0005, "loss": 2.0142, "step": 89300 }, { "epoch": 7.01, "learning_rate": 0.0005, "loss": 1.9777, "step": 89400 }, { "epoch": 7.02, "learning_rate": 0.0005, "loss": 1.9655, "step": 89500 }, { "epoch": 7.03, "learning_rate": 0.0005, "loss": 1.9457, "step": 89600 }, { "epoch": 7.03, "learning_rate": 0.0005, "loss": 1.9775, "step": 89700 }, { "epoch": 7.04, "learning_rate": 0.0005, "loss": 1.9603, "step": 89800 }, { "epoch": 7.05, "learning_rate": 0.0005, "loss": 1.9705, "step": 89900 }, { "epoch": 7.06, "learning_rate": 0.0005, "loss": 1.9568, "step": 90000 }, { "epoch": 7.06, "eval_gen_len": 18.745916835878873, "eval_loss": 2.060124635696411, "eval_rouge1": 34.3703, "eval_rouge2": 13.056, "eval_rougeL": 28.0273, "eval_rougeLsum": 28.0164, "eval_runtime": 360.3535, "eval_samples_per_second": 31.433, "eval_steps_per_second": 1.965, "step": 90000 }, { "epoch": 7.07, "learning_rate": 0.0005, "loss": 1.95, "step": 90100 }, { "epoch": 7.07, "learning_rate": 0.0005, "loss": 1.9641, "step": 90200 }, { "epoch": 7.08, "learning_rate": 0.0005, "loss": 1.9718, "step": 90300 }, { "epoch": 7.09, "learning_rate": 0.0005, "loss": 1.9298, "step": 90400 }, { "epoch": 7.1, "learning_rate": 0.0005, "loss": 1.926, "step": 90500 }, { "epoch": 7.1, "learning_rate": 0.0005, "loss": 1.9711, "step": 90600 }, { "epoch": 7.11, "learning_rate": 0.0005, "loss": 1.955, "step": 90700 }, { "epoch": 7.12, "learning_rate": 0.0005, "loss": 1.9411, "step": 90800 }, { "epoch": 7.13, "learning_rate": 0.0005, "loss": 1.9471, "step": 90900 }, { "epoch": 7.14, "learning_rate": 0.0005, "loss": 1.9949, "step": 91000 }, { "epoch": 7.14, "learning_rate": 0.0005, "loss": 1.9662, "step": 91100 }, { "epoch": 7.15, "learning_rate": 0.0005, "loss": 1.9512, "step": 91200 }, { "epoch": 7.16, "learning_rate": 0.0005, "loss": 1.9485, "step": 91300 }, { "epoch": 7.17, "learning_rate": 0.0005, "loss": 1.9587, "step": 91400 }, { "epoch": 7.18, "learning_rate": 0.0005, "loss": 2.0031, "step": 91500 }, { "epoch": 7.18, "learning_rate": 0.0005, "loss": 1.9903, "step": 91600 }, { "epoch": 7.19, "learning_rate": 0.0005, "loss": 1.9852, "step": 91700 }, { "epoch": 7.2, "learning_rate": 0.0005, "loss": 1.9856, "step": 91800 }, { "epoch": 7.21, "learning_rate": 0.0005, "loss": 1.9691, "step": 91900 }, { "epoch": 7.21, "learning_rate": 0.0005, "loss": 1.9728, "step": 92000 }, { "epoch": 7.22, "learning_rate": 0.0005, "loss": 1.9831, "step": 92100 }, { "epoch": 7.23, "learning_rate": 0.0005, "loss": 1.9617, "step": 92200 }, { "epoch": 7.24, "learning_rate": 0.0005, "loss": 1.9783, "step": 92300 }, { "epoch": 7.25, "learning_rate": 0.0005, "loss": 1.9817, "step": 92400 }, { "epoch": 7.25, "learning_rate": 0.0005, "loss": 1.9759, "step": 92500 }, { "epoch": 7.26, "learning_rate": 0.0005, "loss": 1.9912, "step": 92600 }, { "epoch": 7.27, "learning_rate": 0.0005, "loss": 1.9836, "step": 92700 }, { "epoch": 7.28, "learning_rate": 0.0005, "loss": 1.9792, "step": 92800 }, { "epoch": 7.29, "learning_rate": 0.0005, "loss": 1.9728, "step": 92900 }, { "epoch": 7.29, "learning_rate": 0.0005, "loss": 2.0051, "step": 93000 }, { "epoch": 7.3, "learning_rate": 0.0005, "loss": 1.9884, "step": 93100 }, { "epoch": 7.31, "learning_rate": 0.0005, "loss": 1.9679, "step": 93200 }, { "epoch": 7.32, "learning_rate": 0.0005, "loss": 1.9732, "step": 93300 }, { "epoch": 7.32, "learning_rate": 0.0005, "loss": 1.9627, "step": 93400 }, { "epoch": 7.33, "learning_rate": 0.0005, "loss": 1.9745, "step": 93500 }, { "epoch": 7.34, "learning_rate": 0.0005, "loss": 1.9982, "step": 93600 }, { "epoch": 7.35, "learning_rate": 0.0005, "loss": 1.9901, "step": 93700 }, { "epoch": 7.36, "learning_rate": 0.0005, "loss": 1.9544, "step": 93800 }, { "epoch": 7.36, "learning_rate": 0.0005, "loss": 1.9727, "step": 93900 }, { "epoch": 7.37, "learning_rate": 0.0005, "loss": 1.9721, "step": 94000 }, { "epoch": 7.38, "learning_rate": 0.0005, "loss": 1.9807, "step": 94100 }, { "epoch": 7.39, "learning_rate": 0.0005, "loss": 1.937, "step": 94200 }, { "epoch": 7.39, "learning_rate": 0.0005, "loss": 1.9661, "step": 94300 }, { "epoch": 7.4, "learning_rate": 0.0005, "loss": 1.9868, "step": 94400 }, { "epoch": 7.41, "learning_rate": 0.0005, "loss": 1.9727, "step": 94500 }, { "epoch": 7.42, "learning_rate": 0.0005, "loss": 1.9726, "step": 94600 }, { "epoch": 7.43, "learning_rate": 0.0005, "loss": 1.9914, "step": 94700 }, { "epoch": 7.43, "learning_rate": 0.0005, "loss": 1.9804, "step": 94800 }, { "epoch": 7.44, "learning_rate": 0.0005, "loss": 1.9533, "step": 94900 }, { "epoch": 7.45, "learning_rate": 0.0005, "loss": 1.9625, "step": 95000 }, { "epoch": 7.46, "learning_rate": 0.0005, "loss": 1.986, "step": 95100 }, { "epoch": 7.47, "learning_rate": 0.0005, "loss": 1.9745, "step": 95200 }, { "epoch": 7.47, "learning_rate": 0.0005, "loss": 2.0023, "step": 95300 }, { "epoch": 7.48, "learning_rate": 0.0005, "loss": 1.9857, "step": 95400 }, { "epoch": 7.49, "learning_rate": 0.0005, "loss": 1.9837, "step": 95500 }, { "epoch": 7.5, "learning_rate": 0.0005, "loss": 1.9924, "step": 95600 }, { "epoch": 7.5, "learning_rate": 0.0005, "loss": 1.9984, "step": 95700 }, { "epoch": 7.51, "learning_rate": 0.0005, "loss": 1.9832, "step": 95800 }, { "epoch": 7.52, "learning_rate": 0.0005, "loss": 1.9741, "step": 95900 }, { "epoch": 7.53, "learning_rate": 0.0005, "loss": 1.9719, "step": 96000 }, { "epoch": 7.54, "learning_rate": 0.0005, "loss": 1.9789, "step": 96100 }, { "epoch": 7.54, "learning_rate": 0.0005, "loss": 1.9968, "step": 96200 }, { "epoch": 7.55, "learning_rate": 0.0005, "loss": 1.9551, "step": 96300 }, { "epoch": 7.56, "learning_rate": 0.0005, "loss": 2.0159, "step": 96400 }, { "epoch": 7.57, "learning_rate": 0.0005, "loss": 1.9721, "step": 96500 }, { "epoch": 7.58, "learning_rate": 0.0005, "loss": 1.9896, "step": 96600 }, { "epoch": 7.58, "learning_rate": 0.0005, "loss": 2.004, "step": 96700 }, { "epoch": 7.59, "learning_rate": 0.0005, "loss": 1.9564, "step": 96800 }, { "epoch": 7.6, "learning_rate": 0.0005, "loss": 1.9491, "step": 96900 }, { "epoch": 7.61, "learning_rate": 0.0005, "loss": 1.9866, "step": 97000 }, { "epoch": 7.61, "learning_rate": 0.0005, "loss": 1.9746, "step": 97100 }, { "epoch": 7.62, "learning_rate": 0.0005, "loss": 1.9724, "step": 97200 }, { "epoch": 7.63, "learning_rate": 0.0005, "loss": 1.9936, "step": 97300 }, { "epoch": 7.64, "learning_rate": 0.0005, "loss": 1.9812, "step": 97400 }, { "epoch": 7.65, "learning_rate": 0.0005, "loss": 2.0067, "step": 97500 }, { "epoch": 7.65, "learning_rate": 0.0005, "loss": 1.9797, "step": 97600 }, { "epoch": 7.66, "learning_rate": 0.0005, "loss": 2.0005, "step": 97700 }, { "epoch": 7.67, "learning_rate": 0.0005, "loss": 1.9963, "step": 97800 }, { "epoch": 7.68, "learning_rate": 0.0005, "loss": 2.0044, "step": 97900 }, { "epoch": 7.69, "learning_rate": 0.0005, "loss": 1.9896, "step": 98000 }, { "epoch": 7.69, "learning_rate": 0.0005, "loss": 1.9853, "step": 98100 }, { "epoch": 7.7, "learning_rate": 0.0005, "loss": 1.9957, "step": 98200 }, { "epoch": 7.71, "learning_rate": 0.0005, "loss": 2.0128, "step": 98300 }, { "epoch": 7.72, "learning_rate": 0.0005, "loss": 1.9685, "step": 98400 }, { "epoch": 7.72, "learning_rate": 0.0005, "loss": 2.0051, "step": 98500 }, { "epoch": 7.73, "learning_rate": 0.0005, "loss": 1.9703, "step": 98600 }, { "epoch": 7.74, "learning_rate": 0.0005, "loss": 2.0022, "step": 98700 }, { "epoch": 7.75, "learning_rate": 0.0005, "loss": 2.0205, "step": 98800 }, { "epoch": 7.76, "learning_rate": 0.0005, "loss": 2.0008, "step": 98900 }, { "epoch": 7.76, "learning_rate": 0.0005, "loss": 2.0121, "step": 99000 }, { "epoch": 7.77, "learning_rate": 0.0005, "loss": 2.0063, "step": 99100 }, { "epoch": 7.78, "learning_rate": 0.0005, "loss": 1.9981, "step": 99200 }, { "epoch": 7.79, "learning_rate": 0.0005, "loss": 1.9838, "step": 99300 }, { "epoch": 7.79, "learning_rate": 0.0005, "loss": 1.9923, "step": 99400 }, { "epoch": 7.8, "learning_rate": 0.0005, "loss": 1.9959, "step": 99500 }, { "epoch": 7.81, "learning_rate": 0.0005, "loss": 1.9924, "step": 99600 }, { "epoch": 7.82, "learning_rate": 0.0005, "loss": 2.0133, "step": 99700 }, { "epoch": 7.83, "learning_rate": 0.0005, "loss": 1.9995, "step": 99800 }, { "epoch": 7.83, "learning_rate": 0.0005, "loss": 2.0026, "step": 99900 }, { "epoch": 7.84, "learning_rate": 0.0005, "loss": 1.9736, "step": 100000 }, { "epoch": 7.84, "eval_gen_len": 18.74485742032312, "eval_loss": 2.0384180545806885, "eval_rouge1": 34.1724, "eval_rouge2": 13.072, "eval_rougeL": 27.9429, "eval_rougeLsum": 27.9294, "eval_runtime": 358.3007, "eval_samples_per_second": 31.613, "eval_steps_per_second": 1.976, "step": 100000 }, { "epoch": 7.85, "learning_rate": 0.0005, "loss": 2.0139, "step": 100100 }, { "epoch": 7.86, "learning_rate": 0.0005, "loss": 2.0064, "step": 100200 }, { "epoch": 7.87, "learning_rate": 0.0005, "loss": 1.9845, "step": 100300 }, { "epoch": 7.87, "learning_rate": 0.0005, "loss": 1.9921, "step": 100400 }, { "epoch": 7.88, "learning_rate": 0.0005, "loss": 1.9755, "step": 100500 }, { "epoch": 7.89, "learning_rate": 0.0005, "loss": 2.0093, "step": 100600 }, { "epoch": 7.9, "learning_rate": 0.0005, "loss": 2.0127, "step": 100700 }, { "epoch": 7.9, "learning_rate": 0.0005, "loss": 1.9878, "step": 100800 }, { "epoch": 7.91, "learning_rate": 0.0005, "loss": 1.9925, "step": 100900 }, { "epoch": 7.92, "learning_rate": 0.0005, "loss": 1.9798, "step": 101000 }, { "epoch": 7.93, "learning_rate": 0.0005, "loss": 2.0125, "step": 101100 }, { "epoch": 7.94, "learning_rate": 0.0005, "loss": 2.012, "step": 101200 }, { "epoch": 7.94, "learning_rate": 0.0005, "loss": 1.9951, "step": 101300 }, { "epoch": 7.95, "learning_rate": 0.0005, "loss": 2.0181, "step": 101400 }, { "epoch": 7.96, "learning_rate": 0.0005, "loss": 2.0121, "step": 101500 }, { "epoch": 7.97, "learning_rate": 0.0005, "loss": 2.0009, "step": 101600 }, { "epoch": 7.98, "learning_rate": 0.0005, "loss": 1.9855, "step": 101700 }, { "epoch": 7.98, "learning_rate": 0.0005, "loss": 1.9993, "step": 101800 }, { "epoch": 7.99, "learning_rate": 0.0005, "loss": 1.9784, "step": 101900 }, { "epoch": 8.0, "learning_rate": 0.0005, "loss": 1.9794, "step": 102000 }, { "epoch": 8.01, "learning_rate": 0.0005, "loss": 1.9454, "step": 102100 }, { "epoch": 8.01, "learning_rate": 0.0005, "loss": 1.9057, "step": 102200 }, { "epoch": 8.02, "learning_rate": 0.0005, "loss": 1.912, "step": 102300 }, { "epoch": 8.03, "learning_rate": 0.0005, "loss": 1.9005, "step": 102400 }, { "epoch": 8.04, "learning_rate": 0.0005, "loss": 1.9037, "step": 102500 }, { "epoch": 8.05, "learning_rate": 0.0005, "loss": 1.9229, "step": 102600 }, { "epoch": 8.05, "learning_rate": 0.0005, "loss": 1.9096, "step": 102700 }, { "epoch": 8.06, "learning_rate": 0.0005, "loss": 1.9353, "step": 102800 }, { "epoch": 8.07, "learning_rate": 0.0005, "loss": 1.9332, "step": 102900 }, { "epoch": 8.08, "learning_rate": 0.0005, "loss": 1.9093, "step": 103000 }, { "epoch": 8.09, "learning_rate": 0.0005, "loss": 1.9085, "step": 103100 }, { "epoch": 8.09, "learning_rate": 0.0005, "loss": 1.9351, "step": 103200 }, { "epoch": 8.1, "learning_rate": 0.0005, "loss": 1.9295, "step": 103300 }, { "epoch": 8.11, "learning_rate": 0.0005, "loss": 1.919, "step": 103400 }, { "epoch": 8.12, "learning_rate": 0.0005, "loss": 1.9265, "step": 103500 }, { "epoch": 8.12, "learning_rate": 0.0005, "loss": 1.923, "step": 103600 }, { "epoch": 8.13, "learning_rate": 0.0005, "loss": 1.9284, "step": 103700 }, { "epoch": 8.14, "learning_rate": 0.0005, "loss": 1.9155, "step": 103800 }, { "epoch": 8.15, "learning_rate": 0.0005, "loss": 1.9245, "step": 103900 }, { "epoch": 8.16, "learning_rate": 0.0005, "loss": 1.9388, "step": 104000 }, { "epoch": 8.16, "learning_rate": 0.0005, "loss": 1.9247, "step": 104100 }, { "epoch": 8.17, "learning_rate": 0.0005, "loss": 1.9535, "step": 104200 }, { "epoch": 8.18, "learning_rate": 0.0005, "loss": 1.9312, "step": 104300 }, { "epoch": 8.19, "learning_rate": 0.0005, "loss": 1.924, "step": 104400 }, { "epoch": 8.19, "learning_rate": 0.0005, "loss": 1.944, "step": 104500 }, { "epoch": 8.2, "learning_rate": 0.0005, "loss": 1.9328, "step": 104600 }, { "epoch": 8.21, "learning_rate": 0.0005, "loss": 1.926, "step": 104700 }, { "epoch": 8.22, "learning_rate": 0.0005, "loss": 1.9264, "step": 104800 }, { "epoch": 8.23, "learning_rate": 0.0005, "loss": 1.92, "step": 104900 }, { "epoch": 8.23, "learning_rate": 0.0005, "loss": 1.94, "step": 105000 }, { "epoch": 8.24, "learning_rate": 0.0005, "loss": 1.9447, "step": 105100 }, { "epoch": 8.25, "learning_rate": 0.0005, "loss": 1.9307, "step": 105200 }, { "epoch": 8.26, "learning_rate": 0.0005, "loss": 1.9461, "step": 105300 }, { "epoch": 8.27, "learning_rate": 0.0005, "loss": 1.9426, "step": 105400 }, { "epoch": 8.27, "learning_rate": 0.0005, "loss": 1.9638, "step": 105500 }, { "epoch": 8.28, "learning_rate": 0.0005, "loss": 1.9381, "step": 105600 }, { "epoch": 8.29, "learning_rate": 0.0005, "loss": 1.9362, "step": 105700 }, { "epoch": 8.3, "learning_rate": 0.0005, "loss": 1.9646, "step": 105800 }, { "epoch": 8.3, "learning_rate": 0.0005, "loss": 1.9434, "step": 105900 }, { "epoch": 8.31, "learning_rate": 0.0005, "loss": 1.9305, "step": 106000 }, { "epoch": 8.32, "learning_rate": 0.0005, "loss": 1.9263, "step": 106100 }, { "epoch": 8.33, "learning_rate": 0.0005, "loss": 1.9568, "step": 106200 }, { "epoch": 8.34, "learning_rate": 0.0005, "loss": 1.941, "step": 106300 }, { "epoch": 8.34, "learning_rate": 0.0005, "loss": 1.9541, "step": 106400 }, { "epoch": 8.35, "learning_rate": 0.0005, "loss": 1.9446, "step": 106500 }, { "epoch": 8.36, "learning_rate": 0.0005, "loss": 1.9513, "step": 106600 }, { "epoch": 8.37, "learning_rate": 0.0005, "loss": 1.9394, "step": 106700 }, { "epoch": 8.38, "learning_rate": 0.0005, "loss": 1.9623, "step": 106800 }, { "epoch": 8.38, "learning_rate": 0.0005, "loss": 1.9638, "step": 106900 }, { "epoch": 8.39, "learning_rate": 0.0005, "loss": 1.9246, "step": 107000 }, { "epoch": 8.4, "learning_rate": 0.0005, "loss": 1.9464, "step": 107100 }, { "epoch": 8.41, "learning_rate": 0.0005, "loss": 1.9515, "step": 107200 }, { "epoch": 8.41, "learning_rate": 0.0005, "loss": 1.9604, "step": 107300 }, { "epoch": 8.42, "learning_rate": 0.0005, "loss": 1.9294, "step": 107400 }, { "epoch": 8.43, "learning_rate": 0.0005, "loss": 1.9376, "step": 107500 }, { "epoch": 8.44, "learning_rate": 0.0005, "loss": 1.9506, "step": 107600 }, { "epoch": 8.45, "learning_rate": 0.0005, "loss": 1.9304, "step": 107700 }, { "epoch": 8.45, "learning_rate": 0.0005, "loss": 1.958, "step": 107800 }, { "epoch": 8.46, "learning_rate": 0.0005, "loss": 1.9809, "step": 107900 }, { "epoch": 8.47, "learning_rate": 0.0005, "loss": 1.9446, "step": 108000 }, { "epoch": 8.48, "learning_rate": 0.0005, "loss": 1.9503, "step": 108100 }, { "epoch": 8.48, "learning_rate": 0.0005, "loss": 1.9452, "step": 108200 }, { "epoch": 8.49, "learning_rate": 0.0005, "loss": 1.9523, "step": 108300 }, { "epoch": 8.5, "learning_rate": 0.0005, "loss": 1.9206, "step": 108400 }, { "epoch": 8.51, "learning_rate": 0.0005, "loss": 1.958, "step": 108500 }, { "epoch": 8.52, "learning_rate": 0.0005, "loss": 1.9445, "step": 108600 }, { "epoch": 8.52, "learning_rate": 0.0005, "loss": 1.9558, "step": 108700 }, { "epoch": 8.53, "learning_rate": 0.0005, "loss": 1.9497, "step": 108800 }, { "epoch": 8.54, "learning_rate": 0.0005, "loss": 1.9523, "step": 108900 }, { "epoch": 8.55, "learning_rate": 0.0005, "loss": 1.9334, "step": 109000 }, { "epoch": 8.56, "learning_rate": 0.0005, "loss": 1.9342, "step": 109100 }, { "epoch": 8.56, "learning_rate": 0.0005, "loss": 1.9514, "step": 109200 }, { "epoch": 8.57, "learning_rate": 0.0005, "loss": 1.979, "step": 109300 }, { "epoch": 8.58, "learning_rate": 0.0005, "loss": 1.9424, "step": 109400 }, { "epoch": 8.59, "learning_rate": 0.0005, "loss": 1.9586, "step": 109500 }, { "epoch": 8.59, "learning_rate": 0.0005, "loss": 1.9227, "step": 109600 }, { "epoch": 8.6, "learning_rate": 0.0005, "loss": 1.9335, "step": 109700 }, { "epoch": 8.61, "learning_rate": 0.0005, "loss": 1.9582, "step": 109800 }, { "epoch": 8.62, "learning_rate": 0.0005, "loss": 1.9475, "step": 109900 }, { "epoch": 8.63, "learning_rate": 0.0005, "loss": 1.9571, "step": 110000 }, { "epoch": 8.63, "eval_gen_len": 18.777257879403194, "eval_loss": 2.0414655208587646, "eval_rouge1": 34.5053, "eval_rouge2": 13.283, "eval_rougeL": 28.19, "eval_rougeLsum": 28.1836, "eval_runtime": 360.6557, "eval_samples_per_second": 31.407, "eval_steps_per_second": 1.963, "step": 110000 }, { "epoch": 8.63, "learning_rate": 0.0005, "loss": 1.9556, "step": 110100 }, { "epoch": 8.64, "learning_rate": 0.0005, "loss": 1.9607, "step": 110200 }, { "epoch": 8.65, "learning_rate": 0.0005, "loss": 1.9676, "step": 110300 }, { "epoch": 8.66, "learning_rate": 0.0005, "loss": 1.9648, "step": 110400 }, { "epoch": 8.67, "learning_rate": 0.0005, "loss": 1.9451, "step": 110500 }, { "epoch": 8.67, "learning_rate": 0.0005, "loss": 1.9554, "step": 110600 }, { "epoch": 8.68, "learning_rate": 0.0005, "loss": 1.9645, "step": 110700 }, { "epoch": 8.69, "learning_rate": 0.0005, "loss": 1.9936, "step": 110800 }, { "epoch": 8.7, "learning_rate": 0.0005, "loss": 1.9629, "step": 110900 }, { "epoch": 8.7, "learning_rate": 0.0005, "loss": 1.9463, "step": 111000 }, { "epoch": 8.71, "learning_rate": 0.0005, "loss": 1.9464, "step": 111100 }, { "epoch": 8.72, "learning_rate": 0.0005, "loss": 1.9193, "step": 111200 }, { "epoch": 8.73, "learning_rate": 0.0005, "loss": 1.9795, "step": 111300 }, { "epoch": 8.74, "learning_rate": 0.0005, "loss": 1.9997, "step": 111400 }, { "epoch": 8.74, "learning_rate": 0.0005, "loss": 1.9712, "step": 111500 }, { "epoch": 8.75, "learning_rate": 0.0005, "loss": 1.9425, "step": 111600 }, { "epoch": 8.76, "learning_rate": 0.0005, "loss": 1.9544, "step": 111700 }, { "epoch": 8.77, "learning_rate": 0.0005, "loss": 1.9692, "step": 111800 }, { "epoch": 8.78, "learning_rate": 0.0005, "loss": 1.9595, "step": 111900 }, { "epoch": 8.78, "learning_rate": 0.0005, "loss": 1.974, "step": 112000 }, { "epoch": 8.79, "learning_rate": 0.0005, "loss": 1.9502, "step": 112100 }, { "epoch": 8.8, "learning_rate": 0.0005, "loss": 1.972, "step": 112200 }, { "epoch": 8.81, "learning_rate": 0.0005, "loss": 1.9803, "step": 112300 }, { "epoch": 8.81, "learning_rate": 0.0005, "loss": 1.9553, "step": 112400 }, { "epoch": 8.82, "learning_rate": 0.0005, "loss": 1.9793, "step": 112500 }, { "epoch": 8.83, "learning_rate": 0.0005, "loss": 1.9593, "step": 112600 }, { "epoch": 8.84, "learning_rate": 0.0005, "loss": 1.9687, "step": 112700 }, { "epoch": 8.85, "learning_rate": 0.0005, "loss": 1.949, "step": 112800 }, { "epoch": 8.85, "learning_rate": 0.0005, "loss": 1.9785, "step": 112900 }, { "epoch": 8.86, "learning_rate": 0.0005, "loss": 1.9625, "step": 113000 }, { "epoch": 8.87, "learning_rate": 0.0005, "loss": 1.9763, "step": 113100 }, { "epoch": 8.88, "learning_rate": 0.0005, "loss": 1.9741, "step": 113200 }, { "epoch": 8.88, "learning_rate": 0.0005, "loss": 1.9307, "step": 113300 }, { "epoch": 8.89, "learning_rate": 0.0005, "loss": 1.9471, "step": 113400 }, { "epoch": 8.9, "learning_rate": 0.0005, "loss": 1.9682, "step": 113500 }, { "epoch": 8.91, "learning_rate": 0.0005, "loss": 2.0082, "step": 113600 }, { "epoch": 8.92, "learning_rate": 0.0005, "loss": 1.9933, "step": 113700 }, { "epoch": 8.92, "learning_rate": 0.0005, "loss": 1.9674, "step": 113800 }, { "epoch": 8.93, "learning_rate": 0.0005, "loss": 1.9422, "step": 113900 }, { "epoch": 8.94, "learning_rate": 0.0005, "loss": 1.9523, "step": 114000 }, { "epoch": 8.95, "learning_rate": 0.0005, "loss": 1.9825, "step": 114100 }, { "epoch": 8.96, "learning_rate": 0.0005, "loss": 1.9663, "step": 114200 }, { "epoch": 8.96, "learning_rate": 0.0005, "loss": 1.9994, "step": 114300 }, { "epoch": 8.97, "learning_rate": 0.0005, "loss": 1.9455, "step": 114400 }, { "epoch": 8.98, "learning_rate": 0.0005, "loss": 1.9715, "step": 114500 }, { "epoch": 8.99, "learning_rate": 0.0005, "loss": 1.9657, "step": 114600 }, { "epoch": 8.99, "learning_rate": 0.0005, "loss": 1.9587, "step": 114700 }, { "epoch": 9.0, "learning_rate": 0.0005, "loss": 1.9472, "step": 114800 }, { "epoch": 9.01, "learning_rate": 0.0005, "loss": 1.8601, "step": 114900 }, { "epoch": 9.02, "learning_rate": 0.0005, "loss": 1.8969, "step": 115000 }, { "epoch": 9.03, "learning_rate": 0.0005, "loss": 1.8682, "step": 115100 }, { "epoch": 9.03, "learning_rate": 0.0005, "loss": 1.9005, "step": 115200 }, { "epoch": 9.04, "learning_rate": 0.0005, "loss": 1.8729, "step": 115300 }, { "epoch": 9.05, "learning_rate": 0.0005, "loss": 1.8889, "step": 115400 }, { "epoch": 9.06, "learning_rate": 0.0005, "loss": 1.8802, "step": 115500 }, { "epoch": 9.07, "learning_rate": 0.0005, "loss": 1.8617, "step": 115600 }, { "epoch": 9.07, "learning_rate": 0.0005, "loss": 1.8912, "step": 115700 }, { "epoch": 9.08, "learning_rate": 0.0005, "loss": 1.8954, "step": 115800 }, { "epoch": 9.09, "learning_rate": 0.0005, "loss": 1.9125, "step": 115900 }, { "epoch": 9.1, "learning_rate": 0.0005, "loss": 1.8967, "step": 116000 }, { "epoch": 9.1, "learning_rate": 0.0005, "loss": 1.8914, "step": 116100 }, { "epoch": 9.11, "learning_rate": 0.0005, "loss": 1.8867, "step": 116200 }, { "epoch": 9.12, "learning_rate": 0.0005, "loss": 1.9153, "step": 116300 }, { "epoch": 9.13, "learning_rate": 0.0005, "loss": 1.9004, "step": 116400 }, { "epoch": 9.14, "learning_rate": 0.0005, "loss": 1.8644, "step": 116500 }, { "epoch": 9.14, "learning_rate": 0.0005, "loss": 1.8856, "step": 116600 }, { "epoch": 9.15, "learning_rate": 0.0005, "loss": 1.8949, "step": 116700 }, { "epoch": 9.16, "learning_rate": 0.0005, "loss": 1.8782, "step": 116800 }, { "epoch": 9.17, "learning_rate": 0.0005, "loss": 1.8724, "step": 116900 }, { "epoch": 9.18, "learning_rate": 0.0005, "loss": 1.9133, "step": 117000 }, { "epoch": 9.18, "learning_rate": 0.0005, "loss": 1.8869, "step": 117100 }, { "epoch": 9.19, "learning_rate": 0.0005, "loss": 1.9042, "step": 117200 }, { "epoch": 9.2, "learning_rate": 0.0005, "loss": 1.8915, "step": 117300 }, { "epoch": 9.21, "learning_rate": 0.0005, "loss": 1.9083, "step": 117400 }, { "epoch": 9.21, "learning_rate": 0.0005, "loss": 1.9009, "step": 117500 }, { "epoch": 9.22, "learning_rate": 0.0005, "loss": 1.8974, "step": 117600 }, { "epoch": 9.23, "learning_rate": 0.0005, "loss": 1.9142, "step": 117700 }, { "epoch": 9.24, "learning_rate": 0.0005, "loss": 1.9136, "step": 117800 }, { "epoch": 9.25, "learning_rate": 0.0005, "loss": 1.9299, "step": 117900 }, { "epoch": 9.25, "learning_rate": 0.0005, "loss": 1.8972, "step": 118000 }, { "epoch": 9.26, "learning_rate": 0.0005, "loss": 1.9068, "step": 118100 }, { "epoch": 9.27, "learning_rate": 0.0005, "loss": 1.8926, "step": 118200 }, { "epoch": 9.28, "learning_rate": 0.0005, "loss": 1.9263, "step": 118300 }, { "epoch": 9.28, "learning_rate": 0.0005, "loss": 1.9052, "step": 118400 }, { "epoch": 9.29, "learning_rate": 0.0005, "loss": 1.9119, "step": 118500 }, { "epoch": 9.3, "learning_rate": 0.0005, "loss": 1.8979, "step": 118600 }, { "epoch": 9.31, "learning_rate": 0.0005, "loss": 1.8753, "step": 118700 }, { "epoch": 9.32, "learning_rate": 0.0005, "loss": 1.8867, "step": 118800 }, { "epoch": 9.32, "learning_rate": 0.0005, "loss": 1.8759, "step": 118900 }, { "epoch": 9.33, "learning_rate": 0.0005, "loss": 1.932, "step": 119000 }, { "epoch": 9.34, "learning_rate": 0.0005, "loss": 1.9213, "step": 119100 }, { "epoch": 9.35, "learning_rate": 0.0005, "loss": 1.9241, "step": 119200 }, { "epoch": 9.36, "learning_rate": 0.0005, "loss": 1.9039, "step": 119300 }, { "epoch": 9.36, "learning_rate": 0.0005, "loss": 1.9411, "step": 119400 }, { "epoch": 9.37, "learning_rate": 0.0005, "loss": 1.9071, "step": 119500 }, { "epoch": 9.38, "learning_rate": 0.0005, "loss": 1.9214, "step": 119600 }, { "epoch": 9.39, "learning_rate": 0.0005, "loss": 1.8785, "step": 119700 }, { "epoch": 9.39, "learning_rate": 0.0005, "loss": 1.9119, "step": 119800 }, { "epoch": 9.4, "learning_rate": 0.0005, "loss": 1.9278, "step": 119900 }, { "epoch": 9.41, "learning_rate": 0.0005, "loss": 1.9196, "step": 120000 }, { "epoch": 9.41, "eval_gen_len": 18.812042023483713, "eval_loss": 2.018998384475708, "eval_rouge1": 34.5454, "eval_rouge2": 13.3829, "eval_rougeL": 28.2239, "eval_rougeLsum": 28.2114, "eval_runtime": 359.993, "eval_samples_per_second": 31.465, "eval_steps_per_second": 1.967, "step": 120000 }, { "epoch": 9.42, "learning_rate": 0.0005, "loss": 1.9188, "step": 120100 }, { "epoch": 9.43, "learning_rate": 0.0005, "loss": 1.9056, "step": 120200 }, { "epoch": 9.43, "learning_rate": 0.0005, "loss": 1.9468, "step": 120300 }, { "epoch": 9.44, "learning_rate": 0.0005, "loss": 1.9084, "step": 120400 }, { "epoch": 9.45, "learning_rate": 0.0005, "loss": 1.9327, "step": 120500 }, { "epoch": 9.46, "learning_rate": 0.0005, "loss": 1.9426, "step": 120600 }, { "epoch": 9.47, "learning_rate": 0.0005, "loss": 1.9135, "step": 120700 }, { "epoch": 9.47, "learning_rate": 0.0005, "loss": 1.8908, "step": 120800 }, { "epoch": 9.48, "learning_rate": 0.0005, "loss": 1.9273, "step": 120900 }, { "epoch": 9.49, "learning_rate": 0.0005, "loss": 1.9364, "step": 121000 }, { "epoch": 9.5, "learning_rate": 0.0005, "loss": 1.906, "step": 121100 }, { "epoch": 9.5, "learning_rate": 0.0005, "loss": 1.9405, "step": 121200 }, { "epoch": 9.51, "learning_rate": 0.0005, "loss": 1.9018, "step": 121300 }, { "epoch": 9.52, "learning_rate": 0.0005, "loss": 1.9183, "step": 121400 }, { "epoch": 9.53, "learning_rate": 0.0005, "loss": 1.9258, "step": 121500 }, { "epoch": 9.54, "learning_rate": 0.0005, "loss": 1.8952, "step": 121600 }, { "epoch": 9.54, "learning_rate": 0.0005, "loss": 1.9412, "step": 121700 }, { "epoch": 9.55, "learning_rate": 0.0005, "loss": 1.9284, "step": 121800 }, { "epoch": 9.56, "learning_rate": 0.0005, "loss": 1.9197, "step": 121900 }, { "epoch": 9.57, "learning_rate": 0.0005, "loss": 1.9171, "step": 122000 }, { "epoch": 9.57, "learning_rate": 0.0005, "loss": 1.9278, "step": 122100 }, { "epoch": 9.58, "learning_rate": 0.0005, "loss": 1.9395, "step": 122200 }, { "epoch": 9.59, "learning_rate": 0.0005, "loss": 1.936, "step": 122300 }, { "epoch": 9.6, "learning_rate": 0.0005, "loss": 1.9, "step": 122400 }, { "epoch": 9.61, "learning_rate": 0.0005, "loss": 1.9328, "step": 122500 }, { "epoch": 9.61, "learning_rate": 0.0005, "loss": 1.9329, "step": 122600 }, { "epoch": 9.62, "learning_rate": 0.0005, "loss": 1.9407, "step": 122700 }, { "epoch": 9.63, "learning_rate": 0.0005, "loss": 1.9219, "step": 122800 }, { "epoch": 9.64, "learning_rate": 0.0005, "loss": 1.9634, "step": 122900 }, { "epoch": 9.65, "learning_rate": 0.0005, "loss": 1.9186, "step": 123000 }, { "epoch": 9.65, "learning_rate": 0.0005, "loss": 1.9361, "step": 123100 }, { "epoch": 9.66, "learning_rate": 0.0005, "loss": 1.9279, "step": 123200 }, { "epoch": 9.67, "learning_rate": 0.0005, "loss": 1.9251, "step": 123300 }, { "epoch": 9.68, "learning_rate": 0.0005, "loss": 1.9143, "step": 123400 }, { "epoch": 9.68, "learning_rate": 0.0005, "loss": 1.9565, "step": 123500 }, { "epoch": 9.69, "learning_rate": 0.0005, "loss": 1.9419, "step": 123600 }, { "epoch": 9.7, "learning_rate": 0.0005, "loss": 1.9147, "step": 123700 }, { "epoch": 9.71, "learning_rate": 0.0005, "loss": 1.8996, "step": 123800 }, { "epoch": 9.72, "learning_rate": 0.0005, "loss": 1.9299, "step": 123900 }, { "epoch": 9.72, "learning_rate": 0.0005, "loss": 1.9303, "step": 124000 }, { "epoch": 9.73, "learning_rate": 0.0005, "loss": 1.9447, "step": 124100 }, { "epoch": 9.74, "learning_rate": 0.0005, "loss": 1.9251, "step": 124200 }, { "epoch": 9.75, "learning_rate": 0.0005, "loss": 1.9279, "step": 124300 }, { "epoch": 9.76, "learning_rate": 0.0005, "loss": 1.8948, "step": 124400 }, { "epoch": 9.76, "learning_rate": 0.0005, "loss": 1.9331, "step": 124500 }, { "epoch": 9.77, "learning_rate": 0.0005, "loss": 1.9378, "step": 124600 }, { "epoch": 9.78, "learning_rate": 0.0005, "loss": 1.9366, "step": 124700 }, { "epoch": 9.79, "learning_rate": 0.0005, "loss": 1.9875, "step": 124800 }, { "epoch": 9.79, "learning_rate": 0.0005, "loss": 1.9354, "step": 124900 }, { "epoch": 9.8, "learning_rate": 0.0005, "loss": 1.9318, "step": 125000 }, { "epoch": 9.81, "learning_rate": 0.0005, "loss": 1.9058, "step": 125100 }, { "epoch": 9.82, "learning_rate": 0.0005, "loss": 1.9133, "step": 125200 }, { "epoch": 9.83, "learning_rate": 0.0005, "loss": 1.9353, "step": 125300 }, { "epoch": 9.83, "learning_rate": 0.0005, "loss": 1.9616, "step": 125400 }, { "epoch": 9.84, "learning_rate": 0.0005, "loss": 1.9395, "step": 125500 }, { "epoch": 9.85, "learning_rate": 0.0005, "loss": 1.9405, "step": 125600 }, { "epoch": 9.86, "learning_rate": 0.0005, "loss": 1.8969, "step": 125700 }, { "epoch": 9.87, "learning_rate": 0.0005, "loss": 1.9206, "step": 125800 }, { "epoch": 9.87, "learning_rate": 0.0005, "loss": 1.8971, "step": 125900 }, { "epoch": 9.88, "learning_rate": 0.0005, "loss": 1.912, "step": 126000 }, { "epoch": 9.89, "learning_rate": 0.0005, "loss": 1.9517, "step": 126100 }, { "epoch": 9.9, "learning_rate": 0.0005, "loss": 1.9252, "step": 126200 }, { "epoch": 9.9, "learning_rate": 0.0005, "loss": 1.9225, "step": 126300 }, { "epoch": 9.91, "learning_rate": 0.0005, "loss": 1.943, "step": 126400 }, { "epoch": 9.92, "learning_rate": 0.0005, "loss": 1.9287, "step": 126500 }, { "epoch": 9.93, "learning_rate": 0.0005, "loss": 1.9797, "step": 126600 }, { "epoch": 9.94, "learning_rate": 0.0005, "loss": 1.9319, "step": 126700 }, { "epoch": 9.94, "learning_rate": 0.0005, "loss": 1.9392, "step": 126800 }, { "epoch": 9.95, "learning_rate": 0.0005, "loss": 1.9354, "step": 126900 }, { "epoch": 9.96, "learning_rate": 0.0005, "loss": 1.9127, "step": 127000 }, { "epoch": 9.97, "learning_rate": 0.0005, "loss": 1.9682, "step": 127100 }, { "epoch": 9.97, "learning_rate": 0.0005, "loss": 1.9153, "step": 127200 }, { "epoch": 9.98, "learning_rate": 0.0005, "loss": 1.9246, "step": 127300 }, { "epoch": 9.99, "learning_rate": 0.0005, "loss": 1.9302, "step": 127400 }, { "epoch": 10.0, "learning_rate": 0.0005, "loss": 1.9188, "step": 127500 }, { "epoch": 10.01, "learning_rate": 0.0005, "loss": 1.9, "step": 127600 }, { "epoch": 10.01, "learning_rate": 0.0005, "loss": 1.847, "step": 127700 }, { "epoch": 10.02, "learning_rate": 0.0005, "loss": 1.8283, "step": 127800 }, { "epoch": 10.03, "learning_rate": 0.0005, "loss": 1.8143, "step": 127900 }, { "epoch": 10.04, "learning_rate": 0.0005, "loss": 1.8273, "step": 128000 }, { "epoch": 10.05, "learning_rate": 0.0005, "loss": 1.8709, "step": 128100 }, { "epoch": 10.05, "learning_rate": 0.0005, "loss": 1.8487, "step": 128200 }, { "epoch": 10.06, "learning_rate": 0.0005, "loss": 1.8281, "step": 128300 }, { "epoch": 10.07, "learning_rate": 0.0005, "loss": 1.8641, "step": 128400 }, { "epoch": 10.08, "learning_rate": 0.0005, "loss": 1.862, "step": 128500 }, { "epoch": 10.08, "learning_rate": 0.0005, "loss": 1.8973, "step": 128600 }, { "epoch": 10.09, "learning_rate": 0.0005, "loss": 1.8294, "step": 128700 }, { "epoch": 10.1, "learning_rate": 0.0005, "loss": 1.8656, "step": 128800 }, { "epoch": 10.11, "learning_rate": 0.0005, "loss": 1.8619, "step": 128900 }, { "epoch": 10.12, "learning_rate": 0.0005, "loss": 1.8493, "step": 129000 }, { "epoch": 10.12, "learning_rate": 0.0005, "loss": 1.8471, "step": 129100 }, { "epoch": 10.13, "learning_rate": 0.0005, "loss": 1.8663, "step": 129200 }, { "epoch": 10.14, "learning_rate": 0.0005, "loss": 1.8781, "step": 129300 }, { "epoch": 10.15, "learning_rate": 0.0005, "loss": 1.8959, "step": 129400 }, { "epoch": 10.16, "learning_rate": 0.0005, "loss": 1.8703, "step": 129500 }, { "epoch": 10.16, "learning_rate": 0.0005, "loss": 1.8461, "step": 129600 }, { "epoch": 10.17, "learning_rate": 0.0005, "loss": 1.8471, "step": 129700 }, { "epoch": 10.18, "learning_rate": 0.0005, "loss": 1.8793, "step": 129800 }, { "epoch": 10.19, "learning_rate": 0.0005, "loss": 1.9014, "step": 129900 }, { "epoch": 10.19, "learning_rate": 0.0005, "loss": 1.8524, "step": 130000 }, { "epoch": 10.19, "eval_gen_len": 18.745651981989937, "eval_loss": 2.035221815109253, "eval_rouge1": 34.7913, "eval_rouge2": 13.5607, "eval_rougeL": 28.4207, "eval_rougeLsum": 28.4117, "eval_runtime": 358.3534, "eval_samples_per_second": 31.608, "eval_steps_per_second": 1.976, "step": 130000 }, { "epoch": 10.2, "learning_rate": 0.0005, "loss": 1.8683, "step": 130100 }, { "epoch": 10.21, "learning_rate": 0.0005, "loss": 1.8492, "step": 130200 }, { "epoch": 10.22, "learning_rate": 0.0005, "loss": 1.8901, "step": 130300 }, { "epoch": 10.23, "learning_rate": 0.0005, "loss": 1.8592, "step": 130400 }, { "epoch": 10.23, "learning_rate": 0.0005, "loss": 1.8946, "step": 130500 }, { "epoch": 10.24, "learning_rate": 0.0005, "loss": 1.8495, "step": 130600 }, { "epoch": 10.25, "learning_rate": 0.0005, "loss": 1.8598, "step": 130700 }, { "epoch": 10.26, "learning_rate": 0.0005, "loss": 1.8755, "step": 130800 }, { "epoch": 10.27, "learning_rate": 0.0005, "loss": 1.8473, "step": 130900 }, { "epoch": 10.27, "learning_rate": 0.0005, "loss": 1.8724, "step": 131000 }, { "epoch": 10.28, "learning_rate": 0.0005, "loss": 1.8806, "step": 131100 }, { "epoch": 10.29, "learning_rate": 0.0005, "loss": 1.8776, "step": 131200 }, { "epoch": 10.3, "learning_rate": 0.0005, "loss": 1.8739, "step": 131300 }, { "epoch": 10.3, "learning_rate": 0.0005, "loss": 1.8831, "step": 131400 }, { "epoch": 10.31, "learning_rate": 0.0005, "loss": 1.8764, "step": 131500 }, { "epoch": 10.32, "learning_rate": 0.0005, "loss": 1.8965, "step": 131600 }, { "epoch": 10.33, "learning_rate": 0.0005, "loss": 1.8842, "step": 131700 }, { "epoch": 10.34, "learning_rate": 0.0005, "loss": 1.8578, "step": 131800 }, { "epoch": 10.34, "learning_rate": 0.0005, "loss": 1.8977, "step": 131900 }, { "epoch": 10.35, "learning_rate": 0.0005, "loss": 1.8717, "step": 132000 }, { "epoch": 10.36, "learning_rate": 0.0005, "loss": 1.8595, "step": 132100 }, { "epoch": 10.37, "learning_rate": 0.0005, "loss": 1.8855, "step": 132200 }, { "epoch": 10.37, "learning_rate": 0.0005, "loss": 1.9059, "step": 132300 }, { "epoch": 10.38, "learning_rate": 0.0005, "loss": 1.8721, "step": 132400 }, { "epoch": 10.39, "learning_rate": 0.0005, "loss": 1.9069, "step": 132500 }, { "epoch": 10.4, "learning_rate": 0.0005, "loss": 1.8871, "step": 132600 }, { "epoch": 10.41, "learning_rate": 0.0005, "loss": 1.888, "step": 132700 }, { "epoch": 10.41, "learning_rate": 0.0005, "loss": 1.8623, "step": 132800 }, { "epoch": 10.42, "learning_rate": 0.0005, "loss": 1.8942, "step": 132900 }, { "epoch": 10.43, "learning_rate": 0.0005, "loss": 1.9247, "step": 133000 }, { "epoch": 10.44, "learning_rate": 0.0005, "loss": 1.871, "step": 133100 }, { "epoch": 10.45, "learning_rate": 0.0005, "loss": 1.8971, "step": 133200 }, { "epoch": 10.45, "learning_rate": 0.0005, "loss": 1.8707, "step": 133300 }, { "epoch": 10.46, "learning_rate": 0.0005, "loss": 1.8797, "step": 133400 }, { "epoch": 10.47, "learning_rate": 0.0005, "loss": 1.8896, "step": 133500 }, { "epoch": 10.48, "learning_rate": 0.0005, "loss": 1.878, "step": 133600 }, { "epoch": 10.48, "learning_rate": 0.0005, "loss": 1.8719, "step": 133700 }, { "epoch": 10.49, "learning_rate": 0.0005, "loss": 1.8828, "step": 133800 }, { "epoch": 10.5, "learning_rate": 0.0005, "loss": 1.91, "step": 133900 }, { "epoch": 10.51, "learning_rate": 0.0005, "loss": 1.8776, "step": 134000 }, { "epoch": 10.52, "learning_rate": 0.0005, "loss": 1.8828, "step": 134100 }, { "epoch": 10.52, "learning_rate": 0.0005, "loss": 1.8739, "step": 134200 }, { "epoch": 10.53, "learning_rate": 0.0005, "loss": 1.9038, "step": 134300 }, { "epoch": 10.54, "learning_rate": 0.0005, "loss": 1.9058, "step": 134400 }, { "epoch": 10.55, "learning_rate": 0.0005, "loss": 1.8967, "step": 134500 }, { "epoch": 10.56, "learning_rate": 0.0005, "loss": 1.9057, "step": 134600 }, { "epoch": 10.56, "learning_rate": 0.0005, "loss": 1.8868, "step": 134700 }, { "epoch": 10.57, "learning_rate": 0.0005, "loss": 1.8957, "step": 134800 }, { "epoch": 10.58, "learning_rate": 0.0005, "loss": 1.8816, "step": 134900 }, { "epoch": 10.59, "learning_rate": 0.0005, "loss": 1.8901, "step": 135000 }, { "epoch": 10.59, "learning_rate": 0.0005, "loss": 1.8964, "step": 135100 }, { "epoch": 10.6, "learning_rate": 0.0005, "loss": 1.8975, "step": 135200 }, { "epoch": 10.61, "learning_rate": 0.0005, "loss": 1.9138, "step": 135300 }, { "epoch": 10.62, "learning_rate": 0.0005, "loss": 1.9164, "step": 135400 }, { "epoch": 10.63, "learning_rate": 0.0005, "loss": 1.8969, "step": 135500 }, { "epoch": 10.63, "learning_rate": 0.0005, "loss": 1.9051, "step": 135600 }, { "epoch": 10.64, "learning_rate": 0.0005, "loss": 1.8947, "step": 135700 }, { "epoch": 10.65, "learning_rate": 0.0005, "loss": 1.9156, "step": 135800 }, { "epoch": 10.66, "learning_rate": 0.0005, "loss": 1.906, "step": 135900 }, { "epoch": 10.66, "learning_rate": 0.0005, "loss": 1.8948, "step": 136000 }, { "epoch": 10.67, "learning_rate": 0.0005, "loss": 1.8997, "step": 136100 }, { "epoch": 10.68, "learning_rate": 0.0005, "loss": 1.9112, "step": 136200 }, { "epoch": 10.69, "learning_rate": 0.0005, "loss": 1.9091, "step": 136300 }, { "epoch": 10.7, "learning_rate": 0.0005, "loss": 1.9083, "step": 136400 }, { "epoch": 10.7, "learning_rate": 0.0005, "loss": 1.9142, "step": 136500 }, { "epoch": 10.71, "learning_rate": 0.0005, "loss": 1.9074, "step": 136600 }, { "epoch": 10.72, "learning_rate": 0.0005, "loss": 1.8614, "step": 136700 }, { "epoch": 10.73, "learning_rate": 0.0005, "loss": 1.8908, "step": 136800 }, { "epoch": 10.74, "learning_rate": 0.0005, "loss": 1.9128, "step": 136900 }, { "epoch": 10.74, "learning_rate": 0.0005, "loss": 1.9006, "step": 137000 }, { "epoch": 10.75, "learning_rate": 0.0005, "loss": 1.8909, "step": 137100 }, { "epoch": 10.76, "learning_rate": 0.0005, "loss": 1.8812, "step": 137200 }, { "epoch": 10.77, "learning_rate": 0.0005, "loss": 1.9118, "step": 137300 }, { "epoch": 10.77, "learning_rate": 0.0005, "loss": 1.8966, "step": 137400 }, { "epoch": 10.78, "learning_rate": 0.0005, "loss": 1.9206, "step": 137500 }, { "epoch": 10.79, "learning_rate": 0.0005, "loss": 1.9065, "step": 137600 }, { "epoch": 10.8, "learning_rate": 0.0005, "loss": 1.8795, "step": 137700 }, { "epoch": 10.81, "learning_rate": 0.0005, "loss": 1.9157, "step": 137800 }, { "epoch": 10.81, "learning_rate": 0.0005, "loss": 1.9352, "step": 137900 }, { "epoch": 10.82, "learning_rate": 0.0005, "loss": 1.8882, "step": 138000 }, { "epoch": 10.83, "learning_rate": 0.0005, "loss": 1.8767, "step": 138100 }, { "epoch": 10.84, "learning_rate": 0.0005, "loss": 1.8922, "step": 138200 }, { "epoch": 10.85, "learning_rate": 0.0005, "loss": 1.902, "step": 138300 }, { "epoch": 10.85, "learning_rate": 0.0005, "loss": 1.9059, "step": 138400 }, { "epoch": 10.86, "learning_rate": 0.0005, "loss": 1.9211, "step": 138500 }, { "epoch": 10.87, "learning_rate": 0.0005, "loss": 1.8916, "step": 138600 }, { "epoch": 10.88, "learning_rate": 0.0005, "loss": 1.936, "step": 138700 }, { "epoch": 10.88, "learning_rate": 0.0005, "loss": 1.9028, "step": 138800 }, { "epoch": 10.89, "learning_rate": 0.0005, "loss": 1.9066, "step": 138900 }, { "epoch": 10.9, "learning_rate": 0.0005, "loss": 1.8575, "step": 139000 }, { "epoch": 10.91, "learning_rate": 0.0005, "loss": 1.9116, "step": 139100 }, { "epoch": 10.92, "learning_rate": 0.0005, "loss": 1.9183, "step": 139200 }, { "epoch": 10.92, "learning_rate": 0.0005, "loss": 1.8794, "step": 139300 }, { "epoch": 10.93, "learning_rate": 0.0005, "loss": 1.9203, "step": 139400 }, { "epoch": 10.94, "learning_rate": 0.0005, "loss": 1.9218, "step": 139500 }, { "epoch": 10.95, "learning_rate": 0.0005, "loss": 1.9005, "step": 139600 }, { "epoch": 10.96, "learning_rate": 0.0005, "loss": 1.9146, "step": 139700 }, { "epoch": 10.96, "learning_rate": 0.0005, "loss": 1.9101, "step": 139800 }, { "epoch": 10.97, "learning_rate": 0.0005, "loss": 1.9226, "step": 139900 }, { "epoch": 10.98, "learning_rate": 0.0005, "loss": 1.8968, "step": 140000 }, { "epoch": 10.98, "eval_gen_len": 18.780524410700096, "eval_loss": 2.0128087997436523, "eval_rouge1": 34.8079, "eval_rouge2": 13.6337, "eval_rougeL": 28.4248, "eval_rougeLsum": 28.4272, "eval_runtime": 363.0976, "eval_samples_per_second": 31.195, "eval_steps_per_second": 1.95, "step": 140000 }, { "epoch": 10.99, "learning_rate": 0.0005, "loss": 1.908, "step": 140100 }, { "epoch": 10.99, "learning_rate": 0.0005, "loss": 1.9198, "step": 140200 }, { "epoch": 11.0, "learning_rate": 0.0005, "loss": 1.8756, "step": 140300 }, { "epoch": 11.01, "learning_rate": 0.0005, "loss": 1.842, "step": 140400 }, { "epoch": 11.02, "learning_rate": 0.0005, "loss": 1.8369, "step": 140500 }, { "epoch": 11.03, "learning_rate": 0.0005, "loss": 1.7975, "step": 140600 }, { "epoch": 11.03, "learning_rate": 0.0005, "loss": 1.8204, "step": 140700 }, { "epoch": 11.04, "learning_rate": 0.0005, "loss": 1.8019, "step": 140800 }, { "epoch": 11.05, "learning_rate": 0.0005, "loss": 1.8289, "step": 140900 }, { "epoch": 11.06, "learning_rate": 0.0005, "loss": 1.8119, "step": 141000 }, { "epoch": 11.06, "learning_rate": 0.0005, "loss": 1.8118, "step": 141100 }, { "epoch": 11.07, "learning_rate": 0.0005, "loss": 1.8111, "step": 141200 }, { "epoch": 11.08, "learning_rate": 0.0005, "loss": 1.8147, "step": 141300 }, { "epoch": 11.09, "learning_rate": 0.0005, "loss": 1.8222, "step": 141400 }, { "epoch": 11.1, "learning_rate": 0.0005, "loss": 1.8397, "step": 141500 }, { "epoch": 11.1, "learning_rate": 0.0005, "loss": 1.8353, "step": 141600 }, { "epoch": 11.11, "learning_rate": 0.0005, "loss": 1.854, "step": 141700 }, { "epoch": 11.12, "learning_rate": 0.0005, "loss": 1.8107, "step": 141800 }, { "epoch": 11.13, "learning_rate": 0.0005, "loss": 1.8227, "step": 141900 }, { "epoch": 11.14, "learning_rate": 0.0005, "loss": 1.8332, "step": 142000 }, { "epoch": 11.14, "learning_rate": 0.0005, "loss": 1.8161, "step": 142100 }, { "epoch": 11.15, "learning_rate": 0.0005, "loss": 1.8332, "step": 142200 }, { "epoch": 11.16, "learning_rate": 0.0005, "loss": 1.8283, "step": 142300 }, { "epoch": 11.17, "learning_rate": 0.0005, "loss": 1.8493, "step": 142400 }, { "epoch": 11.17, "learning_rate": 0.0005, "loss": 1.8284, "step": 142500 }, { "epoch": 11.18, "learning_rate": 0.0005, "loss": 1.8085, "step": 142600 }, { "epoch": 11.19, "learning_rate": 0.0005, "loss": 1.8523, "step": 142700 }, { "epoch": 11.2, "learning_rate": 0.0005, "loss": 1.8412, "step": 142800 }, { "epoch": 11.21, "learning_rate": 0.0005, "loss": 1.8628, "step": 142900 }, { "epoch": 11.21, "learning_rate": 0.0005, "loss": 1.8296, "step": 143000 }, { "epoch": 11.22, "learning_rate": 0.0005, "loss": 1.8591, "step": 143100 }, { "epoch": 11.23, "learning_rate": 0.0005, "loss": 1.8804, "step": 143200 }, { "epoch": 11.24, "learning_rate": 0.0005, "loss": 1.8345, "step": 143300 }, { "epoch": 11.25, "learning_rate": 0.0005, "loss": 1.8262, "step": 143400 }, { "epoch": 11.25, "learning_rate": 0.0005, "loss": 1.8762, "step": 143500 }, { "epoch": 11.26, "learning_rate": 0.0005, "loss": 1.8287, "step": 143600 }, { "epoch": 11.27, "learning_rate": 0.0005, "loss": 1.8452, "step": 143700 }, { "epoch": 11.28, "learning_rate": 0.0005, "loss": 1.8575, "step": 143800 }, { "epoch": 11.28, "learning_rate": 0.0005, "loss": 1.8701, "step": 143900 }, { "epoch": 11.29, "learning_rate": 0.0005, "loss": 1.8291, "step": 144000 }, { "epoch": 11.3, "learning_rate": 0.0005, "loss": 1.8768, "step": 144100 }, { "epoch": 11.31, "learning_rate": 0.0005, "loss": 1.8742, "step": 144200 }, { "epoch": 11.32, "learning_rate": 0.0005, "loss": 1.8481, "step": 144300 }, { "epoch": 11.32, "learning_rate": 0.0005, "loss": 1.8406, "step": 144400 }, { "epoch": 11.33, "learning_rate": 0.0005, "loss": 1.8386, "step": 144500 }, { "epoch": 11.34, "learning_rate": 0.0005, "loss": 1.8815, "step": 144600 }, { "epoch": 11.35, "learning_rate": 0.0005, "loss": 1.8318, "step": 144700 }, { "epoch": 11.36, "learning_rate": 0.0005, "loss": 1.8546, "step": 144800 }, { "epoch": 11.36, "learning_rate": 0.0005, "loss": 1.889, "step": 144900 }, { "epoch": 11.37, "learning_rate": 0.0005, "loss": 1.8426, "step": 145000 }, { "epoch": 11.38, "learning_rate": 0.0005, "loss": 1.8647, "step": 145100 }, { "epoch": 11.39, "learning_rate": 0.0005, "loss": 1.8627, "step": 145200 }, { "epoch": 11.39, "learning_rate": 0.0005, "loss": 1.8513, "step": 145300 }, { "epoch": 11.4, "learning_rate": 0.0005, "loss": 1.8939, "step": 145400 }, { "epoch": 11.41, "learning_rate": 0.0005, "loss": 1.8071, "step": 145500 }, { "epoch": 11.42, "learning_rate": 0.0005, "loss": 1.871, "step": 145600 }, { "epoch": 11.43, "learning_rate": 0.0005, "loss": 1.8685, "step": 145700 }, { "epoch": 11.43, "learning_rate": 0.0005, "loss": 1.8702, "step": 145800 }, { "epoch": 11.44, "learning_rate": 0.0005, "loss": 1.8751, "step": 145900 }, { "epoch": 11.45, "learning_rate": 0.0005, "loss": 1.8614, "step": 146000 }, { "epoch": 11.46, "learning_rate": 0.0005, "loss": 1.8781, "step": 146100 }, { "epoch": 11.46, "learning_rate": 0.0005, "loss": 1.8487, "step": 146200 }, { "epoch": 11.47, "learning_rate": 0.0005, "loss": 1.829, "step": 146300 }, { "epoch": 11.48, "learning_rate": 0.0005, "loss": 1.8417, "step": 146400 }, { "epoch": 11.49, "learning_rate": 0.0005, "loss": 1.8634, "step": 146500 }, { "epoch": 11.5, "learning_rate": 0.0005, "loss": 1.8713, "step": 146600 }, { "epoch": 11.5, "learning_rate": 0.0005, "loss": 1.8597, "step": 146700 }, { "epoch": 11.51, "learning_rate": 0.0005, "loss": 1.8772, "step": 146800 }, { "epoch": 11.52, "learning_rate": 0.0005, "loss": 1.8325, "step": 146900 }, { "epoch": 11.53, "learning_rate": 0.0005, "loss": 1.8753, "step": 147000 }, { "epoch": 11.54, "learning_rate": 0.0005, "loss": 1.8868, "step": 147100 }, { "epoch": 11.54, "learning_rate": 0.0005, "loss": 1.8634, "step": 147200 }, { "epoch": 11.55, "learning_rate": 0.0005, "loss": 1.8759, "step": 147300 }, { "epoch": 11.56, "learning_rate": 0.0005, "loss": 1.8571, "step": 147400 }, { "epoch": 11.57, "learning_rate": 0.0005, "loss": 1.8848, "step": 147500 }, { "epoch": 11.57, "learning_rate": 0.0005, "loss": 1.8947, "step": 147600 }, { "epoch": 11.58, "learning_rate": 0.0005, "loss": 1.8594, "step": 147700 }, { "epoch": 11.59, "learning_rate": 0.0005, "loss": 1.8686, "step": 147800 }, { "epoch": 11.6, "learning_rate": 0.0005, "loss": 1.8718, "step": 147900 }, { "epoch": 11.61, "learning_rate": 0.0005, "loss": 1.8632, "step": 148000 }, { "epoch": 11.61, "learning_rate": 0.0005, "loss": 1.8723, "step": 148100 }, { "epoch": 11.62, "learning_rate": 0.0005, "loss": 1.845, "step": 148200 }, { "epoch": 11.63, "learning_rate": 0.0005, "loss": 1.9104, "step": 148300 }, { "epoch": 11.64, "learning_rate": 0.0005, "loss": 1.8627, "step": 148400 }, { "epoch": 11.65, "learning_rate": 0.0005, "loss": 1.86, "step": 148500 }, { "epoch": 11.65, "learning_rate": 0.0005, "loss": 1.8749, "step": 148600 }, { "epoch": 11.66, "learning_rate": 0.0005, "loss": 1.8809, "step": 148700 }, { "epoch": 11.67, "learning_rate": 0.0005, "loss": 1.8597, "step": 148800 }, { "epoch": 11.68, "learning_rate": 0.0005, "loss": 1.8427, "step": 148900 }, { "epoch": 11.68, "learning_rate": 0.0005, "loss": 1.8585, "step": 149000 }, { "epoch": 11.69, "learning_rate": 0.0005, "loss": 1.8673, "step": 149100 }, { "epoch": 11.7, "learning_rate": 0.0005, "loss": 1.871, "step": 149200 }, { "epoch": 11.71, "learning_rate": 0.0005, "loss": 1.8847, "step": 149300 }, { "epoch": 11.72, "learning_rate": 0.0005, "loss": 1.8439, "step": 149400 }, { "epoch": 11.72, "learning_rate": 0.0005, "loss": 1.8545, "step": 149500 }, { "epoch": 11.73, "learning_rate": 0.0005, "loss": 1.873, "step": 149600 }, { "epoch": 11.74, "learning_rate": 0.0005, "loss": 1.8647, "step": 149700 }, { "epoch": 11.75, "learning_rate": 0.0005, "loss": 1.8725, "step": 149800 }, { "epoch": 11.76, "learning_rate": 0.0005, "loss": 1.8905, "step": 149900 }, { "epoch": 11.76, "learning_rate": 0.0005, "loss": 1.8758, "step": 150000 }, { "epoch": 11.76, "eval_gen_len": 18.80771607663106, "eval_loss": 2.0203864574432373, "eval_rouge1": 34.6581, "eval_rouge2": 13.5851, "eval_rougeL": 28.3861, "eval_rougeLsum": 28.3839, "eval_runtime": 370.8862, "eval_samples_per_second": 30.54, "eval_steps_per_second": 1.909, "step": 150000 }, { "epoch": 11.77, "learning_rate": 0.0005, "loss": 1.8741, "step": 150100 }, { "epoch": 11.78, "learning_rate": 0.0005, "loss": 1.8762, "step": 150200 }, { "epoch": 11.79, "learning_rate": 0.0005, "loss": 1.8504, "step": 150300 }, { "epoch": 11.79, "learning_rate": 0.0005, "loss": 1.8605, "step": 150400 }, { "epoch": 11.8, "learning_rate": 0.0005, "loss": 1.8651, "step": 150500 }, { "epoch": 11.81, "learning_rate": 0.0005, "loss": 1.8913, "step": 150600 }, { "epoch": 11.82, "learning_rate": 0.0005, "loss": 1.9204, "step": 150700 }, { "epoch": 11.83, "learning_rate": 0.0005, "loss": 1.8737, "step": 150800 }, { "epoch": 11.83, "learning_rate": 0.0005, "loss": 1.8794, "step": 150900 }, { "epoch": 11.84, "learning_rate": 0.0005, "loss": 1.9059, "step": 151000 }, { "epoch": 11.85, "learning_rate": 0.0005, "loss": 1.9089, "step": 151100 }, { "epoch": 11.86, "learning_rate": 0.0005, "loss": 1.8743, "step": 151200 }, { "epoch": 11.86, "learning_rate": 0.0005, "loss": 1.8677, "step": 151300 }, { "epoch": 11.87, "learning_rate": 0.0005, "loss": 1.8821, "step": 151400 }, { "epoch": 11.88, "learning_rate": 0.0005, "loss": 1.8667, "step": 151500 }, { "epoch": 11.89, "learning_rate": 0.0005, "loss": 1.8949, "step": 151600 }, { "epoch": 11.9, "learning_rate": 0.0005, "loss": 1.8714, "step": 151700 }, { "epoch": 11.9, "learning_rate": 0.0005, "loss": 1.8637, "step": 151800 }, { "epoch": 11.91, "learning_rate": 0.0005, "loss": 1.8878, "step": 151900 }, { "epoch": 11.92, "learning_rate": 0.0005, "loss": 1.8625, "step": 152000 }, { "epoch": 11.93, "learning_rate": 0.0005, "loss": 1.8858, "step": 152100 }, { "epoch": 11.94, "learning_rate": 0.0005, "loss": 1.8811, "step": 152200 }, { "epoch": 11.94, "learning_rate": 0.0005, "loss": 1.8735, "step": 152300 }, { "epoch": 11.95, "learning_rate": 0.0005, "loss": 1.8728, "step": 152400 }, { "epoch": 11.96, "learning_rate": 0.0005, "loss": 1.8751, "step": 152500 }, { "epoch": 11.97, "learning_rate": 0.0005, "loss": 1.8996, "step": 152600 }, { "epoch": 11.97, "learning_rate": 0.0005, "loss": 1.9027, "step": 152700 }, { "epoch": 11.98, "learning_rate": 0.0005, "loss": 1.8875, "step": 152800 }, { "epoch": 11.99, "learning_rate": 0.0005, "loss": 1.8858, "step": 152900 }, { "epoch": 12.0, "learning_rate": 0.0005, "loss": 1.8822, "step": 153000 }, { "epoch": 12.01, "learning_rate": 0.0005, "loss": 1.8282, "step": 153100 }, { "epoch": 12.01, "learning_rate": 0.0005, "loss": 1.7881, "step": 153200 }, { "epoch": 12.02, "learning_rate": 0.0005, "loss": 1.7834, "step": 153300 }, { "epoch": 12.03, "learning_rate": 0.0005, "loss": 1.7981, "step": 153400 }, { "epoch": 12.04, "learning_rate": 0.0005, "loss": 1.796, "step": 153500 }, { "epoch": 12.05, "learning_rate": 0.0005, "loss": 1.7744, "step": 153600 }, { "epoch": 12.05, "learning_rate": 0.0005, "loss": 1.7785, "step": 153700 }, { "epoch": 12.06, "learning_rate": 0.0005, "loss": 1.7937, "step": 153800 }, { "epoch": 12.07, "learning_rate": 0.0005, "loss": 1.7746, "step": 153900 }, { "epoch": 12.08, "learning_rate": 0.0005, "loss": 1.8114, "step": 154000 }, { "epoch": 12.08, "learning_rate": 0.0005, "loss": 1.814, "step": 154100 }, { "epoch": 12.09, "learning_rate": 0.0005, "loss": 1.7709, "step": 154200 }, { "epoch": 12.1, "learning_rate": 0.0005, "loss": 1.7994, "step": 154300 }, { "epoch": 12.11, "learning_rate": 0.0005, "loss": 1.8088, "step": 154400 }, { "epoch": 12.12, "learning_rate": 0.0005, "loss": 1.8016, "step": 154500 }, { "epoch": 12.12, "learning_rate": 0.0005, "loss": 1.8205, "step": 154600 }, { "epoch": 12.13, "learning_rate": 0.0005, "loss": 1.826, "step": 154700 }, { "epoch": 12.14, "learning_rate": 0.0005, "loss": 1.8543, "step": 154800 }, { "epoch": 12.15, "learning_rate": 0.0005, "loss": 1.8301, "step": 154900 }, { "epoch": 12.15, "learning_rate": 0.0005, "loss": 1.797, "step": 155000 }, { "epoch": 12.16, "learning_rate": 0.0005, "loss": 1.8202, "step": 155100 }, { "epoch": 12.17, "learning_rate": 0.0005, "loss": 1.8305, "step": 155200 }, { "epoch": 12.18, "learning_rate": 0.0005, "loss": 1.8421, "step": 155300 }, { "epoch": 12.19, "learning_rate": 0.0005, "loss": 1.8219, "step": 155400 }, { "epoch": 12.19, "learning_rate": 0.0005, "loss": 1.848, "step": 155500 }, { "epoch": 12.2, "learning_rate": 0.0005, "loss": 1.8103, "step": 155600 }, { "epoch": 12.21, "learning_rate": 0.0005, "loss": 1.8199, "step": 155700 }, { "epoch": 12.22, "learning_rate": 0.0005, "loss": 1.8423, "step": 155800 }, { "epoch": 12.23, "learning_rate": 0.0005, "loss": 1.8153, "step": 155900 }, { "epoch": 12.23, "learning_rate": 0.0005, "loss": 1.8072, "step": 156000 }, { "epoch": 12.24, "learning_rate": 0.0005, "loss": 1.8351, "step": 156100 }, { "epoch": 12.25, "learning_rate": 0.0005, "loss": 1.8175, "step": 156200 }, { "epoch": 12.26, "learning_rate": 0.0005, "loss": 1.8411, "step": 156300 }, { "epoch": 12.26, "learning_rate": 0.0005, "loss": 1.8558, "step": 156400 }, { "epoch": 12.27, "learning_rate": 0.0005, "loss": 1.83, "step": 156500 }, { "epoch": 12.28, "learning_rate": 0.0005, "loss": 1.8168, "step": 156600 }, { "epoch": 12.29, "learning_rate": 0.0005, "loss": 1.8293, "step": 156700 }, { "epoch": 12.3, "learning_rate": 0.0005, "loss": 1.8293, "step": 156800 }, { "epoch": 12.3, "learning_rate": 0.0005, "loss": 1.8264, "step": 156900 }, { "epoch": 12.31, "learning_rate": 0.0005, "loss": 1.832, "step": 157000 }, { "epoch": 12.32, "learning_rate": 0.0005, "loss": 1.8167, "step": 157100 }, { "epoch": 12.33, "learning_rate": 0.0005, "loss": 1.8483, "step": 157200 }, { "epoch": 12.34, "learning_rate": 0.0005, "loss": 1.836, "step": 157300 }, { "epoch": 12.34, "learning_rate": 0.0005, "loss": 1.8409, "step": 157400 }, { "epoch": 12.35, "learning_rate": 0.0005, "loss": 1.8207, "step": 157500 }, { "epoch": 12.36, "learning_rate": 0.0005, "loss": 1.8434, "step": 157600 }, { "epoch": 12.37, "learning_rate": 0.0005, "loss": 1.8394, "step": 157700 }, { "epoch": 12.37, "learning_rate": 0.0005, "loss": 1.8232, "step": 157800 }, { "epoch": 12.38, "learning_rate": 0.0005, "loss": 1.8328, "step": 157900 }, { "epoch": 12.39, "learning_rate": 0.0005, "loss": 1.8358, "step": 158000 }, { "epoch": 12.4, "learning_rate": 0.0005, "loss": 1.8278, "step": 158100 }, { "epoch": 12.41, "learning_rate": 0.0005, "loss": 1.7989, "step": 158200 }, { "epoch": 12.41, "learning_rate": 0.0005, "loss": 1.8207, "step": 158300 }, { "epoch": 12.42, "learning_rate": 0.0005, "loss": 1.8147, "step": 158400 }, { "epoch": 12.43, "learning_rate": 0.0005, "loss": 1.8319, "step": 158500 }, { "epoch": 12.44, "learning_rate": 0.0005, "loss": 1.823, "step": 158600 }, { "epoch": 12.45, "learning_rate": 0.0005, "loss": 1.8673, "step": 158700 }, { "epoch": 12.45, "learning_rate": 0.0005, "loss": 1.8437, "step": 158800 }, { "epoch": 12.46, "learning_rate": 0.0005, "loss": 1.8654, "step": 158900 }, { "epoch": 12.47, "learning_rate": 0.0005, "loss": 1.8534, "step": 159000 }, { "epoch": 12.48, "learning_rate": 0.0005, "loss": 1.8327, "step": 159100 }, { "epoch": 12.48, "learning_rate": 0.0005, "loss": 1.8365, "step": 159200 }, { "epoch": 12.49, "learning_rate": 0.0005, "loss": 1.8231, "step": 159300 }, { "epoch": 12.5, "learning_rate": 0.0005, "loss": 1.8104, "step": 159400 }, { "epoch": 12.51, "learning_rate": 0.0005, "loss": 1.8582, "step": 159500 }, { "epoch": 12.52, "learning_rate": 0.0005, "loss": 1.8159, "step": 159600 }, { "epoch": 12.52, "learning_rate": 0.0005, "loss": 1.8491, "step": 159700 }, { "epoch": 12.53, "learning_rate": 0.0005, "loss": 1.8316, "step": 159800 }, { "epoch": 12.54, "learning_rate": 0.0005, "loss": 1.8465, "step": 159900 }, { "epoch": 12.55, "learning_rate": 0.0005, "loss": 1.831, "step": 160000 }, { "epoch": 12.55, "eval_gen_len": 18.794914805332393, "eval_loss": 2.022197961807251, "eval_rouge1": 35.0141, "eval_rouge2": 13.742, "eval_rougeL": 28.6326, "eval_rougeLsum": 28.6266, "eval_runtime": 366.073, "eval_samples_per_second": 30.942, "eval_steps_per_second": 1.934, "step": 160000 }, { "epoch": 12.55, "learning_rate": 0.0005, "loss": 1.8515, "step": 160100 }, { "epoch": 12.56, "learning_rate": 0.0005, "loss": 1.8349, "step": 160200 }, { "epoch": 12.57, "learning_rate": 0.0005, "loss": 1.8328, "step": 160300 }, { "epoch": 12.58, "learning_rate": 0.0005, "loss": 1.8438, "step": 160400 }, { "epoch": 12.59, "learning_rate": 0.0005, "loss": 1.8385, "step": 160500 }, { "epoch": 12.59, "learning_rate": 0.0005, "loss": 1.8492, "step": 160600 }, { "epoch": 12.6, "learning_rate": 0.0005, "loss": 1.8458, "step": 160700 }, { "epoch": 12.61, "learning_rate": 0.0005, "loss": 1.8523, "step": 160800 }, { "epoch": 12.62, "learning_rate": 0.0005, "loss": 1.8413, "step": 160900 }, { "epoch": 12.63, "learning_rate": 0.0005, "loss": 1.8455, "step": 161000 }, { "epoch": 12.63, "learning_rate": 0.0005, "loss": 1.8402, "step": 161100 }, { "epoch": 12.64, "learning_rate": 0.0005, "loss": 1.836, "step": 161200 }, { "epoch": 12.65, "learning_rate": 0.0005, "loss": 1.826, "step": 161300 }, { "epoch": 12.66, "learning_rate": 0.0005, "loss": 1.8346, "step": 161400 }, { "epoch": 12.66, "learning_rate": 0.0005, "loss": 1.8614, "step": 161500 }, { "epoch": 12.67, "learning_rate": 0.0005, "loss": 1.8132, "step": 161600 }, { "epoch": 12.68, "learning_rate": 0.0005, "loss": 1.8829, "step": 161700 }, { "epoch": 12.69, "learning_rate": 0.0005, "loss": 1.8424, "step": 161800 }, { "epoch": 12.7, "learning_rate": 0.0005, "loss": 1.8492, "step": 161900 }, { "epoch": 12.7, "learning_rate": 0.0005, "loss": 1.8379, "step": 162000 }, { "epoch": 12.71, "learning_rate": 0.0005, "loss": 1.8563, "step": 162100 }, { "epoch": 12.72, "learning_rate": 0.0005, "loss": 1.8041, "step": 162200 }, { "epoch": 12.73, "learning_rate": 0.0005, "loss": 1.8593, "step": 162300 }, { "epoch": 12.74, "learning_rate": 0.0005, "loss": 1.8275, "step": 162400 }, { "epoch": 12.74, "learning_rate": 0.0005, "loss": 1.8515, "step": 162500 }, { "epoch": 12.75, "learning_rate": 0.0005, "loss": 1.8346, "step": 162600 }, { "epoch": 12.76, "learning_rate": 0.0005, "loss": 1.856, "step": 162700 }, { "epoch": 12.77, "learning_rate": 0.0005, "loss": 1.8382, "step": 162800 }, { "epoch": 12.77, "learning_rate": 0.0005, "loss": 1.8589, "step": 162900 }, { "epoch": 12.78, "learning_rate": 0.0005, "loss": 1.8555, "step": 163000 }, { "epoch": 12.79, "learning_rate": 0.0005, "loss": 1.8464, "step": 163100 }, { "epoch": 12.8, "learning_rate": 0.0005, "loss": 1.8498, "step": 163200 }, { "epoch": 12.81, "learning_rate": 0.0005, "loss": 1.8542, "step": 163300 }, { "epoch": 12.81, "learning_rate": 0.0005, "loss": 1.8414, "step": 163400 }, { "epoch": 12.82, "learning_rate": 0.0005, "loss": 1.8498, "step": 163500 }, { "epoch": 12.83, "learning_rate": 0.0005, "loss": 1.8572, "step": 163600 }, { "epoch": 12.84, "learning_rate": 0.0005, "loss": 1.8524, "step": 163700 }, { "epoch": 12.85, "learning_rate": 0.0005, "loss": 1.8547, "step": 163800 }, { "epoch": 12.85, "learning_rate": 0.0005, "loss": 1.8326, "step": 163900 }, { "epoch": 12.86, "learning_rate": 0.0005, "loss": 1.8452, "step": 164000 }, { "epoch": 12.87, "learning_rate": 0.0005, "loss": 1.8259, "step": 164100 }, { "epoch": 12.88, "learning_rate": 0.0005, "loss": 1.8649, "step": 164200 }, { "epoch": 12.88, "learning_rate": 0.0005, "loss": 1.8535, "step": 164300 }, { "epoch": 12.89, "learning_rate": 0.0005, "loss": 1.8384, "step": 164400 }, { "epoch": 12.9, "learning_rate": 0.0005, "loss": 1.8549, "step": 164500 }, { "epoch": 12.91, "learning_rate": 0.0005, "loss": 1.888, "step": 164600 }, { "epoch": 12.92, "learning_rate": 0.0005, "loss": 1.8618, "step": 164700 }, { "epoch": 12.92, "learning_rate": 0.0005, "loss": 1.8834, "step": 164800 }, { "epoch": 12.93, "learning_rate": 0.0005, "loss": 1.8608, "step": 164900 }, { "epoch": 12.94, "learning_rate": 0.0005, "loss": 1.8636, "step": 165000 }, { "epoch": 12.95, "learning_rate": 0.0005, "loss": 1.8552, "step": 165100 }, { "epoch": 12.95, "learning_rate": 0.0005, "loss": 1.8288, "step": 165200 }, { "epoch": 12.96, "learning_rate": 0.0005, "loss": 1.8426, "step": 165300 }, { "epoch": 12.97, "learning_rate": 0.0005, "loss": 1.8586, "step": 165400 }, { "epoch": 12.98, "learning_rate": 0.0005, "loss": 1.8693, "step": 165500 }, { "epoch": 12.99, "learning_rate": 0.0005, "loss": 1.8556, "step": 165600 }, { "epoch": 12.99, "learning_rate": 0.0005, "loss": 1.8689, "step": 165700 }, { "epoch": 13.0, "learning_rate": 0.0005, "loss": 1.8191, "step": 165800 }, { "epoch": 13.01, "learning_rate": 0.0005, "loss": 1.7918, "step": 165900 }, { "epoch": 13.02, "learning_rate": 0.0005, "loss": 1.8066, "step": 166000 }, { "epoch": 13.03, "learning_rate": 0.0005, "loss": 1.7596, "step": 166100 }, { "epoch": 13.03, "learning_rate": 0.0005, "loss": 1.7748, "step": 166200 }, { "epoch": 13.04, "learning_rate": 0.0005, "loss": 1.7756, "step": 166300 }, { "epoch": 13.05, "learning_rate": 0.0005, "loss": 1.7872, "step": 166400 }, { "epoch": 13.06, "learning_rate": 0.0005, "loss": 1.78, "step": 166500 }, { "epoch": 13.06, "learning_rate": 0.0005, "loss": 1.7967, "step": 166600 }, { "epoch": 13.07, "learning_rate": 0.0005, "loss": 1.7719, "step": 166700 }, { "epoch": 13.08, "learning_rate": 0.0005, "loss": 1.7773, "step": 166800 }, { "epoch": 13.09, "learning_rate": 0.0005, "loss": 1.7931, "step": 166900 }, { "epoch": 13.1, "learning_rate": 0.0005, "loss": 1.7814, "step": 167000 }, { "epoch": 13.1, "learning_rate": 0.0005, "loss": 1.7707, "step": 167100 }, { "epoch": 13.11, "learning_rate": 0.0005, "loss": 1.7711, "step": 167200 }, { "epoch": 13.12, "learning_rate": 0.0005, "loss": 1.7758, "step": 167300 }, { "epoch": 13.13, "learning_rate": 0.0005, "loss": 1.772, "step": 167400 }, { "epoch": 13.14, "learning_rate": 0.0005, "loss": 1.7794, "step": 167500 }, { "epoch": 13.14, "learning_rate": 0.0005, "loss": 1.8027, "step": 167600 }, { "epoch": 13.15, "learning_rate": 0.0005, "loss": 1.7688, "step": 167700 }, { "epoch": 13.16, "learning_rate": 0.0005, "loss": 1.7958, "step": 167800 }, { "epoch": 13.17, "learning_rate": 0.0005, "loss": 1.8164, "step": 167900 }, { "epoch": 13.17, "learning_rate": 0.0005, "loss": 1.7801, "step": 168000 }, { "epoch": 13.18, "learning_rate": 0.0005, "loss": 1.7964, "step": 168100 }, { "epoch": 13.19, "learning_rate": 0.0005, "loss": 1.784, "step": 168200 }, { "epoch": 13.2, "learning_rate": 0.0005, "loss": 1.8051, "step": 168300 }, { "epoch": 13.21, "learning_rate": 0.0005, "loss": 1.7861, "step": 168400 }, { "epoch": 13.21, "learning_rate": 0.0005, "loss": 1.7915, "step": 168500 }, { "epoch": 13.22, "learning_rate": 0.0005, "loss": 1.8157, "step": 168600 }, { "epoch": 13.23, "learning_rate": 0.0005, "loss": 1.7867, "step": 168700 }, { "epoch": 13.24, "learning_rate": 0.0005, "loss": 1.7561, "step": 168800 }, { "epoch": 13.24, "learning_rate": 0.0005, "loss": 1.7943, "step": 168900 }, { "epoch": 13.25, "learning_rate": 0.0005, "loss": 1.7808, "step": 169000 }, { "epoch": 13.26, "learning_rate": 0.0005, "loss": 1.7856, "step": 169100 }, { "epoch": 13.27, "learning_rate": 0.0005, "loss": 1.8041, "step": 169200 }, { "epoch": 13.28, "learning_rate": 0.0005, "loss": 1.8003, "step": 169300 }, { "epoch": 13.28, "learning_rate": 0.0005, "loss": 1.8013, "step": 169400 }, { "epoch": 13.29, "learning_rate": 0.0005, "loss": 1.7939, "step": 169500 }, { "epoch": 13.3, "learning_rate": 0.0005, "loss": 1.8059, "step": 169600 }, { "epoch": 13.31, "learning_rate": 0.0005, "loss": 1.8036, "step": 169700 }, { "epoch": 13.32, "learning_rate": 0.0005, "loss": 1.7763, "step": 169800 }, { "epoch": 13.32, "learning_rate": 0.0005, "loss": 1.7864, "step": 169900 }, { "epoch": 13.33, "learning_rate": 0.0005, "loss": 1.8027, "step": 170000 }, { "epoch": 13.33, "eval_gen_len": 18.75721726847356, "eval_loss": 2.0271575450897217, "eval_rouge1": 35.2864, "eval_rouge2": 13.9499, "eval_rougeL": 28.9284, "eval_rougeLsum": 28.9215, "eval_runtime": 360.6463, "eval_samples_per_second": 31.408, "eval_steps_per_second": 1.963, "step": 170000 }, { "epoch": 13.34, "learning_rate": 0.0005, "loss": 1.8209, "step": 170100 }, { "epoch": 13.35, "learning_rate": 0.0005, "loss": 1.7839, "step": 170200 }, { "epoch": 13.35, "learning_rate": 0.0005, "loss": 1.8168, "step": 170300 }, { "epoch": 13.36, "learning_rate": 0.0005, "loss": 1.8084, "step": 170400 }, { "epoch": 13.37, "learning_rate": 0.0005, "loss": 1.7727, "step": 170500 }, { "epoch": 13.38, "learning_rate": 0.0005, "loss": 1.7981, "step": 170600 }, { "epoch": 13.39, "learning_rate": 0.0005, "loss": 1.806, "step": 170700 }, { "epoch": 13.39, "learning_rate": 0.0005, "loss": 1.7888, "step": 170800 }, { "epoch": 13.4, "learning_rate": 0.0005, "loss": 1.8181, "step": 170900 }, { "epoch": 13.41, "learning_rate": 0.0005, "loss": 1.8123, "step": 171000 }, { "epoch": 13.42, "learning_rate": 0.0005, "loss": 1.8251, "step": 171100 }, { "epoch": 13.43, "learning_rate": 0.0005, "loss": 1.8066, "step": 171200 }, { "epoch": 13.43, "learning_rate": 0.0005, "loss": 1.7705, "step": 171300 }, { "epoch": 13.44, "learning_rate": 0.0005, "loss": 1.7931, "step": 171400 }, { "epoch": 13.45, "learning_rate": 0.0005, "loss": 1.8041, "step": 171500 }, { "epoch": 13.46, "learning_rate": 0.0005, "loss": 1.8164, "step": 171600 }, { "epoch": 13.46, "learning_rate": 0.0005, "loss": 1.8147, "step": 171700 }, { "epoch": 13.47, "learning_rate": 0.0005, "loss": 1.8222, "step": 171800 }, { "epoch": 13.48, "learning_rate": 0.0005, "loss": 1.8062, "step": 171900 }, { "epoch": 13.49, "learning_rate": 0.0005, "loss": 1.8085, "step": 172000 }, { "epoch": 13.5, "learning_rate": 0.0005, "loss": 1.8116, "step": 172100 }, { "epoch": 13.5, "learning_rate": 0.0005, "loss": 1.8293, "step": 172200 }, { "epoch": 13.51, "learning_rate": 0.0005, "loss": 1.8549, "step": 172300 }, { "epoch": 13.52, "learning_rate": 0.0005, "loss": 1.8357, "step": 172400 }, { "epoch": 13.53, "learning_rate": 0.0005, "loss": 1.8385, "step": 172500 }, { "epoch": 13.54, "learning_rate": 0.0005, "loss": 1.8104, "step": 172600 }, { "epoch": 13.54, "learning_rate": 0.0005, "loss": 1.8192, "step": 172700 }, { "epoch": 13.55, "learning_rate": 0.0005, "loss": 1.7848, "step": 172800 }, { "epoch": 13.56, "learning_rate": 0.0005, "loss": 1.826, "step": 172900 }, { "epoch": 13.57, "learning_rate": 0.0005, "loss": 1.812, "step": 173000 }, { "epoch": 13.57, "learning_rate": 0.0005, "loss": 1.8255, "step": 173100 }, { "epoch": 13.58, "learning_rate": 0.0005, "loss": 1.8037, "step": 173200 }, { "epoch": 13.59, "learning_rate": 0.0005, "loss": 1.8181, "step": 173300 }, { "epoch": 13.6, "learning_rate": 0.0005, "loss": 1.8365, "step": 173400 }, { "epoch": 13.61, "learning_rate": 0.0005, "loss": 1.8409, "step": 173500 }, { "epoch": 13.61, "learning_rate": 0.0005, "loss": 1.8047, "step": 173600 }, { "epoch": 13.62, "learning_rate": 0.0005, "loss": 1.8418, "step": 173700 }, { "epoch": 13.63, "learning_rate": 0.0005, "loss": 1.8107, "step": 173800 }, { "epoch": 13.64, "learning_rate": 0.0005, "loss": 1.8037, "step": 173900 }, { "epoch": 13.64, "learning_rate": 0.0005, "loss": 1.8453, "step": 174000 }, { "epoch": 13.65, "learning_rate": 0.0005, "loss": 1.8125, "step": 174100 }, { "epoch": 13.66, "learning_rate": 0.0005, "loss": 1.8392, "step": 174200 }, { "epoch": 13.67, "learning_rate": 0.0005, "loss": 1.7939, "step": 174300 }, { "epoch": 13.68, "learning_rate": 0.0005, "loss": 1.8239, "step": 174400 }, { "epoch": 13.68, "learning_rate": 0.0005, "loss": 1.8236, "step": 174500 }, { "epoch": 13.69, "learning_rate": 0.0005, "loss": 1.8072, "step": 174600 }, { "epoch": 13.7, "learning_rate": 0.0005, "loss": 1.8417, "step": 174700 }, { "epoch": 13.71, "learning_rate": 0.0005, "loss": 1.8382, "step": 174800 }, { "epoch": 13.72, "learning_rate": 0.0005, "loss": 1.7897, "step": 174900 }, { "epoch": 13.72, "learning_rate": 0.0005, "loss": 1.8221, "step": 175000 }, { "epoch": 13.73, "learning_rate": 0.0005, "loss": 1.8289, "step": 175100 }, { "epoch": 13.74, "learning_rate": 0.0005, "loss": 1.8416, "step": 175200 }, { "epoch": 13.75, "learning_rate": 0.0005, "loss": 1.8429, "step": 175300 }, { "epoch": 13.75, "learning_rate": 0.0005, "loss": 1.8408, "step": 175400 }, { "epoch": 13.76, "learning_rate": 0.0005, "loss": 1.836, "step": 175500 }, { "epoch": 13.77, "learning_rate": 0.0005, "loss": 1.8222, "step": 175600 }, { "epoch": 13.78, "learning_rate": 0.0005, "loss": 1.8146, "step": 175700 }, { "epoch": 13.79, "learning_rate": 0.0005, "loss": 1.8134, "step": 175800 }, { "epoch": 13.79, "learning_rate": 0.0005, "loss": 1.7945, "step": 175900 }, { "epoch": 13.8, "learning_rate": 0.0005, "loss": 1.8222, "step": 176000 }, { "epoch": 13.81, "learning_rate": 0.0005, "loss": 1.8414, "step": 176100 }, { "epoch": 13.82, "learning_rate": 0.0005, "loss": 1.8085, "step": 176200 }, { "epoch": 13.83, "learning_rate": 0.0005, "loss": 1.8338, "step": 176300 }, { "epoch": 13.83, "learning_rate": 0.0005, "loss": 1.8468, "step": 176400 }, { "epoch": 13.84, "learning_rate": 0.0005, "loss": 1.8403, "step": 176500 }, { "epoch": 13.85, "learning_rate": 0.0005, "loss": 1.8421, "step": 176600 }, { "epoch": 13.86, "learning_rate": 0.0005, "loss": 1.8111, "step": 176700 }, { "epoch": 13.86, "learning_rate": 0.0005, "loss": 1.8125, "step": 176800 }, { "epoch": 13.87, "learning_rate": 0.0005, "loss": 1.8113, "step": 176900 }, { "epoch": 13.88, "learning_rate": 0.0005, "loss": 1.8023, "step": 177000 }, { "epoch": 13.89, "learning_rate": 0.0005, "loss": 1.8249, "step": 177100 }, { "epoch": 13.9, "learning_rate": 0.0005, "loss": 1.8316, "step": 177200 }, { "epoch": 13.9, "learning_rate": 0.0005, "loss": 1.8266, "step": 177300 }, { "epoch": 13.91, "learning_rate": 0.0005, "loss": 1.8324, "step": 177400 }, { "epoch": 13.92, "learning_rate": 0.0005, "loss": 1.8145, "step": 177500 }, { "epoch": 13.93, "learning_rate": 0.0005, "loss": 1.8484, "step": 177600 }, { "epoch": 13.94, "learning_rate": 0.0005, "loss": 1.8215, "step": 177700 }, { "epoch": 13.94, "learning_rate": 0.0005, "loss": 1.83, "step": 177800 }, { "epoch": 13.95, "learning_rate": 0.0005, "loss": 1.8315, "step": 177900 }, { "epoch": 13.96, "learning_rate": 0.0005, "loss": 1.882, "step": 178000 }, { "epoch": 13.97, "learning_rate": 0.0005, "loss": 1.8308, "step": 178100 }, { "epoch": 13.97, "learning_rate": 0.0005, "loss": 1.8354, "step": 178200 }, { "epoch": 13.98, "learning_rate": 0.0005, "loss": 1.8254, "step": 178300 }, { "epoch": 13.99, "learning_rate": 0.0005, "loss": 1.8696, "step": 178400 }, { "epoch": 14.0, "learning_rate": 0.0005, "loss": 1.8469, "step": 178500 }, { "epoch": 14.01, "learning_rate": 0.0005, "loss": 1.7778, "step": 178600 }, { "epoch": 14.01, "learning_rate": 0.0005, "loss": 1.7202, "step": 178700 }, { "epoch": 14.02, "learning_rate": 0.0005, "loss": 1.7704, "step": 178800 }, { "epoch": 14.03, "learning_rate": 0.0005, "loss": 1.7825, "step": 178900 }, { "epoch": 14.04, "learning_rate": 0.0005, "loss": 1.7526, "step": 179000 }, { "epoch": 14.04, "learning_rate": 0.0005, "loss": 1.777, "step": 179100 }, { "epoch": 14.05, "learning_rate": 0.0005, "loss": 1.7551, "step": 179200 }, { "epoch": 14.06, "learning_rate": 0.0005, "loss": 1.7501, "step": 179300 }, { "epoch": 14.07, "learning_rate": 0.0005, "loss": 1.7711, "step": 179400 }, { "epoch": 14.08, "learning_rate": 0.0005, "loss": 1.756, "step": 179500 }, { "epoch": 14.08, "learning_rate": 0.0005, "loss": 1.76, "step": 179600 }, { "epoch": 14.09, "learning_rate": 0.0005, "loss": 1.7467, "step": 179700 }, { "epoch": 14.1, "learning_rate": 0.0005, "loss": 1.7563, "step": 179800 }, { "epoch": 14.11, "learning_rate": 0.0005, "loss": 1.7338, "step": 179900 }, { "epoch": 14.12, "learning_rate": 0.0005, "loss": 1.7544, "step": 180000 }, { "epoch": 14.12, "eval_gen_len": 18.749006797916483, "eval_loss": 2.0176799297332764, "eval_rouge1": 35.1221, "eval_rouge2": 13.8252, "eval_rougeL": 28.7503, "eval_rougeLsum": 28.7366, "eval_runtime": 359.2813, "eval_samples_per_second": 31.527, "eval_steps_per_second": 1.971, "step": 180000 }, { "epoch": 14.12, "learning_rate": 0.0005, "loss": 1.7795, "step": 180100 }, { "epoch": 14.13, "learning_rate": 0.0005, "loss": 1.7548, "step": 180200 }, { "epoch": 14.14, "learning_rate": 0.0005, "loss": 1.7473, "step": 180300 }, { "epoch": 14.15, "learning_rate": 0.0005, "loss": 1.7341, "step": 180400 }, { "epoch": 14.15, "learning_rate": 0.0005, "loss": 1.7936, "step": 180500 }, { "epoch": 14.16, "learning_rate": 0.0005, "loss": 1.7506, "step": 180600 }, { "epoch": 14.17, "learning_rate": 0.0005, "loss": 1.7988, "step": 180700 }, { "epoch": 14.18, "learning_rate": 0.0005, "loss": 1.7907, "step": 180800 }, { "epoch": 14.19, "learning_rate": 0.0005, "loss": 1.7804, "step": 180900 }, { "epoch": 14.19, "learning_rate": 0.0005, "loss": 1.7554, "step": 181000 }, { "epoch": 14.2, "learning_rate": 0.0005, "loss": 1.7807, "step": 181100 }, { "epoch": 14.21, "learning_rate": 0.0005, "loss": 1.7703, "step": 181200 }, { "epoch": 14.22, "learning_rate": 0.0005, "loss": 1.7901, "step": 181300 }, { "epoch": 14.23, "learning_rate": 0.0005, "loss": 1.796, "step": 181400 }, { "epoch": 14.23, "learning_rate": 0.0005, "loss": 1.789, "step": 181500 }, { "epoch": 14.24, "learning_rate": 0.0005, "loss": 1.7764, "step": 181600 }, { "epoch": 14.25, "learning_rate": 0.0005, "loss": 1.7761, "step": 181700 }, { "epoch": 14.26, "learning_rate": 0.0005, "loss": 1.7824, "step": 181800 }, { "epoch": 14.26, "learning_rate": 0.0005, "loss": 1.7522, "step": 181900 }, { "epoch": 14.27, "learning_rate": 0.0005, "loss": 1.7723, "step": 182000 }, { "epoch": 14.28, "learning_rate": 0.0005, "loss": 1.7678, "step": 182100 }, { "epoch": 14.29, "learning_rate": 0.0005, "loss": 1.7886, "step": 182200 }, { "epoch": 14.3, "learning_rate": 0.0005, "loss": 1.7694, "step": 182300 }, { "epoch": 14.3, "learning_rate": 0.0005, "loss": 1.7767, "step": 182400 }, { "epoch": 14.31, "learning_rate": 0.0005, "loss": 1.7684, "step": 182500 }, { "epoch": 14.32, "learning_rate": 0.0005, "loss": 1.7488, "step": 182600 }, { "epoch": 14.33, "learning_rate": 0.0005, "loss": 1.7981, "step": 182700 }, { "epoch": 14.34, "learning_rate": 0.0005, "loss": 1.7871, "step": 182800 }, { "epoch": 14.34, "learning_rate": 0.0005, "loss": 1.7587, "step": 182900 }, { "epoch": 14.35, "learning_rate": 0.0005, "loss": 1.8053, "step": 183000 }, { "epoch": 14.36, "learning_rate": 0.0005, "loss": 1.7758, "step": 183100 }, { "epoch": 14.37, "learning_rate": 0.0005, "loss": 1.7966, "step": 183200 }, { "epoch": 14.37, "learning_rate": 0.0005, "loss": 1.7478, "step": 183300 }, { "epoch": 14.38, "learning_rate": 0.0005, "loss": 1.7651, "step": 183400 }, { "epoch": 14.39, "learning_rate": 0.0005, "loss": 1.7924, "step": 183500 }, { "epoch": 14.4, "learning_rate": 0.0005, "loss": 1.7729, "step": 183600 }, { "epoch": 14.41, "learning_rate": 0.0005, "loss": 1.7818, "step": 183700 }, { "epoch": 14.41, "learning_rate": 0.0005, "loss": 1.7805, "step": 183800 }, { "epoch": 14.42, "learning_rate": 0.0005, "loss": 1.8025, "step": 183900 }, { "epoch": 14.43, "learning_rate": 0.0005, "loss": 1.7769, "step": 184000 }, { "epoch": 14.44, "learning_rate": 0.0005, "loss": 1.798, "step": 184100 }, { "epoch": 14.44, "learning_rate": 0.0005, "loss": 1.7751, "step": 184200 }, { "epoch": 14.45, "learning_rate": 0.0005, "loss": 1.7869, "step": 184300 }, { "epoch": 14.46, "learning_rate": 0.0005, "loss": 1.7882, "step": 184400 }, { "epoch": 14.47, "learning_rate": 0.0005, "loss": 1.7908, "step": 184500 }, { "epoch": 14.48, "learning_rate": 0.0005, "loss": 1.819, "step": 184600 }, { "epoch": 14.48, "learning_rate": 0.0005, "loss": 1.794, "step": 184700 }, { "epoch": 14.49, "learning_rate": 0.0005, "loss": 1.7894, "step": 184800 }, { "epoch": 14.5, "learning_rate": 0.0005, "loss": 1.8124, "step": 184900 }, { "epoch": 14.51, "learning_rate": 0.0005, "loss": 1.7684, "step": 185000 }, { "epoch": 14.52, "learning_rate": 0.0005, "loss": 1.7929, "step": 185100 }, { "epoch": 14.52, "learning_rate": 0.0005, "loss": 1.7906, "step": 185200 }, { "epoch": 14.53, "learning_rate": 0.0005, "loss": 1.7852, "step": 185300 }, { "epoch": 14.54, "learning_rate": 0.0005, "loss": 1.7982, "step": 185400 }, { "epoch": 14.55, "learning_rate": 0.0005, "loss": 1.8103, "step": 185500 }, { "epoch": 14.55, "learning_rate": 0.0005, "loss": 1.8011, "step": 185600 }, { "epoch": 14.56, "learning_rate": 0.0005, "loss": 1.77, "step": 185700 }, { "epoch": 14.57, "learning_rate": 0.0005, "loss": 1.7581, "step": 185800 }, { "epoch": 14.58, "learning_rate": 0.0005, "loss": 1.7856, "step": 185900 }, { "epoch": 14.59, "learning_rate": 0.0005, "loss": 1.7908, "step": 186000 }, { "epoch": 14.59, "learning_rate": 0.0005, "loss": 1.7839, "step": 186100 }, { "epoch": 14.6, "learning_rate": 0.0005, "loss": 1.8084, "step": 186200 }, { "epoch": 14.61, "learning_rate": 0.0005, "loss": 1.7967, "step": 186300 }, { "epoch": 14.62, "learning_rate": 0.0005, "loss": 1.7827, "step": 186400 }, { "epoch": 14.63, "learning_rate": 0.0005, "loss": 1.8011, "step": 186500 }, { "epoch": 14.63, "learning_rate": 0.0005, "loss": 1.8161, "step": 186600 }, { "epoch": 14.64, "learning_rate": 0.0005, "loss": 1.8012, "step": 186700 }, { "epoch": 14.65, "learning_rate": 0.0005, "loss": 1.8069, "step": 186800 }, { "epoch": 14.66, "learning_rate": 0.0005, "loss": 1.8105, "step": 186900 }, { "epoch": 14.66, "learning_rate": 0.0005, "loss": 1.7905, "step": 187000 }, { "epoch": 14.67, "learning_rate": 0.0005, "loss": 1.803, "step": 187100 }, { "epoch": 14.68, "learning_rate": 0.0005, "loss": 1.7951, "step": 187200 }, { "epoch": 14.69, "learning_rate": 0.0005, "loss": 1.8003, "step": 187300 }, { "epoch": 14.7, "learning_rate": 0.0005, "loss": 1.7834, "step": 187400 }, { "epoch": 14.7, "learning_rate": 0.0005, "loss": 1.827, "step": 187500 }, { "epoch": 14.71, "learning_rate": 0.0005, "loss": 1.7933, "step": 187600 }, { "epoch": 14.72, "learning_rate": 0.0005, "loss": 1.8315, "step": 187700 }, { "epoch": 14.73, "learning_rate": 0.0005, "loss": 1.7726, "step": 187800 }, { "epoch": 14.73, "learning_rate": 0.0005, "loss": 1.7941, "step": 187900 }, { "epoch": 14.74, "learning_rate": 0.0005, "loss": 1.7815, "step": 188000 }, { "epoch": 14.75, "learning_rate": 0.0005, "loss": 1.7911, "step": 188100 }, { "epoch": 14.76, "learning_rate": 0.0005, "loss": 1.8138, "step": 188200 }, { "epoch": 14.77, "learning_rate": 0.0005, "loss": 1.8124, "step": 188300 }, { "epoch": 14.77, "learning_rate": 0.0005, "loss": 1.7925, "step": 188400 }, { "epoch": 14.78, "learning_rate": 0.0005, "loss": 1.7887, "step": 188500 }, { "epoch": 14.79, "learning_rate": 0.0005, "loss": 1.8255, "step": 188600 }, { "epoch": 14.8, "learning_rate": 0.0005, "loss": 1.7837, "step": 188700 }, { "epoch": 14.81, "learning_rate": 0.0005, "loss": 1.8091, "step": 188800 }, { "epoch": 14.81, "learning_rate": 0.0005, "loss": 1.8004, "step": 188900 }, { "epoch": 14.82, "learning_rate": 0.0005, "loss": 1.8177, "step": 189000 }, { "epoch": 14.83, "learning_rate": 0.0005, "loss": 1.8072, "step": 189100 }, { "epoch": 14.84, "learning_rate": 0.0005, "loss": 1.8119, "step": 189200 }, { "epoch": 14.84, "learning_rate": 0.0005, "loss": 1.7918, "step": 189300 }, { "epoch": 14.85, "learning_rate": 0.0005, "loss": 1.797, "step": 189400 }, { "epoch": 14.86, "learning_rate": 0.0005, "loss": 1.8045, "step": 189500 }, { "epoch": 14.87, "learning_rate": 0.0005, "loss": 1.815, "step": 189600 }, { "epoch": 14.88, "learning_rate": 0.0005, "loss": 1.8034, "step": 189700 }, { "epoch": 14.88, "learning_rate": 0.0005, "loss": 1.7908, "step": 189800 }, { "epoch": 14.89, "learning_rate": 0.0005, "loss": 1.8085, "step": 189900 }, { "epoch": 14.9, "learning_rate": 0.0005, "loss": 1.8096, "step": 190000 }, { "epoch": 14.9, "eval_gen_len": 18.80727465348283, "eval_loss": 2.011542558670044, "eval_rouge1": 35.1261, "eval_rouge2": 13.9669, "eval_rougeL": 28.8032, "eval_rougeLsum": 28.7986, "eval_runtime": 359.3121, "eval_samples_per_second": 31.524, "eval_steps_per_second": 1.97, "step": 190000 }, { "epoch": 14.91, "learning_rate": 0.0005, "loss": 1.7785, "step": 190100 }, { "epoch": 14.92, "learning_rate": 0.0005, "loss": 1.8173, "step": 190200 }, { "epoch": 14.92, "learning_rate": 0.0005, "loss": 1.8061, "step": 190300 }, { "epoch": 14.93, "learning_rate": 0.0005, "loss": 1.8585, "step": 190400 }, { "epoch": 14.94, "learning_rate": 0.0005, "loss": 1.8302, "step": 190500 }, { "epoch": 14.95, "learning_rate": 0.0005, "loss": 1.8016, "step": 190600 }, { "epoch": 14.95, "learning_rate": 0.0005, "loss": 1.8222, "step": 190700 }, { "epoch": 14.96, "learning_rate": 0.0005, "loss": 1.8069, "step": 190800 }, { "epoch": 14.97, "learning_rate": 0.0005, "loss": 1.8366, "step": 190900 }, { "epoch": 14.98, "learning_rate": 0.0005, "loss": 1.8089, "step": 191000 }, { "epoch": 14.99, "learning_rate": 0.0005, "loss": 1.8057, "step": 191100 }, { "epoch": 14.99, "learning_rate": 0.0005, "loss": 1.8068, "step": 191200 }, { "epoch": 15.0, "learning_rate": 0.0005, "loss": 1.7916, "step": 191300 }, { "epoch": 15.01, "learning_rate": 0.0005, "loss": 1.7352, "step": 191400 }, { "epoch": 15.02, "learning_rate": 0.0005, "loss": 1.7374, "step": 191500 }, { "epoch": 15.03, "learning_rate": 0.0005, "loss": 1.7113, "step": 191600 }, { "epoch": 15.03, "learning_rate": 0.0005, "loss": 1.7363, "step": 191700 }, { "epoch": 15.04, "learning_rate": 0.0005, "loss": 1.759, "step": 191800 }, { "epoch": 15.05, "learning_rate": 0.0005, "loss": 1.7333, "step": 191900 }, { "epoch": 15.06, "learning_rate": 0.0005, "loss": 1.7388, "step": 192000 }, { "epoch": 15.06, "learning_rate": 0.0005, "loss": 1.7501, "step": 192100 }, { "epoch": 15.07, "learning_rate": 0.0005, "loss": 1.7265, "step": 192200 }, { "epoch": 15.08, "learning_rate": 0.0005, "loss": 1.7163, "step": 192300 }, { "epoch": 15.09, "learning_rate": 0.0005, "loss": 1.7372, "step": 192400 }, { "epoch": 15.1, "learning_rate": 0.0005, "loss": 1.7249, "step": 192500 }, { "epoch": 15.1, "learning_rate": 0.0005, "loss": 1.7426, "step": 192600 }, { "epoch": 15.11, "learning_rate": 0.0005, "loss": 1.7574, "step": 192700 }, { "epoch": 15.12, "learning_rate": 0.0005, "loss": 1.7542, "step": 192800 }, { "epoch": 15.13, "learning_rate": 0.0005, "loss": 1.7373, "step": 192900 }, { "epoch": 15.13, "learning_rate": 0.0005, "loss": 1.7739, "step": 193000 }, { "epoch": 15.14, "learning_rate": 0.0005, "loss": 1.7292, "step": 193100 }, { "epoch": 15.15, "learning_rate": 0.0005, "loss": 1.7559, "step": 193200 }, { "epoch": 15.16, "learning_rate": 0.0005, "loss": 1.7664, "step": 193300 }, { "epoch": 15.17, "learning_rate": 0.0005, "loss": 1.775, "step": 193400 }, { "epoch": 15.17, "learning_rate": 0.0005, "loss": 1.7353, "step": 193500 }, { "epoch": 15.18, "learning_rate": 0.0005, "loss": 1.7059, "step": 193600 }, { "epoch": 15.19, "learning_rate": 0.0005, "loss": 1.7496, "step": 193700 }, { "epoch": 15.2, "learning_rate": 0.0005, "loss": 1.7722, "step": 193800 }, { "epoch": 15.21, "learning_rate": 0.0005, "loss": 1.7518, "step": 193900 }, { "epoch": 15.21, "learning_rate": 0.0005, "loss": 1.7517, "step": 194000 }, { "epoch": 15.22, "learning_rate": 0.0005, "loss": 1.7901, "step": 194100 }, { "epoch": 15.23, "learning_rate": 0.0005, "loss": 1.7402, "step": 194200 }, { "epoch": 15.24, "learning_rate": 0.0005, "loss": 1.7294, "step": 194300 }, { "epoch": 15.24, "learning_rate": 0.0005, "loss": 1.7142, "step": 194400 }, { "epoch": 15.25, "learning_rate": 0.0005, "loss": 1.7213, "step": 194500 }, { "epoch": 15.26, "learning_rate": 0.0005, "loss": 1.7418, "step": 194600 }, { "epoch": 15.27, "learning_rate": 0.0005, "loss": 1.7736, "step": 194700 }, { "epoch": 15.28, "learning_rate": 0.0005, "loss": 1.7709, "step": 194800 }, { "epoch": 15.28, "learning_rate": 0.0005, "loss": 1.7365, "step": 194900 }, { "epoch": 15.29, "learning_rate": 0.0005, "loss": 1.7571, "step": 195000 }, { "epoch": 15.3, "learning_rate": 0.0005, "loss": 1.7207, "step": 195100 }, { "epoch": 15.31, "learning_rate": 0.0005, "loss": 1.7616, "step": 195200 }, { "epoch": 15.32, "learning_rate": 0.0005, "loss": 1.752, "step": 195300 }, { "epoch": 15.32, "learning_rate": 0.0005, "loss": 1.737, "step": 195400 }, { "epoch": 15.33, "learning_rate": 0.0005, "loss": 1.7397, "step": 195500 }, { "epoch": 15.34, "learning_rate": 0.0005, "loss": 1.7586, "step": 195600 }, { "epoch": 15.35, "learning_rate": 0.0005, "loss": 1.7357, "step": 195700 }, { "epoch": 15.35, "learning_rate": 0.0005, "loss": 1.742, "step": 195800 }, { "epoch": 15.36, "learning_rate": 0.0005, "loss": 1.7802, "step": 195900 }, { "epoch": 15.37, "learning_rate": 0.0005, "loss": 1.7528, "step": 196000 }, { "epoch": 15.38, "learning_rate": 0.0005, "loss": 1.7203, "step": 196100 }, { "epoch": 15.39, "learning_rate": 0.0005, "loss": 1.7815, "step": 196200 }, { "epoch": 15.39, "learning_rate": 0.0005, "loss": 1.7547, "step": 196300 }, { "epoch": 15.4, "learning_rate": 0.0005, "loss": 1.761, "step": 196400 }, { "epoch": 15.41, "learning_rate": 0.0005, "loss": 1.7246, "step": 196500 }, { "epoch": 15.42, "learning_rate": 0.0005, "loss": 1.7984, "step": 196600 }, { "epoch": 15.43, "learning_rate": 0.0005, "loss": 1.7696, "step": 196700 }, { "epoch": 15.43, "learning_rate": 0.0005, "loss": 1.7424, "step": 196800 }, { "epoch": 15.44, "learning_rate": 0.0005, "loss": 1.7836, "step": 196900 }, { "epoch": 15.45, "learning_rate": 0.0005, "loss": 1.763, "step": 197000 }, { "epoch": 15.46, "learning_rate": 0.0005, "loss": 1.7935, "step": 197100 }, { "epoch": 15.46, "learning_rate": 0.0005, "loss": 1.757, "step": 197200 }, { "epoch": 15.47, "learning_rate": 0.0005, "loss": 1.7406, "step": 197300 }, { "epoch": 15.48, "learning_rate": 0.0005, "loss": 1.7726, "step": 197400 }, { "epoch": 15.49, "learning_rate": 0.0005, "loss": 1.7588, "step": 197500 }, { "epoch": 15.5, "learning_rate": 0.0005, "loss": 1.775, "step": 197600 }, { "epoch": 15.5, "learning_rate": 0.0005, "loss": 1.7454, "step": 197700 }, { "epoch": 15.51, "learning_rate": 0.0005, "loss": 1.7705, "step": 197800 }, { "epoch": 15.52, "learning_rate": 0.0005, "loss": 1.8015, "step": 197900 }, { "epoch": 15.53, "learning_rate": 0.0005, "loss": 1.7881, "step": 198000 }, { "epoch": 15.53, "learning_rate": 0.0005, "loss": 1.7773, "step": 198100 }, { "epoch": 15.54, "learning_rate": 0.0005, "loss": 1.7813, "step": 198200 }, { "epoch": 15.55, "learning_rate": 0.0005, "loss": 1.7729, "step": 198300 }, { "epoch": 15.56, "learning_rate": 0.0005, "loss": 1.77, "step": 198400 }, { "epoch": 15.57, "learning_rate": 0.0005, "loss": 1.7687, "step": 198500 }, { "epoch": 15.57, "learning_rate": 0.0005, "loss": 1.786, "step": 198600 }, { "epoch": 15.58, "learning_rate": 0.0005, "loss": 1.765, "step": 198700 }, { "epoch": 15.59, "learning_rate": 0.0005, "loss": 1.7615, "step": 198800 }, { "epoch": 15.6, "learning_rate": 0.0005, "loss": 1.7887, "step": 198900 }, { "epoch": 15.61, "learning_rate": 0.0005, "loss": 1.7714, "step": 199000 }, { "epoch": 15.61, "learning_rate": 0.0005, "loss": 1.8128, "step": 199100 }, { "epoch": 15.62, "learning_rate": 0.0005, "loss": 1.7684, "step": 199200 }, { "epoch": 15.63, "learning_rate": 0.0005, "loss": 1.769, "step": 199300 }, { "epoch": 15.64, "learning_rate": 0.0005, "loss": 1.7688, "step": 199400 }, { "epoch": 15.64, "learning_rate": 0.0005, "loss": 1.7897, "step": 199500 }, { "epoch": 15.65, "learning_rate": 0.0005, "loss": 1.7827, "step": 199600 }, { "epoch": 15.66, "learning_rate": 0.0005, "loss": 1.775, "step": 199700 }, { "epoch": 15.67, "learning_rate": 0.0005, "loss": 1.7955, "step": 199800 }, { "epoch": 15.68, "learning_rate": 0.0005, "loss": 1.7774, "step": 199900 }, { "epoch": 15.68, "learning_rate": 0.0005, "loss": 1.8084, "step": 200000 }, { "epoch": 15.68, "eval_gen_len": 18.788293458108942, "eval_loss": 2.014420747756958, "eval_rouge1": 35.1009, "eval_rouge2": 13.9441, "eval_rougeL": 28.7814, "eval_rougeLsum": 28.7778, "eval_runtime": 359.2501, "eval_samples_per_second": 31.53, "eval_steps_per_second": 1.971, "step": 200000 }, { "epoch": 15.69, "learning_rate": 0.0005, "loss": 1.7672, "step": 200100 }, { "epoch": 15.7, "learning_rate": 0.0005, "loss": 1.7826, "step": 200200 }, { "epoch": 15.71, "learning_rate": 0.0005, "loss": 1.7923, "step": 200300 }, { "epoch": 15.72, "learning_rate": 0.0005, "loss": 1.7803, "step": 200400 }, { "epoch": 15.72, "learning_rate": 0.0005, "loss": 1.7878, "step": 200500 }, { "epoch": 15.73, "learning_rate": 0.0005, "loss": 1.7987, "step": 200600 }, { "epoch": 15.74, "learning_rate": 0.0005, "loss": 1.775, "step": 200700 }, { "epoch": 15.75, "learning_rate": 0.0005, "loss": 1.7686, "step": 200800 }, { "epoch": 15.75, "learning_rate": 0.0005, "loss": 1.7859, "step": 200900 }, { "epoch": 15.76, "learning_rate": 0.0005, "loss": 1.7605, "step": 201000 }, { "epoch": 15.77, "learning_rate": 0.0005, "loss": 1.8035, "step": 201100 }, { "epoch": 15.78, "learning_rate": 0.0005, "loss": 1.7632, "step": 201200 }, { "epoch": 15.79, "learning_rate": 0.0005, "loss": 1.7976, "step": 201300 }, { "epoch": 15.79, "learning_rate": 0.0005, "loss": 1.7718, "step": 201400 }, { "epoch": 15.8, "learning_rate": 0.0005, "loss": 1.7786, "step": 201500 }, { "epoch": 15.81, "learning_rate": 0.0005, "loss": 1.7525, "step": 201600 }, { "epoch": 15.82, "learning_rate": 0.0005, "loss": 1.7773, "step": 201700 }, { "epoch": 15.82, "learning_rate": 0.0005, "loss": 1.7534, "step": 201800 }, { "epoch": 15.83, "learning_rate": 0.0005, "loss": 1.7707, "step": 201900 }, { "epoch": 15.84, "learning_rate": 0.0005, "loss": 1.7756, "step": 202000 }, { "epoch": 15.85, "learning_rate": 0.0005, "loss": 1.797, "step": 202100 }, { "epoch": 15.86, "learning_rate": 0.0005, "loss": 1.7775, "step": 202200 }, { "epoch": 15.86, "learning_rate": 0.0005, "loss": 1.8021, "step": 202300 }, { "epoch": 15.87, "learning_rate": 0.0005, "loss": 1.8092, "step": 202400 }, { "epoch": 15.88, "learning_rate": 0.0005, "loss": 1.802, "step": 202500 }, { "epoch": 15.89, "learning_rate": 0.0005, "loss": 1.7979, "step": 202600 }, { "epoch": 15.9, "learning_rate": 0.0005, "loss": 1.8046, "step": 202700 }, { "epoch": 15.9, "learning_rate": 0.0005, "loss": 1.7676, "step": 202800 }, { "epoch": 15.91, "learning_rate": 0.0005, "loss": 1.7956, "step": 202900 }, { "epoch": 15.92, "learning_rate": 0.0005, "loss": 1.7877, "step": 203000 }, { "epoch": 15.93, "learning_rate": 0.0005, "loss": 1.7923, "step": 203100 }, { "epoch": 15.93, "learning_rate": 0.0005, "loss": 1.7798, "step": 203200 }, { "epoch": 15.94, "learning_rate": 0.0005, "loss": 1.805, "step": 203300 }, { "epoch": 15.95, "learning_rate": 0.0005, "loss": 1.7862, "step": 203400 }, { "epoch": 15.96, "learning_rate": 0.0005, "loss": 1.8017, "step": 203500 }, { "epoch": 15.97, "learning_rate": 0.0005, "loss": 1.7967, "step": 203600 }, { "epoch": 15.97, "learning_rate": 0.0005, "loss": 1.7794, "step": 203700 }, { "epoch": 15.98, "learning_rate": 0.0005, "loss": 1.8175, "step": 203800 }, { "epoch": 15.99, "learning_rate": 0.0005, "loss": 1.7917, "step": 203900 }, { "epoch": 16.0, "learning_rate": 0.0005, "loss": 1.7841, "step": 204000 }, { "epoch": 16.01, "learning_rate": 0.0005, "loss": 1.7394, "step": 204100 }, { "epoch": 16.01, "learning_rate": 0.0005, "loss": 1.6738, "step": 204200 }, { "epoch": 16.02, "learning_rate": 0.0005, "loss": 1.6943, "step": 204300 }, { "epoch": 16.03, "learning_rate": 0.0005, "loss": 1.7132, "step": 204400 }, { "epoch": 16.04, "learning_rate": 0.0005, "loss": 1.7137, "step": 204500 }, { "epoch": 16.04, "learning_rate": 0.0005, "loss": 1.7141, "step": 204600 }, { "epoch": 16.05, "learning_rate": 0.0005, "loss": 1.6959, "step": 204700 }, { "epoch": 16.06, "learning_rate": 0.0005, "loss": 1.7451, "step": 204800 }, { "epoch": 16.07, "learning_rate": 0.0005, "loss": 1.713, "step": 204900 }, { "epoch": 16.08, "learning_rate": 0.0005, "loss": 1.7364, "step": 205000 }, { "epoch": 16.08, "learning_rate": 0.0005, "loss": 1.7192, "step": 205100 }, { "epoch": 16.09, "learning_rate": 0.0005, "loss": 1.7199, "step": 205200 }, { "epoch": 16.1, "learning_rate": 0.0005, "loss": 1.734, "step": 205300 }, { "epoch": 16.11, "learning_rate": 0.0005, "loss": 1.6913, "step": 205400 }, { "epoch": 16.12, "learning_rate": 0.0005, "loss": 1.7102, "step": 205500 }, { "epoch": 16.12, "learning_rate": 0.0005, "loss": 1.7346, "step": 205600 }, { "epoch": 16.13, "learning_rate": 0.0005, "loss": 1.7258, "step": 205700 }, { "epoch": 16.14, "learning_rate": 0.0005, "loss": 1.6992, "step": 205800 }, { "epoch": 16.15, "learning_rate": 0.0005, "loss": 1.7197, "step": 205900 }, { "epoch": 16.15, "learning_rate": 0.0005, "loss": 1.7702, "step": 206000 }, { "epoch": 16.16, "learning_rate": 0.0005, "loss": 1.7227, "step": 206100 }, { "epoch": 16.17, "learning_rate": 0.0005, "loss": 1.7175, "step": 206200 }, { "epoch": 16.18, "learning_rate": 0.0005, "loss": 1.7367, "step": 206300 }, { "epoch": 16.19, "learning_rate": 0.0005, "loss": 1.7115, "step": 206400 }, { "epoch": 16.19, "learning_rate": 0.0005, "loss": 1.751, "step": 206500 }, { "epoch": 16.2, "learning_rate": 0.0005, "loss": 1.7377, "step": 206600 }, { "epoch": 16.21, "learning_rate": 0.0005, "loss": 1.7243, "step": 206700 }, { "epoch": 16.22, "learning_rate": 0.0005, "loss": 1.7267, "step": 206800 }, { "epoch": 16.22, "learning_rate": 0.0005, "loss": 1.7448, "step": 206900 }, { "epoch": 16.23, "learning_rate": 0.0005, "loss": 1.7357, "step": 207000 }, { "epoch": 16.24, "learning_rate": 0.0005, "loss": 1.7057, "step": 207100 }, { "epoch": 16.25, "learning_rate": 0.0005, "loss": 1.7444, "step": 207200 }, { "epoch": 16.26, "learning_rate": 0.0005, "loss": 1.7404, "step": 207300 }, { "epoch": 16.26, "learning_rate": 0.0005, "loss": 1.7089, "step": 207400 }, { "epoch": 16.27, "learning_rate": 0.0005, "loss": 1.7147, "step": 207500 }, { "epoch": 16.28, "learning_rate": 0.0005, "loss": 1.745, "step": 207600 }, { "epoch": 16.29, "learning_rate": 0.0005, "loss": 1.7442, "step": 207700 }, { "epoch": 16.3, "learning_rate": 0.0005, "loss": 1.7184, "step": 207800 }, { "epoch": 16.3, "learning_rate": 0.0005, "loss": 1.724, "step": 207900 }, { "epoch": 16.31, "learning_rate": 0.0005, "loss": 1.7249, "step": 208000 }, { "epoch": 16.32, "learning_rate": 0.0005, "loss": 1.7072, "step": 208100 }, { "epoch": 16.33, "learning_rate": 0.0005, "loss": 1.7377, "step": 208200 }, { "epoch": 16.33, "learning_rate": 0.0005, "loss": 1.7466, "step": 208300 }, { "epoch": 16.34, "learning_rate": 0.0005, "loss": 1.7426, "step": 208400 }, { "epoch": 16.35, "learning_rate": 0.0005, "loss": 1.7699, "step": 208500 }, { "epoch": 16.36, "learning_rate": 0.0005, "loss": 1.7459, "step": 208600 }, { "epoch": 16.37, "learning_rate": 0.0005, "loss": 1.7269, "step": 208700 }, { "epoch": 16.37, "learning_rate": 0.0005, "loss": 1.7108, "step": 208800 }, { "epoch": 16.38, "learning_rate": 0.0005, "loss": 1.7786, "step": 208900 }, { "epoch": 16.39, "learning_rate": 0.0005, "loss": 1.7564, "step": 209000 }, { "epoch": 16.4, "learning_rate": 0.0005, "loss": 1.7284, "step": 209100 }, { "epoch": 16.41, "learning_rate": 0.0005, "loss": 1.728, "step": 209200 }, { "epoch": 16.41, "learning_rate": 0.0005, "loss": 1.7521, "step": 209300 }, { "epoch": 16.42, "learning_rate": 0.0005, "loss": 1.7642, "step": 209400 }, { "epoch": 16.43, "learning_rate": 0.0005, "loss": 1.7576, "step": 209500 }, { "epoch": 16.44, "learning_rate": 0.0005, "loss": 1.7292, "step": 209600 }, { "epoch": 16.44, "learning_rate": 0.0005, "loss": 1.7253, "step": 209700 }, { "epoch": 16.45, "learning_rate": 0.0005, "loss": 1.7542, "step": 209800 }, { "epoch": 16.46, "learning_rate": 0.0005, "loss": 1.7427, "step": 209900 }, { "epoch": 16.47, "learning_rate": 0.0005, "loss": 1.7272, "step": 210000 }, { "epoch": 16.47, "eval_gen_len": 18.780789264589036, "eval_loss": 2.018299102783203, "eval_rouge1": 35.0675, "eval_rouge2": 13.9534, "eval_rougeL": 28.8007, "eval_rougeLsum": 28.7924, "eval_runtime": 359.03, "eval_samples_per_second": 31.549, "eval_steps_per_second": 1.972, "step": 210000 }, { "epoch": 16.48, "learning_rate": 0.0005, "loss": 1.7419, "step": 210100 }, { "epoch": 16.48, "learning_rate": 0.0005, "loss": 1.7509, "step": 210200 }, { "epoch": 16.49, "learning_rate": 0.0005, "loss": 1.7605, "step": 210300 }, { "epoch": 16.5, "learning_rate": 0.0005, "loss": 1.7442, "step": 210400 }, { "epoch": 16.51, "learning_rate": 0.0005, "loss": 1.7492, "step": 210500 }, { "epoch": 16.52, "learning_rate": 0.0005, "loss": 1.7504, "step": 210600 }, { "epoch": 16.52, "learning_rate": 0.0005, "loss": 1.7649, "step": 210700 }, { "epoch": 16.53, "learning_rate": 0.0005, "loss": 1.7518, "step": 210800 }, { "epoch": 16.54, "learning_rate": 0.0005, "loss": 1.7234, "step": 210900 }, { "epoch": 16.55, "learning_rate": 0.0005, "loss": 1.7515, "step": 211000 }, { "epoch": 16.55, "learning_rate": 0.0005, "loss": 1.7684, "step": 211100 }, { "epoch": 16.56, "learning_rate": 0.0005, "loss": 1.7516, "step": 211200 }, { "epoch": 16.57, "learning_rate": 0.0005, "loss": 1.7365, "step": 211300 }, { "epoch": 16.58, "learning_rate": 0.0005, "loss": 1.7448, "step": 211400 }, { "epoch": 16.59, "learning_rate": 0.0005, "loss": 1.7608, "step": 211500 }, { "epoch": 16.59, "learning_rate": 0.0005, "loss": 1.7207, "step": 211600 }, { "epoch": 16.6, "learning_rate": 0.0005, "loss": 1.7543, "step": 211700 }, { "epoch": 16.61, "learning_rate": 0.0005, "loss": 1.7662, "step": 211800 }, { "epoch": 16.62, "learning_rate": 0.0005, "loss": 1.7219, "step": 211900 }, { "epoch": 16.62, "learning_rate": 0.0005, "loss": 1.7558, "step": 212000 }, { "epoch": 16.63, "learning_rate": 0.0005, "loss": 1.7425, "step": 212100 }, { "epoch": 16.64, "learning_rate": 0.0005, "loss": 1.7497, "step": 212200 }, { "epoch": 16.65, "learning_rate": 0.0005, "loss": 1.7257, "step": 212300 }, { "epoch": 16.66, "learning_rate": 0.0005, "loss": 1.7546, "step": 212400 }, { "epoch": 16.66, "learning_rate": 0.0005, "loss": 1.7781, "step": 212500 }, { "epoch": 16.67, "learning_rate": 0.0005, "loss": 1.7755, "step": 212600 }, { "epoch": 16.68, "learning_rate": 0.0005, "loss": 1.7529, "step": 212700 }, { "epoch": 16.69, "learning_rate": 0.0005, "loss": 1.7525, "step": 212800 }, { "epoch": 16.7, "learning_rate": 0.0005, "loss": 1.7715, "step": 212900 }, { "epoch": 16.7, "learning_rate": 0.0005, "loss": 1.7378, "step": 213000 }, { "epoch": 16.71, "learning_rate": 0.0005, "loss": 1.7646, "step": 213100 }, { "epoch": 16.72, "learning_rate": 0.0005, "loss": 1.7489, "step": 213200 }, { "epoch": 16.73, "learning_rate": 0.0005, "loss": 1.7538, "step": 213300 }, { "epoch": 16.73, "learning_rate": 0.0005, "loss": 1.7877, "step": 213400 }, { "epoch": 16.74, "learning_rate": 0.0005, "loss": 1.7733, "step": 213500 }, { "epoch": 16.75, "learning_rate": 0.0005, "loss": 1.7807, "step": 213600 }, { "epoch": 16.76, "learning_rate": 0.0005, "loss": 1.7642, "step": 213700 }, { "epoch": 16.77, "learning_rate": 0.0005, "loss": 1.7609, "step": 213800 }, { "epoch": 16.77, "learning_rate": 0.0005, "loss": 1.7679, "step": 213900 }, { "epoch": 16.78, "learning_rate": 0.0005, "loss": 1.7719, "step": 214000 }, { "epoch": 16.79, "learning_rate": 0.0005, "loss": 1.7707, "step": 214100 }, { "epoch": 16.8, "learning_rate": 0.0005, "loss": 1.7702, "step": 214200 }, { "epoch": 16.81, "learning_rate": 0.0005, "loss": 1.7301, "step": 214300 }, { "epoch": 16.81, "learning_rate": 0.0005, "loss": 1.7522, "step": 214400 }, { "epoch": 16.82, "learning_rate": 0.0005, "loss": 1.7738, "step": 214500 }, { "epoch": 16.83, "learning_rate": 0.0005, "loss": 1.7551, "step": 214600 }, { "epoch": 16.84, "learning_rate": 0.0005, "loss": 1.742, "step": 214700 }, { "epoch": 16.84, "learning_rate": 0.0005, "loss": 1.7641, "step": 214800 }, { "epoch": 16.85, "learning_rate": 0.0005, "loss": 1.7828, "step": 214900 }, { "epoch": 16.86, "learning_rate": 0.0005, "loss": 1.7799, "step": 215000 }, { "epoch": 16.87, "learning_rate": 0.0005, "loss": 1.7476, "step": 215100 }, { "epoch": 16.88, "learning_rate": 0.0005, "loss": 1.7662, "step": 215200 }, { "epoch": 16.88, "learning_rate": 0.0005, "loss": 1.7764, "step": 215300 }, { "epoch": 16.89, "learning_rate": 0.0005, "loss": 1.79, "step": 215400 }, { "epoch": 16.9, "learning_rate": 0.0005, "loss": 1.7862, "step": 215500 }, { "epoch": 16.91, "learning_rate": 0.0005, "loss": 1.7792, "step": 215600 }, { "epoch": 16.91, "learning_rate": 0.0005, "loss": 1.7772, "step": 215700 }, { "epoch": 16.92, "learning_rate": 0.0005, "loss": 1.8195, "step": 215800 }, { "epoch": 16.93, "learning_rate": 0.0005, "loss": 1.7426, "step": 215900 }, { "epoch": 16.94, "learning_rate": 0.0005, "loss": 1.781, "step": 216000 }, { "epoch": 16.95, "learning_rate": 0.0005, "loss": 1.7399, "step": 216100 }, { "epoch": 16.95, "learning_rate": 0.0005, "loss": 1.774, "step": 216200 }, { "epoch": 16.96, "learning_rate": 0.0005, "loss": 1.7855, "step": 216300 }, { "epoch": 16.97, "learning_rate": 0.0005, "loss": 1.785, "step": 216400 }, { "epoch": 16.98, "learning_rate": 0.0005, "loss": 1.7603, "step": 216500 }, { "epoch": 16.99, "learning_rate": 0.0005, "loss": 1.7576, "step": 216600 }, { "epoch": 16.99, "learning_rate": 0.0005, "loss": 1.7952, "step": 216700 }, { "epoch": 17.0, "learning_rate": 0.0005, "loss": 1.7494, "step": 216800 }, { "epoch": 17.01, "learning_rate": 0.0005, "loss": 1.7225, "step": 216900 }, { "epoch": 17.02, "learning_rate": 0.0005, "loss": 1.6896, "step": 217000 }, { "epoch": 17.02, "learning_rate": 0.0005, "loss": 1.7069, "step": 217100 }, { "epoch": 17.03, "learning_rate": 0.0005, "loss": 1.6877, "step": 217200 }, { "epoch": 17.04, "learning_rate": 0.0005, "loss": 1.6936, "step": 217300 }, { "epoch": 17.05, "learning_rate": 0.0005, "loss": 1.6868, "step": 217400 }, { "epoch": 17.06, "learning_rate": 0.0005, "loss": 1.6801, "step": 217500 }, { "epoch": 17.06, "learning_rate": 0.0005, "loss": 1.7087, "step": 217600 }, { "epoch": 17.07, "learning_rate": 0.0005, "loss": 1.677, "step": 217700 }, { "epoch": 17.08, "learning_rate": 0.0005, "loss": 1.7113, "step": 217800 }, { "epoch": 17.09, "learning_rate": 0.0005, "loss": 1.7145, "step": 217900 }, { "epoch": 17.1, "learning_rate": 0.0005, "loss": 1.6894, "step": 218000 }, { "epoch": 17.1, "learning_rate": 0.0005, "loss": 1.7059, "step": 218100 }, { "epoch": 17.11, "learning_rate": 0.0005, "loss": 1.6714, "step": 218200 }, { "epoch": 17.12, "learning_rate": 0.0005, "loss": 1.7143, "step": 218300 }, { "epoch": 17.13, "learning_rate": 0.0005, "loss": 1.7091, "step": 218400 }, { "epoch": 17.13, "learning_rate": 0.0005, "loss": 1.6903, "step": 218500 }, { "epoch": 17.14, "learning_rate": 0.0005, "loss": 1.7064, "step": 218600 }, { "epoch": 17.15, "learning_rate": 0.0005, "loss": 1.6881, "step": 218700 }, { "epoch": 17.16, "learning_rate": 0.0005, "loss": 1.6891, "step": 218800 }, { "epoch": 17.17, "learning_rate": 0.0005, "loss": 1.7152, "step": 218900 }, { "epoch": 17.17, "learning_rate": 0.0005, "loss": 1.7019, "step": 219000 }, { "epoch": 17.18, "learning_rate": 0.0005, "loss": 1.7281, "step": 219100 }, { "epoch": 17.19, "learning_rate": 0.0005, "loss": 1.7069, "step": 219200 }, { "epoch": 17.2, "learning_rate": 0.0005, "loss": 1.7337, "step": 219300 }, { "epoch": 17.21, "learning_rate": 0.0005, "loss": 1.7059, "step": 219400 }, { "epoch": 17.21, "learning_rate": 0.0005, "loss": 1.7184, "step": 219500 }, { "epoch": 17.22, "learning_rate": 0.0005, "loss": 1.7176, "step": 219600 }, { "epoch": 17.23, "learning_rate": 0.0005, "loss": 1.6903, "step": 219700 }, { "epoch": 17.24, "learning_rate": 0.0005, "loss": 1.7106, "step": 219800 }, { "epoch": 17.24, "learning_rate": 0.0005, "loss": 1.7179, "step": 219900 }, { "epoch": 17.25, "learning_rate": 0.0005, "loss": 1.7223, "step": 220000 }, { "epoch": 17.25, "eval_gen_len": 18.785556634589916, "eval_loss": 2.0205140113830566, "eval_rouge1": 35.2932, "eval_rouge2": 14.0579, "eval_rougeL": 28.9651, "eval_rougeLsum": 28.9628, "eval_runtime": 358.8009, "eval_samples_per_second": 31.569, "eval_steps_per_second": 1.973, "step": 220000 }, { "epoch": 17.26, "learning_rate": 0.0005, "loss": 1.7245, "step": 220100 }, { "epoch": 17.27, "learning_rate": 0.0005, "loss": 1.6959, "step": 220200 }, { "epoch": 17.28, "learning_rate": 0.0005, "loss": 1.7083, "step": 220300 }, { "epoch": 17.28, "learning_rate": 0.0005, "loss": 1.7132, "step": 220400 }, { "epoch": 17.29, "learning_rate": 0.0005, "loss": 1.7206, "step": 220500 }, { "epoch": 17.3, "learning_rate": 0.0005, "loss": 1.7128, "step": 220600 }, { "epoch": 17.31, "learning_rate": 0.0005, "loss": 1.711, "step": 220700 }, { "epoch": 17.31, "learning_rate": 0.0005, "loss": 1.7169, "step": 220800 }, { "epoch": 17.32, "learning_rate": 0.0005, "loss": 1.6995, "step": 220900 }, { "epoch": 17.33, "learning_rate": 0.0005, "loss": 1.7092, "step": 221000 }, { "epoch": 17.34, "learning_rate": 0.0005, "loss": 1.7284, "step": 221100 }, { "epoch": 17.35, "learning_rate": 0.0005, "loss": 1.7167, "step": 221200 }, { "epoch": 17.35, "learning_rate": 0.0005, "loss": 1.7499, "step": 221300 }, { "epoch": 17.36, "learning_rate": 0.0005, "loss": 1.693, "step": 221400 }, { "epoch": 17.37, "learning_rate": 0.0005, "loss": 1.7267, "step": 221500 }, { "epoch": 17.38, "learning_rate": 0.0005, "loss": 1.6923, "step": 221600 }, { "epoch": 17.39, "learning_rate": 0.0005, "loss": 1.7253, "step": 221700 }, { "epoch": 17.39, "learning_rate": 0.0005, "loss": 1.7074, "step": 221800 }, { "epoch": 17.4, "learning_rate": 0.0005, "loss": 1.6936, "step": 221900 }, { "epoch": 17.41, "learning_rate": 0.0005, "loss": 1.7314, "step": 222000 }, { "epoch": 17.42, "learning_rate": 0.0005, "loss": 1.7109, "step": 222100 }, { "epoch": 17.42, "learning_rate": 0.0005, "loss": 1.7574, "step": 222200 }, { "epoch": 17.43, "learning_rate": 0.0005, "loss": 1.6917, "step": 222300 }, { "epoch": 17.44, "learning_rate": 0.0005, "loss": 1.7473, "step": 222400 }, { "epoch": 17.45, "learning_rate": 0.0005, "loss": 1.7132, "step": 222500 }, { "epoch": 17.46, "learning_rate": 0.0005, "loss": 1.7103, "step": 222600 }, { "epoch": 17.46, "learning_rate": 0.0005, "loss": 1.717, "step": 222700 }, { "epoch": 17.47, "learning_rate": 0.0005, "loss": 1.7215, "step": 222800 }, { "epoch": 17.48, "learning_rate": 0.0005, "loss": 1.7374, "step": 222900 }, { "epoch": 17.49, "learning_rate": 0.0005, "loss": 1.6961, "step": 223000 }, { "epoch": 17.5, "learning_rate": 0.0005, "loss": 1.7261, "step": 223100 }, { "epoch": 17.5, "learning_rate": 0.0005, "loss": 1.7277, "step": 223200 }, { "epoch": 17.51, "learning_rate": 0.0005, "loss": 1.7473, "step": 223300 }, { "epoch": 17.52, "learning_rate": 0.0005, "loss": 1.7192, "step": 223400 }, { "epoch": 17.53, "learning_rate": 0.0005, "loss": 1.7496, "step": 223500 }, { "epoch": 17.53, "learning_rate": 0.0005, "loss": 1.7392, "step": 223600 }, { "epoch": 17.54, "learning_rate": 0.0005, "loss": 1.7387, "step": 223700 }, { "epoch": 17.55, "learning_rate": 0.0005, "loss": 1.6937, "step": 223800 }, { "epoch": 17.56, "learning_rate": 0.0005, "loss": 1.7106, "step": 223900 }, { "epoch": 17.57, "learning_rate": 0.0005, "loss": 1.7596, "step": 224000 }, { "epoch": 17.57, "learning_rate": 0.0005, "loss": 1.743, "step": 224100 }, { "epoch": 17.58, "learning_rate": 0.0005, "loss": 1.7321, "step": 224200 }, { "epoch": 17.59, "learning_rate": 0.0005, "loss": 1.7312, "step": 224300 }, { "epoch": 17.6, "learning_rate": 0.0005, "loss": 1.737, "step": 224400 }, { "epoch": 17.61, "learning_rate": 0.0005, "loss": 1.7505, "step": 224500 }, { "epoch": 17.61, "learning_rate": 0.0005, "loss": 1.7366, "step": 224600 }, { "epoch": 17.62, "learning_rate": 0.0005, "loss": 1.7694, "step": 224700 }, { "epoch": 17.63, "learning_rate": 0.0005, "loss": 1.7462, "step": 224800 }, { "epoch": 17.64, "learning_rate": 0.0005, "loss": 1.7142, "step": 224900 }, { "epoch": 17.64, "learning_rate": 0.0005, "loss": 1.7402, "step": 225000 }, { "epoch": 17.65, "learning_rate": 0.0005, "loss": 1.7443, "step": 225100 }, { "epoch": 17.66, "learning_rate": 0.0005, "loss": 1.7299, "step": 225200 }, { "epoch": 17.67, "learning_rate": 0.0005, "loss": 1.7341, "step": 225300 }, { "epoch": 17.68, "learning_rate": 0.0005, "loss": 1.7406, "step": 225400 }, { "epoch": 17.68, "learning_rate": 0.0005, "loss": 1.7466, "step": 225500 }, { "epoch": 17.69, "learning_rate": 0.0005, "loss": 1.7195, "step": 225600 }, { "epoch": 17.7, "learning_rate": 0.0005, "loss": 1.7175, "step": 225700 }, { "epoch": 17.71, "learning_rate": 0.0005, "loss": 1.7269, "step": 225800 }, { "epoch": 17.71, "learning_rate": 0.0005, "loss": 1.7371, "step": 225900 }, { "epoch": 17.72, "learning_rate": 0.0005, "loss": 1.7362, "step": 226000 }, { "epoch": 17.73, "learning_rate": 0.0005, "loss": 1.7418, "step": 226100 }, { "epoch": 17.74, "learning_rate": 0.0005, "loss": 1.7583, "step": 226200 }, { "epoch": 17.75, "learning_rate": 0.0005, "loss": 1.7261, "step": 226300 }, { "epoch": 17.75, "learning_rate": 0.0005, "loss": 1.7558, "step": 226400 }, { "epoch": 17.76, "learning_rate": 0.0005, "loss": 1.7613, "step": 226500 }, { "epoch": 17.77, "learning_rate": 0.0005, "loss": 1.7171, "step": 226600 }, { "epoch": 17.78, "learning_rate": 0.0005, "loss": 1.7267, "step": 226700 }, { "epoch": 17.79, "learning_rate": 0.0005, "loss": 1.7391, "step": 226800 }, { "epoch": 17.79, "learning_rate": 0.0005, "loss": 1.7509, "step": 226900 }, { "epoch": 17.8, "learning_rate": 0.0005, "loss": 1.7602, "step": 227000 }, { "epoch": 17.81, "learning_rate": 0.0005, "loss": 1.7495, "step": 227100 }, { "epoch": 17.82, "learning_rate": 0.0005, "loss": 1.7496, "step": 227200 }, { "epoch": 17.82, "learning_rate": 0.0005, "loss": 1.7514, "step": 227300 }, { "epoch": 17.83, "learning_rate": 0.0005, "loss": 1.7236, "step": 227400 }, { "epoch": 17.84, "learning_rate": 0.0005, "loss": 1.7744, "step": 227500 }, { "epoch": 17.85, "learning_rate": 0.0005, "loss": 1.7376, "step": 227600 }, { "epoch": 17.86, "learning_rate": 0.0005, "loss": 1.7838, "step": 227700 }, { "epoch": 17.86, "learning_rate": 0.0005, "loss": 1.7483, "step": 227800 }, { "epoch": 17.87, "learning_rate": 0.0005, "loss": 1.7373, "step": 227900 }, { "epoch": 17.88, "learning_rate": 0.0005, "loss": 1.763, "step": 228000 }, { "epoch": 17.89, "learning_rate": 0.0005, "loss": 1.7521, "step": 228100 }, { "epoch": 17.9, "learning_rate": 0.0005, "loss": 1.7713, "step": 228200 }, { "epoch": 17.9, "learning_rate": 0.0005, "loss": 1.7472, "step": 228300 }, { "epoch": 17.91, "learning_rate": 0.0005, "loss": 1.7377, "step": 228400 }, { "epoch": 17.92, "learning_rate": 0.0005, "loss": 1.7492, "step": 228500 }, { "epoch": 17.93, "learning_rate": 0.0005, "loss": 1.7903, "step": 228600 }, { "epoch": 17.93, "learning_rate": 0.0005, "loss": 1.7734, "step": 228700 }, { "epoch": 17.94, "learning_rate": 0.0005, "loss": 1.743, "step": 228800 }, { "epoch": 17.95, "learning_rate": 0.0005, "loss": 1.7376, "step": 228900 }, { "epoch": 17.96, "learning_rate": 0.0005, "loss": 1.7402, "step": 229000 }, { "epoch": 17.97, "learning_rate": 0.0005, "loss": 1.7615, "step": 229100 }, { "epoch": 17.97, "learning_rate": 0.0005, "loss": 1.7213, "step": 229200 }, { "epoch": 17.98, "learning_rate": 0.0005, "loss": 1.7485, "step": 229300 }, { "epoch": 17.99, "learning_rate": 0.0005, "loss": 1.7361, "step": 229400 }, { "epoch": 18.0, "learning_rate": 0.0005, "loss": 1.7568, "step": 229500 }, { "epoch": 18.01, "learning_rate": 0.0005, "loss": 1.6941, "step": 229600 }, { "epoch": 18.01, "learning_rate": 0.0005, "loss": 1.6589, "step": 229700 }, { "epoch": 18.02, "learning_rate": 0.0005, "loss": 1.6802, "step": 229800 }, { "epoch": 18.03, "learning_rate": 0.0005, "loss": 1.6694, "step": 229900 }, { "epoch": 18.04, "learning_rate": 0.0005, "loss": 1.6742, "step": 230000 }, { "epoch": 18.04, "eval_gen_len": 18.784497219034165, "eval_loss": 2.0137345790863037, "eval_rouge1": 35.3791, "eval_rouge2": 14.1491, "eval_rougeL": 29.0175, "eval_rougeLsum": 29.0086, "eval_runtime": 361.5305, "eval_samples_per_second": 31.331, "eval_steps_per_second": 1.958, "step": 230000 }, { "epoch": 18.04, "learning_rate": 0.0005, "loss": 1.6665, "step": 230100 }, { "epoch": 18.05, "learning_rate": 0.0005, "loss": 1.6606, "step": 230200 }, { "epoch": 18.06, "learning_rate": 0.0005, "loss": 1.677, "step": 230300 }, { "epoch": 18.07, "learning_rate": 0.0005, "loss": 1.6896, "step": 230400 }, { "epoch": 18.08, "learning_rate": 0.0005, "loss": 1.6786, "step": 230500 }, { "epoch": 18.08, "learning_rate": 0.0005, "loss": 1.653, "step": 230600 }, { "epoch": 18.09, "learning_rate": 0.0005, "loss": 1.7007, "step": 230700 }, { "epoch": 18.1, "learning_rate": 0.0005, "loss": 1.6996, "step": 230800 }, { "epoch": 18.11, "learning_rate": 0.0005, "loss": 1.6833, "step": 230900 }, { "epoch": 18.11, "learning_rate": 0.0005, "loss": 1.6877, "step": 231000 }, { "epoch": 18.12, "learning_rate": 0.0005, "loss": 1.7055, "step": 231100 }, { "epoch": 18.13, "learning_rate": 0.0005, "loss": 1.6772, "step": 231200 }, { "epoch": 18.14, "learning_rate": 0.0005, "loss": 1.6763, "step": 231300 }, { "epoch": 18.15, "learning_rate": 0.0005, "loss": 1.69, "step": 231400 }, { "epoch": 18.15, "learning_rate": 0.0005, "loss": 1.6732, "step": 231500 }, { "epoch": 18.16, "learning_rate": 0.0005, "loss": 1.6955, "step": 231600 }, { "epoch": 18.17, "learning_rate": 0.0005, "loss": 1.7267, "step": 231700 }, { "epoch": 18.18, "learning_rate": 0.0005, "loss": 1.6848, "step": 231800 }, { "epoch": 18.19, "learning_rate": 0.0005, "loss": 1.6835, "step": 231900 }, { "epoch": 18.19, "learning_rate": 0.0005, "loss": 1.689, "step": 232000 }, { "epoch": 18.2, "learning_rate": 0.0005, "loss": 1.6716, "step": 232100 }, { "epoch": 18.21, "learning_rate": 0.0005, "loss": 1.6968, "step": 232200 }, { "epoch": 18.22, "learning_rate": 0.0005, "loss": 1.6927, "step": 232300 }, { "epoch": 18.22, "learning_rate": 0.0005, "loss": 1.678, "step": 232400 }, { "epoch": 18.23, "learning_rate": 0.0005, "loss": 1.6848, "step": 232500 }, { "epoch": 18.24, "learning_rate": 0.0005, "loss": 1.6858, "step": 232600 }, { "epoch": 18.25, "learning_rate": 0.0005, "loss": 1.6891, "step": 232700 }, { "epoch": 18.26, "learning_rate": 0.0005, "loss": 1.7047, "step": 232800 }, { "epoch": 18.26, "learning_rate": 0.0005, "loss": 1.6695, "step": 232900 }, { "epoch": 18.27, "learning_rate": 0.0005, "loss": 1.7147, "step": 233000 }, { "epoch": 18.28, "learning_rate": 0.0005, "loss": 1.681, "step": 233100 }, { "epoch": 18.29, "learning_rate": 0.0005, "loss": 1.703, "step": 233200 }, { "epoch": 18.3, "learning_rate": 0.0005, "loss": 1.7181, "step": 233300 }, { "epoch": 18.3, "learning_rate": 0.0005, "loss": 1.6858, "step": 233400 }, { "epoch": 18.31, "learning_rate": 0.0005, "loss": 1.695, "step": 233500 }, { "epoch": 18.32, "learning_rate": 0.0005, "loss": 1.6958, "step": 233600 }, { "epoch": 18.33, "learning_rate": 0.0005, "loss": 1.6873, "step": 233700 }, { "epoch": 18.33, "learning_rate": 0.0005, "loss": 1.7194, "step": 233800 }, { "epoch": 18.34, "learning_rate": 0.0005, "loss": 1.7067, "step": 233900 }, { "epoch": 18.35, "learning_rate": 0.0005, "loss": 1.7099, "step": 234000 }, { "epoch": 18.36, "learning_rate": 0.0005, "loss": 1.6824, "step": 234100 }, { "epoch": 18.37, "learning_rate": 0.0005, "loss": 1.6951, "step": 234200 }, { "epoch": 18.37, "learning_rate": 0.0005, "loss": 1.6935, "step": 234300 }, { "epoch": 18.38, "learning_rate": 0.0005, "loss": 1.7125, "step": 234400 }, { "epoch": 18.39, "learning_rate": 0.0005, "loss": 1.729, "step": 234500 }, { "epoch": 18.4, "learning_rate": 0.0005, "loss": 1.7548, "step": 234600 }, { "epoch": 18.4, "learning_rate": 0.0005, "loss": 1.6915, "step": 234700 }, { "epoch": 18.41, "learning_rate": 0.0005, "loss": 1.677, "step": 234800 }, { "epoch": 18.42, "learning_rate": 0.0005, "loss": 1.6872, "step": 234900 }, { "epoch": 18.43, "learning_rate": 0.0005, "loss": 1.6984, "step": 235000 }, { "epoch": 18.44, "learning_rate": 0.0005, "loss": 1.6906, "step": 235100 }, { "epoch": 18.44, "learning_rate": 0.0005, "loss": 1.7143, "step": 235200 }, { "epoch": 18.45, "learning_rate": 0.0005, "loss": 1.7065, "step": 235300 }, { "epoch": 18.46, "learning_rate": 0.0005, "loss": 1.7155, "step": 235400 }, { "epoch": 18.47, "learning_rate": 0.0005, "loss": 1.7136, "step": 235500 }, { "epoch": 18.48, "learning_rate": 0.0005, "loss": 1.703, "step": 235600 }, { "epoch": 18.48, "learning_rate": 0.0005, "loss": 1.6929, "step": 235700 }, { "epoch": 18.49, "learning_rate": 0.0005, "loss": 1.7074, "step": 235800 }, { "epoch": 18.5, "learning_rate": 0.0005, "loss": 1.6988, "step": 235900 }, { "epoch": 18.51, "learning_rate": 0.0005, "loss": 1.7208, "step": 236000 }, { "epoch": 18.51, "learning_rate": 0.0005, "loss": 1.7038, "step": 236100 }, { "epoch": 18.52, "learning_rate": 0.0005, "loss": 1.7127, "step": 236200 }, { "epoch": 18.53, "learning_rate": 0.0005, "loss": 1.7226, "step": 236300 }, { "epoch": 18.54, "learning_rate": 0.0005, "loss": 1.7001, "step": 236400 }, { "epoch": 18.55, "learning_rate": 0.0005, "loss": 1.6968, "step": 236500 }, { "epoch": 18.55, "learning_rate": 0.0005, "loss": 1.6991, "step": 236600 }, { "epoch": 18.56, "learning_rate": 0.0005, "loss": 1.7272, "step": 236700 }, { "epoch": 18.57, "learning_rate": 0.0005, "loss": 1.7066, "step": 236800 }, { "epoch": 18.58, "learning_rate": 0.0005, "loss": 1.7438, "step": 236900 }, { "epoch": 18.59, "learning_rate": 0.0005, "loss": 1.7206, "step": 237000 }, { "epoch": 18.59, "learning_rate": 0.0005, "loss": 1.751, "step": 237100 }, { "epoch": 18.6, "learning_rate": 0.0005, "loss": 1.7057, "step": 237200 }, { "epoch": 18.61, "learning_rate": 0.0005, "loss": 1.7001, "step": 237300 }, { "epoch": 18.62, "learning_rate": 0.0005, "loss": 1.7321, "step": 237400 }, { "epoch": 18.62, "learning_rate": 0.0005, "loss": 1.733, "step": 237500 }, { "epoch": 18.63, "learning_rate": 0.0005, "loss": 1.704, "step": 237600 }, { "epoch": 18.64, "learning_rate": 0.0005, "loss": 1.7253, "step": 237700 }, { "epoch": 18.65, "learning_rate": 0.0005, "loss": 1.7128, "step": 237800 }, { "epoch": 18.66, "learning_rate": 0.0005, "loss": 1.7277, "step": 237900 }, { "epoch": 18.66, "learning_rate": 0.0005, "loss": 1.7428, "step": 238000 }, { "epoch": 18.67, "learning_rate": 0.0005, "loss": 1.7269, "step": 238100 }, { "epoch": 18.68, "learning_rate": 0.0005, "loss": 1.7137, "step": 238200 }, { "epoch": 18.69, "learning_rate": 0.0005, "loss": 1.7148, "step": 238300 }, { "epoch": 18.7, "learning_rate": 0.0005, "loss": 1.7229, "step": 238400 }, { "epoch": 18.7, "learning_rate": 0.0005, "loss": 1.7405, "step": 238500 }, { "epoch": 18.71, "learning_rate": 0.0005, "loss": 1.7477, "step": 238600 }, { "epoch": 18.72, "learning_rate": 0.0005, "loss": 1.7204, "step": 238700 }, { "epoch": 18.73, "learning_rate": 0.0005, "loss": 1.7565, "step": 238800 }, { "epoch": 18.73, "learning_rate": 0.0005, "loss": 1.7104, "step": 238900 }, { "epoch": 18.74, "learning_rate": 0.0005, "loss": 1.748, "step": 239000 }, { "epoch": 18.75, "learning_rate": 0.0005, "loss": 1.7363, "step": 239100 }, { "epoch": 18.76, "learning_rate": 0.0005, "loss": 1.7393, "step": 239200 }, { "epoch": 18.77, "learning_rate": 0.0005, "loss": 1.7151, "step": 239300 }, { "epoch": 18.77, "learning_rate": 0.0005, "loss": 1.7496, "step": 239400 }, { "epoch": 18.78, "learning_rate": 0.0005, "loss": 1.7605, "step": 239500 }, { "epoch": 18.79, "learning_rate": 0.0005, "loss": 1.7371, "step": 239600 }, { "epoch": 18.8, "learning_rate": 0.0005, "loss": 1.7388, "step": 239700 }, { "epoch": 18.8, "learning_rate": 0.0005, "loss": 1.7166, "step": 239800 }, { "epoch": 18.81, "learning_rate": 0.0005, "loss": 1.7391, "step": 239900 }, { "epoch": 18.82, "learning_rate": 0.0005, "loss": 1.7056, "step": 240000 }, { "epoch": 18.82, "eval_gen_len": 18.775668756069567, "eval_loss": 2.0135035514831543, "eval_rouge1": 35.5094, "eval_rouge2": 14.2676, "eval_rougeL": 29.0733, "eval_rougeLsum": 29.0642, "eval_runtime": 364.5866, "eval_samples_per_second": 31.068, "eval_steps_per_second": 1.942, "step": 240000 }, { "epoch": 18.83, "learning_rate": 0.0005, "loss": 1.7024, "step": 240100 }, { "epoch": 18.84, "learning_rate": 0.0005, "loss": 1.6971, "step": 240200 }, { "epoch": 18.84, "learning_rate": 0.0005, "loss": 1.6981, "step": 240300 }, { "epoch": 18.85, "learning_rate": 0.0005, "loss": 1.7311, "step": 240400 }, { "epoch": 18.86, "learning_rate": 0.0005, "loss": 1.7257, "step": 240500 }, { "epoch": 18.87, "learning_rate": 0.0005, "loss": 1.6868, "step": 240600 }, { "epoch": 18.88, "learning_rate": 0.0005, "loss": 1.7249, "step": 240700 }, { "epoch": 18.88, "learning_rate": 0.0005, "loss": 1.7212, "step": 240800 }, { "epoch": 18.89, "learning_rate": 0.0005, "loss": 1.7294, "step": 240900 }, { "epoch": 18.9, "learning_rate": 0.0005, "loss": 1.7355, "step": 241000 }, { "epoch": 18.91, "learning_rate": 0.0005, "loss": 1.7353, "step": 241100 }, { "epoch": 18.91, "learning_rate": 0.0005, "loss": 1.7472, "step": 241200 }, { "epoch": 18.92, "learning_rate": 0.0005, "loss": 1.7534, "step": 241300 }, { "epoch": 18.93, "learning_rate": 0.0005, "loss": 1.7067, "step": 241400 }, { "epoch": 18.94, "learning_rate": 0.0005, "loss": 1.7399, "step": 241500 }, { "epoch": 18.95, "learning_rate": 0.0005, "loss": 1.7203, "step": 241600 }, { "epoch": 18.95, "learning_rate": 0.0005, "loss": 1.7496, "step": 241700 }, { "epoch": 18.96, "learning_rate": 0.0005, "loss": 1.7307, "step": 241800 }, { "epoch": 18.97, "learning_rate": 0.0005, "loss": 1.7298, "step": 241900 }, { "epoch": 18.98, "learning_rate": 0.0005, "loss": 1.7253, "step": 242000 }, { "epoch": 18.99, "learning_rate": 0.0005, "loss": 1.7364, "step": 242100 }, { "epoch": 18.99, "learning_rate": 0.0005, "loss": 1.7221, "step": 242200 }, { "epoch": 19.0, "learning_rate": 0.0005, "loss": 1.7532, "step": 242300 }, { "epoch": 19.01, "learning_rate": 0.0005, "loss": 1.6476, "step": 242400 }, { "epoch": 19.02, "learning_rate": 0.0005, "loss": 1.6458, "step": 242500 }, { "epoch": 19.02, "learning_rate": 0.0005, "loss": 1.627, "step": 242600 }, { "epoch": 19.03, "learning_rate": 0.0005, "loss": 1.6363, "step": 242700 }, { "epoch": 19.04, "learning_rate": 0.0005, "loss": 1.6743, "step": 242800 }, { "epoch": 19.05, "learning_rate": 0.0005, "loss": 1.666, "step": 242900 }, { "epoch": 19.06, "learning_rate": 0.0005, "loss": 1.6507, "step": 243000 }, { "epoch": 19.06, "learning_rate": 0.0005, "loss": 1.6605, "step": 243100 }, { "epoch": 19.07, "learning_rate": 0.0005, "loss": 1.6324, "step": 243200 }, { "epoch": 19.08, "learning_rate": 0.0005, "loss": 1.6725, "step": 243300 }, { "epoch": 19.09, "learning_rate": 0.0005, "loss": 1.6617, "step": 243400 }, { "epoch": 19.1, "learning_rate": 0.0005, "loss": 1.6396, "step": 243500 }, { "epoch": 19.1, "learning_rate": 0.0005, "loss": 1.6576, "step": 243600 }, { "epoch": 19.11, "learning_rate": 0.0005, "loss": 1.6778, "step": 243700 }, { "epoch": 19.12, "learning_rate": 0.0005, "loss": 1.6664, "step": 243800 }, { "epoch": 19.13, "learning_rate": 0.0005, "loss": 1.7057, "step": 243900 }, { "epoch": 19.13, "learning_rate": 0.0005, "loss": 1.6805, "step": 244000 }, { "epoch": 19.14, "learning_rate": 0.0005, "loss": 1.6807, "step": 244100 }, { "epoch": 19.15, "learning_rate": 0.0005, "loss": 1.6879, "step": 244200 }, { "epoch": 19.16, "learning_rate": 0.0005, "loss": 1.6696, "step": 244300 }, { "epoch": 19.17, "learning_rate": 0.0005, "loss": 1.6631, "step": 244400 }, { "epoch": 19.17, "learning_rate": 0.0005, "loss": 1.672, "step": 244500 }, { "epoch": 19.18, "learning_rate": 0.0005, "loss": 1.6705, "step": 244600 }, { "epoch": 19.19, "learning_rate": 0.0005, "loss": 1.6861, "step": 244700 }, { "epoch": 19.2, "learning_rate": 0.0005, "loss": 1.6579, "step": 244800 }, { "epoch": 19.2, "learning_rate": 0.0005, "loss": 1.6765, "step": 244900 }, { "epoch": 19.21, "learning_rate": 0.0005, "loss": 1.7092, "step": 245000 }, { "epoch": 19.22, "learning_rate": 0.0005, "loss": 1.6675, "step": 245100 }, { "epoch": 19.23, "learning_rate": 0.0005, "loss": 1.6635, "step": 245200 }, { "epoch": 19.24, "learning_rate": 0.0005, "loss": 1.678, "step": 245300 }, { "epoch": 19.24, "learning_rate": 0.0005, "loss": 1.664, "step": 245400 }, { "epoch": 19.25, "learning_rate": 0.0005, "loss": 1.6954, "step": 245500 }, { "epoch": 19.26, "learning_rate": 0.0005, "loss": 1.6661, "step": 245600 }, { "epoch": 19.27, "learning_rate": 0.0005, "loss": 1.6942, "step": 245700 }, { "epoch": 19.28, "learning_rate": 0.0005, "loss": 1.6785, "step": 245800 }, { "epoch": 19.28, "learning_rate": 0.0005, "loss": 1.6845, "step": 245900 }, { "epoch": 19.29, "learning_rate": 0.0005, "loss": 1.6905, "step": 246000 }, { "epoch": 19.3, "learning_rate": 0.0005, "loss": 1.7119, "step": 246100 }, { "epoch": 19.31, "learning_rate": 0.0005, "loss": 1.6895, "step": 246200 }, { "epoch": 19.31, "learning_rate": 0.0005, "loss": 1.6958, "step": 246300 }, { "epoch": 19.32, "learning_rate": 0.0005, "loss": 1.6489, "step": 246400 }, { "epoch": 19.33, "learning_rate": 0.0005, "loss": 1.672, "step": 246500 }, { "epoch": 19.34, "learning_rate": 0.0005, "loss": 1.7014, "step": 246600 }, { "epoch": 19.35, "learning_rate": 0.0005, "loss": 1.6828, "step": 246700 }, { "epoch": 19.35, "learning_rate": 0.0005, "loss": 1.6354, "step": 246800 }, { "epoch": 19.36, "learning_rate": 0.0005, "loss": 1.6998, "step": 246900 }, { "epoch": 19.37, "learning_rate": 0.0005, "loss": 1.679, "step": 247000 }, { "epoch": 19.38, "learning_rate": 0.0005, "loss": 1.6901, "step": 247100 }, { "epoch": 19.39, "learning_rate": 0.0005, "loss": 1.6976, "step": 247200 }, { "epoch": 19.39, "learning_rate": 0.0005, "loss": 1.6883, "step": 247300 }, { "epoch": 19.4, "learning_rate": 0.0005, "loss": 1.6643, "step": 247400 }, { "epoch": 19.41, "learning_rate": 0.0005, "loss": 1.6772, "step": 247500 }, { "epoch": 19.42, "learning_rate": 0.0005, "loss": 1.6931, "step": 247600 }, { "epoch": 19.42, "learning_rate": 0.0005, "loss": 1.6521, "step": 247700 }, { "epoch": 19.43, "learning_rate": 0.0005, "loss": 1.7068, "step": 247800 }, { "epoch": 19.44, "learning_rate": 0.0005, "loss": 1.6937, "step": 247900 }, { "epoch": 19.45, "learning_rate": 0.0005, "loss": 1.72, "step": 248000 }, { "epoch": 19.46, "learning_rate": 0.0005, "loss": 1.7136, "step": 248100 }, { "epoch": 19.46, "learning_rate": 0.0005, "loss": 1.6937, "step": 248200 }, { "epoch": 19.47, "learning_rate": 0.0005, "loss": 1.6706, "step": 248300 }, { "epoch": 19.48, "learning_rate": 0.0005, "loss": 1.6952, "step": 248400 }, { "epoch": 19.49, "learning_rate": 0.0005, "loss": 1.698, "step": 248500 }, { "epoch": 19.49, "learning_rate": 0.0005, "loss": 1.699, "step": 248600 }, { "epoch": 19.5, "learning_rate": 0.0005, "loss": 1.6925, "step": 248700 }, { "epoch": 19.51, "learning_rate": 0.0005, "loss": 1.6807, "step": 248800 }, { "epoch": 19.52, "learning_rate": 0.0005, "loss": 1.6993, "step": 248900 }, { "epoch": 19.53, "learning_rate": 0.0005, "loss": 1.6907, "step": 249000 }, { "epoch": 19.53, "learning_rate": 0.0005, "loss": 1.7015, "step": 249100 }, { "epoch": 19.54, "learning_rate": 0.0005, "loss": 1.7157, "step": 249200 }, { "epoch": 19.55, "learning_rate": 0.0005, "loss": 1.6905, "step": 249300 }, { "epoch": 19.56, "learning_rate": 0.0005, "loss": 1.6841, "step": 249400 }, { "epoch": 19.57, "learning_rate": 0.0005, "loss": 1.6621, "step": 249500 }, { "epoch": 19.57, "learning_rate": 0.0005, "loss": 1.7077, "step": 249600 }, { "epoch": 19.58, "learning_rate": 0.0005, "loss": 1.6981, "step": 249700 }, { "epoch": 19.59, "learning_rate": 0.0005, "loss": 1.6905, "step": 249800 }, { "epoch": 19.6, "learning_rate": 0.0005, "loss": 1.7028, "step": 249900 }, { "epoch": 19.6, "learning_rate": 0.0005, "loss": 1.6849, "step": 250000 }, { "epoch": 19.6, "eval_gen_len": 18.80047673700009, "eval_loss": 2.020770311355591, "eval_rouge1": 35.1918, "eval_rouge2": 14.0743, "eval_rougeL": 28.8604, "eval_rougeLsum": 28.846, "eval_runtime": 358.4886, "eval_samples_per_second": 31.597, "eval_steps_per_second": 1.975, "step": 250000 }, { "epoch": 19.61, "learning_rate": 0.0005, "loss": 1.6831, "step": 250100 }, { "epoch": 19.62, "learning_rate": 0.0005, "loss": 1.7192, "step": 250200 }, { "epoch": 19.63, "learning_rate": 0.0005, "loss": 1.7143, "step": 250300 }, { "epoch": 19.64, "learning_rate": 0.0005, "loss": 1.708, "step": 250400 }, { "epoch": 19.64, "learning_rate": 0.0005, "loss": 1.714, "step": 250500 }, { "epoch": 19.65, "learning_rate": 0.0005, "loss": 1.7072, "step": 250600 }, { "epoch": 19.66, "learning_rate": 0.0005, "loss": 1.7227, "step": 250700 }, { "epoch": 19.67, "learning_rate": 0.0005, "loss": 1.7089, "step": 250800 }, { "epoch": 19.68, "learning_rate": 0.0005, "loss": 1.7213, "step": 250900 }, { "epoch": 19.68, "learning_rate": 0.0005, "loss": 1.7167, "step": 251000 }, { "epoch": 19.69, "learning_rate": 0.0005, "loss": 1.7012, "step": 251100 }, { "epoch": 19.7, "learning_rate": 0.0005, "loss": 1.6903, "step": 251200 }, { "epoch": 19.71, "learning_rate": 0.0005, "loss": 1.6836, "step": 251300 }, { "epoch": 19.71, "learning_rate": 0.0005, "loss": 1.6714, "step": 251400 }, { "epoch": 19.72, "learning_rate": 0.0005, "loss": 1.7137, "step": 251500 }, { "epoch": 19.73, "learning_rate": 0.0005, "loss": 1.6813, "step": 251600 }, { "epoch": 19.74, "learning_rate": 0.0005, "loss": 1.7059, "step": 251700 }, { "epoch": 19.75, "learning_rate": 0.0005, "loss": 1.6909, "step": 251800 }, { "epoch": 19.75, "learning_rate": 0.0005, "loss": 1.7011, "step": 251900 }, { "epoch": 19.76, "learning_rate": 0.0005, "loss": 1.7112, "step": 252000 }, { "epoch": 19.77, "learning_rate": 0.0005, "loss": 1.7428, "step": 252100 }, { "epoch": 19.78, "learning_rate": 0.0005, "loss": 1.7332, "step": 252200 }, { "epoch": 19.79, "learning_rate": 0.0005, "loss": 1.7068, "step": 252300 }, { "epoch": 19.79, "learning_rate": 0.0005, "loss": 1.7146, "step": 252400 }, { "epoch": 19.8, "learning_rate": 0.0005, "loss": 1.7073, "step": 252500 }, { "epoch": 19.81, "learning_rate": 0.0005, "loss": 1.7071, "step": 252600 }, { "epoch": 19.82, "learning_rate": 0.0005, "loss": 1.7196, "step": 252700 }, { "epoch": 19.82, "learning_rate": 0.0005, "loss": 1.7188, "step": 252800 }, { "epoch": 19.83, "learning_rate": 0.0005, "loss": 1.691, "step": 252900 }, { "epoch": 19.84, "learning_rate": 0.0005, "loss": 1.7275, "step": 253000 }, { "epoch": 19.85, "learning_rate": 0.0005, "loss": 1.6993, "step": 253100 }, { "epoch": 19.86, "learning_rate": 0.0005, "loss": 1.7168, "step": 253200 }, { "epoch": 19.86, "learning_rate": 0.0005, "loss": 1.6927, "step": 253300 }, { "epoch": 19.87, "learning_rate": 0.0005, "loss": 1.7066, "step": 253400 }, { "epoch": 19.88, "learning_rate": 0.0005, "loss": 1.7022, "step": 253500 }, { "epoch": 19.89, "learning_rate": 0.0005, "loss": 1.6908, "step": 253600 }, { "epoch": 19.89, "learning_rate": 0.0005, "loss": 1.7124, "step": 253700 }, { "epoch": 19.9, "learning_rate": 0.0005, "loss": 1.7099, "step": 253800 }, { "epoch": 19.91, "learning_rate": 0.0005, "loss": 1.7335, "step": 253900 }, { "epoch": 19.92, "learning_rate": 0.0005, "loss": 1.7446, "step": 254000 }, { "epoch": 19.93, "learning_rate": 0.0005, "loss": 1.7187, "step": 254100 }, { "epoch": 19.93, "learning_rate": 0.0005, "loss": 1.6978, "step": 254200 }, { "epoch": 19.94, "learning_rate": 0.0005, "loss": 1.7339, "step": 254300 }, { "epoch": 19.95, "learning_rate": 0.0005, "loss": 1.698, "step": 254400 }, { "epoch": 19.96, "learning_rate": 0.0005, "loss": 1.7041, "step": 254500 }, { "epoch": 19.97, "learning_rate": 0.0005, "loss": 1.7173, "step": 254600 }, { "epoch": 19.97, "learning_rate": 0.0005, "loss": 1.7025, "step": 254700 }, { "epoch": 19.98, "learning_rate": 0.0005, "loss": 1.6967, "step": 254800 }, { "epoch": 19.99, "learning_rate": 0.0005, "loss": 1.6963, "step": 254900 }, { "epoch": 20.0, "learning_rate": 0.0005, "loss": 1.7125, "step": 255000 }, { "epoch": 20.0, "learning_rate": 0.0005, "loss": 1.6839, "step": 255100 }, { "epoch": 20.01, "learning_rate": 0.0005, "loss": 1.6403, "step": 255200 }, { "epoch": 20.02, "learning_rate": 0.0005, "loss": 1.6344, "step": 255300 }, { "epoch": 20.03, "learning_rate": 0.0005, "loss": 1.643, "step": 255400 }, { "epoch": 20.04, "learning_rate": 0.0005, "loss": 1.6433, "step": 255500 }, { "epoch": 20.04, "learning_rate": 0.0005, "loss": 1.631, "step": 255600 }, { "epoch": 20.05, "learning_rate": 0.0005, "loss": 1.662, "step": 255700 }, { "epoch": 20.06, "learning_rate": 0.0005, "loss": 1.6108, "step": 255800 }, { "epoch": 20.07, "learning_rate": 0.0005, "loss": 1.6505, "step": 255900 }, { "epoch": 20.08, "learning_rate": 0.0005, "loss": 1.6687, "step": 256000 }, { "epoch": 20.08, "learning_rate": 0.0005, "loss": 1.6718, "step": 256100 }, { "epoch": 20.09, "learning_rate": 0.0005, "loss": 1.6497, "step": 256200 }, { "epoch": 20.1, "learning_rate": 0.0005, "loss": 1.6455, "step": 256300 }, { "epoch": 20.11, "learning_rate": 0.0005, "loss": 1.6545, "step": 256400 }, { "epoch": 20.11, "learning_rate": 0.0005, "loss": 1.6349, "step": 256500 }, { "epoch": 20.12, "learning_rate": 0.0005, "loss": 1.6368, "step": 256600 }, { "epoch": 20.13, "learning_rate": 0.0005, "loss": 1.624, "step": 256700 }, { "epoch": 20.14, "learning_rate": 0.0005, "loss": 1.6588, "step": 256800 }, { "epoch": 20.15, "learning_rate": 0.0005, "loss": 1.6254, "step": 256900 }, { "epoch": 20.15, "learning_rate": 0.0005, "loss": 1.6433, "step": 257000 }, { "epoch": 20.16, "learning_rate": 0.0005, "loss": 1.6505, "step": 257100 }, { "epoch": 20.17, "learning_rate": 0.0005, "loss": 1.6722, "step": 257200 }, { "epoch": 20.18, "learning_rate": 0.0005, "loss": 1.6373, "step": 257300 }, { "epoch": 20.19, "learning_rate": 0.0005, "loss": 1.6586, "step": 257400 }, { "epoch": 20.19, "learning_rate": 0.0005, "loss": 1.6745, "step": 257500 }, { "epoch": 20.2, "learning_rate": 0.0005, "loss": 1.665, "step": 257600 }, { "epoch": 20.21, "learning_rate": 0.0005, "loss": 1.656, "step": 257700 }, { "epoch": 20.22, "learning_rate": 0.0005, "loss": 1.6748, "step": 257800 }, { "epoch": 20.22, "learning_rate": 0.0005, "loss": 1.6635, "step": 257900 }, { "epoch": 20.23, "learning_rate": 0.0005, "loss": 1.6579, "step": 258000 }, { "epoch": 20.24, "learning_rate": 0.0005, "loss": 1.6716, "step": 258100 }, { "epoch": 20.25, "learning_rate": 0.0005, "loss": 1.6748, "step": 258200 }, { "epoch": 20.26, "learning_rate": 0.0005, "loss": 1.688, "step": 258300 }, { "epoch": 20.26, "learning_rate": 0.0005, "loss": 1.6389, "step": 258400 }, { "epoch": 20.27, "learning_rate": 0.0005, "loss": 1.6642, "step": 258500 }, { "epoch": 20.28, "learning_rate": 0.0005, "loss": 1.6493, "step": 258600 }, { "epoch": 20.29, "learning_rate": 0.0005, "loss": 1.6601, "step": 258700 }, { "epoch": 20.29, "learning_rate": 0.0005, "loss": 1.6424, "step": 258800 }, { "epoch": 20.3, "learning_rate": 0.0005, "loss": 1.6607, "step": 258900 }, { "epoch": 20.31, "learning_rate": 0.0005, "loss": 1.6403, "step": 259000 }, { "epoch": 20.32, "learning_rate": 0.0005, "loss": 1.693, "step": 259100 }, { "epoch": 20.33, "learning_rate": 0.0005, "loss": 1.6657, "step": 259200 }, { "epoch": 20.33, "learning_rate": 0.0005, "loss": 1.6836, "step": 259300 }, { "epoch": 20.34, "learning_rate": 0.0005, "loss": 1.6652, "step": 259400 }, { "epoch": 20.35, "learning_rate": 0.0005, "loss": 1.6512, "step": 259500 }, { "epoch": 20.36, "learning_rate": 0.0005, "loss": 1.671, "step": 259600 }, { "epoch": 20.37, "learning_rate": 0.0005, "loss": 1.6558, "step": 259700 }, { "epoch": 20.37, "learning_rate": 0.0005, "loss": 1.6636, "step": 259800 }, { "epoch": 20.38, "learning_rate": 0.0005, "loss": 1.6533, "step": 259900 }, { "epoch": 20.39, "learning_rate": 0.0005, "loss": 1.6784, "step": 260000 }, { "epoch": 20.39, "eval_gen_len": 18.790235719961156, "eval_loss": 2.0271031856536865, "eval_rouge1": 35.165, "eval_rouge2": 14.0225, "eval_rougeL": 28.8773, "eval_rougeLsum": 28.8644, "eval_runtime": 359.9035, "eval_samples_per_second": 31.472, "eval_steps_per_second": 1.967, "step": 260000 }, { "epoch": 20.4, "learning_rate": 0.0005, "loss": 1.6699, "step": 260100 }, { "epoch": 20.4, "learning_rate": 0.0005, "loss": 1.6597, "step": 260200 }, { "epoch": 20.41, "learning_rate": 0.0005, "loss": 1.6813, "step": 260300 }, { "epoch": 20.42, "learning_rate": 0.0005, "loss": 1.6641, "step": 260400 }, { "epoch": 20.43, "learning_rate": 0.0005, "loss": 1.6587, "step": 260500 }, { "epoch": 20.44, "learning_rate": 0.0005, "loss": 1.6786, "step": 260600 }, { "epoch": 20.44, "learning_rate": 0.0005, "loss": 1.6915, "step": 260700 }, { "epoch": 20.45, "learning_rate": 0.0005, "loss": 1.659, "step": 260800 }, { "epoch": 20.46, "learning_rate": 0.0005, "loss": 1.6866, "step": 260900 }, { "epoch": 20.47, "learning_rate": 0.0005, "loss": 1.6817, "step": 261000 }, { "epoch": 20.48, "learning_rate": 0.0005, "loss": 1.6575, "step": 261100 }, { "epoch": 20.48, "learning_rate": 0.0005, "loss": 1.6908, "step": 261200 }, { "epoch": 20.49, "learning_rate": 0.0005, "loss": 1.6865, "step": 261300 }, { "epoch": 20.5, "learning_rate": 0.0005, "loss": 1.6798, "step": 261400 }, { "epoch": 20.51, "learning_rate": 0.0005, "loss": 1.6394, "step": 261500 }, { "epoch": 20.51, "learning_rate": 0.0005, "loss": 1.6804, "step": 261600 }, { "epoch": 20.52, "learning_rate": 0.0005, "loss": 1.679, "step": 261700 }, { "epoch": 20.53, "learning_rate": 0.0005, "loss": 1.7161, "step": 261800 }, { "epoch": 20.54, "learning_rate": 0.0005, "loss": 1.6768, "step": 261900 }, { "epoch": 20.55, "learning_rate": 0.0005, "loss": 1.7062, "step": 262000 }, { "epoch": 20.55, "learning_rate": 0.0005, "loss": 1.6279, "step": 262100 }, { "epoch": 20.56, "learning_rate": 0.0005, "loss": 1.6862, "step": 262200 }, { "epoch": 20.57, "learning_rate": 0.0005, "loss": 1.7075, "step": 262300 }, { "epoch": 20.58, "learning_rate": 0.0005, "loss": 1.6938, "step": 262400 }, { "epoch": 20.59, "learning_rate": 0.0005, "loss": 1.7088, "step": 262500 }, { "epoch": 20.59, "learning_rate": 0.0005, "loss": 1.6599, "step": 262600 }, { "epoch": 20.6, "learning_rate": 0.0005, "loss": 1.6652, "step": 262700 }, { "epoch": 20.61, "learning_rate": 0.0005, "loss": 1.6543, "step": 262800 }, { "epoch": 20.62, "learning_rate": 0.0005, "loss": 1.6803, "step": 262900 }, { "epoch": 20.62, "learning_rate": 0.0005, "loss": 1.6728, "step": 263000 }, { "epoch": 20.63, "learning_rate": 0.0005, "loss": 1.6599, "step": 263100 }, { "epoch": 20.64, "learning_rate": 0.0005, "loss": 1.6822, "step": 263200 }, { "epoch": 20.65, "learning_rate": 0.0005, "loss": 1.6958, "step": 263300 }, { "epoch": 20.66, "learning_rate": 0.0005, "loss": 1.7065, "step": 263400 }, { "epoch": 20.66, "learning_rate": 0.0005, "loss": 1.7043, "step": 263500 }, { "epoch": 20.67, "learning_rate": 0.0005, "loss": 1.6914, "step": 263600 }, { "epoch": 20.68, "learning_rate": 0.0005, "loss": 1.701, "step": 263700 }, { "epoch": 20.69, "learning_rate": 0.0005, "loss": 1.6959, "step": 263800 }, { "epoch": 20.69, "learning_rate": 0.0005, "loss": 1.6852, "step": 263900 }, { "epoch": 20.7, "learning_rate": 0.0005, "loss": 1.6723, "step": 264000 }, { "epoch": 20.71, "learning_rate": 0.0005, "loss": 1.6832, "step": 264100 }, { "epoch": 20.72, "learning_rate": 0.0005, "loss": 1.65, "step": 264200 }, { "epoch": 20.73, "learning_rate": 0.0005, "loss": 1.6954, "step": 264300 }, { "epoch": 20.73, "learning_rate": 0.0005, "loss": 1.6687, "step": 264400 }, { "epoch": 20.74, "learning_rate": 0.0005, "loss": 1.693, "step": 264500 }, { "epoch": 20.75, "learning_rate": 0.0005, "loss": 1.6815, "step": 264600 }, { "epoch": 20.76, "learning_rate": 0.0005, "loss": 1.6688, "step": 264700 }, { "epoch": 20.77, "learning_rate": 0.0005, "loss": 1.6602, "step": 264800 }, { "epoch": 20.77, "learning_rate": 0.0005, "loss": 1.6585, "step": 264900 }, { "epoch": 20.78, "learning_rate": 0.0005, "loss": 1.6895, "step": 265000 }, { "epoch": 20.79, "learning_rate": 0.0005, "loss": 1.6925, "step": 265100 }, { "epoch": 20.8, "learning_rate": 0.0005, "loss": 1.7074, "step": 265200 }, { "epoch": 20.8, "learning_rate": 0.0005, "loss": 1.7013, "step": 265300 }, { "epoch": 20.81, "learning_rate": 0.0005, "loss": 1.6727, "step": 265400 }, { "epoch": 20.82, "learning_rate": 0.0005, "loss": 1.6998, "step": 265500 }, { "epoch": 20.83, "learning_rate": 0.0005, "loss": 1.687, "step": 265600 }, { "epoch": 20.84, "learning_rate": 0.0005, "loss": 1.6983, "step": 265700 }, { "epoch": 20.84, "learning_rate": 0.0005, "loss": 1.7113, "step": 265800 }, { "epoch": 20.85, "learning_rate": 0.0005, "loss": 1.6944, "step": 265900 }, { "epoch": 20.86, "learning_rate": 0.0005, "loss": 1.7157, "step": 266000 }, { "epoch": 20.87, "learning_rate": 0.0005, "loss": 1.7116, "step": 266100 }, { "epoch": 20.88, "learning_rate": 0.0005, "loss": 1.7307, "step": 266200 }, { "epoch": 20.88, "learning_rate": 0.0005, "loss": 1.6907, "step": 266300 }, { "epoch": 20.89, "learning_rate": 0.0005, "loss": 1.6861, "step": 266400 }, { "epoch": 20.9, "learning_rate": 0.0005, "loss": 1.6892, "step": 266500 }, { "epoch": 20.91, "learning_rate": 0.0005, "loss": 1.6904, "step": 266600 }, { "epoch": 20.91, "learning_rate": 0.0005, "loss": 1.7172, "step": 266700 }, { "epoch": 20.92, "learning_rate": 0.0005, "loss": 1.7, "step": 266800 }, { "epoch": 20.93, "learning_rate": 0.0005, "loss": 1.6893, "step": 266900 }, { "epoch": 20.94, "learning_rate": 0.0005, "loss": 1.6898, "step": 267000 }, { "epoch": 20.95, "learning_rate": 0.0005, "loss": 1.7073, "step": 267100 }, { "epoch": 20.95, "learning_rate": 0.0005, "loss": 1.703, "step": 267200 }, { "epoch": 20.96, "learning_rate": 0.0005, "loss": 1.6952, "step": 267300 }, { "epoch": 20.97, "learning_rate": 0.0005, "loss": 1.6893, "step": 267400 }, { "epoch": 20.98, "learning_rate": 0.0005, "loss": 1.7366, "step": 267500 }, { "epoch": 20.98, "learning_rate": 0.0005, "loss": 1.7323, "step": 267600 }, { "epoch": 20.99, "learning_rate": 0.0005, "loss": 1.7095, "step": 267700 }, { "epoch": 21.0, "learning_rate": 0.0005, "loss": 1.6746, "step": 267800 }, { "epoch": 21.01, "learning_rate": 0.0005, "loss": 1.5848, "step": 267900 }, { "epoch": 21.02, "learning_rate": 0.0005, "loss": 1.64, "step": 268000 }, { "epoch": 21.02, "learning_rate": 0.0005, "loss": 1.6432, "step": 268100 }, { "epoch": 21.03, "learning_rate": 0.0005, "loss": 1.6222, "step": 268200 }, { "epoch": 21.04, "learning_rate": 0.0005, "loss": 1.6085, "step": 268300 }, { "epoch": 21.05, "learning_rate": 0.0005, "loss": 1.6059, "step": 268400 }, { "epoch": 21.06, "learning_rate": 0.0005, "loss": 1.6361, "step": 268500 }, { "epoch": 21.06, "learning_rate": 0.0005, "loss": 1.6125, "step": 268600 }, { "epoch": 21.07, "learning_rate": 0.0005, "loss": 1.6033, "step": 268700 }, { "epoch": 21.08, "learning_rate": 0.0005, "loss": 1.6163, "step": 268800 }, { "epoch": 21.09, "learning_rate": 0.0005, "loss": 1.6157, "step": 268900 }, { "epoch": 21.09, "learning_rate": 0.0005, "loss": 1.6505, "step": 269000 }, { "epoch": 21.1, "learning_rate": 0.0005, "loss": 1.6267, "step": 269100 }, { "epoch": 21.11, "learning_rate": 0.0005, "loss": 1.6007, "step": 269200 }, { "epoch": 21.12, "learning_rate": 0.0005, "loss": 1.6224, "step": 269300 }, { "epoch": 21.13, "learning_rate": 0.0005, "loss": 1.6151, "step": 269400 }, { "epoch": 21.13, "learning_rate": 0.0005, "loss": 1.6311, "step": 269500 }, { "epoch": 21.14, "learning_rate": 0.0005, "loss": 1.6143, "step": 269600 }, { "epoch": 21.15, "learning_rate": 0.0005, "loss": 1.6499, "step": 269700 }, { "epoch": 21.16, "learning_rate": 0.0005, "loss": 1.6286, "step": 269800 }, { "epoch": 21.17, "learning_rate": 0.0005, "loss": 1.6327, "step": 269900 }, { "epoch": 21.17, "learning_rate": 0.0005, "loss": 1.6273, "step": 270000 }, { "epoch": 21.17, "eval_gen_len": 18.794120243665578, "eval_loss": 2.0332844257354736, "eval_rouge1": 35.267, "eval_rouge2": 14.1753, "eval_rougeL": 28.973, "eval_rougeLsum": 28.9735, "eval_runtime": 364.1194, "eval_samples_per_second": 31.108, "eval_steps_per_second": 1.944, "step": 270000 }, { "epoch": 21.18, "learning_rate": 0.0005, "loss": 1.6709, "step": 270100 }, { "epoch": 21.19, "learning_rate": 0.0005, "loss": 1.6506, "step": 270200 }, { "epoch": 21.2, "learning_rate": 0.0005, "loss": 1.6591, "step": 270300 }, { "epoch": 21.2, "learning_rate": 0.0005, "loss": 1.6446, "step": 270400 }, { "epoch": 21.21, "learning_rate": 0.0005, "loss": 1.6299, "step": 270500 }, { "epoch": 21.22, "learning_rate": 0.0005, "loss": 1.6684, "step": 270600 }, { "epoch": 21.23, "learning_rate": 0.0005, "loss": 1.6636, "step": 270700 }, { "epoch": 21.24, "learning_rate": 0.0005, "loss": 1.6326, "step": 270800 }, { "epoch": 21.24, "learning_rate": 0.0005, "loss": 1.6458, "step": 270900 }, { "epoch": 21.25, "learning_rate": 0.0005, "loss": 1.6513, "step": 271000 }, { "epoch": 21.26, "learning_rate": 0.0005, "loss": 1.6618, "step": 271100 }, { "epoch": 21.27, "learning_rate": 0.0005, "loss": 1.6441, "step": 271200 }, { "epoch": 21.28, "learning_rate": 0.0005, "loss": 1.64, "step": 271300 }, { "epoch": 21.28, "learning_rate": 0.0005, "loss": 1.6454, "step": 271400 }, { "epoch": 21.29, "learning_rate": 0.0005, "loss": 1.6246, "step": 271500 }, { "epoch": 21.3, "learning_rate": 0.0005, "loss": 1.6632, "step": 271600 }, { "epoch": 21.31, "learning_rate": 0.0005, "loss": 1.6478, "step": 271700 }, { "epoch": 21.31, "learning_rate": 0.0005, "loss": 1.6651, "step": 271800 }, { "epoch": 21.32, "learning_rate": 0.0005, "loss": 1.626, "step": 271900 }, { "epoch": 21.33, "learning_rate": 0.0005, "loss": 1.6519, "step": 272000 }, { "epoch": 21.34, "learning_rate": 0.0005, "loss": 1.66, "step": 272100 }, { "epoch": 21.35, "learning_rate": 0.0005, "loss": 1.6479, "step": 272200 }, { "epoch": 21.35, "learning_rate": 0.0005, "loss": 1.6556, "step": 272300 }, { "epoch": 21.36, "learning_rate": 0.0005, "loss": 1.6563, "step": 272400 }, { "epoch": 21.37, "learning_rate": 0.0005, "loss": 1.6392, "step": 272500 }, { "epoch": 21.38, "learning_rate": 0.0005, "loss": 1.6474, "step": 272600 }, { "epoch": 21.38, "learning_rate": 0.0005, "loss": 1.6544, "step": 272700 }, { "epoch": 21.39, "learning_rate": 0.0005, "loss": 1.6677, "step": 272800 }, { "epoch": 21.4, "learning_rate": 0.0005, "loss": 1.6584, "step": 272900 }, { "epoch": 21.41, "learning_rate": 0.0005, "loss": 1.656, "step": 273000 }, { "epoch": 21.42, "learning_rate": 0.0005, "loss": 1.6487, "step": 273100 }, { "epoch": 21.42, "learning_rate": 0.0005, "loss": 1.6614, "step": 273200 }, { "epoch": 21.43, "learning_rate": 0.0005, "loss": 1.6839, "step": 273300 }, { "epoch": 21.44, "learning_rate": 0.0005, "loss": 1.6518, "step": 273400 }, { "epoch": 21.45, "learning_rate": 0.0005, "loss": 1.6572, "step": 273500 }, { "epoch": 21.46, "learning_rate": 0.0005, "loss": 1.6604, "step": 273600 }, { "epoch": 21.46, "learning_rate": 0.0005, "loss": 1.683, "step": 273700 }, { "epoch": 21.47, "learning_rate": 0.0005, "loss": 1.6378, "step": 273800 }, { "epoch": 21.48, "learning_rate": 0.0005, "loss": 1.6439, "step": 273900 }, { "epoch": 21.49, "learning_rate": 0.0005, "loss": 1.6667, "step": 274000 }, { "epoch": 21.49, "learning_rate": 0.0005, "loss": 1.6472, "step": 274100 }, { "epoch": 21.5, "learning_rate": 0.0005, "loss": 1.639, "step": 274200 }, { "epoch": 21.51, "learning_rate": 0.0005, "loss": 1.6757, "step": 274300 }, { "epoch": 21.52, "learning_rate": 0.0005, "loss": 1.6633, "step": 274400 }, { "epoch": 21.53, "learning_rate": 0.0005, "loss": 1.65, "step": 274500 }, { "epoch": 21.53, "learning_rate": 0.0005, "loss": 1.67, "step": 274600 }, { "epoch": 21.54, "learning_rate": 0.0005, "loss": 1.6726, "step": 274700 }, { "epoch": 21.55, "learning_rate": 0.0005, "loss": 1.6634, "step": 274800 }, { "epoch": 21.56, "learning_rate": 0.0005, "loss": 1.6844, "step": 274900 }, { "epoch": 21.57, "learning_rate": 0.0005, "loss": 1.6708, "step": 275000 }, { "epoch": 21.57, "learning_rate": 0.0005, "loss": 1.6485, "step": 275100 }, { "epoch": 21.58, "learning_rate": 0.0005, "loss": 1.6648, "step": 275200 }, { "epoch": 21.59, "learning_rate": 0.0005, "loss": 1.6691, "step": 275300 }, { "epoch": 21.6, "learning_rate": 0.0005, "loss": 1.6751, "step": 275400 }, { "epoch": 21.6, "learning_rate": 0.0005, "loss": 1.6635, "step": 275500 }, { "epoch": 21.61, "learning_rate": 0.0005, "loss": 1.6693, "step": 275600 }, { "epoch": 21.62, "learning_rate": 0.0005, "loss": 1.6764, "step": 275700 }, { "epoch": 21.63, "learning_rate": 0.0005, "loss": 1.6629, "step": 275800 }, { "epoch": 21.64, "learning_rate": 0.0005, "loss": 1.6837, "step": 275900 }, { "epoch": 21.64, "learning_rate": 0.0005, "loss": 1.6856, "step": 276000 }, { "epoch": 21.65, "learning_rate": 0.0005, "loss": 1.6608, "step": 276100 }, { "epoch": 21.66, "learning_rate": 0.0005, "loss": 1.665, "step": 276200 }, { "epoch": 21.67, "learning_rate": 0.0005, "loss": 1.6734, "step": 276300 }, { "epoch": 21.68, "learning_rate": 0.0005, "loss": 1.6561, "step": 276400 }, { "epoch": 21.68, "learning_rate": 0.0005, "loss": 1.6762, "step": 276500 }, { "epoch": 21.69, "learning_rate": 0.0005, "loss": 1.6431, "step": 276600 }, { "epoch": 21.7, "learning_rate": 0.0005, "loss": 1.6631, "step": 276700 }, { "epoch": 21.71, "learning_rate": 0.0005, "loss": 1.7076, "step": 276800 }, { "epoch": 21.71, "learning_rate": 0.0005, "loss": 1.6691, "step": 276900 }, { "epoch": 21.72, "learning_rate": 0.0005, "loss": 1.6689, "step": 277000 }, { "epoch": 21.73, "learning_rate": 0.0005, "loss": 1.6895, "step": 277100 }, { "epoch": 21.74, "learning_rate": 0.0005, "loss": 1.701, "step": 277200 }, { "epoch": 21.75, "learning_rate": 0.0005, "loss": 1.6691, "step": 277300 }, { "epoch": 21.75, "learning_rate": 0.0005, "loss": 1.6708, "step": 277400 }, { "epoch": 21.76, "learning_rate": 0.0005, "loss": 1.6708, "step": 277500 }, { "epoch": 21.77, "learning_rate": 0.0005, "loss": 1.7038, "step": 277600 }, { "epoch": 21.78, "learning_rate": 0.0005, "loss": 1.6813, "step": 277700 }, { "epoch": 21.78, "learning_rate": 0.0005, "loss": 1.6738, "step": 277800 }, { "epoch": 21.79, "learning_rate": 0.0005, "loss": 1.6991, "step": 277900 }, { "epoch": 21.8, "learning_rate": 0.0005, "loss": 1.7033, "step": 278000 }, { "epoch": 21.81, "learning_rate": 0.0005, "loss": 1.6716, "step": 278100 }, { "epoch": 21.82, "learning_rate": 0.0005, "loss": 1.678, "step": 278200 }, { "epoch": 21.82, "learning_rate": 0.0005, "loss": 1.6645, "step": 278300 }, { "epoch": 21.83, "learning_rate": 0.0005, "loss": 1.6913, "step": 278400 }, { "epoch": 21.84, "learning_rate": 0.0005, "loss": 1.6807, "step": 278500 }, { "epoch": 21.85, "learning_rate": 0.0005, "loss": 1.6646, "step": 278600 }, { "epoch": 21.86, "learning_rate": 0.0005, "loss": 1.6968, "step": 278700 }, { "epoch": 21.86, "learning_rate": 0.0005, "loss": 1.6384, "step": 278800 }, { "epoch": 21.87, "learning_rate": 0.0005, "loss": 1.6578, "step": 278900 }, { "epoch": 21.88, "learning_rate": 0.0005, "loss": 1.6761, "step": 279000 }, { "epoch": 21.89, "learning_rate": 0.0005, "loss": 1.6749, "step": 279100 }, { "epoch": 21.89, "learning_rate": 0.0005, "loss": 1.6953, "step": 279200 }, { "epoch": 21.9, "learning_rate": 0.0005, "loss": 1.6829, "step": 279300 }, { "epoch": 21.91, "learning_rate": 0.0005, "loss": 1.6733, "step": 279400 }, { "epoch": 21.92, "learning_rate": 0.0005, "loss": 1.6732, "step": 279500 }, { "epoch": 21.93, "learning_rate": 0.0005, "loss": 1.6764, "step": 279600 }, { "epoch": 21.93, "learning_rate": 0.0005, "loss": 1.6588, "step": 279700 }, { "epoch": 21.94, "learning_rate": 0.0005, "loss": 1.6779, "step": 279800 }, { "epoch": 21.95, "learning_rate": 0.0005, "loss": 1.6869, "step": 279900 }, { "epoch": 21.96, "learning_rate": 0.0005, "loss": 1.7009, "step": 280000 }, { "epoch": 21.96, "eval_gen_len": 18.80851063829787, "eval_loss": 2.0156733989715576, "eval_rouge1": 35.3219, "eval_rouge2": 14.2115, "eval_rougeL": 28.9797, "eval_rougeLsum": 28.9688, "eval_runtime": 361.1372, "eval_samples_per_second": 31.365, "eval_steps_per_second": 1.96, "step": 280000 }, { "epoch": 21.97, "learning_rate": 0.0005, "loss": 1.6889, "step": 280100 }, { "epoch": 21.97, "learning_rate": 0.0005, "loss": 1.6603, "step": 280200 }, { "epoch": 21.98, "learning_rate": 0.0005, "loss": 1.6909, "step": 280300 }, { "epoch": 21.99, "learning_rate": 0.0005, "loss": 1.7038, "step": 280400 }, { "epoch": 22.0, "learning_rate": 0.0005, "loss": 1.6765, "step": 280500 }, { "epoch": 22.0, "learning_rate": 0.0005, "loss": 1.6226, "step": 280600 }, { "epoch": 22.01, "learning_rate": 0.0005, "loss": 1.6016, "step": 280700 }, { "epoch": 22.02, "learning_rate": 0.0005, "loss": 1.6137, "step": 280800 }, { "epoch": 22.03, "learning_rate": 0.0005, "loss": 1.6311, "step": 280900 }, { "epoch": 22.04, "learning_rate": 0.0005, "loss": 1.6032, "step": 281000 }, { "epoch": 22.04, "learning_rate": 0.0005, "loss": 1.586, "step": 281100 }, { "epoch": 22.05, "learning_rate": 0.0005, "loss": 1.6228, "step": 281200 }, { "epoch": 22.06, "learning_rate": 0.0005, "loss": 1.5936, "step": 281300 }, { "epoch": 22.07, "learning_rate": 0.0005, "loss": 1.6262, "step": 281400 }, { "epoch": 22.07, "learning_rate": 0.0005, "loss": 1.6091, "step": 281500 }, { "epoch": 22.08, "learning_rate": 0.0005, "loss": 1.6138, "step": 281600 }, { "epoch": 22.09, "learning_rate": 0.0005, "loss": 1.6125, "step": 281700 }, { "epoch": 22.1, "learning_rate": 0.0005, "loss": 1.636, "step": 281800 }, { "epoch": 22.11, "learning_rate": 0.0005, "loss": 1.5914, "step": 281900 }, { "epoch": 22.11, "learning_rate": 0.0005, "loss": 1.6495, "step": 282000 }, { "epoch": 22.12, "learning_rate": 0.0005, "loss": 1.6053, "step": 282100 }, { "epoch": 22.13, "learning_rate": 0.0005, "loss": 1.6091, "step": 282200 }, { "epoch": 22.14, "learning_rate": 0.0005, "loss": 1.6329, "step": 282300 }, { "epoch": 22.15, "learning_rate": 0.0005, "loss": 1.6053, "step": 282400 }, { "epoch": 22.15, "learning_rate": 0.0005, "loss": 1.5926, "step": 282500 }, { "epoch": 22.16, "learning_rate": 0.0005, "loss": 1.5977, "step": 282600 }, { "epoch": 22.17, "learning_rate": 0.0005, "loss": 1.59, "step": 282700 }, { "epoch": 22.18, "learning_rate": 0.0005, "loss": 1.6346, "step": 282800 }, { "epoch": 22.18, "learning_rate": 0.0005, "loss": 1.5948, "step": 282900 }, { "epoch": 22.19, "learning_rate": 0.0005, "loss": 1.6221, "step": 283000 }, { "epoch": 22.2, "learning_rate": 0.0005, "loss": 1.6059, "step": 283100 }, { "epoch": 22.21, "learning_rate": 0.0005, "loss": 1.6255, "step": 283200 }, { "epoch": 22.22, "learning_rate": 0.0005, "loss": 1.6016, "step": 283300 }, { "epoch": 22.22, "learning_rate": 0.0005, "loss": 1.6287, "step": 283400 }, { "epoch": 22.23, "learning_rate": 0.0005, "loss": 1.6254, "step": 283500 }, { "epoch": 22.24, "learning_rate": 0.0005, "loss": 1.623, "step": 283600 }, { "epoch": 22.25, "learning_rate": 0.0005, "loss": 1.6471, "step": 283700 }, { "epoch": 22.26, "learning_rate": 0.0005, "loss": 1.6425, "step": 283800 }, { "epoch": 22.26, "learning_rate": 0.0005, "loss": 1.6553, "step": 283900 }, { "epoch": 22.27, "learning_rate": 0.0005, "loss": 1.6522, "step": 284000 }, { "epoch": 22.28, "learning_rate": 0.0005, "loss": 1.6446, "step": 284100 }, { "epoch": 22.29, "learning_rate": 0.0005, "loss": 1.6494, "step": 284200 }, { "epoch": 22.29, "learning_rate": 0.0005, "loss": 1.6355, "step": 284300 }, { "epoch": 22.3, "learning_rate": 0.0005, "loss": 1.612, "step": 284400 }, { "epoch": 22.31, "learning_rate": 0.0005, "loss": 1.6425, "step": 284500 }, { "epoch": 22.32, "learning_rate": 0.0005, "loss": 1.6426, "step": 284600 }, { "epoch": 22.33, "learning_rate": 0.0005, "loss": 1.6647, "step": 284700 }, { "epoch": 22.33, "learning_rate": 0.0005, "loss": 1.6259, "step": 284800 }, { "epoch": 22.34, "learning_rate": 0.0005, "loss": 1.6348, "step": 284900 }, { "epoch": 22.35, "learning_rate": 0.0005, "loss": 1.6271, "step": 285000 }, { "epoch": 22.36, "learning_rate": 0.0005, "loss": 1.6255, "step": 285100 }, { "epoch": 22.37, "learning_rate": 0.0005, "loss": 1.6273, "step": 285200 }, { "epoch": 22.37, "learning_rate": 0.0005, "loss": 1.6208, "step": 285300 }, { "epoch": 22.38, "learning_rate": 0.0005, "loss": 1.639, "step": 285400 }, { "epoch": 22.39, "learning_rate": 0.0005, "loss": 1.6401, "step": 285500 }, { "epoch": 22.4, "learning_rate": 0.0005, "loss": 1.631, "step": 285600 }, { "epoch": 22.4, "learning_rate": 0.0005, "loss": 1.6275, "step": 285700 }, { "epoch": 22.41, "learning_rate": 0.0005, "loss": 1.6289, "step": 285800 }, { "epoch": 22.42, "learning_rate": 0.0005, "loss": 1.6153, "step": 285900 }, { "epoch": 22.43, "learning_rate": 0.0005, "loss": 1.6571, "step": 286000 }, { "epoch": 22.44, "learning_rate": 0.0005, "loss": 1.6038, "step": 286100 }, { "epoch": 22.44, "learning_rate": 0.0005, "loss": 1.6535, "step": 286200 }, { "epoch": 22.45, "learning_rate": 0.0005, "loss": 1.6414, "step": 286300 }, { "epoch": 22.46, "learning_rate": 0.0005, "loss": 1.642, "step": 286400 }, { "epoch": 22.47, "learning_rate": 0.0005, "loss": 1.6574, "step": 286500 }, { "epoch": 22.47, "learning_rate": 0.0005, "loss": 1.612, "step": 286600 }, { "epoch": 22.48, "learning_rate": 0.0005, "loss": 1.6388, "step": 286700 }, { "epoch": 22.49, "learning_rate": 0.0005, "loss": 1.6577, "step": 286800 }, { "epoch": 22.5, "learning_rate": 0.0005, "loss": 1.6306, "step": 286900 }, { "epoch": 22.51, "learning_rate": 0.0005, "loss": 1.6493, "step": 287000 }, { "epoch": 22.51, "learning_rate": 0.0005, "loss": 1.6496, "step": 287100 }, { "epoch": 22.52, "learning_rate": 0.0005, "loss": 1.6655, "step": 287200 }, { "epoch": 22.53, "learning_rate": 0.0005, "loss": 1.6347, "step": 287300 }, { "epoch": 22.54, "learning_rate": 0.0005, "loss": 1.6628, "step": 287400 }, { "epoch": 22.55, "learning_rate": 0.0005, "loss": 1.6369, "step": 287500 }, { "epoch": 22.55, "learning_rate": 0.0005, "loss": 1.6818, "step": 287600 }, { "epoch": 22.56, "learning_rate": 0.0005, "loss": 1.6497, "step": 287700 }, { "epoch": 22.57, "learning_rate": 0.0005, "loss": 1.6406, "step": 287800 }, { "epoch": 22.58, "learning_rate": 0.0005, "loss": 1.6353, "step": 287900 }, { "epoch": 22.58, "learning_rate": 0.0005, "loss": 1.6637, "step": 288000 }, { "epoch": 22.59, "learning_rate": 0.0005, "loss": 1.6416, "step": 288100 }, { "epoch": 22.6, "learning_rate": 0.0005, "loss": 1.6283, "step": 288200 }, { "epoch": 22.61, "learning_rate": 0.0005, "loss": 1.667, "step": 288300 }, { "epoch": 22.62, "learning_rate": 0.0005, "loss": 1.6687, "step": 288400 }, { "epoch": 22.62, "learning_rate": 0.0005, "loss": 1.6424, "step": 288500 }, { "epoch": 22.63, "learning_rate": 0.0005, "loss": 1.661, "step": 288600 }, { "epoch": 22.64, "learning_rate": 0.0005, "loss": 1.6544, "step": 288700 }, { "epoch": 22.65, "learning_rate": 0.0005, "loss": 1.66, "step": 288800 }, { "epoch": 22.66, "learning_rate": 0.0005, "loss": 1.661, "step": 288900 }, { "epoch": 22.66, "learning_rate": 0.0005, "loss": 1.6509, "step": 289000 }, { "epoch": 22.67, "learning_rate": 0.0005, "loss": 1.6761, "step": 289100 }, { "epoch": 22.68, "learning_rate": 0.0005, "loss": 1.6578, "step": 289200 }, { "epoch": 22.69, "learning_rate": 0.0005, "loss": 1.6454, "step": 289300 }, { "epoch": 22.69, "learning_rate": 0.0005, "loss": 1.6606, "step": 289400 }, { "epoch": 22.7, "learning_rate": 0.0005, "loss": 1.6326, "step": 289500 }, { "epoch": 22.71, "learning_rate": 0.0005, "loss": 1.6758, "step": 289600 }, { "epoch": 22.72, "learning_rate": 0.0005, "loss": 1.6564, "step": 289700 }, { "epoch": 22.73, "learning_rate": 0.0005, "loss": 1.656, "step": 289800 }, { "epoch": 22.73, "learning_rate": 0.0005, "loss": 1.6915, "step": 289900 }, { "epoch": 22.74, "learning_rate": 0.0005, "loss": 1.6582, "step": 290000 }, { "epoch": 22.74, "eval_gen_len": 18.805862099408493, "eval_loss": 2.0187759399414062, "eval_rouge1": 35.4028, "eval_rouge2": 14.3182, "eval_rougeL": 29.1245, "eval_rougeLsum": 29.1239, "eval_runtime": 358.1851, "eval_samples_per_second": 31.623, "eval_steps_per_second": 1.977, "step": 290000 }, { "epoch": 22.75, "learning_rate": 0.0005, "loss": 1.6384, "step": 290100 }, { "epoch": 22.76, "learning_rate": 0.0005, "loss": 1.6645, "step": 290200 }, { "epoch": 22.77, "learning_rate": 0.0005, "loss": 1.6506, "step": 290300 }, { "epoch": 22.77, "learning_rate": 0.0005, "loss": 1.6602, "step": 290400 }, { "epoch": 22.78, "learning_rate": 0.0005, "loss": 1.6624, "step": 290500 }, { "epoch": 22.79, "learning_rate": 0.0005, "loss": 1.661, "step": 290600 }, { "epoch": 22.8, "learning_rate": 0.0005, "loss": 1.659, "step": 290700 }, { "epoch": 22.8, "learning_rate": 0.0005, "loss": 1.7059, "step": 290800 }, { "epoch": 22.81, "learning_rate": 0.0005, "loss": 1.6346, "step": 290900 }, { "epoch": 22.82, "learning_rate": 0.0005, "loss": 1.6551, "step": 291000 }, { "epoch": 22.83, "learning_rate": 0.0005, "loss": 1.6512, "step": 291100 }, { "epoch": 22.84, "learning_rate": 0.0005, "loss": 1.6733, "step": 291200 }, { "epoch": 22.84, "learning_rate": 0.0005, "loss": 1.6672, "step": 291300 }, { "epoch": 22.85, "learning_rate": 0.0005, "loss": 1.6735, "step": 291400 }, { "epoch": 22.86, "learning_rate": 0.0005, "loss": 1.6639, "step": 291500 }, { "epoch": 22.87, "learning_rate": 0.0005, "loss": 1.6709, "step": 291600 }, { "epoch": 22.87, "learning_rate": 0.0005, "loss": 1.6485, "step": 291700 }, { "epoch": 22.88, "learning_rate": 0.0005, "loss": 1.6701, "step": 291800 }, { "epoch": 22.89, "learning_rate": 0.0005, "loss": 1.6622, "step": 291900 }, { "epoch": 22.9, "learning_rate": 0.0005, "loss": 1.6651, "step": 292000 }, { "epoch": 22.91, "learning_rate": 0.0005, "loss": 1.6575, "step": 292100 }, { "epoch": 22.91, "learning_rate": 0.0005, "loss": 1.661, "step": 292200 }, { "epoch": 22.92, "learning_rate": 0.0005, "loss": 1.6728, "step": 292300 }, { "epoch": 22.93, "learning_rate": 0.0005, "loss": 1.6599, "step": 292400 }, { "epoch": 22.94, "learning_rate": 0.0005, "loss": 1.6628, "step": 292500 }, { "epoch": 22.95, "learning_rate": 0.0005, "loss": 1.6632, "step": 292600 }, { "epoch": 22.95, "learning_rate": 0.0005, "loss": 1.642, "step": 292700 }, { "epoch": 22.96, "learning_rate": 0.0005, "loss": 1.6767, "step": 292800 }, { "epoch": 22.97, "learning_rate": 0.0005, "loss": 1.6652, "step": 292900 }, { "epoch": 22.98, "learning_rate": 0.0005, "loss": 1.6488, "step": 293000 }, { "epoch": 22.98, "learning_rate": 0.0005, "loss": 1.6915, "step": 293100 }, { "epoch": 22.99, "learning_rate": 0.0005, "loss": 1.6965, "step": 293200 }, { "epoch": 23.0, "learning_rate": 0.0005, "loss": 1.6508, "step": 293300 }, { "epoch": 23.01, "learning_rate": 0.0005, "loss": 1.5917, "step": 293400 }, { "epoch": 23.02, "learning_rate": 0.0005, "loss": 1.5818, "step": 293500 }, { "epoch": 23.02, "learning_rate": 0.0005, "loss": 1.6064, "step": 293600 }, { "epoch": 23.03, "learning_rate": 0.0005, "loss": 1.6003, "step": 293700 }, { "epoch": 23.04, "learning_rate": 0.0005, "loss": 1.5817, "step": 293800 }, { "epoch": 23.05, "learning_rate": 0.0005, "loss": 1.5865, "step": 293900 }, { "epoch": 23.06, "learning_rate": 0.0005, "loss": 1.5629, "step": 294000 }, { "epoch": 23.06, "learning_rate": 0.0005, "loss": 1.5881, "step": 294100 }, { "epoch": 23.07, "learning_rate": 0.0005, "loss": 1.5776, "step": 294200 }, { "epoch": 23.08, "learning_rate": 0.0005, "loss": 1.59, "step": 294300 }, { "epoch": 23.09, "learning_rate": 0.0005, "loss": 1.5971, "step": 294400 }, { "epoch": 23.09, "learning_rate": 0.0005, "loss": 1.6124, "step": 294500 }, { "epoch": 23.1, "learning_rate": 0.0005, "loss": 1.6203, "step": 294600 }, { "epoch": 23.11, "learning_rate": 0.0005, "loss": 1.574, "step": 294700 }, { "epoch": 23.12, "learning_rate": 0.0005, "loss": 1.6149, "step": 294800 }, { "epoch": 23.13, "learning_rate": 0.0005, "loss": 1.6092, "step": 294900 }, { "epoch": 23.13, "learning_rate": 0.0005, "loss": 1.5889, "step": 295000 }, { "epoch": 23.14, "learning_rate": 0.0005, "loss": 1.5929, "step": 295100 }, { "epoch": 23.15, "learning_rate": 0.0005, "loss": 1.6198, "step": 295200 }, { "epoch": 23.16, "learning_rate": 0.0005, "loss": 1.5972, "step": 295300 }, { "epoch": 23.16, "learning_rate": 0.0005, "loss": 1.5884, "step": 295400 }, { "epoch": 23.17, "learning_rate": 0.0005, "loss": 1.5839, "step": 295500 }, { "epoch": 23.18, "learning_rate": 0.0005, "loss": 1.6108, "step": 295600 }, { "epoch": 23.19, "learning_rate": 0.0005, "loss": 1.5895, "step": 295700 }, { "epoch": 23.2, "learning_rate": 0.0005, "loss": 1.5911, "step": 295800 }, { "epoch": 23.2, "learning_rate": 0.0005, "loss": 1.5722, "step": 295900 }, { "epoch": 23.21, "learning_rate": 0.0005, "loss": 1.6111, "step": 296000 }, { "epoch": 23.22, "learning_rate": 0.0005, "loss": 1.6076, "step": 296100 }, { "epoch": 23.23, "learning_rate": 0.0005, "loss": 1.6228, "step": 296200 }, { "epoch": 23.24, "learning_rate": 0.0005, "loss": 1.5836, "step": 296300 }, { "epoch": 23.24, "learning_rate": 0.0005, "loss": 1.616, "step": 296400 }, { "epoch": 23.25, "learning_rate": 0.0005, "loss": 1.6038, "step": 296500 }, { "epoch": 23.26, "learning_rate": 0.0005, "loss": 1.5998, "step": 296600 }, { "epoch": 23.27, "learning_rate": 0.0005, "loss": 1.6057, "step": 296700 }, { "epoch": 23.27, "learning_rate": 0.0005, "loss": 1.6069, "step": 296800 }, { "epoch": 23.28, "learning_rate": 0.0005, "loss": 1.6165, "step": 296900 }, { "epoch": 23.29, "learning_rate": 0.0005, "loss": 1.6075, "step": 297000 }, { "epoch": 23.3, "learning_rate": 0.0005, "loss": 1.6274, "step": 297100 }, { "epoch": 23.31, "learning_rate": 0.0005, "loss": 1.6295, "step": 297200 }, { "epoch": 23.31, "learning_rate": 0.0005, "loss": 1.6176, "step": 297300 }, { "epoch": 23.32, "learning_rate": 0.0005, "loss": 1.6193, "step": 297400 }, { "epoch": 23.33, "learning_rate": 0.0005, "loss": 1.6231, "step": 297500 }, { "epoch": 23.34, "learning_rate": 0.0005, "loss": 1.6155, "step": 297600 }, { "epoch": 23.35, "learning_rate": 0.0005, "loss": 1.6239, "step": 297700 }, { "epoch": 23.35, "learning_rate": 0.0005, "loss": 1.6217, "step": 297800 }, { "epoch": 23.36, "learning_rate": 0.0005, "loss": 1.5893, "step": 297900 }, { "epoch": 23.37, "learning_rate": 0.0005, "loss": 1.632, "step": 298000 }, { "epoch": 23.38, "learning_rate": 0.0005, "loss": 1.614, "step": 298100 }, { "epoch": 23.38, "learning_rate": 0.0005, "loss": 1.604, "step": 298200 }, { "epoch": 23.39, "learning_rate": 0.0005, "loss": 1.6348, "step": 298300 }, { "epoch": 23.4, "learning_rate": 0.0005, "loss": 1.6524, "step": 298400 }, { "epoch": 23.41, "learning_rate": 0.0005, "loss": 1.6346, "step": 298500 }, { "epoch": 23.42, "learning_rate": 0.0005, "loss": 1.6335, "step": 298600 }, { "epoch": 23.42, "learning_rate": 0.0005, "loss": 1.6356, "step": 298700 }, { "epoch": 23.43, "learning_rate": 0.0005, "loss": 1.6304, "step": 298800 }, { "epoch": 23.44, "learning_rate": 0.0005, "loss": 1.6099, "step": 298900 }, { "epoch": 23.45, "learning_rate": 0.0005, "loss": 1.6269, "step": 299000 }, { "epoch": 23.46, "learning_rate": 0.0005, "loss": 1.6192, "step": 299100 }, { "epoch": 23.46, "learning_rate": 0.0005, "loss": 1.6656, "step": 299200 }, { "epoch": 23.47, "learning_rate": 0.0005, "loss": 1.6019, "step": 299300 }, { "epoch": 23.48, "learning_rate": 0.0005, "loss": 1.6528, "step": 299400 }, { "epoch": 23.49, "learning_rate": 0.0005, "loss": 1.6116, "step": 299500 }, { "epoch": 23.49, "learning_rate": 0.0005, "loss": 1.6486, "step": 299600 }, { "epoch": 23.5, "learning_rate": 0.0005, "loss": 1.6194, "step": 299700 }, { "epoch": 23.51, "learning_rate": 0.0005, "loss": 1.6463, "step": 299800 }, { "epoch": 23.52, "learning_rate": 0.0005, "loss": 1.628, "step": 299900 }, { "epoch": 23.53, "learning_rate": 0.0005, "loss": 1.6391, "step": 300000 }, { "epoch": 23.53, "eval_gen_len": 18.781583826255847, "eval_loss": 2.024906635284424, "eval_rouge1": 35.371, "eval_rouge2": 14.234, "eval_rougeL": 29.0754, "eval_rougeLsum": 29.072, "eval_runtime": 361.8269, "eval_samples_per_second": 31.305, "eval_steps_per_second": 1.957, "step": 300000 }, { "epoch": 23.53, "learning_rate": 0.0005, "loss": 1.666, "step": 300100 }, { "epoch": 23.54, "learning_rate": 0.0005, "loss": 1.6385, "step": 300200 }, { "epoch": 23.55, "learning_rate": 0.0005, "loss": 1.6464, "step": 300300 }, { "epoch": 23.56, "learning_rate": 0.0005, "loss": 1.6211, "step": 300400 }, { "epoch": 23.56, "learning_rate": 0.0005, "loss": 1.6403, "step": 300500 }, { "epoch": 23.57, "learning_rate": 0.0005, "loss": 1.6051, "step": 300600 }, { "epoch": 23.58, "learning_rate": 0.0005, "loss": 1.6281, "step": 300700 }, { "epoch": 23.59, "learning_rate": 0.0005, "loss": 1.628, "step": 300800 }, { "epoch": 23.6, "learning_rate": 0.0005, "loss": 1.6414, "step": 300900 }, { "epoch": 23.6, "learning_rate": 0.0005, "loss": 1.6388, "step": 301000 }, { "epoch": 23.61, "learning_rate": 0.0005, "loss": 1.6349, "step": 301100 }, { "epoch": 23.62, "learning_rate": 0.0005, "loss": 1.6436, "step": 301200 }, { "epoch": 23.63, "learning_rate": 0.0005, "loss": 1.6358, "step": 301300 }, { "epoch": 23.64, "learning_rate": 0.0005, "loss": 1.6356, "step": 301400 }, { "epoch": 23.64, "learning_rate": 0.0005, "loss": 1.6494, "step": 301500 }, { "epoch": 23.65, "learning_rate": 0.0005, "loss": 1.6575, "step": 301600 }, { "epoch": 23.66, "learning_rate": 0.0005, "loss": 1.6476, "step": 301700 }, { "epoch": 23.67, "learning_rate": 0.0005, "loss": 1.6487, "step": 301800 }, { "epoch": 23.67, "learning_rate": 0.0005, "loss": 1.6258, "step": 301900 }, { "epoch": 23.68, "learning_rate": 0.0005, "loss": 1.6489, "step": 302000 }, { "epoch": 23.69, "learning_rate": 0.0005, "loss": 1.6275, "step": 302100 }, { "epoch": 23.7, "learning_rate": 0.0005, "loss": 1.6157, "step": 302200 }, { "epoch": 23.71, "learning_rate": 0.0005, "loss": 1.6603, "step": 302300 }, { "epoch": 23.71, "learning_rate": 0.0005, "loss": 1.6366, "step": 302400 }, { "epoch": 23.72, "learning_rate": 0.0005, "loss": 1.648, "step": 302500 }, { "epoch": 23.73, "learning_rate": 0.0005, "loss": 1.6612, "step": 302600 }, { "epoch": 23.74, "learning_rate": 0.0005, "loss": 1.6281, "step": 302700 }, { "epoch": 23.75, "learning_rate": 0.0005, "loss": 1.6699, "step": 302800 }, { "epoch": 23.75, "learning_rate": 0.0005, "loss": 1.6539, "step": 302900 }, { "epoch": 23.76, "learning_rate": 0.0005, "loss": 1.6383, "step": 303000 }, { "epoch": 23.77, "learning_rate": 0.0005, "loss": 1.6336, "step": 303100 }, { "epoch": 23.78, "learning_rate": 0.0005, "loss": 1.6608, "step": 303200 }, { "epoch": 23.78, "learning_rate": 0.0005, "loss": 1.6244, "step": 303300 }, { "epoch": 23.79, "learning_rate": 0.0005, "loss": 1.6521, "step": 303400 }, { "epoch": 23.8, "learning_rate": 0.0005, "loss": 1.6382, "step": 303500 }, { "epoch": 23.81, "learning_rate": 0.0005, "loss": 1.6328, "step": 303600 }, { "epoch": 23.82, "learning_rate": 0.0005, "loss": 1.6603, "step": 303700 }, { "epoch": 23.82, "learning_rate": 0.0005, "loss": 1.6212, "step": 303800 }, { "epoch": 23.83, "learning_rate": 0.0005, "loss": 1.6531, "step": 303900 }, { "epoch": 23.84, "learning_rate": 0.0005, "loss": 1.6388, "step": 304000 }, { "epoch": 23.85, "learning_rate": 0.0005, "loss": 1.65, "step": 304100 }, { "epoch": 23.86, "learning_rate": 0.0005, "loss": 1.6484, "step": 304200 }, { "epoch": 23.86, "learning_rate": 0.0005, "loss": 1.6867, "step": 304300 }, { "epoch": 23.87, "learning_rate": 0.0005, "loss": 1.6901, "step": 304400 }, { "epoch": 23.88, "learning_rate": 0.0005, "loss": 1.6416, "step": 304500 }, { "epoch": 23.89, "learning_rate": 0.0005, "loss": 1.6623, "step": 304600 }, { "epoch": 23.89, "learning_rate": 0.0005, "loss": 1.651, "step": 304700 }, { "epoch": 23.9, "learning_rate": 0.0005, "loss": 1.6387, "step": 304800 }, { "epoch": 23.91, "learning_rate": 0.0005, "loss": 1.6614, "step": 304900 }, { "epoch": 23.92, "learning_rate": 0.0005, "loss": 1.6405, "step": 305000 }, { "epoch": 23.93, "learning_rate": 0.0005, "loss": 1.6339, "step": 305100 }, { "epoch": 23.93, "learning_rate": 0.0005, "loss": 1.6873, "step": 305200 }, { "epoch": 23.94, "learning_rate": 0.0005, "loss": 1.6774, "step": 305300 }, { "epoch": 23.95, "learning_rate": 0.0005, "loss": 1.6678, "step": 305400 }, { "epoch": 23.96, "learning_rate": 0.0005, "loss": 1.6646, "step": 305500 }, { "epoch": 23.96, "learning_rate": 0.0005, "loss": 1.6671, "step": 305600 }, { "epoch": 23.97, "learning_rate": 0.0005, "loss": 1.6392, "step": 305700 }, { "epoch": 23.98, "learning_rate": 0.0005, "loss": 1.6524, "step": 305800 }, { "epoch": 23.99, "learning_rate": 0.0005, "loss": 1.6699, "step": 305900 }, { "epoch": 24.0, "learning_rate": 0.0005, "loss": 1.6702, "step": 306000 }, { "epoch": 24.0, "learning_rate": 0.0005, "loss": 1.6122, "step": 306100 }, { "epoch": 24.01, "learning_rate": 0.0005, "loss": 1.5641, "step": 306200 }, { "epoch": 24.02, "learning_rate": 0.0005, "loss": 1.5877, "step": 306300 }, { "epoch": 24.03, "learning_rate": 0.0005, "loss": 1.5558, "step": 306400 }, { "epoch": 24.04, "learning_rate": 0.0005, "loss": 1.5677, "step": 306500 }, { "epoch": 24.04, "learning_rate": 0.0005, "loss": 1.5845, "step": 306600 }, { "epoch": 24.05, "learning_rate": 0.0005, "loss": 1.6126, "step": 306700 }, { "epoch": 24.06, "learning_rate": 0.0005, "loss": 1.5886, "step": 306800 }, { "epoch": 24.07, "learning_rate": 0.0005, "loss": 1.5871, "step": 306900 }, { "epoch": 24.07, "learning_rate": 0.0005, "loss": 1.5685, "step": 307000 }, { "epoch": 24.08, "learning_rate": 0.0005, "loss": 1.5822, "step": 307100 }, { "epoch": 24.09, "learning_rate": 0.0005, "loss": 1.5561, "step": 307200 }, { "epoch": 24.1, "learning_rate": 0.0005, "loss": 1.5736, "step": 307300 }, { "epoch": 24.11, "learning_rate": 0.0005, "loss": 1.6063, "step": 307400 }, { "epoch": 24.11, "learning_rate": 0.0005, "loss": 1.5786, "step": 307500 }, { "epoch": 24.12, "learning_rate": 0.0005, "loss": 1.5952, "step": 307600 }, { "epoch": 24.13, "learning_rate": 0.0005, "loss": 1.5937, "step": 307700 }, { "epoch": 24.14, "learning_rate": 0.0005, "loss": 1.5623, "step": 307800 }, { "epoch": 24.15, "learning_rate": 0.0005, "loss": 1.5925, "step": 307900 }, { "epoch": 24.15, "learning_rate": 0.0005, "loss": 1.5756, "step": 308000 }, { "epoch": 24.16, "learning_rate": 0.0005, "loss": 1.5649, "step": 308100 }, { "epoch": 24.17, "learning_rate": 0.0005, "loss": 1.5827, "step": 308200 }, { "epoch": 24.18, "learning_rate": 0.0005, "loss": 1.5905, "step": 308300 }, { "epoch": 24.18, "learning_rate": 0.0005, "loss": 1.6098, "step": 308400 }, { "epoch": 24.19, "learning_rate": 0.0005, "loss": 1.5813, "step": 308500 }, { "epoch": 24.2, "learning_rate": 0.0005, "loss": 1.5959, "step": 308600 }, { "epoch": 24.21, "learning_rate": 0.0005, "loss": 1.602, "step": 308700 }, { "epoch": 24.22, "learning_rate": 0.0005, "loss": 1.5968, "step": 308800 }, { "epoch": 24.22, "learning_rate": 0.0005, "loss": 1.5857, "step": 308900 }, { "epoch": 24.23, "learning_rate": 0.0005, "loss": 1.5816, "step": 309000 }, { "epoch": 24.24, "learning_rate": 0.0005, "loss": 1.6208, "step": 309100 }, { "epoch": 24.25, "learning_rate": 0.0005, "loss": 1.6071, "step": 309200 }, { "epoch": 24.26, "learning_rate": 0.0005, "loss": 1.6001, "step": 309300 }, { "epoch": 24.26, "learning_rate": 0.0005, "loss": 1.5837, "step": 309400 }, { "epoch": 24.27, "learning_rate": 0.0005, "loss": 1.5884, "step": 309500 }, { "epoch": 24.28, "learning_rate": 0.0005, "loss": 1.6021, "step": 309600 }, { "epoch": 24.29, "learning_rate": 0.0005, "loss": 1.6263, "step": 309700 }, { "epoch": 24.29, "learning_rate": 0.0005, "loss": 1.5973, "step": 309800 }, { "epoch": 24.3, "learning_rate": 0.0005, "loss": 1.61, "step": 309900 }, { "epoch": 24.31, "learning_rate": 0.0005, "loss": 1.6117, "step": 310000 }, { "epoch": 24.31, "eval_gen_len": 18.821841617374414, "eval_loss": 2.028640031814575, "eval_rouge1": 35.3532, "eval_rouge2": 14.2979, "eval_rougeL": 29.0784, "eval_rougeLsum": 29.0753, "eval_runtime": 359.726, "eval_samples_per_second": 31.488, "eval_steps_per_second": 1.968, "step": 310000 }, { "epoch": 24.32, "learning_rate": 0.0005, "loss": 1.6213, "step": 310100 }, { "epoch": 24.33, "learning_rate": 0.0005, "loss": 1.6144, "step": 310200 }, { "epoch": 24.33, "learning_rate": 0.0005, "loss": 1.6167, "step": 310300 }, { "epoch": 24.34, "learning_rate": 0.0005, "loss": 1.5916, "step": 310400 }, { "epoch": 24.35, "learning_rate": 0.0005, "loss": 1.6089, "step": 310500 }, { "epoch": 24.36, "learning_rate": 0.0005, "loss": 1.6133, "step": 310600 }, { "epoch": 24.36, "learning_rate": 0.0005, "loss": 1.5997, "step": 310700 }, { "epoch": 24.37, "learning_rate": 0.0005, "loss": 1.5821, "step": 310800 }, { "epoch": 24.38, "learning_rate": 0.0005, "loss": 1.5782, "step": 310900 }, { "epoch": 24.39, "learning_rate": 0.0005, "loss": 1.6115, "step": 311000 }, { "epoch": 24.4, "learning_rate": 0.0005, "loss": 1.586, "step": 311100 }, { "epoch": 24.4, "learning_rate": 0.0005, "loss": 1.608, "step": 311200 }, { "epoch": 24.41, "learning_rate": 0.0005, "loss": 1.5944, "step": 311300 }, { "epoch": 24.42, "learning_rate": 0.0005, "loss": 1.6191, "step": 311400 }, { "epoch": 24.43, "learning_rate": 0.0005, "loss": 1.5982, "step": 311500 }, { "epoch": 24.44, "learning_rate": 0.0005, "loss": 1.6038, "step": 311600 }, { "epoch": 24.44, "learning_rate": 0.0005, "loss": 1.6403, "step": 311700 }, { "epoch": 24.45, "learning_rate": 0.0005, "loss": 1.6233, "step": 311800 }, { "epoch": 24.46, "learning_rate": 0.0005, "loss": 1.6071, "step": 311900 }, { "epoch": 24.47, "learning_rate": 0.0005, "loss": 1.6255, "step": 312000 }, { "epoch": 24.47, "learning_rate": 0.0005, "loss": 1.6234, "step": 312100 }, { "epoch": 24.48, "learning_rate": 0.0005, "loss": 1.6308, "step": 312200 }, { "epoch": 24.49, "learning_rate": 0.0005, "loss": 1.6172, "step": 312300 }, { "epoch": 24.5, "learning_rate": 0.0005, "loss": 1.6077, "step": 312400 }, { "epoch": 24.51, "learning_rate": 0.0005, "loss": 1.6378, "step": 312500 }, { "epoch": 24.51, "learning_rate": 0.0005, "loss": 1.6038, "step": 312600 }, { "epoch": 24.52, "learning_rate": 0.0005, "loss": 1.6411, "step": 312700 }, { "epoch": 24.53, "learning_rate": 0.0005, "loss": 1.6063, "step": 312800 }, { "epoch": 24.54, "learning_rate": 0.0005, "loss": 1.638, "step": 312900 }, { "epoch": 24.55, "learning_rate": 0.0005, "loss": 1.6137, "step": 313000 }, { "epoch": 24.55, "learning_rate": 0.0005, "loss": 1.6173, "step": 313100 }, { "epoch": 24.56, "learning_rate": 0.0005, "loss": 1.5994, "step": 313200 }, { "epoch": 24.57, "learning_rate": 0.0005, "loss": 1.6321, "step": 313300 }, { "epoch": 24.58, "learning_rate": 0.0005, "loss": 1.6248, "step": 313400 }, { "epoch": 24.58, "learning_rate": 0.0005, "loss": 1.622, "step": 313500 }, { "epoch": 24.59, "learning_rate": 0.0005, "loss": 1.623, "step": 313600 }, { "epoch": 24.6, "learning_rate": 0.0005, "loss": 1.6185, "step": 313700 }, { "epoch": 24.61, "learning_rate": 0.0005, "loss": 1.6278, "step": 313800 }, { "epoch": 24.62, "learning_rate": 0.0005, "loss": 1.6302, "step": 313900 }, { "epoch": 24.62, "learning_rate": 0.0005, "loss": 1.5845, "step": 314000 }, { "epoch": 24.63, "learning_rate": 0.0005, "loss": 1.6256, "step": 314100 }, { "epoch": 24.64, "learning_rate": 0.0005, "loss": 1.6112, "step": 314200 }, { "epoch": 24.65, "learning_rate": 0.0005, "loss": 1.6205, "step": 314300 }, { "epoch": 24.65, "learning_rate": 0.0005, "loss": 1.6244, "step": 314400 }, { "epoch": 24.66, "learning_rate": 0.0005, "loss": 1.6834, "step": 314500 }, { "epoch": 24.67, "learning_rate": 0.0005, "loss": 1.6155, "step": 314600 }, { "epoch": 24.68, "learning_rate": 0.0005, "loss": 1.6176, "step": 314700 }, { "epoch": 24.69, "learning_rate": 0.0005, "loss": 1.6207, "step": 314800 }, { "epoch": 24.69, "learning_rate": 0.0005, "loss": 1.6335, "step": 314900 }, { "epoch": 24.7, "learning_rate": 0.0005, "loss": 1.6422, "step": 315000 }, { "epoch": 24.71, "learning_rate": 0.0005, "loss": 1.612, "step": 315100 }, { "epoch": 24.72, "learning_rate": 0.0005, "loss": 1.6276, "step": 315200 }, { "epoch": 24.73, "learning_rate": 0.0005, "loss": 1.6261, "step": 315300 }, { "epoch": 24.73, "learning_rate": 0.0005, "loss": 1.6383, "step": 315400 }, { "epoch": 24.74, "learning_rate": 0.0005, "loss": 1.6474, "step": 315500 }, { "epoch": 24.75, "learning_rate": 0.0005, "loss": 1.6421, "step": 315600 }, { "epoch": 24.76, "learning_rate": 0.0005, "loss": 1.6137, "step": 315700 }, { "epoch": 24.76, "learning_rate": 0.0005, "loss": 1.6436, "step": 315800 }, { "epoch": 24.77, "learning_rate": 0.0005, "loss": 1.6318, "step": 315900 }, { "epoch": 24.78, "learning_rate": 0.0005, "loss": 1.6004, "step": 316000 }, { "epoch": 24.79, "learning_rate": 0.0005, "loss": 1.6413, "step": 316100 }, { "epoch": 24.8, "learning_rate": 0.0005, "loss": 1.5908, "step": 316200 }, { "epoch": 24.8, "learning_rate": 0.0005, "loss": 1.6252, "step": 316300 }, { "epoch": 24.81, "learning_rate": 0.0005, "loss": 1.6307, "step": 316400 }, { "epoch": 24.82, "learning_rate": 0.0005, "loss": 1.6039, "step": 316500 }, { "epoch": 24.83, "learning_rate": 0.0005, "loss": 1.635, "step": 316600 }, { "epoch": 24.84, "learning_rate": 0.0005, "loss": 1.6595, "step": 316700 }, { "epoch": 24.84, "learning_rate": 0.0005, "loss": 1.6373, "step": 316800 }, { "epoch": 24.85, "learning_rate": 0.0005, "loss": 1.6513, "step": 316900 }, { "epoch": 24.86, "learning_rate": 0.0005, "loss": 1.6329, "step": 317000 }, { "epoch": 24.87, "learning_rate": 0.0005, "loss": 1.6245, "step": 317100 }, { "epoch": 24.87, "learning_rate": 0.0005, "loss": 1.6224, "step": 317200 }, { "epoch": 24.88, "learning_rate": 0.0005, "loss": 1.6333, "step": 317300 }, { "epoch": 24.89, "learning_rate": 0.0005, "loss": 1.6248, "step": 317400 }, { "epoch": 24.9, "learning_rate": 0.0005, "loss": 1.6371, "step": 317500 }, { "epoch": 24.91, "learning_rate": 0.0005, "loss": 1.6373, "step": 317600 }, { "epoch": 24.91, "learning_rate": 0.0005, "loss": 1.6378, "step": 317700 }, { "epoch": 24.92, "learning_rate": 0.0005, "loss": 1.6357, "step": 317800 }, { "epoch": 24.93, "learning_rate": 0.0005, "loss": 1.6325, "step": 317900 }, { "epoch": 24.94, "learning_rate": 0.0005, "loss": 1.6113, "step": 318000 }, { "epoch": 24.95, "learning_rate": 0.0005, "loss": 1.6174, "step": 318100 }, { "epoch": 24.95, "learning_rate": 0.0005, "loss": 1.6451, "step": 318200 }, { "epoch": 24.96, "learning_rate": 0.0005, "loss": 1.6558, "step": 318300 }, { "epoch": 24.97, "learning_rate": 0.0005, "loss": 1.662, "step": 318400 }, { "epoch": 24.98, "learning_rate": 0.0005, "loss": 1.6724, "step": 318500 }, { "epoch": 24.98, "learning_rate": 0.0005, "loss": 1.6367, "step": 318600 }, { "epoch": 24.99, "learning_rate": 0.0005, "loss": 1.6426, "step": 318700 }, { "epoch": 25.0, "learning_rate": 0.0005, "loss": 1.6336, "step": 318800 }, { "epoch": 25.01, "learning_rate": 0.0005, "loss": 1.5282, "step": 318900 }, { "epoch": 25.02, "learning_rate": 0.0005, "loss": 1.5774, "step": 319000 }, { "epoch": 25.02, "learning_rate": 0.0005, "loss": 1.5832, "step": 319100 }, { "epoch": 25.03, "learning_rate": 0.0005, "loss": 1.5514, "step": 319200 }, { "epoch": 25.04, "learning_rate": 0.0005, "loss": 1.5626, "step": 319300 }, { "epoch": 25.05, "learning_rate": 0.0005, "loss": 1.5512, "step": 319400 }, { "epoch": 25.05, "learning_rate": 0.0005, "loss": 1.5384, "step": 319500 }, { "epoch": 25.06, "learning_rate": 0.0005, "loss": 1.5862, "step": 319600 }, { "epoch": 25.07, "learning_rate": 0.0005, "loss": 1.5757, "step": 319700 }, { "epoch": 25.08, "learning_rate": 0.0005, "loss": 1.5636, "step": 319800 }, { "epoch": 25.09, "learning_rate": 0.0005, "loss": 1.5657, "step": 319900 }, { "epoch": 25.09, "learning_rate": 0.0005, "loss": 1.5626, "step": 320000 }, { "epoch": 25.09, "eval_gen_len": 18.80259556811159, "eval_loss": 2.050901174545288, "eval_rouge1": 35.454, "eval_rouge2": 14.3262, "eval_rougeL": 29.1638, "eval_rougeLsum": 29.1575, "eval_runtime": 378.8445, "eval_samples_per_second": 29.899, "eval_steps_per_second": 1.869, "step": 320000 }, { "epoch": 25.1, "learning_rate": 0.0005, "loss": 1.5716, "step": 320100 }, { "epoch": 25.11, "learning_rate": 0.0005, "loss": 1.5771, "step": 320200 }, { "epoch": 25.12, "learning_rate": 0.0005, "loss": 1.5626, "step": 320300 }, { "epoch": 25.13, "learning_rate": 0.0005, "loss": 1.5642, "step": 320400 }, { "epoch": 25.13, "learning_rate": 0.0005, "loss": 1.5571, "step": 320500 }, { "epoch": 25.14, "learning_rate": 0.0005, "loss": 1.5582, "step": 320600 }, { "epoch": 25.15, "learning_rate": 0.0005, "loss": 1.5973, "step": 320700 }, { "epoch": 25.16, "learning_rate": 0.0005, "loss": 1.5949, "step": 320800 }, { "epoch": 25.16, "learning_rate": 0.0005, "loss": 1.6062, "step": 320900 }, { "epoch": 25.17, "learning_rate": 0.0005, "loss": 1.5625, "step": 321000 }, { "epoch": 25.18, "learning_rate": 0.0005, "loss": 1.6049, "step": 321100 }, { "epoch": 25.19, "learning_rate": 0.0005, "loss": 1.5804, "step": 321200 }, { "epoch": 25.2, "learning_rate": 0.0005, "loss": 1.5686, "step": 321300 }, { "epoch": 25.2, "learning_rate": 0.0005, "loss": 1.5573, "step": 321400 }, { "epoch": 25.21, "learning_rate": 0.0005, "loss": 1.5394, "step": 321500 }, { "epoch": 25.22, "learning_rate": 0.0005, "loss": 1.5871, "step": 321600 }, { "epoch": 25.23, "learning_rate": 0.0005, "loss": 1.5794, "step": 321700 }, { "epoch": 25.24, "learning_rate": 0.0005, "loss": 1.5796, "step": 321800 }, { "epoch": 25.24, "learning_rate": 0.0005, "loss": 1.581, "step": 321900 }, { "epoch": 25.25, "learning_rate": 0.0005, "loss": 1.5899, "step": 322000 }, { "epoch": 25.26, "learning_rate": 0.0005, "loss": 1.5909, "step": 322100 }, { "epoch": 25.27, "learning_rate": 0.0005, "loss": 1.5914, "step": 322200 }, { "epoch": 25.27, "learning_rate": 0.0005, "loss": 1.564, "step": 322300 }, { "epoch": 25.28, "learning_rate": 0.0005, "loss": 1.595, "step": 322400 }, { "epoch": 25.29, "learning_rate": 0.0005, "loss": 1.5983, "step": 322500 }, { "epoch": 25.3, "learning_rate": 0.0005, "loss": 1.5696, "step": 322600 }, { "epoch": 25.31, "learning_rate": 0.0005, "loss": 1.5909, "step": 322700 }, { "epoch": 25.31, "learning_rate": 0.0005, "loss": 1.5952, "step": 322800 }, { "epoch": 25.32, "learning_rate": 0.0005, "loss": 1.5782, "step": 322900 }, { "epoch": 25.33, "learning_rate": 0.0005, "loss": 1.6011, "step": 323000 }, { "epoch": 25.34, "learning_rate": 0.0005, "loss": 1.5609, "step": 323100 }, { "epoch": 25.35, "learning_rate": 0.0005, "loss": 1.5914, "step": 323200 }, { "epoch": 25.35, "learning_rate": 0.0005, "loss": 1.5832, "step": 323300 }, { "epoch": 25.36, "learning_rate": 0.0005, "loss": 1.5871, "step": 323400 }, { "epoch": 25.37, "learning_rate": 0.0005, "loss": 1.5713, "step": 323500 }, { "epoch": 25.38, "learning_rate": 0.0005, "loss": 1.6133, "step": 323600 }, { "epoch": 25.38, "learning_rate": 0.0005, "loss": 1.6008, "step": 323700 }, { "epoch": 25.39, "learning_rate": 0.0005, "loss": 1.6153, "step": 323800 }, { "epoch": 25.4, "learning_rate": 0.0005, "loss": 1.6015, "step": 323900 }, { "epoch": 25.41, "learning_rate": 0.0005, "loss": 1.5809, "step": 324000 }, { "epoch": 25.42, "learning_rate": 0.0005, "loss": 1.5851, "step": 324100 }, { "epoch": 25.42, "learning_rate": 0.0005, "loss": 1.5882, "step": 324200 }, { "epoch": 25.43, "learning_rate": 0.0005, "loss": 1.5865, "step": 324300 }, { "epoch": 25.44, "learning_rate": 0.0005, "loss": 1.6289, "step": 324400 }, { "epoch": 25.45, "learning_rate": 0.0005, "loss": 1.5973, "step": 324500 }, { "epoch": 25.45, "learning_rate": 0.0005, "loss": 1.6141, "step": 324600 }, { "epoch": 25.46, "learning_rate": 0.0005, "loss": 1.5939, "step": 324700 }, { "epoch": 25.47, "learning_rate": 0.0005, "loss": 1.5722, "step": 324800 }, { "epoch": 25.48, "learning_rate": 0.0005, "loss": 1.587, "step": 324900 }, { "epoch": 25.49, "learning_rate": 0.0005, "loss": 1.5885, "step": 325000 }, { "epoch": 25.49, "learning_rate": 0.0005, "loss": 1.6158, "step": 325100 }, { "epoch": 25.5, "learning_rate": 0.0005, "loss": 1.5998, "step": 325200 }, { "epoch": 25.51, "learning_rate": 0.0005, "loss": 1.6076, "step": 325300 }, { "epoch": 25.52, "learning_rate": 0.0005, "loss": 1.6122, "step": 325400 }, { "epoch": 25.53, "learning_rate": 0.0005, "loss": 1.598, "step": 325500 }, { "epoch": 25.53, "learning_rate": 0.0005, "loss": 1.6054, "step": 325600 }, { "epoch": 25.54, "learning_rate": 0.0005, "loss": 1.6074, "step": 325700 }, { "epoch": 25.55, "learning_rate": 0.0005, "loss": 1.5875, "step": 325800 }, { "epoch": 25.56, "learning_rate": 0.0005, "loss": 1.5863, "step": 325900 }, { "epoch": 25.56, "learning_rate": 0.0005, "loss": 1.6041, "step": 326000 }, { "epoch": 25.57, "learning_rate": 0.0005, "loss": 1.6147, "step": 326100 }, { "epoch": 25.58, "learning_rate": 0.0005, "loss": 1.6279, "step": 326200 }, { "epoch": 25.59, "learning_rate": 0.0005, "loss": 1.6338, "step": 326300 }, { "epoch": 25.6, "learning_rate": 0.0005, "loss": 1.6134, "step": 326400 }, { "epoch": 25.6, "learning_rate": 0.0005, "loss": 1.5858, "step": 326500 }, { "epoch": 25.61, "learning_rate": 0.0005, "loss": 1.6179, "step": 326600 }, { "epoch": 25.62, "learning_rate": 0.0005, "loss": 1.6083, "step": 326700 }, { "epoch": 25.63, "learning_rate": 0.0005, "loss": 1.6161, "step": 326800 }, { "epoch": 25.64, "learning_rate": 0.0005, "loss": 1.6253, "step": 326900 }, { "epoch": 25.64, "learning_rate": 0.0005, "loss": 1.6429, "step": 327000 }, { "epoch": 25.65, "learning_rate": 0.0005, "loss": 1.6, "step": 327100 }, { "epoch": 25.66, "learning_rate": 0.0005, "loss": 1.5922, "step": 327200 }, { "epoch": 25.67, "learning_rate": 0.0005, "loss": 1.6395, "step": 327300 }, { "epoch": 25.67, "learning_rate": 0.0005, "loss": 1.61, "step": 327400 }, { "epoch": 25.68, "learning_rate": 0.0005, "loss": 1.6241, "step": 327500 }, { "epoch": 25.69, "learning_rate": 0.0005, "loss": 1.6458, "step": 327600 }, { "epoch": 25.7, "learning_rate": 0.0005, "loss": 1.597, "step": 327700 }, { "epoch": 25.71, "learning_rate": 0.0005, "loss": 1.6225, "step": 327800 }, { "epoch": 25.71, "learning_rate": 0.0005, "loss": 1.6317, "step": 327900 }, { "epoch": 25.72, "learning_rate": 0.0005, "loss": 1.6394, "step": 328000 }, { "epoch": 25.73, "learning_rate": 0.0005, "loss": 1.5723, "step": 328100 }, { "epoch": 25.74, "learning_rate": 0.0005, "loss": 1.6114, "step": 328200 }, { "epoch": 25.74, "learning_rate": 0.0005, "loss": 1.5943, "step": 328300 }, { "epoch": 25.75, "learning_rate": 0.0005, "loss": 1.6357, "step": 328400 }, { "epoch": 25.76, "learning_rate": 0.0005, "loss": 1.5943, "step": 328500 }, { "epoch": 25.77, "learning_rate": 0.0005, "loss": 1.6326, "step": 328600 }, { "epoch": 25.78, "learning_rate": 0.0005, "loss": 1.62, "step": 328700 }, { "epoch": 25.78, "learning_rate": 0.0005, "loss": 1.6162, "step": 328800 }, { "epoch": 25.79, "learning_rate": 0.0005, "loss": 1.6115, "step": 328900 }, { "epoch": 25.8, "learning_rate": 0.0005, "loss": 1.5891, "step": 329000 }, { "epoch": 25.81, "learning_rate": 0.0005, "loss": 1.6339, "step": 329100 }, { "epoch": 25.82, "learning_rate": 0.0005, "loss": 1.6204, "step": 329200 }, { "epoch": 25.82, "learning_rate": 0.0005, "loss": 1.6211, "step": 329300 }, { "epoch": 25.83, "learning_rate": 0.0005, "loss": 1.6182, "step": 329400 }, { "epoch": 25.84, "learning_rate": 0.0005, "loss": 1.5841, "step": 329500 }, { "epoch": 25.85, "learning_rate": 0.0005, "loss": 1.6014, "step": 329600 }, { "epoch": 25.85, "learning_rate": 0.0005, "loss": 1.6241, "step": 329700 }, { "epoch": 25.86, "learning_rate": 0.0005, "loss": 1.6218, "step": 329800 }, { "epoch": 25.87, "learning_rate": 0.0005, "loss": 1.6071, "step": 329900 }, { "epoch": 25.88, "learning_rate": 0.0005, "loss": 1.6151, "step": 330000 }, { "epoch": 25.88, "eval_gen_len": 18.749359936435066, "eval_loss": 2.0329372882843018, "eval_rouge1": 35.4835, "eval_rouge2": 14.3358, "eval_rougeL": 29.1764, "eval_rougeLsum": 29.1679, "eval_runtime": 354.0201, "eval_samples_per_second": 31.995, "eval_steps_per_second": 2.0, "step": 330000 }, { "epoch": 25.89, "learning_rate": 0.0005, "loss": 1.6346, "step": 330100 }, { "epoch": 25.89, "learning_rate": 0.0005, "loss": 1.5966, "step": 330200 }, { "epoch": 25.9, "learning_rate": 0.0005, "loss": 1.6239, "step": 330300 }, { "epoch": 25.91, "learning_rate": 0.0005, "loss": 1.6197, "step": 330400 }, { "epoch": 25.92, "learning_rate": 0.0005, "loss": 1.6218, "step": 330500 }, { "epoch": 25.93, "learning_rate": 0.0005, "loss": 1.6206, "step": 330600 }, { "epoch": 25.93, "learning_rate": 0.0005, "loss": 1.627, "step": 330700 }, { "epoch": 25.94, "learning_rate": 0.0005, "loss": 1.6337, "step": 330800 }, { "epoch": 25.95, "learning_rate": 0.0005, "loss": 1.6318, "step": 330900 }, { "epoch": 25.96, "learning_rate": 0.0005, "loss": 1.6188, "step": 331000 }, { "epoch": 25.96, "learning_rate": 0.0005, "loss": 1.6249, "step": 331100 }, { "epoch": 25.97, "learning_rate": 0.0005, "loss": 1.6343, "step": 331200 }, { "epoch": 25.98, "learning_rate": 0.0005, "loss": 1.629, "step": 331300 }, { "epoch": 25.99, "learning_rate": 0.0005, "loss": 1.611, "step": 331400 }, { "epoch": 26.0, "learning_rate": 0.0005, "loss": 1.6585, "step": 331500 }, { "epoch": 26.0, "learning_rate": 0.0005, "loss": 1.5997, "step": 331600 }, { "epoch": 26.01, "learning_rate": 0.0005, "loss": 1.5408, "step": 331700 }, { "epoch": 26.02, "learning_rate": 0.0005, "loss": 1.5374, "step": 331800 }, { "epoch": 26.03, "learning_rate": 0.0005, "loss": 1.5412, "step": 331900 }, { "epoch": 26.04, "learning_rate": 0.0005, "loss": 1.5719, "step": 332000 }, { "epoch": 26.04, "learning_rate": 0.0005, "loss": 1.5398, "step": 332100 }, { "epoch": 26.05, "learning_rate": 0.0005, "loss": 1.5591, "step": 332200 }, { "epoch": 26.06, "learning_rate": 0.0005, "loss": 1.5313, "step": 332300 }, { "epoch": 26.07, "learning_rate": 0.0005, "loss": 1.5362, "step": 332400 }, { "epoch": 26.07, "learning_rate": 0.0005, "loss": 1.5486, "step": 332500 }, { "epoch": 26.08, "learning_rate": 0.0005, "loss": 1.5609, "step": 332600 }, { "epoch": 26.09, "learning_rate": 0.0005, "loss": 1.5272, "step": 332700 }, { "epoch": 26.1, "learning_rate": 0.0005, "loss": 1.5446, "step": 332800 }, { "epoch": 26.11, "learning_rate": 0.0005, "loss": 1.5752, "step": 332900 }, { "epoch": 26.11, "learning_rate": 0.0005, "loss": 1.5672, "step": 333000 }, { "epoch": 26.12, "learning_rate": 0.0005, "loss": 1.5693, "step": 333100 }, { "epoch": 26.13, "learning_rate": 0.0005, "loss": 1.5406, "step": 333200 }, { "epoch": 26.14, "learning_rate": 0.0005, "loss": 1.5762, "step": 333300 }, { "epoch": 26.14, "learning_rate": 0.0005, "loss": 1.5586, "step": 333400 }, { "epoch": 26.15, "learning_rate": 0.0005, "loss": 1.5743, "step": 333500 }, { "epoch": 26.16, "learning_rate": 0.0005, "loss": 1.5519, "step": 333600 }, { "epoch": 26.17, "learning_rate": 0.0005, "loss": 1.5945, "step": 333700 }, { "epoch": 26.18, "learning_rate": 0.0005, "loss": 1.5771, "step": 333800 }, { "epoch": 26.18, "learning_rate": 0.0005, "loss": 1.5625, "step": 333900 }, { "epoch": 26.19, "learning_rate": 0.0005, "loss": 1.5387, "step": 334000 }, { "epoch": 26.2, "learning_rate": 0.0005, "loss": 1.5574, "step": 334100 }, { "epoch": 26.21, "learning_rate": 0.0005, "loss": 1.562, "step": 334200 }, { "epoch": 26.22, "learning_rate": 0.0005, "loss": 1.5775, "step": 334300 }, { "epoch": 26.22, "learning_rate": 0.0005, "loss": 1.5707, "step": 334400 }, { "epoch": 26.23, "learning_rate": 0.0005, "loss": 1.5796, "step": 334500 }, { "epoch": 26.24, "learning_rate": 0.0005, "loss": 1.5527, "step": 334600 }, { "epoch": 26.25, "learning_rate": 0.0005, "loss": 1.5768, "step": 334700 }, { "epoch": 26.25, "learning_rate": 0.0005, "loss": 1.5504, "step": 334800 }, { "epoch": 26.26, "learning_rate": 0.0005, "loss": 1.5557, "step": 334900 }, { "epoch": 26.27, "learning_rate": 0.0005, "loss": 1.5739, "step": 335000 }, { "epoch": 26.28, "learning_rate": 0.0005, "loss": 1.5477, "step": 335100 }, { "epoch": 26.29, "learning_rate": 0.0005, "loss": 1.566, "step": 335200 }, { "epoch": 26.29, "learning_rate": 0.0005, "loss": 1.5769, "step": 335300 }, { "epoch": 26.3, "learning_rate": 0.0005, "loss": 1.5689, "step": 335400 }, { "epoch": 26.31, "learning_rate": 0.0005, "loss": 1.5647, "step": 335500 }, { "epoch": 26.32, "learning_rate": 0.0005, "loss": 1.5846, "step": 335600 }, { "epoch": 26.33, "learning_rate": 0.0005, "loss": 1.5499, "step": 335700 }, { "epoch": 26.33, "learning_rate": 0.0005, "loss": 1.5697, "step": 335800 }, { "epoch": 26.34, "learning_rate": 0.0005, "loss": 1.6024, "step": 335900 }, { "epoch": 26.35, "learning_rate": 0.0005, "loss": 1.5963, "step": 336000 }, { "epoch": 26.36, "learning_rate": 0.0005, "loss": 1.5966, "step": 336100 }, { "epoch": 26.36, "learning_rate": 0.0005, "loss": 1.5425, "step": 336200 }, { "epoch": 26.37, "learning_rate": 0.0005, "loss": 1.6052, "step": 336300 }, { "epoch": 26.38, "learning_rate": 0.0005, "loss": 1.5511, "step": 336400 }, { "epoch": 26.39, "learning_rate": 0.0005, "loss": 1.57, "step": 336500 }, { "epoch": 26.4, "learning_rate": 0.0005, "loss": 1.5823, "step": 336600 }, { "epoch": 26.4, "learning_rate": 0.0005, "loss": 1.5938, "step": 336700 }, { "epoch": 26.41, "learning_rate": 0.0005, "loss": 1.5801, "step": 336800 }, { "epoch": 26.42, "learning_rate": 0.0005, "loss": 1.5825, "step": 336900 }, { "epoch": 26.43, "learning_rate": 0.0005, "loss": 1.5734, "step": 337000 }, { "epoch": 26.44, "learning_rate": 0.0005, "loss": 1.5825, "step": 337100 }, { "epoch": 26.44, "learning_rate": 0.0005, "loss": 1.5665, "step": 337200 }, { "epoch": 26.45, "learning_rate": 0.0005, "loss": 1.6113, "step": 337300 }, { "epoch": 26.46, "learning_rate": 0.0005, "loss": 1.6016, "step": 337400 }, { "epoch": 26.47, "learning_rate": 0.0005, "loss": 1.5879, "step": 337500 }, { "epoch": 26.47, "learning_rate": 0.0005, "loss": 1.5739, "step": 337600 }, { "epoch": 26.48, "learning_rate": 0.0005, "loss": 1.5764, "step": 337700 }, { "epoch": 26.49, "learning_rate": 0.0005, "loss": 1.6103, "step": 337800 }, { "epoch": 26.5, "learning_rate": 0.0005, "loss": 1.5855, "step": 337900 }, { "epoch": 26.51, "learning_rate": 0.0005, "loss": 1.5896, "step": 338000 }, { "epoch": 26.51, "learning_rate": 0.0005, "loss": 1.5746, "step": 338100 }, { "epoch": 26.52, "learning_rate": 0.0005, "loss": 1.579, "step": 338200 }, { "epoch": 26.53, "learning_rate": 0.0005, "loss": 1.614, "step": 338300 }, { "epoch": 26.54, "learning_rate": 0.0005, "loss": 1.6163, "step": 338400 }, { "epoch": 26.54, "learning_rate": 0.0005, "loss": 1.5803, "step": 338500 }, { "epoch": 26.55, "learning_rate": 0.0005, "loss": 1.6032, "step": 338600 }, { "epoch": 26.56, "learning_rate": 0.0005, "loss": 1.5849, "step": 338700 }, { "epoch": 26.57, "learning_rate": 0.0005, "loss": 1.6291, "step": 338800 }, { "epoch": 26.58, "learning_rate": 0.0005, "loss": 1.5912, "step": 338900 }, { "epoch": 26.58, "learning_rate": 0.0005, "loss": 1.6117, "step": 339000 }, { "epoch": 26.59, "learning_rate": 0.0005, "loss": 1.5926, "step": 339100 }, { "epoch": 26.6, "learning_rate": 0.0005, "loss": 1.5644, "step": 339200 }, { "epoch": 26.61, "learning_rate": 0.0005, "loss": 1.6154, "step": 339300 }, { "epoch": 26.62, "learning_rate": 0.0005, "loss": 1.5896, "step": 339400 }, { "epoch": 26.62, "learning_rate": 0.0005, "loss": 1.5688, "step": 339500 }, { "epoch": 26.63, "learning_rate": 0.0005, "loss": 1.6337, "step": 339600 }, { "epoch": 26.64, "learning_rate": 0.0005, "loss": 1.5815, "step": 339700 }, { "epoch": 26.65, "learning_rate": 0.0005, "loss": 1.6126, "step": 339800 }, { "epoch": 26.65, "learning_rate": 0.0005, "loss": 1.5892, "step": 339900 }, { "epoch": 26.66, "learning_rate": 0.0005, "loss": 1.6247, "step": 340000 }, { "epoch": 26.66, "eval_gen_len": 18.760395515140814, "eval_loss": 2.032465696334839, "eval_rouge1": 35.301, "eval_rouge2": 14.2909, "eval_rougeL": 29.0546, "eval_rougeLsum": 29.0366, "eval_runtime": 354.3826, "eval_samples_per_second": 31.963, "eval_steps_per_second": 1.998, "step": 340000 }, { "epoch": 26.67, "learning_rate": 0.0005, "loss": 1.6087, "step": 340100 }, { "epoch": 26.68, "learning_rate": 0.0005, "loss": 1.5795, "step": 340200 }, { "epoch": 26.69, "learning_rate": 0.0005, "loss": 1.5813, "step": 340300 }, { "epoch": 26.69, "learning_rate": 0.0005, "loss": 1.5897, "step": 340400 }, { "epoch": 26.7, "learning_rate": 0.0005, "loss": 1.6026, "step": 340500 }, { "epoch": 26.71, "learning_rate": 0.0005, "loss": 1.6048, "step": 340600 }, { "epoch": 26.72, "learning_rate": 0.0005, "loss": 1.6041, "step": 340700 }, { "epoch": 26.73, "learning_rate": 0.0005, "loss": 1.5818, "step": 340800 }, { "epoch": 26.73, "learning_rate": 0.0005, "loss": 1.5973, "step": 340900 }, { "epoch": 26.74, "learning_rate": 0.0005, "loss": 1.6304, "step": 341000 }, { "epoch": 26.75, "learning_rate": 0.0005, "loss": 1.5943, "step": 341100 }, { "epoch": 26.76, "learning_rate": 0.0005, "loss": 1.604, "step": 341200 }, { "epoch": 26.76, "learning_rate": 0.0005, "loss": 1.5891, "step": 341300 }, { "epoch": 26.77, "learning_rate": 0.0005, "loss": 1.6235, "step": 341400 }, { "epoch": 26.78, "learning_rate": 0.0005, "loss": 1.6246, "step": 341500 }, { "epoch": 26.79, "learning_rate": 0.0005, "loss": 1.5878, "step": 341600 }, { "epoch": 26.8, "learning_rate": 0.0005, "loss": 1.6338, "step": 341700 }, { "epoch": 26.8, "learning_rate": 0.0005, "loss": 1.5986, "step": 341800 }, { "epoch": 26.81, "learning_rate": 0.0005, "loss": 1.5755, "step": 341900 }, { "epoch": 26.82, "learning_rate": 0.0005, "loss": 1.5932, "step": 342000 }, { "epoch": 26.83, "learning_rate": 0.0005, "loss": 1.6145, "step": 342100 }, { "epoch": 26.84, "learning_rate": 0.0005, "loss": 1.5958, "step": 342200 }, { "epoch": 26.84, "learning_rate": 0.0005, "loss": 1.5869, "step": 342300 }, { "epoch": 26.85, "learning_rate": 0.0005, "loss": 1.6158, "step": 342400 }, { "epoch": 26.86, "learning_rate": 0.0005, "loss": 1.6216, "step": 342500 }, { "epoch": 26.87, "learning_rate": 0.0005, "loss": 1.6138, "step": 342600 }, { "epoch": 26.87, "learning_rate": 0.0005, "loss": 1.5909, "step": 342700 }, { "epoch": 26.88, "learning_rate": 0.0005, "loss": 1.6228, "step": 342800 }, { "epoch": 26.89, "learning_rate": 0.0005, "loss": 1.6369, "step": 342900 }, { "epoch": 26.9, "learning_rate": 0.0005, "loss": 1.5997, "step": 343000 }, { "epoch": 26.91, "learning_rate": 0.0005, "loss": 1.6028, "step": 343100 }, { "epoch": 26.91, "learning_rate": 0.0005, "loss": 1.5992, "step": 343200 }, { "epoch": 26.92, "learning_rate": 0.0005, "loss": 1.6149, "step": 343300 }, { "epoch": 26.93, "learning_rate": 0.0005, "loss": 1.6065, "step": 343400 }, { "epoch": 26.94, "learning_rate": 0.0005, "loss": 1.5902, "step": 343500 }, { "epoch": 26.94, "learning_rate": 0.0005, "loss": 1.633, "step": 343600 }, { "epoch": 26.95, "learning_rate": 0.0005, "loss": 1.5864, "step": 343700 }, { "epoch": 26.96, "learning_rate": 0.0005, "loss": 1.6256, "step": 343800 }, { "epoch": 26.97, "learning_rate": 0.0005, "loss": 1.6103, "step": 343900 }, { "epoch": 26.98, "learning_rate": 0.0005, "loss": 1.6103, "step": 344000 }, { "epoch": 26.98, "learning_rate": 0.0005, "loss": 1.6147, "step": 344100 }, { "epoch": 26.99, "learning_rate": 0.0005, "loss": 1.6163, "step": 344200 }, { "epoch": 27.0, "learning_rate": 0.0005, "loss": 1.6354, "step": 344300 }, { "epoch": 27.01, "learning_rate": 0.0005, "loss": 1.5376, "step": 344400 }, { "epoch": 27.02, "learning_rate": 0.0005, "loss": 1.5317, "step": 344500 }, { "epoch": 27.02, "learning_rate": 0.0005, "loss": 1.5318, "step": 344600 }, { "epoch": 27.03, "learning_rate": 0.0005, "loss": 1.5563, "step": 344700 }, { "epoch": 27.04, "learning_rate": 0.0005, "loss": 1.5286, "step": 344800 }, { "epoch": 27.05, "learning_rate": 0.0005, "loss": 1.519, "step": 344900 }, { "epoch": 27.05, "learning_rate": 0.0005, "loss": 1.5067, "step": 345000 }, { "epoch": 27.06, "learning_rate": 0.0005, "loss": 1.5485, "step": 345100 }, { "epoch": 27.07, "learning_rate": 0.0005, "loss": 1.5473, "step": 345200 }, { "epoch": 27.08, "learning_rate": 0.0005, "loss": 1.5467, "step": 345300 }, { "epoch": 27.09, "learning_rate": 0.0005, "loss": 1.5235, "step": 345400 }, { "epoch": 27.09, "learning_rate": 0.0005, "loss": 1.5264, "step": 345500 }, { "epoch": 27.1, "learning_rate": 0.0005, "loss": 1.5376, "step": 345600 }, { "epoch": 27.11, "learning_rate": 0.0005, "loss": 1.5383, "step": 345700 }, { "epoch": 27.12, "learning_rate": 0.0005, "loss": 1.5499, "step": 345800 }, { "epoch": 27.13, "learning_rate": 0.0005, "loss": 1.5365, "step": 345900 }, { "epoch": 27.13, "learning_rate": 0.0005, "loss": 1.5262, "step": 346000 }, { "epoch": 27.14, "learning_rate": 0.0005, "loss": 1.5533, "step": 346100 }, { "epoch": 27.15, "learning_rate": 0.0005, "loss": 1.5306, "step": 346200 }, { "epoch": 27.16, "learning_rate": 0.0005, "loss": 1.5767, "step": 346300 }, { "epoch": 27.16, "learning_rate": 0.0005, "loss": 1.5315, "step": 346400 }, { "epoch": 27.17, "learning_rate": 0.0005, "loss": 1.5352, "step": 346500 }, { "epoch": 27.18, "learning_rate": 0.0005, "loss": 1.534, "step": 346600 }, { "epoch": 27.19, "learning_rate": 0.0005, "loss": 1.5646, "step": 346700 }, { "epoch": 27.2, "learning_rate": 0.0005, "loss": 1.5394, "step": 346800 }, { "epoch": 27.2, "learning_rate": 0.0005, "loss": 1.5655, "step": 346900 }, { "epoch": 27.21, "learning_rate": 0.0005, "loss": 1.5497, "step": 347000 }, { "epoch": 27.22, "learning_rate": 0.0005, "loss": 1.56, "step": 347100 }, { "epoch": 27.23, "learning_rate": 0.0005, "loss": 1.5485, "step": 347200 }, { "epoch": 27.23, "learning_rate": 0.0005, "loss": 1.5658, "step": 347300 }, { "epoch": 27.24, "learning_rate": 0.0005, "loss": 1.5506, "step": 347400 }, { "epoch": 27.25, "learning_rate": 0.0005, "loss": 1.5561, "step": 347500 }, { "epoch": 27.26, "learning_rate": 0.0005, "loss": 1.5674, "step": 347600 }, { "epoch": 27.27, "learning_rate": 0.0005, "loss": 1.5509, "step": 347700 }, { "epoch": 27.27, "learning_rate": 0.0005, "loss": 1.5593, "step": 347800 }, { "epoch": 27.28, "learning_rate": 0.0005, "loss": 1.557, "step": 347900 }, { "epoch": 27.29, "learning_rate": 0.0005, "loss": 1.5817, "step": 348000 }, { "epoch": 27.3, "learning_rate": 0.0005, "loss": 1.5781, "step": 348100 }, { "epoch": 27.31, "learning_rate": 0.0005, "loss": 1.574, "step": 348200 }, { "epoch": 27.31, "learning_rate": 0.0005, "loss": 1.5613, "step": 348300 }, { "epoch": 27.32, "learning_rate": 0.0005, "loss": 1.5685, "step": 348400 }, { "epoch": 27.33, "learning_rate": 0.0005, "loss": 1.5287, "step": 348500 }, { "epoch": 27.34, "learning_rate": 0.0005, "loss": 1.5573, "step": 348600 }, { "epoch": 27.34, "learning_rate": 0.0005, "loss": 1.565, "step": 348700 }, { "epoch": 27.35, "learning_rate": 0.0005, "loss": 1.5758, "step": 348800 }, { "epoch": 27.36, "learning_rate": 0.0005, "loss": 1.5418, "step": 348900 }, { "epoch": 27.37, "learning_rate": 0.0005, "loss": 1.557, "step": 349000 }, { "epoch": 27.38, "learning_rate": 0.0005, "loss": 1.5715, "step": 349100 }, { "epoch": 27.38, "learning_rate": 0.0005, "loss": 1.5413, "step": 349200 }, { "epoch": 27.39, "learning_rate": 0.0005, "loss": 1.5829, "step": 349300 }, { "epoch": 27.4, "learning_rate": 0.0005, "loss": 1.6001, "step": 349400 }, { "epoch": 27.41, "learning_rate": 0.0005, "loss": 1.5802, "step": 349500 }, { "epoch": 27.42, "learning_rate": 0.0005, "loss": 1.5757, "step": 349600 }, { "epoch": 27.42, "learning_rate": 0.0005, "loss": 1.566, "step": 349700 }, { "epoch": 27.43, "learning_rate": 0.0005, "loss": 1.5486, "step": 349800 }, { "epoch": 27.44, "learning_rate": 0.0005, "loss": 1.5868, "step": 349900 }, { "epoch": 27.45, "learning_rate": 0.0005, "loss": 1.5729, "step": 350000 }, { "epoch": 27.45, "eval_gen_len": 18.799858744592566, "eval_loss": 2.0437819957733154, "eval_rouge1": 35.5653, "eval_rouge2": 14.397, "eval_rougeL": 29.2537, "eval_rougeLsum": 29.2463, "eval_runtime": 364.5616, "eval_samples_per_second": 31.07, "eval_steps_per_second": 1.942, "step": 350000 }, { "epoch": 27.45, "learning_rate": 0.0005, "loss": 1.567, "step": 350100 }, { "epoch": 27.46, "learning_rate": 0.0005, "loss": 1.5654, "step": 350200 }, { "epoch": 27.47, "learning_rate": 0.0005, "loss": 1.5725, "step": 350300 }, { "epoch": 27.48, "learning_rate": 0.0005, "loss": 1.5994, "step": 350400 }, { "epoch": 27.49, "learning_rate": 0.0005, "loss": 1.5819, "step": 350500 }, { "epoch": 27.49, "learning_rate": 0.0005, "loss": 1.5937, "step": 350600 }, { "epoch": 27.5, "learning_rate": 0.0005, "loss": 1.5933, "step": 350700 }, { "epoch": 27.51, "learning_rate": 0.0005, "loss": 1.5684, "step": 350800 }, { "epoch": 27.52, "learning_rate": 0.0005, "loss": 1.5684, "step": 350900 }, { "epoch": 27.53, "learning_rate": 0.0005, "loss": 1.5823, "step": 351000 }, { "epoch": 27.53, "learning_rate": 0.0005, "loss": 1.5831, "step": 351100 }, { "epoch": 27.54, "learning_rate": 0.0005, "loss": 1.5852, "step": 351200 }, { "epoch": 27.55, "learning_rate": 0.0005, "loss": 1.5816, "step": 351300 }, { "epoch": 27.56, "learning_rate": 0.0005, "loss": 1.5839, "step": 351400 }, { "epoch": 27.56, "learning_rate": 0.0005, "loss": 1.5611, "step": 351500 }, { "epoch": 27.57, "learning_rate": 0.0005, "loss": 1.5772, "step": 351600 }, { "epoch": 27.58, "learning_rate": 0.0005, "loss": 1.5868, "step": 351700 }, { "epoch": 27.59, "learning_rate": 0.0005, "loss": 1.5839, "step": 351800 }, { "epoch": 27.6, "learning_rate": 0.0005, "loss": 1.5971, "step": 351900 }, { "epoch": 27.6, "learning_rate": 0.0005, "loss": 1.6076, "step": 352000 }, { "epoch": 27.61, "learning_rate": 0.0005, "loss": 1.5971, "step": 352100 }, { "epoch": 27.62, "learning_rate": 0.0005, "loss": 1.5659, "step": 352200 }, { "epoch": 27.63, "learning_rate": 0.0005, "loss": 1.589, "step": 352300 }, { "epoch": 27.63, "learning_rate": 0.0005, "loss": 1.5862, "step": 352400 }, { "epoch": 27.64, "learning_rate": 0.0005, "loss": 1.5739, "step": 352500 }, { "epoch": 27.65, "learning_rate": 0.0005, "loss": 1.6004, "step": 352600 }, { "epoch": 27.66, "learning_rate": 0.0005, "loss": 1.558, "step": 352700 }, { "epoch": 27.67, "learning_rate": 0.0005, "loss": 1.5733, "step": 352800 }, { "epoch": 27.67, "learning_rate": 0.0005, "loss": 1.573, "step": 352900 }, { "epoch": 27.68, "learning_rate": 0.0005, "loss": 1.5761, "step": 353000 }, { "epoch": 27.69, "learning_rate": 0.0005, "loss": 1.5865, "step": 353100 }, { "epoch": 27.7, "learning_rate": 0.0005, "loss": 1.5739, "step": 353200 }, { "epoch": 27.71, "learning_rate": 0.0005, "loss": 1.5902, "step": 353300 }, { "epoch": 27.71, "learning_rate": 0.0005, "loss": 1.5813, "step": 353400 }, { "epoch": 27.72, "learning_rate": 0.0005, "loss": 1.5722, "step": 353500 }, { "epoch": 27.73, "learning_rate": 0.0005, "loss": 1.5662, "step": 353600 }, { "epoch": 27.74, "learning_rate": 0.0005, "loss": 1.5982, "step": 353700 }, { "epoch": 27.74, "learning_rate": 0.0005, "loss": 1.5977, "step": 353800 }, { "epoch": 27.75, "learning_rate": 0.0005, "loss": 1.5929, "step": 353900 }, { "epoch": 27.76, "learning_rate": 0.0005, "loss": 1.5885, "step": 354000 }, { "epoch": 27.77, "learning_rate": 0.0005, "loss": 1.6001, "step": 354100 }, { "epoch": 27.78, "learning_rate": 0.0005, "loss": 1.6063, "step": 354200 }, { "epoch": 27.78, "learning_rate": 0.0005, "loss": 1.5976, "step": 354300 }, { "epoch": 27.79, "learning_rate": 0.0005, "loss": 1.5982, "step": 354400 }, { "epoch": 27.8, "learning_rate": 0.0005, "loss": 1.603, "step": 354500 }, { "epoch": 27.81, "learning_rate": 0.0005, "loss": 1.5693, "step": 354600 }, { "epoch": 27.82, "learning_rate": 0.0005, "loss": 1.5969, "step": 354700 }, { "epoch": 27.82, "learning_rate": 0.0005, "loss": 1.5818, "step": 354800 }, { "epoch": 27.83, "learning_rate": 0.0005, "loss": 1.5917, "step": 354900 }, { "epoch": 27.84, "learning_rate": 0.0005, "loss": 1.6187, "step": 355000 }, { "epoch": 27.85, "learning_rate": 0.0005, "loss": 1.5905, "step": 355100 }, { "epoch": 27.85, "learning_rate": 0.0005, "loss": 1.5871, "step": 355200 }, { "epoch": 27.86, "learning_rate": 0.0005, "loss": 1.5929, "step": 355300 }, { "epoch": 27.87, "learning_rate": 0.0005, "loss": 1.6266, "step": 355400 }, { "epoch": 27.88, "learning_rate": 0.0005, "loss": 1.5843, "step": 355500 }, { "epoch": 27.89, "learning_rate": 0.0005, "loss": 1.5929, "step": 355600 }, { "epoch": 27.89, "learning_rate": 0.0005, "loss": 1.5763, "step": 355700 }, { "epoch": 27.9, "learning_rate": 0.0005, "loss": 1.5725, "step": 355800 }, { "epoch": 27.91, "learning_rate": 0.0005, "loss": 1.5876, "step": 355900 }, { "epoch": 27.92, "learning_rate": 0.0005, "loss": 1.6009, "step": 356000 }, { "epoch": 27.93, "learning_rate": 0.0005, "loss": 1.5801, "step": 356100 }, { "epoch": 27.93, "learning_rate": 0.0005, "loss": 1.6225, "step": 356200 }, { "epoch": 27.94, "learning_rate": 0.0005, "loss": 1.6086, "step": 356300 }, { "epoch": 27.95, "learning_rate": 0.0005, "loss": 1.5756, "step": 356400 }, { "epoch": 27.96, "learning_rate": 0.0005, "loss": 1.6061, "step": 356500 }, { "epoch": 27.96, "learning_rate": 0.0005, "loss": 1.6086, "step": 356600 }, { "epoch": 27.97, "learning_rate": 0.0005, "loss": 1.573, "step": 356700 }, { "epoch": 27.98, "learning_rate": 0.0005, "loss": 1.6276, "step": 356800 }, { "epoch": 27.99, "learning_rate": 0.0005, "loss": 1.5962, "step": 356900 }, { "epoch": 28.0, "learning_rate": 0.0005, "loss": 1.5872, "step": 357000 }, { "epoch": 28.0, "learning_rate": 0.0005, "loss": 1.5682, "step": 357100 }, { "epoch": 28.01, "learning_rate": 0.0005, "loss": 1.5024, "step": 357200 }, { "epoch": 28.02, "learning_rate": 0.0005, "loss": 1.5107, "step": 357300 }, { "epoch": 28.03, "learning_rate": 0.0005, "loss": 1.5163, "step": 357400 }, { "epoch": 28.03, "learning_rate": 0.0005, "loss": 1.526, "step": 357500 }, { "epoch": 28.04, "learning_rate": 0.0005, "loss": 1.5352, "step": 357600 }, { "epoch": 28.05, "learning_rate": 0.0005, "loss": 1.5269, "step": 357700 }, { "epoch": 28.06, "learning_rate": 0.0005, "loss": 1.5396, "step": 357800 }, { "epoch": 28.07, "learning_rate": 0.0005, "loss": 1.523, "step": 357900 }, { "epoch": 28.07, "learning_rate": 0.0005, "loss": 1.5314, "step": 358000 }, { "epoch": 28.08, "learning_rate": 0.0005, "loss": 1.525, "step": 358100 }, { "epoch": 28.09, "learning_rate": 0.0005, "loss": 1.5458, "step": 358200 }, { "epoch": 28.1, "learning_rate": 0.0005, "loss": 1.5383, "step": 358300 }, { "epoch": 28.11, "learning_rate": 0.0005, "loss": 1.5293, "step": 358400 }, { "epoch": 28.11, "learning_rate": 0.0005, "loss": 1.5114, "step": 358500 }, { "epoch": 28.12, "learning_rate": 0.0005, "loss": 1.5141, "step": 358600 }, { "epoch": 28.13, "learning_rate": 0.0005, "loss": 1.5213, "step": 358700 }, { "epoch": 28.14, "learning_rate": 0.0005, "loss": 1.5199, "step": 358800 }, { "epoch": 28.14, "learning_rate": 0.0005, "loss": 1.542, "step": 358900 }, { "epoch": 28.15, "learning_rate": 0.0005, "loss": 1.5349, "step": 359000 }, { "epoch": 28.16, "learning_rate": 0.0005, "loss": 1.494, "step": 359100 }, { "epoch": 28.17, "learning_rate": 0.0005, "loss": 1.5497, "step": 359200 }, { "epoch": 28.18, "learning_rate": 0.0005, "loss": 1.5573, "step": 359300 }, { "epoch": 28.18, "learning_rate": 0.0005, "loss": 1.533, "step": 359400 }, { "epoch": 28.19, "learning_rate": 0.0005, "loss": 1.5426, "step": 359500 }, { "epoch": 28.2, "learning_rate": 0.0005, "loss": 1.5489, "step": 359600 }, { "epoch": 28.21, "learning_rate": 0.0005, "loss": 1.5595, "step": 359700 }, { "epoch": 28.22, "learning_rate": 0.0005, "loss": 1.5472, "step": 359800 }, { "epoch": 28.22, "learning_rate": 0.0005, "loss": 1.5507, "step": 359900 }, { "epoch": 28.23, "learning_rate": 0.0005, "loss": 1.558, "step": 360000 }, { "epoch": 28.23, "eval_gen_len": 18.821665048115122, "eval_loss": 2.0342040061950684, "eval_rouge1": 35.4848, "eval_rouge2": 14.3471, "eval_rougeL": 29.1774, "eval_rougeLsum": 29.1699, "eval_runtime": 355.9312, "eval_samples_per_second": 31.824, "eval_steps_per_second": 1.989, "step": 360000 }, { "epoch": 28.24, "learning_rate": 0.0005, "loss": 1.5385, "step": 360100 }, { "epoch": 28.25, "learning_rate": 0.0005, "loss": 1.5381, "step": 360200 }, { "epoch": 28.25, "learning_rate": 0.0005, "loss": 1.5552, "step": 360300 }, { "epoch": 28.26, "learning_rate": 0.0005, "loss": 1.5452, "step": 360400 }, { "epoch": 28.27, "learning_rate": 0.0005, "loss": 1.5726, "step": 360500 }, { "epoch": 28.28, "learning_rate": 0.0005, "loss": 1.5543, "step": 360600 }, { "epoch": 28.29, "learning_rate": 0.0005, "loss": 1.5459, "step": 360700 }, { "epoch": 28.29, "learning_rate": 0.0005, "loss": 1.5494, "step": 360800 }, { "epoch": 28.3, "learning_rate": 0.0005, "loss": 1.5852, "step": 360900 }, { "epoch": 28.31, "learning_rate": 0.0005, "loss": 1.5807, "step": 361000 }, { "epoch": 28.32, "learning_rate": 0.0005, "loss": 1.5449, "step": 361100 }, { "epoch": 28.32, "learning_rate": 0.0005, "loss": 1.5605, "step": 361200 }, { "epoch": 28.33, "learning_rate": 0.0005, "loss": 1.5405, "step": 361300 }, { "epoch": 28.34, "learning_rate": 0.0005, "loss": 1.5573, "step": 361400 }, { "epoch": 28.35, "learning_rate": 0.0005, "loss": 1.5437, "step": 361500 }, { "epoch": 28.36, "learning_rate": 0.0005, "loss": 1.5475, "step": 361600 }, { "epoch": 28.36, "learning_rate": 0.0005, "loss": 1.5543, "step": 361700 }, { "epoch": 28.37, "learning_rate": 0.0005, "loss": 1.5673, "step": 361800 }, { "epoch": 28.38, "learning_rate": 0.0005, "loss": 1.5429, "step": 361900 }, { "epoch": 28.39, "learning_rate": 0.0005, "loss": 1.5304, "step": 362000 }, { "epoch": 28.4, "learning_rate": 0.0005, "loss": 1.5438, "step": 362100 }, { "epoch": 28.4, "learning_rate": 0.0005, "loss": 1.5253, "step": 362200 }, { "epoch": 28.41, "learning_rate": 0.0005, "loss": 1.5658, "step": 362300 }, { "epoch": 28.42, "learning_rate": 0.0005, "loss": 1.5737, "step": 362400 }, { "epoch": 28.43, "learning_rate": 0.0005, "loss": 1.5934, "step": 362500 }, { "epoch": 28.43, "learning_rate": 0.0005, "loss": 1.5434, "step": 362600 }, { "epoch": 28.44, "learning_rate": 0.0005, "loss": 1.5473, "step": 362700 }, { "epoch": 28.45, "learning_rate": 0.0005, "loss": 1.5754, "step": 362800 }, { "epoch": 28.46, "learning_rate": 0.0005, "loss": 1.5531, "step": 362900 }, { "epoch": 28.47, "learning_rate": 0.0005, "loss": 1.5496, "step": 363000 }, { "epoch": 28.47, "learning_rate": 0.0005, "loss": 1.5591, "step": 363100 }, { "epoch": 28.48, "learning_rate": 0.0005, "loss": 1.5777, "step": 363200 }, { "epoch": 28.49, "learning_rate": 0.0005, "loss": 1.5633, "step": 363300 }, { "epoch": 28.5, "learning_rate": 0.0005, "loss": 1.569, "step": 363400 }, { "epoch": 28.51, "learning_rate": 0.0005, "loss": 1.5578, "step": 363500 }, { "epoch": 28.51, "learning_rate": 0.0005, "loss": 1.5764, "step": 363600 }, { "epoch": 28.52, "learning_rate": 0.0005, "loss": 1.5284, "step": 363700 }, { "epoch": 28.53, "learning_rate": 0.0005, "loss": 1.5656, "step": 363800 }, { "epoch": 28.54, "learning_rate": 0.0005, "loss": 1.5843, "step": 363900 }, { "epoch": 28.54, "learning_rate": 0.0005, "loss": 1.5774, "step": 364000 }, { "epoch": 28.55, "learning_rate": 0.0005, "loss": 1.5399, "step": 364100 }, { "epoch": 28.56, "learning_rate": 0.0005, "loss": 1.5777, "step": 364200 }, { "epoch": 28.57, "learning_rate": 0.0005, "loss": 1.5734, "step": 364300 }, { "epoch": 28.58, "learning_rate": 0.0005, "loss": 1.5538, "step": 364400 }, { "epoch": 28.58, "learning_rate": 0.0005, "loss": 1.551, "step": 364500 }, { "epoch": 28.59, "learning_rate": 0.0005, "loss": 1.5904, "step": 364600 }, { "epoch": 28.6, "learning_rate": 0.0005, "loss": 1.5802, "step": 364700 }, { "epoch": 28.61, "learning_rate": 0.0005, "loss": 1.5516, "step": 364800 }, { "epoch": 28.62, "learning_rate": 0.0005, "loss": 1.5416, "step": 364900 }, { "epoch": 28.62, "learning_rate": 0.0005, "loss": 1.5757, "step": 365000 }, { "epoch": 28.63, "learning_rate": 0.0005, "loss": 1.5526, "step": 365100 }, { "epoch": 28.64, "learning_rate": 0.0005, "loss": 1.5526, "step": 365200 }, { "epoch": 28.65, "learning_rate": 0.0005, "loss": 1.5748, "step": 365300 }, { "epoch": 28.65, "learning_rate": 0.0005, "loss": 1.5667, "step": 365400 }, { "epoch": 28.66, "learning_rate": 0.0005, "loss": 1.5389, "step": 365500 }, { "epoch": 28.67, "learning_rate": 0.0005, "loss": 1.5805, "step": 365600 }, { "epoch": 28.68, "learning_rate": 0.0005, "loss": 1.5537, "step": 365700 }, { "epoch": 28.69, "learning_rate": 0.0005, "loss": 1.5464, "step": 365800 }, { "epoch": 28.69, "learning_rate": 0.0005, "loss": 1.5525, "step": 365900 }, { "epoch": 28.7, "learning_rate": 0.0005, "loss": 1.5933, "step": 366000 }, { "epoch": 28.71, "learning_rate": 0.0005, "loss": 1.5686, "step": 366100 }, { "epoch": 28.72, "learning_rate": 0.0005, "loss": 1.5916, "step": 366200 }, { "epoch": 28.72, "learning_rate": 0.0005, "loss": 1.5913, "step": 366300 }, { "epoch": 28.73, "learning_rate": 0.0005, "loss": 1.5873, "step": 366400 }, { "epoch": 28.74, "learning_rate": 0.0005, "loss": 1.5629, "step": 366500 }, { "epoch": 28.75, "learning_rate": 0.0005, "loss": 1.5586, "step": 366600 }, { "epoch": 28.76, "learning_rate": 0.0005, "loss": 1.5848, "step": 366700 }, { "epoch": 28.76, "learning_rate": 0.0005, "loss": 1.6012, "step": 366800 }, { "epoch": 28.77, "learning_rate": 0.0005, "loss": 1.5689, "step": 366900 }, { "epoch": 28.78, "learning_rate": 0.0005, "loss": 1.6003, "step": 367000 }, { "epoch": 28.79, "learning_rate": 0.0005, "loss": 1.6016, "step": 367100 }, { "epoch": 28.8, "learning_rate": 0.0005, "loss": 1.6006, "step": 367200 }, { "epoch": 28.8, "learning_rate": 0.0005, "loss": 1.5989, "step": 367300 }, { "epoch": 28.81, "learning_rate": 0.0005, "loss": 1.5512, "step": 367400 }, { "epoch": 28.82, "learning_rate": 0.0005, "loss": 1.5561, "step": 367500 }, { "epoch": 28.83, "learning_rate": 0.0005, "loss": 1.5544, "step": 367600 }, { "epoch": 28.83, "learning_rate": 0.0005, "loss": 1.5827, "step": 367700 }, { "epoch": 28.84, "learning_rate": 0.0005, "loss": 1.5807, "step": 367800 }, { "epoch": 28.85, "learning_rate": 0.0005, "loss": 1.5896, "step": 367900 }, { "epoch": 28.86, "learning_rate": 0.0005, "loss": 1.5632, "step": 368000 }, { "epoch": 28.87, "learning_rate": 0.0005, "loss": 1.6038, "step": 368100 }, { "epoch": 28.87, "learning_rate": 0.0005, "loss": 1.572, "step": 368200 }, { "epoch": 28.88, "learning_rate": 0.0005, "loss": 1.5859, "step": 368300 }, { "epoch": 28.89, "learning_rate": 0.0005, "loss": 1.5879, "step": 368400 }, { "epoch": 28.9, "learning_rate": 0.0005, "loss": 1.5807, "step": 368500 }, { "epoch": 28.91, "learning_rate": 0.0005, "loss": 1.5997, "step": 368600 }, { "epoch": 28.91, "learning_rate": 0.0005, "loss": 1.5818, "step": 368700 }, { "epoch": 28.92, "learning_rate": 0.0005, "loss": 1.5872, "step": 368800 }, { "epoch": 28.93, "learning_rate": 0.0005, "loss": 1.5478, "step": 368900 }, { "epoch": 28.94, "learning_rate": 0.0005, "loss": 1.5551, "step": 369000 }, { "epoch": 28.94, "learning_rate": 0.0005, "loss": 1.565, "step": 369100 }, { "epoch": 28.95, "learning_rate": 0.0005, "loss": 1.5758, "step": 369200 }, { "epoch": 28.96, "learning_rate": 0.0005, "loss": 1.5759, "step": 369300 }, { "epoch": 28.97, "learning_rate": 0.0005, "loss": 1.5761, "step": 369400 }, { "epoch": 28.98, "learning_rate": 0.0005, "loss": 1.6011, "step": 369500 }, { "epoch": 28.98, "learning_rate": 0.0005, "loss": 1.5991, "step": 369600 }, { "epoch": 28.99, "learning_rate": 0.0005, "loss": 1.5906, "step": 369700 }, { "epoch": 29.0, "learning_rate": 0.0005, "loss": 1.5794, "step": 369800 }, { "epoch": 29.01, "learning_rate": 0.0005, "loss": 1.5154, "step": 369900 }, { "epoch": 29.02, "learning_rate": 0.0005, "loss": 1.5134, "step": 370000 }, { "epoch": 29.02, "eval_gen_len": 18.791383420146552, "eval_loss": 2.0581727027893066, "eval_rouge1": 35.3426, "eval_rouge2": 14.425, "eval_rougeL": 29.1042, "eval_rougeLsum": 29.0882, "eval_runtime": 354.5816, "eval_samples_per_second": 31.945, "eval_steps_per_second": 1.997, "step": 370000 }, { "epoch": 29.02, "learning_rate": 0.0005, "loss": 1.5084, "step": 370100 }, { "epoch": 29.03, "learning_rate": 0.0005, "loss": 1.5087, "step": 370200 }, { "epoch": 29.04, "learning_rate": 0.0005, "loss": 1.5158, "step": 370300 }, { "epoch": 29.05, "learning_rate": 0.0005, "loss": 1.5025, "step": 370400 }, { "epoch": 29.05, "learning_rate": 0.0005, "loss": 1.5105, "step": 370500 }, { "epoch": 29.06, "learning_rate": 0.0005, "loss": 1.4915, "step": 370600 }, { "epoch": 29.07, "learning_rate": 0.0005, "loss": 1.5036, "step": 370700 }, { "epoch": 29.08, "learning_rate": 0.0005, "loss": 1.5132, "step": 370800 }, { "epoch": 29.09, "learning_rate": 0.0005, "loss": 1.5009, "step": 370900 }, { "epoch": 29.09, "learning_rate": 0.0005, "loss": 1.5353, "step": 371000 }, { "epoch": 29.1, "learning_rate": 0.0005, "loss": 1.4914, "step": 371100 }, { "epoch": 29.11, "learning_rate": 0.0005, "loss": 1.5104, "step": 371200 }, { "epoch": 29.12, "learning_rate": 0.0005, "loss": 1.5413, "step": 371300 }, { "epoch": 29.12, "learning_rate": 0.0005, "loss": 1.5274, "step": 371400 }, { "epoch": 29.13, "learning_rate": 0.0005, "loss": 1.5159, "step": 371500 }, { "epoch": 29.14, "learning_rate": 0.0005, "loss": 1.54, "step": 371600 }, { "epoch": 29.15, "learning_rate": 0.0005, "loss": 1.5592, "step": 371700 }, { "epoch": 29.16, "learning_rate": 0.0005, "loss": 1.5061, "step": 371800 }, { "epoch": 29.16, "learning_rate": 0.0005, "loss": 1.5144, "step": 371900 }, { "epoch": 29.17, "learning_rate": 0.0005, "loss": 1.5369, "step": 372000 }, { "epoch": 29.18, "learning_rate": 0.0005, "loss": 1.5371, "step": 372100 }, { "epoch": 29.19, "learning_rate": 0.0005, "loss": 1.5418, "step": 372200 }, { "epoch": 29.2, "learning_rate": 0.0005, "loss": 1.5116, "step": 372300 }, { "epoch": 29.2, "learning_rate": 0.0005, "loss": 1.507, "step": 372400 }, { "epoch": 29.21, "learning_rate": 0.0005, "loss": 1.5431, "step": 372500 }, { "epoch": 29.22, "learning_rate": 0.0005, "loss": 1.5175, "step": 372600 }, { "epoch": 29.23, "learning_rate": 0.0005, "loss": 1.5071, "step": 372700 }, { "epoch": 29.23, "learning_rate": 0.0005, "loss": 1.5442, "step": 372800 }, { "epoch": 29.24, "learning_rate": 0.0005, "loss": 1.5561, "step": 372900 }, { "epoch": 29.25, "learning_rate": 0.0005, "loss": 1.4922, "step": 373000 }, { "epoch": 29.26, "learning_rate": 0.0005, "loss": 1.5415, "step": 373100 }, { "epoch": 29.27, "learning_rate": 0.0005, "loss": 1.543, "step": 373200 }, { "epoch": 29.27, "learning_rate": 0.0005, "loss": 1.5519, "step": 373300 }, { "epoch": 29.28, "learning_rate": 0.0005, "loss": 1.5147, "step": 373400 }, { "epoch": 29.29, "learning_rate": 0.0005, "loss": 1.5495, "step": 373500 }, { "epoch": 29.3, "learning_rate": 0.0005, "loss": 1.5376, "step": 373600 }, { "epoch": 29.31, "learning_rate": 0.0005, "loss": 1.5431, "step": 373700 }, { "epoch": 29.31, "learning_rate": 0.0005, "loss": 1.5454, "step": 373800 }, { "epoch": 29.32, "learning_rate": 0.0005, "loss": 1.5311, "step": 373900 }, { "epoch": 29.33, "learning_rate": 0.0005, "loss": 1.531, "step": 374000 }, { "epoch": 29.34, "learning_rate": 0.0005, "loss": 1.548, "step": 374100 }, { "epoch": 29.34, "learning_rate": 0.0005, "loss": 1.5405, "step": 374200 }, { "epoch": 29.35, "learning_rate": 0.0005, "loss": 1.5334, "step": 374300 }, { "epoch": 29.36, "learning_rate": 0.0005, "loss": 1.5425, "step": 374400 }, { "epoch": 29.37, "learning_rate": 0.0005, "loss": 1.5558, "step": 374500 }, { "epoch": 29.38, "learning_rate": 0.0005, "loss": 1.5261, "step": 374600 }, { "epoch": 29.38, "learning_rate": 0.0005, "loss": 1.5543, "step": 374700 }, { "epoch": 29.39, "learning_rate": 0.0005, "loss": 1.5462, "step": 374800 }, { "epoch": 29.4, "learning_rate": 0.0005, "loss": 1.5245, "step": 374900 }, { "epoch": 29.41, "learning_rate": 0.0005, "loss": 1.5498, "step": 375000 }, { "epoch": 29.41, "learning_rate": 0.0005, "loss": 1.5805, "step": 375100 }, { "epoch": 29.42, "learning_rate": 0.0005, "loss": 1.5346, "step": 375200 }, { "epoch": 29.43, "learning_rate": 0.0005, "loss": 1.5383, "step": 375300 }, { "epoch": 29.44, "learning_rate": 0.0005, "loss": 1.5239, "step": 375400 }, { "epoch": 29.45, "learning_rate": 0.0005, "loss": 1.588, "step": 375500 }, { "epoch": 29.45, "learning_rate": 0.0005, "loss": 1.5122, "step": 375600 }, { "epoch": 29.46, "learning_rate": 0.0005, "loss": 1.5565, "step": 375700 }, { "epoch": 29.47, "learning_rate": 0.0005, "loss": 1.5167, "step": 375800 }, { "epoch": 29.48, "learning_rate": 0.0005, "loss": 1.5715, "step": 375900 }, { "epoch": 29.49, "learning_rate": 0.0005, "loss": 1.5449, "step": 376000 }, { "epoch": 29.49, "learning_rate": 0.0005, "loss": 1.5567, "step": 376100 }, { "epoch": 29.5, "learning_rate": 0.0005, "loss": 1.5546, "step": 376200 }, { "epoch": 29.51, "learning_rate": 0.0005, "loss": 1.5428, "step": 376300 }, { "epoch": 29.52, "learning_rate": 0.0005, "loss": 1.5627, "step": 376400 }, { "epoch": 29.52, "learning_rate": 0.0005, "loss": 1.5605, "step": 376500 }, { "epoch": 29.53, "learning_rate": 0.0005, "loss": 1.5493, "step": 376600 }, { "epoch": 29.54, "learning_rate": 0.0005, "loss": 1.5653, "step": 376700 }, { "epoch": 29.55, "learning_rate": 0.0005, "loss": 1.5443, "step": 376800 }, { "epoch": 29.56, "learning_rate": 0.0005, "loss": 1.5499, "step": 376900 }, { "epoch": 29.56, "learning_rate": 0.0005, "loss": 1.5444, "step": 377000 }, { "epoch": 29.57, "learning_rate": 0.0005, "loss": 1.5792, "step": 377100 }, { "epoch": 29.58, "learning_rate": 0.0005, "loss": 1.5403, "step": 377200 }, { "epoch": 29.59, "learning_rate": 0.0005, "loss": 1.5827, "step": 377300 }, { "epoch": 29.6, "learning_rate": 0.0005, "loss": 1.5682, "step": 377400 }, { "epoch": 29.6, "learning_rate": 0.0005, "loss": 1.5276, "step": 377500 }, { "epoch": 29.61, "learning_rate": 0.0005, "loss": 1.5516, "step": 377600 }, { "epoch": 29.62, "learning_rate": 0.0005, "loss": 1.5465, "step": 377700 }, { "epoch": 29.63, "learning_rate": 0.0005, "loss": 1.577, "step": 377800 }, { "epoch": 29.63, "learning_rate": 0.0005, "loss": 1.575, "step": 377900 }, { "epoch": 29.64, "learning_rate": 0.0005, "loss": 1.5309, "step": 378000 }, { "epoch": 29.65, "learning_rate": 0.0005, "loss": 1.5714, "step": 378100 }, { "epoch": 29.66, "learning_rate": 0.0005, "loss": 1.5524, "step": 378200 }, { "epoch": 29.67, "learning_rate": 0.0005, "loss": 1.5659, "step": 378300 }, { "epoch": 29.67, "learning_rate": 0.0005, "loss": 1.5679, "step": 378400 }, { "epoch": 29.68, "learning_rate": 0.0005, "loss": 1.5378, "step": 378500 }, { "epoch": 29.69, "learning_rate": 0.0005, "loss": 1.5523, "step": 378600 }, { "epoch": 29.7, "learning_rate": 0.0005, "loss": 1.5675, "step": 378700 }, { "epoch": 29.71, "learning_rate": 0.0005, "loss": 1.578, "step": 378800 }, { "epoch": 29.71, "learning_rate": 0.0005, "loss": 1.5386, "step": 378900 }, { "epoch": 29.72, "learning_rate": 0.0005, "loss": 1.5588, "step": 379000 }, { "epoch": 29.73, "learning_rate": 0.0005, "loss": 1.5356, "step": 379100 }, { "epoch": 29.74, "learning_rate": 0.0005, "loss": 1.5889, "step": 379200 }, { "epoch": 29.74, "learning_rate": 0.0005, "loss": 1.5759, "step": 379300 }, { "epoch": 29.75, "learning_rate": 0.0005, "loss": 1.556, "step": 379400 }, { "epoch": 29.76, "learning_rate": 0.0005, "loss": 1.5655, "step": 379500 }, { "epoch": 29.77, "learning_rate": 0.0005, "loss": 1.589, "step": 379600 }, { "epoch": 29.78, "learning_rate": 0.0005, "loss": 1.5837, "step": 379700 }, { "epoch": 29.78, "learning_rate": 0.0005, "loss": 1.5566, "step": 379800 }, { "epoch": 29.79, "learning_rate": 0.0005, "loss": 1.5692, "step": 379900 }, { "epoch": 29.8, "learning_rate": 0.0005, "loss": 1.5468, "step": 380000 }, { "epoch": 29.8, "eval_gen_len": 18.78440893440452, "eval_loss": 2.0447006225585938, "eval_rouge1": 35.5171, "eval_rouge2": 14.5247, "eval_rougeL": 29.2402, "eval_rougeLsum": 29.2352, "eval_runtime": 354.064, "eval_samples_per_second": 31.991, "eval_steps_per_second": 2.0, "step": 380000 }, { "epoch": 29.81, "learning_rate": 0.0005, "loss": 1.5943, "step": 380100 }, { "epoch": 29.81, "learning_rate": 0.0005, "loss": 1.5811, "step": 380200 }, { "epoch": 29.82, "learning_rate": 0.0005, "loss": 1.5551, "step": 380300 }, { "epoch": 29.83, "learning_rate": 0.0005, "loss": 1.5615, "step": 380400 }, { "epoch": 29.84, "learning_rate": 0.0005, "loss": 1.5603, "step": 380500 }, { "epoch": 29.85, "learning_rate": 0.0005, "loss": 1.5292, "step": 380600 }, { "epoch": 29.85, "learning_rate": 0.0005, "loss": 1.5513, "step": 380700 }, { "epoch": 29.86, "learning_rate": 0.0005, "loss": 1.568, "step": 380800 }, { "epoch": 29.87, "learning_rate": 0.0005, "loss": 1.5795, "step": 380900 }, { "epoch": 29.88, "learning_rate": 0.0005, "loss": 1.5701, "step": 381000 }, { "epoch": 29.89, "learning_rate": 0.0005, "loss": 1.5592, "step": 381100 }, { "epoch": 29.89, "learning_rate": 0.0005, "loss": 1.5821, "step": 381200 }, { "epoch": 29.9, "learning_rate": 0.0005, "loss": 1.5689, "step": 381300 }, { "epoch": 29.91, "learning_rate": 0.0005, "loss": 1.5638, "step": 381400 }, { "epoch": 29.92, "learning_rate": 0.0005, "loss": 1.5541, "step": 381500 }, { "epoch": 29.92, "learning_rate": 0.0005, "loss": 1.5629, "step": 381600 }, { "epoch": 29.93, "learning_rate": 0.0005, "loss": 1.5603, "step": 381700 }, { "epoch": 29.94, "learning_rate": 0.0005, "loss": 1.5926, "step": 381800 }, { "epoch": 29.95, "learning_rate": 0.0005, "loss": 1.5746, "step": 381900 }, { "epoch": 29.96, "learning_rate": 0.0005, "loss": 1.5786, "step": 382000 }, { "epoch": 29.96, "learning_rate": 0.0005, "loss": 1.5756, "step": 382100 }, { "epoch": 29.97, "learning_rate": 0.0005, "loss": 1.5677, "step": 382200 }, { "epoch": 29.98, "learning_rate": 0.0005, "loss": 1.5615, "step": 382300 }, { "epoch": 29.99, "learning_rate": 0.0005, "loss": 1.5771, "step": 382400 }, { "epoch": 30.0, "learning_rate": 0.0005, "loss": 1.5749, "step": 382500 }, { "epoch": 30.0, "learning_rate": 0.0005, "loss": 1.5499, "step": 382600 }, { "epoch": 30.01, "learning_rate": 0.0005, "loss": 1.4999, "step": 382700 }, { "epoch": 30.02, "learning_rate": 0.0005, "loss": 1.5096, "step": 382800 }, { "epoch": 30.03, "learning_rate": 0.0005, "loss": 1.4812, "step": 382900 }, { "epoch": 30.03, "learning_rate": 0.0005, "loss": 1.4866, "step": 383000 }, { "epoch": 30.04, "learning_rate": 0.0005, "loss": 1.4996, "step": 383100 }, { "epoch": 30.05, "learning_rate": 0.0005, "loss": 1.5003, "step": 383200 }, { "epoch": 30.06, "learning_rate": 0.0005, "loss": 1.4927, "step": 383300 }, { "epoch": 30.07, "learning_rate": 0.0005, "loss": 1.4954, "step": 383400 }, { "epoch": 30.07, "learning_rate": 0.0005, "loss": 1.5292, "step": 383500 }, { "epoch": 30.08, "learning_rate": 0.0005, "loss": 1.5011, "step": 383600 }, { "epoch": 30.09, "learning_rate": 0.0005, "loss": 1.5287, "step": 383700 }, { "epoch": 30.1, "learning_rate": 0.0005, "loss": 1.485, "step": 383800 }, { "epoch": 30.11, "learning_rate": 0.0005, "loss": 1.5102, "step": 383900 }, { "epoch": 30.11, "learning_rate": 0.0005, "loss": 1.5175, "step": 384000 }, { "epoch": 30.12, "learning_rate": 0.0005, "loss": 1.5078, "step": 384100 }, { "epoch": 30.13, "learning_rate": 0.0005, "loss": 1.5168, "step": 384200 }, { "epoch": 30.14, "learning_rate": 0.0005, "loss": 1.5254, "step": 384300 }, { "epoch": 30.14, "learning_rate": 0.0005, "loss": 1.5083, "step": 384400 }, { "epoch": 30.15, "learning_rate": 0.0005, "loss": 1.5071, "step": 384500 }, { "epoch": 30.16, "learning_rate": 0.0005, "loss": 1.5103, "step": 384600 }, { "epoch": 30.17, "learning_rate": 0.0005, "loss": 1.4913, "step": 384700 }, { "epoch": 30.18, "learning_rate": 0.0005, "loss": 1.4917, "step": 384800 }, { "epoch": 30.18, "learning_rate": 0.0005, "loss": 1.5229, "step": 384900 }, { "epoch": 30.19, "learning_rate": 0.0005, "loss": 1.4823, "step": 385000 }, { "epoch": 30.2, "learning_rate": 0.0005, "loss": 1.5141, "step": 385100 }, { "epoch": 30.21, "learning_rate": 0.0005, "loss": 1.5119, "step": 385200 }, { "epoch": 30.21, "learning_rate": 0.0005, "loss": 1.5448, "step": 385300 }, { "epoch": 30.22, "learning_rate": 0.0005, "loss": 1.5142, "step": 385400 }, { "epoch": 30.23, "learning_rate": 0.0005, "loss": 1.5171, "step": 385500 }, { "epoch": 30.24, "learning_rate": 0.0005, "loss": 1.5354, "step": 385600 }, { "epoch": 30.25, "learning_rate": 0.0005, "loss": 1.4944, "step": 385700 }, { "epoch": 30.25, "learning_rate": 0.0005, "loss": 1.523, "step": 385800 }, { "epoch": 30.26, "learning_rate": 0.0005, "loss": 1.5474, "step": 385900 }, { "epoch": 30.27, "learning_rate": 0.0005, "loss": 1.5116, "step": 386000 }, { "epoch": 30.28, "learning_rate": 0.0005, "loss": 1.5224, "step": 386100 }, { "epoch": 30.29, "learning_rate": 0.0005, "loss": 1.5225, "step": 386200 }, { "epoch": 30.29, "learning_rate": 0.0005, "loss": 1.5191, "step": 386300 }, { "epoch": 30.3, "learning_rate": 0.0005, "loss": 1.5531, "step": 386400 }, { "epoch": 30.31, "learning_rate": 0.0005, "loss": 1.5301, "step": 386500 }, { "epoch": 30.32, "learning_rate": 0.0005, "loss": 1.5313, "step": 386600 }, { "epoch": 30.32, "learning_rate": 0.0005, "loss": 1.4778, "step": 386700 }, { "epoch": 30.33, "learning_rate": 0.0005, "loss": 1.5302, "step": 386800 }, { "epoch": 30.34, "learning_rate": 0.0005, "loss": 1.5164, "step": 386900 }, { "epoch": 30.35, "learning_rate": 0.0005, "loss": 1.5088, "step": 387000 }, { "epoch": 30.36, "learning_rate": 0.0005, "loss": 1.5215, "step": 387100 }, { "epoch": 30.36, "learning_rate": 0.0005, "loss": 1.5594, "step": 387200 }, { "epoch": 30.37, "learning_rate": 0.0005, "loss": 1.534, "step": 387300 }, { "epoch": 30.38, "learning_rate": 0.0005, "loss": 1.5421, "step": 387400 }, { "epoch": 30.39, "learning_rate": 0.0005, "loss": 1.5384, "step": 387500 }, { "epoch": 30.4, "learning_rate": 0.0005, "loss": 1.5201, "step": 387600 }, { "epoch": 30.4, "learning_rate": 0.0005, "loss": 1.5385, "step": 387700 }, { "epoch": 30.41, "learning_rate": 0.0005, "loss": 1.5387, "step": 387800 }, { "epoch": 30.42, "learning_rate": 0.0005, "loss": 1.5556, "step": 387900 }, { "epoch": 30.43, "learning_rate": 0.0005, "loss": 1.5433, "step": 388000 }, { "epoch": 30.43, "learning_rate": 0.0005, "loss": 1.5271, "step": 388100 }, { "epoch": 30.44, "learning_rate": 0.0005, "loss": 1.5368, "step": 388200 }, { "epoch": 30.45, "learning_rate": 0.0005, "loss": 1.544, "step": 388300 }, { "epoch": 30.46, "learning_rate": 0.0005, "loss": 1.5288, "step": 388400 }, { "epoch": 30.47, "learning_rate": 0.0005, "loss": 1.5404, "step": 388500 }, { "epoch": 30.47, "learning_rate": 0.0005, "loss": 1.5382, "step": 388600 }, { "epoch": 30.48, "learning_rate": 0.0005, "loss": 1.5495, "step": 388700 }, { "epoch": 30.49, "learning_rate": 0.0005, "loss": 1.58, "step": 388800 }, { "epoch": 30.5, "learning_rate": 0.0005, "loss": 1.5482, "step": 388900 }, { "epoch": 30.51, "learning_rate": 0.0005, "loss": 1.4916, "step": 389000 }, { "epoch": 30.51, "learning_rate": 0.0005, "loss": 1.5232, "step": 389100 }, { "epoch": 30.52, "learning_rate": 0.0005, "loss": 1.5288, "step": 389200 }, { "epoch": 30.53, "learning_rate": 0.0005, "loss": 1.5001, "step": 389300 }, { "epoch": 30.54, "learning_rate": 0.0005, "loss": 1.5628, "step": 389400 }, { "epoch": 30.54, "learning_rate": 0.0005, "loss": 1.5341, "step": 389500 }, { "epoch": 30.55, "learning_rate": 0.0005, "loss": 1.522, "step": 389600 }, { "epoch": 30.56, "learning_rate": 0.0005, "loss": 1.5442, "step": 389700 }, { "epoch": 30.57, "learning_rate": 0.0005, "loss": 1.549, "step": 389800 }, { "epoch": 30.58, "learning_rate": 0.0005, "loss": 1.5188, "step": 389900 }, { "epoch": 30.58, "learning_rate": 0.0005, "loss": 1.5073, "step": 390000 }, { "epoch": 30.58, "eval_gen_len": 18.813278008298756, "eval_loss": 2.047095775604248, "eval_rouge1": 35.2735, "eval_rouge2": 14.329, "eval_rougeL": 29.0615, "eval_rougeLsum": 29.0566, "eval_runtime": 354.7807, "eval_samples_per_second": 31.927, "eval_steps_per_second": 1.996, "step": 390000 }, { "epoch": 30.59, "learning_rate": 0.0005, "loss": 1.5498, "step": 390100 }, { "epoch": 30.6, "learning_rate": 0.0005, "loss": 1.5313, "step": 390200 }, { "epoch": 30.61, "learning_rate": 0.0005, "loss": 1.5331, "step": 390300 }, { "epoch": 30.61, "learning_rate": 0.0005, "loss": 1.5503, "step": 390400 }, { "epoch": 30.62, "learning_rate": 0.0005, "loss": 1.5392, "step": 390500 }, { "epoch": 30.63, "learning_rate": 0.0005, "loss": 1.5448, "step": 390600 }, { "epoch": 30.64, "learning_rate": 0.0005, "loss": 1.5421, "step": 390700 }, { "epoch": 30.65, "learning_rate": 0.0005, "loss": 1.5446, "step": 390800 }, { "epoch": 30.65, "learning_rate": 0.0005, "loss": 1.5466, "step": 390900 }, { "epoch": 30.66, "learning_rate": 0.0005, "loss": 1.557, "step": 391000 }, { "epoch": 30.67, "learning_rate": 0.0005, "loss": 1.5456, "step": 391100 }, { "epoch": 30.68, "learning_rate": 0.0005, "loss": 1.5441, "step": 391200 }, { "epoch": 30.69, "learning_rate": 0.0005, "loss": 1.5624, "step": 391300 }, { "epoch": 30.69, "learning_rate": 0.0005, "loss": 1.5449, "step": 391400 }, { "epoch": 30.7, "learning_rate": 0.0005, "loss": 1.5352, "step": 391500 }, { "epoch": 30.71, "learning_rate": 0.0005, "loss": 1.5159, "step": 391600 }, { "epoch": 30.72, "learning_rate": 0.0005, "loss": 1.5566, "step": 391700 }, { "epoch": 30.72, "learning_rate": 0.0005, "loss": 1.528, "step": 391800 }, { "epoch": 30.73, "learning_rate": 0.0005, "loss": 1.5379, "step": 391900 }, { "epoch": 30.74, "learning_rate": 0.0005, "loss": 1.5754, "step": 392000 }, { "epoch": 30.75, "learning_rate": 0.0005, "loss": 1.5689, "step": 392100 }, { "epoch": 30.76, "learning_rate": 0.0005, "loss": 1.5678, "step": 392200 }, { "epoch": 30.76, "learning_rate": 0.0005, "loss": 1.5665, "step": 392300 }, { "epoch": 30.77, "learning_rate": 0.0005, "loss": 1.5618, "step": 392400 }, { "epoch": 30.78, "learning_rate": 0.0005, "loss": 1.5661, "step": 392500 }, { "epoch": 30.79, "learning_rate": 0.0005, "loss": 1.5586, "step": 392600 }, { "epoch": 30.8, "learning_rate": 0.0005, "loss": 1.556, "step": 392700 }, { "epoch": 30.8, "learning_rate": 0.0005, "loss": 1.5416, "step": 392800 }, { "epoch": 30.81, "learning_rate": 0.0005, "loss": 1.5362, "step": 392900 }, { "epoch": 30.82, "learning_rate": 0.0005, "loss": 1.554, "step": 393000 }, { "epoch": 30.83, "learning_rate": 0.0005, "loss": 1.563, "step": 393100 }, { "epoch": 30.83, "learning_rate": 0.0005, "loss": 1.5251, "step": 393200 }, { "epoch": 30.84, "learning_rate": 0.0005, "loss": 1.5873, "step": 393300 }, { "epoch": 30.85, "learning_rate": 0.0005, "loss": 1.5558, "step": 393400 }, { "epoch": 30.86, "learning_rate": 0.0005, "loss": 1.5633, "step": 393500 }, { "epoch": 30.87, "learning_rate": 0.0005, "loss": 1.5633, "step": 393600 }, { "epoch": 30.87, "learning_rate": 0.0005, "loss": 1.5309, "step": 393700 }, { "epoch": 30.88, "learning_rate": 0.0005, "loss": 1.5891, "step": 393800 }, { "epoch": 30.89, "learning_rate": 0.0005, "loss": 1.5933, "step": 393900 }, { "epoch": 30.9, "learning_rate": 0.0005, "loss": 1.5798, "step": 394000 }, { "epoch": 30.9, "learning_rate": 0.0005, "loss": 1.5414, "step": 394100 }, { "epoch": 30.91, "learning_rate": 0.0005, "loss": 1.5658, "step": 394200 }, { "epoch": 30.92, "learning_rate": 0.0005, "loss": 1.5304, "step": 394300 }, { "epoch": 30.93, "learning_rate": 0.0005, "loss": 1.5448, "step": 394400 }, { "epoch": 30.94, "learning_rate": 0.0005, "loss": 1.5352, "step": 394500 }, { "epoch": 30.94, "learning_rate": 0.0005, "loss": 1.5415, "step": 394600 }, { "epoch": 30.95, "learning_rate": 0.0005, "loss": 1.5869, "step": 394700 }, { "epoch": 30.96, "learning_rate": 0.0005, "loss": 1.5442, "step": 394800 }, { "epoch": 30.97, "learning_rate": 0.0005, "loss": 1.5851, "step": 394900 }, { "epoch": 30.98, "learning_rate": 0.0005, "loss": 1.5618, "step": 395000 }, { "epoch": 30.98, "learning_rate": 0.0005, "loss": 1.5343, "step": 395100 }, { "epoch": 30.99, "learning_rate": 0.0005, "loss": 1.5599, "step": 395200 }, { "epoch": 31.0, "learning_rate": 0.0005, "loss": 1.5824, "step": 395300 }, { "epoch": 31.01, "learning_rate": 0.0005, "loss": 1.4802, "step": 395400 }, { "epoch": 31.01, "learning_rate": 0.0005, "loss": 1.5035, "step": 395500 }, { "epoch": 31.02, "learning_rate": 0.0005, "loss": 1.4814, "step": 395600 }, { "epoch": 31.03, "learning_rate": 0.0005, "loss": 1.4673, "step": 395700 }, { "epoch": 31.04, "learning_rate": 0.0005, "loss": 1.4805, "step": 395800 }, { "epoch": 31.05, "learning_rate": 0.0005, "loss": 1.4997, "step": 395900 }, { "epoch": 31.05, "learning_rate": 0.0005, "loss": 1.4932, "step": 396000 }, { "epoch": 31.06, "learning_rate": 0.0005, "loss": 1.4683, "step": 396100 }, { "epoch": 31.07, "learning_rate": 0.0005, "loss": 1.4829, "step": 396200 }, { "epoch": 31.08, "learning_rate": 0.0005, "loss": 1.4926, "step": 396300 }, { "epoch": 31.09, "learning_rate": 0.0005, "loss": 1.4833, "step": 396400 }, { "epoch": 31.09, "learning_rate": 0.0005, "loss": 1.5129, "step": 396500 }, { "epoch": 31.1, "learning_rate": 0.0005, "loss": 1.4949, "step": 396600 }, { "epoch": 31.11, "learning_rate": 0.0005, "loss": 1.5111, "step": 396700 }, { "epoch": 31.12, "learning_rate": 0.0005, "loss": 1.4714, "step": 396800 }, { "epoch": 31.12, "learning_rate": 0.0005, "loss": 1.4798, "step": 396900 }, { "epoch": 31.13, "learning_rate": 0.0005, "loss": 1.4855, "step": 397000 }, { "epoch": 31.14, "learning_rate": 0.0005, "loss": 1.4933, "step": 397100 }, { "epoch": 31.15, "learning_rate": 0.0005, "loss": 1.4793, "step": 397200 }, { "epoch": 31.16, "learning_rate": 0.0005, "loss": 1.4858, "step": 397300 }, { "epoch": 31.16, "learning_rate": 0.0005, "loss": 1.5112, "step": 397400 }, { "epoch": 31.17, "learning_rate": 0.0005, "loss": 1.5084, "step": 397500 }, { "epoch": 31.18, "learning_rate": 0.0005, "loss": 1.4982, "step": 397600 }, { "epoch": 31.19, "learning_rate": 0.0005, "loss": 1.4986, "step": 397700 }, { "epoch": 31.2, "learning_rate": 0.0005, "loss": 1.5113, "step": 397800 }, { "epoch": 31.2, "learning_rate": 0.0005, "loss": 1.5151, "step": 397900 }, { "epoch": 31.21, "learning_rate": 0.0005, "loss": 1.4965, "step": 398000 }, { "epoch": 31.22, "learning_rate": 0.0005, "loss": 1.4944, "step": 398100 }, { "epoch": 31.23, "learning_rate": 0.0005, "loss": 1.4989, "step": 398200 }, { "epoch": 31.23, "learning_rate": 0.0005, "loss": 1.5089, "step": 398300 }, { "epoch": 31.24, "learning_rate": 0.0005, "loss": 1.51, "step": 398400 }, { "epoch": 31.25, "learning_rate": 0.0005, "loss": 1.5227, "step": 398500 }, { "epoch": 31.26, "learning_rate": 0.0005, "loss": 1.5136, "step": 398600 }, { "epoch": 31.27, "learning_rate": 0.0005, "loss": 1.4949, "step": 398700 }, { "epoch": 31.27, "learning_rate": 0.0005, "loss": 1.485, "step": 398800 }, { "epoch": 31.28, "learning_rate": 0.0005, "loss": 1.5228, "step": 398900 }, { "epoch": 31.29, "learning_rate": 0.0005, "loss": 1.5132, "step": 399000 }, { "epoch": 31.3, "learning_rate": 0.0005, "loss": 1.5193, "step": 399100 }, { "epoch": 31.3, "learning_rate": 0.0005, "loss": 1.5199, "step": 399200 }, { "epoch": 31.31, "learning_rate": 0.0005, "loss": 1.5519, "step": 399300 }, { "epoch": 31.32, "learning_rate": 0.0005, "loss": 1.5422, "step": 399400 }, { "epoch": 31.33, "learning_rate": 0.0005, "loss": 1.5042, "step": 399500 }, { "epoch": 31.34, "learning_rate": 0.0005, "loss": 1.5193, "step": 399600 }, { "epoch": 31.34, "learning_rate": 0.0005, "loss": 1.5402, "step": 399700 }, { "epoch": 31.35, "learning_rate": 0.0005, "loss": 1.5366, "step": 399800 }, { "epoch": 31.36, "learning_rate": 0.0005, "loss": 1.5035, "step": 399900 }, { "epoch": 31.37, "learning_rate": 0.0005, "loss": 1.5176, "step": 400000 }, { "epoch": 31.37, "eval_gen_len": 18.77999470292222, "eval_loss": 2.055631637573242, "eval_rouge1": 35.2942, "eval_rouge2": 14.3432, "eval_rougeL": 29.0501, "eval_rougeLsum": 29.0411, "eval_runtime": 356.7733, "eval_samples_per_second": 31.748, "eval_steps_per_second": 1.984, "step": 400000 }, { "epoch": 31.38, "learning_rate": 0.0005, "loss": 1.5454, "step": 400100 }, { "epoch": 31.38, "learning_rate": 0.0005, "loss": 1.5285, "step": 400200 }, { "epoch": 31.39, "learning_rate": 0.0005, "loss": 1.5118, "step": 400300 }, { "epoch": 31.4, "learning_rate": 0.0005, "loss": 1.5388, "step": 400400 }, { "epoch": 31.41, "learning_rate": 0.0005, "loss": 1.516, "step": 400500 }, { "epoch": 31.41, "learning_rate": 0.0005, "loss": 1.4913, "step": 400600 }, { "epoch": 31.42, "learning_rate": 0.0005, "loss": 1.5452, "step": 400700 }, { "epoch": 31.43, "learning_rate": 0.0005, "loss": 1.5255, "step": 400800 }, { "epoch": 31.44, "learning_rate": 0.0005, "loss": 1.535, "step": 400900 }, { "epoch": 31.45, "learning_rate": 0.0005, "loss": 1.5296, "step": 401000 }, { "epoch": 31.45, "learning_rate": 0.0005, "loss": 1.5089, "step": 401100 }, { "epoch": 31.46, "learning_rate": 0.0005, "loss": 1.5081, "step": 401200 }, { "epoch": 31.47, "learning_rate": 0.0005, "loss": 1.5248, "step": 401300 }, { "epoch": 31.48, "learning_rate": 0.0005, "loss": 1.5038, "step": 401400 }, { "epoch": 31.49, "learning_rate": 0.0005, "loss": 1.5106, "step": 401500 }, { "epoch": 31.49, "learning_rate": 0.0005, "loss": 1.5322, "step": 401600 }, { "epoch": 31.5, "learning_rate": 0.0005, "loss": 1.523, "step": 401700 }, { "epoch": 31.51, "learning_rate": 0.0005, "loss": 1.5027, "step": 401800 }, { "epoch": 31.52, "learning_rate": 0.0005, "loss": 1.5182, "step": 401900 }, { "epoch": 31.52, "learning_rate": 0.0005, "loss": 1.5425, "step": 402000 }, { "epoch": 31.53, "learning_rate": 0.0005, "loss": 1.5213, "step": 402100 }, { "epoch": 31.54, "learning_rate": 0.0005, "loss": 1.494, "step": 402200 }, { "epoch": 31.55, "learning_rate": 0.0005, "loss": 1.5569, "step": 402300 }, { "epoch": 31.56, "learning_rate": 0.0005, "loss": 1.5362, "step": 402400 }, { "epoch": 31.56, "learning_rate": 0.0005, "loss": 1.5118, "step": 402500 }, { "epoch": 31.57, "learning_rate": 0.0005, "loss": 1.5371, "step": 402600 }, { "epoch": 31.58, "learning_rate": 0.0005, "loss": 1.5366, "step": 402700 }, { "epoch": 31.59, "learning_rate": 0.0005, "loss": 1.5466, "step": 402800 }, { "epoch": 31.6, "learning_rate": 0.0005, "loss": 1.5523, "step": 402900 }, { "epoch": 31.6, "learning_rate": 0.0005, "loss": 1.5207, "step": 403000 }, { "epoch": 31.61, "learning_rate": 0.0005, "loss": 1.5294, "step": 403100 }, { "epoch": 31.62, "learning_rate": 0.0005, "loss": 1.5199, "step": 403200 }, { "epoch": 31.63, "learning_rate": 0.0005, "loss": 1.5206, "step": 403300 }, { "epoch": 31.63, "learning_rate": 0.0005, "loss": 1.5636, "step": 403400 }, { "epoch": 31.64, "learning_rate": 0.0005, "loss": 1.5254, "step": 403500 }, { "epoch": 31.65, "learning_rate": 0.0005, "loss": 1.5464, "step": 403600 }, { "epoch": 31.66, "learning_rate": 0.0005, "loss": 1.5271, "step": 403700 }, { "epoch": 31.67, "learning_rate": 0.0005, "loss": 1.5663, "step": 403800 }, { "epoch": 31.67, "learning_rate": 0.0005, "loss": 1.5366, "step": 403900 }, { "epoch": 31.68, "learning_rate": 0.0005, "loss": 1.522, "step": 404000 }, { "epoch": 31.69, "learning_rate": 0.0005, "loss": 1.5086, "step": 404100 }, { "epoch": 31.7, "learning_rate": 0.0005, "loss": 1.5337, "step": 404200 }, { "epoch": 31.7, "learning_rate": 0.0005, "loss": 1.5372, "step": 404300 }, { "epoch": 31.71, "learning_rate": 0.0005, "loss": 1.5346, "step": 404400 }, { "epoch": 31.72, "learning_rate": 0.0005, "loss": 1.5595, "step": 404500 }, { "epoch": 31.73, "learning_rate": 0.0005, "loss": 1.5283, "step": 404600 }, { "epoch": 31.74, "learning_rate": 0.0005, "loss": 1.5531, "step": 404700 }, { "epoch": 31.74, "learning_rate": 0.0005, "loss": 1.5486, "step": 404800 }, { "epoch": 31.75, "learning_rate": 0.0005, "loss": 1.5326, "step": 404900 }, { "epoch": 31.76, "learning_rate": 0.0005, "loss": 1.5473, "step": 405000 }, { "epoch": 31.77, "learning_rate": 0.0005, "loss": 1.5356, "step": 405100 }, { "epoch": 31.78, "learning_rate": 0.0005, "loss": 1.5393, "step": 405200 }, { "epoch": 31.78, "learning_rate": 0.0005, "loss": 1.5449, "step": 405300 }, { "epoch": 31.79, "learning_rate": 0.0005, "loss": 1.5594, "step": 405400 }, { "epoch": 31.8, "learning_rate": 0.0005, "loss": 1.5404, "step": 405500 }, { "epoch": 31.81, "learning_rate": 0.0005, "loss": 1.5441, "step": 405600 }, { "epoch": 31.81, "learning_rate": 0.0005, "loss": 1.5549, "step": 405700 }, { "epoch": 31.82, "learning_rate": 0.0005, "loss": 1.5179, "step": 405800 }, { "epoch": 31.83, "learning_rate": 0.0005, "loss": 1.5099, "step": 405900 }, { "epoch": 31.84, "learning_rate": 0.0005, "loss": 1.5443, "step": 406000 }, { "epoch": 31.85, "learning_rate": 0.0005, "loss": 1.5671, "step": 406100 }, { "epoch": 31.85, "learning_rate": 0.0005, "loss": 1.537, "step": 406200 }, { "epoch": 31.86, "learning_rate": 0.0005, "loss": 1.5413, "step": 406300 }, { "epoch": 31.87, "learning_rate": 0.0005, "loss": 1.548, "step": 406400 }, { "epoch": 31.88, "learning_rate": 0.0005, "loss": 1.5445, "step": 406500 }, { "epoch": 31.89, "learning_rate": 0.0005, "loss": 1.5405, "step": 406600 }, { "epoch": 31.89, "learning_rate": 0.0005, "loss": 1.5748, "step": 406700 }, { "epoch": 31.9, "learning_rate": 0.0005, "loss": 1.5358, "step": 406800 }, { "epoch": 31.91, "learning_rate": 0.0005, "loss": 1.5438, "step": 406900 }, { "epoch": 31.92, "learning_rate": 0.0005, "loss": 1.5229, "step": 407000 }, { "epoch": 31.92, "learning_rate": 0.0005, "loss": 1.546, "step": 407100 }, { "epoch": 31.93, "learning_rate": 0.0005, "loss": 1.5315, "step": 407200 }, { "epoch": 31.94, "learning_rate": 0.0005, "loss": 1.5509, "step": 407300 }, { "epoch": 31.95, "learning_rate": 0.0005, "loss": 1.5527, "step": 407400 }, { "epoch": 31.96, "learning_rate": 0.0005, "loss": 1.5307, "step": 407500 }, { "epoch": 31.96, "learning_rate": 0.0005, "loss": 1.5234, "step": 407600 }, { "epoch": 31.97, "learning_rate": 0.0005, "loss": 1.5626, "step": 407700 }, { "epoch": 31.98, "learning_rate": 0.0005, "loss": 1.5508, "step": 407800 }, { "epoch": 31.99, "learning_rate": 0.0005, "loss": 1.5658, "step": 407900 }, { "epoch": 31.99, "learning_rate": 0.0005, "loss": 1.541, "step": 408000 }, { "epoch": 32.0, "learning_rate": 0.0005, "loss": 1.5188, "step": 408100 }, { "epoch": 32.01, "learning_rate": 0.0005, "loss": 1.4728, "step": 408200 }, { "epoch": 32.02, "learning_rate": 0.0005, "loss": 1.4386, "step": 408300 }, { "epoch": 32.03, "learning_rate": 0.0005, "loss": 1.4807, "step": 408400 }, { "epoch": 32.03, "learning_rate": 0.0005, "loss": 1.4852, "step": 408500 }, { "epoch": 32.04, "learning_rate": 0.0005, "loss": 1.4643, "step": 408600 }, { "epoch": 32.05, "learning_rate": 0.0005, "loss": 1.4934, "step": 408700 }, { "epoch": 32.06, "learning_rate": 0.0005, "loss": 1.4816, "step": 408800 }, { "epoch": 32.07, "learning_rate": 0.0005, "loss": 1.4521, "step": 408900 }, { "epoch": 32.07, "learning_rate": 0.0005, "loss": 1.4754, "step": 409000 }, { "epoch": 32.08, "learning_rate": 0.0005, "loss": 1.4847, "step": 409100 }, { "epoch": 32.09, "learning_rate": 0.0005, "loss": 1.5096, "step": 409200 }, { "epoch": 32.1, "learning_rate": 0.0005, "loss": 1.4838, "step": 409300 }, { "epoch": 32.1, "learning_rate": 0.0005, "loss": 1.4722, "step": 409400 }, { "epoch": 32.11, "learning_rate": 0.0005, "loss": 1.4993, "step": 409500 }, { "epoch": 32.12, "learning_rate": 0.0005, "loss": 1.4843, "step": 409600 }, { "epoch": 32.13, "learning_rate": 0.0005, "loss": 1.4763, "step": 409700 }, { "epoch": 32.14, "learning_rate": 0.0005, "loss": 1.4765, "step": 409800 }, { "epoch": 32.14, "learning_rate": 0.0005, "loss": 1.5052, "step": 409900 }, { "epoch": 32.15, "learning_rate": 0.0005, "loss": 1.4973, "step": 410000 }, { "epoch": 32.15, "eval_gen_len": 18.798799329036814, "eval_loss": 2.0617787837982178, "eval_rouge1": 35.6624, "eval_rouge2": 14.4996, "eval_rougeL": 29.2625, "eval_rougeLsum": 29.2571, "eval_runtime": 364.7552, "eval_samples_per_second": 31.054, "eval_steps_per_second": 1.941, "step": 410000 }, { "epoch": 32.16, "learning_rate": 0.0005, "loss": 1.4893, "step": 410100 }, { "epoch": 32.17, "learning_rate": 0.0005, "loss": 1.51, "step": 410200 }, { "epoch": 32.18, "learning_rate": 0.0005, "loss": 1.5004, "step": 410300 }, { "epoch": 32.18, "learning_rate": 0.0005, "loss": 1.5074, "step": 410400 }, { "epoch": 32.19, "learning_rate": 0.0005, "loss": 1.4952, "step": 410500 }, { "epoch": 32.2, "learning_rate": 0.0005, "loss": 1.4865, "step": 410600 }, { "epoch": 32.21, "learning_rate": 0.0005, "loss": 1.5009, "step": 410700 }, { "epoch": 32.21, "learning_rate": 0.0005, "loss": 1.4766, "step": 410800 }, { "epoch": 32.22, "learning_rate": 0.0005, "loss": 1.4949, "step": 410900 }, { "epoch": 32.23, "learning_rate": 0.0005, "loss": 1.4874, "step": 411000 }, { "epoch": 32.24, "learning_rate": 0.0005, "loss": 1.4945, "step": 411100 }, { "epoch": 32.25, "learning_rate": 0.0005, "loss": 1.5005, "step": 411200 }, { "epoch": 32.25, "learning_rate": 0.0005, "loss": 1.4743, "step": 411300 }, { "epoch": 32.26, "learning_rate": 0.0005, "loss": 1.4751, "step": 411400 }, { "epoch": 32.27, "learning_rate": 0.0005, "loss": 1.5003, "step": 411500 }, { "epoch": 32.28, "learning_rate": 0.0005, "loss": 1.473, "step": 411600 }, { "epoch": 32.29, "learning_rate": 0.0005, "loss": 1.477, "step": 411700 }, { "epoch": 32.29, "learning_rate": 0.0005, "loss": 1.518, "step": 411800 }, { "epoch": 32.3, "learning_rate": 0.0005, "loss": 1.4561, "step": 411900 }, { "epoch": 32.31, "learning_rate": 0.0005, "loss": 1.4887, "step": 412000 }, { "epoch": 32.32, "learning_rate": 0.0005, "loss": 1.5099, "step": 412100 }, { "epoch": 32.32, "learning_rate": 0.0005, "loss": 1.5178, "step": 412200 }, { "epoch": 32.33, "learning_rate": 0.0005, "loss": 1.5064, "step": 412300 }, { "epoch": 32.34, "learning_rate": 0.0005, "loss": 1.4879, "step": 412400 }, { "epoch": 32.35, "learning_rate": 0.0005, "loss": 1.5057, "step": 412500 }, { "epoch": 32.36, "learning_rate": 0.0005, "loss": 1.5124, "step": 412600 }, { "epoch": 32.36, "learning_rate": 0.0005, "loss": 1.5316, "step": 412700 }, { "epoch": 32.37, "learning_rate": 0.0005, "loss": 1.4788, "step": 412800 }, { "epoch": 32.38, "learning_rate": 0.0005, "loss": 1.5265, "step": 412900 }, { "epoch": 32.39, "learning_rate": 0.0005, "loss": 1.5078, "step": 413000 }, { "epoch": 32.39, "learning_rate": 0.0005, "loss": 1.5052, "step": 413100 }, { "epoch": 32.4, "learning_rate": 0.0005, "loss": 1.4874, "step": 413200 }, { "epoch": 32.41, "learning_rate": 0.0005, "loss": 1.5137, "step": 413300 }, { "epoch": 32.42, "learning_rate": 0.0005, "loss": 1.5306, "step": 413400 }, { "epoch": 32.43, "learning_rate": 0.0005, "loss": 1.4981, "step": 413500 }, { "epoch": 32.43, "learning_rate": 0.0005, "loss": 1.4998, "step": 413600 }, { "epoch": 32.44, "learning_rate": 0.0005, "loss": 1.496, "step": 413700 }, { "epoch": 32.45, "learning_rate": 0.0005, "loss": 1.529, "step": 413800 }, { "epoch": 32.46, "learning_rate": 0.0005, "loss": 1.5146, "step": 413900 }, { "epoch": 32.47, "learning_rate": 0.0005, "loss": 1.4946, "step": 414000 }, { "epoch": 32.47, "learning_rate": 0.0005, "loss": 1.5132, "step": 414100 }, { "epoch": 32.48, "learning_rate": 0.0005, "loss": 1.5192, "step": 414200 }, { "epoch": 32.49, "learning_rate": 0.0005, "loss": 1.498, "step": 414300 }, { "epoch": 32.5, "learning_rate": 0.0005, "loss": 1.5389, "step": 414400 }, { "epoch": 32.5, "learning_rate": 0.0005, "loss": 1.5097, "step": 414500 }, { "epoch": 32.51, "learning_rate": 0.0005, "loss": 1.4973, "step": 414600 }, { "epoch": 32.52, "learning_rate": 0.0005, "loss": 1.5201, "step": 414700 }, { "epoch": 32.53, "learning_rate": 0.0005, "loss": 1.4835, "step": 414800 }, { "epoch": 32.54, "learning_rate": 0.0005, "loss": 1.5017, "step": 414900 }, { "epoch": 32.54, "learning_rate": 0.0005, "loss": 1.5011, "step": 415000 }, { "epoch": 32.55, "learning_rate": 0.0005, "loss": 1.5075, "step": 415100 }, { "epoch": 32.56, "learning_rate": 0.0005, "loss": 1.5067, "step": 415200 }, { "epoch": 32.57, "learning_rate": 0.0005, "loss": 1.5373, "step": 415300 }, { "epoch": 32.58, "learning_rate": 0.0005, "loss": 1.5174, "step": 415400 }, { "epoch": 32.58, "learning_rate": 0.0005, "loss": 1.5264, "step": 415500 }, { "epoch": 32.59, "learning_rate": 0.0005, "loss": 1.5168, "step": 415600 }, { "epoch": 32.6, "learning_rate": 0.0005, "loss": 1.5411, "step": 415700 }, { "epoch": 32.61, "learning_rate": 0.0005, "loss": 1.5051, "step": 415800 }, { "epoch": 32.61, "learning_rate": 0.0005, "loss": 1.5014, "step": 415900 }, { "epoch": 32.62, "learning_rate": 0.0005, "loss": 1.5198, "step": 416000 }, { "epoch": 32.63, "learning_rate": 0.0005, "loss": 1.5211, "step": 416100 }, { "epoch": 32.64, "learning_rate": 0.0005, "loss": 1.5409, "step": 416200 }, { "epoch": 32.65, "learning_rate": 0.0005, "loss": 1.5302, "step": 416300 }, { "epoch": 32.65, "learning_rate": 0.0005, "loss": 1.5224, "step": 416400 }, { "epoch": 32.66, "learning_rate": 0.0005, "loss": 1.519, "step": 416500 }, { "epoch": 32.67, "learning_rate": 0.0005, "loss": 1.5162, "step": 416600 }, { "epoch": 32.68, "learning_rate": 0.0005, "loss": 1.5157, "step": 416700 }, { "epoch": 32.69, "learning_rate": 0.0005, "loss": 1.5368, "step": 416800 }, { "epoch": 32.69, "learning_rate": 0.0005, "loss": 1.513, "step": 416900 }, { "epoch": 32.7, "learning_rate": 0.0005, "loss": 1.5508, "step": 417000 }, { "epoch": 32.71, "learning_rate": 0.0005, "loss": 1.5104, "step": 417100 }, { "epoch": 32.72, "learning_rate": 0.0005, "loss": 1.5185, "step": 417200 }, { "epoch": 32.72, "learning_rate": 0.0005, "loss": 1.492, "step": 417300 }, { "epoch": 32.73, "learning_rate": 0.0005, "loss": 1.5189, "step": 417400 }, { "epoch": 32.74, "learning_rate": 0.0005, "loss": 1.5375, "step": 417500 }, { "epoch": 32.75, "learning_rate": 0.0005, "loss": 1.5309, "step": 417600 }, { "epoch": 32.76, "learning_rate": 0.0005, "loss": 1.5239, "step": 417700 }, { "epoch": 32.76, "learning_rate": 0.0005, "loss": 1.5401, "step": 417800 }, { "epoch": 32.77, "learning_rate": 0.0005, "loss": 1.5296, "step": 417900 }, { "epoch": 32.78, "learning_rate": 0.0005, "loss": 1.5092, "step": 418000 }, { "epoch": 32.79, "learning_rate": 0.0005, "loss": 1.5272, "step": 418100 }, { "epoch": 32.79, "learning_rate": 0.0005, "loss": 1.5192, "step": 418200 }, { "epoch": 32.8, "learning_rate": 0.0005, "loss": 1.5315, "step": 418300 }, { "epoch": 32.81, "learning_rate": 0.0005, "loss": 1.5335, "step": 418400 }, { "epoch": 32.82, "learning_rate": 0.0005, "loss": 1.5279, "step": 418500 }, { "epoch": 32.83, "learning_rate": 0.0005, "loss": 1.5168, "step": 418600 }, { "epoch": 32.83, "learning_rate": 0.0005, "loss": 1.5587, "step": 418700 }, { "epoch": 32.84, "learning_rate": 0.0005, "loss": 1.5549, "step": 418800 }, { "epoch": 32.85, "learning_rate": 0.0005, "loss": 1.5463, "step": 418900 }, { "epoch": 32.86, "learning_rate": 0.0005, "loss": 1.5279, "step": 419000 }, { "epoch": 32.87, "learning_rate": 0.0005, "loss": 1.5431, "step": 419100 }, { "epoch": 32.87, "learning_rate": 0.0005, "loss": 1.5552, "step": 419200 }, { "epoch": 32.88, "learning_rate": 0.0005, "loss": 1.5442, "step": 419300 }, { "epoch": 32.89, "learning_rate": 0.0005, "loss": 1.5202, "step": 419400 }, { "epoch": 32.9, "learning_rate": 0.0005, "loss": 1.543, "step": 419500 }, { "epoch": 32.9, "learning_rate": 0.0005, "loss": 1.5277, "step": 419600 }, { "epoch": 32.91, "learning_rate": 0.0005, "loss": 1.5405, "step": 419700 }, { "epoch": 32.92, "learning_rate": 0.0005, "loss": 1.557, "step": 419800 }, { "epoch": 32.93, "learning_rate": 0.0005, "loss": 1.5291, "step": 419900 }, { "epoch": 32.94, "learning_rate": 0.0005, "loss": 1.5584, "step": 420000 }, { "epoch": 32.94, "eval_gen_len": 18.746093405138165, "eval_loss": 2.043473720550537, "eval_rouge1": 35.4532, "eval_rouge2": 14.4774, "eval_rougeL": 29.1654, "eval_rougeLsum": 29.1572, "eval_runtime": 355.0094, "eval_samples_per_second": 31.906, "eval_steps_per_second": 1.994, "step": 420000 }, { "epoch": 32.94, "learning_rate": 0.0005, "loss": 1.5445, "step": 420100 }, { "epoch": 32.95, "learning_rate": 0.0005, "loss": 1.5559, "step": 420200 }, { "epoch": 32.96, "learning_rate": 0.0005, "loss": 1.5439, "step": 420300 }, { "epoch": 32.97, "learning_rate": 0.0005, "loss": 1.5413, "step": 420400 }, { "epoch": 32.98, "learning_rate": 0.0005, "loss": 1.5161, "step": 420500 }, { "epoch": 32.98, "learning_rate": 0.0005, "loss": 1.526, "step": 420600 }, { "epoch": 32.99, "learning_rate": 0.0005, "loss": 1.5214, "step": 420700 }, { "epoch": 33.0, "learning_rate": 0.0005, "loss": 1.5649, "step": 420800 }, { "epoch": 33.01, "learning_rate": 0.0005, "loss": 1.479, "step": 420900 }, { "epoch": 33.01, "learning_rate": 0.0005, "loss": 1.4364, "step": 421000 }, { "epoch": 33.02, "learning_rate": 0.0005, "loss": 1.4831, "step": 421100 }, { "epoch": 33.03, "learning_rate": 0.0005, "loss": 1.4527, "step": 421200 }, { "epoch": 33.04, "learning_rate": 0.0005, "loss": 1.4754, "step": 421300 }, { "epoch": 33.05, "learning_rate": 0.0005, "loss": 1.4573, "step": 421400 }, { "epoch": 33.05, "learning_rate": 0.0005, "loss": 1.457, "step": 421500 }, { "epoch": 33.06, "learning_rate": 0.0005, "loss": 1.4525, "step": 421600 }, { "epoch": 33.07, "learning_rate": 0.0005, "loss": 1.4549, "step": 421700 }, { "epoch": 33.08, "learning_rate": 0.0005, "loss": 1.4764, "step": 421800 }, { "epoch": 33.09, "learning_rate": 0.0005, "loss": 1.48, "step": 421900 }, { "epoch": 33.09, "learning_rate": 0.0005, "loss": 1.4471, "step": 422000 }, { "epoch": 33.1, "learning_rate": 0.0005, "loss": 1.4762, "step": 422100 }, { "epoch": 33.11, "learning_rate": 0.0005, "loss": 1.4794, "step": 422200 }, { "epoch": 33.12, "learning_rate": 0.0005, "loss": 1.4909, "step": 422300 }, { "epoch": 33.12, "learning_rate": 0.0005, "loss": 1.4878, "step": 422400 }, { "epoch": 33.13, "learning_rate": 0.0005, "loss": 1.4934, "step": 422500 }, { "epoch": 33.14, "learning_rate": 0.0005, "loss": 1.4961, "step": 422600 }, { "epoch": 33.15, "learning_rate": 0.0005, "loss": 1.4815, "step": 422700 }, { "epoch": 33.16, "learning_rate": 0.0005, "loss": 1.4502, "step": 422800 }, { "epoch": 33.16, "learning_rate": 0.0005, "loss": 1.4715, "step": 422900 }, { "epoch": 33.17, "learning_rate": 0.0005, "loss": 1.4665, "step": 423000 }, { "epoch": 33.18, "learning_rate": 0.0005, "loss": 1.4945, "step": 423100 }, { "epoch": 33.19, "learning_rate": 0.0005, "loss": 1.4853, "step": 423200 }, { "epoch": 33.19, "learning_rate": 0.0005, "loss": 1.4898, "step": 423300 }, { "epoch": 33.2, "learning_rate": 0.0005, "loss": 1.4736, "step": 423400 }, { "epoch": 33.21, "learning_rate": 0.0005, "loss": 1.4931, "step": 423500 }, { "epoch": 33.22, "learning_rate": 0.0005, "loss": 1.508, "step": 423600 }, { "epoch": 33.23, "learning_rate": 0.0005, "loss": 1.459, "step": 423700 }, { "epoch": 33.23, "learning_rate": 0.0005, "loss": 1.4718, "step": 423800 }, { "epoch": 33.24, "learning_rate": 0.0005, "loss": 1.4846, "step": 423900 }, { "epoch": 33.25, "learning_rate": 0.0005, "loss": 1.4906, "step": 424000 }, { "epoch": 33.26, "learning_rate": 0.0005, "loss": 1.4812, "step": 424100 }, { "epoch": 33.27, "learning_rate": 0.0005, "loss": 1.5124, "step": 424200 }, { "epoch": 33.27, "learning_rate": 0.0005, "loss": 1.4844, "step": 424300 }, { "epoch": 33.28, "learning_rate": 0.0005, "loss": 1.4951, "step": 424400 }, { "epoch": 33.29, "learning_rate": 0.0005, "loss": 1.5, "step": 424500 }, { "epoch": 33.3, "learning_rate": 0.0005, "loss": 1.4982, "step": 424600 }, { "epoch": 33.3, "learning_rate": 0.0005, "loss": 1.4761, "step": 424700 }, { "epoch": 33.31, "learning_rate": 0.0005, "loss": 1.4928, "step": 424800 }, { "epoch": 33.32, "learning_rate": 0.0005, "loss": 1.4939, "step": 424900 }, { "epoch": 33.33, "learning_rate": 0.0005, "loss": 1.47, "step": 425000 }, { "epoch": 33.34, "learning_rate": 0.0005, "loss": 1.506, "step": 425100 }, { "epoch": 33.34, "learning_rate": 0.0005, "loss": 1.4834, "step": 425200 }, { "epoch": 33.35, "learning_rate": 0.0005, "loss": 1.4674, "step": 425300 }, { "epoch": 33.36, "learning_rate": 0.0005, "loss": 1.4815, "step": 425400 }, { "epoch": 33.37, "learning_rate": 0.0005, "loss": 1.4808, "step": 425500 }, { "epoch": 33.38, "learning_rate": 0.0005, "loss": 1.528, "step": 425600 }, { "epoch": 33.38, "learning_rate": 0.0005, "loss": 1.4721, "step": 425700 }, { "epoch": 33.39, "learning_rate": 0.0005, "loss": 1.5043, "step": 425800 }, { "epoch": 33.4, "learning_rate": 0.0005, "loss": 1.4849, "step": 425900 }, { "epoch": 33.41, "learning_rate": 0.0005, "loss": 1.5054, "step": 426000 }, { "epoch": 33.41, "learning_rate": 0.0005, "loss": 1.4862, "step": 426100 }, { "epoch": 33.42, "learning_rate": 0.0005, "loss": 1.4764, "step": 426200 }, { "epoch": 33.43, "learning_rate": 0.0005, "loss": 1.4996, "step": 426300 }, { "epoch": 33.44, "learning_rate": 0.0005, "loss": 1.5226, "step": 426400 }, { "epoch": 33.45, "learning_rate": 0.0005, "loss": 1.4969, "step": 426500 }, { "epoch": 33.45, "learning_rate": 0.0005, "loss": 1.5382, "step": 426600 }, { "epoch": 33.46, "learning_rate": 0.0005, "loss": 1.5159, "step": 426700 }, { "epoch": 33.47, "learning_rate": 0.0005, "loss": 1.5188, "step": 426800 }, { "epoch": 33.48, "learning_rate": 0.0005, "loss": 1.5403, "step": 426900 }, { "epoch": 33.48, "learning_rate": 0.0005, "loss": 1.5021, "step": 427000 }, { "epoch": 33.49, "learning_rate": 0.0005, "loss": 1.5213, "step": 427100 }, { "epoch": 33.5, "learning_rate": 0.0005, "loss": 1.5093, "step": 427200 }, { "epoch": 33.51, "learning_rate": 0.0005, "loss": 1.5155, "step": 427300 }, { "epoch": 33.52, "learning_rate": 0.0005, "loss": 1.5006, "step": 427400 }, { "epoch": 33.52, "learning_rate": 0.0005, "loss": 1.5207, "step": 427500 }, { "epoch": 33.53, "learning_rate": 0.0005, "loss": 1.526, "step": 427600 }, { "epoch": 33.54, "learning_rate": 0.0005, "loss": 1.505, "step": 427700 }, { "epoch": 33.55, "learning_rate": 0.0005, "loss": 1.5, "step": 427800 }, { "epoch": 33.56, "learning_rate": 0.0005, "loss": 1.4947, "step": 427900 }, { "epoch": 33.56, "learning_rate": 0.0005, "loss": 1.4942, "step": 428000 }, { "epoch": 33.57, "learning_rate": 0.0005, "loss": 1.5054, "step": 428100 }, { "epoch": 33.58, "learning_rate": 0.0005, "loss": 1.5072, "step": 428200 }, { "epoch": 33.59, "learning_rate": 0.0005, "loss": 1.4926, "step": 428300 }, { "epoch": 33.59, "learning_rate": 0.0005, "loss": 1.4933, "step": 428400 }, { "epoch": 33.6, "learning_rate": 0.0005, "loss": 1.5019, "step": 428500 }, { "epoch": 33.61, "learning_rate": 0.0005, "loss": 1.503, "step": 428600 }, { "epoch": 33.62, "learning_rate": 0.0005, "loss": 1.4991, "step": 428700 }, { "epoch": 33.63, "learning_rate": 0.0005, "loss": 1.4853, "step": 428800 }, { "epoch": 33.63, "learning_rate": 0.0005, "loss": 1.5268, "step": 428900 }, { "epoch": 33.64, "learning_rate": 0.0005, "loss": 1.48, "step": 429000 }, { "epoch": 33.65, "learning_rate": 0.0005, "loss": 1.517, "step": 429100 }, { "epoch": 33.66, "learning_rate": 0.0005, "loss": 1.5225, "step": 429200 }, { "epoch": 33.67, "learning_rate": 0.0005, "loss": 1.5214, "step": 429300 }, { "epoch": 33.67, "learning_rate": 0.0005, "loss": 1.506, "step": 429400 }, { "epoch": 33.68, "learning_rate": 0.0005, "loss": 1.5391, "step": 429500 }, { "epoch": 33.69, "learning_rate": 0.0005, "loss": 1.5118, "step": 429600 }, { "epoch": 33.7, "learning_rate": 0.0005, "loss": 1.4951, "step": 429700 }, { "epoch": 33.7, "learning_rate": 0.0005, "loss": 1.5314, "step": 429800 }, { "epoch": 33.71, "learning_rate": 0.0005, "loss": 1.5346, "step": 429900 }, { "epoch": 33.72, "learning_rate": 0.0005, "loss": 1.5106, "step": 430000 }, { "epoch": 33.72, "eval_gen_len": 18.773549924958065, "eval_loss": 2.0536205768585205, "eval_rouge1": 35.5461, "eval_rouge2": 14.4953, "eval_rougeL": 29.2671, "eval_rougeLsum": 29.2571, "eval_runtime": 355.0911, "eval_samples_per_second": 31.899, "eval_steps_per_second": 1.994, "step": 430000 }, { "epoch": 33.73, "learning_rate": 0.0005, "loss": 1.5295, "step": 430100 }, { "epoch": 33.74, "learning_rate": 0.0005, "loss": 1.5017, "step": 430200 }, { "epoch": 33.74, "learning_rate": 0.0005, "loss": 1.5423, "step": 430300 }, { "epoch": 33.75, "learning_rate": 0.0005, "loss": 1.4981, "step": 430400 }, { "epoch": 33.76, "learning_rate": 0.0005, "loss": 1.5199, "step": 430500 }, { "epoch": 33.77, "learning_rate": 0.0005, "loss": 1.5, "step": 430600 }, { "epoch": 33.78, "learning_rate": 0.0005, "loss": 1.5106, "step": 430700 }, { "epoch": 33.78, "learning_rate": 0.0005, "loss": 1.5251, "step": 430800 }, { "epoch": 33.79, "learning_rate": 0.0005, "loss": 1.5132, "step": 430900 }, { "epoch": 33.8, "learning_rate": 0.0005, "loss": 1.5032, "step": 431000 }, { "epoch": 33.81, "learning_rate": 0.0005, "loss": 1.5136, "step": 431100 }, { "epoch": 33.81, "learning_rate": 0.0005, "loss": 1.5212, "step": 431200 }, { "epoch": 33.82, "learning_rate": 0.0005, "loss": 1.5235, "step": 431300 }, { "epoch": 33.83, "learning_rate": 0.0005, "loss": 1.4808, "step": 431400 }, { "epoch": 33.84, "learning_rate": 0.0005, "loss": 1.5457, "step": 431500 }, { "epoch": 33.85, "learning_rate": 0.0005, "loss": 1.5044, "step": 431600 }, { "epoch": 33.85, "learning_rate": 0.0005, "loss": 1.5327, "step": 431700 }, { "epoch": 33.86, "learning_rate": 0.0005, "loss": 1.5262, "step": 431800 }, { "epoch": 33.87, "learning_rate": 0.0005, "loss": 1.5198, "step": 431900 }, { "epoch": 33.88, "learning_rate": 0.0005, "loss": 1.5182, "step": 432000 }, { "epoch": 33.88, "learning_rate": 0.0005, "loss": 1.5306, "step": 432100 }, { "epoch": 33.89, "learning_rate": 0.0005, "loss": 1.5222, "step": 432200 }, { "epoch": 33.9, "learning_rate": 0.0005, "loss": 1.488, "step": 432300 }, { "epoch": 33.91, "learning_rate": 0.0005, "loss": 1.5269, "step": 432400 }, { "epoch": 33.92, "learning_rate": 0.0005, "loss": 1.5264, "step": 432500 }, { "epoch": 33.92, "learning_rate": 0.0005, "loss": 1.5207, "step": 432600 }, { "epoch": 33.93, "learning_rate": 0.0005, "loss": 1.5209, "step": 432700 }, { "epoch": 33.94, "learning_rate": 0.0005, "loss": 1.5291, "step": 432800 }, { "epoch": 33.95, "learning_rate": 0.0005, "loss": 1.5382, "step": 432900 }, { "epoch": 33.96, "learning_rate": 0.0005, "loss": 1.5209, "step": 433000 }, { "epoch": 33.96, "learning_rate": 0.0005, "loss": 1.5115, "step": 433100 }, { "epoch": 33.97, "learning_rate": 0.0005, "loss": 1.5056, "step": 433200 }, { "epoch": 33.98, "learning_rate": 0.0005, "loss": 1.5244, "step": 433300 }, { "epoch": 33.99, "learning_rate": 0.0005, "loss": 1.5259, "step": 433400 }, { "epoch": 33.99, "learning_rate": 0.0005, "loss": 1.5489, "step": 433500 }, { "epoch": 34.0, "learning_rate": 0.0005, "loss": 1.4757, "step": 433600 }, { "epoch": 34.01, "learning_rate": 0.0005, "loss": 1.4282, "step": 433700 }, { "epoch": 34.02, "learning_rate": 0.0005, "loss": 1.4352, "step": 433800 }, { "epoch": 34.03, "learning_rate": 0.0005, "loss": 1.4427, "step": 433900 }, { "epoch": 34.03, "learning_rate": 0.0005, "loss": 1.4662, "step": 434000 }, { "epoch": 34.04, "learning_rate": 0.0005, "loss": 1.4606, "step": 434100 }, { "epoch": 34.05, "learning_rate": 0.0005, "loss": 1.4774, "step": 434200 }, { "epoch": 34.06, "learning_rate": 0.0005, "loss": 1.4842, "step": 434300 }, { "epoch": 34.07, "learning_rate": 0.0005, "loss": 1.452, "step": 434400 }, { "epoch": 34.07, "learning_rate": 0.0005, "loss": 1.4619, "step": 434500 }, { "epoch": 34.08, "learning_rate": 0.0005, "loss": 1.4472, "step": 434600 }, { "epoch": 34.09, "learning_rate": 0.0005, "loss": 1.4587, "step": 434700 }, { "epoch": 34.1, "learning_rate": 0.0005, "loss": 1.4629, "step": 434800 }, { "epoch": 34.1, "learning_rate": 0.0005, "loss": 1.4664, "step": 434900 }, { "epoch": 34.11, "learning_rate": 0.0005, "loss": 1.4545, "step": 435000 }, { "epoch": 34.12, "learning_rate": 0.0005, "loss": 1.466, "step": 435100 }, { "epoch": 34.13, "learning_rate": 0.0005, "loss": 1.4531, "step": 435200 }, { "epoch": 34.14, "learning_rate": 0.0005, "loss": 1.4366, "step": 435300 }, { "epoch": 34.14, "learning_rate": 0.0005, "loss": 1.4653, "step": 435400 }, { "epoch": 34.15, "learning_rate": 0.0005, "loss": 1.4787, "step": 435500 }, { "epoch": 34.16, "learning_rate": 0.0005, "loss": 1.4584, "step": 435600 }, { "epoch": 34.17, "learning_rate": 0.0005, "loss": 1.4645, "step": 435700 }, { "epoch": 34.18, "learning_rate": 0.0005, "loss": 1.4765, "step": 435800 }, { "epoch": 34.18, "learning_rate": 0.0005, "loss": 1.4918, "step": 435900 }, { "epoch": 34.19, "learning_rate": 0.0005, "loss": 1.4779, "step": 436000 }, { "epoch": 34.2, "learning_rate": 0.0005, "loss": 1.4586, "step": 436100 }, { "epoch": 34.21, "learning_rate": 0.0005, "loss": 1.4688, "step": 436200 }, { "epoch": 34.21, "learning_rate": 0.0005, "loss": 1.4623, "step": 436300 }, { "epoch": 34.22, "learning_rate": 0.0005, "loss": 1.4582, "step": 436400 }, { "epoch": 34.23, "learning_rate": 0.0005, "loss": 1.4566, "step": 436500 }, { "epoch": 34.24, "learning_rate": 0.0005, "loss": 1.4839, "step": 436600 }, { "epoch": 34.25, "learning_rate": 0.0005, "loss": 1.4723, "step": 436700 }, { "epoch": 34.25, "learning_rate": 0.0005, "loss": 1.4567, "step": 436800 }, { "epoch": 34.26, "learning_rate": 0.0005, "loss": 1.4795, "step": 436900 }, { "epoch": 34.27, "learning_rate": 0.0005, "loss": 1.4614, "step": 437000 }, { "epoch": 34.28, "learning_rate": 0.0005, "loss": 1.4889, "step": 437100 }, { "epoch": 34.28, "learning_rate": 0.0005, "loss": 1.472, "step": 437200 }, { "epoch": 34.29, "learning_rate": 0.0005, "loss": 1.4718, "step": 437300 }, { "epoch": 34.3, "learning_rate": 0.0005, "loss": 1.4689, "step": 437400 }, { "epoch": 34.31, "learning_rate": 0.0005, "loss": 1.4568, "step": 437500 }, { "epoch": 34.32, "learning_rate": 0.0005, "loss": 1.4908, "step": 437600 }, { "epoch": 34.32, "learning_rate": 0.0005, "loss": 1.4646, "step": 437700 }, { "epoch": 34.33, "learning_rate": 0.0005, "loss": 1.4859, "step": 437800 }, { "epoch": 34.34, "learning_rate": 0.0005, "loss": 1.4901, "step": 437900 }, { "epoch": 34.35, "learning_rate": 0.0005, "loss": 1.4836, "step": 438000 }, { "epoch": 34.36, "learning_rate": 0.0005, "loss": 1.4968, "step": 438100 }, { "epoch": 34.36, "learning_rate": 0.0005, "loss": 1.4828, "step": 438200 }, { "epoch": 34.37, "learning_rate": 0.0005, "loss": 1.4885, "step": 438300 }, { "epoch": 34.38, "learning_rate": 0.0005, "loss": 1.4761, "step": 438400 }, { "epoch": 34.39, "learning_rate": 0.0005, "loss": 1.5173, "step": 438500 }, { "epoch": 34.39, "learning_rate": 0.0005, "loss": 1.4909, "step": 438600 }, { "epoch": 34.4, "learning_rate": 0.0005, "loss": 1.4682, "step": 438700 }, { "epoch": 34.41, "learning_rate": 0.0005, "loss": 1.5019, "step": 438800 }, { "epoch": 34.42, "learning_rate": 0.0005, "loss": 1.5032, "step": 438900 }, { "epoch": 34.43, "learning_rate": 0.0005, "loss": 1.5024, "step": 439000 }, { "epoch": 34.43, "learning_rate": 0.0005, "loss": 1.4704, "step": 439100 }, { "epoch": 34.44, "learning_rate": 0.0005, "loss": 1.4926, "step": 439200 }, { "epoch": 34.45, "learning_rate": 0.0005, "loss": 1.4694, "step": 439300 }, { "epoch": 34.46, "learning_rate": 0.0005, "loss": 1.5041, "step": 439400 }, { "epoch": 34.47, "learning_rate": 0.0005, "loss": 1.4666, "step": 439500 }, { "epoch": 34.47, "learning_rate": 0.0005, "loss": 1.5155, "step": 439600 }, { "epoch": 34.48, "learning_rate": 0.0005, "loss": 1.4776, "step": 439700 }, { "epoch": 34.49, "learning_rate": 0.0005, "loss": 1.4833, "step": 439800 }, { "epoch": 34.5, "learning_rate": 0.0005, "loss": 1.4878, "step": 439900 }, { "epoch": 34.5, "learning_rate": 0.0005, "loss": 1.5018, "step": 440000 }, { "epoch": 34.5, "eval_gen_len": 18.794385097554517, "eval_loss": 2.0479230880737305, "eval_rouge1": 35.8012, "eval_rouge2": 14.6585, "eval_rougeL": 29.4215, "eval_rougeLsum": 29.4088, "eval_runtime": 359.8484, "eval_samples_per_second": 31.477, "eval_steps_per_second": 1.967, "step": 440000 }, { "epoch": 34.51, "learning_rate": 0.0005, "loss": 1.476, "step": 440100 }, { "epoch": 34.52, "learning_rate": 0.0005, "loss": 1.5182, "step": 440200 }, { "epoch": 34.53, "learning_rate": 0.0005, "loss": 1.4924, "step": 440300 }, { "epoch": 34.54, "learning_rate": 0.0005, "loss": 1.482, "step": 440400 }, { "epoch": 34.54, "learning_rate": 0.0005, "loss": 1.4905, "step": 440500 }, { "epoch": 34.55, "learning_rate": 0.0005, "loss": 1.4989, "step": 440600 }, { "epoch": 34.56, "learning_rate": 0.0005, "loss": 1.5047, "step": 440700 }, { "epoch": 34.57, "learning_rate": 0.0005, "loss": 1.5066, "step": 440800 }, { "epoch": 34.57, "learning_rate": 0.0005, "loss": 1.4814, "step": 440900 }, { "epoch": 34.58, "learning_rate": 0.0005, "loss": 1.4962, "step": 441000 }, { "epoch": 34.59, "learning_rate": 0.0005, "loss": 1.4896, "step": 441100 }, { "epoch": 34.6, "learning_rate": 0.0005, "loss": 1.5113, "step": 441200 }, { "epoch": 34.61, "learning_rate": 0.0005, "loss": 1.5018, "step": 441300 }, { "epoch": 34.61, "learning_rate": 0.0005, "loss": 1.5106, "step": 441400 }, { "epoch": 34.62, "learning_rate": 0.0005, "loss": 1.5057, "step": 441500 }, { "epoch": 34.63, "learning_rate": 0.0005, "loss": 1.4933, "step": 441600 }, { "epoch": 34.64, "learning_rate": 0.0005, "loss": 1.4933, "step": 441700 }, { "epoch": 34.65, "learning_rate": 0.0005, "loss": 1.5129, "step": 441800 }, { "epoch": 34.65, "learning_rate": 0.0005, "loss": 1.5032, "step": 441900 }, { "epoch": 34.66, "learning_rate": 0.0005, "loss": 1.5064, "step": 442000 }, { "epoch": 34.67, "learning_rate": 0.0005, "loss": 1.5261, "step": 442100 }, { "epoch": 34.68, "learning_rate": 0.0005, "loss": 1.5168, "step": 442200 }, { "epoch": 34.68, "learning_rate": 0.0005, "loss": 1.4994, "step": 442300 }, { "epoch": 34.69, "learning_rate": 0.0005, "loss": 1.5022, "step": 442400 }, { "epoch": 34.7, "learning_rate": 0.0005, "loss": 1.5251, "step": 442500 }, { "epoch": 34.71, "learning_rate": 0.0005, "loss": 1.5243, "step": 442600 }, { "epoch": 34.72, "learning_rate": 0.0005, "loss": 1.5109, "step": 442700 }, { "epoch": 34.72, "learning_rate": 0.0005, "loss": 1.4982, "step": 442800 }, { "epoch": 34.73, "learning_rate": 0.0005, "loss": 1.5158, "step": 442900 }, { "epoch": 34.74, "learning_rate": 0.0005, "loss": 1.509, "step": 443000 }, { "epoch": 34.75, "learning_rate": 0.0005, "loss": 1.5081, "step": 443100 }, { "epoch": 34.76, "learning_rate": 0.0005, "loss": 1.5148, "step": 443200 }, { "epoch": 34.76, "learning_rate": 0.0005, "loss": 1.4927, "step": 443300 }, { "epoch": 34.77, "learning_rate": 0.0005, "loss": 1.5106, "step": 443400 }, { "epoch": 34.78, "learning_rate": 0.0005, "loss": 1.4789, "step": 443500 }, { "epoch": 34.79, "learning_rate": 0.0005, "loss": 1.5104, "step": 443600 }, { "epoch": 34.79, "learning_rate": 0.0005, "loss": 1.5178, "step": 443700 }, { "epoch": 34.8, "learning_rate": 0.0005, "loss": 1.5068, "step": 443800 }, { "epoch": 34.81, "learning_rate": 0.0005, "loss": 1.4998, "step": 443900 }, { "epoch": 34.82, "learning_rate": 0.0005, "loss": 1.4906, "step": 444000 }, { "epoch": 34.83, "learning_rate": 0.0005, "loss": 1.5059, "step": 444100 }, { "epoch": 34.83, "learning_rate": 0.0005, "loss": 1.4998, "step": 444200 }, { "epoch": 34.84, "learning_rate": 0.0005, "loss": 1.5098, "step": 444300 }, { "epoch": 34.85, "learning_rate": 0.0005, "loss": 1.5164, "step": 444400 }, { "epoch": 34.86, "learning_rate": 0.0005, "loss": 1.5034, "step": 444500 }, { "epoch": 34.87, "learning_rate": 0.0005, "loss": 1.5082, "step": 444600 }, { "epoch": 34.87, "learning_rate": 0.0005, "loss": 1.5303, "step": 444700 }, { "epoch": 34.88, "learning_rate": 0.0005, "loss": 1.5157, "step": 444800 }, { "epoch": 34.89, "learning_rate": 0.0005, "loss": 1.5325, "step": 444900 }, { "epoch": 34.9, "learning_rate": 0.0005, "loss": 1.5343, "step": 445000 }, { "epoch": 34.9, "learning_rate": 0.0005, "loss": 1.516, "step": 445100 }, { "epoch": 34.91, "learning_rate": 0.0005, "loss": 1.5298, "step": 445200 }, { "epoch": 34.92, "learning_rate": 0.0005, "loss": 1.5331, "step": 445300 }, { "epoch": 34.93, "learning_rate": 0.0005, "loss": 1.5028, "step": 445400 }, { "epoch": 34.94, "learning_rate": 0.0005, "loss": 1.5004, "step": 445500 }, { "epoch": 34.94, "learning_rate": 0.0005, "loss": 1.5088, "step": 445600 }, { "epoch": 34.95, "learning_rate": 0.0005, "loss": 1.4988, "step": 445700 }, { "epoch": 34.96, "learning_rate": 0.0005, "loss": 1.49, "step": 445800 }, { "epoch": 34.97, "learning_rate": 0.0005, "loss": 1.5205, "step": 445900 }, { "epoch": 34.97, "learning_rate": 0.0005, "loss": 1.4997, "step": 446000 }, { "epoch": 34.98, "learning_rate": 0.0005, "loss": 1.5047, "step": 446100 }, { "epoch": 34.99, "learning_rate": 0.0005, "loss": 1.5249, "step": 446200 }, { "epoch": 35.0, "learning_rate": 0.0005, "loss": 1.5226, "step": 446300 }, { "epoch": 35.01, "learning_rate": 0.0005, "loss": 1.4523, "step": 446400 }, { "epoch": 35.01, "learning_rate": 0.0005, "loss": 1.4336, "step": 446500 }, { "epoch": 35.02, "learning_rate": 0.0005, "loss": 1.4534, "step": 446600 }, { "epoch": 35.03, "learning_rate": 0.0005, "loss": 1.4535, "step": 446700 }, { "epoch": 35.04, "learning_rate": 0.0005, "loss": 1.4416, "step": 446800 }, { "epoch": 35.05, "learning_rate": 0.0005, "loss": 1.4218, "step": 446900 }, { "epoch": 35.05, "learning_rate": 0.0005, "loss": 1.4596, "step": 447000 }, { "epoch": 35.06, "learning_rate": 0.0005, "loss": 1.4474, "step": 447100 }, { "epoch": 35.07, "learning_rate": 0.0005, "loss": 1.4481, "step": 447200 }, { "epoch": 35.08, "learning_rate": 0.0005, "loss": 1.4636, "step": 447300 }, { "epoch": 35.08, "learning_rate": 0.0005, "loss": 1.4575, "step": 447400 }, { "epoch": 35.09, "learning_rate": 0.0005, "loss": 1.484, "step": 447500 }, { "epoch": 35.1, "learning_rate": 0.0005, "loss": 1.453, "step": 447600 }, { "epoch": 35.11, "learning_rate": 0.0005, "loss": 1.4408, "step": 447700 }, { "epoch": 35.12, "learning_rate": 0.0005, "loss": 1.4777, "step": 447800 }, { "epoch": 35.12, "learning_rate": 0.0005, "loss": 1.4703, "step": 447900 }, { "epoch": 35.13, "learning_rate": 0.0005, "loss": 1.4324, "step": 448000 }, { "epoch": 35.14, "learning_rate": 0.0005, "loss": 1.4664, "step": 448100 }, { "epoch": 35.15, "learning_rate": 0.0005, "loss": 1.4364, "step": 448200 }, { "epoch": 35.16, "learning_rate": 0.0005, "loss": 1.4724, "step": 448300 }, { "epoch": 35.16, "learning_rate": 0.0005, "loss": 1.4512, "step": 448400 }, { "epoch": 35.17, "learning_rate": 0.0005, "loss": 1.4756, "step": 448500 }, { "epoch": 35.18, "learning_rate": 0.0005, "loss": 1.4784, "step": 448600 }, { "epoch": 35.19, "learning_rate": 0.0005, "loss": 1.4541, "step": 448700 }, { "epoch": 35.19, "learning_rate": 0.0005, "loss": 1.4614, "step": 448800 }, { "epoch": 35.2, "learning_rate": 0.0005, "loss": 1.4543, "step": 448900 }, { "epoch": 35.21, "learning_rate": 0.0005, "loss": 1.4481, "step": 449000 }, { "epoch": 35.22, "learning_rate": 0.0005, "loss": 1.4598, "step": 449100 }, { "epoch": 35.23, "learning_rate": 0.0005, "loss": 1.4563, "step": 449200 }, { "epoch": 35.23, "learning_rate": 0.0005, "loss": 1.4464, "step": 449300 }, { "epoch": 35.24, "learning_rate": 0.0005, "loss": 1.4748, "step": 449400 }, { "epoch": 35.25, "learning_rate": 0.0005, "loss": 1.4759, "step": 449500 }, { "epoch": 35.26, "learning_rate": 0.0005, "loss": 1.4241, "step": 449600 }, { "epoch": 35.27, "learning_rate": 0.0005, "loss": 1.4666, "step": 449700 }, { "epoch": 35.27, "learning_rate": 0.0005, "loss": 1.4687, "step": 449800 }, { "epoch": 35.28, "learning_rate": 0.0005, "loss": 1.4675, "step": 449900 }, { "epoch": 35.29, "learning_rate": 0.0005, "loss": 1.455, "step": 450000 }, { "epoch": 35.29, "eval_gen_len": 18.781495541626203, "eval_loss": 2.0648272037506104, "eval_rouge1": 35.5722, "eval_rouge2": 14.5725, "eval_rougeL": 29.2816, "eval_rougeLsum": 29.2646, "eval_runtime": 355.571, "eval_samples_per_second": 31.856, "eval_steps_per_second": 1.991, "step": 450000 }, { "epoch": 35.3, "learning_rate": 0.0005, "loss": 1.4855, "step": 450100 }, { "epoch": 35.3, "learning_rate": 0.0005, "loss": 1.4581, "step": 450200 }, { "epoch": 35.31, "learning_rate": 0.0005, "loss": 1.4779, "step": 450300 }, { "epoch": 35.32, "learning_rate": 0.0005, "loss": 1.4595, "step": 450400 }, { "epoch": 35.33, "learning_rate": 0.0005, "loss": 1.4915, "step": 450500 }, { "epoch": 35.34, "learning_rate": 0.0005, "loss": 1.4983, "step": 450600 }, { "epoch": 35.34, "learning_rate": 0.0005, "loss": 1.4711, "step": 450700 }, { "epoch": 35.35, "learning_rate": 0.0005, "loss": 1.4428, "step": 450800 }, { "epoch": 35.36, "learning_rate": 0.0005, "loss": 1.4672, "step": 450900 }, { "epoch": 35.37, "learning_rate": 0.0005, "loss": 1.4455, "step": 451000 }, { "epoch": 35.37, "learning_rate": 0.0005, "loss": 1.467, "step": 451100 }, { "epoch": 35.38, "learning_rate": 0.0005, "loss": 1.459, "step": 451200 }, { "epoch": 35.39, "learning_rate": 0.0005, "loss": 1.4694, "step": 451300 }, { "epoch": 35.4, "learning_rate": 0.0005, "loss": 1.4676, "step": 451400 }, { "epoch": 35.41, "learning_rate": 0.0005, "loss": 1.4714, "step": 451500 }, { "epoch": 35.41, "learning_rate": 0.0005, "loss": 1.495, "step": 451600 }, { "epoch": 35.42, "learning_rate": 0.0005, "loss": 1.4767, "step": 451700 }, { "epoch": 35.43, "learning_rate": 0.0005, "loss": 1.4694, "step": 451800 }, { "epoch": 35.44, "learning_rate": 0.0005, "loss": 1.4619, "step": 451900 }, { "epoch": 35.45, "learning_rate": 0.0005, "loss": 1.4719, "step": 452000 }, { "epoch": 35.45, "learning_rate": 0.0005, "loss": 1.4722, "step": 452100 }, { "epoch": 35.46, "learning_rate": 0.0005, "loss": 1.4741, "step": 452200 }, { "epoch": 35.47, "learning_rate": 0.0005, "loss": 1.4872, "step": 452300 }, { "epoch": 35.48, "learning_rate": 0.0005, "loss": 1.4847, "step": 452400 }, { "epoch": 35.48, "learning_rate": 0.0005, "loss": 1.4803, "step": 452500 }, { "epoch": 35.49, "learning_rate": 0.0005, "loss": 1.4905, "step": 452600 }, { "epoch": 35.5, "learning_rate": 0.0005, "loss": 1.4646, "step": 452700 }, { "epoch": 35.51, "learning_rate": 0.0005, "loss": 1.5042, "step": 452800 }, { "epoch": 35.52, "learning_rate": 0.0005, "loss": 1.4808, "step": 452900 }, { "epoch": 35.52, "learning_rate": 0.0005, "loss": 1.4739, "step": 453000 }, { "epoch": 35.53, "learning_rate": 0.0005, "loss": 1.4859, "step": 453100 }, { "epoch": 35.54, "learning_rate": 0.0005, "loss": 1.492, "step": 453200 }, { "epoch": 35.55, "learning_rate": 0.0005, "loss": 1.484, "step": 453300 }, { "epoch": 35.56, "learning_rate": 0.0005, "loss": 1.4576, "step": 453400 }, { "epoch": 35.56, "learning_rate": 0.0005, "loss": 1.4851, "step": 453500 }, { "epoch": 35.57, "learning_rate": 0.0005, "loss": 1.4735, "step": 453600 }, { "epoch": 35.58, "learning_rate": 0.0005, "loss": 1.4744, "step": 453700 }, { "epoch": 35.59, "learning_rate": 0.0005, "loss": 1.4876, "step": 453800 }, { "epoch": 35.59, "learning_rate": 0.0005, "loss": 1.4768, "step": 453900 }, { "epoch": 35.6, "learning_rate": 0.0005, "loss": 1.4672, "step": 454000 }, { "epoch": 35.61, "learning_rate": 0.0005, "loss": 1.4988, "step": 454100 }, { "epoch": 35.62, "learning_rate": 0.0005, "loss": 1.4867, "step": 454200 }, { "epoch": 35.63, "learning_rate": 0.0005, "loss": 1.4815, "step": 454300 }, { "epoch": 35.63, "learning_rate": 0.0005, "loss": 1.4974, "step": 454400 }, { "epoch": 35.64, "learning_rate": 0.0005, "loss": 1.4821, "step": 454500 }, { "epoch": 35.65, "learning_rate": 0.0005, "loss": 1.4884, "step": 454600 }, { "epoch": 35.66, "learning_rate": 0.0005, "loss": 1.4819, "step": 454700 }, { "epoch": 35.66, "learning_rate": 0.0005, "loss": 1.492, "step": 454800 }, { "epoch": 35.67, "learning_rate": 0.0005, "loss": 1.4862, "step": 454900 }, { "epoch": 35.68, "learning_rate": 0.0005, "loss": 1.4751, "step": 455000 }, { "epoch": 35.69, "learning_rate": 0.0005, "loss": 1.4783, "step": 455100 }, { "epoch": 35.7, "learning_rate": 0.0005, "loss": 1.4948, "step": 455200 }, { "epoch": 35.7, "learning_rate": 0.0005, "loss": 1.5175, "step": 455300 }, { "epoch": 35.71, "learning_rate": 0.0005, "loss": 1.5152, "step": 455400 }, { "epoch": 35.72, "learning_rate": 0.0005, "loss": 1.4671, "step": 455500 }, { "epoch": 35.73, "learning_rate": 0.0005, "loss": 1.4956, "step": 455600 }, { "epoch": 35.74, "learning_rate": 0.0005, "loss": 1.5073, "step": 455700 }, { "epoch": 35.74, "learning_rate": 0.0005, "loss": 1.4786, "step": 455800 }, { "epoch": 35.75, "learning_rate": 0.0005, "loss": 1.5124, "step": 455900 }, { "epoch": 35.76, "learning_rate": 0.0005, "loss": 1.4967, "step": 456000 }, { "epoch": 35.77, "learning_rate": 0.0005, "loss": 1.4846, "step": 456100 }, { "epoch": 35.77, "learning_rate": 0.0005, "loss": 1.4982, "step": 456200 }, { "epoch": 35.78, "learning_rate": 0.0005, "loss": 1.5005, "step": 456300 }, { "epoch": 35.79, "learning_rate": 0.0005, "loss": 1.504, "step": 456400 }, { "epoch": 35.8, "learning_rate": 0.0005, "loss": 1.4895, "step": 456500 }, { "epoch": 35.81, "learning_rate": 0.0005, "loss": 1.4705, "step": 456600 }, { "epoch": 35.81, "learning_rate": 0.0005, "loss": 1.4976, "step": 456700 }, { "epoch": 35.82, "learning_rate": 0.0005, "loss": 1.4959, "step": 456800 }, { "epoch": 35.83, "learning_rate": 0.0005, "loss": 1.4716, "step": 456900 }, { "epoch": 35.84, "learning_rate": 0.0005, "loss": 1.4709, "step": 457000 }, { "epoch": 35.85, "learning_rate": 0.0005, "loss": 1.5104, "step": 457100 }, { "epoch": 35.85, "learning_rate": 0.0005, "loss": 1.5258, "step": 457200 }, { "epoch": 35.86, "learning_rate": 0.0005, "loss": 1.4968, "step": 457300 }, { "epoch": 35.87, "learning_rate": 0.0005, "loss": 1.5232, "step": 457400 }, { "epoch": 35.88, "learning_rate": 0.0005, "loss": 1.5145, "step": 457500 }, { "epoch": 35.88, "learning_rate": 0.0005, "loss": 1.5133, "step": 457600 }, { "epoch": 35.89, "learning_rate": 0.0005, "loss": 1.4737, "step": 457700 }, { "epoch": 35.9, "learning_rate": 0.0005, "loss": 1.513, "step": 457800 }, { "epoch": 35.91, "learning_rate": 0.0005, "loss": 1.5141, "step": 457900 }, { "epoch": 35.92, "learning_rate": 0.0005, "loss": 1.5234, "step": 458000 }, { "epoch": 35.92, "learning_rate": 0.0005, "loss": 1.5103, "step": 458100 }, { "epoch": 35.93, "learning_rate": 0.0005, "loss": 1.5067, "step": 458200 }, { "epoch": 35.94, "learning_rate": 0.0005, "loss": 1.4814, "step": 458300 }, { "epoch": 35.95, "learning_rate": 0.0005, "loss": 1.527, "step": 458400 }, { "epoch": 35.96, "learning_rate": 0.0005, "loss": 1.4959, "step": 458500 }, { "epoch": 35.96, "learning_rate": 0.0005, "loss": 1.48, "step": 458600 }, { "epoch": 35.97, "learning_rate": 0.0005, "loss": 1.499, "step": 458700 }, { "epoch": 35.98, "learning_rate": 0.0005, "loss": 1.5181, "step": 458800 }, { "epoch": 35.99, "learning_rate": 0.0005, "loss": 1.5087, "step": 458900 }, { "epoch": 35.99, "learning_rate": 0.0005, "loss": 1.5377, "step": 459000 }, { "epoch": 36.0, "learning_rate": 0.0005, "loss": 1.5162, "step": 459100 }, { "epoch": 36.01, "learning_rate": 0.0005, "loss": 1.4301, "step": 459200 }, { "epoch": 36.02, "learning_rate": 0.0005, "loss": 1.466, "step": 459300 }, { "epoch": 36.03, "learning_rate": 0.0005, "loss": 1.435, "step": 459400 }, { "epoch": 36.03, "learning_rate": 0.0005, "loss": 1.4635, "step": 459500 }, { "epoch": 36.04, "learning_rate": 0.0005, "loss": 1.4322, "step": 459600 }, { "epoch": 36.05, "learning_rate": 0.0005, "loss": 1.4237, "step": 459700 }, { "epoch": 36.06, "learning_rate": 0.0005, "loss": 1.4357, "step": 459800 }, { "epoch": 36.06, "learning_rate": 0.0005, "loss": 1.4613, "step": 459900 }, { "epoch": 36.07, "learning_rate": 0.0005, "loss": 1.4209, "step": 460000 }, { "epoch": 36.07, "eval_gen_len": 18.764368323474883, "eval_loss": 2.0837438106536865, "eval_rouge1": 35.6324, "eval_rouge2": 14.6282, "eval_rougeL": 29.3399, "eval_rougeLsum": 29.3299, "eval_runtime": 358.8308, "eval_samples_per_second": 31.566, "eval_steps_per_second": 1.973, "step": 460000 }, { "epoch": 36.08, "learning_rate": 0.0005, "loss": 1.4281, "step": 460100 }, { "epoch": 36.09, "learning_rate": 0.0005, "loss": 1.4115, "step": 460200 }, { "epoch": 36.1, "learning_rate": 0.0005, "loss": 1.4535, "step": 460300 }, { "epoch": 36.1, "learning_rate": 0.0005, "loss": 1.4266, "step": 460400 }, { "epoch": 36.11, "learning_rate": 0.0005, "loss": 1.446, "step": 460500 }, { "epoch": 36.12, "learning_rate": 0.0005, "loss": 1.4321, "step": 460600 }, { "epoch": 36.13, "learning_rate": 0.0005, "loss": 1.4412, "step": 460700 }, { "epoch": 36.14, "learning_rate": 0.0005, "loss": 1.4506, "step": 460800 }, { "epoch": 36.14, "learning_rate": 0.0005, "loss": 1.4445, "step": 460900 }, { "epoch": 36.15, "learning_rate": 0.0005, "loss": 1.454, "step": 461000 }, { "epoch": 36.16, "learning_rate": 0.0005, "loss": 1.4436, "step": 461100 }, { "epoch": 36.17, "learning_rate": 0.0005, "loss": 1.445, "step": 461200 }, { "epoch": 36.17, "learning_rate": 0.0005, "loss": 1.4234, "step": 461300 }, { "epoch": 36.18, "learning_rate": 0.0005, "loss": 1.425, "step": 461400 }, { "epoch": 36.19, "learning_rate": 0.0005, "loss": 1.4417, "step": 461500 }, { "epoch": 36.2, "learning_rate": 0.0005, "loss": 1.4736, "step": 461600 }, { "epoch": 36.21, "learning_rate": 0.0005, "loss": 1.4525, "step": 461700 }, { "epoch": 36.21, "learning_rate": 0.0005, "loss": 1.4606, "step": 461800 }, { "epoch": 36.22, "learning_rate": 0.0005, "loss": 1.4473, "step": 461900 }, { "epoch": 36.23, "learning_rate": 0.0005, "loss": 1.4424, "step": 462000 }, { "epoch": 36.24, "learning_rate": 0.0005, "loss": 1.4631, "step": 462100 }, { "epoch": 36.25, "learning_rate": 0.0005, "loss": 1.4441, "step": 462200 }, { "epoch": 36.25, "learning_rate": 0.0005, "loss": 1.4454, "step": 462300 }, { "epoch": 36.26, "learning_rate": 0.0005, "loss": 1.4588, "step": 462400 }, { "epoch": 36.27, "learning_rate": 0.0005, "loss": 1.4737, "step": 462500 }, { "epoch": 36.28, "learning_rate": 0.0005, "loss": 1.4574, "step": 462600 }, { "epoch": 36.28, "learning_rate": 0.0005, "loss": 1.4646, "step": 462700 }, { "epoch": 36.29, "learning_rate": 0.0005, "loss": 1.4646, "step": 462800 }, { "epoch": 36.3, "learning_rate": 0.0005, "loss": 1.4558, "step": 462900 }, { "epoch": 36.31, "learning_rate": 0.0005, "loss": 1.4674, "step": 463000 }, { "epoch": 36.32, "learning_rate": 0.0005, "loss": 1.4519, "step": 463100 }, { "epoch": 36.32, "learning_rate": 0.0005, "loss": 1.4642, "step": 463200 }, { "epoch": 36.33, "learning_rate": 0.0005, "loss": 1.4722, "step": 463300 }, { "epoch": 36.34, "learning_rate": 0.0005, "loss": 1.4422, "step": 463400 }, { "epoch": 36.35, "learning_rate": 0.0005, "loss": 1.4698, "step": 463500 }, { "epoch": 36.36, "learning_rate": 0.0005, "loss": 1.4571, "step": 463600 }, { "epoch": 36.36, "learning_rate": 0.0005, "loss": 1.4748, "step": 463700 }, { "epoch": 36.37, "learning_rate": 0.0005, "loss": 1.4678, "step": 463800 }, { "epoch": 36.38, "learning_rate": 0.0005, "loss": 1.4802, "step": 463900 }, { "epoch": 36.39, "learning_rate": 0.0005, "loss": 1.4493, "step": 464000 }, { "epoch": 36.39, "learning_rate": 0.0005, "loss": 1.4593, "step": 464100 }, { "epoch": 36.4, "learning_rate": 0.0005, "loss": 1.4984, "step": 464200 }, { "epoch": 36.41, "learning_rate": 0.0005, "loss": 1.456, "step": 464300 }, { "epoch": 36.42, "learning_rate": 0.0005, "loss": 1.4591, "step": 464400 }, { "epoch": 36.43, "learning_rate": 0.0005, "loss": 1.4638, "step": 464500 }, { "epoch": 36.43, "learning_rate": 0.0005, "loss": 1.4558, "step": 464600 }, { "epoch": 36.44, "learning_rate": 0.0005, "loss": 1.465, "step": 464700 }, { "epoch": 36.45, "learning_rate": 0.0005, "loss": 1.4669, "step": 464800 }, { "epoch": 36.46, "learning_rate": 0.0005, "loss": 1.4679, "step": 464900 }, { "epoch": 36.46, "learning_rate": 0.0005, "loss": 1.473, "step": 465000 }, { "epoch": 36.47, "learning_rate": 0.0005, "loss": 1.4825, "step": 465100 }, { "epoch": 36.48, "learning_rate": 0.0005, "loss": 1.4514, "step": 465200 }, { "epoch": 36.49, "learning_rate": 0.0005, "loss": 1.4839, "step": 465300 }, { "epoch": 36.5, "learning_rate": 0.0005, "loss": 1.4616, "step": 465400 }, { "epoch": 36.5, "learning_rate": 0.0005, "loss": 1.4662, "step": 465500 }, { "epoch": 36.51, "learning_rate": 0.0005, "loss": 1.4781, "step": 465600 }, { "epoch": 36.52, "learning_rate": 0.0005, "loss": 1.4787, "step": 465700 }, { "epoch": 36.53, "learning_rate": 0.0005, "loss": 1.4712, "step": 465800 }, { "epoch": 36.54, "learning_rate": 0.0005, "loss": 1.4667, "step": 465900 }, { "epoch": 36.54, "learning_rate": 0.0005, "loss": 1.4688, "step": 466000 }, { "epoch": 36.55, "learning_rate": 0.0005, "loss": 1.4499, "step": 466100 }, { "epoch": 36.56, "learning_rate": 0.0005, "loss": 1.4675, "step": 466200 }, { "epoch": 36.57, "learning_rate": 0.0005, "loss": 1.4699, "step": 466300 }, { "epoch": 36.57, "learning_rate": 0.0005, "loss": 1.4869, "step": 466400 }, { "epoch": 36.58, "learning_rate": 0.0005, "loss": 1.4657, "step": 466500 }, { "epoch": 36.59, "learning_rate": 0.0005, "loss": 1.46, "step": 466600 }, { "epoch": 36.6, "learning_rate": 0.0005, "loss": 1.4743, "step": 466700 }, { "epoch": 36.61, "learning_rate": 0.0005, "loss": 1.4902, "step": 466800 }, { "epoch": 36.61, "learning_rate": 0.0005, "loss": 1.463, "step": 466900 }, { "epoch": 36.62, "learning_rate": 0.0005, "loss": 1.4619, "step": 467000 }, { "epoch": 36.63, "learning_rate": 0.0005, "loss": 1.4819, "step": 467100 }, { "epoch": 36.64, "learning_rate": 0.0005, "loss": 1.4381, "step": 467200 }, { "epoch": 36.65, "learning_rate": 0.0005, "loss": 1.4876, "step": 467300 }, { "epoch": 36.65, "learning_rate": 0.0005, "loss": 1.4934, "step": 467400 }, { "epoch": 36.66, "learning_rate": 0.0005, "loss": 1.4673, "step": 467500 }, { "epoch": 36.67, "learning_rate": 0.0005, "loss": 1.4811, "step": 467600 }, { "epoch": 36.68, "learning_rate": 0.0005, "loss": 1.4584, "step": 467700 }, { "epoch": 36.68, "learning_rate": 0.0005, "loss": 1.4824, "step": 467800 }, { "epoch": 36.69, "learning_rate": 0.0005, "loss": 1.5057, "step": 467900 }, { "epoch": 36.7, "learning_rate": 0.0005, "loss": 1.4781, "step": 468000 }, { "epoch": 36.71, "learning_rate": 0.0005, "loss": 1.4791, "step": 468100 }, { "epoch": 36.72, "learning_rate": 0.0005, "loss": 1.507, "step": 468200 }, { "epoch": 36.72, "learning_rate": 0.0005, "loss": 1.4879, "step": 468300 }, { "epoch": 36.73, "learning_rate": 0.0005, "loss": 1.4758, "step": 468400 }, { "epoch": 36.74, "learning_rate": 0.0005, "loss": 1.4805, "step": 468500 }, { "epoch": 36.75, "learning_rate": 0.0005, "loss": 1.5019, "step": 468600 }, { "epoch": 36.76, "learning_rate": 0.0005, "loss": 1.4877, "step": 468700 }, { "epoch": 36.76, "learning_rate": 0.0005, "loss": 1.4739, "step": 468800 }, { "epoch": 36.77, "learning_rate": 0.0005, "loss": 1.456, "step": 468900 }, { "epoch": 36.78, "learning_rate": 0.0005, "loss": 1.4748, "step": 469000 }, { "epoch": 36.79, "learning_rate": 0.0005, "loss": 1.4974, "step": 469100 }, { "epoch": 36.79, "learning_rate": 0.0005, "loss": 1.4883, "step": 469200 }, { "epoch": 36.8, "learning_rate": 0.0005, "loss": 1.4729, "step": 469300 }, { "epoch": 36.81, "learning_rate": 0.0005, "loss": 1.485, "step": 469400 }, { "epoch": 36.82, "learning_rate": 0.0005, "loss": 1.4733, "step": 469500 }, { "epoch": 36.83, "learning_rate": 0.0005, "loss": 1.4927, "step": 469600 }, { "epoch": 36.83, "learning_rate": 0.0005, "loss": 1.4826, "step": 469700 }, { "epoch": 36.84, "learning_rate": 0.0005, "loss": 1.5009, "step": 469800 }, { "epoch": 36.85, "learning_rate": 0.0005, "loss": 1.4938, "step": 469900 }, { "epoch": 36.86, "learning_rate": 0.0005, "loss": 1.4894, "step": 470000 }, { "epoch": 36.86, "eval_gen_len": 18.78617462699744, "eval_loss": 2.0626096725463867, "eval_rouge1": 35.7293, "eval_rouge2": 14.5534, "eval_rougeL": 29.39, "eval_rougeLsum": 29.3792, "eval_runtime": 354.5618, "eval_samples_per_second": 31.946, "eval_steps_per_second": 1.997, "step": 470000 }, { "epoch": 36.86, "learning_rate": 0.0005, "loss": 1.506, "step": 470100 }, { "epoch": 36.87, "learning_rate": 0.0005, "loss": 1.4744, "step": 470200 }, { "epoch": 36.88, "learning_rate": 0.0005, "loss": 1.4895, "step": 470300 }, { "epoch": 36.89, "learning_rate": 0.0005, "loss": 1.5299, "step": 470400 }, { "epoch": 36.9, "learning_rate": 0.0005, "loss": 1.4758, "step": 470500 }, { "epoch": 36.9, "learning_rate": 0.0005, "loss": 1.4826, "step": 470600 }, { "epoch": 36.91, "learning_rate": 0.0005, "loss": 1.4854, "step": 470700 }, { "epoch": 36.92, "learning_rate": 0.0005, "loss": 1.4919, "step": 470800 }, { "epoch": 36.93, "learning_rate": 0.0005, "loss": 1.4969, "step": 470900 }, { "epoch": 36.94, "learning_rate": 0.0005, "loss": 1.5361, "step": 471000 }, { "epoch": 36.94, "learning_rate": 0.0005, "loss": 1.4955, "step": 471100 }, { "epoch": 36.95, "learning_rate": 0.0005, "loss": 1.4918, "step": 471200 }, { "epoch": 36.96, "learning_rate": 0.0005, "loss": 1.4706, "step": 471300 }, { "epoch": 36.97, "learning_rate": 0.0005, "loss": 1.508, "step": 471400 }, { "epoch": 36.97, "learning_rate": 0.0005, "loss": 1.4943, "step": 471500 }, { "epoch": 36.98, "learning_rate": 0.0005, "loss": 1.5189, "step": 471600 }, { "epoch": 36.99, "learning_rate": 0.0005, "loss": 1.5013, "step": 471700 }, { "epoch": 37.0, "learning_rate": 0.0005, "loss": 1.4928, "step": 471800 }, { "epoch": 37.01, "learning_rate": 0.0005, "loss": 1.4571, "step": 471900 }, { "epoch": 37.01, "learning_rate": 0.0005, "loss": 1.422, "step": 472000 }, { "epoch": 37.02, "learning_rate": 0.0005, "loss": 1.3969, "step": 472100 }, { "epoch": 37.03, "learning_rate": 0.0005, "loss": 1.4296, "step": 472200 }, { "epoch": 37.04, "learning_rate": 0.0005, "loss": 1.4178, "step": 472300 }, { "epoch": 37.05, "learning_rate": 0.0005, "loss": 1.4167, "step": 472400 }, { "epoch": 37.05, "learning_rate": 0.0005, "loss": 1.4105, "step": 472500 }, { "epoch": 37.06, "learning_rate": 0.0005, "loss": 1.4229, "step": 472600 }, { "epoch": 37.07, "learning_rate": 0.0005, "loss": 1.427, "step": 472700 }, { "epoch": 37.08, "learning_rate": 0.0005, "loss": 1.4127, "step": 472800 }, { "epoch": 37.08, "learning_rate": 0.0005, "loss": 1.3988, "step": 472900 }, { "epoch": 37.09, "learning_rate": 0.0005, "loss": 1.4349, "step": 473000 }, { "epoch": 37.1, "learning_rate": 0.0005, "loss": 1.383, "step": 473100 }, { "epoch": 37.11, "learning_rate": 0.0005, "loss": 1.4186, "step": 473200 }, { "epoch": 37.12, "learning_rate": 0.0005, "loss": 1.4331, "step": 473300 }, { "epoch": 37.12, "learning_rate": 0.0005, "loss": 1.4529, "step": 473400 }, { "epoch": 37.13, "learning_rate": 0.0005, "loss": 1.4127, "step": 473500 }, { "epoch": 37.14, "learning_rate": 0.0005, "loss": 1.4333, "step": 473600 }, { "epoch": 37.15, "learning_rate": 0.0005, "loss": 1.4442, "step": 473700 }, { "epoch": 37.15, "learning_rate": 0.0005, "loss": 1.4151, "step": 473800 }, { "epoch": 37.16, "learning_rate": 0.0005, "loss": 1.4192, "step": 473900 }, { "epoch": 37.17, "learning_rate": 0.0005, "loss": 1.4437, "step": 474000 }, { "epoch": 37.18, "learning_rate": 0.0005, "loss": 1.4698, "step": 474100 }, { "epoch": 37.19, "learning_rate": 0.0005, "loss": 1.4343, "step": 474200 }, { "epoch": 37.19, "learning_rate": 0.0005, "loss": 1.456, "step": 474300 }, { "epoch": 37.2, "learning_rate": 0.0005, "loss": 1.4433, "step": 474400 }, { "epoch": 37.21, "learning_rate": 0.0005, "loss": 1.4381, "step": 474500 }, { "epoch": 37.22, "learning_rate": 0.0005, "loss": 1.4324, "step": 474600 }, { "epoch": 37.23, "learning_rate": 0.0005, "loss": 1.4325, "step": 474700 }, { "epoch": 37.23, "learning_rate": 0.0005, "loss": 1.4719, "step": 474800 }, { "epoch": 37.24, "learning_rate": 0.0005, "loss": 1.4383, "step": 474900 }, { "epoch": 37.25, "learning_rate": 0.0005, "loss": 1.4518, "step": 475000 }, { "epoch": 37.26, "learning_rate": 0.0005, "loss": 1.457, "step": 475100 }, { "epoch": 37.26, "learning_rate": 0.0005, "loss": 1.4485, "step": 475200 }, { "epoch": 37.27, "learning_rate": 0.0005, "loss": 1.4477, "step": 475300 }, { "epoch": 37.28, "learning_rate": 0.0005, "loss": 1.4299, "step": 475400 }, { "epoch": 37.29, "learning_rate": 0.0005, "loss": 1.4411, "step": 475500 }, { "epoch": 37.3, "learning_rate": 0.0005, "loss": 1.4588, "step": 475600 }, { "epoch": 37.3, "learning_rate": 0.0005, "loss": 1.453, "step": 475700 }, { "epoch": 37.31, "learning_rate": 0.0005, "loss": 1.4495, "step": 475800 }, { "epoch": 37.32, "learning_rate": 0.0005, "loss": 1.4252, "step": 475900 }, { "epoch": 37.33, "learning_rate": 0.0005, "loss": 1.4523, "step": 476000 }, { "epoch": 37.34, "learning_rate": 0.0005, "loss": 1.434, "step": 476100 }, { "epoch": 37.34, "learning_rate": 0.0005, "loss": 1.4625, "step": 476200 }, { "epoch": 37.35, "learning_rate": 0.0005, "loss": 1.4547, "step": 476300 }, { "epoch": 37.36, "learning_rate": 0.0005, "loss": 1.4441, "step": 476400 }, { "epoch": 37.37, "learning_rate": 0.0005, "loss": 1.4308, "step": 476500 }, { "epoch": 37.37, "learning_rate": 0.0005, "loss": 1.4641, "step": 476600 }, { "epoch": 37.38, "learning_rate": 0.0005, "loss": 1.4441, "step": 476700 }, { "epoch": 37.39, "learning_rate": 0.0005, "loss": 1.4377, "step": 476800 }, { "epoch": 37.4, "learning_rate": 0.0005, "loss": 1.4943, "step": 476900 }, { "epoch": 37.41, "learning_rate": 0.0005, "loss": 1.4712, "step": 477000 }, { "epoch": 37.41, "learning_rate": 0.0005, "loss": 1.4618, "step": 477100 }, { "epoch": 37.42, "learning_rate": 0.0005, "loss": 1.4653, "step": 477200 }, { "epoch": 37.43, "learning_rate": 0.0005, "loss": 1.4633, "step": 477300 }, { "epoch": 37.44, "learning_rate": 0.0005, "loss": 1.4464, "step": 477400 }, { "epoch": 37.45, "learning_rate": 0.0005, "loss": 1.4462, "step": 477500 }, { "epoch": 37.45, "learning_rate": 0.0005, "loss": 1.4831, "step": 477600 }, { "epoch": 37.46, "learning_rate": 0.0005, "loss": 1.4498, "step": 477700 }, { "epoch": 37.47, "learning_rate": 0.0005, "loss": 1.4278, "step": 477800 }, { "epoch": 37.48, "learning_rate": 0.0005, "loss": 1.4742, "step": 477900 }, { "epoch": 37.48, "learning_rate": 0.0005, "loss": 1.4649, "step": 478000 }, { "epoch": 37.49, "learning_rate": 0.0005, "loss": 1.4472, "step": 478100 }, { "epoch": 37.5, "learning_rate": 0.0005, "loss": 1.4375, "step": 478200 }, { "epoch": 37.51, "learning_rate": 0.0005, "loss": 1.4675, "step": 478300 }, { "epoch": 37.52, "learning_rate": 0.0005, "loss": 1.4489, "step": 478400 }, { "epoch": 37.52, "learning_rate": 0.0005, "loss": 1.476, "step": 478500 }, { "epoch": 37.53, "learning_rate": 0.0005, "loss": 1.495, "step": 478600 }, { "epoch": 37.54, "learning_rate": 0.0005, "loss": 1.4534, "step": 478700 }, { "epoch": 37.55, "learning_rate": 0.0005, "loss": 1.4523, "step": 478800 }, { "epoch": 37.55, "learning_rate": 0.0005, "loss": 1.4639, "step": 478900 }, { "epoch": 37.56, "learning_rate": 0.0005, "loss": 1.464, "step": 479000 }, { "epoch": 37.57, "learning_rate": 0.0005, "loss": 1.4707, "step": 479100 }, { "epoch": 37.58, "learning_rate": 0.0005, "loss": 1.4779, "step": 479200 }, { "epoch": 37.59, "learning_rate": 0.0005, "loss": 1.4698, "step": 479300 }, { "epoch": 37.59, "learning_rate": 0.0005, "loss": 1.459, "step": 479400 }, { "epoch": 37.6, "learning_rate": 0.0005, "loss": 1.474, "step": 479500 }, { "epoch": 37.61, "learning_rate": 0.0005, "loss": 1.4644, "step": 479600 }, { "epoch": 37.62, "learning_rate": 0.0005, "loss": 1.4517, "step": 479700 }, { "epoch": 37.63, "learning_rate": 0.0005, "loss": 1.4844, "step": 479800 }, { "epoch": 37.63, "learning_rate": 0.0005, "loss": 1.4627, "step": 479900 }, { "epoch": 37.64, "learning_rate": 0.0005, "loss": 1.4801, "step": 480000 }, { "epoch": 37.64, "eval_gen_len": 18.763485477178424, "eval_loss": 2.0635464191436768, "eval_rouge1": 35.7102, "eval_rouge2": 14.5234, "eval_rougeL": 29.4028, "eval_rougeLsum": 29.3844, "eval_runtime": 356.8998, "eval_samples_per_second": 31.737, "eval_steps_per_second": 1.984, "step": 480000 }, { "epoch": 37.65, "learning_rate": 0.0005, "loss": 1.4629, "step": 480100 }, { "epoch": 37.66, "learning_rate": 0.0005, "loss": 1.4369, "step": 480200 }, { "epoch": 37.66, "learning_rate": 0.0005, "loss": 1.5003, "step": 480300 }, { "epoch": 37.67, "learning_rate": 0.0005, "loss": 1.4944, "step": 480400 }, { "epoch": 37.68, "learning_rate": 0.0005, "loss": 1.4683, "step": 480500 }, { "epoch": 37.69, "learning_rate": 0.0005, "loss": 1.4545, "step": 480600 }, { "epoch": 37.7, "learning_rate": 0.0005, "loss": 1.4789, "step": 480700 }, { "epoch": 37.7, "learning_rate": 0.0005, "loss": 1.4754, "step": 480800 }, { "epoch": 37.71, "learning_rate": 0.0005, "loss": 1.4473, "step": 480900 }, { "epoch": 37.72, "learning_rate": 0.0005, "loss": 1.4698, "step": 481000 }, { "epoch": 37.73, "learning_rate": 0.0005, "loss": 1.4793, "step": 481100 }, { "epoch": 37.74, "learning_rate": 0.0005, "loss": 1.4657, "step": 481200 }, { "epoch": 37.74, "learning_rate": 0.0005, "loss": 1.4849, "step": 481300 }, { "epoch": 37.75, "learning_rate": 0.0005, "loss": 1.4639, "step": 481400 }, { "epoch": 37.76, "learning_rate": 0.0005, "loss": 1.4791, "step": 481500 }, { "epoch": 37.77, "learning_rate": 0.0005, "loss": 1.4802, "step": 481600 }, { "epoch": 37.77, "learning_rate": 0.0005, "loss": 1.4676, "step": 481700 }, { "epoch": 37.78, "learning_rate": 0.0005, "loss": 1.4727, "step": 481800 }, { "epoch": 37.79, "learning_rate": 0.0005, "loss": 1.4757, "step": 481900 }, { "epoch": 37.8, "learning_rate": 0.0005, "loss": 1.5054, "step": 482000 }, { "epoch": 37.81, "learning_rate": 0.0005, "loss": 1.4704, "step": 482100 }, { "epoch": 37.81, "learning_rate": 0.0005, "loss": 1.4794, "step": 482200 }, { "epoch": 37.82, "learning_rate": 0.0005, "loss": 1.4873, "step": 482300 }, { "epoch": 37.83, "learning_rate": 0.0005, "loss": 1.4662, "step": 482400 }, { "epoch": 37.84, "learning_rate": 0.0005, "loss": 1.4757, "step": 482500 }, { "epoch": 37.85, "learning_rate": 0.0005, "loss": 1.4867, "step": 482600 }, { "epoch": 37.85, "learning_rate": 0.0005, "loss": 1.4748, "step": 482700 }, { "epoch": 37.86, "learning_rate": 0.0005, "loss": 1.5006, "step": 482800 }, { "epoch": 37.87, "learning_rate": 0.0005, "loss": 1.4998, "step": 482900 }, { "epoch": 37.88, "learning_rate": 0.0005, "loss": 1.4715, "step": 483000 }, { "epoch": 37.88, "learning_rate": 0.0005, "loss": 1.4772, "step": 483100 }, { "epoch": 37.89, "learning_rate": 0.0005, "loss": 1.477, "step": 483200 }, { "epoch": 37.9, "learning_rate": 0.0005, "loss": 1.4816, "step": 483300 }, { "epoch": 37.91, "learning_rate": 0.0005, "loss": 1.4715, "step": 483400 }, { "epoch": 37.92, "learning_rate": 0.0005, "loss": 1.4821, "step": 483500 }, { "epoch": 37.92, "learning_rate": 0.0005, "loss": 1.5009, "step": 483600 }, { "epoch": 37.93, "learning_rate": 0.0005, "loss": 1.4916, "step": 483700 }, { "epoch": 37.94, "learning_rate": 0.0005, "loss": 1.5157, "step": 483800 }, { "epoch": 37.95, "learning_rate": 0.0005, "loss": 1.4821, "step": 483900 }, { "epoch": 37.95, "learning_rate": 0.0005, "loss": 1.4669, "step": 484000 }, { "epoch": 37.96, "learning_rate": 0.0005, "loss": 1.4763, "step": 484100 }, { "epoch": 37.97, "learning_rate": 0.0005, "loss": 1.4832, "step": 484200 }, { "epoch": 37.98, "learning_rate": 0.0005, "loss": 1.4875, "step": 484300 }, { "epoch": 37.99, "learning_rate": 0.0005, "loss": 1.5, "step": 484400 }, { "epoch": 37.99, "learning_rate": 0.0005, "loss": 1.4921, "step": 484500 }, { "epoch": 38.0, "learning_rate": 0.0005, "loss": 1.4853, "step": 484600 }, { "epoch": 38.01, "learning_rate": 0.0005, "loss": 1.4102, "step": 484700 }, { "epoch": 38.02, "learning_rate": 0.0005, "loss": 1.4333, "step": 484800 }, { "epoch": 38.03, "learning_rate": 0.0005, "loss": 1.4171, "step": 484900 }, { "epoch": 38.03, "learning_rate": 0.0005, "loss": 1.4047, "step": 485000 }, { "epoch": 38.04, "learning_rate": 0.0005, "loss": 1.4108, "step": 485100 }, { "epoch": 38.05, "learning_rate": 0.0005, "loss": 1.3879, "step": 485200 }, { "epoch": 38.06, "learning_rate": 0.0005, "loss": 1.4144, "step": 485300 }, { "epoch": 38.06, "learning_rate": 0.0005, "loss": 1.4304, "step": 485400 }, { "epoch": 38.07, "learning_rate": 0.0005, "loss": 1.3913, "step": 485500 }, { "epoch": 38.08, "learning_rate": 0.0005, "loss": 1.4303, "step": 485600 }, { "epoch": 38.09, "learning_rate": 0.0005, "loss": 1.3798, "step": 485700 }, { "epoch": 38.1, "learning_rate": 0.0005, "loss": 1.4202, "step": 485800 }, { "epoch": 38.1, "learning_rate": 0.0005, "loss": 1.4066, "step": 485900 }, { "epoch": 38.11, "learning_rate": 0.0005, "loss": 1.4153, "step": 486000 }, { "epoch": 38.12, "learning_rate": 0.0005, "loss": 1.4256, "step": 486100 }, { "epoch": 38.13, "learning_rate": 0.0005, "loss": 1.4468, "step": 486200 }, { "epoch": 38.14, "learning_rate": 0.0005, "loss": 1.3938, "step": 486300 }, { "epoch": 38.14, "learning_rate": 0.0005, "loss": 1.4367, "step": 486400 }, { "epoch": 38.15, "learning_rate": 0.0005, "loss": 1.4248, "step": 486500 }, { "epoch": 38.16, "learning_rate": 0.0005, "loss": 1.4351, "step": 486600 }, { "epoch": 38.17, "learning_rate": 0.0005, "loss": 1.4488, "step": 486700 }, { "epoch": 38.17, "learning_rate": 0.0005, "loss": 1.4195, "step": 486800 }, { "epoch": 38.18, "learning_rate": 0.0005, "loss": 1.4355, "step": 486900 }, { "epoch": 38.19, "learning_rate": 0.0005, "loss": 1.4374, "step": 487000 }, { "epoch": 38.2, "learning_rate": 0.0005, "loss": 1.4443, "step": 487100 }, { "epoch": 38.21, "learning_rate": 0.0005, "loss": 1.4462, "step": 487200 }, { "epoch": 38.21, "learning_rate": 0.0005, "loss": 1.423, "step": 487300 }, { "epoch": 38.22, "learning_rate": 0.0005, "loss": 1.4214, "step": 487400 }, { "epoch": 38.23, "learning_rate": 0.0005, "loss": 1.4318, "step": 487500 }, { "epoch": 38.24, "learning_rate": 0.0005, "loss": 1.4522, "step": 487600 }, { "epoch": 38.24, "learning_rate": 0.0005, "loss": 1.4219, "step": 487700 }, { "epoch": 38.25, "learning_rate": 0.0005, "loss": 1.4298, "step": 487800 }, { "epoch": 38.26, "learning_rate": 0.0005, "loss": 1.4482, "step": 487900 }, { "epoch": 38.27, "learning_rate": 0.0005, "loss": 1.4304, "step": 488000 }, { "epoch": 38.28, "learning_rate": 0.0005, "loss": 1.4545, "step": 488100 }, { "epoch": 38.28, "learning_rate": 0.0005, "loss": 1.4368, "step": 488200 }, { "epoch": 38.29, "learning_rate": 0.0005, "loss": 1.4392, "step": 488300 }, { "epoch": 38.3, "learning_rate": 0.0005, "loss": 1.435, "step": 488400 }, { "epoch": 38.31, "learning_rate": 0.0005, "loss": 1.4098, "step": 488500 }, { "epoch": 38.32, "learning_rate": 0.0005, "loss": 1.4293, "step": 488600 }, { "epoch": 38.32, "learning_rate": 0.0005, "loss": 1.4606, "step": 488700 }, { "epoch": 38.33, "learning_rate": 0.0005, "loss": 1.4354, "step": 488800 }, { "epoch": 38.34, "learning_rate": 0.0005, "loss": 1.4521, "step": 488900 }, { "epoch": 38.35, "learning_rate": 0.0005, "loss": 1.4369, "step": 489000 }, { "epoch": 38.35, "learning_rate": 0.0005, "loss": 1.4305, "step": 489100 }, { "epoch": 38.36, "learning_rate": 0.0005, "loss": 1.4358, "step": 489200 }, { "epoch": 38.37, "learning_rate": 0.0005, "loss": 1.4482, "step": 489300 }, { "epoch": 38.38, "learning_rate": 0.0005, "loss": 1.4456, "step": 489400 }, { "epoch": 38.39, "learning_rate": 0.0005, "loss": 1.4476, "step": 489500 }, { "epoch": 38.39, "learning_rate": 0.0005, "loss": 1.4623, "step": 489600 }, { "epoch": 38.4, "learning_rate": 0.0005, "loss": 1.4577, "step": 489700 }, { "epoch": 38.41, "learning_rate": 0.0005, "loss": 1.4286, "step": 489800 }, { "epoch": 38.42, "learning_rate": 0.0005, "loss": 1.4479, "step": 489900 }, { "epoch": 38.43, "learning_rate": 0.0005, "loss": 1.4553, "step": 490000 }, { "epoch": 38.43, "eval_gen_len": 18.818486801447868, "eval_loss": 2.0702340602874756, "eval_rouge1": 35.6617, "eval_rouge2": 14.5328, "eval_rougeL": 29.2952, "eval_rougeLsum": 29.2949, "eval_runtime": 360.1467, "eval_samples_per_second": 31.451, "eval_steps_per_second": 1.966, "step": 490000 }, { "epoch": 38.43, "learning_rate": 0.0005, "loss": 1.4381, "step": 490100 }, { "epoch": 38.44, "learning_rate": 0.0005, "loss": 1.4515, "step": 490200 }, { "epoch": 38.45, "learning_rate": 0.0005, "loss": 1.4214, "step": 490300 }, { "epoch": 38.46, "learning_rate": 0.0005, "loss": 1.4283, "step": 490400 }, { "epoch": 38.46, "learning_rate": 0.0005, "loss": 1.435, "step": 490500 }, { "epoch": 38.47, "learning_rate": 0.0005, "loss": 1.4338, "step": 490600 }, { "epoch": 38.48, "learning_rate": 0.0005, "loss": 1.4418, "step": 490700 }, { "epoch": 38.49, "learning_rate": 0.0005, "loss": 1.4445, "step": 490800 }, { "epoch": 38.5, "learning_rate": 0.0005, "loss": 1.4408, "step": 490900 }, { "epoch": 38.5, "learning_rate": 0.0005, "loss": 1.4541, "step": 491000 }, { "epoch": 38.51, "learning_rate": 0.0005, "loss": 1.4602, "step": 491100 }, { "epoch": 38.52, "learning_rate": 0.0005, "loss": 1.4278, "step": 491200 }, { "epoch": 38.53, "learning_rate": 0.0005, "loss": 1.4641, "step": 491300 }, { "epoch": 38.54, "learning_rate": 0.0005, "loss": 1.4253, "step": 491400 }, { "epoch": 38.54, "learning_rate": 0.0005, "loss": 1.4791, "step": 491500 }, { "epoch": 38.55, "learning_rate": 0.0005, "loss": 1.4497, "step": 491600 }, { "epoch": 38.56, "learning_rate": 0.0005, "loss": 1.4517, "step": 491700 }, { "epoch": 38.57, "learning_rate": 0.0005, "loss": 1.4639, "step": 491800 }, { "epoch": 38.57, "learning_rate": 0.0005, "loss": 1.4773, "step": 491900 }, { "epoch": 38.58, "learning_rate": 0.0005, "loss": 1.4668, "step": 492000 }, { "epoch": 38.59, "learning_rate": 0.0005, "loss": 1.4547, "step": 492100 }, { "epoch": 38.6, "learning_rate": 0.0005, "loss": 1.4635, "step": 492200 }, { "epoch": 38.61, "learning_rate": 0.0005, "loss": 1.4822, "step": 492300 }, { "epoch": 38.61, "learning_rate": 0.0005, "loss": 1.4691, "step": 492400 }, { "epoch": 38.62, "learning_rate": 0.0005, "loss": 1.4819, "step": 492500 }, { "epoch": 38.63, "learning_rate": 0.0005, "loss": 1.4483, "step": 492600 }, { "epoch": 38.64, "learning_rate": 0.0005, "loss": 1.4592, "step": 492700 }, { "epoch": 38.64, "learning_rate": 0.0005, "loss": 1.4713, "step": 492800 }, { "epoch": 38.65, "learning_rate": 0.0005, "loss": 1.4678, "step": 492900 }, { "epoch": 38.66, "learning_rate": 0.0005, "loss": 1.461, "step": 493000 }, { "epoch": 38.67, "learning_rate": 0.0005, "loss": 1.4566, "step": 493100 }, { "epoch": 38.68, "learning_rate": 0.0005, "loss": 1.4501, "step": 493200 }, { "epoch": 38.68, "learning_rate": 0.0005, "loss": 1.4737, "step": 493300 }, { "epoch": 38.69, "learning_rate": 0.0005, "loss": 1.4579, "step": 493400 }, { "epoch": 38.7, "learning_rate": 0.0005, "loss": 1.4737, "step": 493500 }, { "epoch": 38.71, "learning_rate": 0.0005, "loss": 1.4571, "step": 493600 }, { "epoch": 38.72, "learning_rate": 0.0005, "loss": 1.4711, "step": 493700 }, { "epoch": 38.72, "learning_rate": 0.0005, "loss": 1.4522, "step": 493800 }, { "epoch": 38.73, "learning_rate": 0.0005, "loss": 1.4809, "step": 493900 }, { "epoch": 38.74, "learning_rate": 0.0005, "loss": 1.4816, "step": 494000 }, { "epoch": 38.75, "learning_rate": 0.0005, "loss": 1.4372, "step": 494100 }, { "epoch": 38.75, "learning_rate": 0.0005, "loss": 1.4837, "step": 494200 }, { "epoch": 38.76, "learning_rate": 0.0005, "loss": 1.4849, "step": 494300 }, { "epoch": 38.77, "learning_rate": 0.0005, "loss": 1.4664, "step": 494400 }, { "epoch": 38.78, "learning_rate": 0.0005, "loss": 1.4442, "step": 494500 }, { "epoch": 38.79, "learning_rate": 0.0005, "loss": 1.4493, "step": 494600 }, { "epoch": 38.79, "learning_rate": 0.0005, "loss": 1.45, "step": 494700 }, { "epoch": 38.8, "learning_rate": 0.0005, "loss": 1.4717, "step": 494800 }, { "epoch": 38.81, "learning_rate": 0.0005, "loss": 1.4816, "step": 494900 }, { "epoch": 38.82, "learning_rate": 0.0005, "loss": 1.4556, "step": 495000 }, { "epoch": 38.83, "learning_rate": 0.0005, "loss": 1.4619, "step": 495100 }, { "epoch": 38.83, "learning_rate": 0.0005, "loss": 1.4585, "step": 495200 }, { "epoch": 38.84, "learning_rate": 0.0005, "loss": 1.4512, "step": 495300 }, { "epoch": 38.85, "learning_rate": 0.0005, "loss": 1.4606, "step": 495400 }, { "epoch": 38.86, "learning_rate": 0.0005, "loss": 1.4713, "step": 495500 }, { "epoch": 38.86, "learning_rate": 0.0005, "loss": 1.4917, "step": 495600 }, { "epoch": 38.87, "learning_rate": 0.0005, "loss": 1.4603, "step": 495700 }, { "epoch": 38.88, "learning_rate": 0.0005, "loss": 1.4885, "step": 495800 }, { "epoch": 38.89, "learning_rate": 0.0005, "loss": 1.4555, "step": 495900 }, { "epoch": 38.9, "learning_rate": 0.0005, "loss": 1.4674, "step": 496000 }, { "epoch": 38.9, "learning_rate": 0.0005, "loss": 1.4614, "step": 496100 }, { "epoch": 38.91, "learning_rate": 0.0005, "loss": 1.4815, "step": 496200 }, { "epoch": 38.92, "learning_rate": 0.0005, "loss": 1.4637, "step": 496300 }, { "epoch": 38.93, "learning_rate": 0.0005, "loss": 1.475, "step": 496400 }, { "epoch": 38.94, "learning_rate": 0.0005, "loss": 1.4696, "step": 496500 }, { "epoch": 38.94, "learning_rate": 0.0005, "loss": 1.4896, "step": 496600 }, { "epoch": 38.95, "learning_rate": 0.0005, "loss": 1.4555, "step": 496700 }, { "epoch": 38.96, "learning_rate": 0.0005, "loss": 1.4873, "step": 496800 }, { "epoch": 38.97, "learning_rate": 0.0005, "loss": 1.4663, "step": 496900 }, { "epoch": 38.97, "learning_rate": 0.0005, "loss": 1.4983, "step": 497000 }, { "epoch": 38.98, "learning_rate": 0.0005, "loss": 1.4715, "step": 497100 }, { "epoch": 38.99, "learning_rate": 0.0005, "loss": 1.5052, "step": 497200 }, { "epoch": 39.0, "learning_rate": 0.0005, "loss": 1.4862, "step": 497300 }, { "epoch": 39.01, "learning_rate": 0.0005, "loss": 1.412, "step": 497400 }, { "epoch": 39.01, "learning_rate": 0.0005, "loss": 1.3872, "step": 497500 }, { "epoch": 39.02, "learning_rate": 0.0005, "loss": 1.3971, "step": 497600 }, { "epoch": 39.03, "learning_rate": 0.0005, "loss": 1.4171, "step": 497700 }, { "epoch": 39.04, "learning_rate": 0.0005, "loss": 1.4026, "step": 497800 }, { "epoch": 39.04, "learning_rate": 0.0005, "loss": 1.39, "step": 497900 }, { "epoch": 39.05, "learning_rate": 0.0005, "loss": 1.3829, "step": 498000 }, { "epoch": 39.06, "learning_rate": 0.0005, "loss": 1.3924, "step": 498100 }, { "epoch": 39.07, "learning_rate": 0.0005, "loss": 1.396, "step": 498200 }, { "epoch": 39.08, "learning_rate": 0.0005, "loss": 1.386, "step": 498300 }, { "epoch": 39.08, "learning_rate": 0.0005, "loss": 1.3992, "step": 498400 }, { "epoch": 39.09, "learning_rate": 0.0005, "loss": 1.4199, "step": 498500 }, { "epoch": 39.1, "learning_rate": 0.0005, "loss": 1.3924, "step": 498600 }, { "epoch": 39.11, "learning_rate": 0.0005, "loss": 1.402, "step": 498700 }, { "epoch": 39.12, "learning_rate": 0.0005, "loss": 1.4241, "step": 498800 }, { "epoch": 39.12, "learning_rate": 0.0005, "loss": 1.4045, "step": 498900 }, { "epoch": 39.13, "learning_rate": 0.0005, "loss": 1.4163, "step": 499000 }, { "epoch": 39.14, "learning_rate": 0.0005, "loss": 1.4071, "step": 499100 }, { "epoch": 39.15, "learning_rate": 0.0005, "loss": 1.4081, "step": 499200 }, { "epoch": 39.15, "learning_rate": 0.0005, "loss": 1.4432, "step": 499300 }, { "epoch": 39.16, "learning_rate": 0.0005, "loss": 1.4187, "step": 499400 }, { "epoch": 39.17, "learning_rate": 0.0005, "loss": 1.4159, "step": 499500 }, { "epoch": 39.18, "learning_rate": 0.0005, "loss": 1.4294, "step": 499600 }, { "epoch": 39.19, "learning_rate": 0.0005, "loss": 1.4187, "step": 499700 }, { "epoch": 39.19, "learning_rate": 0.0005, "loss": 1.4207, "step": 499800 }, { "epoch": 39.2, "learning_rate": 0.0005, "loss": 1.429, "step": 499900 }, { "epoch": 39.21, "learning_rate": 0.0005, "loss": 1.4295, "step": 500000 }, { "epoch": 39.21, "eval_gen_len": 18.78025955681116, "eval_loss": 2.1033854484558105, "eval_rouge1": 35.8815, "eval_rouge2": 14.7785, "eval_rougeL": 29.503, "eval_rougeLsum": 29.4979, "eval_runtime": 355.4667, "eval_samples_per_second": 31.865, "eval_steps_per_second": 1.992, "step": 500000 }, { "epoch": 39.22, "learning_rate": 0.0005, "loss": 1.4241, "step": 500100 }, { "epoch": 39.23, "learning_rate": 0.0005, "loss": 1.4048, "step": 500200 }, { "epoch": 39.23, "learning_rate": 0.0005, "loss": 1.4098, "step": 500300 }, { "epoch": 39.24, "learning_rate": 0.0005, "loss": 1.3955, "step": 500400 }, { "epoch": 39.25, "learning_rate": 0.0005, "loss": 1.4262, "step": 500500 }, { "epoch": 39.26, "learning_rate": 0.0005, "loss": 1.4024, "step": 500600 }, { "epoch": 39.26, "learning_rate": 0.0005, "loss": 1.4432, "step": 500700 }, { "epoch": 39.27, "learning_rate": 0.0005, "loss": 1.4488, "step": 500800 }, { "epoch": 39.28, "learning_rate": 0.0005, "loss": 1.4148, "step": 500900 }, { "epoch": 39.29, "learning_rate": 0.0005, "loss": 1.4618, "step": 501000 }, { "epoch": 39.3, "learning_rate": 0.0005, "loss": 1.4333, "step": 501100 }, { "epoch": 39.3, "learning_rate": 0.0005, "loss": 1.4268, "step": 501200 }, { "epoch": 39.31, "learning_rate": 0.0005, "loss": 1.4125, "step": 501300 }, { "epoch": 39.32, "learning_rate": 0.0005, "loss": 1.4449, "step": 501400 }, { "epoch": 39.33, "learning_rate": 0.0005, "loss": 1.4244, "step": 501500 }, { "epoch": 39.34, "learning_rate": 0.0005, "loss": 1.4344, "step": 501600 }, { "epoch": 39.34, "learning_rate": 0.0005, "loss": 1.4306, "step": 501700 }, { "epoch": 39.35, "learning_rate": 0.0005, "loss": 1.447, "step": 501800 }, { "epoch": 39.36, "learning_rate": 0.0005, "loss": 1.4367, "step": 501900 }, { "epoch": 39.37, "learning_rate": 0.0005, "loss": 1.4568, "step": 502000 }, { "epoch": 39.37, "learning_rate": 0.0005, "loss": 1.4365, "step": 502100 }, { "epoch": 39.38, "learning_rate": 0.0005, "loss": 1.449, "step": 502200 }, { "epoch": 39.39, "learning_rate": 0.0005, "loss": 1.4194, "step": 502300 }, { "epoch": 39.4, "learning_rate": 0.0005, "loss": 1.4658, "step": 502400 }, { "epoch": 39.41, "learning_rate": 0.0005, "loss": 1.4472, "step": 502500 }, { "epoch": 39.41, "learning_rate": 0.0005, "loss": 1.4311, "step": 502600 }, { "epoch": 39.42, "learning_rate": 0.0005, "loss": 1.448, "step": 502700 }, { "epoch": 39.43, "learning_rate": 0.0005, "loss": 1.4346, "step": 502800 }, { "epoch": 39.44, "learning_rate": 0.0005, "loss": 1.4348, "step": 502900 }, { "epoch": 39.44, "learning_rate": 0.0005, "loss": 1.4081, "step": 503000 }, { "epoch": 39.45, "learning_rate": 0.0005, "loss": 1.4255, "step": 503100 }, { "epoch": 39.46, "learning_rate": 0.0005, "loss": 1.4441, "step": 503200 }, { "epoch": 39.47, "learning_rate": 0.0005, "loss": 1.4332, "step": 503300 }, { "epoch": 39.48, "learning_rate": 0.0005, "loss": 1.4609, "step": 503400 }, { "epoch": 39.48, "learning_rate": 0.0005, "loss": 1.452, "step": 503500 }, { "epoch": 39.49, "learning_rate": 0.0005, "loss": 1.4605, "step": 503600 }, { "epoch": 39.5, "learning_rate": 0.0005, "loss": 1.4327, "step": 503700 }, { "epoch": 39.51, "learning_rate": 0.0005, "loss": 1.4194, "step": 503800 }, { "epoch": 39.52, "learning_rate": 0.0005, "loss": 1.4626, "step": 503900 }, { "epoch": 39.52, "learning_rate": 0.0005, "loss": 1.4482, "step": 504000 }, { "epoch": 39.53, "learning_rate": 0.0005, "loss": 1.4396, "step": 504100 }, { "epoch": 39.54, "learning_rate": 0.0005, "loss": 1.4632, "step": 504200 }, { "epoch": 39.55, "learning_rate": 0.0005, "loss": 1.4469, "step": 504300 }, { "epoch": 39.55, "learning_rate": 0.0005, "loss": 1.4564, "step": 504400 }, { "epoch": 39.56, "learning_rate": 0.0005, "loss": 1.4522, "step": 504500 }, { "epoch": 39.57, "learning_rate": 0.0005, "loss": 1.4582, "step": 504600 }, { "epoch": 39.58, "learning_rate": 0.0005, "loss": 1.4384, "step": 504700 }, { "epoch": 39.59, "learning_rate": 0.0005, "loss": 1.4638, "step": 504800 }, { "epoch": 39.59, "learning_rate": 0.0005, "loss": 1.4518, "step": 504900 }, { "epoch": 39.6, "learning_rate": 0.0005, "loss": 1.4322, "step": 505000 }, { "epoch": 39.61, "learning_rate": 0.0005, "loss": 1.4654, "step": 505100 }, { "epoch": 39.62, "learning_rate": 0.0005, "loss": 1.4446, "step": 505200 }, { "epoch": 39.63, "learning_rate": 0.0005, "loss": 1.4646, "step": 505300 }, { "epoch": 39.63, "learning_rate": 0.0005, "loss": 1.4356, "step": 505400 }, { "epoch": 39.64, "learning_rate": 0.0005, "loss": 1.4614, "step": 505500 }, { "epoch": 39.65, "learning_rate": 0.0005, "loss": 1.4351, "step": 505600 }, { "epoch": 39.66, "learning_rate": 0.0005, "loss": 1.4379, "step": 505700 }, { "epoch": 39.66, "learning_rate": 0.0005, "loss": 1.4683, "step": 505800 }, { "epoch": 39.67, "learning_rate": 0.0005, "loss": 1.4412, "step": 505900 }, { "epoch": 39.68, "learning_rate": 0.0005, "loss": 1.4549, "step": 506000 }, { "epoch": 39.69, "learning_rate": 0.0005, "loss": 1.4474, "step": 506100 }, { "epoch": 39.7, "learning_rate": 0.0005, "loss": 1.4656, "step": 506200 }, { "epoch": 39.7, "learning_rate": 0.0005, "loss": 1.4564, "step": 506300 }, { "epoch": 39.71, "learning_rate": 0.0005, "loss": 1.4471, "step": 506400 }, { "epoch": 39.72, "learning_rate": 0.0005, "loss": 1.4516, "step": 506500 }, { "epoch": 39.73, "learning_rate": 0.0005, "loss": 1.4619, "step": 506600 }, { "epoch": 39.73, "learning_rate": 0.0005, "loss": 1.4515, "step": 506700 }, { "epoch": 39.74, "learning_rate": 0.0005, "loss": 1.4363, "step": 506800 }, { "epoch": 39.75, "learning_rate": 0.0005, "loss": 1.4689, "step": 506900 }, { "epoch": 39.76, "learning_rate": 0.0005, "loss": 1.4752, "step": 507000 }, { "epoch": 39.77, "learning_rate": 0.0005, "loss": 1.4471, "step": 507100 }, { "epoch": 39.77, "learning_rate": 0.0005, "loss": 1.4483, "step": 507200 }, { "epoch": 39.78, "learning_rate": 0.0005, "loss": 1.43, "step": 507300 }, { "epoch": 39.79, "learning_rate": 0.0005, "loss": 1.4408, "step": 507400 }, { "epoch": 39.8, "learning_rate": 0.0005, "loss": 1.4258, "step": 507500 }, { "epoch": 39.81, "learning_rate": 0.0005, "loss": 1.4326, "step": 507600 }, { "epoch": 39.81, "learning_rate": 0.0005, "loss": 1.5031, "step": 507700 }, { "epoch": 39.82, "learning_rate": 0.0005, "loss": 1.461, "step": 507800 }, { "epoch": 39.83, "learning_rate": 0.0005, "loss": 1.4695, "step": 507900 }, { "epoch": 39.84, "learning_rate": 0.0005, "loss": 1.491, "step": 508000 }, { "epoch": 39.84, "learning_rate": 0.0005, "loss": 1.4485, "step": 508100 }, { "epoch": 39.85, "learning_rate": 0.0005, "loss": 1.4509, "step": 508200 }, { "epoch": 39.86, "learning_rate": 0.0005, "loss": 1.4386, "step": 508300 }, { "epoch": 39.87, "learning_rate": 0.0005, "loss": 1.4672, "step": 508400 }, { "epoch": 39.88, "learning_rate": 0.0005, "loss": 1.4693, "step": 508500 }, { "epoch": 39.88, "learning_rate": 0.0005, "loss": 1.4867, "step": 508600 }, { "epoch": 39.89, "learning_rate": 0.0005, "loss": 1.4633, "step": 508700 }, { "epoch": 39.9, "learning_rate": 0.0005, "loss": 1.4794, "step": 508800 }, { "epoch": 39.91, "learning_rate": 0.0005, "loss": 1.4676, "step": 508900 }, { "epoch": 39.92, "learning_rate": 0.0005, "loss": 1.4694, "step": 509000 }, { "epoch": 39.92, "learning_rate": 0.0005, "loss": 1.4547, "step": 509100 }, { "epoch": 39.93, "learning_rate": 0.0005, "loss": 1.4533, "step": 509200 }, { "epoch": 39.94, "learning_rate": 0.0005, "loss": 1.4713, "step": 509300 }, { "epoch": 39.95, "learning_rate": 0.0005, "loss": 1.455, "step": 509400 }, { "epoch": 39.95, "learning_rate": 0.0005, "loss": 1.4833, "step": 509500 }, { "epoch": 39.96, "learning_rate": 0.0005, "loss": 1.4749, "step": 509600 }, { "epoch": 39.97, "learning_rate": 0.0005, "loss": 1.4435, "step": 509700 }, { "epoch": 39.98, "learning_rate": 0.0005, "loss": 1.4589, "step": 509800 }, { "epoch": 39.99, "learning_rate": 0.0005, "loss": 1.469, "step": 509900 }, { "epoch": 39.99, "learning_rate": 0.0005, "loss": 1.4541, "step": 510000 }, { "epoch": 39.99, "eval_gen_len": 18.759071245696123, "eval_loss": 2.0739943981170654, "eval_rouge1": 35.8683, "eval_rouge2": 14.7103, "eval_rougeL": 29.5012, "eval_rougeLsum": 29.4958, "eval_runtime": 358.0238, "eval_samples_per_second": 31.638, "eval_steps_per_second": 1.978, "step": 510000 } ], "max_steps": 637600, "num_train_epochs": 50, "total_flos": 2.1607039565631652e+18, "trial_name": null, "trial_params": null }