{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.548024703460445, "global_step": 160000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5e-05, "loss": 2.0248, "step": 100 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 1.8708, "step": 200 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 1.8434, "step": 300 }, { "epoch": 0.03, "learning_rate": 5e-05, "loss": 1.7994, "step": 400 }, { "epoch": 0.04, "learning_rate": 5e-05, "loss": 1.8287, "step": 500 }, { "epoch": 0.05, "learning_rate": 5e-05, "loss": 1.8163, "step": 600 }, { "epoch": 0.05, "learning_rate": 5e-05, "loss": 1.8126, "step": 700 }, { "epoch": 0.06, "learning_rate": 5e-05, "loss": 1.7973, "step": 800 }, { "epoch": 0.07, "learning_rate": 5e-05, "loss": 1.7749, "step": 900 }, { "epoch": 0.08, "learning_rate": 5e-05, "loss": 1.7852, "step": 1000 }, { "epoch": 0.09, "learning_rate": 5e-05, "loss": 1.7927, "step": 1100 }, { "epoch": 0.09, "learning_rate": 5e-05, "loss": 1.7736, "step": 1200 }, { "epoch": 0.1, "learning_rate": 5e-05, "loss": 1.7792, "step": 1300 }, { "epoch": 0.11, "learning_rate": 5e-05, "loss": 1.7559, "step": 1400 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 1.7508, "step": 1500 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 1.7809, "step": 1600 }, { "epoch": 0.13, "learning_rate": 5e-05, "loss": 1.767, "step": 1700 }, { "epoch": 0.14, "learning_rate": 5e-05, "loss": 1.7906, "step": 1800 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 1.7622, "step": 1900 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 1.7726, "step": 2000 }, { "epoch": 0.16, "learning_rate": 5e-05, "loss": 1.7597, "step": 2100 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 1.7649, "step": 2200 }, { "epoch": 0.18, "learning_rate": 5e-05, "loss": 1.7352, "step": 2300 }, { "epoch": 0.19, "learning_rate": 5e-05, "loss": 1.7408, "step": 2400 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 1.7453, "step": 2500 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 1.7417, "step": 2600 }, { "epoch": 0.21, "learning_rate": 5e-05, "loss": 1.7524, "step": 2700 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 1.7461, "step": 2800 }, { "epoch": 0.23, "learning_rate": 5e-05, "loss": 1.7584, "step": 2900 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 1.7485, "step": 3000 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 1.7532, "step": 3100 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 1.7221, "step": 3200 }, { "epoch": 0.26, "learning_rate": 5e-05, "loss": 1.7351, "step": 3300 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 1.7286, "step": 3400 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 1.724, "step": 3500 }, { "epoch": 0.28, "learning_rate": 5e-05, "loss": 1.711, "step": 3600 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 1.7233, "step": 3700 }, { "epoch": 0.3, "learning_rate": 5e-05, "loss": 1.7075, "step": 3800 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 1.7264, "step": 3900 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 1.7189, "step": 4000 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 1.7265, "step": 4100 }, { "epoch": 0.33, "learning_rate": 5e-05, "loss": 1.7145, "step": 4200 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 1.7188, "step": 4300 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 1.7173, "step": 4400 }, { "epoch": 0.35, "learning_rate": 5e-05, "loss": 1.7454, "step": 4500 }, { "epoch": 0.36, "learning_rate": 5e-05, "loss": 1.7071, "step": 4600 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 1.7, "step": 4700 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 1.7312, "step": 4800 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 1.6972, "step": 4900 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 1.717, "step": 5000 }, { "epoch": 0.4, "learning_rate": 5e-05, "loss": 1.7042, "step": 5100 }, { "epoch": 0.41, "learning_rate": 5e-05, "loss": 1.7075, "step": 5200 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 1.7037, "step": 5300 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 1.689, "step": 5400 }, { "epoch": 0.43, "learning_rate": 5e-05, "loss": 1.7099, "step": 5500 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 1.7175, "step": 5600 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 1.6906, "step": 5700 }, { "epoch": 0.45, "learning_rate": 5e-05, "loss": 1.7112, "step": 5800 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 1.7183, "step": 5900 }, { "epoch": 0.47, "learning_rate": 5e-05, "loss": 1.7158, "step": 6000 }, { "epoch": 0.48, "learning_rate": 5e-05, "loss": 1.6972, "step": 6100 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 1.6869, "step": 6200 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 1.6896, "step": 6300 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 1.6895, "step": 6400 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 1.7044, "step": 6500 }, { "epoch": 0.52, "learning_rate": 5e-05, "loss": 1.7103, "step": 6600 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 1.6788, "step": 6700 }, { "epoch": 0.53, "learning_rate": 5e-05, "loss": 1.7091, "step": 6800 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 1.7046, "step": 6900 }, { "epoch": 0.55, "learning_rate": 5e-05, "loss": 1.6683, "step": 7000 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 1.7084, "step": 7100 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 1.6933, "step": 7200 }, { "epoch": 0.57, "learning_rate": 5e-05, "loss": 1.6533, "step": 7300 }, { "epoch": 0.58, "learning_rate": 5e-05, "loss": 1.6701, "step": 7400 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 1.6786, "step": 7500 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 1.6886, "step": 7600 }, { "epoch": 0.6, "learning_rate": 5e-05, "loss": 1.6805, "step": 7700 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 1.6651, "step": 7800 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 1.666, "step": 7900 }, { "epoch": 0.63, "learning_rate": 5e-05, "loss": 1.6671, "step": 8000 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 1.6979, "step": 8100 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 1.6823, "step": 8200 }, { "epoch": 0.65, "learning_rate": 5e-05, "loss": 1.6944, "step": 8300 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 1.6571, "step": 8400 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 1.7135, "step": 8500 }, { "epoch": 0.67, "learning_rate": 5e-05, "loss": 1.67, "step": 8600 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 1.6908, "step": 8700 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 1.665, "step": 8800 }, { "epoch": 0.7, "learning_rate": 5e-05, "loss": 1.6862, "step": 8900 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 1.6903, "step": 9000 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 1.6551, "step": 9100 }, { "epoch": 0.72, "learning_rate": 5e-05, "loss": 1.6588, "step": 9200 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 1.6589, "step": 9300 }, { "epoch": 0.74, "learning_rate": 5e-05, "loss": 1.671, "step": 9400 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 1.6702, "step": 9500 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 1.6641, "step": 9600 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 1.6681, "step": 9700 }, { "epoch": 0.77, "learning_rate": 5e-05, "loss": 1.6798, "step": 9800 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 1.6762, "step": 9900 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 1.6781, "step": 10000 }, { "epoch": 0.78, "eval_gen_len": 18.729231040875785, "eval_loss": 1.5352756977081299, "eval_rouge1": 38.2416, "eval_rouge2": 16.0401, "eval_rougeL": 31.3511, "eval_rougeLsum": 31.3586, "eval_runtime": 2357.8446, "eval_samples_per_second": 4.804, "eval_steps_per_second": 1.201, "step": 10000 }, { "epoch": 0.79, "learning_rate": 5e-05, "loss": 1.6725, "step": 10100 }, { "epoch": 0.8, "learning_rate": 5e-05, "loss": 1.6569, "step": 10200 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 1.631, "step": 10300 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 1.6549, "step": 10400 }, { "epoch": 0.82, "learning_rate": 5e-05, "loss": 1.6714, "step": 10500 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 1.6827, "step": 10600 }, { "epoch": 0.84, "learning_rate": 5e-05, "loss": 1.6725, "step": 10700 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 1.6517, "step": 10800 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 1.647, "step": 10900 }, { "epoch": 0.86, "learning_rate": 5e-05, "loss": 1.6536, "step": 11000 }, { "epoch": 0.87, "learning_rate": 5e-05, "loss": 1.6545, "step": 11100 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 1.6697, "step": 11200 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 1.6329, "step": 11300 }, { "epoch": 0.89, "learning_rate": 5e-05, "loss": 1.6601, "step": 11400 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 1.665, "step": 11500 }, { "epoch": 0.91, "learning_rate": 5e-05, "loss": 1.6462, "step": 11600 }, { "epoch": 0.92, "learning_rate": 5e-05, "loss": 1.6512, "step": 11700 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 1.673, "step": 11800 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 1.6818, "step": 11900 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 1.6698, "step": 12000 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 1.6772, "step": 12100 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 1.6526, "step": 12200 }, { "epoch": 0.96, "learning_rate": 5e-05, "loss": 1.6561, "step": 12300 }, { "epoch": 0.97, "learning_rate": 5e-05, "loss": 1.6429, "step": 12400 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 1.6517, "step": 12500 }, { "epoch": 0.99, "learning_rate": 5e-05, "loss": 1.6766, "step": 12600 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 1.6708, "step": 12700 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 1.6229, "step": 12800 }, { "epoch": 1.01, "learning_rate": 5e-05, "loss": 1.5406, "step": 12900 }, { "epoch": 1.02, "learning_rate": 5e-05, "loss": 1.5325, "step": 13000 }, { "epoch": 1.03, "learning_rate": 5e-05, "loss": 1.5196, "step": 13100 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 1.5283, "step": 13200 }, { "epoch": 1.04, "learning_rate": 5e-05, "loss": 1.5463, "step": 13300 }, { "epoch": 1.05, "learning_rate": 5e-05, "loss": 1.5461, "step": 13400 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 1.5496, "step": 13500 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 1.5646, "step": 13600 }, { "epoch": 1.07, "learning_rate": 5e-05, "loss": 1.5527, "step": 13700 }, { "epoch": 1.08, "learning_rate": 5e-05, "loss": 1.551, "step": 13800 }, { "epoch": 1.09, "learning_rate": 5e-05, "loss": 1.5566, "step": 13900 }, { "epoch": 1.1, "learning_rate": 5e-05, "loss": 1.5514, "step": 14000 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 1.5425, "step": 14100 }, { "epoch": 1.11, "learning_rate": 5e-05, "loss": 1.5321, "step": 14200 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 1.5552, "step": 14300 }, { "epoch": 1.13, "learning_rate": 5e-05, "loss": 1.5316, "step": 14400 }, { "epoch": 1.14, "learning_rate": 5e-05, "loss": 1.5271, "step": 14500 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 1.5331, "step": 14600 }, { "epoch": 1.15, "learning_rate": 5e-05, "loss": 1.5428, "step": 14700 }, { "epoch": 1.16, "learning_rate": 5e-05, "loss": 1.5534, "step": 14800 }, { "epoch": 1.17, "learning_rate": 5e-05, "loss": 1.5306, "step": 14900 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 1.5468, "step": 15000 }, { "epoch": 1.18, "learning_rate": 5e-05, "loss": 1.529, "step": 15100 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 1.5349, "step": 15200 }, { "epoch": 1.2, "learning_rate": 5e-05, "loss": 1.5596, "step": 15300 }, { "epoch": 1.21, "learning_rate": 5e-05, "loss": 1.5328, "step": 15400 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 1.5495, "step": 15500 }, { "epoch": 1.22, "learning_rate": 5e-05, "loss": 1.5447, "step": 15600 }, { "epoch": 1.23, "learning_rate": 5e-05, "loss": 1.5442, "step": 15700 }, { "epoch": 1.24, "learning_rate": 5e-05, "loss": 1.5435, "step": 15800 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 1.5588, "step": 15900 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 1.5632, "step": 16000 }, { "epoch": 1.26, "learning_rate": 5e-05, "loss": 1.5504, "step": 16100 }, { "epoch": 1.27, "learning_rate": 5e-05, "loss": 1.5148, "step": 16200 }, { "epoch": 1.28, "learning_rate": 5e-05, "loss": 1.5523, "step": 16300 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 1.544, "step": 16400 }, { "epoch": 1.29, "learning_rate": 5e-05, "loss": 1.5472, "step": 16500 }, { "epoch": 1.3, "learning_rate": 5e-05, "loss": 1.5294, "step": 16600 }, { "epoch": 1.31, "learning_rate": 5e-05, "loss": 1.5458, "step": 16700 }, { "epoch": 1.32, "learning_rate": 5e-05, "loss": 1.5579, "step": 16800 }, { "epoch": 1.33, "learning_rate": 5e-05, "loss": 1.5524, "step": 16900 }, { "epoch": 1.33, "learning_rate": 5e-05, "loss": 1.5455, "step": 17000 }, { "epoch": 1.34, "learning_rate": 5e-05, "loss": 1.5549, "step": 17100 }, { "epoch": 1.35, "learning_rate": 5e-05, "loss": 1.5457, "step": 17200 }, { "epoch": 1.36, "learning_rate": 5e-05, "loss": 1.5378, "step": 17300 }, { "epoch": 1.36, "learning_rate": 5e-05, "loss": 1.5422, "step": 17400 }, { "epoch": 1.37, "learning_rate": 5e-05, "loss": 1.5559, "step": 17500 }, { "epoch": 1.38, "learning_rate": 5e-05, "loss": 1.5463, "step": 17600 }, { "epoch": 1.39, "learning_rate": 5e-05, "loss": 1.5385, "step": 17700 }, { "epoch": 1.4, "learning_rate": 5e-05, "loss": 1.5487, "step": 17800 }, { "epoch": 1.4, "learning_rate": 5e-05, "loss": 1.5535, "step": 17900 }, { "epoch": 1.41, "learning_rate": 5e-05, "loss": 1.5348, "step": 18000 }, { "epoch": 1.42, "learning_rate": 5e-05, "loss": 1.5555, "step": 18100 }, { "epoch": 1.43, "learning_rate": 5e-05, "loss": 1.5378, "step": 18200 }, { "epoch": 1.44, "learning_rate": 5e-05, "loss": 1.5212, "step": 18300 }, { "epoch": 1.44, "learning_rate": 5e-05, "loss": 1.5559, "step": 18400 }, { "epoch": 1.45, "learning_rate": 5e-05, "loss": 1.5353, "step": 18500 }, { "epoch": 1.46, "learning_rate": 5e-05, "loss": 1.5461, "step": 18600 }, { "epoch": 1.47, "learning_rate": 5e-05, "loss": 1.5407, "step": 18700 }, { "epoch": 1.47, "learning_rate": 5e-05, "loss": 1.5359, "step": 18800 }, { "epoch": 1.48, "learning_rate": 5e-05, "loss": 1.5561, "step": 18900 }, { "epoch": 1.49, "learning_rate": 5e-05, "loss": 1.5274, "step": 19000 }, { "epoch": 1.5, "learning_rate": 5e-05, "loss": 1.5544, "step": 19100 }, { "epoch": 1.51, "learning_rate": 5e-05, "loss": 1.5407, "step": 19200 }, { "epoch": 1.51, "learning_rate": 5e-05, "loss": 1.5374, "step": 19300 }, { "epoch": 1.52, "learning_rate": 5e-05, "loss": 1.5572, "step": 19400 }, { "epoch": 1.53, "learning_rate": 5e-05, "loss": 1.5177, "step": 19500 }, { "epoch": 1.54, "learning_rate": 5e-05, "loss": 1.5504, "step": 19600 }, { "epoch": 1.54, "learning_rate": 5e-05, "loss": 1.5611, "step": 19700 }, { "epoch": 1.55, "learning_rate": 5e-05, "loss": 1.5362, "step": 19800 }, { "epoch": 1.56, "learning_rate": 5e-05, "loss": 1.5411, "step": 19900 }, { "epoch": 1.57, "learning_rate": 5e-05, "loss": 1.5473, "step": 20000 }, { "epoch": 1.57, "eval_gen_len": 18.753509314028427, "eval_loss": 1.5065853595733643, "eval_rouge1": 38.5975, "eval_rouge2": 16.6862, "eval_rougeL": 31.8033, "eval_rougeLsum": 31.8084, "eval_runtime": 2373.7069, "eval_samples_per_second": 4.772, "eval_steps_per_second": 1.193, "step": 20000 }, { "epoch": 1.58, "learning_rate": 5e-05, "loss": 1.5304, "step": 20100 }, { "epoch": 1.58, "learning_rate": 5e-05, "loss": 1.55, "step": 20200 }, { "epoch": 1.59, "learning_rate": 5e-05, "loss": 1.5375, "step": 20300 }, { "epoch": 1.6, "learning_rate": 5e-05, "loss": 1.5366, "step": 20400 }, { "epoch": 1.61, "learning_rate": 5e-05, "loss": 1.5254, "step": 20500 }, { "epoch": 1.62, "learning_rate": 5e-05, "loss": 1.5369, "step": 20600 }, { "epoch": 1.62, "learning_rate": 5e-05, "loss": 1.5455, "step": 20700 }, { "epoch": 1.63, "learning_rate": 5e-05, "loss": 1.5337, "step": 20800 }, { "epoch": 1.64, "learning_rate": 5e-05, "loss": 1.5411, "step": 20900 }, { "epoch": 1.65, "learning_rate": 5e-05, "loss": 1.5416, "step": 21000 }, { "epoch": 1.65, "learning_rate": 5e-05, "loss": 1.5477, "step": 21100 }, { "epoch": 1.66, "learning_rate": 5e-05, "loss": 1.5276, "step": 21200 }, { "epoch": 1.67, "learning_rate": 5e-05, "loss": 1.5643, "step": 21300 }, { "epoch": 1.68, "learning_rate": 5e-05, "loss": 1.558, "step": 21400 }, { "epoch": 1.69, "learning_rate": 5e-05, "loss": 1.5548, "step": 21500 }, { "epoch": 1.69, "learning_rate": 5e-05, "loss": 1.5486, "step": 21600 }, { "epoch": 1.7, "learning_rate": 5e-05, "loss": 1.551, "step": 21700 }, { "epoch": 1.71, "learning_rate": 5e-05, "loss": 1.5494, "step": 21800 }, { "epoch": 1.72, "learning_rate": 5e-05, "loss": 1.5526, "step": 21900 }, { "epoch": 1.73, "learning_rate": 5e-05, "loss": 1.5294, "step": 22000 }, { "epoch": 1.73, "learning_rate": 5e-05, "loss": 1.5288, "step": 22100 }, { "epoch": 1.74, "learning_rate": 5e-05, "loss": 1.5093, "step": 22200 }, { "epoch": 1.75, "learning_rate": 5e-05, "loss": 1.5276, "step": 22300 }, { "epoch": 1.76, "learning_rate": 5e-05, "loss": 1.5321, "step": 22400 }, { "epoch": 1.76, "learning_rate": 5e-05, "loss": 1.5655, "step": 22500 }, { "epoch": 1.77, "learning_rate": 5e-05, "loss": 1.5438, "step": 22600 }, { "epoch": 1.78, "learning_rate": 5e-05, "loss": 1.5507, "step": 22700 }, { "epoch": 1.79, "learning_rate": 5e-05, "loss": 1.5506, "step": 22800 }, { "epoch": 1.8, "learning_rate": 5e-05, "loss": 1.5377, "step": 22900 }, { "epoch": 1.8, "learning_rate": 5e-05, "loss": 1.5447, "step": 23000 }, { "epoch": 1.81, "learning_rate": 5e-05, "loss": 1.5361, "step": 23100 }, { "epoch": 1.82, "learning_rate": 5e-05, "loss": 1.5568, "step": 23200 }, { "epoch": 1.83, "learning_rate": 5e-05, "loss": 1.5498, "step": 23300 }, { "epoch": 1.84, "learning_rate": 5e-05, "loss": 1.5405, "step": 23400 }, { "epoch": 1.84, "learning_rate": 5e-05, "loss": 1.5198, "step": 23500 }, { "epoch": 1.85, "learning_rate": 5e-05, "loss": 1.5227, "step": 23600 }, { "epoch": 1.86, "learning_rate": 5e-05, "loss": 1.5163, "step": 23700 }, { "epoch": 1.87, "learning_rate": 5e-05, "loss": 1.5811, "step": 23800 }, { "epoch": 1.87, "learning_rate": 5e-05, "loss": 1.5303, "step": 23900 }, { "epoch": 1.88, "learning_rate": 5e-05, "loss": 1.5416, "step": 24000 }, { "epoch": 1.89, "learning_rate": 5e-05, "loss": 1.5296, "step": 24100 }, { "epoch": 1.9, "learning_rate": 5e-05, "loss": 1.5521, "step": 24200 }, { "epoch": 1.91, "learning_rate": 5e-05, "loss": 1.5387, "step": 24300 }, { "epoch": 1.91, "learning_rate": 5e-05, "loss": 1.5311, "step": 24400 }, { "epoch": 1.92, "learning_rate": 5e-05, "loss": 1.5539, "step": 24500 }, { "epoch": 1.93, "learning_rate": 5e-05, "loss": 1.53, "step": 24600 }, { "epoch": 1.94, "learning_rate": 5e-05, "loss": 1.5588, "step": 24700 }, { "epoch": 1.94, "learning_rate": 5e-05, "loss": 1.5402, "step": 24800 }, { "epoch": 1.95, "learning_rate": 5e-05, "loss": 1.5269, "step": 24900 }, { "epoch": 1.96, "learning_rate": 5e-05, "loss": 1.5407, "step": 25000 }, { "epoch": 1.97, "learning_rate": 5e-05, "loss": 1.5141, "step": 25100 }, { "epoch": 1.98, "learning_rate": 5e-05, "loss": 1.523, "step": 25200 }, { "epoch": 1.98, "learning_rate": 5e-05, "loss": 1.5112, "step": 25300 }, { "epoch": 1.99, "learning_rate": 5e-05, "loss": 1.5314, "step": 25400 }, { "epoch": 2.0, "learning_rate": 5e-05, "loss": 1.5345, "step": 25500 }, { "epoch": 2.01, "learning_rate": 5e-05, "loss": 1.4314, "step": 25600 }, { "epoch": 2.02, "learning_rate": 5e-05, "loss": 1.445, "step": 25700 }, { "epoch": 2.02, "learning_rate": 5e-05, "loss": 1.4215, "step": 25800 }, { "epoch": 2.03, "learning_rate": 5e-05, "loss": 1.4116, "step": 25900 }, { "epoch": 2.04, "learning_rate": 5e-05, "loss": 1.4264, "step": 26000 }, { "epoch": 2.05, "learning_rate": 5e-05, "loss": 1.4238, "step": 26100 }, { "epoch": 2.05, "learning_rate": 5e-05, "loss": 1.4172, "step": 26200 }, { "epoch": 2.06, "learning_rate": 5e-05, "loss": 1.4319, "step": 26300 }, { "epoch": 2.07, "learning_rate": 5e-05, "loss": 1.4174, "step": 26400 }, { "epoch": 2.08, "learning_rate": 5e-05, "loss": 1.4293, "step": 26500 }, { "epoch": 2.09, "learning_rate": 5e-05, "loss": 1.4393, "step": 26600 }, { "epoch": 2.09, "learning_rate": 5e-05, "loss": 1.4474, "step": 26700 }, { "epoch": 2.1, "learning_rate": 5e-05, "loss": 1.4227, "step": 26800 }, { "epoch": 2.11, "learning_rate": 5e-05, "loss": 1.4232, "step": 26900 }, { "epoch": 2.12, "learning_rate": 5e-05, "loss": 1.4294, "step": 27000 }, { "epoch": 2.13, "learning_rate": 5e-05, "loss": 1.4118, "step": 27100 }, { "epoch": 2.13, "learning_rate": 5e-05, "loss": 1.4277, "step": 27200 }, { "epoch": 2.14, "learning_rate": 5e-05, "loss": 1.4171, "step": 27300 }, { "epoch": 2.15, "learning_rate": 5e-05, "loss": 1.4413, "step": 27400 }, { "epoch": 2.16, "learning_rate": 5e-05, "loss": 1.4119, "step": 27500 }, { "epoch": 2.16, "learning_rate": 5e-05, "loss": 1.3893, "step": 27600 }, { "epoch": 2.17, "learning_rate": 5e-05, "loss": 1.4255, "step": 27700 }, { "epoch": 2.18, "learning_rate": 5e-05, "loss": 1.4221, "step": 27800 }, { "epoch": 2.19, "learning_rate": 5e-05, "loss": 1.4339, "step": 27900 }, { "epoch": 2.2, "learning_rate": 5e-05, "loss": 1.4394, "step": 28000 }, { "epoch": 2.2, "learning_rate": 5e-05, "loss": 1.4249, "step": 28100 }, { "epoch": 2.21, "learning_rate": 5e-05, "loss": 1.4458, "step": 28200 }, { "epoch": 2.22, "learning_rate": 5e-05, "loss": 1.4323, "step": 28300 }, { "epoch": 2.23, "learning_rate": 5e-05, "loss": 1.4167, "step": 28400 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.431, "step": 28500 }, { "epoch": 2.24, "learning_rate": 5e-05, "loss": 1.4568, "step": 28600 }, { "epoch": 2.25, "learning_rate": 5e-05, "loss": 1.4716, "step": 28700 }, { "epoch": 2.26, "learning_rate": 5e-05, "loss": 1.4147, "step": 28800 }, { "epoch": 2.27, "learning_rate": 5e-05, "loss": 1.427, "step": 28900 }, { "epoch": 2.27, "learning_rate": 5e-05, "loss": 1.4643, "step": 29000 }, { "epoch": 2.28, "learning_rate": 5e-05, "loss": 1.4436, "step": 29100 }, { "epoch": 2.29, "learning_rate": 5e-05, "loss": 1.4321, "step": 29200 }, { "epoch": 2.3, "learning_rate": 5e-05, "loss": 1.4464, "step": 29300 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.4052, "step": 29400 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.4278, "step": 29500 }, { "epoch": 2.32, "learning_rate": 5e-05, "loss": 1.401, "step": 29600 }, { "epoch": 2.33, "learning_rate": 5e-05, "loss": 1.4458, "step": 29700 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.4517, "step": 29800 }, { "epoch": 2.34, "learning_rate": 5e-05, "loss": 1.4541, "step": 29900 }, { "epoch": 2.35, "learning_rate": 5e-05, "loss": 1.4334, "step": 30000 }, { "epoch": 2.35, "eval_gen_len": 18.717400900503222, "eval_loss": 1.4981415271759033, "eval_rouge1": 39.0553, "eval_rouge2": 17.2294, "eval_rougeL": 32.3778, "eval_rougeLsum": 32.3837, "eval_runtime": 2349.3062, "eval_samples_per_second": 4.821, "eval_steps_per_second": 1.205, "step": 30000 }, { "epoch": 2.36, "learning_rate": 5e-05, "loss": 1.4209, "step": 30100 }, { "epoch": 2.37, "learning_rate": 5e-05, "loss": 1.401, "step": 30200 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.4375, "step": 30300 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.4383, "step": 30400 }, { "epoch": 2.39, "learning_rate": 5e-05, "loss": 1.4378, "step": 30500 }, { "epoch": 2.4, "learning_rate": 5e-05, "loss": 1.444, "step": 30600 }, { "epoch": 2.41, "learning_rate": 5e-05, "loss": 1.432, "step": 30700 }, { "epoch": 2.42, "learning_rate": 5e-05, "loss": 1.4397, "step": 30800 }, { "epoch": 2.42, "learning_rate": 5e-05, "loss": 1.4196, "step": 30900 }, { "epoch": 2.43, "learning_rate": 5e-05, "loss": 1.4123, "step": 31000 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 1.4194, "step": 31100 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.4466, "step": 31200 }, { "epoch": 2.45, "learning_rate": 5e-05, "loss": 1.4358, "step": 31300 }, { "epoch": 2.46, "learning_rate": 5e-05, "loss": 1.4641, "step": 31400 }, { "epoch": 2.47, "learning_rate": 5e-05, "loss": 1.4346, "step": 31500 }, { "epoch": 2.48, "learning_rate": 5e-05, "loss": 1.4372, "step": 31600 }, { "epoch": 2.49, "learning_rate": 5e-05, "loss": 1.4128, "step": 31700 }, { "epoch": 2.49, "learning_rate": 5e-05, "loss": 1.4267, "step": 31800 }, { "epoch": 2.5, "learning_rate": 5e-05, "loss": 1.4233, "step": 31900 }, { "epoch": 2.51, "learning_rate": 5e-05, "loss": 1.4357, "step": 32000 }, { "epoch": 2.52, "learning_rate": 5e-05, "loss": 1.4127, "step": 32100 }, { "epoch": 2.53, "learning_rate": 5e-05, "loss": 1.4324, "step": 32200 }, { "epoch": 2.53, "learning_rate": 5e-05, "loss": 1.4501, "step": 32300 }, { "epoch": 2.54, "learning_rate": 5e-05, "loss": 1.426, "step": 32400 }, { "epoch": 2.55, "learning_rate": 5e-05, "loss": 1.445, "step": 32500 }, { "epoch": 2.56, "learning_rate": 5e-05, "loss": 1.4226, "step": 32600 }, { "epoch": 2.56, "learning_rate": 5e-05, "loss": 1.4569, "step": 32700 }, { "epoch": 2.57, "learning_rate": 5e-05, "loss": 1.4501, "step": 32800 }, { "epoch": 2.58, "learning_rate": 5e-05, "loss": 1.4113, "step": 32900 }, { "epoch": 2.59, "learning_rate": 5e-05, "loss": 1.4629, "step": 33000 }, { "epoch": 2.6, "learning_rate": 5e-05, "loss": 1.4288, "step": 33100 }, { "epoch": 2.6, "learning_rate": 5e-05, "loss": 1.4311, "step": 33200 }, { "epoch": 2.61, "learning_rate": 5e-05, "loss": 1.4409, "step": 33300 }, { "epoch": 2.62, "learning_rate": 5e-05, "loss": 1.4597, "step": 33400 }, { "epoch": 2.63, "learning_rate": 5e-05, "loss": 1.4401, "step": 33500 }, { "epoch": 2.64, "learning_rate": 5e-05, "loss": 1.4155, "step": 33600 }, { "epoch": 2.64, "learning_rate": 5e-05, "loss": 1.438, "step": 33700 }, { "epoch": 2.65, "learning_rate": 5e-05, "loss": 1.4159, "step": 33800 }, { "epoch": 2.66, "learning_rate": 5e-05, "loss": 1.4377, "step": 33900 }, { "epoch": 2.67, "learning_rate": 5e-05, "loss": 1.4474, "step": 34000 }, { "epoch": 2.67, "learning_rate": 5e-05, "loss": 1.4338, "step": 34100 }, { "epoch": 2.68, "learning_rate": 5e-05, "loss": 1.4747, "step": 34200 }, { "epoch": 2.69, "learning_rate": 5e-05, "loss": 1.475, "step": 34300 }, { "epoch": 2.7, "learning_rate": 5e-05, "loss": 1.4491, "step": 34400 }, { "epoch": 2.71, "learning_rate": 5e-05, "loss": 1.4372, "step": 34500 }, { "epoch": 2.71, "learning_rate": 5e-05, "loss": 1.455, "step": 34600 }, { "epoch": 2.72, "learning_rate": 5e-05, "loss": 1.4535, "step": 34700 }, { "epoch": 2.73, "learning_rate": 5e-05, "loss": 1.4649, "step": 34800 }, { "epoch": 2.74, "learning_rate": 5e-05, "loss": 1.457, "step": 34900 }, { "epoch": 2.74, "learning_rate": 5e-05, "loss": 1.4422, "step": 35000 }, { "epoch": 2.75, "learning_rate": 5e-05, "loss": 1.431, "step": 35100 }, { "epoch": 2.76, "learning_rate": 5e-05, "loss": 1.4279, "step": 35200 }, { "epoch": 2.77, "learning_rate": 5e-05, "loss": 1.4487, "step": 35300 }, { "epoch": 2.78, "learning_rate": 5e-05, "loss": 1.456, "step": 35400 }, { "epoch": 2.78, "learning_rate": 5e-05, "loss": 1.4208, "step": 35500 }, { "epoch": 2.79, "learning_rate": 5e-05, "loss": 1.4449, "step": 35600 }, { "epoch": 2.8, "learning_rate": 5e-05, "loss": 1.4324, "step": 35700 }, { "epoch": 2.81, "learning_rate": 5e-05, "loss": 1.4399, "step": 35800 }, { "epoch": 2.82, "learning_rate": 5e-05, "loss": 1.4257, "step": 35900 }, { "epoch": 2.82, "learning_rate": 5e-05, "loss": 1.457, "step": 36000 }, { "epoch": 2.83, "learning_rate": 5e-05, "loss": 1.4549, "step": 36100 }, { "epoch": 2.84, "learning_rate": 5e-05, "loss": 1.4525, "step": 36200 }, { "epoch": 2.85, "learning_rate": 5e-05, "loss": 1.4587, "step": 36300 }, { "epoch": 2.85, "learning_rate": 5e-05, "loss": 1.4319, "step": 36400 }, { "epoch": 2.86, "learning_rate": 5e-05, "loss": 1.4267, "step": 36500 }, { "epoch": 2.87, "learning_rate": 5e-05, "loss": 1.4644, "step": 36600 }, { "epoch": 2.88, "learning_rate": 5e-05, "loss": 1.4477, "step": 36700 }, { "epoch": 2.89, "learning_rate": 5e-05, "loss": 1.4342, "step": 36800 }, { "epoch": 2.89, "learning_rate": 5e-05, "loss": 1.4348, "step": 36900 }, { "epoch": 2.9, "learning_rate": 5e-05, "loss": 1.4384, "step": 37000 }, { "epoch": 2.91, "learning_rate": 5e-05, "loss": 1.4432, "step": 37100 }, { "epoch": 2.92, "learning_rate": 5e-05, "loss": 1.4382, "step": 37200 }, { "epoch": 2.93, "learning_rate": 5e-05, "loss": 1.4454, "step": 37300 }, { "epoch": 2.93, "learning_rate": 5e-05, "loss": 1.4196, "step": 37400 }, { "epoch": 2.94, "learning_rate": 5e-05, "loss": 1.4476, "step": 37500 }, { "epoch": 2.95, "learning_rate": 5e-05, "loss": 1.4554, "step": 37600 }, { "epoch": 2.96, "learning_rate": 5e-05, "loss": 1.4315, "step": 37700 }, { "epoch": 2.96, "learning_rate": 5e-05, "loss": 1.4471, "step": 37800 }, { "epoch": 2.97, "learning_rate": 5e-05, "loss": 1.4523, "step": 37900 }, { "epoch": 2.98, "learning_rate": 5e-05, "loss": 1.4562, "step": 38000 }, { "epoch": 2.99, "learning_rate": 5e-05, "loss": 1.4413, "step": 38100 }, { "epoch": 3.0, "learning_rate": 5e-05, "loss": 1.4608, "step": 38200 }, { "epoch": 3.0, "learning_rate": 5e-05, "loss": 1.4121, "step": 38300 }, { "epoch": 3.01, "learning_rate": 5e-05, "loss": 1.3319, "step": 38400 }, { "epoch": 3.02, "learning_rate": 5e-05, "loss": 1.3408, "step": 38500 }, { "epoch": 3.03, "learning_rate": 5e-05, "loss": 1.3191, "step": 38600 }, { "epoch": 3.04, "learning_rate": 5e-05, "loss": 1.326, "step": 38700 }, { "epoch": 3.04, "learning_rate": 5e-05, "loss": 1.3219, "step": 38800 }, { "epoch": 3.05, "learning_rate": 5e-05, "loss": 1.333, "step": 38900 }, { "epoch": 3.06, "learning_rate": 5e-05, "loss": 1.3286, "step": 39000 }, { "epoch": 3.07, "learning_rate": 5e-05, "loss": 1.3439, "step": 39100 }, { "epoch": 3.07, "learning_rate": 5e-05, "loss": 1.3256, "step": 39200 }, { "epoch": 3.08, "learning_rate": 5e-05, "loss": 1.3363, "step": 39300 }, { "epoch": 3.09, "learning_rate": 5e-05, "loss": 1.3229, "step": 39400 }, { "epoch": 3.1, "learning_rate": 5e-05, "loss": 1.3285, "step": 39500 }, { "epoch": 3.11, "learning_rate": 5e-05, "loss": 1.3242, "step": 39600 }, { "epoch": 3.11, "learning_rate": 5e-05, "loss": 1.329, "step": 39700 }, { "epoch": 3.12, "learning_rate": 5e-05, "loss": 1.3486, "step": 39800 }, { "epoch": 3.13, "learning_rate": 5e-05, "loss": 1.3239, "step": 39900 }, { "epoch": 3.14, "learning_rate": 5e-05, "loss": 1.3582, "step": 40000 }, { "epoch": 3.14, "eval_gen_len": 18.757482122362497, "eval_loss": 1.4984853267669678, "eval_rouge1": 39.0472, "eval_rouge2": 17.2043, "eval_rougeL": 32.2958, "eval_rougeLsum": 32.2919, "eval_runtime": 2351.4068, "eval_samples_per_second": 4.817, "eval_steps_per_second": 1.204, "step": 40000 }, { "epoch": 3.14, "learning_rate": 5e-05, "loss": 1.348, "step": 40100 }, { "epoch": 3.15, "learning_rate": 5e-05, "loss": 1.3358, "step": 40200 }, { "epoch": 3.16, "learning_rate": 5e-05, "loss": 1.3307, "step": 40300 }, { "epoch": 3.17, "learning_rate": 5e-05, "loss": 1.3387, "step": 40400 }, { "epoch": 3.18, "learning_rate": 5e-05, "loss": 1.3412, "step": 40500 }, { "epoch": 3.18, "learning_rate": 5e-05, "loss": 1.3391, "step": 40600 }, { "epoch": 3.19, "learning_rate": 5e-05, "loss": 1.367, "step": 40700 }, { "epoch": 3.2, "learning_rate": 5e-05, "loss": 1.3217, "step": 40800 }, { "epoch": 3.21, "learning_rate": 5e-05, "loss": 1.3407, "step": 40900 }, { "epoch": 3.22, "learning_rate": 5e-05, "loss": 1.3321, "step": 41000 }, { "epoch": 3.22, "learning_rate": 5e-05, "loss": 1.3565, "step": 41100 }, { "epoch": 3.23, "learning_rate": 5e-05, "loss": 1.33, "step": 41200 }, { "epoch": 3.24, "learning_rate": 5e-05, "loss": 1.3402, "step": 41300 }, { "epoch": 3.25, "learning_rate": 5e-05, "loss": 1.3641, "step": 41400 }, { "epoch": 3.25, "learning_rate": 5e-05, "loss": 1.3589, "step": 41500 }, { "epoch": 3.26, "learning_rate": 5e-05, "loss": 1.329, "step": 41600 }, { "epoch": 3.27, "learning_rate": 5e-05, "loss": 1.3262, "step": 41700 }, { "epoch": 3.28, "learning_rate": 5e-05, "loss": 1.3384, "step": 41800 }, { "epoch": 3.29, "learning_rate": 5e-05, "loss": 1.3475, "step": 41900 }, { "epoch": 3.29, "learning_rate": 5e-05, "loss": 1.3532, "step": 42000 }, { "epoch": 3.3, "learning_rate": 5e-05, "loss": 1.3395, "step": 42100 }, { "epoch": 3.31, "learning_rate": 5e-05, "loss": 1.3357, "step": 42200 }, { "epoch": 3.32, "learning_rate": 5e-05, "loss": 1.3364, "step": 42300 }, { "epoch": 3.33, "learning_rate": 5e-05, "loss": 1.351, "step": 42400 }, { "epoch": 3.33, "learning_rate": 5e-05, "loss": 1.3656, "step": 42500 }, { "epoch": 3.34, "learning_rate": 5e-05, "loss": 1.3651, "step": 42600 }, { "epoch": 3.35, "learning_rate": 5e-05, "loss": 1.3317, "step": 42700 }, { "epoch": 3.36, "learning_rate": 5e-05, "loss": 1.3492, "step": 42800 }, { "epoch": 3.36, "learning_rate": 5e-05, "loss": 1.3497, "step": 42900 }, { "epoch": 3.37, "learning_rate": 5e-05, "loss": 1.3523, "step": 43000 }, { "epoch": 3.38, "learning_rate": 5e-05, "loss": 1.3484, "step": 43100 }, { "epoch": 3.39, "learning_rate": 5e-05, "loss": 1.3495, "step": 43200 }, { "epoch": 3.4, "learning_rate": 5e-05, "loss": 1.3633, "step": 43300 }, { "epoch": 3.4, "learning_rate": 5e-05, "loss": 1.3578, "step": 43400 }, { "epoch": 3.41, "learning_rate": 5e-05, "loss": 1.334, "step": 43500 }, { "epoch": 3.42, "learning_rate": 5e-05, "loss": 1.3591, "step": 43600 }, { "epoch": 3.43, "learning_rate": 5e-05, "loss": 1.334, "step": 43700 }, { "epoch": 3.44, "learning_rate": 5e-05, "loss": 1.3484, "step": 43800 }, { "epoch": 3.44, "learning_rate": 5e-05, "loss": 1.3557, "step": 43900 }, { "epoch": 3.45, "learning_rate": 5e-05, "loss": 1.3453, "step": 44000 }, { "epoch": 3.46, "learning_rate": 5e-05, "loss": 1.342, "step": 44100 }, { "epoch": 3.47, "learning_rate": 5e-05, "loss": 1.3449, "step": 44200 }, { "epoch": 3.47, "learning_rate": 5e-05, "loss": 1.3392, "step": 44300 }, { "epoch": 3.48, "learning_rate": 5e-05, "loss": 1.3421, "step": 44400 }, { "epoch": 3.49, "learning_rate": 5e-05, "loss": 1.3444, "step": 44500 }, { "epoch": 3.5, "learning_rate": 5e-05, "loss": 1.3492, "step": 44600 }, { "epoch": 3.51, "learning_rate": 5e-05, "loss": 1.3575, "step": 44700 }, { "epoch": 3.51, "learning_rate": 5e-05, "loss": 1.3487, "step": 44800 }, { "epoch": 3.52, "learning_rate": 5e-05, "loss": 1.3432, "step": 44900 }, { "epoch": 3.53, "learning_rate": 5e-05, "loss": 1.3119, "step": 45000 }, { "epoch": 3.54, "learning_rate": 5e-05, "loss": 1.3624, "step": 45100 }, { "epoch": 3.54, "learning_rate": 5e-05, "loss": 1.3454, "step": 45200 }, { "epoch": 3.55, "learning_rate": 5e-05, "loss": 1.3507, "step": 45300 }, { "epoch": 3.56, "learning_rate": 5e-05, "loss": 1.3481, "step": 45400 }, { "epoch": 3.57, "learning_rate": 5e-05, "loss": 1.3822, "step": 45500 }, { "epoch": 3.58, "learning_rate": 5e-05, "loss": 1.3479, "step": 45600 }, { "epoch": 3.58, "learning_rate": 5e-05, "loss": 1.3556, "step": 45700 }, { "epoch": 3.59, "learning_rate": 5e-05, "loss": 1.3584, "step": 45800 }, { "epoch": 3.6, "learning_rate": 5e-05, "loss": 1.3583, "step": 45900 }, { "epoch": 3.61, "learning_rate": 5e-05, "loss": 1.3644, "step": 46000 }, { "epoch": 3.62, "learning_rate": 5e-05, "loss": 1.3376, "step": 46100 }, { "epoch": 3.62, "learning_rate": 5e-05, "loss": 1.3345, "step": 46200 }, { "epoch": 3.63, "learning_rate": 5e-05, "loss": 1.3796, "step": 46300 }, { "epoch": 3.64, "learning_rate": 5e-05, "loss": 1.3425, "step": 46400 }, { "epoch": 3.65, "learning_rate": 5e-05, "loss": 1.3434, "step": 46500 }, { "epoch": 3.65, "learning_rate": 5e-05, "loss": 1.382, "step": 46600 }, { "epoch": 3.66, "learning_rate": 5e-05, "loss": 1.3829, "step": 46700 }, { "epoch": 3.67, "learning_rate": 5e-05, "loss": 1.3671, "step": 46800 }, { "epoch": 3.68, "learning_rate": 5e-05, "loss": 1.3561, "step": 46900 }, { "epoch": 3.69, "learning_rate": 5e-05, "loss": 1.3528, "step": 47000 }, { "epoch": 3.69, "learning_rate": 5e-05, "loss": 1.3359, "step": 47100 }, { "epoch": 3.7, "learning_rate": 5e-05, "loss": 1.3458, "step": 47200 }, { "epoch": 3.71, "learning_rate": 5e-05, "loss": 1.3547, "step": 47300 }, { "epoch": 3.72, "learning_rate": 5e-05, "loss": 1.3579, "step": 47400 }, { "epoch": 3.73, "learning_rate": 5e-05, "loss": 1.3383, "step": 47500 }, { "epoch": 3.73, "learning_rate": 5e-05, "loss": 1.3555, "step": 47600 }, { "epoch": 3.74, "learning_rate": 5e-05, "loss": 1.3702, "step": 47700 }, { "epoch": 3.75, "learning_rate": 5e-05, "loss": 1.3472, "step": 47800 }, { "epoch": 3.76, "learning_rate": 5e-05, "loss": 1.3567, "step": 47900 }, { "epoch": 3.76, "learning_rate": 5e-05, "loss": 1.3464, "step": 48000 }, { "epoch": 3.77, "learning_rate": 5e-05, "loss": 1.3709, "step": 48100 }, { "epoch": 3.78, "learning_rate": 5e-05, "loss": 1.3677, "step": 48200 }, { "epoch": 3.79, "learning_rate": 5e-05, "loss": 1.3653, "step": 48300 }, { "epoch": 3.8, "learning_rate": 5e-05, "loss": 1.3726, "step": 48400 }, { "epoch": 3.8, "learning_rate": 5e-05, "loss": 1.3684, "step": 48500 }, { "epoch": 3.81, "learning_rate": 5e-05, "loss": 1.3459, "step": 48600 }, { "epoch": 3.82, "learning_rate": 5e-05, "loss": 1.3812, "step": 48700 }, { "epoch": 3.83, "learning_rate": 5e-05, "loss": 1.3432, "step": 48800 }, { "epoch": 3.83, "learning_rate": 5e-05, "loss": 1.3573, "step": 48900 }, { "epoch": 3.84, "learning_rate": 5e-05, "loss": 1.363, "step": 49000 }, { "epoch": 3.85, "learning_rate": 5e-05, "loss": 1.3744, "step": 49100 }, { "epoch": 3.86, "learning_rate": 5e-05, "loss": 1.3751, "step": 49200 }, { "epoch": 3.87, "learning_rate": 5e-05, "loss": 1.3629, "step": 49300 }, { "epoch": 3.87, "learning_rate": 5e-05, "loss": 1.3549, "step": 49400 }, { "epoch": 3.88, "learning_rate": 5e-05, "loss": 1.3704, "step": 49500 }, { "epoch": 3.89, "learning_rate": 5e-05, "loss": 1.3743, "step": 49600 }, { "epoch": 3.9, "learning_rate": 5e-05, "loss": 1.3361, "step": 49700 }, { "epoch": 3.91, "learning_rate": 5e-05, "loss": 1.3684, "step": 49800 }, { "epoch": 3.91, "learning_rate": 5e-05, "loss": 1.3545, "step": 49900 }, { "epoch": 3.92, "learning_rate": 5e-05, "loss": 1.3716, "step": 50000 }, { "epoch": 3.92, "eval_gen_len": 18.721197139578, "eval_loss": 1.4834269285202026, "eval_rouge1": 39.5208, "eval_rouge2": 17.6414, "eval_rougeL": 32.8518, "eval_rougeLsum": 32.8448, "eval_runtime": 2347.5327, "eval_samples_per_second": 4.825, "eval_steps_per_second": 1.206, "step": 50000 }, { "epoch": 3.93, "learning_rate": 5e-05, "loss": 1.355, "step": 50100 }, { "epoch": 3.94, "learning_rate": 5e-05, "loss": 1.3659, "step": 50200 }, { "epoch": 3.94, "learning_rate": 5e-05, "loss": 1.3819, "step": 50300 }, { "epoch": 3.95, "learning_rate": 5e-05, "loss": 1.3587, "step": 50400 }, { "epoch": 3.96, "learning_rate": 5e-05, "loss": 1.3802, "step": 50500 }, { "epoch": 3.97, "learning_rate": 5e-05, "loss": 1.3681, "step": 50600 }, { "epoch": 3.98, "learning_rate": 5e-05, "loss": 1.3567, "step": 50700 }, { "epoch": 3.98, "learning_rate": 5e-05, "loss": 1.3517, "step": 50800 }, { "epoch": 3.99, "learning_rate": 5e-05, "loss": 1.3769, "step": 50900 }, { "epoch": 4.0, "learning_rate": 5e-05, "loss": 1.3688, "step": 51000 }, { "epoch": 4.01, "learning_rate": 5e-05, "loss": 1.2497, "step": 51100 }, { "epoch": 4.02, "learning_rate": 5e-05, "loss": 1.2667, "step": 51200 }, { "epoch": 4.02, "learning_rate": 5e-05, "loss": 1.2817, "step": 51300 }, { "epoch": 4.03, "learning_rate": 5e-05, "loss": 1.2461, "step": 51400 }, { "epoch": 4.04, "learning_rate": 5e-05, "loss": 1.2406, "step": 51500 }, { "epoch": 4.05, "learning_rate": 5e-05, "loss": 1.2409, "step": 51600 }, { "epoch": 4.05, "learning_rate": 5e-05, "loss": 1.2674, "step": 51700 }, { "epoch": 4.06, "learning_rate": 5e-05, "loss": 1.2651, "step": 51800 }, { "epoch": 4.07, "learning_rate": 5e-05, "loss": 1.2241, "step": 51900 }, { "epoch": 4.08, "learning_rate": 5e-05, "loss": 1.2928, "step": 52000 }, { "epoch": 4.09, "learning_rate": 5e-05, "loss": 1.2599, "step": 52100 }, { "epoch": 4.09, "learning_rate": 5e-05, "loss": 1.2408, "step": 52200 }, { "epoch": 4.1, "learning_rate": 5e-05, "loss": 1.2664, "step": 52300 }, { "epoch": 4.11, "learning_rate": 5e-05, "loss": 1.2434, "step": 52400 }, { "epoch": 4.12, "learning_rate": 5e-05, "loss": 1.2654, "step": 52500 }, { "epoch": 4.13, "learning_rate": 5e-05, "loss": 1.2663, "step": 52600 }, { "epoch": 4.13, "learning_rate": 5e-05, "loss": 1.2511, "step": 52700 }, { "epoch": 4.14, "learning_rate": 5e-05, "loss": 1.2763, "step": 52800 }, { "epoch": 4.15, "learning_rate": 5e-05, "loss": 1.2623, "step": 52900 }, { "epoch": 4.16, "learning_rate": 5e-05, "loss": 1.2667, "step": 53000 }, { "epoch": 4.16, "learning_rate": 5e-05, "loss": 1.2627, "step": 53100 }, { "epoch": 4.17, "learning_rate": 5e-05, "loss": 1.2648, "step": 53200 }, { "epoch": 4.18, "learning_rate": 5e-05, "loss": 1.2609, "step": 53300 }, { "epoch": 4.19, "learning_rate": 5e-05, "loss": 1.2636, "step": 53400 }, { "epoch": 4.2, "learning_rate": 5e-05, "loss": 1.2592, "step": 53500 }, { "epoch": 4.2, "learning_rate": 5e-05, "loss": 1.2517, "step": 53600 }, { "epoch": 4.21, "learning_rate": 5e-05, "loss": 1.2881, "step": 53700 }, { "epoch": 4.22, "learning_rate": 5e-05, "loss": 1.254, "step": 53800 }, { "epoch": 4.23, "learning_rate": 5e-05, "loss": 1.2749, "step": 53900 }, { "epoch": 4.23, "learning_rate": 5e-05, "loss": 1.2544, "step": 54000 }, { "epoch": 4.24, "learning_rate": 5e-05, "loss": 1.2681, "step": 54100 }, { "epoch": 4.25, "learning_rate": 5e-05, "loss": 1.2929, "step": 54200 }, { "epoch": 4.26, "learning_rate": 5e-05, "loss": 1.273, "step": 54300 }, { "epoch": 4.27, "learning_rate": 5e-05, "loss": 1.2709, "step": 54400 }, { "epoch": 4.27, "learning_rate": 5e-05, "loss": 1.2504, "step": 54500 }, { "epoch": 4.28, "learning_rate": 5e-05, "loss": 1.2867, "step": 54600 }, { "epoch": 4.29, "learning_rate": 5e-05, "loss": 1.2771, "step": 54700 }, { "epoch": 4.3, "learning_rate": 5e-05, "loss": 1.257, "step": 54800 }, { "epoch": 4.31, "learning_rate": 5e-05, "loss": 1.287, "step": 54900 }, { "epoch": 4.31, "learning_rate": 5e-05, "loss": 1.2629, "step": 55000 }, { "epoch": 4.32, "learning_rate": 5e-05, "loss": 1.2612, "step": 55100 }, { "epoch": 4.33, "learning_rate": 5e-05, "loss": 1.2433, "step": 55200 }, { "epoch": 4.34, "learning_rate": 5e-05, "loss": 1.2655, "step": 55300 }, { "epoch": 4.34, "learning_rate": 5e-05, "loss": 1.2762, "step": 55400 }, { "epoch": 4.35, "learning_rate": 5e-05, "loss": 1.2714, "step": 55500 }, { "epoch": 4.36, "learning_rate": 5e-05, "loss": 1.2821, "step": 55600 }, { "epoch": 4.37, "learning_rate": 5e-05, "loss": 1.265, "step": 55700 }, { "epoch": 4.38, "learning_rate": 5e-05, "loss": 1.2547, "step": 55800 }, { "epoch": 4.38, "learning_rate": 5e-05, "loss": 1.2555, "step": 55900 }, { "epoch": 4.39, "learning_rate": 5e-05, "loss": 1.2784, "step": 56000 }, { "epoch": 4.4, "learning_rate": 5e-05, "loss": 1.2729, "step": 56100 }, { "epoch": 4.41, "learning_rate": 5e-05, "loss": 1.2978, "step": 56200 }, { "epoch": 4.42, "learning_rate": 5e-05, "loss": 1.2682, "step": 56300 }, { "epoch": 4.42, "learning_rate": 5e-05, "loss": 1.2884, "step": 56400 }, { "epoch": 4.43, "learning_rate": 5e-05, "loss": 1.2571, "step": 56500 }, { "epoch": 4.44, "learning_rate": 5e-05, "loss": 1.2852, "step": 56600 }, { "epoch": 4.45, "learning_rate": 5e-05, "loss": 1.2858, "step": 56700 }, { "epoch": 4.45, "learning_rate": 5e-05, "loss": 1.2736, "step": 56800 }, { "epoch": 4.46, "learning_rate": 5e-05, "loss": 1.2969, "step": 56900 }, { "epoch": 4.47, "learning_rate": 5e-05, "loss": 1.2767, "step": 57000 }, { "epoch": 4.48, "learning_rate": 5e-05, "loss": 1.2777, "step": 57100 }, { "epoch": 4.49, "learning_rate": 5e-05, "loss": 1.2589, "step": 57200 }, { "epoch": 4.49, "learning_rate": 5e-05, "loss": 1.2571, "step": 57300 }, { "epoch": 4.5, "learning_rate": 5e-05, "loss": 1.2631, "step": 57400 }, { "epoch": 4.51, "learning_rate": 5e-05, "loss": 1.2696, "step": 57500 }, { "epoch": 4.52, "learning_rate": 5e-05, "loss": 1.2654, "step": 57600 }, { "epoch": 4.53, "learning_rate": 5e-05, "loss": 1.2738, "step": 57700 }, { "epoch": 4.53, "learning_rate": 5e-05, "loss": 1.2798, "step": 57800 }, { "epoch": 4.54, "learning_rate": 5e-05, "loss": 1.301, "step": 57900 }, { "epoch": 4.55, "learning_rate": 5e-05, "loss": 1.2678, "step": 58000 }, { "epoch": 4.56, "learning_rate": 5e-05, "loss": 1.2659, "step": 58100 }, { "epoch": 4.56, "learning_rate": 5e-05, "loss": 1.2779, "step": 58200 }, { "epoch": 4.57, "learning_rate": 5e-05, "loss": 1.2936, "step": 58300 }, { "epoch": 4.58, "learning_rate": 5e-05, "loss": 1.2843, "step": 58400 }, { "epoch": 4.59, "learning_rate": 5e-05, "loss": 1.2633, "step": 58500 }, { "epoch": 4.6, "learning_rate": 5e-05, "loss": 1.2969, "step": 58600 }, { "epoch": 4.6, "learning_rate": 5e-05, "loss": 1.2624, "step": 58700 }, { "epoch": 4.61, "learning_rate": 5e-05, "loss": 1.2952, "step": 58800 }, { "epoch": 4.62, "learning_rate": 5e-05, "loss": 1.2845, "step": 58900 }, { "epoch": 4.63, "learning_rate": 5e-05, "loss": 1.2759, "step": 59000 }, { "epoch": 4.63, "learning_rate": 5e-05, "loss": 1.2948, "step": 59100 }, { "epoch": 4.64, "learning_rate": 5e-05, "loss": 1.2547, "step": 59200 }, { "epoch": 4.65, "learning_rate": 5e-05, "loss": 1.2721, "step": 59300 }, { "epoch": 4.66, "learning_rate": 5e-05, "loss": 1.2678, "step": 59400 }, { "epoch": 4.67, "learning_rate": 5e-05, "loss": 1.2821, "step": 59500 }, { "epoch": 4.67, "learning_rate": 5e-05, "loss": 1.2667, "step": 59600 }, { "epoch": 4.68, "learning_rate": 5e-05, "loss": 1.2791, "step": 59700 }, { "epoch": 4.69, "learning_rate": 5e-05, "loss": 1.284, "step": 59800 }, { "epoch": 4.7, "learning_rate": 5e-05, "loss": 1.2932, "step": 59900 }, { "epoch": 4.71, "learning_rate": 5e-05, "loss": 1.3006, "step": 60000 }, { "epoch": 4.71, "eval_gen_len": 18.713604661428445, "eval_loss": 1.4940598011016846, "eval_rouge1": 39.3973, "eval_rouge2": 17.7088, "eval_rougeL": 32.7938, "eval_rougeLsum": 32.7949, "eval_runtime": 2600.1572, "eval_samples_per_second": 4.356, "eval_steps_per_second": 1.089, "step": 60000 }, { "epoch": 4.71, "learning_rate": 5e-05, "loss": 1.2774, "step": 60100 }, { "epoch": 4.72, "learning_rate": 5e-05, "loss": 1.296, "step": 60200 }, { "epoch": 4.73, "learning_rate": 5e-05, "loss": 1.3068, "step": 60300 }, { "epoch": 4.74, "learning_rate": 5e-05, "loss": 1.2759, "step": 60400 }, { "epoch": 4.74, "learning_rate": 5e-05, "loss": 1.2824, "step": 60500 }, { "epoch": 4.75, "learning_rate": 5e-05, "loss": 1.3045, "step": 60600 }, { "epoch": 4.76, "learning_rate": 5e-05, "loss": 1.2766, "step": 60700 }, { "epoch": 4.77, "learning_rate": 5e-05, "loss": 1.2674, "step": 60800 }, { "epoch": 4.78, "learning_rate": 5e-05, "loss": 1.2793, "step": 60900 }, { "epoch": 4.78, "learning_rate": 5e-05, "loss": 1.284, "step": 61000 }, { "epoch": 4.79, "learning_rate": 5e-05, "loss": 1.2861, "step": 61100 }, { "epoch": 4.8, "learning_rate": 5e-05, "loss": 1.2924, "step": 61200 }, { "epoch": 4.81, "learning_rate": 5e-05, "loss": 1.2949, "step": 61300 }, { "epoch": 4.82, "learning_rate": 5e-05, "loss": 1.2945, "step": 61400 }, { "epoch": 4.82, "learning_rate": 5e-05, "loss": 1.2832, "step": 61500 }, { "epoch": 4.83, "learning_rate": 5e-05, "loss": 1.2674, "step": 61600 }, { "epoch": 4.84, "learning_rate": 5e-05, "loss": 1.2703, "step": 61700 }, { "epoch": 4.85, "learning_rate": 5e-05, "loss": 1.2977, "step": 61800 }, { "epoch": 4.85, "learning_rate": 5e-05, "loss": 1.2994, "step": 61900 }, { "epoch": 4.86, "learning_rate": 5e-05, "loss": 1.2934, "step": 62000 }, { "epoch": 4.87, "learning_rate": 5e-05, "loss": 1.2885, "step": 62100 }, { "epoch": 4.88, "learning_rate": 5e-05, "loss": 1.2849, "step": 62200 }, { "epoch": 4.89, "learning_rate": 5e-05, "loss": 1.3009, "step": 62300 }, { "epoch": 4.89, "learning_rate": 5e-05, "loss": 1.284, "step": 62400 }, { "epoch": 4.9, "learning_rate": 5e-05, "loss": 1.2807, "step": 62500 }, { "epoch": 4.91, "learning_rate": 5e-05, "loss": 1.2911, "step": 62600 }, { "epoch": 4.92, "learning_rate": 5e-05, "loss": 1.2881, "step": 62700 }, { "epoch": 4.93, "learning_rate": 5e-05, "loss": 1.2812, "step": 62800 }, { "epoch": 4.93, "learning_rate": 5e-05, "loss": 1.2934, "step": 62900 }, { "epoch": 4.94, "learning_rate": 5e-05, "loss": 1.284, "step": 63000 }, { "epoch": 4.95, "learning_rate": 5e-05, "loss": 1.3022, "step": 63100 }, { "epoch": 4.96, "learning_rate": 5e-05, "loss": 1.2916, "step": 63200 }, { "epoch": 4.96, "learning_rate": 5e-05, "loss": 1.3006, "step": 63300 }, { "epoch": 4.97, "learning_rate": 5e-05, "loss": 1.3157, "step": 63400 }, { "epoch": 4.98, "learning_rate": 5e-05, "loss": 1.2868, "step": 63500 }, { "epoch": 4.99, "learning_rate": 5e-05, "loss": 1.2879, "step": 63600 }, { "epoch": 5.0, "learning_rate": 5e-05, "loss": 1.2873, "step": 63700 }, { "epoch": 5.0, "learning_rate": 5e-05, "loss": 1.2625, "step": 63800 }, { "epoch": 5.01, "learning_rate": 5e-05, "loss": 1.1801, "step": 63900 }, { "epoch": 5.02, "learning_rate": 5e-05, "loss": 1.178, "step": 64000 }, { "epoch": 5.03, "learning_rate": 5e-05, "loss": 1.1779, "step": 64100 }, { "epoch": 5.03, "learning_rate": 5e-05, "loss": 1.1858, "step": 64200 }, { "epoch": 5.04, "learning_rate": 5e-05, "loss": 1.1579, "step": 64300 }, { "epoch": 5.05, "learning_rate": 5e-05, "loss": 1.1974, "step": 64400 }, { "epoch": 5.06, "learning_rate": 5e-05, "loss": 1.1938, "step": 64500 }, { "epoch": 5.07, "learning_rate": 5e-05, "loss": 1.1938, "step": 64600 }, { "epoch": 5.07, "learning_rate": 5e-05, "loss": 1.2015, "step": 64700 }, { "epoch": 5.08, "learning_rate": 5e-05, "loss": 1.1884, "step": 64800 }, { "epoch": 5.09, "learning_rate": 5e-05, "loss": 1.1764, "step": 64900 }, { "epoch": 5.1, "learning_rate": 5e-05, "loss": 1.1796, "step": 65000 }, { "epoch": 5.11, "learning_rate": 5e-05, "loss": 1.1815, "step": 65100 }, { "epoch": 5.11, "learning_rate": 5e-05, "loss": 1.2015, "step": 65200 }, { "epoch": 5.12, "learning_rate": 5e-05, "loss": 1.1862, "step": 65300 }, { "epoch": 5.13, "learning_rate": 5e-05, "loss": 1.2127, "step": 65400 }, { "epoch": 5.14, "learning_rate": 5e-05, "loss": 1.1809, "step": 65500 }, { "epoch": 5.14, "learning_rate": 5e-05, "loss": 1.1978, "step": 65600 }, { "epoch": 5.15, "learning_rate": 5e-05, "loss": 1.1866, "step": 65700 }, { "epoch": 5.16, "learning_rate": 5e-05, "loss": 1.1646, "step": 65800 }, { "epoch": 5.17, "learning_rate": 5e-05, "loss": 1.2152, "step": 65900 }, { "epoch": 5.18, "learning_rate": 5e-05, "loss": 1.1896, "step": 66000 }, { "epoch": 5.18, "learning_rate": 5e-05, "loss": 1.1779, "step": 66100 }, { "epoch": 5.19, "learning_rate": 5e-05, "loss": 1.1951, "step": 66200 }, { "epoch": 5.2, "learning_rate": 5e-05, "loss": 1.2209, "step": 66300 }, { "epoch": 5.21, "learning_rate": 5e-05, "loss": 1.2072, "step": 66400 }, { "epoch": 5.22, "learning_rate": 5e-05, "loss": 1.1887, "step": 66500 }, { "epoch": 5.22, "learning_rate": 5e-05, "loss": 1.2142, "step": 66600 }, { "epoch": 5.23, "learning_rate": 5e-05, "loss": 1.157, "step": 66700 }, { "epoch": 5.24, "learning_rate": 5e-05, "loss": 1.2097, "step": 66800 }, { "epoch": 5.25, "learning_rate": 5e-05, "loss": 1.2041, "step": 66900 }, { "epoch": 5.25, "learning_rate": 5e-05, "loss": 1.1877, "step": 67000 }, { "epoch": 5.26, "learning_rate": 5e-05, "loss": 1.1838, "step": 67100 }, { "epoch": 5.27, "learning_rate": 5e-05, "loss": 1.1936, "step": 67200 }, { "epoch": 5.28, "learning_rate": 5e-05, "loss": 1.2084, "step": 67300 }, { "epoch": 5.29, "learning_rate": 5e-05, "loss": 1.2174, "step": 67400 }, { "epoch": 5.29, "learning_rate": 5e-05, "loss": 1.2014, "step": 67500 }, { "epoch": 5.3, "learning_rate": 5e-05, "loss": 1.2259, "step": 67600 }, { "epoch": 5.31, "learning_rate": 5e-05, "loss": 1.2031, "step": 67700 }, { "epoch": 5.32, "learning_rate": 5e-05, "loss": 1.2046, "step": 67800 }, { "epoch": 5.33, "learning_rate": 5e-05, "loss": 1.2045, "step": 67900 }, { "epoch": 5.33, "learning_rate": 5e-05, "loss": 1.2016, "step": 68000 }, { "epoch": 5.34, "learning_rate": 5e-05, "loss": 1.1902, "step": 68100 }, { "epoch": 5.35, "learning_rate": 5e-05, "loss": 1.1769, "step": 68200 }, { "epoch": 5.36, "learning_rate": 5e-05, "loss": 1.1939, "step": 68300 }, { "epoch": 5.36, "learning_rate": 5e-05, "loss": 1.1984, "step": 68400 }, { "epoch": 5.37, "learning_rate": 5e-05, "loss": 1.1975, "step": 68500 }, { "epoch": 5.38, "learning_rate": 5e-05, "loss": 1.2162, "step": 68600 }, { "epoch": 5.39, "learning_rate": 5e-05, "loss": 1.219, "step": 68700 }, { "epoch": 5.4, "learning_rate": 5e-05, "loss": 1.1927, "step": 68800 }, { "epoch": 5.4, "learning_rate": 5e-05, "loss": 1.2096, "step": 68900 }, { "epoch": 5.41, "learning_rate": 5e-05, "loss": 1.2227, "step": 69000 }, { "epoch": 5.42, "learning_rate": 5e-05, "loss": 1.2265, "step": 69100 }, { "epoch": 5.43, "learning_rate": 5e-05, "loss": 1.2238, "step": 69200 }, { "epoch": 5.43, "learning_rate": 5e-05, "loss": 1.2066, "step": 69300 }, { "epoch": 5.44, "learning_rate": 5e-05, "loss": 1.2013, "step": 69400 }, { "epoch": 5.45, "learning_rate": 5e-05, "loss": 1.1938, "step": 69500 }, { "epoch": 5.46, "learning_rate": 5e-05, "loss": 1.2008, "step": 69600 }, { "epoch": 5.47, "learning_rate": 5e-05, "loss": 1.2239, "step": 69700 }, { "epoch": 5.47, "learning_rate": 5e-05, "loss": 1.2272, "step": 69800 }, { "epoch": 5.48, "learning_rate": 5e-05, "loss": 1.2162, "step": 69900 }, { "epoch": 5.49, "learning_rate": 5e-05, "loss": 1.2137, "step": 70000 }, { "epoch": 5.49, "eval_gen_len": 18.72711220976428, "eval_loss": 1.511597752571106, "eval_rouge1": 39.4402, "eval_rouge2": 17.7009, "eval_rougeL": 32.8481, "eval_rougeLsum": 32.8525, "eval_runtime": 2553.8215, "eval_samples_per_second": 4.435, "eval_steps_per_second": 1.109, "step": 70000 }, { "epoch": 5.5, "learning_rate": 5e-05, "loss": 1.2279, "step": 70100 }, { "epoch": 5.51, "learning_rate": 5e-05, "loss": 1.2261, "step": 70200 }, { "epoch": 5.51, "learning_rate": 5e-05, "loss": 1.2069, "step": 70300 }, { "epoch": 5.52, "learning_rate": 5e-05, "loss": 1.2134, "step": 70400 }, { "epoch": 5.53, "learning_rate": 5e-05, "loss": 1.215, "step": 70500 }, { "epoch": 5.54, "learning_rate": 5e-05, "loss": 1.22, "step": 70600 }, { "epoch": 5.54, "learning_rate": 5e-05, "loss": 1.2227, "step": 70700 }, { "epoch": 5.55, "learning_rate": 5e-05, "loss": 1.2098, "step": 70800 }, { "epoch": 5.56, "learning_rate": 5e-05, "loss": 1.2228, "step": 70900 }, { "epoch": 5.57, "learning_rate": 5e-05, "loss": 1.2188, "step": 71000 }, { "epoch": 5.58, "learning_rate": 5e-05, "loss": 1.2287, "step": 71100 }, { "epoch": 5.58, "learning_rate": 5e-05, "loss": 1.2088, "step": 71200 }, { "epoch": 5.59, "learning_rate": 5e-05, "loss": 1.2171, "step": 71300 }, { "epoch": 5.6, "learning_rate": 5e-05, "loss": 1.2172, "step": 71400 }, { "epoch": 5.61, "learning_rate": 5e-05, "loss": 1.2379, "step": 71500 }, { "epoch": 5.62, "learning_rate": 5e-05, "loss": 1.2229, "step": 71600 }, { "epoch": 5.62, "learning_rate": 5e-05, "loss": 1.2165, "step": 71700 }, { "epoch": 5.63, "learning_rate": 5e-05, "loss": 1.2165, "step": 71800 }, { "epoch": 5.64, "learning_rate": 5e-05, "loss": 1.2346, "step": 71900 }, { "epoch": 5.65, "learning_rate": 5e-05, "loss": 1.224, "step": 72000 }, { "epoch": 5.65, "learning_rate": 5e-05, "loss": 1.2332, "step": 72100 }, { "epoch": 5.66, "learning_rate": 5e-05, "loss": 1.2139, "step": 72200 }, { "epoch": 5.67, "learning_rate": 5e-05, "loss": 1.2102, "step": 72300 }, { "epoch": 5.68, "learning_rate": 5e-05, "loss": 1.202, "step": 72400 }, { "epoch": 5.69, "learning_rate": 5e-05, "loss": 1.2048, "step": 72500 }, { "epoch": 5.69, "learning_rate": 5e-05, "loss": 1.2215, "step": 72600 }, { "epoch": 5.7, "learning_rate": 5e-05, "loss": 1.2297, "step": 72700 }, { "epoch": 5.71, "learning_rate": 5e-05, "loss": 1.2171, "step": 72800 }, { "epoch": 5.72, "learning_rate": 5e-05, "loss": 1.2257, "step": 72900 }, { "epoch": 5.73, "learning_rate": 5e-05, "loss": 1.2091, "step": 73000 }, { "epoch": 5.73, "learning_rate": 5e-05, "loss": 1.2063, "step": 73100 }, { "epoch": 5.74, "learning_rate": 5e-05, "loss": 1.1986, "step": 73200 }, { "epoch": 5.75, "learning_rate": 5e-05, "loss": 1.222, "step": 73300 }, { "epoch": 5.76, "learning_rate": 5e-05, "loss": 1.2118, "step": 73400 }, { "epoch": 5.76, "learning_rate": 5e-05, "loss": 1.2252, "step": 73500 }, { "epoch": 5.77, "learning_rate": 5e-05, "loss": 1.2296, "step": 73600 }, { "epoch": 5.78, "learning_rate": 5e-05, "loss": 1.2421, "step": 73700 }, { "epoch": 5.79, "learning_rate": 5e-05, "loss": 1.24, "step": 73800 }, { "epoch": 5.8, "learning_rate": 5e-05, "loss": 1.2032, "step": 73900 }, { "epoch": 5.8, "learning_rate": 5e-05, "loss": 1.2204, "step": 74000 }, { "epoch": 5.81, "learning_rate": 5e-05, "loss": 1.2147, "step": 74100 }, { "epoch": 5.82, "learning_rate": 5e-05, "loss": 1.2317, "step": 74200 }, { "epoch": 5.83, "learning_rate": 5e-05, "loss": 1.2055, "step": 74300 }, { "epoch": 5.83, "learning_rate": 5e-05, "loss": 1.2386, "step": 74400 }, { "epoch": 5.84, "learning_rate": 5e-05, "loss": 1.227, "step": 74500 }, { "epoch": 5.85, "learning_rate": 5e-05, "loss": 1.2341, "step": 74600 }, { "epoch": 5.86, "learning_rate": 5e-05, "loss": 1.2191, "step": 74700 }, { "epoch": 5.87, "learning_rate": 5e-05, "loss": 1.2396, "step": 74800 }, { "epoch": 5.87, "learning_rate": 5e-05, "loss": 1.2343, "step": 74900 }, { "epoch": 5.88, "learning_rate": 5e-05, "loss": 1.2257, "step": 75000 }, { "epoch": 5.89, "learning_rate": 5e-05, "loss": 1.2295, "step": 75100 }, { "epoch": 5.9, "learning_rate": 5e-05, "loss": 1.2128, "step": 75200 }, { "epoch": 5.91, "learning_rate": 5e-05, "loss": 1.2317, "step": 75300 }, { "epoch": 5.91, "learning_rate": 5e-05, "loss": 1.2195, "step": 75400 }, { "epoch": 5.92, "learning_rate": 5e-05, "loss": 1.2275, "step": 75500 }, { "epoch": 5.93, "learning_rate": 5e-05, "loss": 1.2202, "step": 75600 }, { "epoch": 5.94, "learning_rate": 5e-05, "loss": 1.2161, "step": 75700 }, { "epoch": 5.94, "learning_rate": 5e-05, "loss": 1.2215, "step": 75800 }, { "epoch": 5.95, "learning_rate": 5e-05, "loss": 1.2285, "step": 75900 }, { "epoch": 5.96, "learning_rate": 5e-05, "loss": 1.228, "step": 76000 }, { "epoch": 5.97, "learning_rate": 5e-05, "loss": 1.2368, "step": 76100 }, { "epoch": 5.98, "learning_rate": 5e-05, "loss": 1.2231, "step": 76200 }, { "epoch": 5.98, "learning_rate": 5e-05, "loss": 1.213, "step": 76300 }, { "epoch": 5.99, "learning_rate": 5e-05, "loss": 1.233, "step": 76400 }, { "epoch": 6.0, "learning_rate": 5e-05, "loss": 1.2155, "step": 76500 }, { "epoch": 6.01, "learning_rate": 5e-05, "loss": 1.1235, "step": 76600 }, { "epoch": 6.02, "learning_rate": 5e-05, "loss": 1.1247, "step": 76700 }, { "epoch": 6.02, "learning_rate": 5e-05, "loss": 1.1164, "step": 76800 }, { "epoch": 6.03, "learning_rate": 5e-05, "loss": 1.1156, "step": 76900 }, { "epoch": 6.04, "learning_rate": 5e-05, "loss": 1.1105, "step": 77000 }, { "epoch": 6.05, "learning_rate": 5e-05, "loss": 1.107, "step": 77100 }, { "epoch": 6.05, "learning_rate": 5e-05, "loss": 1.1293, "step": 77200 }, { "epoch": 6.06, "learning_rate": 5e-05, "loss": 1.1294, "step": 77300 }, { "epoch": 6.07, "learning_rate": 5e-05, "loss": 1.1274, "step": 77400 }, { "epoch": 6.08, "learning_rate": 5e-05, "loss": 1.1346, "step": 77500 }, { "epoch": 6.09, "learning_rate": 5e-05, "loss": 1.1089, "step": 77600 }, { "epoch": 6.09, "learning_rate": 5e-05, "loss": 1.1235, "step": 77700 }, { "epoch": 6.1, "learning_rate": 5e-05, "loss": 1.1154, "step": 77800 }, { "epoch": 6.11, "learning_rate": 5e-05, "loss": 1.1244, "step": 77900 }, { "epoch": 6.12, "learning_rate": 5e-05, "loss": 1.1424, "step": 78000 }, { "epoch": 6.13, "learning_rate": 5e-05, "loss": 1.1417, "step": 78100 }, { "epoch": 6.13, "learning_rate": 5e-05, "loss": 1.1393, "step": 78200 }, { "epoch": 6.14, "learning_rate": 5e-05, "loss": 1.1147, "step": 78300 }, { "epoch": 6.15, "learning_rate": 5e-05, "loss": 1.1396, "step": 78400 }, { "epoch": 6.16, "learning_rate": 5e-05, "loss": 1.1564, "step": 78500 }, { "epoch": 6.16, "learning_rate": 5e-05, "loss": 1.1457, "step": 78600 }, { "epoch": 6.17, "learning_rate": 5e-05, "loss": 1.1349, "step": 78700 }, { "epoch": 6.18, "learning_rate": 5e-05, "loss": 1.1724, "step": 78800 }, { "epoch": 6.19, "learning_rate": 5e-05, "loss": 1.1388, "step": 78900 }, { "epoch": 6.2, "learning_rate": 5e-05, "loss": 1.1393, "step": 79000 }, { "epoch": 6.2, "learning_rate": 5e-05, "loss": 1.1325, "step": 79100 }, { "epoch": 6.21, "learning_rate": 5e-05, "loss": 1.1315, "step": 79200 }, { "epoch": 6.22, "learning_rate": 5e-05, "loss": 1.1428, "step": 79300 }, { "epoch": 6.23, "learning_rate": 5e-05, "loss": 1.148, "step": 79400 }, { "epoch": 6.23, "learning_rate": 5e-05, "loss": 1.1534, "step": 79500 }, { "epoch": 6.24, "learning_rate": 5e-05, "loss": 1.1454, "step": 79600 }, { "epoch": 6.25, "learning_rate": 5e-05, "loss": 1.1383, "step": 79700 }, { "epoch": 6.26, "learning_rate": 5e-05, "loss": 1.1266, "step": 79800 }, { "epoch": 6.27, "learning_rate": 5e-05, "loss": 1.1382, "step": 79900 }, { "epoch": 6.27, "learning_rate": 5e-05, "loss": 1.1169, "step": 80000 }, { "epoch": 6.27, "eval_gen_len": 18.758806391807187, "eval_loss": 1.5334253311157227, "eval_rouge1": 39.579, "eval_rouge2": 17.841, "eval_rougeL": 32.8832, "eval_rougeLsum": 32.8889, "eval_runtime": 2444.6143, "eval_samples_per_second": 4.633, "eval_steps_per_second": 1.158, "step": 80000 }, { "epoch": 6.28, "learning_rate": 5e-05, "loss": 1.1758, "step": 80100 }, { "epoch": 6.29, "learning_rate": 5e-05, "loss": 1.1429, "step": 80200 }, { "epoch": 6.3, "learning_rate": 5e-05, "loss": 1.1347, "step": 80300 }, { "epoch": 6.31, "learning_rate": 5e-05, "loss": 1.149, "step": 80400 }, { "epoch": 6.31, "learning_rate": 5e-05, "loss": 1.1293, "step": 80500 }, { "epoch": 6.32, "learning_rate": 5e-05, "loss": 1.1366, "step": 80600 }, { "epoch": 6.33, "learning_rate": 5e-05, "loss": 1.1534, "step": 80700 }, { "epoch": 6.34, "learning_rate": 5e-05, "loss": 1.1561, "step": 80800 }, { "epoch": 6.34, "learning_rate": 5e-05, "loss": 1.1476, "step": 80900 }, { "epoch": 6.35, "learning_rate": 5e-05, "loss": 1.1491, "step": 81000 }, { "epoch": 6.36, "learning_rate": 5e-05, "loss": 1.136, "step": 81100 }, { "epoch": 6.37, "learning_rate": 5e-05, "loss": 1.1254, "step": 81200 }, { "epoch": 6.38, "learning_rate": 5e-05, "loss": 1.1536, "step": 81300 }, { "epoch": 6.38, "learning_rate": 5e-05, "loss": 1.13, "step": 81400 }, { "epoch": 6.39, "learning_rate": 5e-05, "loss": 1.1626, "step": 81500 }, { "epoch": 6.4, "learning_rate": 5e-05, "loss": 1.1309, "step": 81600 }, { "epoch": 6.41, "learning_rate": 5e-05, "loss": 1.1467, "step": 81700 }, { "epoch": 6.42, "learning_rate": 5e-05, "loss": 1.1761, "step": 81800 }, { "epoch": 6.42, "learning_rate": 5e-05, "loss": 1.1415, "step": 81900 }, { "epoch": 6.43, "learning_rate": 5e-05, "loss": 1.1515, "step": 82000 }, { "epoch": 6.44, "learning_rate": 5e-05, "loss": 1.1396, "step": 82100 }, { "epoch": 6.45, "learning_rate": 5e-05, "loss": 1.1501, "step": 82200 }, { "epoch": 6.45, "learning_rate": 5e-05, "loss": 1.1594, "step": 82300 }, { "epoch": 6.46, "learning_rate": 5e-05, "loss": 1.1486, "step": 82400 }, { "epoch": 6.47, "learning_rate": 5e-05, "loss": 1.1459, "step": 82500 }, { "epoch": 6.48, "learning_rate": 5e-05, "loss": 1.1682, "step": 82600 }, { "epoch": 6.49, "learning_rate": 5e-05, "loss": 1.1647, "step": 82700 }, { "epoch": 6.49, "learning_rate": 5e-05, "loss": 1.1347, "step": 82800 }, { "epoch": 6.5, "learning_rate": 5e-05, "loss": 1.1628, "step": 82900 }, { "epoch": 6.51, "learning_rate": 5e-05, "loss": 1.129, "step": 83000 }, { "epoch": 6.52, "learning_rate": 5e-05, "loss": 1.1791, "step": 83100 }, { "epoch": 6.52, "learning_rate": 5e-05, "loss": 1.1582, "step": 83200 }, { "epoch": 6.53, "learning_rate": 5e-05, "loss": 1.157, "step": 83300 }, { "epoch": 6.54, "learning_rate": 5e-05, "loss": 1.1459, "step": 83400 }, { "epoch": 6.55, "learning_rate": 5e-05, "loss": 1.15, "step": 83500 }, { "epoch": 6.56, "learning_rate": 5e-05, "loss": 1.1235, "step": 83600 }, { "epoch": 6.56, "learning_rate": 5e-05, "loss": 1.142, "step": 83700 }, { "epoch": 6.57, "learning_rate": 5e-05, "loss": 1.1587, "step": 83800 }, { "epoch": 6.58, "learning_rate": 5e-05, "loss": 1.1564, "step": 83900 }, { "epoch": 6.59, "learning_rate": 5e-05, "loss": 1.1457, "step": 84000 }, { "epoch": 6.6, "learning_rate": 5e-05, "loss": 1.1434, "step": 84100 }, { "epoch": 6.6, "learning_rate": 5e-05, "loss": 1.1512, "step": 84200 }, { "epoch": 6.61, "learning_rate": 5e-05, "loss": 1.1666, "step": 84300 }, { "epoch": 6.62, "learning_rate": 5e-05, "loss": 1.1624, "step": 84400 }, { "epoch": 6.63, "learning_rate": 5e-05, "loss": 1.1629, "step": 84500 }, { "epoch": 6.63, "learning_rate": 5e-05, "loss": 1.1617, "step": 84600 }, { "epoch": 6.64, "learning_rate": 5e-05, "loss": 1.1573, "step": 84700 }, { "epoch": 6.65, "learning_rate": 5e-05, "loss": 1.1666, "step": 84800 }, { "epoch": 6.66, "learning_rate": 5e-05, "loss": 1.1675, "step": 84900 }, { "epoch": 6.67, "learning_rate": 5e-05, "loss": 1.1446, "step": 85000 }, { "epoch": 6.67, "learning_rate": 5e-05, "loss": 1.1584, "step": 85100 }, { "epoch": 6.68, "learning_rate": 5e-05, "loss": 1.1524, "step": 85200 }, { "epoch": 6.69, "learning_rate": 5e-05, "loss": 1.1391, "step": 85300 }, { "epoch": 6.7, "learning_rate": 5e-05, "loss": 1.1488, "step": 85400 }, { "epoch": 6.71, "learning_rate": 5e-05, "loss": 1.1671, "step": 85500 }, { "epoch": 6.71, "learning_rate": 5e-05, "loss": 1.1627, "step": 85600 }, { "epoch": 6.72, "learning_rate": 5e-05, "loss": 1.1586, "step": 85700 }, { "epoch": 6.73, "learning_rate": 5e-05, "loss": 1.1468, "step": 85800 }, { "epoch": 6.74, "learning_rate": 5e-05, "loss": 1.1746, "step": 85900 }, { "epoch": 6.74, "learning_rate": 5e-05, "loss": 1.1674, "step": 86000 }, { "epoch": 6.75, "learning_rate": 5e-05, "loss": 1.148, "step": 86100 }, { "epoch": 6.76, "learning_rate": 5e-05, "loss": 1.1649, "step": 86200 }, { "epoch": 6.77, "learning_rate": 5e-05, "loss": 1.1567, "step": 86300 }, { "epoch": 6.78, "learning_rate": 5e-05, "loss": 1.1611, "step": 86400 }, { "epoch": 6.78, "learning_rate": 5e-05, "loss": 1.1541, "step": 86500 }, { "epoch": 6.79, "learning_rate": 5e-05, "loss": 1.1589, "step": 86600 }, { "epoch": 6.8, "learning_rate": 5e-05, "loss": 1.1942, "step": 86700 }, { "epoch": 6.81, "learning_rate": 5e-05, "loss": 1.1697, "step": 86800 }, { "epoch": 6.82, "learning_rate": 5e-05, "loss": 1.1645, "step": 86900 }, { "epoch": 6.82, "learning_rate": 5e-05, "loss": 1.1638, "step": 87000 }, { "epoch": 6.83, "learning_rate": 5e-05, "loss": 1.1851, "step": 87100 }, { "epoch": 6.84, "learning_rate": 5e-05, "loss": 1.1837, "step": 87200 }, { "epoch": 6.85, "learning_rate": 5e-05, "loss": 1.1693, "step": 87300 }, { "epoch": 6.85, "learning_rate": 5e-05, "loss": 1.1633, "step": 87400 }, { "epoch": 6.86, "learning_rate": 5e-05, "loss": 1.1601, "step": 87500 }, { "epoch": 6.87, "learning_rate": 5e-05, "loss": 1.1706, "step": 87600 }, { "epoch": 6.88, "learning_rate": 5e-05, "loss": 1.1805, "step": 87700 }, { "epoch": 6.89, "learning_rate": 5e-05, "loss": 1.1802, "step": 87800 }, { "epoch": 6.89, "learning_rate": 5e-05, "loss": 1.1563, "step": 87900 }, { "epoch": 6.9, "learning_rate": 5e-05, "loss": 1.182, "step": 88000 }, { "epoch": 6.91, "learning_rate": 5e-05, "loss": 1.1532, "step": 88100 }, { "epoch": 6.92, "learning_rate": 5e-05, "loss": 1.1903, "step": 88200 }, { "epoch": 6.92, "learning_rate": 5e-05, "loss": 1.163, "step": 88300 }, { "epoch": 6.93, "learning_rate": 5e-05, "loss": 1.1692, "step": 88400 }, { "epoch": 6.94, "learning_rate": 5e-05, "loss": 1.1884, "step": 88500 }, { "epoch": 6.95, "learning_rate": 5e-05, "loss": 1.1697, "step": 88600 }, { "epoch": 6.96, "learning_rate": 5e-05, "loss": 1.183, "step": 88700 }, { "epoch": 6.96, "learning_rate": 5e-05, "loss": 1.1736, "step": 88800 }, { "epoch": 6.97, "learning_rate": 5e-05, "loss": 1.1652, "step": 88900 }, { "epoch": 6.98, "learning_rate": 5e-05, "loss": 1.1804, "step": 89000 }, { "epoch": 6.99, "learning_rate": 5e-05, "loss": 1.1731, "step": 89100 }, { "epoch": 7.0, "learning_rate": 5e-05, "loss": 1.1745, "step": 89200 }, { "epoch": 7.0, "learning_rate": 5e-05, "loss": 1.1464, "step": 89300 }, { "epoch": 7.01, "learning_rate": 5e-05, "loss": 1.0755, "step": 89400 }, { "epoch": 7.02, "learning_rate": 5e-05, "loss": 1.0766, "step": 89500 }, { "epoch": 7.03, "learning_rate": 5e-05, "loss": 1.068, "step": 89600 }, { "epoch": 7.03, "learning_rate": 5e-05, "loss": 1.0792, "step": 89700 }, { "epoch": 7.04, "learning_rate": 5e-05, "loss": 1.0746, "step": 89800 }, { "epoch": 7.05, "learning_rate": 5e-05, "loss": 1.0775, "step": 89900 }, { "epoch": 7.06, "learning_rate": 5e-05, "loss": 1.0757, "step": 90000 }, { "epoch": 7.06, "eval_gen_len": 18.725523086430652, "eval_loss": 1.5569835901260376, "eval_rouge1": 39.4694, "eval_rouge2": 17.8419, "eval_rougeL": 32.87, "eval_rougeLsum": 32.864, "eval_runtime": 2362.234, "eval_samples_per_second": 4.795, "eval_steps_per_second": 1.199, "step": 90000 }, { "epoch": 7.07, "learning_rate": 5e-05, "loss": 1.0726, "step": 90100 }, { "epoch": 7.07, "learning_rate": 5e-05, "loss": 1.0736, "step": 90200 }, { "epoch": 7.08, "learning_rate": 5e-05, "loss": 1.0776, "step": 90300 }, { "epoch": 7.09, "learning_rate": 5e-05, "loss": 1.0533, "step": 90400 }, { "epoch": 7.1, "learning_rate": 5e-05, "loss": 1.0583, "step": 90500 }, { "epoch": 7.11, "learning_rate": 5e-05, "loss": 1.0816, "step": 90600 }, { "epoch": 7.11, "learning_rate": 5e-05, "loss": 1.0684, "step": 90700 }, { "epoch": 7.12, "learning_rate": 5e-05, "loss": 1.0656, "step": 90800 }, { "epoch": 7.13, "learning_rate": 5e-05, "loss": 1.0685, "step": 90900 }, { "epoch": 7.14, "learning_rate": 5e-05, "loss": 1.1002, "step": 91000 }, { "epoch": 7.14, "learning_rate": 5e-05, "loss": 1.0789, "step": 91100 }, { "epoch": 7.15, "learning_rate": 5e-05, "loss": 1.0738, "step": 91200 }, { "epoch": 7.16, "learning_rate": 5e-05, "loss": 1.0764, "step": 91300 }, { "epoch": 7.17, "learning_rate": 5e-05, "loss": 1.0733, "step": 91400 }, { "epoch": 7.18, "learning_rate": 5e-05, "loss": 1.0984, "step": 91500 }, { "epoch": 7.18, "learning_rate": 5e-05, "loss": 1.0767, "step": 91600 }, { "epoch": 7.19, "learning_rate": 5e-05, "loss": 1.0991, "step": 91700 }, { "epoch": 7.2, "learning_rate": 5e-05, "loss": 1.0882, "step": 91800 }, { "epoch": 7.21, "learning_rate": 5e-05, "loss": 1.0868, "step": 91900 }, { "epoch": 7.22, "learning_rate": 5e-05, "loss": 1.0789, "step": 92000 }, { "epoch": 7.22, "learning_rate": 5e-05, "loss": 1.0887, "step": 92100 }, { "epoch": 7.23, "learning_rate": 5e-05, "loss": 1.0777, "step": 92200 }, { "epoch": 7.24, "learning_rate": 5e-05, "loss": 1.0868, "step": 92300 }, { "epoch": 7.25, "learning_rate": 5e-05, "loss": 1.0971, "step": 92400 }, { "epoch": 7.25, "learning_rate": 5e-05, "loss": 1.0849, "step": 92500 }, { "epoch": 7.26, "learning_rate": 5e-05, "loss": 1.0796, "step": 92600 }, { "epoch": 7.27, "learning_rate": 5e-05, "loss": 1.0923, "step": 92700 }, { "epoch": 7.28, "learning_rate": 5e-05, "loss": 1.0929, "step": 92800 }, { "epoch": 7.29, "learning_rate": 5e-05, "loss": 1.0868, "step": 92900 }, { "epoch": 7.29, "learning_rate": 5e-05, "loss": 1.0987, "step": 93000 }, { "epoch": 7.3, "learning_rate": 5e-05, "loss": 1.0891, "step": 93100 }, { "epoch": 7.31, "learning_rate": 5e-05, "loss": 1.0812, "step": 93200 }, { "epoch": 7.32, "learning_rate": 5e-05, "loss": 1.087, "step": 93300 }, { "epoch": 7.32, "learning_rate": 5e-05, "loss": 1.0809, "step": 93400 }, { "epoch": 7.33, "learning_rate": 5e-05, "loss": 1.0906, "step": 93500 }, { "epoch": 7.34, "learning_rate": 5e-05, "loss": 1.104, "step": 93600 }, { "epoch": 7.35, "learning_rate": 5e-05, "loss": 1.0869, "step": 93700 }, { "epoch": 7.36, "learning_rate": 5e-05, "loss": 1.074, "step": 93800 }, { "epoch": 7.36, "learning_rate": 5e-05, "loss": 1.091, "step": 93900 }, { "epoch": 7.37, "learning_rate": 5e-05, "loss": 1.0797, "step": 94000 }, { "epoch": 7.38, "learning_rate": 5e-05, "loss": 1.0966, "step": 94100 }, { "epoch": 7.39, "learning_rate": 5e-05, "loss": 1.0721, "step": 94200 }, { "epoch": 7.4, "learning_rate": 5e-05, "loss": 1.0829, "step": 94300 }, { "epoch": 7.4, "learning_rate": 5e-05, "loss": 1.0853, "step": 94400 }, { "epoch": 7.41, "learning_rate": 5e-05, "loss": 1.0875, "step": 94500 }, { "epoch": 7.42, "learning_rate": 5e-05, "loss": 1.0935, "step": 94600 }, { "epoch": 7.43, "learning_rate": 5e-05, "loss": 1.1017, "step": 94700 }, { "epoch": 7.43, "learning_rate": 5e-05, "loss": 1.0981, "step": 94800 }, { "epoch": 7.44, "learning_rate": 5e-05, "loss": 1.0926, "step": 94900 }, { "epoch": 7.45, "learning_rate": 5e-05, "loss": 1.0902, "step": 95000 }, { "epoch": 7.46, "learning_rate": 5e-05, "loss": 1.1018, "step": 95100 }, { "epoch": 7.47, "learning_rate": 5e-05, "loss": 1.0786, "step": 95200 }, { "epoch": 7.47, "learning_rate": 5e-05, "loss": 1.1005, "step": 95300 }, { "epoch": 7.48, "learning_rate": 5e-05, "loss": 1.094, "step": 95400 }, { "epoch": 7.49, "learning_rate": 5e-05, "loss": 1.1042, "step": 95500 }, { "epoch": 7.5, "learning_rate": 5e-05, "loss": 1.0926, "step": 95600 }, { "epoch": 7.51, "learning_rate": 5e-05, "loss": 1.1012, "step": 95700 }, { "epoch": 7.51, "learning_rate": 5e-05, "loss": 1.1069, "step": 95800 }, { "epoch": 7.52, "learning_rate": 5e-05, "loss": 1.0873, "step": 95900 }, { "epoch": 7.53, "learning_rate": 5e-05, "loss": 1.0865, "step": 96000 }, { "epoch": 7.54, "learning_rate": 5e-05, "loss": 1.1074, "step": 96100 }, { "epoch": 7.54, "learning_rate": 5e-05, "loss": 1.0932, "step": 96200 }, { "epoch": 7.55, "learning_rate": 5e-05, "loss": 1.061, "step": 96300 }, { "epoch": 7.56, "learning_rate": 5e-05, "loss": 1.1158, "step": 96400 }, { "epoch": 7.57, "learning_rate": 5e-05, "loss": 1.0995, "step": 96500 }, { "epoch": 7.58, "learning_rate": 5e-05, "loss": 1.0997, "step": 96600 }, { "epoch": 7.58, "learning_rate": 5e-05, "loss": 1.1066, "step": 96700 }, { "epoch": 7.59, "learning_rate": 5e-05, "loss": 1.089, "step": 96800 }, { "epoch": 7.6, "learning_rate": 5e-05, "loss": 1.0987, "step": 96900 }, { "epoch": 7.61, "learning_rate": 5e-05, "loss": 1.0963, "step": 97000 }, { "epoch": 7.62, "learning_rate": 5e-05, "loss": 1.0996, "step": 97100 }, { "epoch": 7.62, "learning_rate": 5e-05, "loss": 1.0909, "step": 97200 }, { "epoch": 7.63, "learning_rate": 5e-05, "loss": 1.1201, "step": 97300 }, { "epoch": 7.64, "learning_rate": 5e-05, "loss": 1.1071, "step": 97400 }, { "epoch": 7.65, "learning_rate": 5e-05, "loss": 1.1108, "step": 97500 }, { "epoch": 7.65, "learning_rate": 5e-05, "loss": 1.0933, "step": 97600 }, { "epoch": 7.66, "learning_rate": 5e-05, "loss": 1.1054, "step": 97700 }, { "epoch": 7.67, "learning_rate": 5e-05, "loss": 1.1041, "step": 97800 }, { "epoch": 7.68, "learning_rate": 5e-05, "loss": 1.1092, "step": 97900 }, { "epoch": 7.69, "learning_rate": 5e-05, "loss": 1.1093, "step": 98000 }, { "epoch": 7.69, "learning_rate": 5e-05, "loss": 1.1123, "step": 98100 }, { "epoch": 7.7, "learning_rate": 5e-05, "loss": 1.1099, "step": 98200 }, { "epoch": 7.71, "learning_rate": 5e-05, "loss": 1.1093, "step": 98300 }, { "epoch": 7.72, "learning_rate": 5e-05, "loss": 1.1016, "step": 98400 }, { "epoch": 7.72, "learning_rate": 5e-05, "loss": 1.1115, "step": 98500 }, { "epoch": 7.73, "learning_rate": 5e-05, "loss": 1.0945, "step": 98600 }, { "epoch": 7.74, "learning_rate": 5e-05, "loss": 1.1069, "step": 98700 }, { "epoch": 7.75, "learning_rate": 5e-05, "loss": 1.1253, "step": 98800 }, { "epoch": 7.76, "learning_rate": 5e-05, "loss": 1.1174, "step": 98900 }, { "epoch": 7.76, "learning_rate": 5e-05, "loss": 1.1218, "step": 99000 }, { "epoch": 7.77, "learning_rate": 5e-05, "loss": 1.1059, "step": 99100 }, { "epoch": 7.78, "learning_rate": 5e-05, "loss": 1.1036, "step": 99200 }, { "epoch": 7.79, "learning_rate": 5e-05, "loss": 1.1115, "step": 99300 }, { "epoch": 7.8, "learning_rate": 5e-05, "loss": 1.1155, "step": 99400 }, { "epoch": 7.8, "learning_rate": 5e-05, "loss": 1.1088, "step": 99500 }, { "epoch": 7.81, "learning_rate": 5e-05, "loss": 1.1035, "step": 99600 }, { "epoch": 7.82, "learning_rate": 5e-05, "loss": 1.1217, "step": 99700 }, { "epoch": 7.83, "learning_rate": 5e-05, "loss": 1.114, "step": 99800 }, { "epoch": 7.83, "learning_rate": 5e-05, "loss": 1.1107, "step": 99900 }, { "epoch": 7.84, "learning_rate": 5e-05, "loss": 1.0901, "step": 100000 }, { "epoch": 7.84, "eval_gen_len": 18.753509314028427, "eval_loss": 1.5419243574142456, "eval_rouge1": 39.5689, "eval_rouge2": 17.9809, "eval_rougeL": 33.0274, "eval_rougeLsum": 33.0285, "eval_runtime": 2388.4938, "eval_samples_per_second": 4.742, "eval_steps_per_second": 1.186, "step": 100000 }, { "epoch": 7.85, "learning_rate": 5e-05, "loss": 1.1357, "step": 100100 }, { "epoch": 7.86, "learning_rate": 5e-05, "loss": 1.1203, "step": 100200 }, { "epoch": 7.87, "learning_rate": 5e-05, "loss": 1.1088, "step": 100300 }, { "epoch": 7.87, "learning_rate": 5e-05, "loss": 1.1058, "step": 100400 }, { "epoch": 7.88, "learning_rate": 5e-05, "loss": 1.1042, "step": 100500 }, { "epoch": 7.89, "learning_rate": 5e-05, "loss": 1.1145, "step": 100600 }, { "epoch": 7.9, "learning_rate": 5e-05, "loss": 1.1221, "step": 100700 }, { "epoch": 7.91, "learning_rate": 5e-05, "loss": 1.0963, "step": 100800 }, { "epoch": 7.91, "learning_rate": 5e-05, "loss": 1.1021, "step": 100900 }, { "epoch": 7.92, "learning_rate": 5e-05, "loss": 1.1095, "step": 101000 }, { "epoch": 7.93, "learning_rate": 5e-05, "loss": 1.1184, "step": 101100 }, { "epoch": 7.94, "learning_rate": 5e-05, "loss": 1.1181, "step": 101200 }, { "epoch": 7.94, "learning_rate": 5e-05, "loss": 1.1212, "step": 101300 }, { "epoch": 7.95, "learning_rate": 5e-05, "loss": 1.1323, "step": 101400 }, { "epoch": 7.96, "learning_rate": 5e-05, "loss": 1.1228, "step": 101500 }, { "epoch": 7.97, "learning_rate": 5e-05, "loss": 1.123, "step": 101600 }, { "epoch": 7.98, "learning_rate": 5e-05, "loss": 1.1024, "step": 101700 }, { "epoch": 7.98, "learning_rate": 5e-05, "loss": 1.1225, "step": 101800 }, { "epoch": 7.99, "learning_rate": 5e-05, "loss": 1.0943, "step": 101900 }, { "epoch": 8.0, "learning_rate": 5e-05, "loss": 1.1048, "step": 102000 }, { "epoch": 8.01, "learning_rate": 5e-05, "loss": 1.0375, "step": 102100 }, { "epoch": 8.02, "learning_rate": 5e-05, "loss": 1.0024, "step": 102200 }, { "epoch": 8.02, "learning_rate": 5e-05, "loss": 1.0088, "step": 102300 }, { "epoch": 8.03, "learning_rate": 5e-05, "loss": 0.9942, "step": 102400 }, { "epoch": 8.04, "learning_rate": 5e-05, "loss": 1.0172, "step": 102500 }, { "epoch": 8.05, "learning_rate": 5e-05, "loss": 1.0192, "step": 102600 }, { "epoch": 8.05, "learning_rate": 5e-05, "loss": 1.016, "step": 102700 }, { "epoch": 8.06, "learning_rate": 5e-05, "loss": 1.0227, "step": 102800 }, { "epoch": 8.07, "learning_rate": 5e-05, "loss": 1.0317, "step": 102900 }, { "epoch": 8.08, "learning_rate": 5e-05, "loss": 1.0192, "step": 103000 }, { "epoch": 8.09, "learning_rate": 5e-05, "loss": 1.0271, "step": 103100 }, { "epoch": 8.09, "learning_rate": 5e-05, "loss": 1.0337, "step": 103200 }, { "epoch": 8.1, "learning_rate": 5e-05, "loss": 1.017, "step": 103300 }, { "epoch": 8.11, "learning_rate": 5e-05, "loss": 1.0143, "step": 103400 }, { "epoch": 8.12, "learning_rate": 5e-05, "loss": 1.0128, "step": 103500 }, { "epoch": 8.12, "learning_rate": 5e-05, "loss": 1.0181, "step": 103600 }, { "epoch": 8.13, "learning_rate": 5e-05, "loss": 1.0114, "step": 103700 }, { "epoch": 8.14, "learning_rate": 5e-05, "loss": 1.0279, "step": 103800 }, { "epoch": 8.15, "learning_rate": 5e-05, "loss": 1.0109, "step": 103900 }, { "epoch": 8.16, "learning_rate": 5e-05, "loss": 1.0197, "step": 104000 }, { "epoch": 8.16, "learning_rate": 5e-05, "loss": 1.0286, "step": 104100 }, { "epoch": 8.17, "learning_rate": 5e-05, "loss": 1.037, "step": 104200 }, { "epoch": 8.18, "learning_rate": 5e-05, "loss": 1.021, "step": 104300 }, { "epoch": 8.19, "learning_rate": 5e-05, "loss": 1.0114, "step": 104400 }, { "epoch": 8.2, "learning_rate": 5e-05, "loss": 1.0398, "step": 104500 }, { "epoch": 8.2, "learning_rate": 5e-05, "loss": 1.0266, "step": 104600 }, { "epoch": 8.21, "learning_rate": 5e-05, "loss": 1.0313, "step": 104700 }, { "epoch": 8.22, "learning_rate": 5e-05, "loss": 1.0181, "step": 104800 }, { "epoch": 8.23, "learning_rate": 5e-05, "loss": 1.0293, "step": 104900 }, { "epoch": 8.23, "learning_rate": 5e-05, "loss": 1.0336, "step": 105000 }, { "epoch": 8.24, "learning_rate": 5e-05, "loss": 1.0231, "step": 105100 }, { "epoch": 8.25, "learning_rate": 5e-05, "loss": 1.039, "step": 105200 }, { "epoch": 8.26, "learning_rate": 5e-05, "loss": 1.0291, "step": 105300 }, { "epoch": 8.27, "learning_rate": 5e-05, "loss": 1.0375, "step": 105400 }, { "epoch": 8.27, "learning_rate": 5e-05, "loss": 1.0385, "step": 105500 }, { "epoch": 8.28, "learning_rate": 5e-05, "loss": 1.0182, "step": 105600 }, { "epoch": 8.29, "learning_rate": 5e-05, "loss": 1.0498, "step": 105700 }, { "epoch": 8.3, "learning_rate": 5e-05, "loss": 1.0368, "step": 105800 }, { "epoch": 8.31, "learning_rate": 5e-05, "loss": 1.0403, "step": 105900 }, { "epoch": 8.31, "learning_rate": 5e-05, "loss": 1.0292, "step": 106000 }, { "epoch": 8.32, "learning_rate": 5e-05, "loss": 1.0306, "step": 106100 }, { "epoch": 8.33, "learning_rate": 5e-05, "loss": 1.038, "step": 106200 }, { "epoch": 8.34, "learning_rate": 5e-05, "loss": 1.0445, "step": 106300 }, { "epoch": 8.34, "learning_rate": 5e-05, "loss": 1.0541, "step": 106400 }, { "epoch": 8.35, "learning_rate": 5e-05, "loss": 1.0281, "step": 106500 }, { "epoch": 8.36, "learning_rate": 5e-05, "loss": 1.0506, "step": 106600 }, { "epoch": 8.37, "learning_rate": 5e-05, "loss": 1.0351, "step": 106700 }, { "epoch": 8.38, "learning_rate": 5e-05, "loss": 1.0396, "step": 106800 }, { "epoch": 8.38, "learning_rate": 5e-05, "loss": 1.0616, "step": 106900 }, { "epoch": 8.39, "learning_rate": 5e-05, "loss": 1.0317, "step": 107000 }, { "epoch": 8.4, "learning_rate": 5e-05, "loss": 1.0462, "step": 107100 }, { "epoch": 8.41, "learning_rate": 5e-05, "loss": 1.0388, "step": 107200 }, { "epoch": 8.42, "learning_rate": 5e-05, "loss": 1.0529, "step": 107300 }, { "epoch": 8.42, "learning_rate": 5e-05, "loss": 1.0231, "step": 107400 }, { "epoch": 8.43, "learning_rate": 5e-05, "loss": 1.0487, "step": 107500 }, { "epoch": 8.44, "learning_rate": 5e-05, "loss": 1.0489, "step": 107600 }, { "epoch": 8.45, "learning_rate": 5e-05, "loss": 1.033, "step": 107700 }, { "epoch": 8.45, "learning_rate": 5e-05, "loss": 1.0567, "step": 107800 }, { "epoch": 8.46, "learning_rate": 5e-05, "loss": 1.0471, "step": 107900 }, { "epoch": 8.47, "learning_rate": 5e-05, "loss": 1.0462, "step": 108000 }, { "epoch": 8.48, "learning_rate": 5e-05, "loss": 1.0405, "step": 108100 }, { "epoch": 8.49, "learning_rate": 5e-05, "loss": 1.0485, "step": 108200 }, { "epoch": 8.49, "learning_rate": 5e-05, "loss": 1.0537, "step": 108300 }, { "epoch": 8.5, "learning_rate": 5e-05, "loss": 1.0363, "step": 108400 }, { "epoch": 8.51, "learning_rate": 5e-05, "loss": 1.0451, "step": 108500 }, { "epoch": 8.52, "learning_rate": 5e-05, "loss": 1.0405, "step": 108600 }, { "epoch": 8.52, "learning_rate": 5e-05, "loss": 1.0504, "step": 108700 }, { "epoch": 8.53, "learning_rate": 5e-05, "loss": 1.0418, "step": 108800 }, { "epoch": 8.54, "learning_rate": 5e-05, "loss": 1.0605, "step": 108900 }, { "epoch": 8.55, "learning_rate": 5e-05, "loss": 1.0347, "step": 109000 }, { "epoch": 8.56, "learning_rate": 5e-05, "loss": 1.0526, "step": 109100 }, { "epoch": 8.56, "learning_rate": 5e-05, "loss": 1.0434, "step": 109200 }, { "epoch": 8.57, "learning_rate": 5e-05, "loss": 1.0608, "step": 109300 }, { "epoch": 8.58, "learning_rate": 5e-05, "loss": 1.0426, "step": 109400 }, { "epoch": 8.59, "learning_rate": 5e-05, "loss": 1.0462, "step": 109500 }, { "epoch": 8.6, "learning_rate": 5e-05, "loss": 1.0283, "step": 109600 }, { "epoch": 8.6, "learning_rate": 5e-05, "loss": 1.0405, "step": 109700 }, { "epoch": 8.61, "learning_rate": 5e-05, "loss": 1.0557, "step": 109800 }, { "epoch": 8.62, "learning_rate": 5e-05, "loss": 1.042, "step": 109900 }, { "epoch": 8.63, "learning_rate": 5e-05, "loss": 1.0506, "step": 110000 }, { "epoch": 8.63, "eval_gen_len": 18.76313233865984, "eval_loss": 1.561972737312317, "eval_rouge1": 39.6409, "eval_rouge2": 18.0941, "eval_rougeL": 33.1012, "eval_rougeLsum": 33.0983, "eval_runtime": 2370.4984, "eval_samples_per_second": 4.778, "eval_steps_per_second": 1.195, "step": 110000 }, { "epoch": 8.63, "learning_rate": 5e-05, "loss": 1.0483, "step": 110100 }, { "epoch": 8.64, "learning_rate": 5e-05, "loss": 1.0626, "step": 110200 }, { "epoch": 8.65, "learning_rate": 5e-05, "loss": 1.0567, "step": 110300 }, { "epoch": 8.66, "learning_rate": 5e-05, "loss": 1.0455, "step": 110400 }, { "epoch": 8.67, "learning_rate": 5e-05, "loss": 1.0634, "step": 110500 }, { "epoch": 8.67, "learning_rate": 5e-05, "loss": 1.0537, "step": 110600 }, { "epoch": 8.68, "learning_rate": 5e-05, "loss": 1.0522, "step": 110700 }, { "epoch": 8.69, "learning_rate": 5e-05, "loss": 1.0724, "step": 110800 }, { "epoch": 8.7, "learning_rate": 5e-05, "loss": 1.0534, "step": 110900 }, { "epoch": 8.71, "learning_rate": 5e-05, "loss": 1.0407, "step": 111000 }, { "epoch": 8.71, "learning_rate": 5e-05, "loss": 1.0354, "step": 111100 }, { "epoch": 8.72, "learning_rate": 5e-05, "loss": 1.0423, "step": 111200 }, { "epoch": 8.73, "learning_rate": 5e-05, "loss": 1.0671, "step": 111300 }, { "epoch": 8.74, "learning_rate": 5e-05, "loss": 1.0798, "step": 111400 }, { "epoch": 8.74, "learning_rate": 5e-05, "loss": 1.0678, "step": 111500 }, { "epoch": 8.75, "learning_rate": 5e-05, "loss": 1.046, "step": 111600 }, { "epoch": 8.76, "learning_rate": 5e-05, "loss": 1.0487, "step": 111700 }, { "epoch": 8.77, "learning_rate": 5e-05, "loss": 1.0628, "step": 111800 }, { "epoch": 8.78, "learning_rate": 5e-05, "loss": 1.0606, "step": 111900 }, { "epoch": 8.78, "learning_rate": 5e-05, "loss": 1.0695, "step": 112000 }, { "epoch": 8.79, "learning_rate": 5e-05, "loss": 1.0646, "step": 112100 }, { "epoch": 8.8, "learning_rate": 5e-05, "loss": 1.0613, "step": 112200 }, { "epoch": 8.81, "learning_rate": 5e-05, "loss": 1.0652, "step": 112300 }, { "epoch": 8.81, "learning_rate": 5e-05, "loss": 1.059, "step": 112400 }, { "epoch": 8.82, "learning_rate": 5e-05, "loss": 1.0661, "step": 112500 }, { "epoch": 8.83, "learning_rate": 5e-05, "loss": 1.0529, "step": 112600 }, { "epoch": 8.84, "learning_rate": 5e-05, "loss": 1.0732, "step": 112700 }, { "epoch": 8.85, "learning_rate": 5e-05, "loss": 1.0477, "step": 112800 }, { "epoch": 8.85, "learning_rate": 5e-05, "loss": 1.0621, "step": 112900 }, { "epoch": 8.86, "learning_rate": 5e-05, "loss": 1.0757, "step": 113000 }, { "epoch": 8.87, "learning_rate": 5e-05, "loss": 1.0802, "step": 113100 }, { "epoch": 8.88, "learning_rate": 5e-05, "loss": 1.0634, "step": 113200 }, { "epoch": 8.89, "learning_rate": 5e-05, "loss": 1.0512, "step": 113300 }, { "epoch": 8.89, "learning_rate": 5e-05, "loss": 1.0457, "step": 113400 }, { "epoch": 8.9, "learning_rate": 5e-05, "loss": 1.0774, "step": 113500 }, { "epoch": 8.91, "learning_rate": 5e-05, "loss": 1.084, "step": 113600 }, { "epoch": 8.92, "learning_rate": 5e-05, "loss": 1.0792, "step": 113700 }, { "epoch": 8.92, "learning_rate": 5e-05, "loss": 1.0755, "step": 113800 }, { "epoch": 8.93, "learning_rate": 5e-05, "loss": 1.0546, "step": 113900 }, { "epoch": 8.94, "learning_rate": 5e-05, "loss": 1.0596, "step": 114000 }, { "epoch": 8.95, "learning_rate": 5e-05, "loss": 1.0675, "step": 114100 }, { "epoch": 8.96, "learning_rate": 5e-05, "loss": 1.0627, "step": 114200 }, { "epoch": 8.96, "learning_rate": 5e-05, "loss": 1.0663, "step": 114300 }, { "epoch": 8.97, "learning_rate": 5e-05, "loss": 1.0533, "step": 114400 }, { "epoch": 8.98, "learning_rate": 5e-05, "loss": 1.0769, "step": 114500 }, { "epoch": 8.99, "learning_rate": 5e-05, "loss": 1.0566, "step": 114600 }, { "epoch": 9.0, "learning_rate": 5e-05, "loss": 1.0622, "step": 114700 }, { "epoch": 9.0, "learning_rate": 5e-05, "loss": 1.027, "step": 114800 }, { "epoch": 9.01, "learning_rate": 5e-05, "loss": 0.9537, "step": 114900 }, { "epoch": 9.02, "learning_rate": 5e-05, "loss": 0.9737, "step": 115000 }, { "epoch": 9.03, "learning_rate": 5e-05, "loss": 0.9674, "step": 115100 }, { "epoch": 9.03, "learning_rate": 5e-05, "loss": 0.9575, "step": 115200 }, { "epoch": 9.04, "learning_rate": 5e-05, "loss": 0.973, "step": 115300 }, { "epoch": 9.05, "learning_rate": 5e-05, "loss": 0.9645, "step": 115400 }, { "epoch": 9.06, "learning_rate": 5e-05, "loss": 0.9557, "step": 115500 }, { "epoch": 9.07, "learning_rate": 5e-05, "loss": 0.9579, "step": 115600 }, { "epoch": 9.07, "learning_rate": 5e-05, "loss": 0.9655, "step": 115700 }, { "epoch": 9.08, "learning_rate": 5e-05, "loss": 0.9783, "step": 115800 }, { "epoch": 9.09, "learning_rate": 5e-05, "loss": 0.9927, "step": 115900 }, { "epoch": 9.1, "learning_rate": 5e-05, "loss": 0.9748, "step": 116000 }, { "epoch": 9.11, "learning_rate": 5e-05, "loss": 0.9799, "step": 116100 }, { "epoch": 9.11, "learning_rate": 5e-05, "loss": 0.9669, "step": 116200 }, { "epoch": 9.12, "learning_rate": 5e-05, "loss": 1.0048, "step": 116300 }, { "epoch": 9.13, "learning_rate": 5e-05, "loss": 0.9806, "step": 116400 }, { "epoch": 9.14, "learning_rate": 5e-05, "loss": 0.9617, "step": 116500 }, { "epoch": 9.14, "learning_rate": 5e-05, "loss": 0.9767, "step": 116600 }, { "epoch": 9.15, "learning_rate": 5e-05, "loss": 0.9616, "step": 116700 }, { "epoch": 9.16, "learning_rate": 5e-05, "loss": 0.9755, "step": 116800 }, { "epoch": 9.17, "learning_rate": 5e-05, "loss": 0.9715, "step": 116900 }, { "epoch": 9.18, "learning_rate": 5e-05, "loss": 0.9821, "step": 117000 }, { "epoch": 9.18, "learning_rate": 5e-05, "loss": 0.9752, "step": 117100 }, { "epoch": 9.19, "learning_rate": 5e-05, "loss": 0.982, "step": 117200 }, { "epoch": 9.2, "learning_rate": 5e-05, "loss": 0.969, "step": 117300 }, { "epoch": 9.21, "learning_rate": 5e-05, "loss": 0.9874, "step": 117400 }, { "epoch": 9.21, "learning_rate": 5e-05, "loss": 0.9779, "step": 117500 }, { "epoch": 9.22, "learning_rate": 5e-05, "loss": 0.9752, "step": 117600 }, { "epoch": 9.23, "learning_rate": 5e-05, "loss": 0.9965, "step": 117700 }, { "epoch": 9.24, "learning_rate": 5e-05, "loss": 0.9936, "step": 117800 }, { "epoch": 9.25, "learning_rate": 5e-05, "loss": 0.9964, "step": 117900 }, { "epoch": 9.25, "learning_rate": 5e-05, "loss": 0.9793, "step": 118000 }, { "epoch": 9.26, "learning_rate": 5e-05, "loss": 0.9735, "step": 118100 }, { "epoch": 9.27, "learning_rate": 5e-05, "loss": 0.9839, "step": 118200 }, { "epoch": 9.28, "learning_rate": 5e-05, "loss": 0.9986, "step": 118300 }, { "epoch": 9.29, "learning_rate": 5e-05, "loss": 0.9789, "step": 118400 }, { "epoch": 9.29, "learning_rate": 5e-05, "loss": 0.9874, "step": 118500 }, { "epoch": 9.3, "learning_rate": 5e-05, "loss": 0.9839, "step": 118600 }, { "epoch": 9.31, "learning_rate": 5e-05, "loss": 0.9716, "step": 118700 }, { "epoch": 9.32, "learning_rate": 5e-05, "loss": 0.9764, "step": 118800 }, { "epoch": 9.32, "learning_rate": 5e-05, "loss": 0.974, "step": 118900 }, { "epoch": 9.33, "learning_rate": 5e-05, "loss": 0.9817, "step": 119000 }, { "epoch": 9.34, "learning_rate": 5e-05, "loss": 0.9874, "step": 119100 }, { "epoch": 9.35, "learning_rate": 5e-05, "loss": 0.9955, "step": 119200 }, { "epoch": 9.36, "learning_rate": 5e-05, "loss": 0.9879, "step": 119300 }, { "epoch": 9.36, "learning_rate": 5e-05, "loss": 1.0022, "step": 119400 }, { "epoch": 9.37, "learning_rate": 5e-05, "loss": 0.9908, "step": 119500 }, { "epoch": 9.38, "learning_rate": 5e-05, "loss": 0.9849, "step": 119600 }, { "epoch": 9.39, "learning_rate": 5e-05, "loss": 0.9749, "step": 119700 }, { "epoch": 9.4, "learning_rate": 5e-05, "loss": 0.9932, "step": 119800 }, { "epoch": 9.4, "learning_rate": 5e-05, "loss": 0.9971, "step": 119900 }, { "epoch": 9.41, "learning_rate": 5e-05, "loss": 0.9846, "step": 120000 }, { "epoch": 9.41, "eval_gen_len": 18.769135693475764, "eval_loss": 1.6027214527130127, "eval_rouge1": 39.3134, "eval_rouge2": 17.9425, "eval_rougeL": 32.8563, "eval_rougeLsum": 32.8559, "eval_runtime": 2347.8962, "eval_samples_per_second": 4.824, "eval_steps_per_second": 1.206, "step": 120000 }, { "epoch": 9.42, "learning_rate": 5e-05, "loss": 0.9855, "step": 120100 }, { "epoch": 9.43, "learning_rate": 5e-05, "loss": 0.9865, "step": 120200 }, { "epoch": 9.43, "learning_rate": 5e-05, "loss": 1.0009, "step": 120300 }, { "epoch": 9.44, "learning_rate": 5e-05, "loss": 0.9908, "step": 120400 }, { "epoch": 9.45, "learning_rate": 5e-05, "loss": 1.0213, "step": 120500 }, { "epoch": 9.46, "learning_rate": 5e-05, "loss": 1.0078, "step": 120600 }, { "epoch": 9.47, "learning_rate": 5e-05, "loss": 1.0009, "step": 120700 }, { "epoch": 9.47, "learning_rate": 5e-05, "loss": 0.987, "step": 120800 }, { "epoch": 9.48, "learning_rate": 5e-05, "loss": 0.9886, "step": 120900 }, { "epoch": 9.49, "learning_rate": 5e-05, "loss": 1.0088, "step": 121000 }, { "epoch": 9.5, "learning_rate": 5e-05, "loss": 0.9998, "step": 121100 }, { "epoch": 9.51, "learning_rate": 5e-05, "loss": 1.0107, "step": 121200 }, { "epoch": 9.51, "learning_rate": 5e-05, "loss": 0.9858, "step": 121300 }, { "epoch": 9.52, "learning_rate": 5e-05, "loss": 0.9944, "step": 121400 }, { "epoch": 9.53, "learning_rate": 5e-05, "loss": 0.9938, "step": 121500 }, { "epoch": 9.54, "learning_rate": 5e-05, "loss": 0.9932, "step": 121600 }, { "epoch": 9.54, "learning_rate": 5e-05, "loss": 1.0013, "step": 121700 }, { "epoch": 9.55, "learning_rate": 5e-05, "loss": 1.0077, "step": 121800 }, { "epoch": 9.56, "learning_rate": 5e-05, "loss": 1.0019, "step": 121900 }, { "epoch": 9.57, "learning_rate": 5e-05, "loss": 1.0033, "step": 122000 }, { "epoch": 9.58, "learning_rate": 5e-05, "loss": 1.0015, "step": 122100 }, { "epoch": 9.58, "learning_rate": 5e-05, "loss": 1.0139, "step": 122200 }, { "epoch": 9.59, "learning_rate": 5e-05, "loss": 1.0096, "step": 122300 }, { "epoch": 9.6, "learning_rate": 5e-05, "loss": 0.9902, "step": 122400 }, { "epoch": 9.61, "learning_rate": 5e-05, "loss": 1.0116, "step": 122500 }, { "epoch": 9.61, "learning_rate": 5e-05, "loss": 1.0116, "step": 122600 }, { "epoch": 9.62, "learning_rate": 5e-05, "loss": 1.0046, "step": 122700 }, { "epoch": 9.63, "learning_rate": 5e-05, "loss": 1.014, "step": 122800 }, { "epoch": 9.64, "learning_rate": 5e-05, "loss": 1.0187, "step": 122900 }, { "epoch": 9.65, "learning_rate": 5e-05, "loss": 1.006, "step": 123000 }, { "epoch": 9.65, "learning_rate": 5e-05, "loss": 1.0128, "step": 123100 }, { "epoch": 9.66, "learning_rate": 5e-05, "loss": 0.9984, "step": 123200 }, { "epoch": 9.67, "learning_rate": 5e-05, "loss": 0.995, "step": 123300 }, { "epoch": 9.68, "learning_rate": 5e-05, "loss": 1.0211, "step": 123400 }, { "epoch": 9.69, "learning_rate": 5e-05, "loss": 1.028, "step": 123500 }, { "epoch": 9.69, "learning_rate": 5e-05, "loss": 1.0029, "step": 123600 }, { "epoch": 9.7, "learning_rate": 5e-05, "loss": 1.0003, "step": 123700 }, { "epoch": 9.71, "learning_rate": 5e-05, "loss": 0.991, "step": 123800 }, { "epoch": 9.72, "learning_rate": 5e-05, "loss": 1.0048, "step": 123900 }, { "epoch": 9.72, "learning_rate": 5e-05, "loss": 1.0188, "step": 124000 }, { "epoch": 9.73, "learning_rate": 5e-05, "loss": 1.0033, "step": 124100 }, { "epoch": 9.74, "learning_rate": 5e-05, "loss": 1.0226, "step": 124200 }, { "epoch": 9.75, "learning_rate": 5e-05, "loss": 1.0104, "step": 124300 }, { "epoch": 9.76, "learning_rate": 5e-05, "loss": 0.9867, "step": 124400 }, { "epoch": 9.76, "learning_rate": 5e-05, "loss": 1.0037, "step": 124500 }, { "epoch": 9.77, "learning_rate": 5e-05, "loss": 1.0097, "step": 124600 }, { "epoch": 9.78, "learning_rate": 5e-05, "loss": 1.0213, "step": 124700 }, { "epoch": 9.79, "learning_rate": 5e-05, "loss": 1.0441, "step": 124800 }, { "epoch": 9.8, "learning_rate": 5e-05, "loss": 0.9998, "step": 124900 }, { "epoch": 9.8, "learning_rate": 5e-05, "loss": 1.0209, "step": 125000 }, { "epoch": 9.81, "learning_rate": 5e-05, "loss": 1.012, "step": 125100 }, { "epoch": 9.82, "learning_rate": 5e-05, "loss": 1.0061, "step": 125200 }, { "epoch": 9.83, "learning_rate": 5e-05, "loss": 1.0079, "step": 125300 }, { "epoch": 9.83, "learning_rate": 5e-05, "loss": 1.0104, "step": 125400 }, { "epoch": 9.84, "learning_rate": 5e-05, "loss": 1.0232, "step": 125500 }, { "epoch": 9.85, "learning_rate": 5e-05, "loss": 0.9992, "step": 125600 }, { "epoch": 9.86, "learning_rate": 5e-05, "loss": 0.9957, "step": 125700 }, { "epoch": 9.87, "learning_rate": 5e-05, "loss": 1.0075, "step": 125800 }, { "epoch": 9.87, "learning_rate": 5e-05, "loss": 1.0014, "step": 125900 }, { "epoch": 9.88, "learning_rate": 5e-05, "loss": 1.0205, "step": 126000 }, { "epoch": 9.89, "learning_rate": 5e-05, "loss": 1.0097, "step": 126100 }, { "epoch": 9.9, "learning_rate": 5e-05, "loss": 1.0078, "step": 126200 }, { "epoch": 9.91, "learning_rate": 5e-05, "loss": 1.0099, "step": 126300 }, { "epoch": 9.91, "learning_rate": 5e-05, "loss": 1.0139, "step": 126400 }, { "epoch": 9.92, "learning_rate": 5e-05, "loss": 1.0171, "step": 126500 }, { "epoch": 9.93, "learning_rate": 5e-05, "loss": 1.0505, "step": 126600 }, { "epoch": 9.94, "learning_rate": 5e-05, "loss": 1.0137, "step": 126700 }, { "epoch": 9.94, "learning_rate": 5e-05, "loss": 1.0241, "step": 126800 }, { "epoch": 9.95, "learning_rate": 5e-05, "loss": 1.0221, "step": 126900 }, { "epoch": 9.96, "learning_rate": 5e-05, "loss": 1.0107, "step": 127000 }, { "epoch": 9.97, "learning_rate": 5e-05, "loss": 1.022, "step": 127100 }, { "epoch": 9.98, "learning_rate": 5e-05, "loss": 1.0076, "step": 127200 }, { "epoch": 9.98, "learning_rate": 5e-05, "loss": 1.0194, "step": 127300 }, { "epoch": 9.99, "learning_rate": 5e-05, "loss": 1.011, "step": 127400 }, { "epoch": 10.0, "learning_rate": 5e-05, "loss": 1.0128, "step": 127500 }, { "epoch": 10.01, "learning_rate": 5e-05, "loss": 0.9376, "step": 127600 }, { "epoch": 10.01, "learning_rate": 5e-05, "loss": 0.9231, "step": 127700 }, { "epoch": 10.02, "learning_rate": 5e-05, "loss": 0.9108, "step": 127800 }, { "epoch": 10.03, "learning_rate": 5e-05, "loss": 0.9122, "step": 127900 }, { "epoch": 10.04, "learning_rate": 5e-05, "loss": 0.918, "step": 128000 }, { "epoch": 10.05, "learning_rate": 5e-05, "loss": 0.9231, "step": 128100 }, { "epoch": 10.05, "learning_rate": 5e-05, "loss": 0.9267, "step": 128200 }, { "epoch": 10.06, "learning_rate": 5e-05, "loss": 0.9139, "step": 128300 }, { "epoch": 10.07, "learning_rate": 5e-05, "loss": 0.9155, "step": 128400 }, { "epoch": 10.08, "learning_rate": 5e-05, "loss": 0.94, "step": 128500 }, { "epoch": 10.09, "learning_rate": 5e-05, "loss": 0.938, "step": 128600 }, { "epoch": 10.09, "learning_rate": 5e-05, "loss": 0.9136, "step": 128700 }, { "epoch": 10.1, "learning_rate": 5e-05, "loss": 0.9205, "step": 128800 }, { "epoch": 10.11, "learning_rate": 5e-05, "loss": 0.9269, "step": 128900 }, { "epoch": 10.12, "learning_rate": 5e-05, "loss": 0.9215, "step": 129000 }, { "epoch": 10.12, "learning_rate": 5e-05, "loss": 0.9153, "step": 129100 }, { "epoch": 10.13, "learning_rate": 5e-05, "loss": 0.9522, "step": 129200 }, { "epoch": 10.14, "learning_rate": 5e-05, "loss": 0.9342, "step": 129300 }, { "epoch": 10.15, "learning_rate": 5e-05, "loss": 0.9349, "step": 129400 }, { "epoch": 10.16, "learning_rate": 5e-05, "loss": 0.9316, "step": 129500 }, { "epoch": 10.16, "learning_rate": 5e-05, "loss": 0.9214, "step": 129600 }, { "epoch": 10.17, "learning_rate": 5e-05, "loss": 0.931, "step": 129700 }, { "epoch": 10.18, "learning_rate": 5e-05, "loss": 0.9418, "step": 129800 }, { "epoch": 10.19, "learning_rate": 5e-05, "loss": 0.9534, "step": 129900 }, { "epoch": 10.2, "learning_rate": 5e-05, "loss": 0.9217, "step": 130000 }, { "epoch": 10.2, "eval_gen_len": 18.793502251258055, "eval_loss": 1.6331114768981934, "eval_rouge1": 39.2465, "eval_rouge2": 17.7996, "eval_rougeL": 32.7625, "eval_rougeLsum": 32.7679, "eval_runtime": 2377.4093, "eval_samples_per_second": 4.764, "eval_steps_per_second": 1.191, "step": 130000 }, { "epoch": 10.2, "learning_rate": 5e-05, "loss": 0.9296, "step": 130100 }, { "epoch": 10.21, "learning_rate": 5e-05, "loss": 0.9386, "step": 130200 }, { "epoch": 10.22, "learning_rate": 5e-05, "loss": 0.9402, "step": 130300 }, { "epoch": 10.23, "learning_rate": 5e-05, "loss": 0.9317, "step": 130400 }, { "epoch": 10.23, "learning_rate": 5e-05, "loss": 0.9484, "step": 130500 }, { "epoch": 10.24, "learning_rate": 5e-05, "loss": 0.9338, "step": 130600 }, { "epoch": 10.25, "learning_rate": 5e-05, "loss": 0.9273, "step": 130700 }, { "epoch": 10.26, "learning_rate": 5e-05, "loss": 0.9402, "step": 130800 }, { "epoch": 10.27, "learning_rate": 5e-05, "loss": 0.9333, "step": 130900 }, { "epoch": 10.27, "learning_rate": 5e-05, "loss": 0.9379, "step": 131000 }, { "epoch": 10.28, "learning_rate": 5e-05, "loss": 0.9531, "step": 131100 }, { "epoch": 10.29, "learning_rate": 5e-05, "loss": 0.933, "step": 131200 }, { "epoch": 10.3, "learning_rate": 5e-05, "loss": 0.9423, "step": 131300 }, { "epoch": 10.31, "learning_rate": 5e-05, "loss": 0.9395, "step": 131400 }, { "epoch": 10.31, "learning_rate": 5e-05, "loss": 0.9337, "step": 131500 }, { "epoch": 10.32, "learning_rate": 5e-05, "loss": 0.9532, "step": 131600 }, { "epoch": 10.33, "learning_rate": 5e-05, "loss": 0.9587, "step": 131700 }, { "epoch": 10.34, "learning_rate": 5e-05, "loss": 0.9321, "step": 131800 }, { "epoch": 10.34, "learning_rate": 5e-05, "loss": 0.9541, "step": 131900 }, { "epoch": 10.35, "learning_rate": 5e-05, "loss": 0.951, "step": 132000 }, { "epoch": 10.36, "learning_rate": 5e-05, "loss": 0.9324, "step": 132100 }, { "epoch": 10.37, "learning_rate": 5e-05, "loss": 0.9521, "step": 132200 }, { "epoch": 10.38, "learning_rate": 5e-05, "loss": 0.9568, "step": 132300 }, { "epoch": 10.38, "learning_rate": 5e-05, "loss": 0.9492, "step": 132400 }, { "epoch": 10.39, "learning_rate": 5e-05, "loss": 0.9577, "step": 132500 }, { "epoch": 10.4, "learning_rate": 5e-05, "loss": 0.9533, "step": 132600 }, { "epoch": 10.41, "learning_rate": 5e-05, "loss": 0.9551, "step": 132700 }, { "epoch": 10.41, "learning_rate": 5e-05, "loss": 0.9283, "step": 132800 }, { "epoch": 10.42, "learning_rate": 5e-05, "loss": 0.9644, "step": 132900 }, { "epoch": 10.43, "learning_rate": 5e-05, "loss": 0.967, "step": 133000 }, { "epoch": 10.44, "learning_rate": 5e-05, "loss": 0.947, "step": 133100 }, { "epoch": 10.45, "learning_rate": 5e-05, "loss": 0.9688, "step": 133200 }, { "epoch": 10.45, "learning_rate": 5e-05, "loss": 0.9266, "step": 133300 }, { "epoch": 10.46, "learning_rate": 5e-05, "loss": 0.9544, "step": 133400 }, { "epoch": 10.47, "learning_rate": 5e-05, "loss": 0.9429, "step": 133500 }, { "epoch": 10.48, "learning_rate": 5e-05, "loss": 0.9553, "step": 133600 }, { "epoch": 10.49, "learning_rate": 5e-05, "loss": 0.9385, "step": 133700 }, { "epoch": 10.49, "learning_rate": 5e-05, "loss": 0.9545, "step": 133800 }, { "epoch": 10.5, "learning_rate": 5e-05, "loss": 0.9577, "step": 133900 }, { "epoch": 10.51, "learning_rate": 5e-05, "loss": 0.9532, "step": 134000 }, { "epoch": 10.52, "learning_rate": 5e-05, "loss": 0.9503, "step": 134100 }, { "epoch": 10.52, "learning_rate": 5e-05, "loss": 0.9504, "step": 134200 }, { "epoch": 10.53, "learning_rate": 5e-05, "loss": 0.9556, "step": 134300 }, { "epoch": 10.54, "learning_rate": 5e-05, "loss": 0.963, "step": 134400 }, { "epoch": 10.55, "learning_rate": 5e-05, "loss": 0.9663, "step": 134500 }, { "epoch": 10.56, "learning_rate": 5e-05, "loss": 0.9645, "step": 134600 }, { "epoch": 10.56, "learning_rate": 5e-05, "loss": 0.9531, "step": 134700 }, { "epoch": 10.57, "learning_rate": 5e-05, "loss": 0.9566, "step": 134800 }, { "epoch": 10.58, "learning_rate": 5e-05, "loss": 0.9443, "step": 134900 }, { "epoch": 10.59, "learning_rate": 5e-05, "loss": 0.9575, "step": 135000 }, { "epoch": 10.6, "learning_rate": 5e-05, "loss": 0.9522, "step": 135100 }, { "epoch": 10.6, "learning_rate": 5e-05, "loss": 0.9417, "step": 135200 }, { "epoch": 10.61, "learning_rate": 5e-05, "loss": 0.9595, "step": 135300 }, { "epoch": 10.62, "learning_rate": 5e-05, "loss": 0.9746, "step": 135400 }, { "epoch": 10.63, "learning_rate": 5e-05, "loss": 0.9531, "step": 135500 }, { "epoch": 10.63, "learning_rate": 5e-05, "loss": 0.9666, "step": 135600 }, { "epoch": 10.64, "learning_rate": 5e-05, "loss": 0.9508, "step": 135700 }, { "epoch": 10.65, "learning_rate": 5e-05, "loss": 0.9707, "step": 135800 }, { "epoch": 10.66, "learning_rate": 5e-05, "loss": 0.9508, "step": 135900 }, { "epoch": 10.67, "learning_rate": 5e-05, "loss": 0.9637, "step": 136000 }, { "epoch": 10.67, "learning_rate": 5e-05, "loss": 0.9641, "step": 136100 }, { "epoch": 10.68, "learning_rate": 5e-05, "loss": 0.9605, "step": 136200 }, { "epoch": 10.69, "learning_rate": 5e-05, "loss": 0.9672, "step": 136300 }, { "epoch": 10.7, "learning_rate": 5e-05, "loss": 0.9683, "step": 136400 }, { "epoch": 10.71, "learning_rate": 5e-05, "loss": 0.9686, "step": 136500 }, { "epoch": 10.71, "learning_rate": 5e-05, "loss": 0.9655, "step": 136600 }, { "epoch": 10.72, "learning_rate": 5e-05, "loss": 0.9457, "step": 136700 }, { "epoch": 10.73, "learning_rate": 5e-05, "loss": 0.9543, "step": 136800 }, { "epoch": 10.74, "learning_rate": 5e-05, "loss": 0.9712, "step": 136900 }, { "epoch": 10.74, "learning_rate": 5e-05, "loss": 0.9574, "step": 137000 }, { "epoch": 10.75, "learning_rate": 5e-05, "loss": 0.9726, "step": 137100 }, { "epoch": 10.76, "learning_rate": 5e-05, "loss": 0.9752, "step": 137200 }, { "epoch": 10.77, "learning_rate": 5e-05, "loss": 0.9676, "step": 137300 }, { "epoch": 10.78, "learning_rate": 5e-05, "loss": 0.9694, "step": 137400 }, { "epoch": 10.78, "learning_rate": 5e-05, "loss": 0.9805, "step": 137500 }, { "epoch": 10.79, "learning_rate": 5e-05, "loss": 0.9632, "step": 137600 }, { "epoch": 10.8, "learning_rate": 5e-05, "loss": 0.9553, "step": 137700 }, { "epoch": 10.81, "learning_rate": 5e-05, "loss": 0.9712, "step": 137800 }, { "epoch": 10.81, "learning_rate": 5e-05, "loss": 0.9857, "step": 137900 }, { "epoch": 10.82, "learning_rate": 5e-05, "loss": 0.9676, "step": 138000 }, { "epoch": 10.83, "learning_rate": 5e-05, "loss": 0.9511, "step": 138100 }, { "epoch": 10.84, "learning_rate": 5e-05, "loss": 0.9619, "step": 138200 }, { "epoch": 10.85, "learning_rate": 5e-05, "loss": 0.9716, "step": 138300 }, { "epoch": 10.85, "learning_rate": 5e-05, "loss": 0.966, "step": 138400 }, { "epoch": 10.86, "learning_rate": 5e-05, "loss": 0.9637, "step": 138500 }, { "epoch": 10.87, "learning_rate": 5e-05, "loss": 0.9726, "step": 138600 }, { "epoch": 10.88, "learning_rate": 5e-05, "loss": 0.9753, "step": 138700 }, { "epoch": 10.89, "learning_rate": 5e-05, "loss": 0.9845, "step": 138800 }, { "epoch": 10.89, "learning_rate": 5e-05, "loss": 0.9738, "step": 138900 }, { "epoch": 10.9, "learning_rate": 5e-05, "loss": 0.9612, "step": 139000 }, { "epoch": 10.91, "learning_rate": 5e-05, "loss": 0.9791, "step": 139100 }, { "epoch": 10.92, "learning_rate": 5e-05, "loss": 0.9772, "step": 139200 }, { "epoch": 10.92, "learning_rate": 5e-05, "loss": 0.9645, "step": 139300 }, { "epoch": 10.93, "learning_rate": 5e-05, "loss": 0.9761, "step": 139400 }, { "epoch": 10.94, "learning_rate": 5e-05, "loss": 0.982, "step": 139500 }, { "epoch": 10.95, "learning_rate": 5e-05, "loss": 0.9827, "step": 139600 }, { "epoch": 10.96, "learning_rate": 5e-05, "loss": 0.9782, "step": 139700 }, { "epoch": 10.96, "learning_rate": 5e-05, "loss": 0.9825, "step": 139800 }, { "epoch": 10.97, "learning_rate": 5e-05, "loss": 0.9786, "step": 139900 }, { "epoch": 10.98, "learning_rate": 5e-05, "loss": 0.9657, "step": 140000 }, { "epoch": 10.98, "eval_gen_len": 18.765427739030635, "eval_loss": 1.6096522808074951, "eval_rouge1": 39.4889, "eval_rouge2": 17.9974, "eval_rougeL": 33.0222, "eval_rougeLsum": 33.0106, "eval_runtime": 2438.6189, "eval_samples_per_second": 4.645, "eval_steps_per_second": 1.161, "step": 140000 }, { "epoch": 10.99, "learning_rate": 5e-05, "loss": 0.9702, "step": 140100 }, { "epoch": 11.0, "learning_rate": 5e-05, "loss": 0.9868, "step": 140200 }, { "epoch": 11.0, "learning_rate": 5e-05, "loss": 0.9282, "step": 140300 }, { "epoch": 11.01, "learning_rate": 5e-05, "loss": 0.8992, "step": 140400 }, { "epoch": 11.02, "learning_rate": 5e-05, "loss": 0.8785, "step": 140500 }, { "epoch": 11.03, "learning_rate": 5e-05, "loss": 0.8643, "step": 140600 }, { "epoch": 11.03, "learning_rate": 5e-05, "loss": 0.8896, "step": 140700 }, { "epoch": 11.04, "learning_rate": 5e-05, "loss": 0.8829, "step": 140800 }, { "epoch": 11.05, "learning_rate": 5e-05, "loss": 0.8793, "step": 140900 }, { "epoch": 11.06, "learning_rate": 5e-05, "loss": 0.8632, "step": 141000 }, { "epoch": 11.07, "learning_rate": 5e-05, "loss": 0.8656, "step": 141100 }, { "epoch": 11.07, "learning_rate": 5e-05, "loss": 0.887, "step": 141200 }, { "epoch": 11.08, "learning_rate": 5e-05, "loss": 0.8705, "step": 141300 }, { "epoch": 11.09, "learning_rate": 5e-05, "loss": 0.8785, "step": 141400 }, { "epoch": 11.1, "learning_rate": 5e-05, "loss": 0.8813, "step": 141500 }, { "epoch": 11.11, "learning_rate": 5e-05, "loss": 0.8884, "step": 141600 }, { "epoch": 11.11, "learning_rate": 5e-05, "loss": 0.8907, "step": 141700 }, { "epoch": 11.12, "learning_rate": 5e-05, "loss": 0.8836, "step": 141800 }, { "epoch": 11.13, "learning_rate": 5e-05, "loss": 0.8849, "step": 141900 }, { "epoch": 11.14, "learning_rate": 5e-05, "loss": 0.8802, "step": 142000 }, { "epoch": 11.14, "learning_rate": 5e-05, "loss": 0.8824, "step": 142100 }, { "epoch": 11.15, "learning_rate": 5e-05, "loss": 0.8882, "step": 142200 }, { "epoch": 11.16, "learning_rate": 5e-05, "loss": 0.8845, "step": 142300 }, { "epoch": 11.17, "learning_rate": 5e-05, "loss": 0.8946, "step": 142400 }, { "epoch": 11.18, "learning_rate": 5e-05, "loss": 0.8921, "step": 142500 }, { "epoch": 11.18, "learning_rate": 5e-05, "loss": 0.8864, "step": 142600 }, { "epoch": 11.19, "learning_rate": 5e-05, "loss": 0.8896, "step": 142700 }, { "epoch": 11.2, "learning_rate": 5e-05, "loss": 0.8809, "step": 142800 }, { "epoch": 11.21, "learning_rate": 5e-05, "loss": 0.9066, "step": 142900 }, { "epoch": 11.21, "learning_rate": 5e-05, "loss": 0.9023, "step": 143000 }, { "epoch": 11.22, "learning_rate": 5e-05, "loss": 0.9192, "step": 143100 }, { "epoch": 11.23, "learning_rate": 5e-05, "loss": 0.9137, "step": 143200 }, { "epoch": 11.24, "learning_rate": 5e-05, "loss": 0.8832, "step": 143300 }, { "epoch": 11.25, "learning_rate": 5e-05, "loss": 0.8887, "step": 143400 }, { "epoch": 11.25, "learning_rate": 5e-05, "loss": 0.9082, "step": 143500 }, { "epoch": 11.26, "learning_rate": 5e-05, "loss": 0.8935, "step": 143600 }, { "epoch": 11.27, "learning_rate": 5e-05, "loss": 0.8923, "step": 143700 }, { "epoch": 11.28, "learning_rate": 5e-05, "loss": 0.9177, "step": 143800 }, { "epoch": 11.29, "learning_rate": 5e-05, "loss": 0.8995, "step": 143900 }, { "epoch": 11.29, "learning_rate": 5e-05, "loss": 0.8996, "step": 144000 }, { "epoch": 11.3, "learning_rate": 5e-05, "loss": 0.9046, "step": 144100 }, { "epoch": 11.31, "learning_rate": 5e-05, "loss": 0.9144, "step": 144200 }, { "epoch": 11.32, "learning_rate": 5e-05, "loss": 0.8997, "step": 144300 }, { "epoch": 11.32, "learning_rate": 5e-05, "loss": 0.8818, "step": 144400 }, { "epoch": 11.33, "learning_rate": 5e-05, "loss": 0.8952, "step": 144500 }, { "epoch": 11.34, "learning_rate": 5e-05, "loss": 0.9112, "step": 144600 }, { "epoch": 11.35, "learning_rate": 5e-05, "loss": 0.8976, "step": 144700 }, { "epoch": 11.36, "learning_rate": 5e-05, "loss": 0.8983, "step": 144800 }, { "epoch": 11.36, "learning_rate": 5e-05, "loss": 0.9207, "step": 144900 }, { "epoch": 11.37, "learning_rate": 5e-05, "loss": 0.9011, "step": 145000 }, { "epoch": 11.38, "learning_rate": 5e-05, "loss": 0.9068, "step": 145100 }, { "epoch": 11.39, "learning_rate": 5e-05, "loss": 0.8939, "step": 145200 }, { "epoch": 11.4, "learning_rate": 5e-05, "loss": 0.9102, "step": 145300 }, { "epoch": 11.4, "learning_rate": 5e-05, "loss": 0.914, "step": 145400 }, { "epoch": 11.41, "learning_rate": 5e-05, "loss": 0.8868, "step": 145500 }, { "epoch": 11.42, "learning_rate": 5e-05, "loss": 0.9171, "step": 145600 }, { "epoch": 11.43, "learning_rate": 5e-05, "loss": 0.9036, "step": 145700 }, { "epoch": 11.43, "learning_rate": 5e-05, "loss": 0.9035, "step": 145800 }, { "epoch": 11.44, "learning_rate": 5e-05, "loss": 0.916, "step": 145900 }, { "epoch": 11.45, "learning_rate": 5e-05, "loss": 0.9135, "step": 146000 }, { "epoch": 11.46, "learning_rate": 5e-05, "loss": 0.9106, "step": 146100 }, { "epoch": 11.47, "learning_rate": 5e-05, "loss": 0.9052, "step": 146200 }, { "epoch": 11.47, "learning_rate": 5e-05, "loss": 0.9017, "step": 146300 }, { "epoch": 11.48, "learning_rate": 5e-05, "loss": 0.8923, "step": 146400 }, { "epoch": 11.49, "learning_rate": 5e-05, "loss": 0.9145, "step": 146500 }, { "epoch": 11.5, "learning_rate": 5e-05, "loss": 0.917, "step": 146600 }, { "epoch": 11.5, "learning_rate": 5e-05, "loss": 0.9145, "step": 146700 }, { "epoch": 11.51, "learning_rate": 5e-05, "loss": 0.9204, "step": 146800 }, { "epoch": 11.52, "learning_rate": 5e-05, "loss": 0.9049, "step": 146900 }, { "epoch": 11.53, "learning_rate": 5e-05, "loss": 0.9188, "step": 147000 }, { "epoch": 11.54, "learning_rate": 5e-05, "loss": 0.9211, "step": 147100 }, { "epoch": 11.54, "learning_rate": 5e-05, "loss": 0.8907, "step": 147200 }, { "epoch": 11.55, "learning_rate": 5e-05, "loss": 0.9142, "step": 147300 }, { "epoch": 11.56, "learning_rate": 5e-05, "loss": 0.9182, "step": 147400 }, { "epoch": 11.57, "learning_rate": 5e-05, "loss": 0.9251, "step": 147500 }, { "epoch": 11.58, "learning_rate": 5e-05, "loss": 0.9156, "step": 147600 }, { "epoch": 11.58, "learning_rate": 5e-05, "loss": 0.9317, "step": 147700 }, { "epoch": 11.59, "learning_rate": 5e-05, "loss": 0.9117, "step": 147800 }, { "epoch": 11.6, "learning_rate": 5e-05, "loss": 0.9183, "step": 147900 }, { "epoch": 11.61, "learning_rate": 5e-05, "loss": 0.9069, "step": 148000 }, { "epoch": 11.61, "learning_rate": 5e-05, "loss": 0.9165, "step": 148100 }, { "epoch": 11.62, "learning_rate": 5e-05, "loss": 0.9078, "step": 148200 }, { "epoch": 11.63, "learning_rate": 5e-05, "loss": 0.9342, "step": 148300 }, { "epoch": 11.64, "learning_rate": 5e-05, "loss": 0.9206, "step": 148400 }, { "epoch": 11.65, "learning_rate": 5e-05, "loss": 0.9127, "step": 148500 }, { "epoch": 11.65, "learning_rate": 5e-05, "loss": 0.921, "step": 148600 }, { "epoch": 11.66, "learning_rate": 5e-05, "loss": 0.9207, "step": 148700 }, { "epoch": 11.67, "learning_rate": 5e-05, "loss": 0.9156, "step": 148800 }, { "epoch": 11.68, "learning_rate": 5e-05, "loss": 0.9028, "step": 148900 }, { "epoch": 11.69, "learning_rate": 5e-05, "loss": 0.9092, "step": 149000 }, { "epoch": 11.69, "learning_rate": 5e-05, "loss": 0.9215, "step": 149100 }, { "epoch": 11.7, "learning_rate": 5e-05, "loss": 0.9208, "step": 149200 }, { "epoch": 11.71, "learning_rate": 5e-05, "loss": 0.9308, "step": 149300 }, { "epoch": 11.72, "learning_rate": 5e-05, "loss": 0.9075, "step": 149400 }, { "epoch": 11.72, "learning_rate": 5e-05, "loss": 0.9103, "step": 149500 }, { "epoch": 11.73, "learning_rate": 5e-05, "loss": 0.9306, "step": 149600 }, { "epoch": 11.74, "learning_rate": 5e-05, "loss": 0.924, "step": 149700 }, { "epoch": 11.75, "learning_rate": 5e-05, "loss": 0.9214, "step": 149800 }, { "epoch": 11.76, "learning_rate": 5e-05, "loss": 0.9252, "step": 149900 }, { "epoch": 11.76, "learning_rate": 5e-05, "loss": 0.9206, "step": 150000 }, { "epoch": 11.76, "eval_gen_len": 18.793767105146994, "eval_loss": 1.6424586772918701, "eval_rouge1": 39.3295, "eval_rouge2": 17.9689, "eval_rougeL": 32.8956, "eval_rougeLsum": 32.889, "eval_runtime": 2509.3575, "eval_samples_per_second": 4.514, "eval_steps_per_second": 1.129, "step": 150000 }, { "epoch": 11.77, "learning_rate": 5e-05, "loss": 0.9152, "step": 150100 }, { "epoch": 11.78, "learning_rate": 5e-05, "loss": 0.9187, "step": 150200 }, { "epoch": 11.79, "learning_rate": 5e-05, "loss": 0.9092, "step": 150300 }, { "epoch": 11.8, "learning_rate": 5e-05, "loss": 0.9096, "step": 150400 }, { "epoch": 11.8, "learning_rate": 5e-05, "loss": 0.9297, "step": 150500 }, { "epoch": 11.81, "learning_rate": 5e-05, "loss": 0.9272, "step": 150600 }, { "epoch": 11.82, "learning_rate": 5e-05, "loss": 0.9477, "step": 150700 }, { "epoch": 11.83, "learning_rate": 5e-05, "loss": 0.9237, "step": 150800 }, { "epoch": 11.83, "learning_rate": 5e-05, "loss": 0.9298, "step": 150900 }, { "epoch": 11.84, "learning_rate": 5e-05, "loss": 0.9283, "step": 151000 }, { "epoch": 11.85, "learning_rate": 5e-05, "loss": 0.9449, "step": 151100 }, { "epoch": 11.86, "learning_rate": 5e-05, "loss": 0.9287, "step": 151200 }, { "epoch": 11.87, "learning_rate": 5e-05, "loss": 0.9181, "step": 151300 }, { "epoch": 11.87, "learning_rate": 5e-05, "loss": 0.9318, "step": 151400 }, { "epoch": 11.88, "learning_rate": 5e-05, "loss": 0.9127, "step": 151500 }, { "epoch": 11.89, "learning_rate": 5e-05, "loss": 0.9318, "step": 151600 }, { "epoch": 11.9, "learning_rate": 5e-05, "loss": 0.9263, "step": 151700 }, { "epoch": 11.9, "learning_rate": 5e-05, "loss": 0.9319, "step": 151800 }, { "epoch": 11.91, "learning_rate": 5e-05, "loss": 0.9412, "step": 151900 }, { "epoch": 11.92, "learning_rate": 5e-05, "loss": 0.9246, "step": 152000 }, { "epoch": 11.93, "learning_rate": 5e-05, "loss": 0.935, "step": 152100 }, { "epoch": 11.94, "learning_rate": 5e-05, "loss": 0.9311, "step": 152200 }, { "epoch": 11.94, "learning_rate": 5e-05, "loss": 0.9228, "step": 152300 }, { "epoch": 11.95, "learning_rate": 5e-05, "loss": 0.9253, "step": 152400 }, { "epoch": 11.96, "learning_rate": 5e-05, "loss": 0.9347, "step": 152500 }, { "epoch": 11.97, "learning_rate": 5e-05, "loss": 0.9357, "step": 152600 }, { "epoch": 11.98, "learning_rate": 5e-05, "loss": 0.9402, "step": 152700 }, { "epoch": 11.98, "learning_rate": 5e-05, "loss": 0.9422, "step": 152800 }, { "epoch": 11.99, "learning_rate": 5e-05, "loss": 0.9344, "step": 152900 }, { "epoch": 12.0, "learning_rate": 5e-05, "loss": 0.9344, "step": 153000 }, { "epoch": 12.01, "learning_rate": 5e-05, "loss": 0.8553, "step": 153100 }, { "epoch": 12.01, "learning_rate": 5e-05, "loss": 0.8309, "step": 153200 }, { "epoch": 12.02, "learning_rate": 5e-05, "loss": 0.8392, "step": 153300 }, { "epoch": 12.03, "learning_rate": 5e-05, "loss": 0.839, "step": 153400 }, { "epoch": 12.04, "learning_rate": 5e-05, "loss": 0.832, "step": 153500 }, { "epoch": 12.05, "learning_rate": 5e-05, "loss": 0.8413, "step": 153600 }, { "epoch": 12.05, "learning_rate": 5e-05, "loss": 0.8306, "step": 153700 }, { "epoch": 12.06, "learning_rate": 5e-05, "loss": 0.8318, "step": 153800 }, { "epoch": 12.07, "learning_rate": 5e-05, "loss": 0.8422, "step": 153900 }, { "epoch": 12.08, "learning_rate": 5e-05, "loss": 0.8423, "step": 154000 }, { "epoch": 12.09, "learning_rate": 5e-05, "loss": 0.8485, "step": 154100 }, { "epoch": 12.09, "learning_rate": 5e-05, "loss": 0.8339, "step": 154200 }, { "epoch": 12.1, "learning_rate": 5e-05, "loss": 0.8444, "step": 154300 }, { "epoch": 12.11, "learning_rate": 5e-05, "loss": 0.8435, "step": 154400 }, { "epoch": 12.12, "learning_rate": 5e-05, "loss": 0.8419, "step": 154500 }, { "epoch": 12.12, "learning_rate": 5e-05, "loss": 0.8566, "step": 154600 }, { "epoch": 12.13, "learning_rate": 5e-05, "loss": 0.8516, "step": 154700 }, { "epoch": 12.14, "learning_rate": 5e-05, "loss": 0.8737, "step": 154800 }, { "epoch": 12.15, "learning_rate": 5e-05, "loss": 0.8527, "step": 154900 }, { "epoch": 12.16, "learning_rate": 5e-05, "loss": 0.8388, "step": 155000 }, { "epoch": 12.16, "learning_rate": 5e-05, "loss": 0.8528, "step": 155100 }, { "epoch": 12.17, "learning_rate": 5e-05, "loss": 0.8548, "step": 155200 }, { "epoch": 12.18, "learning_rate": 5e-05, "loss": 0.866, "step": 155300 }, { "epoch": 12.19, "learning_rate": 5e-05, "loss": 0.8604, "step": 155400 }, { "epoch": 12.2, "learning_rate": 5e-05, "loss": 0.8596, "step": 155500 }, { "epoch": 12.2, "learning_rate": 5e-05, "loss": 0.8483, "step": 155600 }, { "epoch": 12.21, "learning_rate": 5e-05, "loss": 0.8523, "step": 155700 }, { "epoch": 12.22, "learning_rate": 5e-05, "loss": 0.8588, "step": 155800 }, { "epoch": 12.23, "learning_rate": 5e-05, "loss": 0.8571, "step": 155900 }, { "epoch": 12.23, "learning_rate": 5e-05, "loss": 0.8554, "step": 156000 }, { "epoch": 12.24, "learning_rate": 5e-05, "loss": 0.8586, "step": 156100 }, { "epoch": 12.25, "learning_rate": 5e-05, "loss": 0.8577, "step": 156200 }, { "epoch": 12.26, "learning_rate": 5e-05, "loss": 0.8751, "step": 156300 }, { "epoch": 12.27, "learning_rate": 5e-05, "loss": 0.8783, "step": 156400 }, { "epoch": 12.27, "learning_rate": 5e-05, "loss": 0.8635, "step": 156500 }, { "epoch": 12.28, "learning_rate": 5e-05, "loss": 0.8546, "step": 156600 }, { "epoch": 12.29, "learning_rate": 5e-05, "loss": 0.8548, "step": 156700 }, { "epoch": 12.3, "learning_rate": 5e-05, "loss": 0.8657, "step": 156800 }, { "epoch": 12.3, "learning_rate": 5e-05, "loss": 0.8578, "step": 156900 }, { "epoch": 12.31, "learning_rate": 5e-05, "loss": 0.8626, "step": 157000 }, { "epoch": 12.32, "learning_rate": 5e-05, "loss": 0.869, "step": 157100 }, { "epoch": 12.33, "learning_rate": 5e-05, "loss": 0.864, "step": 157200 }, { "epoch": 12.34, "learning_rate": 5e-05, "loss": 0.8677, "step": 157300 }, { "epoch": 12.34, "learning_rate": 5e-05, "loss": 0.8673, "step": 157400 }, { "epoch": 12.35, "learning_rate": 5e-05, "loss": 0.8685, "step": 157500 }, { "epoch": 12.36, "learning_rate": 5e-05, "loss": 0.8777, "step": 157600 }, { "epoch": 12.37, "learning_rate": 5e-05, "loss": 0.8707, "step": 157700 }, { "epoch": 12.38, "learning_rate": 5e-05, "loss": 0.8495, "step": 157800 }, { "epoch": 12.38, "learning_rate": 5e-05, "loss": 0.8891, "step": 157900 }, { "epoch": 12.39, "learning_rate": 5e-05, "loss": 0.8652, "step": 158000 }, { "epoch": 12.4, "learning_rate": 5e-05, "loss": 0.8688, "step": 158100 }, { "epoch": 12.41, "learning_rate": 5e-05, "loss": 0.8519, "step": 158200 }, { "epoch": 12.41, "learning_rate": 5e-05, "loss": 0.8686, "step": 158300 }, { "epoch": 12.42, "learning_rate": 5e-05, "loss": 0.859, "step": 158400 }, { "epoch": 12.43, "learning_rate": 5e-05, "loss": 0.8559, "step": 158500 }, { "epoch": 12.44, "learning_rate": 5e-05, "loss": 0.8713, "step": 158600 }, { "epoch": 12.45, "learning_rate": 5e-05, "loss": 0.8868, "step": 158700 }, { "epoch": 12.45, "learning_rate": 5e-05, "loss": 0.8699, "step": 158800 }, { "epoch": 12.46, "learning_rate": 5e-05, "loss": 0.8768, "step": 158900 }, { "epoch": 12.47, "learning_rate": 5e-05, "loss": 0.8746, "step": 159000 }, { "epoch": 12.48, "learning_rate": 5e-05, "loss": 0.873, "step": 159100 }, { "epoch": 12.49, "learning_rate": 5e-05, "loss": 0.8646, "step": 159200 }, { "epoch": 12.49, "learning_rate": 5e-05, "loss": 0.86, "step": 159300 }, { "epoch": 12.5, "learning_rate": 5e-05, "loss": 0.8631, "step": 159400 }, { "epoch": 12.51, "learning_rate": 5e-05, "loss": 0.8907, "step": 159500 }, { "epoch": 12.52, "learning_rate": 5e-05, "loss": 0.8596, "step": 159600 }, { "epoch": 12.52, "learning_rate": 5e-05, "loss": 0.8781, "step": 159700 }, { "epoch": 12.53, "learning_rate": 5e-05, "loss": 0.8728, "step": 159800 }, { "epoch": 12.54, "learning_rate": 5e-05, "loss": 0.8767, "step": 159900 }, { "epoch": 12.55, "learning_rate": 5e-05, "loss": 0.8635, "step": 160000 }, { "epoch": 12.55, "eval_gen_len": 18.793943674406286, "eval_loss": 1.6820098161697388, "eval_rouge1": 39.4633, "eval_rouge2": 18.0301, "eval_rougeL": 32.9134, "eval_rougeLsum": 32.919, "eval_runtime": 2390.8284, "eval_samples_per_second": 4.738, "eval_steps_per_second": 1.185, "step": 160000 } ], "max_steps": 637550, "num_train_epochs": 50, "total_flos": 8.741750241043968e+18, "trial_name": null, "trial_params": null }