{ "best_metric": 0.3778, "best_model_checkpoint": "rut5-base-absum-tech-support-calls/checkpoint-2000", "epoch": 144.44444444444446, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.78, "learning_rate": 5e-06, "loss": 2.7022, "step": 50 }, { "epoch": 2.78, "eval_avg_rouge_f": 0.0, "eval_gen_len": 6.875, "eval_loss": 2.296959400177002, "eval_rouge-1": 0.0, "eval_rouge-2": 0.0, "eval_rouge-l": 0.0, "eval_runtime": 3.1977, "eval_samples_per_second": 2.502, "eval_steps_per_second": 2.502, "step": 50 }, { "epoch": 5.56, "learning_rate": 1e-05, "loss": 2.2932, "step": 100 }, { "epoch": 5.56, "eval_avg_rouge_f": 0.0, "eval_gen_len": 10.375, "eval_loss": 1.8183443546295166, "eval_rouge-1": 0.0, "eval_rouge-2": 0.0, "eval_rouge-l": 0.0, "eval_runtime": 1.7212, "eval_samples_per_second": 4.648, "eval_steps_per_second": 4.648, "step": 100 }, { "epoch": 8.33, "learning_rate": 1.5000000000000002e-05, "loss": 1.8234, "step": 150 }, { "epoch": 8.33, "eval_avg_rouge_f": 0.3018, "eval_gen_len": 14.0, "eval_loss": 1.4890449047088623, "eval_rouge-1": 0.3588, "eval_rouge-2": 0.2205, "eval_rouge-l": 0.3262, "eval_runtime": 2.5527, "eval_samples_per_second": 3.134, "eval_steps_per_second": 3.134, "step": 150 }, { "epoch": 11.11, "learning_rate": 2e-05, "loss": 1.3727, "step": 200 }, { "epoch": 11.11, "eval_avg_rouge_f": 0.2771, "eval_gen_len": 12.375, "eval_loss": 1.373950719833374, "eval_rouge-1": 0.3493, "eval_rouge-2": 0.1653, "eval_rouge-l": 0.3167, "eval_runtime": 2.0484, "eval_samples_per_second": 3.906, "eval_steps_per_second": 3.906, "step": 200 }, { "epoch": 13.89, "learning_rate": 1.9767441860465116e-05, "loss": 1.0367, "step": 250 }, { "epoch": 13.89, "eval_avg_rouge_f": 0.1974, "eval_gen_len": 15.375, "eval_loss": 1.3832566738128662, "eval_rouge-1": 0.2607, "eval_rouge-2": 0.0984, "eval_rouge-l": 0.2331, "eval_runtime": 3.0521, "eval_samples_per_second": 2.621, "eval_steps_per_second": 2.621, "step": 250 }, { "epoch": 16.67, "learning_rate": 1.9534883720930235e-05, "loss": 0.841, "step": 300 }, { "epoch": 16.67, "eval_avg_rouge_f": 0.3055, "eval_gen_len": 16.0, "eval_loss": 1.3515713214874268, "eval_rouge-1": 0.3713, "eval_rouge-2": 0.1857, "eval_rouge-l": 0.3594, "eval_runtime": 2.4808, "eval_samples_per_second": 3.225, "eval_steps_per_second": 3.225, "step": 300 }, { "epoch": 19.44, "learning_rate": 1.9302325581395353e-05, "loss": 0.7182, "step": 350 }, { "epoch": 19.44, "eval_avg_rouge_f": 0.2672, "eval_gen_len": 16.125, "eval_loss": 1.3606797456741333, "eval_rouge-1": 0.3352, "eval_rouge-2": 0.143, "eval_rouge-l": 0.3233, "eval_runtime": 2.5469, "eval_samples_per_second": 3.141, "eval_steps_per_second": 3.141, "step": 350 }, { "epoch": 22.22, "learning_rate": 1.9069767441860468e-05, "loss": 0.5102, "step": 400 }, { "epoch": 22.22, "eval_avg_rouge_f": 0.2849, "eval_gen_len": 16.625, "eval_loss": 1.3673444986343384, "eval_rouge-1": 0.36, "eval_rouge-2": 0.1597, "eval_rouge-l": 0.3349, "eval_runtime": 2.6789, "eval_samples_per_second": 2.986, "eval_steps_per_second": 2.986, "step": 400 }, { "epoch": 25.0, "learning_rate": 1.8837209302325582e-05, "loss": 0.4595, "step": 450 }, { "epoch": 25.0, "eval_avg_rouge_f": 0.3228, "eval_gen_len": 17.125, "eval_loss": 1.371541976928711, "eval_rouge-1": 0.3892, "eval_rouge-2": 0.2153, "eval_rouge-l": 0.3641, "eval_runtime": 2.6184, "eval_samples_per_second": 3.055, "eval_steps_per_second": 3.055, "step": 450 }, { "epoch": 27.78, "learning_rate": 1.86046511627907e-05, "loss": 0.3886, "step": 500 }, { "epoch": 27.78, "eval_avg_rouge_f": 0.3252, "eval_gen_len": 16.375, "eval_loss": 1.4634039402008057, "eval_rouge-1": 0.3801, "eval_rouge-2": 0.2274, "eval_rouge-l": 0.3682, "eval_runtime": 2.8205, "eval_samples_per_second": 2.836, "eval_steps_per_second": 2.836, "step": 500 }, { "epoch": 30.56, "learning_rate": 1.8372093023255815e-05, "loss": 0.3158, "step": 550 }, { "epoch": 30.56, "eval_avg_rouge_f": 0.331, "eval_gen_len": 16.75, "eval_loss": 1.5123608112335205, "eval_rouge-1": 0.3938, "eval_rouge-2": 0.2319, "eval_rouge-l": 0.3672, "eval_runtime": 3.0478, "eval_samples_per_second": 2.625, "eval_steps_per_second": 2.625, "step": 550 }, { "epoch": 33.33, "learning_rate": 1.813953488372093e-05, "loss": 0.2687, "step": 600 }, { "epoch": 33.33, "eval_avg_rouge_f": 0.3468, "eval_gen_len": 16.5, "eval_loss": 1.5868151187896729, "eval_rouge-1": 0.3987, "eval_rouge-2": 0.2568, "eval_rouge-l": 0.3848, "eval_runtime": 2.5696, "eval_samples_per_second": 3.113, "eval_steps_per_second": 3.113, "step": 600 }, { "epoch": 36.11, "learning_rate": 1.790697674418605e-05, "loss": 0.2361, "step": 650 }, { "epoch": 36.11, "eval_avg_rouge_f": 0.3163, "eval_gen_len": 17.75, "eval_loss": 1.6459990739822388, "eval_rouge-1": 0.375, "eval_rouge-2": 0.2107, "eval_rouge-l": 0.3631, "eval_runtime": 2.8706, "eval_samples_per_second": 2.787, "eval_steps_per_second": 2.787, "step": 650 }, { "epoch": 38.89, "learning_rate": 1.7674418604651163e-05, "loss": 0.1991, "step": 700 }, { "epoch": 38.89, "eval_avg_rouge_f": 0.3085, "eval_gen_len": 16.25, "eval_loss": 1.6946561336517334, "eval_rouge-1": 0.3605, "eval_rouge-2": 0.2177, "eval_rouge-l": 0.3474, "eval_runtime": 2.519, "eval_samples_per_second": 3.176, "eval_steps_per_second": 3.176, "step": 700 }, { "epoch": 41.67, "learning_rate": 1.744186046511628e-05, "loss": 0.151, "step": 750 }, { "epoch": 41.67, "eval_avg_rouge_f": 0.3222, "eval_gen_len": 16.5, "eval_loss": 1.8248298168182373, "eval_rouge-1": 0.3832, "eval_rouge-2": 0.2274, "eval_rouge-l": 0.3559, "eval_runtime": 2.6923, "eval_samples_per_second": 2.971, "eval_steps_per_second": 2.971, "step": 750 }, { "epoch": 44.44, "learning_rate": 1.7209302325581396e-05, "loss": 0.1517, "step": 800 }, { "epoch": 44.44, "eval_avg_rouge_f": 0.3811, "eval_gen_len": 16.875, "eval_loss": 1.7883902788162231, "eval_rouge-1": 0.4309, "eval_rouge-2": 0.294, "eval_rouge-l": 0.4184, "eval_runtime": 2.5559, "eval_samples_per_second": 3.13, "eval_steps_per_second": 3.13, "step": 800 }, { "epoch": 47.22, "learning_rate": 1.697674418604651e-05, "loss": 0.1444, "step": 850 }, { "epoch": 47.22, "eval_avg_rouge_f": 0.322, "eval_gen_len": 17.125, "eval_loss": 1.8518762588500977, "eval_rouge-1": 0.3843, "eval_rouge-2": 0.2107, "eval_rouge-l": 0.3711, "eval_runtime": 2.7195, "eval_samples_per_second": 2.942, "eval_steps_per_second": 2.942, "step": 850 }, { "epoch": 50.0, "learning_rate": 1.674418604651163e-05, "loss": 0.1106, "step": 900 }, { "epoch": 50.0, "eval_avg_rouge_f": 0.3209, "eval_gen_len": 17.5, "eval_loss": 1.9637408256530762, "eval_rouge-1": 0.383, "eval_rouge-2": 0.2107, "eval_rouge-l": 0.3691, "eval_runtime": 2.6625, "eval_samples_per_second": 3.005, "eval_steps_per_second": 3.005, "step": 900 }, { "epoch": 52.78, "learning_rate": 1.6511627906976747e-05, "loss": 0.0961, "step": 950 }, { "epoch": 52.78, "eval_avg_rouge_f": 0.3103, "eval_gen_len": 16.75, "eval_loss": 2.07181715965271, "eval_rouge-1": 0.3645, "eval_rouge-2": 0.2177, "eval_rouge-l": 0.3488, "eval_runtime": 2.7383, "eval_samples_per_second": 2.921, "eval_steps_per_second": 2.921, "step": 950 }, { "epoch": 55.56, "learning_rate": 1.6279069767441862e-05, "loss": 0.1131, "step": 1000 }, { "epoch": 55.56, "eval_avg_rouge_f": 0.3067, "eval_gen_len": 16.75, "eval_loss": 1.9934816360473633, "eval_rouge-1": 0.3602, "eval_rouge-2": 0.2153, "eval_rouge-l": 0.3446, "eval_runtime": 2.6295, "eval_samples_per_second": 3.042, "eval_steps_per_second": 3.042, "step": 1000 }, { "epoch": 58.33, "learning_rate": 1.6046511627906977e-05, "loss": 0.0996, "step": 1050 }, { "epoch": 58.33, "eval_avg_rouge_f": 0.3712, "eval_gen_len": 16.0, "eval_loss": 2.06162166595459, "eval_rouge-1": 0.4153, "eval_rouge-2": 0.2986, "eval_rouge-l": 0.3996, "eval_runtime": 3.0388, "eval_samples_per_second": 2.633, "eval_steps_per_second": 2.633, "step": 1050 }, { "epoch": 61.11, "learning_rate": 1.5813953488372095e-05, "loss": 0.0663, "step": 1100 }, { "epoch": 61.11, "eval_avg_rouge_f": 0.3786, "eval_gen_len": 14.625, "eval_loss": 2.1466333866119385, "eval_rouge-1": 0.4257, "eval_rouge-2": 0.301, "eval_rouge-l": 0.409, "eval_runtime": 3.1902, "eval_samples_per_second": 2.508, "eval_steps_per_second": 2.508, "step": 1100 }, { "epoch": 63.89, "learning_rate": 1.558139534883721e-05, "loss": 0.0789, "step": 1150 }, { "epoch": 63.89, "eval_avg_rouge_f": 0.3728, "eval_gen_len": 16.0, "eval_loss": 2.1657214164733887, "eval_rouge-1": 0.4166, "eval_rouge-2": 0.301, "eval_rouge-l": 0.4009, "eval_runtime": 2.4781, "eval_samples_per_second": 3.228, "eval_steps_per_second": 3.228, "step": 1150 }, { "epoch": 66.67, "learning_rate": 1.5348837209302328e-05, "loss": 0.073, "step": 1200 }, { "epoch": 66.67, "eval_avg_rouge_f": 0.3713, "eval_gen_len": 16.25, "eval_loss": 2.251979351043701, "eval_rouge-1": 0.4131, "eval_rouge-2": 0.301, "eval_rouge-l": 0.3999, "eval_runtime": 3.4503, "eval_samples_per_second": 2.319, "eval_steps_per_second": 2.319, "step": 1200 }, { "epoch": 69.44, "learning_rate": 1.5116279069767443e-05, "loss": 0.0739, "step": 1250 }, { "epoch": 69.44, "eval_avg_rouge_f": 0.3051, "eval_gen_len": 17.0, "eval_loss": 2.260244369506836, "eval_rouge-1": 0.3582, "eval_rouge-2": 0.2145, "eval_rouge-l": 0.3426, "eval_runtime": 2.5948, "eval_samples_per_second": 3.083, "eval_steps_per_second": 3.083, "step": 1250 }, { "epoch": 72.22, "learning_rate": 1.488372093023256e-05, "loss": 0.0799, "step": 1300 }, { "epoch": 72.22, "eval_avg_rouge_f": 0.3156, "eval_gen_len": 16.75, "eval_loss": 2.3278074264526367, "eval_rouge-1": 0.369, "eval_rouge-2": 0.2242, "eval_rouge-l": 0.3534, "eval_runtime": 3.3341, "eval_samples_per_second": 2.399, "eval_steps_per_second": 2.399, "step": 1300 }, { "epoch": 75.0, "learning_rate": 1.4651162790697674e-05, "loss": 0.0546, "step": 1350 }, { "epoch": 75.0, "eval_avg_rouge_f": 0.3164, "eval_gen_len": 16.5, "eval_loss": 2.402118444442749, "eval_rouge-1": 0.369, "eval_rouge-2": 0.2242, "eval_rouge-l": 0.3559, "eval_runtime": 2.5497, "eval_samples_per_second": 3.138, "eval_steps_per_second": 3.138, "step": 1350 }, { "epoch": 77.78, "learning_rate": 1.441860465116279e-05, "loss": 0.0674, "step": 1400 }, { "epoch": 77.78, "eval_avg_rouge_f": 0.3697, "eval_gen_len": 17.25, "eval_loss": 2.3492679595947266, "eval_rouge-1": 0.4149, "eval_rouge-2": 0.2924, "eval_rouge-l": 0.4017, "eval_runtime": 3.1613, "eval_samples_per_second": 2.531, "eval_steps_per_second": 2.531, "step": 1400 }, { "epoch": 80.56, "learning_rate": 1.4186046511627909e-05, "loss": 0.0459, "step": 1450 }, { "epoch": 80.56, "eval_avg_rouge_f": 0.3839, "eval_gen_len": 16.125, "eval_loss": 2.3503048419952393, "eval_rouge-1": 0.426, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.4104, "eval_runtime": 2.4756, "eval_samples_per_second": 3.232, "eval_steps_per_second": 3.232, "step": 1450 }, { "epoch": 83.33, "learning_rate": 1.3953488372093025e-05, "loss": 0.0501, "step": 1500 }, { "epoch": 83.33, "eval_avg_rouge_f": 0.3732, "eval_gen_len": 15.375, "eval_loss": 2.371870517730713, "eval_rouge-1": 0.4172, "eval_rouge-2": 0.301, "eval_rouge-l": 0.4016, "eval_runtime": 2.8658, "eval_samples_per_second": 2.792, "eval_steps_per_second": 2.792, "step": 1500 }, { "epoch": 86.11, "learning_rate": 1.372093023255814e-05, "loss": 0.0509, "step": 1550 }, { "epoch": 86.11, "eval_avg_rouge_f": 0.3926, "eval_gen_len": 16.375, "eval_loss": 2.4419479370117188, "eval_rouge-1": 0.4361, "eval_rouge-2": 0.3188, "eval_rouge-l": 0.4229, "eval_runtime": 3.0315, "eval_samples_per_second": 2.639, "eval_steps_per_second": 2.639, "step": 1550 }, { "epoch": 88.89, "learning_rate": 1.3488372093023257e-05, "loss": 0.0449, "step": 1600 }, { "epoch": 88.89, "eval_avg_rouge_f": 0.4026, "eval_gen_len": 16.375, "eval_loss": 2.3171658515930176, "eval_rouge-1": 0.4514, "eval_rouge-2": 0.3188, "eval_rouge-l": 0.4375, "eval_runtime": 3.636, "eval_samples_per_second": 2.2, "eval_steps_per_second": 2.2, "step": 1600 }, { "epoch": 91.67, "learning_rate": 1.3255813953488372e-05, "loss": 0.0408, "step": 1650 }, { "epoch": 91.67, "eval_avg_rouge_f": 0.3906, "eval_gen_len": 16.25, "eval_loss": 2.4437549114227295, "eval_rouge-1": 0.4349, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.4217, "eval_runtime": 2.4939, "eval_samples_per_second": 3.208, "eval_steps_per_second": 3.208, "step": 1650 }, { "epoch": 94.44, "learning_rate": 1.302325581395349e-05, "loss": 0.0357, "step": 1700 }, { "epoch": 94.44, "eval_avg_rouge_f": 0.3831, "eval_gen_len": 16.25, "eval_loss": 2.540635108947754, "eval_rouge-1": 0.4236, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.4104, "eval_runtime": 3.5767, "eval_samples_per_second": 2.237, "eval_steps_per_second": 2.237, "step": 1700 }, { "epoch": 97.22, "learning_rate": 1.2790697674418606e-05, "loss": 0.0403, "step": 1750 }, { "epoch": 97.22, "eval_avg_rouge_f": 0.3748, "eval_gen_len": 16.375, "eval_loss": 2.4441065788269043, "eval_rouge-1": 0.4111, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.398, "eval_runtime": 2.5203, "eval_samples_per_second": 3.174, "eval_steps_per_second": 3.174, "step": 1750 }, { "epoch": 100.0, "learning_rate": 1.2558139534883723e-05, "loss": 0.0489, "step": 1800 }, { "epoch": 100.0, "eval_avg_rouge_f": 0.3768, "eval_gen_len": 16.125, "eval_loss": 2.459872245788574, "eval_rouge-1": 0.4154, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.3997, "eval_runtime": 3.4884, "eval_samples_per_second": 2.293, "eval_steps_per_second": 2.293, "step": 1800 }, { "epoch": 102.78, "learning_rate": 1.2325581395348838e-05, "loss": 0.032, "step": 1850 }, { "epoch": 102.78, "eval_avg_rouge_f": 0.407, "eval_gen_len": 15.0, "eval_loss": 2.623534679412842, "eval_rouge-1": 0.4515, "eval_rouge-2": 0.3335, "eval_rouge-l": 0.4359, "eval_runtime": 2.3389, "eval_samples_per_second": 3.42, "eval_steps_per_second": 3.42, "step": 1850 }, { "epoch": 105.56, "learning_rate": 1.2093023255813954e-05, "loss": 0.0379, "step": 1900 }, { "epoch": 105.56, "eval_avg_rouge_f": 0.407, "eval_gen_len": 15.125, "eval_loss": 2.6058127880096436, "eval_rouge-1": 0.4515, "eval_rouge-2": 0.3335, "eval_rouge-l": 0.4359, "eval_runtime": 3.2938, "eval_samples_per_second": 2.429, "eval_steps_per_second": 2.429, "step": 1900 }, { "epoch": 108.33, "learning_rate": 1.1860465116279072e-05, "loss": 0.0466, "step": 1950 }, { "epoch": 108.33, "eval_avg_rouge_f": 0.3768, "eval_gen_len": 16.125, "eval_loss": 2.5748019218444824, "eval_rouge-1": 0.4154, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.3997, "eval_runtime": 2.4802, "eval_samples_per_second": 3.226, "eval_steps_per_second": 3.226, "step": 1950 }, { "epoch": 111.11, "learning_rate": 1.1627906976744187e-05, "loss": 0.0317, "step": 2000 }, { "epoch": 111.11, "eval_avg_rouge_f": 0.3778, "eval_gen_len": 16.125, "eval_loss": 2.663809299468994, "eval_rouge-1": 0.4169, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.4013, "eval_runtime": 3.2301, "eval_samples_per_second": 2.477, "eval_steps_per_second": 2.477, "step": 2000 }, { "epoch": 113.89, "learning_rate": 1.1395348837209304e-05, "loss": 0.0234, "step": 2050 }, { "epoch": 113.89, "eval_avg_rouge_f": 0.3888, "eval_gen_len": 15.5, "eval_loss": 2.740657091140747, "eval_rouge-1": 0.4334, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.4178, "eval_runtime": 4.1851, "eval_samples_per_second": 1.912, "eval_steps_per_second": 1.912, "step": 2050 }, { "epoch": 116.67, "learning_rate": 1.116279069767442e-05, "loss": 0.0308, "step": 2100 }, { "epoch": 116.67, "eval_avg_rouge_f": 0.3799, "eval_gen_len": 16.125, "eval_loss": 2.70857572555542, "eval_rouge-1": 0.4201, "eval_rouge-2": 0.3153, "eval_rouge-l": 0.4044, "eval_runtime": 2.5313, "eval_samples_per_second": 3.16, "eval_steps_per_second": 3.16, "step": 2100 }, { "epoch": 119.44, "learning_rate": 1.0930232558139535e-05, "loss": 0.0305, "step": 2150 }, { "epoch": 119.44, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.7068228721618652, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.7634, "eval_samples_per_second": 2.895, "eval_steps_per_second": 2.895, "step": 2150 }, { "epoch": 122.22, "learning_rate": 1.0697674418604651e-05, "loss": 0.0289, "step": 2200 }, { "epoch": 122.22, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.8503403663635254, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.4338, "eval_samples_per_second": 3.287, "eval_steps_per_second": 3.287, "step": 2200 }, { "epoch": 125.0, "learning_rate": 1.046511627906977e-05, "loss": 0.0555, "step": 2250 }, { "epoch": 125.0, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.8522231578826904, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.8814, "eval_samples_per_second": 2.776, "eval_steps_per_second": 2.776, "step": 2250 }, { "epoch": 127.78, "learning_rate": 1.0232558139534884e-05, "loss": 0.022, "step": 2300 }, { "epoch": 127.78, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.9057185649871826, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.43, "eval_samples_per_second": 3.292, "eval_steps_per_second": 3.292, "step": 2300 }, { "epoch": 130.56, "learning_rate": 1e-05, "loss": 0.0369, "step": 2350 }, { "epoch": 130.56, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.8735642433166504, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 3.0784, "eval_samples_per_second": 2.599, "eval_steps_per_second": 2.599, "step": 2350 }, { "epoch": 133.33, "learning_rate": 9.767441860465117e-06, "loss": 0.0195, "step": 2400 }, { "epoch": 133.33, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.7636728286743164, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.4413, "eval_samples_per_second": 3.277, "eval_steps_per_second": 3.277, "step": 2400 }, { "epoch": 136.11, "learning_rate": 9.534883720930234e-06, "loss": 0.0387, "step": 2450 }, { "epoch": 136.11, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.743685007095337, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.9809, "eval_samples_per_second": 2.684, "eval_steps_per_second": 2.684, "step": 2450 }, { "epoch": 138.89, "learning_rate": 9.30232558139535e-06, "loss": 0.0298, "step": 2500 }, { "epoch": 138.89, "eval_avg_rouge_f": 0.3443, "eval_gen_len": 16.25, "eval_loss": 2.8817646503448486, "eval_rouge-1": 0.391, "eval_rouge-2": 0.2665, "eval_rouge-l": 0.3754, "eval_runtime": 2.5341, "eval_samples_per_second": 3.157, "eval_steps_per_second": 3.157, "step": 2500 }, { "epoch": 141.67, "learning_rate": 9.069767441860465e-06, "loss": 0.0265, "step": 2550 }, { "epoch": 141.67, "eval_avg_rouge_f": 0.3353, "eval_gen_len": 16.5, "eval_loss": 2.834005355834961, "eval_rouge-1": 0.3776, "eval_rouge-2": 0.2665, "eval_rouge-l": 0.362, "eval_runtime": 3.3656, "eval_samples_per_second": 2.377, "eval_steps_per_second": 2.377, "step": 2550 }, { "epoch": 144.44, "learning_rate": 8.837209302325582e-06, "loss": 0.0182, "step": 2600 }, { "epoch": 144.44, "eval_avg_rouge_f": 0.3598, "eval_gen_len": 15.5, "eval_loss": 2.873906135559082, "eval_rouge-1": 0.4059, "eval_rouge-2": 0.2831, "eval_rouge-l": 0.3902, "eval_runtime": 2.4328, "eval_samples_per_second": 3.288, "eval_steps_per_second": 3.288, "step": 2600 }, { "epoch": 144.44, "step": 2600, "total_flos": 3409770731258880.0, "train_loss": 0.29600492647060983, "train_runtime": 1522.0987, "train_samples_per_second": 8.869, "train_steps_per_second": 2.956 } ], "max_steps": 4500, "num_train_epochs": 250, "total_flos": 3409770731258880.0, "trial_name": null, "trial_params": null }