{ "best_metric": 91.75966152710339, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-parsing-ud-Hindi-HDTB/checkpoint-6500", "epoch": 21.634615384615383, "global_step": 9000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 7.840000000000001e-05, "loss": 2.8321, "step": 100 }, { "epoch": 0.48, "learning_rate": 7.947382550335571e-05, "loss": 0.7588, "step": 200 }, { "epoch": 0.72, "learning_rate": 7.893691275167786e-05, "loss": 0.607, "step": 300 }, { "epoch": 0.96, "learning_rate": 7.840000000000001e-05, "loss": 0.5116, "step": 400 }, { "epoch": 1.2, "learning_rate": 7.786308724832216e-05, "loss": 0.3944, "step": 500 }, { "epoch": 1.2, "eval_las": 89.23531249112644, "eval_loss": 0.415132611989975, "eval_runtime": 11.8077, "eval_samples_per_second": 140.501, "eval_steps_per_second": 17.616, "eval_uas": 93.45202601016554, "step": 500 }, { "epoch": 1.44, "learning_rate": 7.73261744966443e-05, "loss": 0.3888, "step": 600 }, { "epoch": 1.68, "learning_rate": 7.678926174496645e-05, "loss": 0.3689, "step": 700 }, { "epoch": 1.92, "learning_rate": 7.62523489932886e-05, "loss": 0.3628, "step": 800 }, { "epoch": 2.16, "learning_rate": 7.571543624161075e-05, "loss": 0.291, "step": 900 }, { "epoch": 2.4, "learning_rate": 7.51785234899329e-05, "loss": 0.265, "step": 1000 }, { "epoch": 2.4, "eval_las": 90.46199278757419, "eval_loss": 0.37675201892852783, "eval_runtime": 11.7912, "eval_samples_per_second": 140.698, "eval_steps_per_second": 17.64, "eval_uas": 94.2726524121873, "step": 1000 }, { "epoch": 2.64, "learning_rate": 7.464161073825505e-05, "loss": 0.2676, "step": 1100 }, { "epoch": 2.88, "learning_rate": 7.410469798657718e-05, "loss": 0.2719, "step": 1200 }, { "epoch": 3.12, "learning_rate": 7.356778523489933e-05, "loss": 0.2325, "step": 1300 }, { "epoch": 3.37, "learning_rate": 7.303087248322148e-05, "loss": 0.2006, "step": 1400 }, { "epoch": 3.61, "learning_rate": 7.249395973154363e-05, "loss": 0.2103, "step": 1500 }, { "epoch": 3.61, "eval_las": 91.04409802084221, "eval_loss": 0.3845142126083374, "eval_runtime": 11.7995, "eval_samples_per_second": 140.6, "eval_steps_per_second": 17.628, "eval_uas": 94.56512479768294, "step": 1500 }, { "epoch": 3.85, "learning_rate": 7.195704697986577e-05, "loss": 0.2208, "step": 1600 }, { "epoch": 4.09, "learning_rate": 7.142013422818792e-05, "loss": 0.1948, "step": 1700 }, { "epoch": 4.33, "learning_rate": 7.088322147651007e-05, "loss": 0.1553, "step": 1800 }, { "epoch": 4.57, "learning_rate": 7.034630872483222e-05, "loss": 0.1593, "step": 1900 }, { "epoch": 4.81, "learning_rate": 6.980939597315437e-05, "loss": 0.1655, "step": 2000 }, { "epoch": 4.81, "eval_las": 91.03557940767243, "eval_loss": 0.3911457359790802, "eval_runtime": 11.7868, "eval_samples_per_second": 140.751, "eval_steps_per_second": 17.647, "eval_uas": 94.65882954255048, "step": 2000 }, { "epoch": 5.05, "learning_rate": 6.927248322147651e-05, "loss": 0.1562, "step": 2100 }, { "epoch": 5.29, "learning_rate": 6.873557046979866e-05, "loss": 0.1157, "step": 2200 }, { "epoch": 5.53, "learning_rate": 6.819865771812081e-05, "loss": 0.1221, "step": 2300 }, { "epoch": 5.77, "learning_rate": 6.766174496644296e-05, "loss": 0.1336, "step": 2400 }, { "epoch": 6.01, "learning_rate": 6.712483221476511e-05, "loss": 0.1319, "step": 2500 }, { "epoch": 6.01, "eval_las": 90.95323281369792, "eval_loss": 0.44331711530685425, "eval_runtime": 11.7942, "eval_samples_per_second": 140.663, "eval_steps_per_second": 17.636, "eval_uas": 94.61055740125508, "step": 2500 }, { "epoch": 6.25, "learning_rate": 6.658791946308726e-05, "loss": 0.0893, "step": 2600 }, { "epoch": 6.49, "learning_rate": 6.60510067114094e-05, "loss": 0.0981, "step": 2700 }, { "epoch": 6.73, "learning_rate": 6.551409395973155e-05, "loss": 0.099, "step": 2800 }, { "epoch": 6.97, "learning_rate": 6.49771812080537e-05, "loss": 0.1028, "step": 2900 }, { "epoch": 7.21, "learning_rate": 6.444026845637585e-05, "loss": 0.0774, "step": 3000 }, { "epoch": 7.21, "eval_las": 91.02138171905614, "eval_loss": 0.534425675868988, "eval_runtime": 11.7987, "eval_samples_per_second": 140.609, "eval_steps_per_second": 17.629, "eval_uas": 94.7184598347389, "step": 3000 }, { "epoch": 7.45, "learning_rate": 6.3903355704698e-05, "loss": 0.0774, "step": 3100 }, { "epoch": 7.69, "learning_rate": 6.336644295302015e-05, "loss": 0.0815, "step": 3200 }, { "epoch": 7.93, "learning_rate": 6.28295302013423e-05, "loss": 0.0855, "step": 3300 }, { "epoch": 8.17, "learning_rate": 6.229261744966444e-05, "loss": 0.0633, "step": 3400 }, { "epoch": 8.41, "learning_rate": 6.175570469798658e-05, "loss": 0.0655, "step": 3500 }, { "epoch": 8.41, "eval_las": 91.10088877530738, "eval_loss": 0.5579993724822998, "eval_runtime": 11.8126, "eval_samples_per_second": 140.444, "eval_steps_per_second": 17.608, "eval_uas": 94.6304341653179, "step": 3500 }, { "epoch": 8.65, "learning_rate": 6.121879194630873e-05, "loss": 0.0654, "step": 3600 }, { "epoch": 8.89, "learning_rate": 6.068187919463087e-05, "loss": 0.0692, "step": 3700 }, { "epoch": 9.13, "learning_rate": 6.014496644295302e-05, "loss": 0.0608, "step": 3800 }, { "epoch": 9.38, "learning_rate": 5.9613422818791955e-05, "loss": 0.0529, "step": 3900 }, { "epoch": 9.62, "learning_rate": 5.90765100671141e-05, "loss": 0.0568, "step": 4000 }, { "epoch": 9.62, "eval_las": 91.29397734048896, "eval_loss": 0.5721077919006348, "eval_runtime": 11.8178, "eval_samples_per_second": 140.382, "eval_steps_per_second": 17.601, "eval_uas": 94.87179487179486, "step": 4000 }, { "epoch": 9.86, "learning_rate": 5.853959731543625e-05, "loss": 0.0554, "step": 4100 }, { "epoch": 10.1, "learning_rate": 5.80026845637584e-05, "loss": 0.0517, "step": 4200 }, { "epoch": 10.34, "learning_rate": 5.7465771812080534e-05, "loss": 0.0441, "step": 4300 }, { "epoch": 10.58, "learning_rate": 5.692885906040268e-05, "loss": 0.0471, "step": 4400 }, { "epoch": 10.82, "learning_rate": 5.639194630872483e-05, "loss": 0.0462, "step": 4500 }, { "epoch": 10.82, "eval_las": 91.28261918959593, "eval_loss": 0.6217162609100342, "eval_runtime": 11.7974, "eval_samples_per_second": 140.624, "eval_steps_per_second": 17.631, "eval_uas": 94.88599256041117, "step": 4500 }, { "epoch": 11.06, "learning_rate": 5.5855033557046986e-05, "loss": 0.0483, "step": 4600 }, { "epoch": 11.3, "learning_rate": 5.5318120805369134e-05, "loss": 0.0393, "step": 4700 }, { "epoch": 11.54, "learning_rate": 5.478120805369128e-05, "loss": 0.0402, "step": 4800 }, { "epoch": 11.78, "learning_rate": 5.424429530201343e-05, "loss": 0.0424, "step": 4900 }, { "epoch": 12.02, "learning_rate": 5.370738255033558e-05, "loss": 0.0418, "step": 5000 }, { "epoch": 12.02, "eval_las": 91.32237271772155, "eval_loss": 0.6571192145347595, "eval_runtime": 11.8066, "eval_samples_per_second": 140.515, "eval_steps_per_second": 17.617, "eval_uas": 94.87747394724138, "step": 5000 }, { "epoch": 12.26, "learning_rate": 5.317046979865773e-05, "loss": 0.0339, "step": 5100 }, { "epoch": 12.5, "learning_rate": 5.2633557046979875e-05, "loss": 0.0361, "step": 5200 }, { "epoch": 12.74, "learning_rate": 5.2096644295302024e-05, "loss": 0.0347, "step": 5300 }, { "epoch": 12.98, "learning_rate": 5.155973154362417e-05, "loss": 0.0374, "step": 5400 }, { "epoch": 13.22, "learning_rate": 5.1022818791946307e-05, "loss": 0.0304, "step": 5500 }, { "epoch": 13.22, "eval_las": 91.43879376437516, "eval_loss": 0.6834462881088257, "eval_runtime": 11.776, "eval_samples_per_second": 140.879, "eval_steps_per_second": 17.663, "eval_uas": 95.01661129568106, "step": 5500 }, { "epoch": 13.46, "learning_rate": 5.0485906040268455e-05, "loss": 0.0299, "step": 5600 }, { "epoch": 13.7, "learning_rate": 4.99489932885906e-05, "loss": 0.0335, "step": 5700 }, { "epoch": 13.94, "learning_rate": 4.941208053691275e-05, "loss": 0.0331, "step": 5800 }, { "epoch": 14.18, "learning_rate": 4.88751677852349e-05, "loss": 0.026, "step": 5900 }, { "epoch": 14.42, "learning_rate": 4.833825503355705e-05, "loss": 0.0255, "step": 6000 }, { "epoch": 14.42, "eval_las": 91.52965897151944, "eval_loss": 0.7086959481239319, "eval_runtime": 11.7728, "eval_samples_per_second": 140.918, "eval_steps_per_second": 17.668, "eval_uas": 95.10179742737883, "step": 6000 }, { "epoch": 14.66, "learning_rate": 4.7801342281879196e-05, "loss": 0.0273, "step": 6100 }, { "epoch": 14.9, "learning_rate": 4.726442953020135e-05, "loss": 0.0286, "step": 6200 }, { "epoch": 15.14, "learning_rate": 4.67275167785235e-05, "loss": 0.0219, "step": 6300 }, { "epoch": 15.38, "learning_rate": 4.619060402684565e-05, "loss": 0.0216, "step": 6400 }, { "epoch": 15.62, "learning_rate": 4.565906040268457e-05, "loss": 0.0254, "step": 6500 }, { "epoch": 15.62, "eval_las": 91.75966152710339, "eval_loss": 0.7071714401245117, "eval_runtime": 11.7877, "eval_samples_per_second": 140.74, "eval_steps_per_second": 17.646, "eval_uas": 95.23525570037198, "step": 6500 }, { "epoch": 15.87, "learning_rate": 4.5122147651006716e-05, "loss": 0.0242, "step": 6600 }, { "epoch": 16.11, "learning_rate": 4.4585234899328864e-05, "loss": 0.023, "step": 6700 }, { "epoch": 16.35, "learning_rate": 4.404832214765101e-05, "loss": 0.0222, "step": 6800 }, { "epoch": 16.59, "learning_rate": 4.351140939597316e-05, "loss": 0.0198, "step": 6900 }, { "epoch": 16.83, "learning_rate": 4.297449664429531e-05, "loss": 0.0211, "step": 7000 }, { "epoch": 16.83, "eval_las": 91.60632649004742, "eval_loss": 0.7650447487831116, "eval_runtime": 11.8115, "eval_samples_per_second": 140.456, "eval_steps_per_second": 17.61, "eval_uas": 95.07908112559275, "step": 7000 }, { "epoch": 17.07, "learning_rate": 4.243758389261746e-05, "loss": 0.0202, "step": 7100 }, { "epoch": 17.31, "learning_rate": 4.1900671140939605e-05, "loss": 0.0165, "step": 7200 }, { "epoch": 17.55, "learning_rate": 4.1363758389261754e-05, "loss": 0.021, "step": 7300 }, { "epoch": 17.79, "learning_rate": 4.0826845637583895e-05, "loss": 0.0175, "step": 7400 }, { "epoch": 18.03, "learning_rate": 4.028993288590604e-05, "loss": 0.019, "step": 7500 }, { "epoch": 18.03, "eval_las": 91.65743816906607, "eval_loss": 0.7897760272026062, "eval_runtime": 11.7771, "eval_samples_per_second": 140.867, "eval_steps_per_second": 17.661, "eval_uas": 95.08759973876253, "step": 7500 }, { "epoch": 18.27, "learning_rate": 3.975302013422819e-05, "loss": 0.015, "step": 7600 }, { "epoch": 18.51, "learning_rate": 3.921610738255034e-05, "loss": 0.0166, "step": 7700 }, { "epoch": 18.75, "learning_rate": 3.867919463087249e-05, "loss": 0.0165, "step": 7800 }, { "epoch": 18.99, "learning_rate": 3.8142281879194636e-05, "loss": 0.0156, "step": 7900 }, { "epoch": 19.23, "learning_rate": 3.7605369127516784e-05, "loss": 0.0125, "step": 8000 }, { "epoch": 19.23, "eval_las": 91.5183008206264, "eval_loss": 0.7997450828552246, "eval_runtime": 11.8137, "eval_samples_per_second": 140.431, "eval_steps_per_second": 17.607, "eval_uas": 94.9995740693415, "step": 8000 }, { "epoch": 19.47, "learning_rate": 3.706845637583893e-05, "loss": 0.0155, "step": 8100 }, { "epoch": 19.71, "learning_rate": 3.653154362416108e-05, "loss": 0.0155, "step": 8200 }, { "epoch": 19.95, "learning_rate": 3.599463087248322e-05, "loss": 0.0142, "step": 8300 }, { "epoch": 20.19, "learning_rate": 3.545771812080537e-05, "loss": 0.0096, "step": 8400 }, { "epoch": 20.43, "learning_rate": 3.492080536912752e-05, "loss": 0.0132, "step": 8500 }, { "epoch": 20.43, "eval_las": 91.67731493312888, "eval_loss": 0.8170235753059387, "eval_runtime": 11.7652, "eval_samples_per_second": 141.009, "eval_steps_per_second": 17.679, "eval_uas": 95.12735326688815, "step": 8500 }, { "epoch": 20.67, "learning_rate": 3.438389261744967e-05, "loss": 0.0122, "step": 8600 }, { "epoch": 20.91, "learning_rate": 3.3846979865771815e-05, "loss": 0.0116, "step": 8700 }, { "epoch": 21.15, "learning_rate": 3.3310067114093964e-05, "loss": 0.0114, "step": 8800 }, { "epoch": 21.39, "learning_rate": 3.277315436241611e-05, "loss": 0.0089, "step": 8900 }, { "epoch": 21.63, "learning_rate": 3.223624161073826e-05, "loss": 0.0115, "step": 9000 }, { "epoch": 21.63, "eval_las": 91.60348695232416, "eval_loss": 0.8960289359092712, "eval_runtime": 12.1056, "eval_samples_per_second": 137.044, "eval_steps_per_second": 17.182, "eval_uas": 95.08759973876253, "step": 9000 }, { "epoch": 21.63, "step": 9000, "total_flos": 4.80612874451927e+16, "train_loss": 0.13298969575431613, "train_runtime": 5206.9637, "train_samples_per_second": 92.184, "train_steps_per_second": 2.881 } ], "max_steps": 15000, "num_train_epochs": 37, "total_flos": 4.80612874451927e+16, "trial_name": null, "trial_params": null }