|
{ |
|
"best_metric": 0.8855701147554326, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Japanese-GSD/checkpoint-2000", |
|
"epoch": 20.361990950226243, |
|
"global_step": 4500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.9500000000000004e-05, |
|
"loss": 1.3362, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.966778523489933e-05, |
|
"loss": 0.5269, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.933221476510068e-05, |
|
"loss": 0.432, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.8996644295302016e-05, |
|
"loss": 0.4029, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.8661073825503355e-05, |
|
"loss": 0.3774, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.8758850817937658, |
|
"eval_loss": 0.37171030044555664, |
|
"eval_runtime": 2.6026, |
|
"eval_samples_per_second": 194.803, |
|
"eval_steps_per_second": 24.591, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.83255033557047e-05, |
|
"loss": 0.3581, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 4.798993288590604e-05, |
|
"loss": 0.3378, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.765436241610739e-05, |
|
"loss": 0.3239, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.7318791946308726e-05, |
|
"loss": 0.3132, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.698322147651007e-05, |
|
"loss": 0.2882, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"eval_accuracy": 0.8841865386180516, |
|
"eval_loss": 0.3594276010990143, |
|
"eval_runtime": 2.6266, |
|
"eval_samples_per_second": 193.024, |
|
"eval_steps_per_second": 24.366, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 4.664765100671141e-05, |
|
"loss": 0.2882, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.631208053691276e-05, |
|
"loss": 0.2499, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 4.5976510067114097e-05, |
|
"loss": 0.2633, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 4.564093959731544e-05, |
|
"loss": 0.2364, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"learning_rate": 4.5305369127516775e-05, |
|
"loss": 0.2304, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"eval_accuracy": 0.8854887279238219, |
|
"eval_loss": 0.3736213743686676, |
|
"eval_runtime": 2.6247, |
|
"eval_samples_per_second": 193.165, |
|
"eval_steps_per_second": 24.384, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 4.496979865771812e-05, |
|
"loss": 0.2144, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 4.463422818791946e-05, |
|
"loss": 0.2067, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 4.4298657718120806e-05, |
|
"loss": 0.1925, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.3963087248322146e-05, |
|
"loss": 0.1764, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 4.362751677852349e-05, |
|
"loss": 0.1778, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_accuracy": 0.8855701147554326, |
|
"eval_loss": 0.43164435029029846, |
|
"eval_runtime": 2.6105, |
|
"eval_samples_per_second": 194.219, |
|
"eval_steps_per_second": 24.517, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 4.329194630872484e-05, |
|
"loss": 0.1529, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 4.295637583892618e-05, |
|
"loss": 0.1583, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 4.262080536912752e-05, |
|
"loss": 0.1356, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 4.228523489932886e-05, |
|
"loss": 0.1374, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"learning_rate": 4.194966442953021e-05, |
|
"loss": 0.1217, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.31, |
|
"eval_accuracy": 0.8805241311955726, |
|
"eval_loss": 0.493513286113739, |
|
"eval_runtime": 2.6161, |
|
"eval_samples_per_second": 193.798, |
|
"eval_steps_per_second": 24.464, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.76, |
|
"learning_rate": 4.161409395973155e-05, |
|
"loss": 0.1193, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 12.22, |
|
"learning_rate": 4.127852348993289e-05, |
|
"loss": 0.1098, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 4.0942953020134226e-05, |
|
"loss": 0.103, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 4.060738255033557e-05, |
|
"loss": 0.1012, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 4.027181208053691e-05, |
|
"loss": 0.0896, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"eval_accuracy": 0.8823146414910068, |
|
"eval_loss": 0.5315696001052856, |
|
"eval_runtime": 2.6096, |
|
"eval_samples_per_second": 194.28, |
|
"eval_steps_per_second": 24.525, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 3.993624161073826e-05, |
|
"loss": 0.0869, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 3.96006711409396e-05, |
|
"loss": 0.0784, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 3.926510067114094e-05, |
|
"loss": 0.0783, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 3.892953020134228e-05, |
|
"loss": 0.0691, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 3.859395973154363e-05, |
|
"loss": 0.0685, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"eval_accuracy": 0.8823960283226174, |
|
"eval_loss": 0.5921140313148499, |
|
"eval_runtime": 2.6221, |
|
"eval_samples_per_second": 193.357, |
|
"eval_steps_per_second": 24.408, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.29, |
|
"learning_rate": 3.825838926174497e-05, |
|
"loss": 0.0646, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 3.7922818791946313e-05, |
|
"loss": 0.0592, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 3.758724832214765e-05, |
|
"loss": 0.0593, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 3.725167785234899e-05, |
|
"loss": 0.0546, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"learning_rate": 3.691610738255034e-05, |
|
"loss": 0.0546, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.1, |
|
"eval_accuracy": 0.88272157564906, |
|
"eval_loss": 0.6464724540710449, |
|
"eval_runtime": 2.62, |
|
"eval_samples_per_second": 193.511, |
|
"eval_steps_per_second": 24.427, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"learning_rate": 3.658053691275168e-05, |
|
"loss": 0.0494, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 3.624496644295302e-05, |
|
"loss": 0.0519, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 19.46, |
|
"learning_rate": 3.590939597315436e-05, |
|
"loss": 0.0422, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 3.557382550335571e-05, |
|
"loss": 0.0445, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 20.36, |
|
"learning_rate": 3.523825503355705e-05, |
|
"loss": 0.0395, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.36, |
|
"eval_accuracy": 0.8812566126800684, |
|
"eval_loss": 0.6766866445541382, |
|
"eval_runtime": 2.6147, |
|
"eval_samples_per_second": 193.903, |
|
"eval_steps_per_second": 24.477, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.36, |
|
"step": 4500, |
|
"total_flos": 1.875842476013568e+16, |
|
"train_loss": 0.20138724523120458, |
|
"train_runtime": 1165.6408, |
|
"train_samples_per_second": 411.791, |
|
"train_steps_per_second": 12.868 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 68, |
|
"total_flos": 1.875842476013568e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|