{ "best_metric": 0.43151227236737927, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Tamil-TTB/checkpoint-500", "epoch": 230.76923076923077, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 7.69, "learning_rate": 4.9500000000000004e-05, "loss": 1.8535, "step": 100 }, { "epoch": 15.38, "learning_rate": 4.966778523489933e-05, "loss": 1.3671, "step": 200 }, { "epoch": 23.08, "learning_rate": 4.933221476510068e-05, "loss": 1.0908, "step": 300 }, { "epoch": 30.77, "learning_rate": 4.8996644295302016e-05, "loss": 0.8496, "step": 400 }, { "epoch": 38.46, "learning_rate": 4.8661073825503355e-05, "loss": 0.6718, "step": 500 }, { "epoch": 38.46, "eval_accuracy": 0.43151227236737927, "eval_loss": 2.5977065563201904, "eval_runtime": 0.4117, "eval_samples_per_second": 194.297, "eval_steps_per_second": 24.287, "step": 500 }, { "epoch": 46.15, "learning_rate": 4.83255033557047e-05, "loss": 0.5325, "step": 600 }, { "epoch": 53.85, "learning_rate": 4.798993288590604e-05, "loss": 0.433, "step": 700 }, { "epoch": 61.54, "learning_rate": 4.765436241610739e-05, "loss": 0.3592, "step": 800 }, { "epoch": 69.23, "learning_rate": 4.7318791946308726e-05, "loss": 0.3163, "step": 900 }, { "epoch": 76.92, "learning_rate": 4.698322147651007e-05, "loss": 0.2805, "step": 1000 }, { "epoch": 76.92, "eval_accuracy": 0.4014251781472684, "eval_loss": 3.6307425498962402, "eval_runtime": 0.4118, "eval_samples_per_second": 194.246, "eval_steps_per_second": 24.281, "step": 1000 }, { "epoch": 84.62, "learning_rate": 4.664765100671141e-05, "loss": 0.2562, "step": 1100 }, { "epoch": 92.31, "learning_rate": 4.631208053691276e-05, "loss": 0.2323, "step": 1200 }, { "epoch": 100.0, "learning_rate": 4.5976510067114097e-05, "loss": 0.2213, "step": 1300 }, { "epoch": 107.69, "learning_rate": 4.564093959731544e-05, "loss": 0.2116, "step": 1400 }, { "epoch": 115.38, "learning_rate": 4.5305369127516775e-05, "loss": 0.2013, "step": 1500 }, { "epoch": 115.38, "eval_accuracy": 0.38717339667458434, "eval_loss": 4.366728782653809, "eval_runtime": 0.4129, "eval_samples_per_second": 193.772, "eval_steps_per_second": 24.222, "step": 1500 }, { "epoch": 123.08, "learning_rate": 4.496979865771812e-05, "loss": 0.1972, "step": 1600 }, { "epoch": 130.77, "learning_rate": 4.463422818791946e-05, "loss": 0.1935, "step": 1700 }, { "epoch": 138.46, "learning_rate": 4.4298657718120806e-05, "loss": 0.1867, "step": 1800 }, { "epoch": 146.15, "learning_rate": 4.3963087248322146e-05, "loss": 0.183, "step": 1900 }, { "epoch": 153.85, "learning_rate": 4.362751677852349e-05, "loss": 0.1832, "step": 2000 }, { "epoch": 153.85, "eval_accuracy": 0.38796516231195566, "eval_loss": 4.474318504333496, "eval_runtime": 0.4157, "eval_samples_per_second": 192.457, "eval_steps_per_second": 24.057, "step": 2000 }, { "epoch": 161.54, "learning_rate": 4.329194630872484e-05, "loss": 0.1777, "step": 2100 }, { "epoch": 169.23, "learning_rate": 4.295637583892618e-05, "loss": 0.1753, "step": 2200 }, { "epoch": 176.92, "learning_rate": 4.262080536912752e-05, "loss": 0.1754, "step": 2300 }, { "epoch": 184.62, "learning_rate": 4.228523489932886e-05, "loss": 0.1717, "step": 2400 }, { "epoch": 192.31, "learning_rate": 4.194966442953021e-05, "loss": 0.1687, "step": 2500 }, { "epoch": 192.31, "eval_accuracy": 0.3784639746634996, "eval_loss": 4.800501346588135, "eval_runtime": 0.4165, "eval_samples_per_second": 192.1, "eval_steps_per_second": 24.012, "step": 2500 }, { "epoch": 200.0, "learning_rate": 4.161409395973155e-05, "loss": 0.1673, "step": 2600 }, { "epoch": 207.69, "learning_rate": 4.127852348993289e-05, "loss": 0.1673, "step": 2700 }, { "epoch": 215.38, "learning_rate": 4.0942953020134226e-05, "loss": 0.1659, "step": 2800 }, { "epoch": 223.08, "learning_rate": 4.060738255033557e-05, "loss": 0.1675, "step": 2900 }, { "epoch": 230.77, "learning_rate": 4.027181208053691e-05, "loss": 0.1634, "step": 3000 }, { "epoch": 230.77, "eval_accuracy": 0.38954869358669836, "eval_loss": 4.857724666595459, "eval_runtime": 0.4142, "eval_samples_per_second": 193.136, "eval_steps_per_second": 24.142, "step": 3000 }, { "epoch": 230.77, "step": 3000, "total_flos": 1.206309399453696e+16, "train_loss": 0.38402640787760417, "train_runtime": 743.2887, "train_samples_per_second": 645.779, "train_steps_per_second": 20.181 } ], "max_steps": 15000, "num_train_epochs": 1154, "total_flos": 1.206309399453696e+16, "trial_name": null, "trial_params": null }