{
  "best_metric": 0.43151227236737927,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-pos-ud-Tamil-TTB/checkpoint-500",
  "epoch": 230.76923076923077,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 7.69,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 1.8535,
      "step": 100
    },
    {
      "epoch": 15.38,
      "learning_rate": 4.966778523489933e-05,
      "loss": 1.3671,
      "step": 200
    },
    {
      "epoch": 23.08,
      "learning_rate": 4.933221476510068e-05,
      "loss": 1.0908,
      "step": 300
    },
    {
      "epoch": 30.77,
      "learning_rate": 4.8996644295302016e-05,
      "loss": 0.8496,
      "step": 400
    },
    {
      "epoch": 38.46,
      "learning_rate": 4.8661073825503355e-05,
      "loss": 0.6718,
      "step": 500
    },
    {
      "epoch": 38.46,
      "eval_accuracy": 0.43151227236737927,
      "eval_loss": 2.5977065563201904,
      "eval_runtime": 0.4117,
      "eval_samples_per_second": 194.297,
      "eval_steps_per_second": 24.287,
      "step": 500
    },
    {
      "epoch": 46.15,
      "learning_rate": 4.83255033557047e-05,
      "loss": 0.5325,
      "step": 600
    },
    {
      "epoch": 53.85,
      "learning_rate": 4.798993288590604e-05,
      "loss": 0.433,
      "step": 700
    },
    {
      "epoch": 61.54,
      "learning_rate": 4.765436241610739e-05,
      "loss": 0.3592,
      "step": 800
    },
    {
      "epoch": 69.23,
      "learning_rate": 4.7318791946308726e-05,
      "loss": 0.3163,
      "step": 900
    },
    {
      "epoch": 76.92,
      "learning_rate": 4.698322147651007e-05,
      "loss": 0.2805,
      "step": 1000
    },
    {
      "epoch": 76.92,
      "eval_accuracy": 0.4014251781472684,
      "eval_loss": 3.6307425498962402,
      "eval_runtime": 0.4118,
      "eval_samples_per_second": 194.246,
      "eval_steps_per_second": 24.281,
      "step": 1000
    },
    {
      "epoch": 84.62,
      "learning_rate": 4.664765100671141e-05,
      "loss": 0.2562,
      "step": 1100
    },
    {
      "epoch": 92.31,
      "learning_rate": 4.631208053691276e-05,
      "loss": 0.2323,
      "step": 1200
    },
    {
      "epoch": 100.0,
      "learning_rate": 4.5976510067114097e-05,
      "loss": 0.2213,
      "step": 1300
    },
    {
      "epoch": 107.69,
      "learning_rate": 4.564093959731544e-05,
      "loss": 0.2116,
      "step": 1400
    },
    {
      "epoch": 115.38,
      "learning_rate": 4.5305369127516775e-05,
      "loss": 0.2013,
      "step": 1500
    },
    {
      "epoch": 115.38,
      "eval_accuracy": 0.38717339667458434,
      "eval_loss": 4.366728782653809,
      "eval_runtime": 0.4129,
      "eval_samples_per_second": 193.772,
      "eval_steps_per_second": 24.222,
      "step": 1500
    },
    {
      "epoch": 123.08,
      "learning_rate": 4.496979865771812e-05,
      "loss": 0.1972,
      "step": 1600
    },
    {
      "epoch": 130.77,
      "learning_rate": 4.463422818791946e-05,
      "loss": 0.1935,
      "step": 1700
    },
    {
      "epoch": 138.46,
      "learning_rate": 4.4298657718120806e-05,
      "loss": 0.1867,
      "step": 1800
    },
    {
      "epoch": 146.15,
      "learning_rate": 4.3963087248322146e-05,
      "loss": 0.183,
      "step": 1900
    },
    {
      "epoch": 153.85,
      "learning_rate": 4.362751677852349e-05,
      "loss": 0.1832,
      "step": 2000
    },
    {
      "epoch": 153.85,
      "eval_accuracy": 0.38796516231195566,
      "eval_loss": 4.474318504333496,
      "eval_runtime": 0.4157,
      "eval_samples_per_second": 192.457,
      "eval_steps_per_second": 24.057,
      "step": 2000
    },
    {
      "epoch": 161.54,
      "learning_rate": 4.329194630872484e-05,
      "loss": 0.1777,
      "step": 2100
    },
    {
      "epoch": 169.23,
      "learning_rate": 4.295637583892618e-05,
      "loss": 0.1753,
      "step": 2200
    },
    {
      "epoch": 176.92,
      "learning_rate": 4.262080536912752e-05,
      "loss": 0.1754,
      "step": 2300
    },
    {
      "epoch": 184.62,
      "learning_rate": 4.228523489932886e-05,
      "loss": 0.1717,
      "step": 2400
    },
    {
      "epoch": 192.31,
      "learning_rate": 4.194966442953021e-05,
      "loss": 0.1687,
      "step": 2500
    },
    {
      "epoch": 192.31,
      "eval_accuracy": 0.3784639746634996,
      "eval_loss": 4.800501346588135,
      "eval_runtime": 0.4165,
      "eval_samples_per_second": 192.1,
      "eval_steps_per_second": 24.012,
      "step": 2500
    },
    {
      "epoch": 200.0,
      "learning_rate": 4.161409395973155e-05,
      "loss": 0.1673,
      "step": 2600
    },
    {
      "epoch": 207.69,
      "learning_rate": 4.127852348993289e-05,
      "loss": 0.1673,
      "step": 2700
    },
    {
      "epoch": 215.38,
      "learning_rate": 4.0942953020134226e-05,
      "loss": 0.1659,
      "step": 2800
    },
    {
      "epoch": 223.08,
      "learning_rate": 4.060738255033557e-05,
      "loss": 0.1675,
      "step": 2900
    },
    {
      "epoch": 230.77,
      "learning_rate": 4.027181208053691e-05,
      "loss": 0.1634,
      "step": 3000
    },
    {
      "epoch": 230.77,
      "eval_accuracy": 0.38954869358669836,
      "eval_loss": 4.857724666595459,
      "eval_runtime": 0.4142,
      "eval_samples_per_second": 193.136,
      "eval_steps_per_second": 24.142,
      "step": 3000
    },
    {
      "epoch": 230.77,
      "step": 3000,
      "total_flos": 1.206309399453696e+16,
      "train_loss": 0.38402640787760417,
      "train_runtime": 743.2887,
      "train_samples_per_second": 645.779,
      "train_steps_per_second": 20.181
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 1154,
  "total_flos": 1.206309399453696e+16,
  "trial_name": null,
  "trial_params": null
}