|
{ |
|
"best_metric": 0.7326732673267327, |
|
"best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-14/checkpoint-48", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 96, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.263960838317871, |
|
"learning_rate": 4.556569949810922e-06, |
|
"loss": 0.7021, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.1750551462173462, |
|
"learning_rate": 9.113139899621844e-06, |
|
"loss": 0.6857, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.3693294525146484, |
|
"learning_rate": 1.3669709849432766e-05, |
|
"loss": 0.6849, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.9440478086471558, |
|
"learning_rate": 1.822627979924369e-05, |
|
"loss": 0.6652, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.9551452398300171, |
|
"learning_rate": 2.2782849749054612e-05, |
|
"loss": 0.64, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.777751088142395, |
|
"learning_rate": 2.7339419698865533e-05, |
|
"loss": 0.6269, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.4816713333129883, |
|
"learning_rate": 3.1895989648676456e-05, |
|
"loss": 0.5586, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.696999430656433, |
|
"learning_rate": 3.645255959848738e-05, |
|
"loss": 0.6586, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.2336232662200928, |
|
"learning_rate": 4.10091295482983e-05, |
|
"loss": 0.6259, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.7326732673267327, |
|
"eval_loss": 0.7427406311035156, |
|
"eval_runtime": 1.4051, |
|
"eval_samples_per_second": 45.548, |
|
"eval_steps_per_second": 5.694, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 4.544165134429932, |
|
"learning_rate": 4.354055729819326e-05, |
|
"loss": 0.7, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.8700355887413025, |
|
"learning_rate": 4.3034271748214265e-05, |
|
"loss": 0.6972, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.7657314538955688, |
|
"learning_rate": 4.252798619823527e-05, |
|
"loss": 0.4085, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.6718289852142334, |
|
"learning_rate": 4.202170064825628e-05, |
|
"loss": 0.5618, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 1.5080432891845703, |
|
"learning_rate": 4.151541509827729e-05, |
|
"loss": 0.4822, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": Infinity, |
|
"learning_rate": 4.11103866582941e-05, |
|
"loss": 0.7679, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 1.8398224115371704, |
|
"learning_rate": 4.0604101108315106e-05, |
|
"loss": 0.6183, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 1.7079250812530518, |
|
"learning_rate": 4.0097815558336114e-05, |
|
"loss": 0.5954, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 1.8899829387664795, |
|
"learning_rate": 3.959153000835712e-05, |
|
"loss": 0.596, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 3.904428243637085, |
|
"learning_rate": 3.908524445837813e-05, |
|
"loss": 0.6094, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.7326732673267327, |
|
"eval_loss": 0.673431396484375, |
|
"eval_runtime": 1.3605, |
|
"eval_samples_per_second": 47.041, |
|
"eval_steps_per_second": 5.88, |
|
"step": 96 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1442567462539200.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 4.374307151818485e-05, |
|
"per_device_train_batch_size": 4 |
|
} |
|
} |
|
|