|
{ |
|
"best_metric": 0.7474747474747475, |
|
"best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-2/checkpoint-80", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 80, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.8302626013755798, |
|
"learning_rate": 9.517539799539297e-06, |
|
"loss": 0.6966, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.6829280257225037, |
|
"learning_rate": 1.9035079599078593e-05, |
|
"loss": 0.6813, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.5496665835380554, |
|
"learning_rate": 2.8552619398617893e-05, |
|
"loss": 0.6451, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.7326732673267327, |
|
"eval_loss": 0.6860618591308594, |
|
"eval_runtime": 1.3401, |
|
"eval_samples_per_second": 47.759, |
|
"eval_steps_per_second": 5.97, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.3569624423980713, |
|
"learning_rate": 2.9610123820788925e-05, |
|
"loss": 0.6113, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 0.3838456869125366, |
|
"learning_rate": 2.8552619398617893e-05, |
|
"loss": 0.5997, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 0.46842828392982483, |
|
"learning_rate": 2.7495114976446858e-05, |
|
"loss": 0.6133, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.7326732673267327, |
|
"eval_loss": 0.719573974609375, |
|
"eval_runtime": 1.3426, |
|
"eval_samples_per_second": 47.668, |
|
"eval_steps_per_second": 5.958, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 0.7610273957252502, |
|
"learning_rate": 2.6437610554275826e-05, |
|
"loss": 0.5971, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.5527757406234741, |
|
"learning_rate": 2.5380106132104794e-05, |
|
"loss": 0.6373, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 0.9699375033378601, |
|
"learning_rate": 2.4322601709933762e-05, |
|
"loss": 0.5701, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.7326732673267327, |
|
"eval_loss": 0.6987495422363281, |
|
"eval_runtime": 1.344, |
|
"eval_samples_per_second": 47.619, |
|
"eval_steps_per_second": 5.952, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 0.5822675824165344, |
|
"learning_rate": 2.3265097287762727e-05, |
|
"loss": 0.5492, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"grad_norm": 0.9859365224838257, |
|
"learning_rate": 2.2207592865591692e-05, |
|
"loss": 0.5552, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"grad_norm": 0.7946156859397888, |
|
"learning_rate": 2.115008844342066e-05, |
|
"loss": 0.5459, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.7326732673267327, |
|
"eval_loss": 0.6806888580322266, |
|
"eval_runtime": 1.3463, |
|
"eval_samples_per_second": 47.538, |
|
"eval_steps_per_second": 5.942, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"grad_norm": 1.2706314325332642, |
|
"learning_rate": 2.009258402124963e-05, |
|
"loss": 0.5792, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"grad_norm": 1.456335186958313, |
|
"learning_rate": 1.9035079599078593e-05, |
|
"loss": 0.4967, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 1.6856915950775146, |
|
"learning_rate": 1.797757517690756e-05, |
|
"loss": 0.4966, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 4.080612659454346, |
|
"learning_rate": 1.692007075473653e-05, |
|
"loss": 0.4589, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.7474747474747475, |
|
"eval_loss": 0.6495094299316406, |
|
"eval_runtime": 1.348, |
|
"eval_samples_per_second": 47.477, |
|
"eval_steps_per_second": 5.935, |
|
"step": 80 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 160, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 4530217300158096.0, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": { |
|
"learning_rate": 3.045612735852575e-05, |
|
"per_device_train_batch_size": 12 |
|
} |
|
} |
|
|