{
  "best_metric": 0.7659574468085106,
  "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-2/checkpoint-112",
  "epoch": 7.0,
  "eval_steps": 500,
  "global_step": 112,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.31,
      "grad_norm": 0.8302626013755798,
      "learning_rate": 9.517539799539297e-06,
      "loss": 0.6966,
      "step": 5
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.6829280257225037,
      "learning_rate": 1.9035079599078593e-05,
      "loss": 0.6813,
      "step": 10
    },
    {
      "epoch": 0.94,
      "grad_norm": 0.5496665835380554,
      "learning_rate": 2.8552619398617893e-05,
      "loss": 0.6451,
      "step": 15
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6860618591308594,
      "eval_runtime": 1.3401,
      "eval_samples_per_second": 47.759,
      "eval_steps_per_second": 5.97,
      "step": 16
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.3569624423980713,
      "learning_rate": 2.9610123820788925e-05,
      "loss": 0.6113,
      "step": 20
    },
    {
      "epoch": 1.56,
      "grad_norm": 0.3838456869125366,
      "learning_rate": 2.8552619398617893e-05,
      "loss": 0.5997,
      "step": 25
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.46842828392982483,
      "learning_rate": 2.7495114976446858e-05,
      "loss": 0.6133,
      "step": 30
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.719573974609375,
      "eval_runtime": 1.3426,
      "eval_samples_per_second": 47.668,
      "eval_steps_per_second": 5.958,
      "step": 32
    },
    {
      "epoch": 2.19,
      "grad_norm": 0.7610273957252502,
      "learning_rate": 2.6437610554275826e-05,
      "loss": 0.5971,
      "step": 35
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.5527757406234741,
      "learning_rate": 2.5380106132104794e-05,
      "loss": 0.6373,
      "step": 40
    },
    {
      "epoch": 2.81,
      "grad_norm": 0.9699375033378601,
      "learning_rate": 2.4322601709933762e-05,
      "loss": 0.5701,
      "step": 45
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6987495422363281,
      "eval_runtime": 1.344,
      "eval_samples_per_second": 47.619,
      "eval_steps_per_second": 5.952,
      "step": 48
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.5822675824165344,
      "learning_rate": 2.3265097287762727e-05,
      "loss": 0.5492,
      "step": 50
    },
    {
      "epoch": 3.44,
      "grad_norm": 0.9859365224838257,
      "learning_rate": 2.2207592865591692e-05,
      "loss": 0.5552,
      "step": 55
    },
    {
      "epoch": 3.75,
      "grad_norm": 0.7946156859397888,
      "learning_rate": 2.115008844342066e-05,
      "loss": 0.5459,
      "step": 60
    },
    {
      "epoch": 4.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6806888580322266,
      "eval_runtime": 1.3463,
      "eval_samples_per_second": 47.538,
      "eval_steps_per_second": 5.942,
      "step": 64
    },
    {
      "epoch": 4.06,
      "grad_norm": 1.2706314325332642,
      "learning_rate": 2.009258402124963e-05,
      "loss": 0.5792,
      "step": 65
    },
    {
      "epoch": 4.38,
      "grad_norm": 1.456335186958313,
      "learning_rate": 1.9035079599078593e-05,
      "loss": 0.4967,
      "step": 70
    },
    {
      "epoch": 4.69,
      "grad_norm": 1.6856915950775146,
      "learning_rate": 1.797757517690756e-05,
      "loss": 0.4966,
      "step": 75
    },
    {
      "epoch": 5.0,
      "grad_norm": 4.080612659454346,
      "learning_rate": 1.692007075473653e-05,
      "loss": 0.4589,
      "step": 80
    },
    {
      "epoch": 5.0,
      "eval_f1": 0.7474747474747475,
      "eval_loss": 0.6495094299316406,
      "eval_runtime": 1.348,
      "eval_samples_per_second": 47.477,
      "eval_steps_per_second": 5.935,
      "step": 80
    },
    {
      "epoch": 5.31,
      "grad_norm": 2.8024661540985107,
      "learning_rate": 1.5862566332565498e-05,
      "loss": 0.4312,
      "step": 85
    },
    {
      "epoch": 5.62,
      "grad_norm": 3.024564027786255,
      "learning_rate": 1.4805061910394462e-05,
      "loss": 0.4318,
      "step": 90
    },
    {
      "epoch": 5.94,
      "grad_norm": 3.236055374145508,
      "learning_rate": 1.3747557488223429e-05,
      "loss": 0.3968,
      "step": 95
    },
    {
      "epoch": 6.0,
      "eval_f1": 0.7391304347826088,
      "eval_loss": 0.6395978927612305,
      "eval_runtime": 1.3605,
      "eval_samples_per_second": 47.043,
      "eval_steps_per_second": 5.88,
      "step": 96
    },
    {
      "epoch": 6.25,
      "grad_norm": 2.6648523807525635,
      "learning_rate": 1.2690053066052397e-05,
      "loss": 0.3623,
      "step": 100
    },
    {
      "epoch": 6.56,
      "grad_norm": 2.6884357929229736,
      "learning_rate": 1.1632548643881364e-05,
      "loss": 0.4061,
      "step": 105
    },
    {
      "epoch": 6.88,
      "grad_norm": 3.5860610008239746,
      "learning_rate": 1.057504422171033e-05,
      "loss": 0.3356,
      "step": 110
    },
    {
      "epoch": 7.0,
      "eval_f1": 0.7659574468085106,
      "eval_loss": 0.7118406295776367,
      "eval_runtime": 1.3559,
      "eval_samples_per_second": 47.202,
      "eval_steps_per_second": 5.9,
      "step": 112
    }
  ],
  "logging_steps": 5,
  "max_steps": 160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 6207811312717728.0,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 3.045612735852575e-05,
    "per_device_train_batch_size": 12
  }
}