{
  "best_metric": 0.74,
  "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-8/checkpoint-168",
  "epoch": 9.0,
  "eval_steps": 500,
  "global_step": 216,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21,
      "grad_norm": 1.6496813297271729,
      "learning_rate": 2.9441483262927863e-06,
      "loss": 0.6992,
      "step": 5
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.620004415512085,
      "learning_rate": 5.888296652585573e-06,
      "loss": 0.6939,
      "step": 10
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5726878046989441,
      "learning_rate": 8.832444978878358e-06,
      "loss": 0.6835,
      "step": 15
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4239176511764526,
      "learning_rate": 1.1776593305171145e-05,
      "loss": 0.6673,
      "step": 20
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6805419921875,
      "eval_runtime": 1.3716,
      "eval_samples_per_second": 46.661,
      "eval_steps_per_second": 5.833,
      "step": 24
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.9195191264152527,
      "learning_rate": 1.406648644784331e-05,
      "loss": 0.6673,
      "step": 25
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8524231910705566,
      "learning_rate": 1.3739358856033002e-05,
      "loss": 0.6155,
      "step": 30
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8213531970977783,
      "learning_rate": 1.3412231264222692e-05,
      "loss": 0.5895,
      "step": 35
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.4818130433559418,
      "learning_rate": 1.3085103672412383e-05,
      "loss": 0.6468,
      "step": 40
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.6597484946250916,
      "learning_rate": 1.2757976080602073e-05,
      "loss": 0.6173,
      "step": 45
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6970634460449219,
      "eval_runtime": 1.3632,
      "eval_samples_per_second": 46.949,
      "eval_steps_per_second": 5.869,
      "step": 48
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.48903289437294006,
      "learning_rate": 1.2430848488791764e-05,
      "loss": 0.6302,
      "step": 50
    },
    {
      "epoch": 2.29,
      "grad_norm": 0.6064260601997375,
      "learning_rate": 1.2103720896981454e-05,
      "loss": 0.5867,
      "step": 55
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.6802453398704529,
      "learning_rate": 1.1776593305171145e-05,
      "loss": 0.6321,
      "step": 60
    },
    {
      "epoch": 2.71,
      "grad_norm": 1.2592875957489014,
      "learning_rate": 1.1449465713360835e-05,
      "loss": 0.6223,
      "step": 65
    },
    {
      "epoch": 2.92,
      "grad_norm": 1.1591824293136597,
      "learning_rate": 1.1122338121550526e-05,
      "loss": 0.4922,
      "step": 70
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.7079887390136719,
      "eval_runtime": 1.3669,
      "eval_samples_per_second": 46.821,
      "eval_steps_per_second": 5.853,
      "step": 72
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.5332023501396179,
      "learning_rate": 1.0795210529740214e-05,
      "loss": 0.5989,
      "step": 75
    },
    {
      "epoch": 3.33,
      "grad_norm": 0.5555600523948669,
      "learning_rate": 1.0468082937929906e-05,
      "loss": 0.6104,
      "step": 80
    },
    {
      "epoch": 3.54,
      "grad_norm": 1.2928024530410767,
      "learning_rate": 1.0140955346119596e-05,
      "loss": 0.4936,
      "step": 85
    },
    {
      "epoch": 3.75,
      "grad_norm": 1.1424989700317383,
      "learning_rate": 9.813827754309287e-06,
      "loss": 0.6191,
      "step": 90
    },
    {
      "epoch": 3.96,
      "grad_norm": 1.119732141494751,
      "learning_rate": 9.486700162498977e-06,
      "loss": 0.6004,
      "step": 95
    },
    {
      "epoch": 4.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.7053489685058594,
      "eval_runtime": 1.3556,
      "eval_samples_per_second": 47.211,
      "eval_steps_per_second": 5.901,
      "step": 96
    },
    {
      "epoch": 4.17,
      "grad_norm": 0.8135461211204529,
      "learning_rate": 9.159572570688668e-06,
      "loss": 0.5154,
      "step": 100
    },
    {
      "epoch": 4.38,
      "grad_norm": 1.8034342527389526,
      "learning_rate": 8.832444978878358e-06,
      "loss": 0.6067,
      "step": 105
    },
    {
      "epoch": 4.58,
      "grad_norm": 0.8029685020446777,
      "learning_rate": 8.505317387068049e-06,
      "loss": 0.5499,
      "step": 110
    },
    {
      "epoch": 4.79,
      "grad_norm": 1.019626259803772,
      "learning_rate": 8.178189795257739e-06,
      "loss": 0.5542,
      "step": 115
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.861674427986145,
      "learning_rate": 7.85106220344743e-06,
      "loss": 0.5545,
      "step": 120
    },
    {
      "epoch": 5.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6832504272460938,
      "eval_runtime": 1.3811,
      "eval_samples_per_second": 46.341,
      "eval_steps_per_second": 5.793,
      "step": 120
    },
    {
      "epoch": 5.21,
      "grad_norm": 1.5949212312698364,
      "learning_rate": 7.523934611637121e-06,
      "loss": 0.4806,
      "step": 125
    },
    {
      "epoch": 5.42,
      "grad_norm": 3.002861738204956,
      "learning_rate": 7.196807019826811e-06,
      "loss": 0.5832,
      "step": 130
    },
    {
      "epoch": 5.62,
      "grad_norm": 1.4606820344924927,
      "learning_rate": 6.9351049463785626e-06,
      "loss": 0.5481,
      "step": 135
    },
    {
      "epoch": 5.83,
      "grad_norm": 1.6088628768920898,
      "learning_rate": 6.607977354568253e-06,
      "loss": 0.5333,
      "step": 140
    },
    {
      "epoch": 6.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6599597930908203,
      "eval_runtime": 1.3747,
      "eval_samples_per_second": 46.555,
      "eval_steps_per_second": 5.819,
      "step": 144
    },
    {
      "epoch": 6.04,
      "grad_norm": 2.2051286697387695,
      "learning_rate": 6.280849762757943e-06,
      "loss": 0.4805,
      "step": 145
    },
    {
      "epoch": 6.25,
      "grad_norm": 1.6964988708496094,
      "learning_rate": 5.953722170947633e-06,
      "loss": 0.4573,
      "step": 150
    },
    {
      "epoch": 6.46,
      "grad_norm": 2.1374056339263916,
      "learning_rate": 5.626594579137324e-06,
      "loss": 0.59,
      "step": 155
    },
    {
      "epoch": 6.67,
      "grad_norm": 1.8037084341049194,
      "learning_rate": 5.299466987327015e-06,
      "loss": 0.494,
      "step": 160
    },
    {
      "epoch": 6.88,
      "grad_norm": 1.7916295528411865,
      "learning_rate": 4.972339395516706e-06,
      "loss": 0.4997,
      "step": 165
    },
    {
      "epoch": 7.0,
      "eval_f1": 0.74,
      "eval_loss": 0.6653976440429688,
      "eval_runtime": 1.3593,
      "eval_samples_per_second": 47.083,
      "eval_steps_per_second": 5.885,
      "step": 168
    },
    {
      "epoch": 7.08,
      "grad_norm": 2.2235190868377686,
      "learning_rate": 4.645211803706396e-06,
      "loss": 0.3887,
      "step": 170
    },
    {
      "epoch": 7.29,
      "grad_norm": 1.239268183708191,
      "learning_rate": 4.318084211896087e-06,
      "loss": 0.4519,
      "step": 175
    },
    {
      "epoch": 7.5,
      "grad_norm": 1.8677798509597778,
      "learning_rate": 3.990956620085777e-06,
      "loss": 0.5146,
      "step": 180
    },
    {
      "epoch": 7.71,
      "grad_norm": 3.8495407104492188,
      "learning_rate": 3.6638290282754668e-06,
      "loss": 0.4237,
      "step": 185
    },
    {
      "epoch": 7.92,
      "grad_norm": 1.8828785419464111,
      "learning_rate": 3.3367014364651573e-06,
      "loss": 0.5033,
      "step": 190
    },
    {
      "epoch": 8.0,
      "eval_f1": 0.74,
      "eval_loss": 0.6842975616455078,
      "eval_runtime": 1.3685,
      "eval_samples_per_second": 46.767,
      "eval_steps_per_second": 5.846,
      "step": 192
    },
    {
      "epoch": 8.12,
      "grad_norm": 4.3374714851379395,
      "learning_rate": 3.009573844654848e-06,
      "loss": 0.4588,
      "step": 195
    },
    {
      "epoch": 8.33,
      "grad_norm": 2.9799509048461914,
      "learning_rate": 2.6824462528445384e-06,
      "loss": 0.3783,
      "step": 200
    },
    {
      "epoch": 8.54,
      "grad_norm": 3.3768601417541504,
      "learning_rate": 2.3553186610342286e-06,
      "loss": 0.4366,
      "step": 205
    },
    {
      "epoch": 8.75,
      "grad_norm": 2.2495288848876953,
      "learning_rate": 2.028191069223919e-06,
      "loss": 0.4545,
      "step": 210
    },
    {
      "epoch": 8.96,
      "grad_norm": 2.078002691268921,
      "learning_rate": 1.7010634774136097e-06,
      "loss": 0.6012,
      "step": 215
    },
    {
      "epoch": 9.0,
      "eval_f1": 0.74,
      "eval_loss": 0.6836881637573242,
      "eval_runtime": 1.3545,
      "eval_samples_per_second": 47.25,
      "eval_steps_per_second": 5.906,
      "step": 216
    }
  ],
  "logging_steps": 5,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 7545814381042464.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 1.4131911966205373e-05,
    "per_device_train_batch_size": 8
  }
}