{
  "best_metric": 0.7326732673267327,
  "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-8/checkpoint-24",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 144,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21,
      "grad_norm": 1.6496813297271729,
      "learning_rate": 2.9441483262927863e-06,
      "loss": 0.6992,
      "step": 5
    },
    {
      "epoch": 0.42,
      "grad_norm": 0.620004415512085,
      "learning_rate": 5.888296652585573e-06,
      "loss": 0.6939,
      "step": 10
    },
    {
      "epoch": 0.62,
      "grad_norm": 0.5726878046989441,
      "learning_rate": 8.832444978878358e-06,
      "loss": 0.6835,
      "step": 15
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.4239176511764526,
      "learning_rate": 1.1776593305171145e-05,
      "loss": 0.6673,
      "step": 20
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6805419921875,
      "eval_runtime": 1.3716,
      "eval_samples_per_second": 46.661,
      "eval_steps_per_second": 5.833,
      "step": 24
    },
    {
      "epoch": 1.04,
      "grad_norm": 0.9195191264152527,
      "learning_rate": 1.406648644784331e-05,
      "loss": 0.6673,
      "step": 25
    },
    {
      "epoch": 1.25,
      "grad_norm": 1.8524231910705566,
      "learning_rate": 1.3739358856033002e-05,
      "loss": 0.6155,
      "step": 30
    },
    {
      "epoch": 1.46,
      "grad_norm": 1.8213531970977783,
      "learning_rate": 1.3412231264222692e-05,
      "loss": 0.5895,
      "step": 35
    },
    {
      "epoch": 1.67,
      "grad_norm": 0.4818130433559418,
      "learning_rate": 1.3085103672412383e-05,
      "loss": 0.6468,
      "step": 40
    },
    {
      "epoch": 1.88,
      "grad_norm": 0.6597484946250916,
      "learning_rate": 1.2757976080602073e-05,
      "loss": 0.6173,
      "step": 45
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6970634460449219,
      "eval_runtime": 1.3632,
      "eval_samples_per_second": 46.949,
      "eval_steps_per_second": 5.869,
      "step": 48
    },
    {
      "epoch": 2.08,
      "grad_norm": 0.48903289437294006,
      "learning_rate": 1.2430848488791764e-05,
      "loss": 0.6302,
      "step": 50
    },
    {
      "epoch": 2.29,
      "grad_norm": 0.6064260601997375,
      "learning_rate": 1.2103720896981454e-05,
      "loss": 0.5867,
      "step": 55
    },
    {
      "epoch": 2.5,
      "grad_norm": 0.6802453398704529,
      "learning_rate": 1.1776593305171145e-05,
      "loss": 0.6321,
      "step": 60
    },
    {
      "epoch": 2.71,
      "grad_norm": 1.2592875957489014,
      "learning_rate": 1.1449465713360835e-05,
      "loss": 0.6223,
      "step": 65
    },
    {
      "epoch": 2.92,
      "grad_norm": 1.1591824293136597,
      "learning_rate": 1.1122338121550526e-05,
      "loss": 0.4922,
      "step": 70
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.7079887390136719,
      "eval_runtime": 1.3669,
      "eval_samples_per_second": 46.821,
      "eval_steps_per_second": 5.853,
      "step": 72
    },
    {
      "epoch": 3.12,
      "grad_norm": 0.5332023501396179,
      "learning_rate": 1.0795210529740214e-05,
      "loss": 0.5989,
      "step": 75
    },
    {
      "epoch": 3.33,
      "grad_norm": 0.5555600523948669,
      "learning_rate": 1.0468082937929906e-05,
      "loss": 0.6104,
      "step": 80
    },
    {
      "epoch": 3.54,
      "grad_norm": 1.2928024530410767,
      "learning_rate": 1.0140955346119596e-05,
      "loss": 0.4936,
      "step": 85
    },
    {
      "epoch": 3.75,
      "grad_norm": 1.1424989700317383,
      "learning_rate": 9.813827754309287e-06,
      "loss": 0.6191,
      "step": 90
    },
    {
      "epoch": 3.96,
      "grad_norm": 1.119732141494751,
      "learning_rate": 9.486700162498977e-06,
      "loss": 0.6004,
      "step": 95
    },
    {
      "epoch": 4.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.7053489685058594,
      "eval_runtime": 1.3556,
      "eval_samples_per_second": 47.211,
      "eval_steps_per_second": 5.901,
      "step": 96
    },
    {
      "epoch": 4.17,
      "grad_norm": 0.8135461211204529,
      "learning_rate": 9.159572570688668e-06,
      "loss": 0.5154,
      "step": 100
    },
    {
      "epoch": 4.38,
      "grad_norm": 1.8034342527389526,
      "learning_rate": 8.832444978878358e-06,
      "loss": 0.6067,
      "step": 105
    },
    {
      "epoch": 4.58,
      "grad_norm": 0.8029685020446777,
      "learning_rate": 8.505317387068049e-06,
      "loss": 0.5499,
      "step": 110
    },
    {
      "epoch": 4.79,
      "grad_norm": 1.019626259803772,
      "learning_rate": 8.178189795257739e-06,
      "loss": 0.5542,
      "step": 115
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.861674427986145,
      "learning_rate": 7.85106220344743e-06,
      "loss": 0.5545,
      "step": 120
    },
    {
      "epoch": 5.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6832504272460938,
      "eval_runtime": 1.3811,
      "eval_samples_per_second": 46.341,
      "eval_steps_per_second": 5.793,
      "step": 120
    },
    {
      "epoch": 5.21,
      "grad_norm": 1.5949212312698364,
      "learning_rate": 7.523934611637121e-06,
      "loss": 0.4806,
      "step": 125
    },
    {
      "epoch": 5.42,
      "grad_norm": 3.002861738204956,
      "learning_rate": 7.196807019826811e-06,
      "loss": 0.5832,
      "step": 130
    },
    {
      "epoch": 5.62,
      "grad_norm": 1.4606820344924927,
      "learning_rate": 6.9351049463785626e-06,
      "loss": 0.5481,
      "step": 135
    },
    {
      "epoch": 5.83,
      "grad_norm": 1.6088628768920898,
      "learning_rate": 6.607977354568253e-06,
      "loss": 0.5333,
      "step": 140
    },
    {
      "epoch": 6.0,
      "eval_f1": 0.7326732673267327,
      "eval_loss": 0.6599597930908203,
      "eval_runtime": 1.3747,
      "eval_samples_per_second": 46.555,
      "eval_steps_per_second": 5.819,
      "step": 144
    }
  ],
  "logging_steps": 5,
  "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 4911381340990080.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 1.4131911966205373e-05,
    "per_device_train_batch_size": 8
  }
}