|
{ |
|
"best_metric": 0.19506023824214935, |
|
"best_model_checkpoint": "bioformer16L_caption_RTX6000/checkpoint-1230", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 4920, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4065040650406504, |
|
"grad_norm": 4.495114803314209, |
|
"learning_rate": 2.75609756097561e-05, |
|
"loss": 0.256, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8130081300813008, |
|
"grad_norm": 2.5886974334716797, |
|
"learning_rate": 2.5121951219512197e-05, |
|
"loss": 0.1859, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9271035681423168, |
|
"eval_f1": 0.9220692013036642, |
|
"eval_f1_medical": 0.9418765794407761, |
|
"eval_f1_non_medical": 0.9022618231665525, |
|
"eval_loss": 0.19506023824214935, |
|
"eval_precision": 0.9249732755411849, |
|
"eval_recall": 0.9194829650061407, |
|
"eval_runtime": 34.6825, |
|
"eval_samples_per_second": 282.015, |
|
"eval_steps_per_second": 8.823, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 2.54653263092041, |
|
"learning_rate": 2.2682926829268295e-05, |
|
"loss": 0.1571, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.6260162601626016, |
|
"grad_norm": 2.3185017108917236, |
|
"learning_rate": 2.024390243902439e-05, |
|
"loss": 0.1303, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.92516102647991, |
|
"eval_f1": 0.921045875731096, |
|
"eval_f1_medical": 0.9390710837356417, |
|
"eval_f1_non_medical": 0.9030206677265501, |
|
"eval_loss": 0.2094709575176239, |
|
"eval_precision": 0.9184306018002388, |
|
"eval_recall": 0.9240747625361165, |
|
"eval_runtime": 34.5371, |
|
"eval_samples_per_second": 283.202, |
|
"eval_steps_per_second": 8.86, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.032520325203252, |
|
"grad_norm": 4.058623313903809, |
|
"learning_rate": 1.7804878048780488e-05, |
|
"loss": 0.1238, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.4390243902439024, |
|
"grad_norm": 5.887447834014893, |
|
"learning_rate": 1.5365853658536586e-05, |
|
"loss": 0.0884, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.845528455284553, |
|
"grad_norm": 3.797337055206299, |
|
"learning_rate": 1.2926829268292684e-05, |
|
"loss": 0.092, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9272058071771803, |
|
"eval_f1": 0.9219373155502308, |
|
"eval_f1_medical": 0.9422171725369258, |
|
"eval_f1_non_medical": 0.901657458563536, |
|
"eval_loss": 0.27733102440834045, |
|
"eval_precision": 0.9264226794905697, |
|
"eval_recall": 0.9181447502548421, |
|
"eval_runtime": 34.4489, |
|
"eval_samples_per_second": 283.927, |
|
"eval_steps_per_second": 8.883, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 3.252032520325203, |
|
"grad_norm": 11.003833770751953, |
|
"learning_rate": 1.048780487804878e-05, |
|
"loss": 0.0625, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.658536585365854, |
|
"grad_norm": 11.000012397766113, |
|
"learning_rate": 8.048780487804879e-06, |
|
"loss": 0.059, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9267968510377262, |
|
"eval_f1": 0.9224128655389552, |
|
"eval_f1_medical": 0.9408557739963654, |
|
"eval_f1_non_medical": 0.903969957081545, |
|
"eval_loss": 0.30652934312820435, |
|
"eval_precision": 0.9214772563975018, |
|
"eval_recall": 0.9233923856098107, |
|
"eval_runtime": 34.3959, |
|
"eval_samples_per_second": 284.366, |
|
"eval_steps_per_second": 8.896, |
|
"step": 4920 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1766206373267072e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|