NicholasCorrado's picture
Model save
f8e3a1b verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9947089947089947,
"eval_steps": 1000,
"global_step": 94,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010582010582010581,
"grad_norm": 3.9034635996758364,
"learning_rate": 5e-08,
"logits/chosen": -2.8740313053131104,
"logits/rejected": -2.909637928009033,
"logps/chosen": -495.3936462402344,
"logps/rejected": -468.7409973144531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.10582010582010581,
"grad_norm": 3.740412497210408,
"learning_rate": 5e-07,
"logits/chosen": -2.8803439140319824,
"logits/rejected": -2.933382987976074,
"logps/chosen": -489.9436340332031,
"logps/rejected": -471.76068115234375,
"loss": 0.6926,
"rewards/accuracies": 0.4765625,
"rewards/chosen": 0.0005175346159376204,
"rewards/margins": 0.001013587461784482,
"rewards/rejected": -0.0004960527876392007,
"step": 10
},
{
"epoch": 0.21164021164021163,
"grad_norm": 4.0026509714343526,
"learning_rate": 4.82718437161051e-07,
"logits/chosen": -2.9089906215667725,
"logits/rejected": -2.9724087715148926,
"logps/chosen": -490.58831787109375,
"logps/rejected": -484.2608947753906,
"loss": 0.677,
"rewards/accuracies": 0.8101562261581421,
"rewards/chosen": 0.017761804163455963,
"rewards/margins": 0.033221058547496796,
"rewards/rejected": -0.015459256246685982,
"step": 20
},
{
"epoch": 0.31746031746031744,
"grad_norm": 5.045224654085605,
"learning_rate": 4.332629679574565e-07,
"logits/chosen": -2.9424614906311035,
"logits/rejected": -2.994748830795288,
"logps/chosen": -486.4335021972656,
"logps/rejected": -484.3814392089844,
"loss": 0.623,
"rewards/accuracies": 0.8492187261581421,
"rewards/chosen": 0.045325733721256256,
"rewards/margins": 0.14735476672649384,
"rewards/rejected": -0.10202904045581818,
"step": 30
},
{
"epoch": 0.42328042328042326,
"grad_norm": 3.94438220021588,
"learning_rate": 3.584709347793895e-07,
"logits/chosen": -2.92952036857605,
"logits/rejected": -3.001981019973755,
"logps/chosen": -487.70849609375,
"logps/rejected": -510.1280212402344,
"loss": 0.5749,
"rewards/accuracies": 0.85546875,
"rewards/chosen": 0.017966564744710922,
"rewards/margins": 0.2901422381401062,
"rewards/rejected": -0.2721756398677826,
"step": 40
},
{
"epoch": 0.5291005291005291,
"grad_norm": 3.0093221435481667,
"learning_rate": 2.6868252339660607e-07,
"logits/chosen": -2.92526912689209,
"logits/rejected": -2.995861768722534,
"logps/chosen": -514.0173950195312,
"logps/rejected": -578.7903442382812,
"loss": 0.4668,
"rewards/accuracies": 0.8609374761581421,
"rewards/chosen": -0.239375501871109,
"rewards/margins": 0.7664871215820312,
"rewards/rejected": -1.0058627128601074,
"step": 50
},
{
"epoch": 0.6349206349206349,
"grad_norm": 3.4147008885194587,
"learning_rate": 1.763112063972739e-07,
"logits/chosen": -2.9192681312561035,
"logits/rejected": -2.9830739498138428,
"logps/chosen": -526.348876953125,
"logps/rejected": -614.0721435546875,
"loss": 0.421,
"rewards/accuracies": 0.858593761920929,
"rewards/chosen": -0.4103693962097168,
"rewards/margins": 0.9783406257629395,
"rewards/rejected": -1.3887102603912354,
"step": 60
},
{
"epoch": 0.7407407407407407,
"grad_norm": 3.0922082498626846,
"learning_rate": 9.412754953531663e-08,
"logits/chosen": -2.918205976486206,
"logits/rejected": -2.970673084259033,
"logps/chosen": -554.398681640625,
"logps/rejected": -667.9301147460938,
"loss": 0.4035,
"rewards/accuracies": 0.8492187261581421,
"rewards/chosen": -0.6199524998664856,
"rewards/margins": 1.1833505630493164,
"rewards/rejected": -1.8033031225204468,
"step": 70
},
{
"epoch": 0.8465608465608465,
"grad_norm": 3.5694688653613595,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": -2.8878700733184814,
"logits/rejected": -2.932638645172119,
"logps/chosen": -557.0501708984375,
"logps/rejected": -678.7019653320312,
"loss": 0.3889,
"rewards/accuracies": 0.8578125238418579,
"rewards/chosen": -0.7133009433746338,
"rewards/margins": 1.2904046773910522,
"rewards/rejected": -2.0037055015563965,
"step": 80
},
{
"epoch": 0.9523809523809523,
"grad_norm": 3.149374311366939,
"learning_rate": 2.7922934437178692e-09,
"logits/chosen": -2.89034366607666,
"logits/rejected": -2.936624765396118,
"logps/chosen": -562.55322265625,
"logps/rejected": -680.3013305664062,
"loss": 0.3864,
"rewards/accuracies": 0.836718738079071,
"rewards/chosen": -0.7662609815597534,
"rewards/margins": 1.2681959867477417,
"rewards/rejected": -2.034456968307495,
"step": 90
},
{
"epoch": 0.9947089947089947,
"step": 94,
"total_flos": 0.0,
"train_loss": 0.5087684798747936,
"train_runtime": 2521.2346,
"train_samples_per_second": 38.335,
"train_steps_per_second": 0.037
}
],
"logging_steps": 10,
"max_steps": 94,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}