|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9947089947089947, |
|
"eval_steps": 1000, |
|
"global_step": 94, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010582010582010581, |
|
"grad_norm": 3.9034635996758364, |
|
"learning_rate": 5e-08, |
|
"logits/chosen": -2.8740313053131104, |
|
"logits/rejected": -2.909637928009033, |
|
"logps/chosen": -495.3936462402344, |
|
"logps/rejected": -468.7409973144531, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.10582010582010581, |
|
"grad_norm": 3.740412497210408, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -2.8803439140319824, |
|
"logits/rejected": -2.933382987976074, |
|
"logps/chosen": -489.9436340332031, |
|
"logps/rejected": -471.76068115234375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4765625, |
|
"rewards/chosen": 0.0005175346159376204, |
|
"rewards/margins": 0.001013587461784482, |
|
"rewards/rejected": -0.0004960527876392007, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.21164021164021163, |
|
"grad_norm": 4.0026509714343526, |
|
"learning_rate": 4.82718437161051e-07, |
|
"logits/chosen": -2.9089906215667725, |
|
"logits/rejected": -2.9724087715148926, |
|
"logps/chosen": -490.58831787109375, |
|
"logps/rejected": -484.2608947753906, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.8101562261581421, |
|
"rewards/chosen": 0.017761804163455963, |
|
"rewards/margins": 0.033221058547496796, |
|
"rewards/rejected": -0.015459256246685982, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.31746031746031744, |
|
"grad_norm": 5.045224654085605, |
|
"learning_rate": 4.332629679574565e-07, |
|
"logits/chosen": -2.9424614906311035, |
|
"logits/rejected": -2.994748830795288, |
|
"logps/chosen": -486.4335021972656, |
|
"logps/rejected": -484.3814392089844, |
|
"loss": 0.623, |
|
"rewards/accuracies": 0.8492187261581421, |
|
"rewards/chosen": 0.045325733721256256, |
|
"rewards/margins": 0.14735476672649384, |
|
"rewards/rejected": -0.10202904045581818, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.42328042328042326, |
|
"grad_norm": 3.94438220021588, |
|
"learning_rate": 3.584709347793895e-07, |
|
"logits/chosen": -2.92952036857605, |
|
"logits/rejected": -3.001981019973755, |
|
"logps/chosen": -487.70849609375, |
|
"logps/rejected": -510.1280212402344, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.85546875, |
|
"rewards/chosen": 0.017966564744710922, |
|
"rewards/margins": 0.2901422381401062, |
|
"rewards/rejected": -0.2721756398677826, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5291005291005291, |
|
"grad_norm": 3.0093221435481667, |
|
"learning_rate": 2.6868252339660607e-07, |
|
"logits/chosen": -2.92526912689209, |
|
"logits/rejected": -2.995861768722534, |
|
"logps/chosen": -514.0173950195312, |
|
"logps/rejected": -578.7903442382812, |
|
"loss": 0.4668, |
|
"rewards/accuracies": 0.8609374761581421, |
|
"rewards/chosen": -0.239375501871109, |
|
"rewards/margins": 0.7664871215820312, |
|
"rewards/rejected": -1.0058627128601074, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6349206349206349, |
|
"grad_norm": 3.4147008885194587, |
|
"learning_rate": 1.763112063972739e-07, |
|
"logits/chosen": -2.9192681312561035, |
|
"logits/rejected": -2.9830739498138428, |
|
"logps/chosen": -526.348876953125, |
|
"logps/rejected": -614.0721435546875, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.858593761920929, |
|
"rewards/chosen": -0.4103693962097168, |
|
"rewards/margins": 0.9783406257629395, |
|
"rewards/rejected": -1.3887102603912354, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 3.0922082498626846, |
|
"learning_rate": 9.412754953531663e-08, |
|
"logits/chosen": -2.918205976486206, |
|
"logits/rejected": -2.970673084259033, |
|
"logps/chosen": -554.398681640625, |
|
"logps/rejected": -667.9301147460938, |
|
"loss": 0.4035, |
|
"rewards/accuracies": 0.8492187261581421, |
|
"rewards/chosen": -0.6199524998664856, |
|
"rewards/margins": 1.1833505630493164, |
|
"rewards/rejected": -1.8033031225204468, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8465608465608465, |
|
"grad_norm": 3.5694688653613595, |
|
"learning_rate": 3.349364905389032e-08, |
|
"logits/chosen": -2.8878700733184814, |
|
"logits/rejected": -2.932638645172119, |
|
"logps/chosen": -557.0501708984375, |
|
"logps/rejected": -678.7019653320312, |
|
"loss": 0.3889, |
|
"rewards/accuracies": 0.8578125238418579, |
|
"rewards/chosen": -0.7133009433746338, |
|
"rewards/margins": 1.2904046773910522, |
|
"rewards/rejected": -2.0037055015563965, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 3.149374311366939, |
|
"learning_rate": 2.7922934437178692e-09, |
|
"logits/chosen": -2.89034366607666, |
|
"logits/rejected": -2.936624765396118, |
|
"logps/chosen": -562.55322265625, |
|
"logps/rejected": -680.3013305664062, |
|
"loss": 0.3864, |
|
"rewards/accuracies": 0.836718738079071, |
|
"rewards/chosen": -0.7662609815597534, |
|
"rewards/margins": 1.2681959867477417, |
|
"rewards/rejected": -2.034456968307495, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9947089947089947, |
|
"step": 94, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5087684798747936, |
|
"train_runtime": 2521.2346, |
|
"train_samples_per_second": 38.335, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 94, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|