|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9874476987447699, |
|
"eval_steps": 500, |
|
"global_step": 59, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016736401673640166, |
|
"grad_norm": 73.53016894281072, |
|
"learning_rate": 1.6666666666666664e-08, |
|
"logits/chosen": -2.377516984939575, |
|
"logits/rejected": -2.342954158782959, |
|
"logps/chosen": -267.31927490234375, |
|
"logps/pi_response": -149.59169006347656, |
|
"logps/ref_response": -149.59169006347656, |
|
"logps/rejected": -539.9591674804688, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.16736401673640167, |
|
"grad_norm": 80.4074524611749, |
|
"learning_rate": 9.860114570402053e-08, |
|
"logits/chosen": -2.3693184852600098, |
|
"logits/rejected": -2.286440372467041, |
|
"logps/chosen": -298.61932373046875, |
|
"logps/pi_response": -132.10630798339844, |
|
"logps/ref_response": -131.78355407714844, |
|
"logps/rejected": -554.6019897460938, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.5555555820465088, |
|
"rewards/chosen": -0.00936645083129406, |
|
"rewards/margins": 0.014261982403695583, |
|
"rewards/rejected": -0.023628434166312218, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.33472803347280333, |
|
"grad_norm": 46.77444881870846, |
|
"learning_rate": 8.374915007591053e-08, |
|
"logits/chosen": -2.3324151039123535, |
|
"logits/rejected": -2.2493882179260254, |
|
"logps/chosen": -298.06707763671875, |
|
"logps/pi_response": -146.1650390625, |
|
"logps/ref_response": -140.46481323242188, |
|
"logps/rejected": -580.0590209960938, |
|
"loss": 0.6135, |
|
"rewards/accuracies": 0.721875011920929, |
|
"rewards/chosen": -0.18148620426654816, |
|
"rewards/margins": 0.20174245536327362, |
|
"rewards/rejected": -0.3832286298274994, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.502092050209205, |
|
"grad_norm": 42.64368867415699, |
|
"learning_rate": 5.738232820012406e-08, |
|
"logits/chosen": -2.2694995403289795, |
|
"logits/rejected": -2.207530975341797, |
|
"logps/chosen": -327.8448181152344, |
|
"logps/pi_response": -139.88900756835938, |
|
"logps/ref_response": -131.197509765625, |
|
"logps/rejected": -638.9929809570312, |
|
"loss": 0.545, |
|
"rewards/accuracies": 0.7718750238418579, |
|
"rewards/chosen": -0.371069997549057, |
|
"rewards/margins": 0.4864231050014496, |
|
"rewards/rejected": -0.8574931025505066, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.6694560669456067, |
|
"grad_norm": 40.23447563253812, |
|
"learning_rate": 2.8496739886173992e-08, |
|
"logits/chosen": -2.242140054702759, |
|
"logits/rejected": -2.160322666168213, |
|
"logps/chosen": -358.05889892578125, |
|
"logps/pi_response": -157.74525451660156, |
|
"logps/ref_response": -145.66329956054688, |
|
"logps/rejected": -707.8968505859375, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.734375, |
|
"rewards/chosen": -0.5855440497398376, |
|
"rewards/margins": 0.7927876114845276, |
|
"rewards/rejected": -1.3783317804336548, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.8368200836820083, |
|
"grad_norm": 82.30441781177622, |
|
"learning_rate": 6.947819411632222e-09, |
|
"logits/chosen": -2.2379250526428223, |
|
"logits/rejected": -2.158501386642456, |
|
"logps/chosen": -376.53204345703125, |
|
"logps/pi_response": -153.90878295898438, |
|
"logps/ref_response": -141.55398559570312, |
|
"logps/rejected": -747.1853637695312, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.6662783622741699, |
|
"rewards/margins": 0.9346103668212891, |
|
"rewards/rejected": -1.6008888483047485, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.9874476987447699, |
|
"step": 59, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5589375010991501, |
|
"train_runtime": 2582.4535, |
|
"train_samples_per_second": 5.918, |
|
"train_steps_per_second": 0.023 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 59, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|