{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9966024915062288, "eval_steps": 1000, "global_step": 110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009060022650056626, "grad_norm": 2.142748189776569, "learning_rate": 4.545454545454545e-08, "logits/chosen": -2.2157700061798096, "logits/rejected": -2.1868345737457275, "logps/chosen": -314.38787841796875, "logps/rejected": -291.1216735839844, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.09060022650056625, "grad_norm": 2.150777513425362, "learning_rate": 4.545454545454545e-07, "logits/chosen": -2.2591588497161865, "logits/rejected": -2.233074188232422, "logps/chosen": -323.4332275390625, "logps/rejected": -301.56719970703125, "loss": 0.6929, "rewards/accuracies": 0.4713541567325592, "rewards/chosen": 0.0009152439888566732, "rewards/margins": 0.000497353496029973, "rewards/rejected": 0.00041789052193053067, "step": 10 }, { "epoch": 0.1812004530011325, "grad_norm": 2.066434141751538, "learning_rate": 4.898732434036243e-07, "logits/chosen": -2.2525153160095215, "logits/rejected": -2.2378909587860107, "logps/chosen": -325.8227233886719, "logps/rejected": -305.6023254394531, "loss": 0.6861, "rewards/accuracies": 0.7789062261581421, "rewards/chosen": 0.0187881700694561, "rewards/margins": 0.014950500801205635, "rewards/rejected": 0.0038376704324036837, "step": 20 }, { "epoch": 0.2718006795016987, "grad_norm": 1.935984789640625, "learning_rate": 4.5591914535745817e-07, "logits/chosen": -2.1835825443267822, "logits/rejected": -2.173578977584839, "logps/chosen": -313.19586181640625, "logps/rejected": -300.5938720703125, "loss": 0.6661, "rewards/accuracies": 0.8046875, "rewards/chosen": 0.04926218464970589, "rewards/margins": 0.05489668250083923, "rewards/rejected": -0.005634505767375231, "step": 30 }, { "epoch": 0.362400906002265, "grad_norm": 2.2867184944223293, "learning_rate": 4.0140242178441665e-07, "logits/chosen": -2.1189799308776855, "logits/rejected": -2.114716053009033, "logps/chosen": -317.4134826660156, "logps/rejected": -306.8409423828125, "loss": 0.6458, "rewards/accuracies": 0.8046875, "rewards/chosen": 0.03598688170313835, "rewards/margins": 0.09652377665042877, "rewards/rejected": -0.06053689122200012, "step": 40 }, { "epoch": 0.45300113250283125, "grad_norm": 2.40453710231171, "learning_rate": 3.317669908293554e-07, "logits/chosen": -1.9152988195419312, "logits/rejected": -1.9253225326538086, "logps/chosen": -321.01800537109375, "logps/rejected": -334.33349609375, "loss": 0.5959, "rewards/accuracies": 0.8109375238418579, "rewards/chosen": -0.13218708336353302, "rewards/margins": 0.22598442435264587, "rewards/rejected": -0.3581715524196625, "step": 50 }, { "epoch": 0.5436013590033975, "grad_norm": 2.548257476263302, "learning_rate": 2.53966490958702e-07, "logits/chosen": -1.8517974615097046, "logits/rejected": -1.851008653640747, "logps/chosen": -349.90252685546875, "logps/rejected": -362.4208984375, "loss": 0.5612, "rewards/accuracies": 0.815625011920929, "rewards/chosen": -0.2689761817455292, "rewards/margins": 0.33339887857437134, "rewards/rejected": -0.6023750305175781, "step": 60 }, { "epoch": 0.6342015855039638, "grad_norm": 2.564454636474787, "learning_rate": 1.7576990616793137e-07, "logits/chosen": -1.8310279846191406, "logits/rejected": -1.8546864986419678, "logps/chosen": -362.08538818359375, "logps/rejected": -394.1742858886719, "loss": 0.5353, "rewards/accuracies": 0.803906261920929, "rewards/chosen": -0.4547205865383148, "rewards/margins": 0.4052005410194397, "rewards/rejected": -0.8599211573600769, "step": 70 }, { "epoch": 0.72480181200453, "grad_norm": 2.5983243283209183, "learning_rate": 1.0498577260720048e-07, "logits/chosen": -1.8076627254486084, "logits/rejected": -1.8311001062393188, "logps/chosen": -375.9088134765625, "logps/rejected": -410.9624938964844, "loss": 0.5223, "rewards/accuracies": 0.782031238079071, "rewards/chosen": -0.5610671639442444, "rewards/margins": 0.4681544303894043, "rewards/rejected": -1.029221534729004, "step": 80 }, { "epoch": 0.8154020385050963, "grad_norm": 2.742604490623868, "learning_rate": 4.868243561723534e-08, "logits/chosen": -1.809565544128418, "logits/rejected": -1.8266630172729492, "logps/chosen": -381.6236267089844, "logps/rejected": -415.415771484375, "loss": 0.513, "rewards/accuracies": 0.792187511920929, "rewards/chosen": -0.6585050225257874, "rewards/margins": 0.4940672814846039, "rewards/rejected": -1.1525723934173584, "step": 90 }, { "epoch": 0.9060022650056625, "grad_norm": 2.4312610291944887, "learning_rate": 1.2482220564763667e-08, "logits/chosen": -1.8194091320037842, "logits/rejected": -1.811342477798462, "logps/chosen": -386.8002624511719, "logps/rejected": -417.9185485839844, "loss": 0.508, "rewards/accuracies": 0.817187488079071, "rewards/chosen": -0.6888748407363892, "rewards/margins": 0.544217050075531, "rewards/rejected": -1.2330917119979858, "step": 100 }, { "epoch": 0.9966024915062288, "grad_norm": 2.609201337420324, "learning_rate": 0.0, "logits/chosen": -1.8002452850341797, "logits/rejected": -1.796565294265747, "logps/chosen": -391.4776611328125, "logps/rejected": -427.072265625, "loss": 0.506, "rewards/accuracies": 0.8046875, "rewards/chosen": -0.7028344869613647, "rewards/margins": 0.5477779507637024, "rewards/rejected": -1.250612497329712, "step": 110 }, { "epoch": 0.9966024915062288, "step": 110, "total_flos": 0.0, "train_loss": 0.5847668994556774, "train_runtime": 2901.4735, "train_samples_per_second": 38.945, "train_steps_per_second": 0.038 } ], "logging_steps": 10, "max_steps": 110, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }