{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9922480620155039, "eval_steps": 1000, "global_step": 64, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015503875968992248, "grad_norm": 1.5117514217434924, "learning_rate": 7.142857142857142e-08, "logits/chosen": -2.131749153137207, "logits/rejected": -2.0810136795043945, "logps/chosen": -263.0189208984375, "logps/rejected": -181.19131469726562, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.15503875968992248, "grad_norm": 1.4140598087009932, "learning_rate": 4.965903258506806e-07, "logits/chosen": -2.120738983154297, "logits/rejected": -2.07973575592041, "logps/chosen": -234.01666259765625, "logps/rejected": -198.01730346679688, "loss": 0.6931, "rewards/accuracies": 0.4470486044883728, "rewards/chosen": -0.0003320822142995894, "rewards/margins": 3.237514101783745e-05, "rewards/rejected": -0.0003644573735073209, "step": 10 }, { "epoch": 0.31007751937984496, "grad_norm": 1.5687714199328893, "learning_rate": 4.38526652444224e-07, "logits/chosen": -2.128849506378174, "logits/rejected": -2.094902276992798, "logps/chosen": -239.3656005859375, "logps/rejected": -199.91905212402344, "loss": 0.6918, "rewards/accuracies": 0.5609375238418579, "rewards/chosen": -0.007283635437488556, "rewards/margins": 0.0028790258802473545, "rewards/rejected": -0.010162660852074623, "step": 20 }, { "epoch": 0.46511627906976744, "grad_norm": 1.5123545615833116, "learning_rate": 3.2462870275042367e-07, "logits/chosen": -2.1159348487854004, "logits/rejected": -2.0800769329071045, "logps/chosen": -244.09561157226562, "logps/rejected": -197.23634338378906, "loss": 0.6887, "rewards/accuracies": 0.62890625, "rewards/chosen": -0.025394853204488754, "rewards/margins": 0.01062600314617157, "rewards/rejected": -0.036020856350660324, "step": 30 }, { "epoch": 0.6201550387596899, "grad_norm": 1.4326711784058517, "learning_rate": 1.886286282148002e-07, "logits/chosen": -2.1413533687591553, "logits/rejected": -2.112823009490967, "logps/chosen": -237.096435546875, "logps/rejected": -202.06204223632812, "loss": 0.686, "rewards/accuracies": 0.6156250238418579, "rewards/chosen": -0.05361751466989517, "rewards/margins": 0.012631883844733238, "rewards/rejected": -0.06624939292669296, "step": 40 }, { "epoch": 0.7751937984496124, "grad_norm": 1.4875305488479276, "learning_rate": 7.080437170788722e-08, "logits/chosen": -2.150388240814209, "logits/rejected": -2.116311550140381, "logps/chosen": -247.1051483154297, "logps/rejected": -210.83724975585938, "loss": 0.6843, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06689126789569855, "rewards/margins": 0.01844162493944168, "rewards/rejected": -0.08533290028572083, "step": 50 }, { "epoch": 0.9302325581395349, "grad_norm": 1.4783879194322498, "learning_rate": 6.0509043431410945e-09, "logits/chosen": -2.174121379852295, "logits/rejected": -2.1149704456329346, "logps/chosen": -245.97012329101562, "logps/rejected": -205.9432373046875, "loss": 0.6838, "rewards/accuracies": 0.625781238079071, "rewards/chosen": -0.07243042439222336, "rewards/margins": 0.021614735946059227, "rewards/rejected": -0.09404516220092773, "step": 60 }, { "epoch": 0.9922480620155039, "step": 64, "total_flos": 0.0, "train_loss": 0.6876659728586674, "train_runtime": 1604.6168, "train_samples_per_second": 20.566, "train_steps_per_second": 0.04 } ], "logging_steps": 10, "max_steps": 64, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }