{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9992429977289932, "eval_steps": 100, "global_step": 165, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 2.9411764705882356e-07, "logits/chosen": -2.737081289291382, "logits/rejected": -2.680964708328247, "logps/chosen": -126.38134765625, "logps/rejected": -136.25076293945312, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.06, "learning_rate": 2.9411764705882355e-06, "logits/chosen": -2.731968879699707, "logits/rejected": -2.708989143371582, "logps/chosen": -118.21139526367188, "logps/rejected": -111.94728088378906, "loss": 0.6932, "rewards/accuracies": 0.4583333432674408, "rewards/chosen": -1.655664891586639e-05, "rewards/margins": -7.096579793142155e-05, "rewards/rejected": 5.4409170843428e-05, "step": 10 }, { "epoch": 0.12, "learning_rate": 4.994932636402032e-06, "logits/chosen": -2.744499683380127, "logits/rejected": -2.7339892387390137, "logps/chosen": -113.14430236816406, "logps/rejected": -123.68851470947266, "loss": 0.6929, "rewards/accuracies": 0.5218750238418579, "rewards/chosen": 0.007225497625768185, "rewards/margins": 0.0004294503596611321, "rewards/rejected": 0.0067960480228066444, "step": 20 }, { "epoch": 0.18, "learning_rate": 4.905416503522124e-06, "logits/chosen": -2.772244930267334, "logits/rejected": -2.7886576652526855, "logps/chosen": -115.32562255859375, "logps/rejected": -122.47587585449219, "loss": 0.6925, "rewards/accuracies": 0.518750011920929, "rewards/chosen": 0.024915488436818123, "rewards/margins": 0.00023288575175683945, "rewards/rejected": 0.024682600051164627, "step": 30 }, { "epoch": 0.24, "learning_rate": 4.707922373336524e-06, "logits/chosen": -2.740614175796509, "logits/rejected": -2.727870464324951, "logps/chosen": -111.22251892089844, "logps/rejected": -113.79164123535156, "loss": 0.691, "rewards/accuracies": 0.596875011920929, "rewards/chosen": 0.052265096455812454, "rewards/margins": 0.003552838694304228, "rewards/rejected": 0.04871225729584694, "step": 40 }, { "epoch": 0.3, "learning_rate": 4.411315662967732e-06, "logits/chosen": -2.775801181793213, "logits/rejected": -2.7031962871551514, "logps/chosen": -109.81254577636719, "logps/rejected": -113.3335952758789, "loss": 0.6897, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.0849301889538765, "rewards/margins": 0.006556935608386993, "rewards/rejected": 0.0783732533454895, "step": 50 }, { "epoch": 0.36, "learning_rate": 4.028910905897229e-06, "logits/chosen": -2.756106376647949, "logits/rejected": -2.7039153575897217, "logps/chosen": -110.66825103759766, "logps/rejected": -110.25054931640625, "loss": 0.689, "rewards/accuracies": 0.596875011920929, "rewards/chosen": 0.09365645796060562, "rewards/margins": 0.008471069857478142, "rewards/rejected": 0.08518538624048233, "step": 60 }, { "epoch": 0.42, "learning_rate": 3.577874068920446e-06, "logits/chosen": -2.7151947021484375, "logits/rejected": -2.706727981567383, "logps/chosen": -109.93450927734375, "logps/rejected": -113.0154800415039, "loss": 0.6885, "rewards/accuracies": 0.546875, "rewards/chosen": 0.07744868099689484, "rewards/margins": 0.004513105843216181, "rewards/rejected": 0.0729355737566948, "step": 70 }, { "epoch": 0.48, "learning_rate": 3.0784519801008546e-06, "logits/chosen": -2.692235231399536, "logits/rejected": -2.6538617610931396, "logps/chosen": -111.87430572509766, "logps/rejected": -115.57984924316406, "loss": 0.6867, "rewards/accuracies": 0.596875011920929, "rewards/chosen": 0.07563529908657074, "rewards/margins": 0.015832457691431046, "rewards/rejected": 0.059802841395139694, "step": 80 }, { "epoch": 0.55, "learning_rate": 2.553063458334059e-06, "logits/chosen": -2.738804340362549, "logits/rejected": -2.690701723098755, "logps/chosen": -120.409423828125, "logps/rejected": -118.25407409667969, "loss": 0.6841, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.05362895876169205, "rewards/margins": 0.01961613819003105, "rewards/rejected": 0.034012824296951294, "step": 90 }, { "epoch": 0.61, "learning_rate": 2.025292943281429e-06, "logits/chosen": -2.718660593032837, "logits/rejected": -2.7189323902130127, "logps/chosen": -108.1891098022461, "logps/rejected": -112.934814453125, "loss": 0.6837, "rewards/accuracies": 0.590624988079071, "rewards/chosen": 0.06428461521863937, "rewards/margins": 0.0179769154638052, "rewards/rejected": 0.04630769044160843, "step": 100 }, { "epoch": 0.61, "eval_logits/chosen": -2.7115368843078613, "eval_logits/rejected": -2.6278061866760254, "eval_logps/chosen": -286.1498718261719, "eval_logps/rejected": -263.6365966796875, "eval_loss": 0.6740216016769409, "eval_rewards/accuracies": 0.6359999775886536, "eval_rewards/chosen": 0.008544988930225372, "eval_rewards/margins": 0.03750109300017357, "eval_rewards/rejected": -0.028956104069948196, "eval_runtime": 383.9881, "eval_samples_per_second": 5.208, "eval_steps_per_second": 0.651, "step": 100 }, { "epoch": 0.67, "learning_rate": 1.5188318011445907e-06, "logits/chosen": -2.7399725914001465, "logits/rejected": -2.710850954055786, "logps/chosen": -123.49687194824219, "logps/rejected": -121.06239318847656, "loss": 0.6835, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.031930722296237946, "rewards/margins": 0.025511348620057106, "rewards/rejected": 0.00641937181353569, "step": 110 }, { "epoch": 0.73, "learning_rate": 1.0564148305586296e-06, "logits/chosen": -2.733646869659424, "logits/rejected": -2.7130093574523926, "logps/chosen": -116.14371490478516, "logps/rejected": -121.31622314453125, "loss": 0.6835, "rewards/accuracies": 0.559374988079071, "rewards/chosen": 0.04128889739513397, "rewards/margins": 0.021482665091753006, "rewards/rejected": 0.019806232303380966, "step": 120 }, { "epoch": 0.79, "learning_rate": 6.587997083462197e-07, "logits/chosen": -2.706106424331665, "logits/rejected": -2.683042049407959, "logps/chosen": -121.27201080322266, "logps/rejected": -124.78038024902344, "loss": 0.6784, "rewards/accuracies": 0.621874988079071, "rewards/chosen": 0.032023753970861435, "rewards/margins": 0.03133785352110863, "rewards/rejected": 0.0006858977722004056, "step": 130 }, { "epoch": 0.85, "learning_rate": 3.438351873250492e-07, "logits/chosen": -2.6774215698242188, "logits/rejected": -2.66115140914917, "logps/chosen": -111.2270736694336, "logps/rejected": -116.47705078125, "loss": 0.6774, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": 0.032939545810222626, "rewards/margins": 0.04166686534881592, "rewards/rejected": -0.008727315813302994, "step": 140 }, { "epoch": 0.91, "learning_rate": 1.2565987432367032e-07, "logits/chosen": -2.7032761573791504, "logits/rejected": -2.6723108291625977, "logps/chosen": -109.5132064819336, "logps/rejected": -114.572509765625, "loss": 0.6802, "rewards/accuracies": 0.581250011920929, "rewards/chosen": 0.037231095135211945, "rewards/margins": 0.029751187190413475, "rewards/rejected": 0.007479907013475895, "step": 150 }, { "epoch": 0.97, "learning_rate": 1.4067554877743861e-08, "logits/chosen": -2.6688144207000732, "logits/rejected": -2.6238021850585938, "logps/chosen": -110.0870361328125, "logps/rejected": -108.6930160522461, "loss": 0.68, "rewards/accuracies": 0.5843750238418579, "rewards/chosen": 0.050410233438014984, "rewards/margins": 0.03611644357442856, "rewards/rejected": 0.014293788000941277, "step": 160 }, { "epoch": 1.0, "step": 165, "total_flos": 0.0, "train_loss": 0.685761218359976, "train_runtime": 6660.773, "train_samples_per_second": 3.173, "train_steps_per_second": 0.025 } ], "logging_steps": 10, "max_steps": 165, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }