{ "best_metric": 0.04710844159126282, "best_model_checkpoint": "./mistral/22-04-24-Weni-WeniGPT-Agents-Mistral-1.0.0-SFT-1.0.24-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-180_batch_8_2024-04-22_ppid_9/checkpoint-90", "epoch": 2.903225806451613, "eval_steps": 30, "global_step": 90, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3225806451612903, "grad_norm": 18.102449417114258, "learning_rate": 4.971264367816092e-06, "logits/chosen": -1.8189502954483032, "logits/rejected": -1.8384132385253906, "logps/chosen": -183.32998657226562, "logps/rejected": -217.82275390625, "loss": 0.6772, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": 0.028210829943418503, "rewards/margins": 0.022311249747872353, "rewards/rejected": 0.0058995820581912994, "step": 10 }, { "epoch": 0.6451612903225806, "grad_norm": 20.25644302368164, "learning_rate": 4.71264367816092e-06, "logits/chosen": -1.8640209436416626, "logits/rejected": -1.8766177892684937, "logps/chosen": -152.9895477294922, "logps/rejected": -246.1815948486328, "loss": 0.4471, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5268799662590027, "rewards/margins": 0.6505464315414429, "rewards/rejected": -0.12366630882024765, "step": 20 }, { "epoch": 0.967741935483871, "grad_norm": 23.647602081298828, "learning_rate": 4.42528735632184e-06, "logits/chosen": -1.8179765939712524, "logits/rejected": -1.8386437892913818, "logps/chosen": -154.68057250976562, "logps/rejected": -222.60986328125, "loss": 0.2964, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1595659255981445, "rewards/margins": 1.5337896347045898, "rewards/rejected": -0.3742235600948334, "step": 30 }, { "epoch": 0.967741935483871, "eval_logits/chosen": -1.922228217124939, "eval_logits/rejected": -1.9377918243408203, "eval_logps/chosen": -190.84751892089844, "eval_logps/rejected": -194.19583129882812, "eval_loss": 0.25011441111564636, "eval_rewards/accuracies": 0.8571428656578064, "eval_rewards/chosen": 0.8715044856071472, "eval_rewards/margins": 1.0091807842254639, "eval_rewards/rejected": -0.13767634332180023, "eval_runtime": 15.9917, "eval_samples_per_second": 1.751, "eval_steps_per_second": 0.438, "step": 30 }, { "epoch": 1.2903225806451613, "grad_norm": 14.360973358154297, "learning_rate": 4.137931034482759e-06, "logits/chosen": -1.7312577962875366, "logits/rejected": -1.720571517944336, "logps/chosen": -133.35958862304688, "logps/rejected": -174.8995361328125, "loss": 0.1702, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.0476880073547363, "rewards/margins": 2.6601643562316895, "rewards/rejected": -0.6124762296676636, "step": 40 }, { "epoch": 1.6129032258064515, "grad_norm": 7.5603814125061035, "learning_rate": 3.850574712643678e-06, "logits/chosen": -1.814079999923706, "logits/rejected": -1.8412790298461914, "logps/chosen": -135.43051147460938, "logps/rejected": -218.2666473388672, "loss": 0.1671, "rewards/accuracies": 1.0, "rewards/chosen": 2.6564764976501465, "rewards/margins": 3.553129196166992, "rewards/rejected": -0.8966524004936218, "step": 50 }, { "epoch": 1.935483870967742, "grad_norm": 1.6607310771942139, "learning_rate": 3.563218390804598e-06, "logits/chosen": -1.8398892879486084, "logits/rejected": -1.8503402471542358, "logps/chosen": -147.40121459960938, "logps/rejected": -175.20462036132812, "loss": 0.1148, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 2.9025120735168457, "rewards/margins": 3.795964002609253, "rewards/rejected": -0.8934518098831177, "step": 60 }, { "epoch": 1.935483870967742, "eval_logits/chosen": -1.933964729309082, "eval_logits/rejected": -1.9501203298568726, "eval_logps/chosen": -186.97340393066406, "eval_logps/rejected": -197.0532989501953, "eval_loss": 0.11131088435649872, "eval_rewards/accuracies": 0.8571428656578064, "eval_rewards/chosen": 1.6463254690170288, "eval_rewards/margins": 2.355492115020752, "eval_rewards/rejected": -0.7091667056083679, "eval_runtime": 15.9564, "eval_samples_per_second": 1.755, "eval_steps_per_second": 0.439, "step": 60 }, { "epoch": 2.258064516129032, "grad_norm": 1.0933343172073364, "learning_rate": 3.2758620689655175e-06, "logits/chosen": -1.8214714527130127, "logits/rejected": -1.8398475646972656, "logps/chosen": -123.23341369628906, "logps/rejected": -169.8323974609375, "loss": 0.0958, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 3.013068914413452, "rewards/margins": 3.9151065349578857, "rewards/rejected": -0.9020377993583679, "step": 70 }, { "epoch": 2.5806451612903225, "grad_norm": 1.6529628038406372, "learning_rate": 2.988505747126437e-06, "logits/chosen": -1.8991115093231201, "logits/rejected": -1.9303562641143799, "logps/chosen": -126.10279846191406, "logps/rejected": -249.8067169189453, "loss": 0.0542, "rewards/accuracies": 1.0, "rewards/chosen": 2.2060089111328125, "rewards/margins": 4.4772820472717285, "rewards/rejected": -2.271273136138916, "step": 80 }, { "epoch": 2.903225806451613, "grad_norm": 1.3197133541107178, "learning_rate": 2.7011494252873567e-06, "logits/chosen": -1.9221817255020142, "logits/rejected": -1.926163911819458, "logps/chosen": -130.5477752685547, "logps/rejected": -212.64401245117188, "loss": 0.0655, "rewards/accuracies": 1.0, "rewards/chosen": 3.013507127761841, "rewards/margins": 4.679448127746582, "rewards/rejected": -1.665941596031189, "step": 90 }, { "epoch": 2.903225806451613, "eval_logits/chosen": -1.9456537961959839, "eval_logits/rejected": -1.9615535736083984, "eval_logps/chosen": -184.64419555664062, "eval_logps/rejected": -200.91307067871094, "eval_loss": 0.04710844159126282, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": 2.112164258956909, "eval_rewards/margins": 3.5932810306549072, "eval_rewards/rejected": -1.4811171293258667, "eval_runtime": 15.9489, "eval_samples_per_second": 1.756, "eval_steps_per_second": 0.439, "step": 90 } ], "logging_steps": 10, "max_steps": 180, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 90, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }