{ "best_metric": 0.16122232377529144, "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.7-KTO_Experiment with a strict verification of positive and negative examples-3_max_steps-135_batch_16_2024-04-08_ppid_9/checkpoint-100", "epoch": 2.197802197802198, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "grad_norm": 3.36242938041687, "kl": 9.133125305175781, "learning_rate": 0.00018, "logps/chosen": -206.2735595703125, "logps/rejected": -335.83697509765625, "loss": 0.3842, "rewards/chosen": 0.7616328597068787, "rewards/margins": 2.642458438873291, "rewards/rejected": -1.7609893083572388, "step": 20 }, { "epoch": 0.88, "grad_norm": 2.063199043273926, "kl": 4.093469142913818, "learning_rate": 0.00014923076923076923, "logps/chosen": -193.60472106933594, "logps/rejected": -353.46917724609375, "loss": 0.2383, "rewards/chosen": 2.5526974201202393, "rewards/margins": 7.415192127227783, "rewards/rejected": -4.737576007843018, "step": 40 }, { "epoch": 1.1, "eval_kl": 7.046002388000488, "eval_logps/chosen": -208.6980743408203, "eval_logps/rejected": -355.1390686035156, "eval_loss": 0.19116578996181488, "eval_rewards/chosen": 2.8550193309783936, "eval_rewards/margins": 9.79314136505127, "eval_rewards/rejected": -7.435754776000977, "eval_runtime": 38.6482, "eval_samples_per_second": 2.096, "eval_steps_per_second": 0.543, "step": 50 }, { "epoch": 1.32, "grad_norm": 2.2549781799316406, "kl": 7.282756805419922, "learning_rate": 0.00011846153846153846, "logps/chosen": -168.28033447265625, "logps/rejected": -366.1405944824219, "loss": 0.1573, "rewards/chosen": 4.453644752502441, "rewards/margins": 11.622222900390625, "rewards/rejected": -7.442926406860352, "step": 60 }, { "epoch": 1.76, "grad_norm": 1.4295282363891602, "kl": 3.5643277168273926, "learning_rate": 8.76923076923077e-05, "logps/chosen": -181.92678833007812, "logps/rejected": -439.3050842285156, "loss": 0.1193, "rewards/chosen": 4.041715145111084, "rewards/margins": 15.924114227294922, "rewards/rejected": -11.56220531463623, "step": 80 }, { "epoch": 2.2, "grad_norm": 1.01235830783844, "kl": 2.842973470687866, "learning_rate": 5.692307692307692e-05, "logps/chosen": -155.36322021484375, "logps/rejected": -392.3459777832031, "loss": 0.0633, "rewards/chosen": 5.212385177612305, "rewards/margins": 15.498960494995117, "rewards/rejected": -10.24057674407959, "step": 100 }, { "epoch": 2.2, "eval_kl": 7.762875556945801, "eval_logps/chosen": -205.56210327148438, "eval_logps/rejected": -361.947998046875, "eval_loss": 0.16122232377529144, "eval_rewards/chosen": 3.168619155883789, "eval_rewards/margins": 10.616473197937012, "eval_rewards/rejected": -8.11664867401123, "eval_runtime": 38.6382, "eval_samples_per_second": 2.096, "eval_steps_per_second": 0.544, "step": 100 } ], "logging_steps": 20, "max_steps": 135, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }