{
  "best_metric": 0.16122232377529144,
  "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.7-KTO_Experiment with a strict verification of positive and negative examples-3_max_steps-135_batch_16_2024-04-08_ppid_9/checkpoint-100",
  "epoch": 2.197802197802198,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.44,
      "grad_norm": 3.36242938041687,
      "kl": 9.133125305175781,
      "learning_rate": 0.00018,
      "logps/chosen": -206.2735595703125,
      "logps/rejected": -335.83697509765625,
      "loss": 0.3842,
      "rewards/chosen": 0.7616328597068787,
      "rewards/margins": 2.642458438873291,
      "rewards/rejected": -1.7609893083572388,
      "step": 20
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.063199043273926,
      "kl": 4.093469142913818,
      "learning_rate": 0.00014923076923076923,
      "logps/chosen": -193.60472106933594,
      "logps/rejected": -353.46917724609375,
      "loss": 0.2383,
      "rewards/chosen": 2.5526974201202393,
      "rewards/margins": 7.415192127227783,
      "rewards/rejected": -4.737576007843018,
      "step": 40
    },
    {
      "epoch": 1.1,
      "eval_kl": 7.046002388000488,
      "eval_logps/chosen": -208.6980743408203,
      "eval_logps/rejected": -355.1390686035156,
      "eval_loss": 0.19116578996181488,
      "eval_rewards/chosen": 2.8550193309783936,
      "eval_rewards/margins": 9.79314136505127,
      "eval_rewards/rejected": -7.435754776000977,
      "eval_runtime": 38.6482,
      "eval_samples_per_second": 2.096,
      "eval_steps_per_second": 0.543,
      "step": 50
    },
    {
      "epoch": 1.32,
      "grad_norm": 2.2549781799316406,
      "kl": 7.282756805419922,
      "learning_rate": 0.00011846153846153846,
      "logps/chosen": -168.28033447265625,
      "logps/rejected": -366.1405944824219,
      "loss": 0.1573,
      "rewards/chosen": 4.453644752502441,
      "rewards/margins": 11.622222900390625,
      "rewards/rejected": -7.442926406860352,
      "step": 60
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.4295282363891602,
      "kl": 3.5643277168273926,
      "learning_rate": 8.76923076923077e-05,
      "logps/chosen": -181.92678833007812,
      "logps/rejected": -439.3050842285156,
      "loss": 0.1193,
      "rewards/chosen": 4.041715145111084,
      "rewards/margins": 15.924114227294922,
      "rewards/rejected": -11.56220531463623,
      "step": 80
    },
    {
      "epoch": 2.2,
      "grad_norm": 1.01235830783844,
      "kl": 2.842973470687866,
      "learning_rate": 5.692307692307692e-05,
      "logps/chosen": -155.36322021484375,
      "logps/rejected": -392.3459777832031,
      "loss": 0.0633,
      "rewards/chosen": 5.212385177612305,
      "rewards/margins": 15.498960494995117,
      "rewards/rejected": -10.24057674407959,
      "step": 100
    },
    {
      "epoch": 2.2,
      "eval_kl": 7.762875556945801,
      "eval_logps/chosen": -205.56210327148438,
      "eval_logps/rejected": -361.947998046875,
      "eval_loss": 0.16122232377529144,
      "eval_rewards/chosen": 3.168619155883789,
      "eval_rewards/margins": 10.616473197937012,
      "eval_rewards/rejected": -8.11664867401123,
      "eval_runtime": 38.6382,
      "eval_samples_per_second": 2.096,
      "eval_steps_per_second": 0.544,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 135,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}