ironrock's picture
Training in progress, step 100, checkpoint
b8b149e verified
raw
history blame
3.66 kB
{
"best_metric": 0.16122232377529144,
"best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.7-KTO_Experiment with a strict verification of positive and negative examples-3_max_steps-135_batch_16_2024-04-08_ppid_9/checkpoint-100",
"epoch": 2.197802197802198,
"eval_steps": 50,
"global_step": 100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.44,
"grad_norm": 3.36242938041687,
"kl": 9.133125305175781,
"learning_rate": 0.00018,
"logps/chosen": -206.2735595703125,
"logps/rejected": -335.83697509765625,
"loss": 0.3842,
"rewards/chosen": 0.7616328597068787,
"rewards/margins": 2.642458438873291,
"rewards/rejected": -1.7609893083572388,
"step": 20
},
{
"epoch": 0.88,
"grad_norm": 2.063199043273926,
"kl": 4.093469142913818,
"learning_rate": 0.00014923076923076923,
"logps/chosen": -193.60472106933594,
"logps/rejected": -353.46917724609375,
"loss": 0.2383,
"rewards/chosen": 2.5526974201202393,
"rewards/margins": 7.415192127227783,
"rewards/rejected": -4.737576007843018,
"step": 40
},
{
"epoch": 1.1,
"eval_kl": 7.046002388000488,
"eval_logps/chosen": -208.6980743408203,
"eval_logps/rejected": -355.1390686035156,
"eval_loss": 0.19116578996181488,
"eval_rewards/chosen": 2.8550193309783936,
"eval_rewards/margins": 9.79314136505127,
"eval_rewards/rejected": -7.435754776000977,
"eval_runtime": 38.6482,
"eval_samples_per_second": 2.096,
"eval_steps_per_second": 0.543,
"step": 50
},
{
"epoch": 1.32,
"grad_norm": 2.2549781799316406,
"kl": 7.282756805419922,
"learning_rate": 0.00011846153846153846,
"logps/chosen": -168.28033447265625,
"logps/rejected": -366.1405944824219,
"loss": 0.1573,
"rewards/chosen": 4.453644752502441,
"rewards/margins": 11.622222900390625,
"rewards/rejected": -7.442926406860352,
"step": 60
},
{
"epoch": 1.76,
"grad_norm": 1.4295282363891602,
"kl": 3.5643277168273926,
"learning_rate": 8.76923076923077e-05,
"logps/chosen": -181.92678833007812,
"logps/rejected": -439.3050842285156,
"loss": 0.1193,
"rewards/chosen": 4.041715145111084,
"rewards/margins": 15.924114227294922,
"rewards/rejected": -11.56220531463623,
"step": 80
},
{
"epoch": 2.2,
"grad_norm": 1.01235830783844,
"kl": 2.842973470687866,
"learning_rate": 5.692307692307692e-05,
"logps/chosen": -155.36322021484375,
"logps/rejected": -392.3459777832031,
"loss": 0.0633,
"rewards/chosen": 5.212385177612305,
"rewards/margins": 15.498960494995117,
"rewards/rejected": -10.24057674407959,
"step": 100
},
{
"epoch": 2.2,
"eval_kl": 7.762875556945801,
"eval_logps/chosen": -205.56210327148438,
"eval_logps/rejected": -361.947998046875,
"eval_loss": 0.16122232377529144,
"eval_rewards/chosen": 3.168619155883789,
"eval_rewards/margins": 10.616473197937012,
"eval_rewards/rejected": -8.11664867401123,
"eval_runtime": 38.6382,
"eval_samples_per_second": 2.096,
"eval_steps_per_second": 0.544,
"step": 100
}
],
"logging_steps": 20,
"max_steps": 135,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}