ironrock's picture
Training in progress, step 90, checkpoint
d92f3f8 verified
raw
history blame
7.3 kB
{
"best_metric": 0.464372456073761,
"best_model_checkpoint": "./mistral/21-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.11-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-21_ppid_9/checkpoint-90",
"epoch": 1.4634146341463414,
"eval_steps": 30,
"global_step": 90,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16260162601626016,
"grad_norm": 11.074726104736328,
"learning_rate": 3.6363636363636366e-06,
"logits/chosen": -1.7639459371566772,
"logits/rejected": -1.8396574258804321,
"logps/chosen": -186.35623168945312,
"logps/rejected": -242.7454376220703,
"loss": 0.6923,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.0025298120453953743,
"rewards/margins": 0.002917728153988719,
"rewards/rejected": -0.000387916574254632,
"step": 10
},
{
"epoch": 0.3252032520325203,
"grad_norm": 13.123202323913574,
"learning_rate": 4.9154929577464795e-06,
"logits/chosen": -1.8020210266113281,
"logits/rejected": -1.827330231666565,
"logps/chosen": -179.5525665283203,
"logps/rejected": -159.48426818847656,
"loss": 0.6821,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.0269151721149683,
"rewards/margins": 0.020867865532636642,
"rewards/rejected": 0.00604730611667037,
"step": 20
},
{
"epoch": 0.4878048780487805,
"grad_norm": 14.401883125305176,
"learning_rate": 4.774647887323944e-06,
"logits/chosen": -1.7544463872909546,
"logits/rejected": -1.8119304180145264,
"logps/chosen": -131.93418884277344,
"logps/rejected": -186.62869262695312,
"loss": 0.6531,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.0737195536494255,
"rewards/margins": 0.08270301669836044,
"rewards/rejected": -0.008983459323644638,
"step": 30
},
{
"epoch": 0.4878048780487805,
"eval_logits/chosen": -1.7289739847183228,
"eval_logits/rejected": -1.7717361450195312,
"eval_logps/chosen": -135.4998321533203,
"eval_logps/rejected": -185.3151397705078,
"eval_loss": 0.6393889784812927,
"eval_rewards/accuracies": 0.7857142686843872,
"eval_rewards/chosen": 0.12710317969322205,
"eval_rewards/margins": 0.1346290558576584,
"eval_rewards/rejected": -0.007525864988565445,
"eval_runtime": 17.3728,
"eval_samples_per_second": 1.612,
"eval_steps_per_second": 0.806,
"step": 30
},
{
"epoch": 0.6504065040650406,
"grad_norm": 15.026944160461426,
"learning_rate": 4.647887323943662e-06,
"logits/chosen": -1.8521251678466797,
"logits/rejected": -1.9095609188079834,
"logps/chosen": -167.38348388671875,
"logps/rejected": -215.0012969970703,
"loss": 0.6343,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.11476834118366241,
"rewards/margins": 0.10640791803598404,
"rewards/rejected": 0.008360415697097778,
"step": 40
},
{
"epoch": 0.8130081300813008,
"grad_norm": 15.622106552124023,
"learning_rate": 4.507042253521127e-06,
"logits/chosen": -1.7336807250976562,
"logits/rejected": -1.7874727249145508,
"logps/chosen": -151.20423889160156,
"logps/rejected": -212.6553192138672,
"loss": 0.5966,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.16852515935897827,
"rewards/margins": 0.19663922488689423,
"rewards/rejected": -0.0281140748411417,
"step": 50
},
{
"epoch": 0.975609756097561,
"grad_norm": 15.992025375366211,
"learning_rate": 4.3661971830985915e-06,
"logits/chosen": -1.8077377080917358,
"logits/rejected": -1.8177680969238281,
"logps/chosen": -166.0524139404297,
"logps/rejected": -166.77749633789062,
"loss": 0.5732,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 0.28099626302719116,
"rewards/margins": 0.25574439764022827,
"rewards/rejected": 0.0252518467605114,
"step": 60
},
{
"epoch": 0.975609756097561,
"eval_logits/chosen": -1.734244465827942,
"eval_logits/rejected": -1.7778174877166748,
"eval_logps/chosen": -133.12557983398438,
"eval_logps/rejected": -185.54177856445312,
"eval_loss": 0.5475863218307495,
"eval_rewards/accuracies": 0.7857142686843872,
"eval_rewards/chosen": 0.36452820897102356,
"eval_rewards/margins": 0.39471906423568726,
"eval_rewards/rejected": -0.030190791934728622,
"eval_runtime": 17.4075,
"eval_samples_per_second": 1.609,
"eval_steps_per_second": 0.804,
"step": 60
},
{
"epoch": 1.1382113821138211,
"grad_norm": 13.867135047912598,
"learning_rate": 4.225352112676057e-06,
"logits/chosen": -1.8027187585830688,
"logits/rejected": -1.882067084312439,
"logps/chosen": -174.7197723388672,
"logps/rejected": -223.87844848632812,
"loss": 0.5589,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.2828216254711151,
"rewards/margins": 0.361534059047699,
"rewards/rejected": -0.07871242612600327,
"step": 70
},
{
"epoch": 1.3008130081300813,
"grad_norm": 14.660954475402832,
"learning_rate": 4.0845070422535216e-06,
"logits/chosen": -1.8306058645248413,
"logits/rejected": -1.8653805255889893,
"logps/chosen": -170.78506469726562,
"logps/rejected": -210.1864013671875,
"loss": 0.4755,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.4126508831977844,
"rewards/margins": 0.48294001817703247,
"rewards/rejected": -0.07028912007808685,
"step": 80
},
{
"epoch": 1.4634146341463414,
"grad_norm": 15.84911060333252,
"learning_rate": 3.943661971830986e-06,
"logits/chosen": -1.7707087993621826,
"logits/rejected": -1.81671142578125,
"logps/chosen": -143.22067260742188,
"logps/rejected": -205.97433471679688,
"loss": 0.5252,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.4095473885536194,
"rewards/margins": 0.5323301553726196,
"rewards/rejected": -0.12278278172016144,
"step": 90
},
{
"epoch": 1.4634146341463414,
"eval_logits/chosen": -1.74051034450531,
"eval_logits/rejected": -1.7852736711502075,
"eval_logps/chosen": -130.74990844726562,
"eval_logps/rejected": -186.0896759033203,
"eval_loss": 0.464372456073761,
"eval_rewards/accuracies": 0.7857142686843872,
"eval_rewards/chosen": 0.6020953059196472,
"eval_rewards/margins": 0.6870760321617126,
"eval_rewards/rejected": -0.08498072624206543,
"eval_runtime": 17.3793,
"eval_samples_per_second": 1.611,
"eval_steps_per_second": 0.806,
"step": 90
}
],
"logging_steps": 10,
"max_steps": 366,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 90,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}