{
  "best_metric": 0.464372456073761,
  "best_model_checkpoint": "./mistral/21-04-24-Weni-WeniGPT-Agents-Mistral-1.0.6-SFT-1.0.11-DPO_Experiment on DPO with other hyperparameters and best SFT model of WeniGPT-2_max_steps-366_batch_4_2024-04-21_ppid_9/checkpoint-90",
  "epoch": 1.4634146341463414,
  "eval_steps": 30,
  "global_step": 90,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16260162601626016,
      "grad_norm": 11.074726104736328,
      "learning_rate": 3.6363636363636366e-06,
      "logits/chosen": -1.7639459371566772,
      "logits/rejected": -1.8396574258804321,
      "logps/chosen": -186.35623168945312,
      "logps/rejected": -242.7454376220703,
      "loss": 0.6923,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.0025298120453953743,
      "rewards/margins": 0.002917728153988719,
      "rewards/rejected": -0.000387916574254632,
      "step": 10
    },
    {
      "epoch": 0.3252032520325203,
      "grad_norm": 13.123202323913574,
      "learning_rate": 4.9154929577464795e-06,
      "logits/chosen": -1.8020210266113281,
      "logits/rejected": -1.827330231666565,
      "logps/chosen": -179.5525665283203,
      "logps/rejected": -159.48426818847656,
      "loss": 0.6821,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.0269151721149683,
      "rewards/margins": 0.020867865532636642,
      "rewards/rejected": 0.00604730611667037,
      "step": 20
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 14.401883125305176,
      "learning_rate": 4.774647887323944e-06,
      "logits/chosen": -1.7544463872909546,
      "logits/rejected": -1.8119304180145264,
      "logps/chosen": -131.93418884277344,
      "logps/rejected": -186.62869262695312,
      "loss": 0.6531,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 0.0737195536494255,
      "rewards/margins": 0.08270301669836044,
      "rewards/rejected": -0.008983459323644638,
      "step": 30
    },
    {
      "epoch": 0.4878048780487805,
      "eval_logits/chosen": -1.7289739847183228,
      "eval_logits/rejected": -1.7717361450195312,
      "eval_logps/chosen": -135.4998321533203,
      "eval_logps/rejected": -185.3151397705078,
      "eval_loss": 0.6393889784812927,
      "eval_rewards/accuracies": 0.7857142686843872,
      "eval_rewards/chosen": 0.12710317969322205,
      "eval_rewards/margins": 0.1346290558576584,
      "eval_rewards/rejected": -0.007525864988565445,
      "eval_runtime": 17.3728,
      "eval_samples_per_second": 1.612,
      "eval_steps_per_second": 0.806,
      "step": 30
    },
    {
      "epoch": 0.6504065040650406,
      "grad_norm": 15.026944160461426,
      "learning_rate": 4.647887323943662e-06,
      "logits/chosen": -1.8521251678466797,
      "logits/rejected": -1.9095609188079834,
      "logps/chosen": -167.38348388671875,
      "logps/rejected": -215.0012969970703,
      "loss": 0.6343,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": 0.11476834118366241,
      "rewards/margins": 0.10640791803598404,
      "rewards/rejected": 0.008360415697097778,
      "step": 40
    },
    {
      "epoch": 0.8130081300813008,
      "grad_norm": 15.622106552124023,
      "learning_rate": 4.507042253521127e-06,
      "logits/chosen": -1.7336807250976562,
      "logits/rejected": -1.7874727249145508,
      "logps/chosen": -151.20423889160156,
      "logps/rejected": -212.6553192138672,
      "loss": 0.5966,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 0.16852515935897827,
      "rewards/margins": 0.19663922488689423,
      "rewards/rejected": -0.0281140748411417,
      "step": 50
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 15.992025375366211,
      "learning_rate": 4.3661971830985915e-06,
      "logits/chosen": -1.8077377080917358,
      "logits/rejected": -1.8177680969238281,
      "logps/chosen": -166.0524139404297,
      "logps/rejected": -166.77749633789062,
      "loss": 0.5732,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": 0.28099626302719116,
      "rewards/margins": 0.25574439764022827,
      "rewards/rejected": 0.0252518467605114,
      "step": 60
    },
    {
      "epoch": 0.975609756097561,
      "eval_logits/chosen": -1.734244465827942,
      "eval_logits/rejected": -1.7778174877166748,
      "eval_logps/chosen": -133.12557983398438,
      "eval_logps/rejected": -185.54177856445312,
      "eval_loss": 0.5475863218307495,
      "eval_rewards/accuracies": 0.7857142686843872,
      "eval_rewards/chosen": 0.36452820897102356,
      "eval_rewards/margins": 0.39471906423568726,
      "eval_rewards/rejected": -0.030190791934728622,
      "eval_runtime": 17.4075,
      "eval_samples_per_second": 1.609,
      "eval_steps_per_second": 0.804,
      "step": 60
    },
    {
      "epoch": 1.1382113821138211,
      "grad_norm": 13.867135047912598,
      "learning_rate": 4.225352112676057e-06,
      "logits/chosen": -1.8027187585830688,
      "logits/rejected": -1.882067084312439,
      "logps/chosen": -174.7197723388672,
      "logps/rejected": -223.87844848632812,
      "loss": 0.5589,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 0.2828216254711151,
      "rewards/margins": 0.361534059047699,
      "rewards/rejected": -0.07871242612600327,
      "step": 70
    },
    {
      "epoch": 1.3008130081300813,
      "grad_norm": 14.660954475402832,
      "learning_rate": 4.0845070422535216e-06,
      "logits/chosen": -1.8306058645248413,
      "logits/rejected": -1.8653805255889893,
      "logps/chosen": -170.78506469726562,
      "logps/rejected": -210.1864013671875,
      "loss": 0.4755,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 0.4126508831977844,
      "rewards/margins": 0.48294001817703247,
      "rewards/rejected": -0.07028912007808685,
      "step": 80
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 15.84911060333252,
      "learning_rate": 3.943661971830986e-06,
      "logits/chosen": -1.7707087993621826,
      "logits/rejected": -1.81671142578125,
      "logps/chosen": -143.22067260742188,
      "logps/rejected": -205.97433471679688,
      "loss": 0.5252,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.4095473885536194,
      "rewards/margins": 0.5323301553726196,
      "rewards/rejected": -0.12278278172016144,
      "step": 90
    },
    {
      "epoch": 1.4634146341463414,
      "eval_logits/chosen": -1.74051034450531,
      "eval_logits/rejected": -1.7852736711502075,
      "eval_logps/chosen": -130.74990844726562,
      "eval_logps/rejected": -186.0896759033203,
      "eval_loss": 0.464372456073761,
      "eval_rewards/accuracies": 0.7857142686843872,
      "eval_rewards/chosen": 0.6020953059196472,
      "eval_rewards/margins": 0.6870760321617126,
      "eval_rewards/rejected": -0.08498072624206543,
      "eval_runtime": 17.3793,
      "eval_samples_per_second": 1.611,
      "eval_steps_per_second": 0.806,
      "step": 90
    }
  ],
  "logging_steps": 10,
  "max_steps": 366,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 90,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}