{
  "best_metric": 0.6628985404968262,
  "best_model_checkpoint": "./Zephyr/14-03-24-Weni-WeniGPT-2.4.1-Zephyr-7B-3-epochs-LLM_Base_2.0.3_DPO_WeniGPT DPO training-2_max_steps-267_batch_32_2024-03-14_ppid_9/checkpoint-100",
  "epoch": 1.1173184357541899,
  "eval_steps": 100,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.22,
      "grad_norm": 2.263607978820801,
      "learning_rate": 3.518518518518519e-05,
      "logits/chosen": -2.214717149734497,
      "logits/rejected": -2.2155280113220215,
      "logps/chosen": -5.710334777832031,
      "logps/rejected": -9.765484809875488,
      "loss": 0.6883,
      "rewards/accuracies": 0.04062499850988388,
      "rewards/chosen": 0.009401815943419933,
      "rewards/margins": 0.012448241002857685,
      "rewards/rejected": -0.0030464245937764645,
      "step": 20
    },
    {
      "epoch": 0.45,
      "grad_norm": 0.008143444545567036,
      "learning_rate": 4.75e-05,
      "logits/chosen": -2.2037911415100098,
      "logits/rejected": -2.2049593925476074,
      "logps/chosen": -4.532853126525879,
      "logps/rejected": -9.577239036560059,
      "loss": 0.6685,
      "rewards/accuracies": 0.05000000074505806,
      "rewards/chosen": 0.05643541365861893,
      "rewards/margins": 0.14316150546073914,
      "rewards/rejected": -0.0867261067032814,
      "step": 40
    },
    {
      "epoch": 0.67,
      "grad_norm": 0.03508400544524193,
      "learning_rate": 4.3333333333333334e-05,
      "logits/chosen": -2.1209869384765625,
      "logits/rejected": -2.1222269535064697,
      "logps/chosen": -8.109282493591309,
      "logps/rejected": -18.921493530273438,
      "loss": 0.648,
      "rewards/accuracies": 0.06875000149011612,
      "rewards/chosen": -0.08420856297016144,
      "rewards/margins": 0.552670955657959,
      "rewards/rejected": -0.636879563331604,
      "step": 60
    },
    {
      "epoch": 0.89,
      "grad_norm": 9.134880656347377e-07,
      "learning_rate": 3.9166666666666665e-05,
      "logits/chosen": -2.05387020111084,
      "logits/rejected": -2.0554463863372803,
      "logps/chosen": -10.270573616027832,
      "logps/rejected": -23.5235595703125,
      "loss": 0.6617,
      "rewards/accuracies": 0.0703125,
      "rewards/chosen": -0.29610732197761536,
      "rewards/margins": 0.937633216381073,
      "rewards/rejected": -1.2337405681610107,
      "step": 80
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.011362694203853607,
      "learning_rate": 3.5e-05,
      "logits/chosen": -2.0597641468048096,
      "logits/rejected": -2.061570167541504,
      "logps/chosen": -8.65438175201416,
      "logps/rejected": -23.752864837646484,
      "loss": 0.645,
      "rewards/accuracies": 0.0703125,
      "rewards/chosen": -0.09740939736366272,
      "rewards/margins": 1.0688247680664062,
      "rewards/rejected": -1.1662342548370361,
      "step": 100
    },
    {
      "epoch": 1.12,
      "eval_logits/chosen": -2.0584123134613037,
      "eval_logits/rejected": -2.058645725250244,
      "eval_logps/chosen": -5.4038591384887695,
      "eval_logps/rejected": -15.805567741394043,
      "eval_loss": 0.6628985404968262,
      "eval_rewards/accuracies": 0.046875,
      "eval_rewards/chosen": -0.038359977304935455,
      "eval_rewards/margins": 0.7473276853561401,
      "eval_rewards/rejected": -0.7856876254081726,
      "eval_runtime": 184.5849,
      "eval_samples_per_second": 1.723,
      "eval_steps_per_second": 0.217,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 267,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}