{
  "best_metric": 1.6283894777297974,
  "best_model_checkpoint": "./Zephyr/09-03-24-Weni-WeniGPT-2.8.1-Zephyr-7B-zephyr-prompt-DPO-binarized_DPO tests with binarized dataset-2_max_steps-112_batch_16_2024-03-09_ppid_7/checkpoint-100",
  "epoch": 0.8888888888888888,
  "eval_steps": 100,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18,
      "grad_norm": 65.50724029541016,
      "learning_rate": 0.000194,
      "logits/chosen": -2.6164324283599854,
      "logits/rejected": -2.6339125633239746,
      "logps/chosen": -366.4737854003906,
      "logps/rejected": -344.0569763183594,
      "loss": 1.0162,
      "rewards/accuracies": 0.3218750059604645,
      "rewards/chosen": -0.37838560342788696,
      "rewards/margins": 0.8077453374862671,
      "rewards/rejected": -1.1861308813095093,
      "step": 20
    },
    {
      "epoch": 0.36,
      "grad_norm": 302.9382629394531,
      "learning_rate": 0.000154,
      "logits/chosen": -2.624668598175049,
      "logits/rejected": -2.626055955886841,
      "logps/chosen": -387.1952209472656,
      "logps/rejected": -358.3865661621094,
      "loss": 2.0677,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": 8.134626388549805,
      "rewards/margins": 6.054707050323486,
      "rewards/rejected": 2.0799195766448975,
      "step": 40
    },
    {
      "epoch": 0.53,
      "grad_norm": 105.22598266601562,
      "learning_rate": 0.00011399999999999999,
      "logits/chosen": -2.7092220783233643,
      "logits/rejected": -2.7026538848876953,
      "logps/chosen": -317.4967041015625,
      "logps/rejected": -311.2018737792969,
      "loss": 2.2608,
      "rewards/accuracies": 0.47187501192092896,
      "rewards/chosen": 0.6841039657592773,
      "rewards/margins": 5.323451042175293,
      "rewards/rejected": -4.639346599578857,
      "step": 60
    },
    {
      "epoch": 0.71,
      "grad_norm": 82.97381591796875,
      "learning_rate": 7.4e-05,
      "logits/chosen": -2.659626007080078,
      "logits/rejected": -2.6671993732452393,
      "logps/chosen": -358.8479919433594,
      "logps/rejected": -344.2733154296875,
      "loss": 2.3074,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.8363685607910156,
      "rewards/margins": 4.435623645782471,
      "rewards/rejected": -5.2719926834106445,
      "step": 80
    },
    {
      "epoch": 0.89,
      "grad_norm": 139.97763061523438,
      "learning_rate": 3.4000000000000007e-05,
      "logits/chosen": -2.6330013275146484,
      "logits/rejected": -2.6500449180603027,
      "logps/chosen": -378.33270263671875,
      "logps/rejected": -368.93402099609375,
      "loss": 2.0157,
      "rewards/accuracies": 0.5093749761581421,
      "rewards/chosen": -0.5400040745735168,
      "rewards/margins": 4.212619304656982,
      "rewards/rejected": -4.752623558044434,
      "step": 100
    },
    {
      "epoch": 0.89,
      "eval_logits/chosen": -2.5779407024383545,
      "eval_logits/rejected": -2.5749547481536865,
      "eval_logps/chosen": -362.0609130859375,
      "eval_logps/rejected": -357.7032165527344,
      "eval_loss": 1.6283894777297974,
      "eval_rewards/accuracies": 0.5649999976158142,
      "eval_rewards/chosen": 0.8732965588569641,
      "eval_rewards/margins": 6.959313869476318,
      "eval_rewards/rejected": -6.086017608642578,
      "eval_runtime": 99.816,
      "eval_samples_per_second": 2.004,
      "eval_steps_per_second": 0.501,
      "step": 100
    }
  ],
  "logging_steps": 20,
  "max_steps": 112,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}