|
{ |
|
"best_metric": 1.6283894777297974, |
|
"best_model_checkpoint": "./Zephyr/09-03-24-Weni-WeniGPT-2.8.1-Zephyr-7B-zephyr-prompt-DPO-binarized_DPO tests with binarized dataset-2_max_steps-112_batch_16_2024-03-09_ppid_7/checkpoint-100", |
|
"epoch": 0.8888888888888888, |
|
"eval_steps": 100, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 65.50724029541016, |
|
"learning_rate": 0.000194, |
|
"logits/chosen": -2.6164324283599854, |
|
"logits/rejected": -2.6339125633239746, |
|
"logps/chosen": -366.4737854003906, |
|
"logps/rejected": -344.0569763183594, |
|
"loss": 1.0162, |
|
"rewards/accuracies": 0.3218750059604645, |
|
"rewards/chosen": -0.37838560342788696, |
|
"rewards/margins": 0.8077453374862671, |
|
"rewards/rejected": -1.1861308813095093, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 302.9382629394531, |
|
"learning_rate": 0.000154, |
|
"logits/chosen": -2.624668598175049, |
|
"logits/rejected": -2.626055955886841, |
|
"logps/chosen": -387.1952209472656, |
|
"logps/rejected": -358.3865661621094, |
|
"loss": 2.0677, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 8.134626388549805, |
|
"rewards/margins": 6.054707050323486, |
|
"rewards/rejected": 2.0799195766448975, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 105.22598266601562, |
|
"learning_rate": 0.00011399999999999999, |
|
"logits/chosen": -2.7092220783233643, |
|
"logits/rejected": -2.7026538848876953, |
|
"logps/chosen": -317.4967041015625, |
|
"logps/rejected": -311.2018737792969, |
|
"loss": 2.2608, |
|
"rewards/accuracies": 0.47187501192092896, |
|
"rewards/chosen": 0.6841039657592773, |
|
"rewards/margins": 5.323451042175293, |
|
"rewards/rejected": -4.639346599578857, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 82.97381591796875, |
|
"learning_rate": 7.4e-05, |
|
"logits/chosen": -2.659626007080078, |
|
"logits/rejected": -2.6671993732452393, |
|
"logps/chosen": -358.8479919433594, |
|
"logps/rejected": -344.2733154296875, |
|
"loss": 2.3074, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.8363685607910156, |
|
"rewards/margins": 4.435623645782471, |
|
"rewards/rejected": -5.2719926834106445, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 139.97763061523438, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"logits/chosen": -2.6330013275146484, |
|
"logits/rejected": -2.6500449180603027, |
|
"logps/chosen": -378.33270263671875, |
|
"logps/rejected": -368.93402099609375, |
|
"loss": 2.0157, |
|
"rewards/accuracies": 0.5093749761581421, |
|
"rewards/chosen": -0.5400040745735168, |
|
"rewards/margins": 4.212619304656982, |
|
"rewards/rejected": -4.752623558044434, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_logits/chosen": -2.5779407024383545, |
|
"eval_logits/rejected": -2.5749547481536865, |
|
"eval_logps/chosen": -362.0609130859375, |
|
"eval_logps/rejected": -357.7032165527344, |
|
"eval_loss": 1.6283894777297974, |
|
"eval_rewards/accuracies": 0.5649999976158142, |
|
"eval_rewards/chosen": 0.8732965588569641, |
|
"eval_rewards/margins": 6.959313869476318, |
|
"eval_rewards/rejected": -6.086017608642578, |
|
"eval_runtime": 99.816, |
|
"eval_samples_per_second": 2.004, |
|
"eval_steps_per_second": 0.501, |
|
"step": 100 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|