File size: 6,196 Bytes
f8e3a1b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9947089947089947,
"eval_steps": 1000,
"global_step": 94,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010582010582010581,
"grad_norm": 3.9034635996758364,
"learning_rate": 5e-08,
"logits/chosen": -2.8740313053131104,
"logits/rejected": -2.909637928009033,
"logps/chosen": -495.3936462402344,
"logps/rejected": -468.7409973144531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.10582010582010581,
"grad_norm": 3.740412497210408,
"learning_rate": 5e-07,
"logits/chosen": -2.8803439140319824,
"logits/rejected": -2.933382987976074,
"logps/chosen": -489.9436340332031,
"logps/rejected": -471.76068115234375,
"loss": 0.6926,
"rewards/accuracies": 0.4765625,
"rewards/chosen": 0.0005175346159376204,
"rewards/margins": 0.001013587461784482,
"rewards/rejected": -0.0004960527876392007,
"step": 10
},
{
"epoch": 0.21164021164021163,
"grad_norm": 4.0026509714343526,
"learning_rate": 4.82718437161051e-07,
"logits/chosen": -2.9089906215667725,
"logits/rejected": -2.9724087715148926,
"logps/chosen": -490.58831787109375,
"logps/rejected": -484.2608947753906,
"loss": 0.677,
"rewards/accuracies": 0.8101562261581421,
"rewards/chosen": 0.017761804163455963,
"rewards/margins": 0.033221058547496796,
"rewards/rejected": -0.015459256246685982,
"step": 20
},
{
"epoch": 0.31746031746031744,
"grad_norm": 5.045224654085605,
"learning_rate": 4.332629679574565e-07,
"logits/chosen": -2.9424614906311035,
"logits/rejected": -2.994748830795288,
"logps/chosen": -486.4335021972656,
"logps/rejected": -484.3814392089844,
"loss": 0.623,
"rewards/accuracies": 0.8492187261581421,
"rewards/chosen": 0.045325733721256256,
"rewards/margins": 0.14735476672649384,
"rewards/rejected": -0.10202904045581818,
"step": 30
},
{
"epoch": 0.42328042328042326,
"grad_norm": 3.94438220021588,
"learning_rate": 3.584709347793895e-07,
"logits/chosen": -2.92952036857605,
"logits/rejected": -3.001981019973755,
"logps/chosen": -487.70849609375,
"logps/rejected": -510.1280212402344,
"loss": 0.5749,
"rewards/accuracies": 0.85546875,
"rewards/chosen": 0.017966564744710922,
"rewards/margins": 0.2901422381401062,
"rewards/rejected": -0.2721756398677826,
"step": 40
},
{
"epoch": 0.5291005291005291,
"grad_norm": 3.0093221435481667,
"learning_rate": 2.6868252339660607e-07,
"logits/chosen": -2.92526912689209,
"logits/rejected": -2.995861768722534,
"logps/chosen": -514.0173950195312,
"logps/rejected": -578.7903442382812,
"loss": 0.4668,
"rewards/accuracies": 0.8609374761581421,
"rewards/chosen": -0.239375501871109,
"rewards/margins": 0.7664871215820312,
"rewards/rejected": -1.0058627128601074,
"step": 50
},
{
"epoch": 0.6349206349206349,
"grad_norm": 3.4147008885194587,
"learning_rate": 1.763112063972739e-07,
"logits/chosen": -2.9192681312561035,
"logits/rejected": -2.9830739498138428,
"logps/chosen": -526.348876953125,
"logps/rejected": -614.0721435546875,
"loss": 0.421,
"rewards/accuracies": 0.858593761920929,
"rewards/chosen": -0.4103693962097168,
"rewards/margins": 0.9783406257629395,
"rewards/rejected": -1.3887102603912354,
"step": 60
},
{
"epoch": 0.7407407407407407,
"grad_norm": 3.0922082498626846,
"learning_rate": 9.412754953531663e-08,
"logits/chosen": -2.918205976486206,
"logits/rejected": -2.970673084259033,
"logps/chosen": -554.398681640625,
"logps/rejected": -667.9301147460938,
"loss": 0.4035,
"rewards/accuracies": 0.8492187261581421,
"rewards/chosen": -0.6199524998664856,
"rewards/margins": 1.1833505630493164,
"rewards/rejected": -1.8033031225204468,
"step": 70
},
{
"epoch": 0.8465608465608465,
"grad_norm": 3.5694688653613595,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": -2.8878700733184814,
"logits/rejected": -2.932638645172119,
"logps/chosen": -557.0501708984375,
"logps/rejected": -678.7019653320312,
"loss": 0.3889,
"rewards/accuracies": 0.8578125238418579,
"rewards/chosen": -0.7133009433746338,
"rewards/margins": 1.2904046773910522,
"rewards/rejected": -2.0037055015563965,
"step": 80
},
{
"epoch": 0.9523809523809523,
"grad_norm": 3.149374311366939,
"learning_rate": 2.7922934437178692e-09,
"logits/chosen": -2.89034366607666,
"logits/rejected": -2.936624765396118,
"logps/chosen": -562.55322265625,
"logps/rejected": -680.3013305664062,
"loss": 0.3864,
"rewards/accuracies": 0.836718738079071,
"rewards/chosen": -0.7662609815597534,
"rewards/margins": 1.2681959867477417,
"rewards/rejected": -2.034456968307495,
"step": 90
},
{
"epoch": 0.9947089947089947,
"step": 94,
"total_flos": 0.0,
"train_loss": 0.5087684798747936,
"train_runtime": 2521.2346,
"train_samples_per_second": 38.335,
"train_steps_per_second": 0.037
}
],
"logging_steps": 10,
"max_steps": 94,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}
|