File size: 13,529 Bytes
0f9feac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.996510067114094,
"eval_steps": 400,
"global_step": 116,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008590604026845637,
"grad_norm": 86.01569524610598,
"learning_rate": 4.166666666666666e-08,
"logits/chosen": -2.431039571762085,
"logits/rejected": -2.618009090423584,
"logps/chosen": -1197.8489990234375,
"logps/rejected": -7907.7099609375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.042953020134228186,
"grad_norm": 81.41508200934528,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -2.8407392501831055,
"logits/rejected": -3.0651891231536865,
"logps/chosen": -1897.8328857421875,
"logps/rejected": -9360.8955078125,
"loss": 0.692,
"rewards/accuracies": 0.5078125,
"rewards/chosen": -0.00021657101751770824,
"rewards/margins": 0.003072525840252638,
"rewards/rejected": -0.003289096988737583,
"step": 5
},
{
"epoch": 0.08590604026845637,
"grad_norm": 77.79677990779399,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -2.6457934379577637,
"logits/rejected": -2.990572690963745,
"logps/chosen": -1608.490478515625,
"logps/rejected": -9246.4970703125,
"loss": 0.6499,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.021037336438894272,
"rewards/margins": 0.08746902644634247,
"rewards/rejected": -0.10850635915994644,
"step": 10
},
{
"epoch": 0.12885906040268458,
"grad_norm": 34.39608246130055,
"learning_rate": 4.989741394042727e-07,
"logits/chosen": -2.5478570461273193,
"logits/rejected": -2.9816832542419434,
"logps/chosen": -1612.6597900390625,
"logps/rejected": -9213.7060546875,
"loss": 0.4677,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -0.11297205835580826,
"rewards/margins": 0.6206797361373901,
"rewards/rejected": -0.7336517572402954,
"step": 15
},
{
"epoch": 0.17181208053691274,
"grad_norm": 5.898677172155064,
"learning_rate": 4.92735454356513e-07,
"logits/chosen": -2.497690200805664,
"logits/rejected": -3.0749311447143555,
"logps/chosen": -1422.3905029296875,
"logps/rejected": -9622.453125,
"loss": 0.2772,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": -0.46565741300582886,
"rewards/margins": 2.972280979156494,
"rewards/rejected": -3.437938690185547,
"step": 20
},
{
"epoch": 0.21476510067114093,
"grad_norm": 4.142364189438871,
"learning_rate": 4.809698831278217e-07,
"logits/chosen": -2.579451322555542,
"logits/rejected": -3.227189540863037,
"logps/chosen": -1883.857421875,
"logps/rejected": -10082.71875,
"loss": 0.255,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.9519465565681458,
"rewards/margins": 5.0396575927734375,
"rewards/rejected": -5.991604328155518,
"step": 25
},
{
"epoch": 0.25771812080536916,
"grad_norm": 7.398342740617321,
"learning_rate": 4.639453180753619e-07,
"logits/chosen": -2.527676820755005,
"logits/rejected": -3.185889959335327,
"logps/chosen": -2057.02001953125,
"logps/rejected": -9853.166015625,
"loss": 0.2471,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": -1.8792552947998047,
"rewards/margins": 8.077213287353516,
"rewards/rejected": -9.956467628479004,
"step": 30
},
{
"epoch": 0.3006711409395973,
"grad_norm": 12.059892215323622,
"learning_rate": 4.420493945100701e-07,
"logits/chosen": -2.484814167022705,
"logits/rejected": -3.121709108352661,
"logps/chosen": -2086.67919921875,
"logps/rejected": -9674.890625,
"loss": 0.274,
"rewards/accuracies": 0.875,
"rewards/chosen": -2.0851800441741943,
"rewards/margins": 7.848902225494385,
"rewards/rejected": -9.934083938598633,
"step": 35
},
{
"epoch": 0.3436241610738255,
"grad_norm": 28.99159189374227,
"learning_rate": 4.157806645601988e-07,
"logits/chosen": -2.427899122238159,
"logits/rejected": -2.911158800125122,
"logps/chosen": -1157.116455078125,
"logps/rejected": -10012.34765625,
"loss": 0.1935,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -1.1424468755722046,
"rewards/margins": 10.81905460357666,
"rewards/rejected": -11.961501121520996,
"step": 40
},
{
"epoch": 0.3865771812080537,
"grad_norm": 27.796451654387887,
"learning_rate": 3.857372455503697e-07,
"logits/chosen": -2.5545668601989746,
"logits/rejected": -2.8794655799865723,
"logps/chosen": -1950.16796875,
"logps/rejected": -10788.267578125,
"loss": 0.1834,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -2.45682954788208,
"rewards/margins": 11.35061264038086,
"rewards/rejected": -13.807443618774414,
"step": 45
},
{
"epoch": 0.42953020134228187,
"grad_norm": 26.159018172068677,
"learning_rate": 3.5260320136318924e-07,
"logits/chosen": -2.4899744987487793,
"logits/rejected": -2.9161746501922607,
"logps/chosen": -1632.9305419921875,
"logps/rejected": -10670.7177734375,
"loss": 0.1654,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -2.4171953201293945,
"rewards/margins": 11.587867736816406,
"rewards/rejected": -14.0050630569458,
"step": 50
},
{
"epoch": 0.47248322147651006,
"grad_norm": 27.198586027054176,
"learning_rate": 3.171329668685942e-07,
"logits/chosen": -2.460887908935547,
"logits/rejected": -2.9514319896698,
"logps/chosen": -1985.7174072265625,
"logps/rejected": -10099.3125,
"loss": 0.1791,
"rewards/accuracies": 0.9375,
"rewards/chosen": -2.890523672103882,
"rewards/margins": 9.628759384155273,
"rewards/rejected": -12.519282341003418,
"step": 55
},
{
"epoch": 0.5154362416107383,
"grad_norm": 39.49613447619216,
"learning_rate": 2.801341700638307e-07,
"logits/chosen": -2.5868403911590576,
"logits/rejected": -3.0467333793640137,
"logps/chosen": -1847.5205078125,
"logps/rejected": -10758.123046875,
"loss": 0.1843,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": -2.3636813163757324,
"rewards/margins": 10.008157730102539,
"rewards/rejected": -12.37183952331543,
"step": 60
},
{
"epoch": 0.5583892617449664,
"grad_norm": 12.8001844827942,
"learning_rate": 2.424492430497778e-07,
"logits/chosen": -2.506343126296997,
"logits/rejected": -2.965503215789795,
"logps/chosen": -2238.29443359375,
"logps/rejected": -10792.2021484375,
"loss": 0.2211,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -2.6938767433166504,
"rewards/margins": 9.952713012695312,
"rewards/rejected": -12.646589279174805,
"step": 65
},
{
"epoch": 0.6013422818791946,
"grad_norm": 15.245673155295346,
"learning_rate": 2.0493624054652355e-07,
"logits/chosen": -2.58244252204895,
"logits/rejected": -2.951399326324463,
"logps/chosen": -2069.97998046875,
"logps/rejected": -10962.5087890625,
"loss": 0.246,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -2.3197269439697266,
"rewards/margins": 10.182031631469727,
"rewards/rejected": -12.50175952911377,
"step": 70
},
{
"epoch": 0.6442953020134228,
"grad_norm": 24.590124308811014,
"learning_rate": 1.6844930269478273e-07,
"logits/chosen": -2.5273938179016113,
"logits/rejected": -2.789759397506714,
"logps/chosen": -2302.49169921875,
"logps/rejected": -10204.7763671875,
"loss": 0.2857,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": -2.474081039428711,
"rewards/margins": 8.876008987426758,
"rewards/rejected": -11.350090980529785,
"step": 75
},
{
"epoch": 0.687248322147651,
"grad_norm": 5.243275488519254,
"learning_rate": 1.3381920698905784e-07,
"logits/chosen": -2.599067211151123,
"logits/rejected": -2.9476146697998047,
"logps/chosen": -2229.91162109375,
"logps/rejected": -10514.13671875,
"loss": 0.1797,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -2.218345880508423,
"rewards/margins": 8.61392593383789,
"rewards/rejected": -10.832271575927734,
"step": 80
},
{
"epoch": 0.7302013422818792,
"grad_norm": 8.334219171923868,
"learning_rate": 1.0183445215899584e-07,
"logits/chosen": -2.6111998558044434,
"logits/rejected": -2.9625191688537598,
"logps/chosen": -1786.7320556640625,
"logps/rejected": -10765.2060546875,
"loss": 0.1725,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": -1.8339601755142212,
"rewards/margins": 8.938019752502441,
"rewards/rejected": -10.771979331970215,
"step": 85
},
{
"epoch": 0.7731543624161074,
"grad_norm": 14.219523417845217,
"learning_rate": 7.322330470336313e-08,
"logits/chosen": -2.2908596992492676,
"logits/rejected": -2.7106270790100098,
"logps/chosen": -1873.132568359375,
"logps/rejected": -9457.634765625,
"loss": 0.1766,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -1.8528430461883545,
"rewards/margins": 7.991453647613525,
"rewards/rejected": -9.844297409057617,
"step": 90
},
{
"epoch": 0.8161073825503355,
"grad_norm": 15.487241466447763,
"learning_rate": 4.863721686226349e-08,
"logits/chosen": -2.6290388107299805,
"logits/rejected": -2.9791619777679443,
"logps/chosen": -1920.321044921875,
"logps/rejected": -10810.255859375,
"loss": 0.203,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -2.1639926433563232,
"rewards/margins": 9.248581886291504,
"rewards/rejected": -11.412572860717773,
"step": 95
},
{
"epoch": 0.8590604026845637,
"grad_norm": 10.617279968480949,
"learning_rate": 2.863599358669755e-08,
"logits/chosen": -2.513326644897461,
"logits/rejected": -2.773226499557495,
"logps/chosen": -2062.977294921875,
"logps/rejected": -10156.541015625,
"loss": 0.158,
"rewards/accuracies": 0.90625,
"rewards/chosen": -2.4399707317352295,
"rewards/margins": 8.490180969238281,
"rewards/rejected": -10.930150985717773,
"step": 100
},
{
"epoch": 0.9020134228187919,
"grad_norm": 15.766621732547646,
"learning_rate": 1.3675046241339916e-08,
"logits/chosen": -2.458155870437622,
"logits/rejected": -2.8766350746154785,
"logps/chosen": -1824.9993896484375,
"logps/rejected": -10561.1455078125,
"loss": 0.2026,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -2.1738946437835693,
"rewards/margins": 8.930900573730469,
"rewards/rejected": -11.104796409606934,
"step": 105
},
{
"epoch": 0.9449664429530201,
"grad_norm": 5.046777954270063,
"learning_rate": 4.0950232632141205e-09,
"logits/chosen": -2.5404601097106934,
"logits/rejected": -2.9703125953674316,
"logps/chosen": -1576.762939453125,
"logps/rejected": -11024.712890625,
"loss": 0.2015,
"rewards/accuracies": 0.9375,
"rewards/chosen": -1.8676944971084595,
"rewards/margins": 9.766222953796387,
"rewards/rejected": -11.633917808532715,
"step": 110
},
{
"epoch": 0.9879194630872483,
"grad_norm": 13.782488804833061,
"learning_rate": 1.1405387761664887e-10,
"logits/chosen": -2.4785690307617188,
"logits/rejected": -2.7061634063720703,
"logps/chosen": -2462.81689453125,
"logps/rejected": -9758.763671875,
"loss": 0.2002,
"rewards/accuracies": 0.9375,
"rewards/chosen": -3.0159003734588623,
"rewards/margins": 7.688973426818848,
"rewards/rejected": -10.704873085021973,
"step": 115
},
{
"epoch": 0.996510067114094,
"step": 116,
"total_flos": 0.0,
"train_loss": 0.2600768296497649,
"train_runtime": 7822.1359,
"train_samples_per_second": 1.904,
"train_steps_per_second": 0.015
}
],
"logging_steps": 5,
"max_steps": 116,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}
|