reprover_by_file / trainer_state.json
{
"best_metric": 1.1806877851486206,
"best_model_checkpoint": "model_training/reprover/checkpoints-by_file-09-07-16-31/checkpoint-450",
"epoch": 7.309644670050761,
"eval_steps": 25,
"global_step": 450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08121827411167512,
"grad_norm": 24.31340217590332,
"learning_rate": 5.102040816326531e-05,
"loss": 12.4632,
"step": 5
},
{
"epoch": 0.16243654822335024,
"grad_norm": 7.6981658935546875,
"learning_rate": 0.00010204081632653062,
"loss": 7.3045,
"step": 10
},
{
"epoch": 0.2436548223350254,
"grad_norm": 0.584043562412262,
"learning_rate": 0.00015306122448979594,
"loss": 3.8878,
"step": 15
},
{
"epoch": 0.3248730964467005,
"grad_norm": 0.229707270860672,
"learning_rate": 0.00020408163265306123,
"loss": 3.6345,
"step": 20
},
{
"epoch": 0.40609137055837563,
"grad_norm": 0.2534404695034027,
"learning_rate": 0.00025510204081632655,
"loss": 3.508,
"step": 25
},
{
"epoch": 0.40609137055837563,
"eval_loss": 3.2811102867126465,
"eval_runtime": 86.0155,
"eval_samples_per_second": 11.626,
"eval_steps_per_second": 1.453,
"step": 25
},
{
"epoch": 0.4873096446700508,
"grad_norm": 2.226205587387085,
"learning_rate": 0.0003061224489795919,
"loss": 3.4572,
"step": 30
},
{
"epoch": 0.5685279187817259,
"grad_norm": 2.194638729095459,
"learning_rate": 0.00035714285714285714,
"loss": 2.504,
"step": 35
},
{
"epoch": 0.649746192893401,
"grad_norm": 0.7849161624908447,
"learning_rate": 0.00040816326530612246,
"loss": 1.9298,
"step": 40
},
{
"epoch": 0.7309644670050761,
"grad_norm": 0.5836678147315979,
"learning_rate": 0.0004591836734693878,
"loss": 1.7631,
"step": 45
},
{
"epoch": 0.8121827411167513,
"grad_norm": 0.4033905565738678,
"learning_rate": 0.0004999935985425297,
"loss": 1.6774,
"step": 50
},
{
"epoch": 0.8121827411167513,
"eval_loss": 1.367048740386963,
"eval_runtime": 85.8218,
"eval_samples_per_second": 11.652,
"eval_steps_per_second": 1.457,
"step": 50
},
{
"epoch": 0.8934010152284264,
"grad_norm": 0.3479253947734833,
"learning_rate": 0.0004997695819512612,
"loss": 1.6005,
"step": 55
},
{
"epoch": 0.9746192893401016,
"grad_norm": 0.4574773907661438,
"learning_rate": 0.0004992258202402822,
"loss": 1.5389,
"step": 60
},
{
"epoch": 1.0558375634517767,
"grad_norm": 0.2955416738986969,
"learning_rate": 0.0004983630095117843,
"loss": 1.4855,
"step": 65
},
{
"epoch": 1.1370558375634519,
"grad_norm": 0.235136017203331,
"learning_rate": 0.0004971822543018662,
"loss": 1.4427,
"step": 70
},
{
"epoch": 1.218274111675127,
"grad_norm": 0.2502729594707489,
"learning_rate": 0.0004956850661665511,
"loss": 1.462,
"step": 75
},
{
"epoch": 1.218274111675127,
"eval_loss": 1.2605030536651611,
"eval_runtime": 85.8507,
"eval_samples_per_second": 11.648,
"eval_steps_per_second": 1.456,
"step": 75
},
{
"epoch": 1.299492385786802,
"grad_norm": 0.46766167879104614,
"learning_rate": 0.0004938733617467517,
"loss": 1.4421,
"step": 80
},
{
"epoch": 1.380710659898477,
"grad_norm": 0.2875135540962219,
"learning_rate": 0.0004917494603146632,
"loss": 1.4042,
"step": 85
},
{
"epoch": 1.4619289340101522,
"grad_norm": 0.5056741833686829,
"learning_rate": 0.0004893160808047222,
"loss": 1.3901,
"step": 90
},
{
"epoch": 1.5431472081218274,
"grad_norm": 0.17907263338565826,
"learning_rate": 0.00048657633833293557,
"loss": 1.3829,
"step": 95
},
{
"epoch": 1.6243654822335025,
"grad_norm": 0.25355765223503113,
"learning_rate": 0.0004835337402090316,
"loss": 1.3592,
"step": 100
},
{
"epoch": 1.6243654822335025,
"eval_loss": 1.2396838665008545,
"eval_runtime": 85.75,
"eval_samples_per_second": 11.662,
"eval_steps_per_second": 1.458,
"step": 100
},
{
"epoch": 1.7055837563451777,
"grad_norm": 0.1514243483543396,
"learning_rate": 0.0004801921814465414,
"loss": 1.379,
"step": 105
},
{
"epoch": 1.7868020304568528,
"grad_norm": 0.14222057163715363,
"learning_rate": 0.00047655593977655674,
"loss": 1.3385,
"step": 110
},
{
"epoch": 1.868020304568528,
"grad_norm": 0.38187268376350403,
"learning_rate": 0.0004726296701715489,
"loss": 1.339,
"step": 115
},
{
"epoch": 1.9492385786802031,
"grad_norm": 0.42166340351104736,
"learning_rate": 0.00046841839888625623,
"loss": 1.4193,
"step": 120
},
{
"epoch": 2.030456852791878,
"grad_norm": 0.3789190948009491,
"learning_rate": 0.0004639275170232734,
"loss": 1.3529,
"step": 125
},
{
"epoch": 2.030456852791878,
"eval_loss": 1.2485618591308594,
"eval_runtime": 85.8636,
"eval_samples_per_second": 11.646,
"eval_steps_per_second": 1.456,
"step": 125
},
{
"epoch": 2.1116751269035534,
"grad_norm": 0.3297707736492157,
"learning_rate": 0.0004591627736315743,
"loss": 1.3754,
"step": 130
},
{
"epoch": 2.1928934010152283,
"grad_norm": 0.6720882058143616,
"learning_rate": 0.0004541302683468084,
"loss": 1.3633,
"step": 135
},
{
"epoch": 2.2741116751269037,
"grad_norm": 0.423013299703598,
"learning_rate": 0.0004488364435827881,
"loss": 1.3739,
"step": 140
},
{
"epoch": 2.3553299492385786,
"grad_norm": 0.3096015751361847,
"learning_rate": 0.00044328807628416644,
"loss": 1.3734,
"step": 145
},
{
"epoch": 2.436548223350254,
"grad_norm": 0.25548315048217773,
"learning_rate": 0.0004374922692508611,
"loss": 1.354,
"step": 150
},
{
"epoch": 2.436548223350254,
"eval_loss": 1.2303425073623657,
"eval_runtime": 85.8621,
"eval_samples_per_second": 11.647,
"eval_steps_per_second": 1.456,
"step": 150
},
{
"epoch": 2.517766497461929,
"grad_norm": 0.2693212330341339,
"learning_rate": 0.0004314564420453311,
"loss": 1.3311,
"step": 155
},
{
"epoch": 2.598984771573604,
"grad_norm": 0.2327384501695633,
"learning_rate": 0.0004251883214943475,
"loss": 1.3367,
"step": 160
},
{
"epoch": 2.6802030456852792,
"grad_norm": 0.20477798581123352,
"learning_rate": 0.0004186959317974155,
"loss": 1.3718,
"step": 165
},
{
"epoch": 2.761421319796954,
"grad_norm": 0.6859282851219177,
"learning_rate": 0.00041198758425451266,
"loss": 1.3351,
"step": 170
},
{
"epoch": 2.8426395939086295,
"grad_norm": 0.4204564392566681,
"learning_rate": 0.00040507186662629185,
"loss": 1.3633,
"step": 175
},
{
"epoch": 2.8426395939086295,
"eval_loss": 1.2046416997909546,
"eval_runtime": 85.7507,
"eval_samples_per_second": 11.662,
"eval_steps_per_second": 1.458,
"step": 175
},
{
"epoch": 2.9238578680203045,
"grad_norm": 0.4428386390209198,
"learning_rate": 0.0003979576321403705,
"loss": 1.2933,
"step": 180
},
{
"epoch": 3.00507614213198,
"grad_norm": 0.5064650774002075,
"learning_rate": 0.0003906539881577793,
"loss": 1.3117,
"step": 185
},
{
"epoch": 3.0862944162436547,
"grad_norm": 0.19443967938423157,
"learning_rate": 0.0003831702845140801,
"loss": 1.3285,
"step": 190
},
{
"epoch": 3.16751269035533,
"grad_norm": 0.17194636166095734,
"learning_rate": 0.00037551610155007613,
"loss": 1.2587,
"step": 195
},
{
"epoch": 3.248730964467005,
"grad_norm": 0.4715280830860138,
"learning_rate": 0.00036770123784744027,
"loss": 1.3266,
"step": 200
},
{
"epoch": 3.248730964467005,
"eval_loss": 1.209210991859436,
"eval_runtime": 85.8547,
"eval_samples_per_second": 11.648,
"eval_steps_per_second": 1.456,
"step": 200
},
{
"epoch": 3.3299492385786804,
"grad_norm": 0.5037745237350464,
"learning_rate": 0.00035973569768495855,
"loss": 1.3085,
"step": 205
},
{
"epoch": 3.4111675126903553,
"grad_norm": 0.4525098204612732,
"learning_rate": 0.0003516296782314491,
"loss": 1.2944,
"step": 210
},
{
"epoch": 3.4923857868020303,
"grad_norm": 0.30172044038772583,
"learning_rate": 0.00034339355649175095,
"loss": 1.3206,
"step": 215
},
{
"epoch": 3.5736040609137056,
"grad_norm": 0.8903113603591919,
"learning_rate": 0.00033503787602249364,
"loss": 1.3404,
"step": 220
},
{
"epoch": 3.6548223350253806,
"grad_norm": 0.5123381614685059,
"learning_rate": 0.00032657333343465356,
"loss": 1.321,
"step": 225
},
{
"epoch": 3.6548223350253806,
"eval_loss": 1.200507640838623,
"eval_runtime": 85.8377,
"eval_samples_per_second": 11.65,
"eval_steps_per_second": 1.456,
"step": 225
},
{
"epoch": 3.736040609137056,
"grad_norm": 0.19661971926689148,
"learning_rate": 0.0003180107647001769,
"loss": 1.3276,
"step": 230
},
{
"epoch": 3.817258883248731,
"grad_norm": 0.34714922308921814,
"learning_rate": 0.0003093611312801979,
"loss": 1.2869,
"step": 235
},
{
"epoch": 3.8984771573604062,
"grad_norm": 0.3150271773338318,
"learning_rate": 0.00030063550609261025,
"loss": 1.2738,
"step": 240
},
{
"epoch": 3.979695431472081,
"grad_norm": 0.24157479405403137,
"learning_rate": 0.000291845059336957,
"loss": 1.3107,
"step": 245
},
{
"epoch": 4.060913705583756,
"grad_norm": 0.2525878846645355,
"learning_rate": 0.0002830010441947834,
"loss": 1.2679,
"step": 250
},
{
"epoch": 4.060913705583756,
"eval_loss": 1.196422815322876,
"eval_runtime": 85.8149,
"eval_samples_per_second": 11.653,
"eval_steps_per_second": 1.457,
"step": 250
},
{
"epoch": 4.1421319796954315,
"grad_norm": 0.49697160720825195,
"learning_rate": 0.00027411478242376017,
"loss": 1.3141,
"step": 255
},
{
"epoch": 4.223350253807107,
"grad_norm": 0.3860456943511963,
"learning_rate": 0.00026519764986401774,
"loss": 1.3039,
"step": 260
},
{
"epoch": 4.304568527918782,
"grad_norm": 0.2929648160934448,
"learning_rate": 0.000256261061875247,
"loss": 1.2864,
"step": 265
},
{
"epoch": 4.385786802030457,
"grad_norm": 0.49687063694000244,
"learning_rate": 0.0002473164587232079,
"loss": 1.3065,
"step": 270
},
{
"epoch": 4.467005076142132,
"grad_norm": 0.26030004024505615,
"learning_rate": 0.0002383752909343547,
"loss": 1.299,
"step": 275
},
{
"epoch": 4.467005076142132,
"eval_loss": 1.1841660737991333,
"eval_runtime": 85.8048,
"eval_samples_per_second": 11.654,
"eval_steps_per_second": 1.457,
"step": 275
},
{
"epoch": 4.548223350253807,
"grad_norm": 0.1852976679801941,
"learning_rate": 0.0002294490046373259,
"loss": 1.2506,
"step": 280
},
{
"epoch": 4.629441624365482,
"grad_norm": 0.41372132301330566,
"learning_rate": 0.00022054902691006405,
"loss": 1.3174,
"step": 285
},
{
"epoch": 4.710659898477157,
"grad_norm": 0.18286170065402985,
"learning_rate": 0.00021168675115132315,
"loss": 1.3335,
"step": 290
},
{
"epoch": 4.791878172588833,
"grad_norm": 0.1263434737920761,
"learning_rate": 0.00020287352249529153,
"loss": 1.3019,
"step": 295
},
{
"epoch": 4.873096446700508,
"grad_norm": 0.13266858458518982,
"learning_rate": 0.00019412062328800044,
"loss": 1.2709,
"step": 300
},
{
"epoch": 4.873096446700508,
"eval_loss": 1.1873031854629517,
"eval_runtime": 85.8269,
"eval_samples_per_second": 11.651,
"eval_steps_per_second": 1.456,
"step": 300
},
{
"epoch": 4.9543147208121825,
"grad_norm": 0.1231088787317276,
"learning_rate": 0.000185439258644112,
"loss": 1.3102,
"step": 305
},
{
"epoch": 5.035532994923858,
"grad_norm": 0.16395896673202515,
"learning_rate": 0.00017684054210257517,
"loss": 1.3088,
"step": 310
},
{
"epoch": 5.116751269035533,
"grad_norm": 0.14288240671157837,
"learning_rate": 0.00016833548139951395,
"loss": 1.3205,
"step": 315
},
{
"epoch": 5.197969543147208,
"grad_norm": 0.16564807295799255,
"learning_rate": 0.0001599349643765599,
"loss": 1.2845,
"step": 320
},
{
"epoch": 5.279187817258883,
"grad_norm": 0.133669912815094,
"learning_rate": 0.0001516497450426686,
"loss": 1.3105,
"step": 325
},
{
"epoch": 5.279187817258883,
"eval_loss": 1.183776617050171,
"eval_runtime": 85.7228,
"eval_samples_per_second": 11.666,
"eval_steps_per_second": 1.458,
"step": 325
},
{
"epoch": 5.3604060913705585,
"grad_norm": 0.050202421844005585,
"learning_rate": 0.00014349042980726362,
"loss": 1.2825,
"step": 330
},
{
"epoch": 5.441624365482234,
"grad_norm": 0.08104140311479568,
"learning_rate": 0.0001354674639023318,
"loss": 1.2975,
"step": 335
},
{
"epoch": 5.522842639593908,
"grad_norm": 0.045941274613142014,
"learning_rate": 0.00012759111801085066,
"loss": 1.2848,
"step": 340
},
{
"epoch": 5.604060913705584,
"grad_norm": 0.10393151640892029,
"learning_rate": 0.00011987147511866788,
"loss": 1.3075,
"step": 345
},
{
"epoch": 5.685279187817259,
"grad_norm": 0.05486277490854263,
"learning_rate": 0.00011231841760666186,
"loss": 1.2337,
"step": 350
},
{
"epoch": 5.685279187817259,
"eval_loss": 1.1837860345840454,
"eval_runtime": 85.8492,
"eval_samples_per_second": 11.648,
"eval_steps_per_second": 1.456,
"step": 350
},
{
"epoch": 5.7664974619289335,
"grad_norm": 0.06317424029111862,
"learning_rate": 0.0001049416145997094,
"loss": 1.2696,
"step": 355
},
{
"epoch": 5.847715736040609,
"grad_norm": 0.06952204555273056,
"learning_rate": 9.775050958865584e-05,
"loss": 1.2792,
"step": 360
},
{
"epoch": 5.928934010152284,
"grad_norm": 0.05977623909711838,
"learning_rate": 9.075430834113152e-05,
"loss": 1.2672,
"step": 365
},
{
"epoch": 6.01015228426396,
"grad_norm": 0.1008043885231018,
"learning_rate": 8.396196711669335e-05,
"loss": 1.3045,
"step": 370
},
{
"epoch": 6.091370558375634,
"grad_norm": 0.06693598628044128,
"learning_rate": 7.738218120137671e-05,
"loss": 1.2753,
"step": 375
},
{
"epoch": 6.091370558375634,
"eval_loss": 1.1829280853271484,
"eval_runtime": 85.8364,
"eval_samples_per_second": 11.65,
"eval_steps_per_second": 1.456,
"step": 375
},
{
"epoch": 6.1725888324873095,
"grad_norm": 0.10474739223718643,
"learning_rate": 7.102337377633394e-05,
"loss": 1.2936,
"step": 380
},
{
"epoch": 6.253807106598985,
"grad_norm": 0.05388178676366806,
"learning_rate": 6.489368513481228e-05,
"loss": 1.2439,
"step": 385
},
{
"epoch": 6.33502538071066,
"grad_norm": 0.09186024218797684,
"learning_rate": 5.9000962261273136e-05,
"loss": 1.3222,
"step": 390
},
{
"epoch": 6.416243654822335,
"grad_norm": 0.07748736441135406,
"learning_rate": 5.3352748785993164e-05,
"loss": 1.2731,
"step": 395
},
{
"epoch": 6.49746192893401,
"grad_norm": 0.07806465774774551,
"learning_rate": 4.795627532800806e-05,
"loss": 1.2849,
"step": 400
},
{
"epoch": 6.49746192893401,
"eval_loss": 1.1841729879379272,
"eval_runtime": 85.8225,
"eval_samples_per_second": 11.652,
"eval_steps_per_second": 1.456,
"step": 400
},
{
"epoch": 6.5786802030456855,
"grad_norm": 0.05898735672235489,
"learning_rate": 4.281845023876074e-05,
"loss": 1.2866,
"step": 405
},
{
"epoch": 6.659898477157361,
"grad_norm": 0.052701685577631,
"learning_rate": 3.794585075830329e-05,
"loss": 1.2848,
"step": 410
},
{
"epoch": 6.741116751269035,
"grad_norm": 0.04117720574140549,
"learning_rate": 3.334471459537497e-05,
"loss": 1.2859,
"step": 415
},
{
"epoch": 6.822335025380711,
"grad_norm": 0.04338378086686134,
"learning_rate": 2.902093194213526e-05,
"loss": 1.2934,
"step": 420
},
{
"epoch": 6.903553299492386,
"grad_norm": 0.05017438903450966,
"learning_rate": 2.4980037933772488e-05,
"loss": 1.2508,
"step": 425
},
{
"epoch": 6.903553299492386,
"eval_loss": 1.1836540699005127,
"eval_runtime": 85.78,
"eval_samples_per_second": 11.658,
"eval_steps_per_second": 1.457,
"step": 425
},
{
"epoch": 6.9847715736040605,
"grad_norm": 0.04826565086841583,
"learning_rate": 2.122720556264357e-05,
"loss": 1.2958,
"step": 430
},
{
"epoch": 7.065989847715736,
"grad_norm": 0.05151581019163132,
"learning_rate": 1.776723905601438e-05,
"loss": 1.279,
"step": 435
},
{
"epoch": 7.147208121827411,
"grad_norm": 0.06657218188047409,
"learning_rate": 1.4604567725877926e-05,
"loss": 1.2756,
"step": 440
},
{
"epoch": 7.228426395939087,
"grad_norm": 0.05859878286719322,
"learning_rate": 1.1743240298725116e-05,
"loss": 1.3084,
"step": 445
},
{
"epoch": 7.309644670050761,
"grad_norm": 0.051177285611629486,
"learning_rate": 9.18691973252539e-06,
"loss": 1.296,
"step": 450
},
{
"epoch": 7.309644670050761,
"eval_loss": 1.1806877851486206,
"eval_runtime": 85.7014,
"eval_samples_per_second": 11.668,
"eval_steps_per_second": 1.459,
"step": 450
}
],
"logging_steps": 5,
"max_steps": 488,
"num_input_tokens_seen": 0,
"num_train_epochs": 8,
"save_steps": 25,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.089179686194381e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}