cwaud committed
Commit 66c316d
1 parent: 34dcc7b

Training in progress, epoch 3, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5deef1e641a03f70336b52c3a3efb8ebefbcdb16eb98aa75bba08e1fe4032873
+ oid sha256:b71fc631d5783c5d069af4fee78013a24ddd3c46bcf3fbcf09c0bb6b0ca43422
  size 45118424
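
Each changed checkpoint file in this commit is a Git LFS pointer (version, oid, size), so only the sha256 oid changes between epochs. As a minimal sketch (the local path is only illustrative and assumes the repo was cloned with git-lfs so the pointer resolves to the real binary), a downloaded file can be checked against the new pointer above:

import hashlib
import os

def verify_lfs_pointer(local_path, expected_oid, expected_size):
    # Compare the file's byte size and sha256 digest to the LFS pointer fields.
    if os.path.getsize(local_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(local_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size copied from the new pointer above; the path is hypothetical.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "b71fc631d5783c5d069af4fee78013a24ddd3c46bcf3fbcf09c0bb6b0ca43422",
    45118424,
))
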
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:461c10d7d47f690a9bad0ea0840c0e8dc09b351a785d985dc1f32b20c4068ecc
+ oid sha256:3d7aeb6126275e01bdf1f49341b16dde99e46a80df284da5918226f1f17b4c6f
  size 23159290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a75c8ddc1097597193455db293c6504cd13a1c1919dbd19099b4ad60d6bfabb0
+ oid sha256:eeec199a7b6b3341564794f492d5f2cdbdf672dec36d76d5b370bee2f1e7adea
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:094216163007d45050b212c7a70efe5fae78f962c8cb96c9335b05613a3052f2
+ oid sha256:8e34bd0fa659ced3625b7171012be0646e813b4ec2a721d8ca72a36286759e37
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6fb1c6881d979182b2b854d82868818657102f22e08698828a952d734455b26f
+ oid sha256:d98792d492243d608246176dd844c995fbd105690efb258bea318e3d46b293b6
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e871667ddc380695d990d6c2ad338fc24321a2ffbfd570f50f1a355b41fce9f
+ oid sha256:38b2e32c1c6174211836d9d5f0df91a24428c9d7cb793bda03ac3d72db2083b2
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5f01d1f39c96959ff207a7ea1e1ec315077fbaf203b474a1febad2bfc263e3a
+ oid sha256:a9caf63dc2234ee02c5e279e3b9796b437d67d266ac0e404750a5a75ec92c982
  size 1064
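
The pointers above cover the usual Trainer checkpoint artifacts: the adapter weights (adapter_model.safetensors, presumably a PEFT adapter), optimizer and LR-scheduler state, and one rng_state_<rank>.pth per training process (ranks 0-3 here). A hedged sketch for inspecting them locally, assuming the binaries have been pulled via git-lfs:

import torch
from safetensors import safe_open

# Optimizer, scheduler, and RNG states are torch pickles.
# Depending on your torch version you may need torch.load(..., weights_only=False).
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
rng_rank0 = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu")
print(type(optimizer_state), type(rng_rank0))

# The adapter weights are safetensors; list a few tensor names without loading everything.
with safe_open("last-checkpoint/adapter_model.safetensors", framework="pt", device="cpu") as f:
    print(list(f.keys())[:5])
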
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.0,
+ "epoch": 3.0,
  "eval_steps": 500,
- "global_step": 134,
+ "global_step": 201,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -945,6 +945,475 @@
  "learning_rate": 6.753433794837662e-05,
  "loss": 0.8727,
  "step": 134
+ },
+ {
+ "epoch": 2.014925373134328,
+ "grad_norm": 0.35240477323532104,
+ "learning_rate": 6.626149280816546e-05,
+ "loss": 1.0679,
+ "step": 135
+ },
+ {
+ "epoch": 2.029850746268657,
+ "grad_norm": 0.33283913135528564,
+ "learning_rate": 6.500000000000002e-05,
+ "loss": 1.0181,
+ "step": 136
+ },
+ {
+ "epoch": 2.044776119402985,
+ "grad_norm": 0.3615310788154602,
+ "learning_rate": 6.375018694441084e-05,
+ "loss": 0.9978,
+ "step": 137
+ },
+ {
+ "epoch": 2.0597014925373136,
+ "grad_norm": 0.36938410997390747,
+ "learning_rate": 6.251237803044805e-05,
+ "loss": 0.9883,
+ "step": 138
+ },
+ {
+ "epoch": 2.074626865671642,
+ "grad_norm": 0.3807987570762634,
+ "learning_rate": 6.128689453148619e-05,
+ "loss": 0.9572,
+ "step": 139
+ },
+ {
+ "epoch": 2.08955223880597,
+ "grad_norm": 0.37807610630989075,
+ "learning_rate": 6.00740545218375e-05,
+ "loss": 0.9174,
+ "step": 140
+ },
+ {
+ "epoch": 2.1044776119402986,
+ "grad_norm": 0.37602487206459045,
+ "learning_rate": 5.887417279419599e-05,
+ "loss": 0.8229,
+ "step": 141
+ },
+ {
+ "epoch": 2.1194029850746268,
+ "grad_norm": 0.35350197553634644,
+ "learning_rate": 5.7687560777932735e-05,
+ "loss": 0.8076,
+ "step": 142
+ },
+ {
+ "epoch": 2.1343283582089554,
+ "grad_norm": 0.3940332233905792,
+ "learning_rate": 5.651452645826445e-05,
+ "loss": 0.788,
+ "step": 143
+ },
+ {
+ "epoch": 2.1492537313432836,
+ "grad_norm": 0.46034398674964905,
+ "learning_rate": 5.5355374296315995e-05,
+ "loss": 0.7882,
+ "step": 144
+ },
+ {
+ "epoch": 2.1641791044776117,
+ "grad_norm": 0.4225603938102722,
+ "learning_rate": 5.421040515009737e-05,
+ "loss": 0.7197,
+ "step": 145
+ },
+ {
+ "epoch": 2.1791044776119404,
+ "grad_norm": 0.46008700132369995,
+ "learning_rate": 5.3079916196416055e-05,
+ "loss": 0.6569,
+ "step": 146
+ },
+ {
+ "epoch": 2.1940298507462686,
+ "grad_norm": 0.41973477602005005,
+ "learning_rate": 5.196420085374467e-05,
+ "loss": 0.8682,
+ "step": 147
+ },
+ {
+ "epoch": 2.208955223880597,
+ "grad_norm": 0.3677213191986084,
+ "learning_rate": 5.0863548706064245e-05,
+ "loss": 1.0353,
+ "step": 148
+ },
+ {
+ "epoch": 2.2238805970149254,
+ "grad_norm": 0.37162861227989197,
+ "learning_rate": 4.977824542770279e-05,
+ "loss": 1.001,
+ "step": 149
+ },
+ {
+ "epoch": 2.2388059701492535,
+ "grad_norm": 0.39737215638160706,
+ "learning_rate": 4.870857270918825e-05,
+ "loss": 0.9846,
+ "step": 150
+ },
+ {
+ "epoch": 2.253731343283582,
+ "grad_norm": 0.38380125164985657,
+ "learning_rate": 4.7654808184136064e-05,
+ "loss": 0.9606,
+ "step": 151
+ },
+ {
+ "epoch": 2.2686567164179103,
+ "grad_norm": 0.40244144201278687,
+ "learning_rate": 4.6617225357188976e-05,
+ "loss": 0.8571,
+ "step": 152
+ },
+ {
+ "epoch": 2.283582089552239,
+ "grad_norm": 0.4329751431941986,
+ "learning_rate": 4.5596093533029116e-05,
+ "loss": 0.8531,
+ "step": 153
+ },
+ {
+ "epoch": 2.298507462686567,
+ "grad_norm": 0.45405519008636475,
+ "learning_rate": 4.459167774647993e-05,
+ "loss": 0.8512,
+ "step": 154
+ },
+ {
+ "epoch": 2.3134328358208958,
+ "grad_norm": 0.45590460300445557,
+ "learning_rate": 4.360423869371629e-05,
+ "loss": 0.8208,
+ "step": 155
+ },
+ {
+ "epoch": 2.328358208955224,
+ "grad_norm": 0.4376915395259857,
+ "learning_rate": 4.2634032664600895e-05,
+ "loss": 0.7654,
+ "step": 156
+ },
+ {
+ "epoch": 2.343283582089552,
+ "grad_norm": 0.45759543776512146,
+ "learning_rate": 4.168131147616417e-05,
+ "loss": 0.7857,
+ "step": 157
+ },
+ {
+ "epoch": 2.3582089552238807,
+ "grad_norm": 0.4490528702735901,
+ "learning_rate": 4.0746322407245066e-05,
+ "loss": 0.7051,
+ "step": 158
+ },
+ {
+ "epoch": 2.373134328358209,
+ "grad_norm": 0.4924563765525818,
+ "learning_rate": 3.982930813430999e-05,
+ "loss": 0.6348,
+ "step": 159
+ },
+ {
+ "epoch": 2.388059701492537,
+ "grad_norm": 0.35502833127975464,
+ "learning_rate": 3.893050666846596e-05,
+ "loss": 1.1142,
+ "step": 160
+ },
+ {
+ "epoch": 2.4029850746268657,
+ "grad_norm": 0.3795003890991211,
+ "learning_rate": 3.805015129368492e-05,
+ "loss": 1.0387,
+ "step": 161
+ },
+ {
+ "epoch": 2.417910447761194,
+ "grad_norm": 0.3922593593597412,
+ "learning_rate": 3.718847050625475e-05,
+ "loss": 1.0402,
+ "step": 162
+ },
+ {
+ "epoch": 2.4328358208955225,
+ "grad_norm": 0.4245050251483917,
+ "learning_rate": 3.6345687955473166e-05,
+ "loss": 0.9854,
+ "step": 163
+ },
+ {
+ "epoch": 2.4477611940298507,
+ "grad_norm": 0.39441049098968506,
+ "learning_rate": 3.552202238559953e-05,
+ "loss": 0.9561,
+ "step": 164
+ },
+ {
+ "epoch": 2.4626865671641793,
+ "grad_norm": 0.39788442850112915,
+ "learning_rate": 3.4717687579079596e-05,
+ "loss": 0.9104,
+ "step": 165
+ },
+ {
+ "epoch": 2.4776119402985075,
+ "grad_norm": 0.4182056784629822,
+ "learning_rate": 3.393289230105849e-05,
+ "loss": 0.8841,
+ "step": 166
+ },
+ {
+ "epoch": 2.4925373134328357,
+ "grad_norm": 0.42861151695251465,
+ "learning_rate": 3.316784024519553e-05,
+ "loss": 0.8055,
+ "step": 167
+ },
+ {
+ "epoch": 2.5074626865671643,
+ "grad_norm": 0.42246565222740173,
+ "learning_rate": 3.242272998079557e-05,
+ "loss": 0.7947,
+ "step": 168
+ },
+ {
+ "epoch": 2.5223880597014925,
+ "grad_norm": 0.46474263072013855,
+ "learning_rate": 3.1697754901270473e-05,
+ "loss": 0.8153,
+ "step": 169
+ },
+ {
+ "epoch": 2.5373134328358207,
+ "grad_norm": 0.4996289312839508,
+ "learning_rate": 3.099310317394359e-05,
+ "loss": 0.7579,
+ "step": 170
+ },
+ {
+ "epoch": 2.5522388059701493,
+ "grad_norm": 0.47399628162384033,
+ "learning_rate": 3.030895769121112e-05,
+ "loss": 0.6813,
+ "step": 171
+ },
+ {
+ "epoch": 2.5671641791044775,
+ "grad_norm": 0.4417833983898163,
+ "learning_rate": 2.9645496023072244e-05,
+ "loss": 0.8971,
+ "step": 172
+ },
+ {
+ "epoch": 2.582089552238806,
+ "grad_norm": 0.3691651225090027,
+ "learning_rate": 2.9002890371040918e-05,
+ "loss": 1.0862,
+ "step": 173
+ },
+ {
+ "epoch": 2.5970149253731343,
+ "grad_norm": 0.4065288007259369,
+ "learning_rate": 2.8381307523450916e-05,
+ "loss": 1.031,
+ "step": 174
+ },
+ {
+ "epoch": 2.611940298507463,
+ "grad_norm": 0.3905118405818939,
+ "learning_rate": 2.778090881216592e-05,
+ "loss": 0.9701,
+ "step": 175
+ },
+ {
+ "epoch": 2.626865671641791,
+ "grad_norm": 0.39984792470932007,
+ "learning_rate": 2.7201850070705826e-05,
+ "loss": 0.9493,
+ "step": 176
+ },
+ {
+ "epoch": 2.6417910447761193,
+ "grad_norm": 0.415585994720459,
+ "learning_rate": 2.664428159380013e-05,
+ "loss": 0.9129,
+ "step": 177
+ },
+ {
+ "epoch": 2.656716417910448,
+ "grad_norm": 0.42336076498031616,
+ "learning_rate": 2.610834809837891e-05,
+ "loss": 0.8791,
+ "step": 178
+ },
+ {
+ "epoch": 2.671641791044776,
+ "grad_norm": 0.45662274956703186,
+ "learning_rate": 2.5594188686011615e-05,
+ "loss": 0.871,
+ "step": 179
+ },
+ {
+ "epoch": 2.6865671641791042,
+ "grad_norm": 0.4160149395465851,
+ "learning_rate": 2.5101936806803117e-05,
+ "loss": 0.7626,
+ "step": 180
+ },
+ {
+ "epoch": 2.701492537313433,
+ "grad_norm": 0.43893417716026306,
+ "learning_rate": 2.463172022475691e-05,
+ "loss": 0.8046,
+ "step": 181
+ },
+ {
+ "epoch": 2.716417910447761,
+ "grad_norm": 0.4579525291919708,
+ "learning_rate": 2.418366098461374e-05,
+ "loss": 0.7713,
+ "step": 182
+ },
+ {
+ "epoch": 2.7313432835820897,
+ "grad_norm": 0.4761490523815155,
+ "learning_rate": 2.3757875380175044e-05,
+ "loss": 0.69,
+ "step": 183
+ },
+ {
+ "epoch": 2.746268656716418,
+ "grad_norm": 0.5591773986816406,
+ "learning_rate": 2.3354473924118842e-05,
+ "loss": 0.6075,
+ "step": 184
+ },
+ {
+ "epoch": 2.7611940298507465,
+ "grad_norm": 0.3821795880794525,
+ "learning_rate": 2.297356131931614e-05,
+ "loss": 1.0839,
+ "step": 185
+ },
+ {
+ "epoch": 2.7761194029850746,
+ "grad_norm": 0.37466174364089966,
+ "learning_rate": 2.261523643165532e-05,
+ "loss": 1.0221,
+ "step": 186
+ },
+ {
+ "epoch": 2.791044776119403,
+ "grad_norm": 0.3825508654117584,
+ "learning_rate": 2.22795922643815e-05,
+ "loss": 1.0,
+ "step": 187
+ },
+ {
+ "epoch": 2.8059701492537314,
+ "grad_norm": 0.41949060559272766,
+ "learning_rate": 2.196671593395749e-05,
+ "loss": 0.9473,
+ "step": 188
+ },
+ {
+ "epoch": 2.8208955223880596,
+ "grad_norm": 0.42044076323509216,
+ "learning_rate": 2.167668864745279e-05,
+ "loss": 0.8887,
+ "step": 189
+ },
+ {
+ "epoch": 2.835820895522388,
+ "grad_norm": 0.4112393856048584,
+ "learning_rate": 2.1409585681466204e-05,
+ "loss": 0.8724,
+ "step": 190
+ },
+ {
+ "epoch": 2.8507462686567164,
+ "grad_norm": 0.45745235681533813,
+ "learning_rate": 2.1165476362587846e-05,
+ "loss": 0.8562,
+ "step": 191
+ },
+ {
+ "epoch": 2.8656716417910446,
+ "grad_norm": 0.4491675794124603,
+ "learning_rate": 2.09444240494054e-05,
+ "loss": 0.8593,
+ "step": 192
+ },
+ {
+ "epoch": 2.8805970149253732,
+ "grad_norm": 0.4317816197872162,
+ "learning_rate": 2.0746486116059418e-05,
+ "loss": 0.7933,
+ "step": 193
+ },
+ {
+ "epoch": 2.8955223880597014,
+ "grad_norm": 0.46604618430137634,
+ "learning_rate": 2.0571713937351834e-05,
+ "loss": 0.7903,
+ "step": 194
+ },
+ {
+ "epoch": 2.91044776119403,
+ "grad_norm": 0.48169732093811035,
+ "learning_rate": 2.0420152875411624e-05,
+ "loss": 0.7668,
+ "step": 195
+ },
+ {
+ "epoch": 2.925373134328358,
+ "grad_norm": 0.4627380073070526,
+ "learning_rate": 2.0291842267921108e-05,
+ "loss": 0.6404,
+ "step": 196
+ },
+ {
+ "epoch": 2.9402985074626864,
+ "grad_norm": 0.44821980595588684,
+ "learning_rate": 2.0186815417905787e-05,
+ "loss": 0.8672,
+ "step": 197
+ },
+ {
+ "epoch": 2.955223880597015,
+ "grad_norm": 0.3909732699394226,
+ "learning_rate": 2.0105099585090603e-05,
+ "loss": 0.9487,
+ "step": 198
+ },
+ {
+ "epoch": 2.970149253731343,
+ "grad_norm": 0.4137306213378906,
+ "learning_rate": 2.0046715978824664e-05,
+ "loss": 0.8438,
+ "step": 199
+ },
+ {
+ "epoch": 2.9850746268656714,
+ "grad_norm": 0.4548170864582062,
+ "learning_rate": 2.001167975257628e-05,
+ "loss": 0.8052,
+ "step": 200
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 0.4302070140838623,
+ "learning_rate": 2e-05,
+ "loss": 0.8199,
+ "step": 201
  }
  ],
  "logging_steps": 1,
@@ -959,12 +1428,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.501220166757253e+17,
+ "total_flos": 5.25183025013588e+17,
  "train_batch_size": 18,
  "trial_name": null,
  "trial_params": null
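
The trainer_state.json diff above records training advancing from step 134 to step 201 (epoch 3.0), with "should_training_stop" flipping to true and "total_flos" updated. As a minimal sketch (assuming the per-step records sit under the standard "log_history" key of this file, which is not itself visible in the truncated diff), the newly appended entries can be summarized like this:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"], "global_step:", state["global_step"])
print("total_flos:", state["total_flos"])

# Entries appended in this commit cover steps 135-201.
new_entries = [e for e in state["log_history"]
               if e.get("step", 0) >= 135 and "loss" in e]
mean_loss = sum(e["loss"] for e in new_entries) / len(new_entries)
print(f"steps {new_entries[0]['step']}-{new_entries[-1]['step']}: mean loss {mean_loss:.4f}")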