ncbateman committed
Commit 45cccea
1 Parent(s): 721194a

Training in progress, step 260, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6181c92989cc12c5c4bf8ebe5c826179e6c49d9a8f6fdcdac17fbc54e6be7263
+oid sha256:d78fd5c628f31fe6f81b7e9bbd7adcf7dcd7e2069fba87a4287bb9992aa32940
 size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:201e9ee2aad5fb6422cb6bd1f1a23c9fc2c463c94ca6b58351cd891758b9db65
-size 23159290
+oid sha256:02b2f2518671db7884993614fc2de098c0047accad5c9b663a3acf6ae07fa80d
+size 23159546
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6398556d22fe95a7e5e6c7da07ebca8075991f502dbf137933551966eb78dbd
+oid sha256:390b86d1750c11659578b906a56e7ab9fdb42bccc921eb4cc727a15d8557be03
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d37fad6be6ffe60fea334ab6bc75d4ae6805a5c373142ff91d27c556b05e854c
+oid sha256:42476027bd2bfd54d2018c7efa4a234360e0e33427747c62bc385bfb49affb98
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2f6dcce6785392487d83864c0be888500239b6ce81b1cd85adb6f30290e683a
+oid sha256:cb93d8c2ec6d70ccb139d9861604471242441ca47e1ff57cc12b015fe36fd3bd
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.2911694510739857,
+  "epoch": 2.4821002386634845,
   "eval_steps": 52,
-  "global_step": 240,
+  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1727,6 +1727,154 @@
       "learning_rate": 6.481377904428171e-05,
       "loss": 0.6306,
       "step": 240
+    },
+    {
+      "epoch": 2.300715990453461,
+      "grad_norm": 0.40222805738449097,
+      "learning_rate": 6.449424378870123e-05,
+      "loss": 0.6857,
+      "step": 241
+    },
+    {
+      "epoch": 2.3102625298329356,
+      "grad_norm": 0.4215107858181,
+      "learning_rate": 6.41740609463409e-05,
+      "loss": 0.7309,
+      "step": 242
+    },
+    {
+      "epoch": 2.3198090692124103,
+      "grad_norm": 0.4149893820285797,
+      "learning_rate": 6.385324482261597e-05,
+      "loss": 0.6562,
+      "step": 243
+    },
+    {
+      "epoch": 2.3293556085918854,
+      "grad_norm": 0.4119661748409271,
+      "learning_rate": 6.353180975123595e-05,
+      "loss": 0.7544,
+      "step": 244
+    },
+    {
+      "epoch": 2.3389021479713605,
+      "grad_norm": 0.32324427366256714,
+      "learning_rate": 6.320977009356431e-05,
+      "loss": 0.5994,
+      "step": 245
+    },
+    {
+      "epoch": 2.3484486873508352,
+      "grad_norm": 0.4508344531059265,
+      "learning_rate": 6.288714023797672e-05,
+      "loss": 0.7047,
+      "step": 246
+    },
+    {
+      "epoch": 2.3579952267303104,
+      "grad_norm": 0.3957417905330658,
+      "learning_rate": 6.256393459921824e-05,
+      "loss": 0.6364,
+      "step": 247
+    },
+    {
+      "epoch": 2.367541766109785,
+      "grad_norm": 0.4180348813533783,
+      "learning_rate": 6.224016761775933e-05,
+      "loss": 0.6511,
+      "step": 248
+    },
+    {
+      "epoch": 2.37708830548926,
+      "grad_norm": 0.46107926964759827,
+      "learning_rate": 6.191585375915055e-05,
+      "loss": 0.6736,
+      "step": 249
+    },
+    {
+      "epoch": 2.386634844868735,
+      "grad_norm": 0.43949881196022034,
+      "learning_rate": 6.159100751337642e-05,
+      "loss": 0.6639,
+      "step": 250
+    },
+    {
+      "epoch": 2.39618138424821,
+      "grad_norm": 0.4427139461040497,
+      "learning_rate": 6.126564339420784e-05,
+      "loss": 0.6581,
+      "step": 251
+    },
+    {
+      "epoch": 2.405727923627685,
+      "grad_norm": 0.4241901636123657,
+      "learning_rate": 6.093977593855375e-05,
+      "loss": 0.6738,
+      "step": 252
+    },
+    {
+      "epoch": 2.41527446300716,
+      "grad_norm": 0.41828441619873047,
+      "learning_rate": 6.061341970581165e-05,
+      "loss": 0.685,
+      "step": 253
+    },
+    {
+      "epoch": 2.424821002386635,
+      "grad_norm": 0.4712134599685669,
+      "learning_rate": 6.028658927721697e-05,
+      "loss": 0.6853,
+      "step": 254
+    },
+    {
+      "epoch": 2.4343675417661097,
+      "grad_norm": 0.47678568959236145,
+      "learning_rate": 5.99592992551918e-05,
+      "loss": 0.673,
+      "step": 255
+    },
+    {
+      "epoch": 2.443914081145585,
+      "grad_norm": 0.46318480372428894,
+      "learning_rate": 5.9631564262692274e-05,
+      "loss": 0.688,
+      "step": 256
+    },
+    {
+      "epoch": 2.4534606205250595,
+      "grad_norm": 0.4256531000137329,
+      "learning_rate": 5.930339894255532e-05,
+      "loss": 0.6521,
+      "step": 257
+    },
+    {
+      "epoch": 2.4630071599045347,
+      "grad_norm": 0.39636510610580444,
+      "learning_rate": 5.897481795684446e-05,
+      "loss": 0.6713,
+      "step": 258
+    },
+    {
+      "epoch": 2.4725536992840094,
+      "grad_norm": 0.497344434261322,
+      "learning_rate": 5.8645835986194676e-05,
+      "loss": 0.7745,
+      "step": 259
+    },
+    {
+      "epoch": 2.4821002386634845,
+      "grad_norm": 0.4814034104347229,
+      "learning_rate": 5.831646772915651e-05,
+      "loss": 0.6849,
+      "step": 260
+    },
+    {
+      "epoch": 2.4821002386634845,
+      "eval_loss": 0.7669724225997925,
+      "eval_runtime": 13.0227,
+      "eval_samples_per_second": 13.592,
+      "eval_steps_per_second": 1.766,
+      "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1746,7 +1894,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.8580273335631872e+17,
+  "total_flos": 2.0128629446934528e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null