besimray committed on
Commit
89b64bd
1 Parent(s): 9a151ef

Training in progress, step 260, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0b38996684554d1f09b04a3e5ab2b0b03df762b983defcab7835951fdacba51
3
  size 90207248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f6ed908b6c0a5b0bf09c0573053a4af39c4c92f5bf61fe05d9df2519c97031
3
  size 90207248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31eea40405f95b3ea2b2b047c33bd41db6558f1265fd847d6fd69497f9cc08db
3
- size 46057082
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2e3c04601c84fd5ba7eeb5da4ff1003918f386a2d1833589198a44ba65f0d8d
3
+ size 46057338
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e0f15e8250e550076d6f480971ea58fe35e1e1ccca0d097b04c361be146de54
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b90b2977b40565c7b47786677726b21587b954d614cb5eddcc13e2d79ccfddfd
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11fce6896214284d141064893064ad6e844c1b7b446de9ee050ea3044b9b2a3b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24163fa211a98380eaaf8162c38702de00babc1b46887461983dfe21c7fd7b23
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.151181697845459,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
- "epoch": 5.052631578947368,
5
  "eval_steps": 20,
6
- "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1791,6 +1791,154 @@
1791
  "eval_samples_per_second": 6.623,
1792
  "eval_steps_per_second": 0.662,
1793
  "step": 240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1794
  }
1795
  ],
1796
  "logging_steps": 1,
@@ -1805,7 +1953,7 @@
1805
  "early_stopping_threshold": 0.0
1806
  },
1807
  "attributes": {
1808
- "early_stopping_patience_counter": 8
1809
  }
1810
  },
1811
  "TrainerControl": {
@@ -1819,7 +1967,7 @@
1819
  "attributes": {}
1820
  }
1821
  },
1822
- "total_flos": 2.349128604450816e+17,
1823
  "train_batch_size": 10,
1824
  "trial_name": null,
1825
  "trial_params": null
 
1
  {
2
  "best_metric": 1.151181697845459,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
+ "epoch": 5.473684210526316,
5
  "eval_steps": 20,
6
+ "global_step": 260,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1791
  "eval_samples_per_second": 6.623,
1792
  "eval_steps_per_second": 0.662,
1793
  "step": 240
1794
+ },
1795
+ {
1796
+ "epoch": 5.073684210526316,
1797
+ "grad_norm": 0.4180259704589844,
1798
+ "learning_rate": 0.00019883026221137652,
1799
+ "loss": 0.526,
1800
+ "step": 241
1801
+ },
1802
+ {
1803
+ "epoch": 5.094736842105263,
1804
+ "grad_norm": 0.42054420709609985,
1805
+ "learning_rate": 0.00019882013269579584,
1806
+ "loss": 0.4412,
1807
+ "step": 242
1808
+ },
1809
+ {
1810
+ "epoch": 5.11578947368421,
1811
+ "grad_norm": 0.5846607089042664,
1812
+ "learning_rate": 0.00019880995977034584,
1813
+ "loss": 0.5306,
1814
+ "step": 243
1815
+ },
1816
+ {
1817
+ "epoch": 5.136842105263158,
1818
+ "grad_norm": 0.6321561932563782,
1819
+ "learning_rate": 0.00019879974343949526,
1820
+ "loss": 0.575,
1821
+ "step": 244
1822
+ },
1823
+ {
1824
+ "epoch": 5.157894736842105,
1825
+ "grad_norm": 0.48956233263015747,
1826
+ "learning_rate": 0.00019878948370773193,
1827
+ "loss": 0.4667,
1828
+ "step": 245
1829
+ },
1830
+ {
1831
+ "epoch": 5.178947368421053,
1832
+ "grad_norm": 0.49197542667388916,
1833
+ "learning_rate": 0.00019877918057956278,
1834
+ "loss": 0.473,
1835
+ "step": 246
1836
+ },
1837
+ {
1838
+ "epoch": 5.2,
1839
+ "grad_norm": 0.5268818736076355,
1840
+ "learning_rate": 0.00019876883405951377,
1841
+ "loss": 0.6249,
1842
+ "step": 247
1843
+ },
1844
+ {
1845
+ "epoch": 5.221052631578948,
1846
+ "grad_norm": 0.4883573651313782,
1847
+ "learning_rate": 0.00019875844415212997,
1848
+ "loss": 0.5239,
1849
+ "step": 248
1850
+ },
1851
+ {
1852
+ "epoch": 5.242105263157895,
1853
+ "grad_norm": 0.45860010385513306,
1854
+ "learning_rate": 0.00019874801086197544,
1855
+ "loss": 0.5462,
1856
+ "step": 249
1857
+ },
1858
+ {
1859
+ "epoch": 5.2631578947368425,
1860
+ "grad_norm": 0.41302675008773804,
1861
+ "learning_rate": 0.00019873753419363336,
1862
+ "loss": 0.5144,
1863
+ "step": 250
1864
+ },
1865
+ {
1866
+ "epoch": 5.284210526315789,
1867
+ "grad_norm": 0.550791323184967,
1868
+ "learning_rate": 0.00019872701415170593,
1869
+ "loss": 0.5071,
1870
+ "step": 251
1871
+ },
1872
+ {
1873
+ "epoch": 5.3052631578947365,
1874
+ "grad_norm": 0.4419604539871216,
1875
+ "learning_rate": 0.00019871645074081434,
1876
+ "loss": 0.4598,
1877
+ "step": 252
1878
+ },
1879
+ {
1880
+ "epoch": 5.326315789473684,
1881
+ "grad_norm": 0.5271047353744507,
1882
+ "learning_rate": 0.00019870584396559902,
1883
+ "loss": 0.5444,
1884
+ "step": 253
1885
+ },
1886
+ {
1887
+ "epoch": 5.347368421052631,
1888
+ "grad_norm": 0.4978967308998108,
1889
+ "learning_rate": 0.00019869519383071928,
1890
+ "loss": 0.5829,
1891
+ "step": 254
1892
+ },
1893
+ {
1894
+ "epoch": 5.368421052631579,
1895
+ "grad_norm": 0.5046519041061401,
1896
+ "learning_rate": 0.00019868450034085352,
1897
+ "loss": 0.5343,
1898
+ "step": 255
1899
+ },
1900
+ {
1901
+ "epoch": 5.389473684210526,
1902
+ "grad_norm": 0.5924373865127563,
1903
+ "learning_rate": 0.0001986737635006992,
1904
+ "loss": 0.514,
1905
+ "step": 256
1906
+ },
1907
+ {
1908
+ "epoch": 5.410526315789474,
1909
+ "grad_norm": 0.47235432267189026,
1910
+ "learning_rate": 0.00019866298331497283,
1911
+ "loss": 0.4899,
1912
+ "step": 257
1913
+ },
1914
+ {
1915
+ "epoch": 5.431578947368421,
1916
+ "grad_norm": 0.49679791927337646,
1917
+ "learning_rate": 0.0001986521597884099,
1918
+ "loss": 0.5483,
1919
+ "step": 258
1920
+ },
1921
+ {
1922
+ "epoch": 5.4526315789473685,
1923
+ "grad_norm": 0.4871433973312378,
1924
+ "learning_rate": 0.00019864129292576505,
1925
+ "loss": 0.5544,
1926
+ "step": 259
1927
+ },
1928
+ {
1929
+ "epoch": 5.473684210526316,
1930
+ "grad_norm": 0.5678947567939758,
1931
+ "learning_rate": 0.00019863038273181186,
1932
+ "loss": 0.5298,
1933
+ "step": 260
1934
+ },
1935
+ {
1936
+ "epoch": 5.473684210526316,
1937
+ "eval_loss": 1.5484461784362793,
1938
+ "eval_runtime": 15.0951,
1939
+ "eval_samples_per_second": 6.625,
1940
+ "eval_steps_per_second": 0.662,
1941
+ "step": 260
1942
  }
1943
  ],
1944
  "logging_steps": 1,
 
1953
  "early_stopping_threshold": 0.0
1954
  },
1955
  "attributes": {
1956
+ "early_stopping_patience_counter": 9
1957
  }
1958
  },
1959
  "TrainerControl": {
 
1967
  "attributes": {}
1968
  }
1969
  },
1970
+ "total_flos": 2.544889321488384e+17,
1971
  "train_batch_size": 10,
1972
  "trial_name": null,
1973
  "trial_params": null