Training in progress, step 280, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05f6ed908b6c0a5b0bf09c0573053a4af39c4c92f5bf61fe05d9df2519c97031
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:bedd12fb64e67b0ba54ce9a65703cf9231d8f3947a3a2421c5324ea3c4f4a458
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2e3c04601c84fd5ba7eeb5da4ff1003918f386a2d1833589198a44ba65f0d8d
 size 46057338

 version https://git-lfs.github.com/spec/v1
+oid sha256:13aa5702b220dfabda9ee206f9a7825c21d198ab9f22812c322f93e88794d492
 size 46057338

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b90b2977b40565c7b47786677726b21587b954d614cb5eddcc13e2d79ccfddfd
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fed915c84bf19420142cd5928514948593dacfab134a8c615244e7726ec07b27
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24163fa211a98380eaaf8162c38702de00babc1b46887461983dfe21c7fd7b23
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:62b01704910ed42611ba769dd8b7cb883a0e572e8ef2ce5c29f4f8f6102196ba
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.151181697845459,
   "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
-  "epoch": 5.473684210526316,
   "eval_steps": 20,
-  "global_step": 260,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1939,6 +1939,154 @@
       "eval_samples_per_second": 6.625,
       "eval_steps_per_second": 0.662,
       "step": 260
     }
   ],
   "logging_steps": 1,
@@ -1953,7 +2101,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 9
       }
     },
     "TrainerControl": {
@@ -1962,12 +2110,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.544889321488384e+17,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.151181697845459,
   "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
+  "epoch": 5.894736842105263,
   "eval_steps": 20,
+  "global_step": 280,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.625,
       "eval_steps_per_second": 0.662,
       "step": 260
+    },
+    {
+      "epoch": 5.494736842105263,
+      "grad_norm": 0.541333019733429,
+      "learning_rate": 0.00019861942921134298,
+      "loss": 0.5321,
+      "step": 261
+    },
+    {
+      "epoch": 5.515789473684211,
+      "grad_norm": 0.45463690161705017,
+      "learning_rate": 0.0001986084323691701,
+      "loss": 0.5239,
+      "step": 262
+    },
+    {
+      "epoch": 5.536842105263158,
+      "grad_norm": 0.5732460618019104,
+      "learning_rate": 0.0001985973922101239,
+      "loss": 0.4861,
+      "step": 263
+    },
+    {
+      "epoch": 5.557894736842105,
+      "grad_norm": 0.4361143112182617,
+      "learning_rate": 0.00019858630873905418,
+      "loss": 0.5427,
+      "step": 264
+    },
+    {
+      "epoch": 5.578947368421053,
+      "grad_norm": 0.48954471945762634,
+      "learning_rate": 0.00019857518196082964,
+      "loss": 0.5614,
+      "step": 265
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.5832586884498596,
+      "learning_rate": 0.0001985640118803381,
+      "loss": 0.4603,
+      "step": 266
+    },
+    {
+      "epoch": 5.621052631578947,
+      "grad_norm": 0.5026202201843262,
+      "learning_rate": 0.0001985527985024864,
+      "loss": 0.6399,
+      "step": 267
+    },
+    {
+      "epoch": 5.6421052631578945,
+      "grad_norm": 0.4579145908355713,
+      "learning_rate": 0.0001985415418322003,
+      "loss": 0.5354,
+      "step": 268
+    },
+    {
+      "epoch": 5.663157894736842,
+      "grad_norm": 0.545054018497467,
+      "learning_rate": 0.00019853024187442472,
+      "loss": 0.5158,
+      "step": 269
+    },
+    {
+      "epoch": 5.684210526315789,
+      "grad_norm": 0.48174452781677246,
+      "learning_rate": 0.00019851889863412345,
+      "loss": 0.5014,
+      "step": 270
+    },
+    {
+      "epoch": 5.705263157894737,
+      "grad_norm": 0.5417779684066772,
+      "learning_rate": 0.00019850751211627945,
+      "loss": 0.54,
+      "step": 271
+    },
+    {
+      "epoch": 5.726315789473684,
+      "grad_norm": 0.46869099140167236,
+      "learning_rate": 0.00019849608232589457,
+      "loss": 0.5416,
+      "step": 272
+    },
+    {
+      "epoch": 5.747368421052632,
+      "grad_norm": 0.6471317410469055,
+      "learning_rate": 0.00019848460926798968,
+      "loss": 0.5962,
+      "step": 273
+    },
+    {
+      "epoch": 5.768421052631579,
+      "grad_norm": 0.5855197310447693,
+      "learning_rate": 0.00019847309294760473,
+      "loss": 0.6314,
+      "step": 274
+    },
+    {
+      "epoch": 5.7894736842105265,
+      "grad_norm": 0.5380208492279053,
+      "learning_rate": 0.00019846153336979856,
+      "loss": 0.5651,
+      "step": 275
+    },
+    {
+      "epoch": 5.810526315789474,
+      "grad_norm": 0.46017733216285706,
+      "learning_rate": 0.00019844993053964917,
+      "loss": 0.5575,
+      "step": 276
+    },
+    {
+      "epoch": 5.831578947368421,
+      "grad_norm": 0.49735313653945923,
+      "learning_rate": 0.00019843828446225342,
+      "loss": 0.5628,
+      "step": 277
+    },
+    {
+      "epoch": 5.852631578947369,
+      "grad_norm": 0.5164270401000977,
+      "learning_rate": 0.0001984265951427272,
+      "loss": 0.5026,
+      "step": 278
+    },
+    {
+      "epoch": 5.873684210526315,
+      "grad_norm": 0.5263252258300781,
+      "learning_rate": 0.00019841486258620545,
+      "loss": 0.5588,
+      "step": 279
+    },
+    {
+      "epoch": 5.894736842105263,
+      "grad_norm": 0.47757405042648315,
+      "learning_rate": 0.00019840308679784207,
+      "loss": 0.5671,
+      "step": 280
+    },
+    {
+      "epoch": 5.894736842105263,
+      "eval_loss": 1.5009753704071045,
+      "eval_runtime": 15.1017,
+      "eval_samples_per_second": 6.622,
+      "eval_steps_per_second": 0.662,
+      "step": 280
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 10
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.740650038525952e+17,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null