ncbateman committed
Commit 1943437
1 Parent(s): 5e0369f

Training in progress, step 520, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:64f4d2407a2d801a79431b62cbb89f6b811d012a90f3c05c5776d1fe8b649c23
+ oid sha256:a7583d22ab1c9114b632ece2e6cc49282443f5b4a3fdedb52fb678f52b2aad8d
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:50d796085c1da6a664a0a095bd97a802e19a9b834f81f1af7482a826cd3c13fd
+ oid sha256:f15e17e91563986d667221e9c8faff7fc7a9b3f1e836c62d18349f2266965a51
  size 23159546
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3ec4e8fb4df96f748a330df5f48ab7541f526fa33838ac8f0a0d1dbf6f82cae7
+ oid sha256:b77422a2be1769a3b7ceb86eaff5f1b80e128d2313f8eed1d5032246ec30f6a5
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09e0fb551bedc08207717eba3973833686a962221df59796ce2b28fa11f3445e
+ oid sha256:bc5b1354b82006b1bf63ee90590eb9a369c774920d9f04b12bfc9ee51c6a1d65
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:496bad4c8c14ed4e934e08779be511e0a3138f38c99bc251313e9540e43073e5
+ oid sha256:8d33b0edd5b6ac8e7325d969d8a731d29cfad089e3aa53b250b771d90d30a917
  size 1064
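
The five files above are Git LFS pointer files, so only the sha256 OID changes between revisions while the byte size stays the same. As an aside (not part of this commit), here is a minimal Python sketch of how one might check that a locally downloaded blob matches its pointer; the path and the helper name are illustrative, and the OID/size values are taken from the new adapter_model.safetensors pointer above.

import hashlib
import os

def matches_lfs_pointer(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values from the new pointer for adapter_model.safetensors in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "a7583d22ab1c9114b632ece2e6cc49282443f5b4a3fdedb52fb678f52b2aad8d",
    45118424,
))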
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 4.77326968973747,
+ "epoch": 4.964200477326969,
  "eval_steps": 52,
- "global_step": 500,
+ "global_step": 520,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3587,6 +3587,154 @@
  "learning_rate": 4.461255922609986e-07,
  "loss": 0.5518,
  "step": 500
+ },
+ {
+ "epoch": 4.782816229116945,
+ "grad_norm": 0.548302412033081,
+ "learning_rate": 4.0268683226741265e-07,
+ "loss": 0.6202,
+ "step": 501
+ },
+ {
+ "epoch": 4.79236276849642,
+ "grad_norm": 0.548893928527832,
+ "learning_rate": 3.6146402536468283e-07,
+ "loss": 0.6218,
+ "step": 502
+ },
+ {
+ "epoch": 4.801909307875895,
+ "grad_norm": 0.5890070199966431,
+ "learning_rate": 3.2245901334221895e-07,
+ "loss": 0.6638,
+ "step": 503
+ },
+ {
+ "epoch": 4.81145584725537,
+ "grad_norm": 0.4871584475040436,
+ "learning_rate": 2.856735389008269e-07,
+ "loss": 0.6107,
+ "step": 504
+ },
+ {
+ "epoch": 4.821002386634845,
+ "grad_norm": 0.5432624816894531,
+ "learning_rate": 2.511092455747932e-07,
+ "loss": 0.583,
+ "step": 505
+ },
+ {
+ "epoch": 4.83054892601432,
+ "grad_norm": 0.5359986424446106,
+ "learning_rate": 2.1876767765853234e-07,
+ "loss": 0.5368,
+ "step": 506
+ },
+ {
+ "epoch": 4.840095465393794,
+ "grad_norm": 0.5359886288642883,
+ "learning_rate": 1.8865028013751452e-07,
+ "loss": 0.6259,
+ "step": 507
+ },
+ {
+ "epoch": 4.84964200477327,
+ "grad_norm": 0.5111921429634094,
+ "learning_rate": 1.6075839862374488e-07,
+ "loss": 0.5609,
+ "step": 508
+ },
+ {
+ "epoch": 4.859188544152745,
+ "grad_norm": 0.6437258124351501,
+ "learning_rate": 1.3509327929563942e-07,
+ "loss": 0.6395,
+ "step": 509
+ },
+ {
+ "epoch": 4.868735083532219,
+ "grad_norm": 0.5992398262023926,
+ "learning_rate": 1.1165606884234181e-07,
+ "loss": 0.6546,
+ "step": 510
+ },
+ {
+ "epoch": 4.878281622911695,
+ "grad_norm": 0.5831811428070068,
+ "learning_rate": 9.044781441249207e-08,
+ "loss": 0.609,
+ "step": 511
+ },
+ {
+ "epoch": 4.88782816229117,
+ "grad_norm": 0.5561614632606506,
+ "learning_rate": 7.146946356743067e-08,
+ "loss": 0.6699,
+ "step": 512
+ },
+ {
+ "epoch": 4.897374701670644,
+ "grad_norm": 0.5337750911712646,
+ "learning_rate": 5.472186423889358e-08,
+ "loss": 0.5614,
+ "step": 513
+ },
+ {
+ "epoch": 4.906921241050119,
+ "grad_norm": 0.41179969906806946,
+ "learning_rate": 4.020576469108139e-08,
+ "loss": 0.4451,
+ "step": 514
+ },
+ {
+ "epoch": 4.916467780429595,
+ "grad_norm": 0.4820442795753479,
+ "learning_rate": 2.792181348726941e-08,
+ "loss": 0.5897,
+ "step": 515
+ },
+ {
+ "epoch": 4.926014319809069,
+ "grad_norm": 0.5927594900131226,
+ "learning_rate": 1.7870559460814173e-08,
+ "loss": 0.6788,
+ "step": 516
+ },
+ {
+ "epoch": 4.935560859188544,
+ "grad_norm": 0.5302107334136963,
+ "learning_rate": 1.0052451690617527e-08,
+ "loss": 0.6105,
+ "step": 517
+ },
+ {
+ "epoch": 4.945107398568019,
+ "grad_norm": 0.5596168041229248,
+ "learning_rate": 4.46783948109819e-09,
+ "loss": 0.5537,
+ "step": 518
+ },
+ {
+ "epoch": 4.954653937947494,
+ "grad_norm": 0.5655501484870911,
+ "learning_rate": 1.1169723465487281e-09,
+ "loss": 0.6915,
+ "step": 519
+ },
+ {
+ "epoch": 4.964200477326969,
+ "grad_norm": 0.5537259578704834,
+ "learning_rate": 0.0,
+ "loss": 0.7157,
+ "step": 520
+ },
+ {
+ "epoch": 4.964200477326969,
+ "eval_loss": 0.7753176689147949,
+ "eval_runtime": 12.9676,
+ "eval_samples_per_second": 13.649,
+ "eval_steps_per_second": 1.774,
+ "step": 520
  }
  ],
  "logging_steps": 1,
@@ -3601,12 +3749,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.87089027825664e+17,
+ "total_flos": 4.0257258893869056e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null