Training in progress, step 7000

Files changed (12) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "sberbank-ai/rugpt3medium_based_on_gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "/content/Socrat_tmp/last-checkpoint",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

last-checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "sberbank-ai/rugpt3medium_based_on_gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "/content/Socrat_tmp/last-checkpoint",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67978948fb3b549f5c65bac6be20371317a7760f6a1e185ee4429165340c2211
 size 2847145157

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c136ff0e9cb20c7463cf837c57a825a4e61457ecc99effe498b8cec7d902776
 size 2847145157

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43af96f1ebc8edb6272710e8eded46b397b3566845920240e9b47c2c9d0eae77
 size 1524261149

 version https://git-lfs.github.com/spec/v1
+oid sha256:89e09ba7644b8ce1be54b5d151fb88c54f7af6e3f93b2834943e87018320b472
 size 1524261149

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8442c4ffbd50200b4bc943e03cf1d899ee87f53e137754e23543c2176c8b920
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:89f2ea1099f73d4771d0adfcd2464d3b79ddc8be7baf6cb50692837d58a8a048
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81baa9d1bbfebc8a9494097ebb00110cffda569e0baf3070d563ac3650b9b0ae
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:14808219b4a7001cd7bd7967362505aa479b3e57cd0120b221d6370031f10450
 size 627

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.0015007503751874,
-  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -126,11 +126,31 @@
       "eval_samples_per_second": 15.661,
       "eval_steps_per_second": 2.611,
       "step": 6000
     }
   ],
   "max_steps": 9995,
   "num_train_epochs": 5,
-  "total_flos": 1.697780957184e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.501750875437719,
+  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 15.661,
       "eval_steps_per_second": 2.611,
       "step": 6000
+    },
+    {
+      "epoch": 3.25,
+      "learning_rate": 6.9934967483741875e-06,
+      "loss": 2.8394,
+      "step": 6500
+    },
+    {
+      "epoch": 3.5,
+      "learning_rate": 5.992996498249125e-06,
+      "loss": 2.819,
+      "step": 7000
+    },
+    {
+      "epoch": 3.5,
+      "eval_loss": 3.090629816055298,
+      "eval_runtime": 133.0042,
+      "eval_samples_per_second": 15.917,
+      "eval_steps_per_second": 2.654,
+      "step": 7000
     }
   ],
   "max_steps": 9995,
   "num_train_epochs": 5,
+  "total_flos": 1.980744450048e+16,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:175fa8453377bf4a0c9a3ff9526282460c0fde24b49844814ef9cd85c3698074
 size 3515

 version https://git-lfs.github.com/spec/v1
+oid sha256:7310e8acee49c640435d0d63ee639d9b8543c98942deb1f0453405499bf4acef
 size 3515

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43af96f1ebc8edb6272710e8eded46b397b3566845920240e9b47c2c9d0eae77
 size 1524261149

 version https://git-lfs.github.com/spec/v1
+oid sha256:89e09ba7644b8ce1be54b5d151fb88c54f7af6e3f93b2834943e87018320b472
 size 1524261149

runs/Mar25_07-47-44_6e686009e3a0/1679730482.1827366/events.out.tfevents.1679730482.6e686009e3a0.183.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:00f6c7ae6be349384a4f7a61c28fe5d4b861d59cb04560646f996698189df91c
+size 5754

runs/Mar25_07-47-44_6e686009e3a0/events.out.tfevents.1679730482.6e686009e3a0.183.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:187a073febbce9d08c947a3283b5e8e473950f162a4dbd8c354fd7f957611812
+size 4813

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:175fa8453377bf4a0c9a3ff9526282460c0fde24b49844814ef9cd85c3698074
 size 3515

 version https://git-lfs.github.com/spec/v1
+oid sha256:7310e8acee49c640435d0d63ee639d9b8543c98942deb1f0453405499bf4acef
 size 3515