diogopaes10 committed on Jul 22, 2023

Commit

8eb03d2

•

1 Parent(s): 11288fe

Training in progress, epoch 13

Browse files

Files changed (26) hide show

{checkpoint-2000 → checkpoint-3000}/added_tokens.json +0 -0
{checkpoint-2000 → checkpoint-3000}/config.json +0 -0
{checkpoint-2000 → checkpoint-3000}/optimizer.pt +1 -1
{checkpoint-2250 → checkpoint-3000}/pytorch_model.bin +1 -1
{checkpoint-2250 → checkpoint-3000}/rng_state.pth +1 -1
{checkpoint-2250 → checkpoint-3000}/scheduler.pt +1 -1
{checkpoint-2000 → checkpoint-3000}/special_tokens_map.json +0 -0
{checkpoint-2000 → checkpoint-3000}/spm.model +0 -0
{checkpoint-2000 → checkpoint-3000}/tokenizer.json +0 -0
{checkpoint-2000 → checkpoint-3000}/tokenizer_config.json +0 -0
{checkpoint-2250 → checkpoint-3000}/trainer_state.json +107 -3
{checkpoint-2000 → checkpoint-3000}/training_args.bin +0 -0
{checkpoint-2250 → checkpoint-3250}/added_tokens.json +0 -0
{checkpoint-2250 → checkpoint-3250}/config.json +0 -0
{checkpoint-2250 → checkpoint-3250}/optimizer.pt +1 -1
{checkpoint-2000 → checkpoint-3250}/pytorch_model.bin +1 -1
{checkpoint-2000 → checkpoint-3250}/rng_state.pth +1 -1
{checkpoint-2000 → checkpoint-3250}/scheduler.pt +1 -1
{checkpoint-2250 → checkpoint-3250}/special_tokens_map.json +0 -0
{checkpoint-2250 → checkpoint-3250}/spm.model +0 -0
{checkpoint-2250 → checkpoint-3250}/tokenizer.json +0 -0
{checkpoint-2250 → checkpoint-3250}/tokenizer_config.json +0 -0
{checkpoint-2000 → checkpoint-3250}/trainer_state.json +185 -3
{checkpoint-2250 → checkpoint-3250}/training_args.bin +0 -0
pytorch_model.bin +1 -1
runs/Jul22_21-19-20_ab4276e44fca/events.out.tfevents.1690060770.ab4276e44fca.659.0 +2 -2

{checkpoint-2000 → checkpoint-3000}/added_tokens.json RENAMED Viewed

File without changes

{checkpoint-2000 → checkpoint-3000}/config.json RENAMED Viewed

File without changes

{checkpoint-2000 → checkpoint-3000}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81f3216586a58d2085a7f37ca95117d6c10fbcd3dc9c70f034b94e35fd6e9149
 size 1475557125

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a06836e74e2ee556f75e4f728cb07fd38bf7dca4688b3915aab2696b9adac99
 size 1475557125

{checkpoint-2250 → checkpoint-3000}/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:627070829ce397286a4c2e3016e3ade6fbe191d90b64283878e08c51320b3b27
 size 737788917

 version https://git-lfs.github.com/spec/v1
+oid sha256:c95f2d92d432e839ff645f87515fc248494db36dac837cae703842d2cbf775e5
 size 737788917

{checkpoint-2250 → checkpoint-3000}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c5e45bb5a3782239e1b011180d84487acde3e428af09ac96bbf315bcb54dd611
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:30675d1e37d339cae6e3dc477927b70e39062f0910613a7d90db6c1671bca5bc
 size 14575

{checkpoint-2250 → checkpoint-3000}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a4113c70550b7a4ade899ac1f4a91272bb6bdc8dc785c5c4e5e342583232813
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:68cbe85bad4e57d93e8caf9830d7003e889867d2bc1bdf97b16703437df161a3
 size 627

{checkpoint-2000 → checkpoint-3000}/special_tokens_map.json RENAMED Viewed

File without changes

{checkpoint-2000 → checkpoint-3000}/spm.model RENAMED Viewed

File without changes

{checkpoint-2000 → checkpoint-3000}/tokenizer.json RENAMED Viewed

File without changes

{checkpoint-2000 → checkpoint-3000}/tokenizer_config.json RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3000}/trainer_state.json RENAMED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.0,
-  "global_step": 2250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -298,11 +298,115 @@
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.219398498535156,
       "step": 2068
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
-  "total_flos": 4334831263537536.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 12.0,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.219398498535156,
       "step": 2068
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 7.978666666666667e-06,
+      "loss": 0.1505,
+      "step": 2256
+    },
+    {
+      "epoch": 9.02,
+      "eval_accuracy": 0.7075,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.827659606933594,
+      "eval_f1": 0.709341703450241,
+      "eval_gpu_ram_allocated": 2.0897817611694336,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 48,
+      "eval_loss": 1.4310206174850464,
+      "eval_precision": 0.7133423622104005,
+      "eval_recall": 0.7075,
+      "eval_runtime": 2.4471,
+      "eval_samples_per_second": 817.29,
+      "eval_steps_per_second": 25.745,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.2417755126953125,
+      "step": 2256
+    },
+    {
+      "epoch": 9.78,
+      "learning_rate": 6.976000000000001e-06,
+      "loss": 0.1132,
+      "step": 2444
+    },
+    {
+      "epoch": 9.78,
+      "eval_accuracy": 0.7045,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.827754974365234,
+      "eval_f1": 0.705265213679387,
+      "eval_gpu_ram_allocated": 2.089801788330078,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 48,
+      "eval_loss": 1.5454399585723877,
+      "eval_precision": 0.7097494768850874,
+      "eval_recall": 0.7045,
+      "eval_runtime": 2.5035,
+      "eval_samples_per_second": 798.873,
+      "eval_steps_per_second": 25.165,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.293117523193359,
+      "step": 2444
+    },
+    {
+      "epoch": 10.53,
+      "learning_rate": 5.973333333333334e-06,
+      "loss": 0.0979,
+      "step": 2632
+    },
+    {
+      "epoch": 10.53,
+      "eval_accuracy": 0.708,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.82805252075195,
+      "eval_f1": 0.7090322597492875,
+      "eval_gpu_ram_allocated": 2.089801788330078,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 45,
+      "eval_loss": 1.64204740524292,
+      "eval_precision": 0.7171054872018443,
+      "eval_recall": 0.708,
+      "eval_runtime": 2.5339,
+      "eval_samples_per_second": 789.29,
+      "eval_steps_per_second": 24.863,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.279300689697266,
+      "step": 2632
+    },
+    {
+      "epoch": 11.28,
+      "learning_rate": 4.976e-06,
+      "loss": 0.0818,
+      "step": 2820
+    },
+    {
+      "epoch": 11.28,
+      "eval_accuracy": 0.7065,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.828128814697266,
+      "eval_f1": 0.706242034421972,
+      "eval_gpu_ram_allocated": 2.0898447036743164,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.686875820159912,
+      "eval_precision": 0.7102028476355108,
+      "eval_recall": 0.7065,
+      "eval_runtime": 2.4408,
+      "eval_samples_per_second": 819.396,
+      "eval_steps_per_second": 25.811,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.2822418212890625,
+      "step": 2820
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
+  "total_flos": 5780526048262272.0,
   "trial_name": null,
   "trial_params": null
 }

{checkpoint-2000 → checkpoint-3000}/training_args.bin RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3250}/added_tokens.json RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3250}/config.json RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3250}/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d092da6a86e9d3fddf667c0e8cc73377daca3a3d8630e425531f34eb77545eea
 size 1475557125

 version https://git-lfs.github.com/spec/v1
+oid sha256:f17eacaf5376c0fabb8aeff03d05e59319e7a180e3c00d273c966e5a26d33f06
 size 1475557125

{checkpoint-2000 → checkpoint-3250}/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b71e7c7798b6760f8be8c551b4e8951ebe3dfa93da41d1460b9cb33fd0d6f86
 size 737788917

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab87f1854f930634513326e51246eb456af5e9a20373ec512925553c65de13d4
 size 737788917

{checkpoint-2000 → checkpoint-3250}/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96ffa685fc4010a50b57e506b086e0167e48b18c5d9de223e06893b4aa16c22a
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9318169f07f11cd9456a08a8554cc70b98429bd8764cb326d58f8d40bc05005
 size 14575

{checkpoint-2000 → checkpoint-3250}/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:626c1ac2df61838775a74a9749f04b4d515724a3ccd7da9a914227a50ae26d2d
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b6692caff16315b18091d72ba55872cc98f8a135dd4601d0a933fafdf6b6bcd
 size 627

{checkpoint-2250 → checkpoint-3250}/special_tokens_map.json RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3250}/spm.model RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3250}/tokenizer.json RENAMED Viewed

File without changes

{checkpoint-2250 → checkpoint-3250}/tokenizer_config.json RENAMED Viewed

File without changes

{checkpoint-2000 → checkpoint-3250}/trainer_state.json RENAMED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.0,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -272,11 +272,193 @@
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.224781036376953,
       "step": 1880
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
-  "total_flos": 3850219425648384.0,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 13.0,
+  "global_step": 3250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_system_ram_total": 83.48074722290039,
       "eval_system_ram_used": 4.224781036376953,
       "step": 1880
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 8.981333333333333e-06,
+      "loss": 0.197,
+      "step": 2068
+    },
+    {
+      "epoch": 8.27,
+      "eval_accuracy": 0.712,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 33.7657470703125,
+      "eval_f1": 0.7097931257647566,
+      "eval_gpu_ram_allocated": 2.0897903442382812,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 45,
+      "eval_loss": 1.3960117101669312,
+      "eval_precision": 0.7137187449926237,
+      "eval_recall": 0.712,
+      "eval_runtime": 2.3878,
+      "eval_samples_per_second": 837.604,
+      "eval_steps_per_second": 26.385,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.219398498535156,
+      "step": 2068
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 7.978666666666667e-06,
+      "loss": 0.1505,
+      "step": 2256
+    },
+    {
+      "epoch": 9.02,
+      "eval_accuracy": 0.7075,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.827659606933594,
+      "eval_f1": 0.709341703450241,
+      "eval_gpu_ram_allocated": 2.0897817611694336,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 48,
+      "eval_loss": 1.4310206174850464,
+      "eval_precision": 0.7133423622104005,
+      "eval_recall": 0.7075,
+      "eval_runtime": 2.4471,
+      "eval_samples_per_second": 817.29,
+      "eval_steps_per_second": 25.745,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.2417755126953125,
+      "step": 2256
+    },
+    {
+      "epoch": 9.78,
+      "learning_rate": 6.976000000000001e-06,
+      "loss": 0.1132,
+      "step": 2444
+    },
+    {
+      "epoch": 9.78,
+      "eval_accuracy": 0.7045,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.827754974365234,
+      "eval_f1": 0.705265213679387,
+      "eval_gpu_ram_allocated": 2.089801788330078,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 48,
+      "eval_loss": 1.5454399585723877,
+      "eval_precision": 0.7097494768850874,
+      "eval_recall": 0.7045,
+      "eval_runtime": 2.5035,
+      "eval_samples_per_second": 798.873,
+      "eval_steps_per_second": 25.165,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.293117523193359,
+      "step": 2444
+    },
+    {
+      "epoch": 10.53,
+      "learning_rate": 5.973333333333334e-06,
+      "loss": 0.0979,
+      "step": 2632
+    },
+    {
+      "epoch": 10.53,
+      "eval_accuracy": 0.708,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.82805252075195,
+      "eval_f1": 0.7090322597492875,
+      "eval_gpu_ram_allocated": 2.089801788330078,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 45,
+      "eval_loss": 1.64204740524292,
+      "eval_precision": 0.7171054872018443,
+      "eval_recall": 0.708,
+      "eval_runtime": 2.5339,
+      "eval_samples_per_second": 789.29,
+      "eval_steps_per_second": 24.863,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.279300689697266,
+      "step": 2632
+    },
+    {
+      "epoch": 11.28,
+      "learning_rate": 4.976e-06,
+      "loss": 0.0818,
+      "step": 2820
+    },
+    {
+      "epoch": 11.28,
+      "eval_accuracy": 0.7065,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.828128814697266,
+      "eval_f1": 0.706242034421972,
+      "eval_gpu_ram_allocated": 2.0898447036743164,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.686875820159912,
+      "eval_precision": 0.7102028476355108,
+      "eval_recall": 0.7065,
+      "eval_runtime": 2.4408,
+      "eval_samples_per_second": 819.396,
+      "eval_steps_per_second": 25.811,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.2822418212890625,
+      "step": 2820
+    },
+    {
+      "epoch": 12.03,
+      "learning_rate": 3.973333333333333e-06,
+      "loss": 0.062,
+      "step": 3008
+    },
+    {
+      "epoch": 12.03,
+      "eval_accuracy": 0.701,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.82817459106445,
+      "eval_f1": 0.704316965060789,
+      "eval_gpu_ram_allocated": 2.0900821685791016,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 50,
+      "eval_loss": 1.781833291053772,
+      "eval_precision": 0.7122852239266858,
+      "eval_recall": 0.701,
+      "eval_runtime": 2.3858,
+      "eval_samples_per_second": 838.291,
+      "eval_steps_per_second": 26.406,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.286445617675781,
+      "step": 3008
+    },
+    {
+      "epoch": 12.78,
+      "learning_rate": 2.970666666666667e-06,
+      "loss": 0.0433,
+      "step": 3196
+    },
+    {
+      "epoch": 12.78,
+      "eval_accuracy": 0.707,
+      "eval_disk_space_total": 78.1898422241211,
+      "eval_disk_space_used": 35.82823181152344,
+      "eval_f1": 0.70799964440731,
+      "eval_gpu_ram_allocated": 2.089794635772705,
+      "eval_gpu_ram_cached": 25.85546875,
+      "eval_gpu_ram_total": 39.56402587890625,
+      "eval_gpu_utilization": 49,
+      "eval_loss": 1.7981120347976685,
+      "eval_precision": 0.7109713384315803,
+      "eval_recall": 0.707,
+      "eval_runtime": 2.4151,
+      "eval_samples_per_second": 828.137,
+      "eval_steps_per_second": 26.086,
+      "eval_system_ram_total": 83.48074722290039,
+      "eval_system_ram_used": 4.266563415527344,
+      "step": 3196
     }
   ],
   "max_steps": 3750,
   "num_train_epochs": 15,
+  "total_flos": 6259743625357056.0,
   "trial_name": null,
   "trial_params": null
 }

{checkpoint-2250 → checkpoint-3250}/training_args.bin RENAMED Viewed

File without changes

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:627070829ce397286a4c2e3016e3ade6fbe191d90b64283878e08c51320b3b27
 size 737788917

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab87f1854f930634513326e51246eb456af5e9a20373ec512925553c65de13d4
 size 737788917

runs/Jul22_21-19-20_ab4276e44fca/events.out.tfevents.1690060770.ab4276e44fca.659.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9480f7c1ae50fb4afe8f904897ca584d0ecad3a73621bc2257b9e47e566bf68b
-size 17242

 version https://git-lfs.github.com/spec/v1
+oid sha256:a0676d7575783045c7b0b81b3dac8f9748e1362dd10f8ada9768f991e63e7691
+size 23854