lucasmaes commited on
Commit
2f9af5f
1 Parent(s): e8828d8

Training in progress, step 990000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f583ea7c93bc66a9ef8a4e308c8dc630c9ff1c01fbc3af28ec5ab02ff4f79681
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa56c6244a84a771cf2984aa63900b7128ad40dd69818d54b918bf9b35954fe3
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aff5625e82bdff15b343b27e9d6a9b3efa1fcccca6fcaa49aaf091e12be1731
3
  size 449474181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e8cc279c772df75f8d61748adcf8cab41db1f2f7207255b9b48edc3dca234c
3
  size 449474181
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70a20325b16659225e3cd01033074400c13a25610958f69aa9e3e72a98f16147
3
- size 17577
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d5b60e0a007a854b2858ef033ce2307ff2527680a1a2fca671d1fe620a358a8
3
+ size 17641
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43c4cc1073d6660901cb3dc90d6cc9fa2b9e92470c9b5b2cfa7f8fe0038e768d
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28cacbced11de35b0d6bed5b65452853709959052dcbd10ec95163f3114bda17
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 69.69136680415303,
5
- "global_step": 980000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7258,11 +7258,85 @@
7258
  "eval_samples_per_second": 163.457,
7259
  "eval_steps_per_second": 5.109,
7260
  "step": 980000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7261
  }
7262
  ],
7263
  "max_steps": 1000000,
7264
  "num_train_epochs": 72,
7265
- "total_flos": 3.4348958289582365e+22,
7266
  "trial_name": null,
7267
  "trial_params": null
7268
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 70.40250320011378,
5
+ "global_step": 990000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7258
  "eval_samples_per_second": 163.457,
7259
  "eval_steps_per_second": 5.109,
7260
  "step": 980000
7261
+ },
7262
+ {
7263
+ "epoch": 69.76,
7264
+ "learning_rate": 1.0064131326081175e-05,
7265
+ "loss": 0.1074,
7266
+ "step": 981000
7267
+ },
7268
+ {
7269
+ "epoch": 69.83,
7270
+ "learning_rate": 1.0057560250235656e-05,
7271
+ "loss": 0.1075,
7272
+ "step": 982000
7273
+ },
7274
+ {
7275
+ "epoch": 69.9,
7276
+ "learning_rate": 1.0051343959713499e-05,
7277
+ "loss": 0.1075,
7278
+ "step": 983000
7279
+ },
7280
+ {
7281
+ "epoch": 69.98,
7282
+ "learning_rate": 1.0045482522495065e-05,
7283
+ "loss": 0.1075,
7284
+ "step": 984000
7285
+ },
7286
+ {
7287
+ "epoch": 70.05,
7288
+ "learning_rate": 1.0039976002680097e-05,
7289
+ "loss": 0.1076,
7290
+ "step": 985000
7291
+ },
7292
+ {
7293
+ "epoch": 70.05,
7294
+ "eval_runtime": 519.6166,
7295
+ "eval_samples_per_second": 169.188,
7296
+ "eval_steps_per_second": 5.289,
7297
+ "step": 985000
7298
+ },
7299
+ {
7300
+ "epoch": 70.12,
7301
+ "learning_rate": 1.0034824460487e-05,
7302
+ "loss": 0.1072,
7303
+ "step": 986000
7304
+ },
7305
+ {
7306
+ "epoch": 70.19,
7307
+ "learning_rate": 1.003002795225223e-05,
7308
+ "loss": 0.1071,
7309
+ "step": 987000
7310
+ },
7311
+ {
7312
+ "epoch": 70.26,
7313
+ "learning_rate": 1.0025586530429617e-05,
7314
+ "loss": 0.1076,
7315
+ "step": 988000
7316
+ },
7317
+ {
7318
+ "epoch": 70.33,
7319
+ "learning_rate": 1.0021500243589852e-05,
7320
+ "loss": 0.1073,
7321
+ "step": 989000
7322
+ },
7323
+ {
7324
+ "epoch": 70.4,
7325
+ "learning_rate": 1.0017769136419904e-05,
7326
+ "loss": 0.107,
7327
+ "step": 990000
7328
+ },
7329
+ {
7330
+ "epoch": 70.4,
7331
+ "eval_runtime": 526.375,
7332
+ "eval_samples_per_second": 167.016,
7333
+ "eval_steps_per_second": 5.221,
7334
+ "step": 990000
7335
  }
7336
  ],
7337
  "max_steps": 1000000,
7338
  "num_train_epochs": 72,
7339
+ "total_flos": 3.4699457134686187e+22,
7340
  "trial_name": null,
7341
  "trial_params": null
7342
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aff5625e82bdff15b343b27e9d6a9b3efa1fcccca6fcaa49aaf091e12be1731
3
  size 449474181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45e8cc279c772df75f8d61748adcf8cab41db1f2f7207255b9b48edc3dca234c
3
  size 449474181