lucasmaes commited on
Commit
b656661
1 Parent(s): 2f9af5f

Training in progress, step 1000000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa56c6244a84a771cf2984aa63900b7128ad40dd69818d54b918bf9b35954fe3
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41c0cfdbba59adb55e48f4497cb58e6acc6b3452b49124d5a5f0fe8e60c1ed5
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45e8cc279c772df75f8d61748adcf8cab41db1f2f7207255b9b48edc3dca234c
3
  size 449474181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27f116a4294f22674c90e68e437b33583a613d34e7b53d5073c3c54064bc66c
3
  size 449474181
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d5b60e0a007a854b2858ef033ce2307ff2527680a1a2fca671d1fe620a358a8
3
  size 17641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ff52e12d1ecb30cb53f7d49f3e0645dc2a062bffe7e31ad2d54f6553c1e3bcf
3
  size 17641
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28cacbced11de35b0d6bed5b65452853709959052dcbd10ec95163f3114bda17
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00fbdf9131638fc1a94afc0c7252f3e83640538433d010f21c313201f00efd8a
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 70.40250320011378,
5
- "global_step": 990000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7332,11 +7332,85 @@
7332
  "eval_samples_per_second": 167.016,
7333
  "eval_steps_per_second": 5.221,
7334
  "step": 990000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7335
  }
7336
  ],
7337
  "max_steps": 1000000,
7338
  "num_train_epochs": 72,
7339
- "total_flos": 3.4699457134686187e+22,
7340
  "trial_name": null,
7341
  "trial_params": null
7342
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 71.11363959607452,
5
+ "global_step": 1000000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7332
  "eval_samples_per_second": 167.016,
7333
  "eval_steps_per_second": 5.221,
7334
  "step": 990000
7335
+ },
7336
+ {
7337
+ "epoch": 70.47,
7338
+ "learning_rate": 1.001439324972257e-05,
7339
+ "loss": 0.1073,
7340
+ "step": 991000
7341
+ },
7342
+ {
7343
+ "epoch": 70.54,
7344
+ "learning_rate": 1.0011372620416e-05,
7345
+ "loss": 0.1072,
7346
+ "step": 992000
7347
+ },
7348
+ {
7349
+ "epoch": 70.62,
7350
+ "learning_rate": 1.0008707281533315e-05,
7351
+ "loss": 0.1071,
7352
+ "step": 993000
7353
+ },
7354
+ {
7355
+ "epoch": 70.69,
7356
+ "learning_rate": 1.0006397262222235e-05,
7357
+ "loss": 0.1071,
7358
+ "step": 994000
7359
+ },
7360
+ {
7361
+ "epoch": 70.76,
7362
+ "learning_rate": 1.0004442587744758e-05,
7363
+ "loss": 0.107,
7364
+ "step": 995000
7365
+ },
7366
+ {
7367
+ "epoch": 70.76,
7368
+ "eval_runtime": 521.8934,
7369
+ "eval_samples_per_second": 168.45,
7370
+ "eval_steps_per_second": 5.265,
7371
+ "step": 995000
7372
+ },
7373
+ {
7374
+ "epoch": 70.83,
7375
+ "learning_rate": 1.0002843279476897e-05,
7376
+ "loss": 0.1068,
7377
+ "step": 996000
7378
+ },
7379
+ {
7380
+ "epoch": 70.9,
7381
+ "learning_rate": 1.0001599354908423e-05,
7382
+ "loss": 0.1071,
7383
+ "step": 997000
7384
+ },
7385
+ {
7386
+ "epoch": 70.97,
7387
+ "learning_rate": 1.0000710827642701e-05,
7388
+ "loss": 0.1072,
7389
+ "step": 998000
7390
+ },
7391
+ {
7392
+ "epoch": 71.04,
7393
+ "learning_rate": 1.0000177707396518e-05,
7394
+ "loss": 0.1068,
7395
+ "step": 999000
7396
+ },
7397
+ {
7398
+ "epoch": 71.11,
7399
+ "learning_rate": 1e-05,
7400
+ "loss": 0.1067,
7401
+ "step": 1000000
7402
+ },
7403
+ {
7404
+ "epoch": 71.11,
7405
+ "eval_runtime": 522.5962,
7406
+ "eval_samples_per_second": 168.224,
7407
+ "eval_steps_per_second": 5.258,
7408
+ "step": 1000000
7409
  }
7410
  ],
7411
  "max_steps": 1000000,
7412
  "num_train_epochs": 72,
7413
+ "total_flos": 3.504995597979001e+22,
7414
  "trial_name": null,
7415
  "trial_params": null
7416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45e8cc279c772df75f8d61748adcf8cab41db1f2f7207255b9b48edc3dca234c
3
  size 449474181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c27f116a4294f22674c90e68e437b33583a613d34e7b53d5073c3c54064bc66c
3
  size 449474181