lucasmaes committed on
Commit
e8828d8
1 Parent(s): 9902b68

Training in progress, step 980000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e432b78001697802935de247a39fbb372c81f0a8a5495fa8b10802c9e09893b
3
  size 893441093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f583ea7c93bc66a9ef8a4e308c8dc630c9ff1c01fbc3af28ec5ab02ff4f79681
3
  size 893441093
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fa80d5373c46fd0ac9bf52389a9e6e9cf3955843a1e3da677ff34d39b7d44c0
3
  size 449474181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aff5625e82bdff15b343b27e9d6a9b3efa1fcccca6fcaa49aaf091e12be1731
3
  size 449474181
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec24cbb8ce4192257d55e6aa76c8de9c95eafe7de9cb3bbb4e920ca7e78fc649
3
  size 17577
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70a20325b16659225e3cd01033074400c13a25610958f69aa9e3e72a98f16147
3
  size 17577
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d86d5b3aa53b15a666affdc8015b88ae744c0263f7bb1e6519ffd82b431ab45a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43c4cc1073d6660901cb3dc90d6cc9fa2b9e92470c9b5b2cfa7f8fe0038e768d
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 68.98023040819228,
5
- "global_step": 970000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7184,11 +7184,85 @@
7184
  "eval_samples_per_second": 168.183,
7185
  "eval_steps_per_second": 5.257,
7186
  "step": 970000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7187
  }
7188
  ],
7189
  "max_steps": 1000000,
7190
  "num_train_epochs": 72,
7191
- "total_flos": 3.3998459444478542e+22,
7192
  "trial_name": null,
7193
  "trial_params": null
7194
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 69.69136680415303,
5
+ "global_step": 980000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7184
  "eval_samples_per_second": 168.183,
7185
  "eval_steps_per_second": 5.257,
7186
  "step": 970000
7187
+ },
7188
+ {
7189
+ "epoch": 69.05,
7190
+ "learning_rate": 1.0149337548616087e-05,
7191
+ "loss": 0.1075,
7192
+ "step": 971000
7193
+ },
7194
+ {
7195
+ "epoch": 69.12,
7196
+ "learning_rate": 1.0139223211606578e-05,
7197
+ "loss": 0.1076,
7198
+ "step": 972000
7199
+ },
7200
+ {
7201
+ "epoch": 69.19,
7202
+ "learning_rate": 1.012946276686737e-05,
7203
+ "loss": 0.1078,
7204
+ "step": 973000
7205
+ },
7206
+ {
7207
+ "epoch": 69.26,
7208
+ "learning_rate": 1.0120056321137129e-05,
7209
+ "loss": 0.1079,
7210
+ "step": 974000
7211
+ },
7212
+ {
7213
+ "epoch": 69.34,
7214
+ "learning_rate": 1.0111003977283231e-05,
7215
+ "loss": 0.1074,
7216
+ "step": 975000
7217
+ },
7218
+ {
7219
+ "epoch": 69.34,
7220
+ "eval_runtime": 524.9926,
7221
+ "eval_samples_per_second": 167.456,
7222
+ "eval_steps_per_second": 5.234,
7223
+ "step": 975000
7224
+ },
7225
+ {
7226
+ "epoch": 69.41,
7227
+ "learning_rate": 1.010230583430066e-05,
7228
+ "loss": 0.1073,
7229
+ "step": 976000
7230
+ },
7231
+ {
7232
+ "epoch": 69.48,
7233
+ "learning_rate": 1.009396198731092e-05,
7234
+ "loss": 0.1079,
7235
+ "step": 977000
7236
+ },
7237
+ {
7238
+ "epoch": 69.55,
7239
+ "learning_rate": 1.0085972527560981e-05,
7240
+ "loss": 0.1072,
7241
+ "step": 978000
7242
+ },
7243
+ {
7244
+ "epoch": 69.62,
7245
+ "learning_rate": 1.0078337542422287e-05,
7246
+ "loss": 0.1077,
7247
+ "step": 979000
7248
+ },
7249
+ {
7250
+ "epoch": 69.69,
7251
+ "learning_rate": 1.0071057115389828e-05,
7252
+ "loss": 0.108,
7253
+ "step": 980000
7254
+ },
7255
+ {
7256
+ "epoch": 69.69,
7257
+ "eval_runtime": 537.8359,
7258
+ "eval_samples_per_second": 163.457,
7259
+ "eval_steps_per_second": 5.109,
7260
+ "step": 980000
7261
  }
7262
  ],
7263
  "max_steps": 1000000,
7264
  "num_train_epochs": 72,
7265
+ "total_flos": 3.4348958289582365e+22,
7266
  "trial_name": null,
7267
  "trial_params": null
7268
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fa80d5373c46fd0ac9bf52389a9e6e9cf3955843a1e3da677ff34d39b7d44c0
3
  size 449474181
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3aff5625e82bdff15b343b27e9d6a9b3efa1fcccca6fcaa49aaf091e12be1731
3
  size 449474181