Nadav committed on
Commit
66d740d
1 Parent(s): 6281a0b

Training in progress, step 95000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d002168cc69538373ab33244390da8c4a63690ab3ac7af2902d3df798e6bc748
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb0a61ab643c18ccf5b4cde2db1687dfb3353ac9730e884873954f6a4914ee49
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a1b08538448c3955f9816afd9712587d226efc04ddd65ee0378bda9ad13a5ad
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8beb25b1f0bcf41b4de421e9957ad88915f0b34dec0216d6304d37fb6ed0fc2
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfd06f2b98bc509f92f4e1415fd68ebc1b6603ab44f533199dd7ae056dc6b235
3
  size 15459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc2ae884d96e2e98bf3a22796a951063fc339dc13602ff01679fb67e0849a907
3
  size 15459
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1541b5ef2368a76e7bbd5980ca4a3faf491d955c90d4e8794732bf908704ed18
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb8cb9021f3a60f9e715cf2cfd338061faee5a6bd0df4d32e00e1489dc2742d6
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f61de41cc564c25462ca0290993ffba1d92f72b28f091680e90a006d4be7a958
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa03e39186130ec116897cef176842ce103a90b1b243dfa00171a77a06fae55
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.0938452163315056,
5
- "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1230,11 +1230,79 @@
1230
  "eval_samples_per_second": 37.527,
1231
  "eval_steps_per_second": 0.593,
1232
  "step": 90000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1233
  }
1234
  ],
1235
  "max_steps": 100000,
1236
  "num_train_epochs": 9,
1237
- "total_flos": 4.2388489249674913e+21,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.529119874640898,
5
+ "global_step": 95000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1230
  "eval_samples_per_second": 37.527,
1231
  "eval_steps_per_second": 0.593,
1232
  "step": 90000
1233
+ },
1234
+ {
1235
+ "epoch": 7.14,
1236
+ "learning_rate": 1.20196015019827e-05,
1237
+ "loss": 0.4036,
1238
+ "step": 90500
1239
+ },
1240
+ {
1241
+ "epoch": 7.18,
1242
+ "learning_rate": 1.1815047267485115e-05,
1243
+ "loss": 0.4037,
1244
+ "step": 91000
1245
+ },
1246
+ {
1247
+ "epoch": 7.22,
1248
+ "learning_rate": 1.162082400805151e-05,
1249
+ "loss": 0.4048,
1250
+ "step": 91500
1251
+ },
1252
+ {
1253
+ "epoch": 7.27,
1254
+ "learning_rate": 1.1437400292071077e-05,
1255
+ "loss": 0.4032,
1256
+ "step": 92000
1257
+ },
1258
+ {
1259
+ "epoch": 7.31,
1260
+ "learning_rate": 1.126482137660111e-05,
1261
+ "loss": 0.4032,
1262
+ "step": 92500
1263
+ },
1264
+ {
1265
+ "epoch": 7.36,
1266
+ "learning_rate": 1.1103129842906643e-05,
1267
+ "loss": 0.4029,
1268
+ "step": 93000
1269
+ },
1270
+ {
1271
+ "epoch": 7.4,
1272
+ "learning_rate": 1.0952365585954172e-05,
1273
+ "loss": 0.4023,
1274
+ "step": 93500
1275
+ },
1276
+ {
1277
+ "epoch": 7.44,
1278
+ "learning_rate": 1.0812565804568168e-05,
1279
+ "loss": 0.4027,
1280
+ "step": 94000
1281
+ },
1282
+ {
1283
+ "epoch": 7.49,
1284
+ "learning_rate": 1.0683764992252818e-05,
1285
+ "loss": 0.4022,
1286
+ "step": 94500
1287
+ },
1288
+ {
1289
+ "epoch": 7.53,
1290
+ "learning_rate": 1.0566219440353348e-05,
1291
+ "loss": 0.4027,
1292
+ "step": 95000
1293
+ },
1294
+ {
1295
+ "epoch": 7.53,
1296
+ "eval_loss": 0.38710081577301025,
1297
+ "eval_runtime": 65.4468,
1298
+ "eval_samples_per_second": 76.398,
1299
+ "eval_steps_per_second": 1.207,
1300
+ "step": 95000
1301
  }
1302
  ],
1303
  "max_steps": 100000,
1304
  "num_train_epochs": 9,
1305
+ "total_flos": 4.474341992332253e+21,
1306
  "trial_name": null,
1307
  "trial_params": null
1308
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a1b08538448c3955f9816afd9712587d226efc04ddd65ee0378bda9ad13a5ad
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8beb25b1f0bcf41b4de421e9957ad88915f0b34dec0216d6304d37fb6ed0fc2
3
  size 449471589