Nadav commited on
Commit
c8d9e80
1 Parent(s): 66d740d

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb0a61ab643c18ccf5b4cde2db1687dfb3353ac9730e884873954f6a4914ee49
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0083228159a9684fe6abf7fc7f3682abd4e091c336c63a24ba76d9736a69586
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8beb25b1f0bcf41b4de421e9957ad88915f0b34dec0216d6304d37fb6ed0fc2
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e59ec613175c3d9dd17cb09bb4ec1c875973ee63862b916c52950e17b26a470
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc2ae884d96e2e98bf3a22796a951063fc339dc13602ff01679fb67e0849a907
3
  size 15459
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cccf7b9fd3b0a19dfeea5197c3f0bc5fb356c93fa129ae332264dec9fa909108
3
  size 15459
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb8cb9021f3a60f9e715cf2cfd338061faee5a6bd0df4d32e00e1489dc2742d6
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4db330b47705f38a44d9ca503524acd867a7f5b9c7d09cea471cc2d7ef717581
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fa03e39186130ec116897cef176842ce103a90b1b243dfa00171a77a06fae55
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53825b8848113cf8bc0172552a5393f9bdd93287620ad2f38f87ba251b8fb391
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.529119874640898,
5
- "global_step": 95000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1298,11 +1298,79 @@
1298
  "eval_samples_per_second": 76.398,
1299
  "eval_steps_per_second": 1.207,
1300
  "step": 95000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1301
  }
1302
  ],
1303
  "max_steps": 100000,
1304
  "num_train_epochs": 9,
1305
- "total_flos": 4.474341992332253e+21,
1306
  "trial_name": null,
1307
  "trial_params": null
1308
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.964394532950291,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1298
  "eval_samples_per_second": 76.398,
1299
  "eval_steps_per_second": 1.207,
1300
  "step": 95000
1301
+ },
1302
+ {
1303
+ "epoch": 7.57,
1304
+ "learning_rate": 1.0459487036725839e-05,
1305
+ "loss": 0.4031,
1306
+ "step": 95500
1307
+ },
1308
+ {
1309
+ "epoch": 7.62,
1310
+ "learning_rate": 1.0363840719071717e-05,
1311
+ "loss": 0.4023,
1312
+ "step": 96000
1313
+ },
1314
+ {
1315
+ "epoch": 7.66,
1316
+ "learning_rate": 1.0279462058166865e-05,
1317
+ "loss": 0.4025,
1318
+ "step": 96500
1319
+ },
1320
+ {
1321
+ "epoch": 7.7,
1322
+ "learning_rate": 1.0206033689125313e-05,
1323
+ "loss": 0.4028,
1324
+ "step": 97000
1325
+ },
1326
+ {
1327
+ "epoch": 7.75,
1328
+ "learning_rate": 1.0143753941878168e-05,
1329
+ "loss": 0.4023,
1330
+ "step": 97500
1331
+ },
1332
+ {
1333
+ "epoch": 7.79,
1334
+ "learning_rate": 1.0092638183021144e-05,
1335
+ "loss": 0.4019,
1336
+ "step": 98000
1337
+ },
1338
+ {
1339
+ "epoch": 7.83,
1340
+ "learning_rate": 1.0052699024602892e-05,
1341
+ "loss": 0.4014,
1342
+ "step": 98500
1343
+ },
1344
+ {
1345
+ "epoch": 7.88,
1346
+ "learning_rate": 1.0023946321013112e-05,
1347
+ "loss": 0.4018,
1348
+ "step": 99000
1349
+ },
1350
+ {
1351
+ "epoch": 7.92,
1352
+ "learning_rate": 1.0006411110358469e-05,
1353
+ "loss": 0.4024,
1354
+ "step": 99500
1355
+ },
1356
+ {
1357
+ "epoch": 7.96,
1358
+ "learning_rate": 1.0000027439232365e-05,
1359
+ "loss": 0.4024,
1360
+ "step": 100000
1361
+ },
1362
+ {
1363
+ "epoch": 7.96,
1364
+ "eval_loss": 0.38628044724464417,
1365
+ "eval_runtime": 91.7648,
1366
+ "eval_samples_per_second": 54.487,
1367
+ "eval_steps_per_second": 0.861,
1368
+ "step": 100000
1369
  }
1370
  ],
1371
  "max_steps": 100000,
1372
  "num_train_epochs": 9,
1373
+ "total_flos": 4.7098350596970145e+21,
1374
  "trial_name": null,
1375
  "trial_params": null
1376
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8beb25b1f0bcf41b4de421e9957ad88915f0b34dec0216d6304d37fb6ed0fc2
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e59ec613175c3d9dd17cb09bb4ec1c875973ee63862b916c52950e17b26a470
3
  size 449471589