Nadav commited on
Commit
dbcfded
1 Parent(s): 780dbc7

Training in progress, step 200000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdf19b8ac1ef38105671f7e3ed466178582ff690b2e4244bda799fabd849a44a
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e4e20c1ef2ac6843c886418e98a47bae638ea7d617febd310a65b1910a8f8e4
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34cdc450f36015ade18e6cf8d347ba98eed346e9fca052902560b578799df39f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65fa8e0f8a447fcc5954aa678e61448bcfe8f8387d2caed08d0f1179f60750d7
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e2a05b33f27bc07e845ac5ddf394e12d5aa9e01cff4a464ac84d19c70049e32
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc8cc6cb0c14f159f66977ee3dfa2ab864df7c0e19869ebf5c6d7b0393c02b8d
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09ac2f6b5dab3c2f241653e24158eb8e5933ba687a501a1fa916c1b82a746b90
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0c7a6adc45a0eea76cb785076f51f02ab2ddc692d12cb942437e9bf8e23bc08
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2639f53746eae2335e88ed02acce8977dcbe3ece7ab8cbccde5b25d715fd5406
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de21128fb4d2d9fbd6335f650a62e3e1299cfe449b8f64957937f253cda36cc0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
- "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2446,11 +2446,131 @@
2446
  "learning_rate": 1.0566893240808188e-05,
2447
  "loss": 0.3743,
2448
  "step": 190000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2449
  }
2450
  ],
2451
  "max_steps": 200000,
2452
  "num_train_epochs": 9223372036854775807,
2453
- "total_flos": 4.4743682799304704e+21,
2454
  "trial_name": null,
2455
  "trial_params": null
2456
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.05,
5
+ "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2446
  "learning_rate": 1.0566893240808188e-05,
2447
  "loss": 0.3743,
2448
  "step": 190000
2449
+ },
2450
+ {
2451
+ "epoch": 0.0,
2452
+ "learning_rate": 1.0512109659997981e-05,
2453
+ "loss": 0.3673,
2454
+ "step": 190500
2455
+ },
2456
+ {
2457
+ "epoch": 0.01,
2458
+ "learning_rate": 1.0460094397410629e-05,
2459
+ "loss": 0.3658,
2460
+ "step": 191000
2461
+ },
2462
+ {
2463
+ "epoch": 0.01,
2464
+ "learning_rate": 1.041085066159254e-05,
2465
+ "loss": 0.3661,
2466
+ "step": 191500
2467
+ },
2468
+ {
2469
+ "epoch": 0.01,
2470
+ "learning_rate": 1.036447165752325e-05,
2471
+ "loss": 0.3648,
2472
+ "step": 192000
2473
+ },
2474
+ {
2475
+ "epoch": 0.01,
2476
+ "learning_rate": 1.03207743592438e-05,
2477
+ "loss": 0.3649,
2478
+ "step": 192500
2479
+ },
2480
+ {
2481
+ "epoch": 0.01,
2482
+ "learning_rate": 1.0279936239738395e-05,
2483
+ "loss": 0.3645,
2484
+ "step": 193000
2485
+ },
2486
+ {
2487
+ "epoch": 0.02,
2488
+ "learning_rate": 1.0241796139099991e-05,
2489
+ "loss": 0.3646,
2490
+ "step": 193500
2491
+ },
2492
+ {
2493
+ "epoch": 0.02,
2494
+ "learning_rate": 1.0206441030895691e-05,
2495
+ "loss": 0.3652,
2496
+ "step": 194000
2497
+ },
2498
+ {
2499
+ "epoch": 0.02,
2500
+ "learning_rate": 1.0173873095995124e-05,
2501
+ "loss": 0.3659,
2502
+ "step": 194500
2503
+ },
2504
+ {
2505
+ "epoch": 0.03,
2506
+ "learning_rate": 1.0144094343341912e-05,
2507
+ "loss": 0.3645,
2508
+ "step": 195000
2509
+ },
2510
+ {
2511
+ "epoch": 0.03,
2512
+ "learning_rate": 1.011715779872454e-05,
2513
+ "loss": 0.3676,
2514
+ "step": 195500
2515
+ },
2516
+ {
2517
+ "epoch": 0.03,
2518
+ "learning_rate": 1.0092957162169376e-05,
2519
+ "loss": 0.3673,
2520
+ "step": 196000
2521
+ },
2522
+ {
2523
+ "epoch": 0.03,
2524
+ "learning_rate": 1.0071550699137517e-05,
2525
+ "loss": 0.3674,
2526
+ "step": 196500
2527
+ },
2528
+ {
2529
+ "epoch": 0.04,
2530
+ "learning_rate": 1.0052939730080435e-05,
2531
+ "loss": 0.3678,
2532
+ "step": 197000
2533
+ },
2534
+ {
2535
+ "epoch": 0.04,
2536
+ "learning_rate": 1.0037125403010368e-05,
2537
+ "loss": 0.3688,
2538
+ "step": 197500
2539
+ },
2540
+ {
2541
+ "epoch": 0.04,
2542
+ "learning_rate": 1.0024108693429497e-05,
2543
+ "loss": 0.3676,
2544
+ "step": 198000
2545
+ },
2546
+ {
2547
+ "epoch": 0.04,
2548
+ "learning_rate": 1.0013890404269793e-05,
2549
+ "loss": 0.3685,
2550
+ "step": 198500
2551
+ },
2552
+ {
2553
+ "epoch": 0.04,
2554
+ "learning_rate": 1.0006471165843453e-05,
2555
+ "loss": 0.3664,
2556
+ "step": 199000
2557
+ },
2558
+ {
2559
+ "epoch": 0.05,
2560
+ "learning_rate": 1.000185143580406e-05,
2561
+ "loss": 0.3678,
2562
+ "step": 199500
2563
+ },
2564
+ {
2565
+ "epoch": 0.05,
2566
+ "learning_rate": 1.0000031499118314e-05,
2567
+ "loss": 0.3682,
2568
+ "step": 200000
2569
  }
2570
  ],
2571
  "max_steps": 200000,
2572
  "num_train_epochs": 9223372036854775807,
2573
+ "total_flos": 4.709861347295232e+21,
2574
  "trial_name": null,
2575
  "trial_params": null
2576
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:818b7ad8f2aec69753e6ae13a5e403ef3686da3a5de695a9abc49d9951fcd73b
3
  size 5551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c18b4efeb50055b9df3de46e48e5ef92c97dea961e5c1e8f4076e024c3c1db
3
  size 5551
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34cdc450f36015ade18e6cf8d347ba98eed346e9fca052902560b578799df39f
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65fa8e0f8a447fcc5954aa678e61448bcfe8f8387d2caed08d0f1179f60750d7
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:818b7ad8f2aec69753e6ae13a5e403ef3686da3a5de695a9abc49d9951fcd73b
3
  size 5551
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53c18b4efeb50055b9df3de46e48e5ef92c97dea961e5c1e8f4076e024c3c1db
3
  size 5551