ncbateman committed
Commit 1943437
1 Parent(s): 5e0369f

Training in progress, step 520, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:64f4d2407a2d801a79431b62cbb89f6b811d012a90f3c05c5776d1fe8b649c23
+ oid sha256:a7583d22ab1c9114b632ece2e6cc49282443f5b4a3fdedb52fb678f52b2aad8d
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:50d796085c1da6a664a0a095bd97a802e19a9b834f81f1af7482a826cd3c13fd
+ oid sha256:f15e17e91563986d667221e9c8faff7fc7a9b3f1e836c62d18349f2266965a51
  size 23159546
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3ec4e8fb4df96f748a330df5f48ab7541f526fa33838ac8f0a0d1dbf6f82cae7
+ oid sha256:b77422a2be1769a3b7ceb86eaff5f1b80e128d2313f8eed1d5032246ec30f6a5
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09e0fb551bedc08207717eba3973833686a962221df59796ce2b28fa11f3445e
+ oid sha256:bc5b1354b82006b1bf63ee90590eb9a369c774920d9f04b12bfc9ee51c6a1d65
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:496bad4c8c14ed4e934e08779be511e0a3138f38c99bc251313e9540e43073e5
+ oid sha256:8d33b0edd5b6ac8e7325d969d8a731d29cfad089e3aa53b250b771d90d30a917
  size 1064
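
The five files above are Git LFS pointer files, so only the sha256 OID changes between revisions while the byte size stays the same. As an aside (not part of this commit), here is a minimal Python sketch of how one might check that a locally downloaded blob matches its pointer; the path and the helper name are illustrative, and the OID/size values are taken from the new adapter_model.safetensors pointer above.

import hashlib
import os

def matches_lfs_pointer(blob_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values from the new pointer for adapter_model.safetensors in this commit.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "a7583d22ab1c9114b632ece2e6cc49282443f5b4a3fdedb52fb678f52b2aad8d",
    45118424,
))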
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 4.77326968973747,
+ "epoch": 4.964200477326969,
  "eval_steps": 52,
- "global_step": 500,
+ "global_step": 520,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3587,6 +3587,154 @@
  "learning_rate": 4.461255922609986e-07,
  "loss": 0.5518,
  "step": 500
+ },
+ {
+ "epoch": 4.782816229116945,
+ "grad_norm": 0.548302412033081,
+ "learning_rate": 4.0268683226741265e-07,
+ "loss": 0.6202,
+ "step": 501
+ },
+ {
+ "epoch": 4.79236276849642,
+ "grad_norm": 0.548893928527832,
+ "learning_rate": 3.6146402536468283e-07,
+ "loss": 0.6218,
+ "step": 502
+ },
+ {
+ "epoch": 4.801909307875895,
+ "grad_norm": 0.5890070199966431,
+ "learning_rate": 3.2245901334221895e-07,
+ "loss": 0.6638,
+ "step": 503
+ },
+ {
+ "epoch": 4.81145584725537,
+ "grad_norm": 0.4871584475040436,
+ "learning_rate": 2.856735389008269e-07,
+ "loss": 0.6107,
+ "step": 504
+ },
+ {
+ "epoch": 4.821002386634845,
+ "grad_norm": 0.5432624816894531,
+ "learning_rate": 2.511092455747932e-07,
+ "loss": 0.583,
+ "step": 505
+ },
+ {
+ "epoch": 4.83054892601432,
+ "grad_norm": 0.5359986424446106,
+ "learning_rate": 2.1876767765853234e-07,
+ "loss": 0.5368,
+ "step": 506
+ },
+ {
+ "epoch": 4.840095465393794,
+ "grad_norm": 0.5359886288642883,
+ "learning_rate": 1.8865028013751452e-07,
+ "loss": 0.6259,
+ "step": 507
+ },
+ {
+ "epoch": 4.84964200477327,
+ "grad_norm": 0.5111921429634094,
+ "learning_rate": 1.6075839862374488e-07,
+ "loss": 0.5609,
+ "step": 508
+ },
+ {
+ "epoch": 4.859188544152745,
+ "grad_norm": 0.6437258124351501,
+ "learning_rate": 1.3509327929563942e-07,
+ "loss": 0.6395,
+ "step": 509
+ },
+ {
+ "epoch": 4.868735083532219,
+ "grad_norm": 0.5992398262023926,
+ "learning_rate": 1.1165606884234181e-07,
+ "loss": 0.6546,
+ "step": 510
+ },
+ {
+ "epoch": 4.878281622911695,
+ "grad_norm": 0.5831811428070068,
+ "learning_rate": 9.044781441249207e-08,
+ "loss": 0.609,
+ "step": 511
+ },
+ {
+ "epoch": 4.88782816229117,
+ "grad_norm": 0.5561614632606506,
+ "learning_rate": 7.146946356743067e-08,
+ "loss": 0.6699,
+ "step": 512
+ },
+ {
+ "epoch": 4.897374701670644,
+ "grad_norm": 0.5337750911712646,
+ "learning_rate": 5.472186423889358e-08,
+ "loss": 0.5614,
+ "step": 513
+ },
+ {
+ "epoch": 4.906921241050119,
+ "grad_norm": 0.41179969906806946,
+ "learning_rate": 4.020576469108139e-08,
+ "loss": 0.4451,
+ "step": 514
+ },
+ {
+ "epoch": 4.916467780429595,
+ "grad_norm": 0.4820442795753479,
+ "learning_rate": 2.792181348726941e-08,
+ "loss": 0.5897,
+ "step": 515
+ },
+ {
+ "epoch": 4.926014319809069,
+ "grad_norm": 0.5927594900131226,
+ "learning_rate": 1.7870559460814173e-08,
+ "loss": 0.6788,
+ "step": 516
+ },
+ {
+ "epoch": 4.935560859188544,
+ "grad_norm": 0.5302107334136963,
+ "learning_rate": 1.0052451690617527e-08,
+ "loss": 0.6105,
+ "step": 517
+ },
+ {
+ "epoch": 4.945107398568019,
+ "grad_norm": 0.5596168041229248,
+ "learning_rate": 4.46783948109819e-09,
+ "loss": 0.5537,
+ "step": 518
+ },
+ {
+ "epoch": 4.954653937947494,
+ "grad_norm": 0.5655501484870911,
+ "learning_rate": 1.1169723465487281e-09,
+ "loss": 0.6915,
+ "step": 519
+ },
+ {
+ "epoch": 4.964200477326969,
+ "grad_norm": 0.5537259578704834,
+ "learning_rate": 0.0,
+ "loss": 0.7157,
+ "step": 520
+ },
+ {
+ "epoch": 4.964200477326969,
+ "eval_loss": 0.7753176689147949,
+ "eval_runtime": 12.9676,
+ "eval_samples_per_second": 13.649,
+ "eval_steps_per_second": 1.774,
+ "step": 520
  }
  ],
  "logging_steps": 1,
@@ -3601,12 +3749,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.87089027825664e+17,
+ "total_flos": 4.0257258893869056e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null