Federic commited on
Commit
cc96b05
1 Parent(s): 4f92d7b

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ddb0a3040e4bc33b45e5f683357414fba640069beb3aef7d35d83b45853605b
3
  size 536906096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:034e187a68a928df60de8335d8df6f9d752c366c792ade88c4c13240c4166357
3
  size 536906096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df7c014a1bf191c56fdbf386cd19429a2f9f3882d634e9dfcfb1f27ac32ff6ba
3
  size 269267284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e325c8d6e17b7311ab280d18a60d02cca13364708dc86af9066b50e60d2b98d3
3
  size 269267284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8cc3c3121e910c0b9dfe5b58766894e33e13b11c23a5da31d662071e56c6dec
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d1ae586164ef573a6c2ef2af404c49503a0d50cf74be438545d9458602c094
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efdbfe8676cd24a75fed7e6e38125bb1bb838a4b70c0fbf7469557659d9b1fec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d31dc31a119769737d72f3df4c8cdf99522596cafc12bf2eea05a4ff374f599c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5,
5
  "eval_steps": 500,
6
- "global_step": 125,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -757,13 +757,163 @@
757
  "learning_rate": 0.0002,
758
  "loss": 0.4266,
759
  "step": 125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  }
761
  ],
762
  "logging_steps": 1,
763
  "max_steps": 250,
764
  "num_train_epochs": 1,
765
  "save_steps": 25,
766
- "total_flos": 9948052490158080.0,
767
  "trial_name": null,
768
  "trial_params": null
769
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
757
  "learning_rate": 0.0002,
758
  "loss": 0.4266,
759
  "step": 125
760
+ },
761
+ {
762
+ "epoch": 0.5,
763
+ "learning_rate": 0.0002,
764
+ "loss": 0.4661,
765
+ "step": 126
766
+ },
767
+ {
768
+ "epoch": 0.51,
769
+ "learning_rate": 0.0002,
770
+ "loss": 0.3564,
771
+ "step": 127
772
+ },
773
+ {
774
+ "epoch": 0.51,
775
+ "learning_rate": 0.0002,
776
+ "loss": 0.3744,
777
+ "step": 128
778
+ },
779
+ {
780
+ "epoch": 0.52,
781
+ "learning_rate": 0.0002,
782
+ "loss": 0.3771,
783
+ "step": 129
784
+ },
785
+ {
786
+ "epoch": 0.52,
787
+ "learning_rate": 0.0002,
788
+ "loss": 0.4058,
789
+ "step": 130
790
+ },
791
+ {
792
+ "epoch": 0.52,
793
+ "learning_rate": 0.0002,
794
+ "loss": 0.3986,
795
+ "step": 131
796
+ },
797
+ {
798
+ "epoch": 0.53,
799
+ "learning_rate": 0.0002,
800
+ "loss": 0.3803,
801
+ "step": 132
802
+ },
803
+ {
804
+ "epoch": 0.53,
805
+ "learning_rate": 0.0002,
806
+ "loss": 0.4069,
807
+ "step": 133
808
+ },
809
+ {
810
+ "epoch": 0.54,
811
+ "learning_rate": 0.0002,
812
+ "loss": 0.3484,
813
+ "step": 134
814
+ },
815
+ {
816
+ "epoch": 0.54,
817
+ "learning_rate": 0.0002,
818
+ "loss": 0.3777,
819
+ "step": 135
820
+ },
821
+ {
822
+ "epoch": 0.54,
823
+ "learning_rate": 0.0002,
824
+ "loss": 0.3637,
825
+ "step": 136
826
+ },
827
+ {
828
+ "epoch": 0.55,
829
+ "learning_rate": 0.0002,
830
+ "loss": 0.4426,
831
+ "step": 137
832
+ },
833
+ {
834
+ "epoch": 0.55,
835
+ "learning_rate": 0.0002,
836
+ "loss": 0.3648,
837
+ "step": 138
838
+ },
839
+ {
840
+ "epoch": 0.56,
841
+ "learning_rate": 0.0002,
842
+ "loss": 0.3665,
843
+ "step": 139
844
+ },
845
+ {
846
+ "epoch": 0.56,
847
+ "learning_rate": 0.0002,
848
+ "loss": 0.3592,
849
+ "step": 140
850
+ },
851
+ {
852
+ "epoch": 0.56,
853
+ "learning_rate": 0.0002,
854
+ "loss": 0.3375,
855
+ "step": 141
856
+ },
857
+ {
858
+ "epoch": 0.57,
859
+ "learning_rate": 0.0002,
860
+ "loss": 0.3669,
861
+ "step": 142
862
+ },
863
+ {
864
+ "epoch": 0.57,
865
+ "learning_rate": 0.0002,
866
+ "loss": 0.3499,
867
+ "step": 143
868
+ },
869
+ {
870
+ "epoch": 0.58,
871
+ "learning_rate": 0.0002,
872
+ "loss": 0.3832,
873
+ "step": 144
874
+ },
875
+ {
876
+ "epoch": 0.58,
877
+ "learning_rate": 0.0002,
878
+ "loss": 0.2947,
879
+ "step": 145
880
+ },
881
+ {
882
+ "epoch": 0.58,
883
+ "learning_rate": 0.0002,
884
+ "loss": 0.3493,
885
+ "step": 146
886
+ },
887
+ {
888
+ "epoch": 0.59,
889
+ "learning_rate": 0.0002,
890
+ "loss": 0.2903,
891
+ "step": 147
892
+ },
893
+ {
894
+ "epoch": 0.59,
895
+ "learning_rate": 0.0002,
896
+ "loss": 0.2703,
897
+ "step": 148
898
+ },
899
+ {
900
+ "epoch": 0.6,
901
+ "learning_rate": 0.0002,
902
+ "loss": 0.2989,
903
+ "step": 149
904
+ },
905
+ {
906
+ "epoch": 0.6,
907
+ "learning_rate": 0.0002,
908
+ "loss": 0.2957,
909
+ "step": 150
910
  }
911
  ],
912
  "logging_steps": 1,
913
  "max_steps": 250,
914
  "num_train_epochs": 1,
915
  "save_steps": 25,
916
+ "total_flos": 1.165066384195584e+16,
917
  "trial_name": null,
918
  "trial_params": null
919
  }