Federic committed on
Commit
e29c02c
1 Parent(s): 1f9566c

Training in progress, step 175, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:034e187a68a928df60de8335d8df6f9d752c366c792ade88c4c13240c4166357
3
  size 536906096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2486faa614fefac339054a626954e901ed19edcd5c4630e13d6f17fcc4e122
3
  size 536906096
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e325c8d6e17b7311ab280d18a60d02cca13364708dc86af9066b50e60d2b98d3
3
  size 269267284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a202cd852528ab00873ca3f1d4f8e386a8264d607612cae77ab5ffeccc6872
3
  size 269267284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21d1ae586164ef573a6c2ef2af404c49503a0d50cf74be438545d9458602c094
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2e3bd96c9227f8005659a008a5de4b22b3d54ac87368e34f44e67994ab38530
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d31dc31a119769737d72f3df4c8cdf99522596cafc12bf2eea05a4ff374f599c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17c000dd3ad474c8365253b4464489310fed0d13c745556138280174b380deb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6,
5
  "eval_steps": 500,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -907,13 +907,163 @@
907
  "learning_rate": 0.0002,
908
  "loss": 0.2957,
909
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
910
  }
911
  ],
912
  "logging_steps": 1,
913
  "max_steps": 250,
914
  "num_train_epochs": 1,
915
  "save_steps": 25,
916
- "total_flos": 1.165066384195584e+16,
917
  "trial_name": null,
918
  "trial_params": null
919
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7,
5
  "eval_steps": 500,
6
+ "global_step": 175,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
907
  "learning_rate": 0.0002,
908
  "loss": 0.2957,
909
  "step": 150
910
+ },
911
+ {
912
+ "epoch": 0.6,
913
+ "learning_rate": 0.0002,
914
+ "loss": 0.5856,
915
+ "step": 151
916
+ },
917
+ {
918
+ "epoch": 0.61,
919
+ "learning_rate": 0.0002,
920
+ "loss": 0.5389,
921
+ "step": 152
922
+ },
923
+ {
924
+ "epoch": 0.61,
925
+ "learning_rate": 0.0002,
926
+ "loss": 0.5347,
927
+ "step": 153
928
+ },
929
+ {
930
+ "epoch": 0.62,
931
+ "learning_rate": 0.0002,
932
+ "loss": 0.4574,
933
+ "step": 154
934
+ },
935
+ {
936
+ "epoch": 0.62,
937
+ "learning_rate": 0.0002,
938
+ "loss": 0.5172,
939
+ "step": 155
940
+ },
941
+ {
942
+ "epoch": 0.62,
943
+ "learning_rate": 0.0002,
944
+ "loss": 0.4515,
945
+ "step": 156
946
+ },
947
+ {
948
+ "epoch": 0.63,
949
+ "learning_rate": 0.0002,
950
+ "loss": 0.4383,
951
+ "step": 157
952
+ },
953
+ {
954
+ "epoch": 0.63,
955
+ "learning_rate": 0.0002,
956
+ "loss": 0.5203,
957
+ "step": 158
958
+ },
959
+ {
960
+ "epoch": 0.64,
961
+ "learning_rate": 0.0002,
962
+ "loss": 0.5767,
963
+ "step": 159
964
+ },
965
+ {
966
+ "epoch": 0.64,
967
+ "learning_rate": 0.0002,
968
+ "loss": 0.4975,
969
+ "step": 160
970
+ },
971
+ {
972
+ "epoch": 0.64,
973
+ "learning_rate": 0.0002,
974
+ "loss": 0.4923,
975
+ "step": 161
976
+ },
977
+ {
978
+ "epoch": 0.65,
979
+ "learning_rate": 0.0002,
980
+ "loss": 0.4294,
981
+ "step": 162
982
+ },
983
+ {
984
+ "epoch": 0.65,
985
+ "learning_rate": 0.0002,
986
+ "loss": 0.4449,
987
+ "step": 163
988
+ },
989
+ {
990
+ "epoch": 0.66,
991
+ "learning_rate": 0.0002,
992
+ "loss": 0.4739,
993
+ "step": 164
994
+ },
995
+ {
996
+ "epoch": 0.66,
997
+ "learning_rate": 0.0002,
998
+ "loss": 0.508,
999
+ "step": 165
1000
+ },
1001
+ {
1002
+ "epoch": 0.66,
1003
+ "learning_rate": 0.0002,
1004
+ "loss": 0.413,
1005
+ "step": 166
1006
+ },
1007
+ {
1008
+ "epoch": 0.67,
1009
+ "learning_rate": 0.0002,
1010
+ "loss": 0.4587,
1011
+ "step": 167
1012
+ },
1013
+ {
1014
+ "epoch": 0.67,
1015
+ "learning_rate": 0.0002,
1016
+ "loss": 0.4465,
1017
+ "step": 168
1018
+ },
1019
+ {
1020
+ "epoch": 0.68,
1021
+ "learning_rate": 0.0002,
1022
+ "loss": 0.4142,
1023
+ "step": 169
1024
+ },
1025
+ {
1026
+ "epoch": 0.68,
1027
+ "learning_rate": 0.0002,
1028
+ "loss": 0.4477,
1029
+ "step": 170
1030
+ },
1031
+ {
1032
+ "epoch": 0.68,
1033
+ "learning_rate": 0.0002,
1034
+ "loss": 0.4854,
1035
+ "step": 171
1036
+ },
1037
+ {
1038
+ "epoch": 0.69,
1039
+ "learning_rate": 0.0002,
1040
+ "loss": 0.4451,
1041
+ "step": 172
1042
+ },
1043
+ {
1044
+ "epoch": 0.69,
1045
+ "learning_rate": 0.0002,
1046
+ "loss": 0.4283,
1047
+ "step": 173
1048
+ },
1049
+ {
1050
+ "epoch": 0.7,
1051
+ "learning_rate": 0.0002,
1052
+ "loss": 0.4031,
1053
+ "step": 174
1054
+ },
1055
+ {
1056
+ "epoch": 0.7,
1057
+ "learning_rate": 0.0002,
1058
+ "loss": 0.395,
1059
+ "step": 175
1060
  }
1061
  ],
1062
  "logging_steps": 1,
1063
  "max_steps": 250,
1064
  "num_train_epochs": 1,
1065
  "save_steps": 25,
1066
+ "total_flos": 1.382556660645888e+16,
1067
  "trial_name": null,
1068
  "trial_params": null
1069
  }