cwaud committed
Commit 66c316d
1 parent: 34dcc7b

Training in progress, epoch 3, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5deef1e641a03f70336b52c3a3efb8ebefbcdb16eb98aa75bba08e1fe4032873
+ oid sha256:b71fc631d5783c5d069af4fee78013a24ddd3c46bcf3fbcf09c0bb6b0ca43422
  size 45118424
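
Each changed checkpoint file in this commit is a Git LFS pointer (version, oid, size), so only the sha256 oid changes between epochs. As a minimal sketch (the local path is only illustrative and assumes the repo was cloned with git-lfs so the pointer resolves to the real binary), a downloaded file can be checked against the new pointer above:

import hashlib
import os

def verify_lfs_pointer(local_path, expected_oid, expected_size):
    # Compare the file's byte size and sha256 digest to the LFS pointer fields.
    if os.path.getsize(local_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(local_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size copied from the new pointer above; the path is hypothetical.
print(verify_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "b71fc631d5783c5d069af4fee78013a24ddd3c46bcf3fbcf09c0bb6b0ca43422",
    45118424,
))
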
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:461c10d7d47f690a9bad0ea0840c0e8dc09b351a785d985dc1f32b20c4068ecc
+ oid sha256:3d7aeb6126275e01bdf1f49341b16dde99e46a80df284da5918226f1f17b4c6f
  size 23159290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a75c8ddc1097597193455db293c6504cd13a1c1919dbd19099b4ad60d6bfabb0
+ oid sha256:eeec199a7b6b3341564794f492d5f2cdbdf672dec36d76d5b370bee2f1e7adea
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:094216163007d45050b212c7a70efe5fae78f962c8cb96c9335b05613a3052f2
+ oid sha256:8e34bd0fa659ced3625b7171012be0646e813b4ec2a721d8ca72a36286759e37
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6fb1c6881d979182b2b854d82868818657102f22e08698828a952d734455b26f
+ oid sha256:d98792d492243d608246176dd844c995fbd105690efb258bea318e3d46b293b6
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e871667ddc380695d990d6c2ad338fc24321a2ffbfd570f50f1a355b41fce9f
+ oid sha256:38b2e32c1c6174211836d9d5f0df91a24428c9d7cb793bda03ac3d72db2083b2
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a5f01d1f39c96959ff207a7ea1e1ec315077fbaf203b474a1febad2bfc263e3a
+ oid sha256:a9caf63dc2234ee02c5e279e3b9796b437d67d266ac0e404750a5a75ec92c982
  size 1064
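
The pointers above cover the usual Trainer checkpoint artifacts: the adapter weights (adapter_model.safetensors, presumably a PEFT adapter), optimizer and LR-scheduler state, and one rng_state_<rank>.pth per training process (ranks 0-3 here). A hedged sketch for inspecting them locally, assuming the binaries have been pulled via git-lfs:

import torch
from safetensors import safe_open

# Optimizer, scheduler, and RNG states are torch pickles.
# Depending on your torch version you may need torch.load(..., weights_only=False).
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
rng_rank0 = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu")
print(type(optimizer_state), type(rng_rank0))

# The adapter weights are safetensors; list a few tensor names without loading everything.
with safe_open("last-checkpoint/adapter_model.safetensors", framework="pt", device="cpu") as f:
    print(list(f.keys())[:5])
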
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.0,
+ "epoch": 3.0,
  "eval_steps": 500,
- "global_step": 134,
+ "global_step": 201,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -945,6 +945,475 @@
  "learning_rate": 6.753433794837662e-05,
  "loss": 0.8727,
  "step": 134
+ },
+ {
+ "epoch": 2.014925373134328,
+ "grad_norm": 0.35240477323532104,
+ "learning_rate": 6.626149280816546e-05,
+ "loss": 1.0679,
+ "step": 135
+ },
+ {
+ "epoch": 2.029850746268657,
+ "grad_norm": 0.33283913135528564,
+ "learning_rate": 6.500000000000002e-05,
+ "loss": 1.0181,
+ "step": 136
+ },
+ {
+ "epoch": 2.044776119402985,
+ "grad_norm": 0.3615310788154602,
+ "learning_rate": 6.375018694441084e-05,
+ "loss": 0.9978,
+ "step": 137
+ },
+ {
+ "epoch": 2.0597014925373136,
+ "grad_norm": 0.36938410997390747,
+ "learning_rate": 6.251237803044805e-05,
+ "loss": 0.9883,
+ "step": 138
+ },
+ {
+ "epoch": 2.074626865671642,
+ "grad_norm": 0.3807987570762634,
+ "learning_rate": 6.128689453148619e-05,
+ "loss": 0.9572,
+ "step": 139
+ },
+ {
+ "epoch": 2.08955223880597,
+ "grad_norm": 0.37807610630989075,
+ "learning_rate": 6.00740545218375e-05,
+ "loss": 0.9174,
+ "step": 140
+ },
+ {
+ "epoch": 2.1044776119402986,
+ "grad_norm": 0.37602487206459045,
+ "learning_rate": 5.887417279419599e-05,
+ "loss": 0.8229,
+ "step": 141
+ },
+ {
+ "epoch": 2.1194029850746268,
+ "grad_norm": 0.35350197553634644,
+ "learning_rate": 5.7687560777932735e-05,
+ "loss": 0.8076,
+ "step": 142
+ },
+ {
+ "epoch": 2.1343283582089554,
+ "grad_norm": 0.3940332233905792,
+ "learning_rate": 5.651452645826445e-05,
+ "loss": 0.788,
+ "step": 143
+ },
+ {
+ "epoch": 2.1492537313432836,
+ "grad_norm": 0.46034398674964905,
+ "learning_rate": 5.5355374296315995e-05,
+ "loss": 0.7882,
+ "step": 144
+ },
+ {
+ "epoch": 2.1641791044776117,
+ "grad_norm": 0.4225603938102722,
+ "learning_rate": 5.421040515009737e-05,
+ "loss": 0.7197,
+ "step": 145
+ },
+ {
+ "epoch": 2.1791044776119404,
+ "grad_norm": 0.46008700132369995,
+ "learning_rate": 5.3079916196416055e-05,
+ "loss": 0.6569,
+ "step": 146
+ },
+ {
+ "epoch": 2.1940298507462686,
+ "grad_norm": 0.41973477602005005,
+ "learning_rate": 5.196420085374467e-05,
+ "loss": 0.8682,
+ "step": 147
+ },
+ {
+ "epoch": 2.208955223880597,
+ "grad_norm": 0.3677213191986084,
+ "learning_rate": 5.0863548706064245e-05,
+ "loss": 1.0353,
+ "step": 148
+ },
+ {
+ "epoch": 2.2238805970149254,
+ "grad_norm": 0.37162861227989197,
+ "learning_rate": 4.977824542770279e-05,
+ "loss": 1.001,
+ "step": 149
+ },
+ {
+ "epoch": 2.2388059701492535,
+ "grad_norm": 0.39737215638160706,
+ "learning_rate": 4.870857270918825e-05,
+ "loss": 0.9846,
+ "step": 150
+ },
+ {
+ "epoch": 2.253731343283582,
+ "grad_norm": 0.38380125164985657,
+ "learning_rate": 4.7654808184136064e-05,
+ "loss": 0.9606,
+ "step": 151
+ },
+ {
+ "epoch": 2.2686567164179103,
+ "grad_norm": 0.40244144201278687,
+ "learning_rate": 4.6617225357188976e-05,
+ "loss": 0.8571,
+ "step": 152
+ },
+ {
+ "epoch": 2.283582089552239,
+ "grad_norm": 0.4329751431941986,
+ "learning_rate": 4.5596093533029116e-05,
+ "loss": 0.8531,
+ "step": 153
+ },
+ {
+ "epoch": 2.298507462686567,
+ "grad_norm": 0.45405519008636475,
+ "learning_rate": 4.459167774647993e-05,
+ "loss": 0.8512,
+ "step": 154
+ },
+ {
+ "epoch": 2.3134328358208958,
+ "grad_norm": 0.45590460300445557,
+ "learning_rate": 4.360423869371629e-05,
+ "loss": 0.8208,
+ "step": 155
+ },
+ {
+ "epoch": 2.328358208955224,
+ "grad_norm": 0.4376915395259857,
+ "learning_rate": 4.2634032664600895e-05,
+ "loss": 0.7654,
+ "step": 156
+ },
+ {
+ "epoch": 2.343283582089552,
+ "grad_norm": 0.45759543776512146,
+ "learning_rate": 4.168131147616417e-05,
+ "loss": 0.7857,
+ "step": 157
+ },
+ {
+ "epoch": 2.3582089552238807,
+ "grad_norm": 0.4490528702735901,
+ "learning_rate": 4.0746322407245066e-05,
+ "loss": 0.7051,
+ "step": 158
+ },
+ {
+ "epoch": 2.373134328358209,
+ "grad_norm": 0.4924563765525818,
+ "learning_rate": 3.982930813430999e-05,
+ "loss": 0.6348,
+ "step": 159
+ },
+ {
+ "epoch": 2.388059701492537,
+ "grad_norm": 0.35502833127975464,
+ "learning_rate": 3.893050666846596e-05,
+ "loss": 1.1142,
+ "step": 160
+ },
+ {
+ "epoch": 2.4029850746268657,
+ "grad_norm": 0.3795003890991211,
+ "learning_rate": 3.805015129368492e-05,
+ "loss": 1.0387,
+ "step": 161
+ },
+ {
+ "epoch": 2.417910447761194,
+ "grad_norm": 0.3922593593597412,
+ "learning_rate": 3.718847050625475e-05,
+ "loss": 1.0402,
+ "step": 162
+ },
+ {
+ "epoch": 2.4328358208955225,
+ "grad_norm": 0.4245050251483917,
+ "learning_rate": 3.6345687955473166e-05,
+ "loss": 0.9854,
+ "step": 163
+ },
+ {
+ "epoch": 2.4477611940298507,
+ "grad_norm": 0.39441049098968506,
+ "learning_rate": 3.552202238559953e-05,
+ "loss": 0.9561,
+ "step": 164
+ },
+ {
+ "epoch": 2.4626865671641793,
+ "grad_norm": 0.39788442850112915,
+ "learning_rate": 3.4717687579079596e-05,
+ "loss": 0.9104,
+ "step": 165
+ },
+ {
+ "epoch": 2.4776119402985075,
+ "grad_norm": 0.4182056784629822,
+ "learning_rate": 3.393289230105849e-05,
+ "loss": 0.8841,
+ "step": 166
+ },
+ {
+ "epoch": 2.4925373134328357,
+ "grad_norm": 0.42861151695251465,
+ "learning_rate": 3.316784024519553e-05,
+ "loss": 0.8055,
+ "step": 167
+ },
+ {
+ "epoch": 2.5074626865671643,
+ "grad_norm": 0.42246565222740173,
+ "learning_rate": 3.242272998079557e-05,
+ "loss": 0.7947,
+ "step": 168
+ },
+ {
+ "epoch": 2.5223880597014925,
+ "grad_norm": 0.46474263072013855,
+ "learning_rate": 3.1697754901270473e-05,
+ "loss": 0.8153,
+ "step": 169
+ },
+ {
+ "epoch": 2.5373134328358207,
+ "grad_norm": 0.4996289312839508,
+ "learning_rate": 3.099310317394359e-05,
+ "loss": 0.7579,
+ "step": 170
+ },
+ {
+ "epoch": 2.5522388059701493,
+ "grad_norm": 0.47399628162384033,
+ "learning_rate": 3.030895769121112e-05,
+ "loss": 0.6813,
+ "step": 171
+ },
+ {
+ "epoch": 2.5671641791044775,
+ "grad_norm": 0.4417833983898163,
+ "learning_rate": 2.9645496023072244e-05,
+ "loss": 0.8971,
+ "step": 172
+ },
+ {
+ "epoch": 2.582089552238806,
+ "grad_norm": 0.3691651225090027,
+ "learning_rate": 2.9002890371040918e-05,
+ "loss": 1.0862,
+ "step": 173
+ },
+ {
+ "epoch": 2.5970149253731343,
+ "grad_norm": 0.4065288007259369,
+ "learning_rate": 2.8381307523450916e-05,
+ "loss": 1.031,
+ "step": 174
+ },
+ {
+ "epoch": 2.611940298507463,
+ "grad_norm": 0.3905118405818939,
+ "learning_rate": 2.778090881216592e-05,
+ "loss": 0.9701,
+ "step": 175
+ },
+ {
+ "epoch": 2.626865671641791,
+ "grad_norm": 0.39984792470932007,
+ "learning_rate": 2.7201850070705826e-05,
+ "loss": 0.9493,
+ "step": 176
+ },
+ {
+ "epoch": 2.6417910447761193,
+ "grad_norm": 0.415585994720459,
+ "learning_rate": 2.664428159380013e-05,
+ "loss": 0.9129,
+ "step": 177
+ },
+ {
+ "epoch": 2.656716417910448,
+ "grad_norm": 0.42336076498031616,
+ "learning_rate": 2.610834809837891e-05,
+ "loss": 0.8791,
+ "step": 178
+ },
+ {
+ "epoch": 2.671641791044776,
+ "grad_norm": 0.45662274956703186,
+ "learning_rate": 2.5594188686011615e-05,
+ "loss": 0.871,
+ "step": 179
+ },
+ {
+ "epoch": 2.6865671641791042,
+ "grad_norm": 0.4160149395465851,
+ "learning_rate": 2.5101936806803117e-05,
+ "loss": 0.7626,
+ "step": 180
+ },
+ {
+ "epoch": 2.701492537313433,
+ "grad_norm": 0.43893417716026306,
+ "learning_rate": 2.463172022475691e-05,
+ "loss": 0.8046,
+ "step": 181
+ },
+ {
+ "epoch": 2.716417910447761,
+ "grad_norm": 0.4579525291919708,
+ "learning_rate": 2.418366098461374e-05,
+ "loss": 0.7713,
+ "step": 182
+ },
+ {
+ "epoch": 2.7313432835820897,
+ "grad_norm": 0.4761490523815155,
+ "learning_rate": 2.3757875380175044e-05,
+ "loss": 0.69,
+ "step": 183
+ },
+ {
+ "epoch": 2.746268656716418,
+ "grad_norm": 0.5591773986816406,
+ "learning_rate": 2.3354473924118842e-05,
+ "loss": 0.6075,
+ "step": 184
+ },
+ {
+ "epoch": 2.7611940298507465,
+ "grad_norm": 0.3821795880794525,
+ "learning_rate": 2.297356131931614e-05,
+ "loss": 1.0839,
+ "step": 185
+ },
+ {
+ "epoch": 2.7761194029850746,
+ "grad_norm": 0.37466174364089966,
+ "learning_rate": 2.261523643165532e-05,
+ "loss": 1.0221,
+ "step": 186
+ },
+ {
+ "epoch": 2.791044776119403,
+ "grad_norm": 0.3825508654117584,
+ "learning_rate": 2.22795922643815e-05,
+ "loss": 1.0,
+ "step": 187
+ },
+ {
+ "epoch": 2.8059701492537314,
+ "grad_norm": 0.41949060559272766,
+ "learning_rate": 2.196671593395749e-05,
+ "loss": 0.9473,
+ "step": 188
+ },
+ {
+ "epoch": 2.8208955223880596,
+ "grad_norm": 0.42044076323509216,
+ "learning_rate": 2.167668864745279e-05,
+ "loss": 0.8887,
+ "step": 189
+ },
+ {
+ "epoch": 2.835820895522388,
+ "grad_norm": 0.4112393856048584,
+ "learning_rate": 2.1409585681466204e-05,
+ "loss": 0.8724,
+ "step": 190
+ },
+ {
+ "epoch": 2.8507462686567164,
+ "grad_norm": 0.45745235681533813,
+ "learning_rate": 2.1165476362587846e-05,
+ "loss": 0.8562,
+ "step": 191
+ },
+ {
+ "epoch": 2.8656716417910446,
+ "grad_norm": 0.4491675794124603,
+ "learning_rate": 2.09444240494054e-05,
+ "loss": 0.8593,
+ "step": 192
+ },
+ {
+ "epoch": 2.8805970149253732,
+ "grad_norm": 0.4317816197872162,
+ "learning_rate": 2.0746486116059418e-05,
+ "loss": 0.7933,
+ "step": 193
+ },
+ {
+ "epoch": 2.8955223880597014,
+ "grad_norm": 0.46604618430137634,
+ "learning_rate": 2.0571713937351834e-05,
+ "loss": 0.7903,
+ "step": 194
+ },
+ {
+ "epoch": 2.91044776119403,
+ "grad_norm": 0.48169732093811035,
+ "learning_rate": 2.0420152875411624e-05,
+ "loss": 0.7668,
+ "step": 195
+ },
+ {
+ "epoch": 2.925373134328358,
+ "grad_norm": 0.4627380073070526,
+ "learning_rate": 2.0291842267921108e-05,
+ "loss": 0.6404,
+ "step": 196
+ },
+ {
+ "epoch": 2.9402985074626864,
+ "grad_norm": 0.44821980595588684,
+ "learning_rate": 2.0186815417905787e-05,
+ "loss": 0.8672,
+ "step": 197
+ },
+ {
+ "epoch": 2.955223880597015,
+ "grad_norm": 0.3909732699394226,
+ "learning_rate": 2.0105099585090603e-05,
+ "loss": 0.9487,
+ "step": 198
+ },
+ {
+ "epoch": 2.970149253731343,
+ "grad_norm": 0.4137306213378906,
+ "learning_rate": 2.0046715978824664e-05,
+ "loss": 0.8438,
+ "step": 199
+ },
+ {
+ "epoch": 2.9850746268656714,
+ "grad_norm": 0.4548170864582062,
+ "learning_rate": 2.001167975257628e-05,
+ "loss": 0.8052,
+ "step": 200
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 0.4302070140838623,
+ "learning_rate": 2e-05,
+ "loss": 0.8199,
+ "step": 201
  }
  ],
  "logging_steps": 1,
@@ -959,12 +1428,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 3.501220166757253e+17,
+ "total_flos": 5.25183025013588e+17,
  "train_batch_size": 18,
  "trial_name": null,
  "trial_params": null
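
The trainer_state.json diff above records training advancing from step 134 to step 201 (epoch 3.0), with "should_training_stop" flipping to true and "total_flos" updated. As a minimal sketch (assuming the per-step records sit under the standard "log_history" key of this file, which is not itself visible in the truncated diff), the newly appended entries can be summarized like this:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"], "global_step:", state["global_step"])
print("total_flos:", state["total_flos"])

# Entries appended in this commit cover steps 135-201.
new_entries = [e for e in state["log_history"]
               if e.get("step", 0) >= 135 and "loss" in e]
mean_loss = sum(e["loss"] for e in new_entries) / len(new_entries)
print(f"steps {new_entries[0]['step']}-{new_entries[-1]['step']}: mean loss {mean_loss:.4f}")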