Nadav committed on
Commit
14b16a5
1 Parent(s): 27631c9

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6efcea1f718e992622738eebec6655b308d8abaef61a2f7a2af6fa93b0ce7b9
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:227bb459455ca9395e8701d0b76fa538ae5551af488261f8a6c040d98a6dded3
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d606552814a4ba7950abd9e209968958ecfd2f24d817fd26ef28c35737c8054d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ee7451dd5ec81a01259d5968e23ec645d0c3962b135c90aa707004f25bc4403
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45c1cdccf174ece9de604197435ede54ec32a3b78eb6226070a07ab576a5585d
3
- size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc90007b132e8f26a3d447fb1e8dee59d5ce3dfd07c853ec26d9c159e851307d
3
+ size 15459
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd4ea5b486ac1940989e451ce41280116176be849f45b93fb6c79bbd63b9b878
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56d7925c4d371c4e7cd499e68da31588c0a08bf1a905f213b26db017a95291f6
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d524b4cb1391ef7e50966a3eef7ac714ecb6ed976eedf165d99a07d29c73b99
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6464ca3b88e496d5d945a7955ac6cabd3c94f9611fe337e2e8a9af945aa679a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.870852564660803,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -142,11 +142,147 @@
142
  "eval_samples_per_second": 46.975,
143
  "eval_steps_per_second": 0.742,
144
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  }
146
  ],
147
  "max_steps": 100000,
148
  "num_train_epochs": 9,
149
- "total_flos": 4.709861347295232e+20,
150
  "trial_name": null,
151
  "trial_params": null
152
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8705493166187864,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
142
  "eval_samples_per_second": 46.975,
143
  "eval_steps_per_second": 0.742,
144
  "step": 10000
145
+ },
146
+ {
147
+ "epoch": 0.04,
148
+ "learning_rate": 9.833541625738316e-05,
149
+ "loss": 0.3929,
150
+ "step": 10500
151
+ },
152
+ {
153
+ "epoch": 0.09,
154
+ "learning_rate": 9.809924007281187e-05,
155
+ "loss": 0.3906,
156
+ "step": 11000
157
+ },
158
+ {
159
+ "epoch": 0.13,
160
+ "learning_rate": 9.78525261765341e-05,
161
+ "loss": 0.3899,
162
+ "step": 11500
163
+ },
164
+ {
165
+ "epoch": 0.17,
166
+ "learning_rate": 9.759533544151208e-05,
167
+ "loss": 0.3892,
168
+ "step": 12000
169
+ },
170
+ {
171
+ "epoch": 0.22,
172
+ "learning_rate": 9.732773132571125e-05,
173
+ "loss": 0.3889,
174
+ "step": 12500
175
+ },
176
+ {
177
+ "epoch": 0.26,
178
+ "learning_rate": 9.705034604088048e-05,
179
+ "loss": 0.3865,
180
+ "step": 13000
181
+ },
182
+ {
183
+ "epoch": 0.3,
184
+ "learning_rate": 9.676213628592508e-05,
185
+ "loss": 0.3865,
186
+ "step": 13500
187
+ },
188
+ {
189
+ "epoch": 0.35,
190
+ "learning_rate": 9.64637187296151e-05,
191
+ "loss": 0.3867,
192
+ "step": 14000
193
+ },
194
+ {
195
+ "epoch": 0.39,
196
+ "learning_rate": 9.615516700201724e-05,
197
+ "loss": 0.3858,
198
+ "step": 14500
199
+ },
200
+ {
201
+ "epoch": 0.44,
202
+ "learning_rate": 9.583720443927501e-05,
203
+ "loss": 0.3853,
204
+ "step": 15000
205
+ },
206
+ {
207
+ "epoch": 0.44,
208
+ "eval_loss": 0.36506548523902893,
209
+ "eval_runtime": 96.1419,
210
+ "eval_samples_per_second": 52.006,
211
+ "eval_steps_per_second": 0.822,
212
+ "step": 15000
213
+ },
214
+ {
215
+ "epoch": 0.48,
216
+ "learning_rate": 9.550863512110018e-05,
217
+ "loss": 0.3844,
218
+ "step": 15500
219
+ },
220
+ {
221
+ "epoch": 0.52,
222
+ "learning_rate": 9.517016728422667e-05,
223
+ "loss": 0.3834,
224
+ "step": 16000
225
+ },
226
+ {
227
+ "epoch": 0.57,
228
+ "learning_rate": 9.482188444052858e-05,
229
+ "loss": 0.3826,
230
+ "step": 16500
231
+ },
232
+ {
233
+ "epoch": 0.61,
234
+ "learning_rate": 9.446387252358614e-05,
235
+ "loss": 0.3824,
236
+ "step": 17000
237
+ },
238
+ {
239
+ "epoch": 0.65,
240
+ "learning_rate": 9.40962198674828e-05,
241
+ "loss": 0.3825,
242
+ "step": 17500
243
+ },
244
+ {
245
+ "epoch": 0.7,
246
+ "learning_rate": 9.371901718501017e-05,
247
+ "loss": 0.3804,
248
+ "step": 18000
249
+ },
250
+ {
251
+ "epoch": 0.74,
252
+ "learning_rate": 9.333314023958391e-05,
253
+ "loss": 0.3803,
254
+ "step": 18500
255
+ },
256
+ {
257
+ "epoch": 0.78,
258
+ "learning_rate": 9.293713767144707e-05,
259
+ "loss": 0.381,
260
+ "step": 19000
261
+ },
262
+ {
263
+ "epoch": 0.83,
264
+ "learning_rate": 9.253187106312908e-05,
265
+ "loss": 0.3793,
266
+ "step": 19500
267
+ },
268
+ {
269
+ "epoch": 0.87,
270
+ "learning_rate": 9.211744040810141e-05,
271
+ "loss": 0.3793,
272
+ "step": 20000
273
+ },
274
+ {
275
+ "epoch": 0.87,
276
+ "eval_loss": 0.35968872904777527,
277
+ "eval_runtime": 95.0235,
278
+ "eval_samples_per_second": 52.619,
279
+ "eval_steps_per_second": 0.831,
280
+ "step": 20000
281
  }
282
  ],
283
  "max_steps": 100000,
284
  "num_train_epochs": 9,
285
+ "total_flos": 9.419722694590464e+20,
286
  "trial_name": null,
287
  "trial_params": null
288
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd49a349d77c455473f49543bf6e96561e293025b2e65d9b3c90c0be2504db32
3
  size 5615
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb90956c2fc47703463b50fd267631dda0878fd140ff91c80ab2b7c4fc6e5708
3
  size 5615
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d606552814a4ba7950abd9e209968958ecfd2f24d817fd26ef28c35737c8054d
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ee7451dd5ec81a01259d5968e23ec645d0c3962b135c90aa707004f25bc4403
3
  size 449471589
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd49a349d77c455473f49543bf6e96561e293025b2e65d9b3c90c0be2504db32
3
  size 5615
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb90956c2fc47703463b50fd267631dda0878fd140ff91c80ab2b7c4fc6e5708
3
  size 5615