abletobetable commited on
Commit
e126b28
1 Parent(s): 8663160

Training in progress, step 500

Browse files
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 144.44,
3
+ "eval_avg_rouge_f": 0.3778,
4
+ "eval_gen_len": 16.125,
5
+ "eval_loss": 2.663809299468994,
6
+ "eval_rouge-1": 0.4169,
7
+ "eval_rouge-2": 0.3153,
8
+ "eval_rouge-l": 0.4013,
9
+ "eval_runtime": 3.1537,
10
+ "eval_samples_per_second": 2.537,
11
+ "eval_steps_per_second": 2.537
12
+ }
eval_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 144.44,
3
+ "eval_avg_rouge_f": 0.3778,
4
+ "eval_gen_len": 16.125,
5
+ "eval_loss": 2.663809299468994,
6
+ "eval_rouge-1": 0.4169,
7
+ "eval_rouge-2": 0.3153,
8
+ "eval_rouge-l": 0.4013,
9
+ "eval_runtime": 3.1537,
10
+ "eval_samples_per_second": 2.537,
11
+ "eval_steps_per_second": 2.537
12
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9684f41f1fa34b7c104aed8c068892f0216f3c3e2b5d51d955bd7ff12952097e
3
  size 977334453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c05fd1a49472594926101c92d9bcc73ae7e57a4200e2e029e7eaf73d6b95cee1
3
  size 977334453
runs/May24_13-32-48_f2ddba3b469d/events.out.tfevents.1684936852.f2ddba3b469d.298.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ef5a9b37fa8b6416db78013d774033f65edc450b7993c22d316ac43cec2072
3
+ size 618
runs/May24_14-01-43_f2ddba3b469d/1684936921.7041402/events.out.tfevents.1684936921.f2ddba3b469d.298.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa078a46b1a5a64393fbf4a47c6d8d948c30ab210eda9a7f22f3aea6b897635
3
+ size 6266
runs/May24_14-01-43_f2ddba3b469d/events.out.tfevents.1684936921.f2ddba3b469d.298.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:074bfe1159188fdefa2728cbb138e66f52068771bffb6d36b9cf15c0bc1554ff
3
+ size 11274
trainer_state.json ADDED
@@ -0,0 +1,1013 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3778,
3
+ "best_model_checkpoint": "rut5-base-absum-tech-support-calls/checkpoint-2000",
4
+ "epoch": 144.44444444444446,
5
+ "global_step": 2600,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 2.78,
12
+ "learning_rate": 5e-06,
13
+ "loss": 2.7022,
14
+ "step": 50
15
+ },
16
+ {
17
+ "epoch": 2.78,
18
+ "eval_avg_rouge_f": 0.0,
19
+ "eval_gen_len": 6.875,
20
+ "eval_loss": 2.296959400177002,
21
+ "eval_rouge-1": 0.0,
22
+ "eval_rouge-2": 0.0,
23
+ "eval_rouge-l": 0.0,
24
+ "eval_runtime": 3.1977,
25
+ "eval_samples_per_second": 2.502,
26
+ "eval_steps_per_second": 2.502,
27
+ "step": 50
28
+ },
29
+ {
30
+ "epoch": 5.56,
31
+ "learning_rate": 1e-05,
32
+ "loss": 2.2932,
33
+ "step": 100
34
+ },
35
+ {
36
+ "epoch": 5.56,
37
+ "eval_avg_rouge_f": 0.0,
38
+ "eval_gen_len": 10.375,
39
+ "eval_loss": 1.8183443546295166,
40
+ "eval_rouge-1": 0.0,
41
+ "eval_rouge-2": 0.0,
42
+ "eval_rouge-l": 0.0,
43
+ "eval_runtime": 1.7212,
44
+ "eval_samples_per_second": 4.648,
45
+ "eval_steps_per_second": 4.648,
46
+ "step": 100
47
+ },
48
+ {
49
+ "epoch": 8.33,
50
+ "learning_rate": 1.5000000000000002e-05,
51
+ "loss": 1.8234,
52
+ "step": 150
53
+ },
54
+ {
55
+ "epoch": 8.33,
56
+ "eval_avg_rouge_f": 0.3018,
57
+ "eval_gen_len": 14.0,
58
+ "eval_loss": 1.4890449047088623,
59
+ "eval_rouge-1": 0.3588,
60
+ "eval_rouge-2": 0.2205,
61
+ "eval_rouge-l": 0.3262,
62
+ "eval_runtime": 2.5527,
63
+ "eval_samples_per_second": 3.134,
64
+ "eval_steps_per_second": 3.134,
65
+ "step": 150
66
+ },
67
+ {
68
+ "epoch": 11.11,
69
+ "learning_rate": 2e-05,
70
+ "loss": 1.3727,
71
+ "step": 200
72
+ },
73
+ {
74
+ "epoch": 11.11,
75
+ "eval_avg_rouge_f": 0.2771,
76
+ "eval_gen_len": 12.375,
77
+ "eval_loss": 1.373950719833374,
78
+ "eval_rouge-1": 0.3493,
79
+ "eval_rouge-2": 0.1653,
80
+ "eval_rouge-l": 0.3167,
81
+ "eval_runtime": 2.0484,
82
+ "eval_samples_per_second": 3.906,
83
+ "eval_steps_per_second": 3.906,
84
+ "step": 200
85
+ },
86
+ {
87
+ "epoch": 13.89,
88
+ "learning_rate": 1.9767441860465116e-05,
89
+ "loss": 1.0367,
90
+ "step": 250
91
+ },
92
+ {
93
+ "epoch": 13.89,
94
+ "eval_avg_rouge_f": 0.1974,
95
+ "eval_gen_len": 15.375,
96
+ "eval_loss": 1.3832566738128662,
97
+ "eval_rouge-1": 0.2607,
98
+ "eval_rouge-2": 0.0984,
99
+ "eval_rouge-l": 0.2331,
100
+ "eval_runtime": 3.0521,
101
+ "eval_samples_per_second": 2.621,
102
+ "eval_steps_per_second": 2.621,
103
+ "step": 250
104
+ },
105
+ {
106
+ "epoch": 16.67,
107
+ "learning_rate": 1.9534883720930235e-05,
108
+ "loss": 0.841,
109
+ "step": 300
110
+ },
111
+ {
112
+ "epoch": 16.67,
113
+ "eval_avg_rouge_f": 0.3055,
114
+ "eval_gen_len": 16.0,
115
+ "eval_loss": 1.3515713214874268,
116
+ "eval_rouge-1": 0.3713,
117
+ "eval_rouge-2": 0.1857,
118
+ "eval_rouge-l": 0.3594,
119
+ "eval_runtime": 2.4808,
120
+ "eval_samples_per_second": 3.225,
121
+ "eval_steps_per_second": 3.225,
122
+ "step": 300
123
+ },
124
+ {
125
+ "epoch": 19.44,
126
+ "learning_rate": 1.9302325581395353e-05,
127
+ "loss": 0.7182,
128
+ "step": 350
129
+ },
130
+ {
131
+ "epoch": 19.44,
132
+ "eval_avg_rouge_f": 0.2672,
133
+ "eval_gen_len": 16.125,
134
+ "eval_loss": 1.3606797456741333,
135
+ "eval_rouge-1": 0.3352,
136
+ "eval_rouge-2": 0.143,
137
+ "eval_rouge-l": 0.3233,
138
+ "eval_runtime": 2.5469,
139
+ "eval_samples_per_second": 3.141,
140
+ "eval_steps_per_second": 3.141,
141
+ "step": 350
142
+ },
143
+ {
144
+ "epoch": 22.22,
145
+ "learning_rate": 1.9069767441860468e-05,
146
+ "loss": 0.5102,
147
+ "step": 400
148
+ },
149
+ {
150
+ "epoch": 22.22,
151
+ "eval_avg_rouge_f": 0.2849,
152
+ "eval_gen_len": 16.625,
153
+ "eval_loss": 1.3673444986343384,
154
+ "eval_rouge-1": 0.36,
155
+ "eval_rouge-2": 0.1597,
156
+ "eval_rouge-l": 0.3349,
157
+ "eval_runtime": 2.6789,
158
+ "eval_samples_per_second": 2.986,
159
+ "eval_steps_per_second": 2.986,
160
+ "step": 400
161
+ },
162
+ {
163
+ "epoch": 25.0,
164
+ "learning_rate": 1.8837209302325582e-05,
165
+ "loss": 0.4595,
166
+ "step": 450
167
+ },
168
+ {
169
+ "epoch": 25.0,
170
+ "eval_avg_rouge_f": 0.3228,
171
+ "eval_gen_len": 17.125,
172
+ "eval_loss": 1.371541976928711,
173
+ "eval_rouge-1": 0.3892,
174
+ "eval_rouge-2": 0.2153,
175
+ "eval_rouge-l": 0.3641,
176
+ "eval_runtime": 2.6184,
177
+ "eval_samples_per_second": 3.055,
178
+ "eval_steps_per_second": 3.055,
179
+ "step": 450
180
+ },
181
+ {
182
+ "epoch": 27.78,
183
+ "learning_rate": 1.86046511627907e-05,
184
+ "loss": 0.3886,
185
+ "step": 500
186
+ },
187
+ {
188
+ "epoch": 27.78,
189
+ "eval_avg_rouge_f": 0.3252,
190
+ "eval_gen_len": 16.375,
191
+ "eval_loss": 1.4634039402008057,
192
+ "eval_rouge-1": 0.3801,
193
+ "eval_rouge-2": 0.2274,
194
+ "eval_rouge-l": 0.3682,
195
+ "eval_runtime": 2.8205,
196
+ "eval_samples_per_second": 2.836,
197
+ "eval_steps_per_second": 2.836,
198
+ "step": 500
199
+ },
200
+ {
201
+ "epoch": 30.56,
202
+ "learning_rate": 1.8372093023255815e-05,
203
+ "loss": 0.3158,
204
+ "step": 550
205
+ },
206
+ {
207
+ "epoch": 30.56,
208
+ "eval_avg_rouge_f": 0.331,
209
+ "eval_gen_len": 16.75,
210
+ "eval_loss": 1.5123608112335205,
211
+ "eval_rouge-1": 0.3938,
212
+ "eval_rouge-2": 0.2319,
213
+ "eval_rouge-l": 0.3672,
214
+ "eval_runtime": 3.0478,
215
+ "eval_samples_per_second": 2.625,
216
+ "eval_steps_per_second": 2.625,
217
+ "step": 550
218
+ },
219
+ {
220
+ "epoch": 33.33,
221
+ "learning_rate": 1.813953488372093e-05,
222
+ "loss": 0.2687,
223
+ "step": 600
224
+ },
225
+ {
226
+ "epoch": 33.33,
227
+ "eval_avg_rouge_f": 0.3468,
228
+ "eval_gen_len": 16.5,
229
+ "eval_loss": 1.5868151187896729,
230
+ "eval_rouge-1": 0.3987,
231
+ "eval_rouge-2": 0.2568,
232
+ "eval_rouge-l": 0.3848,
233
+ "eval_runtime": 2.5696,
234
+ "eval_samples_per_second": 3.113,
235
+ "eval_steps_per_second": 3.113,
236
+ "step": 600
237
+ },
238
+ {
239
+ "epoch": 36.11,
240
+ "learning_rate": 1.790697674418605e-05,
241
+ "loss": 0.2361,
242
+ "step": 650
243
+ },
244
+ {
245
+ "epoch": 36.11,
246
+ "eval_avg_rouge_f": 0.3163,
247
+ "eval_gen_len": 17.75,
248
+ "eval_loss": 1.6459990739822388,
249
+ "eval_rouge-1": 0.375,
250
+ "eval_rouge-2": 0.2107,
251
+ "eval_rouge-l": 0.3631,
252
+ "eval_runtime": 2.8706,
253
+ "eval_samples_per_second": 2.787,
254
+ "eval_steps_per_second": 2.787,
255
+ "step": 650
256
+ },
257
+ {
258
+ "epoch": 38.89,
259
+ "learning_rate": 1.7674418604651163e-05,
260
+ "loss": 0.1991,
261
+ "step": 700
262
+ },
263
+ {
264
+ "epoch": 38.89,
265
+ "eval_avg_rouge_f": 0.3085,
266
+ "eval_gen_len": 16.25,
267
+ "eval_loss": 1.6946561336517334,
268
+ "eval_rouge-1": 0.3605,
269
+ "eval_rouge-2": 0.2177,
270
+ "eval_rouge-l": 0.3474,
271
+ "eval_runtime": 2.519,
272
+ "eval_samples_per_second": 3.176,
273
+ "eval_steps_per_second": 3.176,
274
+ "step": 700
275
+ },
276
+ {
277
+ "epoch": 41.67,
278
+ "learning_rate": 1.744186046511628e-05,
279
+ "loss": 0.151,
280
+ "step": 750
281
+ },
282
+ {
283
+ "epoch": 41.67,
284
+ "eval_avg_rouge_f": 0.3222,
285
+ "eval_gen_len": 16.5,
286
+ "eval_loss": 1.8248298168182373,
287
+ "eval_rouge-1": 0.3832,
288
+ "eval_rouge-2": 0.2274,
289
+ "eval_rouge-l": 0.3559,
290
+ "eval_runtime": 2.6923,
291
+ "eval_samples_per_second": 2.971,
292
+ "eval_steps_per_second": 2.971,
293
+ "step": 750
294
+ },
295
+ {
296
+ "epoch": 44.44,
297
+ "learning_rate": 1.7209302325581396e-05,
298
+ "loss": 0.1517,
299
+ "step": 800
300
+ },
301
+ {
302
+ "epoch": 44.44,
303
+ "eval_avg_rouge_f": 0.3811,
304
+ "eval_gen_len": 16.875,
305
+ "eval_loss": 1.7883902788162231,
306
+ "eval_rouge-1": 0.4309,
307
+ "eval_rouge-2": 0.294,
308
+ "eval_rouge-l": 0.4184,
309
+ "eval_runtime": 2.5559,
310
+ "eval_samples_per_second": 3.13,
311
+ "eval_steps_per_second": 3.13,
312
+ "step": 800
313
+ },
314
+ {
315
+ "epoch": 47.22,
316
+ "learning_rate": 1.697674418604651e-05,
317
+ "loss": 0.1444,
318
+ "step": 850
319
+ },
320
+ {
321
+ "epoch": 47.22,
322
+ "eval_avg_rouge_f": 0.322,
323
+ "eval_gen_len": 17.125,
324
+ "eval_loss": 1.8518762588500977,
325
+ "eval_rouge-1": 0.3843,
326
+ "eval_rouge-2": 0.2107,
327
+ "eval_rouge-l": 0.3711,
328
+ "eval_runtime": 2.7195,
329
+ "eval_samples_per_second": 2.942,
330
+ "eval_steps_per_second": 2.942,
331
+ "step": 850
332
+ },
333
+ {
334
+ "epoch": 50.0,
335
+ "learning_rate": 1.674418604651163e-05,
336
+ "loss": 0.1106,
337
+ "step": 900
338
+ },
339
+ {
340
+ "epoch": 50.0,
341
+ "eval_avg_rouge_f": 0.3209,
342
+ "eval_gen_len": 17.5,
343
+ "eval_loss": 1.9637408256530762,
344
+ "eval_rouge-1": 0.383,
345
+ "eval_rouge-2": 0.2107,
346
+ "eval_rouge-l": 0.3691,
347
+ "eval_runtime": 2.6625,
348
+ "eval_samples_per_second": 3.005,
349
+ "eval_steps_per_second": 3.005,
350
+ "step": 900
351
+ },
352
+ {
353
+ "epoch": 52.78,
354
+ "learning_rate": 1.6511627906976747e-05,
355
+ "loss": 0.0961,
356
+ "step": 950
357
+ },
358
+ {
359
+ "epoch": 52.78,
360
+ "eval_avg_rouge_f": 0.3103,
361
+ "eval_gen_len": 16.75,
362
+ "eval_loss": 2.07181715965271,
363
+ "eval_rouge-1": 0.3645,
364
+ "eval_rouge-2": 0.2177,
365
+ "eval_rouge-l": 0.3488,
366
+ "eval_runtime": 2.7383,
367
+ "eval_samples_per_second": 2.921,
368
+ "eval_steps_per_second": 2.921,
369
+ "step": 950
370
+ },
371
+ {
372
+ "epoch": 55.56,
373
+ "learning_rate": 1.6279069767441862e-05,
374
+ "loss": 0.1131,
375
+ "step": 1000
376
+ },
377
+ {
378
+ "epoch": 55.56,
379
+ "eval_avg_rouge_f": 0.3067,
380
+ "eval_gen_len": 16.75,
381
+ "eval_loss": 1.9934816360473633,
382
+ "eval_rouge-1": 0.3602,
383
+ "eval_rouge-2": 0.2153,
384
+ "eval_rouge-l": 0.3446,
385
+ "eval_runtime": 2.6295,
386
+ "eval_samples_per_second": 3.042,
387
+ "eval_steps_per_second": 3.042,
388
+ "step": 1000
389
+ },
390
+ {
391
+ "epoch": 58.33,
392
+ "learning_rate": 1.6046511627906977e-05,
393
+ "loss": 0.0996,
394
+ "step": 1050
395
+ },
396
+ {
397
+ "epoch": 58.33,
398
+ "eval_avg_rouge_f": 0.3712,
399
+ "eval_gen_len": 16.0,
400
+ "eval_loss": 2.06162166595459,
401
+ "eval_rouge-1": 0.4153,
402
+ "eval_rouge-2": 0.2986,
403
+ "eval_rouge-l": 0.3996,
404
+ "eval_runtime": 3.0388,
405
+ "eval_samples_per_second": 2.633,
406
+ "eval_steps_per_second": 2.633,
407
+ "step": 1050
408
+ },
409
+ {
410
+ "epoch": 61.11,
411
+ "learning_rate": 1.5813953488372095e-05,
412
+ "loss": 0.0663,
413
+ "step": 1100
414
+ },
415
+ {
416
+ "epoch": 61.11,
417
+ "eval_avg_rouge_f": 0.3786,
418
+ "eval_gen_len": 14.625,
419
+ "eval_loss": 2.1466333866119385,
420
+ "eval_rouge-1": 0.4257,
421
+ "eval_rouge-2": 0.301,
422
+ "eval_rouge-l": 0.409,
423
+ "eval_runtime": 3.1902,
424
+ "eval_samples_per_second": 2.508,
425
+ "eval_steps_per_second": 2.508,
426
+ "step": 1100
427
+ },
428
+ {
429
+ "epoch": 63.89,
430
+ "learning_rate": 1.558139534883721e-05,
431
+ "loss": 0.0789,
432
+ "step": 1150
433
+ },
434
+ {
435
+ "epoch": 63.89,
436
+ "eval_avg_rouge_f": 0.3728,
437
+ "eval_gen_len": 16.0,
438
+ "eval_loss": 2.1657214164733887,
439
+ "eval_rouge-1": 0.4166,
440
+ "eval_rouge-2": 0.301,
441
+ "eval_rouge-l": 0.4009,
442
+ "eval_runtime": 2.4781,
443
+ "eval_samples_per_second": 3.228,
444
+ "eval_steps_per_second": 3.228,
445
+ "step": 1150
446
+ },
447
+ {
448
+ "epoch": 66.67,
449
+ "learning_rate": 1.5348837209302328e-05,
450
+ "loss": 0.073,
451
+ "step": 1200
452
+ },
453
+ {
454
+ "epoch": 66.67,
455
+ "eval_avg_rouge_f": 0.3713,
456
+ "eval_gen_len": 16.25,
457
+ "eval_loss": 2.251979351043701,
458
+ "eval_rouge-1": 0.4131,
459
+ "eval_rouge-2": 0.301,
460
+ "eval_rouge-l": 0.3999,
461
+ "eval_runtime": 3.4503,
462
+ "eval_samples_per_second": 2.319,
463
+ "eval_steps_per_second": 2.319,
464
+ "step": 1200
465
+ },
466
+ {
467
+ "epoch": 69.44,
468
+ "learning_rate": 1.5116279069767443e-05,
469
+ "loss": 0.0739,
470
+ "step": 1250
471
+ },
472
+ {
473
+ "epoch": 69.44,
474
+ "eval_avg_rouge_f": 0.3051,
475
+ "eval_gen_len": 17.0,
476
+ "eval_loss": 2.260244369506836,
477
+ "eval_rouge-1": 0.3582,
478
+ "eval_rouge-2": 0.2145,
479
+ "eval_rouge-l": 0.3426,
480
+ "eval_runtime": 2.5948,
481
+ "eval_samples_per_second": 3.083,
482
+ "eval_steps_per_second": 3.083,
483
+ "step": 1250
484
+ },
485
+ {
486
+ "epoch": 72.22,
487
+ "learning_rate": 1.488372093023256e-05,
488
+ "loss": 0.0799,
489
+ "step": 1300
490
+ },
491
+ {
492
+ "epoch": 72.22,
493
+ "eval_avg_rouge_f": 0.3156,
494
+ "eval_gen_len": 16.75,
495
+ "eval_loss": 2.3278074264526367,
496
+ "eval_rouge-1": 0.369,
497
+ "eval_rouge-2": 0.2242,
498
+ "eval_rouge-l": 0.3534,
499
+ "eval_runtime": 3.3341,
500
+ "eval_samples_per_second": 2.399,
501
+ "eval_steps_per_second": 2.399,
502
+ "step": 1300
503
+ },
504
+ {
505
+ "epoch": 75.0,
506
+ "learning_rate": 1.4651162790697674e-05,
507
+ "loss": 0.0546,
508
+ "step": 1350
509
+ },
510
+ {
511
+ "epoch": 75.0,
512
+ "eval_avg_rouge_f": 0.3164,
513
+ "eval_gen_len": 16.5,
514
+ "eval_loss": 2.402118444442749,
515
+ "eval_rouge-1": 0.369,
516
+ "eval_rouge-2": 0.2242,
517
+ "eval_rouge-l": 0.3559,
518
+ "eval_runtime": 2.5497,
519
+ "eval_samples_per_second": 3.138,
520
+ "eval_steps_per_second": 3.138,
521
+ "step": 1350
522
+ },
523
+ {
524
+ "epoch": 77.78,
525
+ "learning_rate": 1.441860465116279e-05,
526
+ "loss": 0.0674,
527
+ "step": 1400
528
+ },
529
+ {
530
+ "epoch": 77.78,
531
+ "eval_avg_rouge_f": 0.3697,
532
+ "eval_gen_len": 17.25,
533
+ "eval_loss": 2.3492679595947266,
534
+ "eval_rouge-1": 0.4149,
535
+ "eval_rouge-2": 0.2924,
536
+ "eval_rouge-l": 0.4017,
537
+ "eval_runtime": 3.1613,
538
+ "eval_samples_per_second": 2.531,
539
+ "eval_steps_per_second": 2.531,
540
+ "step": 1400
541
+ },
542
+ {
543
+ "epoch": 80.56,
544
+ "learning_rate": 1.4186046511627909e-05,
545
+ "loss": 0.0459,
546
+ "step": 1450
547
+ },
548
+ {
549
+ "epoch": 80.56,
550
+ "eval_avg_rouge_f": 0.3839,
551
+ "eval_gen_len": 16.125,
552
+ "eval_loss": 2.3503048419952393,
553
+ "eval_rouge-1": 0.426,
554
+ "eval_rouge-2": 0.3153,
555
+ "eval_rouge-l": 0.4104,
556
+ "eval_runtime": 2.4756,
557
+ "eval_samples_per_second": 3.232,
558
+ "eval_steps_per_second": 3.232,
559
+ "step": 1450
560
+ },
561
+ {
562
+ "epoch": 83.33,
563
+ "learning_rate": 1.3953488372093025e-05,
564
+ "loss": 0.0501,
565
+ "step": 1500
566
+ },
567
+ {
568
+ "epoch": 83.33,
569
+ "eval_avg_rouge_f": 0.3732,
570
+ "eval_gen_len": 15.375,
571
+ "eval_loss": 2.371870517730713,
572
+ "eval_rouge-1": 0.4172,
573
+ "eval_rouge-2": 0.301,
574
+ "eval_rouge-l": 0.4016,
575
+ "eval_runtime": 2.8658,
576
+ "eval_samples_per_second": 2.792,
577
+ "eval_steps_per_second": 2.792,
578
+ "step": 1500
579
+ },
580
+ {
581
+ "epoch": 86.11,
582
+ "learning_rate": 1.372093023255814e-05,
583
+ "loss": 0.0509,
584
+ "step": 1550
585
+ },
586
+ {
587
+ "epoch": 86.11,
588
+ "eval_avg_rouge_f": 0.3926,
589
+ "eval_gen_len": 16.375,
590
+ "eval_loss": 2.4419479370117188,
591
+ "eval_rouge-1": 0.4361,
592
+ "eval_rouge-2": 0.3188,
593
+ "eval_rouge-l": 0.4229,
594
+ "eval_runtime": 3.0315,
595
+ "eval_samples_per_second": 2.639,
596
+ "eval_steps_per_second": 2.639,
597
+ "step": 1550
598
+ },
599
+ {
600
+ "epoch": 88.89,
601
+ "learning_rate": 1.3488372093023257e-05,
602
+ "loss": 0.0449,
603
+ "step": 1600
604
+ },
605
+ {
606
+ "epoch": 88.89,
607
+ "eval_avg_rouge_f": 0.4026,
608
+ "eval_gen_len": 16.375,
609
+ "eval_loss": 2.3171658515930176,
610
+ "eval_rouge-1": 0.4514,
611
+ "eval_rouge-2": 0.3188,
612
+ "eval_rouge-l": 0.4375,
613
+ "eval_runtime": 3.636,
614
+ "eval_samples_per_second": 2.2,
615
+ "eval_steps_per_second": 2.2,
616
+ "step": 1600
617
+ },
618
+ {
619
+ "epoch": 91.67,
620
+ "learning_rate": 1.3255813953488372e-05,
621
+ "loss": 0.0408,
622
+ "step": 1650
623
+ },
624
+ {
625
+ "epoch": 91.67,
626
+ "eval_avg_rouge_f": 0.3906,
627
+ "eval_gen_len": 16.25,
628
+ "eval_loss": 2.4437549114227295,
629
+ "eval_rouge-1": 0.4349,
630
+ "eval_rouge-2": 0.3153,
631
+ "eval_rouge-l": 0.4217,
632
+ "eval_runtime": 2.4939,
633
+ "eval_samples_per_second": 3.208,
634
+ "eval_steps_per_second": 3.208,
635
+ "step": 1650
636
+ },
637
+ {
638
+ "epoch": 94.44,
639
+ "learning_rate": 1.302325581395349e-05,
640
+ "loss": 0.0357,
641
+ "step": 1700
642
+ },
643
+ {
644
+ "epoch": 94.44,
645
+ "eval_avg_rouge_f": 0.3831,
646
+ "eval_gen_len": 16.25,
647
+ "eval_loss": 2.540635108947754,
648
+ "eval_rouge-1": 0.4236,
649
+ "eval_rouge-2": 0.3153,
650
+ "eval_rouge-l": 0.4104,
651
+ "eval_runtime": 3.5767,
652
+ "eval_samples_per_second": 2.237,
653
+ "eval_steps_per_second": 2.237,
654
+ "step": 1700
655
+ },
656
+ {
657
+ "epoch": 97.22,
658
+ "learning_rate": 1.2790697674418606e-05,
659
+ "loss": 0.0403,
660
+ "step": 1750
661
+ },
662
+ {
663
+ "epoch": 97.22,
664
+ "eval_avg_rouge_f": 0.3748,
665
+ "eval_gen_len": 16.375,
666
+ "eval_loss": 2.4441065788269043,
667
+ "eval_rouge-1": 0.4111,
668
+ "eval_rouge-2": 0.3153,
669
+ "eval_rouge-l": 0.398,
670
+ "eval_runtime": 2.5203,
671
+ "eval_samples_per_second": 3.174,
672
+ "eval_steps_per_second": 3.174,
673
+ "step": 1750
674
+ },
675
+ {
676
+ "epoch": 100.0,
677
+ "learning_rate": 1.2558139534883723e-05,
678
+ "loss": 0.0489,
679
+ "step": 1800
680
+ },
681
+ {
682
+ "epoch": 100.0,
683
+ "eval_avg_rouge_f": 0.3768,
684
+ "eval_gen_len": 16.125,
685
+ "eval_loss": 2.459872245788574,
686
+ "eval_rouge-1": 0.4154,
687
+ "eval_rouge-2": 0.3153,
688
+ "eval_rouge-l": 0.3997,
689
+ "eval_runtime": 3.4884,
690
+ "eval_samples_per_second": 2.293,
691
+ "eval_steps_per_second": 2.293,
692
+ "step": 1800
693
+ },
694
+ {
695
+ "epoch": 102.78,
696
+ "learning_rate": 1.2325581395348838e-05,
697
+ "loss": 0.032,
698
+ "step": 1850
699
+ },
700
+ {
701
+ "epoch": 102.78,
702
+ "eval_avg_rouge_f": 0.407,
703
+ "eval_gen_len": 15.0,
704
+ "eval_loss": 2.623534679412842,
705
+ "eval_rouge-1": 0.4515,
706
+ "eval_rouge-2": 0.3335,
707
+ "eval_rouge-l": 0.4359,
708
+ "eval_runtime": 2.3389,
709
+ "eval_samples_per_second": 3.42,
710
+ "eval_steps_per_second": 3.42,
711
+ "step": 1850
712
+ },
713
+ {
714
+ "epoch": 105.56,
715
+ "learning_rate": 1.2093023255813954e-05,
716
+ "loss": 0.0379,
717
+ "step": 1900
718
+ },
719
+ {
720
+ "epoch": 105.56,
721
+ "eval_avg_rouge_f": 0.407,
722
+ "eval_gen_len": 15.125,
723
+ "eval_loss": 2.6058127880096436,
724
+ "eval_rouge-1": 0.4515,
725
+ "eval_rouge-2": 0.3335,
726
+ "eval_rouge-l": 0.4359,
727
+ "eval_runtime": 3.2938,
728
+ "eval_samples_per_second": 2.429,
729
+ "eval_steps_per_second": 2.429,
730
+ "step": 1900
731
+ },
732
+ {
733
+ "epoch": 108.33,
734
+ "learning_rate": 1.1860465116279072e-05,
735
+ "loss": 0.0466,
736
+ "step": 1950
737
+ },
738
+ {
739
+ "epoch": 108.33,
740
+ "eval_avg_rouge_f": 0.3768,
741
+ "eval_gen_len": 16.125,
742
+ "eval_loss": 2.5748019218444824,
743
+ "eval_rouge-1": 0.4154,
744
+ "eval_rouge-2": 0.3153,
745
+ "eval_rouge-l": 0.3997,
746
+ "eval_runtime": 2.4802,
747
+ "eval_samples_per_second": 3.226,
748
+ "eval_steps_per_second": 3.226,
749
+ "step": 1950
750
+ },
751
+ {
752
+ "epoch": 111.11,
753
+ "learning_rate": 1.1627906976744187e-05,
754
+ "loss": 0.0317,
755
+ "step": 2000
756
+ },
757
+ {
758
+ "epoch": 111.11,
759
+ "eval_avg_rouge_f": 0.3778,
760
+ "eval_gen_len": 16.125,
761
+ "eval_loss": 2.663809299468994,
762
+ "eval_rouge-1": 0.4169,
763
+ "eval_rouge-2": 0.3153,
764
+ "eval_rouge-l": 0.4013,
765
+ "eval_runtime": 3.2301,
766
+ "eval_samples_per_second": 2.477,
767
+ "eval_steps_per_second": 2.477,
768
+ "step": 2000
769
+ },
770
+ {
771
+ "epoch": 113.89,
772
+ "learning_rate": 1.1395348837209304e-05,
773
+ "loss": 0.0234,
774
+ "step": 2050
775
+ },
776
+ {
777
+ "epoch": 113.89,
778
+ "eval_avg_rouge_f": 0.3888,
779
+ "eval_gen_len": 15.5,
780
+ "eval_loss": 2.740657091140747,
781
+ "eval_rouge-1": 0.4334,
782
+ "eval_rouge-2": 0.3153,
783
+ "eval_rouge-l": 0.4178,
784
+ "eval_runtime": 4.1851,
785
+ "eval_samples_per_second": 1.912,
786
+ "eval_steps_per_second": 1.912,
787
+ "step": 2050
788
+ },
789
+ {
790
+ "epoch": 116.67,
791
+ "learning_rate": 1.116279069767442e-05,
792
+ "loss": 0.0308,
793
+ "step": 2100
794
+ },
795
+ {
796
+ "epoch": 116.67,
797
+ "eval_avg_rouge_f": 0.3799,
798
+ "eval_gen_len": 16.125,
799
+ "eval_loss": 2.70857572555542,
800
+ "eval_rouge-1": 0.4201,
801
+ "eval_rouge-2": 0.3153,
802
+ "eval_rouge-l": 0.4044,
803
+ "eval_runtime": 2.5313,
804
+ "eval_samples_per_second": 3.16,
805
+ "eval_steps_per_second": 3.16,
806
+ "step": 2100
807
+ },
808
+ {
809
+ "epoch": 119.44,
810
+ "learning_rate": 1.0930232558139535e-05,
811
+ "loss": 0.0305,
812
+ "step": 2150
813
+ },
814
+ {
815
+ "epoch": 119.44,
816
+ "eval_avg_rouge_f": 0.3598,
817
+ "eval_gen_len": 15.5,
818
+ "eval_loss": 2.7068228721618652,
819
+ "eval_rouge-1": 0.4059,
820
+ "eval_rouge-2": 0.2831,
821
+ "eval_rouge-l": 0.3902,
822
+ "eval_runtime": 2.7634,
823
+ "eval_samples_per_second": 2.895,
824
+ "eval_steps_per_second": 2.895,
825
+ "step": 2150
826
+ },
827
+ {
828
+ "epoch": 122.22,
829
+ "learning_rate": 1.0697674418604651e-05,
830
+ "loss": 0.0289,
831
+ "step": 2200
832
+ },
833
+ {
834
+ "epoch": 122.22,
835
+ "eval_avg_rouge_f": 0.3598,
836
+ "eval_gen_len": 15.5,
837
+ "eval_loss": 2.8503403663635254,
838
+ "eval_rouge-1": 0.4059,
839
+ "eval_rouge-2": 0.2831,
840
+ "eval_rouge-l": 0.3902,
841
+ "eval_runtime": 2.4338,
842
+ "eval_samples_per_second": 3.287,
843
+ "eval_steps_per_second": 3.287,
844
+ "step": 2200
845
+ },
846
+ {
847
+ "epoch": 125.0,
848
+ "learning_rate": 1.046511627906977e-05,
849
+ "loss": 0.0555,
850
+ "step": 2250
851
+ },
852
+ {
853
+ "epoch": 125.0,
854
+ "eval_avg_rouge_f": 0.3598,
855
+ "eval_gen_len": 15.5,
856
+ "eval_loss": 2.8522231578826904,
857
+ "eval_rouge-1": 0.4059,
858
+ "eval_rouge-2": 0.2831,
859
+ "eval_rouge-l": 0.3902,
860
+ "eval_runtime": 2.8814,
861
+ "eval_samples_per_second": 2.776,
862
+ "eval_steps_per_second": 2.776,
863
+ "step": 2250
864
+ },
865
+ {
866
+ "epoch": 127.78,
867
+ "learning_rate": 1.0232558139534884e-05,
868
+ "loss": 0.022,
869
+ "step": 2300
870
+ },
871
+ {
872
+ "epoch": 127.78,
873
+ "eval_avg_rouge_f": 0.3598,
874
+ "eval_gen_len": 15.5,
875
+ "eval_loss": 2.9057185649871826,
876
+ "eval_rouge-1": 0.4059,
877
+ "eval_rouge-2": 0.2831,
878
+ "eval_rouge-l": 0.3902,
879
+ "eval_runtime": 2.43,
880
+ "eval_samples_per_second": 3.292,
881
+ "eval_steps_per_second": 3.292,
882
+ "step": 2300
883
+ },
884
+ {
885
+ "epoch": 130.56,
886
+ "learning_rate": 1e-05,
887
+ "loss": 0.0369,
888
+ "step": 2350
889
+ },
890
+ {
891
+ "epoch": 130.56,
892
+ "eval_avg_rouge_f": 0.3598,
893
+ "eval_gen_len": 15.5,
894
+ "eval_loss": 2.8735642433166504,
895
+ "eval_rouge-1": 0.4059,
896
+ "eval_rouge-2": 0.2831,
897
+ "eval_rouge-l": 0.3902,
898
+ "eval_runtime": 3.0784,
899
+ "eval_samples_per_second": 2.599,
900
+ "eval_steps_per_second": 2.599,
901
+ "step": 2350
902
+ },
903
+ {
904
+ "epoch": 133.33,
905
+ "learning_rate": 9.767441860465117e-06,
906
+ "loss": 0.0195,
907
+ "step": 2400
908
+ },
909
+ {
910
+ "epoch": 133.33,
911
+ "eval_avg_rouge_f": 0.3598,
912
+ "eval_gen_len": 15.5,
913
+ "eval_loss": 2.7636728286743164,
914
+ "eval_rouge-1": 0.4059,
915
+ "eval_rouge-2": 0.2831,
916
+ "eval_rouge-l": 0.3902,
917
+ "eval_runtime": 2.4413,
918
+ "eval_samples_per_second": 3.277,
919
+ "eval_steps_per_second": 3.277,
920
+ "step": 2400
921
+ },
922
+ {
923
+ "epoch": 136.11,
924
+ "learning_rate": 9.534883720930234e-06,
925
+ "loss": 0.0387,
926
+ "step": 2450
927
+ },
928
+ {
929
+ "epoch": 136.11,
930
+ "eval_avg_rouge_f": 0.3598,
931
+ "eval_gen_len": 15.5,
932
+ "eval_loss": 2.743685007095337,
933
+ "eval_rouge-1": 0.4059,
934
+ "eval_rouge-2": 0.2831,
935
+ "eval_rouge-l": 0.3902,
936
+ "eval_runtime": 2.9809,
937
+ "eval_samples_per_second": 2.684,
938
+ "eval_steps_per_second": 2.684,
939
+ "step": 2450
940
+ },
941
+ {
942
+ "epoch": 138.89,
943
+ "learning_rate": 9.30232558139535e-06,
944
+ "loss": 0.0298,
945
+ "step": 2500
946
+ },
947
+ {
948
+ "epoch": 138.89,
949
+ "eval_avg_rouge_f": 0.3443,
950
+ "eval_gen_len": 16.25,
951
+ "eval_loss": 2.8817646503448486,
952
+ "eval_rouge-1": 0.391,
953
+ "eval_rouge-2": 0.2665,
954
+ "eval_rouge-l": 0.3754,
955
+ "eval_runtime": 2.5341,
956
+ "eval_samples_per_second": 3.157,
957
+ "eval_steps_per_second": 3.157,
958
+ "step": 2500
959
+ },
960
+ {
961
+ "epoch": 141.67,
962
+ "learning_rate": 9.069767441860465e-06,
963
+ "loss": 0.0265,
964
+ "step": 2550
965
+ },
966
+ {
967
+ "epoch": 141.67,
968
+ "eval_avg_rouge_f": 0.3353,
969
+ "eval_gen_len": 16.5,
970
+ "eval_loss": 2.834005355834961,
971
+ "eval_rouge-1": 0.3776,
972
+ "eval_rouge-2": 0.2665,
973
+ "eval_rouge-l": 0.362,
974
+ "eval_runtime": 3.3656,
975
+ "eval_samples_per_second": 2.377,
976
+ "eval_steps_per_second": 2.377,
977
+ "step": 2550
978
+ },
979
+ {
980
+ "epoch": 144.44,
981
+ "learning_rate": 8.837209302325582e-06,
982
+ "loss": 0.0182,
983
+ "step": 2600
984
+ },
985
+ {
986
+ "epoch": 144.44,
987
+ "eval_avg_rouge_f": 0.3598,
988
+ "eval_gen_len": 15.5,
989
+ "eval_loss": 2.873906135559082,
990
+ "eval_rouge-1": 0.4059,
991
+ "eval_rouge-2": 0.2831,
992
+ "eval_rouge-l": 0.3902,
993
+ "eval_runtime": 2.4328,
994
+ "eval_samples_per_second": 3.288,
995
+ "eval_steps_per_second": 3.288,
996
+ "step": 2600
997
+ },
998
+ {
999
+ "epoch": 144.44,
1000
+ "step": 2600,
1001
+ "total_flos": 3409770731258880.0,
1002
+ "train_loss": 0.29600492647060983,
1003
+ "train_runtime": 1522.0987,
1004
+ "train_samples_per_second": 8.869,
1005
+ "train_steps_per_second": 2.956
1006
+ }
1007
+ ],
1008
+ "max_steps": 4500,
1009
+ "num_train_epochs": 250,
1010
+ "total_flos": 3409770731258880.0,
1011
+ "trial_name": null,
1012
+ "trial_params": null
1013
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0def71e926dd74ecdd6c1f2e45020f04a28fe1561cad421ab9215cd1c448fadb
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80379659fa2a444b0146368cbedeb2127a444464453e23c94bfa2ef88b1d7337
3
  size 4091