Thomas De Decker commited on
Commit
8f5c712
1 Parent(s): 2b941d3

Update model

Browse files
added_tokens.json DELETED
@@ -1 +0,0 @@
1
- {"[KP_SEP]": 32100}
 
 
config.json CHANGED
@@ -53,5 +53,5 @@
53
  "torch_dtype": "float32",
54
  "transformers_version": "4.17.0",
55
  "use_cache": true,
56
- "vocab_size": 32101
57
  }
 
53
  "torch_dtype": "float32",
54
  "transformers_version": "4.17.0",
55
  "use_cache": true,
56
+ "vocab_size": 32100
57
  }
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:749043cdff035d1ab2240dc75e407bd10596fb9bf80e36333404fcc387cc207d
3
- size 484018869
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:410621c6aa8c6adfb036a83b0a3c2983f98d9637d49375a4be7e94eac33d0c85
3
+ size 484014773
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67394bed029cb1b3f08c8acf4c65f7cb03b273e38af19fc879982de6f233e2e6
3
- size 242030331
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82e9a87611831a5973765076ad4ff63010b49c038377a4ab384607f9a2395632
3
+ size 242028283
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36b59e660513c24c68e47eed7aa58cf2063c3481b9670625d35ade3d2691c30d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4888381054d1e1f5929a6bebecb3961f40c26ef204be5c5a1e48aaeb5c5e2668
3
  size 14503
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f460c9860f92d63723a55e876b98b52e4a8c8c8716d122bba2e0436a80cea0be
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9791d8712a0e3348275a42ccf808d67cc95de49fb4baa37c74b348e504e5510
3
  size 559
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8cf943a69f9855f9d67f54e487e99ae49506ca61750728a12a0aa7d6c984206
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2511f600c65b4a2d43a600e86c4efbc5f9b59faef0e34a2d5557ca0ed7c5f4c
3
  size 623
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 40,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 40
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "<pad>"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
@@ -44,6 +30,15 @@
44
  "rstrip": false,
45
  "normalized": false
46
  },
 
 
 
 
 
 
 
 
 
47
  {
48
  "id": 32000,
49
  "special": true,
@@ -943,15 +938,6 @@
943
  "lstrip": false,
944
  "rstrip": false,
945
  "normalized": false
946
- },
947
- {
948
- "id": 32100,
949
- "special": false,
950
- "content": "[KP_SEP]",
951
- "single_word": false,
952
- "lstrip": false,
953
- "rstrip": false,
954
- "normalized": true
955
  }
956
  ],
957
  "normalizer": {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
30
  "rstrip": false,
31
  "normalized": false
32
  },
33
+ {
34
+ "id": 117,
35
+ "special": false,
36
+ "content": ";",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": true
41
+ },
42
  {
43
  "id": 32000,
44
  "special": true,
 
938
  "lstrip": false,
939
  "rstrip": false,
940
  "normalized": false
 
 
 
 
 
 
 
 
 
941
  }
942
  ],
943
  "normalizer": {
trainer_state.json CHANGED
@@ -1,976 +1,652 @@
1
  {
2
- "best_metric": 0.3611119221851426,
3
- "best_model_checkpoint": "t5-small-keyword-generation-inspec\\checkpoint-20000",
4
- "epoch": 20.0,
5
- "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.5,
12
- "learning_rate": 4.93e-05,
13
- "loss": 7.9531,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.5,
18
- "eval_F1@10": 0.0,
19
- "eval_F1@5": 0.0,
20
- "eval_F1@M": 0.0,
21
- "eval_P@10": 0.0,
22
- "eval_P@5": 0.0,
23
- "eval_P@M": 0.0,
24
- "eval_R@10": 0.0,
25
- "eval_R@5": 0.0,
26
- "eval_R@M": 0.0,
27
- "eval_loss": 4.233538627624512,
28
- "eval_model_name": "t5-small-keyword-generation-inspec",
29
- "eval_runtime": 294.894,
30
- "eval_samples_per_second": 1.696,
31
- "eval_steps_per_second": 1.696,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 1.0,
36
- "learning_rate": 9.93e-05,
37
- "loss": 3.9646,
38
- "step": 1000
39
- },
40
- {
41
- "epoch": 1.0,
42
- "eval_F1@10": 0.1977849653076471,
43
- "eval_F1@5": 0.2771428832605307,
44
- "eval_F1@M": 0.3029857823291265,
45
- "eval_P@10": 0.17059999999999995,
46
- "eval_P@5": 0.33559999999999995,
47
- "eval_P@M": 0.42588095238095236,
48
- "eval_R@10": 0.2556045288045292,
49
- "eval_R@5": 0.251349622599623,
50
- "eval_R@M": 0.2556045288045292,
51
- "eval_loss": 2.812638282775879,
52
- "eval_model_name": "t5-small-keyword-generation-inspec",
53
- "eval_runtime": 298.6756,
54
- "eval_samples_per_second": 1.674,
55
- "eval_steps_per_second": 1.674,
56
  "step": 1000
57
  },
58
  {
59
  "epoch": 1.5,
60
- "learning_rate": 9.899387755102041e-05,
61
- "loss": 2.9718,
62
- "step": 1500
63
- },
64
- {
65
- "epoch": 1.5,
66
- "eval_F1@10": 0.21689912919673357,
67
- "eval_F1@5": 0.303470865735572,
68
- "eval_F1@M": 0.3314612888315681,
69
- "eval_P@10": 0.18799999999999992,
70
- "eval_P@5": 0.3703999999999999,
71
- "eval_P@M": 0.4614,
72
- "eval_R@10": 0.2789250194250196,
73
- "eval_R@5": 0.27387566877566905,
74
- "eval_R@M": 0.2789250194250196,
75
- "eval_loss": 2.4728078842163086,
76
- "eval_model_name": "t5-small-keyword-generation-inspec",
77
- "eval_runtime": 307.3423,
78
- "eval_samples_per_second": 1.627,
79
- "eval_steps_per_second": 1.627,
80
  "step": 1500
81
  },
82
  {
83
  "epoch": 2.0,
84
- "learning_rate": 9.79734693877551e-05,
85
- "loss": 2.5763,
86
- "step": 2000
87
- },
88
- {
89
- "epoch": 2.0,
90
- "eval_F1@10": 0.21342376898165072,
91
- "eval_F1@5": 0.29715987117457776,
92
- "eval_F1@M": 0.3246857584795672,
93
- "eval_P@10": 0.18480000000000008,
94
- "eval_P@5": 0.3628000000000001,
95
- "eval_P@M": 0.44907619047619074,
96
- "eval_R@10": 0.2744417082917087,
97
- "eval_R@5": 0.2677502941502946,
98
- "eval_R@M": 0.2744417082917087,
99
- "eval_loss": 2.183018445968628,
100
- "eval_model_name": "t5-small-keyword-generation-inspec",
101
- "eval_runtime": 289.0824,
102
- "eval_samples_per_second": 1.73,
103
- "eval_steps_per_second": 1.73,
104
  "step": 2000
105
  },
106
  {
107
  "epoch": 2.5,
108
- "learning_rate": 9.695510204081633e-05,
109
- "loss": 2.2131,
110
- "step": 2500
111
- },
112
- {
113
- "epoch": 2.5,
114
- "eval_F1@10": 0.22083437505943945,
115
- "eval_F1@5": 0.3085772965596502,
116
- "eval_F1@M": 0.33408261078446894,
117
- "eval_P@10": 0.1906,
118
- "eval_P@5": 0.3736000000000001,
119
- "eval_P@M": 0.45226190476190464,
120
- "eval_R@10": 0.2875526029526035,
121
- "eval_R@5": 0.28179986124986167,
122
- "eval_R@M": 0.2875526029526035,
123
- "eval_loss": 2.037614107131958,
124
- "eval_model_name": "t5-small-keyword-generation-inspec",
125
- "eval_runtime": 294.2365,
126
- "eval_samples_per_second": 1.699,
127
- "eval_steps_per_second": 1.699,
128
  "step": 2500
129
  },
130
  {
131
  "epoch": 3.0,
132
- "learning_rate": 9.593469387755103e-05,
133
- "loss": 2.0094,
134
- "step": 3000
135
- },
136
- {
137
- "epoch": 3.0,
138
- "eval_F1@10": 0.23622480848222596,
139
- "eval_F1@5": 0.32680995997466633,
140
- "eval_F1@M": 0.3509576027172007,
141
- "eval_P@10": 0.2044,
142
- "eval_P@5": 0.39799999999999996,
143
- "eval_P@M": 0.4652142857142857,
144
- "eval_R@10": 0.30521768231768254,
145
- "eval_R@5": 0.29608175158175193,
146
- "eval_R@M": 0.30521768231768254,
147
- "eval_loss": 1.9597641229629517,
148
- "eval_model_name": "t5-small-keyword-generation-inspec",
149
- "eval_runtime": 299.8378,
150
- "eval_samples_per_second": 1.668,
151
- "eval_steps_per_second": 1.668,
152
  "step": 3000
153
  },
154
  {
155
  "epoch": 3.5,
156
- "learning_rate": 9.491428571428572e-05,
157
- "loss": 1.7087,
158
- "step": 3500
159
- },
160
- {
161
- "epoch": 3.5,
162
- "eval_F1@10": 0.23585947180128827,
163
- "eval_F1@5": 0.32583185474656123,
164
- "eval_F1@M": 0.3484568159051137,
165
- "eval_P@10": 0.20420000000000005,
166
- "eval_P@5": 0.39679999999999993,
167
- "eval_P@M": 0.457257142857143,
168
- "eval_R@10": 0.3021705960705962,
169
- "eval_R@5": 0.2934577533577537,
170
- "eval_R@M": 0.3021705960705962,
171
- "eval_loss": 1.9531850814819336,
172
- "eval_model_name": "t5-small-keyword-generation-inspec",
173
- "eval_runtime": 291.44,
174
- "eval_samples_per_second": 1.716,
175
- "eval_steps_per_second": 1.716,
176
  "step": 3500
177
  },
178
  {
179
  "epoch": 4.0,
180
- "learning_rate": 9.389387755102042e-05,
181
- "loss": 1.7346,
182
- "step": 4000
183
- },
184
- {
185
- "epoch": 4.0,
186
- "eval_F1@10": 0.23949844293171174,
187
- "eval_F1@5": 0.3324181589652182,
188
- "eval_F1@M": 0.3555839600200287,
189
- "eval_P@10": 0.20739999999999992,
190
- "eval_P@5": 0.40439999999999987,
191
- "eval_P@M": 0.46959285714285726,
192
- "eval_R@10": 0.307689471639472,
193
- "eval_R@5": 0.30029141969142004,
194
- "eval_R@M": 0.307689471639472,
195
- "eval_loss": 1.8844341039657593,
196
- "eval_model_name": "t5-small-keyword-generation-inspec",
197
- "eval_runtime": 279.8232,
198
- "eval_samples_per_second": 1.787,
199
- "eval_steps_per_second": 1.787,
200
  "step": 4000
201
  },
202
  {
203
  "epoch": 4.5,
204
- "learning_rate": 9.287551020408164e-05,
205
- "loss": 1.4093,
206
- "step": 4500
207
- },
208
- {
209
- "epoch": 4.5,
210
- "eval_F1@10": 0.23374703381451323,
211
- "eval_F1@5": 0.3258455720749842,
212
- "eval_F1@M": 0.34501260196631756,
213
- "eval_P@10": 0.2024,
214
- "eval_P@5": 0.3964,
215
- "eval_P@M": 0.4511738095238098,
216
- "eval_R@10": 0.29999401709401724,
217
- "eval_R@5": 0.2942325452325456,
218
- "eval_R@M": 0.29999401709401724,
219
- "eval_loss": 1.8626010417938232,
220
- "eval_model_name": "t5-small-keyword-generation-inspec",
221
- "eval_runtime": 281.3111,
222
- "eval_samples_per_second": 1.777,
223
- "eval_steps_per_second": 1.777,
224
  "step": 4500
225
  },
226
  {
227
  "epoch": 5.0,
228
- "learning_rate": 9.185510204081633e-05,
229
- "loss": 1.5068,
230
- "step": 5000
231
- },
232
- {
233
- "epoch": 5.0,
234
- "eval_F1@10": 0.24335615627100407,
235
- "eval_F1@5": 0.3396655599302659,
236
- "eval_F1@M": 0.3602646628912888,
237
- "eval_P@10": 0.21099999999999977,
238
- "eval_P@5": 0.4139999999999996,
239
- "eval_P@M": 0.4727857142857141,
240
- "eval_R@10": 0.3119000777000777,
241
- "eval_R@5": 0.30657915417915416,
242
- "eval_R@M": 0.3119000777000777,
243
- "eval_loss": 1.7979589700698853,
244
- "eval_model_name": "t5-small-keyword-generation-inspec",
245
- "eval_runtime": 280.6796,
246
- "eval_samples_per_second": 1.781,
247
- "eval_steps_per_second": 1.781,
248
  "step": 5000
249
  },
250
  {
251
  "epoch": 5.5,
252
- "learning_rate": 9.083469387755103e-05,
253
- "loss": 1.3146,
254
- "step": 5500
255
- },
256
- {
257
- "epoch": 5.5,
258
- "eval_F1@10": 0.24001641670200757,
259
- "eval_F1@5": 0.334409038673745,
260
- "eval_F1@M": 0.3550115948035458,
261
- "eval_P@10": 0.20839999999999997,
262
- "eval_P@5": 0.40799999999999986,
263
- "eval_P@M": 0.46737857142857153,
264
- "eval_R@10": 0.30688550338550347,
265
- "eval_R@5": 0.30161761016761024,
266
- "eval_R@M": 0.30688550338550347,
267
- "eval_loss": 1.8715476989746094,
268
- "eval_model_name": "t5-small-keyword-generation-inspec",
269
- "eval_runtime": 280.9732,
270
- "eval_samples_per_second": 1.78,
271
- "eval_steps_per_second": 1.78,
272
  "step": 5500
273
  },
274
  {
275
  "epoch": 6.0,
276
- "learning_rate": 8.981428571428572e-05,
277
- "loss": 1.2704,
278
- "step": 6000
279
- },
280
- {
281
- "epoch": 6.0,
282
- "eval_F1@10": 0.24096180425219377,
283
- "eval_F1@5": 0.334974600889307,
284
- "eval_F1@M": 0.35426202280691466,
285
- "eval_P@10": 0.20859999999999995,
286
- "eval_P@5": 0.4076,
287
- "eval_P@M": 0.4599071428571428,
288
- "eval_R@10": 0.30986234321234324,
289
- "eval_R@5": 0.30275404595404604,
290
- "eval_R@M": 0.30986234321234324,
291
- "eval_loss": 1.8688002824783325,
292
- "eval_model_name": "t5-small-keyword-generation-inspec",
293
- "eval_runtime": 279.0562,
294
- "eval_samples_per_second": 1.792,
295
- "eval_steps_per_second": 1.792,
296
  "step": 6000
297
  },
298
  {
299
  "epoch": 6.5,
300
- "learning_rate": 8.879591836734694e-05,
301
- "loss": 1.1731,
302
- "step": 6500
303
- },
304
- {
305
- "epoch": 6.5,
306
- "eval_F1@10": 0.2441899110215107,
307
- "eval_F1@5": 0.3420338693985756,
308
- "eval_F1@M": 0.36420169024302196,
309
- "eval_P@10": 0.2115999999999999,
310
- "eval_P@5": 0.41599999999999987,
311
- "eval_P@M": 0.4780619047619047,
312
- "eval_R@10": 0.31397009102009105,
313
- "eval_R@5": 0.3098475080475081,
314
- "eval_R@M": 0.31397009102009105,
315
- "eval_loss": 1.8536425828933716,
316
- "eval_model_name": "t5-small-keyword-generation-inspec",
317
- "eval_runtime": 291.8339,
318
- "eval_samples_per_second": 1.713,
319
- "eval_steps_per_second": 1.713,
320
  "step": 6500
321
  },
322
  {
323
  "epoch": 7.0,
324
- "learning_rate": 8.777551020408164e-05,
325
- "loss": 1.1174,
326
- "step": 7000
327
- },
328
- {
329
- "epoch": 7.0,
330
- "eval_F1@10": 0.24262947404967916,
331
- "eval_F1@5": 0.33894085685556324,
332
- "eval_F1@M": 0.35926122996370713,
333
- "eval_P@10": 0.21039999999999992,
334
- "eval_P@5": 0.4128,
335
- "eval_P@M": 0.46843333333333326,
336
- "eval_R@10": 0.31144045954045946,
337
- "eval_R@5": 0.30653605838605835,
338
- "eval_R@M": 0.31144045954045946,
339
- "eval_loss": 1.8547453880310059,
340
- "eval_model_name": "t5-small-keyword-generation-inspec",
341
- "eval_runtime": 293.7424,
342
- "eval_samples_per_second": 1.702,
343
- "eval_steps_per_second": 1.702,
344
  "step": 7000
345
  },
346
  {
347
  "epoch": 7.5,
348
- "learning_rate": 8.675510204081633e-05,
349
- "loss": 1.0319,
350
- "step": 7500
351
- },
352
- {
353
- "epoch": 7.5,
354
- "eval_F1@10": 0.24435341427092733,
355
- "eval_F1@5": 0.34094378497319694,
356
- "eval_F1@M": 0.36301657635450263,
357
- "eval_P@10": 0.21159999999999995,
358
- "eval_P@5": 0.4139999999999999,
359
- "eval_P@M": 0.4739333333333334,
360
- "eval_R@10": 0.31494397269397284,
361
- "eval_R@5": 0.30938704628704644,
362
- "eval_R@M": 0.31494397269397284,
363
- "eval_loss": 1.8866732120513916,
364
- "eval_model_name": "t5-small-keyword-generation-inspec",
365
- "eval_runtime": 298.9317,
366
- "eval_samples_per_second": 1.673,
367
- "eval_steps_per_second": 1.673,
368
  "step": 7500
369
  },
370
  {
371
  "epoch": 8.0,
372
- "learning_rate": 8.573469387755101e-05,
373
- "loss": 1.0172,
374
- "step": 8000
375
- },
376
- {
377
- "epoch": 8.0,
378
- "eval_F1@10": 0.2461478679992482,
379
- "eval_F1@5": 0.34169924781101274,
380
- "eval_F1@M": 0.36424212479986534,
381
- "eval_P@10": 0.21280000000000002,
382
- "eval_P@5": 0.41439999999999994,
383
- "eval_P@M": 0.47244523809523775,
384
- "eval_R@10": 0.3185173215673219,
385
- "eval_R@5": 0.3108509435009436,
386
- "eval_R@M": 0.3185173215673219,
387
- "eval_loss": 1.9575016498565674,
388
- "eval_model_name": "t5-small-keyword-generation-inspec",
389
- "eval_runtime": 264.7319,
390
- "eval_samples_per_second": 1.889,
391
- "eval_steps_per_second": 1.889,
392
  "step": 8000
393
  },
394
  {
395
  "epoch": 8.5,
396
- "learning_rate": 8.471632653061225e-05,
397
- "loss": 0.9324,
398
- "step": 8500
399
- },
400
- {
401
- "epoch": 8.5,
402
- "eval_F1@10": 0.23868666530445426,
403
- "eval_F1@5": 0.3324125028566208,
404
- "eval_F1@M": 0.3539066449202676,
405
- "eval_P@10": 0.20719999999999994,
406
- "eval_P@5": 0.4051999999999999,
407
- "eval_P@M": 0.46190476190476165,
408
- "eval_R@10": 0.3066111166611168,
409
- "eval_R@5": 0.3009154456654457,
410
- "eval_R@M": 0.3066111166611168,
411
- "eval_loss": 1.9488437175750732,
412
- "eval_model_name": "t5-small-keyword-generation-inspec",
413
- "eval_runtime": 286.0434,
414
- "eval_samples_per_second": 1.748,
415
- "eval_steps_per_second": 1.748,
416
  "step": 8500
417
  },
418
  {
419
  "epoch": 9.0,
420
- "learning_rate": 8.369591836734695e-05,
421
- "loss": 0.9587,
422
- "step": 9000
423
- },
424
- {
425
- "epoch": 9.0,
426
- "eval_F1@10": 0.23996280126000177,
427
- "eval_F1@5": 0.33370613928555143,
428
- "eval_F1@M": 0.3559531203402726,
429
- "eval_P@10": 0.20799999999999982,
430
- "eval_P@5": 0.4059999999999997,
431
- "eval_P@M": 0.4660785714285711,
432
- "eval_R@10": 0.30781457986458,
433
- "eval_R@5": 0.30168088578088587,
434
- "eval_R@M": 0.30781457986458,
435
- "eval_loss": 1.9363871812820435,
436
- "eval_model_name": "t5-small-keyword-generation-inspec",
437
- "eval_runtime": 282.9341,
438
- "eval_samples_per_second": 1.767,
439
- "eval_steps_per_second": 1.767,
440
  "step": 9000
441
  },
442
  {
443
  "epoch": 9.5,
444
- "learning_rate": 8.267551020408164e-05,
445
- "loss": 0.8864,
446
- "step": 9500
447
- },
448
- {
449
- "epoch": 9.5,
450
- "eval_F1@10": 0.24553398899899156,
451
- "eval_F1@5": 0.33974727821198425,
452
- "eval_F1@M": 0.36148470868656674,
453
- "eval_P@10": 0.21259999999999998,
454
- "eval_P@5": 0.4128,
455
- "eval_P@M": 0.46541666666666653,
456
- "eval_R@10": 0.3148071484071486,
457
- "eval_R@5": 0.3069217227217229,
458
- "eval_R@M": 0.3148071484071486,
459
- "eval_loss": 2.0047595500946045,
460
- "eval_model_name": "t5-small-keyword-generation-inspec",
461
- "eval_runtime": 283.5357,
462
- "eval_samples_per_second": 1.763,
463
- "eval_steps_per_second": 1.763,
464
  "step": 9500
465
  },
466
  {
467
  "epoch": 10.0,
468
- "learning_rate": 8.165510204081634e-05,
469
- "loss": 0.8219,
470
- "step": 10000
471
- },
472
- {
473
- "epoch": 10.0,
474
- "eval_F1@10": 0.24542163861026467,
475
- "eval_F1@5": 0.3405659428806491,
476
- "eval_F1@M": 0.36243521048954536,
477
- "eval_P@10": 0.2128,
478
- "eval_P@5": 0.4147999999999997,
479
- "eval_P@M": 0.471490476190476,
480
- "eval_R@10": 0.31549298479298493,
481
- "eval_R@5": 0.30797812187812196,
482
- "eval_R@M": 0.31549298479298493,
483
- "eval_loss": 2.050034999847412,
484
- "eval_model_name": "t5-small-keyword-generation-inspec",
485
- "eval_runtime": 284.698,
486
- "eval_samples_per_second": 1.756,
487
- "eval_steps_per_second": 1.756,
488
  "step": 10000
489
  },
490
  {
491
  "epoch": 10.5,
492
- "learning_rate": 8.063469387755103e-05,
493
- "loss": 0.7906,
494
- "step": 10500
495
- },
496
- {
497
- "epoch": 10.5,
498
- "eval_F1@10": 0.24112739408629902,
499
- "eval_F1@5": 0.3361905365875957,
500
- "eval_F1@M": 0.35434909789213215,
501
- "eval_P@10": 0.20859999999999984,
502
- "eval_P@5": 0.4083999999999998,
503
- "eval_P@M": 0.4588999999999998,
504
- "eval_R@10": 0.3117027250527253,
505
- "eval_R@5": 0.3052405316905319,
506
- "eval_R@M": 0.3117027250527253,
507
- "eval_loss": 2.076453685760498,
508
- "eval_model_name": "t5-small-keyword-generation-inspec",
509
- "eval_runtime": 283.1972,
510
- "eval_samples_per_second": 1.766,
511
- "eval_steps_per_second": 1.766,
512
  "step": 10500
513
  },
514
  {
515
  "epoch": 11.0,
516
- "learning_rate": 7.961632653061225e-05,
517
- "loss": 0.7828,
518
- "step": 11000
519
- },
520
- {
521
- "epoch": 11.0,
522
- "eval_F1@10": 0.2434173157573352,
523
- "eval_F1@5": 0.3369145302410012,
524
- "eval_F1@M": 0.35818623186239346,
525
- "eval_P@10": 0.21079999999999996,
526
- "eval_P@5": 0.40959999999999974,
527
- "eval_P@M": 0.4597690476190474,
528
- "eval_R@10": 0.3140421911421914,
529
- "eval_R@5": 0.3053971694971696,
530
- "eval_R@M": 0.3140421911421914,
531
- "eval_loss": 2.0442428588867188,
532
- "eval_model_name": "t5-small-keyword-generation-inspec",
533
- "eval_runtime": 283.6328,
534
- "eval_samples_per_second": 1.763,
535
- "eval_steps_per_second": 1.763,
536
  "step": 11000
537
  },
538
  {
539
  "epoch": 11.5,
540
- "learning_rate": 7.859795918367348e-05,
541
- "loss": 0.7724,
542
- "step": 11500
543
- },
544
- {
545
- "epoch": 11.5,
546
- "eval_F1@10": 0.24332780944321591,
547
- "eval_F1@5": 0.3358317365640898,
548
- "eval_F1@M": 0.3594637432399048,
549
- "eval_P@10": 0.21079999999999996,
550
- "eval_P@5": 0.40799999999999986,
551
- "eval_P@M": 0.4666904761904761,
552
- "eval_R@10": 0.31255604395604414,
553
- "eval_R@5": 0.3039292041292042,
554
- "eval_R@M": 0.31255604395604414,
555
- "eval_loss": 2.1262500286102295,
556
- "eval_model_name": "t5-small-keyword-generation-inspec",
557
- "eval_runtime": 283.3582,
558
- "eval_samples_per_second": 1.765,
559
- "eval_steps_per_second": 1.765,
560
  "step": 11500
561
  },
562
  {
563
  "epoch": 12.0,
564
- "learning_rate": 7.757755102040817e-05,
565
- "loss": 0.6985,
566
- "step": 12000
567
- },
568
- {
569
- "epoch": 12.0,
570
- "eval_F1@10": 0.24282010444872945,
571
- "eval_F1@5": 0.33782710328886834,
572
- "eval_F1@M": 0.35967159486540345,
573
- "eval_P@10": 0.2104,
574
- "eval_P@5": 0.4112,
575
- "eval_P@M": 0.4646380952380953,
576
- "eval_R@10": 0.31213939948939956,
577
- "eval_R@5": 0.30527879342879344,
578
- "eval_R@M": 0.31213939948939956,
579
- "eval_loss": 2.0867960453033447,
580
- "eval_model_name": "t5-small-keyword-generation-inspec",
581
- "eval_runtime": 281.5635,
582
- "eval_samples_per_second": 1.776,
583
- "eval_steps_per_second": 1.776,
584
  "step": 12000
585
  },
586
  {
587
  "epoch": 12.5,
588
- "learning_rate": 7.655714285714286e-05,
589
- "loss": 0.6647,
590
- "step": 12500
591
- },
592
- {
593
- "epoch": 12.5,
594
- "eval_F1@10": 0.24765508121946467,
595
- "eval_F1@5": 0.3433662775133368,
596
- "eval_F1@M": 0.3637068867664847,
597
- "eval_P@10": 0.21459999999999982,
598
- "eval_P@5": 0.4179999999999996,
599
- "eval_P@M": 0.4680285714285711,
600
- "eval_R@10": 0.3187178987678989,
601
- "eval_R@5": 0.3105277833277834,
602
- "eval_R@M": 0.3187178987678989,
603
- "eval_loss": 2.156297445297241,
604
- "eval_model_name": "t5-small-keyword-generation-inspec",
605
- "eval_runtime": 281.7625,
606
- "eval_samples_per_second": 1.775,
607
- "eval_steps_per_second": 1.775,
608
  "step": 12500
609
  },
610
  {
611
  "epoch": 13.0,
612
- "learning_rate": 7.553673469387756e-05,
613
- "loss": 0.6823,
614
- "step": 13000
615
- },
616
- {
617
- "epoch": 13.0,
618
- "eval_F1@10": 0.25070757222690915,
619
- "eval_F1@5": 0.34642384380325597,
620
- "eval_F1@M": 0.36789754023066784,
621
- "eval_P@10": 0.21759999999999993,
622
- "eval_P@5": 0.4223999999999997,
623
- "eval_P@M": 0.47199523809523813,
624
- "eval_R@10": 0.3204831224331225,
625
- "eval_R@5": 0.3122700632700633,
626
- "eval_R@M": 0.3204831224331225,
627
- "eval_loss": 2.154297113418579,
628
- "eval_model_name": "t5-small-keyword-generation-inspec",
629
- "eval_runtime": 283.3444,
630
- "eval_samples_per_second": 1.765,
631
- "eval_steps_per_second": 1.765,
632
  "step": 13000
633
  },
634
  {
635
  "epoch": 13.5,
636
- "learning_rate": 7.451632653061224e-05,
637
- "loss": 0.6039,
638
- "step": 13500
639
- },
640
- {
641
- "epoch": 13.5,
642
- "eval_F1@10": 0.24724974591747426,
643
- "eval_F1@5": 0.34381850469791675,
644
- "eval_F1@M": 0.36370015266810973,
645
- "eval_P@10": 0.21420000000000003,
646
- "eval_P@5": 0.4179999999999999,
647
- "eval_P@M": 0.4715571428571432,
648
- "eval_R@10": 0.31813237873237904,
649
- "eval_R@5": 0.31155258075258097,
650
- "eval_R@M": 0.31813237873237904,
651
- "eval_loss": 2.273547410964966,
652
- "eval_model_name": "t5-small-keyword-generation-inspec",
653
- "eval_runtime": 282.8458,
654
- "eval_samples_per_second": 1.768,
655
- "eval_steps_per_second": 1.768,
656
  "step": 13500
657
  },
658
  {
659
  "epoch": 14.0,
660
- "learning_rate": 7.349591836734695e-05,
661
- "loss": 0.6657,
662
- "step": 14000
663
- },
664
- {
665
- "epoch": 14.0,
666
- "eval_F1@10": 0.24294545526488653,
667
- "eval_F1@5": 0.33743259452965385,
668
- "eval_F1@M": 0.35771661821151046,
669
- "eval_P@10": 0.21059999999999995,
670
- "eval_P@5": 0.4103999999999999,
671
- "eval_P@M": 0.4632714285714285,
672
- "eval_R@10": 0.3123259573759575,
673
- "eval_R@5": 0.3057819458319459,
674
- "eval_R@M": 0.3123259573759575,
675
- "eval_loss": 2.2293343544006348,
676
- "eval_model_name": "t5-small-keyword-generation-inspec",
677
- "eval_runtime": 284.7498,
678
- "eval_samples_per_second": 1.756,
679
- "eval_steps_per_second": 1.756,
680
  "step": 14000
681
  },
682
  {
683
  "epoch": 14.5,
684
- "learning_rate": 7.247551020408163e-05,
685
- "loss": 0.5512,
686
- "step": 14500
687
- },
688
- {
689
- "epoch": 14.5,
690
- "eval_F1@10": 0.24609090645971843,
691
- "eval_F1@5": 0.341195668057433,
692
- "eval_F1@M": 0.3638307651069263,
693
- "eval_P@10": 0.2134,
694
- "eval_P@5": 0.4151999999999999,
695
- "eval_P@M": 0.47303809523809526,
696
- "eval_R@10": 0.31571549561549583,
697
- "eval_R@5": 0.3083380064380067,
698
- "eval_R@M": 0.31571549561549583,
699
- "eval_loss": 2.2570900917053223,
700
- "eval_model_name": "t5-small-keyword-generation-inspec",
701
- "eval_runtime": 289.2337,
702
- "eval_samples_per_second": 1.729,
703
- "eval_steps_per_second": 1.729,
704
  "step": 14500
705
  },
706
  {
707
  "epoch": 15.0,
708
- "learning_rate": 7.145510204081633e-05,
709
- "loss": 0.6125,
710
- "step": 15000
711
- },
712
- {
713
- "epoch": 15.0,
714
- "eval_F1@10": 0.24506923303510375,
715
- "eval_F1@5": 0.34101718379659585,
716
- "eval_F1@M": 0.36054259558810675,
717
- "eval_P@10": 0.21240000000000006,
718
- "eval_P@5": 0.41520000000000007,
719
- "eval_P@M": 0.46649523809523824,
720
- "eval_R@10": 0.3145362748362751,
721
- "eval_R@5": 0.30819146964146976,
722
- "eval_R@M": 0.3145362748362751,
723
- "eval_loss": 2.300617218017578,
724
- "eval_model_name": "t5-small-keyword-generation-inspec",
725
- "eval_runtime": 285.2317,
726
- "eval_samples_per_second": 1.753,
727
- "eval_steps_per_second": 1.753,
728
  "step": 15000
729
  },
730
  {
731
  "epoch": 15.5,
732
- "learning_rate": 7.043673469387755e-05,
733
- "loss": 0.5785,
734
  "step": 15500
735
  },
736
- {
737
- "epoch": 15.5,
738
- "eval_F1@10": 0.2442251260644657,
739
- "eval_F1@5": 0.33893644394820904,
740
- "eval_F1@M": 0.35870379697701743,
741
- "eval_P@10": 0.21159999999999998,
742
- "eval_P@5": 0.41240000000000004,
743
- "eval_P@M": 0.46347380952380973,
744
- "eval_R@10": 0.3134413031413033,
745
- "eval_R@5": 0.3057845210345211,
746
- "eval_R@M": 0.3134413031413033,
747
- "eval_loss": 2.338202953338623,
748
- "eval_model_name": "t5-small-keyword-generation-inspec",
749
- "eval_runtime": 289.9816,
750
- "eval_samples_per_second": 1.724,
751
- "eval_steps_per_second": 1.724,
752
- "step": 15500
753
- },
754
- {
755
- "epoch": 16.0,
756
- "learning_rate": 6.941632653061226e-05,
757
- "loss": 0.5428,
758
- "step": 16000
759
- },
760
  {
761
  "epoch": 16.0,
762
- "eval_F1@10": 0.2432725372472316,
763
- "eval_F1@5": 0.3373570716864839,
764
- "eval_F1@M": 0.3582228556960761,
765
- "eval_P@10": 0.21059999999999998,
766
- "eval_P@5": 0.4095999999999998,
767
- "eval_P@M": 0.46375,
768
- "eval_R@10": 0.3134618159618162,
769
- "eval_R@5": 0.30584154179154194,
770
- "eval_R@M": 0.3134618159618162,
771
- "eval_loss": 2.3253493309020996,
772
- "eval_model_name": "t5-small-keyword-generation-inspec",
773
- "eval_runtime": 289.4094,
774
- "eval_samples_per_second": 1.728,
775
- "eval_steps_per_second": 1.728,
776
  "step": 16000
777
  },
778
  {
779
  "epoch": 16.5,
780
- "learning_rate": 6.839591836734694e-05,
781
- "loss": 0.5194,
782
- "step": 16500
783
- },
784
- {
785
- "epoch": 16.5,
786
- "eval_F1@10": 0.2355793420179682,
787
- "eval_F1@5": 0.3278029424823546,
788
- "eval_F1@M": 0.3482424748282339,
789
- "eval_P@10": 0.20440000000000003,
790
- "eval_P@5": 0.3991999999999999,
791
- "eval_P@M": 0.4537833333333335,
792
- "eval_R@10": 0.30187379287379296,
793
- "eval_R@5": 0.2962392329892331,
794
- "eval_R@M": 0.30187379287379296,
795
- "eval_loss": 2.3528101444244385,
796
- "eval_model_name": "t5-small-keyword-generation-inspec",
797
- "eval_runtime": 290.7706,
798
- "eval_samples_per_second": 1.72,
799
- "eval_steps_per_second": 1.72,
800
  "step": 16500
801
  },
802
  {
803
  "epoch": 17.0,
804
- "learning_rate": 6.737551020408163e-05,
805
- "loss": 0.5194,
806
- "step": 17000
807
- },
808
- {
809
- "epoch": 17.0,
810
- "eval_F1@10": 0.24069023813939067,
811
- "eval_F1@5": 0.33479577971048596,
812
- "eval_F1@M": 0.355847497067776,
813
- "eval_P@10": 0.20800000000000002,
814
- "eval_P@5": 0.40559999999999996,
815
- "eval_P@M": 0.46179999999999993,
816
- "eval_R@10": 0.311403085803086,
817
- "eval_R@5": 0.3045922632922635,
818
- "eval_R@M": 0.311403085803086,
819
- "eval_loss": 2.3786723613739014,
820
- "eval_model_name": "t5-small-keyword-generation-inspec",
821
- "eval_runtime": 272.2042,
822
- "eval_samples_per_second": 1.837,
823
- "eval_steps_per_second": 1.837,
824
  "step": 17000
825
  },
826
  {
827
  "epoch": 17.5,
828
- "learning_rate": 6.635510204081633e-05,
829
- "loss": 0.4734,
830
- "step": 17500
831
- },
832
- {
833
- "epoch": 17.5,
834
- "eval_F1@10": 0.2424905535962076,
835
- "eval_F1@5": 0.33929873949579886,
836
- "eval_F1@M": 0.36014400369152755,
837
- "eval_P@10": 0.20979999999999996,
838
- "eval_P@5": 0.4119999999999998,
839
- "eval_P@M": 0.46821190476190494,
840
- "eval_R@10": 0.3125397380397382,
841
- "eval_R@5": 0.3077346153846155,
842
- "eval_R@M": 0.3125397380397382,
843
- "eval_loss": 2.3801848888397217,
844
- "eval_model_name": "t5-small-keyword-generation-inspec",
845
- "eval_runtime": 269.1685,
846
- "eval_samples_per_second": 1.858,
847
- "eval_steps_per_second": 1.858,
848
  "step": 17500
849
  },
850
  {
851
  "epoch": 18.0,
852
- "learning_rate": 6.533469387755102e-05,
853
- "loss": 0.4685,
854
- "step": 18000
855
- },
856
- {
857
- "epoch": 18.0,
858
- "eval_F1@10": 0.2428809489937239,
859
- "eval_F1@5": 0.34102215725451057,
860
- "eval_F1@M": 0.36136990799018703,
861
- "eval_P@10": 0.20919999999999994,
862
- "eval_P@5": 0.41119999999999984,
863
- "eval_P@M": 0.4681333333333333,
864
- "eval_R@10": 0.31634897324897343,
865
- "eval_R@5": 0.31194796314796325,
866
- "eval_R@M": 0.31634897324897343,
867
- "eval_loss": 2.4091179370880127,
868
- "eval_model_name": "t5-small-keyword-generation-inspec",
869
- "eval_runtime": 264.9477,
870
- "eval_samples_per_second": 1.887,
871
- "eval_steps_per_second": 1.887,
872
  "step": 18000
873
  },
874
  {
875
  "epoch": 18.5,
876
- "learning_rate": 6.431428571428572e-05,
877
- "loss": 0.4574,
878
- "step": 18500
879
- },
880
- {
881
- "epoch": 18.5,
882
- "eval_F1@10": 0.24470615952450722,
883
- "eval_F1@5": 0.34083279204455713,
884
- "eval_F1@M": 0.3591474804177593,
885
- "eval_P@10": 0.21199999999999997,
886
- "eval_P@5": 0.41440000000000005,
887
- "eval_P@M": 0.46359047619047605,
888
- "eval_R@10": 0.31490344655344676,
889
- "eval_R@5": 0.30879514929514945,
890
- "eval_R@M": 0.31490344655344676,
891
- "eval_loss": 2.414991617202759,
892
- "eval_model_name": "t5-small-keyword-generation-inspec",
893
- "eval_runtime": 289.5904,
894
- "eval_samples_per_second": 1.727,
895
- "eval_steps_per_second": 1.727,
896
  "step": 18500
897
  },
898
  {
899
  "epoch": 19.0,
900
- "learning_rate": 6.329387755102041e-05,
901
- "loss": 0.4411,
902
- "step": 19000
903
- },
904
- {
905
- "epoch": 19.0,
906
- "eval_F1@10": 0.2413860020074456,
907
- "eval_F1@5": 0.3365299011446076,
908
- "eval_F1@M": 0.3552537179710558,
909
- "eval_P@10": 0.20879999999999999,
910
- "eval_P@5": 0.4083999999999998,
911
- "eval_P@M": 0.4593714285714285,
912
- "eval_R@10": 0.31052307137307145,
913
- "eval_R@5": 0.3046235042735043,
914
- "eval_R@M": 0.31052307137307145,
915
- "eval_loss": 2.5369248390197754,
916
- "eval_model_name": "t5-small-keyword-generation-inspec",
917
- "eval_runtime": 281.7996,
918
- "eval_samples_per_second": 1.774,
919
- "eval_steps_per_second": 1.774,
920
  "step": 19000
921
  },
922
  {
923
  "epoch": 19.5,
924
- "learning_rate": 6.22734693877551e-05,
925
- "loss": 0.4032,
926
  "step": 19500
927
  },
928
  {
929
- "epoch": 19.5,
930
- "eval_F1@10": 0.2432083462438766,
931
- "eval_F1@5": 0.3403430030753566,
932
- "eval_F1@M": 0.35755804044596656,
933
- "eval_P@10": 0.2102,
934
- "eval_P@5": 0.4132000000000002,
935
- "eval_P@M": 0.4617999999999998,
936
- "eval_R@10": 0.3141284104784106,
937
- "eval_R@5": 0.30864017094017104,
938
- "eval_R@M": 0.3141284104784106,
939
- "eval_loss": 2.560804605484009,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
940
  "eval_model_name": "t5-small-keyword-generation-inspec",
941
- "eval_runtime": 296.5123,
942
- "eval_samples_per_second": 1.686,
943
- "eval_steps_per_second": 1.686,
944
- "step": 19500
945
  },
946
  {
947
- "epoch": 20.0,
948
- "learning_rate": 6.125510204081633e-05,
949
- "loss": 0.4402,
950
- "step": 20000
951
  },
952
  {
953
- "epoch": 20.0,
954
- "eval_F1@10": 0.24569117638243435,
955
- "eval_F1@5": 0.3409062858709922,
956
- "eval_F1@M": 0.3611119221851426,
957
- "eval_P@10": 0.21239999999999998,
958
- "eval_P@5": 0.41359999999999997,
959
- "eval_P@M": 0.4668095238095235,
960
- "eval_R@10": 0.3174460150960153,
961
- "eval_R@5": 0.3095908924408926,
962
- "eval_R@M": 0.3174460150960153,
963
- "eval_loss": 2.5116209983825684,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
964
  "eval_model_name": "t5-small-keyword-generation-inspec",
965
- "eval_runtime": 280.4633,
966
- "eval_samples_per_second": 1.783,
967
- "eval_steps_per_second": 1.783,
968
- "step": 20000
969
  }
970
  ],
971
  "max_steps": 50000,
972
  "num_train_epochs": 50,
973
- "total_flos": 2706836029440000.0,
974
  "trial_name": null,
975
  "trial_params": null
976
  }
 
1
  {
2
+ "best_metric": 0.3600367021867026,
3
+ "best_model_checkpoint": "t5-small-keyword-generation-inspec\\checkpoint-25000",
4
+ "epoch": 50.0,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.5,
12
+ "learning_rate": 2.465e-05,
13
+ "loss": 2.5363,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "step": 500
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "learning_rate": 4.965e-05,
19
+ "loss": 1.6228,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 1.5,
24
+ "learning_rate": 4.9496938775510204e-05,
25
+ "loss": 1.4329,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "learning_rate": 4.898673469387755e-05,
31
+ "loss": 1.3213,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 2.5,
36
+ "learning_rate": 4.8476530612244906e-05,
37
+ "loss": 1.2251,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 3.0,
42
+ "learning_rate": 4.7967346938775516e-05,
43
+ "loss": 1.1703,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 3.5,
48
+ "learning_rate": 4.745714285714286e-05,
49
+ "loss": 1.0748,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "learning_rate": 4.694693877551021e-05,
55
+ "loss": 1.1015,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 4.5,
60
+ "learning_rate": 4.6436734693877554e-05,
61
+ "loss": 0.9512,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "step": 4500
63
  },
64
  {
65
  "epoch": 5.0,
66
+ "learning_rate": 4.5927551020408164e-05,
67
+ "loss": 1.0313,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  "step": 5000
69
  },
70
  {
71
  "epoch": 5.5,
72
+ "learning_rate": 4.5417346938775515e-05,
73
+ "loss": 0.9283,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  "step": 5500
75
  },
76
  {
77
  "epoch": 6.0,
78
+ "learning_rate": 4.490714285714286e-05,
79
+ "loss": 0.9154,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  "step": 6000
81
  },
82
  {
83
  "epoch": 6.5,
84
+ "learning_rate": 4.43969387755102e-05,
85
+ "loss": 0.8623,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  "step": 6500
87
  },
88
  {
89
  "epoch": 7.0,
90
+ "learning_rate": 4.388673469387755e-05,
91
+ "loss": 0.8258,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "step": 7000
93
  },
94
  {
95
  "epoch": 7.5,
96
+ "learning_rate": 4.337755102040816e-05,
97
+ "loss": 0.7804,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  "step": 7500
99
  },
100
  {
101
  "epoch": 8.0,
102
+ "learning_rate": 4.286734693877551e-05,
103
+ "loss": 0.7949,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  "step": 8000
105
  },
106
  {
107
  "epoch": 8.5,
108
+ "learning_rate": 4.2357142857142864e-05,
109
+ "loss": 0.7321,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  "step": 8500
111
  },
112
  {
113
  "epoch": 9.0,
114
+ "learning_rate": 4.184693877551021e-05,
115
+ "loss": 0.7393,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  "step": 9000
117
  },
118
  {
119
  "epoch": 9.5,
120
+ "learning_rate": 4.133775510204082e-05,
121
+ "loss": 0.6964,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  "step": 9500
123
  },
124
  {
125
  "epoch": 10.0,
126
+ "learning_rate": 4.082755102040817e-05,
127
+ "loss": 0.6759,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  "step": 10000
129
  },
130
  {
131
  "epoch": 10.5,
132
+ "learning_rate": 4.031734693877551e-05,
133
+ "loss": 0.6642,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  "step": 10500
135
  },
136
  {
137
  "epoch": 11.0,
138
+ "learning_rate": 3.980714285714286e-05,
139
+ "loss": 0.6444,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  "step": 11000
141
  },
142
  {
143
  "epoch": 11.5,
144
+ "learning_rate": 3.929693877551021e-05,
145
+ "loss": 0.6389,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  "step": 11500
147
  },
148
  {
149
  "epoch": 12.0,
150
+ "learning_rate": 3.878775510204082e-05,
151
+ "loss": 0.5822,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  "step": 12000
153
  },
154
  {
155
  "epoch": 12.5,
156
+ "learning_rate": 3.827755102040816e-05,
157
+ "loss": 0.5617,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  "step": 12500
159
  },
160
  {
161
  "epoch": 13.0,
162
+ "learning_rate": 3.776734693877551e-05,
163
+ "loss": 0.5885,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  "step": 13000
165
  },
166
  {
167
  "epoch": 13.5,
168
+ "learning_rate": 3.7257142857142856e-05,
169
+ "loss": 0.5307,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  "step": 13500
171
  },
172
  {
173
  "epoch": 14.0,
174
+ "learning_rate": 3.6746938775510206e-05,
175
+ "loss": 0.5597,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
176
  "step": 14000
177
  },
178
  {
179
  "epoch": 14.5,
180
+ "learning_rate": 3.623673469387756e-05,
181
+ "loss": 0.4839,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  "step": 14500
183
  },
184
  {
185
  "epoch": 15.0,
186
+ "learning_rate": 3.57265306122449e-05,
187
+ "loss": 0.5524,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  "step": 15000
189
  },
190
  {
191
  "epoch": 15.5,
192
+ "learning_rate": 3.521632653061225e-05,
193
+ "loss": 0.5058,
194
  "step": 15500
195
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  {
197
  "epoch": 16.0,
198
+ "learning_rate": 3.470714285714286e-05,
199
+ "loss": 0.4807,
 
 
 
 
 
 
 
 
 
 
 
 
200
  "step": 16000
201
  },
202
  {
203
  "epoch": 16.5,
204
+ "learning_rate": 3.4196938775510205e-05,
205
+ "loss": 0.4749,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  "step": 16500
207
  },
208
  {
209
  "epoch": 17.0,
210
+ "learning_rate": 3.3686734693877556e-05,
211
+ "loss": 0.4668,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  "step": 17000
213
  },
214
  {
215
  "epoch": 17.5,
216
+ "learning_rate": 3.31765306122449e-05,
217
+ "loss": 0.4512,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  "step": 17500
219
  },
220
  {
221
  "epoch": 18.0,
222
+ "learning_rate": 3.266734693877551e-05,
223
+ "loss": 0.4506,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  "step": 18000
225
  },
226
  {
227
  "epoch": 18.5,
228
+ "learning_rate": 3.215714285714286e-05,
229
+ "loss": 0.4317,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  "step": 18500
231
  },
232
  {
233
  "epoch": 19.0,
234
+ "learning_rate": 3.1646938775510204e-05,
235
+ "loss": 0.4262,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  "step": 19000
237
  },
238
  {
239
  "epoch": 19.5,
240
+ "learning_rate": 3.113673469387755e-05,
241
+ "loss": 0.3938,
242
  "step": 19500
243
  },
244
  {
245
+ "epoch": 20.0,
246
+ "learning_rate": 3.06265306122449e-05,
247
+ "loss": 0.4165,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 20.5,
252
+ "learning_rate": 3.011734693877551e-05,
253
+ "loss": 0.4182,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 21.0,
258
+ "learning_rate": 2.960714285714286e-05,
259
+ "loss": 0.3608,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 21.5,
264
+ "learning_rate": 2.9096938775510207e-05,
265
+ "loss": 0.377,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 22.0,
270
+ "learning_rate": 2.8586734693877554e-05,
271
+ "loss": 0.3845,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 22.5,
276
+ "learning_rate": 2.80765306122449e-05,
277
+ "loss": 0.376,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 23.0,
282
+ "learning_rate": 2.7566326530612245e-05,
283
+ "loss": 0.3522,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 23.5,
288
+ "learning_rate": 2.7056122448979592e-05,
289
+ "loss": 0.3495,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 24.0,
294
+ "learning_rate": 2.654591836734694e-05,
295
+ "loss": 0.3539,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 24.5,
300
+ "learning_rate": 2.6036734693877553e-05,
301
+ "loss": 0.3357,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 25.0,
306
+ "learning_rate": 2.5526530612244897e-05,
307
+ "loss": 0.3341,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 25.0,
312
+ "eval_F1@10": 0.22188352801309513,
313
+ "eval_F1@5": 0.32039403374403397,
314
+ "eval_F1@M": 0.3600367021867026,
315
+ "eval_P@10": 0.18379999999999982,
316
+ "eval_P@5": 0.36759999999999965,
317
+ "eval_P@M": 0.48766666666666625,
318
+ "eval_R@10": 0.298240836940837,
319
+ "eval_R@5": 0.298240836940837,
320
+ "eval_R@M": 0.298240836940837,
321
+ "eval_loss": 1.5177357196807861,
322
  "eval_model_name": "t5-small-keyword-generation-inspec",
323
+ "eval_runtime": 282.7,
324
+ "eval_samples_per_second": 1.769,
325
+ "eval_steps_per_second": 1.769,
326
+ "step": 25000
327
  },
328
  {
329
+ "epoch": 25.5,
330
+ "learning_rate": 2.5016326530612244e-05,
331
+ "loss": 0.3203,
332
+ "step": 25500
333
  },
334
  {
335
+ "epoch": 26.0,
336
+ "learning_rate": 2.4506122448979594e-05,
337
+ "loss": 0.3257,
338
+ "step": 26000
339
+ },
340
+ {
341
+ "epoch": 26.5,
342
+ "learning_rate": 2.3995918367346938e-05,
343
+ "loss": 0.3046,
344
+ "step": 26500
345
+ },
346
+ {
347
+ "epoch": 27.0,
348
+ "learning_rate": 2.3485714285714285e-05,
349
+ "loss": 0.3236,
350
+ "step": 27000
351
+ },
352
+ {
353
+ "epoch": 27.5,
354
+ "learning_rate": 2.2975510204081636e-05,
355
+ "loss": 0.2983,
356
+ "step": 27500
357
+ },
358
+ {
359
+ "epoch": 28.0,
360
+ "learning_rate": 2.246530612244898e-05,
361
+ "loss": 0.3063,
362
+ "step": 28000
363
+ },
364
+ {
365
+ "epoch": 28.5,
366
+ "learning_rate": 2.1956122448979593e-05,
367
+ "loss": 0.2928,
368
+ "step": 28500
369
+ },
370
+ {
371
+ "epoch": 29.0,
372
+ "learning_rate": 2.144591836734694e-05,
373
+ "loss": 0.289,
374
+ "step": 29000
375
+ },
376
+ {
377
+ "epoch": 29.5,
378
+ "learning_rate": 2.0935714285714288e-05,
379
+ "loss": 0.2772,
380
+ "step": 29500
381
+ },
382
+ {
383
+ "epoch": 30.0,
384
+ "learning_rate": 2.042551020408163e-05,
385
+ "loss": 0.2835,
386
+ "step": 30000
387
+ },
388
+ {
389
+ "epoch": 30.5,
390
+ "learning_rate": 1.9915306122448982e-05,
391
+ "loss": 0.2804,
392
+ "step": 30500
393
+ },
394
+ {
395
+ "epoch": 31.0,
396
+ "learning_rate": 1.940510204081633e-05,
397
+ "loss": 0.2728,
398
+ "step": 31000
399
+ },
400
+ {
401
+ "epoch": 31.5,
402
+ "learning_rate": 1.8894897959183673e-05,
403
+ "loss": 0.2481,
404
+ "step": 31500
405
+ },
406
+ {
407
+ "epoch": 32.0,
408
+ "learning_rate": 1.838469387755102e-05,
409
+ "loss": 0.2734,
410
+ "step": 32000
411
+ },
412
+ {
413
+ "epoch": 32.5,
414
+ "learning_rate": 1.7874489795918368e-05,
415
+ "loss": 0.2629,
416
+ "step": 32500
417
+ },
418
+ {
419
+ "epoch": 33.0,
420
+ "learning_rate": 1.7365306122448978e-05,
421
+ "loss": 0.2546,
422
+ "step": 33000
423
+ },
424
+ {
425
+ "epoch": 33.5,
426
+ "learning_rate": 1.685510204081633e-05,
427
+ "loss": 0.2454,
428
+ "step": 33500
429
+ },
430
+ {
431
+ "epoch": 34.0,
432
+ "learning_rate": 1.6344897959183676e-05,
433
+ "loss": 0.2456,
434
+ "step": 34000
435
+ },
436
+ {
437
+ "epoch": 34.5,
438
+ "learning_rate": 1.5835714285714286e-05,
439
+ "loss": 0.2618,
440
+ "step": 34500
441
+ },
442
+ {
443
+ "epoch": 35.0,
444
+ "learning_rate": 1.5325510204081633e-05,
445
+ "loss": 0.2362,
446
+ "step": 35000
447
+ },
448
+ {
449
+ "epoch": 35.5,
450
+ "learning_rate": 1.481530612244898e-05,
451
+ "loss": 0.2249,
452
+ "step": 35500
453
+ },
454
+ {
455
+ "epoch": 36.0,
456
+ "learning_rate": 1.4305102040816326e-05,
457
+ "loss": 0.2566,
458
+ "step": 36000
459
+ },
460
+ {
461
+ "epoch": 36.5,
462
+ "learning_rate": 1.3794897959183675e-05,
463
+ "loss": 0.2247,
464
+ "step": 36500
465
+ },
466
+ {
467
+ "epoch": 37.0,
468
+ "learning_rate": 1.3285714285714288e-05,
469
+ "loss": 0.2434,
470
+ "step": 37000
471
+ },
472
+ {
473
+ "epoch": 37.5,
474
+ "learning_rate": 1.2775510204081634e-05,
475
+ "loss": 0.2291,
476
+ "step": 37500
477
+ },
478
+ {
479
+ "epoch": 38.0,
480
+ "learning_rate": 1.2265306122448981e-05,
481
+ "loss": 0.2301,
482
+ "step": 38000
483
+ },
484
+ {
485
+ "epoch": 38.5,
486
+ "learning_rate": 1.1755102040816326e-05,
487
+ "loss": 0.2281,
488
+ "step": 38500
489
+ },
490
+ {
491
+ "epoch": 39.0,
492
+ "learning_rate": 1.1244897959183674e-05,
493
+ "loss": 0.2203,
494
+ "step": 39000
495
+ },
496
+ {
497
+ "epoch": 39.5,
498
+ "learning_rate": 1.073469387755102e-05,
499
+ "loss": 0.2129,
500
+ "step": 39500
501
+ },
502
+ {
503
+ "epoch": 40.0,
504
+ "learning_rate": 1.0225510204081633e-05,
505
+ "loss": 0.2258,
506
+ "step": 40000
507
+ },
508
+ {
509
+ "epoch": 40.5,
510
+ "learning_rate": 9.71530612244898e-06,
511
+ "loss": 0.2254,
512
+ "step": 40500
513
+ },
514
+ {
515
+ "epoch": 41.0,
516
+ "learning_rate": 9.205102040816327e-06,
517
+ "loss": 0.208,
518
+ "step": 41000
519
+ },
520
+ {
521
+ "epoch": 41.5,
522
+ "learning_rate": 8.694897959183673e-06,
523
+ "loss": 0.2295,
524
+ "step": 41500
525
+ },
526
+ {
527
+ "epoch": 42.0,
528
+ "learning_rate": 8.184693877551021e-06,
529
+ "loss": 0.1969,
530
+ "step": 42000
531
+ },
532
+ {
533
+ "epoch": 42.5,
534
+ "learning_rate": 7.674489795918367e-06,
535
+ "loss": 0.2219,
536
+ "step": 42500
537
+ },
538
+ {
539
+ "epoch": 43.0,
540
+ "learning_rate": 7.164285714285715e-06,
541
+ "loss": 0.2032,
542
+ "step": 43000
543
+ },
544
+ {
545
+ "epoch": 43.5,
546
+ "learning_rate": 6.654081632653061e-06,
547
+ "loss": 0.2145,
548
+ "step": 43500
549
+ },
550
+ {
551
+ "epoch": 44.0,
552
+ "learning_rate": 6.144897959183673e-06,
553
+ "loss": 0.1968,
554
+ "step": 44000
555
+ },
556
+ {
557
+ "epoch": 44.5,
558
+ "learning_rate": 5.6346938775510205e-06,
559
+ "loss": 0.1956,
560
+ "step": 44500
561
+ },
562
+ {
563
+ "epoch": 45.0,
564
+ "learning_rate": 5.124489795918368e-06,
565
+ "loss": 0.212,
566
+ "step": 45000
567
+ },
568
+ {
569
+ "epoch": 45.5,
570
+ "learning_rate": 4.614285714285715e-06,
571
+ "loss": 0.2036,
572
+ "step": 45500
573
+ },
574
+ {
575
+ "epoch": 46.0,
576
+ "learning_rate": 4.104081632653061e-06,
577
+ "loss": 0.2101,
578
+ "step": 46000
579
+ },
580
+ {
581
+ "epoch": 46.5,
582
+ "learning_rate": 3.594897959183673e-06,
583
+ "loss": 0.1981,
584
+ "step": 46500
585
+ },
586
+ {
587
+ "epoch": 47.0,
588
+ "learning_rate": 3.0846938775510208e-06,
589
+ "loss": 0.2008,
590
+ "step": 47000
591
+ },
592
+ {
593
+ "epoch": 47.5,
594
+ "learning_rate": 2.575510204081633e-06,
595
+ "loss": 0.209,
596
+ "step": 47500
597
+ },
598
+ {
599
+ "epoch": 48.0,
600
+ "learning_rate": 2.0653061224489795e-06,
601
+ "loss": 0.1872,
602
+ "step": 48000
603
+ },
604
+ {
605
+ "epoch": 48.5,
606
+ "learning_rate": 1.5551020408163267e-06,
607
+ "loss": 0.1778,
608
+ "step": 48500
609
+ },
610
+ {
611
+ "epoch": 49.0,
612
+ "learning_rate": 1.0448979591836734e-06,
613
+ "loss": 0.2174,
614
+ "step": 49000
615
+ },
616
+ {
617
+ "epoch": 49.5,
618
+ "learning_rate": 5.346938775510204e-07,
619
+ "loss": 0.1982,
620
+ "step": 49500
621
+ },
622
+ {
623
+ "epoch": 50.0,
624
+ "learning_rate": 2.4489795918367346e-08,
625
+ "loss": 0.195,
626
+ "step": 50000
627
+ },
628
+ {
629
+ "epoch": 50.0,
630
+ "eval_F1@10": 0.21505513330838452,
631
+ "eval_F1@5": 0.31023088578088626,
632
+ "eval_F1@M": 0.3480389832389839,
633
+ "eval_P@10": 0.17839999999999995,
634
+ "eval_P@5": 0.3567999999999999,
635
+ "eval_P@M": 0.4749999999999997,
636
+ "eval_R@10": 0.28769942279942284,
637
+ "eval_R@5": 0.28769942279942284,
638
+ "eval_R@M": 0.28769942279942284,
639
+ "eval_loss": 1.8302260637283325,
640
  "eval_model_name": "t5-small-keyword-generation-inspec",
641
+ "eval_runtime": 260.956,
642
+ "eval_samples_per_second": 1.916,
643
+ "eval_steps_per_second": 1.916,
644
+ "step": 50000
645
  }
646
  ],
647
  "max_steps": 50000,
648
  "num_train_epochs": 50,
649
+ "total_flos": 6767090073600000.0,
650
  "trial_name": null,
651
  "trial_params": null
652
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed43e6d8f4ec51ea2daf73c228ed358c1a0233bfe7c438145f1d47220364581b
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b98b1f77683211ae49f76b3771dd56ff856da5dd007e283b2c12afb3eef986
3
  size 3183