colerobertson committed
Commit 19d8036
1 Parent(s): eaab99b

Training in progress, epoch 5

Files changed (39)
  1. model.safetensors +1 -1
  2. run-0/checkpoint-120/config.json +80 -0
  3. run-0/checkpoint-120/model.safetensors +3 -0
  4. run-0/checkpoint-120/optimizer.pt +3 -0
  5. run-0/checkpoint-120/preprocessor_config.json +9 -0
  6. run-0/checkpoint-120/rng_state.pth +3 -0
  7. run-0/checkpoint-120/scheduler.pt +3 -0
  8. run-0/checkpoint-120/trainer_state.json +237 -0
  9. run-0/checkpoint-120/training_args.bin +3 -0
  10. run-0/checkpoint-24/config.json +80 -0
  11. run-0/checkpoint-24/model.safetensors +3 -0
  12. run-0/checkpoint-24/optimizer.pt +3 -0
  13. run-0/checkpoint-24/preprocessor_config.json +9 -0
  14. run-0/checkpoint-24/rng_state.pth +3 -0
  15. run-0/checkpoint-24/scheduler.pt +3 -0
  16. run-0/checkpoint-24/trainer_state.json +61 -0
  17. run-0/checkpoint-24/training_args.bin +3 -0
  18. run-0/checkpoint-48/model.safetensors +1 -1
  19. run-0/checkpoint-48/optimizer.pt +1 -1
  20. run-0/checkpoint-48/rng_state.pth +1 -1
  21. run-0/checkpoint-48/scheduler.pt +1 -1
  22. run-0/checkpoint-48/trainer_state.json +56 -65
  23. run-0/checkpoint-48/training_args.bin +1 -1
  24. run-0/checkpoint-72/config.json +80 -0
  25. run-0/checkpoint-72/model.safetensors +3 -0
  26. run-0/checkpoint-72/optimizer.pt +3 -0
  27. run-0/checkpoint-72/preprocessor_config.json +9 -0
  28. run-0/checkpoint-72/rng_state.pth +3 -0
  29. run-0/checkpoint-72/scheduler.pt +3 -0
  30. run-0/checkpoint-72/trainer_state.json +149 -0
  31. run-0/checkpoint-72/training_args.bin +3 -0
  32. run-0/checkpoint-96/model.safetensors +1 -1
  33. run-0/checkpoint-96/optimizer.pt +1 -1
  34. run-0/checkpoint-96/rng_state.pth +1 -1
  35. run-0/checkpoint-96/scheduler.pt +1 -1
  36. run-0/checkpoint-96/trainer_state.json +111 -129
  37. run-0/checkpoint-96/training_args.bin +1 -1
  38. runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709497668.ca56ea9bc35e.3883.24 +3 -0
  39. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c21673226edbab6b566ba6d5fd266227f96e9587c2c6a57041fb29591701883f
+ oid sha256:4970a2abd2c86347378245a23a90bcb1c03225871c0732deda4b110c1402b05e
  size 94763496
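The weight, optimizer, and state files in this commit are stored as Git LFS pointers, so the diff only shows the pointer text (spec version, object hash, byte size); the binaries themselves live in LFS storage. A minimal sketch of fetching the resolved file with huggingface_hub — the repo id below is an assumption inferred from the checkpoint paths and the committer name, and is not stated in this commit:

```python
# Hypothetical sketch: resolve the LFS pointer above into the actual weights file.
# repo_id is assumed from "distilhubert-finetuned-not-a-word2" and the committer name;
# adjust it if the real repository id differs.
from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(
    repo_id="colerobertson/distilhubert-finetuned-not-a-word2",  # assumed repo id
    filename="model.safetensors",
    revision="19d8036",  # this commit
)
print(weights_path)  # local cache path to the 94,763,496-byte file named by the oid above
```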
run-0/checkpoint-120/config.json ADDED
@@ -0,0 +1,80 @@
+ {
+ "_name_or_path": "ntu-spml/distilhubert",
+ "activation_dropout": 0.1,
+ "apply_spec_augment": false,
+ "architectures": [
+ "HubertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.0,
+ "feat_proj_layer_norm": false,
+ "final_dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "NOT_WORD",
+ "1": "WORD"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "NOT_WORD": "0",
+ "WORD": "1"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "hubert",
+ "num_attention_heads": 12,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32
+ }
run-0/checkpoint-120/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4970a2abd2c86347378245a23a90bcb1c03225871c0732deda4b110c1402b05e
+ size 94763496
run-0/checkpoint-120/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:be897b050782e3854dad80b8194d6dc55b64e92a9f6a8de27ffb6bc2f519f358
+ size 189552570
run-0/checkpoint-120/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+ }
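Taken together, config.json and preprocessor_config.json describe a two-layer DistilHuBERT sequence classifier with WORD/NOT_WORD labels fed by a 16 kHz Wav2Vec2 feature extractor. A minimal loading sketch, assuming the checkpoint folder has been downloaded locally (the path is illustrative; any run-0/checkpoint-* folder has the same layout):

```python
# Minimal sketch: load this checkpoint and classify a one-second dummy waveform.
# "run-0/checkpoint-120" is an assumed local path, not a hub id.
import torch
from transformers import AutoFeatureExtractor, HubertForSequenceClassification

ckpt = "run-0/checkpoint-120"
extractor = AutoFeatureExtractor.from_pretrained(ckpt)   # Wav2Vec2FeatureExtractor, 16 kHz
model = HubertForSequenceClassification.from_pretrained(ckpt)
model.eval()

waveform = torch.zeros(16000)  # placeholder audio at sampling_rate=16000
inputs = extractor(waveform.numpy(), sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits        # shape (1, 2): NOT_WORD vs. WORD

print(model.config.id2label[int(logits.argmax(dim=-1))])
```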
run-0/checkpoint-120/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea9b93979db9d187112d0877ca456edd569c23f080722f2fbbe337c28c1a6935
+ size 14244
run-0/checkpoint-120/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0473f5ebc64129e90069538909f48d93066ac572290ce5451bdd20c91af3d131
+ size 1064
run-0/checkpoint-120/trainer_state.json ADDED
@@ -0,0 +1,237 @@
+ {
+ "best_metric": 0.7326732673267327,
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-24",
+ "epoch": 5.0,
+ "eval_steps": 500,
+ "global_step": 120,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.21,
+ "grad_norm": 1.6479910612106323,
+ "learning_rate": 3.3535706116592527e-06,
+ "loss": 0.699,
+ "step": 5
+ },
+ {
+ "epoch": 0.42,
+ "grad_norm": 0.6159653663635254,
+ "learning_rate": 6.707141223318505e-06,
+ "loss": 0.6932,
+ "step": 10
+ },
+ {
+ "epoch": 0.62,
+ "grad_norm": 0.5607424974441528,
+ "learning_rate": 1.0060711834977758e-05,
+ "loss": 0.6812,
+ "step": 15
+ },
+ {
+ "epoch": 0.83,
+ "grad_norm": 1.401130199432373,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6633,
+ "step": 20
+ },
+ {
+ "epoch": 1.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.680633544921875,
+ "eval_runtime": 1.3853,
+ "eval_samples_per_second": 46.201,
+ "eval_steps_per_second": 5.775,
+ "step": 24
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 0.9690802693367004,
+ "learning_rate": 1.602261514459421e-05,
+ "loss": 0.6644,
+ "step": 25
+ },
+ {
+ "epoch": 1.25,
+ "grad_norm": 1.8331551551818848,
+ "learning_rate": 1.564999618774318e-05,
+ "loss": 0.6073,
+ "step": 30
+ },
+ {
+ "epoch": 1.46,
+ "grad_norm": 1.799914002418518,
+ "learning_rate": 1.527737723089215e-05,
+ "loss": 0.5805,
+ "step": 35
+ },
+ {
+ "epoch": 1.67,
+ "grad_norm": 0.515367865562439,
+ "learning_rate": 1.4904758274041123e-05,
+ "loss": 0.6465,
+ "step": 40
+ },
+ {
+ "epoch": 1.88,
+ "grad_norm": 0.6179113388061523,
+ "learning_rate": 1.4532139317190096e-05,
+ "loss": 0.6147,
+ "step": 45
+ },
+ {
+ "epoch": 2.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.701690673828125,
+ "eval_runtime": 1.4198,
+ "eval_samples_per_second": 45.078,
+ "eval_steps_per_second": 5.635,
+ "step": 48
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 0.5513622760772705,
+ "learning_rate": 1.4159520360339068e-05,
+ "loss": 0.6301,
+ "step": 50
+ },
+ {
+ "epoch": 2.29,
+ "grad_norm": 0.5830497741699219,
+ "learning_rate": 1.3786901403488039e-05,
+ "loss": 0.5836,
+ "step": 55
+ },
+ {
+ "epoch": 2.5,
+ "grad_norm": 0.6891571879386902,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6309,
+ "step": 60
+ },
+ {
+ "epoch": 2.71,
+ "grad_norm": 1.3063991069793701,
+ "learning_rate": 1.3041663489785983e-05,
+ "loss": 0.6199,
+ "step": 65
+ },
+ {
+ "epoch": 2.92,
+ "grad_norm": 1.164373517036438,
+ "learning_rate": 1.2669044532934955e-05,
+ "loss": 0.4871,
+ "step": 70
+ },
+ {
+ "epoch": 3.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.7076148986816406,
+ "eval_runtime": 1.4157,
+ "eval_samples_per_second": 45.206,
+ "eval_steps_per_second": 5.651,
+ "step": 72
+ },
+ {
+ "epoch": 3.12,
+ "grad_norm": 0.5683964490890503,
+ "learning_rate": 1.2296425576083926e-05,
+ "loss": 0.5943,
+ "step": 75
+ },
+ {
+ "epoch": 3.33,
+ "grad_norm": 0.5915613770484924,
+ "learning_rate": 1.1923806619232898e-05,
+ "loss": 0.6051,
+ "step": 80
+ },
+ {
+ "epoch": 3.54,
+ "grad_norm": 1.4184000492095947,
+ "learning_rate": 1.155118766238187e-05,
+ "loss": 0.487,
+ "step": 85
+ },
+ {
+ "epoch": 3.75,
+ "grad_norm": 1.1929051876068115,
+ "learning_rate": 1.1178568705530843e-05,
+ "loss": 0.6096,
+ "step": 90
+ },
+ {
+ "epoch": 3.96,
+ "grad_norm": 1.1995503902435303,
+ "learning_rate": 1.0805949748679813e-05,
+ "loss": 0.5922,
+ "step": 95
+ },
+ {
+ "epoch": 4.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.7007102966308594,
+ "eval_runtime": 1.4297,
+ "eval_samples_per_second": 44.764,
+ "eval_steps_per_second": 5.596,
+ "step": 96
+ },
+ {
+ "epoch": 4.17,
+ "grad_norm": 0.9711471796035767,
+ "learning_rate": 1.0433330791828785e-05,
+ "loss": 0.5077,
+ "step": 100
+ },
+ {
+ "epoch": 4.38,
+ "grad_norm": 2.008340358734131,
+ "learning_rate": 1.0060711834977758e-05,
+ "loss": 0.5911,
+ "step": 105
+ },
+ {
+ "epoch": 4.58,
+ "grad_norm": 0.9180999398231506,
+ "learning_rate": 9.68809287812673e-06,
+ "loss": 0.5369,
+ "step": 110
+ },
+ {
+ "epoch": 4.79,
+ "grad_norm": 1.1544266939163208,
+ "learning_rate": 9.315473921275702e-06,
+ "loss": 0.5334,
+ "step": 115
+ },
+ {
+ "epoch": 5.0,
+ "grad_norm": 2.4023351669311523,
+ "learning_rate": 8.942854964424674e-06,
+ "loss": 0.5347,
+ "step": 120
+ },
+ {
+ "epoch": 5.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.6761245727539062,
+ "eval_runtime": 1.3752,
+ "eval_samples_per_second": 46.538,
+ "eval_steps_per_second": 5.817,
+ "step": 120
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 240,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "total_flos": 4193401989215328.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6097138935964413e-05,
+ "per_device_train_batch_size": 8
+ }
+ }
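The "is_hyper_param_search": true flag, the run-0/ directory prefix, and the trial_params block are what the transformers Trainer records when a checkpoint is written inside a hyperparameter-search trial (run-0 typically being the first trial); for this trial the logged eval_f1 stays flat at about 0.733 through epoch 5. A small, runnable sketch of pulling the trial summary back out of this file (the local path is illustrative):

```python
# Minimal sketch: summarize the trial from a checkpoint's trainer_state.json.
import json

with open("run-0/checkpoint-120/trainer_state.json") as f:  # assumed local path
    state = json.load(f)

print(state["trial_params"])   # sampled hyperparameters for this trial
print(state["best_metric"], state["best_model_checkpoint"])

for entry in state["log_history"]:
    if "eval_f1" in entry:     # one evaluation record per epoch
        print(f'epoch {entry["epoch"]:.0f}: eval_f1={entry["eval_f1"]:.4f} '
              f'eval_loss={entry["eval_loss"]:.4f}')
```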
run-0/checkpoint-120/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69b0aae1b6cc94401448b152c38be14ffcd8839b1499a5701f300de8efdf55e0
+ size 4920
run-0/checkpoint-24/config.json ADDED
@@ -0,0 +1,80 @@
+ {
+ "_name_or_path": "ntu-spml/distilhubert",
+ "activation_dropout": 0.1,
+ "apply_spec_augment": false,
+ "architectures": [
+ "HubertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.0,
+ "feat_proj_layer_norm": false,
+ "final_dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "NOT_WORD",
+ "1": "WORD"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "NOT_WORD": "0",
+ "WORD": "1"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "hubert",
+ "num_attention_heads": 12,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32
+ }
run-0/checkpoint-24/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:203ce75c172df60c43147ad639437bfae06a12fde458f545bcfa9566f71c66e2
+ size 94763496
run-0/checkpoint-24/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:44afb79906c8bbc5f625669f0a8610ac3248791cfe01df7397ef34f934faf328
+ size 189552570
run-0/checkpoint-24/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+ }
run-0/checkpoint-24/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5fd464cfdc34ccc4a78e84a563ff84847a7f2ca68f8d1be703c5be378ce9c86
+ size 14244
run-0/checkpoint-24/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10b20289c4976533c8450c5bc9f8a90aa07e8c63cc39a142065bd92dfdbe4da6
+ size 1064
run-0/checkpoint-24/trainer_state.json ADDED
@@ -0,0 +1,61 @@
+ {
+ "best_metric": 0.7326732673267327,
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-24",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 24,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.21,
+ "grad_norm": 1.6479910612106323,
+ "learning_rate": 3.3535706116592527e-06,
+ "loss": 0.699,
+ "step": 5
+ },
+ {
+ "epoch": 0.42,
+ "grad_norm": 0.6159653663635254,
+ "learning_rate": 6.707141223318505e-06,
+ "loss": 0.6932,
+ "step": 10
+ },
+ {
+ "epoch": 0.62,
+ "grad_norm": 0.5607424974441528,
+ "learning_rate": 1.0060711834977758e-05,
+ "loss": 0.6812,
+ "step": 15
+ },
+ {
+ "epoch": 0.83,
+ "grad_norm": 1.401130199432373,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6633,
+ "step": 20
+ },
+ {
+ "epoch": 1.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.680633544921875,
+ "eval_runtime": 1.3853,
+ "eval_samples_per_second": 46.201,
+ "eval_steps_per_second": 5.775,
+ "step": 24
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 240,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "total_flos": 670768569859200.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6097138935964413e-05,
+ "per_device_train_batch_size": 8
+ }
+ }
run-0/checkpoint-24/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69b0aae1b6cc94401448b152c38be14ffcd8839b1499a5701f300de8efdf55e0
+ size 4920
run-0/checkpoint-48/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:57423a91e6c057a636223c342a1b20d353153082c46be8db94355dcf9a5a8ffe
+ oid sha256:23842749f34d7cb29a9ff9626a7713537ce548c200d8713e828ad0cad4a9c15d
  size 94763496
run-0/checkpoint-48/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:72149d39a6e8251f868d0756d8f90b67d2989af4c30ffd230bcbb36dd736bd5b
+ oid sha256:11c2cb808887601222fb9cf33c2d409d468ab8f8b241ccc1a2909d3ce6a5d95a
  size 189552570
run-0/checkpoint-48/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6057f3e71568989f3d2442d841f7f161902200ee453a6d60795ac4142ad66214
+ oid sha256:0ea0f2253490889e9b5dbea96976236c3c1f025b5a24179f4bcaa44eec621be7
  size 14244
run-0/checkpoint-48/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1aad290205a8004232c0f396145527b973f24efe72219a18fc11c5367642bb72
+ oid sha256:d11d359047a2e15674e19d2520244262d0d6bc2ec5ccc284a4928c5b79947d31
  size 1064
run-0/checkpoint-48/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
  "best_metric": 0.7326732673267327,
- "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-16",
- "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 48,
  "is_hyper_param_search": true,
@@ -9,106 +9,97 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.31,
- "grad_norm": 0.8255003094673157,
- "learning_rate": 1.0604552517079013e-05,
- "loss": 0.6964,
  "step": 5
  },
  {
- "epoch": 0.62,
- "grad_norm": 0.6656044125556946,
- "learning_rate": 2.1209105034158027e-05,
- "loss": 0.6791,
  "step": 10
  },
  {
- "epoch": 0.94,
- "grad_norm": 0.6010500192642212,
- "learning_rate": 3.181365755123704e-05,
- "loss": 0.6408,
  "step": 15
  },
  {
- "epoch": 1.0,
- "eval_f1": 0.7326732673267327,
- "eval_loss": 0.68890380859375,
- "eval_runtime": 1.415,
- "eval_samples_per_second": 45.23,
- "eval_steps_per_second": 5.654,
- "step": 16
  },
  {
- "epoch": 1.25,
- "grad_norm": 1.3279176950454712,
- "learning_rate": 3.299194116424582e-05,
- "loss": 0.6072,
- "step": 20
  },
  {
- "epoch": 1.56,
- "grad_norm": 0.4112571179866791,
- "learning_rate": 3.181365755123704e-05,
- "loss": 0.598,
  "step": 25
  },
  {
- "epoch": 1.88,
- "grad_norm": 0.5132727026939392,
- "learning_rate": 3.0635373938228256e-05,
- "loss": 0.6135,
  "step": 30
  },
  {
- "epoch": 2.0,
- "eval_f1": 0.7326732673267327,
- "eval_loss": 0.7214889526367188,
- "eval_runtime": 1.4006,
- "eval_samples_per_second": 45.693,
- "eval_steps_per_second": 5.712,
- "step": 32
- },
- {
- "epoch": 2.19,
- "grad_norm": 0.758405327796936,
- "learning_rate": 2.945709032521948e-05,
- "loss": 0.5972,
  "step": 35
  },
  {
- "epoch": 2.5,
- "grad_norm": 0.5937344431877136,
- "learning_rate": 2.82788067122107e-05,
- "loss": 0.6351,
  "step": 40
  },
  {
- "epoch": 2.81,
- "grad_norm": 1.0145654678344727,
- "learning_rate": 2.7100523099201923e-05,
- "loss": 0.5693,
  "step": 45
  },
  {
- "epoch": 3.0,
  "eval_f1": 0.7326732673267327,
- "eval_loss": 0.698272705078125,
- "eval_runtime": 1.441,
- "eval_samples_per_second": 44.413,
- "eval_steps_per_second": 5.552,
  "step": 48
  }
  ],
  "logging_steps": 5,
- "max_steps": 160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 2550470417237664.0,
- "train_batch_size": 12,
  "trial_name": null,
  "trial_params": {
- "learning_rate": 3.393456805465284e-05,
- "per_device_train_batch_size": 12
  }
  }

  {
  "best_metric": 0.7326732673267327,
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-24",
+ "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 48,
  "is_hyper_param_search": true,
  "is_world_process_zero": true,
  "log_history": [
  {
+ "epoch": 0.21,
+ "grad_norm": 1.6479910612106323,
+ "learning_rate": 3.3535706116592527e-06,
+ "loss": 0.699,
  "step": 5
  },
  {
+ "epoch": 0.42,
+ "grad_norm": 0.6159653663635254,
+ "learning_rate": 6.707141223318505e-06,
+ "loss": 0.6932,
  "step": 10
  },
  {
+ "epoch": 0.62,
+ "grad_norm": 0.5607424974441528,
+ "learning_rate": 1.0060711834977758e-05,
+ "loss": 0.6812,
  "step": 15
  },
  {
+ "epoch": 0.83,
+ "grad_norm": 1.401130199432373,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6633,
+ "step": 20
  },
  {
+ "epoch": 1.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.680633544921875,
+ "eval_runtime": 1.3853,
+ "eval_samples_per_second": 46.201,
+ "eval_steps_per_second": 5.775,
+ "step": 24
  },
  {
+ "epoch": 1.04,
+ "grad_norm": 0.9690802693367004,
+ "learning_rate": 1.602261514459421e-05,
+ "loss": 0.6644,
  "step": 25
  },
  {
+ "epoch": 1.25,
+ "grad_norm": 1.8331551551818848,
+ "learning_rate": 1.564999618774318e-05,
+ "loss": 0.6073,
  "step": 30
  },
  {
+ "epoch": 1.46,
+ "grad_norm": 1.799914002418518,
+ "learning_rate": 1.527737723089215e-05,
+ "loss": 0.5805,
  "step": 35
  },
  {
+ "epoch": 1.67,
+ "grad_norm": 0.515367865562439,
+ "learning_rate": 1.4904758274041123e-05,
+ "loss": 0.6465,
  "step": 40
  },
  {
+ "epoch": 1.88,
+ "grad_norm": 0.6179113388061523,
+ "learning_rate": 1.4532139317190096e-05,
+ "loss": 0.6147,
  "step": 45
  },
  {
+ "epoch": 2.0,
  "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.701690673828125,
+ "eval_runtime": 1.4198,
+ "eval_samples_per_second": 45.078,
+ "eval_steps_per_second": 5.635,
  "step": 48
  }
  ],
  "logging_steps": 5,
+ "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
+ "total_flos": 1570339835242944.0,
+ "train_batch_size": 8,
  "trial_name": null,
  "trial_params": {
+ "learning_rate": 1.6097138935964413e-05,
+ "per_device_train_batch_size": 8
  }
  }
run-0/checkpoint-48/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd5321a5a7bef447760ea7aec1bfb74d17f861ccab265be792f91de172e9f7cf
+ oid sha256:69b0aae1b6cc94401448b152c38be14ffcd8839b1499a5701f300de8efdf55e0
  size 4920
run-0/checkpoint-72/config.json ADDED
@@ -0,0 +1,80 @@
+ {
+ "_name_or_path": "ntu-spml/distilhubert",
+ "activation_dropout": 0.1,
+ "apply_spec_augment": false,
+ "architectures": [
+ "HubertForSequenceClassification"
+ ],
+ "attention_dropout": 0.1,
+ "bos_token_id": 1,
+ "classifier_proj_size": 256,
+ "conv_bias": false,
+ "conv_dim": [
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512,
+ 512
+ ],
+ "conv_kernel": [
+ 10,
+ 3,
+ 3,
+ 3,
+ 3,
+ 2,
+ 2
+ ],
+ "conv_stride": [
+ 5,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2,
+ 2
+ ],
+ "ctc_loss_reduction": "sum",
+ "ctc_zero_infinity": false,
+ "do_stable_layer_norm": false,
+ "eos_token_id": 2,
+ "feat_extract_activation": "gelu",
+ "feat_extract_norm": "group",
+ "feat_proj_dropout": 0.0,
+ "feat_proj_layer_norm": false,
+ "final_dropout": 0.0,
+ "hidden_act": "gelu",
+ "hidden_dropout": 0.1,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "NOT_WORD",
+ "1": "WORD"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "NOT_WORD": "0",
+ "WORD": "1"
+ },
+ "layer_norm_eps": 1e-05,
+ "layerdrop": 0.0,
+ "mask_feature_length": 10,
+ "mask_feature_min_masks": 0,
+ "mask_feature_prob": 0.0,
+ "mask_time_length": 10,
+ "mask_time_min_masks": 2,
+ "mask_time_prob": 0.05,
+ "model_type": "hubert",
+ "num_attention_heads": 12,
+ "num_conv_pos_embedding_groups": 16,
+ "num_conv_pos_embeddings": 128,
+ "num_feat_extract_layers": 7,
+ "num_hidden_layers": 2,
+ "pad_token_id": 0,
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.1",
+ "use_weighted_layer_sum": false,
+ "vocab_size": 32
+ }
run-0/checkpoint-72/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb76baaa3e10292859eaaf54e1cd3dc5d0de162ba6ed9527301bfad27076b38d
+ size 94763496
run-0/checkpoint-72/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06dd70ed020e580a47e998558c2ff850d8c86016146dbbbed371cb5740d031f2
+ size 189552570
run-0/checkpoint-72/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "do_normalize": true,
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+ "feature_size": 1,
+ "padding_side": "right",
+ "padding_value": 0,
+ "return_attention_mask": true,
+ "sampling_rate": 16000
+ }
run-0/checkpoint-72/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ee3b4c1fd10f5b7dc9cd3892663bc52a1bc7290b86a11225b56750a1f3c0adf
+ size 14244
run-0/checkpoint-72/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fffad6961893713483db1dcfd6ef80135279126147c8de8415994f26331d94f
+ size 1064
run-0/checkpoint-72/trainer_state.json ADDED
@@ -0,0 +1,149 @@
+ {
+ "best_metric": 0.7326732673267327,
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-24",
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 72,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.21,
+ "grad_norm": 1.6479910612106323,
+ "learning_rate": 3.3535706116592527e-06,
+ "loss": 0.699,
+ "step": 5
+ },
+ {
+ "epoch": 0.42,
+ "grad_norm": 0.6159653663635254,
+ "learning_rate": 6.707141223318505e-06,
+ "loss": 0.6932,
+ "step": 10
+ },
+ {
+ "epoch": 0.62,
+ "grad_norm": 0.5607424974441528,
+ "learning_rate": 1.0060711834977758e-05,
+ "loss": 0.6812,
+ "step": 15
+ },
+ {
+ "epoch": 0.83,
+ "grad_norm": 1.401130199432373,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6633,
+ "step": 20
+ },
+ {
+ "epoch": 1.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.680633544921875,
+ "eval_runtime": 1.3853,
+ "eval_samples_per_second": 46.201,
+ "eval_steps_per_second": 5.775,
+ "step": 24
+ },
+ {
+ "epoch": 1.04,
+ "grad_norm": 0.9690802693367004,
+ "learning_rate": 1.602261514459421e-05,
+ "loss": 0.6644,
+ "step": 25
+ },
+ {
+ "epoch": 1.25,
+ "grad_norm": 1.8331551551818848,
+ "learning_rate": 1.564999618774318e-05,
+ "loss": 0.6073,
+ "step": 30
+ },
+ {
+ "epoch": 1.46,
+ "grad_norm": 1.799914002418518,
+ "learning_rate": 1.527737723089215e-05,
+ "loss": 0.5805,
+ "step": 35
+ },
+ {
+ "epoch": 1.67,
+ "grad_norm": 0.515367865562439,
+ "learning_rate": 1.4904758274041123e-05,
+ "loss": 0.6465,
+ "step": 40
+ },
+ {
+ "epoch": 1.88,
+ "grad_norm": 0.6179113388061523,
+ "learning_rate": 1.4532139317190096e-05,
+ "loss": 0.6147,
+ "step": 45
+ },
+ {
+ "epoch": 2.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.701690673828125,
+ "eval_runtime": 1.4198,
+ "eval_samples_per_second": 45.078,
+ "eval_steps_per_second": 5.635,
+ "step": 48
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 0.5513622760772705,
+ "learning_rate": 1.4159520360339068e-05,
+ "loss": 0.6301,
+ "step": 50
+ },
+ {
+ "epoch": 2.29,
+ "grad_norm": 0.5830497741699219,
+ "learning_rate": 1.3786901403488039e-05,
+ "loss": 0.5836,
+ "step": 55
+ },
+ {
+ "epoch": 2.5,
+ "grad_norm": 0.6891571879386902,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6309,
+ "step": 60
+ },
+ {
+ "epoch": 2.71,
+ "grad_norm": 1.3063991069793701,
+ "learning_rate": 1.3041663489785983e-05,
+ "loss": 0.6199,
+ "step": 65
+ },
+ {
+ "epoch": 2.92,
+ "grad_norm": 1.164373517036438,
+ "learning_rate": 1.2669044532934955e-05,
+ "loss": 0.4871,
+ "step": 70
+ },
+ {
+ "epoch": 3.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.7076148986816406,
+ "eval_runtime": 1.4157,
+ "eval_samples_per_second": 45.206,
+ "eval_steps_per_second": 5.651,
+ "step": 72
+ }
+ ],
+ "logging_steps": 5,
+ "max_steps": 240,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "total_flos": 2449725503657472.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.6097138935964413e-05,
+ "per_device_train_batch_size": 8
+ }
+ }
run-0/checkpoint-72/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:69b0aae1b6cc94401448b152c38be14ffcd8839b1499a5701f300de8efdf55e0
+ size 4920
run-0/checkpoint-96/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c21673226edbab6b566ba6d5fd266227f96e9587c2c6a57041fb29591701883f
+ oid sha256:c0c9139d8224355fe0e842d9efa7c44348ac0e82ff31134a167f9a33f4be03b4
  size 94763496
run-0/checkpoint-96/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0440869fc4ac86bfd89b2be1533510bdfe1633d0e38772e6f61612bc84de7093
+ oid sha256:39d20d5d04c35d2b8653ce48db98b8aceb259e1d3129842531fe037342fd8eb3
  size 189552570
run-0/checkpoint-96/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:53f396ce65ed9535364efa4f874662b8f07e93a8b1799db89be140bf009657c2
+ oid sha256:2b456b0645d03f4fc8a479113b5caa8bb02734d9c9788d3ded452d4ca10da7ef
  size 14244
run-0/checkpoint-96/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:94f957e39f2386f162d3c9e4397d136c0360104bc6ecac4ee610978ad2919d5f
+ oid sha256:7b19b453bb5c60fec371fb0b599c4ea5138bc56060c0f92f5031e6bd36210973
  size 1064
run-0/checkpoint-96/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
  {
- "best_metric": 0.7500000000000001,
- "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-80",
- "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 96,
  "is_hyper_param_search": true,
@@ -9,203 +9,185 @@
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.31,
- "grad_norm": 0.8255003094673157,
- "learning_rate": 1.0604552517079013e-05,
- "loss": 0.6964,
  "step": 5
  },
  {
- "epoch": 0.62,
- "grad_norm": 0.6656044125556946,
- "learning_rate": 2.1209105034158027e-05,
- "loss": 0.6791,
  "step": 10
  },
  {
- "epoch": 0.94,
- "grad_norm": 0.6010500192642212,
- "learning_rate": 3.181365755123704e-05,
- "loss": 0.6408,
  "step": 15
  },
  {
- "epoch": 1.0,
- "eval_f1": 0.7326732673267327,
- "eval_loss": 0.68890380859375,
- "eval_runtime": 1.415,
- "eval_samples_per_second": 45.23,
- "eval_steps_per_second": 5.654,
- "step": 16
  },
  {
- "epoch": 1.25,
- "grad_norm": 1.3279176950454712,
- "learning_rate": 3.299194116424582e-05,
- "loss": 0.6072,
- "step": 20
  },
  {
- "epoch": 1.56,
- "grad_norm": 0.4112571179866791,
- "learning_rate": 3.181365755123704e-05,
- "loss": 0.598,
  "step": 25
  },
  {
- "epoch": 1.88,
- "grad_norm": 0.5132727026939392,
- "learning_rate": 3.0635373938228256e-05,
- "loss": 0.6135,
  "step": 30
  },
  {
- "epoch": 2.0,
- "eval_f1": 0.7326732673267327,
- "eval_loss": 0.7214889526367188,
- "eval_runtime": 1.4006,
- "eval_samples_per_second": 45.693,
- "eval_steps_per_second": 5.712,
- "step": 32
- },
- {
- "epoch": 2.19,
- "grad_norm": 0.758405327796936,
- "learning_rate": 2.945709032521948e-05,
- "loss": 0.5972,
  "step": 35
  },
  {
- "epoch": 2.5,
- "grad_norm": 0.5937344431877136,
- "learning_rate": 2.82788067122107e-05,
- "loss": 0.6351,
  "step": 40
  },
  {
- "epoch": 2.81,
- "grad_norm": 1.0145654678344727,
- "learning_rate": 2.7100523099201923e-05,
- "loss": 0.5693,
  "step": 45
  },
  {
- "epoch": 3.0,
  "eval_f1": 0.7326732673267327,
- "eval_loss": 0.698272705078125,
- "eval_runtime": 1.441,
- "eval_samples_per_second": 44.413,
- "eval_steps_per_second": 5.552,
  "step": 48
  },
  {
- "epoch": 3.12,
- "grad_norm": 0.6083056330680847,
- "learning_rate": 2.592223948619314e-05,
- "loss": 0.5451,
  "step": 50
  },
  {
- "epoch": 3.44,
- "grad_norm": 1.0291730165481567,
- "learning_rate": 2.474395587318436e-05,
- "loss": 0.5484,
  "step": 55
  },
  {
- "epoch": 3.75,
- "grad_norm": 0.8883129954338074,
- "learning_rate": 2.3565672260175582e-05,
- "loss": 0.5352,
  "step": 60
  },
  {
- "epoch": 4.0,
- "eval_f1": 0.7326732673267327,
- "eval_loss": 0.6671600341796875,
- "eval_runtime": 1.5278,
- "eval_samples_per_second": 41.89,
- "eval_steps_per_second": 5.236,
- "step": 64
- },
- {
- "epoch": 4.06,
- "grad_norm": 1.3549830913543701,
- "learning_rate": 2.2387388647166805e-05,
- "loss": 0.5651,
  "step": 65
  },
  {
- "epoch": 4.38,
- "grad_norm": 1.6241841316223145,
- "learning_rate": 2.1209105034158027e-05,
- "loss": 0.4816,
  "step": 70
  },
  {
- "epoch": 4.69,
- "grad_norm": 1.7844226360321045,
- "learning_rate": 2.0030821421149246e-05,
- "loss": 0.4744,
- "step": 75
  },
  {
- "epoch": 5.0,
- "grad_norm": 4.98038387298584,
- "learning_rate": 1.8852537808140468e-05,
- "loss": 0.4257,
- "step": 80
  },
  {
- "epoch": 5.0,
- "eval_f1": 0.7500000000000001,
- "eval_loss": 0.6424350738525391,
- "eval_runtime": 1.3892,
- "eval_samples_per_second": 46.069,
- "eval_steps_per_second": 5.759,
  "step": 80
  },
  {
- "epoch": 5.31,
- "grad_norm": 2.9809491634368896,
- "learning_rate": 1.767425419513169e-05,
- "loss": 0.3976,
  "step": 85
  },
  {
- "epoch": 5.62,
- "grad_norm": 3.5094501972198486,
- "learning_rate": 1.649597058212291e-05,
- "loss": 0.3957,
  "step": 90
  },
  {
- "epoch": 5.94,
- "grad_norm": 3.999945878982544,
- "learning_rate": 1.5317686969114128e-05,
- "loss": 0.3643,
  "step": 95
  },
  {
- "epoch": 6.0,
- "eval_f1": 0.7415730337078652,
- "eval_loss": 0.6471805572509766,
- "eval_runtime": 1.3839,
- "eval_samples_per_second": 46.247,
- "eval_steps_per_second": 5.781,
  "step": 96
  }
  ],
  "logging_steps": 5,
- "max_steps": 160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
- "total_flos": 5365994483367216.0,
- "train_batch_size": 12,
  "trial_name": null,
  "trial_params": {
- "learning_rate": 3.393456805465284e-05,
- "per_device_train_batch_size": 12
  }
  }

  {
+ "best_metric": 0.7326732673267327,
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-0/checkpoint-24",
+ "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 96,
  "is_hyper_param_search": true,
  "is_world_process_zero": true,
  "log_history": [
  {
+ "epoch": 0.21,
+ "grad_norm": 1.6479910612106323,
+ "learning_rate": 3.3535706116592527e-06,
+ "loss": 0.699,
  "step": 5
  },
  {
+ "epoch": 0.42,
+ "grad_norm": 0.6159653663635254,
+ "learning_rate": 6.707141223318505e-06,
+ "loss": 0.6932,
  "step": 10
  },
  {
+ "epoch": 0.62,
+ "grad_norm": 0.5607424974441528,
+ "learning_rate": 1.0060711834977758e-05,
+ "loss": 0.6812,
  "step": 15
  },
  {
+ "epoch": 0.83,
+ "grad_norm": 1.401130199432373,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6633,
+ "step": 20
  },
  {
+ "epoch": 1.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.680633544921875,
+ "eval_runtime": 1.3853,
+ "eval_samples_per_second": 46.201,
+ "eval_steps_per_second": 5.775,
+ "step": 24
  },
  {
+ "epoch": 1.04,
+ "grad_norm": 0.9690802693367004,
+ "learning_rate": 1.602261514459421e-05,
+ "loss": 0.6644,
  "step": 25
  },
  {
+ "epoch": 1.25,
+ "grad_norm": 1.8331551551818848,
+ "learning_rate": 1.564999618774318e-05,
+ "loss": 0.6073,
  "step": 30
  },
  {
+ "epoch": 1.46,
+ "grad_norm": 1.799914002418518,
+ "learning_rate": 1.527737723089215e-05,
+ "loss": 0.5805,
  "step": 35
  },
  {
+ "epoch": 1.67,
+ "grad_norm": 0.515367865562439,
+ "learning_rate": 1.4904758274041123e-05,
+ "loss": 0.6465,
  "step": 40
  },
  {
+ "epoch": 1.88,
+ "grad_norm": 0.6179113388061523,
+ "learning_rate": 1.4532139317190096e-05,
+ "loss": 0.6147,
  "step": 45
  },
  {
+ "epoch": 2.0,
  "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.701690673828125,
+ "eval_runtime": 1.4198,
+ "eval_samples_per_second": 45.078,
+ "eval_steps_per_second": 5.635,
  "step": 48
  },
  {
+ "epoch": 2.08,
+ "grad_norm": 0.5513622760772705,
+ "learning_rate": 1.4159520360339068e-05,
+ "loss": 0.6301,
  "step": 50
  },
  {
+ "epoch": 2.29,
+ "grad_norm": 0.5830497741699219,
+ "learning_rate": 1.3786901403488039e-05,
+ "loss": 0.5836,
  "step": 55
  },
  {
+ "epoch": 2.5,
+ "grad_norm": 0.6891571879386902,
+ "learning_rate": 1.341428244663701e-05,
+ "loss": 0.6309,
  "step": 60
  },
  {
+ "epoch": 2.71,
+ "grad_norm": 1.3063991069793701,
+ "learning_rate": 1.3041663489785983e-05,
+ "loss": 0.6199,
  "step": 65
  },
  {
+ "epoch": 2.92,
+ "grad_norm": 1.164373517036438,
+ "learning_rate": 1.2669044532934955e-05,
+ "loss": 0.4871,
  "step": 70
  },
  {
+ "epoch": 3.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.7076148986816406,
+ "eval_runtime": 1.4157,
+ "eval_samples_per_second": 45.206,
+ "eval_steps_per_second": 5.651,
+ "step": 72
  },
  {
+ "epoch": 3.12,
+ "grad_norm": 0.5683964490890503,
+ "learning_rate": 1.2296425576083926e-05,
+ "loss": 0.5943,
+ "step": 75
  },
  {
+ "epoch": 3.33,
+ "grad_norm": 0.5915613770484924,
+ "learning_rate": 1.1923806619232898e-05,
+ "loss": 0.6051,
  "step": 80
  },
  {
+ "epoch": 3.54,
+ "grad_norm": 1.4184000492095947,
+ "learning_rate": 1.155118766238187e-05,
+ "loss": 0.487,
  "step": 85
  },
  {
+ "epoch": 3.75,
+ "grad_norm": 1.1929051876068115,
+ "learning_rate": 1.1178568705530843e-05,
+ "loss": 0.6096,
  "step": 90
  },
  {
+ "epoch": 3.96,
+ "grad_norm": 1.1995503902435303,
+ "learning_rate": 1.0805949748679813e-05,
+ "loss": 0.5922,
  "step": 95
  },
  {
+ "epoch": 4.0,
+ "eval_f1": 0.7326732673267327,
+ "eval_loss": 0.7007102966308594,
+ "eval_runtime": 1.4297,
+ "eval_samples_per_second": 44.764,
+ "eval_steps_per_second": 5.596,
  "step": 96
  }
  ],
  "logging_steps": 5,
+ "max_steps": 240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
+ "total_flos": 3314382463332576.0,
+ "train_batch_size": 8,
  "trial_name": null,
  "trial_params": {
+ "learning_rate": 1.6097138935964413e-05,
+ "per_device_train_batch_size": 8
  }
  }
run-0/checkpoint-96/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd5321a5a7bef447760ea7aec1bfb74d17f861ccab265be792f91de172e9f7cf
+ oid sha256:69b0aae1b6cc94401448b152c38be14ffcd8839b1499a5701f300de8efdf55e0
  size 4920
runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709497668.ca56ea9bc35e.3883.24 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0d02008ca9bff9e2d2736a788c7e63fb995b1dd9a85b32013ef07005a79b7bef
+ size 12043
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd5321a5a7bef447760ea7aec1bfb74d17f861ccab265be792f91de172e9f7cf
+ oid sha256:69b0aae1b6cc94401448b152c38be14ffcd8839b1499a5701f300de8efdf55e0
  size 4920