colerobertson committed on
Commit
02160f9
1 Parent(s): a7fc36e

Training in progress, epoch 1

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the complete set.
Files changed (50) hide show
  1. model.safetensors +1 -1
  2. run-3/checkpoint-16/config.json +80 -0
  3. run-3/checkpoint-16/model.safetensors +3 -0
  4. run-3/checkpoint-16/optimizer.pt +3 -0
  5. run-3/checkpoint-16/preprocessor_config.json +9 -0
  6. run-3/checkpoint-16/rng_state.pth +3 -0
  7. run-3/checkpoint-16/scheduler.pt +3 -0
  8. run-3/checkpoint-16/trainer_state.json +63 -0
  9. run-3/checkpoint-16/training_args.bin +3 -0
  10. run-3/checkpoint-24/config.json +80 -0
  11. run-3/checkpoint-24/model.safetensors +3 -0
  12. run-3/checkpoint-24/optimizer.pt +3 -0
  13. run-3/checkpoint-24/preprocessor_config.json +9 -0
  14. run-3/checkpoint-24/rng_state.pth +3 -0
  15. run-3/checkpoint-24/scheduler.pt +3 -0
  16. run-3/checkpoint-24/trainer_state.json +79 -0
  17. run-3/checkpoint-24/training_args.bin +3 -0
  18. run-3/checkpoint-32/config.json +80 -0
  19. run-3/checkpoint-32/model.safetensors +3 -0
  20. run-3/checkpoint-32/optimizer.pt +3 -0
  21. run-3/checkpoint-32/preprocessor_config.json +9 -0
  22. run-3/checkpoint-32/rng_state.pth +3 -0
  23. run-3/checkpoint-32/scheduler.pt +3 -0
  24. run-3/checkpoint-32/trainer_state.json +102 -0
  25. run-3/checkpoint-32/training_args.bin +3 -0
  26. run-3/checkpoint-40/config.json +80 -0
  27. run-3/checkpoint-40/model.safetensors +3 -0
  28. run-3/checkpoint-40/optimizer.pt +3 -0
  29. run-3/checkpoint-40/preprocessor_config.json +9 -0
  30. run-3/checkpoint-40/rng_state.pth +3 -0
  31. run-3/checkpoint-40/scheduler.pt +3 -0
  32. run-3/checkpoint-40/trainer_state.json +125 -0
  33. run-3/checkpoint-40/training_args.bin +3 -0
  34. run-3/checkpoint-48/config.json +80 -0
  35. run-3/checkpoint-48/model.safetensors +3 -0
  36. run-3/checkpoint-48/optimizer.pt +3 -0
  37. run-3/checkpoint-48/preprocessor_config.json +9 -0
  38. run-3/checkpoint-48/rng_state.pth +3 -0
  39. run-3/checkpoint-48/scheduler.pt +3 -0
  40. run-3/checkpoint-48/trainer_state.json +141 -0
  41. run-3/checkpoint-48/training_args.bin +3 -0
  42. run-3/checkpoint-56/config.json +80 -0
  43. run-3/checkpoint-56/model.safetensors +3 -0
  44. run-3/checkpoint-56/optimizer.pt +3 -0
  45. run-3/checkpoint-56/preprocessor_config.json +9 -0
  46. run-3/checkpoint-56/rng_state.pth +3 -0
  47. run-3/checkpoint-56/scheduler.pt +3 -0
  48. run-3/checkpoint-56/trainer_state.json +164 -0
  49. run-3/checkpoint-56/training_args.bin +3 -0
  50. run-3/checkpoint-64/config.json +80 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:149f6b51cfaa089494700030336b7742cbe5c97fa8c55460d1626a4326b4e9e1
3
  size 94763496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:018f138040ad7aa88bf94f4b8deb9d4f75616181b7d07008cdde50dd03503a11
3
  size 94763496
run-3/checkpoint-16/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }
run-3/checkpoint-16/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a760d27f41290ed80c1206e2480b47d92c8ce2c6ee1f3cd329e186b834a2b29
3
+ size 94763496
run-3/checkpoint-16/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04b4c7382179a64f9ac6f6bc7805a19064505d76480c4aa8b8391170753af6e7
3
+ size 189552570
run-3/checkpoint-16/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run-3/checkpoint-16/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0b4c067c009eda80e8baa1203d26c442448a0cbb4afc20c352a7b4f4c31ecfc
3
+ size 14244
run-3/checkpoint-16/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829126bb0c61bd2bc81a49adbe8fe99f684971512e37328fc21f35f04bc8cdd9
3
+ size 1064
run-3/checkpoint-16/trainer_state.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7326732673267327,
3
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 16,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.62,
13
+ "grad_norm": 0.94398033618927,
14
+ "learning_rate": 7.138113611405621e-06,
15
+ "loss": 0.6981,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7326732673267327,
21
+ "eval_loss": 0.6864166259765625,
22
+ "eval_runtime": 1.3249,
23
+ "eval_samples_per_second": 48.305,
24
+ "eval_steps_per_second": 6.038,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.5316386222839355,
30
+ "learning_rate": 1.1103732284408743e-05,
31
+ "loss": 0.683,
32
+ "step": 10
33
+ },
34
+ {
35
+ "epoch": 1.88,
36
+ "grad_norm": 0.5028849840164185,
37
+ "learning_rate": 1.031060854980812e-05,
38
+ "loss": 0.6564,
39
+ "step": 15
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_f1": 0.7326732673267327,
44
+ "eval_loss": 0.680694580078125,
45
+ "eval_runtime": 1.3565,
46
+ "eval_samples_per_second": 47.179,
47
+ "eval_steps_per_second": 5.897,
48
+ "step": 16
49
+ }
50
+ ],
51
+ "logging_steps": 5,
52
+ "max_steps": 80,
53
+ "num_input_tokens_seen": 0,
54
+ "num_train_epochs": 10,
55
+ "save_steps": 500,
56
+ "total_flos": 1863816719524704.0,
57
+ "train_batch_size": 24,
58
+ "trial_name": null,
59
+ "trial_params": {
60
+ "learning_rate": 1.1420981778248994e-05,
61
+ "per_device_train_batch_size": 24
62
+ }
63
+ }
run-3/checkpoint-16/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2
3
+ size 4920
run-3/checkpoint-24/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }
run-3/checkpoint-24/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3f932857479e62a819449b36f4c42e6e648d1fd17c406c7ee35d9c0b7ea261e
3
+ size 94763496
run-3/checkpoint-24/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:461c0fa31756b908c1ff77514b04efa451eae1a56488331e4a4c40726af10ca0
3
+ size 189552570
run-3/checkpoint-24/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run-3/checkpoint-24/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24d6a900c72e3e0f300fb2a89dab7159acab172bc37646dd605cc8c78374f6f9
3
+ size 14244
run-3/checkpoint-24/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ccd2ea597a4bea6c5e4d0cece4bbe69ed79e233292bc9053077d461aeb0ba5f
3
+ size 1064
run-3/checkpoint-24/trainer_state.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7326732673267327,
3
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 24,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.62,
13
+ "grad_norm": 0.94398033618927,
14
+ "learning_rate": 7.138113611405621e-06,
15
+ "loss": 0.6981,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7326732673267327,
21
+ "eval_loss": 0.6864166259765625,
22
+ "eval_runtime": 1.3249,
23
+ "eval_samples_per_second": 48.305,
24
+ "eval_steps_per_second": 6.038,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.5316386222839355,
30
+ "learning_rate": 1.1103732284408743e-05,
31
+ "loss": 0.683,
32
+ "step": 10
33
+ },
34
+ {
35
+ "epoch": 1.88,
36
+ "grad_norm": 0.5028849840164185,
37
+ "learning_rate": 1.031060854980812e-05,
38
+ "loss": 0.6564,
39
+ "step": 15
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_f1": 0.7326732673267327,
44
+ "eval_loss": 0.680694580078125,
45
+ "eval_runtime": 1.3565,
46
+ "eval_samples_per_second": 47.179,
47
+ "eval_steps_per_second": 5.897,
48
+ "step": 16
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 0.5775301456451416,
53
+ "learning_rate": 9.517484815207495e-06,
54
+ "loss": 0.6436,
55
+ "step": 20
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1": 0.7326732673267327,
60
+ "eval_loss": 0.6852684020996094,
61
+ "eval_runtime": 1.3445,
62
+ "eval_samples_per_second": 47.601,
63
+ "eval_steps_per_second": 5.95,
64
+ "step": 24
65
+ }
66
+ ],
67
+ "logging_steps": 5,
68
+ "max_steps": 80,
69
+ "num_input_tokens_seen": 0,
70
+ "num_train_epochs": 10,
71
+ "save_steps": 500,
72
+ "total_flos": 2484680744556000.0,
73
+ "train_batch_size": 24,
74
+ "trial_name": null,
75
+ "trial_params": {
76
+ "learning_rate": 1.1420981778248994e-05,
77
+ "per_device_train_batch_size": 24
78
+ }
79
+ }
run-3/checkpoint-24/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2
3
+ size 4920
run-3/checkpoint-32/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }
run-3/checkpoint-32/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1454cf89a31d874feb53d9bb87ed0b567c597730d05be3faabbbb19277c03e8
3
+ size 94763496
run-3/checkpoint-32/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e61a24407aa0c80c01a56c0188477f4d5473511e288cab61dfd72dd7238455c9
3
+ size 189552570
run-3/checkpoint-32/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run-3/checkpoint-32/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:556c72cd347c734d7fdec24637de2c2dcd065c09bcdd940199a24e0091d021c0
3
+ size 14244
run-3/checkpoint-32/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ca9d40a3c2366bc01ceed0fa797d480ee4f2519ead3847177cdf537dc0ac31
3
+ size 1064
run-3/checkpoint-32/trainer_state.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7326732673267327,
3
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 32,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.62,
13
+ "grad_norm": 0.94398033618927,
14
+ "learning_rate": 7.138113611405621e-06,
15
+ "loss": 0.6981,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7326732673267327,
21
+ "eval_loss": 0.6864166259765625,
22
+ "eval_runtime": 1.3249,
23
+ "eval_samples_per_second": 48.305,
24
+ "eval_steps_per_second": 6.038,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.5316386222839355,
30
+ "learning_rate": 1.1103732284408743e-05,
31
+ "loss": 0.683,
32
+ "step": 10
33
+ },
34
+ {
35
+ "epoch": 1.88,
36
+ "grad_norm": 0.5028849840164185,
37
+ "learning_rate": 1.031060854980812e-05,
38
+ "loss": 0.6564,
39
+ "step": 15
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_f1": 0.7326732673267327,
44
+ "eval_loss": 0.680694580078125,
45
+ "eval_runtime": 1.3565,
46
+ "eval_samples_per_second": 47.179,
47
+ "eval_steps_per_second": 5.897,
48
+ "step": 16
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 0.5775301456451416,
53
+ "learning_rate": 9.517484815207495e-06,
54
+ "loss": 0.6436,
55
+ "step": 20
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1": 0.7326732673267327,
60
+ "eval_loss": 0.6852684020996094,
61
+ "eval_runtime": 1.3445,
62
+ "eval_samples_per_second": 47.601,
63
+ "eval_steps_per_second": 5.95,
64
+ "step": 24
65
+ },
66
+ {
67
+ "epoch": 3.12,
68
+ "grad_norm": 0.4549338221549988,
69
+ "learning_rate": 8.72436108060687e-06,
70
+ "loss": 0.6098,
71
+ "step": 25
72
+ },
73
+ {
74
+ "epoch": 3.75,
75
+ "grad_norm": 0.3741567134857178,
76
+ "learning_rate": 7.931237346006246e-06,
77
+ "loss": 0.609,
78
+ "step": 30
79
+ },
80
+ {
81
+ "epoch": 4.0,
82
+ "eval_f1": 0.7326732673267327,
83
+ "eval_loss": 0.6952018737792969,
84
+ "eval_runtime": 1.3401,
85
+ "eval_samples_per_second": 47.757,
86
+ "eval_steps_per_second": 5.97,
87
+ "step": 32
88
+ }
89
+ ],
90
+ "logging_steps": 5,
91
+ "max_steps": 80,
92
+ "num_input_tokens_seen": 0,
93
+ "num_train_epochs": 10,
94
+ "save_steps": 500,
95
+ "total_flos": 3687742096503552.0,
96
+ "train_batch_size": 24,
97
+ "trial_name": null,
98
+ "trial_params": {
99
+ "learning_rate": 1.1420981778248994e-05,
100
+ "per_device_train_batch_size": 24
101
+ }
102
+ }
run-3/checkpoint-32/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2
3
+ size 4920
run-3/checkpoint-40/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }
run-3/checkpoint-40/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb44356d7dd99d76e08bc8fec067d778a70ce340e950085f1f0d5532bcf71429
3
+ size 94763496
run-3/checkpoint-40/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d97fad5e3a7a342896071e42b90538b01c4168983183ecb488766c8d1179a3
3
+ size 189552570
run-3/checkpoint-40/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run-3/checkpoint-40/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f04394838d097ad37a86e783ddc6557596b7da068eb72e2798acacf43f38924
3
+ size 14244
run-3/checkpoint-40/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:857294f0798588b687c4fd66b6e7d0f73c0b0a67baea1b5225ec246eeb80a3ae
3
+ size 1064
run-3/checkpoint-40/trainer_state.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7326732673267327,
3
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8",
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 40,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.62,
13
+ "grad_norm": 0.94398033618927,
14
+ "learning_rate": 7.138113611405621e-06,
15
+ "loss": 0.6981,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7326732673267327,
21
+ "eval_loss": 0.6864166259765625,
22
+ "eval_runtime": 1.3249,
23
+ "eval_samples_per_second": 48.305,
24
+ "eval_steps_per_second": 6.038,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.5316386222839355,
30
+ "learning_rate": 1.1103732284408743e-05,
31
+ "loss": 0.683,
32
+ "step": 10
33
+ },
34
+ {
35
+ "epoch": 1.88,
36
+ "grad_norm": 0.5028849840164185,
37
+ "learning_rate": 1.031060854980812e-05,
38
+ "loss": 0.6564,
39
+ "step": 15
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_f1": 0.7326732673267327,
44
+ "eval_loss": 0.680694580078125,
45
+ "eval_runtime": 1.3565,
46
+ "eval_samples_per_second": 47.179,
47
+ "eval_steps_per_second": 5.897,
48
+ "step": 16
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 0.5775301456451416,
53
+ "learning_rate": 9.517484815207495e-06,
54
+ "loss": 0.6436,
55
+ "step": 20
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1": 0.7326732673267327,
60
+ "eval_loss": 0.6852684020996094,
61
+ "eval_runtime": 1.3445,
62
+ "eval_samples_per_second": 47.601,
63
+ "eval_steps_per_second": 5.95,
64
+ "step": 24
65
+ },
66
+ {
67
+ "epoch": 3.12,
68
+ "grad_norm": 0.4549338221549988,
69
+ "learning_rate": 8.72436108060687e-06,
70
+ "loss": 0.6098,
71
+ "step": 25
72
+ },
73
+ {
74
+ "epoch": 3.75,
75
+ "grad_norm": 0.3741567134857178,
76
+ "learning_rate": 7.931237346006246e-06,
77
+ "loss": 0.609,
78
+ "step": 30
79
+ },
80
+ {
81
+ "epoch": 4.0,
82
+ "eval_f1": 0.7326732673267327,
83
+ "eval_loss": 0.6952018737792969,
84
+ "eval_runtime": 1.3401,
85
+ "eval_samples_per_second": 47.757,
86
+ "eval_steps_per_second": 5.97,
87
+ "step": 32
88
+ },
89
+ {
90
+ "epoch": 4.38,
91
+ "grad_norm": 0.38969138264656067,
92
+ "learning_rate": 7.138113611405621e-06,
93
+ "loss": 0.6091,
94
+ "step": 35
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "grad_norm": 0.2588692009449005,
99
+ "learning_rate": 6.344989876804997e-06,
100
+ "loss": 0.6043,
101
+ "step": 40
102
+ },
103
+ {
104
+ "epoch": 5.0,
105
+ "eval_f1": 0.7326732673267327,
106
+ "eval_loss": 0.7042198181152344,
107
+ "eval_runtime": 1.3815,
108
+ "eval_samples_per_second": 46.326,
109
+ "eval_steps_per_second": 5.791,
110
+ "step": 40
111
+ }
112
+ ],
113
+ "logging_steps": 5,
114
+ "max_steps": 80,
115
+ "num_input_tokens_seen": 0,
116
+ "num_train_epochs": 10,
117
+ "save_steps": 500,
118
+ "total_flos": 5002699790009952.0,
119
+ "train_batch_size": 24,
120
+ "trial_name": null,
121
+ "trial_params": {
122
+ "learning_rate": 1.1420981778248994e-05,
123
+ "per_device_train_batch_size": 24
124
+ }
125
+ }
run-3/checkpoint-40/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2
3
+ size 4920
run-3/checkpoint-48/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }
run-3/checkpoint-48/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3046efc5b477cb286b3e11b8ed1a45a21f1c828411af810695ffc44c6f2d00d9
3
+ size 94763496
run-3/checkpoint-48/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a85c054ab80630a8ef4559aef6814034b41e2b2616bd905706d9550320a94d
3
+ size 189552570
run-3/checkpoint-48/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run-3/checkpoint-48/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8eb236be6c806fe9309f674bf7e81272faf7f91242ad8752235487116506f5f
3
+ size 14244
run-3/checkpoint-48/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a43d0fcfc93299b44b2601f4ab406c95df97c50f9a36eb16ee234f5d3816f4a
3
+ size 1064
run-3/checkpoint-48/trainer_state.json ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7326732673267327,
3
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 48,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.62,
13
+ "grad_norm": 0.94398033618927,
14
+ "learning_rate": 7.138113611405621e-06,
15
+ "loss": 0.6981,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7326732673267327,
21
+ "eval_loss": 0.6864166259765625,
22
+ "eval_runtime": 1.3249,
23
+ "eval_samples_per_second": 48.305,
24
+ "eval_steps_per_second": 6.038,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.5316386222839355,
30
+ "learning_rate": 1.1103732284408743e-05,
31
+ "loss": 0.683,
32
+ "step": 10
33
+ },
34
+ {
35
+ "epoch": 1.88,
36
+ "grad_norm": 0.5028849840164185,
37
+ "learning_rate": 1.031060854980812e-05,
38
+ "loss": 0.6564,
39
+ "step": 15
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_f1": 0.7326732673267327,
44
+ "eval_loss": 0.680694580078125,
45
+ "eval_runtime": 1.3565,
46
+ "eval_samples_per_second": 47.179,
47
+ "eval_steps_per_second": 5.897,
48
+ "step": 16
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 0.5775301456451416,
53
+ "learning_rate": 9.517484815207495e-06,
54
+ "loss": 0.6436,
55
+ "step": 20
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1": 0.7326732673267327,
60
+ "eval_loss": 0.6852684020996094,
61
+ "eval_runtime": 1.3445,
62
+ "eval_samples_per_second": 47.601,
63
+ "eval_steps_per_second": 5.95,
64
+ "step": 24
65
+ },
66
+ {
67
+ "epoch": 3.12,
68
+ "grad_norm": 0.4549338221549988,
69
+ "learning_rate": 8.72436108060687e-06,
70
+ "loss": 0.6098,
71
+ "step": 25
72
+ },
73
+ {
74
+ "epoch": 3.75,
75
+ "grad_norm": 0.3741567134857178,
76
+ "learning_rate": 7.931237346006246e-06,
77
+ "loss": 0.609,
78
+ "step": 30
79
+ },
80
+ {
81
+ "epoch": 4.0,
82
+ "eval_f1": 0.7326732673267327,
83
+ "eval_loss": 0.6952018737792969,
84
+ "eval_runtime": 1.3401,
85
+ "eval_samples_per_second": 47.757,
86
+ "eval_steps_per_second": 5.97,
87
+ "step": 32
88
+ },
89
+ {
90
+ "epoch": 4.38,
91
+ "grad_norm": 0.38969138264656067,
92
+ "learning_rate": 7.138113611405621e-06,
93
+ "loss": 0.6091,
94
+ "step": 35
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "grad_norm": 0.2588692009449005,
99
+ "learning_rate": 6.344989876804997e-06,
100
+ "loss": 0.6043,
101
+ "step": 40
102
+ },
103
+ {
104
+ "epoch": 5.0,
105
+ "eval_f1": 0.7326732673267327,
106
+ "eval_loss": 0.7042198181152344,
107
+ "eval_runtime": 1.3815,
108
+ "eval_samples_per_second": 46.326,
109
+ "eval_steps_per_second": 5.791,
110
+ "step": 40
111
+ },
112
+ {
113
+ "epoch": 5.62,
114
+ "grad_norm": 0.6705239415168762,
115
+ "learning_rate": 5.551866142204372e-06,
116
+ "loss": 0.6093,
117
+ "step": 45
118
+ },
119
+ {
120
+ "epoch": 6.0,
121
+ "eval_f1": 0.7326732673267327,
122
+ "eval_loss": 0.7079658508300781,
123
+ "eval_runtime": 1.3745,
124
+ "eval_samples_per_second": 46.561,
125
+ "eval_steps_per_second": 5.82,
126
+ "step": 48
127
+ }
128
+ ],
129
+ "logging_steps": 5,
130
+ "max_steps": 80,
131
+ "num_input_tokens_seen": 0,
132
+ "num_train_epochs": 10,
133
+ "save_steps": 500,
134
+ "total_flos": 5627200224409632.0,
135
+ "train_batch_size": 24,
136
+ "trial_name": null,
137
+ "trial_params": {
138
+ "learning_rate": 1.1420981778248994e-05,
139
+ "per_device_train_batch_size": 24
140
+ }
141
+ }
run-3/checkpoint-48/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2
3
+ size 4920
run-3/checkpoint-56/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }
run-3/checkpoint-56/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d657c8a9a8d89910fa51b01473cddb30252c72c09d1387d54314134d8223e50e
3
+ size 94763496
run-3/checkpoint-56/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59bed33f570bc4270a85ddd2ac7b86c353888ada5b8f8b946fb1763cc460d2e2
3
+ size 189552570
run-3/checkpoint-56/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
run-3/checkpoint-56/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30f64c34451dd549a3dd22bd33aa2eb5ae76a449e28e339f69de985a5a36616a
3
+ size 14244
run-3/checkpoint-56/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02589177af8711d6e7d186b2047b3051489461944bff68810fb8fe0a88b20fcc
3
+ size 1064
run-3/checkpoint-56/trainer_state.json ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7326732673267327,
3
+ "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 56,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.62,
13
+ "grad_norm": 0.94398033618927,
14
+ "learning_rate": 7.138113611405621e-06,
15
+ "loss": 0.6981,
16
+ "step": 5
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_f1": 0.7326732673267327,
21
+ "eval_loss": 0.6864166259765625,
22
+ "eval_runtime": 1.3249,
23
+ "eval_samples_per_second": 48.305,
24
+ "eval_steps_per_second": 6.038,
25
+ "step": 8
26
+ },
27
+ {
28
+ "epoch": 1.25,
29
+ "grad_norm": 1.5316386222839355,
30
+ "learning_rate": 1.1103732284408743e-05,
31
+ "loss": 0.683,
32
+ "step": 10
33
+ },
34
+ {
35
+ "epoch": 1.88,
36
+ "grad_norm": 0.5028849840164185,
37
+ "learning_rate": 1.031060854980812e-05,
38
+ "loss": 0.6564,
39
+ "step": 15
40
+ },
41
+ {
42
+ "epoch": 2.0,
43
+ "eval_f1": 0.7326732673267327,
44
+ "eval_loss": 0.680694580078125,
45
+ "eval_runtime": 1.3565,
46
+ "eval_samples_per_second": 47.179,
47
+ "eval_steps_per_second": 5.897,
48
+ "step": 16
49
+ },
50
+ {
51
+ "epoch": 2.5,
52
+ "grad_norm": 0.5775301456451416,
53
+ "learning_rate": 9.517484815207495e-06,
54
+ "loss": 0.6436,
55
+ "step": 20
56
+ },
57
+ {
58
+ "epoch": 3.0,
59
+ "eval_f1": 0.7326732673267327,
60
+ "eval_loss": 0.6852684020996094,
61
+ "eval_runtime": 1.3445,
62
+ "eval_samples_per_second": 47.601,
63
+ "eval_steps_per_second": 5.95,
64
+ "step": 24
65
+ },
66
+ {
67
+ "epoch": 3.12,
68
+ "grad_norm": 0.4549338221549988,
69
+ "learning_rate": 8.72436108060687e-06,
70
+ "loss": 0.6098,
71
+ "step": 25
72
+ },
73
+ {
74
+ "epoch": 3.75,
75
+ "grad_norm": 0.3741567134857178,
76
+ "learning_rate": 7.931237346006246e-06,
77
+ "loss": 0.609,
78
+ "step": 30
79
+ },
80
+ {
81
+ "epoch": 4.0,
82
+ "eval_f1": 0.7326732673267327,
83
+ "eval_loss": 0.6952018737792969,
84
+ "eval_runtime": 1.3401,
85
+ "eval_samples_per_second": 47.757,
86
+ "eval_steps_per_second": 5.97,
87
+ "step": 32
88
+ },
89
+ {
90
+ "epoch": 4.38,
91
+ "grad_norm": 0.38969138264656067,
92
+ "learning_rate": 7.138113611405621e-06,
93
+ "loss": 0.6091,
94
+ "step": 35
95
+ },
96
+ {
97
+ "epoch": 5.0,
98
+ "grad_norm": 0.2588692009449005,
99
+ "learning_rate": 6.344989876804997e-06,
100
+ "loss": 0.6043,
101
+ "step": 40
102
+ },
103
+ {
104
+ "epoch": 5.0,
105
+ "eval_f1": 0.7326732673267327,
106
+ "eval_loss": 0.7042198181152344,
107
+ "eval_runtime": 1.3815,
108
+ "eval_samples_per_second": 46.326,
109
+ "eval_steps_per_second": 5.791,
110
+ "step": 40
111
+ },
112
+ {
113
+ "epoch": 5.62,
114
+ "grad_norm": 0.6705239415168762,
115
+ "learning_rate": 5.551866142204372e-06,
116
+ "loss": 0.6093,
117
+ "step": 45
118
+ },
119
+ {
120
+ "epoch": 6.0,
121
+ "eval_f1": 0.7326732673267327,
122
+ "eval_loss": 0.7079658508300781,
123
+ "eval_runtime": 1.3745,
124
+ "eval_samples_per_second": 46.561,
125
+ "eval_steps_per_second": 5.82,
126
+ "step": 48
127
+ },
128
+ {
129
+ "epoch": 6.25,
130
+ "grad_norm": 0.4161934554576874,
131
+ "learning_rate": 4.758742407603747e-06,
132
+ "loss": 0.5737,
133
+ "step": 50
134
+ },
135
+ {
136
+ "epoch": 6.88,
137
+ "grad_norm": 0.3530200719833374,
138
+ "learning_rate": 3.965618673003123e-06,
139
+ "loss": 0.6355,
140
+ "step": 55
141
+ },
142
+ {
143
+ "epoch": 7.0,
144
+ "eval_f1": 0.7326732673267327,
145
+ "eval_loss": 0.7088470458984375,
146
+ "eval_runtime": 1.3736,
147
+ "eval_samples_per_second": 46.592,
148
+ "eval_steps_per_second": 5.824,
149
+ "step": 56
150
+ }
151
+ ],
152
+ "logging_steps": 5,
153
+ "max_steps": 80,
154
+ "num_input_tokens_seen": 0,
155
+ "num_train_epochs": 10,
156
+ "save_steps": 500,
157
+ "total_flos": 6905111571067392.0,
158
+ "train_batch_size": 24,
159
+ "trial_name": null,
160
+ "trial_params": {
161
+ "learning_rate": 1.1420981778248994e-05,
162
+ "per_device_train_batch_size": 24
163
+ }
164
+ }
run-3/checkpoint-56/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2
3
+ size 4920
run-3/checkpoint-64/config.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ntu-spml/distilhubert",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": false,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_norm": "group",
45
+ "feat_proj_dropout": 0.0,
46
+ "feat_proj_layer_norm": false,
47
+ "final_dropout": 0.0,
48
+ "hidden_act": "gelu",
49
+ "hidden_dropout": 0.1,
50
+ "hidden_size": 768,
51
+ "id2label": {
52
+ "0": "NOT_WORD",
53
+ "1": "WORD"
54
+ },
55
+ "initializer_range": 0.02,
56
+ "intermediate_size": 3072,
57
+ "label2id": {
58
+ "NOT_WORD": "0",
59
+ "WORD": "1"
60
+ },
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.05,
69
+ "model_type": "hubert",
70
+ "num_attention_heads": 12,
71
+ "num_conv_pos_embedding_groups": 16,
72
+ "num_conv_pos_embeddings": 128,
73
+ "num_feat_extract_layers": 7,
74
+ "num_hidden_layers": 2,
75
+ "pad_token_id": 0,
76
+ "torch_dtype": "float32",
77
+ "transformers_version": "4.38.1",
78
+ "use_weighted_layer_sum": false,
79
+ "vocab_size": 32
80
+ }