diff --git a/model.safetensors b/model.safetensors index 4318685961829f637c544a931391ea321f59791a..8b19dae92ab08f0f14f4192717a30fa580c4704c 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:149f6b51cfaa089494700030336b7742cbe5c97fa8c55460d1626a4326b4e9e1 +oid sha256:018f138040ad7aa88bf94f4b8deb9d4f75616181b7d07008cdde50dd03503a11 size 94763496 diff --git a/run-3/checkpoint-16/config.json b/run-3/checkpoint-16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-16/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-16/model.safetensors b/run-3/checkpoint-16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83b15cbce5d6d879913fcd3e35051ec9108b4f26 --- /dev/null +++ b/run-3/checkpoint-16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a760d27f41290ed80c1206e2480b47d92c8ce2c6ee1f3cd329e186b834a2b29 +size 94763496 diff --git a/run-3/checkpoint-16/optimizer.pt b/run-3/checkpoint-16/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f030c982596a6792a682b3c85b11ac005bf9734a --- /dev/null +++ b/run-3/checkpoint-16/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04b4c7382179a64f9ac6f6bc7805a19064505d76480c4aa8b8391170753af6e7 +size 189552570 diff --git a/run-3/checkpoint-16/preprocessor_config.json b/run-3/checkpoint-16/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-16/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-16/rng_state.pth b/run-3/checkpoint-16/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac7d01df8b2a48c2fef09916807ca451a855a7a7 --- /dev/null +++ b/run-3/checkpoint-16/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b4c067c009eda80e8baa1203d26c442448a0cbb4afc20c352a7b4f4c31ecfc +size 14244 diff --git a/run-3/checkpoint-16/scheduler.pt b/run-3/checkpoint-16/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..825754d47b86bca517d84772788165eeea0346de --- /dev/null +++ b/run-3/checkpoint-16/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:829126bb0c61bd2bc81a49adbe8fe99f684971512e37328fc21f35f04bc8cdd9 +size 1064 diff --git a/run-3/checkpoint-16/trainer_state.json b/run-3/checkpoint-16/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..23a917246ca1807bf00d7d1c032275d4d0f5098b --- /dev/null +++ b/run-3/checkpoint-16/trainer_state.json @@ -0,0 +1,63 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 16, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1863816719524704.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-16/training_args.bin b/run-3/checkpoint-16/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-16/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-24/config.json b/run-3/checkpoint-24/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-24/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-24/model.safetensors b/run-3/checkpoint-24/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffee67c7492088d7a986b2d5aa8f51c9042cd899 --- /dev/null +++ b/run-3/checkpoint-24/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3f932857479e62a819449b36f4c42e6e648d1fd17c406c7ee35d9c0b7ea261e +size 94763496 diff --git a/run-3/checkpoint-24/optimizer.pt b/run-3/checkpoint-24/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..91d74dd1831b441d2cb5b699b992cd4992b686d0 --- /dev/null +++ b/run-3/checkpoint-24/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:461c0fa31756b908c1ff77514b04efa451eae1a56488331e4a4c40726af10ca0 +size 189552570 diff --git a/run-3/checkpoint-24/preprocessor_config.json b/run-3/checkpoint-24/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-24/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-24/rng_state.pth b/run-3/checkpoint-24/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..28c01d4ea535e8997af4f6785766f6eb12960737 --- /dev/null +++ b/run-3/checkpoint-24/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d6a900c72e3e0f300fb2a89dab7159acab172bc37646dd605cc8c78374f6f9 +size 14244 diff --git a/run-3/checkpoint-24/scheduler.pt b/run-3/checkpoint-24/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29a731cbbe6e9fa477d2248bc85ee26e76e6f9e1 --- /dev/null +++ b/run-3/checkpoint-24/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ccd2ea597a4bea6c5e4d0cece4bbe69ed79e233292bc9053077d461aeb0ba5f +size 1064 diff --git a/run-3/checkpoint-24/trainer_state.json b/run-3/checkpoint-24/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..67f5b6a51e21dd2a65c8595014bc26407463962f --- /dev/null +++ b/run-3/checkpoint-24/trainer_state.json @@ -0,0 +1,79 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 24, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2484680744556000.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-24/training_args.bin b/run-3/checkpoint-24/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-24/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-32/config.json b/run-3/checkpoint-32/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-32/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-32/model.safetensors b/run-3/checkpoint-32/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1bd886af0441c7d93e5cc0daa74d67317c05529 --- /dev/null +++ b/run-3/checkpoint-32/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1454cf89a31d874feb53d9bb87ed0b567c597730d05be3faabbbb19277c03e8 +size 94763496 diff --git a/run-3/checkpoint-32/optimizer.pt b/run-3/checkpoint-32/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfec61fdc949a4ee1790078cde515a2ae8a2ccd8 --- /dev/null +++ b/run-3/checkpoint-32/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e61a24407aa0c80c01a56c0188477f4d5473511e288cab61dfd72dd7238455c9 +size 189552570 diff --git a/run-3/checkpoint-32/preprocessor_config.json b/run-3/checkpoint-32/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-32/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-32/rng_state.pth b/run-3/checkpoint-32/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3b6be6f69fe6a35f8729aab39cf27523ee48a85a --- /dev/null +++ b/run-3/checkpoint-32/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556c72cd347c734d7fdec24637de2c2dcd065c09bcdd940199a24e0091d021c0 +size 14244 diff --git a/run-3/checkpoint-32/scheduler.pt b/run-3/checkpoint-32/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c01cdcda61f032fa896f7fb3e2fd3c2e1480a6a3 --- /dev/null +++ b/run-3/checkpoint-32/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ca9d40a3c2366bc01ceed0fa797d480ee4f2519ead3847177cdf537dc0ac31 +size 1064 diff --git a/run-3/checkpoint-32/trainer_state.json b/run-3/checkpoint-32/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a36a5823b578a2277e8c4df60d87c60050b1288 --- /dev/null +++ b/run-3/checkpoint-32/trainer_state.json @@ -0,0 +1,102 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 32, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3687742096503552.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-32/training_args.bin b/run-3/checkpoint-32/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-32/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-40/config.json b/run-3/checkpoint-40/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-40/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-40/model.safetensors b/run-3/checkpoint-40/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23cfa551fc94b09010ad25e9d72c3e38da160421 --- /dev/null +++ b/run-3/checkpoint-40/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb44356d7dd99d76e08bc8fec067d778a70ce340e950085f1f0d5532bcf71429 +size 94763496 diff --git a/run-3/checkpoint-40/optimizer.pt b/run-3/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ebc83ab4651bc4b341ac382e6da95e78c6cbc32 --- /dev/null +++ b/run-3/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d97fad5e3a7a342896071e42b90538b01c4168983183ecb488766c8d1179a3 +size 189552570 diff --git a/run-3/checkpoint-40/preprocessor_config.json b/run-3/checkpoint-40/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-40/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-40/rng_state.pth b/run-3/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd74b6ba63e493014ddf15a58a2c7a7388efcaef --- /dev/null +++ b/run-3/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f04394838d097ad37a86e783ddc6557596b7da068eb72e2798acacf43f38924 +size 14244 diff --git a/run-3/checkpoint-40/scheduler.pt b/run-3/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a909700f34d1f2c8f3b70d0e0cafb0ea9400f8e --- /dev/null +++ b/run-3/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857294f0798588b687c4fd66b6e7d0f73c0b0a67baea1b5225ec246eeb80a3ae +size 1064 diff --git a/run-3/checkpoint-40/trainer_state.json b/run-3/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2ca013c7c4665ce13d1256371ed176ea3bfb78fb --- /dev/null +++ b/run-3/checkpoint-40/trainer_state.json @@ -0,0 +1,125 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 40, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.38969138264656067, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6091, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.2588692009449005, + "learning_rate": 6.344989876804997e-06, + "loss": 0.6043, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7042198181152344, + "eval_runtime": 1.3815, + "eval_samples_per_second": 46.326, + "eval_steps_per_second": 5.791, + "step": 40 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5002699790009952.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-40/training_args.bin b/run-3/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-48/config.json b/run-3/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-48/model.safetensors b/run-3/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1534387af764ae1ab2005e763ad7dcbd482e045 --- /dev/null +++ b/run-3/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3046efc5b477cb286b3e11b8ed1a45a21f1c828411af810695ffc44c6f2d00d9 +size 94763496 diff --git a/run-3/checkpoint-48/optimizer.pt b/run-3/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1367d360426b3be5b304fbf7cd1d3273caa103fe --- /dev/null +++ b/run-3/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a85c054ab80630a8ef4559aef6814034b41e2b2616bd905706d9550320a94d +size 189552570 diff --git a/run-3/checkpoint-48/preprocessor_config.json b/run-3/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-48/rng_state.pth b/run-3/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f14c60940aa7f4644eedab3c03294336305a5c97 --- /dev/null +++ b/run-3/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8eb236be6c806fe9309f674bf7e81272faf7f91242ad8752235487116506f5f +size 14244 diff --git a/run-3/checkpoint-48/scheduler.pt b/run-3/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..581c1451e4f2e246a45bb661288a5deb5e8a733e --- /dev/null +++ b/run-3/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a43d0fcfc93299b44b2601f4ab406c95df97c50f9a36eb16ee234f5d3816f4a +size 1064 diff --git a/run-3/checkpoint-48/trainer_state.json b/run-3/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1ac3c68ac601d7108317cba533e17006ca032e81 --- /dev/null +++ b/run-3/checkpoint-48/trainer_state.json @@ -0,0 +1,141 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.38969138264656067, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6091, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.2588692009449005, + "learning_rate": 6.344989876804997e-06, + "loss": 0.6043, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7042198181152344, + "eval_runtime": 1.3815, + "eval_samples_per_second": 46.326, + "eval_steps_per_second": 5.791, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.6705239415168762, + "learning_rate": 5.551866142204372e-06, + "loss": 0.6093, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7079658508300781, + "eval_runtime": 1.3745, + "eval_samples_per_second": 46.561, + "eval_steps_per_second": 5.82, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5627200224409632.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-48/training_args.bin b/run-3/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-56/config.json b/run-3/checkpoint-56/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-56/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-56/model.safetensors b/run-3/checkpoint-56/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..030137c0afca60d6656e3746f9c1b412069a9eda --- /dev/null +++ b/run-3/checkpoint-56/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d657c8a9a8d89910fa51b01473cddb30252c72c09d1387d54314134d8223e50e +size 94763496 diff --git a/run-3/checkpoint-56/optimizer.pt b/run-3/checkpoint-56/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a59764d5c969639d8f2380751648168f9eb8487 --- /dev/null +++ b/run-3/checkpoint-56/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59bed33f570bc4270a85ddd2ac7b86c353888ada5b8f8b946fb1763cc460d2e2 +size 189552570 diff --git a/run-3/checkpoint-56/preprocessor_config.json b/run-3/checkpoint-56/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-56/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-56/rng_state.pth b/run-3/checkpoint-56/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c2c442646ebb4dfc9d3299de99caf85648452de --- /dev/null +++ b/run-3/checkpoint-56/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f64c34451dd549a3dd22bd33aa2eb5ae76a449e28e339f69de985a5a36616a +size 14244 diff --git a/run-3/checkpoint-56/scheduler.pt b/run-3/checkpoint-56/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3cf42e9d8e7558da7e32ec23700afa942f98d1b --- /dev/null +++ b/run-3/checkpoint-56/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02589177af8711d6e7d186b2047b3051489461944bff68810fb8fe0a88b20fcc +size 1064 diff --git a/run-3/checkpoint-56/trainer_state.json b/run-3/checkpoint-56/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a0c256081de4692db0204b2149f7b802b8ac5862 --- /dev/null +++ b/run-3/checkpoint-56/trainer_state.json @@ -0,0 +1,164 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 56, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.38969138264656067, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6091, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.2588692009449005, + "learning_rate": 6.344989876804997e-06, + "loss": 0.6043, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7042198181152344, + "eval_runtime": 1.3815, + "eval_samples_per_second": 46.326, + "eval_steps_per_second": 5.791, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.6705239415168762, + "learning_rate": 5.551866142204372e-06, + "loss": 0.6093, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7079658508300781, + "eval_runtime": 1.3745, + "eval_samples_per_second": 46.561, + "eval_steps_per_second": 5.82, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4161934554576874, + "learning_rate": 4.758742407603747e-06, + "loss": 0.5737, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.3530200719833374, + "learning_rate": 3.965618673003123e-06, + "loss": 0.6355, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7088470458984375, + "eval_runtime": 1.3736, + "eval_samples_per_second": 46.592, + "eval_steps_per_second": 5.824, + "step": 56 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 6905111571067392.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-56/training_args.bin b/run-3/checkpoint-56/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-56/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-64/config.json b/run-3/checkpoint-64/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-64/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-64/model.safetensors b/run-3/checkpoint-64/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..606ee9088909a49b17e50a0ee0f791613e7239df --- /dev/null +++ b/run-3/checkpoint-64/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711b9553d536db3d49b79da8847e90c3315be083722786acf7ea7bf1cbc17c2d +size 94763496 diff --git a/run-3/checkpoint-64/optimizer.pt b/run-3/checkpoint-64/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..61841ff723532dcf5c1636c29306bba3d4273561 --- /dev/null +++ b/run-3/checkpoint-64/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48035c7301f32e01daabfeab7b8c1003871986a885e6791f47da0272be7c97 +size 189552570 diff --git a/run-3/checkpoint-64/preprocessor_config.json b/run-3/checkpoint-64/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-64/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-64/rng_state.pth b/run-3/checkpoint-64/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6aac58eb9db9f0c698caef7e9a1d2ad03bccfd28 --- /dev/null +++ b/run-3/checkpoint-64/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b673e22294536a7957bb7890d05b112f64cb515eb9ce49dc217b8d30f7372c +size 14244 diff --git a/run-3/checkpoint-64/scheduler.pt b/run-3/checkpoint-64/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f06e7520df4d0e20f1f00b0d96be3dd55d61edf1 --- /dev/null +++ b/run-3/checkpoint-64/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719bcc6d9186369827c1a6c07a54b72be52569203b0bcc54711ca44fcbd31799 +size 1064 diff --git a/run-3/checkpoint-64/trainer_state.json b/run-3/checkpoint-64/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..636c68052c8e1a626a57a4d8bcda236fda6ab165 --- /dev/null +++ b/run-3/checkpoint-64/trainer_state.json @@ -0,0 +1,180 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 64, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.38969138264656067, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6091, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.2588692009449005, + "learning_rate": 6.344989876804997e-06, + "loss": 0.6043, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7042198181152344, + "eval_runtime": 1.3815, + "eval_samples_per_second": 46.326, + "eval_steps_per_second": 5.791, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.6705239415168762, + "learning_rate": 5.551866142204372e-06, + "loss": 0.6093, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7079658508300781, + "eval_runtime": 1.3745, + "eval_samples_per_second": 46.561, + "eval_steps_per_second": 5.82, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4161934554576874, + "learning_rate": 4.758742407603747e-06, + "loss": 0.5737, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.3530200719833374, + "learning_rate": 3.965618673003123e-06, + "loss": 0.6355, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7088470458984375, + "eval_runtime": 1.3736, + "eval_samples_per_second": 46.592, + "eval_steps_per_second": 5.824, + "step": 56 + }, + { + "epoch": 7.5, + "grad_norm": 0.7337541580200195, + "learning_rate": 3.1724949384024984e-06, + "loss": 0.5738, + "step": 60 + }, + { + "epoch": 8.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7090225219726562, + "eval_runtime": 1.3562, + "eval_samples_per_second": 47.192, + "eval_steps_per_second": 5.899, + "step": 64 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7525767508883424.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-64/training_args.bin b/run-3/checkpoint-64/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-64/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-72/config.json b/run-3/checkpoint-72/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-72/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-72/model.safetensors b/run-3/checkpoint-72/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32f8da55d004e8b7edfa31f7d4c78a6c3add3206 --- /dev/null +++ b/run-3/checkpoint-72/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f075052f9ebecc24ef6a4160666db3f40ef2dd01a9ba2059bc8aa3f826e174c0 +size 94763496 diff --git a/run-3/checkpoint-72/optimizer.pt b/run-3/checkpoint-72/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..837f0cf736ab38e378ac19879af1d253f0b8a414 --- /dev/null +++ b/run-3/checkpoint-72/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ae1726e8436a5eda83874d0ba6124033863e23246459669d3c61f223a5e86b +size 189552570 diff --git a/run-3/checkpoint-72/preprocessor_config.json b/run-3/checkpoint-72/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-72/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-72/rng_state.pth b/run-3/checkpoint-72/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9abbaf7ba6e3b9a7bd68b6e24e4c92bbf3cea3a1 --- /dev/null +++ b/run-3/checkpoint-72/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e22727a737d5214e5e4bb5d2c4d6b5bc51d41532847072e78e71dd1fe79e85 +size 14244 diff --git a/run-3/checkpoint-72/scheduler.pt b/run-3/checkpoint-72/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bd1853a4a805df92b47354c1c03bdb68d2a3dc2 --- /dev/null +++ b/run-3/checkpoint-72/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b10efb902b502dcd512fc90f83b748b9acf70d7536f5b36a17237388150994e1 +size 1064 diff --git a/run-3/checkpoint-72/trainer_state.json b/run-3/checkpoint-72/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2c267ba724513ace6dc649d0ec7fcf64b0a1cebd --- /dev/null +++ b/run-3/checkpoint-72/trainer_state.json @@ -0,0 +1,203 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 9.0, + "eval_steps": 500, + "global_step": 72, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.38969138264656067, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6091, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.2588692009449005, + "learning_rate": 6.344989876804997e-06, + "loss": 0.6043, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7042198181152344, + "eval_runtime": 1.3815, + "eval_samples_per_second": 46.326, + "eval_steps_per_second": 5.791, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.6705239415168762, + "learning_rate": 5.551866142204372e-06, + "loss": 0.6093, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7079658508300781, + "eval_runtime": 1.3745, + "eval_samples_per_second": 46.561, + "eval_steps_per_second": 5.82, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4161934554576874, + "learning_rate": 4.758742407603747e-06, + "loss": 0.5737, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.3530200719833374, + "learning_rate": 3.965618673003123e-06, + "loss": 0.6355, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7088470458984375, + "eval_runtime": 1.3736, + "eval_samples_per_second": 46.592, + "eval_steps_per_second": 5.824, + "step": 56 + }, + { + "epoch": 7.5, + "grad_norm": 0.7337541580200195, + "learning_rate": 3.1724949384024984e-06, + "loss": 0.5738, + "step": 60 + }, + { + "epoch": 8.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7090225219726562, + "eval_runtime": 1.3562, + "eval_samples_per_second": 47.192, + "eval_steps_per_second": 5.899, + "step": 64 + }, + { + "epoch": 8.12, + "grad_norm": 0.3913467526435852, + "learning_rate": 2.3793712038018737e-06, + "loss": 0.5872, + "step": 65 + }, + { + "epoch": 8.75, + "grad_norm": 0.6211589574813843, + "learning_rate": 1.5862474692012492e-06, + "loss": 0.5549, + "step": 70 + }, + { + "epoch": 9.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7093772888183594, + "eval_runtime": 1.3631, + "eval_samples_per_second": 46.952, + "eval_steps_per_second": 5.869, + "step": 72 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8742484157924448.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-72/training_args.bin b/run-3/checkpoint-72/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-72/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-3/checkpoint-80/config.json b/run-3/checkpoint-80/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-3/checkpoint-80/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-3/checkpoint-80/model.safetensors b/run-3/checkpoint-80/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04f8de8f44d25982f8cefd47b1c8c50b29829374 --- /dev/null +++ b/run-3/checkpoint-80/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10908ac09ba457410d5d25fef9cb689548c8edef2fae1cd6bdf45fd1034cd1c +size 94763496 diff --git a/run-3/checkpoint-80/optimizer.pt b/run-3/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0fc7aa659a0b8ca8e31565a29fef90fb2a08353 --- /dev/null +++ b/run-3/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7007069eff770880fcf8712b006eef66e2f5d037ca04de796c1477570332bac8 +size 189552570 diff --git a/run-3/checkpoint-80/preprocessor_config.json b/run-3/checkpoint-80/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-3/checkpoint-80/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-3/checkpoint-80/rng_state.pth b/run-3/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b35d63aebf845916c48bc0ed05c7024ddefd8bbb --- /dev/null +++ b/run-3/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a1cfa28050193686009b273b6588b7a4ba7e5d273238c9a261a36db87a1f14 +size 14244 diff --git a/run-3/checkpoint-80/scheduler.pt b/run-3/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06bfd43228570bff76a784159dae22118420d3eb --- /dev/null +++ b/run-3/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f8c268e5a132a2899822a8ed0d1c3fb552c4632f1ea462f5e38daf189c8575 +size 1064 diff --git a/run-3/checkpoint-80/trainer_state.json b/run-3/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6797e7aada46a09728b33212fdbeaef05ff68d2c --- /dev/null +++ b/run-3/checkpoint-80/trainer_state.json @@ -0,0 +1,226 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-3/checkpoint-8", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 80, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.94398033618927, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6981, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6864166259765625, + "eval_runtime": 1.3249, + "eval_samples_per_second": 48.305, + "eval_steps_per_second": 6.038, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.5316386222839355, + "learning_rate": 1.1103732284408743e-05, + "loss": 0.683, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.5028849840164185, + "learning_rate": 1.031060854980812e-05, + "loss": 0.6564, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680694580078125, + "eval_runtime": 1.3565, + "eval_samples_per_second": 47.179, + "eval_steps_per_second": 5.897, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.5775301456451416, + "learning_rate": 9.517484815207495e-06, + "loss": 0.6436, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6852684020996094, + "eval_runtime": 1.3445, + "eval_samples_per_second": 47.601, + "eval_steps_per_second": 5.95, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.4549338221549988, + "learning_rate": 8.72436108060687e-06, + "loss": 0.6098, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.3741567134857178, + "learning_rate": 7.931237346006246e-06, + "loss": 0.609, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6952018737792969, + "eval_runtime": 1.3401, + "eval_samples_per_second": 47.757, + "eval_steps_per_second": 5.97, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.38969138264656067, + "learning_rate": 7.138113611405621e-06, + "loss": 0.6091, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.2588692009449005, + "learning_rate": 6.344989876804997e-06, + "loss": 0.6043, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7042198181152344, + "eval_runtime": 1.3815, + "eval_samples_per_second": 46.326, + "eval_steps_per_second": 5.791, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.6705239415168762, + "learning_rate": 5.551866142204372e-06, + "loss": 0.6093, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7079658508300781, + "eval_runtime": 1.3745, + "eval_samples_per_second": 46.561, + "eval_steps_per_second": 5.82, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4161934554576874, + "learning_rate": 4.758742407603747e-06, + "loss": 0.5737, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.3530200719833374, + "learning_rate": 3.965618673003123e-06, + "loss": 0.6355, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7088470458984375, + "eval_runtime": 1.3736, + "eval_samples_per_second": 46.592, + "eval_steps_per_second": 5.824, + "step": 56 + }, + { + "epoch": 7.5, + "grad_norm": 0.7337541580200195, + "learning_rate": 3.1724949384024984e-06, + "loss": 0.5738, + "step": 60 + }, + { + "epoch": 8.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7090225219726562, + "eval_runtime": 1.3562, + "eval_samples_per_second": 47.192, + "eval_steps_per_second": 5.899, + "step": 64 + }, + { + "epoch": 8.12, + "grad_norm": 0.3913467526435852, + "learning_rate": 2.3793712038018737e-06, + "loss": 0.5872, + "step": 65 + }, + { + "epoch": 8.75, + "grad_norm": 0.6211589574813843, + "learning_rate": 1.5862474692012492e-06, + "loss": 0.5549, + "step": 70 + }, + { + "epoch": 9.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7093772888183594, + "eval_runtime": 1.3631, + "eval_samples_per_second": 46.952, + "eval_steps_per_second": 5.869, + "step": 72 + }, + { + "epoch": 9.38, + "grad_norm": 0.30739858746528625, + "learning_rate": 7.931237346006246e-07, + "loss": 0.6143, + "step": 75 + }, + { + "epoch": 10.0, + "grad_norm": 0.41105917096138, + "learning_rate": 0.0, + "loss": 0.6183, + "step": 80 + }, + { + "epoch": 10.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7090110778808594, + "eval_runtime": 1.366, + "eval_samples_per_second": 46.852, + "eval_steps_per_second": 5.857, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.0052017938770688e+16, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.1420981778248994e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-3/checkpoint-80/training_args.bin b/run-3/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..be43f8684da98f6c918b76cd1f79a3b0a0449b11 --- /dev/null +++ b/run-3/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +size 4920 diff --git a/run-4/checkpoint-48/config.json b/run-4/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-4/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-4/checkpoint-48/model.safetensors b/run-4/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b19dae92ab08f0f14f4192717a30fa580c4704c --- /dev/null +++ b/run-4/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:018f138040ad7aa88bf94f4b8deb9d4f75616181b7d07008cdde50dd03503a11 +size 94763496 diff --git a/run-4/checkpoint-48/optimizer.pt b/run-4/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8487d19b6d27fc87f2a3e98b1fd927477709a229 --- /dev/null +++ b/run-4/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b05a5c9e1f6f57f6e3b785c7061c895830d638ece011c9e0f083bff980d017e +size 189552570 diff --git a/run-4/checkpoint-48/preprocessor_config.json b/run-4/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-4/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-4/checkpoint-48/rng_state.pth b/run-4/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f69ac2b3cc24a2d23f1e99dfab26d0a1d84a680 --- /dev/null +++ b/run-4/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7251f0e64bf9e5675ed89b468a7ff74c1c3fd6457742f84db0e5e361db11f13 +size 14244 diff --git a/run-4/checkpoint-48/scheduler.pt b/run-4/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e81ea0accec64eba012ffdc049de9e18629cfdcc --- /dev/null +++ b/run-4/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041dd94260eed70f53e2ce426d63c9f16a4d14fdfd3d9fdad3d2c80f43504e5a +size 1064 diff --git a/run-4/checkpoint-48/trainer_state.json b/run-4/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..59f28486a18402773f611d2023f83c332d0c5cb7 --- /dev/null +++ b/run-4/checkpoint-48/trainer_state.json @@ -0,0 +1,96 @@ +{ + "best_metric": 0.72, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-4/checkpoint-48", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.1, + "grad_norm": 1.2733114957809448, + "learning_rate": 2.2702186710865246e-07, + "loss": 0.7025, + "step": 5 + }, + { + "epoch": 0.21, + "grad_norm": 1.243804931640625, + "learning_rate": 4.5404373421730493e-07, + "loss": 0.6974, + "step": 10 + }, + { + "epoch": 0.31, + "grad_norm": 1.7711552381515503, + "learning_rate": 6.810656013259573e-07, + "loss": 0.696, + "step": 15 + }, + { + "epoch": 0.42, + "grad_norm": 1.1453403234481812, + "learning_rate": 9.080874684346099e-07, + "loss": 0.6989, + "step": 20 + }, + { + "epoch": 0.52, + "grad_norm": 1.2729355096817017, + "learning_rate": 1.1351093355432624e-06, + "loss": 0.6968, + "step": 25 + }, + { + "epoch": 0.62, + "grad_norm": 1.1592165231704712, + "learning_rate": 1.3621312026519146e-06, + "loss": 0.6959, + "step": 30 + }, + { + "epoch": 0.73, + "grad_norm": 1.1798148155212402, + "learning_rate": 1.589153069760567e-06, + "loss": 0.6952, + "step": 35 + }, + { + "epoch": 0.83, + "grad_norm": 2.1216671466827393, + "learning_rate": 1.8161749368692197e-06, + "loss": 0.6886, + "step": 40 + }, + { + "epoch": 0.94, + "grad_norm": 1.3416370153427124, + "learning_rate": 2.043196803977872e-06, + "loss": 0.6864, + "step": 45 + }, + { + "epoch": 1.0, + "eval_f1": 0.72, + "eval_loss": 0.688262939453125, + "eval_runtime": 1.3468, + "eval_samples_per_second": 47.521, + "eval_steps_per_second": 5.94, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 480, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 670686130935120.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 2.1794099242430636e-06, + "per_device_train_batch_size": 4 + } +} diff --git a/run-4/checkpoint-48/training_args.bin b/run-4/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..2bb1c33e4434a3bb752764ab093170996801fe72 --- /dev/null +++ b/run-4/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eadd8415a707c8e4de7440233358b89448cac114a2ada36aef1d9186553c0c3 +size 4920 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492047.ca56ea9bc35e.3883.5 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492047.ca56ea9bc35e.3883.5 new file mode 100644 index 0000000000000000000000000000000000000000..56d442120373da03f76049c61cc82762dda0c879 --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492047.ca56ea9bc35e.3883.5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2be1b3b7bf34efb75c70e70c8ce87d69a0ad73aba1ff4a98c2443bf200779d +size 29188 diff --git a/training_args.bin b/training_args.bin index be43f8684da98f6c918b76cd1f79a3b0a0449b11..2bb1c33e4434a3bb752764ab093170996801fe72 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb7020efd76104988408508d9c7e8a6e74af8904f5fd05940d43639ce4c80cf2 +oid sha256:6eadd8415a707c8e4de7440233358b89448cac114a2ada36aef1d9186553c0c3 size 4920