diff --git a/model.safetensors b/model.safetensors index 9877a03ed7ffd01a15db65fc288d2a8b522a91f9..4bab8f4b54c1f98c14358a5ceeca50e590739e9e 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e1ed6ffdb684cc537f47f54766448db627495bfcb076b58f5bd92f9f75833010 +oid sha256:bcd38943271d452fb41e0cc1c9ac715cd036eaf1fd6859055f6d2b6c6d5d0ccb size 94763496 diff --git a/run-5/checkpoint-120/config.json b/run-5/checkpoint-120/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-5/checkpoint-120/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-5/checkpoint-120/model.safetensors b/run-5/checkpoint-120/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..310794f2157d053d75b635eed1664f6ef4703d27 --- /dev/null +++ b/run-5/checkpoint-120/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:454d504da2d40a1ac5002465cf473307c1be7b971d301e4f92ffa50b202f16fc +size 94763496 diff --git a/run-5/checkpoint-120/optimizer.pt b/run-5/checkpoint-120/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2472443ce8b4ece6fad4db9ce9a7703f3d4f303 --- /dev/null +++ b/run-5/checkpoint-120/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06f2ae8c05c541ef9c93df0a93bccced30f1b4dc10eadbccef60a373d68af39 +size 189552570 diff --git a/run-5/checkpoint-120/preprocessor_config.json b/run-5/checkpoint-120/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-5/checkpoint-120/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-5/checkpoint-120/rng_state.pth b/run-5/checkpoint-120/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7f7f63718a6f43937c9752e330bbe6b66b5ed44 --- /dev/null +++ b/run-5/checkpoint-120/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9b93979db9d187112d0877ca456edd569c23f080722f2fbbe337c28c1a6935 +size 14244 diff --git a/run-5/checkpoint-120/scheduler.pt b/run-5/checkpoint-120/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5761a684cc0a2f430b4e3e8fd7c1e2bc206d3f58 --- /dev/null +++ b/run-5/checkpoint-120/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54d977ec7e793480ff2dd7eef923a400fad294e8389923d1392ea0eb694ad4d2 +size 1064 diff --git a/run-5/checkpoint-120/trainer_state.json b/run-5/checkpoint-120/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ccfb9dfc84004c02ab986923aba10fc7481d2e4a --- /dev/null +++ b/run-5/checkpoint-120/trainer_state.json @@ -0,0 +1,237 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-5/checkpoint-24", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 120, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6560618877410889, + "learning_rate": 9.928194518007172e-07, + "loss": 0.6997, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.6409028768539429, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.6971, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.6359347105026245, + "learning_rate": 2.978458355402151e-06, + "loss": 0.6953, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.56245756149292, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6889, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6865768432617188, + "eval_runtime": 1.3652, + "eval_samples_per_second": 46.879, + "eval_steps_per_second": 5.86, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.6976048946380615, + "learning_rate": 4.743470714158982e-06, + "loss": 0.6865, + "step": 25 + }, + { + "epoch": 1.25, + "grad_norm": 2.008615732192993, + "learning_rate": 4.63315744173668e-06, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 1.46, + "grad_norm": 1.9885936975479126, + "learning_rate": 4.522844169314378e-06, + "loss": 0.6531, + "step": 35 + }, + { + "epoch": 1.67, + "grad_norm": 0.5047381520271301, + "learning_rate": 4.412530896892076e-06, + "loss": 0.6664, + "step": 40 + }, + { + "epoch": 1.88, + "grad_norm": 0.9776627421379089, + "learning_rate": 4.302217624469774e-06, + "loss": 0.6545, + "step": 45 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680206298828125, + "eval_runtime": 1.3708, + "eval_samples_per_second": 46.689, + "eval_steps_per_second": 5.836, + "step": 48 + }, + { + "epoch": 2.08, + "grad_norm": 0.46269264817237854, + "learning_rate": 4.191904352047472e-06, + "loss": 0.6518, + "step": 50 + }, + { + "epoch": 2.29, + "grad_norm": 0.8560582995414734, + "learning_rate": 4.081591079625171e-06, + "loss": 0.6333, + "step": 55 + }, + { + "epoch": 2.5, + "grad_norm": 0.9135796427726746, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6494, + "step": 60 + }, + { + "epoch": 2.71, + "grad_norm": 0.71152263879776, + "learning_rate": 3.8609645347805665e-06, + "loss": 0.6444, + "step": 65 + }, + { + "epoch": 2.92, + "grad_norm": 1.3949508666992188, + "learning_rate": 3.750651262358265e-06, + "loss": 0.5726, + "step": 70 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6824684143066406, + "eval_runtime": 1.373, + "eval_samples_per_second": 46.614, + "eval_steps_per_second": 5.827, + "step": 72 + }, + { + "epoch": 3.12, + "grad_norm": 0.7415557503700256, + "learning_rate": 3.6403379899359627e-06, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 3.33, + "grad_norm": 0.6959993243217468, + "learning_rate": 3.5300247175136605e-06, + "loss": 0.6316, + "step": 80 + }, + { + "epoch": 3.54, + "grad_norm": 1.2839587926864624, + "learning_rate": 3.419711445091359e-06, + "loss": 0.5607, + "step": 85 + }, + { + "epoch": 3.75, + "grad_norm": 1.2485283613204956, + "learning_rate": 3.309398172669057e-06, + "loss": 0.6403, + "step": 90 + }, + { + "epoch": 3.96, + "grad_norm": 1.1759371757507324, + "learning_rate": 3.199084900246755e-06, + "loss": 0.6242, + "step": 95 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6892623901367188, + "eval_runtime": 1.3822, + "eval_samples_per_second": 46.302, + "eval_steps_per_second": 5.788, + "step": 96 + }, + { + "epoch": 4.17, + "grad_norm": 0.6085842251777649, + "learning_rate": 3.0887716278244532e-06, + "loss": 0.564, + "step": 100 + }, + { + "epoch": 4.38, + "grad_norm": 0.9830076098442078, + "learning_rate": 2.978458355402151e-06, + "loss": 0.641, + "step": 105 + }, + { + "epoch": 4.58, + "grad_norm": 0.6557525992393494, + "learning_rate": 2.8681450829798494e-06, + "loss": 0.5943, + "step": 110 + }, + { + "epoch": 4.79, + "grad_norm": 0.7436282634735107, + "learning_rate": 2.7578318105575477e-06, + "loss": 0.614, + "step": 115 + }, + { + "epoch": 5.0, + "grad_norm": 0.7114256620407104, + "learning_rate": 2.6475185381352455e-06, + "loss": 0.6081, + "step": 120 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6939811706542969, + "eval_runtime": 1.3699, + "eval_samples_per_second": 46.719, + "eval_steps_per_second": 5.84, + "step": 120 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4193401989215328.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 4.765533368643442e-06, + "per_device_train_batch_size": 8 + } +} diff --git a/run-5/checkpoint-120/training_args.bin b/run-5/checkpoint-120/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35fa0af60e4dca57930333ada64bcb1b3d5e32fe --- /dev/null +++ b/run-5/checkpoint-120/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +size 4920 diff --git a/run-5/checkpoint-144/config.json b/run-5/checkpoint-144/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-5/checkpoint-144/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-5/checkpoint-144/model.safetensors b/run-5/checkpoint-144/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa68d981cf6bf8e8a8b6d9ca902c49e59297485e --- /dev/null +++ b/run-5/checkpoint-144/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401a1802884ad357b5301a2252ce0f1ce6c69288db1ddd155757b374073dc7aa +size 94763496 diff --git a/run-5/checkpoint-144/optimizer.pt b/run-5/checkpoint-144/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8577f84ef372ce422a7b26757128e8eaf13ca568 --- /dev/null +++ b/run-5/checkpoint-144/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13a9395bc8e69da9e62af79e7b6270dc16576123bae7e84c80f3643f4cefe5c +size 189552570 diff --git a/run-5/checkpoint-144/preprocessor_config.json b/run-5/checkpoint-144/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-5/checkpoint-144/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-5/checkpoint-144/rng_state.pth b/run-5/checkpoint-144/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0dcad447546559109c3c49122d38655653738eb1 --- /dev/null +++ b/run-5/checkpoint-144/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eddddd854f935bdc6bf7e33f19c011df64d161505c6be9aa447288219fa3010a +size 14244 diff --git a/run-5/checkpoint-144/scheduler.pt b/run-5/checkpoint-144/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..173f730f61316417eb5e0e94dc63473602a252aa --- /dev/null +++ b/run-5/checkpoint-144/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7632d13f0f8122a5f2aa2d52d94d9212cd5ebcf002004e470d98c2451054c23 +size 1064 diff --git a/run-5/checkpoint-144/trainer_state.json b/run-5/checkpoint-144/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..61bae52de4a38a70ef9e5bf11babf3d2952210d1 --- /dev/null +++ b/run-5/checkpoint-144/trainer_state.json @@ -0,0 +1,274 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-5/checkpoint-24", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 144, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6560618877410889, + "learning_rate": 9.928194518007172e-07, + "loss": 0.6997, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.6409028768539429, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.6971, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.6359347105026245, + "learning_rate": 2.978458355402151e-06, + "loss": 0.6953, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.56245756149292, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6889, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6865768432617188, + "eval_runtime": 1.3652, + "eval_samples_per_second": 46.879, + "eval_steps_per_second": 5.86, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.6976048946380615, + "learning_rate": 4.743470714158982e-06, + "loss": 0.6865, + "step": 25 + }, + { + "epoch": 1.25, + "grad_norm": 2.008615732192993, + "learning_rate": 4.63315744173668e-06, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 1.46, + "grad_norm": 1.9885936975479126, + "learning_rate": 4.522844169314378e-06, + "loss": 0.6531, + "step": 35 + }, + { + "epoch": 1.67, + "grad_norm": 0.5047381520271301, + "learning_rate": 4.412530896892076e-06, + "loss": 0.6664, + "step": 40 + }, + { + "epoch": 1.88, + "grad_norm": 0.9776627421379089, + "learning_rate": 4.302217624469774e-06, + "loss": 0.6545, + "step": 45 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680206298828125, + "eval_runtime": 1.3708, + "eval_samples_per_second": 46.689, + "eval_steps_per_second": 5.836, + "step": 48 + }, + { + "epoch": 2.08, + "grad_norm": 0.46269264817237854, + "learning_rate": 4.191904352047472e-06, + "loss": 0.6518, + "step": 50 + }, + { + "epoch": 2.29, + "grad_norm": 0.8560582995414734, + "learning_rate": 4.081591079625171e-06, + "loss": 0.6333, + "step": 55 + }, + { + "epoch": 2.5, + "grad_norm": 0.9135796427726746, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6494, + "step": 60 + }, + { + "epoch": 2.71, + "grad_norm": 0.71152263879776, + "learning_rate": 3.8609645347805665e-06, + "loss": 0.6444, + "step": 65 + }, + { + "epoch": 2.92, + "grad_norm": 1.3949508666992188, + "learning_rate": 3.750651262358265e-06, + "loss": 0.5726, + "step": 70 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6824684143066406, + "eval_runtime": 1.373, + "eval_samples_per_second": 46.614, + "eval_steps_per_second": 5.827, + "step": 72 + }, + { + "epoch": 3.12, + "grad_norm": 0.7415557503700256, + "learning_rate": 3.6403379899359627e-06, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 3.33, + "grad_norm": 0.6959993243217468, + "learning_rate": 3.5300247175136605e-06, + "loss": 0.6316, + "step": 80 + }, + { + "epoch": 3.54, + "grad_norm": 1.2839587926864624, + "learning_rate": 3.419711445091359e-06, + "loss": 0.5607, + "step": 85 + }, + { + "epoch": 3.75, + "grad_norm": 1.2485283613204956, + "learning_rate": 3.309398172669057e-06, + "loss": 0.6403, + "step": 90 + }, + { + "epoch": 3.96, + "grad_norm": 1.1759371757507324, + "learning_rate": 3.199084900246755e-06, + "loss": 0.6242, + "step": 95 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6892623901367188, + "eval_runtime": 1.3822, + "eval_samples_per_second": 46.302, + "eval_steps_per_second": 5.788, + "step": 96 + }, + { + "epoch": 4.17, + "grad_norm": 0.6085842251777649, + "learning_rate": 3.0887716278244532e-06, + "loss": 0.564, + "step": 100 + }, + { + "epoch": 4.38, + "grad_norm": 0.9830076098442078, + "learning_rate": 2.978458355402151e-06, + "loss": 0.641, + "step": 105 + }, + { + "epoch": 4.58, + "grad_norm": 0.6557525992393494, + "learning_rate": 2.8681450829798494e-06, + "loss": 0.5943, + "step": 110 + }, + { + "epoch": 4.79, + "grad_norm": 0.7436282634735107, + "learning_rate": 2.7578318105575477e-06, + "loss": 0.614, + "step": 115 + }, + { + "epoch": 5.0, + "grad_norm": 0.7114256620407104, + "learning_rate": 2.6475185381352455e-06, + "loss": 0.6081, + "step": 120 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6939811706542969, + "eval_runtime": 1.3699, + "eval_samples_per_second": 46.719, + "eval_steps_per_second": 5.84, + "step": 120 + }, + { + "epoch": 5.21, + "grad_norm": 0.6939758062362671, + "learning_rate": 2.537205265712944e-06, + "loss": 0.5683, + "step": 125 + }, + { + "epoch": 5.42, + "grad_norm": 0.7150554656982422, + "learning_rate": 2.426891993290642e-06, + "loss": 0.6389, + "step": 130 + }, + { + "epoch": 5.62, + "grad_norm": 0.5391159057617188, + "learning_rate": 2.31657872086834e-06, + "loss": 0.6185, + "step": 135 + }, + { + "epoch": 5.83, + "grad_norm": 0.657724142074585, + "learning_rate": 2.206265448446038e-06, + "loss": 0.6216, + "step": 140 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.69482421875, + "eval_runtime": 1.3614, + "eval_samples_per_second": 47.009, + "eval_steps_per_second": 5.876, + "step": 144 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 4911381340990080.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 4.765533368643442e-06, + "per_device_train_batch_size": 8 + } +} diff --git a/run-5/checkpoint-144/training_args.bin b/run-5/checkpoint-144/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35fa0af60e4dca57930333ada64bcb1b3d5e32fe --- /dev/null +++ b/run-5/checkpoint-144/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +size 4920 diff --git a/run-5/checkpoint-168/config.json b/run-5/checkpoint-168/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-5/checkpoint-168/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-5/checkpoint-168/model.safetensors b/run-5/checkpoint-168/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fa04cb8806b578dde8bcb0d4f640e260465f62d --- /dev/null +++ b/run-5/checkpoint-168/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc7bd641365c6786592c3515017b5ed2ae6a8819f32b77ec681108ed8e76dcb +size 94763496 diff --git a/run-5/checkpoint-168/optimizer.pt b/run-5/checkpoint-168/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0853c5274b37736c9b9f435118e340e9dfc8939c --- /dev/null +++ b/run-5/checkpoint-168/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:815babcbbaa2ea7cdb1bc66bb94cf7e3cdb8ac6fd8db97a412629826b8eed8fc +size 189552570 diff --git a/run-5/checkpoint-168/preprocessor_config.json b/run-5/checkpoint-168/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-5/checkpoint-168/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-5/checkpoint-168/rng_state.pth b/run-5/checkpoint-168/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b5968c6a0d63ac9dece0214117bab8d5185d7c76 --- /dev/null +++ b/run-5/checkpoint-168/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d1e8300999dd1e32b96ea9da5218bf1661f524b97023fae45e0a2d78d5309f +size 14244 diff --git a/run-5/checkpoint-168/scheduler.pt b/run-5/checkpoint-168/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..70b5e3f972c77a3ea10da68f5adcdaab07a4667f --- /dev/null +++ b/run-5/checkpoint-168/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f0c5b02573b46c9f41bc7a751eec9891494db2257743c0802d91b804b452d9a +size 1064 diff --git a/run-5/checkpoint-168/trainer_state.json b/run-5/checkpoint-168/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d674a13b143dc77065b3c95aa3cc6f092873736d --- /dev/null +++ b/run-5/checkpoint-168/trainer_state.json @@ -0,0 +1,318 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-5/checkpoint-24", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 168, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6560618877410889, + "learning_rate": 9.928194518007172e-07, + "loss": 0.6997, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.6409028768539429, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.6971, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.6359347105026245, + "learning_rate": 2.978458355402151e-06, + "loss": 0.6953, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.56245756149292, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6889, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6865768432617188, + "eval_runtime": 1.3652, + "eval_samples_per_second": 46.879, + "eval_steps_per_second": 5.86, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.6976048946380615, + "learning_rate": 4.743470714158982e-06, + "loss": 0.6865, + "step": 25 + }, + { + "epoch": 1.25, + "grad_norm": 2.008615732192993, + "learning_rate": 4.63315744173668e-06, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 1.46, + "grad_norm": 1.9885936975479126, + "learning_rate": 4.522844169314378e-06, + "loss": 0.6531, + "step": 35 + }, + { + "epoch": 1.67, + "grad_norm": 0.5047381520271301, + "learning_rate": 4.412530896892076e-06, + "loss": 0.6664, + "step": 40 + }, + { + "epoch": 1.88, + "grad_norm": 0.9776627421379089, + "learning_rate": 4.302217624469774e-06, + "loss": 0.6545, + "step": 45 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680206298828125, + "eval_runtime": 1.3708, + "eval_samples_per_second": 46.689, + "eval_steps_per_second": 5.836, + "step": 48 + }, + { + "epoch": 2.08, + "grad_norm": 0.46269264817237854, + "learning_rate": 4.191904352047472e-06, + "loss": 0.6518, + "step": 50 + }, + { + "epoch": 2.29, + "grad_norm": 0.8560582995414734, + "learning_rate": 4.081591079625171e-06, + "loss": 0.6333, + "step": 55 + }, + { + "epoch": 2.5, + "grad_norm": 0.9135796427726746, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6494, + "step": 60 + }, + { + "epoch": 2.71, + "grad_norm": 0.71152263879776, + "learning_rate": 3.8609645347805665e-06, + "loss": 0.6444, + "step": 65 + }, + { + "epoch": 2.92, + "grad_norm": 1.3949508666992188, + "learning_rate": 3.750651262358265e-06, + "loss": 0.5726, + "step": 70 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6824684143066406, + "eval_runtime": 1.373, + "eval_samples_per_second": 46.614, + "eval_steps_per_second": 5.827, + "step": 72 + }, + { + "epoch": 3.12, + "grad_norm": 0.7415557503700256, + "learning_rate": 3.6403379899359627e-06, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 3.33, + "grad_norm": 0.6959993243217468, + "learning_rate": 3.5300247175136605e-06, + "loss": 0.6316, + "step": 80 + }, + { + "epoch": 3.54, + "grad_norm": 1.2839587926864624, + "learning_rate": 3.419711445091359e-06, + "loss": 0.5607, + "step": 85 + }, + { + "epoch": 3.75, + "grad_norm": 1.2485283613204956, + "learning_rate": 3.309398172669057e-06, + "loss": 0.6403, + "step": 90 + }, + { + "epoch": 3.96, + "grad_norm": 1.1759371757507324, + "learning_rate": 3.199084900246755e-06, + "loss": 0.6242, + "step": 95 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6892623901367188, + "eval_runtime": 1.3822, + "eval_samples_per_second": 46.302, + "eval_steps_per_second": 5.788, + "step": 96 + }, + { + "epoch": 4.17, + "grad_norm": 0.6085842251777649, + "learning_rate": 3.0887716278244532e-06, + "loss": 0.564, + "step": 100 + }, + { + "epoch": 4.38, + "grad_norm": 0.9830076098442078, + "learning_rate": 2.978458355402151e-06, + "loss": 0.641, + "step": 105 + }, + { + "epoch": 4.58, + "grad_norm": 0.6557525992393494, + "learning_rate": 2.8681450829798494e-06, + "loss": 0.5943, + "step": 110 + }, + { + "epoch": 4.79, + "grad_norm": 0.7436282634735107, + "learning_rate": 2.7578318105575477e-06, + "loss": 0.614, + "step": 115 + }, + { + "epoch": 5.0, + "grad_norm": 0.7114256620407104, + "learning_rate": 2.6475185381352455e-06, + "loss": 0.6081, + "step": 120 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6939811706542969, + "eval_runtime": 1.3699, + "eval_samples_per_second": 46.719, + "eval_steps_per_second": 5.84, + "step": 120 + }, + { + "epoch": 5.21, + "grad_norm": 0.6939758062362671, + "learning_rate": 2.537205265712944e-06, + "loss": 0.5683, + "step": 125 + }, + { + "epoch": 5.42, + "grad_norm": 0.7150554656982422, + "learning_rate": 2.426891993290642e-06, + "loss": 0.6389, + "step": 130 + }, + { + "epoch": 5.62, + "grad_norm": 0.5391159057617188, + "learning_rate": 2.31657872086834e-06, + "loss": 0.6185, + "step": 135 + }, + { + "epoch": 5.83, + "grad_norm": 0.657724142074585, + "learning_rate": 2.206265448446038e-06, + "loss": 0.6216, + "step": 140 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.69482421875, + "eval_runtime": 1.3614, + "eval_samples_per_second": 47.009, + "eval_steps_per_second": 5.876, + "step": 144 + }, + { + "epoch": 6.04, + "grad_norm": 1.1818499565124512, + "learning_rate": 2.095952176023736e-06, + "loss": 0.5331, + "step": 145 + }, + { + "epoch": 6.25, + "grad_norm": 0.6219866871833801, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.5781, + "step": 150 + }, + { + "epoch": 6.46, + "grad_norm": 0.954688310623169, + "learning_rate": 1.8753256311791324e-06, + "loss": 0.6719, + "step": 155 + }, + { + "epoch": 6.67, + "grad_norm": 0.5371306538581848, + "learning_rate": 1.7650123587568303e-06, + "loss": 0.6174, + "step": 160 + }, + { + "epoch": 6.88, + "grad_norm": 1.098739743232727, + "learning_rate": 1.6546990863345285e-06, + "loss": 0.6005, + "step": 165 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6969451904296875, + "eval_runtime": 1.371, + "eval_samples_per_second": 46.681, + "eval_steps_per_second": 5.835, + "step": 168 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5784921236870880.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 4.765533368643442e-06, + "per_device_train_batch_size": 8 + } +} diff --git a/run-5/checkpoint-168/training_args.bin b/run-5/checkpoint-168/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35fa0af60e4dca57930333ada64bcb1b3d5e32fe --- /dev/null +++ b/run-5/checkpoint-168/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +size 4920 diff --git a/run-5/checkpoint-48/config.json b/run-5/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-5/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-5/checkpoint-48/model.safetensors b/run-5/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..310f0e5d8b9214c8750688e56dd40a87bd776202 --- /dev/null +++ b/run-5/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44374b58af3c51b5eac7f464c67b1a6c5e2197656be426f7a0caf6bd7ebd43eb +size 94763496 diff --git a/run-5/checkpoint-48/optimizer.pt b/run-5/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..16124203fd507724e6c07752e83444aa5de04d79 --- /dev/null +++ b/run-5/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:560210c0f70f7f9718c0ae200103724f2f497040cd6f57db30cdeada4fbe508e +size 189552570 diff --git a/run-5/checkpoint-48/preprocessor_config.json b/run-5/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-5/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-5/checkpoint-48/rng_state.pth b/run-5/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f747b3a07143a0952937715136c3c6f3d385714b --- /dev/null +++ b/run-5/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea0f2253490889e9b5dbea96976236c3c1f025b5a24179f4bcaa44eec621be7 +size 14244 diff --git a/run-5/checkpoint-48/scheduler.pt b/run-5/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea48d9fd0346dd50180ae9fdb50a2ea347898d96 --- /dev/null +++ b/run-5/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e1e8bf8425998d14aca500c6e7194f5af9e802d88054b4e269eeac25555423 +size 1064 diff --git a/run-5/checkpoint-48/trainer_state.json b/run-5/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a45c5c98ec224d25f0a13dbddf514685afd368e --- /dev/null +++ b/run-5/checkpoint-48/trainer_state.json @@ -0,0 +1,105 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-5/checkpoint-24", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6560618877410889, + "learning_rate": 9.928194518007172e-07, + "loss": 0.6997, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.6409028768539429, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.6971, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.6359347105026245, + "learning_rate": 2.978458355402151e-06, + "loss": 0.6953, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.56245756149292, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6889, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6865768432617188, + "eval_runtime": 1.3652, + "eval_samples_per_second": 46.879, + "eval_steps_per_second": 5.86, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.6976048946380615, + "learning_rate": 4.743470714158982e-06, + "loss": 0.6865, + "step": 25 + }, + { + "epoch": 1.25, + "grad_norm": 2.008615732192993, + "learning_rate": 4.63315744173668e-06, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 1.46, + "grad_norm": 1.9885936975479126, + "learning_rate": 4.522844169314378e-06, + "loss": 0.6531, + "step": 35 + }, + { + "epoch": 1.67, + "grad_norm": 0.5047381520271301, + "learning_rate": 4.412530896892076e-06, + "loss": 0.6664, + "step": 40 + }, + { + "epoch": 1.88, + "grad_norm": 0.9776627421379089, + "learning_rate": 4.302217624469774e-06, + "loss": 0.6545, + "step": 45 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680206298828125, + "eval_runtime": 1.3708, + "eval_samples_per_second": 46.689, + "eval_steps_per_second": 5.836, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1570339835242944.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 4.765533368643442e-06, + "per_device_train_batch_size": 8 + } +} diff --git a/run-5/checkpoint-48/training_args.bin b/run-5/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35fa0af60e4dca57930333ada64bcb1b3d5e32fe --- /dev/null +++ b/run-5/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +size 4920 diff --git a/run-5/checkpoint-72/config.json b/run-5/checkpoint-72/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-5/checkpoint-72/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-5/checkpoint-72/model.safetensors b/run-5/checkpoint-72/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b05408ec332ced8d3fd452b16187f5d6601dd1c --- /dev/null +++ b/run-5/checkpoint-72/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00e0256d1ea1849afaec15d21adc0869da29e07ca765d0d9625d8cef6c46ae9 +size 94763496 diff --git a/run-5/checkpoint-72/optimizer.pt b/run-5/checkpoint-72/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..36d85eef52fb16b1301542bf89e2354bb8264a81 --- /dev/null +++ b/run-5/checkpoint-72/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:054e0c4c58449144ae1ba77189cbb26a582773d33a97bd494ab34c74ae905776 +size 189552570 diff --git a/run-5/checkpoint-72/preprocessor_config.json b/run-5/checkpoint-72/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-5/checkpoint-72/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-5/checkpoint-72/rng_state.pth b/run-5/checkpoint-72/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a0931c97507f28c46325d60d49a5e7e573321a4 --- /dev/null +++ b/run-5/checkpoint-72/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ee3b4c1fd10f5b7dc9cd3892663bc52a1bc7290b86a11225b56750a1f3c0adf +size 14244 diff --git a/run-5/checkpoint-72/scheduler.pt b/run-5/checkpoint-72/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fa2c4640c3614f3f0f68fbca5230c2bcfb0262e --- /dev/null +++ b/run-5/checkpoint-72/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8bfb2ec2dd0d59fe1c6c48a5eb057fb5f63d5cde1db459bdc9745595437e122 +size 1064 diff --git a/run-5/checkpoint-72/trainer_state.json b/run-5/checkpoint-72/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a5642d9c9c278b02fc225ad1aa4b08a95137ba8e --- /dev/null +++ b/run-5/checkpoint-72/trainer_state.json @@ -0,0 +1,149 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-5/checkpoint-24", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 72, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6560618877410889, + "learning_rate": 9.928194518007172e-07, + "loss": 0.6997, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.6409028768539429, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.6971, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.6359347105026245, + "learning_rate": 2.978458355402151e-06, + "loss": 0.6953, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.56245756149292, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6889, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6865768432617188, + "eval_runtime": 1.3652, + "eval_samples_per_second": 46.879, + "eval_steps_per_second": 5.86, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.6976048946380615, + "learning_rate": 4.743470714158982e-06, + "loss": 0.6865, + "step": 25 + }, + { + "epoch": 1.25, + "grad_norm": 2.008615732192993, + "learning_rate": 4.63315744173668e-06, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 1.46, + "grad_norm": 1.9885936975479126, + "learning_rate": 4.522844169314378e-06, + "loss": 0.6531, + "step": 35 + }, + { + "epoch": 1.67, + "grad_norm": 0.5047381520271301, + "learning_rate": 4.412530896892076e-06, + "loss": 0.6664, + "step": 40 + }, + { + "epoch": 1.88, + "grad_norm": 0.9776627421379089, + "learning_rate": 4.302217624469774e-06, + "loss": 0.6545, + "step": 45 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680206298828125, + "eval_runtime": 1.3708, + "eval_samples_per_second": 46.689, + "eval_steps_per_second": 5.836, + "step": 48 + }, + { + "epoch": 2.08, + "grad_norm": 0.46269264817237854, + "learning_rate": 4.191904352047472e-06, + "loss": 0.6518, + "step": 50 + }, + { + "epoch": 2.29, + "grad_norm": 0.8560582995414734, + "learning_rate": 4.081591079625171e-06, + "loss": 0.6333, + "step": 55 + }, + { + "epoch": 2.5, + "grad_norm": 0.9135796427726746, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6494, + "step": 60 + }, + { + "epoch": 2.71, + "grad_norm": 0.71152263879776, + "learning_rate": 3.8609645347805665e-06, + "loss": 0.6444, + "step": 65 + }, + { + "epoch": 2.92, + "grad_norm": 1.3949508666992188, + "learning_rate": 3.750651262358265e-06, + "loss": 0.5726, + "step": 70 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6824684143066406, + "eval_runtime": 1.373, + "eval_samples_per_second": 46.614, + "eval_steps_per_second": 5.827, + "step": 72 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2449725503657472.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 4.765533368643442e-06, + "per_device_train_batch_size": 8 + } +} diff --git a/run-5/checkpoint-72/training_args.bin b/run-5/checkpoint-72/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35fa0af60e4dca57930333ada64bcb1b3d5e32fe --- /dev/null +++ b/run-5/checkpoint-72/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +size 4920 diff --git a/run-5/checkpoint-96/config.json b/run-5/checkpoint-96/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-5/checkpoint-96/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-5/checkpoint-96/model.safetensors b/run-5/checkpoint-96/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88fe0941232337e50ed1c426a8680bac9a4c0c63 --- /dev/null +++ b/run-5/checkpoint-96/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d80eb03f980b18c53130ffe20612f24f0ad6b932416946a4a72ebe164dedf2d2 +size 94763496 diff --git a/run-5/checkpoint-96/optimizer.pt b/run-5/checkpoint-96/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..50eba18292111713dbb6e24a57f1ca81f2022672 --- /dev/null +++ b/run-5/checkpoint-96/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0343c275626ac316997263b3116cf0e218509562bd2f7af00c7a09411a332e2e +size 189552570 diff --git a/run-5/checkpoint-96/preprocessor_config.json b/run-5/checkpoint-96/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-5/checkpoint-96/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-5/checkpoint-96/rng_state.pth b/run-5/checkpoint-96/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bde550517cedc68a3ae49137c6221ff74725b994 --- /dev/null +++ b/run-5/checkpoint-96/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b456b0645d03f4fc8a479113b5caa8bb02734d9c9788d3ded452d4ca10da7ef +size 14244 diff --git a/run-5/checkpoint-96/scheduler.pt b/run-5/checkpoint-96/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccd0360175734ad4af18ac4917f96a8a3a169b4c --- /dev/null +++ b/run-5/checkpoint-96/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35dfd154520d23e4be85bc337f841c5a10afc0834c04f9c8d53994824829a57 +size 1064 diff --git a/run-5/checkpoint-96/trainer_state.json b/run-5/checkpoint-96/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6446ada7ea45b455f8c8ebf949dcaae8a9bc5fbb --- /dev/null +++ b/run-5/checkpoint-96/trainer_state.json @@ -0,0 +1,193 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-5/checkpoint-24", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 96, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6560618877410889, + "learning_rate": 9.928194518007172e-07, + "loss": 0.6997, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.6409028768539429, + "learning_rate": 1.9856389036014343e-06, + "loss": 0.6971, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.6359347105026245, + "learning_rate": 2.978458355402151e-06, + "loss": 0.6953, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.56245756149292, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6889, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6865768432617188, + "eval_runtime": 1.3652, + "eval_samples_per_second": 46.879, + "eval_steps_per_second": 5.86, + "step": 24 + }, + { + "epoch": 1.04, + "grad_norm": 0.6976048946380615, + "learning_rate": 4.743470714158982e-06, + "loss": 0.6865, + "step": 25 + }, + { + "epoch": 1.25, + "grad_norm": 2.008615732192993, + "learning_rate": 4.63315744173668e-06, + "loss": 0.6681, + "step": 30 + }, + { + "epoch": 1.46, + "grad_norm": 1.9885936975479126, + "learning_rate": 4.522844169314378e-06, + "loss": 0.6531, + "step": 35 + }, + { + "epoch": 1.67, + "grad_norm": 0.5047381520271301, + "learning_rate": 4.412530896892076e-06, + "loss": 0.6664, + "step": 40 + }, + { + "epoch": 1.88, + "grad_norm": 0.9776627421379089, + "learning_rate": 4.302217624469774e-06, + "loss": 0.6545, + "step": 45 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.680206298828125, + "eval_runtime": 1.3708, + "eval_samples_per_second": 46.689, + "eval_steps_per_second": 5.836, + "step": 48 + }, + { + "epoch": 2.08, + "grad_norm": 0.46269264817237854, + "learning_rate": 4.191904352047472e-06, + "loss": 0.6518, + "step": 50 + }, + { + "epoch": 2.29, + "grad_norm": 0.8560582995414734, + "learning_rate": 4.081591079625171e-06, + "loss": 0.6333, + "step": 55 + }, + { + "epoch": 2.5, + "grad_norm": 0.9135796427726746, + "learning_rate": 3.971277807202869e-06, + "loss": 0.6494, + "step": 60 + }, + { + "epoch": 2.71, + "grad_norm": 0.71152263879776, + "learning_rate": 3.8609645347805665e-06, + "loss": 0.6444, + "step": 65 + }, + { + "epoch": 2.92, + "grad_norm": 1.3949508666992188, + "learning_rate": 3.750651262358265e-06, + "loss": 0.5726, + "step": 70 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6824684143066406, + "eval_runtime": 1.373, + "eval_samples_per_second": 46.614, + "eval_steps_per_second": 5.827, + "step": 72 + }, + { + "epoch": 3.12, + "grad_norm": 0.7415557503700256, + "learning_rate": 3.6403379899359627e-06, + "loss": 0.6271, + "step": 75 + }, + { + "epoch": 3.33, + "grad_norm": 0.6959993243217468, + "learning_rate": 3.5300247175136605e-06, + "loss": 0.6316, + "step": 80 + }, + { + "epoch": 3.54, + "grad_norm": 1.2839587926864624, + "learning_rate": 3.419711445091359e-06, + "loss": 0.5607, + "step": 85 + }, + { + "epoch": 3.75, + "grad_norm": 1.2485283613204956, + "learning_rate": 3.309398172669057e-06, + "loss": 0.6403, + "step": 90 + }, + { + "epoch": 3.96, + "grad_norm": 1.1759371757507324, + "learning_rate": 3.199084900246755e-06, + "loss": 0.6242, + "step": 95 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6892623901367188, + "eval_runtime": 1.3822, + "eval_samples_per_second": 46.302, + "eval_steps_per_second": 5.788, + "step": 96 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3314382463332576.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 4.765533368643442e-06, + "per_device_train_batch_size": 8 + } +} diff --git a/run-5/checkpoint-96/training_args.bin b/run-5/checkpoint-96/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..35fa0af60e4dca57930333ada64bcb1b3d5e32fe --- /dev/null +++ b/run-5/checkpoint-96/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +size 4920 diff --git a/run-7/checkpoint-16/config.json b/run-7/checkpoint-16/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-16/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-16/model.safetensors b/run-7/checkpoint-16/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..211e2f1f9e43be19a9968dd83fe932fc12893fda --- /dev/null +++ b/run-7/checkpoint-16/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7ae57da8f34fa2c8f7948c18fc1df3ac37fbbd1682ae49a241062c5e944944 +size 94763496 diff --git a/run-7/checkpoint-16/optimizer.pt b/run-7/checkpoint-16/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4d848bff22490de03fb2b0cf397b53d3964a539 --- /dev/null +++ b/run-7/checkpoint-16/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7014eb7abce5fdca1154b84ba292600a923fb88f8fa0bae43c300f91a3cc52 +size 189552570 diff --git a/run-7/checkpoint-16/preprocessor_config.json b/run-7/checkpoint-16/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-16/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-16/rng_state.pth b/run-7/checkpoint-16/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac7d01df8b2a48c2fef09916807ca451a855a7a7 --- /dev/null +++ b/run-7/checkpoint-16/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0b4c067c009eda80e8baa1203d26c442448a0cbb4afc20c352a7b4f4c31ecfc +size 14244 diff --git a/run-7/checkpoint-16/scheduler.pt b/run-7/checkpoint-16/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9d7f28072ba04e16009f947ee1030321dfb2f76 --- /dev/null +++ b/run-7/checkpoint-16/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:005ebbac8ff8de2b22bd06f759ac8c15a6a29e95d9fa3158864650621c3e7bc3 +size 1064 diff --git a/run-7/checkpoint-16/trainer_state.json b/run-7/checkpoint-16/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..89a9dc9c345adcd02bafea105891aa7f7262d5e8 --- /dev/null +++ b/run-7/checkpoint-16/trainer_state.json @@ -0,0 +1,63 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 16, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2022063066922848.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-16/training_args.bin b/run-7/checkpoint-16/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-16/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-24/config.json b/run-7/checkpoint-24/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-24/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-24/model.safetensors b/run-7/checkpoint-24/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e1b4bbe9374135ea054830e4c9fe95c63011d22 --- /dev/null +++ b/run-7/checkpoint-24/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7141562c879dd5a199757b19cf06360d647a1797b8a80f56a45d79f046b47b1f +size 94763496 diff --git a/run-7/checkpoint-24/optimizer.pt b/run-7/checkpoint-24/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fa8f9ed0390da72320642a5e93b9923d51dc333 --- /dev/null +++ b/run-7/checkpoint-24/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76dcd3e47de107a3460f64f3a2e3f6e25361fab53709836dab44565c957826be +size 189552570 diff --git a/run-7/checkpoint-24/preprocessor_config.json b/run-7/checkpoint-24/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-24/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-24/rng_state.pth b/run-7/checkpoint-24/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..28c01d4ea535e8997af4f6785766f6eb12960737 --- /dev/null +++ b/run-7/checkpoint-24/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24d6a900c72e3e0f300fb2a89dab7159acab172bc37646dd605cc8c78374f6f9 +size 14244 diff --git a/run-7/checkpoint-24/scheduler.pt b/run-7/checkpoint-24/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d72f5d0d6478a301306829e64c0bd83f25247a02 --- /dev/null +++ b/run-7/checkpoint-24/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3485d9907f3ee8f5d953646af52f6a0291d2d5697c58ae60ae4f116d1ba97d2 +size 1064 diff --git a/run-7/checkpoint-24/trainer_state.json b/run-7/checkpoint-24/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed611be398fd9e5a44e62bbf1273900aac21eefd --- /dev/null +++ b/run-7/checkpoint-24/trainer_state.json @@ -0,0 +1,79 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 24, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 2642927091954144.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-24/training_args.bin b/run-7/checkpoint-24/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-24/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-32/config.json b/run-7/checkpoint-32/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-32/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-32/model.safetensors b/run-7/checkpoint-32/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bae9826eee5bd4b64ad75ffc1991d36da320132 --- /dev/null +++ b/run-7/checkpoint-32/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4a4d6faab192d1b8fc17d9b67c9af965b10eeb31a717b99a95372e06325a2d +size 94763496 diff --git a/run-7/checkpoint-32/optimizer.pt b/run-7/checkpoint-32/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..63967f50860fe6025f47f4679dc37e95d638fe79 --- /dev/null +++ b/run-7/checkpoint-32/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b8d15d6890abb6aab203b42f813f5633c80fa2466266ac63b03cfd4473924b7 +size 189552570 diff --git a/run-7/checkpoint-32/preprocessor_config.json b/run-7/checkpoint-32/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-32/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-32/rng_state.pth b/run-7/checkpoint-32/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3b6be6f69fe6a35f8729aab39cf27523ee48a85a --- /dev/null +++ b/run-7/checkpoint-32/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556c72cd347c734d7fdec24637de2c2dcd065c09bcdd940199a24e0091d021c0 +size 14244 diff --git a/run-7/checkpoint-32/scheduler.pt b/run-7/checkpoint-32/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..af5cdf94834bb049956971d47adda620ab216290 --- /dev/null +++ b/run-7/checkpoint-32/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd6f24409289d6db2f5f2bc9c836d5ed1214a397c11235a0d73ae19d28cfaae +size 1064 diff --git a/run-7/checkpoint-32/trainer_state.json b/run-7/checkpoint-32/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f32c31a4932bc2c535ad339d1f42e3369e61db6e --- /dev/null +++ b/run-7/checkpoint-32/trainer_state.json @@ -0,0 +1,102 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 32, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 3845988443901696.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-32/training_args.bin b/run-7/checkpoint-32/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-32/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-40/config.json b/run-7/checkpoint-40/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-40/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-40/model.safetensors b/run-7/checkpoint-40/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10f2e7b1b7224ad052e307f1828ee6ca048e223f --- /dev/null +++ b/run-7/checkpoint-40/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f91cbeafc179cd8a00a66a633cffbe6469fc4cbca8038baa2d307a8c4bdfd34 +size 94763496 diff --git a/run-7/checkpoint-40/optimizer.pt b/run-7/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e078fb86c9574e654e56b806840e26551eb703d9 --- /dev/null +++ b/run-7/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f9d44419b17f0a615aa7b2b7c9480759b353100742ee9604c0d98e9a0c05bcd +size 189552570 diff --git a/run-7/checkpoint-40/preprocessor_config.json b/run-7/checkpoint-40/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-40/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-40/rng_state.pth b/run-7/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd74b6ba63e493014ddf15a58a2c7a7388efcaef --- /dev/null +++ b/run-7/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f04394838d097ad37a86e783ddc6557596b7da068eb72e2798acacf43f38924 +size 14244 diff --git a/run-7/checkpoint-40/scheduler.pt b/run-7/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..30c5f096eef6464133134af6c22f711d295f59ea --- /dev/null +++ b/run-7/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b7d374ab551d05f65adafbee84b368c814db764d8fcdc7da1fb12ac5dcd781b +size 1064 diff --git a/run-7/checkpoint-40/trainer_state.json b/run-7/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9f824744650c65949d25f257ee2b2b21e3b1a441 --- /dev/null +++ b/run-7/checkpoint-40/trainer_state.json @@ -0,0 +1,125 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 5.0, + "eval_steps": 500, + "global_step": 40, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.611359179019928, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6029, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.3782584071159363, + "learning_rate": 9.234601535343717e-06, + "loss": 0.5986, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7192001342773438, + "eval_runtime": 1.3984, + "eval_samples_per_second": 45.766, + "eval_steps_per_second": 5.721, + "step": 40 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5160946137408096.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-40/training_args.bin b/run-7/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-48/config.json b/run-7/checkpoint-48/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-48/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-48/model.safetensors b/run-7/checkpoint-48/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff8d3711e2db3be079ae516e2c185e04c6f66b80 --- /dev/null +++ b/run-7/checkpoint-48/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16b4c6dbe8ae9ea509fee2c574b68aec11825cc46212ec22995a151be26fd01 +size 94763496 diff --git a/run-7/checkpoint-48/optimizer.pt b/run-7/checkpoint-48/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8c0c1de5ed98060a6ed5d92acb2db9ae50f2025 --- /dev/null +++ b/run-7/checkpoint-48/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fcd6303ef1dfd39ebec1392a1fc954b1998c81c69c57de3963fcda6ef91cd9 +size 189552570 diff --git a/run-7/checkpoint-48/preprocessor_config.json b/run-7/checkpoint-48/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-48/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-48/rng_state.pth b/run-7/checkpoint-48/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f14c60940aa7f4644eedab3c03294336305a5c97 --- /dev/null +++ b/run-7/checkpoint-48/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8eb236be6c806fe9309f674bf7e81272faf7f91242ad8752235487116506f5f +size 14244 diff --git a/run-7/checkpoint-48/scheduler.pt b/run-7/checkpoint-48/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..efa58121aca565bc643a729204e0583dfeee47f1 --- /dev/null +++ b/run-7/checkpoint-48/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d38154d1c0e6a32715713a56943fcfb9ef06395c7cb651922ea92b53bd230c99 +size 1064 diff --git a/run-7/checkpoint-48/trainer_state.json b/run-7/checkpoint-48/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c5f79459186ec47c43a1e6cc424355c73c8921b4 --- /dev/null +++ b/run-7/checkpoint-48/trainer_state.json @@ -0,0 +1,141 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 6.0, + "eval_steps": 500, + "global_step": 48, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.611359179019928, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6029, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.3782584071159363, + "learning_rate": 9.234601535343717e-06, + "loss": 0.5986, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7192001342773438, + "eval_runtime": 1.3984, + "eval_samples_per_second": 45.766, + "eval_steps_per_second": 5.721, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.8225945234298706, + "learning_rate": 8.080276343425752e-06, + "loss": 0.6027, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7113037109375, + "eval_runtime": 1.3638, + "eval_samples_per_second": 46.927, + "eval_steps_per_second": 5.866, + "step": 48 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 5785446571807776.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-48/training_args.bin b/run-7/checkpoint-48/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-48/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-56/config.json b/run-7/checkpoint-56/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-56/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-56/model.safetensors b/run-7/checkpoint-56/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95412795b16ceba9c5a76ed8503f9e9be4d1e0d2 --- /dev/null +++ b/run-7/checkpoint-56/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c6ab88268dc194515b45e45069e0fcb46eba8d3756b250a684302d4f4fd01d +size 94763496 diff --git a/run-7/checkpoint-56/optimizer.pt b/run-7/checkpoint-56/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e56065afca0218dbaacd4639df95582069b5c614 --- /dev/null +++ b/run-7/checkpoint-56/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f317a82c0c3b2dfad12ebb17952bce3013fcec7e9aef12a47769825535a9c6 +size 189552570 diff --git a/run-7/checkpoint-56/preprocessor_config.json b/run-7/checkpoint-56/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-56/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-56/rng_state.pth b/run-7/checkpoint-56/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c2c442646ebb4dfc9d3299de99caf85648452de --- /dev/null +++ b/run-7/checkpoint-56/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30f64c34451dd549a3dd22bd33aa2eb5ae76a449e28e339f69de985a5a36616a +size 14244 diff --git a/run-7/checkpoint-56/scheduler.pt b/run-7/checkpoint-56/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2443d63cbfce5e43cdb4b6fa728b45d3f01ca2b --- /dev/null +++ b/run-7/checkpoint-56/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e995eda7dbc93a2783acfebe4d812299d35f11f95c29229d31ded227dc053391 +size 1064 diff --git a/run-7/checkpoint-56/trainer_state.json b/run-7/checkpoint-56/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8be8d16f905c936fa276d3b1f625a8e0dae5b096 --- /dev/null +++ b/run-7/checkpoint-56/trainer_state.json @@ -0,0 +1,164 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 7.0, + "eval_steps": 500, + "global_step": 56, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.611359179019928, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6029, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.3782584071159363, + "learning_rate": 9.234601535343717e-06, + "loss": 0.5986, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7192001342773438, + "eval_runtime": 1.3984, + "eval_samples_per_second": 45.766, + "eval_steps_per_second": 5.721, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.8225945234298706, + "learning_rate": 8.080276343425752e-06, + "loss": 0.6027, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7113037109375, + "eval_runtime": 1.3638, + "eval_samples_per_second": 46.927, + "eval_steps_per_second": 5.866, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4112108051776886, + "learning_rate": 6.9259511515077876e-06, + "loss": 0.5633, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.43113499879837036, + "learning_rate": 5.771625959589823e-06, + "loss": 0.6274, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7047462463378906, + "eval_runtime": 1.3741, + "eval_samples_per_second": 46.577, + "eval_steps_per_second": 5.822, + "step": 56 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7063357918465536.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-56/training_args.bin b/run-7/checkpoint-56/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-56/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-64/config.json b/run-7/checkpoint-64/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-64/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-64/model.safetensors b/run-7/checkpoint-64/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0622a923ac105b52d0d7aebabc39a3a79b30f598 --- /dev/null +++ b/run-7/checkpoint-64/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19e97c5bd4bc46e6ae059d65c13c7c9588ed85aa2f814f34f20000f3cb5b6038 +size 94763496 diff --git a/run-7/checkpoint-64/optimizer.pt b/run-7/checkpoint-64/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b52be44423a6190531723d45001e152a8deeb23 --- /dev/null +++ b/run-7/checkpoint-64/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb5bf45022cf0d15509145b22cd06713213a86b51404b2015b9f7395404c5cc +size 189552570 diff --git a/run-7/checkpoint-64/preprocessor_config.json b/run-7/checkpoint-64/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-64/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-64/rng_state.pth b/run-7/checkpoint-64/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6aac58eb9db9f0c698caef7e9a1d2ad03bccfd28 --- /dev/null +++ b/run-7/checkpoint-64/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b673e22294536a7957bb7890d05b112f64cb515eb9ce49dc217b8d30f7372c +size 14244 diff --git a/run-7/checkpoint-64/scheduler.pt b/run-7/checkpoint-64/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7b0fd81b4a93239d21838f876e6fadefc71ee3f --- /dev/null +++ b/run-7/checkpoint-64/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e2a687a3be5d60bc45d96e2b82b1cbd3c15175a5d42c6dea58af94a32e1579 +size 1064 diff --git a/run-7/checkpoint-64/trainer_state.json b/run-7/checkpoint-64/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3e2ea6fb3bb9223fedb0608b26a67ddb104340e5 --- /dev/null +++ b/run-7/checkpoint-64/trainer_state.json @@ -0,0 +1,180 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 8.0, + "eval_steps": 500, + "global_step": 64, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.611359179019928, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6029, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.3782584071159363, + "learning_rate": 9.234601535343717e-06, + "loss": 0.5986, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7192001342773438, + "eval_runtime": 1.3984, + "eval_samples_per_second": 45.766, + "eval_steps_per_second": 5.721, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.8225945234298706, + "learning_rate": 8.080276343425752e-06, + "loss": 0.6027, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7113037109375, + "eval_runtime": 1.3638, + "eval_samples_per_second": 46.927, + "eval_steps_per_second": 5.866, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4112108051776886, + "learning_rate": 6.9259511515077876e-06, + "loss": 0.5633, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.43113499879837036, + "learning_rate": 5.771625959589823e-06, + "loss": 0.6274, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7047462463378906, + "eval_runtime": 1.3741, + "eval_samples_per_second": 46.577, + "eval_steps_per_second": 5.822, + "step": 56 + }, + { + "epoch": 7.5, + "grad_norm": 0.7747747302055359, + "learning_rate": 4.617300767671859e-06, + "loss": 0.5591, + "step": 60 + }, + { + "epoch": 8.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.70208740234375, + "eval_runtime": 1.3676, + "eval_samples_per_second": 46.799, + "eval_steps_per_second": 5.85, + "step": 64 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 7684013856281568.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-64/training_args.bin b/run-7/checkpoint-64/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-64/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-72/config.json b/run-7/checkpoint-72/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-72/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-72/model.safetensors b/run-7/checkpoint-72/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6cc49a8cfaef7b77679676b46f257cf3facb9d8e --- /dev/null +++ b/run-7/checkpoint-72/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d63de214cb63e787b9c250d97b4a4863eafa217bcb19687be7893a5c5b87b93 +size 94763496 diff --git a/run-7/checkpoint-72/optimizer.pt b/run-7/checkpoint-72/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce82a4b844bada1276c4d84848f51ed4a5d3cc09 --- /dev/null +++ b/run-7/checkpoint-72/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04461b4044d820352eb1ed6ede14e6791035be8794ea3773ae6580a1c6426d42 +size 189552570 diff --git a/run-7/checkpoint-72/preprocessor_config.json b/run-7/checkpoint-72/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-72/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-72/rng_state.pth b/run-7/checkpoint-72/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9abbaf7ba6e3b9a7bd68b6e24e4c92bbf3cea3a1 --- /dev/null +++ b/run-7/checkpoint-72/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e22727a737d5214e5e4bb5d2c4d6b5bc51d41532847072e78e71dd1fe79e85 +size 14244 diff --git a/run-7/checkpoint-72/scheduler.pt b/run-7/checkpoint-72/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1889655195f7f59435e6c39202646a89b9d604cf --- /dev/null +++ b/run-7/checkpoint-72/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da72eb1d0d7712d7fefb634df37dd842202156ced77a27ea9b22b6b5a5ea6ffc +size 1064 diff --git a/run-7/checkpoint-72/trainer_state.json b/run-7/checkpoint-72/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aa61aec3a0973f700d4ac5c9f26dddab258e9939 --- /dev/null +++ b/run-7/checkpoint-72/trainer_state.json @@ -0,0 +1,203 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 9.0, + "eval_steps": 500, + "global_step": 72, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.611359179019928, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6029, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.3782584071159363, + "learning_rate": 9.234601535343717e-06, + "loss": 0.5986, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7192001342773438, + "eval_runtime": 1.3984, + "eval_samples_per_second": 45.766, + "eval_steps_per_second": 5.721, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.8225945234298706, + "learning_rate": 8.080276343425752e-06, + "loss": 0.6027, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7113037109375, + "eval_runtime": 1.3638, + "eval_samples_per_second": 46.927, + "eval_steps_per_second": 5.866, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4112108051776886, + "learning_rate": 6.9259511515077876e-06, + "loss": 0.5633, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.43113499879837036, + "learning_rate": 5.771625959589823e-06, + "loss": 0.6274, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7047462463378906, + "eval_runtime": 1.3741, + "eval_samples_per_second": 46.577, + "eval_steps_per_second": 5.822, + "step": 56 + }, + { + "epoch": 7.5, + "grad_norm": 0.7747747302055359, + "learning_rate": 4.617300767671859e-06, + "loss": 0.5591, + "step": 60 + }, + { + "epoch": 8.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.70208740234375, + "eval_runtime": 1.3676, + "eval_samples_per_second": 46.799, + "eval_steps_per_second": 5.85, + "step": 64 + }, + { + "epoch": 8.12, + "grad_norm": 0.4591865837574005, + "learning_rate": 3.4629755757538938e-06, + "loss": 0.5763, + "step": 65 + }, + { + "epoch": 8.75, + "grad_norm": 0.6979279518127441, + "learning_rate": 2.3086503838359293e-06, + "loss": 0.5406, + "step": 70 + }, + { + "epoch": 9.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7020683288574219, + "eval_runtime": 1.37, + "eval_samples_per_second": 46.716, + "eval_steps_per_second": 5.84, + "step": 72 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 8900730505322592.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-72/training_args.bin b/run-7/checkpoint-72/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-72/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-8/config.json b/run-7/checkpoint-8/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-8/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-8/model.safetensors b/run-7/checkpoint-8/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e46c73cf8479409fbfbcf64ef883885f1f866b0f --- /dev/null +++ b/run-7/checkpoint-8/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4652d206f002a9487eb5d7d33a33e545f267aa2d99f9a24bd3f5d9a89b3e6d3 +size 94763496 diff --git a/run-7/checkpoint-8/optimizer.pt b/run-7/checkpoint-8/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4889ec173788c189d1b8c8c80d54c64fc29d91f --- /dev/null +++ b/run-7/checkpoint-8/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f88841e8d0a9b7743434211f1981a05fa54d1051d03a5179e611bf694aaa5743 +size 189552570 diff --git a/run-7/checkpoint-8/preprocessor_config.json b/run-7/checkpoint-8/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-8/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-8/rng_state.pth b/run-7/checkpoint-8/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e10d5126e04c60511c8c05e1784185c09e323e2c --- /dev/null +++ b/run-7/checkpoint-8/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e03091e2157388fb8b0f9ca4d26aec69b92a5e0de9b959a167dd80de177794e4 +size 14244 diff --git a/run-7/checkpoint-8/scheduler.pt b/run-7/checkpoint-8/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..757cc1e6edce8b49e1d4f4537b253ad81f129c44 --- /dev/null +++ b/run-7/checkpoint-8/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb0138b392487976350b664f227394decf5b1f1f16f5a0a6d190fdd828554db +size 1064 diff --git a/run-7/checkpoint-8/trainer_state.json b/run-7/checkpoint-8/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..95726b956ff3914b74dda11a731c84a9225a1223 --- /dev/null +++ b/run-7/checkpoint-8/trainer_state.json @@ -0,0 +1,40 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 8, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 763220696221248.0, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-8/training_args.bin b/run-7/checkpoint-8/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-8/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-7/checkpoint-80/config.json b/run-7/checkpoint-80/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-7/checkpoint-80/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-7/checkpoint-80/model.safetensors b/run-7/checkpoint-80/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28d4e4c355e2a0bbcd54f3274134fabe8b82b10a --- /dev/null +++ b/run-7/checkpoint-80/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47112ae16a8563eb7e2092fb485ee6956d3dfde66a2b42a6a542b55f5251fb9 +size 94763496 diff --git a/run-7/checkpoint-80/optimizer.pt b/run-7/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa5e356027f0adcad2db691c99abde0938171828 --- /dev/null +++ b/run-7/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768e3d4a2246f88ebfac5d024b9da6f0b1b3fa8cc2eaa72c4c4e26c0105cf21c +size 189552570 diff --git a/run-7/checkpoint-80/preprocessor_config.json b/run-7/checkpoint-80/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-7/checkpoint-80/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-7/checkpoint-80/rng_state.pth b/run-7/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b35d63aebf845916c48bc0ed05c7024ddefd8bbb --- /dev/null +++ b/run-7/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06a1cfa28050193686009b273b6588b7a4ba7e5d273238c9a261a36db87a1f14 +size 14244 diff --git a/run-7/checkpoint-80/scheduler.pt b/run-7/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f0fba78ca37067a212787f562224e3eb187fc86 --- /dev/null +++ b/run-7/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200681f249603746d29726ceea84cba411a5136359de300bb2979b96061ad745 +size 1064 diff --git a/run-7/checkpoint-80/trainer_state.json b/run-7/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f1c34f8e32d9cb4a2703e2e2cde4b50ad393e437 --- /dev/null +++ b/run-7/checkpoint-80/trainer_state.json @@ -0,0 +1,226 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-7/checkpoint-8", + "epoch": 10.0, + "eval_steps": 500, + "global_step": 80, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.62, + "grad_norm": 0.9305598735809326, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6973, + "step": 5 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.68402099609375, + "eval_runtime": 1.3601, + "eval_samples_per_second": 47.054, + "eval_steps_per_second": 5.882, + "step": 8 + }, + { + "epoch": 1.25, + "grad_norm": 1.470983624458313, + "learning_rate": 1.6160552686851503e-05, + "loss": 0.6748, + "step": 10 + }, + { + "epoch": 1.88, + "grad_norm": 0.3902422785758972, + "learning_rate": 1.5006227494933539e-05, + "loss": 0.6423, + "step": 15 + }, + { + "epoch": 2.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6836051940917969, + "eval_runtime": 1.3646, + "eval_samples_per_second": 46.9, + "eval_steps_per_second": 5.863, + "step": 16 + }, + { + "epoch": 2.5, + "grad_norm": 0.4280204772949219, + "learning_rate": 1.3851902303015575e-05, + "loss": 0.6311, + "step": 20 + }, + { + "epoch": 3.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6989364624023438, + "eval_runtime": 1.3707, + "eval_samples_per_second": 46.693, + "eval_steps_per_second": 5.837, + "step": 24 + }, + { + "epoch": 3.12, + "grad_norm": 0.3204731047153473, + "learning_rate": 1.269757711109761e-05, + "loss": 0.5902, + "step": 25 + }, + { + "epoch": 3.75, + "grad_norm": 0.26261427998542786, + "learning_rate": 1.1543251919179645e-05, + "loss": 0.5967, + "step": 30 + }, + { + "epoch": 4.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7159881591796875, + "eval_runtime": 1.3893, + "eval_samples_per_second": 46.065, + "eval_steps_per_second": 5.758, + "step": 32 + }, + { + "epoch": 4.38, + "grad_norm": 0.611359179019928, + "learning_rate": 1.0388926727261681e-05, + "loss": 0.6029, + "step": 35 + }, + { + "epoch": 5.0, + "grad_norm": 0.3782584071159363, + "learning_rate": 9.234601535343717e-06, + "loss": 0.5986, + "step": 40 + }, + { + "epoch": 5.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7192001342773438, + "eval_runtime": 1.3984, + "eval_samples_per_second": 45.766, + "eval_steps_per_second": 5.721, + "step": 40 + }, + { + "epoch": 5.62, + "grad_norm": 0.8225945234298706, + "learning_rate": 8.080276343425752e-06, + "loss": 0.6027, + "step": 45 + }, + { + "epoch": 6.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7113037109375, + "eval_runtime": 1.3638, + "eval_samples_per_second": 46.927, + "eval_steps_per_second": 5.866, + "step": 48 + }, + { + "epoch": 6.25, + "grad_norm": 0.4112108051776886, + "learning_rate": 6.9259511515077876e-06, + "loss": 0.5633, + "step": 50 + }, + { + "epoch": 6.88, + "grad_norm": 0.43113499879837036, + "learning_rate": 5.771625959589823e-06, + "loss": 0.6274, + "step": 55 + }, + { + "epoch": 7.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7047462463378906, + "eval_runtime": 1.3741, + "eval_samples_per_second": 46.577, + "eval_steps_per_second": 5.822, + "step": 56 + }, + { + "epoch": 7.5, + "grad_norm": 0.7747747302055359, + "learning_rate": 4.617300767671859e-06, + "loss": 0.5591, + "step": 60 + }, + { + "epoch": 8.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.70208740234375, + "eval_runtime": 1.3676, + "eval_samples_per_second": 46.799, + "eval_steps_per_second": 5.85, + "step": 64 + }, + { + "epoch": 8.12, + "grad_norm": 0.4591865837574005, + "learning_rate": 3.4629755757538938e-06, + "loss": 0.5763, + "step": 65 + }, + { + "epoch": 8.75, + "grad_norm": 0.6979279518127441, + "learning_rate": 2.3086503838359293e-06, + "loss": 0.5406, + "step": 70 + }, + { + "epoch": 9.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7020683288574219, + "eval_runtime": 1.37, + "eval_samples_per_second": 46.716, + "eval_steps_per_second": 5.84, + "step": 72 + }, + { + "epoch": 9.38, + "grad_norm": 0.3886575698852539, + "learning_rate": 1.1543251919179647e-06, + "loss": 0.5971, + "step": 75 + }, + { + "epoch": 10.0, + "grad_norm": 0.48529914021492004, + "learning_rate": 0.0, + "loss": 0.6026, + "step": 80 + }, + { + "epoch": 10.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.7013587951660156, + "eval_runtime": 1.3618, + "eval_samples_per_second": 46.998, + "eval_steps_per_second": 5.875, + "step": 80 + } + ], + "logging_steps": 5, + "max_steps": 80, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 1.0210264286168832e+16, + "train_batch_size": 24, + "trial_name": null, + "trial_params": { + "learning_rate": 1.662228276361869e-05, + "per_device_train_batch_size": 24 + } +} diff --git a/run-7/checkpoint-80/training_args.bin b/run-7/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3ed091de26d4ed124925a68c24799ee73779421 --- /dev/null +++ b/run-7/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d102bc23c5f21041ad6ac7c59aca37f3d7061b6d3ffbee596206ded94a9f33c +size 4920 diff --git a/run-8/checkpoint-24/config.json b/run-8/checkpoint-24/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8a488f827577f57ac81420fa1b836ae1d5ceaeb --- /dev/null +++ b/run-8/checkpoint-24/config.json @@ -0,0 +1,80 @@ +{ + "_name_or_path": "ntu-spml/distilhubert", + "activation_dropout": 0.1, + "apply_spec_augment": false, + "architectures": [ + "HubertForSequenceClassification" + ], + "attention_dropout": 0.1, + "bos_token_id": 1, + "classifier_proj_size": 256, + "conv_bias": false, + "conv_dim": [ + 512, + 512, + 512, + 512, + 512, + 512, + 512 + ], + "conv_kernel": [ + 10, + 3, + 3, + 3, + 3, + 2, + 2 + ], + "conv_stride": [ + 5, + 2, + 2, + 2, + 2, + 2, + 2 + ], + "ctc_loss_reduction": "sum", + "ctc_zero_infinity": false, + "do_stable_layer_norm": false, + "eos_token_id": 2, + "feat_extract_activation": "gelu", + "feat_extract_norm": "group", + "feat_proj_dropout": 0.0, + "feat_proj_layer_norm": false, + "final_dropout": 0.0, + "hidden_act": "gelu", + "hidden_dropout": 0.1, + "hidden_size": 768, + "id2label": { + "0": "NOT_WORD", + "1": "WORD" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "label2id": { + "NOT_WORD": "0", + "WORD": "1" + }, + "layer_norm_eps": 1e-05, + "layerdrop": 0.0, + "mask_feature_length": 10, + "mask_feature_min_masks": 0, + "mask_feature_prob": 0.0, + "mask_time_length": 10, + "mask_time_min_masks": 2, + "mask_time_prob": 0.05, + "model_type": "hubert", + "num_attention_heads": 12, + "num_conv_pos_embedding_groups": 16, + "num_conv_pos_embeddings": 128, + "num_feat_extract_layers": 7, + "num_hidden_layers": 2, + "pad_token_id": 0, + "torch_dtype": "float32", + "transformers_version": "4.38.1", + "use_weighted_layer_sum": false, + "vocab_size": 32 +} diff --git a/run-8/checkpoint-24/model.safetensors b/run-8/checkpoint-24/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bab8f4b54c1f98c14358a5ceeca50e590739e9e --- /dev/null +++ b/run-8/checkpoint-24/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd38943271d452fb41e0cc1c9ac715cd036eaf1fd6859055f6d2b6c6d5d0ccb +size 94763496 diff --git a/run-8/checkpoint-24/optimizer.pt b/run-8/checkpoint-24/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e958708f839e2c14a48a0ec3328ee518cea07318 --- /dev/null +++ b/run-8/checkpoint-24/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73919292560355372fecc04314b092f40a10fca249a859642f1dde8b1f28390f +size 189552570 diff --git a/run-8/checkpoint-24/preprocessor_config.json b/run-8/checkpoint-24/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36ebe8b7c1cc967b3059f0494ae8a1069dd67655 --- /dev/null +++ b/run-8/checkpoint-24/preprocessor_config.json @@ -0,0 +1,9 @@ +{ + "do_normalize": true, + "feature_extractor_type": "Wav2Vec2FeatureExtractor", + "feature_size": 1, + "padding_side": "right", + "padding_value": 0, + "return_attention_mask": true, + "sampling_rate": 16000 +} diff --git a/run-8/checkpoint-24/rng_state.pth b/run-8/checkpoint-24/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0dfccad4abff54de4d7ebc9ca77c59ef24efaf5b --- /dev/null +++ b/run-8/checkpoint-24/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5fd464cfdc34ccc4a78e84a563ff84847a7f2ca68f8d1be703c5be378ce9c86 +size 14244 diff --git a/run-8/checkpoint-24/scheduler.pt b/run-8/checkpoint-24/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..58eb821591c2b98ebfab23fbc228d0a8970247d1 --- /dev/null +++ b/run-8/checkpoint-24/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dfde920da609cab1c702bf08823a86773b7a4ce3d65517cbe902fe3d312d523 +size 1064 diff --git a/run-8/checkpoint-24/trainer_state.json b/run-8/checkpoint-24/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..82896273dd81b9c1e3a7aa55c2ba0d94f168f248 --- /dev/null +++ b/run-8/checkpoint-24/trainer_state.json @@ -0,0 +1,61 @@ +{ + "best_metric": 0.7326732673267327, + "best_model_checkpoint": "distilhubert-finetuned-not-a-word2/run-8/checkpoint-24", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 24, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.21, + "grad_norm": 1.6496813297271729, + "learning_rate": 2.9441483262927863e-06, + "loss": 0.6992, + "step": 5 + }, + { + "epoch": 0.42, + "grad_norm": 0.620004415512085, + "learning_rate": 5.888296652585573e-06, + "loss": 0.6939, + "step": 10 + }, + { + "epoch": 0.62, + "grad_norm": 0.5726878046989441, + "learning_rate": 8.832444978878358e-06, + "loss": 0.6835, + "step": 15 + }, + { + "epoch": 0.83, + "grad_norm": 1.4239176511764526, + "learning_rate": 1.1776593305171145e-05, + "loss": 0.6673, + "step": 20 + }, + { + "epoch": 1.0, + "eval_f1": 0.7326732673267327, + "eval_loss": 0.6805419921875, + "eval_runtime": 1.3716, + "eval_samples_per_second": 46.661, + "eval_steps_per_second": 5.833, + "step": 24 + } + ], + "logging_steps": 5, + "max_steps": 240, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "total_flos": 670768569859200.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": { + "learning_rate": 1.4131911966205373e-05, + "per_device_train_batch_size": 8 + } +} diff --git a/run-8/checkpoint-24/training_args.bin b/run-8/checkpoint-24/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..d5c1c69e2334e136936b2e49c08c4956c68cf869 --- /dev/null +++ b/run-8/checkpoint-24/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd5bd287ce4223bd729031c331a786d123069a13358bbb44dde2321174ac948e +size 4920 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492234.ca56ea9bc35e.3883.7 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492234.ca56ea9bc35e.3883.7 new file mode 100644 index 0000000000000000000000000000000000000000..42f7304b4b3c1e8e6a378b6f71ae7d261d57edb1 --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492234.ca56ea9bc35e.3883.7 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d83715af9a743ea34cee8cceadf6c4cf05fd9f8efad6c7c33ea0c265e703512 +size 6659 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492241.ca56ea9bc35e.3883.8 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492241.ca56ea9bc35e.3883.8 new file mode 100644 index 0000000000000000000000000000000000000000..82ea1e073755cee5dc5bcd89189a11ec57bd831b --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492241.ca56ea9bc35e.3883.8 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0872a75ff5a2fab451e79ab906d8264f80d02cccac8a316c3b688b5901311ba +size 12290 diff --git a/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492371.ca56ea9bc35e.3883.9 b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492371.ca56ea9bc35e.3883.9 new file mode 100644 index 0000000000000000000000000000000000000000..0961952b9057558b8f33dbe159f5ea786aab733a --- /dev/null +++ b/runs/Mar03_18-24-09_ca56ea9bc35e/events.out.tfevents.1709492371.ca56ea9bc35e.3883.9 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8838e4f07a77ec6758014106ab1ec42e4fb7bd0f7ede99893b304d1774b90fbf +size 18688 diff --git a/training_args.bin b/training_args.bin index 35fa0af60e4dca57930333ada64bcb1b3d5e32fe..d5c1c69e2334e136936b2e49c08c4956c68cf869 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:064d1923d7ff14a10cb0625f0bc3dd479a67056d28f933cdfbfdecd320e675db +oid sha256:dd5bd287ce4223bd729031c331a786d123069a13358bbb44dde2321174ac948e size 4920