bengali_1B/trainer_push

Browse files

Files changed (13) hide show

.gitattributes +1 -0
README.md +56 -0
config.json +108 -0
dataset-overlaps-with-commonvoice-11-bn.log +157 -0
filtered.csv +0 -0
indexes.csv +0 -0
kaggle.json +1 -0
macro-normalization.log +0 -0
normalized.csv +3 -0
preprocessor_config.json +10 -0
python-packages2.zip +3 -0
pytorch_model.bin +3 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+normalized.csv filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,56 @@

+---
+license: cc-by-nc-4.0
+base_model: Umong/wav2vec2-large-mms-1b-bengali
+tags:
+- generated_from_trainer
+model-index:
+- name: Umong/wav2vec2-large-mms-1b-bengali
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Umong/wav2vec2-large-mms-1b-bengali
+This model is a fine-tuned version of [Umong/wav2vec2-large-mms-1b-bengali](https://huggingface.co/Umong/wav2vec2-large-mms-1b-bengali) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- eval_loss: 0.8979
+- eval_runtime: 77.1222
+- eval_samples_per_second: 6.483
+- eval_steps_per_second: 1.621
+- epoch: 0.84
+- step: 9000
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-06
+- train_batch_size: 2
+- eval_batch_size: 4
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 2000
+- num_epochs: 1
+### Framework versions
+- Transformers 4.33.3
+- Pytorch 2.0.1+cu118
+- Datasets 2.14.5
+- Tokenizers 0.13.3

config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "_name_or_path": "Umong/wav2vec2-large-mms-1b-bengali",
+  "activation_dropout": 0.05,
+  "adapter_attn_dim": 16,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 1024,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": true,
+  "diversity_loss_weight": 100,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.05,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.2,
+  "hidden_size": 1280,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.2,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.1,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 48,
+  "num_negatives": 100,
+  "output_hidden_size": 1280,
+  "pad_token_id": 63,
+  "proj_codevector_dim": 1024,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.33.3",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 66,
+  "xvector_output_dim": 512
+}

dataset-overlaps-with-commonvoice-11-bn.log ADDED Viewed

	@@ -0,0 +1,157 @@

+[{"stream_name":"stderr","time":9.00453959,"data":"[IPKernelApp] WARNING | Error in loading extension: bq_stats\n"}
+,{"stream_name":"stderr","time":9.0046337,"data":"Check your config files in /root/.ipython/profile_default\n"}
+,{"stream_name":"stderr","time":9.00466419,"data":"Traceback (most recent call last):\n"}
+,{"stream_name":"stderr","time":9.00467042,"data":"  File \"/opt/conda/lib/python3.10/site-packages/IPython/core/shellapp.py\", line 282, in init_extensions\n"}
+,{"stream_name":"stderr","time":9.00467969,"data":"    self.shell.extension_manager.load_extension(ext)\n"}
+,{"stream_name":"stderr","time":9.00468438,"data":"  File \"/opt/conda/lib/python3.10/site-packages/IPython/core/extensions.py\", line 76, in load_extension\n"}
+,{"stream_name":"stderr","time":9.00468905,"data":"    return self._load_extension(module_str)\n"}
+,{"stream_name":"stderr","time":9.00469301,"data":"  File \"/opt/conda/lib/python3.10/site-packages/IPython/core/extensions.py\", line 91, in _load_extension\n"}
+,{"stream_name":"stderr","time":9.00469716,"data":"    mod = import_module(module_str)\n"}
+,{"stream_name":"stderr","time":9.00470076,"data":"  File \"/opt/conda/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n"}
+,{"stream_name":"stderr","time":9.00470429,"data":"    return _bootstrap._gcd_import(name[level:], package, level)\n"}
+,{"stream_name":"stderr","time":9.00470784,"data":"  File \"\u003cfrozen importlib._bootstrap\u003e\", line 1050, in _gcd_import\n"}
+,{"stream_name":"stderr","time":9.00471202,"data":"  File \"\u003cfrozen importlib._bootstrap\u003e\", line 1027, in _find_and_load\n"}
+,{"stream_name":"stderr","time":9.00471592,"data":"  File \"\u003cfrozen importlib._bootstrap\u003e\", line 1004, in _find_and_load_unlocked\n"}
+,{"stream_name":"stderr","time":9.0047199,"data":"ModuleNotFoundError: No module named 'bq_stats'\n"}
+,{"stream_name":"stdout","time":12.189450805,"data":"Downloading and preparing dataset common_voice/bn to /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/bn/11.0.0/3f27acf10f303eac5b6fbbbe02495aeddb46ecffdb0a2fe3507fcfbf89094631...\n"}
+,{"stream_name":"stderr","time":282.186192879,"data":"\n"}
+,{"stream_name":"stderr","time":282.292363731,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":282.386399175,"data":"\rReading metadata...: 11398it [00:00, 113963.81it/s]\u001b[A\rReading metadata...: 16777it [00:00, 107248.43it/s]\n"}
+,{"stream_name":"stderr","time":291.233711451,"data":"\n"}
+,{"stream_name":"stderr","time":291.432171637,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 8353it [00:00, 132338.47it/s]\n"}
+,{"stream_name":"stderr","time":294.545279107,"data":"\n"}
+,{"stream_name":"stderr","time":294.740125853,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 8353it [00:00, 137305.01it/s]\n"}
+,{"stream_name":"stderr","time":298.394769736,"data":"\n"}
+,{"stream_name":"stderr","time":298.496171909,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":298.604225211,"data":"\rReading metadata...: 14119it [00:00, 141173.98it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":298.708220034,"data":"\rReading metadata...: 28237it [00:00, 134681.19it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":298.808317277,"data":"\rReading metadata...: 41726it [00:00, 132718.44it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":298.90815864,"data":"\rReading metadata...: 55106it [00:00, 133130.71it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.012041292,"data":"\rReading metadata...: 68773it [00:00, 134382.80it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.117872665,"data":"\rReading metadata...: 82218it [00:00, 132626.22it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.221650208,"data":"\rReading metadata...: 95488it [00:00, 130352.31it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.32609062,"data":"\rReading metadata...: 108533it [00:00, 128713.86it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.430928863,"data":"\rReading metadata...: 121412it [00:00, 126866.61it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.530856126,"data":"\rReading metadata...: 134105it [00:01, 125081.83it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.633776039,"data":"\rReading metadata...: 146905it [00:01, 125945.08it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.734081722,"data":"\rReading metadata...: 159506it [00:01, 124890.05it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.837771394,"data":"\rReading metadata...: 172505it [00:01, 126400.87it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":299.939560147,"data":"\rReading metadata...: 185152it [00:01, 124997.06it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":300.0414487,"data":"\rReading metadata...: 197658it [00:01, 124365.12it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":300.139746573,"data":"\rReading metadata...: 210414it [00:01, 125308.06it/s]\u001b[A\n"}
+,{"stream_name":"stderr","time":300.240470356,"data":"\rReading metadata...: 222996it [00:01, 125457.18it/s]\u001b[A\rReading metadata...: 225826it [00:01, 127471.88it/s]\n"}
+,{"stream_name":"stderr","time":433.997174478,"data":"\n"}
+,{"stream_name":"stderr","time":434.195508939,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 6447it [00:00, 80059.80it/s]\n"}
+,{"stream_name":"stdout","time":437.937668012,"data":"Dataset common_voice downloaded and prepared to /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/bn/11.0.0/3f27acf10f303eac5b6fbbbe02495aeddb46ecffdb0a2fe3507fcfbf89094631. Subsequent calls will reuse this data.\n"}
+,{"stream_name":"stdout","time":444.668977881,"data":"Split Name :  train\n"}
+,{"stream_name":"stdout","time":444.669015671,"data":"Total audios in commonvoice train:  16777\n"}
+,{"stream_name":"stdout","time":444.669026061,"data":"Total audios in train :  16041\n"}
+,{"stream_name":"stdout","time":444.669031571,"data":"Total audios in val :  0\n"}
+,{"stream_name":"stdout","time":444.669035791,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":444.669039811,"data":"Split Name :  test\n"}
+,{"stream_name":"stdout","time":444.669044411,"data":"Total audios in commonvoice test:  8353\n"}
+,{"stream_name":"stdout","time":444.669048451,"data":"Total audios in train :  7531\n"}
+,{"stream_name":"stdout","time":444.669052721,"data":"Total audios in val :  0\n"}
+,{"stream_name":"stdout","time":444.669056741,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":444.669060981,"data":"Split Name :  validation\n"}
+,{"stream_name":"stdout","time":444.669065171,"data":"Total audios in commonvoice validation:  8353\n"}
+,{"stream_name":"stdout","time":444.669069191,"data":"Total audios in train :  7769\n"}
+,{"stream_name":"stdout","time":444.669073661,"data":"Total audios in val :  0\n"}
+,{"stream_name":"stdout","time":444.669077531,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":445.519689576,"data":"Split Name :  other\n"}
+,{"stream_name":"stdout","time":445.559222476,"data":"Total audios in commonvoice other:  225826\n"}
+,{"stream_name":"stdout","time":445.559279306,"data":"Total audios in train :  224996\n"}
+,{"stream_name":"stdout","time":445.559313656,"data":"Total audios in val :  0\n"}
+,{"stream_name":"stdout","time":445.559319516,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":445.559323646,"data":"Split Name :  invalidated\n"}
+,{"stream_name":"stdout","time":445.559327506,"data":"Total audios in commonvoice invalidated:  6447\n"}
+,{"stream_name":"stdout","time":445.559332256,"data":"Total audios in train :  5627\n"}
+,{"stream_name":"stdout","time":445.559335886,"data":"Total audios in val :  0\n"}
+,{"stream_name":"stdout","time":445.559339526,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":445.559342966,"data":"Total common voice audio :265756\n"}
+,{"stream_name":"stdout","time":445.559346556,"data":" Audios present here : 261964\n"}
+,{"stream_name":"stderr","time":445.560502156,"data":"\r  0%|          | 0/5 [00:00\u003c?, ?it/s]\r 20%|██        | 1/5 [00:00\u003c00:00,  6.84it/s]\r 60%|██████    | 3/5 [00:00\u003c00:00, 10.79it/s]\r100%|██████████| 5/5 [00:01\u003c00:00,  2.58it/s]\r100%|██████████| 5/5 [00:01\u003c00:00,  3.07it/s]\n"}
+,{"stream_name":"stderr","time":446.123441549,"data":"\r  0%|          | 0/5 [00:00\u003c?, ?it/s]\r 80%|████████  | 4/5 [00:00\u003c00:00,  8.48it/s]\r100%|██████████| 5/5 [00:00\u003c00:00, 10.24it/s]\n"}
+,{"stream_name":"stderr","time":447.040023925,"data":"\r0it [00:00, ?it/s]\r106171it [00:00, 1061592.05it/s]\r216964it [00:00, 1088775.74it/s]\r337425it [00:00, 1141613.17it/s]\r452967it [00:00, 1147016.53it/s]\r572951it [00:00, 1166016.06it/s]\r689553it [00:00, 1157771.60it/s]\r810008it [00:00, 1172950.29it/s]\r935257it [00:00, 1198143.09it/s]\r963636it [00:00, 1155136.54it/s]\n"}
+,{"stream_name":"stdout","time":453.23643988,"data":"Sentence :  এরা সবাই দাস হিসেবে একটি জাহাজে করে বিদেশে পাচার হচ্ছিল।\n"}
+,{"stream_name":"stdout","time":453.23650391,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":453.432182231,"data":"Competition data audio :  ae1be00ad59d.mp3\n"}
+,{"stream_name":"stdout","time":453.783724884,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":453.783783484,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":453.783788414,"data":"Sentence :  তিনি জানান এই কাজ সুভাষ দত্ত করবে এবং রহমানকে তার সহকারী হিসেবে যোগ দিতে বলেন।\n"}
+,{"stream_name":"stdout","time":453.783794164,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":453.815708834,"data":"Competition data audio :  2bfa78215372.mp3\n"}
+,{"stream_name":"stdout","time":454.228451986,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":454.228507256,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":454.228511766,"data":"Sentence :  এটি দক্ষিণ মিশরের একটি শহর।\n"}
+,{"stream_name":"stdout","time":454.228515496,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":454.250528576,"data":"Competition data audio :  98f9873ba235.mp3\n"}
+,{"stream_name":"stdout","time":454.698182469,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":454.698266699,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":454.702478109,"data":"Sentence :  বর্তমানে এদের আবাসস্থল হুমকির মুখে।\n"}
+,{"stream_name":"stdout","time":454.702567559,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":454.90345134,"data":"Competition data audio :  5831fd3d7134.mp3\n"}
+,{"stream_name":"stdout","time":455.111504631,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":455.111528311,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":455.111531851,"data":"Sentence :  এটি একটি গুরুত্বপূর্ণ রেল বিভাগের জন্য পরিচিত।\n"}
+,{"stream_name":"stdout","time":455.111535181,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":455.135717701,"data":"Competition data audio :  ba8e9236358f.mp3\n"}
+,{"stream_name":"stdout","time":455.327957002,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":455.328001362,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":457.842483597,"data":"Sentence :  এটি সামগ্রিক পাঠক্রম, কোর্স, পরীক্ষা এবং ফলাফলগুলি নিয়ন্ত্রণ করে এবং অনুমোদন করে।\n"}
+,{"stream_name":"stdout","time":457.842531877,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":458.040373948,"data":"Multiple audios in the competition dataset with the same sentence \n"}
+,{"stream_name":"stdout","time":458.040398808,"data":"\n"}
+,{"stream_name":"stdout","time":458.040401888,"data":"Competition data audio :  79660f8540b0.mp3\n"}
+,{"stream_name":"stdout","time":458.302004169,"data":"Competition data audio :  88db4447d274.mp3\n"}
+,{"stream_name":"stdout","time":458.532606861,"data":"Competition data audio :  ed34fbd6cf0b.mp3\n"}
+,{"stream_name":"stdout","time":458.770749963,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":458.770780352,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":464.378259715,"data":"Sentence :  তাদের একটি ছেলে এবং একটি মেয়ে আছে।\n"}
+,{"stream_name":"stdout","time":464.378324565,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":464.576431276,"data":"Multiple audios in the competition dataset with the same sentence \n"}
+,{"stream_name":"stdout","time":464.576468386,"data":"\n"}
+,{"stream_name":"stdout","time":464.576498526,"data":"Competition data audio :  30e710e39566.mp3\n"}
+,{"stream_name":"stdout","time":464.811630637,"data":"Competition data audio :  b4c61f0f5afd.mp3\n"}
+,{"stream_name":"stdout","time":465.036936918,"data":"Competition data audio :  e4cc57dcf517.mp3\n"}
+,{"stream_name":"stdout","time":465.30257676,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":465.30260857,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":466.057549354,"data":"Sentence :  শীতল জলবায়ুতে কাণ্ডের বেশিরভাগ বৃদ্ধি বসন্ত এবং গ্রীষ্মের শুরুতে ঘটে।\n"}
+,{"stream_name":"stdout","time":466.057645774,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":466.255401516,"data":"Multiple audios in the competition dataset with the same sentence \n"}
+,{"stream_name":"stdout","time":466.255426845,"data":"\n"}
+,{"stream_name":"stdout","time":466.255431045,"data":"Competition data audio :  113cd642691c.mp3\n"}
+,{"stream_name":"stdout","time":466.499374247,"data":"Competition data audio :  968e266863e8.mp3\n"}
+,{"stream_name":"stdout","time":466.720088108,"data":"Competition data audio :  b5764e70557d.mp3\n"}
+,{"stream_name":"stdout","time":467.01409308,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":467.01413339,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":468.061828886,"data":"Sentence :  ইউনিটটি বাংলাদেশে বৈজ্ঞানিক গবেষণা তহবিল গঠন করেছে।\n"}
+,{"stream_name":"stdout","time":468.061865106,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":468.260425757,"data":"Multiple audios in the competition dataset with the same sentence \n"}
+,{"stream_name":"stdout","time":468.260479287,"data":"\n"}
+,{"stream_name":"stdout","time":468.260483737,"data":"Competition data audio :  30621db2115d.mp3\n"}
+,{"stream_name":"stdout","time":468.491173878,"data":"Competition data audio :  578726cb78a4.mp3\n"}
+,{"stream_name":"stdout","time":468.7056183,"data":"Competition data audio :  ec9f81af5c0a.mp3\n"}
+,{"stream_name":"stdout","time":468.71190234,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":468.71194852,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":471.298306515,"data":"Sentence :  বর্তমানে জমিদার বাড়ির ভৌত কাঠামো সংরক্ষণের অভাবে নষ্ট হয়ে যাচ্ছে।\n"}
+,{"stream_name":"stdout","time":471.298344275,"data":"Common Voice audio :\n"}
+,{"stream_name":"stdout","time":471.496049196,"data":"Multiple audios in the competition dataset with the same sentence \n"}
+,{"stream_name":"stdout","time":471.496102506,"data":"\n"}
+,{"stream_name":"stdout","time":471.496109866,"data":"Competition data audio :  17f4979f652e.mp3\n"}
+,{"stream_name":"stdout","time":471.730514617,"data":"Competition data audio :  87f243f631ea.mp3\n"}
+,{"stream_name":"stdout","time":471.959682938,"data":"Competition data audio :  90f45aad66a0.mp3\n"}
+,{"stream_name":"stdout","time":471.963997418,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":471.964014048,"data":"--------------------------------------------------------------------------------\n"}
+,{"stream_name":"stdout","time":473.207362935,"data":"Audio in Common Voice dataset : \n"}
+,{"stream_name":"stderr","time":479.415976081,"data":"/opt/conda/lib/python3.10/site-packages/traitlets/traitlets.py:2930: FutureWarning: --Exporter.preprocessors=[\"remove_papermill_header.RemovePapermillHeader\"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.\n"}
+,{"stream_name":"stderr","time":479.416022431,"data":"  warn(\n"}
+,{"stream_name":"stderr","time":479.420470711,"data":"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n"}
+,{"stream_name":"stderr","time":479.445263511,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to notebook\n"}
+,{"stream_name":"stderr","time":480.106029845,"data":"[NbConvertApp] Writing 9378981 bytes to __notebook__.ipynb\n"}
+,{"stream_name":"stderr","time":481.686893084,"data":"/opt/conda/lib/python3.10/site-packages/traitlets/traitlets.py:2930: FutureWarning: --Exporter.preprocessors=[\"nbconvert.preprocessors.ExtractOutputPreprocessor\"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.\n"}
+,{"stream_name":"stderr","time":481.687327044,"data":"  warn(\n"}
+,{"stream_name":"stderr","time":481.690677654,"data":"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n"}
+,{"stream_name":"stderr","time":481.722324954,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to html\n"}
+,{"stream_name":"stderr","time":483.099897632,"data":"[NbConvertApp] Writing 9589984 bytes to __results__.html\n"}
+]

filtered.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

indexes.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

kaggle.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"username":"nguynminhph","key":"<REDACTED-REVOKE-AND-REGENERATE-THIS-KAGGLE-API-KEY>"}

macro-normalization.log ADDED Viewed

The diff for this file is too large to render. See raw diff

normalized.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06432a5dd7b5b27d38d9bbaebcd64aa85b79b85d0c4770ba66420bb63457fd24
+size 297143462

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

python-packages2.zip ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c54f691d9a222bb3c61dc4db5574d209628f4e667be964cb8d50c1d7db11ef8f
+size 17653201

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f0960f05cdfd4480f2339a52034944ac99249181b2f20278a9b26ee3950c577
+size 3859313933

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd6572be7f0af6eabc0c935a13bbdbe6d745016ae2cdfc5b122b6a37750c92ac
+size 3963