pepoo20 committed on
Commit
146fd74
1 Parent(s): 71ed9e1

bengali_1B/trainer_push

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ normalized.csv filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-4.0
3
+ base_model: Umong/wav2vec2-large-mms-1b-bengali
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: Umong/wav2vec2-large-mms-1b-bengali
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # Umong/wav2vec2-large-mms-1b-bengali
15
+
16
+ This model is a fine-tuned version of [Umong/wav2vec2-large-mms-1b-bengali](https://huggingface.co/Umong/wav2vec2-large-mms-1b-bengali) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - eval_loss: 0.8979
19
+ - eval_runtime: 77.1222
20
+ - eval_samples_per_second: 6.483
21
+ - eval_steps_per_second: 1.621
22
+ - epoch: 0.84
23
+ - step: 9000
24
+
25
+ ## Model description
26
+
27
+ More information needed
28
+
29
+ ## Intended uses & limitations
30
+
31
+ More information needed
32
+
33
+ ## Training and evaluation data
34
+
35
+ More information needed
36
+
37
+ ## Training procedure
38
+
39
+ ### Training hyperparameters
40
+
41
+ The following hyperparameters were used during training:
42
+ - learning_rate: 2e-06
43
+ - train_batch_size: 2
44
+ - eval_batch_size: 4
45
+ - seed: 42
46
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: cosine
48
+ - lr_scheduler_warmup_steps: 2000
49
+ - num_epochs: 1
50
+
51
+ ### Framework versions
52
+
53
+ - Transformers 4.33.3
54
+ - Pytorch 2.0.1+cu118
55
+ - Datasets 2.14.5
56
+ - Tokenizers 0.13.3
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Umong/wav2vec2-large-mms-1b-bengali",
3
+ "activation_dropout": 0.05,
4
+ "adapter_attn_dim": 16,
5
+ "adapter_kernel_size": 3,
6
+ "adapter_stride": 2,
7
+ "add_adapter": false,
8
+ "apply_spec_augment": true,
9
+ "architectures": [
10
+ "Wav2Vec2ForCTC"
11
+ ],
12
+ "attention_dropout": 0.1,
13
+ "bos_token_id": 1,
14
+ "classifier_proj_size": 256,
15
+ "codevector_dim": 1024,
16
+ "contrastive_logits_temperature": 0.1,
17
+ "conv_bias": true,
18
+ "conv_dim": [
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512,
25
+ 512
26
+ ],
27
+ "conv_kernel": [
28
+ 10,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 3,
33
+ 2,
34
+ 2
35
+ ],
36
+ "conv_stride": [
37
+ 5,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2,
43
+ 2
44
+ ],
45
+ "ctc_loss_reduction": "mean",
46
+ "ctc_zero_infinity": true,
47
+ "diversity_loss_weight": 100,
48
+ "do_stable_layer_norm": true,
49
+ "eos_token_id": 2,
50
+ "feat_extract_activation": "gelu",
51
+ "feat_extract_dropout": 0.0,
52
+ "feat_extract_norm": "layer",
53
+ "feat_proj_dropout": 0.0,
54
+ "feat_quantizer_dropout": 0.0,
55
+ "final_dropout": 0.05,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.2,
58
+ "hidden_size": 1280,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 5120,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.2,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.1,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 48,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1280,
80
+ "pad_token_id": 63,
81
+ "proj_codevector_dim": 1024,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.33.3",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 66,
107
+ "xvector_output_dim": 512
108
+ }
dataset-overlaps-with-commonvoice-11-bn.log ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"stream_name":"stderr","time":9.00453959,"data":"[IPKernelApp] WARNING | Error in loading extension: bq_stats\n"}
2
+ ,{"stream_name":"stderr","time":9.0046337,"data":"Check your config files in /root/.ipython/profile_default\n"}
3
+ ,{"stream_name":"stderr","time":9.00466419,"data":"Traceback (most recent call last):\n"}
4
+ ,{"stream_name":"stderr","time":9.00467042,"data":" File \"/opt/conda/lib/python3.10/site-packages/IPython/core/shellapp.py\", line 282, in init_extensions\n"}
5
+ ,{"stream_name":"stderr","time":9.00467969,"data":" self.shell.extension_manager.load_extension(ext)\n"}
6
+ ,{"stream_name":"stderr","time":9.00468438,"data":" File \"/opt/conda/lib/python3.10/site-packages/IPython/core/extensions.py\", line 76, in load_extension\n"}
7
+ ,{"stream_name":"stderr","time":9.00468905,"data":" return self._load_extension(module_str)\n"}
8
+ ,{"stream_name":"stderr","time":9.00469301,"data":" File \"/opt/conda/lib/python3.10/site-packages/IPython/core/extensions.py\", line 91, in _load_extension\n"}
9
+ ,{"stream_name":"stderr","time":9.00469716,"data":" mod = import_module(module_str)\n"}
10
+ ,{"stream_name":"stderr","time":9.00470076,"data":" File \"/opt/conda/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n"}
11
+ ,{"stream_name":"stderr","time":9.00470429,"data":" return _bootstrap._gcd_import(name[level:], package, level)\n"}
12
+ ,{"stream_name":"stderr","time":9.00470784,"data":" File \"\u003cfrozen importlib._bootstrap\u003e\", line 1050, in _gcd_import\n"}
13
+ ,{"stream_name":"stderr","time":9.00471202,"data":" File \"\u003cfrozen importlib._bootstrap\u003e\", line 1027, in _find_and_load\n"}
14
+ ,{"stream_name":"stderr","time":9.00471592,"data":" File \"\u003cfrozen importlib._bootstrap\u003e\", line 1004, in _find_and_load_unlocked\n"}
15
+ ,{"stream_name":"stderr","time":9.0047199,"data":"ModuleNotFoundError: No module named 'bq_stats'\n"}
16
+ ,{"stream_name":"stdout","time":12.189450805,"data":"Downloading and preparing dataset common_voice/bn to /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/bn/11.0.0/3f27acf10f303eac5b6fbbbe02495aeddb46ecffdb0a2fe3507fcfbf89094631...\n"}
17
+ ,{"stream_name":"stderr","time":282.186192879,"data":"\n"}
18
+ ,{"stream_name":"stderr","time":282.292363731,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\n"}
19
+ ,{"stream_name":"stderr","time":282.386399175,"data":"\rReading metadata...: 11398it [00:00, 113963.81it/s]\u001b[A\rReading metadata...: 16777it [00:00, 107248.43it/s]\n"}
20
+ ,{"stream_name":"stderr","time":291.233711451,"data":"\n"}
21
+ ,{"stream_name":"stderr","time":291.432171637,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 8353it [00:00, 132338.47it/s]\n"}
22
+ ,{"stream_name":"stderr","time":294.545279107,"data":"\n"}
23
+ ,{"stream_name":"stderr","time":294.740125853,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 8353it [00:00, 137305.01it/s]\n"}
24
+ ,{"stream_name":"stderr","time":298.394769736,"data":"\n"}
25
+ ,{"stream_name":"stderr","time":298.496171909,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\n"}
26
+ ,{"stream_name":"stderr","time":298.604225211,"data":"\rReading metadata...: 14119it [00:00, 141173.98it/s]\u001b[A\n"}
27
+ ,{"stream_name":"stderr","time":298.708220034,"data":"\rReading metadata...: 28237it [00:00, 134681.19it/s]\u001b[A\n"}
28
+ ,{"stream_name":"stderr","time":298.808317277,"data":"\rReading metadata...: 41726it [00:00, 132718.44it/s]\u001b[A\n"}
29
+ ,{"stream_name":"stderr","time":298.90815864,"data":"\rReading metadata...: 55106it [00:00, 133130.71it/s]\u001b[A\n"}
30
+ ,{"stream_name":"stderr","time":299.012041292,"data":"\rReading metadata...: 68773it [00:00, 134382.80it/s]\u001b[A\n"}
31
+ ,{"stream_name":"stderr","time":299.117872665,"data":"\rReading metadata...: 82218it [00:00, 132626.22it/s]\u001b[A\n"}
32
+ ,{"stream_name":"stderr","time":299.221650208,"data":"\rReading metadata...: 95488it [00:00, 130352.31it/s]\u001b[A\n"}
33
+ ,{"stream_name":"stderr","time":299.32609062,"data":"\rReading metadata...: 108533it [00:00, 128713.86it/s]\u001b[A\n"}
34
+ ,{"stream_name":"stderr","time":299.430928863,"data":"\rReading metadata...: 121412it [00:00, 126866.61it/s]\u001b[A\n"}
35
+ ,{"stream_name":"stderr","time":299.530856126,"data":"\rReading metadata...: 134105it [00:01, 125081.83it/s]\u001b[A\n"}
36
+ ,{"stream_name":"stderr","time":299.633776039,"data":"\rReading metadata...: 146905it [00:01, 125945.08it/s]\u001b[A\n"}
37
+ ,{"stream_name":"stderr","time":299.734081722,"data":"\rReading metadata...: 159506it [00:01, 124890.05it/s]\u001b[A\n"}
38
+ ,{"stream_name":"stderr","time":299.837771394,"data":"\rReading metadata...: 172505it [00:01, 126400.87it/s]\u001b[A\n"}
39
+ ,{"stream_name":"stderr","time":299.939560147,"data":"\rReading metadata...: 185152it [00:01, 124997.06it/s]\u001b[A\n"}
40
+ ,{"stream_name":"stderr","time":300.0414487,"data":"\rReading metadata...: 197658it [00:01, 124365.12it/s]\u001b[A\n"}
41
+ ,{"stream_name":"stderr","time":300.139746573,"data":"\rReading metadata...: 210414it [00:01, 125308.06it/s]\u001b[A\n"}
42
+ ,{"stream_name":"stderr","time":300.240470356,"data":"\rReading metadata...: 222996it [00:01, 125457.18it/s]\u001b[A\rReading metadata...: 225826it [00:01, 127471.88it/s]\n"}
43
+ ,{"stream_name":"stderr","time":433.997174478,"data":"\n"}
44
+ ,{"stream_name":"stderr","time":434.195508939,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 6447it [00:00, 80059.80it/s]\n"}
45
+ ,{"stream_name":"stdout","time":437.937668012,"data":"Dataset common_voice downloaded and prepared to /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/bn/11.0.0/3f27acf10f303eac5b6fbbbe02495aeddb46ecffdb0a2fe3507fcfbf89094631. Subsequent calls will reuse this data.\n"}
46
+ ,{"stream_name":"stdout","time":444.668977881,"data":"Split Name : train\n"}
47
+ ,{"stream_name":"stdout","time":444.669015671,"data":"Total audios in commonvoice train: 16777\n"}
48
+ ,{"stream_name":"stdout","time":444.669026061,"data":"Total audios in train : 16041\n"}
49
+ ,{"stream_name":"stdout","time":444.669031571,"data":"Total audios in val : 0\n"}
50
+ ,{"stream_name":"stdout","time":444.669035791,"data":"--------------------------------------------------------------------------------\n"}
51
+ ,{"stream_name":"stdout","time":444.669039811,"data":"Split Name : test\n"}
52
+ ,{"stream_name":"stdout","time":444.669044411,"data":"Total audios in commonvoice test: 8353\n"}
53
+ ,{"stream_name":"stdout","time":444.669048451,"data":"Total audios in train : 7531\n"}
54
+ ,{"stream_name":"stdout","time":444.669052721,"data":"Total audios in val : 0\n"}
55
+ ,{"stream_name":"stdout","time":444.669056741,"data":"--------------------------------------------------------------------------------\n"}
56
+ ,{"stream_name":"stdout","time":444.669060981,"data":"Split Name : validation\n"}
57
+ ,{"stream_name":"stdout","time":444.669065171,"data":"Total audios in commonvoice validation: 8353\n"}
58
+ ,{"stream_name":"stdout","time":444.669069191,"data":"Total audios in train : 7769\n"}
59
+ ,{"stream_name":"stdout","time":444.669073661,"data":"Total audios in val : 0\n"}
60
+ ,{"stream_name":"stdout","time":444.669077531,"data":"--------------------------------------------------------------------------------\n"}
61
+ ,{"stream_name":"stdout","time":445.519689576,"data":"Split Name : other\n"}
62
+ ,{"stream_name":"stdout","time":445.559222476,"data":"Total audios in commonvoice other: 225826\n"}
63
+ ,{"stream_name":"stdout","time":445.559279306,"data":"Total audios in train : 224996\n"}
64
+ ,{"stream_name":"stdout","time":445.559313656,"data":"Total audios in val : 0\n"}
65
+ ,{"stream_name":"stdout","time":445.559319516,"data":"--------------------------------------------------------------------------------\n"}
66
+ ,{"stream_name":"stdout","time":445.559323646,"data":"Split Name : invalidated\n"}
67
+ ,{"stream_name":"stdout","time":445.559327506,"data":"Total audios in commonvoice invalidated: 6447\n"}
68
+ ,{"stream_name":"stdout","time":445.559332256,"data":"Total audios in train : 5627\n"}
69
+ ,{"stream_name":"stdout","time":445.559335886,"data":"Total audios in val : 0\n"}
70
+ ,{"stream_name":"stdout","time":445.559339526,"data":"--------------------------------------------------------------------------------\n"}
71
+ ,{"stream_name":"stdout","time":445.559342966,"data":"Total common voice audio :265756\n"}
72
+ ,{"stream_name":"stdout","time":445.559346556,"data":" Audios present here : 261964\n"}
73
+ ,{"stream_name":"stderr","time":445.560502156,"data":"\r 0%| | 0/5 [00:00\u003c?, ?it/s]\r 20%|██ | 1/5 [00:00\u003c00:00, 6.84it/s]\r 60%|██████ | 3/5 [00:00\u003c00:00, 10.79it/s]\r100%|██████████| 5/5 [00:01\u003c00:00, 2.58it/s]\r100%|██████████| 5/5 [00:01\u003c00:00, 3.07it/s]\n"}
74
+ ,{"stream_name":"stderr","time":446.123441549,"data":"\r 0%| | 0/5 [00:00\u003c?, ?it/s]\r 80%|████████ | 4/5 [00:00\u003c00:00, 8.48it/s]\r100%|██████████| 5/5 [00:00\u003c00:00, 10.24it/s]\n"}
75
+ ,{"stream_name":"stderr","time":447.040023925,"data":"\r0it [00:00, ?it/s]\r106171it [00:00, 1061592.05it/s]\r216964it [00:00, 1088775.74it/s]\r337425it [00:00, 1141613.17it/s]\r452967it [00:00, 1147016.53it/s]\r572951it [00:00, 1166016.06it/s]\r689553it [00:00, 1157771.60it/s]\r810008it [00:00, 1172950.29it/s]\r935257it [00:00, 1198143.09it/s]\r963636it [00:00, 1155136.54it/s]\n"}
76
+ ,{"stream_name":"stdout","time":453.23643988,"data":"Sentence : এরা সবাই দাস হিসেবে একটি জাহাজে করে বিদেশে পাচার হচ্ছিল।\n"}
77
+ ,{"stream_name":"stdout","time":453.23650391,"data":"Common Voice audio :\n"}
78
+ ,{"stream_name":"stdout","time":453.432182231,"data":"Competition data audio : ae1be00ad59d.mp3\n"}
79
+ ,{"stream_name":"stdout","time":453.783724884,"data":"--------------------------------------------------------------------------------\n"}
80
+ ,{"stream_name":"stdout","time":453.783783484,"data":"--------------------------------------------------------------------------------\n"}
81
+ ,{"stream_name":"stdout","time":453.783788414,"data":"Sentence : তিনি জানান এই কাজ সুভাষ দত্ত করবে এবং রহমানকে তার সহকারী হিসেবে যোগ দিতে বলেন।\n"}
82
+ ,{"stream_name":"stdout","time":453.783794164,"data":"Common Voice audio :\n"}
83
+ ,{"stream_name":"stdout","time":453.815708834,"data":"Competition data audio : 2bfa78215372.mp3\n"}
84
+ ,{"stream_name":"stdout","time":454.228451986,"data":"--------------------------------------------------------------------------------\n"}
85
+ ,{"stream_name":"stdout","time":454.228507256,"data":"--------------------------------------------------------------------------------\n"}
86
+ ,{"stream_name":"stdout","time":454.228511766,"data":"Sentence : এটি দক্ষিণ মিশরের একটি শহর।\n"}
87
+ ,{"stream_name":"stdout","time":454.228515496,"data":"Common Voice audio :\n"}
88
+ ,{"stream_name":"stdout","time":454.250528576,"data":"Competition data audio : 98f9873ba235.mp3\n"}
89
+ ,{"stream_name":"stdout","time":454.698182469,"data":"--------------------------------------------------------------------------------\n"}
90
+ ,{"stream_name":"stdout","time":454.698266699,"data":"--------------------------------------------------------------------------------\n"}
91
+ ,{"stream_name":"stdout","time":454.702478109,"data":"Sentence : বর্তমানে এদের আবাসস্থল হুমকির মুখে।\n"}
92
+ ,{"stream_name":"stdout","time":454.702567559,"data":"Common Voice audio :\n"}
93
+ ,{"stream_name":"stdout","time":454.90345134,"data":"Competition data audio : 5831fd3d7134.mp3\n"}
94
+ ,{"stream_name":"stdout","time":455.111504631,"data":"--------------------------------------------------------------------------------\n"}
95
+ ,{"stream_name":"stdout","time":455.111528311,"data":"--------------------------------------------------------------------------------\n"}
96
+ ,{"stream_name":"stdout","time":455.111531851,"data":"Sentence : এটি একটি গুরুত্বপূর্ণ রেল বিভাগের জন্য পরিচিত।\n"}
97
+ ,{"stream_name":"stdout","time":455.111535181,"data":"Common Voice audio :\n"}
98
+ ,{"stream_name":"stdout","time":455.135717701,"data":"Competition data audio : ba8e9236358f.mp3\n"}
99
+ ,{"stream_name":"stdout","time":455.327957002,"data":"--------------------------------------------------------------------------------\n"}
100
+ ,{"stream_name":"stdout","time":455.328001362,"data":"--------------------------------------------------------------------------------\n"}
101
+ ,{"stream_name":"stdout","time":457.842483597,"data":"Sentence : এটি সামগ্রিক পাঠক্রম, কোর্স, পরীক্ষা এবং ফলাফলগুলি নিয়ন্ত্রণ করে এবং অনুমোদন করে।\n"}
102
+ ,{"stream_name":"stdout","time":457.842531877,"data":"Common Voice audio :\n"}
103
+ ,{"stream_name":"stdout","time":458.040373948,"data":"Multiple audios in the competition dataset with the same sentence \n"}
104
+ ,{"stream_name":"stdout","time":458.040398808,"data":"\n"}
105
+ ,{"stream_name":"stdout","time":458.040401888,"data":"Competition data audio : 79660f8540b0.mp3\n"}
106
+ ,{"stream_name":"stdout","time":458.302004169,"data":"Competition data audio : 88db4447d274.mp3\n"}
107
+ ,{"stream_name":"stdout","time":458.532606861,"data":"Competition data audio : ed34fbd6cf0b.mp3\n"}
108
+ ,{"stream_name":"stdout","time":458.770749963,"data":"--------------------------------------------------------------------------------\n"}
109
+ ,{"stream_name":"stdout","time":458.770780352,"data":"--------------------------------------------------------------------------------\n"}
110
+ ,{"stream_name":"stdout","time":464.378259715,"data":"Sentence : তাদের একটি ছেলে এবং একটি মেয়ে আছে।\n"}
111
+ ,{"stream_name":"stdout","time":464.378324565,"data":"Common Voice audio :\n"}
112
+ ,{"stream_name":"stdout","time":464.576431276,"data":"Multiple audios in the competition dataset with the same sentence \n"}
113
+ ,{"stream_name":"stdout","time":464.576468386,"data":"\n"}
114
+ ,{"stream_name":"stdout","time":464.576498526,"data":"Competition data audio : 30e710e39566.mp3\n"}
115
+ ,{"stream_name":"stdout","time":464.811630637,"data":"Competition data audio : b4c61f0f5afd.mp3\n"}
116
+ ,{"stream_name":"stdout","time":465.036936918,"data":"Competition data audio : e4cc57dcf517.mp3\n"}
117
+ ,{"stream_name":"stdout","time":465.30257676,"data":"--------------------------------------------------------------------------------\n"}
118
+ ,{"stream_name":"stdout","time":465.30260857,"data":"--------------------------------------------------------------------------------\n"}
119
+ ,{"stream_name":"stdout","time":466.057549354,"data":"Sentence : শীতল জলবায়ুতে কাণ্ডের বেশিরভাগ বৃদ্ধি বসন্ত এবং গ্রীষ্মের শুরুতে ঘটে।\n"}
120
+ ,{"stream_name":"stdout","time":466.057645774,"data":"Common Voice audio :\n"}
121
+ ,{"stream_name":"stdout","time":466.255401516,"data":"Multiple audios in the competition dataset with the same sentence \n"}
122
+ ,{"stream_name":"stdout","time":466.255426845,"data":"\n"}
123
+ ,{"stream_name":"stdout","time":466.255431045,"data":"Competition data audio : 113cd642691c.mp3\n"}
124
+ ,{"stream_name":"stdout","time":466.499374247,"data":"Competition data audio : 968e266863e8.mp3\n"}
125
+ ,{"stream_name":"stdout","time":466.720088108,"data":"Competition data audio : b5764e70557d.mp3\n"}
126
+ ,{"stream_name":"stdout","time":467.01409308,"data":"--------------------------------------------------------------------------------\n"}
127
+ ,{"stream_name":"stdout","time":467.01413339,"data":"--------------------------------------------------------------------------------\n"}
128
+ ,{"stream_name":"stdout","time":468.061828886,"data":"Sentence : ইউনিটটি বাংলাদেশে বৈজ্ঞানিক গবেষণা তহবিল গঠন করেছে।\n"}
129
+ ,{"stream_name":"stdout","time":468.061865106,"data":"Common Voice audio :\n"}
130
+ ,{"stream_name":"stdout","time":468.260425757,"data":"Multiple audios in the competition dataset with the same sentence \n"}
131
+ ,{"stream_name":"stdout","time":468.260479287,"data":"\n"}
132
+ ,{"stream_name":"stdout","time":468.260483737,"data":"Competition data audio : 30621db2115d.mp3\n"}
133
+ ,{"stream_name":"stdout","time":468.491173878,"data":"Competition data audio : 578726cb78a4.mp3\n"}
134
+ ,{"stream_name":"stdout","time":468.7056183,"data":"Competition data audio : ec9f81af5c0a.mp3\n"}
135
+ ,{"stream_name":"stdout","time":468.71190234,"data":"--------------------------------------------------------------------------------\n"}
136
+ ,{"stream_name":"stdout","time":468.71194852,"data":"--------------------------------------------------------------------------------\n"}
137
+ ,{"stream_name":"stdout","time":471.298306515,"data":"Sentence : বর্তমানে জমিদার বাড়ির ভৌত কাঠামো সংরক্ষণের অভাবে নষ্ট হয়ে যাচ্ছে।\n"}
138
+ ,{"stream_name":"stdout","time":471.298344275,"data":"Common Voice audio :\n"}
139
+ ,{"stream_name":"stdout","time":471.496049196,"data":"Multiple audios in the competition dataset with the same sentence \n"}
140
+ ,{"stream_name":"stdout","time":471.496102506,"data":"\n"}
141
+ ,{"stream_name":"stdout","time":471.496109866,"data":"Competition data audio : 17f4979f652e.mp3\n"}
142
+ ,{"stream_name":"stdout","time":471.730514617,"data":"Competition data audio : 87f243f631ea.mp3\n"}
143
+ ,{"stream_name":"stdout","time":471.959682938,"data":"Competition data audio : 90f45aad66a0.mp3\n"}
144
+ ,{"stream_name":"stdout","time":471.963997418,"data":"--------------------------------------------------------------------------------\n"}
145
+ ,{"stream_name":"stdout","time":471.964014048,"data":"--------------------------------------------------------------------------------\n"}
146
+ ,{"stream_name":"stdout","time":473.207362935,"data":"Audio in Common Voice dataset : \n"}
147
+ ,{"stream_name":"stderr","time":479.415976081,"data":"/opt/conda/lib/python3.10/site-packages/traitlets/traitlets.py:2930: FutureWarning: --Exporter.preprocessors=[\"remove_papermill_header.RemovePapermillHeader\"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.\n"}
148
+ ,{"stream_name":"stderr","time":479.416022431,"data":" warn(\n"}
149
+ ,{"stream_name":"stderr","time":479.420470711,"data":"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n"}
150
+ ,{"stream_name":"stderr","time":479.445263511,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to notebook\n"}
151
+ ,{"stream_name":"stderr","time":480.106029845,"data":"[NbConvertApp] Writing 9378981 bytes to __notebook__.ipynb\n"}
152
+ ,{"stream_name":"stderr","time":481.686893084,"data":"/opt/conda/lib/python3.10/site-packages/traitlets/traitlets.py:2930: FutureWarning: --Exporter.preprocessors=[\"nbconvert.preprocessors.ExtractOutputPreprocessor\"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.\n"}
153
+ ,{"stream_name":"stderr","time":481.687327044,"data":" warn(\n"}
154
+ ,{"stream_name":"stderr","time":481.690677654,"data":"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n"}
155
+ ,{"stream_name":"stderr","time":481.722324954,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to html\n"}
156
+ ,{"stream_name":"stderr","time":483.099897632,"data":"[NbConvertApp] Writing 9589984 bytes to __results__.html\n"}
157
+ ]
filtered.csv ADDED
The diff for this file is too large to render. See raw diff
 
indexes.csv ADDED
The diff for this file is too large to render. See raw diff
 
kaggle.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"username":"nguynminhph","key":"<REDACTED_KAGGLE_API_KEY>"}
macro-normalization.log ADDED
The diff for this file is too large to render. See raw diff
 
normalized.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06432a5dd7b5b27d38d9bbaebcd64aa85b79b85d0c4770ba66420bb63457fd24
3
+ size 297143462
preprocessor_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "sampling_rate": 16000
10
+ }
python-packages2.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c54f691d9a222bb3c61dc4db5574d209628f4e667be964cb8d50c1d7db11ef8f
3
+ size 17653201
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f0960f05cdfd4480f2339a52034944ac99249181b2f20278a9b26ee3950c577
3
+ size 3859313933
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd6572be7f0af6eabc0c935a13bbdbe6d745016ae2cdfc5b122b6a37750c92ac
3
+ size 3963