bengali_1B/trainer_push
Browse files- .gitattributes +1 -0
- README.md +56 -0
- config.json +108 -0
- dataset-overlaps-with-commonvoice-11-bn.log +157 -0
- filtered.csv +0 -0
- indexes.csv +0 -0
- kaggle.json +1 -0
- macro-normalization.log +0 -0
- normalized.csv +3 -0
- preprocessor_config.json +10 -0
- python-packages2.zip +3 -0
- pytorch_model.bin +3 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
normalized.csv filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: cc-by-nc-4.0
|
3 |
+
base_model: Umong/wav2vec2-large-mms-1b-bengali
|
4 |
+
tags:
|
5 |
+
- generated_from_trainer
|
6 |
+
model-index:
|
7 |
+
- name: Umong/wav2vec2-large-mms-1b-bengali
|
8 |
+
results: []
|
9 |
+
---
|
10 |
+
|
11 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
+
should probably proofread and complete it, then remove this comment. -->
|
13 |
+
|
14 |
+
# Umong/wav2vec2-large-mms-1b-bengali
|
15 |
+
|
16 |
+
This model is a fine-tuned version of [Umong/wav2vec2-large-mms-1b-bengali](https://huggingface.co/Umong/wav2vec2-large-mms-1b-bengali) on an unknown dataset.
|
17 |
+
It achieves the following results on the evaluation set:
|
18 |
+
- eval_loss: 0.8979
|
19 |
+
- eval_runtime: 77.1222
|
20 |
+
- eval_samples_per_second: 6.483
|
21 |
+
- eval_steps_per_second: 1.621
|
22 |
+
- epoch: 0.84
|
23 |
+
- step: 9000
|
24 |
+
|
25 |
+
## Model description
|
26 |
+
|
27 |
+
More information needed
|
28 |
+
|
29 |
+
## Intended uses & limitations
|
30 |
+
|
31 |
+
More information needed
|
32 |
+
|
33 |
+
## Training and evaluation data
|
34 |
+
|
35 |
+
More information needed
|
36 |
+
|
37 |
+
## Training procedure
|
38 |
+
|
39 |
+
### Training hyperparameters
|
40 |
+
|
41 |
+
The following hyperparameters were used during training:
|
42 |
+
- learning_rate: 2e-06
|
43 |
+
- train_batch_size: 2
|
44 |
+
- eval_batch_size: 4
|
45 |
+
- seed: 42
|
46 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
47 |
+
- lr_scheduler_type: cosine
|
48 |
+
- lr_scheduler_warmup_steps: 2000
|
49 |
+
- num_epochs: 1
|
50 |
+
|
51 |
+
### Framework versions
|
52 |
+
|
53 |
+
- Transformers 4.33.3
|
54 |
+
- PyTorch 2.0.1+cu118
|
55 |
+
- Datasets 2.14.5
|
56 |
+
- Tokenizers 0.13.3
|
config.json
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Umong/wav2vec2-large-mms-1b-bengali",
|
3 |
+
"activation_dropout": 0.05,
|
4 |
+
"adapter_attn_dim": 16,
|
5 |
+
"adapter_kernel_size": 3,
|
6 |
+
"adapter_stride": 2,
|
7 |
+
"add_adapter": false,
|
8 |
+
"apply_spec_augment": true,
|
9 |
+
"architectures": [
|
10 |
+
"Wav2Vec2ForCTC"
|
11 |
+
],
|
12 |
+
"attention_dropout": 0.1,
|
13 |
+
"bos_token_id": 1,
|
14 |
+
"classifier_proj_size": 256,
|
15 |
+
"codevector_dim": 1024,
|
16 |
+
"contrastive_logits_temperature": 0.1,
|
17 |
+
"conv_bias": true,
|
18 |
+
"conv_dim": [
|
19 |
+
512,
|
20 |
+
512,
|
21 |
+
512,
|
22 |
+
512,
|
23 |
+
512,
|
24 |
+
512,
|
25 |
+
512
|
26 |
+
],
|
27 |
+
"conv_kernel": [
|
28 |
+
10,
|
29 |
+
3,
|
30 |
+
3,
|
31 |
+
3,
|
32 |
+
3,
|
33 |
+
2,
|
34 |
+
2
|
35 |
+
],
|
36 |
+
"conv_stride": [
|
37 |
+
5,
|
38 |
+
2,
|
39 |
+
2,
|
40 |
+
2,
|
41 |
+
2,
|
42 |
+
2,
|
43 |
+
2
|
44 |
+
],
|
45 |
+
"ctc_loss_reduction": "mean",
|
46 |
+
"ctc_zero_infinity": true,
|
47 |
+
"diversity_loss_weight": 100,
|
48 |
+
"do_stable_layer_norm": true,
|
49 |
+
"eos_token_id": 2,
|
50 |
+
"feat_extract_activation": "gelu",
|
51 |
+
"feat_extract_dropout": 0.0,
|
52 |
+
"feat_extract_norm": "layer",
|
53 |
+
"feat_proj_dropout": 0.0,
|
54 |
+
"feat_quantizer_dropout": 0.0,
|
55 |
+
"final_dropout": 0.05,
|
56 |
+
"hidden_act": "gelu",
|
57 |
+
"hidden_dropout": 0.2,
|
58 |
+
"hidden_size": 1280,
|
59 |
+
"initializer_range": 0.02,
|
60 |
+
"intermediate_size": 5120,
|
61 |
+
"layer_norm_eps": 1e-05,
|
62 |
+
"layerdrop": 0.2,
|
63 |
+
"mask_feature_length": 10,
|
64 |
+
"mask_feature_min_masks": 0,
|
65 |
+
"mask_feature_prob": 0.0,
|
66 |
+
"mask_time_length": 10,
|
67 |
+
"mask_time_min_masks": 2,
|
68 |
+
"mask_time_prob": 0.1,
|
69 |
+
"model_type": "wav2vec2",
|
70 |
+
"num_adapter_layers": 3,
|
71 |
+
"num_attention_heads": 16,
|
72 |
+
"num_codevector_groups": 2,
|
73 |
+
"num_codevectors_per_group": 320,
|
74 |
+
"num_conv_pos_embedding_groups": 16,
|
75 |
+
"num_conv_pos_embeddings": 128,
|
76 |
+
"num_feat_extract_layers": 7,
|
77 |
+
"num_hidden_layers": 48,
|
78 |
+
"num_negatives": 100,
|
79 |
+
"output_hidden_size": 1280,
|
80 |
+
"pad_token_id": 63,
|
81 |
+
"proj_codevector_dim": 1024,
|
82 |
+
"tdnn_dilation": [
|
83 |
+
1,
|
84 |
+
2,
|
85 |
+
3,
|
86 |
+
1,
|
87 |
+
1
|
88 |
+
],
|
89 |
+
"tdnn_dim": [
|
90 |
+
512,
|
91 |
+
512,
|
92 |
+
512,
|
93 |
+
512,
|
94 |
+
1500
|
95 |
+
],
|
96 |
+
"tdnn_kernel": [
|
97 |
+
5,
|
98 |
+
3,
|
99 |
+
3,
|
100 |
+
1,
|
101 |
+
1
|
102 |
+
],
|
103 |
+
"torch_dtype": "float32",
|
104 |
+
"transformers_version": "4.33.3",
|
105 |
+
"use_weighted_layer_sum": false,
|
106 |
+
"vocab_size": 66,
|
107 |
+
"xvector_output_dim": 512
|
108 |
+
}
|
dataset-overlaps-with-commonvoice-11-bn.log
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[{"stream_name":"stderr","time":9.00453959,"data":"[IPKernelApp] WARNING | Error in loading extension: bq_stats\n"}
|
2 |
+
,{"stream_name":"stderr","time":9.0046337,"data":"Check your config files in /root/.ipython/profile_default\n"}
|
3 |
+
,{"stream_name":"stderr","time":9.00466419,"data":"Traceback (most recent call last):\n"}
|
4 |
+
,{"stream_name":"stderr","time":9.00467042,"data":" File \"/opt/conda/lib/python3.10/site-packages/IPython/core/shellapp.py\", line 282, in init_extensions\n"}
|
5 |
+
,{"stream_name":"stderr","time":9.00467969,"data":" self.shell.extension_manager.load_extension(ext)\n"}
|
6 |
+
,{"stream_name":"stderr","time":9.00468438,"data":" File \"/opt/conda/lib/python3.10/site-packages/IPython/core/extensions.py\", line 76, in load_extension\n"}
|
7 |
+
,{"stream_name":"stderr","time":9.00468905,"data":" return self._load_extension(module_str)\n"}
|
8 |
+
,{"stream_name":"stderr","time":9.00469301,"data":" File \"/opt/conda/lib/python3.10/site-packages/IPython/core/extensions.py\", line 91, in _load_extension\n"}
|
9 |
+
,{"stream_name":"stderr","time":9.00469716,"data":" mod = import_module(module_str)\n"}
|
10 |
+
,{"stream_name":"stderr","time":9.00470076,"data":" File \"/opt/conda/lib/python3.10/importlib/__init__.py\", line 126, in import_module\n"}
|
11 |
+
,{"stream_name":"stderr","time":9.00470429,"data":" return _bootstrap._gcd_import(name[level:], package, level)\n"}
|
12 |
+
,{"stream_name":"stderr","time":9.00470784,"data":" File \"\u003cfrozen importlib._bootstrap\u003e\", line 1050, in _gcd_import\n"}
|
13 |
+
,{"stream_name":"stderr","time":9.00471202,"data":" File \"\u003cfrozen importlib._bootstrap\u003e\", line 1027, in _find_and_load\n"}
|
14 |
+
,{"stream_name":"stderr","time":9.00471592,"data":" File \"\u003cfrozen importlib._bootstrap\u003e\", line 1004, in _find_and_load_unlocked\n"}
|
15 |
+
,{"stream_name":"stderr","time":9.0047199,"data":"ModuleNotFoundError: No module named 'bq_stats'\n"}
|
16 |
+
,{"stream_name":"stdout","time":12.189450805,"data":"Downloading and preparing dataset common_voice/bn to /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/bn/11.0.0/3f27acf10f303eac5b6fbbbe02495aeddb46ecffdb0a2fe3507fcfbf89094631...\n"}
|
17 |
+
,{"stream_name":"stderr","time":282.186192879,"data":"\n"}
|
18 |
+
,{"stream_name":"stderr","time":282.292363731,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\n"}
|
19 |
+
,{"stream_name":"stderr","time":282.386399175,"data":"\rReading metadata...: 11398it [00:00, 113963.81it/s]\u001b[A\rReading metadata...: 16777it [00:00, 107248.43it/s]\n"}
|
20 |
+
,{"stream_name":"stderr","time":291.233711451,"data":"\n"}
|
21 |
+
,{"stream_name":"stderr","time":291.432171637,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 8353it [00:00, 132338.47it/s]\n"}
|
22 |
+
,{"stream_name":"stderr","time":294.545279107,"data":"\n"}
|
23 |
+
,{"stream_name":"stderr","time":294.740125853,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 8353it [00:00, 137305.01it/s]\n"}
|
24 |
+
,{"stream_name":"stderr","time":298.394769736,"data":"\n"}
|
25 |
+
,{"stream_name":"stderr","time":298.496171909,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\n"}
|
26 |
+
,{"stream_name":"stderr","time":298.604225211,"data":"\rReading metadata...: 14119it [00:00, 141173.98it/s]\u001b[A\n"}
|
27 |
+
,{"stream_name":"stderr","time":298.708220034,"data":"\rReading metadata...: 28237it [00:00, 134681.19it/s]\u001b[A\n"}
|
28 |
+
,{"stream_name":"stderr","time":298.808317277,"data":"\rReading metadata...: 41726it [00:00, 132718.44it/s]\u001b[A\n"}
|
29 |
+
,{"stream_name":"stderr","time":298.90815864,"data":"\rReading metadata...: 55106it [00:00, 133130.71it/s]\u001b[A\n"}
|
30 |
+
,{"stream_name":"stderr","time":299.012041292,"data":"\rReading metadata...: 68773it [00:00, 134382.80it/s]\u001b[A\n"}
|
31 |
+
,{"stream_name":"stderr","time":299.117872665,"data":"\rReading metadata...: 82218it [00:00, 132626.22it/s]\u001b[A\n"}
|
32 |
+
,{"stream_name":"stderr","time":299.221650208,"data":"\rReading metadata...: 95488it [00:00, 130352.31it/s]\u001b[A\n"}
|
33 |
+
,{"stream_name":"stderr","time":299.32609062,"data":"\rReading metadata...: 108533it [00:00, 128713.86it/s]\u001b[A\n"}
|
34 |
+
,{"stream_name":"stderr","time":299.430928863,"data":"\rReading metadata...: 121412it [00:00, 126866.61it/s]\u001b[A\n"}
|
35 |
+
,{"stream_name":"stderr","time":299.530856126,"data":"\rReading metadata...: 134105it [00:01, 125081.83it/s]\u001b[A\n"}
|
36 |
+
,{"stream_name":"stderr","time":299.633776039,"data":"\rReading metadata...: 146905it [00:01, 125945.08it/s]\u001b[A\n"}
|
37 |
+
,{"stream_name":"stderr","time":299.734081722,"data":"\rReading metadata...: 159506it [00:01, 124890.05it/s]\u001b[A\n"}
|
38 |
+
,{"stream_name":"stderr","time":299.837771394,"data":"\rReading metadata...: 172505it [00:01, 126400.87it/s]\u001b[A\n"}
|
39 |
+
,{"stream_name":"stderr","time":299.939560147,"data":"\rReading metadata...: 185152it [00:01, 124997.06it/s]\u001b[A\n"}
|
40 |
+
,{"stream_name":"stderr","time":300.0414487,"data":"\rReading metadata...: 197658it [00:01, 124365.12it/s]\u001b[A\n"}
|
41 |
+
,{"stream_name":"stderr","time":300.139746573,"data":"\rReading metadata...: 210414it [00:01, 125308.06it/s]\u001b[A\n"}
|
42 |
+
,{"stream_name":"stderr","time":300.240470356,"data":"\rReading metadata...: 222996it [00:01, 125457.18it/s]\u001b[A\rReading metadata...: 225826it [00:01, 127471.88it/s]\n"}
|
43 |
+
,{"stream_name":"stderr","time":433.997174478,"data":"\n"}
|
44 |
+
,{"stream_name":"stderr","time":434.195508939,"data":"\rReading metadata...: 0it [00:00, ?it/s]\u001b[A\rReading metadata...: 6447it [00:00, 80059.80it/s]\n"}
|
45 |
+
,{"stream_name":"stdout","time":437.937668012,"data":"Dataset common_voice downloaded and prepared to /root/.cache/huggingface/datasets/mozilla-foundation___common_voice/bn/11.0.0/3f27acf10f303eac5b6fbbbe02495aeddb46ecffdb0a2fe3507fcfbf89094631. Subsequent calls will reuse this data.\n"}
|
46 |
+
,{"stream_name":"stdout","time":444.668977881,"data":"Split Name : train\n"}
|
47 |
+
,{"stream_name":"stdout","time":444.669015671,"data":"Total audios in commonvoice train: 16777\n"}
|
48 |
+
,{"stream_name":"stdout","time":444.669026061,"data":"Total audios in train : 16041\n"}
|
49 |
+
,{"stream_name":"stdout","time":444.669031571,"data":"Total audios in val : 0\n"}
|
50 |
+
,{"stream_name":"stdout","time":444.669035791,"data":"--------------------------------------------------------------------------------\n"}
|
51 |
+
,{"stream_name":"stdout","time":444.669039811,"data":"Split Name : test\n"}
|
52 |
+
,{"stream_name":"stdout","time":444.669044411,"data":"Total audios in commonvoice test: 8353\n"}
|
53 |
+
,{"stream_name":"stdout","time":444.669048451,"data":"Total audios in train : 7531\n"}
|
54 |
+
,{"stream_name":"stdout","time":444.669052721,"data":"Total audios in val : 0\n"}
|
55 |
+
,{"stream_name":"stdout","time":444.669056741,"data":"--------------------------------------------------------------------------------\n"}
|
56 |
+
,{"stream_name":"stdout","time":444.669060981,"data":"Split Name : validation\n"}
|
57 |
+
,{"stream_name":"stdout","time":444.669065171,"data":"Total audios in commonvoice validation: 8353\n"}
|
58 |
+
,{"stream_name":"stdout","time":444.669069191,"data":"Total audios in train : 7769\n"}
|
59 |
+
,{"stream_name":"stdout","time":444.669073661,"data":"Total audios in val : 0\n"}
|
60 |
+
,{"stream_name":"stdout","time":444.669077531,"data":"--------------------------------------------------------------------------------\n"}
|
61 |
+
,{"stream_name":"stdout","time":445.519689576,"data":"Split Name : other\n"}
|
62 |
+
,{"stream_name":"stdout","time":445.559222476,"data":"Total audios in commonvoice other: 225826\n"}
|
63 |
+
,{"stream_name":"stdout","time":445.559279306,"data":"Total audios in train : 224996\n"}
|
64 |
+
,{"stream_name":"stdout","time":445.559313656,"data":"Total audios in val : 0\n"}
|
65 |
+
,{"stream_name":"stdout","time":445.559319516,"data":"--------------------------------------------------------------------------------\n"}
|
66 |
+
,{"stream_name":"stdout","time":445.559323646,"data":"Split Name : invalidated\n"}
|
67 |
+
,{"stream_name":"stdout","time":445.559327506,"data":"Total audios in commonvoice invalidated: 6447\n"}
|
68 |
+
,{"stream_name":"stdout","time":445.559332256,"data":"Total audios in train : 5627\n"}
|
69 |
+
,{"stream_name":"stdout","time":445.559335886,"data":"Total audios in val : 0\n"}
|
70 |
+
,{"stream_name":"stdout","time":445.559339526,"data":"--------------------------------------------------------------------------------\n"}
|
71 |
+
,{"stream_name":"stdout","time":445.559342966,"data":"Total common voice audio :265756\n"}
|
72 |
+
,{"stream_name":"stdout","time":445.559346556,"data":" Audios present here : 261964\n"}
|
73 |
+
,{"stream_name":"stderr","time":445.560502156,"data":"\r 0%| | 0/5 [00:00\u003c?, ?it/s]\r 20%|██ | 1/5 [00:00\u003c00:00, 6.84it/s]\r 60%|██████ | 3/5 [00:00\u003c00:00, 10.79it/s]\r100%|██████████| 5/5 [00:01\u003c00:00, 2.58it/s]\r100%|██████████| 5/5 [00:01\u003c00:00, 3.07it/s]\n"}
|
74 |
+
,{"stream_name":"stderr","time":446.123441549,"data":"\r 0%| | 0/5 [00:00\u003c?, ?it/s]\r 80%|████████ | 4/5 [00:00\u003c00:00, 8.48it/s]\r100%|██████████| 5/5 [00:00\u003c00:00, 10.24it/s]\n"}
|
75 |
+
,{"stream_name":"stderr","time":447.040023925,"data":"\r0it [00:00, ?it/s]\r106171it [00:00, 1061592.05it/s]\r216964it [00:00, 1088775.74it/s]\r337425it [00:00, 1141613.17it/s]\r452967it [00:00, 1147016.53it/s]\r572951it [00:00, 1166016.06it/s]\r689553it [00:00, 1157771.60it/s]\r810008it [00:00, 1172950.29it/s]\r935257it [00:00, 1198143.09it/s]\r963636it [00:00, 1155136.54it/s]\n"}
|
76 |
+
,{"stream_name":"stdout","time":453.23643988,"data":"Sentence : এরা সবাই দাস হিসেবে একটি জাহাজে করে বিদেশে পাচার হচ্ছিল।\n"}
|
77 |
+
,{"stream_name":"stdout","time":453.23650391,"data":"Common Voice audio :\n"}
|
78 |
+
,{"stream_name":"stdout","time":453.432182231,"data":"Competition data audio : ae1be00ad59d.mp3\n"}
|
79 |
+
,{"stream_name":"stdout","time":453.783724884,"data":"--------------------------------------------------------------------------------\n"}
|
80 |
+
,{"stream_name":"stdout","time":453.783783484,"data":"--------------------------------------------------------------------------------\n"}
|
81 |
+
,{"stream_name":"stdout","time":453.783788414,"data":"Sentence : তিনি জানান এই কাজ সুভাষ দত্ত করবে এবং রহমানকে তার সহকারী হিসেবে যোগ দিতে বলেন।\n"}
|
82 |
+
,{"stream_name":"stdout","time":453.783794164,"data":"Common Voice audio :\n"}
|
83 |
+
,{"stream_name":"stdout","time":453.815708834,"data":"Competition data audio : 2bfa78215372.mp3\n"}
|
84 |
+
,{"stream_name":"stdout","time":454.228451986,"data":"--------------------------------------------------------------------------------\n"}
|
85 |
+
,{"stream_name":"stdout","time":454.228507256,"data":"--------------------------------------------------------------------------------\n"}
|
86 |
+
,{"stream_name":"stdout","time":454.228511766,"data":"Sentence : এটি দক্ষিণ মিশরের একটি শহর।\n"}
|
87 |
+
,{"stream_name":"stdout","time":454.228515496,"data":"Common Voice audio :\n"}
|
88 |
+
,{"stream_name":"stdout","time":454.250528576,"data":"Competition data audio : 98f9873ba235.mp3\n"}
|
89 |
+
,{"stream_name":"stdout","time":454.698182469,"data":"--------------------------------------------------------------------------------\n"}
|
90 |
+
,{"stream_name":"stdout","time":454.698266699,"data":"--------------------------------------------------------------------------------\n"}
|
91 |
+
,{"stream_name":"stdout","time":454.702478109,"data":"Sentence : বর্তমানে এদের আবাসস্থল হুমকির মুখে।\n"}
|
92 |
+
,{"stream_name":"stdout","time":454.702567559,"data":"Common Voice audio :\n"}
|
93 |
+
,{"stream_name":"stdout","time":454.90345134,"data":"Competition data audio : 5831fd3d7134.mp3\n"}
|
94 |
+
,{"stream_name":"stdout","time":455.111504631,"data":"--------------------------------------------------------------------------------\n"}
|
95 |
+
,{"stream_name":"stdout","time":455.111528311,"data":"--------------------------------------------------------------------------------\n"}
|
96 |
+
,{"stream_name":"stdout","time":455.111531851,"data":"Sentence : এটি একটি গুরুত্বপূর্ণ রেল বিভাগের জন্য পরিচিত।\n"}
|
97 |
+
,{"stream_name":"stdout","time":455.111535181,"data":"Common Voice audio :\n"}
|
98 |
+
,{"stream_name":"stdout","time":455.135717701,"data":"Competition data audio : ba8e9236358f.mp3\n"}
|
99 |
+
,{"stream_name":"stdout","time":455.327957002,"data":"--------------------------------------------------------------------------------\n"}
|
100 |
+
,{"stream_name":"stdout","time":455.328001362,"data":"--------------------------------------------------------------------------------\n"}
|
101 |
+
,{"stream_name":"stdout","time":457.842483597,"data":"Sentence : এটি সামগ্রিক পাঠক্রম, কোর্স, পরীক্ষা এবং ফলাফলগুলি নিয়ন্ত্রণ করে এবং অনুমোদন করে।\n"}
|
102 |
+
,{"stream_name":"stdout","time":457.842531877,"data":"Common Voice audio :\n"}
|
103 |
+
,{"stream_name":"stdout","time":458.040373948,"data":"Multiple audios in the competition dataset with the same sentence \n"}
|
104 |
+
,{"stream_name":"stdout","time":458.040398808,"data":"\n"}
|
105 |
+
,{"stream_name":"stdout","time":458.040401888,"data":"Competition data audio : 79660f8540b0.mp3\n"}
|
106 |
+
,{"stream_name":"stdout","time":458.302004169,"data":"Competition data audio : 88db4447d274.mp3\n"}
|
107 |
+
,{"stream_name":"stdout","time":458.532606861,"data":"Competition data audio : ed34fbd6cf0b.mp3\n"}
|
108 |
+
,{"stream_name":"stdout","time":458.770749963,"data":"--------------------------------------------------------------------------------\n"}
|
109 |
+
,{"stream_name":"stdout","time":458.770780352,"data":"--------------------------------------------------------------------------------\n"}
|
110 |
+
,{"stream_name":"stdout","time":464.378259715,"data":"Sentence : তাদের একটি ছেলে এবং একটি মেয়ে আছে।\n"}
|
111 |
+
,{"stream_name":"stdout","time":464.378324565,"data":"Common Voice audio :\n"}
|
112 |
+
,{"stream_name":"stdout","time":464.576431276,"data":"Multiple audios in the competition dataset with the same sentence \n"}
|
113 |
+
,{"stream_name":"stdout","time":464.576468386,"data":"\n"}
|
114 |
+
,{"stream_name":"stdout","time":464.576498526,"data":"Competition data audio : 30e710e39566.mp3\n"}
|
115 |
+
,{"stream_name":"stdout","time":464.811630637,"data":"Competition data audio : b4c61f0f5afd.mp3\n"}
|
116 |
+
,{"stream_name":"stdout","time":465.036936918,"data":"Competition data audio : e4cc57dcf517.mp3\n"}
|
117 |
+
,{"stream_name":"stdout","time":465.30257676,"data":"--------------------------------------------------------------------------------\n"}
|
118 |
+
,{"stream_name":"stdout","time":465.30260857,"data":"--------------------------------------------------------------------------------\n"}
|
119 |
+
,{"stream_name":"stdout","time":466.057549354,"data":"Sentence : শীতল জলবায়ুতে কাণ্ডের বেশিরভাগ বৃদ্ধি বসন্ত এবং গ্রীষ্মের শুরুতে ঘটে।\n"}
|
120 |
+
,{"stream_name":"stdout","time":466.057645774,"data":"Common Voice audio :\n"}
|
121 |
+
,{"stream_name":"stdout","time":466.255401516,"data":"Multiple audios in the competition dataset with the same sentence \n"}
|
122 |
+
,{"stream_name":"stdout","time":466.255426845,"data":"\n"}
|
123 |
+
,{"stream_name":"stdout","time":466.255431045,"data":"Competition data audio : 113cd642691c.mp3\n"}
|
124 |
+
,{"stream_name":"stdout","time":466.499374247,"data":"Competition data audio : 968e266863e8.mp3\n"}
|
125 |
+
,{"stream_name":"stdout","time":466.720088108,"data":"Competition data audio : b5764e70557d.mp3\n"}
|
126 |
+
,{"stream_name":"stdout","time":467.01409308,"data":"--------------------------------------------------------------------------------\n"}
|
127 |
+
,{"stream_name":"stdout","time":467.01413339,"data":"--------------------------------------------------------------------------------\n"}
|
128 |
+
,{"stream_name":"stdout","time":468.061828886,"data":"Sentence : ইউনিটটি বাংলাদেশে বৈজ্ঞানিক গবেষণা তহবিল গঠন করেছে।\n"}
|
129 |
+
,{"stream_name":"stdout","time":468.061865106,"data":"Common Voice audio :\n"}
|
130 |
+
,{"stream_name":"stdout","time":468.260425757,"data":"Multiple audios in the competition dataset with the same sentence \n"}
|
131 |
+
,{"stream_name":"stdout","time":468.260479287,"data":"\n"}
|
132 |
+
,{"stream_name":"stdout","time":468.260483737,"data":"Competition data audio : 30621db2115d.mp3\n"}
|
133 |
+
,{"stream_name":"stdout","time":468.491173878,"data":"Competition data audio : 578726cb78a4.mp3\n"}
|
134 |
+
,{"stream_name":"stdout","time":468.7056183,"data":"Competition data audio : ec9f81af5c0a.mp3\n"}
|
135 |
+
,{"stream_name":"stdout","time":468.71190234,"data":"--------------------------------------------------------------------------------\n"}
|
136 |
+
,{"stream_name":"stdout","time":468.71194852,"data":"--------------------------------------------------------------------------------\n"}
|
137 |
+
,{"stream_name":"stdout","time":471.298306515,"data":"Sentence : বর্তমানে জমিদার বাড়ির ভৌত কাঠামো সংরক্ষণের অভাবে নষ্ট হয়ে যাচ্ছে।\n"}
|
138 |
+
,{"stream_name":"stdout","time":471.298344275,"data":"Common Voice audio :\n"}
|
139 |
+
,{"stream_name":"stdout","time":471.496049196,"data":"Multiple audios in the competition dataset with the same sentence \n"}
|
140 |
+
,{"stream_name":"stdout","time":471.496102506,"data":"\n"}
|
141 |
+
,{"stream_name":"stdout","time":471.496109866,"data":"Competition data audio : 17f4979f652e.mp3\n"}
|
142 |
+
,{"stream_name":"stdout","time":471.730514617,"data":"Competition data audio : 87f243f631ea.mp3\n"}
|
143 |
+
,{"stream_name":"stdout","time":471.959682938,"data":"Competition data audio : 90f45aad66a0.mp3\n"}
|
144 |
+
,{"stream_name":"stdout","time":471.963997418,"data":"--------------------------------------------------------------------------------\n"}
|
145 |
+
,{"stream_name":"stdout","time":471.964014048,"data":"--------------------------------------------------------------------------------\n"}
|
146 |
+
,{"stream_name":"stdout","time":473.207362935,"data":"Audio in Common Voice dataset : \n"}
|
147 |
+
,{"stream_name":"stderr","time":479.415976081,"data":"/opt/conda/lib/python3.10/site-packages/traitlets/traitlets.py:2930: FutureWarning: --Exporter.preprocessors=[\"remove_papermill_header.RemovePapermillHeader\"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.\n"}
|
148 |
+
,{"stream_name":"stderr","time":479.416022431,"data":" warn(\n"}
|
149 |
+
,{"stream_name":"stderr","time":479.420470711,"data":"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n"}
|
150 |
+
,{"stream_name":"stderr","time":479.445263511,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to notebook\n"}
|
151 |
+
,{"stream_name":"stderr","time":480.106029845,"data":"[NbConvertApp] Writing 9378981 bytes to __notebook__.ipynb\n"}
|
152 |
+
,{"stream_name":"stderr","time":481.686893084,"data":"/opt/conda/lib/python3.10/site-packages/traitlets/traitlets.py:2930: FutureWarning: --Exporter.preprocessors=[\"nbconvert.preprocessors.ExtractOutputPreprocessor\"] for containers is deprecated in traitlets 5.0. You can pass `--Exporter.preprocessors item` ... multiple times to add items to a list.\n"}
|
153 |
+
,{"stream_name":"stderr","time":481.687327044,"data":" warn(\n"}
|
154 |
+
,{"stream_name":"stderr","time":481.690677654,"data":"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n"}
|
155 |
+
,{"stream_name":"stderr","time":481.722324954,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to html\n"}
|
156 |
+
,{"stream_name":"stderr","time":483.099897632,"data":"[NbConvertApp] Writing 9589984 bytes to __results__.html\n"}
|
157 |
+
]
|
filtered.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
indexes.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
kaggle.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"username":"nguynminhph","key":"REDACTED"}
|
macro-normalization.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
normalized.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06432a5dd7b5b27d38d9bbaebcd64aa85b79b85d0c4770ba66420bb63457fd24
|
3 |
+
size 297143462
|
preprocessor_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0.0,
|
7 |
+
"processor_class": "Wav2Vec2Processor",
|
8 |
+
"return_attention_mask": true,
|
9 |
+
"sampling_rate": 16000
|
10 |
+
}
|
python-packages2.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c54f691d9a222bb3c61dc4db5574d209628f4e667be964cb8d50c1d7db11ef8f
|
3 |
+
size 17653201
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f0960f05cdfd4480f2339a52034944ac99249181b2f20278a9b26ee3950c577
|
3 |
+
size 3859313933
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd6572be7f0af6eabc0c935a13bbdbe6d745016ae2cdfc5b122b6a37750c92ac
|
3 |
+
size 3963
|