Hezar: Upload model_config.yaml
Browse files- model_config.yaml +130 -0
model_config.yaml
ADDED
@@ -0,0 +1,130 @@
name: whisper_speech_recognition
config_type: model
vocab_size: 51865
num_mel_bins: 80
encoder_layers: 12
encoder_attention_heads: 12
decoder_layers: 12
decoder_attention_heads: 12
num_hidden_layers: 12
decoder_ffn_dim: 3072
encoder_ffn_dim: 3072
encoder_layerdrop: 0.0
decoder_layerdrop: 0.0
decoder_start_token_id: 50258
use_cache: true
sampling_rate: 16000
is_encoder_decoder: true
activation_function: gelu
d_model: 768
dropout: 0.0
torch_dtype: float32
attention_dropout: 0.0
activation_dropout: 0.0
init_std: 0.02
scale_embedding: false
max_source_positions: 1500
max_target_positions: 448
pad_token_id: 50257
bos_token_id: 50257
eos_token_id: 50257
suppress_tokens:
- 1
- 2
- 7
- 8
- 9
- 10
- 14
- 25
- 26
- 27
- 28
- 29
- 31
- 58
- 59
- 60
- 61
- 62
- 63
- 90
- 91
- 92
- 93
- 359
- 503
- 522
- 542
- 873
- 893
- 902
- 918
- 922
- 931
- 1350
- 1853
- 1982
- 2460
- 2627
- 3246
- 3253
- 3268
- 3536
- 3846
- 3961
- 4183
- 4667
- 6585
- 6647
- 7273
- 9061
- 9383
- 10428
- 10929
- 11938
- 12033
- 12331
- 12562
- 13793
- 14157
- 14635
- 15265
- 15618
- 16553
- 16604
- 18362
- 18956
- 20075
- 21675
- 22520
- 26130
- 26161
- 26435
- 28279
- 29464
- 31650
- 32302
- 32470
- 36865
- 42863
- 47425
- 49870
- 50254
- 50258
- 50360
- 50361
- 50362
begin_suppress_tokens:
- 220
- 50256
use_weighted_layer_sum: false
classifier_proj_size: 256
apply_spec_augment: false
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0
max_new_tokens: 448