# Source: whisper-small / model_config.yaml
# Last commit: "Hezar: Upload model_config.yaml" (c21d951, by arxyzan)
# Entry identity.
name: whisper_speech_recognition
config_type: model

# Model-wide architecture settings.
is_encoder_decoder: true
d_model: 768
num_hidden_layers: 12
vocab_size: 51865
activation_function: gelu
scale_embedding: false
init_std: 0.02

# Audio front-end.
sampling_rate: 16000
num_mel_bins: 80

# Encoder stack.
encoder_layers: 12
encoder_attention_heads: 12
encoder_ffn_dim: 3072
encoder_layerdrop: 0.0

# Decoder stack.
decoder_layers: 12
decoder_attention_heads: 12
decoder_ffn_dim: 3072
decoder_layerdrop: 0.0

# Position limits for source and target sequences.
max_source_positions: 1500
max_target_positions: 448

# Dropout rates (every rate in this file is 0.0).
dropout: 0.0
attention_dropout: 0.0
activation_dropout: 0.0

# Runtime options.
use_cache: true
torch_dtype: float32

# Special token ids. pad, bos and eos all share id 50257.
decoder_start_token_id: 50258
pad_token_id: 50257
bos_token_id: 50257
eos_token_id: 50257
# Token ids listed for suppression during generation.
# NOTE(review): per the Hugging Face WhisperConfig docs these are the
# "non-speech" tokens; confirm the consumer forwards the list unchanged.
suppress_tokens:
  - 1
  - 2
  - 7
  - 8
  - 9
  - 10
  - 14
  - 25
  - 26
  - 27
  - 28
  - 29
  - 31
  - 58
  - 59
  - 60
  - 61
  - 62
  - 63
  - 90
  - 91
  - 92
  - 93
  - 359
  - 503
  - 522
  - 542
  - 873
  - 893
  - 902
  - 918
  - 922
  - 931
  - 1350
  - 1853
  - 1982
  - 2460
  - 2627
  - 3246
  - 3253
  - 3268
  - 3536
  - 3846
  - 3961
  - 4183
  - 4667
  - 6585
  - 6647
  - 7273
  - 9061
  - 9383
  - 10428
  - 10929
  - 11938
  - 12033
  - 12331
  - 12562
  - 13793
  - 14157
  - 14635
  - 15265
  - 15618
  - 16553
  - 16604
  - 18362
  - 18956
  - 20075
  - 21675
  - 22520
  - 26130
  - 26161
  - 26435
  - 28279
  - 29464
  - 31650
  - 32302
  - 32470
  - 36865
  - 42863
  - 47425
  - 49870
  - 50254
  - 50258
  - 50360
  - 50361
  - 50362

# Token ids suppressed only at the first sampling step: the blank token (220)
# and the end-of-sequence token. The second entry must equal eos_token_id,
# which this file sets to 50257. It was previously 50256, the library default
# that corresponds to the English-only vocabulary, so EOS was not actually
# blocked at the start of decoding.
begin_suppress_tokens:
  - 220
  - 50257
# Generation length cap.
# NOTE(review): this equals max_target_positions (448) in this file; confirm
# the consumer leaves room for any decoder prompt tokens.
max_new_tokens: 448

# Classifier-related settings (grouped by key name).
use_weighted_layer_sum: false
classifier_proj_size: 256

# SpecAugment masking. apply_spec_augment is false, so the mask_* values
# below are presumably unused at present -- confirm against the consumer.
apply_spec_augment: false
mask_feature_prob: 0.0
mask_feature_length: 10
mask_feature_min_masks: 0
mask_time_prob: 0.05
mask_time_length: 10
mask_time_min_masks: 2