File size: 1,802 Bytes
370d26b 54b8211 370d26b dbc5dc8 54b8211 370d26b 54b8211 370d26b 54b8211 370d26b 562d414 370d26b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# Preprocessor configuration named `whisper_bpe_tokenizer`.
# NOTE(review): the loader that consumes this file is not visible here, so the
# section headings and remarks below are inferred from key names and values
# only — confirm against the consuming code before relying on them.
name: whisper_bpe_tokenizer
config_type: preprocessor

# --- Truncation settings ---
# NOTE(review): max_length (448) differs from model_max_length (1024) further
# down in this file — confirm which of the two the consumer actually enforces.
max_length: 448
truncation_strategy: longest_first
truncation_direction: right
stride: 0

# --- Padding settings ---
# NOTE(review): pad_to_multiple_of is 0; presumably that means "disabled" — confirm.
padding_strategy: longest
padding_direction: right
pad_to_multiple_of: 0
pad_token_type_id: 0

# --- Core special tokens ---
# eos_token, unk_token and pad_token are all set to the same string,
# `<|endoftext|>`; only bos_token differs.
bos_token: <|startoftranscript|>
eos_token: <|endoftext|>
unk_token: <|endoftext|>
pad_token: <|endoftext|>

# --- Additional special tokens ---
# NOTE(review): list order may be significant (ids could be assigned by
# position) — do not reorder or de-duplicate without checking the consumer.
additional_special_tokens:
# NOTE(review): `<|endoftext|>` is listed twice in a row here. Confirm whether
# the duplicate is intentional before touching it.
- <|endoftext|>
- <|endoftext|>
- <|startoftranscript|>
# The next 99 entries (`<|en|>` through `<|su|>`) look like per-language tags
# — presumably one per supported language; verify against the model's vocabulary.
- <|en|>
- <|zh|>
- <|de|>
- <|es|>
- <|ru|>
- <|ko|>
- <|fr|>
- <|ja|>
- <|pt|>
- <|tr|>
- <|pl|>
- <|ca|>
- <|nl|>
- <|ar|>
- <|sv|>
- <|it|>
- <|id|>
- <|hi|>
- <|fi|>
- <|vi|>
- <|he|>
- <|uk|>
- <|el|>
- <|ms|>
- <|cs|>
- <|ro|>
- <|da|>
- <|hu|>
- <|ta|>
- <|no|>
- <|th|>
- <|ur|>
- <|hr|>
- <|bg|>
- <|lt|>
- <|la|>
- <|mi|>
- <|ml|>
- <|cy|>
- <|sk|>
- <|te|>
- <|fa|>
- <|lv|>
- <|bn|>
- <|sr|>
- <|az|>
- <|sl|>
- <|kn|>
- <|et|>
- <|mk|>
- <|br|>
- <|eu|>
- <|is|>
- <|hy|>
- <|ne|>
- <|mn|>
- <|bs|>
- <|kk|>
- <|sq|>
- <|sw|>
- <|gl|>
- <|mr|>
- <|pa|>
- <|si|>
- <|km|>
- <|sn|>
- <|yo|>
- <|so|>
- <|af|>
- <|oc|>
- <|ka|>
- <|be|>
- <|tg|>
- <|sd|>
- <|gu|>
- <|am|>
- <|yi|>
- <|lo|>
- <|uz|>
- <|fo|>
- <|ht|>
- <|ps|>
- <|tk|>
- <|nn|>
- <|mt|>
- <|sa|>
- <|lb|>
- <|my|>
- <|bo|>
- <|tl|>
- <|mg|>
- <|as|>
- <|tt|>
- <|haw|>
- <|ln|>
- <|ha|>
- <|ba|>
- <|jw|>
- <|su|>
# The remaining six entries are not language tags; three of them are referenced
# again below by translate_token, transcribe_token and notimestamps_token.
- <|translate|>
- <|transcribe|>
- <|startoflm|>
- <|startofprev|>
- <|nocaptions|>
- <|notimestamps|>

# --- Subword / vocabulary options ---
# Both affixes are empty strings.
# NOTE(review): vocab_size, min_frequency, limit_alphabet, initial_alphabet and
# show_progress look like tokenizer-training options — confirm whether they are
# read at inference time at all.
continuing_subword_prefix: ''
end_of_word_suffix: ''
fuse_unk: false
vocab_size: 50364
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true

# --- Task tokens and decoding defaults ---
# Each of the three *_token values below also appears verbatim in
# additional_special_tokens above; keep them in sync if either is edited.
translate_token: <|translate|>
transcribe_token: <|transcribe|>
notimestamps_token: <|notimestamps|>
add_prefix_space: false
add_bos_token: false
model_max_length: 1024
# NOTE(review): `language` is spelled out as a name rather than a tag; `<|fa|>`
# is present in the token list above — presumably the consumer maps `persian`
# to it, confirm.
language: persian
predict_timestamps: false
|