{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "[PAD]": 0, "[SEP]": 1, "[SOS]": 2, "[EOS]": 3, "[UNK]": 4, "!": 5, ":": 6, "«": 7, "»": 8, "،": 9, "؟": 10, "آ": 11, "أ": 12, "ؤ": 13, "ئ": 14, "ا": 15, "ب": 16, "ت": 17, "ث": 18, "ج": 19, "ح": 20, "خ": 21, "د": 22, "ذ": 23, "ر": 24, "ز": 25, "س": 26, "ش": 27, "ص": 28, "ض": 29, "ط": 30, "ظ": 31, "ع": 32, "غ": 33, "ـ": 34, "ف": 35, "ق": 36, "ل": 37, "م": 38, "ن": 39, "ه": 40, "و": 41, "َ": 42, "ُ": 43, "ِ": 44, "ّ": 45, "ْ": 46, "ٔ": 47, "پ": 48, "چ": 49, "ژ": 50, "ک": 51, "گ": 52, "ی": 53, "‌": 54 }, "merges": [] } }