{ "version": "1.0", "truncation": null, "padding": { "strategy": "BatchLongest", "direction": "Right", "pad_to_multiple_of": null, "pad_id": 0, "pad_type_id": 0, "pad_token": "[PAD]" }, "added_tokens": [ { "id": 0, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[SOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[EOS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": null, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "vocab": { "[PAD]": 0, "[SOS]": 1, "[EOS]": 2, " ": 3, "!": 4, ":": 5, "«": 6, "»": 7, "،": 8, "؟": 9, "آ": 10, "أ": 11, "ؤ": 12, "ئ": 13, "ا": 14, "ب": 15, "ت": 16, "ث": 17, "ج": 18, "ح": 19, "خ": 20, "د": 21, "ذ": 22, "ر": 23, "ز": 24, "س": 25, "ش": 26, "ص": 27, "ض": 28, "ط": 29, "ظ": 30, "ع": 31, "غ": 32, "ـ": 33, "ف": 34, "ق": 35, "ل": 36, "م": 37, "ن": 38, "ه": 39, "و": 40, "َ": 41, "ُ": 42, "ِ": 43, "ّ": 44, "ٔ": 45, "پ": 46, "چ": 47, "ژ": 48, "ک": 49, "گ": 50, "ی": 51, "‌": 52 }, "merges": [] } }