Aeirya commited on
Commit
efb97c2
1 Parent(s): 0d6439a

Upload poem-char-tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. poem-char-tokenizer.json +110 -0
poem-char-tokenizer.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": {
5
+ "strategy": "BatchLongest",
6
+ "direction": "Right",
7
+ "pad_to_multiple_of": null,
8
+ "pad_id": 0,
9
+ "pad_type_id": 0,
10
+ "pad_token": "[PAD]"
11
+ },
12
+ "added_tokens": [
13
+ {
14
+ "id": 0,
15
+ "content": "[PAD]",
16
+ "single_word": false,
17
+ "lstrip": false,
18
+ "rstrip": false,
19
+ "normalized": false,
20
+ "special": true
21
+ },
22
+ {
23
+ "id": 1,
24
+ "content": "[SOS]",
25
+ "single_word": false,
26
+ "lstrip": false,
27
+ "rstrip": false,
28
+ "normalized": false,
29
+ "special": true
30
+ },
31
+ {
32
+ "id": 2,
33
+ "content": "[EOS]",
34
+ "single_word": false,
35
+ "lstrip": false,
36
+ "rstrip": false,
37
+ "normalized": false,
38
+ "special": true
39
+ }
40
+ ],
41
+ "normalizer": null,
42
+ "pre_tokenizer": null,
43
+ "post_processor": null,
44
+ "decoder": null,
45
+ "model": {
46
+ "type": "BPE",
47
+ "dropout": null,
48
+ "unk_token": null,
49
+ "continuing_subword_prefix": null,
50
+ "end_of_word_suffix": null,
51
+ "fuse_unk": false,
52
+ "byte_fallback": false,
53
+ "vocab": {
54
+ "[PAD]": 0,
55
+ "[SOS]": 1,
56
+ "[EOS]": 2,
57
+ " ": 3,
58
+ "!": 4,
59
+ ":": 5,
60
+ "«": 6,
61
+ "»": 7,
62
+ "،": 8,
63
+ "؟": 9,
64
+ "آ": 10,
65
+ "أ": 11,
66
+ "ؤ": 12,
67
+ "ئ": 13,
68
+ "ا": 14,
69
+ "ب": 15,
70
+ "ت": 16,
71
+ "ث": 17,
72
+ "ج": 18,
73
+ "ح": 19,
74
+ "خ": 20,
75
+ "د": 21,
76
+ "ذ": 22,
77
+ "ر": 23,
78
+ "ز": 24,
79
+ "س": 25,
80
+ "ش": 26,
81
+ "ص": 27,
82
+ "ض": 28,
83
+ "ط": 29,
84
+ "ظ": 30,
85
+ "ع": 31,
86
+ "غ": 32,
87
+ "ـ": 33,
88
+ "ف": 34,
89
+ "ق": 35,
90
+ "ل": 36,
91
+ "م": 37,
92
+ "ن": 38,
93
+ "ه": 39,
94
+ "و": 40,
95
+ "َ": 41,
96
+ "ُ": 42,
97
+ "ِ": 43,
98
+ "ّ": 44,
99
+ "ٔ": 45,
100
+ "پ": 46,
101
+ "چ": 47,
102
+ "ژ": 48,
103
+ "ک": 49,
104
+ "گ": 50,
105
+ "ی": 51,
106
+ "‌": 52
107
+ },
108
+ "merges": []
109
+ }
110
+ }