huseinzol05 committed
Commit 43e32db
1 Parent(s): 2b2582c

Upload tokenizer

Files changed (2)
  1. special_tokens_map.json +4 -28
  2. tokenizer_config.json +0 -4
special_tokens_map.json CHANGED
@@ -5,32 +5,8 @@
     "</s>",
     "<unk>"
   ],
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
 }
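
This simplification works because transformers accepts either form in special_tokens_map.json: a plain string or a full AddedToken dictionary. The string form leaves lstrip/rstrip/normalized handling to the tokenizer's defaults, while the values themselves resolve identically. A minimal sketch to verify, assuming the uploaded files live in a hypothetical local directory ./tokenizer:

    from transformers import AutoTokenizer

    # Load from a local directory containing the updated
    # special_tokens_map.json and tokenizer_config.json (hypothetical path).
    tokenizer = AutoTokenizer.from_pretrained("./tokenizer")

    # The plain-string entries resolve to the same special tokens as the
    # earlier AddedToken dictionaries did.
    print(tokenizer.bos_token)  # <s>
    print(tokenizer.eos_token)  # </s>
    print(tokenizer.pad_token)  # <pad>
    print(tokenizer.unk_token)  # <unk>
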
tokenizer_config.json CHANGED
@@ -42,10 +42,6 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
-  "model_input_names": [
-    "input_ids",
-    "attention_mask"
-  ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "tokenizer_class": "PreTrainedTokenizerFast",