yujiepan committed on
Commit
7377069
1 Parent(s): 3b5961d

Upload tokenizer

Browse files
Files changed (3) hide show
  1. README.md +2 -3
  2. tokenizer.json +1 -0
  3. tokenizer_config.json +3 -0
README.md CHANGED
@@ -1,12 +1,11 @@
1
-
2
  ---
 
3
  pipeline_tag: text-generation
4
  inference: true
5
  widget:
6
- - text: 'Hello!'
7
  example_title: Hello world
8
  group: Python
9
- library_name: transformers
10
  ---
11
 
12
  This model is randomly initialized, using the config from [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) but with smaller size.
 
 
1
  ---
2
+ library_name: transformers
3
  pipeline_tag: text-generation
4
  inference: true
5
  widget:
6
+ - text: Hello!
7
  example_title: Hello world
8
  group: Python
 
9
  ---
10
 
11
  This model is randomly initialized, using the config from [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) but with smaller size.
tokenizer.json CHANGED
@@ -134,6 +134,7 @@
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
 
137
  "vocab": {
138
  "<unk>": 0,
139
  "<s>": 1,
 
134
  "end_of_word_suffix": null,
135
  "fuse_unk": true,
136
  "byte_fallback": true,
137
+ "ignore_merges": false,
138
  "vocab": {
139
  "<unk>": 0,
140
  "<s>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
1
  {
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<unk>",
@@ -27,6 +29,7 @@
27
  },
28
  "additional_special_tokens": [],
29
  "bos_token": "<s>",
 
30
  "clean_up_tokenization_spaces": false,
31
  "eos_token": "</s>",
32
  "legacy": true,
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
29
  },
30
  "additional_special_tokens": [],
31
  "bos_token": "<s>",
32
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
  "legacy": true,