Abhaykoul committed on
Commit
88b0992
1 Parent(s): c9fbd0a

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "<|endoftext|>": 151643,
3
+ "<|im_end|>": 151645,
4
+ "<|im_start|>": 151644
5
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "eos_token": {
7
+ "content": "<|im_end|>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "pad_token": {
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ }
20
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": [
30
+ "<|im_start|>",
31
+ "<|im_end|>"
32
+ ],
33
+ "bos_token": null,
34
+ "chat_template": "{% if messages[0]['role'] == 'system' %}<|im_start|>system\n{% if builtin_tools is defined or tools is not none %}Environment: ipython\n{% endif %}{% if builtin_tools is defined %}Tools: {{ builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") }}\n{% endif %}Cutting Knowledge Date: December 2023\nToday Date: {{ date_string | default(\"26 Jul 2024\") }}\n\n{% if tools is not none and not tools_in_user_message %}You have access to the following functions. To call a function, please respond with JSON for a function call.\nRespond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.\nDo not use variables.\n\n{% for t in tools %}{{ t | tojson(indent=4) }}\n\n{% endfor %}{% endif %}{{ messages[0]['content']|trim }}<|im_end|>\n\n{% set messages = messages[1:] %}{% endif %}{% for message in messages %}{% if message.role == 'user' or message.role == 'assistant' %}<|im_start|>{{ message['role'] }}\n{{ message['content'] | trim }}<|im_end|>\n\n{% elif 'tool_calls' in message %}<|im_start|>assistant\n{% set tool_call = message.tool_calls[0].function %}{% if builtin_tools is defined and tool_call.name in builtin_tools %}<|python_tag|>{{ tool_call.name }}.call({% for arg_name, arg_val in tool_call.arguments | items %}{{ arg_name }}=\"{{ arg_val }}\"{% if not loop.last %}, {% endif %}{% endfor %}){% else %}{\"name\": \"{{ tool_call.name }}\", \"parameters\": {{ tool_call.arguments | tojson }}}{% endif %}<|im_end|>\n\n{% elif message.role == \"tool\" or message.role == \"ipython\" %}<|im_start|>ipython\n{% if message.content is mapping or message.content is iterable %}{{ message.content | tojson }}{% else %}{{ message.content }}{% endif %}<|im_end|>\n\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "<|im_end|>",
37
+ "errors": "replace",
38
+ "model_max_length": 32768,
39
+ "pad_token": "<|endoftext|>",
40
+ "padding_side": "left",
41
+ "split_special_tokens": false,
42
+ "tokenizer_class": "Qwen2Tokenizer",
43
+ "unk_token": null
44
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff