{
  "add_prefix_space": false,
  "additional_special_tokens": [
    "<|reg_extra|>",
    "<|endoftext|>",
    "<|fim_prefix|>",
    "<|fim_middle|>",
    "<|fim_suffix|>",
    "<|fim_pad|>",
    "<gh_stars>",
    "<filename>",
    "<issue_start>",
    "<issue_comment>",
    "<issue_closed>",
    "<jupyter_start>",
    "<jupyter_text>",
    "<jupyter_code>",
    "<jupyter_output>",
    "<empty_output>",
    "<commit_before>",
    "<commit_msg>",
    "<commit_after>",
    "<reponame>",
    "<|endofprompt|>",
    "<|im_start|>",
    "<|im_end|>",
    "<|pause|>",
    "<|reg0|>",
    "<|reg1|>",
    "<|reg2|>",
    "<|reg3|>",
    "<|reg4|>",
    "<|reg5|>",
    "<|reg6|>",
    "<|reg7|>",
    "<|extra0|>"
  ],
  "bos_token": "<|endoftext|>",
  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "tokenizer_class": "GPT2TokenizerFast",
  "model_max_length": 4096,
  "pad_token": "<|endoftext|>",
  "unk_token": "<|endoftext|>"
}