rAIfle committed on
Commit
57858dc
1 Parent(s): 25697b0

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +2 -2
tokenizer_config.json CHANGED
@@ -2050,14 +2050,14 @@
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
- "chat_template": "{% if messages[0]['from'] == 'system' %}{% if messages[1]['from'] == 'human' %}{{ bos_token + '[INST] <<SYS>>\n' + messages[0]['value'] + '\n<</SYS>>\n\n' + messages[1]['value'] + ' [/INST]' }}{% set loop_messages = messages[2:] %}{% else %}{{ bos_token + '[INST] ' + messages[0]['value'] + ' [/INST]' }}{% set loop_messages = messages[1:] %}{% endif %}{% else %}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['from'] == 'human' %}{{ bos_token + '[INST] ' + message['value'].strip() + ' [/INST]' }}{% elif message['from'] == 'gpt' %}{{ ' ' + message['value'].strip() + ' ' + eos_token }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|end_of_text|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
- "model_max_length": 8192,
2061
  "pad_token": "<|end_of_text|>",
2062
  "padding_side": "left",
2063
  "tokenizer_class": "PreTrainedTokenizerFast"
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{{ bos_token }}{% if messages[0]['from'] == 'system' %}{{ messages[0]['value'] + ' ' }}{% set loop_messages = messages[1:] %}{% else %}{{ 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\\'s questions.' + ' ' }}{% set loop_messages = messages %}{% endif %}{% for message in loop_messages %}{% if message['from'] == 'human' %}{{ 'USER: ' + message['value'] + ' ' }}{% elif message['from'] == 'gpt' %}{{ 'ASSISTANT: ' + message['value'] + eos_token }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|end_of_text|>",
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
+ "model_max_length": 16384,
2061
  "pad_token": "<|end_of_text|>",
2062
  "padding_side": "left",
2063
  "tokenizer_class": "PreTrainedTokenizerFast"