Error when trying to infer in Chinese

#8
by mario479 - opened

I get an error when trying to run inference in Chinese (English works fine); the problem seems to come from the tokenizer.
The full traceback is below:

[rank0]:     outputs = self.__model.chat(self.history, sampling_params)
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/entrypoints/llm.py", line 580, in chat
[rank0]:     return self.generate(
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/utils.py", line 1047, in inner
[rank0]:     return fn(*args, **kwargs)
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/entrypoints/llm.py", line 388, in generate
[rank0]:     outputs = self._run_engine(use_tqdm=use_tqdm)
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/entrypoints/llm.py", line 877, in _run_engine
[rank0]:     step_outputs = self.llm_engine.step()
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/engine/llm_engine.py", line 1306, in step
[rank0]:     self._process_model_outputs(ctx=ctx)
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/engine/llm_engine.py", line 1014, in _process_model_outputs
[rank0]:     self.output_processor.process_outputs(
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/engine/output_processor/single_step.py", line 96, in process_outputs
[rank0]:     return self._process_sequence_group_outputs(sequence_group, outputs[0],
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/engine/output_processor/single_step.py", line 124, in _process_sequence_group_outputs
[rank0]:     new_char_count = self.detokenizer.decode_sequence_inplace(
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/transformers_utils/detokenizer.py", line 115, in decode_sequence_inplace
[rank0]:     seq.read_offset) = convert_prompt_ids_to_tokens(
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/transformers_utils/detokenizer.py", line 224, in convert_prompt_ids_to_tokens
[rank0]:     new_tokens = tokenizer.convert_ids_to_tokens(
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/transformers_utils/tokenizers/mistral.py", line 227, in convert_ids_to_tokens
[rank0]:     tokens = [self.tokenizer.id_to_byte_piece(id) for id in ids]
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/vllm/transformers_utils/tokenizers/mistral.py", line 227, in <listcomp>
[rank0]:     tokens = [self.tokenizer.id_to_byte_piece(id) for id in ids]
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/mistral_common/tokens/tokenizers/tekken.py", line 280, in id_to_byte_piece
[rank0]:     return self._model.decode_single_token_bytes(token_id - self.num_special_tokens)
[rank0]:   File "/usr/local/lib/python3.9/dist-packages/tiktoken/core.py", line 272, in decode_single_token_bytes
[rank0]:     return self._core_bpe.decode_single_token_bytes(token)
[rank0]: OverflowError: out of range integral type conversion attempted
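
For what it's worth, the last two frames hint at a likely cause: `Tekkenizer.id_to_byte_piece` passes `token_id - self.num_special_tokens` to tiktoken, so any id that falls inside the special-token range comes out negative, and tiktoken's Rust core rejects negative ids with exactly this `OverflowError`. A minimal sketch of that failure mode (the id and the `num_special_tokens` value here are illustrative, not taken from the actual model):

```python
import tiktoken

# Any encoding works for the demo; the point is what happens
# when a negative id reaches decode_single_token_bytes.
enc = tiktoken.get_encoding("cl100k_base")

token_id = 5               # hypothetical: an id inside the special-token block
num_special_tokens = 1000  # illustrative value; Tekken reserves a block of special ids

try:
    # Mirrors tekken.py line 280: the subtraction goes negative here.
    enc.decode_single_token_bytes(token_id - num_special_tokens)
except OverflowError as e:
    print(e)  # "out of range integral type conversion attempted"
```

If that reading is right, something in the Chinese prompt is producing an id below `num_special_tokens` on this decode path while English prompts never do, which would explain why only Chinese input fails.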

I have the same problem; my vLLM version is 0.6.3.
