x54-729 committed on
Commit
11a2f20
1 Parent(s): 43367a7

Fix InternLMTokenizer

Browse files
Files changed (1) hide show
  1. tokenization_internlm.py +6 -6
tokenization_internlm.py CHANGED
@@ -65,8 +65,13 @@ class InternLMTokenizer(PreTrainedTokenizer):
65
  **kwargs,
66
  ):
67
  self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
 
 
 
 
68
  self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
69
  self.sp_model.Load(vocab_file)
 
70
  super().__init__(
71
  bos_token=bos_token,
72
  eos_token=eos_token,
@@ -75,13 +80,8 @@ class InternLMTokenizer(PreTrainedTokenizer):
75
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
76
  **kwargs,
77
  )
78
- self.vocab_file = vocab_file
79
- self.add_bos_token = add_bos_token
80
- self.add_eos_token = add_eos_token
81
- self.decode_with_prefix_space = decode_with_prefix_space
82
- self._no_prefix_space_tokens = None
83
 
84
- """ Initialisation"""
85
 
86
  @property
87
  def no_prefix_space_tokens(self):
 
65
  **kwargs,
66
  ):
67
  self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
68
+ self.vocab_file = vocab_file
69
+ self.add_bos_token = add_bos_token
70
+ self.add_eos_token = add_eos_token
71
+ self.decode_with_prefix_space = decode_with_prefix_space
72
  self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
73
  self.sp_model.Load(vocab_file)
74
+ self._no_prefix_space_tokens = None
75
  super().__init__(
76
  bos_token=bos_token,
77
  eos_token=eos_token,
 
80
  clean_up_tokenization_spaces=clean_up_tokenization_spaces,
81
  **kwargs,
82
  )
 
 
 
 
 
83
 
84
+ """ Initialization"""
85
 
86
  @property
87
  def no_prefix_space_tokens(self):