mzbac committed
Commit ab4f102
1 parent: 04f1d03

Update tokenization_yayi.py

Files changed (1)
  tokenization_yayi.py  +3 -2
tokenization_yayi.py CHANGED
@@ -71,6 +71,8 @@ class YayiTokenizer(PreTrainedTokenizer):
         eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
         unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
         pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(vocab_file)
         super().__init__(
             bos_token=bos_token,
             eos_token=eos_token,
@@ -85,8 +87,7 @@ class YayiTokenizer(PreTrainedTokenizer):
         self.vocab_file = vocab_file
         self.add_bos_token = add_bos_token
         self.add_eos_token = add_eos_token
-        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
-        self.sp_model.Load(vocab_file)
+

     def __getstate__(self):
         state = self.__dict__.copy()
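For context on why the two sp_model lines move above super().__init__(): in newer transformers releases, PreTrainedTokenizer.__init__ consults the subclass vocabulary (e.g. via get_vocab()) while registering special and added tokens, so the SentencePiece model has to be loaded before the parent constructor runs. The following is a minimal, self-contained sketch of that ordering constraint; BaseTokenizer and SketchYayiTokenizer are hypothetical stand-ins, not the real transformers or YAYI classes.

# Minimal sketch of the initialization-order issue (hypothetical classes).
# The base constructor reads the vocabulary during __init__, so the subclass
# must set up its backend vocabulary *before* calling super().__init__().

class BaseTokenizer:
    def __init__(self):
        # Stand-in for PreTrainedTokenizer.__init__, which may call
        # get_vocab() / vocab_size while registering added tokens.
        self.vocab_size_at_init = len(self.get_vocab())

    def get_vocab(self):
        raise NotImplementedError


class SketchYayiTokenizer(BaseTokenizer):
    def __init__(self, vocab):
        # Load the backend vocabulary first, mirroring the commit's move of
        # self.sp_model = ...; self.sp_model.Load(vocab_file) above
        # super().__init__().
        self._vocab = dict(vocab)
        super().__init__()

    def get_vocab(self):
        return dict(self._vocab)


tok = SketchYayiTokenizer({"<unk>": 0, "<s>": 1, "</s>": 2})
print(tok.vocab_size_at_init)  # prints 3

With the reordering, get_vocab() already has a vocabulary to return when the base constructor asks for it; with the old ordering, the same call would fail because the backend model attribute does not exist yet.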