Salesforce
/

xgen-7b-8k-base

Text Generation

text-generation-inference

Inference Endpoints

Model card | Files and versions | Community

_convert_id_to_token

#12

by tianxie-sf - opened Jul 2, 2023

base: refs/heads/main

←

from: refs/pr/12

Discussion Files changed

Files changed (1)

tokenization_xgen.py +2 -2

tokenization_xgen.py (changed)

@@ -155,7 +155,7 @@ class XgenTokenizer(PreTrainedTokenizer):
     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (str) using the vocab."""
-        return self.encoder.decode_single_token_bytes(index)
     def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
         if skip_special_tokens:
@@ -231,4 +231,4 @@ class XgenTokenizer(PreTrainedTokenizer):
     # has no vocab file
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
-        return ()

     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (str) using the vocab."""
+        return self.encoder.decode_single_token_bytes(index).decode("utf-8")
     def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
         if skip_special_tokens:
     # has no vocab file
     def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None):
+        return ()