Commit
•
582db72
1
Parent(s):
a473726
Update _decode method to accept integer element and convert it to sequence (#30)
Browse files
- Update _decode method to accept integer element and convert it to sequence (154b41190c5d1ce10b4c8eea23a1080c2f7c81fe)
Co-authored-by: Ramkrithik S <[email protected]>
- tokenization_xgen.py +3 -1
tokenization_xgen.py
CHANGED
@@ -169,7 +169,9 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
169 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
170 |
return self.encoder.decode_single_token_bytes(index).decode("utf-8")
|
171 |
|
172 |
-
def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
|
|
|
|
|
173 |
if skip_special_tokens:
|
174 |
token_ids = [t for t in token_ids if t not in self.all_special_ids]
|
175 |
return self.encoder.decode(token_ids)
|
|
|
169 |
"""Converts an index (integer) in a token (str) using the vocab."""
|
170 |
return self.encoder.decode_single_token_bytes(index).decode("utf-8")
|
171 |
|
172 |
+
def _decode(self, token_ids, skip_special_tokens: bool = False, **kwargs):
|
173 |
+
if not isinstance(token_ids, list):
|
174 |
+
token_ids = [token_ids]
|
175 |
if skip_special_tokens:
|
176 |
token_ids = [t for t in token_ids if t not in self.all_special_ids]
|
177 |
return self.encoder.decode(token_ids)
|