Spaces:
Sleeping
Sleeping
File size: 390 Bytes
ea507ec |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 |
import tiktoken
# Shared tokenizer for this module. "cl100k_base" is the encoding used by
# gpt-4 and gpt-3.5-turbo.
# NOTE: an earlier revision selected "o200k_base" here instead (apparently the
# encoding used for gpt-4o).
tiktoken_encoding = tiktoken.get_encoding("cl100k_base")

# Number of entries in the selected encoding's vocabulary, as reported by
# the encoding object itself.
vocab_size = tiktoken_encoding.n_vocab

# Report the vocabulary size at import time.
print("vocab_size updated to", vocab_size)
def encode(text):
    """Tokenize ``text`` with the module-level ``tiktoken_encoding``.

    Args:
        text: The string to tokenize.

    Returns:
        Whatever ``tiktoken_encoding.encode`` returns for ``text`` (per the
        tiktoken documentation, a list of integer token ids).
    """
    token_ids = tiktoken_encoding.encode(text)
    return token_ids
def decode(tokens):
    """Turn ``tokens`` back into text with the module-level ``tiktoken_encoding``.

    Args:
        tokens: The token ids to decode, in the form accepted by
            ``tiktoken_encoding.decode``.

    Returns:
        Whatever ``tiktoken_encoding.decode`` returns for ``tokens`` (per the
        tiktoken documentation, the decoded string).
    """
    decoded_text = tiktoken_encoding.decode(tokens)
    return decoded_text
|