File size: 390 Bytes
ea507ec
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import tiktoken

tiktoken_encoding = tiktoken.get_encoding("cl100k_base")  # this is used in gpt-4 and gpt-3.5-turbo
# Previous choice, kept for reference: "o200k_base", which is apparently
# the encoding used for gpt-4o.
# Vocabulary size as reported by the selected encoding.
vocab_size = tiktoken_encoding.n_vocab
# NOTE: module-level statement, so this prints whenever the module is loaded.
print("vocab_size updated to",vocab_size)

def encode(text):
    """Tokenize *text* with the module-level ``tiktoken_encoding``.

    Delegates to ``tiktoken_encoding.encode`` with its default arguments
    and returns that call's result unchanged.

    NOTE(review): per the tiktoken documentation, the default arguments
    reject text that contains special-token strings (for example
    ``<|endoftext|>``) by raising ``ValueError`` -- confirm that callers
    never pass such text, or handle the error at the call site.
    """
    token_ids = tiktoken_encoding.encode(text)
    return token_ids

def decode(tokens):
    """Turn *tokens* back into text with the module-level ``tiktoken_encoding``.

    Delegates to ``tiktoken_encoding.decode`` with its default arguments
    and returns that call's result unchanged.

    NOTE(review): per the tiktoken documentation, decoding with default
    arguments replaces byte sequences that are not valid UTF-8 rather
    than raising, so a round trip through partial token sequences may be
    lossy -- confirm this is acceptable for callers.
    """
    decoded_text = tiktoken_encoding.decode(tokens)
    return decoded_text