# Uploaded by ibrim ("Upload 20 files", commit b347aa0, verified).
# Example of using the tokenizer directly, without the Gradio UI
# (kept commented out for reference):
# from minbpe import BasicTokenizer, RegexTokenizer
# tokenizer = RegexTokenizer()
# tokenizer.load("first.model")
# text_to_encode = "मुझसे क्या होगा अब"
# encoded_text = tokenizer.encode(text_to_encode)
# print("Encoded:", encoded_text) # Output: a list of integer token ids
# # Print the tokenized text
# print("Tokenized Text:", encoded_text)
# # Decode text
# decoded_text = tokenizer.decode(encoded_text)
# print("Decoded:", decoded_text) # Output: expected to match text_to_encode
import gradio as gr
from minbpe import BasicTokenizer, RegexTokenizer
# Build the shared tokenizer once, at import time, and load its state from
# "first.model" (presumably a trained minbpe model file, resolved relative to
# the working directory — confirm). encode_text and decode_text both use it.
tokenizer = RegexTokenizer()
tokenizer.load("first.model")
# Define the encoding function
def encode_text(text):
    """Encode *text* with the module-level tokenizer and return it as a string.

    The result of ``tokenizer.encode`` is passed through ``str`` so it can be
    shown in a text field; ``decode_text`` parses this bracketed,
    comma-separated form back into integers.
    """
    return str(tokenizer.encode(text))
# Define the decoding function
def decode_text(encoded_text):
    """Decode a textual list of token ids back into text.

    Accepts the format produced by ``encode_text`` (``"[1, 2, 3]"``) as well
    as a bare comma-separated list (``"1, 2, 3"``). Whitespace around the
    brackets and around each id is ignored, and empty pieces are skipped, so
    ``""``, ``"[]"`` and a trailing comma yield an empty id list instead of
    failing inside ``int()``.

    Args:
        encoded_text: Comma-separated integer token ids, optionally wrapped
            in square brackets. ``None`` is treated as an empty string.

    Returns:
        Whatever ``tokenizer.decode`` returns for the parsed ids.

    Raises:
        ValueError: If a non-empty piece is not a valid integer.
    """
    # Strip whitespace first: str.strip("[]") only removes brackets that sit
    # at the very ends, so " [1, 2] " would otherwise keep its brackets.
    body = (encoded_text or "").strip().strip("[]")
    # "".split(",") yields [""], which int() rejects; drop blank pieces.
    token_ids = [int(piece) for piece in body.split(",") if piece.strip()]
    return tokenizer.decode(token_ids)
# Define the Gradio interface
def gradio_app():
    """Assemble the encode/decode demo and return it without launching.

    The layout is a title followed by one row with two columns: the first
    sends free text to ``encode_text``, the second sends an id list to
    ``decode_text``. Each column has an input box, a read-only output box
    and a button wired to the matching handler.
    """

    # Thin named wrappers; the names are kept as-is because Gradio may derive
    # endpoint names from the callback's function name.
    def encode_handler(text):
        return encode_text(text)

    def decode_handler(encoded_text):
        return decode_text(encoded_text)

    with gr.Blocks() as demo:
        gr.Markdown("# Text Encoder and Decoder")
        with gr.Row():
            # First column: text -> token ids.
            with gr.Column():
                plain_box = gr.Textbox(label="Text to Encode")
                ids_result_box = gr.Textbox(
                    label="Encoded Text",
                    interactive=False,
                )
                run_encode = gr.Button("Encode")
                run_encode.click(
                    fn=encode_handler,
                    inputs=plain_box,
                    outputs=ids_result_box,
                )
            # Second column: token ids -> text.
            with gr.Column():
                ids_box = gr.Textbox(label="Encoded Text")
                text_result_box = gr.Textbox(
                    label="Decoded Text",
                    interactive=False,
                )
                run_decode = gr.Button("Decode")
                run_decode.click(
                    fn=decode_handler,
                    inputs=ids_box,
                    outputs=text_result_box,
                )
    return demo
# Launch the app
if __name__ == "__main__":
    # Only start the server when run as a script, so importing this module
    # does not launch the UI.
    app = gradio_app()
    app.launch()