Spaces:
Running
Running
# from minbpe import BasicTokenizer, RegexTokenizer | |
# tokenizer = RegexTokenizer() | |
# tokenizer.load("first.model") | |
# text_to_encode = "मुझसे क्या होगा अब" | |
# encoded_text = tokenizer.encode(text_to_encode) | |
# print("Encoded:", encoded_text)  # prints the list of token ids (values depend on first.model)
# # Print the tokenized text | |
# print("Tokenized Text:", encoded_text) | |
# # Decode text | |
# decoded_text = tokenizer.decode(encoded_text) | |
# print("Decoded:", decoded_text)  # expected to reproduce text_to_encode
import gradio as gr | |
from minbpe import BasicTokenizer, RegexTokenizer | |
# Module-level tokenizer shared by encode_text and decode_text. It is created
# once at import time and then loads the model file "first.model".
tokenizer = RegexTokenizer()
tokenizer.load("first.model")
# Define the encoding function
def encode_text(text):
    """Encode *text* with the module-level tokenizer.

    Args:
        text: The string to tokenize.

    Returns:
        The ``str()`` rendering of whatever ``tokenizer.encode`` returns, so
        the result can be shown directly in a text box.
    """
    return str(tokenizer.encode(text))
# Define the decoding function
def decode_text(encoded_text):
    """Decode a textual list of token ids back into text.

    Accepts the format produced by ``encode_text`` (e.g. ``"[258, 100, 97]"``)
    as well as looser variants: the brackets are optional, and surrounding
    whitespace, spaces around the commas, and empty items such as a trailing
    comma are tolerated.

    Args:
        encoded_text: Comma-separated integer token ids, optionally wrapped in
            square brackets. ``None`` is treated like an empty string.

    Returns:
        The decoded string, or ``""`` when the input contains no token ids.

    Raises:
        ValueError: If any item is not an integer.
    """
    # Strip whitespace first so the brackets really are the outermost
    # characters; otherwise strip("[]") would leave them in place.
    body = (encoded_text or "").strip().strip("[]")
    pieces = (piece.strip() for piece in body.split(","))
    try:
        # Empty pieces come from empty input or stray commas; skip them
        # instead of letting int("") fail.
        token_ids = [int(piece) for piece in pieces if piece]
    except ValueError as err:
        raise ValueError(
            "Encoded text must be a comma-separated list of integers, "
            f"got {encoded_text!r}"
        ) from err
    if not token_ids:
        # Nothing to decode.
        return ""
    return tokenizer.decode(token_ids)
# Define the Gradio interface
def gradio_app():
    """Build the encoder/decoder demo UI and return it without launching.

    The page consists of a title and a single row with two columns: the first
    column turns free text into token ids, the second turns a token-id list
    back into text.

    Returns:
        The ``gr.Blocks`` object; the caller is responsible for launching it.
    """

    # Click callbacks, declared up front. They only forward to the
    # module-level helpers.
    def encode_handler(text):
        return encode_text(text)

    def decode_handler(encoded_text):
        return decode_text(encoded_text)

    with gr.Blocks() as demo:
        gr.Markdown("# Text Encoder and Decoder")

        with gr.Row():
            # Encoding column: plain text in, token ids out.
            with gr.Column():
                plain_box = gr.Textbox(label="Text to Encode")
                ids_box = gr.Textbox(label="Encoded Text", interactive=False)
                encode_btn = gr.Button("Encode")
                encode_btn.click(
                    fn=encode_handler,
                    inputs=plain_box,
                    outputs=ids_box,
                )

            # Decoding column: token ids in, plain text out.
            with gr.Column():
                ids_entry = gr.Textbox(label="Encoded Text")
                text_box = gr.Textbox(label="Decoded Text", interactive=False)
                decode_btn = gr.Button("Decode")
                decode_btn.click(
                    fn=decode_handler,
                    inputs=ids_entry,
                    outputs=text_box,
                )

    return demo
# Launch the app
if __name__ == "__main__":
    # Only start the server when executed as a script, not on import.
    gradio_app().launch()