Commit
9715926
1 Parent(s): 3154b98

Upload files

Browse files
Files changed (2) hide show
  1. app.py +32 -0
  2. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer
3
+
4
# Tokenizers already loaded in this process, keyed by model name, so repeated
# requests for the same model do not call AutoTokenizer.from_pretrained again.
_TOKENIZER_CACHE = {}


# Define a function to tokenize text with a selected tokenizer
def tokenize_text(text, tokenizer_name):
    """Tokenize ``text`` with the named tokenizer and describe the result.

    Args:
        text: The string to tokenize. ``None`` is treated as an empty string.
        tokenizer_name: Model identifier handed to
            ``AutoTokenizer.from_pretrained``.

    Returns:
        A two-line string holding the token list and the matching input IDs,
        or a short prompt when no tokenizer name was supplied.
    """
    # Guard against a missing selection instead of passing None/"" on to
    # from_pretrained, which cannot resolve it to a model.
    if not tokenizer_name:
        return "Please select a tokenizer."

    tokenizer = _TOKENIZER_CACHE.get(tokenizer_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        _TOKENIZER_CACHE[tokenizer_name] = tokenizer

    tokenized_text = tokenizer.tokenize(text or "")
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}"
10
+
11
# Define available tokenizers
tokenizer_names = [
    "riotu-lab/ArabianGPT-01B",
    "riotu-lab/ArabianGPT-03B",
    "riotu-lab/ArabianGPT-08B",
    # Add more tokenizers here
]

# Create the Gradio interface: a free-text box plus a tokenizer picker feeding
# tokenize_text, with the formatted result shown as plain text.
iface = gr.Interface(
    fn=tokenize_text,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(
            choices=tokenizer_names,
            # Preselect the first tokenizer so a submit made before the user
            # touches the dropdown still carries a valid model name.
            value=tokenizer_names[0],
            label="Select Tokenizer",
        ),
    ],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)

# Launch the app
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers
2
+ gradio