# Spaces: Running
import gradio as gr | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import torch | |
def _load_pair(model_dir):
    """Return the (model, tokenizer) pair stored under ``model_dir``."""
    classifier = AutoModelForSequenceClassification.from_pretrained(model_dir)
    vocab = AutoTokenizer.from_pretrained(model_dir)
    return classifier, vocab


# Load the saved models and tokenizers
model_roberta, tokenizer_roberta = _load_pair("./models/roberta-base-offensive")
model_distilbert, tokenizer_distilbert = _load_pair("./models/distilbert-base-uncased-offensive")
model_deberta, tokenizer_deberta = _load_pair("./models/deberta-offensive")
model_bert, tokenizer_bert = _load_pair("./models/bert-offensive")

# Arabic saved Models and tokenizers
model_arbert, tokenizer_arbert = _load_pair("./models/UBC-NLP/ARBERT")
model_marbert, tokenizer_marbert = _load_pair("./models/UBC-NLP/MARBERT")
def predict(tweet, model_choice):
    """Classify ``tweet`` as offensive or not using the selected model.

    Args:
        tweet: The text to classify.
        model_choice: Display name of the model to use; one of "RoBERTa",
            "DistilBERT", "ARBERT", "MARBERT", "DeBERTa" or "BERT".

    Returns:
        A pair of strings ``(prediction, detail)``. On success ``prediction``
        is "Not Offensive" or "Offensive" and ``detail`` is the softmax
        probability of that class formatted as ``"Confidence: 0.1234"``.
        When no known model is selected, or the text is empty, the pair
        holds a short status and a hint for the user instead.
    """
    # Dispatch table instead of an if/elif chain. The lambdas defer the
    # lookup of the module-level objects until a valid choice is known.
    candidates = {
        "RoBERTa": lambda: (model_roberta, tokenizer_roberta),
        "DistilBERT": lambda: (model_distilbert, tokenizer_distilbert),
        "ARBERT": lambda: (model_arbert, tokenizer_arbert),
        "MARBERT": lambda: (model_marbert, tokenizer_marbert),
        "DeBERTa": lambda: (model_deberta, tokenizer_deberta),
        "BERT": lambda: (model_bert, tokenizer_bert),
    }
    resolve = candidates.get(model_choice)
    if resolve is None:
        return "Model not selected", "Please select a model."

    # Classifying an empty string would only score the special tokens and
    # report a meaningless confidence, so reject it up front.
    if tweet is None or not tweet.strip():
        return "No text provided", "Please enter some text to classify."

    model, tokenizer = resolve()

    # Calling the tokenizer (rather than ``encode``) also returns the
    # attention mask and any other inputs the model expects. No padding is
    # requested because a single sequence has nothing to be padded against.
    inputs = tokenizer(tweet, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only one text was encoded, so take row 0 and work on a 1-D vector of
    # class probabilities.
    probabilities = torch.softmax(logits, dim=-1)[0]
    prediction_index = probabilities.argmax().item()
    prediction_map = {0: "Not Offensive", 1: "Offensive"}
    prediction = prediction_map[prediction_index]
    confidence = probabilities[prediction_index].item()
    return prediction, f"Confidence: {confidence:.4f}"
def app_interface():
    """Build the Gradio Blocks UI for offensive-language detection.

    The layout has a language selector, a model dropdown, a text box with a
    "Predict" button, and two read-only text boxes for the result. Choosing
    a language narrows the model dropdown to the models listed for that
    language in the on-page instructions.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    all_models = ["RoBERTa", "DistilBERT", "ARBERT", "MARBERT", "DeBERTa", "BERT"]
    # Mirrors the language/model pairing described in the instructions text.
    models_by_language = {
        "English": ["BERT", "DeBERTa", "RoBERTa", "DistilBERT"],
        "Tunisian Arabic": ["ARBERT", "MARBERT"],
    }

    def restrict_models(selected_language):
        """Return a dropdown update listing only the models for the language."""
        choices = models_by_language.get(selected_language, all_models)
        # Clear the current value so a model from the other language cannot
        # stay selected after the language changes.
        return gr.update(choices=choices, value=None)

    with gr.Blocks() as app:
        gr.Markdown("## Offensive Language Detection")
        gr.Markdown("### Instructions:")
        gr.Markdown("1. Select the language of the text.\n2. Choose a model corresponding to the selected language:\n - For **English**: BERT, DeBERTa, RoBERTa, or DistilBERT\n - For **Tunisian Arabic**: ARBERT or MARBERT")

        with gr.Row():
            language = gr.Radio(["English", "Tunisian Arabic"], label="Choose Language")
        with gr.Row():
            model_choice = gr.Dropdown(all_models, label="Choose Model")
        with gr.Row():
            tweet = gr.Textbox(lines=4, placeholder="Enter your text here...", label="Text")
        submit_btn = gr.Button("Predict")
        with gr.Row():
            prediction = gr.Textbox(label="Prediction")
            confidence = gr.Textbox(label="Confidence")

        # The language radio previously had no effect; it now drives the
        # list of models offered in the dropdown.
        language.change(fn=restrict_models, inputs=language, outputs=model_choice)
        submit_btn.click(fn=predict, inputs=[tweet, model_choice], outputs=[prediction, confidence])

    return app
# Assemble the interface once at import time and start serving it.
app = app_interface()
app.launch()