File size: 3,646 Bytes
139e538
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch

def _load_classifier(checkpoint_dir):
    """Load a sequence-classification model and its tokenizer from one directory.

    Args:
        checkpoint_dir: Path passed unchanged to both ``from_pretrained`` calls.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is loaded first, then the
        tokenizer.
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
    return classifier, tokenizer


# Saved models and tokenizers, loaded once at import time so that every
# prediction request reuses the same in-memory objects.
model_roberta, tokenizer_roberta = _load_classifier("./models/roberta-base-offensive")

model_distilbert, tokenizer_distilbert = _load_classifier("./models/distilbert-base-uncased-offensive")

model_deberta, tokenizer_deberta = _load_classifier("./models/deberta-offensive")

model_bert, tokenizer_bert = _load_classifier("./models/bert-offensive")

# Saved Arabic models and tokenizers.
model_arbert, tokenizer_arbert = _load_classifier("./models/UBC-NLP/ARBERT")

model_marbert, tokenizer_marbert = _load_classifier("./models/UBC-NLP/MARBERT")

def _select_model(model_choice):
    """Return the ``(model, tokenizer)`` pair for a model name, or ``None`` if unknown."""
    if model_choice == "RoBERTa":
        return model_roberta, tokenizer_roberta
    if model_choice == "DistilBERT":
        return model_distilbert, tokenizer_distilbert
    if model_choice == "ARBERT":
        return model_arbert, tokenizer_arbert
    if model_choice == "MARBERT":
        return model_marbert, tokenizer_marbert
    if model_choice == "DeBERTa":
        return model_deberta, tokenizer_deberta
    if model_choice == "BERT":
        return model_bert, tokenizer_bert
    return None


def predict(tweet, model_choice):
    """Classify a text as offensive or not with the chosen model.

    Args:
        tweet: The text to classify. ``None``, empty, or whitespace-only text
            is rejected with a message instead of being sent to the model.
        model_choice: One of "RoBERTa", "DistilBERT", "ARBERT", "MARBERT",
            "DeBERTa" or "BERT".

    Returns:
        A pair of strings. On success: the label ("Not Offensive" or
        "Offensive") and ``"Confidence: <p>"``, where ``<p>`` is the softmax
        probability of the predicted class formatted to four decimals. When
        no (or an unknown) model is chosen:
        ``("Model not selected", "Please select a model.")``. When no text is
        given: ``("No text provided", "Please enter some text to classify.")``.
    """
    # Nothing chosen in the dropdown: same response the unknown-name case gives.
    if not model_choice:
        return "Model not selected", "Please select a model."

    # Without this guard a blank submission would still be tokenized and the
    # model would report a label and confidence for no text at all.
    if tweet is None or not tweet.strip():
        return "No text provided", "Please enter some text to classify."

    selected = _select_model(model_choice)
    if selected is None:
        return "Model not selected", "Please select a model."
    model, tokenizer = selected

    # The raw text (not the stripped copy) is tokenized so that predictions
    # for non-empty input are unchanged. Input is truncated to 512 tokens.
    encoded_input = tokenizer.encode(
        tweet, return_tensors='pt', truncation=True, max_length=512, padding=True
    )
    with torch.no_grad():
        logits = model(encoded_input).logits
    probabilities = torch.softmax(logits, dim=-1)

    # A single text is encoded, so row 0 holds the only set of class scores.
    prediction_index = probabilities[0].argmax().item()

    prediction_map = {0: "Not Offensive", 1: "Offensive"}
    prediction = prediction_map[prediction_index]
    confidence = probabilities[0, prediction_index].item()

    return prediction, f"Confidence: {confidence:.4f}"

def app_interface():
    """Build the Gradio Blocks UI for offensive-language detection.

    The layout is, top to bottom: a title and instructions, a language radio,
    a model dropdown, a text box with a "Predict" button, and two output text
    boxes. Clicking the button calls ``predict`` with the text and the chosen
    model and writes its two return values into the output boxes.

    Returns:
        The constructed ``gr.Blocks`` object; it is not launched here.
    """
    language_options = ["English", "Tunisian Arabic"]
    model_options = ["RoBERTa", "DistilBERT", "ARBERT", "MARBERT", "DeBERTa", "BERT"]
    instructions_md = (
        "1. Select the language of the text.\n"
        "2. Choose a model corresponding to the selected language:\n"
        "   - For **English**: BERT, DeBERTa, RoBERTa, or DistilBERT\n"
        "   - For **Tunisian Arabic**: ARBERT or MARBERT"
    )

    with gr.Blocks() as demo:
        gr.Markdown("## Offensive Language Detection")
        gr.Markdown("### Instructions:")
        gr.Markdown(instructions_md)

        with gr.Row():
            # NOTE(review): this radio is displayed but is not an input to any
            # event handler, so the selected language does not affect the
            # prediction or the list of models offered.
            gr.Radio(language_options, label="Choose Language")

        with gr.Row():
            model_dropdown = gr.Dropdown(model_options, label="Choose Model")

        with gr.Row():
            text_input = gr.Textbox(lines=4, placeholder="Enter your text here...", label="Text")
            predict_button = gr.Button("Predict")

        with gr.Row():
            prediction_output = gr.Textbox(label="Prediction")
            confidence_output = gr.Textbox(label="Confidence")

        predict_button.click(
            fn=predict,
            inputs=[text_input, model_dropdown],
            outputs=[prediction_output, confidence_output],
        )

    return demo

# Build the UI at import time so `app` remains available as a module attribute.
app = app_interface()

# Start the web server only when this file is executed as a script; importing
# the module (e.g. from a test or another entry point) must not launch it.
if __name__ == "__main__":
    app.launch()