Spaces:

MaroueneA
/

OffensiveDetection

Running

MaroueneA

Initial commit of my Gradio NLP app

139e538 7 months ago

3.65 kB

	import gradio as gr
	from transformers import AutoModelForSequenceClassification, AutoTokenizer
	import torch

	# Each checkpoint directory under ./models supplies both the sequence
	# classifier and its matching tokenizer, so the two are loaded as a pair.
	def _load_pair(checkpoint_dir):
	    """Return the (model, tokenizer) pair loaded from *checkpoint_dir*."""
	    classifier = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)
	    vocab = AutoTokenizer.from_pretrained(checkpoint_dir)
	    return classifier, vocab


	# English checkpoints
	model_roberta, tokenizer_roberta = _load_pair("./models/roberta-base-offensive")
	model_distilbert, tokenizer_distilbert = _load_pair("./models/distilbert-base-uncased-offensive")
	model_deberta, tokenizer_deberta = _load_pair("./models/deberta-offensive")
	model_bert, tokenizer_bert = _load_pair("./models/bert-offensive")

	# Arabic checkpoints
	model_arbert, tokenizer_arbert = _load_pair("./models/UBC-NLP/ARBERT")
	model_marbert, tokenizer_marbert = _load_pair("./models/UBC-NLP/MARBERT")

	def predict(tweet, model_choice):
	    """Classify *tweet* as offensive or not with the selected model.

	    Args:
	        tweet: Text to classify. ``None``, empty, or whitespace-only text is
	            rejected without running any model.
	        model_choice: Display name of the model to use: "RoBERTa",
	            "DistilBERT", "ARBERT", "MARBERT", "DeBERTa" or "BERT".

	    Returns:
	        A ``(prediction, detail)`` pair of strings. On success ``prediction``
	        is "Not Offensive" (class 0) or "Offensive" (class 1) and ``detail``
	        is the softmax probability of that class, formatted like
	        "Confidence: 0.1234". If the model choice is not recognised or the
	        text is blank, the pair instead holds a short message asking the
	        user to correct the input.
	    """
	    # Thunks keep the lookup lazy: the module-level models are only touched
	    # once the choice is known to be valid.
	    selectors = {
	        "RoBERTa": lambda: (model_roberta, tokenizer_roberta),
	        "DistilBERT": lambda: (model_distilbert, tokenizer_distilbert),
	        "ARBERT": lambda: (model_arbert, tokenizer_arbert),
	        "MARBERT": lambda: (model_marbert, tokenizer_marbert),
	        "DeBERTa": lambda: (model_deberta, tokenizer_deberta),
	        "BERT": lambda: (model_bert, tokenizer_bert),
	    }
	    select = selectors.get(model_choice)
	    if select is None:
	        return "Model not selected", "Please select a model."

	    # Blank text carries nothing to classify; reject it rather than report
	    # a confident-looking label for it.
	    if not tweet or not tweet.strip():
	        return "No input", "Please enter some text to classify."

	    model, tokenizer = select()

	    # Inputs longer than 512 tokens are truncated.
	    encoded_input = tokenizer.encode(tweet, return_tensors='pt', truncation=True, max_length=512, padding=True)
	    with torch.no_grad():  # inference only, no gradients needed
	        logits = model(encoded_input).logits
	    probabilities = torch.softmax(logits, dim=-1)

	    # A single text was encoded, so row 0 holds its class probabilities.
	    prediction_index = probabilities[0].argmax().item()

	    prediction_map = {0: "Not Offensive", 1: "Offensive"}
	    prediction = prediction_map[prediction_index]
	    confidence = probabilities[0, prediction_index].item()

	    return prediction, f"Confidence: {confidence:.4f}"

	def app_interface():
	    """Build the Gradio Blocks UI for offensive-language detection.

	    The page shows the instructions, a language selector, a model dropdown,
	    a text box with a "Predict" button, and two text boxes that receive the
	    prediction and its confidence. Choosing a language narrows the model
	    dropdown to the models listed for that language in the instructions;
	    until a language is chosen, every model is offered.

	    Returns:
	        The assembled ``gr.Blocks`` app. It is not launched here.
	    """
	    all_models = ["RoBERTa", "DistilBERT", "ARBERT", "MARBERT", "DeBERTa", "BERT"]
	    models_by_language = {
	        "English": ["BERT", "DeBERTa", "RoBERTa", "DistilBERT"],
	        "Tunisian Arabic": ["ARBERT", "MARBERT"],
	    }

	    def _models_for(selected_language):
	        """Return a dropdown update listing the models for *selected_language*."""
	        choices = models_by_language.get(selected_language, all_models)
	        # Clear the current value so a model picked for the other language
	        # cannot linger after the switch.
	        return gr.update(choices=choices, value=None)

	    with gr.Blocks() as app:
	        gr.Markdown("## Offensive Language Detection")
	        gr.Markdown("### Instructions:")
	        gr.Markdown("1. Select the language of the text.\n2. Choose a model corresponding to the selected language:\n - For English: BERT, DeBERTa, RoBERTa, or DistilBERT\n - For Tunisian Arabic: ARBERT or MARBERT")
	        with gr.Row():
	            language = gr.Radio(["English", "Tunisian Arabic"], label="Choose Language")
	        with gr.Row():
	            model_choice = gr.Dropdown(all_models, label="Choose Model")
	        with gr.Row():
	            tweet = gr.Textbox(lines=4, placeholder="Enter your text here...", label="Text")
	            submit_btn = gr.Button("Predict")
	        with gr.Row():
	            prediction = gr.Textbox(label="Prediction")
	            confidence = gr.Textbox(label="Confidence")

	        # Event listeners are registered while the Blocks context is open.
	        language.change(fn=_models_for, inputs=language, outputs=model_choice)
	        submit_btn.click(fn=predict, inputs=[tweet, model_choice], outputs=[prediction, confidence])

	    return app

	# Build the UI at import time so `app` stays available as a module
	# attribute, but only start the server when this file is run as a script.
	app = app_interface()

	if __name__ == "__main__":
	    app.launch()