Spaces:

valurank
/

spam_comment_detection

Runtime error

App Files Files Community

abdulmatinomotoso committed on Mar 8

Commit

2ad849f

•

1 Parent(s): dfae088

Create app.py

Browse files

Files changed (1) hide show

app.py +59 -0

app.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import gradio as gr
+import numpy as np
+import pandas as pd
+import re
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Define the model and tokenizer.
+# model_name is the checkpoint identifier passed to both from_pretrained calls
+# below, so the tokenizer and the classifier come from the same checkpoint.
+model_name = "valurank/distilroberta-spam-comments-detection"
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+def clean_text(raw_text):
+  text = raw_text.encode("ascii", errors="ignore").decode(
+          "ascii"
+    )  # remove non-ascii, Chinese characters
+  text = re.sub(r"\n", " ", text)
+  text = re.sub(r"\n\n", " ", text)
+  text = re.sub(r"\t", " ", text)
+  text = text.strip(" ")
+  text = re.sub(
+        " +", " ", text
+    ).strip()  # get rid of multiple spaces and replace with a single
+  text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text) #remove date
+  text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text) #remove time
+  return text
+#Defining a function to get the predicted category of a comment
+def get_category(text):
+  text = clean_text(text)
+  input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
+  input_tensor = input_tensor.to(device)
+  logits = model(input_tensor).logits
+  softmax = torch.nn.Softmax(dim=1)
+  probs = softmax(logits)[0]
+  p = probs.cpu().detach().numpy()
+  pred = {l: p[int(i)] for i, l in model.config.id2label.items()}
+  category = max(pred, key=lambda k: pred[k])
+  return category
+#Creating the interface for the radio app
+demo = gr.Interface(get_category, inputs=gr.Textbox(label="Drop your comment here"),
+                    outputs = "text",
+                    title="Spam comments detection")
+#Launching the gradio app
+if __name__ == "__main__":
+  demo.launch(debug=True)