Spaces:

TahaRasouli
/

Topic-Classification

Sleeping

App Files Files Community

TahaRasouli committed on Jul 21

Commit

2427679

•

1 Parent(s): 9f1df33

Create app.py

Browse files

Files changed (1) hide show

app.py +82 -0

app.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#importing the necessary libraries
+import gradio as gr
+import numpy as np
+import pandas as pd
+import re
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from topic_labels import labels
+# Defining the model and tokenizer
+model_name = "valurank/distilroberta-topic-classification"  # checkpoint name shared by both from_pretrained calls below
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+#model.to(device)  # NOTE(review): left disabled; no `device` is defined in the visible code
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+def clean_text(raw_text):
+    """Normalize raw input text before it is tokenized.
+
+    The text is reduced to ASCII, newlines/tabs/runs of spaces are collapsed
+    to single spaces, ``Date d/m/yyyy`` stamps and ``h:mm XX XX`` time stamps
+    are removed, and whitespace is collapsed once more so that removing a
+    stamp never leaves a double space or a dangling leading/trailing space.
+
+    Args:
+        raw_text: The string to clean.
+
+    Returns:
+        The cleaned string (possibly empty).
+    """
+    # Drop every non-ASCII character (e.g. Chinese characters).
+    text = raw_text.encode("ascii", errors="ignore").decode("ascii")
+    # First pass: newlines, tabs and runs of spaces become one space, so the
+    # stamp patterns below see single separators.
+    text = re.sub(r"[\n\t ]+", " ", text).strip()
+    text = re.sub(r"Date\s\d{1,2}\/\d{1,2}\/\d{4}", "", text)  # remove date
+    text = re.sub(r"\d{1,2}:\d{2}\s[A-Z]+\s[A-Z]+", "", text)  # remove time
+    # Second pass: a removed stamp leaves its surrounding spaces behind.
+    return re.sub(r" +", " ", text).strip()
+def find_two_highest_indices(arr):
+    """Return the positions of the largest and second-largest values in ``arr``.
+
+    The sequence is scanned once from left to right. On ties the earliest
+    position keeps the top spot, because only a strictly greater value
+    displaces it.
+
+    Args:
+        arr: An indexable sequence of mutually comparable values.
+
+    Returns:
+        A tuple ``(index_of_largest, index_of_second_largest)``.
+
+    Raises:
+        ValueError: If ``arr`` has fewer than two elements.
+    """
+    if len(arr) < 2:
+        raise ValueError("Array must have at least two elements")
+
+    top = runner_up = None
+    for position, candidate in enumerate(arr):
+        if top is None or candidate > arr[top]:
+            # New leader: the previous leader is demoted to runner-up.
+            runner_up, top = top, position
+            continue
+        if runner_up is None or candidate > arr[runner_up]:
+            runner_up = position
+    return top, runner_up
+def predict_topic(text):
+    """Return the two most probable topics for ``text`` with their probabilities.
+
+    The text is cleaned with ``clean_text``, encoded with the module-level
+    ``tokenizer`` (truncation enabled), and scored by the module-level
+    ``model``. The logits are turned into probabilities with a softmax and
+    the two highest-scoring entries are looked up in ``labels``.
+
+    Args:
+        text: The raw text to classify.
+
+    Returns:
+        A dict with two entries mapping a topic label to its probability,
+        rounded to two decimals, as consumed by the ``gr.Label`` output.
+    """
+    text = clean_text(text)
+    input_tensor = tokenizer.encode(text, return_tensors="pt", truncation=True)
+    # Inference only: skip autograd bookkeeping for the forward pass.
+    with torch.no_grad():
+        logits = model(input_tensor).logits
+    # Softmax over the class dimension; [0] selects the single input row.
+    probs = torch.softmax(logits, dim=1)[0].cpu().numpy()
+    first_idx, second_idx = find_two_highest_indices(probs)
+    # Convert numpy float32 scalars to plain Python floats so the result is
+    # an ordinary, JSON-friendly dict.
+    return {
+        labels[first_idx]: round(float(probs[first_idx]), 2),
+        labels[second_idx]: round(float(probs[second_idx]), 2),
+    }
+# Creating the interface for the Gradio app
+# One text box in, a label widget out; predict_topic returns a {label: probability} dict.
+demo = gr.Interface(predict_topic, inputs=gr.Textbox(),
+                    outputs = gr.Label(num_top_classes=2),
+                    title="Topic Classification")
+# Launch the app only when this file is executed directly as a script.
+if __name__ == "__main__":
+  demo.launch(debug=True)