# Alesmikes's picture
# Duplicate from ja-818/speech_and_text_emotion_recognition
# c059522
# Import the necessary libraries
from transformers import pipeline
# Text emotion classifier, loaded once at import time from a pre-trained checkpoint
model_text_emotion = pipeline(
    task="text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
)

# Speech emotion recognition (SER) classifier, loaded from a pre-trained checkpoint
model_speech_emotion = pipeline(
    task="audio-classification",
    model="aherzberg/ser_model_fixed_label",
)

# Speech-to-text model, loaded from a pre-trained checkpoint, used to transcribe audio
model_voice2text = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-tiny.en",
)
# A function that uses the initialized text classification model to predict the emotion of a given text input
def infere_text_emotion(text):
    """Return the capitalized label of the first prediction for ``text``.

    The input is passed to the module-level ``model_text_emotion`` pipeline;
    the ``"label"`` entry of its first result is capitalized and returned.
    """
    predictions = model_text_emotion(text)
    first_prediction = predictions[0]
    label = first_prediction["label"]
    return label.capitalize()
# A function that uses the initialized audio classification model to predict the emotion of a given speech input
def infere_speech_emotion(text):
    """Return the text-model emotion name matching the speech model's first prediction.

    The argument is forwarded unchanged to the module-level
    ``model_speech_emotion`` pipeline (despite the parameter name, it is the
    speech input). The ``"label"`` of the first result is translated through
    a fixed mapping; a label missing from that mapping raises ``KeyError``.
    """
    # Speech-model label -> equivalent label name used on the text side
    speech_to_text_label = {
        "angry": "Anger",
        "disgust": "Disgust",
        "fear": "Fear",
        "happy": "Joy",
        "neutral": "Neutral",
        "sad": "Sadness",
    }
    predictions = model_speech_emotion(text)
    speech_label = predictions[0]["label"]
    return speech_to_text_label[speech_label]
# A function that uses the initialized automatic speech recognition model to convert speech (as an audio file) to text
def infere_voice2text(audio_file):
    """Return the ``"text"`` entry of the speech-recognition result for ``audio_file``.

    ``audio_file`` is handed as-is to the module-level ``model_voice2text``
    pipeline.
    """
    transcription = model_voice2text(audio_file)
    return transcription["text"]