Spaces:
Build error
Build error
File size: 1,974 Bytes
5557412 feec576 89bf06f 6b4e503 5557412 89bf06f 1ebf1c8 1d19528 89bf06f 1ebf1c8 8ffa72a 89bf06f c29a642 1ebf1c8 89bf06f 5557412 9585e6d 1ebf1c8 89bf06f 5557412 9585e6d 5557412 1b27893 5557412 89bf06f 5557412 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from transformers import pipeline
import gradio as gr
from pyctcdecode import BeamSearchDecoderCTC
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
from transformers import AutoConfig, AutoModel, Wav2Vec2FeatureExtractor
import librosa
import numpy as np
import subprocess
def resample(speech_array, sampling_rate):
    """Resample a waveform with torchaudio and squeeze singleton dimensions.

    Args:
        speech_array: Waveform as a ``torch.Tensor`` or a ``numpy.ndarray``.
            Integer PCM data is rescaled to floating point first, because
            the resampler only operates on floating-point tensors.
        sampling_rate: Sampling rate of ``speech_array`` in Hz.

    Returns:
        The resampled waveform as a ``torch.Tensor`` with size-1 dimensions
        removed.
    """
    came_from_numpy = isinstance(speech_array, np.ndarray)
    waveform = torch.as_tensor(speech_array)

    if not waveform.is_floating_point():
        # Map integer PCM onto roughly [-1.0, 1.0) using the dtype's range
        # (e.g. int16 is divided by 32768).
        full_scale = torch.iinfo(waveform.dtype).max + 1
        waveform = waveform.to(torch.float32) / full_scale

    if came_from_numpy and waveform.dim() == 2:
        # NOTE(review): NumPy audio is assumed to use Gradio's
        # (samples, channels) layout, so channels are averaged to mono here.
        # Tensor inputs are left in their own layout. Confirm if other
        # NumPy producers are added.
        waveform = waveform.mean(dim=1)

    # Only the source rate is given; the target rate is torchaudio's default
    # ``new_freq`` (documented as 16000 Hz, which equals SR in this file).
    resampler = torchaudio.transforms.Resample(orig_freq=sampling_rate)
    return resampler(waveform).squeeze()
def predict(speech_array, sampling_rate):
    """Run the emotion classifier on one waveform and format the scores.

    Args:
        speech_array: Waveform to classify; passed to ``resample`` as-is.
        sampling_rate: Sampling rate of ``speech_array`` in Hz.

    Returns:
        A list with one dict per class index, each holding the label from
        ``config.id2label`` under ``"Emotion"`` and the softmax probability
        formatted as a percentage string under ``"Score"``.
    """
    speech = resample(speech_array, sampling_rate)
    inputs = feature_extractor(speech, sampling_rate=SR, return_tensors="pt", padding=True)
    # Move every tensor produced by the feature extractor to the model's device.
    inputs = {key: inputs[key].to(device) for key in inputs}

    with torch.no_grad():
        # The loaded model is bound to the module-level name ``model``;
        # the previous reference to ``model_`` raised NameError.
        logits = model(**inputs).logits

    # Softmax over dim 1, then take the first (only) item of the batch.
    scores = F.softmax(logits, dim=1).detach().cpu().numpy()[0]
    outputs = [
        {"Emotion": config.id2label[i], "Score": f"{round(score * 100, 3):.1f}%"}
        for i, score in enumerate(scores)
    ]
    return outputs
# --- Model setup (runs once at import time) ---------------------------------
# Target sampling rate handed to the feature extractor.
SR = 16000
# Passed as ``trust_remote_code`` when loading the config and the model.
TRUST = True

# Hub repository that provides the config, weights and feature extractor.
_MODEL_ID = "Aniemore/wav2vec2-xlsr-53-russian-emotion-recognition"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

config = AutoConfig.from_pretrained(_MODEL_ID, trust_remote_code=TRUST)
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(_MODEL_ID)
model = AutoModel.from_pretrained(_MODEL_ID, trust_remote_code=TRUST)
model.to(device)
def transcribe(audio):
    """Gradio callback: classify the emotion of an uploaded audio clip.

    Args:
        audio: ``(sampling_rate, waveform)`` pair from the audio input, or
            ``None`` when nothing was provided.

    Returns:
        The result of ``predict`` for the clip, or a short message string
        when ``audio`` is ``None``.
    """
    if audio is None:
        # Without this guard, indexing ``None`` raised TypeError.
        return "No audio provided."

    sampling_rate, waveform = audio[0], audio[1]
    return predict(waveform, sampling_rate)
def get_asr_interface():
    """Build the Gradio interface that feeds uploaded audio to ``transcribe``.

    Returns:
        A ``gr.Interface`` with one upload-only audio input delivered in
        NumPy form and one textbox output.
    """
    # NOTE(review): ``gr.inputs`` is the legacy component namespace; confirm
    # that the pinned Gradio version still provides it.
    audio_input = gr.inputs.Audio(source="upload", type="numpy")
    input_components = [audio_input]
    output_components = ["textbox"]
    return gr.Interface(
        fn=transcribe,
        inputs=input_components,
        outputs=output_components,
    )
# One tab per interface; names[i] is the title for interfaces[i].
interfaces = [
    get_asr_interface(),
]
names = [
    "Russian Emotion Recognition",
]

# Listen on 0.0.0.0 so the app is reachable from outside the host.
# Request queuing is explicitly disabled.
gr.TabbedInterface(interfaces, names).launch(server_name="0.0.0.0", enable_queue=False)