!pip install opensmile
!pip install scikit-learn==1.3.2
!pip install gradio

import opensmile
import joblib
import wave
import datetime
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from IPython.display import Javascript, Audio
from base64 import b64decode
import gradio as gr

# Load the trained Random Forest classifier
model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
model = joblib.load(model_path)


def extract_features(audio_paths):
    """Extract the 20 emobase functionals used by the classifier from the given audio files."""
    smile = opensmile.Smile(
        # feature_set=opensmile.FeatureSet.GeMAPSv01b,
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    feature_df = smile.process_files(audio_paths)

    # The 20 emobase features the model was trained on
    output_features = [
        'F0env_sma_de_amean', 'lspFreq_sma_de[5]_linregc1', 'mfcc_sma[3]_linregc1',
        'lspFreq_sma[6]_quartile1', 'lspFreq_sma_de[6]_linregerrQ', 'lspFreq_sma_de[6]_maxPos',
        'lspFreq_sma_de[6]_iqr2-3', 'lspFreq_sma_de[7]_minPos', 'lspFreq_sma_de[4]_linregc1',
        'lspFreq_sma_de[6]_linregerrA', 'lspFreq_sma_de[6]_linregc2', 'lspFreq_sma[5]_amean',
        'lspFreq_sma_de[6]_iqr1-2', 'mfcc_sma[1]_minPos', 'mfcc_sma[4]_linregc1',
        'mfcc_sma[9]_iqr2-3', 'lspFreq_sma[5]_kurtosis', 'lspFreq_sma_de[3]_skewness',
        'mfcc_sma[3]_minPos', 'mfcc_sma[12]_linregc1',
    ]

    # Keep only the selected features (column order follows the emobase output, as in training)
    df = pd.DataFrame(feature_df.values[0], index=feature_df.columns)
    df = df[df.index.isin(output_features)]
    df = df.T

    # NOTE: fitting a StandardScaler on a single sample zeroes every feature;
    # ideally the scaler fitted on the training data should be saved and reused here.
    scaler = StandardScaler()
    feature = scaler.fit_transform(df)
    print(df.shape)
    return feature


def main(audio_filepath):
    # With live=True, Gradio may call this with no input yet
    if audio_filepath is None:
        return ""
    # Extract features with openSMILE
    feature_vector = extract_features([audio_filepath])
    # Run inference with the loaded model
    prediction = model.predict(feature_vector)
    # print(f"Prediction: {prediction}")
    return str(prediction[0])


gr.Interface(
    title='Question Classifier Model',
    fn=main,
    inputs=[
        gr.Audio(sources=["microphone", "upload"], type="filepath")
    ],
    outputs=[
        "textbox"
    ],
    live=True,
).launch(debug=True)
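
# A minimal sketch (not part of the original code) of how the per-sample scaling issue
# noted in extract_features could be avoided: fit the StandardScaler once on the training
# feature matrix, persist it with joblib, and reuse its statistics at inference time.
# The path "emobase_20_scaler.joblib" and the helper names below are hypothetical.

def save_scaler(X_train, path="emobase_20_scaler.joblib"):
    # Fit on the full training feature matrix and persist alongside the model
    scaler = StandardScaler().fit(X_train)
    joblib.dump(scaler, path)
    return scaler


def scale_features(df, path="emobase_20_scaler.joblib"):
    # Load the training-time scaler and apply its statistics to a new sample,
    # instead of calling fit_transform on a single row
    scaler = joblib.load(path)
    return scaler.transform(df)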