Spaces:

ttt-tkmr
/

Question_Classify_Project

Sleeping

App Files Files Community

ttt-tkmr committed on Mar 4

Commit

981f00a

•

1 Parent(s): 7254741

Upload 2 files

Browse files

Files changed (2) hide show

NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx +3 -0
app.py +70 -11

NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:769acacd25d1bcc6206a7e22473da4f805e766adaad016e11217cccb5414b18d
+size 1833169

app.py CHANGED Viewed

@@ -6,16 +6,24 @@ import os
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
 from base64 import b64decode
 import gradio as gr
-model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
-model = joblib.load(model_path)
-def extract_features(audio_path):
     smile = opensmile.Smile(
     #feature_set=opensmile.FeatureSet.GeMAPSv01b,
     feature_set=opensmile.FeatureSet.emobase,
@@ -32,17 +40,65 @@ def extract_features(audio_path):
     return feature
-def main(input):
   # Previous single-model entry point: extracts features for one audio input
   # and returns the result of model.predict() on them.
   # Extract features with openSMILE
-  feature_vector = extract_features([input])
   # Run inference with the loaded model
-  prediction = model.predict(feature_vector)
   #print(f"Prediction: {prediction}")
   return prediction
-gr.Interface(
-    title = 'Question Classifier Model',
     fn = main,
     inputs=[
         gr.Audio(sources=["microphone","upload"], type="filepath")
@@ -50,5 +106,8 @@ gr.Interface(
     outputs=[
         "textbox"
     ],
-    live=True
-    ).launch(debug=True)

 import pandas as pd
 from sklearn.preprocessing import StandardScaler
 from base64 import b64decode
+import onnx
+import onnxruntime
+import torch
 import gradio as gr
+model_names = ["DNN", "RandomForest"]  # choices for the model dropdown; main() compares against these names
 # Model artefacts, referenced by bare file name.
+rf_model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
+dnn_model_path = "NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx"
 # Both models are loaded once, when the module is imported, and are then
 # used by predict_dnn() and predict_rf().
+dnn_model = onnxruntime.InferenceSession(dnn_model_path)
+rf_model = joblib.load(rf_model_path)
+def extract_features_rf(audio_path):
     smile = opensmile.Smile(
     #feature_set=opensmile.FeatureSet.GeMAPSv01b,
     feature_set=opensmile.FeatureSet.emobase,
     return feature
+def predict_rf(input):
   # Classify one audio input with the RandomForest model.
   # input: the audio item to classify; it is wrapped in a one-element list
   #        before being handed to extract_features_rf().
   # Returns whatever rf_model.predict() yields for the extracted features.
   # Extract features with openSMILE
+  feature_vector = extract_features_rf([input])
   # Run inference with the loaded model
+  prediction = rf_model.predict(feature_vector)
   #print(f"Prediction: {prediction}")
   return prediction
+def extract_features_dnn(audio_path):
+    smile = opensmile.Smile(
+    #feature_set=opensmile.FeatureSet.GeMAPSv01b,
+    feature_set=opensmile.FeatureSet.emobase,
+    feature_level=opensmile.FeatureLevel.Functionals,
+    )
+    feature_df = smile.process_files(audio_path)
+    #output_features = ['F0env_sma_de_amean', 'lspFreq_sma_de[5]_linregc1', 'mfcc_sma[3]_linregc1', 'lspFreq_sma[6]_quartile1', 'lspFreq_sma_de[6]_linregerrQ', 'lspFreq_sma_de[6]_maxPos', 'lspFreq_sma_de[6]_iqr2-3', 'lspFreq_sma_de[7]_minPos', 'lspFreq_sma_de[4]_linregc1', 'lspFreq_sma_de[6]_linregerrA', 'lspFreq_sma_de[6]_linregc2', 'lspFreq_sma[5]_amean', 'lspFreq_sma_de[6]_iqr1-2', 'mfcc_sma[1]_minPos', 'mfcc_sma[4]_linregc1', 'mfcc_sma[9]_iqr2-3', 'lspFreq_sma[5]_kurtosis', 'lspFreq_sma_de[3]_skewness', 'mfcc_sma[3]_minPos', 'mfcc_sma[12]_linregc1']
+    df = pd.DataFrame(feature_df.values[0], index=feature_df.columns)
+    #df = df[df.index.isin(output_features)]
+    df = df.T
+    scaler = StandardScaler()
+    feature = scaler.fit_transform(df)
+    print(df.shape)
+    return feature
+def softmax_calc_(pred):
     # Map model scores to a class label. Despite the name, no softmax is
     # computed here: only the position of the largest score is inspected.
     # pred: tensor of scores; torch.argmax without a dim argument returns the
     #       index into the flattened tensor.
     # Returns "question" when the largest score sits at index 0, otherwise
     # "declarative".
+    if torch.argmax(pred) == torch.tensor(0) :
+        prediction = "question"
+    else:
+        prediction = "declarative"
+    return prediction
+def predict_dnn(input):
     # Classify one audio input with the ONNX DNN model.
     # input: the audio item to classify; it is wrapped in a one-element list
     #        before being handed to extract_features_dnn().
     # Returns the label string produced by softmax_calc_().
     # The raw model outputs and the final label are also printed to stdout.
+    # Extract features with openSMILE
+    feature_vector = extract_features_dnn([input])
+    # Run inference with the loaded model
+    onnx_outs = dnn_model.run(None, {"model_input":feature_vector})
+    print(onnx_outs)
+    prediction = softmax_calc_(torch.FloatTensor(onnx_outs))
+    print(f"Prediction: {prediction}")
+    return prediction
+def main(model):
+    if model == "DNN":
+        return predict_dnn(input)
+    elif model == "RandomForest":
+        return predict_rf(input)
+with gr.Blocks() as demo:
+    model = gr.Dropdown(choices=model_names),
     fn = main,
     inputs=[
         gr.Audio(sources=["microphone","upload"], type="filepath")
     outputs=[
         "textbox"
     ],
+    live=True,
+    description="demo for Audio to question classifier"
+demo.launch()