ttt-tkmr committed on
Commit 981f00a
1 Parent(s): 7254741

Upload 2 files

NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:769acacd25d1bcc6206a7e22473da4f805e766adaad016e11217cccb5414b18d
+size 1833169
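
The .onnx entry is a Git LFS pointer, not the model weights themselves: the 1.8 MB binary lives in LFS storage and is resolved at checkout. A downloaded copy can be checked against the pointer's oid and size; a minimal sketch, assuming the file sits in the working directory under its committed name:

import hashlib
import os

onnx_path = "NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx"

# Stream the file through sha256 so the whole model never sits in memory.
digest = hashlib.sha256()
with open(onnx_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(onnx_path) == 1833169
assert digest.hexdigest() == "769acacd25d1bcc6206a7e22473da4f805e766adaad016e11217cccb5414b18d"
print("LFS object matches the committed pointer")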
app.py CHANGED
@@ -6,16 +6,24 @@ import os
 import pandas as pd
 from sklearn.preprocessing import StandardScaler
 
-
 from base64 import b64decode
 
+import onnx
+import onnxruntime
+import torch
+
 import gradio as gr
 
 
-model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
-model = joblib.load(model_path)
+model_names = ["DNN", "RandomForest"]
+
+rf_model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
+dnn_model_path = "NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx"
+
+dnn_model = onnxruntime.InferenceSession(dnn_model_path)
+rf_model = joblib.load(rf_model_path)
 
-def extract_features(audio_path):
+def extract_features_rf(audio_path):
     smile = opensmile.Smile(
         #feature_set=opensmile.FeatureSet.GeMAPSv01b,
         feature_set=opensmile.FeatureSet.emobase,
@@ -32,17 +40,65 @@ def extract_features(audio_path):
 
     return feature
 
-def main(input):
+def predict_rf(input):
     # Extract features with openSMILE
-    feature_vector = extract_features([input])
+    feature_vector = extract_features_rf([input])
 
     # Run inference with the loaded model
-    prediction = model.predict(feature_vector)
+    prediction = rf_model.predict(feature_vector)
     #print(f"Prediction: {prediction}")
     return prediction
 
-gr.Interface(
-    title = 'Question Classifier Model',
+
+
+
+
+
+def extract_features_dnn(audio_path):
+    smile = opensmile.Smile(
+        #feature_set=opensmile.FeatureSet.GeMAPSv01b,
+        feature_set=opensmile.FeatureSet.emobase,
+        feature_level=opensmile.FeatureLevel.Functionals,
+    )
+    feature_df = smile.process_files(audio_path)
+    #output_features = ['F0env_sma_de_amean', 'lspFreq_sma_de[5]_linregc1', 'mfcc_sma[3]_linregc1', 'lspFreq_sma[6]_quartile1', 'lspFreq_sma_de[6]_linregerrQ', 'lspFreq_sma_de[6]_maxPos', 'lspFreq_sma_de[6]_iqr2-3', 'lspFreq_sma_de[7]_minPos', 'lspFreq_sma_de[4]_linregc1', 'lspFreq_sma_de[6]_linregerrA', 'lspFreq_sma_de[6]_linregc2', 'lspFreq_sma[5]_amean', 'lspFreq_sma_de[6]_iqr1-2', 'mfcc_sma[1]_minPos', 'mfcc_sma[4]_linregc1', 'mfcc_sma[9]_iqr2-3', 'lspFreq_sma[5]_kurtosis', 'lspFreq_sma_de[3]_skewness', 'mfcc_sma[3]_minPos', 'mfcc_sma[12]_linregc1']
+    df = pd.DataFrame(feature_df.values[0], index=feature_df.columns)
+    #df = df[df.index.isin(output_features)]
+    df = df.T
+    scaler = StandardScaler()
+    feature = scaler.fit_transform(df)
+    print(df.shape)
+
+    return feature
+
+def softmax_calc_(pred):
+    if torch.argmax(pred) == torch.tensor(0):
+        prediction = "question"
+
+    else:
+        prediction = "declarative"
+    return prediction
+
+def predict_dnn(input):
+    # Extract features with openSMILE
+    feature_vector = extract_features_dnn([input])
+
+    # Run inference with the loaded model
+    onnx_outs = dnn_model.run(None, {"model_input": feature_vector})
+    print(onnx_outs)
+    prediction = softmax_calc_(torch.FloatTensor(onnx_outs))
+    print(f"Prediction: {prediction}")
+    return prediction
+
+def main(model):
+    if model == "DNN":
+        return predict_dnn(input)
+    elif model == "RandomForest":
+        return predict_rf(input)
+
+
+with gr.Blocks() as demo:
+    model = gr.Dropdown(choices=model_names),
     fn = main,
     inputs=[
         gr.Audio(sources=["microphone","upload"], type="filepath")
@@ -50,5 +106,8 @@ gr.Interface(
     outputs=[
         "textbox"
     ],
-    live=True
-).launch(debug=True)
+    live=True,
+    description="demo for Audio to question classifier"
+
+
+demo.launch()
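
As committed, the UI never actually calls the models: inside `with gr.Blocks()` the lines `fn = main`, `inputs=[...]`, `outputs=[...]`, `live=True,` and `description=...` are leftovers from the old `gr.Interface(...)` call and execute as bare assignments, the trailing comma on `model = gr.Dropdown(choices=model_names),` makes `model` a one-element tuple, and `main(model)` passes Python's built-in `input` function rather than the recorded audio. A minimal sketch of one way to wire the Blocks UI, reusing `predict_dnn` and `predict_rf` from the diff above (the button, the labels, and the two-argument `main` are assumptions, not part of this commit):

import gradio as gr

model_names = ["DNN", "RandomForest"]

def main(model, audio_path):
    # Route the recorded or uploaded file to the model picked in the dropdown;
    # predict_dnn and predict_rf are the functions defined in app.py above.
    if model == "DNN":
        return predict_dnn(audio_path)
    return predict_rf(audio_path)

with gr.Blocks() as demo:
    gr.Markdown("Demo for an audio-to-question classifier")
    model = gr.Dropdown(choices=model_names, value="DNN", label="Model")
    audio = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Input audio")
    output = gr.Textbox(label="Prediction")
    classify = gr.Button("Classify")
    # Both the dropdown value and the audio path flow into main on click.
    classify.click(fn=main, inputs=[model, audio], outputs=[output])

demo.launch()

Separately, note that `extract_features_dnn` fits a fresh StandardScaler on the one utterance being classified; with a single sample, `fit_transform` returns all zeros, so the ONNX model always receives the same input. At inference time the scaler fitted on the training data would normally be loaded (e.g. via joblib) and applied with `transform` only, with the result cast to float32, which onnxruntime sessions exported from PyTorch typically expect.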