# NOTE: Hugging Face Space file-viewer residue from the original copy/paste
# (ttt-tkmr, "Update app.py", commit af16a0b verified, 3.93 kB) — kept as a
# comment so the module remains valid Python.
import opensmile
import joblib
import wave
import datetime
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from base64 import b64decode
import onnx
import onnxruntime
import torch
import gradio as gr
# Dropdown choices shown in the UI; must match the branches in main().
model_names = ["DNN", "RandomForest"]
# Pre-trained classifier files shipped alongside this app (question vs.
# declarative utterance classification on openSMILE emobase functionals;
# the scores/dates in the filenames come from the training runs).
rf_model_path = "RF_emobase_20_model_top1_score0.6863_20231207_1537.joblib"
dnn_model_path = "NN_emobase_allfeature_model_score_69.00_20240304_1432.onnx"
# Load both models once at import time so every request reuses them.
dnn_model = onnxruntime.InferenceSession(dnn_model_path)
rf_model = joblib.load(rf_model_path)
def extract_features_rf(audio_path):
    """Extract the 20 emobase functionals the RandomForest model was trained on.

    Parameters
    ----------
    audio_path : list[str]
        Audio file path(s); opensmile's process_files expects a list.

    Returns
    -------
    numpy.ndarray
        Shape (1, 20) array of the selected feature values.
    """
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    feature_df = smile.process_files(audio_path)
    # Feature names selected during training (top-20 importance subset).
    output_features = ['F0env_sma_de_amean', 'lspFreq_sma_de[5]_linregc1', 'mfcc_sma[3]_linregc1', 'lspFreq_sma[6]_quartile1', 'lspFreq_sma_de[6]_linregerrQ', 'lspFreq_sma_de[6]_maxPos', 'lspFreq_sma_de[6]_iqr2-3', 'lspFreq_sma_de[7]_minPos', 'lspFreq_sma_de[4]_linregc1', 'lspFreq_sma_de[6]_linregerrA', 'lspFreq_sma_de[6]_linregc2', 'lspFreq_sma[5]_amean', 'lspFreq_sma_de[6]_iqr1-2', 'mfcc_sma[1]_minPos', 'mfcc_sma[4]_linregc1', 'mfcc_sma[9]_iqr2-3', 'lspFreq_sma[5]_kurtosis', 'lspFreq_sma_de[3]_skewness', 'mfcc_sma[3]_minPos', 'mfcc_sma[12]_linregc1']
    df = pd.DataFrame(feature_df.values[0], index=feature_df.columns)
    df = df[df.index.isin(output_features)]
    df = df.T
    # BUG FIX: the previous code fit a fresh StandardScaler on this single
    # sample. With one row the mean equals the sample and the variance is
    # zero, so fit_transform always returned an all-zero vector — the model
    # received identical input for every audio file.
    # TODO(review): persist the StandardScaler fitted on the training data
    # and load+apply it here; until then the raw features are passed through.
    return df.to_numpy()
def predict_rf(input):
    """Classify the audio file at path *input* with the RandomForest model.

    Returns the predicted label as a plain string, consistent with
    predict_dnn.
    """
    # Extract openSMILE features (process_files expects a list of paths).
    feature_vector = extract_features_rf([input])
    prediction = rf_model.predict(feature_vector)
    # predict() returns a length-1 array; unwrap it so the Gradio textbox
    # shows a clean label instead of the ndarray repr like "['question']".
    return prediction[0]
def extract_features_dnn(audio_path):
    """Extract the full emobase functional set for the ONNX DNN model.

    Parameters
    ----------
    audio_path : list[str]
        Audio file path(s); opensmile's process_files expects a list.

    Returns
    -------
    numpy.ndarray
        Shape (1, n_features) float32 array of all emobase functionals.
    """
    smile = opensmile.Smile(
        feature_set=opensmile.FeatureSet.emobase,
        feature_level=opensmile.FeatureLevel.Functionals,
    )
    feature_df = smile.process_files(audio_path)
    df = pd.DataFrame(feature_df.values[0], index=feature_df.columns)
    df = df.T
    # BUG FIX: the previous code fit a fresh StandardScaler on this single
    # sample, which always produced an all-zero vector (single-row mean ==
    # the sample, zero variance) — the DNN received identical input for
    # every audio file.
    # TODO(review): persist and apply the training-time scaler here instead.
    # Cast to float32, the input dtype ONNX Runtime sessions typically
    # require — confirm against the exported model's input spec.
    return df.to_numpy(dtype="float32")
def softmax_calc_(pred):
    """Map the model's raw output tensor to a class label.

    The highest-scoring class wins: index 0 -> "question", anything
    else -> "declarative".
    """
    top_class = torch.argmax(pred).item()
    return "question" if top_class == 0 else "declarative"
def predict_dnn(input):
    """Classify the audio file at path *input* with the ONNX DNN model.

    Returns the predicted label ("question" or "declarative") as a string.
    """
    # Extract openSMILE features (process_files expects a list of paths).
    feature_vector = extract_features_dnn([input])
    # run() returns a list of output arrays; this model exposes a single
    # output, so take element 0 instead of wrapping the whole list in a
    # tensor (the old code added a spurious leading dimension and would
    # break if the session ever had a second output). Debug prints removed.
    onnx_outs = dnn_model.run(None, {"model_input": feature_vector})
    return softmax_calc_(torch.FloatTensor(onnx_outs[0]))
def main(model, audio):
    """Dispatch the Gradio inputs to the selected model's predictor.

    Parameters
    ----------
    model : str or None
        One of model_names; None until the user picks (live=True fires on
        every input change, including before a selection is made).
    audio : str or None
        File path of the recorded/uploaded audio, or None if absent.

    Returns
    -------
    str
        The predicted label, or a hint while inputs are incomplete.
    """
    # Guards: previously a None model/audio left `predict` unbound and
    # raised UnboundLocalError (or crashed inside feature extraction).
    if audio is None:
        return "Waiting for audio input"
    if model == "DNN":
        return predict_dnn(audio)
    if model == "RandomForest":
        return predict_rf(audio)
    return "Please select a model"
# Gradio UI: pick a model, record or upload audio, show the predicted label.
iface = gr.Interface(
    fn = main,
    inputs=[
        gr.Dropdown(choices=model_names),
        gr.Audio(sources=["microphone","upload"], type="filepath")
    ],
    outputs=[
        "textbox"
    ],
    # live=True re-runs main() on every input change — including before a
    # model has been selected, so main() must tolerate partial inputs.
    live=True,
    description="demo for Audio to question classifier"
)
iface.launch()