Cpuritan committed
Commit
b7bec50
1 Parent(s): e77e3fa

Create app.py

Files changed (1)
  1. app.py +147 -0
app.py ADDED
@@ -0,0 +1,147 @@
+ import os
+ import torch
+ import librosa
+ import gradio as gr
+ import numpy as np
+ import pandas as pd
+ import matplotlib.pyplot as plt
+ from transformers import BertConfig, BertTokenizer, XLMRobertaForSequenceClassification
+ from keras.models import load_model
+
+
+ def text_clf(text):
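+     # Classify the emotion of a Chinese/English sentence with a fine-tuned
+     # XLM-RoBERTa model and return a {label: probability} dict for gr.Label.
+     # Illustrative output (hypothetical values): {"高兴喵": 0.95, "伤心": 0.01, ...}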
+     # os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+     # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     vocab_file = "vocab.txt"  # local vocabulary file
+     tokenizer = BertTokenizer(vocab_file)
+     # Load the model
+     config = BertConfig.from_pretrained("nanaaaa/emotion_chinese_english")
+     model = XLMRobertaForSequenceClassification.from_pretrained("nanaaaa/emotion_chinese_english", config=config)
+     # model.to(device)
+     inputs = tokenizer(text, return_tensors="pt")
+     # inputs.to(device)
+     # Run inference
+     outputs = model(**inputs)
+     probs = torch.nn.functional.softmax(outputs.logits, dim=1)
+     # Labels (fear, happy, surprise, sad, angry) and their probabilities
+     labels = ["害怕", "高兴喵", "惊喜", "伤心", "生气"]
+     probabilities = probs.detach().cpu().numpy()[0].tolist()
+     # Return a {label: probability} mapping
+     return {labels[i]: float(probabilities[i]) for i in range(len(labels))}
+
+
+ def audio_clf(aud):
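+     # Classify emotion from a (sample_rate, waveform) tuple as produced by
+     # gr.Audio: fix the clip length, extract MFCCs, standardize, and score
+     # with a pre-trained Keras model loaded from a machine-specific path.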
+     my_model = load_model('D://speech_mfcc_model.h5')
+
+     def normalizeVoiceLen(y, normalizedLen):
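+         # Pad with zeros or truncate so the clip is exactly normalizedLen samples.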
+         nframes = len(y)
+         y = np.reshape(y, [nframes, 1]).T
+         if nframes < normalizedLen:
+             # Too short: zero-pad on the right
+             res = normalizedLen - nframes
+             res_data = np.zeros([1, res], dtype=np.float32)
+             y = np.c_[y, res_data]
+         else:
+             # Too long: keep the first normalizedLen samples
+             y = y[:, 0:normalizedLen]
+         return y[0]
+
+     def getNearestLen(framelength, sr):
+         framesize = framelength * sr
+         # Find the power of two closest to the current framesize
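+         # e.g. framelength=0.5 at sr=16000 gives framesize=8000, and the
+         # closest value in the candidate list is 1024, so N_FFT becomes 1024.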
+         nfftdict = {}
+         lists = [32, 64, 128, 256, 512, 1024]
+         for i in lists:
+             nfftdict[i] = abs(framesize - i)
+         sortlist = sorted(nfftdict.items(), key=lambda x: x[1])
+         framesize = int(sortlist[0][0])  # take the candidate closest to the current framesize
+         return framesize
+
+     VOICE_LEN = 35000
+     sr, y = aud
+     y = y.astype(np.float32)  # gr.Audio yields integer samples; librosa expects floating-point audio
+     N_FFT = getNearestLen(0.5, sr)
+     y = normalizeVoiceLen(y, VOICE_LEN)  # normalize the clip length
+     mfcc_data = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=N_FFT, hop_length=int(N_FFT / 4))
+     feature = np.mean(mfcc_data, axis=0)
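+     # mfcc_data has shape (13, n_frames); averaging over axis 0 leaves one
+     # value per frame, matching the (1, n_frames, 1) input built below.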
+     # Standardize the features (zero mean, unit variance)
+     data = np.array(feature)
+     DATA_MEAN = np.mean(data)
+     DATA_STD = np.std(data)
+     data = (data - DATA_MEAN) / DATA_STD
+     data = data.reshape((1, data.shape[0], 1))
+     pred = my_model.predict(data)
+     labels1 = ["angry", "fear", "joy", "neutral", "sadness", "surprise"]
+     probabilities1 = pred[0].tolist()
+     return {labels1[i]: float(probabilities1[i]) for i in range(len(labels1))}
+
+
+ def cir_clf(L, R):
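+     # Plot rows L..R of a precomputed dataframe (columns R_nor and Theta_nor)
+     # as a polar scatter colored by angle bucket, and return it as an image.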
+     df_4 = pd.read_csv(r'../df_4.csv', encoding="gbk")
+     fig, ax = plt.subplots()
+     r = df_4["R_nor"][int(L):int(R)]
+     theta = (2 * np.pi * df_4["Theta_nor"])[int(L):int(R)]
+
+     def clf_col(x):
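+         # Map an angle in radians to one of the color buckets below; angles
+         # outside every interval fall through and return None.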
+         if -1.5 * np.pi > x > -2 * np.pi:
+             return 5
+         if -1.5 * np.pi < x < -1.1 * np.pi:
+             return 2
+         if -1.1 * np.pi < x < -1 * np.pi:
+             return 3
+         if 1.04 * np.pi > x > 1 * np.pi:
+             return 3
+         if 1.1 * np.pi < x < 1.375 * np.pi:
+             return 4
+         if 1.625 * np.pi > x > 1.375 * np.pi:
+             return 1
+         if 1.625 * np.pi < x < 2 * np.pi:
+             return 0
+
+     theta1 = theta.copy()
+     colors = theta1.apply(clf_col)
+     ax = plt.subplot(111, projection="polar")
+     c = ax.scatter(theta, r, c=colors, cmap="hsv", alpha=0.6)
+     fig.set_size_inches(10, 10)
+
+     def fig2data(fig):
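+         # Render the figure (assumes the Agg backend) and convert the canvas
+         # bytes to an RGBA numpy array that gr.Image can display.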
+         import PIL.Image as Image
+         fig.canvas.draw()
+         w, h = fig.canvas.get_width_height()
+         buf = np.frombuffer(fig.canvas.tostring_argb(), dtype=np.uint8)  # np.fromstring is removed in modern NumPy
+         buf = buf.reshape((h, w, 4))  # height first: rows x cols x channels
+         buf = np.roll(buf, 3, axis=2)  # reorder ARGB -> RGBA
+         image = Image.frombytes("RGBA", (w, h), buf.tobytes())
+         image = np.asarray(image)
+         return image
+
+     return fig2data(fig)
+
+
+ with gr.Blocks() as demo:
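+     # Three tabs wire each classifier to its own widgets; every "确认"
+     # ("Confirm") button binds the matching callback below.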
+     with gr.Tab("Flip Text"):
+         text = gr.Textbox(label="文本哟")
+         text_output = gr.Label(label="情感呢")  # gr.outputs.Label is deprecated inside Blocks
+         text_button = gr.Button("确认")
+
+     with gr.Tab("Flip Audio"):
+         audio = gr.Audio(label="音频捏")
+         audio_output = gr.Label(label="情感哟")
+         audio_button = gr.Button("确认")
+
+     with gr.Tab("Flip Circle"):
+         cir_l = gr.Slider(0, 30000, step=1)
+         cir_r = gr.Slider(0, 30000, step=1)
+         cir_output = gr.Image(type='numpy', label="情感圈")
+         cir_button = gr.Button("确认")
+
+     text_button.click(fn=text_clf, inputs=text, outputs=text_output)
+     audio_button.click(fn=audio_clf, inputs=audio, outputs=audio_output)
+     cir_button.click(fn=cir_clf, inputs=[cir_l, cir_r], outputs=cir_output)
+
+ demo.launch(share=True)