amarchheda committed on
Commit
a433359
1 Parent(s): c1b97fa

added file

Files changed (1)
  1. main_code.py +80 -0
main_code.py ADDED
@@ -0,0 +1,80 @@
+ import numpy as np
+ import tensorflow as tf
+ from scipy.io.wavfile import write
+ import keras.backend as K
+ import librosa.display
+ import cv2
+ import librosa
+ import matplotlib.pyplot as plt
+ import librosa.display
+ import numpy as np
+ from keras.applications import VGG16
+ import os
+ import scipy
+
+ # Define function to preprocess input audio:
+ # convert the song to a mel spectrogram, since the siamese network doesn't work on raw audio directly
+ def create_spectrogram(clip, sample_rate, save_path):
+     plt.interactive(False)
+     fig = plt.figure(figsize=[0.72, 0.72])
+     S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
+     librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
+     fig.savefig(save_path, dpi=400, bbox_inches='tight', pad_inches=0)
+     plt.close()
+     fig.clf()
+     plt.close(fig)
+     plt.close('all')
+     del save_path, clip, sample_rate, fig, S
+
+ def load_img(path):
+     img = cv2.imread(path)
+     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+     img = cv2.resize(img, (150, 150))
+     return img
+
+ import pickle
+
+ def main_loop():
+
+     # Load the precomputed embeddings of the songs in the database
+     with open('dict.pickle', 'rb') as handle:
+         songspecdict = pickle.load(handle)
+
+     # Load the song to match
+     song, sr = librosa.load("my_audio.wav")
+     to_match = np.copy(song[0:220500])
+     print("Loaded data into librosa...")
+
+     # Create spectrogram image of the song to match
+     create_spectrogram(to_match, sr, 'test.png')
+     print("Created spectrogram...")
+
+     # Load the spectrogram image of the song to match
+     to_match_img = load_img('test.png')
+     to_match_img = np.expand_dims(to_match_img, axis=0)
+     print("Loaded spectrogram image...")
+
+     # Get the embedding of the song to match:
+     # load the tune recognition model and take its embedding sub-model
+     model = tf.keras.models.load_model('./embdmodel_1.hdf5')
+     embedding_model = model.layers[2]
+     to_match_emb = embedding_model.predict(to_match_img)
+     print("Got song embedding...")
+
+     # Calculate the distances between the song to match and the songs in the database
+     songsdistdict = {}
+     for key, values in songspecdict.items():
+         dist_array = []
+         for embd in values:
+             dist_array.append(np.linalg.norm(to_match_emb - embd))
+
+         songsdistdict[key] = min(dist_array)
+     song_titles = list(songsdistdict.keys())
+     distances = list(songsdistdict.values())
+
+     # Get the title and artist of the recognized song
+     recognized_song_artist, recognized_song_title = song_titles[distances.index(min(distances))].split('-')
+     recognized_song_title = os.path.splitext(recognized_song_title)[0]
+     print(f'Artist: {recognized_song_artist}')
+     print(f'Title: {recognized_song_title}')
+
+     return recognized_song_title
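
Note: main_loop() assumes that dict.pickle already maps each reference track (keyed as "Artist-Title.ext") to a list of embeddings produced by the same spectrogram/embedding pipeline; the script that builds that database is not part of this commit. The following is a minimal sketch of how it could be generated, reusing create_spectrogram, load_img, embdmodel_1.hdf5 and model.layers[2] from the committed file. The songs/ directory name, the "Artist-Title.wav" naming scheme, and the 220500-sample (10 s at librosa's default 22050 Hz) chunking are assumptions, not part of this commit.

import os
import pickle

import librosa
import numpy as np
import tensorflow as tf

from main_code import create_spectrogram, load_img

SONGS_DIR = 'songs'   # assumption: a folder of reference tracks named "Artist-Title.wav"
CLIP_LEN = 220500     # same 10 s window (at librosa's default 22050 Hz) that main_loop() matches against

# Reuse the committed model and take the same embedding sub-model as main_loop()
model = tf.keras.models.load_model('./embdmodel_1.hdf5')
embedding_model = model.layers[2]

songspecdict = {}
for fname in os.listdir(SONGS_DIR):
    song, sr = librosa.load(os.path.join(SONGS_DIR, fname))
    embeddings = []
    # Embed each 10-second chunk of the track so main_loop() can take the minimum distance per song
    for start in range(0, max(len(song) - CLIP_LEN, 1), CLIP_LEN):
        clip = song[start:start + CLIP_LEN]
        create_spectrogram(clip, sr, 'tmp.png')
        img = np.expand_dims(load_img('tmp.png'), axis=0)
        embeddings.append(embedding_model.predict(img))
    songspecdict[fname] = embeddings

with open('dict.pickle', 'wb') as handle:
    pickle.dump(songspecdict, handle)

With dict.pickle and my_audio.wav in place, the committed script is driven by a single call to main_loop().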