amarchheda committed on
Commit
c1b97fa
1 Parent(s): 175c5b3

updated files

Browse files
Files changed (1) hide show
  1. app.py +5 -112
app.py CHANGED
@@ -1,54 +1,13 @@
1
-
2
- import numpy as np
3
- import tensorflow as tf
4
- from scipy.io.wavfile import write
5
- import keras.backend as K
6
- import librosa.display
7
- import cv2
8
- import librosa
9
- import matplotlib.pyplot as plt
10
- import librosa.display
11
- import numpy as np
12
- from keras.applications import VGG16
13
  import os
14
- import scipy
15
  import gradio as gr
16
  import shutil
 
 
17
 
18
- # Load the tune recognition model
19
- model = tf.keras.models.load_model('embdmodel_1.hdf5')
20
- embedding_model=model.layers[2]
21
 
22
  DURATION = 10
23
  WAVE_OUTPUT_FILE = "my_audio.wav"
24
 
25
-
26
- # Define function to preprocess input audio
27
- # Convert the song to a mel spectrogram, as the siamese network doesn't work on sound directly
28
- def create_spectrogram(clip,sample_rate,save_path):
29
- plt.interactive(False)
30
- fig=plt.figure(figsize=[0.72,0.72])
31
- S=librosa.feature.melspectrogram(y=clip,sr=sample_rate)
32
- librosa.display.specshow(librosa.power_to_db(S,ref=np.max))
33
- fig.savefig(save_path,dpi=400,bbox_inches='tight',pad_inches=0)
34
- plt.close()
35
- fig.clf()
36
- plt.close(fig)
37
- plt.close('all')
38
- del save_path,clip,sample_rate,fig,S
39
-
40
- def load_img(path):
41
- img=cv2.imread(path)
42
- img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
43
- img=cv2.resize(img,(150,150))
44
- return img
45
-
46
-
47
- import pickle
48
- with open('dict.pickle', 'rb') as handle:
49
- songspecdict = pickle.load(handle)
50
-
51
-
52
  def list_file_sizes():
53
  path = "."
54
 
@@ -76,75 +35,9 @@ def main(audio):
76
 
77
  list_file_sizes()
78
 
79
- # Load the song to match
80
- song, sr = librosa.load("my_audio.wav")
81
- to_match = np.copy(song[0:220500])
82
- print("Loaded data into librosa...")
83
-
84
- # Create spectrogram image of the song to match
85
- create_spectrogram(to_match, sr, 'test.png')
86
- print("Created spectogram...")
87
-
88
- # Load the spectrogram image of the song to match
89
- to_match_img = load_img('test.png')
90
- to_match_img = np.expand_dims(to_match_img, axis=0)
91
- print("Loaded spectrum image...")
92
-
93
- # Get the embedding of the song to match
94
- to_match_emb = embedding_model.predict(to_match_img)
95
- print("Get song embedding...")
96
-
97
- # Calculate the distances between the song to match and the songs in the database
98
- songsdistdict = {}
99
- for key, values in songspecdict.items():
100
- dist_array = []
101
- for embd in values:
102
- dist_array.append(np.linalg.norm(to_match_emb - embd))
103
-
104
- songsdistdict[key] = min(dist_array)
105
- song_titles=list(songsdistdict.keys())
106
- distances=list(songsdistdict.values())
107
-
108
- # Get the title and artist of the recognized song
109
- recognized_song_artist, recognized_song_title = song_titles[distances.index(min(distances))].split('-')
110
- recognized_song_title = os.path.splitext(recognized_song_title)[0]
111
- print(f'Artist: {recognized_song_artist}')
112
- print(f'Title: {recognized_song_title}')
113
-
114
- from musixmatch import Musixmatch
115
-
116
- # Initialize Musixmatch API
117
- musixmatch = Musixmatch(apikey='2b0d0615efa782e95598a0e99bda4a60')
118
-
119
- # Search for the recognized song
120
- track_search_results = musixmatch.track_search(q_track=recognized_song_title, q_artist=recognized_song_artist, page_size=1, page=1, s_track_rating='desc')
121
-
122
- if track_search_results['message']['header']['status_code'] == 200:
123
- # Get the track ID for the top result
124
- track_id = track_search_results['message']['body']['track_list'][0]['track']['track_id']
125
-
126
- # Get the lyrics for the recognized song
127
- lyrics_result = musixmatch.track_lyrics_get(track_id=track_id)
128
-
129
- if lyrics_result['message']['header']['status_code'] == 200:
130
- # Get the lyrics
131
- lyrics = lyrics_result['message']['body']['lyrics']['lyrics_body']
132
- # Remove the annotation tags from the lyrics
133
- lyrics = lyrics.replace('******* This Lyrics is NOT for Commercial use *******', '').strip()
134
- print("Lyrics:\n", lyrics)
135
- else:
136
- print("Couldn't find lyrics for the recognized song.")
137
-
138
-
139
-
140
- # Play the recognized song
141
- recognized_song_file = f'https://huggingface.co/spaces/prerna9811/Chord/tree/main/seismese_net_songs/{song_titles[distances.index(min(distances))]}'
142
- recognized_song_audio, recognized_song_sr = librosa.load(recognized_song_file)
143
-
144
- audio_file = open(recognized_song_file, 'rb') # enter the filename with filepath
145
- audio_bytes = audio_file.read() # reading the file
146
 
147
- return audio_bytes
148
 
149
 
150
  import asyncio
@@ -155,7 +48,7 @@ demo = gr.Blocks()
155
  mf_transcribe = gr.Interface(
156
  fn=main,
157
  inputs=gr.inputs.Audio(source="microphone", type="filepath"),
158
- outputs="audio",
159
  layout="horizontal",
160
  theme="huggingface",
161
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import gradio as gr
3
  import shutil
4
+ from main_code import main_loop
5
+
6
 
 
 
 
7
 
8
  DURATION = 10
9
  WAVE_OUTPUT_FILE = "my_audio.wav"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def list_file_sizes():
12
  path = "."
13
 
 
35
 
36
  list_file_sizes()
37
 
38
+ song = main_loop()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
+ return audio
41
 
42
 
43
  import asyncio
 
48
  mf_transcribe = gr.Interface(
49
  fn=main,
50
  inputs=gr.inputs.Audio(source="microphone", type="filepath"),
51
+ outputs="text",
52
  layout="horizontal",
53
  theme="huggingface",
54
  )