import os

import cv2
import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Load the tune recognition model
model = tf.keras.models.load_model('embdmodel_1.hdf5')
embedding_model = model.layers[2]
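# embedding_model (layers[2] of the loaded siamese network) is assumed to map a
# 150x150 spectrogram image to a fixed-length vector; songs are matched further
# below by comparing these vectors with L2 distance.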

DURATION = 10
WAVE_OUTPUT_FILE = "my_audio.wav"


# Define a function to preprocess the input audio.
# Convert the clip to a mel spectrogram image, since the siamese network
# does not operate on raw audio directly.
def create_spectrogram(clip, sample_rate, save_path):
    plt.interactive(False)
    fig = plt.figure(figsize=[0.72, 0.72])
    S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
    fig.savefig(save_path, dpi=400, bbox_inches='tight', pad_inches=0)
    # Close the figure explicitly so repeated calls don't leak matplotlib memory
    fig.clf()
    plt.close(fig)
    plt.close('all')
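# Example usage (hypothetical filename): render roughly the first 10 seconds of
# a clip (220500 samples at librosa's default 22050 Hz) to a spectrogram image:
#   clip, sr = librosa.load('some_song.wav')
#   create_spectrogram(clip[0:220500], sr, 'some_song.png')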
    
def load_img(path):
    # Read the saved spectrogram image and resize it to the 150x150 RGB input
    # used by the embedding model
    img = cv2.imread(path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (150, 150))
    return img


import pickle
with open('dict.pickle', 'rb') as handle:
    songspecdict = pickle.load(handle)
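# dict.pickle is assumed to map reference songs, keyed as "Artist-Title.ext", to
# lists of embeddings precomputed with the same embedding_model. A minimal sketch
# of how such a dictionary could be built offline (the directory name and the
# one-embedding-per-song layout are assumptions, not part of the original app):
def build_song_embedding_dict(song_dir='seismese_net_songs', out_path='dict.pickle'):
    songdict = {}
    for fname in os.listdir(song_dir):
        clip, sr = librosa.load(os.path.join(song_dir, fname))
        create_spectrogram(clip[0:220500], sr, 'ref.png')
        img = np.expand_dims(load_img('ref.png'), axis=0)
        songdict[fname] = [embedding_model.predict(img)]
    with open(out_path, 'wb') as f:
        pickle.dump(songdict, f)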


def list_file_sizes():
    path = "."

    # Get the list of regular files in the given directory
    files_list = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]

    # Print each file along with its size in MB
    for f in files_list:
        size = os.stat(os.path.join(path, f)).st_size
        print("{} : {}MB".format(f, round(size / (1024 * 1024), 3)))



def main(audio):

    # Gradio is configured with type="filepath", so `audio` is the path of the
    # recorded clip; copy its contents to WAVE_OUTPUT_FILE before processing.
    with open(audio, "rb") as src, open(WAVE_OUTPUT_FILE, "wb") as dst:
        dst.write(src.read())

    list_file_sizes()

    # Load the song to match
    song, sr = librosa.load(WAVE_OUTPUT_FILE)
    to_match = np.copy(song[0:220500])
    print("Loaded data into librosa...")

    # Create a spectrogram image of the song to match
    create_spectrogram(to_match, sr, 'test.png')
    print("Created spectrogram...")

    # Load the spectrogram image of the song to match
    to_match_img = load_img('test.png')
    to_match_img = np.expand_dims(to_match_img, axis=0)
    print("Loaded spectrogram image...")

    # Get the embedding of the song to match
    to_match_emb = embedding_model.predict(to_match_img)
    print("Got song embedding...")

    # Calculate the distances between the song to match and the songs in the database
    songsdistdict = {}
    for key, values in songspecdict.items():
        dist_array = []
        for embd in values:
            dist_array.append(np.linalg.norm(to_match_emb - embd))

        songsdistdict[key] = min(dist_array)
    song_titles = list(songsdistdict.keys())
    distances = list(songsdistdict.values())
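    # The nearest match is the dictionary key with the smallest embedding
    # distance; keys are assumed to be named "Artist-Title.ext" so the split
    # below recovers artist and title.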

    # Get the title and artist of the recognized song
    recognized_song_artist, recognized_song_title = song_titles[distances.index(min(distances))].split('-')
    recognized_song_title = os.path.splitext(recognized_song_title)[0]
    print(f'Artist: {recognized_song_artist}')
    print(f'Title: {recognized_song_title}')

    from musixmatch import Musixmatch

    # Initialize Musixmatch API
    musixmatch = Musixmatch(apikey='2b0d0615efa782e95598a0e99bda4a60')

    # Search for the recognized song
    track_search_results = musixmatch.track_search(q_track=recognized_song_title, q_artist=recognized_song_artist, page_size=1, page=1, s_track_rating='desc')

    if track_search_results['message']['header']['status_code'] == 200:
        # Get the track ID for the top result
        track_id = track_search_results['message']['body']['track_list'][0]['track']['track_id']

        # Get the lyrics for the recognized song
        lyrics_result = musixmatch.track_lyrics_get(track_id=track_id)

        if lyrics_result['message']['header']['status_code'] == 200:
            # Get the lyrics
            lyrics = lyrics_result['message']['body']['lyrics']['lyrics_body']
            # Remove the annotation tags from the lyrics
            lyrics = lyrics.replace('******* This Lyrics is NOT for Commercial use *******', '').strip()
            print("Lyrics:\n", lyrics)
    else:
        print("Couldn't find lyrics for the recognized song.")



    # Play the recognized song. This assumes the reference audio files are
    # available locally in the seismese_net_songs/ directory of this Space;
    # the huggingface.co "tree" URL cannot be opened directly as a file.
    recognized_song_file = os.path.join(
        'seismese_net_songs', song_titles[distances.index(min(distances))])

    with open(recognized_song_file, 'rb') as audio_file:
        audio_bytes = audio_file.read()

    return audio_bytes


css = """
footer {display:none !important}
.output-markdown{display:none !important}
button.primary {
    z-index: 14;
    left: 0px;
    top: 0px;
    cursor: pointer !important; 
    background: none rgb(17, 20, 45) !important;
    border: none !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 6px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: none !important;
}
button.primary:hover{
    z-index: 14;
    left: 0px;
    top: 0px;
    cursor: pointer !important;
    background: none rgb(37, 56, 133) !important;
    border: none !important;
    color: rgb(255, 255, 255) !important;
    line-height: 1 !important;
    border-radius: 6px !important;
    transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
    box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
button.gallery-item:hover {
    border-color: rgb(37 56 133) !important;
    background-color: rgb(229,225,255) !important;
}
"""


mf_transcribe = gr.Interface(
    fn=main,
    inputs=gr.inputs.Audio(source="microphone", type="filepath"),
    outputs="audio",
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
    css=css,
)
mf_transcribe.launch()
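# Example (assumption, for local testing without the Gradio UI): call main()
# directly with the path of a short recording and save the returned bytes:
#   matched_bytes = main("my_recording.wav")
#   with open("match.wav", "wb") as f:
#       f.write(matched_bytes)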