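"""Streamlit demo for the audiodiffusion package.

Pick a pretrained audio-diffusion model, generate a mel spectrogram and the
corresponding audio, and play a loop of it when one can be extracted.
"""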
from io import BytesIO
import streamlit as st
import soundfile as sf
from librosa.util import normalize

from audiodiffusion import AudioDiffusion

if __name__ == "__main__":
    st.header("Audio Diffusion")
    st.markdown(
        "Generate audio using Hugging Face diffusers. "
        "This takes about 20 minutes without a GPU, so why not make yourself a "
        "cup of tea in the meantime? (Or try the teticio/audio-diffusion-ddim-256 "
        "model, which is faster.)")

    model_id = st.selectbox("Model", [
        "teticio/audio-diffusion-256", "teticio/audio-diffusion-breaks-256",
        "teticio/audio-diffusion-instrumental-hiphop-256",
        "teticio/audio-diffusion-ddim-256"
    ])
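    # Wrap the selected pretrained model with the AudioDiffusion helper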
    audio_diffusion = AudioDiffusion(model_id=model_id)

    if st.button("Generate"):
        st.markdown("Generating...")
        image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio()
        st.image(image, caption="Mel spectrogram")
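        # Write the normalized waveform to an in-memory WAV buffer so st.audio can play it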
        buffer = BytesIO()
        sf.write(buffer, normalize(audio), sample_rate, format="WAV")
        st.audio(buffer, format="audio/wav")

        # loop_it tries to extract a seamless loop from the generated clip; it returns None if no loop is found
        audio = AudioDiffusion.loop_it(audio, sample_rate)
        if audio is not None:
            st.markdown("Loop")
            buffer = BytesIO()
            sf.write(buffer, normalize(audio), sample_rate, format="WAV")
            st.audio(buffer, format="audio/wav")