Commit d533c9c by teticio
Parent: 869c0ac

refactor and improve apps
app.py CHANGED

@@ -4,7 +4,16 @@ import gradio as gr
 
 from audiodiffusion import AudioDiffusion
 
-audio_diffusion = AudioDiffusion()
+
+def generate_spectrogram_audio_and_loop(model_id):
+    audio_diffusion = AudioDiffusion(model_id=model_id)
+    image, (sample_rate,
+            audio) = audio_diffusion.generate_spectrogram_and_audio()
+    loop = AudioDiffusion.loop_it(audio, sample_rate)
+    if loop is None:
+        loop = audio
+    return image, (sample_rate, audio), (sample_rate, loop)
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -13,14 +22,23 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     demo = gr.Interface(
-        fn=audio_diffusion.generate_spectrogram_and_audio,
+        fn=generate_spectrogram_audio_and_loop,
         title="Audio Diffusion",
         description="Generate audio using Huggingface diffusers.\
         This takes about 20 minutes without a GPU, so why not make yourself a cup of tea in the meantime?",
-        inputs=[],
+        inputs=[
+            gr.Dropdown(label="Model",
+                        choices=[
+                            "teticio/audio-diffusion-256",
+                            "teticio/audio-diffusion-breaks-256"
+                        ],
+                        value="teticio/audio-diffusion-256")
+        ],
         outputs=[
             gr.Image(label="Mel spectrogram", image_mode="L"),
             gr.Audio(label="Audio"),
+            gr.Audio(label="Loop"),
         ],
+        allow_flagging="never"
     )
     demo.launch(server_name=args.server or "0.0.0.0", server_port=args.port)
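The refactor moves model construction into the Gradio callback, so the dropdown choice decides which checkpoint is loaded on each request. For reference, a minimal sketch of exercising the same API outside the UI, assuming only what this diff shows (the model_id keyword, generate_spectrogram_and_audio, and the new loop_it static method) plus an installed soundfile:

    import soundfile as sf

    from audiodiffusion import AudioDiffusion

    # Instantiate per call, as the new callback does, so the model choice
    # selects which checkpoint is loaded.
    audio_diffusion = AudioDiffusion(model_id="teticio/audio-diffusion-256")
    image, (sample_rate, audio) = audio_diffusion.generate_spectrogram_and_audio()

    # loop_it returns None when too few beats are detected; fall back to the
    # raw sample, exactly as generate_spectrogram_audio_and_loop does.
    loop = AudioDiffusion.loop_it(audio, sample_rate)
    sf.write("sample.wav", loop if loop is not None else audio, sample_rate)
    image.save("spectrogram.png")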
audiodiffusion/__init__.py CHANGED

@@ -1,6 +1,8 @@
+import numpy as np
 from PIL import Image
 from torch import cuda
 from diffusers import DDPMPipeline
+from librosa.beat import beat_track
 
 from .mel import Mel
 
@@ -38,3 +40,12 @@ class AudioDiffusion:
         image = Image.fromarray(images[0][0])
         audio = self.mel.image_to_audio(image)
         return image, (self.mel.get_sample_rate(), audio)
+
+    @staticmethod
+    def loop_it(audio, sample_rate, loops=12):
+        tempo, beats = beat_track(y=audio, sr=sample_rate, units='samples')
+        if len(beats) > 8:
+            return np.tile(audio[beats[0]:beats[8]], loops)
+        if len(beats) > 4:
+            return np.tile(audio[beats[0]:beats[4]], loops)
+        return None
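The new loop_it helper runs librosa's beat tracker with sample-accurate beat positions, slices the audio from the first to the ninth detected beat (eight beats, i.e. two bars in 4/4) and tiles that slice loops times; with fewer than nine beats it falls back to a four-beat slice, and with fewer than five it returns None. A quick sanity check on a synthetic click track (a sketch; the 120 BPM impulse train is invented for illustration and is not part of the commit):

    import numpy as np

    from audiodiffusion import AudioDiffusion

    # Hypothetical test signal: 10 s of single-sample clicks at 120 BPM
    # (one every 0.5 s), which the beat tracker should lock onto.
    sample_rate = 22050
    audio = np.zeros(10 * sample_rate, dtype=np.float32)
    audio[::sample_rate // 2] = 1.0

    loop = AudioDiffusion.loop_it(audio, sample_rate, loops=12)
    if loop is not None:
        # 8 beats at 120 BPM = 4 s, tiled 12 times = ~48 s of audio.
        print(f"Loop length: {len(loop) / sample_rate:.1f} s")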
notebooks/test_model.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/test_model_breaks.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
streamlit_app.py CHANGED

@@ -2,16 +2,21 @@ from io import BytesIO
 import streamlit as st
 import soundfile as sf
 from librosa.util import normalize
+from librosa.beat import beat_track
 
 from audiodiffusion import AudioDiffusion
 
-audio_diffusion = AudioDiffusion()
-
 if __name__ == "__main__":
     st.header("Audio Diffusion")
     st.markdown("Generate audio using Huggingface diffusers.\
         This takes about 20 minutes without a GPU, so why not make yourself a cup of tea in the meantime?"
     )
+
+    model_id = st.selectbox(
+        "Model",
+        ["teticio/audio-diffusion-256", "teticio/audio-diffusion-breaks-256"])
+    audio_diffusion = AudioDiffusion(model_id=model_id)
+
     if st.button("Generate"):
         st.markdown("Generating...")
         image, (sample_rate,
@@ -20,3 +25,10 @@ if __name__ == "__main__":
         buffer = BytesIO()
         sf.write(buffer, normalize(audio), sample_rate, format="WAV")
         st.audio(buffer, format="audio/wav")
+
+        audio = AudioDiffusion.loop_it(audio, sample_rate)
+        if audio is not None:
+            st.markdown("Loop")
+            buffer = BytesIO()
+            sf.write(buffer, normalize(audio), sample_rate, format="WAV")
+            st.audio(buffer, format="audio/wav")
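After this change the normalize/BytesIO/sf.write/st.audio sequence appears twice in streamlit_app.py, once for the raw sample and once for the loop. A small helper could factor it out in a follow-up (a sketch, not part of this commit; play_audio is a hypothetical name):

    from io import BytesIO

    import soundfile as sf
    import streamlit as st
    from librosa.util import normalize


    def play_audio(audio, sample_rate):
        # Normalize, encode to an in-memory WAV, and hand it to the player.
        buffer = BytesIO()
        sf.write(buffer, normalize(audio), sample_rate, format="WAV")
        st.audio(buffer, format="audio/wav")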