fix audio loading
notebooks/test_model.ipynb
CHANGED
@@ -46,6 +46,7 @@
     "source": [
         "import torch\n",
         "import random\n",
+        "import librosa\n",
         "import numpy as np\n",
         "from datasets import load_dataset\n",
         "from IPython.display import Audio\n",
@@ -266,8 +267,8 @@
     "source": [
         "start_step = 500 #@param {type:\"slider\", min:0, max:1000, step:10}\n",
         "overlap_secs = 2 #@param {type:\"integer\"}\n",
-        "
-        "overlap_samples = overlap_secs *
+        "track_audio, _ = librosa.load(audio_file, mono=True, sr=sample_rate)\n",
+        "overlap_samples = overlap_secs * sample_rate\n",
         "slice_size = mel.x_res * mel.hop_length\n",
         "stride = slice_size - overlap_samples\n",
         "generator = torch.Generator(device=device)\n",
@@ -275,9 +276,9 @@
         "print(f'Seed = {seed}')\n",
         "track = np.array([])\n",
         "not_first = 0\n",
-        "for sample in range(len(
+        "for sample in range(len(track_audio) // stride):\n",
         "    generator.manual_seed(seed)\n",
-        "    audio = np.array(
+        "    audio = np.array(track_audio[sample * stride:sample * stride + slice_size])\n",
         "    if not_first:\n",
         "        # Normalize and re-insert generated audio\n",
         "        audio[:overlap_samples] = audio2[-overlap_samples:] * np.max(\n",
@@ -309,7 +310,7 @@
     "outputs": [],
     "source": [
         "slice = 3 #@param {type:\"integer\"}\n",
-        "raw_audio =
+        "raw_audio = track_audio[sample * stride:sample * stride + slice_size]\n",
         "_, (sample_rate,\n",
         " audio2) = audio_diffusion.generate_spectrogram_and_audio_from_audio(\n",
         "     raw_audio=raw_audio,\n",
@@ -507,7 +508,7 @@
     "metadata": {},
     "outputs": [],
     "source": [
-        "model_id = \"teticio/latent-audio-diffusion-ddim-256
+        "model_id = \"teticio/latent-audio-diffusion-ddim-256\" #@param [\"teticio/latent-audio-diffusion-256\", \"teticio/latent-audio-diffusion-ddim-256\"]"
     ]
    },
    {
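For context on what the fix does: the updated cell loads the whole track with librosa at a fixed sample rate and converts the overlap from seconds to samples. A minimal standalone sketch of that step, assuming a placeholder audio_file path and an illustrative sample_rate of 22050 (in the notebook these presumably come from earlier cells and the model's mel configuration):

import librosa

audio_file = "example.wav"  # placeholder path, not from the diff
sample_rate = 22050         # illustrative rate; the notebook defines its own

# Load the track as a mono float array, resampled to sample_rate.
track_audio, _ = librosa.load(audio_file, mono=True, sr=sample_rate)

overlap_secs = 2
overlap_samples = overlap_secs * sample_rate  # seconds -> samples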
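The rewritten loop tiles track_audio into windows of slice_size samples spaced stride apart, so consecutive windows share overlap_samples of audio that the model can blend. A stripped-down sketch of just that windowing arithmetic, with made-up sizes and no diffusion model (variable names mirror the notebook; the real loop regenerates each window with the model before stitching):

import numpy as np

slice_size = 65536       # stands in for mel.x_res * mel.hop_length
overlap_samples = 44100  # stands in for overlap_secs * sample_rate
stride = slice_size - overlap_samples

track_audio = np.zeros(10 * slice_size)  # stand-in for the loaded track
track = np.array([])

for sample in range(len(track_audio) // stride):
    # Window n starts one stride after window n - 1, so its first
    # overlap_samples coincide with the tail of the previous window.
    audio = np.array(track_audio[sample * stride:sample * stride + slice_size])
    if sample == 0:
        track = np.concatenate([track, audio])
    else:
        # Append only the non-overlapping tail; the overlapping head
        # was already emitted as part of the previous window.
        track = np.concatenate([track, audio[overlap_samples:]])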