teticio committed on
Commit
5ecf3c1
1 Parent(s): a71fd34

clarification

Browse files
Files changed (1) hide show
  1. notebooks/test_model.ipynb +15 -15
notebooks/test_model.ipynb CHANGED
@@ -492,7 +492,7 @@
492
  },
493
  {
494
  "cell_type": "markdown",
495
- "id": "5b7081f7",
496
  "metadata": {},
497
  "source": [
498
  "## Latent Audio Diffusion\n",
@@ -502,7 +502,7 @@
502
  {
503
  "cell_type": "code",
504
  "execution_count": null,
505
- "id": "17610772",
506
  "metadata": {},
507
  "outputs": [],
508
  "source": [
@@ -512,7 +512,7 @@
512
  {
513
  "cell_type": "code",
514
  "execution_count": null,
515
- "id": "9e6c73e6",
516
  "metadata": {},
517
  "outputs": [],
518
  "source": [
@@ -522,7 +522,7 @@
522
  {
523
  "cell_type": "code",
524
  "execution_count": null,
525
- "id": "d37a03a9",
526
  "metadata": {},
527
  "outputs": [],
528
  "source": [
@@ -537,7 +537,7 @@
537
  {
538
  "cell_type": "code",
539
  "execution_count": null,
540
- "id": "c0328a56",
541
  "metadata": {},
542
  "outputs": [],
543
  "source": [
@@ -551,7 +551,7 @@
551
  },
552
  {
553
  "cell_type": "markdown",
554
- "id": "bd1f2b58",
555
  "metadata": {},
556
  "source": [
557
  "### Interpolation in latent space\n",
@@ -561,43 +561,43 @@
561
  {
562
  "cell_type": "code",
563
  "execution_count": null,
564
- "id": "23ff0ee7",
565
  "metadata": {},
566
  "outputs": [],
567
  "source": [
568
  "generator.manual_seed(seed)\n",
569
- "noise = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
570
  " audio_diffusion.pipe.unet.sample_size[0],\n",
571
  " audio_diffusion.pipe.unet.sample_size[1]),\n",
572
  " generator=generator)\n",
573
- "noise.shape"
574
  ]
575
  },
576
  {
577
  "cell_type": "code",
578
  "execution_count": null,
579
- "id": "ff13a2cb",
580
  "metadata": {},
581
  "outputs": [],
582
  "source": [
583
  "generator.manual_seed(seed2)\n",
584
- "noise2 = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
585
  " audio_diffusion.pipe.unet.sample_size[0],\n",
586
  " audio_diffusion.pipe.unet.sample_size[1]),\n",
587
  " generator=generator)\n",
588
- "noise2.shape"
589
  ]
590
  },
591
  {
592
  "cell_type": "code",
593
  "execution_count": null,
594
- "id": "bea26a5e",
595
  "metadata": {},
596
  "outputs": [],
597
  "source": [
598
  "alpha = 0.5 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
599
  "_, (sample_rate, audio3) = audio_diffusion.generate_spectrogram_and_audio(\n",
600
- " noise=audio_diffusion.pipe.slerp(noise, noise2, alpha),\n",
601
  " generator=generator)\n",
602
  "display(Audio(audio, rate=mel.get_sample_rate()))\n",
603
  "display(Audio(audio2, rate=mel.get_sample_rate()))\n",
@@ -607,7 +607,7 @@
607
  {
608
  "cell_type": "code",
609
  "execution_count": null,
610
- "id": "60080eed",
611
  "metadata": {},
612
  "outputs": [],
613
  "source": []
 
492
  },
493
  {
494
  "cell_type": "markdown",
495
+ "id": "9b244547",
496
  "metadata": {},
497
  "source": [
498
  "## Latent Audio Diffusion\n",
 
502
  {
503
  "cell_type": "code",
504
  "execution_count": null,
505
+ "id": "a88b3fbb",
506
  "metadata": {},
507
  "outputs": [],
508
  "source": [
 
512
  {
513
  "cell_type": "code",
514
  "execution_count": null,
515
+ "id": "15e353ee",
516
  "metadata": {},
517
  "outputs": [],
518
  "source": [
 
522
  {
523
  "cell_type": "code",
524
  "execution_count": null,
525
+ "id": "fa0f0c8c",
526
  "metadata": {},
527
  "outputs": [],
528
  "source": [
 
537
  {
538
  "cell_type": "code",
539
  "execution_count": null,
540
+ "id": "73dc575d",
541
  "metadata": {},
542
  "outputs": [],
543
  "source": [
 
551
  },
552
  {
553
  "cell_type": "markdown",
554
+ "id": "428d2d67",
555
  "metadata": {},
556
  "source": [
557
  "### Interpolation in latent space\n",
 
561
  {
562
  "cell_type": "code",
563
  "execution_count": null,
564
+ "id": "72211c2b",
565
  "metadata": {},
566
  "outputs": [],
567
  "source": [
568
  "generator.manual_seed(seed)\n",
569
+ "latents = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
570
  " audio_diffusion.pipe.unet.sample_size[0],\n",
571
  " audio_diffusion.pipe.unet.sample_size[1]),\n",
572
  " generator=generator)\n",
573
+ "latents.shape"
574
  ]
575
  },
576
  {
577
  "cell_type": "code",
578
  "execution_count": null,
579
+ "id": "6c732dbe",
580
  "metadata": {},
581
  "outputs": [],
582
  "source": [
583
  "generator.manual_seed(seed2)\n",
584
+ "latents2 = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
585
  " audio_diffusion.pipe.unet.sample_size[0],\n",
586
  " audio_diffusion.pipe.unet.sample_size[1]),\n",
587
  " generator=generator)\n",
588
+ "latents2.shape"
589
  ]
590
  },
591
  {
592
  "cell_type": "code",
593
  "execution_count": null,
594
+ "id": "159bcfc4",
595
  "metadata": {},
596
  "outputs": [],
597
  "source": [
598
  "alpha = 0.5 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
599
  "_, (sample_rate, audio3) = audio_diffusion.generate_spectrogram_and_audio(\n",
600
+ " noise=audio_diffusion.pipe.slerp(latents, latents2, alpha),\n",
601
  " generator=generator)\n",
602
  "display(Audio(audio, rate=mel.get_sample_rate()))\n",
603
  "display(Audio(audio2, rate=mel.get_sample_rate()))\n",
 
607
  {
608
  "cell_type": "code",
609
  "execution_count": null,
610
+ "id": "ce6c9cc1",
611
  "metadata": {},
612
  "outputs": [],
613
  "source": []