[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tulasiram58827/TTS_TFLite/blob/main/Parallel_WaveGAN_TFLite.ipynb)

This notebook converts a TensorFlow Parallel WaveGAN model to TFLite.

## Acknowledgments

- The pretrained model (in PyTorch) was downloaded from the [Parallel WaveGAN Repository](https://github.com/kan-bayashi/ParallelWaveGAN#results).

- The PyTorch weights were converted to a TensorFlow-compatible format using the [TensorFlowTTS Repository](https://github.com/TensorSpeech/TensorFlowTTS), following this [Notebook](https://github.com/TensorSpeech/TensorFlowTTS/blob/master/examples/parallel_wavegan/convert_pwgan_from_pytorch_to_tensorflow.ipynb).

## Imports

In [None]:
!git clone https://github.com/TensorSpeech/TensorFlowTTS.git
!cd TensorFlowTTS
!pip install /content/TensorFlowTTS/

In [None]:
!pip install parallel_wavegan

In [None]:
import tensorflow as tf
import torch
import sys
sys.path.append('/content/TensorFlowTTS')
from tensorflow_tts.models import TFParallelWaveGANGenerator
from tensorflow_tts.configs import ParallelWaveGANGeneratorConfig

from parallel_wavegan.models import ParallelWaveGANGenerator
import numpy as np

from IPython.display import Audio

## Initialize Model

In [4]:
# Instantiate the TensorFlow generator from a config created with no overrides.
generator_config = ParallelWaveGANGeneratorConfig()
tf_model = TFParallelWaveGANGenerator(
    config=generator_config,
    name="parallel_wavegan_generator",
)

In [5]:
# NOTE(review): `_build` is a private TensorFlowTTS method; presumably it creates
# the model's variables so that weights can be assigned to them later — confirm
# against the library source.
tf_model._build()

## Load PyTorch Checkpoints

In [7]:
!gdown --id 1wPwO9K-0Yq-GYcXbHseaqt8kUpa_ojJf -O checkpoint-400000steps.pkl

Downloading...
From: https://drive.google.com/uc?id=1wPwO9K-0Yq-GYcXbHseaqt8kUpa_ojJf
To: /content/checkpoint-400000steps.pkl
0.00B [00:00, ?B/s]17.5MB [00:00, 154MB/s]


In [8]:
# NOTE: torch.load unpickles the file, and unpickling can execute arbitrary code;
# only load checkpoints obtained from a trusted source.
# All tensors are mapped onto the CPU while loading.
cpu_device = torch.device('cpu')
torch_checkpoints = torch.load("checkpoint-400000steps.pkl", map_location=cpu_device)
torch_generator_weights = torch_checkpoints["model"]["generator"]

# Build the PyTorch generator with its default arguments, load the generator
# weights from the checkpoint, then remove weight normalisation from the model.
torch_model = ParallelWaveGANGenerator()
torch_model.load_state_dict(torch_generator_weights)
torch_model.remove_weight_norm()

In [9]:
# Total number of elements across all parameters that require gradients.
params = sum([
    np.prod(p.size())
    for p in torch_model.parameters()
    if p.requires_grad
])
params

1334309

## Convert PyTorch weights to TensorFlow

In [10]:
# In PyTorch, a convolution layer's parameters are ordered bias -> weight; in TF the order is weight -> bias, so we need to re-order them.

def convert_weights_pytorch_to_tensorflow(weights_pytorch):
    """
    Permute one PyTorch convolution parameter into the axis order used on the TF side.

    The conversion is chosen from the rank of ``weights_pytorch``:

    * rank 1 (bias): returned unchanged.
    * rank 3 (Conv1d kernel):
      (f_output, f_input, kernel_size) -> (kernel_size, f_input, f_output)
    * rank 4 (Conv2d kernel):
      (f_output, f_input, kernel_size_h, kernel_size_w)
      -> (kernel_size_w, kernel_size_h, f_input, f_output)

    For both kernel ranks this is a full reversal of the axes.

    Args:
        weights_pytorch: array holding a single parameter (anything exposing
            ``.shape`` that ``np.transpose`` accepts).

    Returns:
        The parameter with its axes permuted as described above.

    Raises:
        ValueError: if the rank is not 1, 3 or 4. Previously such inputs fell
            through and silently returned ``None``.
    """
    rank = len(weights_pytorch.shape)
    if rank == 1:  # bias: no axes to permute
        return weights_pytorch
    if rank == 3:  # conv1d kernel
        # Same result as the chain (0,2,1) -> (1,0,2) -> (0,2,1).
        return np.transpose(weights_pytorch, (2, 1, 0))
    if rank == 4:  # conv2d kernel
        # Same result as the original six pairwise axis swaps.
        return np.transpose(weights_pytorch, (3, 2, 1, 0))
    raise ValueError(
        f"Unsupported weight rank {rank} (shape {tuple(weights_pytorch.shape)}); "
        "expected a 1-D bias, 3-D conv1d kernel or 4-D conv2d kernel."
    )

# Build `torch_weights`: the PyTorch parameters converted to TF axis order and
# re-ordered so that, for a layer whose state_dict lists bias before weight,
# the weight comes first. The list is consumed by position when the values are
# assigned to the TF variables.
# NOTE(review): the indentation of this cell is flattened, so the nesting of the
# loop body (in particular which `if` the `else` belongs to) cannot be read off
# the text — verify against the upstream TensorFlowTTS conversion notebook.
torch_weights = []
# Parallel lists of parameter names and tensors, in state_dict order.
all_keys = list(torch_model.state_dict().keys())
all_values = list(torch_model.state_dict().values())

# Indices into all_keys/all_values that have already been emitted.
idx_already_append = []

# The range stops one short because each step also inspects entry i + 1.
# NOTE(review): the last entry is therefore only emitted as the second half of
# a bias/weight pair — confirm that this holds for the checkpoint in use.
for i in range(len(all_keys) -1):
 if i not in idx_already_append:
 # Same layer prefix (everything before the final ".<name>") for i and i + 1?
 if all_keys[i].split(".")[0:-1] == all_keys[i + 1].split(".")[0:-1]:
 # "<prefix>.bias" followed by "<prefix>.weight": emit weight first, then bias.
 if all_keys[i].split(".")[-1] == "bias" and all_keys[i + 1].split(".")[-1] == "weight":
 torch_weights.append(convert_weights_pytorch_to_tensorflow(all_values[i + 1].cpu().detach().numpy()))
 torch_weights.append(convert_weights_pytorch_to_tensorflow(all_values[i].cpu().detach().numpy()))
 idx_already_append.append(i)
 idx_already_append.append(i + 1)
 else:
 # Emit entry i on its own if it has not been emitted yet.
 if i not in idx_already_append:
 torch_weights.append(convert_weights_pytorch_to_tensorflow(all_values[i].cpu().detach().numpy()))
 idx_already_append.append(i)

In [11]:
# Copy the converted PyTorch arrays into the TF model's trainable variables,
# matching them purely by position in the two sequences.
tf_var = tf_model.trainable_variables
for position in range(len(tf_var)):
    tf.keras.backend.set_value(tf_var[position], torch_weights[position])

## Convert to TFLite

In [22]:
def convert_to_tflite(quantization):
    """
    Convert the global ``tf_model`` to TFLite and write the result to disk.

    The model is written to ``parallel_wavegan_{quantization}.tflite`` in the
    current working directory.

    Args:
        quantization: ``'dr'`` (dynamic range quantization: default
            optimizations only) or ``'float16'`` (additionally restricts the
            supported types to float16). The value is also embedded in the
            output file name.

    Raises:
        ValueError: if ``quantization`` is not one of the supported modes.
            Previously any other value silently produced a dynamic-range
            model saved under a misleading file name.
    """
    supported_modes = ('dr', 'float16')
    if quantization not in supported_modes:
        raise ValueError(
            f"Unsupported quantization mode {quantization!r}; "
            f"expected one of {supported_modes}."
        )

    pwg_concrete_function = tf_model.inference.get_concrete_function()
    converter = tf.lite.TFLiteConverter.from_concrete_functions([pwg_concrete_function])
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    # Only the Select-TF-ops set is enabled.
    # NOTE(review): TFLite examples usually list TFLITE_BUILTINS alongside
    # SELECT_TF_OPS — confirm that omitting it here is intentional.
    converter.target_spec.supported_ops = [tf.lite.OpsSet.SELECT_TF_OPS]
    if quantization == 'float16':
        converter.target_spec.supported_types = [tf.float16]
    tf_lite_model = converter.convert()

    model_name = f'parallel_wavegan_{quantization}.tflite'
    with open(model_name, 'wb') as f:
        f.write(tf_lite_model)

#### Dynamic Range Quantization

In [23]:
# Colab form field selecting the quantization mode; the chosen mode is then
# exported to parallel_wavegan_<mode>.tflite.
quantization = 'dr' #@param ["dr", "float16"]
convert_to_tflite(quantization)

In [25]:
# Show the on-disk size of the dynamic-range-quantized model.
!du -sh parallel_wavegan_dr.tflite

5.7M	parallel_wavegan_dr.tflite


#### Float16 Quantization

In [24]:
# Export the float16-quantized model and show its on-disk size.
quantization = 'float16'
convert_to_tflite(quantization)
!du -sh parallel_wavegan_float16.tflite

3.2M	parallel_wavegan_float16.tflite


## Download Sample Output of Tacotron2

In [14]:
!gdown --id 1LmU3j8yedwBzXKVDo9tCvozLM4iwkRnP -O tac_output.npy

Downloading...
From: https://drive.google.com/uc?id=1LmU3j8yedwBzXKVDo9tCvozLM4iwkRnP
To: /content/tac_output.npy
 0% 0.00/36.0k [00:00, ?B/s]100% 36.0k/36.0k [00:00<00:00, 59.6MB/s]


## TFLite Inference

In [30]:
# Load the downloaded Tacotron2 sample and prepend an axis of size 1.
data = np.load('tac_output.npy')
feats = data[np.newaxis, ...]

# Open the dynamic-range-quantized model and look up its input/output tensors.
interpreter = tf.lite.Interpreter(model_path='parallel_wavegan_dr.tflite')
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_index = input_details[0]['index']
output_index = output_details[0]['index']

# Resize the input tensor to this sample's dimensions before allocating
# tensors, so the interpreter accepts the array set below.
resized_shape = [1, feats.shape[1], feats.shape[2]]
interpreter.resize_tensor_input(input_index, resized_shape, strict=True)
interpreter.allocate_tensors()

# Feed the features, run the model, and read back the first output tensor.
interpreter.set_tensor(input_index, feats)
interpreter.invoke()
output = interpreter.get_tensor(output_index)

## Play Audio

In [31]:
# Reduce the 3-D interpreter output to a 1-D array by taking index 0 on the
# first and last axes. The rank guard keeps this cell idempotent: re-running it
# after `output` has already been reduced no longer raises an IndexError.
if output.ndim == 3:
    output = output[0, :, 0]

# NOTE(review): 22050 is presumably the sampling rate (Hz) the vocoder was
# trained at — confirm against the checkpoint's configuration.
Audio(output, rate=22050)