|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import logging |
|
import os |
|
|
|
import pytest |
|
import tensorflow as tf |
|
|
|
from tensorflow_tts.configs import ( |
|
HifiGANDiscriminatorConfig, |
|
HifiGANGeneratorConfig, |
|
MelGANDiscriminatorConfig, |
|
) |
|
from tensorflow_tts.models import ( |
|
TFHifiGANGenerator, |
|
TFHifiGANMultiPeriodDiscriminator, |
|
TFMelGANMultiScaleDiscriminator, |
|
) |
|
|
|
from examples.hifigan.train_hifigan import TFHifiGANDiscriminator |
|
|
|
# Expose no CUDA devices to this process so the tests do not depend on a GPU.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Log everything from DEBUG upwards, tagged with timestamp, module and line.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
|
|
|
|
|
def make_hifigan_generator_args(**kwargs):
    """Return keyword arguments for a HiFi-GAN generator config.

    A fresh dictionary of default settings is built on every call, so callers
    may mutate the result freely.

    Args:
        **kwargs: Entries that override a default of the same name, or are
            added to the result when no such default exists.

    Returns:
        dict: The default settings with ``kwargs`` applied on top.
    """
    defaults = {
        "out_channels": 1,
        "kernel_size": 7,
        "filters": 128,
        "use_bias": True,
        "upsample_scales": [8, 8, 2, 2],
        "stacks": 3,
        "stack_kernel_size": [3, 7, 11],
        "stack_dilation_rate": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
        "nonlinear_activation": "LeakyReLU",
        "nonlinear_activation_params": {"alpha": 0.2},
        "padding_type": "REFLECT",
        # Spelling ("nolinear") is the key name this file has always emitted;
        # it is kept verbatim so consumers of the dict see the same key.
        "use_final_nolinear_activation": True,
        "is_weight_norm": True,
        "initializer_seed": 42,
    }
    defaults.update(kwargs)
    return defaults
|
|
|
|
|
def make_hifigan_discriminator_args(**kwargs):
    """Return keyword arguments for the two HiFi-GAN discriminator configs.

    Two fresh dictionaries are built on every call: one with multi-period
    discriminator settings and one with multi-scale discriminator settings.

    Args:
        **kwargs: Overrides applied to BOTH dictionaries. A key replaces the
            default of the same name, or is added when no such default exists.

    Returns:
        list: ``[multi_period_args, multi_scale_args]``, in that order.
    """
    multi_period_args = {
        "out_channels": 1,
        "period_scales": [2, 3, 5, 7, 11],
        "n_layers": 5,
        "kernel_size": 5,
        "strides": 3,
        "filters": 8,
        "filter_scales": 4,
        "max_filters": 1024,
        "nonlinear_activation": "LeakyReLU",
        "nonlinear_activation_params": {"alpha": 0.2},
        "is_weight_norm": True,
        "initializer_seed": 42,
    }
    multi_period_args.update(kwargs)

    multi_scale_args = {
        "out_channels": 1,
        "scales": 3,
        "downsample_pooling": "AveragePooling1D",
        "downsample_pooling_params": {"pool_size": 4, "strides": 2},
        "kernel_sizes": [5, 3],
        "filters": 16,
        "max_downsample_filters": 1024,
        "use_bias": True,
        "downsample_scales": [4, 4, 4, 4],
        "nonlinear_activation": "LeakyReLU",
        "nonlinear_activation_params": {"alpha": 0.2},
        "padding_type": "REFLECT",
    }
    # The same overrides are deliberately applied to the second dictionary too.
    multi_scale_args.update(kwargs)

    return [multi_period_args, multi_scale_args]
|
|
|
|
|
@pytest.mark.parametrize(
    "dict_g, dict_d, dict_loss",
    [
        ({}, {}, {}),
        ({"kernel_size": 3}, {}, {}),
        ({"filters": 1024}, {}, {}),
        ({"stack_kernel_size": [1, 2, 3]}, {}, {}),
        ({"stack_kernel_size": [3, 5, 7], "stacks": 3}, {}, {}),
        ({"upsample_scales": [4, 4, 4, 4]}, {}, {}),
        ({"upsample_scales": [8, 8, 2, 2]}, {}, {}),
        ({"filters": 1024, "upsample_scales": [8, 8, 2, 2]}, {}, {}),
    ],
)
def test_hifigan_trainable(dict_g, dict_d, dict_loss):
    """Construct the HiFi-GAN generator and discriminators for each case.

    The test passes when every config and model object can be instantiated
    without raising; it runs no forward pass and makes no explicit assertion.

    Args:
        dict_g: Overrides for the generator default arguments.
        dict_d: Overrides for both discriminator default argument sets.
        dict_loss: Unused here; kept because the parametrization supplies it.
    """
    generator_kwargs = make_hifigan_generator_args(**dict_g)
    period_kwargs, scale_kwargs = make_hifigan_discriminator_args(**dict_d)

    generator_config = HifiGANGeneratorConfig(**generator_kwargs)
    period_config = HifiGANDiscriminatorConfig(**period_kwargs)
    scale_config = MelGANDiscriminatorConfig(**scale_kwargs)

    generator = TFHifiGANGenerator(generator_config)

    # The combined discriminator wraps a multi-period and a multi-scale part.
    discriminator_p = TFHifiGANMultiPeriodDiscriminator(period_config)
    discriminator_s = TFMelGANMultiScaleDiscriminator(scale_config)
    discriminator = TFHifiGANDiscriminator(discriminator_p, discriminator_s)
|
|