KdaiP's picture
Upload 80 files
3dd84f8 verified
raw
history blame
1.81 kB
import torch
import torch.nn as nn
from .backbone import ConvNeXtEncoder
from .head import HiFiGANGenerator
# Hard-coded hyper-parameters for the two sub-networks of the generator.
# Each sub-dict is unpacked as keyword arguments into the matching constructor
# (``ConvNeXtEncoder`` for "backbone", ``HiFiGANGenerator`` for "head").
config_dict = {
    "backbone": {
        # Pinned literal; the commented-out template this was taken from
        # used the placeholder "${model.num_mels}" here.
        "input_channels": 128,
        "depths": [3, 3, 9, 3],
        "dims": [128, 256, 384, 512],
        "drop_path_rate": 0.2,
        "kernel_size": 7,
    },
    "head": {
        # Pinned literal; the commented-out template this was taken from
        # used the placeholder "${model.hop_length}" here.
        "hop_length": 512,
        # The product of these rates (8 * 8 * 2 * 2 * 2) equals 512,
        # the same value as hop_length above.
        "upsample_rates": [8, 8, 2, 2, 2],
        "upsample_kernel_sizes": [16, 16, 4, 4, 4],
        "resblock_kernel_sizes": [3, 7, 11],
        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
        # Kept consistent with the output of the backbone
        # (the last entry of its "dims" is also 512).
        "num_mels": 512,
        "upsample_initial_channel": 512,
        "use_template": False,
        "pre_conv_kernel_size": 13,
        "post_conv_kernel_size": 13,
    },
}
# download_link: https://github.com/fishaudio/vocoder/releases/download/1.0.0/firefly-gan-base-generator.ckpt
class FireflyGANBaseWrapper(nn.Module):
    """Build a ``FireflyGANBase`` generator and load pretrained weights into it.

    The wrapped generator is stored as ``self.model`` and is switched to
    evaluation mode at construction time.

    Args:
        model_path: Checkpoint location handed to ``torch.load``. The loaded
            object is passed directly to ``load_state_dict``, so it must be
            a state dict whose keys match ``FireflyGANBase``.
    """

    def __init__(self, model_path):
        super().__init__()
        generator = FireflyGANBase()
        # Deserialize onto the CPU with the restricted (weights-only) loader.
        state_dict = torch.load(model_path, map_location="cpu", weights_only=True)
        generator.load_state_dict(state_dict)
        generator.eval()
        self.model = generator

    @torch.inference_mode()
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the wrapped generator on ``x`` under ``torch.inference_mode``.

        Args:
            x: Input tensor forwarded unchanged to ``self.model``.

        Returns:
            Whatever ``self.model`` returns for ``x``.
        """
        output = self.model(x)
        return output
class FireflyGANBase(nn.Module):
    """Generator composed of a ConvNeXt encoder followed by a HiFi-GAN head.

    Both sub-modules are configured from the module-level ``config_dict``.
    """

    def __init__(self):
        super().__init__()
        backbone_kwargs = config_dict["backbone"]
        head_kwargs = config_dict["head"]
        self.backbone = ConvNeXtEncoder(**backbone_kwargs)
        self.head = HiFiGANGenerator(**head_kwargs)
        # NOTE(review): presumably turns off gradient checkpointing inside
        # the head -- confirm against HiFiGANGenerator.
        self.head.checkpointing = False

    @torch.inference_mode()
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Encode ``x`` with the backbone, decode with the head, squeeze dim 1.

        Runs under ``torch.inference_mode``.

        Args:
            x: Input tensor for the backbone (assumed to be mel features
                with 128 channels, per ``config_dict`` -- TODO confirm layout).

        Returns:
            The head's output with dimension 1 removed when that dimension
            has size 1; otherwise the head's output shape is unchanged.
        """
        return self.head(self.backbone(x)).squeeze(1)