ZiqianLiu
/

yolov8_face

Model card Files Files and versions Community

ZiqianLiu committed on Apr 8

Commit

55f6076

•

1 Parent(s): cb2f529

Upload 14 files

Browse files

Files changed (14) hide show

scripts/__init__.py +0 -0
scripts/__pycache__/__init__.cpython-310.pyc +0 -0
scripts/__pycache__/reactor_faceswap.cpython-310.pyc +0 -0
scripts/__pycache__/reactor_logger.cpython-310.pyc +0 -0
scripts/__pycache__/reactor_swapper.cpython-310.pyc +0 -0
scripts/__pycache__/reactor_version.cpython-310.pyc +0 -0
scripts/r_archs/__pycache__/codeformer_arch.cpython-310.pyc +0 -0
scripts/r_archs/__pycache__/vqgan_arch.cpython-310.pyc +0 -0
scripts/r_archs/codeformer_arch.py +278 -0
scripts/r_archs/vqgan_arch.py +437 -0
scripts/reactor_faceswap.py +126 -0
scripts/reactor_logger.py +47 -0
scripts/reactor_swapper.py +301 -0
scripts/reactor_version.py +13 -0

scripts/__init__.py ADDED Viewed

File without changes

scripts/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (175 Bytes). View file

scripts/__pycache__/reactor_faceswap.cpython-310.pyc ADDED Viewed

Binary file (3.59 kB). View file

scripts/__pycache__/reactor_logger.cpython-310.pyc ADDED Viewed

Binary file (1.4 kB). View file

scripts/__pycache__/reactor_swapper.cpython-310.pyc ADDED Viewed

Binary file (7.24 kB). View file

scripts/__pycache__/reactor_version.cpython-310.pyc ADDED Viewed

Binary file (534 Bytes). View file

scripts/r_archs/__pycache__/codeformer_arch.cpython-310.pyc ADDED Viewed

Binary file (9.24 kB). View file

scripts/r_archs/__pycache__/vqgan_arch.cpython-310.pyc ADDED Viewed

Binary file (11.2 kB). View file

scripts/r_archs/codeformer_arch.py ADDED Viewed

	@@ -0,0 +1,278 @@

+import math
+import numpy as np
+import torch
+from torch import nn, Tensor
+import torch.nn.functional as F
+from typing import Optional, List
+from scripts.r_archs.vqgan_arch import *
+from r_basicsr.utils import get_root_logger
+from r_basicsr.utils.registry import ARCH_REGISTRY
+def calc_mean_std(feat, eps=1e-5):
+    """Compute the per-sample, per-channel mean and std of a 4D feature map.
+
+    Helper for adaptive_instance_normalization.
+
+    Args:
+        feat (Tensor): 4D tensor; its first two dims are used as (b, c).
+        eps (float): A small value added to the variance before the square
+            root to avoid divide-by-zero. Default: 1e-5.
+
+    Returns:
+        tuple: (mean, std), each viewed as (b, c, 1, 1).
+    """
+    shape = feat.size()
+    assert len(shape) == 4, 'The input feature should be 4D tensor.'
+    batch, channels = shape[:2]
+    # collapse all spatial positions so statistics are taken per (sample, channel)
+    flat = feat.view(batch, channels, -1)
+    variance = flat.var(dim=2) + eps
+    std = variance.sqrt().view(batch, channels, 1, 1)
+    mean = flat.mean(dim=2).view(batch, channels, 1, 1)
+    return mean, std
+def adaptive_instance_normalization(content_feat, style_feat):
+    """Adaptive instance normalization.
+
+    Re-scales the reference features so their per-channel mean and std match
+    those of the degraded features (similar color and illumination).
+
+    Args:
+        content_feat (Tensor): The reference feature.
+        style_feat (Tensor): The degraded features.
+
+    Returns:
+        Tensor: ``content_feat`` normalized by its own statistics, then
+        scaled and shifted by the statistics of ``style_feat``.
+    """
+    shape = content_feat.size()
+    content_mean, content_std = calc_mean_std(content_feat)
+    style_mean, style_std = calc_mean_std(style_feat)
+    # whiten with the content statistics ...
+    whitened = (content_feat - content_mean.expand(shape)) / content_std.expand(shape)
+    # ... then re-color with the style statistics
+    return whitened * style_std.expand(shape) + style_mean.expand(shape)
+class PositionEmbeddingSine(nn.Module):
+    """
+    Sinusoidal position embedding for images, in the spirit of the one used in
+    the "Attention is all you need" paper but applied to both the row and the
+    column coordinate.
+
+    The output has ``2 * num_pos_feats`` channels: the first half encodes the
+    row (y) position, the second half the column (x) position.
+    """
+    def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None):
+        super().__init__()
+        if scale is not None and normalize is False:
+            raise ValueError("normalize should be True if scale is passed")
+        self.num_pos_feats = num_pos_feats
+        self.temperature = temperature
+        self.normalize = normalize
+        self.scale = 2 * math.pi if scale is None else scale
+    @staticmethod
+    def _interleave_sin_cos(pos):
+        # sin on the even feature slots, cos on the odd ones, re-interleaved on the last dim
+        return torch.stack(
+            (pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()), dim=4
+        ).flatten(3)
+    def forward(self, x, mask=None):
+        """Return the embedding for ``x`` (indexed as B, C, H, W) with shape (B, 2*num_pos_feats, H, W).
+
+        ``mask`` is a boolean (B, H, W) tensor whose True entries are excluded
+        from the position count; when omitted nothing is masked.
+        """
+        if mask is None:
+            mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool)
+        valid = ~mask
+        # running count of valid cells gives a 1-based coordinate along each axis
+        y_embed = valid.cumsum(1, dtype=torch.float32)
+        x_embed = valid.cumsum(2, dtype=torch.float32)
+        if self.normalize:
+            eps = 1e-6
+            y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+            x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+        feat_idx = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+        dim_t = self.temperature ** (2 * (feat_idx // 2) / self.num_pos_feats)
+        pos_x = self._interleave_sin_cos(x_embed[:, :, :, None] / dim_t)
+        pos_y = self._interleave_sin_cos(y_embed[:, :, :, None] / dim_t)
+        return torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+def _get_activation_fn(activation):
+    """Return the ``torch.nn.functional`` activation named by ``activation``.
+
+    Args:
+        activation (str): One of "relu", "gelu" or "glu".
+
+    Returns:
+        callable: The matching function from ``torch.nn.functional``.
+
+    Raises:
+        RuntimeError: If ``activation`` is not one of the supported names.
+    """
+    if activation == "relu":
+        return F.relu
+    if activation == "gelu":
+        return F.gelu
+    if activation == "glu":
+        return F.glu
+    # the message lists every accepted name (glu was previously missing from it)
+    raise RuntimeError(f"activation should be relu/gelu/glu, not {activation}.")
+class TransformerSALayer(nn.Module):
+    """Pre-norm transformer layer: self-attention followed by a two-layer MLP.
+
+    Both sub-layers normalize their input with LayerNorm and add their output
+    back to the running tensor (residual connection).
+    """
+    def __init__(self, embed_dim, nhead=8, dim_mlp=2048, dropout=0.0, activation="gelu"):
+        super().__init__()
+        self.self_attn = nn.MultiheadAttention(embed_dim, nhead, dropout=dropout)
+        # feed-forward network (MLP)
+        self.linear1 = nn.Linear(embed_dim, dim_mlp)
+        self.dropout = nn.Dropout(dropout)
+        self.linear2 = nn.Linear(dim_mlp, embed_dim)
+        self.norm1 = nn.LayerNorm(embed_dim)
+        self.norm2 = nn.LayerNorm(embed_dim)
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(dropout)
+        self.activation = _get_activation_fn(activation)
+    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+        """Add ``pos`` to ``tensor``; return ``tensor`` untouched when ``pos`` is None."""
+        if pos is None:
+            return tensor
+        return tensor + pos
+    def forward(self, tgt,
+                tgt_mask: Optional[Tensor] = None,
+                tgt_key_padding_mask: Optional[Tensor] = None,
+                query_pos: Optional[Tensor] = None):
+        """Run one layer on ``tgt`` and return a tensor of the same shape.
+
+        ``query_pos`` is added to the queries and keys only; the values are the
+        normalized input without the position term.
+        """
+        # self-attention sub-layer
+        normed = self.norm1(tgt)
+        query = key = self.with_pos_embed(normed, query_pos)
+        attn_out, _ = self.self_attn(query, key, value=normed, attn_mask=tgt_mask,
+                                     key_padding_mask=tgt_key_padding_mask)
+        tgt = tgt + self.dropout1(attn_out)
+        # feed-forward sub-layer
+        normed = self.norm2(tgt)
+        hidden = self.activation(self.linear1(normed))
+        mlp_out = self.linear2(self.dropout(hidden))
+        return tgt + self.dropout2(mlp_out)
+class Fuse_sft_block(nn.Module):
+    """Fuse encoder features into decoder features with a learned scale and shift.
+
+    The encoder and decoder features are concatenated on the channel axis and
+    passed through a ResBlock; two small conv branches then predict a
+    per-pixel scale and shift that modulate the decoder features.
+    """
+    def __init__(self, in_ch, out_ch):
+        super().__init__()
+        def conv_branch():
+            # conv -> LeakyReLU -> conv, shared layout of the scale and shift branches
+            return nn.Sequential(
+                nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),
+                nn.LeakyReLU(0.2, True),
+                nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1))
+        self.encode_enc = ResBlock(2*in_ch, out_ch)
+        self.scale = conv_branch()
+        self.shift = conv_branch()
+    def forward(self, enc_feat, dec_feat, w=1):
+        """Return ``dec_feat + w * (dec_feat * scale + shift)``."""
+        fused = self.encode_enc(torch.cat([enc_feat, dec_feat], dim=1))
+        modulation = dec_feat * self.scale(fused) + self.shift(fused)
+        return dec_feat + w * modulation
+@ARCH_REGISTRY.register()
+class CodeFormer(VQAutoEncoder):
+    """VQAutoEncoder extended with a transformer that predicts codebook indices.
+
+    The encoder output is embedded and refined by ``n_layers`` self-attention
+    layers; a linear head predicts one codebook index per latent position; the
+    looked-up code features are decoded by the generator, optionally fused
+    with encoder features at the resolutions listed in ``connect_list``.
+
+    Args:
+        dim_embd (int): Transformer embedding width.
+        n_head (int): Number of attention heads per layer.
+        n_layers (int): Number of TransformerSALayer blocks.
+        codebook_size (int): Number of codebook entries (width of the logits).
+        latent_size (int): Number of learned position embeddings.
+        connect_list (list[str]): Feature-map widths at which encoder features
+            are fused into the generator.
+        fix_modules (list[str] | None): Names of submodules whose parameters
+            are frozen; None freezes nothing.
+    """
+    def __init__(self, dim_embd=512, n_head=8, n_layers=9,
+                codebook_size=1024, latent_size=256,
+                connect_list=['32', '64', '128', '256'],
+                fix_modules=['quantize','generator']):
+        super(CodeFormer, self).__init__(512, 64, [1, 2, 2, 4, 4, 8], 'nearest',2, [16], codebook_size)
+        if fix_modules is not None:
+            for module in fix_modules:
+                for param in getattr(self, module).parameters():
+                    param.requires_grad = False
+        # store a copy so instances never alias (and cannot mutate) the shared default list
+        self.connect_list = list(connect_list)
+        self.n_layers = n_layers
+        self.dim_embd = dim_embd
+        self.dim_mlp = dim_embd*2
+        self.position_emb = nn.Parameter(torch.zeros(latent_size, self.dim_embd))
+        self.feat_emb = nn.Linear(256, self.dim_embd)
+        # transformer
+        self.ft_layers = nn.Sequential(*[TransformerSALayer(embed_dim=dim_embd, nhead=n_head, dim_mlp=self.dim_mlp, dropout=0.0)
+                                    for _ in range(self.n_layers)])
+        # logits_predict head
+        self.idx_pred_layer = nn.Sequential(
+            nn.LayerNorm(dim_embd),
+            nn.Linear(dim_embd, codebook_size, bias=False))
+        # channel count of the feature map at each spatial width
+        self.channels = {
+            '16': 512,
+            '32': 256,
+            '64': 256,
+            '128': 128,
+            '256': 128,
+            '512': 64,
+        }
+        # after second residual block for > 16, before attn layer for ==16
+        self.fuse_encoder_block = {'512':2, '256':5, '128':8, '64':11, '32':14, '16':18}
+        # after first residual block for > 16, before attn layer for ==16
+        self.fuse_generator_block = {'16':6, '32': 9, '64':12, '128':15, '256':18, '512':21}
+        # fuse_convs_dict
+        self.fuse_convs_dict = nn.ModuleDict()
+        for f_size in self.connect_list:
+            in_ch = self.channels[f_size]
+            self.fuse_convs_dict[f_size] = Fuse_sft_block(in_ch, in_ch)
+    def _init_weights(self, module):
+        """Initialize Linear/Embedding weights from N(0, 0.02) and reset LayerNorm to identity."""
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            module.weight.data.normal_(mean=0.0, std=0.02)
+            if isinstance(module, nn.Linear) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+    def forward(self, x, w=0, detach_16=True, code_only=False, adain=False):
+        """Encode ``x``, predict codebook indices and decode them.
+
+        Args:
+            x (Tensor): Input batch fed to the encoder.
+            w (float): Weight of the encoder-feature fusion; fusion is skipped
+                unless ``w > 0``.
+            detach_16 (bool): Detach the quantized features before decoding.
+            code_only (bool): Return right after the logits are computed.
+            adain (bool): Apply adaptive_instance_normalization to the
+                quantized features using the encoder output statistics.
+
+        Returns:
+            ``(logits, lq_feat)`` when ``code_only`` is true, otherwise
+            ``(out, logits, lq_feat)``.
+        """
+        # ################### Encoder #####################
+        enc_feat_dict = {}
+        out_list = [self.fuse_encoder_block[f_size] for f_size in self.connect_list]
+        for i, block in enumerate(self.encoder.blocks):
+            x = block(x)
+            if i in out_list:
+                # keyed by spatial width so the generator can look it up later
+                enc_feat_dict[str(x.shape[-1])] = x.clone()
+        lq_feat = x
+        # ################# Transformer ###################
+        # quant_feat, codebook_loss, quant_stats = self.quantize(lq_feat)
+        pos_emb = self.position_emb.unsqueeze(1).repeat(1,x.shape[0],1)
+        # BCHW -> BC(HW) -> (HW)BC
+        feat_emb = self.feat_emb(lq_feat.flatten(2).permute(2,0,1))
+        query_emb = feat_emb
+        # Transformer encoder
+        for layer in self.ft_layers:
+            query_emb = layer(query_emb, query_pos=pos_emb)
+        # output logits
+        logits = self.idx_pred_layer(query_emb) # (hw)bn
+        logits = logits.permute(1,0,2) # (hw)bn -> b(hw)n
+        if code_only: # for training stage II
+            # logits doesn't need softmax before cross_entropy loss
+            return logits, lq_feat
+        # ################# Quantization ###################
+        # if self.training:
+        #     quant_feat = torch.einsum('btn,nc->btc', [soft_one_hot, self.quantize.embedding.weight])
+        #     # b(hw)c -> bc(hw) -> bchw
+        #     quant_feat = quant_feat.permute(0,2,1).view(lq_feat.shape)
+        # ------------
+        soft_one_hot = F.softmax(logits, dim=2)
+        _, top_idx = torch.topk(soft_one_hot, 1, dim=2)
+        # the code features must mirror lq_feat (BCHW), so derive the BHWC
+        # shape from it instead of hard-coding [B, 16, 16, 256]
+        batch, feat_ch, feat_h, feat_w = lq_feat.shape
+        quant_feat = self.quantize.get_codebook_feat(top_idx, shape=[batch, feat_h, feat_w, feat_ch])
+        # preserve gradients
+        # quant_feat = lq_feat + (quant_feat - lq_feat).detach()
+        if detach_16:
+            quant_feat = quant_feat.detach() # for training stage III
+        if adain:
+            quant_feat = adaptive_instance_normalization(quant_feat, lq_feat)
+        # ################## Generator ####################
+        x = quant_feat
+        fuse_list = [self.fuse_generator_block[f_size] for f_size in self.connect_list]
+        for i, block in enumerate(self.generator.blocks):
+            x = block(x)
+            if i in fuse_list: # fuse after i-th block
+                f_size = str(x.shape[-1])
+                if w>0:
+                    x = self.fuse_convs_dict[f_size](enc_feat_dict[f_size].detach(), x, w)
+        out = x
+        # logits doesn't need softmax before cross_entropy loss
+        return out, logits, lq_feat

scripts/r_archs/vqgan_arch.py ADDED Viewed

	@@ -0,0 +1,437 @@

+'''
+VQGAN code, adapted from the original created by the Unleashing Transformers authors:
+https://github.com/samb-t/unleashing-transformers/blob/master/models/vqgan.py
+'''
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import copy
+from r_basicsr.utils import get_root_logger
+from r_basicsr.utils.registry import ARCH_REGISTRY
+def normalize(in_channels):
+    """Build the GroupNorm layer used by the blocks in this file (32 groups, eps=1e-6, affine)."""
+    return nn.GroupNorm(32, in_channels, eps=1e-6, affine=True)
+@torch.jit.script
+def swish(x):
+    """Swish activation: the input multiplied by its own sigmoid."""
+    return torch.sigmoid(x) * x
+#  Define VQVAE classes
+class VectorQuantizer(nn.Module):
+    """Nearest-neighbour vector quantizer backed by a learned embedding table.
+
+    Each latent vector is replaced by the closest codebook entry (squared
+    Euclidean distance); gradients are passed straight through to the input.
+    """
+    def __init__(self, codebook_size, emb_dim, beta):
+        super(VectorQuantizer, self).__init__()
+        self.codebook_size = codebook_size  # number of embeddings
+        self.emb_dim = emb_dim  # dimension of embedding
+        self.beta = beta  # commitment cost used in loss term, beta * ||z_e(x)-sg[e]||^2
+        self.embedding = nn.Embedding(self.codebook_size, self.emb_dim)
+        # start from a small uniform range that shrinks with the codebook size
+        self.embedding.weight.data.uniform_(-1.0 / self.codebook_size, 1.0 / self.codebook_size)
+    def forward(self, z):
+        """Quantize ``z`` and return ``(z_q, loss, stats)``.
+
+        ``z`` is permuted as a 4D channel-first tensor. ``z_q`` has the same
+        layout as ``z``, ``loss`` is the codebook/commitment loss, and
+        ``stats`` is a dict with the perplexity, the one-hot encodings, the
+        selected indices, their confidence scores and the mean distance.
+        """
+        # reshape z -> (batch, height, width, channel) and flatten
+        z = z.permute(0, 2, 3, 1).contiguous()
+        z_flattened = z.view(-1, self.emb_dim)
+        # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z
+        d = (z_flattened ** 2).sum(dim=1, keepdim=True) + (self.embedding.weight**2).sum(1) - \
+            2 * torch.matmul(z_flattened, self.embedding.weight.t())
+        mean_distance = torch.mean(d)
+        # find closest encodings
+        # min_encoding_indices = torch.argmin(d, dim=1).unsqueeze(1)
+        min_encoding_scores, min_encoding_indices = torch.topk(d, 1, dim=1, largest=False)
+        # [0-1], higher score, higher confidence
+        min_encoding_scores = torch.exp(-min_encoding_scores/10)
+        # one-hot rows selecting the nearest codebook entry for every latent vector
+        min_encodings = torch.zeros(min_encoding_indices.shape[0], self.codebook_size).to(z)
+        min_encodings.scatter_(1, min_encoding_indices, 1)
+        # get quantized latent vectors
+        z_q = torch.matmul(min_encodings, self.embedding.weight).view(z.shape)
+        # compute loss for embedding
+        loss = torch.mean((z_q.detach()-z)**2) + self.beta * torch.mean((z_q - z.detach()) ** 2)
+        # preserve gradients
+        z_q = z + (z_q - z).detach()
+        # perplexity
+        e_mean = torch.mean(min_encodings, dim=0)
+        perplexity = torch.exp(-torch.sum(e_mean * torch.log(e_mean + 1e-10)))
+        # reshape back to match original input shape
+        z_q = z_q.permute(0, 3, 1, 2).contiguous()
+        return z_q, loss, {
+            "perplexity": perplexity,
+            "min_encodings": min_encodings,
+            "min_encoding_indices": min_encoding_indices,
+            "min_encoding_scores": min_encoding_scores,
+            "mean_distance": mean_distance
+            }
+    def get_codebook_feat(self, indices, shape):
+        """Look up codebook vectors for ``indices``.
+
+        Returns a ``(num_indices, emb_dim)`` matrix when ``shape`` is None;
+        otherwise the result is viewed as ``shape`` (batch, height, width,
+        channel) and permuted to channel-first.
+        """
+        # input indices: batch*token_num -> (batch*token_num)*1
+        # shape: batch, height, width, channel
+        indices = indices.view(-1,1)
+        # the one-hot matrix inherits dtype/device from ``indices``; it is cast to float below
+        min_encodings = torch.zeros(indices.shape[0], self.codebook_size).to(indices)
+        min_encodings.scatter_(1, indices, 1)
+        # get quantized latent vectors
+        z_q = torch.matmul(min_encodings.float(), self.embedding.weight)
+        if shape is not None:  # reshape back to match original input shape
+            z_q = z_q.view(shape).permute(0, 3, 1, 2).contiguous()
+        return z_q
+class GumbelQuantizer(nn.Module):
+    """Quantizer that samples codebook entries with the Gumbel-softmax trick.
+
+    A 1x1 convolution turns the encoder features into per-position logits over
+    the codebook; the (soft or hard) one-hot sample is then mixed with the
+    embedding table.
+    """
+    def __init__(self, codebook_size, emb_dim, num_hiddens, straight_through=False, kl_weight=5e-4, temp_init=1.0):
+        super().__init__()
+        self.codebook_size = codebook_size  # number of embeddings
+        self.emb_dim = emb_dim  # dimension of embedding
+        self.straight_through = straight_through
+        self.temperature = temp_init
+        self.kl_weight = kl_weight
+        self.proj = nn.Conv2d(num_hiddens, codebook_size, 1)  # projects last encoder layer to quantized logits
+        self.embed = nn.Embedding(codebook_size, emb_dim)
+    def forward(self, z):
+        """Return ``(z_q, diff, stats)`` where ``stats`` holds ``min_encoding_indices``."""
+        # outside training the sample is always hard (one-hot)
+        use_hard = True if not self.training else self.straight_through
+        logits = self.proj(z)
+        sample = F.gumbel_softmax(logits, tau=self.temperature, dim=1, hard=use_hard)
+        z_q = torch.einsum("b n h w, n d -> b d h w", sample, self.embed.weight)
+        # + kl divergence to the prior loss
+        probs = F.softmax(logits, dim=1)
+        kl_per_pos = torch.sum(probs * torch.log(probs * self.codebook_size + 1e-10), dim=1)
+        diff = self.kl_weight * kl_per_pos.mean()
+        return z_q, diff, {
+            "min_encoding_indices": sample.argmax(dim=1)
+        }
+class Downsample(nn.Module):
+    """Stride-2 3x3 convolution that keeps the channel count.
+
+    The convolution itself uses no padding; one row/column of zeros is added
+    on the bottom and right before it is applied.
+    """
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=2, padding=0)
+    def forward(self, x):
+        # F.pad order for the last two dims is (left, right, top, bottom)
+        padded = F.pad(x, (0, 1, 0, 1), mode="constant", value=0)
+        return self.conv(padded)
+class Upsample(nn.Module):
+    """Nearest-neighbour 2x upsampling followed by a 3x3 convolution (channels unchanged)."""
+    def __init__(self, in_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=1, padding=1)
+    def forward(self, x):
+        enlarged = F.interpolate(x, scale_factor=2.0, mode="nearest")
+        return self.conv(enlarged)
+class ResBlock(nn.Module):
+    """Residual block: two (GroupNorm -> swish -> 3x3 conv) stages plus a skip connection.
+
+    Args:
+        in_channels (int): Channels of the input.
+        out_channels (int | None): Channels of the output; defaults to
+            ``in_channels`` when None. When the two differ, the skip path is
+            projected with a 1x1 convolution.
+    """
+    def __init__(self, in_channels, out_channels=None):
+        super(ResBlock, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = in_channels if out_channels is None else out_channels
+        # always build the layers from the resolved self.out_channels: using the
+        # raw argument crashed when the default out_channels=None was relied on
+        self.norm1 = normalize(in_channels)
+        self.conv1 = nn.Conv2d(in_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
+        self.norm2 = normalize(self.out_channels)
+        self.conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
+        if self.in_channels != self.out_channels:
+            self.conv_out = nn.Conv2d(in_channels, self.out_channels, kernel_size=1, stride=1, padding=0)
+    def forward(self, x_in):
+        """Return the block output; spatial size is preserved."""
+        x = x_in
+        x = self.norm1(x)
+        x = swish(x)
+        x = self.conv1(x)
+        x = self.norm2(x)
+        x = swish(x)
+        x = self.conv2(x)
+        if self.in_channels != self.out_channels:
+            # match the channel count of the skip path before adding
+            x_in = self.conv_out(x_in)
+        return x + x_in
+class AttnBlock(nn.Module):
+    """Single-head self-attention over all spatial positions of a feature map.
+
+    Queries, keys, values and the output projection are 1x1 convolutions; the
+    attended result is added back to the input.
+    """
+    def __init__(self, in_channels):
+        super().__init__()
+        self.in_channels = in_channels
+        self.norm = normalize(in_channels)
+        def pointwise_conv():
+            # 1x1 convolution that keeps the channel count
+            return torch.nn.Conv2d(in_channels, in_channels, kernel_size=1, stride=1, padding=0)
+        self.q = pointwise_conv()
+        self.k = pointwise_conv()
+        self.v = pointwise_conv()
+        self.proj_out = pointwise_conv()
+    def forward(self, x):
+        normed = self.norm(x)
+        query = self.q(normed)
+        key = self.k(normed)
+        value = self.v(normed)
+        # compute attention
+        b, c, h, w = query.shape
+        query = query.reshape(b, c, h*w).permute(0, 2, 1)
+        key = key.reshape(b, c, h*w)
+        scores = torch.bmm(query, key) * (int(c)**(-0.5))
+        weights = F.softmax(scores, dim=2)
+        # attend to values
+        value = value.reshape(b, c, h*w)
+        attended = torch.bmm(value, weights.permute(0, 2, 1)).reshape(b, c, h, w)
+        return x + self.proj_out(attended)
+class Encoder(nn.Module):
+    """Convolutional encoder assembled as a flat list of blocks.
+
+    Layout: input conv, then per resolution level ``num_res_blocks`` ResBlocks
+    (each followed by an AttnBlock when the current resolution is listed in
+    ``attn_resolutions``) and a Downsample between levels, then a
+    ResBlock/AttnBlock/ResBlock trio, a norm layer and a conv to ``emb_dim``.
+    """
+    def __init__(self, in_channels, nf, emb_dim, ch_mult, num_res_blocks, resolution, attn_resolutions):
+        super().__init__()
+        self.nf = nf
+        self.num_resolutions = len(ch_mult)
+        self.num_res_blocks = num_res_blocks
+        self.resolution = resolution
+        self.attn_resolutions = attn_resolutions
+        curr_res = self.resolution
+        in_ch_mult = (1,)+tuple(ch_mult)
+        # initial convolution
+        layers = [nn.Conv2d(in_channels, nf, kernel_size=3, stride=1, padding=1)]
+        # residual and downsampling blocks, with attention on smaller res (16x16)
+        last_level = self.num_resolutions - 1
+        for level in range(self.num_resolutions):
+            block_in_ch = nf * in_ch_mult[level]
+            block_out_ch = nf * ch_mult[level]
+            for _ in range(self.num_res_blocks):
+                layers.append(ResBlock(block_in_ch, block_out_ch))
+                block_in_ch = block_out_ch
+                if curr_res in attn_resolutions:
+                    layers.append(AttnBlock(block_in_ch))
+            if level != last_level:
+                layers.append(Downsample(block_in_ch))
+                curr_res //= 2
+        layers += [
+            # non-local attention block
+            ResBlock(block_in_ch, block_in_ch),
+            AttnBlock(block_in_ch),
+            ResBlock(block_in_ch, block_in_ch),
+            # normalise and convert to latent size
+            normalize(block_in_ch),
+            nn.Conv2d(block_in_ch, emb_dim, kernel_size=3, stride=1, padding=1),
+        ]
+        self.blocks = nn.ModuleList(layers)
+    def forward(self, x):
+        """Apply every block in order and return the final feature map."""
+        out = x
+        for layer in self.blocks:
+            out = layer(out)
+        return out
+class Generator(nn.Module):
+    """Convolutional decoder assembled as a flat list of blocks.
+
+    Layout: input conv from ``emb_dim``, a ResBlock/AttnBlock/ResBlock trio,
+    then per resolution level (coarsest first) ``res_blocks`` ResBlocks (each
+    followed by an AttnBlock when the current resolution is listed in
+    ``attn_resolutions``) and an Upsample between levels, then a norm layer
+    and a conv to 3 output channels.
+    """
+    def __init__(self, nf, emb_dim, ch_mult, res_blocks, img_size, attn_resolutions):
+        super().__init__()
+        self.nf = nf
+        self.ch_mult = ch_mult
+        self.num_resolutions = len(self.ch_mult)
+        self.num_res_blocks = res_blocks
+        self.resolution = img_size
+        self.attn_resolutions = attn_resolutions
+        self.in_channels = emb_dim
+        self.out_channels = 3
+        block_in_ch = self.nf * self.ch_mult[-1]
+        # resolution of the coarsest level, i.e. after all encoder downsamplings
+        curr_res = self.resolution // 2 ** (self.num_resolutions-1)
+        layers = [
+            # initial conv
+            nn.Conv2d(self.in_channels, block_in_ch, kernel_size=3, stride=1, padding=1),
+            # non-local attention block
+            ResBlock(block_in_ch, block_in_ch),
+            AttnBlock(block_in_ch),
+            ResBlock(block_in_ch, block_in_ch),
+        ]
+        for level in range(self.num_resolutions - 1, -1, -1):
+            block_out_ch = self.nf * self.ch_mult[level]
+            for _ in range(self.num_res_blocks):
+                layers.append(ResBlock(block_in_ch, block_out_ch))
+                block_in_ch = block_out_ch
+                if curr_res in self.attn_resolutions:
+                    layers.append(AttnBlock(block_in_ch))
+            if level != 0:
+                layers.append(Upsample(block_in_ch))
+                curr_res *= 2
+        layers += [
+            normalize(block_in_ch),
+            nn.Conv2d(block_in_ch, self.out_channels, kernel_size=3, stride=1, padding=1),
+        ]
+        self.blocks = nn.ModuleList(layers)
+    def forward(self, x):
+        """Apply every block in order and return the decoded output."""
+        out = x
+        for layer in self.blocks:
+            out = layer(out)
+        return out
+@ARCH_REGISTRY.register()
+class VQAutoEncoder(nn.Module):
+    """Encoder -> quantizer -> generator autoencoder.
+
+    Args:
+        img_size (int): Input resolution handed to the encoder and generator.
+        nf (int): Base channel count.
+        ch_mult (list[int]): Channel multiplier per resolution level.
+        quantizer (str): "nearest" (VectorQuantizer) or "gumbel" (GumbelQuantizer).
+        res_blocks (int): ResBlocks per resolution level.
+        attn_resolutions (list[int]): Resolutions that get attention blocks.
+        codebook_size (int): Number of codebook entries.
+        emb_dim (int): Dimension of a codebook entry.
+        beta (float): Commitment cost for the "nearest" quantizer.
+        gumbel_straight_through (bool): ``straight_through`` of the gumbel quantizer.
+        gumbel_kl_weight (float): ``kl_weight`` of the gumbel quantizer.
+        model_path (str | None): Optional checkpoint holding a 'params_ema'
+            or 'params' state dict to load.
+
+    Raises:
+        ValueError: If ``quantizer`` is unknown, or the checkpoint contains
+            neither 'params_ema' nor 'params'.
+    """
+    def __init__(self, img_size, nf, ch_mult, quantizer="nearest", res_blocks=2, attn_resolutions=[16], codebook_size=1024, emb_dim=256,
+                beta=0.25, gumbel_straight_through=False, gumbel_kl_weight=1e-8, model_path=None):
+        super().__init__()
+        logger = get_root_logger()
+        self.in_channels = 3
+        self.nf = nf
+        self.n_blocks = res_blocks
+        self.codebook_size = codebook_size
+        self.embed_dim = emb_dim
+        self.ch_mult = ch_mult
+        self.resolution = img_size
+        # copy so instances never alias the shared default list
+        self.attn_resolutions = list(attn_resolutions)
+        self.quantizer_type = quantizer
+        self.encoder = Encoder(
+            self.in_channels,
+            self.nf,
+            self.embed_dim,
+            self.ch_mult,
+            self.n_blocks,
+            self.resolution,
+            self.attn_resolutions
+        )
+        if self.quantizer_type == "nearest":
+            self.beta = beta #0.25
+            self.quantize = VectorQuantizer(self.codebook_size, self.embed_dim, self.beta)
+        elif self.quantizer_type == "gumbel":
+            self.gumbel_num_hiddens = emb_dim
+            self.straight_through = gumbel_straight_through
+            self.kl_weight = gumbel_kl_weight
+            self.quantize = GumbelQuantizer(
+                self.codebook_size,
+                self.embed_dim,
+                self.gumbel_num_hiddens,
+                self.straight_through,
+                self.kl_weight
+            )
+        else:
+            # fail at construction instead of later with a missing self.quantize
+            raise ValueError(f'Unsupported quantizer: {quantizer!r}')
+        self.generator = Generator(
+            self.nf,
+            self.embed_dim,
+            self.ch_mult,
+            self.n_blocks,
+            self.resolution,
+            self.attn_resolutions
+        )
+        if model_path is not None:
+            # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
+            # The checkpoint is read once and reused (it used to be loaded twice).
+            chkpt = torch.load(model_path, map_location='cpu')
+            for key in ('params_ema', 'params'):
+                if key in chkpt:
+                    self.load_state_dict(chkpt[key])
+                    logger.info(f'vqgan is loaded from: {model_path} [{key}]')
+                    break
+            else:
+                raise ValueError('Wrong params!')
+    def forward(self, x):
+        """Return ``(reconstruction, codebook_loss, quant_stats)`` for input ``x``."""
+        x = self.encoder(x)
+        quant, codebook_loss, quant_stats = self.quantize(x)
+        x = self.generator(quant)
+        return x, codebook_loss, quant_stats
+# patch based discriminator
+@ARCH_REGISTRY.register()
+class VQGANDiscriminator(nn.Module):
+    """Stack of strided convolutions ending in a 1-channel prediction map.
+
+    Args:
+        nc (int): Channels of the input image.
+        ndf (int): Base number of filters.
+        n_layers (int): Controls how many stride-2 stages are stacked; the
+            filter multiplier doubles per stage and is capped at 8.
+        model_path (str | None): Optional checkpoint holding a 'params_d' or
+            'params' state dict to load.
+
+    Raises:
+        ValueError: If the checkpoint contains neither 'params_d' nor 'params'.
+    """
+    def __init__(self, nc=3, ndf=64, n_layers=4, model_path=None):
+        super().__init__()
+        layers = [nn.Conv2d(nc, ndf, kernel_size=4, stride=2, padding=1), nn.LeakyReLU(0.2, True)]
+        ndf_mult = 1
+        ndf_mult_prev = 1
+        for n in range(1, n_layers):  # gradually increase the number of filters
+            ndf_mult_prev = ndf_mult
+            ndf_mult = min(2 ** n, 8)
+            layers += [
+                nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=2, padding=1, bias=False),
+                nn.BatchNorm2d(ndf * ndf_mult),
+                nn.LeakyReLU(0.2, True)
+            ]
+        # one more stage at stride 1 before the prediction layer
+        ndf_mult_prev = ndf_mult
+        ndf_mult = min(2 ** n_layers, 8)
+        layers += [
+            nn.Conv2d(ndf * ndf_mult_prev, ndf * ndf_mult, kernel_size=4, stride=1, padding=1, bias=False),
+            nn.BatchNorm2d(ndf * ndf_mult),
+            nn.LeakyReLU(0.2, True)
+        ]
+        layers += [
+            nn.Conv2d(ndf * ndf_mult, 1, kernel_size=4, stride=1, padding=1)]  # output 1 channel prediction map
+        self.main = nn.Sequential(*layers)
+        if model_path is not None:
+            # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
+            # The checkpoint is read once and reused (it used to be loaded twice).
+            chkpt = torch.load(model_path, map_location='cpu')
+            if 'params_d' in chkpt:
+                self.load_state_dict(chkpt['params_d'])
+            elif 'params' in chkpt:
+                self.load_state_dict(chkpt['params'])
+            else:
+                raise ValueError('Wrong params!')
+    def forward(self, x):
+        """Return the prediction map for ``x``."""
+        return self.main(x)

scripts/reactor_faceswap.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os, glob
+from PIL import Image
+import modules.scripts as scripts
+# from modules.upscaler import Upscaler, UpscalerData
+from modules import scripts, scripts_postprocessing
+from modules.processing import (
+    StableDiffusionProcessing,
+    StableDiffusionProcessingImg2Img,
+)
+from modules.shared import state
+from scripts.reactor_logger import logger
+from scripts.reactor_swapper import swap_face, get_current_faces_model, analyze_faces, half_det_size
+import folder_paths
+import comfy.model_management as model_management
+def get_models():
+    """Return the paths of the ``.onnx`` and ``.pth`` files directly inside ``<models_dir>/insightface``."""
+    pattern = os.path.join(folder_paths.models_dir,"insightface/*")
+    return [path for path in glob.glob(pattern) if path.endswith((".onnx", ".pth"))]
+class FaceSwapScript(scripts.Script):
+    """Script hook that swaps faces in img2img inputs and/or generated images.
+
+    ``process`` stores the swap options on the instance (and swaps the
+    img2img init images when requested); ``postprocess_image`` reads them
+    back to swap each generated image.
+    """
+    # maps the gender option to the numeric code handed to swap_face;
+    # any other value is passed through unchanged
+    _GENDER_CODES = {None: 0, "no": 0, "female": 1, "male": 2}
+    @staticmethod
+    def _parse_indexes(text):
+        """Parse a comma-separated index string such as "0, 2"; fall back to [0] when nothing is usable."""
+        # strip each item so "0, 1" keeps its second index instead of dropping " 1"
+        items = (part.strip() for part in text.split(","))
+        indexes = [int(item) for item in items if item.isdecimal()]
+        return indexes or [0]
+    @classmethod
+    def _gender_code(cls, gender):
+        """Translate a gender option into its numeric code."""
+        return cls._GENDER_CODES.get(gender, gender)
+    def _swap(self, target):
+        """Call swap_face on ``target`` with the options stored by ``process``."""
+        return swap_face(
+            self.source,
+            target,
+            source_faces_index=self.source_faces_index,
+            faces_index=self.faces_index,
+            model=self.model,
+            gender_source=self.gender_source,
+            gender_target=self.gender_target,
+            face_model=self.face_model,
+        )
+    def process(
+        self,
+        p: StableDiffusionProcessing,
+        img,
+        enable,
+        source_faces_index,
+        faces_index,
+        model,
+        swap_in_source,
+        swap_in_generated,
+        gender_source,
+        gender_target,
+        face_model,
+    ):
+        """Store the swap options and, for img2img with ``swap_in_source``, swap the init images in place."""
+        self.enable = enable
+        if not self.enable:
+            return
+        self.source = img
+        self.swap_in_generated = swap_in_generated
+        self.gender_source = self._gender_code(gender_source)
+        self.gender_target = self._gender_code(gender_target)
+        self.model = model
+        self.face_model = face_model
+        self.source_faces_index = self._parse_indexes(source_faces_index)
+        self.faces_index = self._parse_indexes(faces_index)
+        # if self.source is not None:
+        if isinstance(p, StableDiffusionProcessingImg2Img) and swap_in_source:
+            logger.status("Working: source face index %s, target face index %s", self.source_faces_index, self.faces_index)
+            for i, init_image in enumerate(p.init_images):
+                if state.interrupted or model_management.processing_interrupted():
+                    logger.status("Interrupted by User")
+                    break
+                if len(p.init_images) > 1:
+                    logger.status("Swap in %s", i)
+                p.init_images[i] = self._swap(init_image)
+            logger.status("--Done!--")
+        # else:
+        #     logger.error(f"Please provide a source face")
+    def postprocess_batch(self, p, *args, **kwargs):
+        """Placeholder hook: only reads ``kwargs["images"]`` when enabled."""
+        if self.enable:
+            images = kwargs["images"]
+    def postprocess_image(self, p, script_pp: scripts.PostprocessImageArgs, *args):
+        """Swap the face in a generated image when ``swap_in_generated`` is set and a source image exists."""
+        if not (self.enable and self.swap_in_generated):
+            return
+        if self.source is None:
+            return
+        logger.status("Working: source face index %s, target face index %s", self.source_faces_index, self.faces_index)
+        image: Image.Image = script_pp.image
+        # same arguments as the call in process(): the former
+        # upscale_options=self.upscale_options read an attribute that is never
+        # assigned, and face_model was not forwarded
+        result = self._swap(image)
+        try:
+            pp = scripts_postprocessing.PostprocessedImage(result)
+            pp.info = {}
+            p.extra_generation_params.update(pp.info)
+            script_pp.image = pp.image
+        except Exception:
+            # best effort: keep the original image, but no longer swallow
+            # KeyboardInterrupt/SystemExit as the bare except did
+            logger.error("Cannot create a result image")

scripts/reactor_logger.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import logging
+import copy
+import sys
+from modules import shared
+from reactor_utils import addLoggingLevel
+class ColoredFormatter(logging.Formatter):
+    COLORS = {
+        "DEBUG": "\033[0;36m",  # CYAN
+        "STATUS": "\033[38;5;173m",  # Calm ORANGE
+        "INFO": "\033[0;32m",  # GREEN
+        "WARNING": "\033[0;33m",  # YELLOW
+        "ERROR": "\033[0;31m",  # RED
+        "CRITICAL": "\033[0;37;41m",  # WHITE ON RED
+        "RESET": "\033[0m",  # RESET COLOR
+    }
+    def format(self, record):
+        colored_record = copy.copy(record)
+        levelname = colored_record.levelname
+        seq = self.COLORS.get(levelname, self.COLORS["RESET"])
+        colored_record.levelname = f"{seq}{levelname}{self.COLORS['RESET']}"
+        return super().format(colored_record)
+# Create a new logger
+logger = logging.getLogger("ReActor")
+logger.propagate = False
+# Add Custom Level
+# logging.addLevelName(logging.INFO, "STATUS")
+addLoggingLevel("STATUS", logging.INFO + 5)
+# Add handler if we don't have one.
+if not logger.handlers:
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(
+        ColoredFormatter("[%(name)s] %(asctime)s - %(levelname)s - %(message)s",datefmt="%H:%M:%S")
+    )
+    logger.addHandler(handler)
+# Configure logger
+loglevel_string = getattr(shared.cmd_opts, "reactor_loglevel", "INFO")
+loglevel = getattr(logging, loglevel_string.upper(), "info")
+logger.setLevel(loglevel)

scripts/reactor_swapper.py ADDED Viewed

	@@ -0,0 +1,301 @@

+import copy
+import os
+import shutil
+from dataclasses import dataclass
+from typing import List, Union
+import cv2
+import numpy as np
+from PIL import Image
+import insightface
+from insightface.app.common import Face
+try:
+    import torch.cuda as cuda
+except:
+    cuda = None
+from scripts.reactor_logger import logger
+from reactor_utils import move_path, get_image_md5hash
+import folder_paths
+import warnings
+np.warnings = warnings
+np.warnings.filterwarnings('ignore')
+if cuda is not None:
+    if cuda.is_available():
+        providers = ["CUDAExecutionProvider"]
+    else:
+        providers = ["CPUExecutionProvider"]
+else:
+    providers = ["CPUExecutionProvider"]
+# Legacy locations: a "models" directory in the parent of this file's directory.
+models_path_old = os.path.join(os.path.dirname(os.path.dirname(__file__)), "models")
+insightface_path_old = os.path.join(models_path_old, "insightface")
+insightface_models_path_old = os.path.join(insightface_path_old, "models")
+# Current locations, rooted at the directory given by folder_paths.models_dir.
+models_path = folder_paths.models_dir
+insightface_path = os.path.join(models_path, "insightface")
+insightface_models_path = os.path.join(insightface_path, "models")
+# If the legacy directory exists, pass each old/new pair to move_path,
+# innermost directory first.
+# NOTE(review): move_path comes from reactor_utils; presumably it relocates the
+# old contents into the new location — confirm against its implementation.
+if os.path.exists(models_path_old):
+    move_path(insightface_models_path_old, insightface_models_path)
+    move_path(insightface_path_old, insightface_path)
+    move_path(models_path_old, models_path)
+# When the insightface directory exists in both places, delete the legacy
+# insightface directory and then the whole legacy models directory.
+if os.path.exists(insightface_path) and os.path.exists(insightface_path_old):
+    shutil.rmtree(insightface_path_old)
+    shutil.rmtree(models_path_old)
+# Module-level caches; all start out empty.
+# Face-swap model and the path it was loaded from (set by getFaceSwapModel).
+FS_MODEL = None
+CURRENT_FS_MODEL_PATH = None
+# Face analysis model (set by getAnalysisModel).
+ANALYSIS_MODEL = None
+# Faces found in the most recent source image and that image's hash (set by swap_face).
+SOURCE_FACES = None
+SOURCE_IMAGE_HASH = None
+# Faces found in the most recent target image and that image's hash (set by swap_face).
+TARGET_FACES = None
+TARGET_IMAGE_HASH = None
+def get_current_faces_model():
+    global SOURCE_FACES
+    return SOURCE_FACES
+def getAnalysisModel():
+    global ANALYSIS_MODEL
+    if ANALYSIS_MODEL is None:
+        ANALYSIS_MODEL = insightface.app.FaceAnalysis(
+            name="buffalo_l", providers=providers, root=insightface_path
+        )
+    return ANALYSIS_MODEL
+def getFaceSwapModel(model_path: str):
+    global FS_MODEL
+    global CURRENT_FS_MODEL_PATH
+    if CURRENT_FS_MODEL_PATH is None or CURRENT_FS_MODEL_PATH != model_path:
+        CURRENT_FS_MODEL_PATH = model_path
+        FS_MODEL = insightface.model_zoo.get_model(model_path, providers=providers)
+    return FS_MODEL
+def get_face_gender(
+        face,
+        face_index,
+        gender_condition,
+        operated: str
+):
+    gender = [
+        x.sex
+        for x in face
+    ]
+    gender.reverse()
+    # If index is outside of bounds, return None, avoid exception
+    if face_index >= len(gender):
+        logger.status("Requested face index (%s) is out of bounds (max available index is %s)", face_index, len(gender))
+        return None, 0
+    face_gender = gender[face_index]
+    logger.status("%s Face %s: Detected Gender -%s-", operated, face_index, face_gender)
+    if (gender_condition == 1 and face_gender == "F") or (gender_condition == 2 and face_gender == "M"):
+        logger.status("OK - Detected Gender matches Condition")
+        try:
+            return sorted(face, key=lambda x: x.bbox[0])[face_index], 0
+        except IndexError:
+            return None, 0
+    else:
+        logger.status("WRONG - Detected Gender doesn't match Condition")
+        return sorted(face, key=lambda x: x.bbox[0])[face_index], 1
+def half_det_size(det_size):
+    logger.status("Trying to halve 'det_size' parameter")
+    return (det_size[0] // 2, det_size[1] // 2)
+def analyze_faces(img_data: np.ndarray, det_size=(640, 640)):
+    face_analyser = copy.deepcopy(getAnalysisModel())
+    face_analyser.prepare(ctx_id=0, det_size=det_size)
+    return face_analyser.get(img_data)
+def get_face_single(img_data: np.ndarray, face, face_index=0, det_size=(640, 640), gender_source=0, gender_target=0):
+    buffalo_path = os.path.join(insightface_models_path, "buffalo_l.zip")
+    if os.path.exists(buffalo_path):
+        os.remove(buffalo_path)
+    if gender_source != 0:
+        if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
+            det_size_half = half_det_size(det_size)
+            return get_face_single(img_data, analyze_faces(img_data, det_size_half), face_index, det_size_half, gender_source, gender_target)
+        return get_face_gender(face,face_index,gender_source,"Source")
+    if gender_target != 0:
+        if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
+            det_size_half = half_det_size(det_size)
+            return get_face_single(img_data, analyze_faces(img_data, det_size_half), face_index, det_size_half, gender_source, gender_target)
+        return get_face_gender(face,face_index,gender_target,"Target")
+    if len(face) == 0 and det_size[0] > 320 and det_size[1] > 320:
+        det_size_half = half_det_size(det_size)
+        return get_face_single(img_data, analyze_faces(img_data, det_size_half), face_index, det_size_half, gender_source, gender_target)
+    try:
+        return sorted(face, key=lambda x: x.bbox[0])[face_index], 0
+    except IndexError:
+        return None, 0
+def swap_face(
+    source_img: Union[Image.Image, None],
+    target_img: Image.Image,
+    model: Union[str, None] = None,
+    source_faces_index: List[int] = [0],
+    faces_index: List[int] = [0],
+    gender_source: int = 0,
+    gender_target: int = 0,
+    face_model: Union[Face, None] = None,
+):
+    global SOURCE_FACES, SOURCE_IMAGE_HASH, TARGET_FACES, TARGET_IMAGE_HASH
+    result_image = target_img
+    if model is not None:
+        if isinstance(source_img, str):  # source_img is a base64 string
+            import base64, io
+            if 'base64,' in source_img:  # check if the base64 string has a data URL scheme
+                # split the base64 string to get the actual base64 encoded image data
+                base64_data = source_img.split('base64,')[-1]
+                # decode base64 string to bytes
+                img_bytes = base64.b64decode(base64_data)
+            else:
+                # if no data URL scheme, just decode
+                img_bytes = base64.b64decode(source_img)
+            source_img = Image.open(io.BytesIO(img_bytes))
+        target_img = cv2.cvtColor(np.array(target_img), cv2.COLOR_RGB2BGR)
+        if source_img is not None:
+            source_img = cv2.cvtColor(np.array(source_img), cv2.COLOR_RGB2BGR)
+            source_image_md5hash = get_image_md5hash(source_img)
+            if SOURCE_IMAGE_HASH is None:
+                SOURCE_IMAGE_HASH = source_image_md5hash
+                source_image_same = False
+            else:
+                source_image_same = True if SOURCE_IMAGE_HASH == source_image_md5hash else False
+                if not source_image_same:
+                    SOURCE_IMAGE_HASH = source_image_md5hash
+            logger.info("Source Image MD5 Hash = %s", SOURCE_IMAGE_HASH)
+            logger.info("Source Image the Same? %s", source_image_same)
+            if SOURCE_FACES is None or not source_image_same:
+                logger.status("Analyzing Source Image...")
+                source_faces = analyze_faces(source_img)
+                SOURCE_FACES = source_faces
+            elif source_image_same:
+                logger.status("Using Hashed Source Face(s) Model...")
+                source_faces = SOURCE_FACES
+        elif face_model is not None:
+            source_faces_index = [0]
+            logger.status("Using Loaded Source Face Model...")
+            source_face_model = [face_model]
+            source_faces = source_face_model
+        else:
+            logger.error("Cannot detect any Source")
+        if source_faces is not None:
+            target_image_md5hash = get_image_md5hash(target_img)
+            if TARGET_IMAGE_HASH is None:
+                TARGET_IMAGE_HASH = target_image_md5hash
+                target_image_same = False
+            else:
+                target_image_same = True if TARGET_IMAGE_HASH == target_image_md5hash else False
+                if not target_image_same:
+                    TARGET_IMAGE_HASH = target_image_md5hash
+            logger.info("Target Image MD5 Hash = %s", TARGET_IMAGE_HASH)
+            logger.info("Target Image the Same? %s", target_image_same)
+            if TARGET_FACES is None or not target_image_same:
+                logger.status("Analyzing Target Image...")
+                target_faces = analyze_faces(target_img)
+                TARGET_FACES = target_faces
+            elif target_image_same:
+                logger.status("Using Hashed Target Face(s) Model...")
+                target_faces = TARGET_FACES
+            # No use in trying to swap faces if no faces are found, enhancement
+            if len(target_faces) == 0:
+                logger.status("Cannot detect any Target, skipping swapping...")
+                return result_image
+            if source_img is not None:
+                # separated management of wrong_gender between source and target, enhancement
+                source_face, src_wrong_gender = get_face_single(source_img, source_faces, face_index=source_faces_index[0], gender_source=gender_source)
+            else:
+                source_face = sorted(source_faces, key=lambda x: x.bbox[0])[source_faces_index[0]]
+                src_wrong_gender = 0
+            if len(source_faces_index) != 0 and len(source_faces_index) != 1 and len(source_faces_index) != len(faces_index):
+                logger.status(f'Source Faces must have no entries (default=0), one entry, or same number of entries as target faces.')
+            elif source_face is not None:
+                result = target_img
+                model_path = model_path = os.path.join(insightface_path, model)
+                face_swapper = getFaceSwapModel(model_path)
+                source_face_idx = 0
+                for face_num in faces_index:
+                    # No use in trying to swap faces if no further faces are found, enhancement
+                    if face_num >= len(target_faces):
+                        logger.status("Checked all existing target faces, skipping swapping...")
+                        break
+                    if len(source_faces_index) > 1 and source_face_idx > 0:
+                        source_face, src_wrong_gender = get_face_single(source_img, source_faces, face_index=source_faces_index[source_face_idx], gender_source=gender_source)
+                    source_face_idx += 1
+                    if source_face is not None and src_wrong_gender == 0:
+                        target_face, wrong_gender = get_face_single(target_img, target_faces, face_index=face_num, gender_target=gender_target)
+                        if target_face is not None and wrong_gender == 0:
+                            logger.status(f"Swapping...")
+                            result = face_swapper.get(result, target_face, source_face)
+                        elif wrong_gender == 1:
+                            wrong_gender = 0
+                            # Keep searching for other faces if wrong gender is detected, enhancement
+                            #if source_face_idx == len(source_faces_index):
+                            #    result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+                            #    return result_image
+                            logger.status("Wrong target gender detected")
+                            continue
+                        else:
+                            logger.status(f"No target face found for {face_num}")
+                    elif src_wrong_gender == 1:
+                        src_wrong_gender = 0
+                        # Keep searching for other faces if wrong gender is detected, enhancement
+                        #if source_face_idx == len(source_faces_index):
+                        #    result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+                        #    return result_image
+                        logger.status("Wrong source gender detected")
+                        continue
+                    else:
+                        logger.status(f"No source face found for face number {source_face_idx}.")
+                result_image = Image.fromarray(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+            else:
+                logger.status("No source face(s) in the provided Index")
+        else:
+            logger.status("No source face(s) found")
+    return result_image

scripts/reactor_version.py ADDED Viewed

	@@ -0,0 +1,13 @@

+app_title = "ReActor Node for ComfyUI"
+version_flag = "v0.4.1-b12"
+COLORS = {
+    "CYAN": "\033[0;36m",  # CYAN
+    "ORANGE": "\033[38;5;173m",  # Calm ORANGE
+    "GREEN": "\033[0;32m",  # GREEN
+    "YELLOW": "\033[0;33m",  # YELLOW
+    "RED": "\033[0;91m",  # RED
+    "0": "\033[0m",  # RESET COLOR
+}
+print(f"{COLORS['YELLOW']}[ReActor]{COLORS['0']} - {COLORS['ORANGE']}STATUS{COLORS['0']} - {COLORS['GREEN']}Running {version_flag} in ComfyUI{COLORS['0']}")