Spaces: Runtime error

Sanket committed · commit 3d37b6e · 1 parent: ff4715d
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -6
- LICENSE +21 -0
- README.md +32 -6
- app.py +204 -0
- e4e/.gitignore +129 -0
- e4e/criteria/__init__.py +0 -0
- e4e/criteria/id_loss.py +47 -0
- e4e/criteria/lpips/__init__.py +0 -0
- e4e/criteria/lpips/lpips.py +35 -0
- e4e/criteria/lpips/networks.py +96 -0
- e4e/criteria/lpips/utils.py +30 -0
- e4e/criteria/moco_loss.py +71 -0
- e4e/criteria/w_norm.py +14 -0
- e4e/datasets/__init__.py +0 -0
- e4e/datasets/gt_res_dataset.py +32 -0
- e4e/datasets/images_dataset.py +33 -0
- e4e/datasets/inference_dataset.py +25 -0
- e4e/editings/ganspace.py +22 -0
- e4e/editings/ganspace_pca/cars_pca.pt +3 -0
- e4e/editings/ganspace_pca/ffhq_pca.pt +3 -0
- e4e/editings/interfacegan_directions/age.pt +3 -0
- e4e/editings/interfacegan_directions/pose.pt +3 -0
- e4e/editings/interfacegan_directions/smile.pt +3 -0
- e4e/editings/latent_editor.py +45 -0
- e4e/editings/sefa.py +46 -0
- e4e/environment/e4e_env.yaml +73 -0
- e4e/metrics/LEC.py +134 -0
- e4e/models/__init__.py +0 -0
- e4e/models/discriminator.py +20 -0
- e4e/models/encoders/__init__.py +0 -0
- e4e/models/encoders/helpers.py +140 -0
- e4e/models/encoders/model_irse.py +84 -0
- e4e/models/encoders/psp_encoders.py +200 -0
- e4e/models/latent_codes_pool.py +55 -0
- e4e/models/psp.py +99 -0
- e4e/models/stylegan2/__init__.py +0 -0
- e4e/models/stylegan2/model.py +678 -0
- e4e/models/stylegan2/op/__init__.py +0 -0
- e4e/models/stylegan2/op/fused_act.py +85 -0
- e4e/models/stylegan2/op/fused_bias_act.cpp +21 -0
- e4e/models/stylegan2/op/fused_bias_act_kernel.cu +99 -0
- e4e/models/stylegan2/op/upfirdn2d.cpp +23 -0
- e4e/models/stylegan2/op/upfirdn2d.py +184 -0
- e4e/models/stylegan2/op/upfirdn2d_kernel.cu +272 -0
- e4e/notebooks/images/car_img.jpg +0 -0
- e4e/notebooks/images/church_img.jpg +0 -0
- e4e/notebooks/images/horse_img.jpg +0 -0
- e4e/notebooks/images/input_img.jpg +0 -0
- e4e/options/__init__.py +0 -0
- e4e/options/train_options.py +84 -0
.gitattributes
CHANGED
@@ -1,6 +1,7 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
+*.bin.* filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
@@ -9,13 +10,9 @@
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
@@ -24,8 +21,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
-*.
+*.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 Min Jin Chong

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,12 +1,38 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: JoJoGAN
+emoji: 🌍
+colorFrom: green
+colorTo: yellow
 sdk: gradio
-sdk_version: 3.
+sdk_version: 3.1.1
 app_file: app.py
 pinned: false
 ---
 
-
+# Configuration
+
+`title`: _string_
+Display title for the Space
+
+`emoji`: _string_
+Space emoji (emoji-only character allowed)
+
+`colorFrom`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+`colorTo`: _string_
+Color for Thumbnail gradient (red, yellow, green, blue, indigo, purple, pink, gray)
+
+`sdk`: _string_
+Can be either `gradio` or `streamlit`
+
+`sdk_version` : _string_
+Only applicable for `streamlit` SDK.
+See [doc](https://hf.co/docs/hub/spaces) for more info on supported versions.
+
+`app_file`: _string_
+Path to your main application file (which contains either `gradio` or `streamlit` Python code).
+Path is relative to the root of the repository.
+
+`pinned`: _boolean_
+Whether the Space stays on top of your list.
app.py
ADDED
@@ -0,0 +1,204 @@
import os
from PIL import Image
import torch
import gradio as gr
import torch
torch.backends.cudnn.benchmark = True
from torchvision import transforms, utils
from util import *
from PIL import Image
import math
import random
import numpy as np
from torch import nn, autograd, optim
from torch.nn import functional as F
from tqdm import tqdm
import lpips
from model import *


#from e4e_projection import projection as e4e_projection

from copy import deepcopy
import imageio

import os
import sys
import numpy as np
from PIL import Image
import torch
import torchvision.transforms as transforms
from argparse import Namespace
from e4e.models.psp import pSp
from util import *
from huggingface_hub import hf_hub_download

device= 'cpu'
model_path_e = hf_hub_download(repo_id="akhaliq/JoJoGAN_e4e_ffhq_encode", filename="e4e_ffhq_encode.pt")
ckpt = torch.load(model_path_e, map_location='cpu')
opts = ckpt['opts']
opts['checkpoint_path'] = model_path_e
opts= Namespace(**opts)
net = pSp(opts, device).eval().to(device)

@ torch.no_grad()
def projection(img, name, device='cuda'):


    transform = transforms.Compose(
        [
            transforms.Resize(256),
            transforms.CenterCrop(256),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ]
    )
    img = transform(img).unsqueeze(0).to(device)
    images, w_plus = net(img, randomize_noise=False, return_latents=True)
    result_file = {}
    result_file['latent'] = w_plus[0]
    torch.save(result_file, name)
    return w_plus[0]




device = 'cpu'


latent_dim = 512

model_path_s = hf_hub_download(repo_id="akhaliq/jojogan-stylegan2-ffhq-config-f", filename="stylegan2-ffhq-config-f.pt")
original_generator = Generator(1024, latent_dim, 8, 2).to(device)
ckpt = torch.load(model_path_s, map_location=lambda storage, loc: storage)
original_generator.load_state_dict(ckpt["g_ema"], strict=False)
mean_latent = original_generator.mean_latent(10000)

generatorjojo = deepcopy(original_generator)

generatordisney = deepcopy(original_generator)

generatorjinx = deepcopy(original_generator)

generatorcaitlyn = deepcopy(original_generator)

generatoryasuho = deepcopy(original_generator)

generatorarcanemulti = deepcopy(original_generator)

generatorart = deepcopy(original_generator)

generatorspider = deepcopy(original_generator)

generatorsketch = deepcopy(original_generator)


transform = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
)




modeljojo = hf_hub_download(repo_id="akhaliq/JoJoGAN-jojo", filename="jojo_preserve_color.pt")


ckptjojo = torch.load(modeljojo, map_location=lambda storage, loc: storage)
generatorjojo.load_state_dict(ckptjojo["g"], strict=False)


modeldisney = hf_hub_download(repo_id="akhaliq/jojogan-disney", filename="disney_preserve_color.pt")

ckptdisney = torch.load(modeldisney, map_location=lambda storage, loc: storage)
generatordisney.load_state_dict(ckptdisney["g"], strict=False)


modeljinx = hf_hub_download(repo_id="akhaliq/jojo-gan-jinx", filename="arcane_jinx_preserve_color.pt")

ckptjinx = torch.load(modeljinx, map_location=lambda storage, loc: storage)
generatorjinx.load_state_dict(ckptjinx["g"], strict=False)


modelcaitlyn = hf_hub_download(repo_id="akhaliq/jojogan-arcane", filename="arcane_caitlyn_preserve_color.pt")

ckptcaitlyn = torch.load(modelcaitlyn, map_location=lambda storage, loc: storage)
generatorcaitlyn.load_state_dict(ckptcaitlyn["g"], strict=False)


modelyasuho = hf_hub_download(repo_id="akhaliq/JoJoGAN-jojo", filename="jojo_yasuho_preserve_color.pt")

ckptyasuho = torch.load(modelyasuho, map_location=lambda storage, loc: storage)
generatoryasuho.load_state_dict(ckptyasuho["g"], strict=False)


model_arcane_multi = hf_hub_download(repo_id="akhaliq/jojogan-arcane", filename="arcane_multi_preserve_color.pt")

ckptarcanemulti = torch.load(model_arcane_multi, map_location=lambda storage, loc: storage)
generatorarcanemulti.load_state_dict(ckptarcanemulti["g"], strict=False)


modelart = hf_hub_download(repo_id="akhaliq/jojo-gan-art", filename="art.pt")

ckptart = torch.load(modelart, map_location=lambda storage, loc: storage)
generatorart.load_state_dict(ckptart["g"], strict=False)


modelSpiderverse = hf_hub_download(repo_id="akhaliq/jojo-gan-spiderverse", filename="Spiderverse-face-500iters-8face.pt")

ckptspider = torch.load(modelSpiderverse, map_location=lambda storage, loc: storage)
generatorspider.load_state_dict(ckptspider["g"], strict=False)

modelSketch = hf_hub_download(repo_id="akhaliq/jojogan-sketch", filename="sketch_multi.pt")

ckptsketch = torch.load(modelSketch, map_location=lambda storage, loc: storage)
generatorsketch.load_state_dict(ckptsketch["g"], strict=False)

def inference(img, model):
    img.save('out.jpg')
    aligned_face = align_face('out.jpg')

    my_w = projection(aligned_face, "test.pt", device).unsqueeze(0)
    if model == 'JoJo':
        with torch.no_grad():
            my_sample = generatorjojo(my_w, input_is_latent=True)
    elif model == 'Disney':
        with torch.no_grad():
            my_sample = generatordisney(my_w, input_is_latent=True)
    elif model == 'Jinx':
        with torch.no_grad():
            my_sample = generatorjinx(my_w, input_is_latent=True)
    elif model == 'Caitlyn':
        with torch.no_grad():
            my_sample = generatorcaitlyn(my_w, input_is_latent=True)
    elif model == 'Yasuho':
        with torch.no_grad():
            my_sample = generatoryasuho(my_w, input_is_latent=True)
    elif model == 'Arcane Multi':
        with torch.no_grad():
            my_sample = generatorarcanemulti(my_w, input_is_latent=True)
    elif model == 'Art':
        with torch.no_grad():
            my_sample = generatorart(my_w, input_is_latent=True)
    elif model == 'Spider-Verse':
        with torch.no_grad():
            my_sample = generatorspider(my_w, input_is_latent=True)
    else:
        with torch.no_grad():
            my_sample = generatorsketch(my_w, input_is_latent=True)


    npimage = my_sample[0].permute(1, 2, 0).detach().numpy()
    imageio.imwrite('filename.jpeg', npimage)
    return 'filename.jpeg'

title = "JoJoGAN"
description = "Gradio Demo for JoJoGAN: One Shot Face Stylization. To use it, simply upload your image, or click one of the examples to load them. Read more at the links below."

article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2112.11641' target='_blank'>JoJoGAN: One Shot Face Stylization</a>| <a href='https://github.com/mchong6/JoJoGAN' target='_blank'>Github Repo Pytorch</a></p> <center><img src='https://visitor-badge.glitch.me/badge?page_id=akhaliq_jojogan' alt='visitor badge'></center>"

examples=[['mona.png','Jinx']]
gr.Interface(inference, [gr.inputs.Image(type="pil"),gr.inputs.Dropdown(choices=['JoJo', 'Disney','Jinx','Caitlyn','Yasuho','Arcane Multi','Art','Spider-Verse','Sketch'], type="value", default='JoJo', label="Model")], gr.outputs.Image(type="file"),title=title,description=description,article=article,allow_flagging=False,examples=examples,allow_screenshot=False).launch()
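Note: the script above still uses the legacy `gr.inputs`/`gr.outputs` namespace while the README pins `sdk_version: 3.1.1`; in Gradio 3.x those modules were replaced by top-level components, which is a plausible source of the Space's "Runtime error" status. The following is a hedged sketch of the same interface on the Gradio 3 API; the `inference` function and model names come from app.py, while the import path and refactoring assumption are hypothetical and not part of this commit.

# Hypothetical Gradio 3.x wiring for the same inference() function (not part of this commit).
import gradio as gr

from app import inference  # assumption: app.py refactored so importing it does not call .launch()

demo = gr.Interface(
    fn=inference,
    inputs=[
        gr.Image(type="pil", label="Input face"),           # replaces gr.inputs.Image
        gr.Dropdown(
            choices=['JoJo', 'Disney', 'Jinx', 'Caitlyn', 'Yasuho',
                     'Arcane Multi', 'Art', 'Spider-Verse', 'Sketch'],
            value='JoJo', label="Model"),                    # `default=` became `value=` in 3.x
    ],
    outputs=gr.Image(type="filepath", label="Stylized"),     # replaces gr.outputs.Image(type="file")
    title="JoJoGAN",
    allow_flagging="never",                                  # boolean flag became a string in 3.x
)

if __name__ == "__main__":
    demo.launch()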
e4e/.gitignore
ADDED
@@ -0,0 +1,129 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
e4e/criteria/__init__.py
ADDED
File without changes
e4e/criteria/id_loss.py
ADDED
@@ -0,0 +1,47 @@
import torch
from torch import nn
from configs.paths_config import model_paths
from models.encoders.model_irse import Backbone


class IDLoss(nn.Module):
    def __init__(self):
        super(IDLoss, self).__init__()
        print('Loading ResNet ArcFace')
        self.facenet = Backbone(input_size=112, num_layers=50, drop_ratio=0.6, mode='ir_se')
        self.facenet.load_state_dict(torch.load(model_paths['ir_se50']))
        self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112))
        self.facenet.eval()
        for module in [self.facenet, self.face_pool]:
            for param in module.parameters():
                param.requires_grad = False

    def extract_feats(self, x):
        x = x[:, :, 35:223, 32:220]  # Crop interesting region
        x = self.face_pool(x)
        x_feats = self.facenet(x)
        return x_feats

    def forward(self, y_hat, y, x):
        n_samples = x.shape[0]
        x_feats = self.extract_feats(x)
        y_feats = self.extract_feats(y)  # Otherwise use the feature from there
        y_hat_feats = self.extract_feats(y_hat)
        y_feats = y_feats.detach()
        loss = 0
        sim_improvement = 0
        id_logs = []
        count = 0
        for i in range(n_samples):
            diff_target = y_hat_feats[i].dot(y_feats[i])
            diff_input = y_hat_feats[i].dot(x_feats[i])
            diff_views = y_feats[i].dot(x_feats[i])
            id_logs.append({'diff_target': float(diff_target),
                            'diff_input': float(diff_input),
                            'diff_views': float(diff_views)})
            loss += 1 - diff_target
            id_diff = float(diff_target) - float(diff_views)
            sim_improvement += id_diff
            count += 1

        return loss / count, sim_improvement / count, id_logs
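IDLoss scores identity preservation as one minus the dot product of ArcFace embeddings, which is cosine distance when the embeddings are unit-normalized (the ir_se50 backbone L2-normalizes its output). A minimal sketch of that arithmetic, with random stand-in embeddings instead of real ArcFace features:

# Illustrative only: random unit vectors stand in for ArcFace embeddings.
import torch
import torch.nn.functional as F

emb_output = F.normalize(torch.randn(4, 512), dim=1)  # embeddings of y_hat (stylized faces)
emb_target = F.normalize(torch.randn(4, 512), dim=1)  # embeddings of y (target faces)

# Per-sample identity loss: 1 - cosine similarity, the quantity the loop in IDLoss.forward accumulates.
per_sample = 1 - (emb_output * emb_target).sum(dim=1)
print(per_sample.mean())  # batch-averaged loss, in [0, 2]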
e4e/criteria/lpips/__init__.py
ADDED
File without changes
e4e/criteria/lpips/lpips.py
ADDED
@@ -0,0 +1,35 @@
import torch
import torch.nn as nn

from criteria.lpips.networks import get_network, LinLayers
from criteria.lpips.utils import get_state_dict


class LPIPS(nn.Module):
    r"""Creates a criterion that measures
    Learned Perceptual Image Patch Similarity (LPIPS).
    Arguments:
        net_type (str): the network type to compare the features:
                        'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
        version (str): the version of LPIPS. Default: 0.1.
    """
    def __init__(self, net_type: str = 'alex', version: str = '0.1'):

        assert version in ['0.1'], 'v0.1 is only supported now'

        super(LPIPS, self).__init__()

        # pretrained network
        self.net = get_network(net_type).to("cuda")

        # linear layers
        self.lin = LinLayers(self.net.n_channels_list).to("cuda")
        self.lin.load_state_dict(get_state_dict(net_type, version))

    def forward(self, x: torch.Tensor, y: torch.Tensor):
        feat_x, feat_y = self.net(x), self.net(y)

        diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
        res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)]

        return torch.sum(torch.cat(res, 0)) / x.shape[0]
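This LPIPS wrapper moves both the feature network and the learned linear layers to CUDA unconditionally, so it requires a GPU as written. A hedged usage sketch, assuming a CUDA device and inputs already scaled to [-1, 1]:

# Sketch of using the LPIPS criterion above; assumes a CUDA device is available (illustration only).
import torch
from criteria.lpips.lpips import LPIPS

lpips_loss = LPIPS(net_type='alex').eval()

x = torch.rand(2, 3, 256, 256).cuda() * 2 - 1  # stand-in "generated" batch in [-1, 1]
y = torch.rand(2, 3, 256, 256).cuda() * 2 - 1  # stand-in "target" batch in [-1, 1]

with torch.no_grad():
    print(float(lpips_loss(x, y)))  # scalar perceptual distance, averaged over the batch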
e4e/criteria/lpips/networks.py
ADDED
@@ -0,0 +1,96 @@
from typing import Sequence

from itertools import chain

import torch
import torch.nn as nn
from torchvision import models

from criteria.lpips.utils import normalize_activation


def get_network(net_type: str):
    if net_type == 'alex':
        return AlexNet()
    elif net_type == 'squeeze':
        return SqueezeNet()
    elif net_type == 'vgg':
        return VGG16()
    else:
        raise NotImplementedError('choose net_type from [alex, squeeze, vgg].')


class LinLayers(nn.ModuleList):
    def __init__(self, n_channels_list: Sequence[int]):
        super(LinLayers, self).__init__([
            nn.Sequential(
                nn.Identity(),
                nn.Conv2d(nc, 1, 1, 1, 0, bias=False)
            ) for nc in n_channels_list
        ])

        for param in self.parameters():
            param.requires_grad = False


class BaseNet(nn.Module):
    def __init__(self):
        super(BaseNet, self).__init__()

        # register buffer
        self.register_buffer(
            'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
        self.register_buffer(
            'std', torch.Tensor([.458, .448, .450])[None, :, None, None])

    def set_requires_grad(self, state: bool):
        for param in chain(self.parameters(), self.buffers()):
            param.requires_grad = state

    def z_score(self, x: torch.Tensor):
        return (x - self.mean) / self.std

    def forward(self, x: torch.Tensor):
        x = self.z_score(x)

        output = []
        for i, (_, layer) in enumerate(self.layers._modules.items(), 1):
            x = layer(x)
            if i in self.target_layers:
                output.append(normalize_activation(x))
            if len(output) == len(self.target_layers):
                break
        return output


class SqueezeNet(BaseNet):
    def __init__(self):
        super(SqueezeNet, self).__init__()

        self.layers = models.squeezenet1_1(True).features
        self.target_layers = [2, 5, 8, 10, 11, 12, 13]
        self.n_channels_list = [64, 128, 256, 384, 384, 512, 512]

        self.set_requires_grad(False)


class AlexNet(BaseNet):
    def __init__(self):
        super(AlexNet, self).__init__()

        self.layers = models.alexnet(True).features
        self.target_layers = [2, 5, 8, 10, 12]
        self.n_channels_list = [64, 192, 384, 256, 256]

        self.set_requires_grad(False)


class VGG16(BaseNet):
    def __init__(self):
        super(VGG16, self).__init__()

        self.layers = models.vgg16(True).features
        self.target_layers = [4, 9, 16, 23, 30]
        self.n_channels_list = [64, 128, 256, 512, 512]

        self.set_requires_grad(False)
e4e/criteria/lpips/utils.py
ADDED
@@ -0,0 +1,30 @@
from collections import OrderedDict

import torch


def normalize_activation(x, eps=1e-10):
    norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
    return x / (norm_factor + eps)


def get_state_dict(net_type: str = 'alex', version: str = '0.1'):
    # build url
    url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \
        + f'master/lpips/weights/v{version}/{net_type}.pth'

    # download
    old_state_dict = torch.hub.load_state_dict_from_url(
        url, progress=True,
        map_location=None if torch.cuda.is_available() else torch.device('cpu')
    )

    # rename keys
    new_state_dict = OrderedDict()
    for key, val in old_state_dict.items():
        new_key = key
        new_key = new_key.replace('lin', '')
        new_key = new_key.replace('model.', '')
        new_state_dict[new_key] = val

    return new_state_dict
e4e/criteria/moco_loss.py
ADDED
@@ -0,0 +1,71 @@
import torch
from torch import nn
import torch.nn.functional as F

from configs.paths_config import model_paths


class MocoLoss(nn.Module):

    def __init__(self, opts):
        super(MocoLoss, self).__init__()
        print("Loading MOCO model from path: {}".format(model_paths["moco"]))
        self.model = self.__load_model()
        self.model.eval()
        for param in self.model.parameters():
            param.requires_grad = False

    @staticmethod
    def __load_model():
        import torchvision.models as models
        model = models.__dict__["resnet50"]()
        # freeze all layers but the last fc
        for name, param in model.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
        checkpoint = torch.load(model_paths['moco'], map_location="cpu")
        state_dict = checkpoint['state_dict']
        # rename moco pre-trained keys
        for k in list(state_dict.keys()):
            # retain only encoder_q up to before the embedding layer
            if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
                # remove prefix
                state_dict[k[len("module.encoder_q."):]] = state_dict[k]
            # delete renamed or unused k
            del state_dict[k]
        msg = model.load_state_dict(state_dict, strict=False)
        assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
        # remove output layer
        model = nn.Sequential(*list(model.children())[:-1]).cuda()
        return model

    def extract_feats(self, x):
        x = F.interpolate(x, size=224)
        x_feats = self.model(x)
        x_feats = nn.functional.normalize(x_feats, dim=1)
        x_feats = x_feats.squeeze()
        return x_feats

    def forward(self, y_hat, y, x):
        n_samples = x.shape[0]
        x_feats = self.extract_feats(x)
        y_feats = self.extract_feats(y)
        y_hat_feats = self.extract_feats(y_hat)
        y_feats = y_feats.detach()
        loss = 0
        sim_improvement = 0
        sim_logs = []
        count = 0
        for i in range(n_samples):
            diff_target = y_hat_feats[i].dot(y_feats[i])
            diff_input = y_hat_feats[i].dot(x_feats[i])
            diff_views = y_feats[i].dot(x_feats[i])
            sim_logs.append({'diff_target': float(diff_target),
                             'diff_input': float(diff_input),
                             'diff_views': float(diff_views)})
            loss += 1 - diff_target
            sim_diff = float(diff_target) - float(diff_views)
            sim_improvement += sim_diff
            count += 1

        return loss / count, sim_improvement / count, sim_logs
e4e/criteria/w_norm.py
ADDED
@@ -0,0 +1,14 @@
import torch
from torch import nn


class WNormLoss(nn.Module):

    def __init__(self, start_from_latent_avg=True):
        super(WNormLoss, self).__init__()
        self.start_from_latent_avg = start_from_latent_avg

    def forward(self, latent, latent_avg=None):
        if self.start_from_latent_avg:
            latent = latent - latent_avg
        return torch.sum(latent.norm(2, dim=(1, 2))) / latent.shape[0]
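WNormLoss is the e4e regularizer that pulls W+ codes toward the average latent: it subtracts latent_avg and averages the per-sample L2 norm over the batch. A tiny shape-level sketch with random codes (no generator needed; values are arbitrary):

# Shape-level sketch of the W-norm regularizer above; values are random (illustration only).
import torch
from criteria.w_norm import WNormLoss

w_norm = WNormLoss(start_from_latent_avg=True)

latents = torch.randn(4, 18, 512)   # a batch of W+ codes (18 style vectors for a 1024px generator)
latent_avg = torch.zeros(18, 512)   # stand-in for the generator's mean latent

print(float(w_norm(latents, latent_avg)))  # mean L2 norm of the centered codes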
e4e/datasets/__init__.py
ADDED
File without changes
e4e/datasets/gt_res_dataset.py
ADDED
@@ -0,0 +1,32 @@
#!/usr/bin/python
# encoding: utf-8
import os
from torch.utils.data import Dataset
from PIL import Image
import torch

class GTResDataset(Dataset):

    def __init__(self, root_path, gt_dir=None, transform=None, transform_train=None):
        self.pairs = []
        for f in os.listdir(root_path):
            image_path = os.path.join(root_path, f)
            gt_path = os.path.join(gt_dir, f)
            if f.endswith(".jpg") or f.endswith(".png"):
                self.pairs.append([image_path, gt_path.replace('.png', '.jpg'), None])
        self.transform = transform
        self.transform_train = transform_train

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, index):
        from_path, to_path, _ = self.pairs[index]
        from_im = Image.open(from_path).convert('RGB')
        to_im = Image.open(to_path).convert('RGB')

        if self.transform:
            to_im = self.transform(to_im)
            from_im = self.transform(from_im)

        return from_im, to_im
e4e/datasets/images_dataset.py
ADDED
@@ -0,0 +1,33 @@
from torch.utils.data import Dataset
from PIL import Image
from utils import data_utils


class ImagesDataset(Dataset):

    def __init__(self, source_root, target_root, opts, target_transform=None, source_transform=None):
        self.source_paths = sorted(data_utils.make_dataset(source_root))
        self.target_paths = sorted(data_utils.make_dataset(target_root))
        self.source_transform = source_transform
        self.target_transform = target_transform
        self.opts = opts

    def __len__(self):
        return len(self.source_paths)

    def __getitem__(self, index):
        from_path = self.source_paths[index]
        from_im = Image.open(from_path)
        from_im = from_im.convert('RGB')

        to_path = self.target_paths[index]
        to_im = Image.open(to_path).convert('RGB')
        if self.target_transform:
            to_im = self.target_transform(to_im)

        if self.source_transform:
            from_im = self.source_transform(from_im)
        else:
            from_im = to_im

        return from_im, to_im
e4e/datasets/inference_dataset.py
ADDED
@@ -0,0 +1,25 @@
from torch.utils.data import Dataset
from PIL import Image
from utils import data_utils


class InferenceDataset(Dataset):

    def __init__(self, root, opts, transform=None, preprocess=None):
        self.paths = sorted(data_utils.make_dataset(root))
        self.transform = transform
        self.preprocess = preprocess
        self.opts = opts

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        from_path = self.paths[index]
        if self.preprocess is not None:
            from_im = self.preprocess(from_path)
        else:
            from_im = Image.open(from_path).convert('RGB')
        if self.transform:
            from_im = self.transform(from_im)
        return from_im
e4e/editings/ganspace.py
ADDED
@@ -0,0 +1,22 @@
import torch


def edit(latents, pca, edit_directions):
    edit_latents = []
    for latent in latents:
        for pca_idx, start, end, strength in edit_directions:
            delta = get_delta(pca, latent, pca_idx, strength)
            delta_padded = torch.zeros(latent.shape).to('cuda')
            delta_padded[start:end] += delta.repeat(end - start, 1)
            edit_latents.append(latent + delta_padded)
    return torch.stack(edit_latents)


def get_delta(pca, latent, idx, strength):
    # pca: ganspace checkpoint. latent: (16, 512) w+
    w_centered = latent - pca['mean'].to('cuda')
    lat_comp = pca['comp'].to('cuda')
    lat_std = pca['std'].to('cuda')
    w_coord = torch.sum(w_centered[0].reshape(-1)*lat_comp[idx].reshape(-1)) / lat_std[idx]
    delta = (strength - w_coord)*lat_comp[idx]*lat_std[idx]
    return delta
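get_delta projects a W+ code onto one GANSpace PCA component and moves it to a target coordinate: with component c, per-component std sigma, and current coordinate w_coord = <w - mean, c> / sigma, the shift is (strength - w_coord) * sigma * c. The sketch below reproduces that arithmetic on CPU with a fake PCA checkpoint (the file above hard-codes 'cuda'); the shapes of the fake dict are assumptions for illustration.

# CPU re-implementation of get_delta's arithmetic with a fake PCA dict (illustration only).
import torch

pca = {'mean': torch.zeros(512), 'comp': torch.randn(10, 512), 'std': torch.rand(10) + 0.1}
latent = torch.randn(16, 512)          # one W+ code, as in the comment above
idx, strength = 3, 5.0

w_centered = latent - pca['mean']
w_coord = torch.sum(w_centered[0] * pca['comp'][idx]) / pca['std'][idx]   # current coordinate on component idx
delta = (strength - w_coord) * pca['comp'][idx] * pca['std'][idx]         # shift toward `strength`
print(delta.shape)  # torch.Size([512]) -- broadcast over the chosen style rows by edit()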
e4e/editings/ganspace_pca/cars_pca.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a5c3bae61ecd85de077fbbf103f5f30cf4b7676fe23a8508166eaf2ce73c8392
size 167562
e4e/editings/ganspace_pca/ffhq_pca.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d7f9df1c96180d9026b9cb8d04753579fbf385f321a9d0e263641601c5e5d36
size 167562
e4e/editings/interfacegan_directions/age.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:50074516b1629707d89b5e43d6b8abd1792212fa3b961a87a11323d6a5222ae0
size 2808
e4e/editings/interfacegan_directions/pose.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:736e0eacc8488fa0b020a2e7bd256b957284c364191dfea693705e5d06d43e7d
size 37624
e4e/editings/interfacegan_directions/smile.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:817a7e732b59dee9eba862bec8bd7e8373568443bc9f9731a21cf9b0356f0653
size 2808
e4e/editings/latent_editor.py
ADDED
@@ -0,0 +1,45 @@
import torch
import sys
sys.path.append(".")
sys.path.append("..")
from editings import ganspace, sefa
from utils.common import tensor2im


class LatentEditor(object):
    def __init__(self, stylegan_generator, is_cars=False):
        self.generator = stylegan_generator
        self.is_cars = is_cars  # Since the cars StyleGAN output is 384x512, there is a need to crop the 512x512 output.

    def apply_ganspace(self, latent, ganspace_pca, edit_directions):
        edit_latents = ganspace.edit(latent, ganspace_pca, edit_directions)
        return self._latents_to_image(edit_latents)

    def apply_interfacegan(self, latent, direction, factor=1, factor_range=None):
        edit_latents = []
        if factor_range is not None:  # Apply a range of editing factors. for example, (-5, 5)
            for f in range(*factor_range):
                edit_latent = latent + f * direction
                edit_latents.append(edit_latent)
            edit_latents = torch.cat(edit_latents)
        else:
            edit_latents = latent + factor * direction
        return self._latents_to_image(edit_latents)

    def apply_sefa(self, latent, indices=[2, 3, 4, 5], **kwargs):
        edit_latents = sefa.edit(self.generator, latent, indices, **kwargs)
        return self._latents_to_image(edit_latents)

    # Currently, in order to apply StyleFlow editings, one should run inference,
    # save the latent codes and load them form the official StyleFlow repository.
    # def apply_styleflow(self):
    #     pass

    def _latents_to_image(self, latents):
        with torch.no_grad():
            images, _ = self.generator([latents], randomize_noise=False, input_is_latent=True)
            if self.is_cars:
                images = images[:, :, 64:448, :]  # 512x512 -> 384x512
        horizontal_concat_image = torch.cat(list(images), 2)
        final_image = tensor2im(horizontal_concat_image)
        return final_image
e4e/editings/sefa.py
ADDED
@@ -0,0 +1,46 @@
import torch
import numpy as np
from tqdm import tqdm


def edit(generator, latents, indices, semantics=1, start_distance=-15.0, end_distance=15.0, num_samples=1, step=11):

    layers, boundaries, values = factorize_weight(generator, indices)
    codes = latents.detach().cpu().numpy()  # (1,18,512)

    # Generate visualization pages.
    distances = np.linspace(start_distance, end_distance, step)
    num_sam = num_samples
    num_sem = semantics

    edited_latents = []
    for sem_id in tqdm(range(num_sem), desc='Semantic ', leave=False):
        boundary = boundaries[sem_id:sem_id + 1]
        for sam_id in tqdm(range(num_sam), desc='Sample ', leave=False):
            code = codes[sam_id:sam_id + 1]
            for col_id, d in enumerate(distances, start=1):
                temp_code = code.copy()
                temp_code[:, layers, :] += boundary * d
                edited_latents.append(torch.from_numpy(temp_code).float().cuda())
    return torch.cat(edited_latents)


def factorize_weight(g_ema, layers='all'):

    weights = []
    if layers == 'all' or 0 in layers:
        weight = g_ema.conv1.conv.modulation.weight.T
        weights.append(weight.cpu().detach().numpy())

    if layers == 'all':
        layers = list(range(g_ema.num_layers - 1))
    else:
        layers = [l - 1 for l in layers if l != 0]

    for idx in layers:
        weight = g_ema.convs[idx].conv.modulation.weight.T
        weights.append(weight.cpu().detach().numpy())
    weight = np.concatenate(weights, axis=1).astype(np.float32)
    weight = weight / np.linalg.norm(weight, axis=0, keepdims=True)
    eigen_values, eigen_vectors = np.linalg.eig(weight.dot(weight.T))
    return layers, eigen_vectors.T, eigen_values
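factorize_weight implements the closed-form SeFa factorization: it stacks the style-modulation weight matrices of the selected layers, unit-normalizes each 512-dim column, and takes the eigenvectors of the resulting Gram matrix as editing directions. A numpy-only sketch of that decomposition, with a random matrix standing in for the concatenated modulation weights and eigh used here because the Gram matrix is symmetric (the file above calls np.linalg.eig on the same matrix):

# Numpy-only sketch of the SeFa step in factorize_weight(); the weight matrix is random (illustration only).
import numpy as np

weight = np.random.randn(512, 3 * 512).astype(np.float32)        # stand-in for stacked modulation weights
weight = weight / np.linalg.norm(weight, axis=0, keepdims=True)  # unit-normalize each column, as above

eigen_values, eigen_vectors = np.linalg.eigh(weight.dot(weight.T))
directions = eigen_vectors.T[::-1]   # rows sorted from largest to smallest eigenvalue
print(directions[0].shape)           # (512,) -- one candidate semantic direction in W space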
e4e/environment/e4e_env.yaml
ADDED
@@ -0,0 +1,73 @@
name: e4e_env
channels:
  - conda-forge
  - defaults
dependencies:
  - _libgcc_mutex=0.1=main
  - ca-certificates=2020.4.5.1=hecc5488_0
  - certifi=2020.4.5.1=py36h9f0ad1d_0
  - libedit=3.1.20181209=hc058e9b_0
  - libffi=3.2.1=hd88cf55_4
  - libgcc-ng=9.1.0=hdf63c60_0
  - libstdcxx-ng=9.1.0=hdf63c60_0
  - ncurses=6.2=he6710b0_1
  - ninja=1.10.0=hc9558a2_0
  - openssl=1.1.1g=h516909a_0
  - pip=20.0.2=py36_3
  - python=3.6.7=h0371630_0
  - python_abi=3.6=1_cp36m
  - readline=7.0=h7b6447c_5
  - setuptools=46.4.0=py36_0
  - sqlite=3.31.1=h62c20be_1
  - tk=8.6.8=hbc83047_0
  - wheel=0.34.2=py36_0
  - xz=5.2.5=h7b6447c_0
  - zlib=1.2.11=h7b6447c_3
  - pip:
    - absl-py==0.9.0
    - cachetools==4.1.0
    - chardet==3.0.4
    - cycler==0.10.0
    - decorator==4.4.2
    - future==0.18.2
    - google-auth==1.15.0
    - google-auth-oauthlib==0.4.1
    - grpcio==1.29.0
    - idna==2.9
    - imageio==2.8.0
    - importlib-metadata==1.6.0
    - kiwisolver==1.2.0
    - markdown==3.2.2
    - matplotlib==3.2.1
    - mxnet==1.6.0
    - networkx==2.4
    - numpy==1.18.4
    - oauthlib==3.1.0
    - opencv-python==4.2.0.34
    - pillow==7.1.2
    - protobuf==3.12.1
    - pyasn1==0.4.8
    - pyasn1-modules==0.2.8
    - pyparsing==2.4.7
    - python-dateutil==2.8.1
    - pytorch-lightning==0.7.1
    - pywavelets==1.1.1
    - requests==2.23.0
    - requests-oauthlib==1.3.0
    - rsa==4.0
    - scikit-image==0.17.2
    - scipy==1.4.1
    - six==1.15.0
    - tensorboard==2.2.1
    - tensorboard-plugin-wit==1.6.0.post3
    - tensorboardx==1.9
    - tifffile==2020.5.25
    - torch==1.6.0
    - torchvision==0.7.1
    - tqdm==4.46.0
    - urllib3==1.25.9
    - werkzeug==1.0.1
    - zipp==3.1.0
    - pyaml
prefix: ~/anaconda3/envs/e4e_env
e4e/metrics/LEC.py
ADDED
@@ -0,0 +1,134 @@
import sys
import argparse
import torch
import numpy as np
from torch.utils.data import DataLoader

sys.path.append(".")
sys.path.append("..")

from configs import data_configs
from datasets.images_dataset import ImagesDataset
from utils.model_utils import setup_model


class LEC:
    def __init__(self, net, is_cars=False):
        """
        Latent Editing Consistency metric as proposed in the main paper.
        :param net: e4e model loaded over the pSp framework.
        :param is_cars: An indication as to whether or not to crop the middle of the StyleGAN's output images.
        """
        self.net = net
        self.is_cars = is_cars

    def _encode(self, images):
        """
        Encodes the given images into StyleGAN's latent space.
        :param images: Tensor of shape NxCxHxW representing the images to be encoded.
        :return: Tensor of shape NxKx512 representing the latent space embeddings of the given image (in W(K, *) space).
        """
        codes = self.net.encoder(images)
        assert codes.ndim == 3, f"Invalid latent codes shape, should be NxKx512 but is {codes.shape}"
        # normalize with respect to the center of an average face
        if self.net.opts.start_from_latent_avg:
            codes = codes + self.net.latent_avg.repeat(codes.shape[0], 1, 1)
        return codes

    def _generate(self, codes):
        """
        Generate the StyleGAN2 images of the given codes
        :param codes: Tensor of shape NxKx512 representing the StyleGAN's latent codes (in W(K, *) space).
        :return: Tensor of shape NxCxHxW representing the generated images.
        """
        images, _ = self.net.decoder([codes], input_is_latent=True, randomize_noise=False, return_latents=True)
        images = self.net.face_pool(images)
        if self.is_cars:
            images = images[:, :, 32:224, :]
        return images

    @staticmethod
    def _filter_outliers(arr):
        arr = np.array(arr)

        lo = np.percentile(arr, 1, interpolation="lower")
        hi = np.percentile(arr, 99, interpolation="higher")
        return np.extract(
            np.logical_and(lo <= arr, arr <= hi), arr
        )

    def calculate_metric(self, data_loader, edit_function, inverse_edit_function):
        """
        Calculate the LEC metric score.
        :param data_loader: An iterable that returns a tuple of (images, _), similar to the training data loader.
        :param edit_function: A function that receives latent codes and performs a semantically meaningful edit in the
                              latent space.
        :param inverse_edit_function: A function that receives latent codes and performs the inverse edit of the
                                      `edit_function` parameter.
        :return: The LEC metric score.
        """
        distances = []
        with torch.no_grad():
            for batch in data_loader:
                x, _ = batch
                inputs = x.to(device).float()

                codes = self._encode(inputs)
                edited_codes = edit_function(codes)
                edited_image = self._generate(edited_codes)
                edited_image_inversion_codes = self._encode(edited_image)
                inverse_edit_codes = inverse_edit_function(edited_image_inversion_codes)

                dist = (codes - inverse_edit_codes).norm(2, dim=(1, 2)).mean()
                distances.append(dist.to("cpu").numpy())

        distances = self._filter_outliers(distances)
        return distances.mean()


if __name__ == "__main__":
    device = "cuda"

    parser = argparse.ArgumentParser(description="LEC metric calculator")

    parser.add_argument("--batch", type=int, default=8, help="batch size for the models")
    parser.add_argument("--images_dir", type=str, default=None,
                        help="Path to the images directory on which we calculate the LEC score")
    parser.add_argument("ckpt", metavar="CHECKPOINT", help="path to the model checkpoints")

    args = parser.parse_args()
    print(args)

    net, opts = setup_model(args.ckpt, device)
    dataset_args = data_configs.DATASETS[opts.dataset_type]
    transforms_dict = dataset_args['transforms'](opts).get_transforms()

    images_directory = dataset_args['test_source_root'] if args.images_dir is None else args.images_dir
    test_dataset = ImagesDataset(source_root=images_directory,
                                 target_root=images_directory,
                                 source_transform=transforms_dict['transform_source'],
                                 target_transform=transforms_dict['transform_test'],
                                 opts=opts)

    data_loader = DataLoader(test_dataset,
                             batch_size=args.batch,
                             shuffle=False,
                             num_workers=2,
                             drop_last=True)

    print(f'dataset length: {len(test_dataset)}')

    # In the following example, we are using an InterfaceGAN based editing to calculate the LEC metric.
    # Change the provided example according to your domain and needs.
    direction = torch.load('../editings/interfacegan_directions/age.pt').to(device)

    def edit_func_example(codes):
        return codes + 3 * direction


    def inverse_edit_func_example(codes):
        return codes - 3 * direction

    lec = LEC(net, is_cars='car' in opts.dataset_type)
    result = lec.calculate_metric(data_loader, edit_func_example, inverse_edit_func_example)
    print(f"LEC: {result}")
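LEC measures how consistently the encoder round-trips an edit: encode, edit, decode, re-encode, undo the edit, and compare to the original codes with an L2 distance. The class is tied to the full pSp setup, but the data flow can be sketched with dummy encode/decode functions; everything below is illustrative and not part of the commit.

# Data-flow sketch of the LEC round trip with dummy encode/decode functions (illustration only).
import torch

def fake_encode(images):
    # stands in for LEC._encode: map each image to an N x 18 x 512 code (here: a crude slice of pixels)
    flat = images.reshape(images.shape[0], -1)
    return flat[:, :512].unsqueeze(1).repeat(1, 18, 1)

def fake_generate(codes):
    # stands in for LEC._generate: broadcast codes back into an image-shaped tensor
    return codes[:, 0, :3].reshape(-1, 3, 1, 1).expand(-1, 3, 256, 256)

direction = torch.randn(1, 18, 512) * 0.01
edit = lambda c: c + 3 * direction
inverse_edit = lambda c: c - 3 * direction

x = torch.rand(4, 3, 256, 256)
codes = fake_encode(x)
inverse_codes = inverse_edit(fake_encode(fake_generate(edit(codes))))
print(float((codes - inverse_codes).norm(2, dim=(1, 2)).mean()))  # per-image LEC distance, averaged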
e4e/models/__init__.py
ADDED
File without changes
e4e/models/discriminator.py
ADDED
@@ -0,0 +1,20 @@
from torch import nn


class LatentCodesDiscriminator(nn.Module):
    def __init__(self, style_dim, n_mlp):
        super().__init__()

        self.style_dim = style_dim

        layers = []
        for i in range(n_mlp-1):
            layers.append(
                nn.Linear(style_dim, style_dim)
            )
            layers.append(nn.LeakyReLU(0.2))
        layers.append(nn.Linear(512, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, w):
        return self.mlp(w)
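LatentCodesDiscriminator is the small MLP that e4e trains adversarially on individual 512-dim w vectors so the encoder's predictions stay close to StyleGAN's native W distribution. Note the final layer hard-codes an input width of 512, so the module only behaves as intended when style_dim == 512. A quick shape check, as an illustration:

# Shape check for the latent-code discriminator above (illustration only).
import torch
from e4e.models.discriminator import LatentCodesDiscriminator

disc = LatentCodesDiscriminator(style_dim=512, n_mlp=4)

w = torch.randn(8, 512)   # a batch of single w vectors sampled from the mapping network
print(disc(w).shape)      # torch.Size([8, 1]) -- one realness score per latent code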
e4e/models/encoders/__init__.py
ADDED
File without changes
e4e/models/encoders/helpers.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from collections import namedtuple
import torch
import torch.nn.functional as F
from torch.nn import Conv2d, BatchNorm2d, PReLU, ReLU, Sigmoid, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module

"""
ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
"""


class Flatten(Module):
    def forward(self, input):
        return input.view(input.size(0), -1)


def l2_norm(input, axis=1):
    norm = torch.norm(input, 2, axis, True)
    output = torch.div(input, norm)
    return output


class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
    """ A named tuple describing a ResNet block. """


def get_block(in_channel, depth, num_units, stride=2):
    return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]


def get_blocks(num_layers):
    if num_layers == 50:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=4),
            get_block(in_channel=128, depth=256, num_units=14),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 100:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=13),
            get_block(in_channel=128, depth=256, num_units=30),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    elif num_layers == 152:
        blocks = [
            get_block(in_channel=64, depth=64, num_units=3),
            get_block(in_channel=64, depth=128, num_units=8),
            get_block(in_channel=128, depth=256, num_units=36),
            get_block(in_channel=256, depth=512, num_units=3)
        ]
    else:
        raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers))
    return blocks


class SEModule(Module):
    def __init__(self, channels, reduction):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2d(1)
        self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False)
        self.relu = ReLU(inplace=True)
        self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False)
        self.sigmoid = Sigmoid()

    def forward(self, x):
        module_input = x
        x = self.avg_pool(x)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return module_input * x


class bottleneck_IR(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth)
            )
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut


class bottleneck_IR_SE(Module):
    def __init__(self, in_channel, depth, stride):
        super(bottleneck_IR_SE, self).__init__()
        if in_channel == depth:
            self.shortcut_layer = MaxPool2d(1, stride)
        else:
            self.shortcut_layer = Sequential(
                Conv2d(in_channel, depth, (1, 1), stride, bias=False),
                BatchNorm2d(depth)
            )
        self.res_layer = Sequential(
            BatchNorm2d(in_channel),
            Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
            PReLU(depth),
            Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
            BatchNorm2d(depth),
            SEModule(depth, 16)
        )

    def forward(self, x):
        shortcut = self.shortcut_layer(x)
        res = self.res_layer(x)
        return res + shortcut


def _upsample_add(x, y):
    """Upsample and add two feature maps.
    Args:
      x: (Variable) top feature map to be upsampled.
      y: (Variable) lateral feature map.
    Returns:
      (Variable) added feature map.
    Note in PyTorch, when input size is odd, the upsampled feature map
    with `F.upsample(..., scale_factor=2, mode='nearest')`
    maybe not equal to the lateral feature map size.
    e.g.
    original input size: [N,_,15,15] ->
    conv2d feature map size: [N,_,8,8] ->
    upsampled feature map size: [N,_,16,16]
    So we choose bilinear upsample which supports arbitrary output sizes.
    """
    _, _, H, W = y.size()
    return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y
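For reference, a small sketch of what get_blocks() above returns (a hedged illustration, not part of the commit):

# Sketch: how get_blocks() describes the IR-50 trunk used by the encoders below.
from e4e.models.encoders.helpers import get_blocks

blocks = get_blocks(50)
print([len(b) for b in blocks])   # [3, 4, 14, 3] bottleneck units per stage
print(blocks[1][0])               # Bottleneck(in_channel=64, depth=128, stride=2)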
e4e/models/encoders/model_irse.py
ADDED
@@ -0,0 +1,84 @@
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Dropout, Sequential, Module
from e4e.models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE, l2_norm

"""
Modified Backbone implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
"""


class Backbone(Module):
    def __init__(self, input_size, num_layers, mode='ir', drop_ratio=0.4, affine=True):
        super(Backbone, self).__init__()
        assert input_size in [112, 224], "input_size should be 112 or 224"
        assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
        assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        if input_size == 112:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(drop_ratio),
                                           Flatten(),
                                           Linear(512 * 7 * 7, 512),
                                           BatchNorm1d(512, affine=affine))
        else:
            self.output_layer = Sequential(BatchNorm2d(512),
                                           Dropout(drop_ratio),
                                           Flatten(),
                                           Linear(512 * 14 * 14, 512),
                                           BatchNorm1d(512, affine=affine))

        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

    def forward(self, x):
        x = self.input_layer(x)
        x = self.body(x)
        x = self.output_layer(x)
        return l2_norm(x)


def IR_50(input_size):
    """Constructs a ir-50 model."""
    model = Backbone(input_size, num_layers=50, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_101(input_size):
    """Constructs a ir-101 model."""
    model = Backbone(input_size, num_layers=100, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_152(input_size):
    """Constructs a ir-152 model."""
    model = Backbone(input_size, num_layers=152, mode='ir', drop_ratio=0.4, affine=False)
    return model


def IR_SE_50(input_size):
    """Constructs a ir_se-50 model."""
    model = Backbone(input_size, num_layers=50, mode='ir_se', drop_ratio=0.4, affine=False)
    return model


def IR_SE_101(input_size):
    """Constructs a ir_se-101 model."""
    model = Backbone(input_size, num_layers=100, mode='ir_se', drop_ratio=0.4, affine=False)
    return model


def IR_SE_152(input_size):
    """Constructs a ir_se-152 model."""
    model = Backbone(input_size, num_layers=152, mode='ir_se', drop_ratio=0.4, affine=False)
    return model
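For orientation, a minimal sketch of how these constructors are typically used for identity features (not part of the commit; the 112x112 input size follows the assert in Backbone):

import torch
from e4e.models.encoders.model_irse import IR_SE_50

facenet = IR_SE_50(112).eval()          # ArcFace-style backbone, 512-d output
with torch.no_grad():
    emb = facenet(torch.randn(1, 3, 112, 112))
print(emb.shape)                        # torch.Size([1, 512]), L2-normalized by l2_norm()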
e4e/models/encoders/psp_encoders.py
ADDED
@@ -0,0 +1,200 @@
from enum import Enum
import math
import numpy as np
import torch
from torch import nn
from torch.nn import Conv2d, BatchNorm2d, PReLU, Sequential, Module

from e4e.models.encoders.helpers import get_blocks, bottleneck_IR, bottleneck_IR_SE, _upsample_add
from e4e.models.stylegan2.model import EqualLinear


class ProgressiveStage(Enum):
    WTraining = 0
    Delta1Training = 1
    Delta2Training = 2
    Delta3Training = 3
    Delta4Training = 4
    Delta5Training = 5
    Delta6Training = 6
    Delta7Training = 7
    Delta8Training = 8
    Delta9Training = 9
    Delta10Training = 10
    Delta11Training = 11
    Delta12Training = 12
    Delta13Training = 13
    Delta14Training = 14
    Delta15Training = 15
    Delta16Training = 16
    Delta17Training = 17
    Inference = 18


class GradualStyleBlock(Module):
    def __init__(self, in_c, out_c, spatial):
        super(GradualStyleBlock, self).__init__()
        self.out_c = out_c
        self.spatial = spatial
        num_pools = int(np.log2(spatial))
        modules = []
        modules += [Conv2d(in_c, out_c, kernel_size=3, stride=2, padding=1),
                    nn.LeakyReLU()]
        for i in range(num_pools - 1):
            modules += [
                Conv2d(out_c, out_c, kernel_size=3, stride=2, padding=1),
                nn.LeakyReLU()
            ]
        self.convs = nn.Sequential(*modules)
        self.linear = EqualLinear(out_c, out_c, lr_mul=1)

    def forward(self, x):
        x = self.convs(x)
        x = x.view(-1, self.out_c)
        x = self.linear(x)
        return x


class GradualStyleEncoder(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(GradualStyleEncoder, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

        self.styles = nn.ModuleList()
        log_size = int(math.log(opts.stylegan_size, 2))
        self.style_count = 2 * log_size - 2
        self.coarse_ind = 3
        self.middle_ind = 7
        for i in range(self.style_count):
            if i < self.coarse_ind:
                style = GradualStyleBlock(512, 512, 16)
            elif i < self.middle_ind:
                style = GradualStyleBlock(512, 512, 32)
            else:
                style = GradualStyleBlock(512, 512, 64)
            self.styles.append(style)
        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        x = self.input_layer(x)

        latents = []
        modulelist = list(self.body._modules.values())
        for i, l in enumerate(modulelist):
            x = l(x)
            if i == 6:
                c1 = x
            elif i == 20:
                c2 = x
            elif i == 23:
                c3 = x

        for j in range(self.coarse_ind):
            latents.append(self.styles[j](c3))

        p2 = _upsample_add(c3, self.latlayer1(c2))
        for j in range(self.coarse_ind, self.middle_ind):
            latents.append(self.styles[j](p2))

        p1 = _upsample_add(p2, self.latlayer2(c1))
        for j in range(self.middle_ind, self.style_count):
            latents.append(self.styles[j](p1))

        out = torch.stack(latents, dim=1)
        return out


class Encoder4Editing(Module):
    def __init__(self, num_layers, mode='ir', opts=None):
        super(Encoder4Editing, self).__init__()
        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
        blocks = get_blocks(num_layers)
        if mode == 'ir':
            unit_module = bottleneck_IR
        elif mode == 'ir_se':
            unit_module = bottleneck_IR_SE
        self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
                                      BatchNorm2d(64),
                                      PReLU(64))
        modules = []
        for block in blocks:
            for bottleneck in block:
                modules.append(unit_module(bottleneck.in_channel,
                                           bottleneck.depth,
                                           bottleneck.stride))
        self.body = Sequential(*modules)

        self.styles = nn.ModuleList()
        log_size = int(math.log(opts.stylegan_size, 2))
        self.style_count = 2 * log_size - 2
        self.coarse_ind = 3
        self.middle_ind = 7

        for i in range(self.style_count):
            if i < self.coarse_ind:
                style = GradualStyleBlock(512, 512, 16)
            elif i < self.middle_ind:
                style = GradualStyleBlock(512, 512, 32)
            else:
                style = GradualStyleBlock(512, 512, 64)
            self.styles.append(style)

        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)

        self.progressive_stage = ProgressiveStage.Inference

    def get_deltas_starting_dimensions(self):
        ''' Get a list of the initial dimension of every delta from which it is applied '''
        return list(range(self.style_count))  # Each dimension has a delta applied to it

    def set_progressive_stage(self, new_stage: ProgressiveStage):
        self.progressive_stage = new_stage
        print('Changed progressive stage to: ', new_stage)

    def forward(self, x):
        x = self.input_layer(x)

        modulelist = list(self.body._modules.values())
        for i, l in enumerate(modulelist):
            x = l(x)
            if i == 6:
                c1 = x
            elif i == 20:
                c2 = x
            elif i == 23:
                c3 = x

        # Infer main W and duplicate it
        w0 = self.styles[0](c3)
        w = w0.repeat(self.style_count, 1, 1).permute(1, 0, 2)
        stage = self.progressive_stage.value
        features = c3
        for i in range(1, min(stage + 1, self.style_count)):  # Infer additional deltas
            if i == self.coarse_ind:
                p2 = _upsample_add(c3, self.latlayer1(c2))  # FPN's middle features
                features = p2
            elif i == self.middle_ind:
                p1 = _upsample_add(p2, self.latlayer2(c1))  # FPN's fine features
                features = p1
            delta_i = self.styles[i](features)
            w[:, i] += delta_i
        return w
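A minimal sketch of the encoder's output shape (not part of the commit; the opts namespace below is hypothetical and only needs the stylegan_size field read above):

from argparse import Namespace
import torch
from e4e.models.encoders.psp_encoders import Encoder4Editing

opts = Namespace(stylegan_size=1024)            # hypothetical options object
encoder = Encoder4Editing(50, 'ir_se', opts).eval()
with torch.no_grad():
    w = encoder(torch.randn(1, 3, 256, 256))
print(w.shape)                                  # torch.Size([1, 18, 512]): one w per StyleGAN layer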
e4e/models/latent_codes_pool.py
ADDED
@@ -0,0 +1,55 @@
import random
import torch


class LatentCodesPool:
    """This class implements a latent codes buffer that stores previously generated w latent codes.
    This buffer enables us to update discriminators using a history of generated w's
    rather than the ones produced by the latest encoder.
    """

    def __init__(self, pool_size):
        """Initialize the ImagePool class
        Parameters:
            pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created
        """
        self.pool_size = pool_size
        if self.pool_size > 0:  # create an empty pool
            self.num_ws = 0
            self.ws = []

    def query(self, ws):
        """Return w's from the pool.
        Parameters:
            ws: the latest generated w's from the generator
        Returns w's from the buffer.
        With 50% probability, the buffer returns the input w's.
        With 50% probability, the buffer returns w's previously stored in the buffer,
        and inserts the current w's into the buffer.
        """
        if self.pool_size == 0:  # if the buffer size is 0, do nothing
            return ws
        return_ws = []
        for w in ws:  # ws.shape: (batch, 512) or (batch, n_latent, 512)
            # w = torch.unsqueeze(image.data, 0)
            if w.ndim == 2:
                i = random.randint(0, len(w) - 1)  # apply a random latent index as a candidate
                w = w[i]
            self.handle_w(w, return_ws)
        return_ws = torch.stack(return_ws, 0)  # collect all the images and return
        return return_ws

    def handle_w(self, w, return_ws):
        if self.num_ws < self.pool_size:  # if the buffer is not full; keep inserting current codes to the buffer
            self.num_ws = self.num_ws + 1
            self.ws.append(w)
            return_ws.append(w)
        else:
            p = random.uniform(0, 1)
            if p > 0.5:  # by 50% chance, the buffer will return a previously stored latent code, and insert the current code into the buffer
                random_id = random.randint(0, self.pool_size - 1)  # randint is inclusive
                tmp = self.ws[random_id].clone()
                self.ws[random_id] = w
                return_ws.append(tmp)
            else:  # by another 50% chance, the buffer will return the current code
                return_ws.append(w)
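A minimal usage sketch for the buffer (not part of the commit; shapes are illustrative):

import torch
from e4e.models.latent_codes_pool import LatentCodesPool

pool = LatentCodesPool(pool_size=50)
fake_w = torch.randn(4, 18, 512)        # a batch of freshly encoded w+ codes
w_for_disc = pool.query(fake_w)         # mix of current and previously stored codes
print(w_for_disc.shape)                 # torch.Size([4, 512]); a random layer is sampled per w+ code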
e4e/models/psp.py
ADDED
@@ -0,0 +1,99 @@
import matplotlib

matplotlib.use('Agg')
import torch
from torch import nn
from e4e.models.encoders import psp_encoders
from e4e.models.stylegan2.model import Generator
from e4e.configs.paths_config import model_paths


def get_keys(d, name):
    if 'state_dict' in d:
        d = d['state_dict']
    d_filt = {k[len(name) + 1:]: v for k, v in d.items() if k[:len(name)] == name}
    return d_filt


class pSp(nn.Module):

    def __init__(self, opts, device):
        super(pSp, self).__init__()
        self.opts = opts
        self.device = device
        # Define architecture
        self.encoder = self.set_encoder()
        self.decoder = Generator(opts.stylegan_size, 512, 8, channel_multiplier=2)
        self.face_pool = torch.nn.AdaptiveAvgPool2d((256, 256))
        # Load weights if needed
        self.load_weights()

    def set_encoder(self):
        if self.opts.encoder_type == 'GradualStyleEncoder':
            encoder = psp_encoders.GradualStyleEncoder(50, 'ir_se', self.opts)
        elif self.opts.encoder_type == 'Encoder4Editing':
            encoder = psp_encoders.Encoder4Editing(50, 'ir_se', self.opts)
        else:
            raise Exception('{} is not a valid encoder type'.format(self.opts.encoder_type))
        return encoder

    def load_weights(self):
        if self.opts.checkpoint_path is not None:
            print('Loading e4e over the pSp framework from checkpoint: {}'.format(self.opts.checkpoint_path))
            ckpt = torch.load(self.opts.checkpoint_path, map_location='cpu')
            self.encoder.load_state_dict(get_keys(ckpt, 'encoder'), strict=True)
            self.decoder.load_state_dict(get_keys(ckpt, 'decoder'), strict=True)
            self.__load_latent_avg(ckpt)
        else:
            print('Loading encoders weights from irse50!')
            encoder_ckpt = torch.load(model_paths['ir_se50'])
            self.encoder.load_state_dict(encoder_ckpt, strict=False)
            print('Loading decoder weights from pretrained!')
            ckpt = torch.load(self.opts.stylegan_weights)
            self.decoder.load_state_dict(ckpt['g_ema'], strict=False)
            self.__load_latent_avg(ckpt, repeat=self.encoder.style_count)

    def forward(self, x, resize=True, latent_mask=None, input_code=False, randomize_noise=True,
                inject_latent=None, return_latents=False, alpha=None):
        if input_code:
            codes = x
        else:
            codes = self.encoder(x)
            # normalize with respect to the center of an average face
            if self.opts.start_from_latent_avg:
                if codes.ndim == 2:
                    codes = codes + self.latent_avg.repeat(codes.shape[0], 1, 1)[:, 0, :]
                else:
                    codes = codes + self.latent_avg.repeat(codes.shape[0], 1, 1)

        if latent_mask is not None:
            for i in latent_mask:
                if inject_latent is not None:
                    if alpha is not None:
                        codes[:, i] = alpha * inject_latent[:, i] + (1 - alpha) * codes[:, i]
                    else:
                        codes[:, i] = inject_latent[:, i]
                else:
                    codes[:, i] = 0

        input_is_latent = not input_code
        images, result_latent = self.decoder([codes],
                                             input_is_latent=input_is_latent,
                                             randomize_noise=randomize_noise,
                                             return_latents=return_latents)

        if resize:
            images = self.face_pool(images)

        if return_latents:
            return images, result_latent
        else:
            return images

    def __load_latent_avg(self, ckpt, repeat=None):
        if 'latent_avg' in ckpt:
            self.latent_avg = ckpt['latent_avg'].to(self.device)
            if repeat is not None:
                self.latent_avg = self.latent_avg.repeat(repeat, 1)
        else:
            self.latent_avg = None
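For orientation, a minimal inference sketch for this wrapper (not part of the commit; the checkpoint filename and the exact option fields stored under ckpt['opts'] are assumptions):

import torch
from argparse import Namespace
from e4e.models.psp import pSp

ckpt_path = 'e4e_ffhq_encode.pt'                  # hypothetical checkpoint file
ckpt = torch.load(ckpt_path, map_location='cpu')
opts = ckpt['opts']                               # assumed to be a dict of training options
opts['checkpoint_path'] = ckpt_path
net = pSp(Namespace(**opts), device='cuda').eval().cuda()
with torch.no_grad():
    images, latents = net(torch.randn(1, 3, 256, 256).cuda(),
                          randomize_noise=False, return_latents=True)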
e4e/models/stylegan2/__init__.py
ADDED
File without changes
|
e4e/models/stylegan2/model.py
ADDED
@@ -0,0 +1,678 @@
import math
import random
import torch
from torch import nn
from torch.nn import functional as F

if torch.cuda.is_available():
    from op.fused_act import FusedLeakyReLU, fused_leaky_relu
    from op.upfirdn2d import upfirdn2d
else:
    from op.fused_act_cpu import FusedLeakyReLU, fused_leaky_relu
    from op.upfirdn2d_cpu import upfirdn2d


class PixelNorm(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, input):
        return input * torch.rsqrt(torch.mean(input ** 2, dim=1, keepdim=True) + 1e-8)


def make_kernel(k):
    k = torch.tensor(k, dtype=torch.float32)

    if k.ndim == 1:
        k = k[None, :] * k[:, None]

    k /= k.sum()

    return k


class Upsample(nn.Module):
    def __init__(self, kernel, factor=2):
        super().__init__()

        self.factor = factor
        kernel = make_kernel(kernel) * (factor ** 2)
        self.register_buffer('kernel', kernel)

        p = kernel.shape[0] - factor

        pad0 = (p + 1) // 2 + factor - 1
        pad1 = p // 2

        self.pad = (pad0, pad1)

    def forward(self, input):
        out = upfirdn2d(input, self.kernel, up=self.factor, down=1, pad=self.pad)

        return out


class Downsample(nn.Module):
    def __init__(self, kernel, factor=2):
        super().__init__()

        self.factor = factor
        kernel = make_kernel(kernel)
        self.register_buffer('kernel', kernel)

        p = kernel.shape[0] - factor

        pad0 = (p + 1) // 2
        pad1 = p // 2

        self.pad = (pad0, pad1)

    def forward(self, input):
        out = upfirdn2d(input, self.kernel, up=1, down=self.factor, pad=self.pad)

        return out


class Blur(nn.Module):
    def __init__(self, kernel, pad, upsample_factor=1):
        super().__init__()

        kernel = make_kernel(kernel)

        if upsample_factor > 1:
            kernel = kernel * (upsample_factor ** 2)

        self.register_buffer('kernel', kernel)

        self.pad = pad

    def forward(self, input):
        out = upfirdn2d(input, self.kernel, pad=self.pad)

        return out


class EqualConv2d(nn.Module):
    def __init__(
            self, in_channel, out_channel, kernel_size, stride=1, padding=0, bias=True
    ):
        super().__init__()

        self.weight = nn.Parameter(
            torch.randn(out_channel, in_channel, kernel_size, kernel_size)
        )
        self.scale = 1 / math.sqrt(in_channel * kernel_size ** 2)

        self.stride = stride
        self.padding = padding

        if bias:
            self.bias = nn.Parameter(torch.zeros(out_channel))

        else:
            self.bias = None

    def forward(self, input):
        out = F.conv2d(
            input,
            self.weight * self.scale,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
        )

        return out

    def __repr__(self):
        return (
            f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]},'
            f' {self.weight.shape[2]}, stride={self.stride}, padding={self.padding})'
        )


class EqualLinear(nn.Module):
    def __init__(
            self, in_dim, out_dim, bias=True, bias_init=0, lr_mul=1, activation=None
    ):
        super().__init__()

        self.weight = nn.Parameter(torch.randn(out_dim, in_dim).div_(lr_mul))

        if bias:
            self.bias = nn.Parameter(torch.zeros(out_dim).fill_(bias_init))

        else:
            self.bias = None

        self.activation = activation

        self.scale = (1 / math.sqrt(in_dim)) * lr_mul
        self.lr_mul = lr_mul

    def forward(self, input):
        if self.activation:
            out = F.linear(input, self.weight * self.scale)
            out = fused_leaky_relu(out, self.bias * self.lr_mul)

        else:
            out = F.linear(
                input, self.weight * self.scale, bias=self.bias * self.lr_mul
            )

        return out

    def __repr__(self):
        return (
            f'{self.__class__.__name__}({self.weight.shape[1]}, {self.weight.shape[0]})'
        )


class ScaledLeakyReLU(nn.Module):
    def __init__(self, negative_slope=0.2):
        super().__init__()

        self.negative_slope = negative_slope

    def forward(self, input):
        out = F.leaky_relu(input, negative_slope=self.negative_slope)

        return out * math.sqrt(2)


class ModulatedConv2d(nn.Module):
    def __init__(
            self,
            in_channel,
            out_channel,
            kernel_size,
            style_dim,
            demodulate=True,
            upsample=False,
            downsample=False,
            blur_kernel=[1, 3, 3, 1],
    ):
        super().__init__()

        self.eps = 1e-8
        self.kernel_size = kernel_size
        self.in_channel = in_channel
        self.out_channel = out_channel
        self.upsample = upsample
        self.downsample = downsample

        if upsample:
            factor = 2
            p = (len(blur_kernel) - factor) - (kernel_size - 1)
            pad0 = (p + 1) // 2 + factor - 1
            pad1 = p // 2 + 1

            self.blur = Blur(blur_kernel, pad=(pad0, pad1), upsample_factor=factor)

        if downsample:
            factor = 2
            p = (len(blur_kernel) - factor) + (kernel_size - 1)
            pad0 = (p + 1) // 2
            pad1 = p // 2

            self.blur = Blur(blur_kernel, pad=(pad0, pad1))

        fan_in = in_channel * kernel_size ** 2
        self.scale = 1 / math.sqrt(fan_in)
        self.padding = kernel_size // 2

        self.weight = nn.Parameter(
            torch.randn(1, out_channel, in_channel, kernel_size, kernel_size)
        )

        self.modulation = EqualLinear(style_dim, in_channel, bias_init=1)

        self.demodulate = demodulate

    def __repr__(self):
        return (
            f'{self.__class__.__name__}({self.in_channel}, {self.out_channel}, {self.kernel_size}, '
            f'upsample={self.upsample}, downsample={self.downsample})'
        )

    def forward(self, input, style):
        batch, in_channel, height, width = input.shape

        style = self.modulation(style).view(batch, 1, in_channel, 1, 1)
        weight = self.scale * self.weight * style

        if self.demodulate:
            demod = torch.rsqrt(weight.pow(2).sum([2, 3, 4]) + 1e-8)
            weight = weight * demod.view(batch, self.out_channel, 1, 1, 1)

        weight = weight.view(
            batch * self.out_channel, in_channel, self.kernel_size, self.kernel_size
        )

        if self.upsample:
            input = input.view(1, batch * in_channel, height, width)
            weight = weight.view(
                batch, self.out_channel, in_channel, self.kernel_size, self.kernel_size
            )
            weight = weight.transpose(1, 2).reshape(
                batch * in_channel, self.out_channel, self.kernel_size, self.kernel_size
            )
            out = F.conv_transpose2d(input, weight, padding=0, stride=2, groups=batch)
            _, _, height, width = out.shape
            out = out.view(batch, self.out_channel, height, width)
            out = self.blur(out)

        elif self.downsample:
            input = self.blur(input)
            _, _, height, width = input.shape
            input = input.view(1, batch * in_channel, height, width)
            out = F.conv2d(input, weight, padding=0, stride=2, groups=batch)
            _, _, height, width = out.shape
            out = out.view(batch, self.out_channel, height, width)

        else:
            input = input.view(1, batch * in_channel, height, width)
            out = F.conv2d(input, weight, padding=self.padding, groups=batch)
            _, _, height, width = out.shape
            out = out.view(batch, self.out_channel, height, width)

        return out


class NoiseInjection(nn.Module):
    def __init__(self):
        super().__init__()

        self.weight = nn.Parameter(torch.zeros(1))

    def forward(self, image, noise=None):
        if noise is None:
            batch, _, height, width = image.shape
            noise = image.new_empty(batch, 1, height, width).normal_()

        return image + self.weight * noise


class ConstantInput(nn.Module):
    def __init__(self, channel, size=4):
        super().__init__()

        self.input = nn.Parameter(torch.randn(1, channel, size, size))

    def forward(self, input):
        batch = input.shape[0]
        out = self.input.repeat(batch, 1, 1, 1)

        return out


class StyledConv(nn.Module):
    def __init__(
            self,
            in_channel,
            out_channel,
            kernel_size,
            style_dim,
            upsample=False,
            blur_kernel=[1, 3, 3, 1],
            demodulate=True,
    ):
        super().__init__()

        self.conv = ModulatedConv2d(
            in_channel,
            out_channel,
            kernel_size,
            style_dim,
            upsample=upsample,
            blur_kernel=blur_kernel,
            demodulate=demodulate,
        )

        self.noise = NoiseInjection()
        # self.bias = nn.Parameter(torch.zeros(1, out_channel, 1, 1))
        # self.activate = ScaledLeakyReLU(0.2)
        self.activate = FusedLeakyReLU(out_channel)

    def forward(self, input, style, noise=None):
        out = self.conv(input, style)
        out = self.noise(out, noise=noise)
        # out = out + self.bias
        out = self.activate(out)

        return out


class ToRGB(nn.Module):
    def __init__(self, in_channel, style_dim, upsample=True, blur_kernel=[1, 3, 3, 1]):
        super().__init__()

        if upsample:
            self.upsample = Upsample(blur_kernel)

        self.conv = ModulatedConv2d(in_channel, 3, 1, style_dim, demodulate=False)
        self.bias = nn.Parameter(torch.zeros(1, 3, 1, 1))

    def forward(self, input, style, skip=None):
        out = self.conv(input, style)
        out = out + self.bias

        if skip is not None:
            skip = self.upsample(skip)

            out = out + skip

        return out


class Generator(nn.Module):
    def __init__(
            self,
            size,
            style_dim,
            n_mlp,
            channel_multiplier=2,
            blur_kernel=[1, 3, 3, 1],
            lr_mlp=0.01,
    ):
        super().__init__()

        self.size = size

        self.style_dim = style_dim

        layers = [PixelNorm()]

        for i in range(n_mlp):
            layers.append(
                EqualLinear(
                    style_dim, style_dim, lr_mul=lr_mlp, activation='fused_lrelu'
                )
            )

        self.style = nn.Sequential(*layers)

        self.channels = {
            4: 512,
            8: 512,
            16: 512,
            32: 512,
            64: 256 * channel_multiplier,
            128: 128 * channel_multiplier,
            256: 64 * channel_multiplier,
            512: 32 * channel_multiplier,
            1024: 16 * channel_multiplier,
        }

        self.input = ConstantInput(self.channels[4])
        self.conv1 = StyledConv(
            self.channels[4], self.channels[4], 3, style_dim, blur_kernel=blur_kernel
        )
        self.to_rgb1 = ToRGB(self.channels[4], style_dim, upsample=False)

        self.log_size = int(math.log(size, 2))
        self.num_layers = (self.log_size - 2) * 2 + 1

        self.convs = nn.ModuleList()
        self.upsamples = nn.ModuleList()
        self.to_rgbs = nn.ModuleList()
        self.noises = nn.Module()

        in_channel = self.channels[4]

        for layer_idx in range(self.num_layers):
            res = (layer_idx + 5) // 2
            shape = [1, 1, 2 ** res, 2 ** res]
            self.noises.register_buffer(f'noise_{layer_idx}', torch.randn(*shape))

        for i in range(3, self.log_size + 1):
            out_channel = self.channels[2 ** i]

            self.convs.append(
                StyledConv(
                    in_channel,
                    out_channel,
                    3,
                    style_dim,
                    upsample=True,
                    blur_kernel=blur_kernel,
                )
            )

            self.convs.append(
                StyledConv(
                    out_channel, out_channel, 3, style_dim, blur_kernel=blur_kernel
                )
            )

            self.to_rgbs.append(ToRGB(out_channel, style_dim))

            in_channel = out_channel

        self.n_latent = self.log_size * 2 - 2

    def make_noise(self):
        device = self.input.input.device

        noises = [torch.randn(1, 1, 2 ** 2, 2 ** 2, device=device)]

        for i in range(3, self.log_size + 1):
            for _ in range(2):
                noises.append(torch.randn(1, 1, 2 ** i, 2 ** i, device=device))

        return noises

    def mean_latent(self, n_latent):
        latent_in = torch.randn(
            n_latent, self.style_dim, device=self.input.input.device
        )
        latent = self.style(latent_in).mean(0, keepdim=True)

        return latent

    def get_latent(self, input):
        return self.style(input)

    def forward(
            self,
            styles,
            return_latents=False,
            return_features=False,
            inject_index=None,
            truncation=1,
            truncation_latent=None,
            input_is_latent=False,
            noise=None,
            randomize_noise=True,
    ):
        if not input_is_latent:
            styles = [self.style(s) for s in styles]

        if noise is None:
            if randomize_noise:
                noise = [None] * self.num_layers
            else:
                noise = [
                    getattr(self.noises, f'noise_{i}') for i in range(self.num_layers)
                ]

        if truncation < 1:
            style_t = []

            for style in styles:
                style_t.append(
                    truncation_latent + truncation * (style - truncation_latent)
                )

            styles = style_t

        if len(styles) < 2:
            inject_index = self.n_latent

            if styles[0].ndim < 3:
                latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            else:
                latent = styles[0]

        else:
            if inject_index is None:
                inject_index = random.randint(1, self.n_latent - 1)

            latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1)
            latent2 = styles[1].unsqueeze(1).repeat(1, self.n_latent - inject_index, 1)

            latent = torch.cat([latent, latent2], 1)

        out = self.input(latent)
        out = self.conv1(out, latent[:, 0], noise=noise[0])

        skip = self.to_rgb1(out, latent[:, 1])

        i = 1
        for conv1, conv2, noise1, noise2, to_rgb in zip(
                self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs
        ):
            out = conv1(out, latent[:, i], noise=noise1)
            out = conv2(out, latent[:, i + 1], noise=noise2)
            skip = to_rgb(out, latent[:, i + 2], skip)

            i += 2

        image = skip

        if return_latents:
            return image, latent
        elif return_features:
            return image, out
        else:
            return image, None


class ConvLayer(nn.Sequential):
    def __init__(
            self,
            in_channel,
            out_channel,
            kernel_size,
            downsample=False,
            blur_kernel=[1, 3, 3, 1],
            bias=True,
            activate=True,
    ):
        layers = []

        if downsample:
            factor = 2
            p = (len(blur_kernel) - factor) + (kernel_size - 1)
            pad0 = (p + 1) // 2
            pad1 = p // 2

            layers.append(Blur(blur_kernel, pad=(pad0, pad1)))

            stride = 2
            self.padding = 0

        else:
            stride = 1
            self.padding = kernel_size // 2

        layers.append(
            EqualConv2d(
                in_channel,
                out_channel,
                kernel_size,
                padding=self.padding,
                stride=stride,
                bias=bias and not activate,
            )
        )

        if activate:
            if bias:
                layers.append(FusedLeakyReLU(out_channel))

            else:
                layers.append(ScaledLeakyReLU(0.2))

        super().__init__(*layers)


class ResBlock(nn.Module):
    def __init__(self, in_channel, out_channel, blur_kernel=[1, 3, 3, 1]):
        super().__init__()

        self.conv1 = ConvLayer(in_channel, in_channel, 3)
        self.conv2 = ConvLayer(in_channel, out_channel, 3, downsample=True)

        self.skip = ConvLayer(
            in_channel, out_channel, 1, downsample=True, activate=False, bias=False
        )

    def forward(self, input):
        out = self.conv1(input)
        out = self.conv2(out)

        skip = self.skip(input)
        out = (out + skip) / math.sqrt(2)

        return out


class Discriminator(nn.Module):
    def __init__(self, size, channel_multiplier=2, blur_kernel=[1, 3, 3, 1]):
        super().__init__()

        channels = {
            4: 512,
            8: 512,
            16: 512,
            32: 512,
            64: 256 * channel_multiplier,
            128: 128 * channel_multiplier,
            256: 64 * channel_multiplier,
            512: 32 * channel_multiplier,
            1024: 16 * channel_multiplier,
        }

        convs = [ConvLayer(3, channels[size], 1)]

        log_size = int(math.log(size, 2))

        in_channel = channels[size]

        for i in range(log_size, 2, -1):
            out_channel = channels[2 ** (i - 1)]

            convs.append(ResBlock(in_channel, out_channel, blur_kernel))

            in_channel = out_channel

        self.convs = nn.Sequential(*convs)

        self.stddev_group = 4
        self.stddev_feat = 1

        self.final_conv = ConvLayer(in_channel + 1, channels[4], 3)
        self.final_linear = nn.Sequential(
            EqualLinear(channels[4] * 4 * 4, channels[4], activation='fused_lrelu'),
            EqualLinear(channels[4], 1),
        )

    def forward(self, input):
        out = self.convs(input)

        batch, channel, height, width = out.shape
        group = min(batch, self.stddev_group)
        stddev = out.view(
            group, -1, self.stddev_feat, channel // self.stddev_feat, height, width
        )
        stddev = torch.sqrt(stddev.var(0, unbiased=False) + 1e-8)
        stddev = stddev.mean([2, 3, 4], keepdims=True).squeeze(2)
        stddev = stddev.repeat(group, 1, height, width)
        out = torch.cat([out, stddev], 1)

        out = self.final_conv(out)

        out = out.view(batch, -1)
        out = self.final_linear(out)

        return out
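A minimal sampling sketch for the generator defined above (not part of the commit; it assumes a CUDA device since the fused ops are compiled as CUDA extensions, and the weights here are random):

import torch
from e4e.models.stylegan2.model import Generator

g = Generator(size=1024, style_dim=512, n_mlp=8).eval().cuda()
z = torch.randn(1, 512, device='cuda')              # latent code in Z
with torch.no_grad():
    img, w_plus = g([z], return_latents=True)       # img: (1, 3, 1024, 1024); w_plus: (1, 18, 512)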
e4e/models/stylegan2/op/__init__.py
ADDED
File without changes
|
e4e/models/stylegan2/op/fused_act.py
ADDED
@@ -0,0 +1,85 @@
import os

import torch
from torch import nn
from torch.autograd import Function
from torch.utils.cpp_extension import load

module_path = os.path.dirname(__file__)
fused = load(
    'fused',
    sources=[
        os.path.join(module_path, 'fused_bias_act.cpp'),
        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
    ],
)


class FusedLeakyReLUFunctionBackward(Function):
    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        empty = grad_output.new_empty(0)

        grad_input = fused.fused_bias_act(
            grad_output, empty, out, 3, 1, negative_slope, scale
        )

        dim = [0]

        if grad_input.ndim > 2:
            dim += list(range(2, grad_input.ndim))

        grad_bias = grad_input.sum(dim).detach()

        return grad_input, grad_bias

    @staticmethod
    def backward(ctx, gradgrad_input, gradgrad_bias):
        out, = ctx.saved_tensors
        gradgrad_out = fused.fused_bias_act(
            gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale
        )

        return gradgrad_out, None, None, None


class FusedLeakyReLUFunction(Function):
    @staticmethod
    def forward(ctx, input, bias, negative_slope, scale):
        empty = input.new_empty(0)
        out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale)
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        return out

    @staticmethod
    def backward(ctx, grad_output):
        out, = ctx.saved_tensors

        grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply(
            grad_output, out, ctx.negative_slope, ctx.scale
        )

        return grad_input, grad_bias, None, None


class FusedLeakyReLU(nn.Module):
    def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5):
        super().__init__()

        self.bias = nn.Parameter(torch.zeros(channel))
        self.negative_slope = negative_slope
        self.scale = scale

    def forward(self, input):
        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)


def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5):
    return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)
e4e/models/stylegan2/op/fused_bias_act.cpp
ADDED
@@ -0,0 +1,21 @@
#include <torch/extension.h>


torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
                                int act, int grad, float alpha, float scale);

#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

torch::Tensor fused_bias_act(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
                             int act, int grad, float alpha, float scale) {
    CHECK_CUDA(input);
    CHECK_CUDA(bias);

    return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)");
}
e4e/models/stylegan2/op/fused_bias_act_kernel.cu
ADDED
@@ -0,0 +1,99 @@
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html

#include <torch/types.h>

#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>

#include <cuda.h>
#include <cuda_runtime.h>


template <typename scalar_t>
static __global__ void fused_bias_act_kernel(scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref,
    int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) {
    int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x;

    scalar_t zero = 0.0;

    for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) {
        scalar_t x = p_x[xi];

        if (use_bias) {
            x += p_b[(xi / step_b) % size_b];
        }

        scalar_t ref = use_ref ? p_ref[xi] : zero;

        scalar_t y;

        switch (act * 10 + grad) {
            default:
            case 10: y = x; break;
            case 11: y = x; break;
            case 12: y = 0.0; break;

            case 30: y = (x > 0.0) ? x : x * alpha; break;
            case 31: y = (ref > 0.0) ? x : x * alpha; break;
            case 32: y = 0.0; break;
        }

        out[xi] = y * scale;
    }
}


torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
                                int act, int grad, float alpha, float scale) {
    int curDevice = -1;
    cudaGetDevice(&curDevice);
    cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);

    auto x = input.contiguous();
    auto b = bias.contiguous();
    auto ref = refer.contiguous();

    int use_bias = b.numel() ? 1 : 0;
    int use_ref = ref.numel() ? 1 : 0;

    int size_x = x.numel();
    int size_b = b.numel();
    int step_b = 1;

    for (int i = 1 + 1; i < x.dim(); i++) {
        step_b *= x.size(i);
    }

    int loop_x = 4;
    int block_size = 4 * 32;
    int grid_size = (size_x - 1) / (loop_x * block_size) + 1;

    auto y = torch::empty_like(x);

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "fused_bias_act_kernel", [&] {
        fused_bias_act_kernel<scalar_t><<<grid_size, block_size, 0, stream>>>(
            y.data_ptr<scalar_t>(),
            x.data_ptr<scalar_t>(),
            b.data_ptr<scalar_t>(),
            ref.data_ptr<scalar_t>(),
            act,
            grad,
            alpha,
            scale,
            loop_x,
            size_x,
            step_b,
            size_b,
            use_bias,
            use_ref
        );
    });

    return y;
}
e4e/models/stylegan2/op/upfirdn2d.cpp
ADDED
@@ -0,0 +1,23 @@
#include <torch/extension.h>


torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel,
                           int up_x, int up_y, int down_x, int down_y,
                           int pad_x0, int pad_x1, int pad_y0, int pad_y1);

#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
                        int up_x, int up_y, int down_x, int down_y,
                        int pad_x0, int pad_x1, int pad_y0, int pad_y1) {
    CHECK_CUDA(input);
    CHECK_CUDA(kernel);

    return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)");
}
e4e/models/stylegan2/op/upfirdn2d.py
ADDED
@@ -0,0 +1,184 @@
+import os
+
+import torch
+from torch.nn import functional as F  # needed by F.pad / F.conv2d in upfirdn2d_native below
+from torch.autograd import Function
+from torch.utils.cpp_extension import load
+
+module_path = os.path.dirname(__file__)
+upfirdn2d_op = load(
+    'upfirdn2d',
+    sources=[
+        os.path.join(module_path, 'upfirdn2d.cpp'),
+        os.path.join(module_path, 'upfirdn2d_kernel.cu'),
+    ],
+)
+
+
+class UpFirDn2dBackward(Function):
+    @staticmethod
+    def forward(
+        ctx, grad_output, kernel, grad_kernel, up, down, pad, g_pad, in_size, out_size
+    ):
+        up_x, up_y = up
+        down_x, down_y = down
+        g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1 = g_pad
+
+        grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)
+
+        grad_input = upfirdn2d_op.upfirdn2d(
+            grad_output,
+            grad_kernel,
+            down_x,
+            down_y,
+            up_x,
+            up_y,
+            g_pad_x0,
+            g_pad_x1,
+            g_pad_y0,
+            g_pad_y1,
+        )
+        grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3])
+
+        ctx.save_for_backward(kernel)
+
+        pad_x0, pad_x1, pad_y0, pad_y1 = pad
+
+        ctx.up_x = up_x
+        ctx.up_y = up_y
+        ctx.down_x = down_x
+        ctx.down_y = down_y
+        ctx.pad_x0 = pad_x0
+        ctx.pad_x1 = pad_x1
+        ctx.pad_y0 = pad_y0
+        ctx.pad_y1 = pad_y1
+        ctx.in_size = in_size
+        ctx.out_size = out_size
+
+        return grad_input
+
+    @staticmethod
+    def backward(ctx, gradgrad_input):
+        kernel, = ctx.saved_tensors
+
+        gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1)
+
+        gradgrad_out = upfirdn2d_op.upfirdn2d(
+            gradgrad_input,
+            kernel,
+            ctx.up_x,
+            ctx.up_y,
+            ctx.down_x,
+            ctx.down_y,
+            ctx.pad_x0,
+            ctx.pad_x1,
+            ctx.pad_y0,
+            ctx.pad_y1,
+        )
+        # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], ctx.out_size[1], ctx.in_size[3])
+        gradgrad_out = gradgrad_out.view(
+            ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]
+        )
+
+        return gradgrad_out, None, None, None, None, None, None, None, None
+
+
+class UpFirDn2d(Function):
+    @staticmethod
+    def forward(ctx, input, kernel, up, down, pad):
+        up_x, up_y = up
+        down_x, down_y = down
+        pad_x0, pad_x1, pad_y0, pad_y1 = pad
+
+        kernel_h, kernel_w = kernel.shape
+        batch, channel, in_h, in_w = input.shape
+        ctx.in_size = input.shape
+
+        input = input.reshape(-1, in_h, in_w, 1)
+
+        ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))
+
+        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
+        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
+        ctx.out_size = (out_h, out_w)
+
+        ctx.up = (up_x, up_y)
+        ctx.down = (down_x, down_y)
+        ctx.pad = (pad_x0, pad_x1, pad_y0, pad_y1)
+
+        g_pad_x0 = kernel_w - pad_x0 - 1
+        g_pad_y0 = kernel_h - pad_y0 - 1
+        g_pad_x1 = in_w * up_x - out_w * down_x + pad_x0 - up_x + 1
+        g_pad_y1 = in_h * up_y - out_h * down_y + pad_y0 - up_y + 1
+
+        ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)
+
+        out = upfirdn2d_op.upfirdn2d(
+            input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+        )
+        # out = out.view(major, out_h, out_w, minor)
+        out = out.view(-1, channel, out_h, out_w)
+
+        return out
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        kernel, grad_kernel = ctx.saved_tensors
+
+        grad_input = UpFirDn2dBackward.apply(
+            grad_output,
+            kernel,
+            grad_kernel,
+            ctx.up,
+            ctx.down,
+            ctx.pad,
+            ctx.g_pad,
+            ctx.in_size,
+            ctx.out_size,
+        )
+
+        return grad_input, None, None, None, None
+
+
+def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+    out = UpFirDn2d.apply(
+        input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
+    )
+
+    return out
+
+
+def upfirdn2d_native(
+    input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+):
+    _, in_h, in_w, minor = input.shape
+    kernel_h, kernel_w = kernel.shape
+
+    out = input.view(-1, in_h, 1, in_w, 1, minor)
+    out = F.pad(out, [0, 0, 0, up_x - 1, 0, 0, 0, up_y - 1])
+    out = out.view(-1, in_h * up_y, in_w * up_x, minor)
+
+    out = F.pad(
+        out, [0, 0, max(pad_x0, 0), max(pad_x1, 0), max(pad_y0, 0), max(pad_y1, 0)]
+    )
+    out = out[
+        :,
+        max(-pad_y0, 0): out.shape[1] - max(-pad_y1, 0),
+        max(-pad_x0, 0): out.shape[2] - max(-pad_x1, 0),
+        :,
+    ]
+
+    out = out.permute(0, 3, 1, 2)
+    out = out.reshape(
+        [-1, 1, in_h * up_y + pad_y0 + pad_y1, in_w * up_x + pad_x0 + pad_x1]
+    )
+    w = torch.flip(kernel, [0, 1]).view(1, 1, kernel_h, kernel_w)
+    out = F.conv2d(out, w)
+    out = out.reshape(
+        -1,
+        minor,
+        in_h * up_y + pad_y0 + pad_y1 - kernel_h + 1,
+        in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
+    )
+    out = out.permute(0, 2, 3, 1)
+
+    return out[:, ::down_y, ::down_x, :]
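Usage sketch (illustrative only, not part of the file above): the upfirdn2d wrapper is what StyleGAN2's Upsample/Blur layers call. The snippet assumes the extension compiles, a CUDA device, and that the op package is importable as models.stylegan2.op; the [1, 3, 3, 1] binomial kernel and (2, 1) padding are the standard 2x-upsampling setup, chosen here for illustration.

import torch
from models.stylegan2.op import upfirdn2d  # assumed import path

# Separable 4-tap binomial FIR kernel, normalized, then scaled so brightness
# is preserved after zero-insertion upsampling by a factor of 2.
k = torch.tensor([1.0, 3.0, 3.0, 1.0])
kernel = k[None, :] * k[:, None]
kernel = kernel / kernel.sum() * (2 ** 2)

x = torch.randn(1, 3, 64, 64, device='cuda')
y = upfirdn2d(x, kernel.to(x.device), up=2, down=1, pad=(2, 1))
print(y.shape)  # expected: torch.Size([1, 3, 128, 128])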
e4e/models/stylegan2/op/upfirdn2d_kernel.cu
ADDED
@@ -0,0 +1,272 @@
+// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+//
+// This work is made available under the Nvidia Source Code License-NC.
+// To view a copy of this license, visit
+// https://nvlabs.github.io/stylegan2/license.html
+
+#include <torch/types.h>
+
+#include <ATen/ATen.h>
+#include <ATen/AccumulateType.h>
+#include <ATen/cuda/CUDAContext.h>
+#include <ATen/cuda/CUDAApplyUtils.cuh>
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+
+
+static __host__ __device__ __forceinline__ int floor_div(int a, int b) {
+    int c = a / b;
+
+    if (c * b > a) {
+        c--;
+    }
+
+    return c;
+}
+
+
+struct UpFirDn2DKernelParams {
+    int up_x;
+    int up_y;
+    int down_x;
+    int down_y;
+    int pad_x0;
+    int pad_x1;
+    int pad_y0;
+    int pad_y1;
+
+    int major_dim;
+    int in_h;
+    int in_w;
+    int minor_dim;
+    int kernel_h;
+    int kernel_w;
+    int out_h;
+    int out_w;
+    int loop_major;
+    int loop_x;
+};
+
+
+template <typename scalar_t, int up_x, int up_y, int down_x, int down_y, int kernel_h, int kernel_w, int tile_out_h, int tile_out_w>
+__global__ void upfirdn2d_kernel(scalar_t* out, const scalar_t* input, const scalar_t* kernel, const UpFirDn2DKernelParams p) {
+    const int tile_in_h = ((tile_out_h - 1) * down_y + kernel_h - 1) / up_y + 1;
+    const int tile_in_w = ((tile_out_w - 1) * down_x + kernel_w - 1) / up_x + 1;
+
+    __shared__ volatile float sk[kernel_h][kernel_w];
+    __shared__ volatile float sx[tile_in_h][tile_in_w];
+
+    int minor_idx = blockIdx.x;
+    int tile_out_y = minor_idx / p.minor_dim;
+    minor_idx -= tile_out_y * p.minor_dim;
+    tile_out_y *= tile_out_h;
+    int tile_out_x_base = blockIdx.y * p.loop_x * tile_out_w;
+    int major_idx_base = blockIdx.z * p.loop_major;
+
+    if (tile_out_x_base >= p.out_w | tile_out_y >= p.out_h | major_idx_base >= p.major_dim) {
+        return;
+    }
+
+    for (int tap_idx = threadIdx.x; tap_idx < kernel_h * kernel_w; tap_idx += blockDim.x) {
+        int ky = tap_idx / kernel_w;
+        int kx = tap_idx - ky * kernel_w;
+        scalar_t v = 0.0;
+
+        if (kx < p.kernel_w & ky < p.kernel_h) {
+            v = kernel[(p.kernel_h - 1 - ky) * p.kernel_w + (p.kernel_w - 1 - kx)];
+        }
+
+        sk[ky][kx] = v;
+    }
+
+    for (int loop_major = 0, major_idx = major_idx_base; loop_major < p.loop_major & major_idx < p.major_dim; loop_major++, major_idx++) {
+        for (int loop_x = 0, tile_out_x = tile_out_x_base; loop_x < p.loop_x & tile_out_x < p.out_w; loop_x++, tile_out_x += tile_out_w) {
+            int tile_mid_x = tile_out_x * down_x + up_x - 1 - p.pad_x0;
+            int tile_mid_y = tile_out_y * down_y + up_y - 1 - p.pad_y0;
+            int tile_in_x = floor_div(tile_mid_x, up_x);
+            int tile_in_y = floor_div(tile_mid_y, up_y);
+
+            __syncthreads();
+
+            for (int in_idx = threadIdx.x; in_idx < tile_in_h * tile_in_w; in_idx += blockDim.x) {
+                int rel_in_y = in_idx / tile_in_w;
+                int rel_in_x = in_idx - rel_in_y * tile_in_w;
+                int in_x = rel_in_x + tile_in_x;
+                int in_y = rel_in_y + tile_in_y;
+
+                scalar_t v = 0.0;
+
+                if (in_x >= 0 & in_y >= 0 & in_x < p.in_w & in_y < p.in_h) {
+                    v = input[((major_idx * p.in_h + in_y) * p.in_w + in_x) * p.minor_dim + minor_idx];
+                }
+
+                sx[rel_in_y][rel_in_x] = v;
+            }
+
+            __syncthreads();
+            for (int out_idx = threadIdx.x; out_idx < tile_out_h * tile_out_w; out_idx += blockDim.x) {
+                int rel_out_y = out_idx / tile_out_w;
+                int rel_out_x = out_idx - rel_out_y * tile_out_w;
+                int out_x = rel_out_x + tile_out_x;
+                int out_y = rel_out_y + tile_out_y;
+
+                int mid_x = tile_mid_x + rel_out_x * down_x;
+                int mid_y = tile_mid_y + rel_out_y * down_y;
+                int in_x = floor_div(mid_x, up_x);
+                int in_y = floor_div(mid_y, up_y);
+                int rel_in_x = in_x - tile_in_x;
+                int rel_in_y = in_y - tile_in_y;
+                int kernel_x = (in_x + 1) * up_x - mid_x - 1;
+                int kernel_y = (in_y + 1) * up_y - mid_y - 1;
+
+                scalar_t v = 0.0;
+
+                #pragma unroll
+                for (int y = 0; y < kernel_h / up_y; y++)
+                    #pragma unroll
+                    for (int x = 0; x < kernel_w / up_x; x++)
+                        v += sx[rel_in_y + y][rel_in_x + x] * sk[kernel_y + y * up_y][kernel_x + x * up_x];
+
+                if (out_x < p.out_w & out_y < p.out_h) {
+                    out[((major_idx * p.out_h + out_y) * p.out_w + out_x) * p.minor_dim + minor_idx] = v;
+                }
+            }
+        }
+    }
+}
+
+
+torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel,
+                           int up_x, int up_y, int down_x, int down_y,
+                           int pad_x0, int pad_x1, int pad_y0, int pad_y1) {
+    int curDevice = -1;
+    cudaGetDevice(&curDevice);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);
+
+    UpFirDn2DKernelParams p;
+
+    auto x = input.contiguous();
+    auto k = kernel.contiguous();
+
+    p.major_dim = x.size(0);
+    p.in_h = x.size(1);
+    p.in_w = x.size(2);
+    p.minor_dim = x.size(3);
+    p.kernel_h = k.size(0);
+    p.kernel_w = k.size(1);
+    p.up_x = up_x;
+    p.up_y = up_y;
+    p.down_x = down_x;
+    p.down_y = down_y;
+    p.pad_x0 = pad_x0;
+    p.pad_x1 = pad_x1;
+    p.pad_y0 = pad_y0;
+    p.pad_y1 = pad_y1;
+
+    p.out_h = (p.in_h * p.up_y + p.pad_y0 + p.pad_y1 - p.kernel_h + p.down_y) / p.down_y;
+    p.out_w = (p.in_w * p.up_x + p.pad_x0 + p.pad_x1 - p.kernel_w + p.down_x) / p.down_x;
+
+    auto out = at::empty({p.major_dim, p.out_h, p.out_w, p.minor_dim}, x.options());
+
+    int mode = -1;
+
+    int tile_out_h;
+    int tile_out_w;
+
+    if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 4 && p.kernel_w <= 4) {
+        mode = 1;
+        tile_out_h = 16;
+        tile_out_w = 64;
+    }
+
+    if (p.up_x == 1 && p.up_y == 1 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 3 && p.kernel_w <= 3) {
+        mode = 2;
+        tile_out_h = 16;
+        tile_out_w = 64;
+    }
+
+    if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 4 && p.kernel_w <= 4) {
+        mode = 3;
+        tile_out_h = 16;
+        tile_out_w = 64;
+    }
+
+    if (p.up_x == 2 && p.up_y == 2 && p.down_x == 1 && p.down_y == 1 && p.kernel_h <= 2 && p.kernel_w <= 2) {
+        mode = 4;
+        tile_out_h = 16;
+        tile_out_w = 64;
+    }
+
+    if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 && p.kernel_h <= 4 && p.kernel_w <= 4) {
+        mode = 5;
+        tile_out_h = 8;
+        tile_out_w = 32;
+    }
+
+    if (p.up_x == 1 && p.up_y == 1 && p.down_x == 2 && p.down_y == 2 && p.kernel_h <= 2 && p.kernel_w <= 2) {
+        mode = 6;
+        tile_out_h = 8;
+        tile_out_w = 32;
+    }
+
+    dim3 block_size;
+    dim3 grid_size;
+
+    if (tile_out_h > 0 && tile_out_w) {
+        p.loop_major = (p.major_dim - 1) / 16384 + 1;
+        p.loop_x = 1;
+        block_size = dim3(32 * 8, 1, 1);
+        grid_size = dim3(((p.out_h - 1) / tile_out_h + 1) * p.minor_dim,
+                         (p.out_w - 1) / (p.loop_x * tile_out_w) + 1,
+                         (p.major_dim - 1) / p.loop_major + 1);
+    }
+
+    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "upfirdn2d_cuda", [&] {
+        switch (mode) {
+        case 1:
+            upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 4, 4, 16, 64><<<grid_size, block_size, 0, stream>>>(
+                out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), k.data_ptr<scalar_t>(), p
+            );
+
+            break;
+
+        case 2:
+            upfirdn2d_kernel<scalar_t, 1, 1, 1, 1, 3, 3, 16, 64><<<grid_size, block_size, 0, stream>>>(
+                out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), k.data_ptr<scalar_t>(), p
+            );
+
+            break;
+
+        case 3:
+            upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 4, 4, 16, 64><<<grid_size, block_size, 0, stream>>>(
+                out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), k.data_ptr<scalar_t>(), p
+            );
+
+            break;
+
+        case 4:
+            upfirdn2d_kernel<scalar_t, 2, 2, 1, 1, 2, 2, 16, 64><<<grid_size, block_size, 0, stream>>>(
+                out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), k.data_ptr<scalar_t>(), p
+            );
+
+            break;
+
+        case 5:
+            upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32><<<grid_size, block_size, 0, stream>>>(
+                out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), k.data_ptr<scalar_t>(), p
+            );
+
+            break;
+
+        case 6:
+            upfirdn2d_kernel<scalar_t, 1, 1, 2, 2, 4, 4, 8, 32><<<grid_size, block_size, 0, stream>>>(
+                out.data_ptr<scalar_t>(), x.data_ptr<scalar_t>(), k.data_ptr<scalar_t>(), p
+            );
+
+            break;
+        }
+    });
+
+    return out;
+}
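Note (editorial, not part of the file above): the extension operates on a (major, in_h, in_w, minor) layout, and its output size (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) / down_y equals the Python wrapper's (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1. A consistency-check sketch against the pure-PyTorch upfirdn2d_native reference in upfirdn2d.py; the import path, sizes, and tolerance are assumptions for illustration.

import torch
from models.stylegan2.op.upfirdn2d import upfirdn2d_op, upfirdn2d_native  # assumed import path

x = torch.randn(6, 16, 16, 1, device='cuda')  # (major, in_h, in_w, minor) layout expected by the op
k = torch.randn(3, 3, device='cuda')

# up=1, down=1, pad=1 on every side -> mode 2 (kernel_h, kernel_w <= 3) in the dispatcher above
out = upfirdn2d_op.upfirdn2d(x, k, 1, 1, 1, 1, 1, 1, 1, 1)
ref = upfirdn2d_native(x, k, 1, 1, 1, 1, 1, 1, 1, 1)
print(out.shape, torch.allclose(out, ref, atol=1e-4))  # expected: torch.Size([6, 16, 16, 1]) True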
e4e/notebooks/images/car_img.jpg
ADDED
e4e/notebooks/images/church_img.jpg
ADDED
e4e/notebooks/images/horse_img.jpg
ADDED
e4e/notebooks/images/input_img.jpg
ADDED
e4e/options/__init__.py
ADDED
File without changes
e4e/options/train_options.py
ADDED
@@ -0,0 +1,84 @@
+from argparse import ArgumentParser
+from configs.paths_config import model_paths
+
+
+class TrainOptions:
+
+    def __init__(self):
+        self.parser = ArgumentParser()
+        self.initialize()
+
+    def initialize(self):
+        self.parser.add_argument('--exp_dir', type=str, help='Path to experiment output directory')
+        self.parser.add_argument('--dataset_type', default='ffhq_encode', type=str,
+                                 help='Type of dataset/experiment to run')
+        self.parser.add_argument('--encoder_type', default='Encoder4Editing', type=str, help='Which encoder to use')
+
+        self.parser.add_argument('--batch_size', default=4, type=int, help='Batch size for training')
+        self.parser.add_argument('--test_batch_size', default=2, type=int, help='Batch size for testing and inference')
+        self.parser.add_argument('--workers', default=4, type=int, help='Number of train dataloader workers')
+        self.parser.add_argument('--test_workers', default=2, type=int,
+                                 help='Number of test/inference dataloader workers')
+
+        self.parser.add_argument('--learning_rate', default=0.0001, type=float, help='Optimizer learning rate')
+        self.parser.add_argument('--optim_name', default='ranger', type=str, help='Which optimizer to use')
+        self.parser.add_argument('--train_decoder', default=False, type=bool, help='Whether to train the decoder model')
+        self.parser.add_argument('--start_from_latent_avg', action='store_true',
+                                 help='Whether to add average latent vector to generate codes from encoder.')
+        self.parser.add_argument('--lpips_type', default='alex', type=str, help='LPIPS backbone')
+
+        self.parser.add_argument('--lpips_lambda', default=0.8, type=float, help='LPIPS loss multiplier factor')
+        self.parser.add_argument('--id_lambda', default=0.1, type=float, help='ID loss multiplier factor')
+        self.parser.add_argument('--l2_lambda', default=1.0, type=float, help='L2 loss multiplier factor')
+
+        self.parser.add_argument('--stylegan_weights', default=model_paths['stylegan_ffhq'], type=str,
+                                 help='Path to StyleGAN model weights')
+        self.parser.add_argument('--stylegan_size', default=1024, type=int,
+                                 help='size of pretrained StyleGAN Generator')
+        self.parser.add_argument('--checkpoint_path', default=None, type=str, help='Path to pSp model checkpoint')
+
+        self.parser.add_argument('--max_steps', default=500000, type=int, help='Maximum number of training steps')
+        self.parser.add_argument('--image_interval', default=100, type=int,
+                                 help='Interval for logging train images during training')
+        self.parser.add_argument('--board_interval', default=50, type=int,
+                                 help='Interval for logging metrics to tensorboard')
+        self.parser.add_argument('--val_interval', default=1000, type=int, help='Validation interval')
+        self.parser.add_argument('--save_interval', default=None, type=int, help='Model checkpoint interval')
+
+        # Discriminator flags
+        self.parser.add_argument('--w_discriminator_lambda', default=0, type=float, help='Dw loss multiplier')
+        self.parser.add_argument('--w_discriminator_lr', default=2e-5, type=float, help='Dw learning rate')
+        self.parser.add_argument("--r1", type=float, default=10, help="weight of the r1 regularization")
+        self.parser.add_argument("--d_reg_every", type=int, default=16,
+                                 help="interval for applying r1 regularization")
+        self.parser.add_argument('--use_w_pool', action='store_true',
+                                 help='Whether to store a latent codes pool for the discriminator\'s training')
+        self.parser.add_argument("--w_pool_size", type=int, default=50,
+                                 help="W\'s pool size, depends on --use_w_pool")
+
+        # e4e specific
+        self.parser.add_argument('--delta_norm', type=int, default=2, help="norm type of the deltas")
+        self.parser.add_argument('--delta_norm_lambda', type=float, default=2e-4, help="lambda for delta norm loss")
+
+        # Progressive training
+        self.parser.add_argument('--progressive_steps', nargs='+', type=int, default=None,
+                                 help="The training steps of training new deltas. steps[i] starts the delta_i training")
+        self.parser.add_argument('--progressive_start', type=int, default=None,
+                                 help="The training step to start training the deltas, overrides progressive_steps")
+        self.parser.add_argument('--progressive_step_every', type=int, default=2_000,
+                                 help="Amount of training steps for each progressive step")
+
+        # Save additional training info to enable future training continuation from produced checkpoints
+        self.parser.add_argument('--save_training_data', action='store_true',
+                                 help='Save intermediate training data to resume training from the checkpoint')
+        self.parser.add_argument('--sub_exp_dir', default=None, type=str, help='Name of sub experiment directory')
+        self.parser.add_argument('--keep_optimizer', action='store_true',
+                                 help='Whether to continue from the checkpoint\'s optimizer')
+        self.parser.add_argument('--resume_training_from_ckpt', default=None, type=str,
+                                 help='Path to training checkpoint, works when --save_training_data was set to True')
+        self.parser.add_argument('--update_param_list', nargs='+', type=str, default=None,
+                                 help="Name of training parameters to update the loaded training checkpoint")
+
+    def parse(self):
+        opts = self.parser.parse_args()
+        return opts
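Usage sketch (illustrative only): these flags are consumed by e4e's training entry point, typically invoked along the lines below; the command-line values shown are example settings, not defaults from this file, and the paths are assumptions.

# python scripts/train.py --exp_dir experiments/ffhq_e4e --dataset_type ffhq_encode \
#     --start_from_latent_avg --w_discriminator_lambda 0.1 --progressive_start 20000

from options.train_options import TrainOptions  # assumed import path, as used by the training script

opts = TrainOptions().parse()  # argparse.Namespace holding the defaults defined above
print(opts.encoder_type, opts.batch_size, opts.lpips_lambda)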