From 1fa30794d00c3e94f7d3faeb50edc331b6a59217 Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 12:43:44 +0000
Subject: [PATCH 01/11] clean requirements

---
 .gitignore            |  9 +++++++--
 README.md             | 23 +++++++++++++++++------
 requirements/pt13.txt | 39 +++++++++++++++++++++++++++++++++++++++
 requirements/pt2.txt  | 38 ++++++++++++++++++++++++++++++++++++++
 requirements_pt13.txt | 41 -----------------------------------------
 requirements_pt2.txt  | 41 -----------------------------------------
 6 files changed, 101 insertions(+), 90 deletions(-)
 create mode 100644 requirements/pt13.txt
 create mode 100644 requirements/pt2.txt
 delete mode 100644 requirements_pt13.txt
 delete mode 100644 requirements_pt2.txt

diff --git a/.gitignore b/.gitignore
index c0902eb8a..5506c38d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,14 @@
+# extensions
 *.egg-info
 *.py[cod]
+
+# envs
 .pt13
 .pt2
-.pt2_2
+
+# directories
 /checkpoints
 /dist
 /outputs
-build
+/build
+/src
\ No newline at end of file
diff --git a/README.md b/README.md
index 2c779e5bb..d6367b0b5 100644
--- a/README.md
+++ b/README.md
@@ -59,10 +59,9 @@ This is assuming you have navigated to the `generative-models` root after clonin
 
 ```shell
 # install required packages from pypi
-python3 -m venv .pt1
-source .pt1/bin/activate
-pip3 install wheel
-pip3 install -r requirements_pt13.txt
+python3 -m venv .pt13
+source .pt13/bin/activate
+pip3 install -r requirements/pt13.txt
 ```
 
 **PyTorch 2.0** 
@@ -72,8 +71,20 @@ pip3 install -r requirements_pt13.txt
 # install required packages from pypi
 python3 -m venv .pt2
 source .pt2/bin/activate
-pip3 install wheel
-pip3 install -r requirements_pt2.txt
+pip3 install -r requirements/pt2.txt
+```
+
+
+#### 3. Install `sgm`
+
+```shell
+pip3 install .
+```
+
+#### 4. Install `sdata` for training
+
+```shell
+pip3 install -e git+https://github.com/Stability-AI/datapipelines.git@main#egg=sdata
 ```
 
 ## Packaging
diff --git a/requirements/pt13.txt b/requirements/pt13.txt
new file mode 100644
index 000000000..ce91e506d
--- /dev/null
+++ b/requirements/pt13.txt
@@ -0,0 +1,39 @@
+chardet>=5.1.0
+clip @ git+https://github.com/openai/CLIP.git
+einops>=0.6.1
+fairscale>=0.4.13
+fire>=0.5.0
+fsspec>=2023.6.0
+invisible-watermark>=0.2.0
+kornia==0.6.9
+matplotlib>=3.7.2
+natsort>=8.4.0
+numpy>=1.25.1
+omegaconf>=2.3.0
+onnx<=1.12.0
+open-clip-torch>=2.20.0
+opencv-python==4.6.0.66
+pandas>=2.0.3
+pillow>=9.5.0
+pudb>=2022.1.3
+pytorch-lightning==1.8.5
+pyyaml>=6.0.1
+scipy>=1.11.1
+streamlit>=0.73.1
+tensorboardx==2.5.1
+timm>=0.9.2
+tokenizers==0.12.1
+--extra-index-url https://download.pytorch.org/whl/cu117
+torch==1.13.1+cu117
+torchaudio==0.13.1
+torchdata==0.5.1
+torchmetrics>=1.0.1
+torchvision==0.14.1+cu117
+tqdm>=4.65.0
+transformers==4.19.1
+triton==2.0.0.post1
+urllib3<1.27,>=1.25.4
+wandb>=0.15.6
+webdataset>=0.2.33
+wheel>=0.41.0
+xformers==0.0.16
diff --git a/requirements/pt2.txt b/requirements/pt2.txt
new file mode 100644
index 000000000..f41dbceb5
--- /dev/null
+++ b/requirements/pt2.txt
@@ -0,0 +1,38 @@
+black==23.7.0
+chardet==5.1.0
+clip @ git+https://github.com/openai/CLIP.git
+einops>=0.6.1
+fairscale>=0.4.13
+fire>=0.5.0
+fsspec>=2023.6.0
+invisible-watermark>=0.2.0
+kornia==0.6.9
+matplotlib>=3.7.2
+natsort>=8.4.0
+ninja>=1.11.1
+numpy>=1.25.1
+omegaconf>=2.3.0
+open-clip-torch>=2.20.0
+opencv-python==4.6.0.66
+pandas>=2.0.3
+pillow>=9.5.0
+pudb>=2022.1.3
+pytorch-lightning==2.0.1
+pyyaml>=6.0.1
+scipy>=1.11.1
+streamlit>=0.73.1
+tensorboardx==2.6
+timm>=0.9.2
+tokenizers==0.12.1
+torch>=2.0.1
+torchaudio>=2.0.2
+torchdata==0.6.1
+torchmetrics>=1.0.1
+torchvision>=0.15.2
+tqdm>=4.65.0
+transformers==4.19.1
+triton==2.0.0
+urllib3<1.27,>=1.25.4
+wandb>=0.15.6
+webdataset>=0.2.33
+xformers>=0.0.20
diff --git a/requirements_pt13.txt b/requirements_pt13.txt
deleted file mode 100644
index 3d5b117c3..000000000
--- a/requirements_pt13.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-omegaconf
-einops
-fire
-tqdm
-pillow
-numpy
-webdataset>=0.2.33
---extra-index-url https://download.pytorch.org/whl/cu117
-torch==1.13.1+cu117
-xformers==0.0.16
-torchaudio==0.13.1
-torchvision==0.14.1+cu117
-torchmetrics
-opencv-python==4.6.0.66
-fairscale
-pytorch-lightning==1.8.5
-fsspec
-kornia==0.6.9
-matplotlib
-natsort
-tensorboardx==2.5.1
-open-clip-torch
-chardet
-scipy
-pandas
-pudb
-pyyaml
-urllib3<1.27,>=1.25.4
-streamlit>=0.73.1
-timm
-tokenizers==0.12.1
-torchdata==0.5.1
-transformers==4.19.1
-onnx<=1.12.0
-triton
-wandb
-invisible-watermark
--e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
--e git+https://github.com/openai/CLIP.git@main#egg=clip
--e git+https://github.com/Stability-AI/datapipelines.git@main#egg=sdata
--e .
\ No newline at end of file
diff --git a/requirements_pt2.txt b/requirements_pt2.txt
deleted file mode 100644
index 9988b9084..000000000
--- a/requirements_pt2.txt
+++ /dev/null
@@ -1,41 +0,0 @@
-omegaconf
-einops
-fire
-tqdm
-pillow
-numpy
-webdataset>=0.2.33
-ninja
-torch
-matplotlib
-torchaudio>=2.0.2
-torchmetrics
-torchvision>=0.15.2
-opencv-python==4.6.0.66
-fairscale
-pytorch-lightning==2.0.1
-fire
-fsspec
-kornia==0.6.9
-natsort
-open-clip-torch
-chardet==5.1.0
-tensorboardx==2.6
-pandas
-pudb
-pyyaml
-urllib3<1.27,>=1.25.4
-scipy
-streamlit>=0.73.1
-timm
-tokenizers==0.12.1
-transformers==4.19.1
-triton==2.0.0
-torchdata==0.6.1
-wandb
-invisible-watermark
-xformers
--e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
--e git+https://github.com/openai/CLIP.git@main#egg=clip
--e git+https://github.com/Stability-AI/datapipelines.git@main#egg=sdata
--e .
\ No newline at end of file

From aec2b8d75373379615846cd7a6da0782fa81cf51 Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 12:56:00 +0000
Subject: [PATCH 02/11] rm taming deps

---
 sgm/__init__.py                               |   3 +-
 sgm/modules/autoencoding/losses/__init__.py   |   7 +-
 sgm/modules/diffusionmodules/loss.py          |   2 +-
 .../diffusionmodules/lpips/__init__.py        |   0
 sgm/modules/diffusionmodules/lpips/lpips.py   | 123 ++++++++++++++++
 sgm/modules/diffusionmodules/lpips/model.py   |  65 +++++++++
 sgm/modules/diffusionmodules/lpips/util.py    | 132 +++++++++++++++++
 .../diffusionmodules/lpips/vqperceptual.py    | 136 ++++++++++++++++++
 8 files changed, 461 insertions(+), 7 deletions(-)
 create mode 100644 sgm/modules/diffusionmodules/lpips/__init__.py
 create mode 100644 sgm/modules/diffusionmodules/lpips/lpips.py
 create mode 100644 sgm/modules/diffusionmodules/lpips/model.py
 create mode 100644 sgm/modules/diffusionmodules/lpips/util.py
 create mode 100644 sgm/modules/diffusionmodules/lpips/vqperceptual.py

diff --git a/sgm/__init__.py b/sgm/__init__.py
index f639416e8..6ecf0f239 100644
--- a/sgm/__init__.py
+++ b/sgm/__init__.py
@@ -1,5 +1,4 @@
-from .data import StableDataModuleFromConfig
 from .models import AutoencodingEngine, DiffusionEngine
 from .util import instantiate_from_config, get_configs_path
 
-__version__ = "0.0.1"
+__version__ = "0.1.0"
diff --git a/sgm/modules/autoencoding/losses/__init__.py b/sgm/modules/autoencoding/losses/__init__.py
index 6a3b54f72..a75426789 100644
--- a/sgm/modules/autoencoding/losses/__init__.py
+++ b/sgm/modules/autoencoding/losses/__init__.py
@@ -3,10 +3,9 @@
 import torch
 import torch.nn as nn
 from einops import rearrange
-from taming.modules.discriminator.model import NLayerDiscriminator, weights_init
-from taming.modules.losses.lpips import LPIPS
-from taming.modules.losses.vqperceptual import hinge_d_loss, vanilla_d_loss
-
+from ...diffusionmodules.lpips.lpips import LPIPS
+from ...diffusionmodules.lpips.model import NLayerDiscriminator, weights_init
+from ...diffusionmodules.lpips.vqperceptual import hinge_d_loss, vanilla_d_loss
 from ....util import default, instantiate_from_config
 
 
diff --git a/sgm/modules/diffusionmodules/loss.py b/sgm/modules/diffusionmodules/loss.py
index 555abc1c3..9dc986740 100644
--- a/sgm/modules/diffusionmodules/loss.py
+++ b/sgm/modules/diffusionmodules/loss.py
@@ -3,7 +3,7 @@
 import torch
 import torch.nn as nn
 from omegaconf import ListConfig
-from taming.modules.losses.lpips import LPIPS
+from .lpips import LPIPS
 
 from ...util import append_dims, instantiate_from_config
 
diff --git a/sgm/modules/diffusionmodules/lpips/__init__.py b/sgm/modules/diffusionmodules/lpips/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/sgm/modules/diffusionmodules/lpips/lpips.py b/sgm/modules/diffusionmodules/lpips/lpips.py
new file mode 100644
index 000000000..3012914c7
--- /dev/null
+++ b/sgm/modules/diffusionmodules/lpips/lpips.py
@@ -0,0 +1,123 @@
+"""Stripped version of https://github.com/richzhang/PerceptualSimilarity/tree/master/models"""
+
+import torch
+import torch.nn as nn
+from torchvision import models
+from collections import namedtuple
+
+from .util import get_ckpt_path
+
+
+class LPIPS(nn.Module):
+    # Learned perceptual metric
+    def __init__(self, use_dropout=True):
+        super().__init__()
+        self.scaling_layer = ScalingLayer()
+        self.chns = [64, 128, 256, 512, 512]  # vgg16 features
+        self.net = vgg16(pretrained=True, requires_grad=False)
+        self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
+        self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
+        self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
+        self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
+        self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
+        self.load_from_pretrained()
+        for param in self.parameters():
+            param.requires_grad = False
+
+    def load_from_pretrained(self, name="vgg_lpips"):
+        ckpt = get_ckpt_path(name, "taming/modules/autoencoder/lpips")
+        self.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)
+        print("loaded pretrained LPIPS loss from {}".format(ckpt))
+
+    @classmethod
+    def from_pretrained(cls, name="vgg_lpips"):
+        if name != "vgg_lpips":
+            raise NotImplementedError
+        model = cls()
+        ckpt = get_ckpt_path(name)
+        model.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)
+        return model
+
+    def forward(self, input, target):
+        in0_input, in1_input = (self.scaling_layer(input), self.scaling_layer(target))
+        outs0, outs1 = self.net(in0_input), self.net(in1_input)
+        feats0, feats1, diffs = {}, {}, {}
+        lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
+        for kk in range(len(self.chns)):
+            feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(outs1[kk])
+            diffs[kk] = (feats0[kk] - feats1[kk]) ** 2
+
+        res = [spatial_average(lins[kk].model(diffs[kk]), keepdim=True) for kk in range(len(self.chns))]
+        val = res[0]
+        for l in range(1, len(self.chns)):
+            val += res[l]
+        return val
+
+
+class ScalingLayer(nn.Module):
+    def __init__(self):
+        super(ScalingLayer, self).__init__()
+        self.register_buffer('shift', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
+        self.register_buffer('scale', torch.Tensor([.458, .448, .450])[None, :, None, None])
+
+    def forward(self, inp):
+        return (inp - self.shift) / self.scale
+
+
+class NetLinLayer(nn.Module):
+    """ A single linear layer which does a 1x1 conv """
+    def __init__(self, chn_in, chn_out=1, use_dropout=False):
+        super(NetLinLayer, self).__init__()
+        layers = [nn.Dropout(), ] if (use_dropout) else []
+        layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False), ]
+        self.model = nn.Sequential(*layers)
+
+
+class vgg16(torch.nn.Module):
+    def __init__(self, requires_grad=False, pretrained=True):
+        super(vgg16, self).__init__()
+        vgg_pretrained_features = models.vgg16(pretrained=pretrained).features
+        self.slice1 = torch.nn.Sequential()
+        self.slice2 = torch.nn.Sequential()
+        self.slice3 = torch.nn.Sequential()
+        self.slice4 = torch.nn.Sequential()
+        self.slice5 = torch.nn.Sequential()
+        self.N_slices = 5
+        for x in range(4):
+            self.slice1.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(4, 9):
+            self.slice2.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(9, 16):
+            self.slice3.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(16, 23):
+            self.slice4.add_module(str(x), vgg_pretrained_features[x])
+        for x in range(23, 30):
+            self.slice5.add_module(str(x), vgg_pretrained_features[x])
+        if not requires_grad:
+            for param in self.parameters():
+                param.requires_grad = False
+
+    def forward(self, X):
+        h = self.slice1(X)
+        h_relu1_2 = h
+        h = self.slice2(h)
+        h_relu2_2 = h
+        h = self.slice3(h)
+        h_relu3_3 = h
+        h = self.slice4(h)
+        h_relu4_3 = h
+        h = self.slice5(h)
+        h_relu5_3 = h
+        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
+        out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
+        return out
+
+
+def normalize_tensor(x,eps=1e-10):
+    norm_factor = torch.sqrt(torch.sum(x**2,dim=1,keepdim=True))
+    return x/(norm_factor+eps)
+
+
+def spatial_average(x, keepdim=True):
+    return x.mean([2,3],keepdim=keepdim)
+
diff --git a/sgm/modules/diffusionmodules/lpips/model.py b/sgm/modules/diffusionmodules/lpips/model.py
new file mode 100644
index 000000000..caafbbb0f
--- /dev/null
+++ b/sgm/modules/diffusionmodules/lpips/model.py
@@ -0,0 +1,65 @@
+import functools
+import torch.nn as nn
+
+from .util import ActNorm
+
+def weights_init(m):
+    classname = m.__class__.__name__
+    if classname.find('Conv') != -1:
+        nn.init.normal_(m.weight.data, 0.0, 0.02)
+    elif classname.find('BatchNorm') != -1:
+        nn.init.normal_(m.weight.data, 1.0, 0.02)
+        nn.init.constant_(m.bias.data, 0)
+
+
+class NLayerDiscriminator(nn.Module):
+    """Defines a PatchGAN discriminator as in Pix2Pix
+        --> see https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/networks.py
+    """
+    def __init__(self, input_nc=3, ndf=64, n_layers=3, use_actnorm=False):
+        """Construct a PatchGAN discriminator
+        Parameters:
+            input_nc (int)  -- the number of channels in input images
+            ndf (int)       -- the number of filters in the first conv layer
+            n_layers (int)  -- the number of conv layers in the discriminator
+            use_actnorm (bool) -- use ActNorm instead of BatchNorm2d as the normalization layer
+        """
+        super(NLayerDiscriminator, self).__init__()
+        if not use_actnorm:
+            norm_layer = nn.BatchNorm2d
+        else:
+            norm_layer = ActNorm
+        if type(norm_layer) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
+            use_bias = norm_layer.func != nn.BatchNorm2d
+        else:
+            use_bias = norm_layer != nn.BatchNorm2d
+
+        kw = 4
+        padw = 1
+        sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
+        nf_mult = 1
+        nf_mult_prev = 1
+        for n in range(1, n_layers):  # gradually increase the number of filters
+            nf_mult_prev = nf_mult
+            nf_mult = min(2 ** n, 8)
+            sequence += [
+                nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
+                norm_layer(ndf * nf_mult),
+                nn.LeakyReLU(0.2, True)
+            ]
+
+        nf_mult_prev = nf_mult
+        nf_mult = min(2 ** n_layers, 8)
+        sequence += [
+            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
+            norm_layer(ndf * nf_mult),
+            nn.LeakyReLU(0.2, True)
+        ]
+
+        sequence += [
+            nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]  # output 1 channel prediction map
+        self.main = nn.Sequential(*sequence)
+
+    def forward(self, input):
+        """Standard forward."""
+        return self.main(input)
diff --git a/sgm/modules/diffusionmodules/lpips/util.py b/sgm/modules/diffusionmodules/lpips/util.py
new file mode 100644
index 000000000..d52110572
--- /dev/null
+++ b/sgm/modules/diffusionmodules/lpips/util.py
@@ -0,0 +1,132 @@
+import torch
+import torch.nn as nn
+
+import os, hashlib
+import requests
+from tqdm import tqdm
+
+URL_MAP = {
+    "vgg_lpips": "https://heibox.uni-heidelberg.de/f/607503859c864bc1b30b/?dl=1"
+}
+
+CKPT_MAP = {
+    "vgg_lpips": "vgg.pth"
+}
+
+MD5_MAP = {
+    "vgg_lpips": "d507d7349b931f0638a25a48a722f98a"
+}
+
+
+def download(url, local_path, chunk_size=1024):
+    """Stream `url` into `local_path` (parent directories are created) with a tqdm byte progress bar."""
+    os.makedirs(os.path.split(local_path)[0], exist_ok=True)
+    with requests.get(url, stream=True) as r:
+        total_size = int(r.headers.get("content-length", 0))  # header may be absent -> total of 0
+        with tqdm(total=total_size, unit="B", unit_scale=True) as pbar, open(local_path, "wb") as f:
+            for data in r.iter_content(chunk_size=chunk_size):
+                if data:
+                    f.write(data)
+                    pbar.update(len(data))  # advance by bytes actually written, not the nominal chunk size
+
+
+def md5_hash(path):
+    with open(path, "rb") as f:
+        content = f.read()
+    return hashlib.md5(content).hexdigest()
+
+
+def get_ckpt_path(name, root, check=False):
+    assert name in URL_MAP
+    path = os.path.join(root, CKPT_MAP[name])
+    if not os.path.exists(path) or (check and not md5_hash(path) == MD5_MAP[name]):
+        print("Downloading {} model from {} to {}".format(name, URL_MAP[name], path))
+        download(URL_MAP[name], path)
+        md5 = md5_hash(path)
+        assert md5 == MD5_MAP[name], md5
+    return path
+
+
+class ActNorm(nn.Module):
+    def __init__(self, num_features, logdet=False, affine=True,
+                 allow_reverse_init=False):
+        assert affine
+        super().__init__()
+        self.logdet = logdet
+        self.loc = nn.Parameter(torch.zeros(1, num_features, 1, 1))
+        self.scale = nn.Parameter(torch.ones(1, num_features, 1, 1))
+        self.allow_reverse_init = allow_reverse_init
+
+        self.register_buffer('initialized', torch.tensor(0, dtype=torch.uint8))
+
+    def initialize(self, input):
+        with torch.no_grad():
+            flatten = input.permute(1, 0, 2, 3).contiguous().view(input.shape[1], -1)
+            mean = (
+                flatten.mean(1)
+                .unsqueeze(1)
+                .unsqueeze(2)
+                .unsqueeze(3)
+                .permute(1, 0, 2, 3)
+            )
+            std = (
+                flatten.std(1)
+                .unsqueeze(1)
+                .unsqueeze(2)
+                .unsqueeze(3)
+                .permute(1, 0, 2, 3)
+            )
+
+            self.loc.data.copy_(-mean)
+            self.scale.data.copy_(1 / (std + 1e-6))
+
+    def forward(self, input, reverse=False):
+        if reverse:
+            return self.reverse(input)
+        if len(input.shape) == 2:
+            input = input[:,:,None,None]
+            squeeze = True
+        else:
+            squeeze = False
+
+        _, _, height, width = input.shape
+
+        if self.training and self.initialized.item() == 0:
+            self.initialize(input)
+            self.initialized.fill_(1)
+
+        h = self.scale * (input + self.loc)
+
+        if squeeze:
+            h = h.squeeze(-1).squeeze(-1)
+
+        if self.logdet:
+            log_abs = torch.log(torch.abs(self.scale))
+            logdet = height*width*torch.sum(log_abs)
+            logdet = logdet * torch.ones(input.shape[0]).to(input)
+            return h, logdet
+
+        return h
+
+    def reverse(self, output):
+        if self.training and self.initialized.item() == 0:
+            if not self.allow_reverse_init:
+                raise RuntimeError(
+                    "Initializing ActNorm in reverse direction is "
+                    "disabled by default. Use allow_reverse_init=True to enable."
+                )
+            else:
+                self.initialize(output)
+                self.initialized.fill_(1)
+
+        if len(output.shape) == 2:
+            output = output[:,:,None,None]
+            squeeze = True
+        else:
+            squeeze = False
+
+        h = output / self.scale - self.loc
+
+        if squeeze:
+            h = h.squeeze(-1).squeeze(-1)
+        return h
\ No newline at end of file
diff --git a/sgm/modules/diffusionmodules/lpips/vqperceptual.py b/sgm/modules/diffusionmodules/lpips/vqperceptual.py
new file mode 100644
index 000000000..39dc35784
--- /dev/null
+++ b/sgm/modules/diffusionmodules/lpips/vqperceptual.py
@@ -0,0 +1,136 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .lpips import LPIPS
+from .model import NLayerDiscriminator, weights_init
+
+
+class DummyLoss(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+
+def adopt_weight(weight, global_step, threshold=0, value=0.):
+    if global_step < threshold:
+        weight = value
+    return weight
+
+
+def hinge_d_loss(logits_real, logits_fake):
+    loss_real = torch.mean(F.relu(1. - logits_real))
+    loss_fake = torch.mean(F.relu(1. + logits_fake))
+    d_loss = 0.5 * (loss_real + loss_fake)
+    return d_loss
+
+
+def vanilla_d_loss(logits_real, logits_fake):
+    d_loss = 0.5 * (
+        torch.mean(torch.nn.functional.softplus(-logits_real)) +
+        torch.mean(torch.nn.functional.softplus(logits_fake)))
+    return d_loss
+
+
+class VQLPIPSWithDiscriminator(nn.Module):
+    def __init__(self, disc_start, codebook_weight=1.0, pixelloss_weight=1.0,
+                 disc_num_layers=3, disc_in_channels=3, disc_factor=1.0, disc_weight=1.0,
+                 perceptual_weight=1.0, use_actnorm=False, disc_conditional=False,
+                 disc_ndf=64, disc_loss="hinge"):
+        super().__init__()
+        assert disc_loss in ["hinge", "vanilla"]
+        self.codebook_weight = codebook_weight
+        self.pixel_weight = pixelloss_weight
+        self.perceptual_loss = LPIPS().eval()
+        self.perceptual_weight = perceptual_weight
+
+        self.discriminator = NLayerDiscriminator(input_nc=disc_in_channels,
+                                                 n_layers=disc_num_layers,
+                                                 use_actnorm=use_actnorm,
+                                                 ndf=disc_ndf
+                                                 ).apply(weights_init)
+        self.discriminator_iter_start = disc_start
+        if disc_loss == "hinge":
+            self.disc_loss = hinge_d_loss
+        elif disc_loss == "vanilla":
+            self.disc_loss = vanilla_d_loss
+        else:
+            raise ValueError(f"Unknown GAN loss '{disc_loss}'.")
+        print(f"VQLPIPSWithDiscriminator running with {disc_loss} loss.")
+        self.disc_factor = disc_factor
+        self.discriminator_weight = disc_weight
+        self.disc_conditional = disc_conditional
+
+    def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None):
+        if last_layer is not None:
+            nll_grads = torch.autograd.grad(nll_loss, last_layer, retain_graph=True)[0]
+            g_grads = torch.autograd.grad(g_loss, last_layer, retain_graph=True)[0]
+        else:
+            nll_grads = torch.autograd.grad(nll_loss, self.last_layer[0], retain_graph=True)[0]
+            g_grads = torch.autograd.grad(g_loss, self.last_layer[0], retain_graph=True)[0]
+
+        d_weight = torch.norm(nll_grads) / (torch.norm(g_grads) + 1e-4)
+        d_weight = torch.clamp(d_weight, 0.0, 1e4).detach()
+        d_weight = d_weight * self.discriminator_weight
+        return d_weight
+
+    def forward(self, codebook_loss, inputs, reconstructions, optimizer_idx,
+                global_step, last_layer=None, cond=None, split="train"):
+        rec_loss = torch.abs(inputs.contiguous() - reconstructions.contiguous())
+        if self.perceptual_weight > 0:
+            p_loss = self.perceptual_loss(inputs.contiguous(), reconstructions.contiguous())
+            rec_loss = rec_loss + self.perceptual_weight * p_loss
+        else:
+            p_loss = torch.tensor([0.0])
+
+        nll_loss = rec_loss
+        #nll_loss = torch.sum(nll_loss) / nll_loss.shape[0]
+        nll_loss = torch.mean(nll_loss)
+
+        # now the GAN part
+        if optimizer_idx == 0:
+            # generator update
+            if cond is None:
+                assert not self.disc_conditional
+                logits_fake = self.discriminator(reconstructions.contiguous())
+            else:
+                assert self.disc_conditional
+                logits_fake = self.discriminator(torch.cat((reconstructions.contiguous(), cond), dim=1))
+            g_loss = -torch.mean(logits_fake)
+
+            try:
+                d_weight = self.calculate_adaptive_weight(nll_loss, g_loss, last_layer=last_layer)
+            except RuntimeError:
+                assert not self.training
+                d_weight = torch.tensor(0.0)
+
+            disc_factor = adopt_weight(self.disc_factor, global_step, threshold=self.discriminator_iter_start)
+            loss = nll_loss + d_weight * disc_factor * g_loss + self.codebook_weight * codebook_loss.mean()
+
+            log = {"{}/total_loss".format(split): loss.clone().detach().mean(),
+                   "{}/quant_loss".format(split): codebook_loss.detach().mean(),
+                   "{}/nll_loss".format(split): nll_loss.detach().mean(),
+                   "{}/rec_loss".format(split): rec_loss.detach().mean(),
+                   "{}/p_loss".format(split): p_loss.detach().mean(),
+                   "{}/d_weight".format(split): d_weight.detach(),
+                   "{}/disc_factor".format(split): torch.tensor(disc_factor),
+                   "{}/g_loss".format(split): g_loss.detach().mean(),
+                   }
+            return loss, log
+
+        if optimizer_idx == 1:
+            # second pass for discriminator update
+            if cond is None:
+                logits_real = self.discriminator(inputs.contiguous().detach())
+                logits_fake = self.discriminator(reconstructions.contiguous().detach())
+            else:
+                logits_real = self.discriminator(torch.cat((inputs.contiguous().detach(), cond), dim=1))
+                logits_fake = self.discriminator(torch.cat((reconstructions.contiguous().detach(), cond), dim=1))
+
+            disc_factor = adopt_weight(self.disc_factor, global_step, threshold=self.discriminator_iter_start)
+            d_loss = disc_factor * self.disc_loss(logits_real, logits_fake)
+
+            log = {"{}/disc_loss".format(split): d_loss.clone().detach().mean(),
+                   "{}/logits_real".format(split): logits_real.detach().mean(),
+                   "{}/logits_fake".format(split): logits_fake.detach().mean()
+                   }
+            return d_loss, log

From 029ad74ef6a7b950e8237e8d6b563322b2d4db9e Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 13:13:42 +0000
Subject: [PATCH 03/11] isort, black

---
 main.py                                       |  11 +-
 scripts/demo/sampling.py                      |   1 +
 scripts/demo/streamlit_helpers.py             |  19 ++-
 .../nsfw_and_watermark_dectection.py          |   5 +-
 sgm/__init__.py                               |   2 +-
 sgm/data/cifar10.py                           |   4 +-
 sgm/data/mnist.py                             |   4 +-
 sgm/modules/autoencoding/losses/__init__.py   |   3 +-
 sgm/modules/diffusionmodules/__init__.py      |   2 +-
 sgm/modules/diffusionmodules/discretizer.py   |   9 +-
 sgm/modules/diffusionmodules/loss.py          |   2 +-
 sgm/modules/diffusionmodules/lpips/lpips.py   |  56 +++++---
 sgm/modules/diffusionmodules/lpips/model.py   |  47 +++++--
 sgm/modules/diffusionmodules/lpips/util.py    |  34 ++---
 .../diffusionmodules/lpips/vqperceptual.py    | 130 ++++++++++++------
 sgm/modules/diffusionmodules/wrappers.py      |   2 +-
 sgm/modules/distributions/distributions.py    |   2 +-
 17 files changed, 211 insertions(+), 122 deletions(-)

diff --git a/main.py b/main.py
index c916f512f..5e03c1c5b 100644
--- a/main.py
+++ b/main.py
@@ -12,22 +12,18 @@
 import torch
 import torchvision
 import wandb
-from PIL import Image
 from matplotlib import pyplot as plt
 from natsort import natsorted
 from omegaconf import OmegaConf
 from packaging import version
+from PIL import Image
 from pytorch_lightning import seed_everything
 from pytorch_lightning.callbacks import Callback
 from pytorch_lightning.loggers import WandbLogger
 from pytorch_lightning.trainer import Trainer
 from pytorch_lightning.utilities import rank_zero_only
 
-from sgm.util import (
-    exists,
-    instantiate_from_config,
-    isheatmap,
-)
+from sgm.util import exists, instantiate_from_config, isheatmap
 
 MULTINODE_HACKS = True
 
@@ -910,11 +906,12 @@ def divein(*args, **kwargs):
             trainer.test(model, data)
     except RuntimeError as err:
         if MULTINODE_HACKS:
-            import requests
             import datetime
             import os
             import socket
 
+            import requests
+
             device = os.environ.get("CUDA_VISIBLE_DEVICES", "?")
             hostname = socket.gethostname()
             ts = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S")
diff --git a/scripts/demo/sampling.py b/scripts/demo/sampling.py
index 98d0af30f..87d801556 100644
--- a/scripts/demo/sampling.py
+++ b/scripts/demo/sampling.py
@@ -1,4 +1,5 @@
 from pytorch_lightning import seed_everything
+
 from scripts.demo.streamlit_helpers import *
 from scripts.util.detection.nsfw_and_watermark_dectection import DeepFloydDataFiltering
 
diff --git a/scripts/demo/streamlit_helpers.py b/scripts/demo/streamlit_helpers.py
index 8f53b5db3..2cf165b69 100644
--- a/scripts/demo/streamlit_helpers.py
+++ b/scripts/demo/streamlit_helpers.py
@@ -1,29 +1,28 @@
+import math
 import os
-from typing import Union, List
+from typing import List, Union
 
-import math
 import numpy as np
 import streamlit as st
 import torch
-from PIL import Image
 from einops import rearrange, repeat
 from imwatermark import WatermarkEncoder
-from omegaconf import OmegaConf, ListConfig
+from omegaconf import ListConfig, OmegaConf
+from PIL import Image
+from safetensors.torch import load_file as load_safetensors
 from torch import autocast
 from torchvision import transforms
 from torchvision.utils import make_grid
-from safetensors.torch import load_file as load_safetensors
 
 from sgm.modules.diffusionmodules.sampling import (
+    DPMPP2MSampler,
+    DPMPP2SAncestralSampler,
+    EulerAncestralSampler,
     EulerEDMSampler,
     HeunEDMSampler,
-    EulerAncestralSampler,
-    DPMPP2SAncestralSampler,
-    DPMPP2MSampler,
     LinearMultistepSampler,
 )
-from sgm.util import append_dims
-from sgm.util import instantiate_from_config
+from sgm.util import append_dims, instantiate_from_config
 
 
 class WatermarkEmbedder:
diff --git a/scripts/util/detection/nsfw_and_watermark_dectection.py b/scripts/util/detection/nsfw_and_watermark_dectection.py
index af84acf30..ab450c13d 100644
--- a/scripts/util/detection/nsfw_and_watermark_dectection.py
+++ b/scripts/util/detection/nsfw_and_watermark_dectection.py
@@ -1,9 +1,10 @@
 import os
-import torch
+
+import clip
 import numpy as np
+import torch
 import torchvision.transforms as T
 from PIL import Image
-import clip
 
 RESOURCES_ROOT = "scripts/util/detection/"
 
diff --git a/sgm/__init__.py b/sgm/__init__.py
index 6ecf0f239..24bc84af8 100644
--- a/sgm/__init__.py
+++ b/sgm/__init__.py
@@ -1,4 +1,4 @@
 from .models import AutoencodingEngine, DiffusionEngine
-from .util import instantiate_from_config, get_configs_path
+from .util import get_configs_path, instantiate_from_config
 
 __version__ = "0.1.0"
diff --git a/sgm/data/cifar10.py b/sgm/data/cifar10.py
index aa3ae6777..6083646f1 100644
--- a/sgm/data/cifar10.py
+++ b/sgm/data/cifar10.py
@@ -1,7 +1,7 @@
-import torchvision
 import pytorch_lightning as pl
-from torchvision import transforms
+import torchvision
 from torch.utils.data import DataLoader, Dataset
+from torchvision import transforms
 
 
 class CIFAR10DataDictWrapper(Dataset):
diff --git a/sgm/data/mnist.py b/sgm/data/mnist.py
index ab7478f40..dea4d7e67 100644
--- a/sgm/data/mnist.py
+++ b/sgm/data/mnist.py
@@ -1,7 +1,7 @@
-import torchvision
 import pytorch_lightning as pl
-from torchvision import transforms
+import torchvision
 from torch.utils.data import DataLoader, Dataset
+from torchvision import transforms
 
 
 class MNISTDataDictWrapper(Dataset):
diff --git a/sgm/modules/autoencoding/losses/__init__.py b/sgm/modules/autoencoding/losses/__init__.py
index a75426789..1de0d60ef 100644
--- a/sgm/modules/autoencoding/losses/__init__.py
+++ b/sgm/modules/autoencoding/losses/__init__.py
@@ -3,10 +3,11 @@
 import torch
 import torch.nn as nn
 from einops import rearrange
+
+from ....util import default, instantiate_from_config
 from ...diffusionmodules.lpips.lpips import LPIPS
 from ...diffusionmodules.lpips.model import NLayerDiscriminator, weights_init
 from ...diffusionmodules.lpips.vqperceptual import hinge_d_loss, vanilla_d_loss
-from ....util import default, instantiate_from_config
 
 
 def adopt_weight(weight, global_step, threshold=0, value=0.0):
diff --git a/sgm/modules/diffusionmodules/__init__.py b/sgm/modules/diffusionmodules/__init__.py
index ce7968af9..867b69e89 100644
--- a/sgm/modules/diffusionmodules/__init__.py
+++ b/sgm/modules/diffusionmodules/__init__.py
@@ -1,7 +1,7 @@
 from .denoiser import Denoiser
 from .discretizer import Discretization
 from .loss import StandardDiffusionLoss
-from .model import Model, Encoder, Decoder
+from .model import Decoder, Encoder, Model
 from .openaimodel import UNetModel
 from .sampling import BaseDiffusionSampler
 from .wrappers import OpenAIWrapper
diff --git a/sgm/modules/diffusionmodules/discretizer.py b/sgm/modules/diffusionmodules/discretizer.py
index 397b8f386..4135ac99b 100644
--- a/sgm/modules/diffusionmodules/discretizer.py
+++ b/sgm/modules/diffusionmodules/discretizer.py
@@ -1,10 +1,11 @@
-import torch
-import numpy as np
-from functools import partial
 from abc import abstractmethod
+from functools import partial
+
+import numpy as np
+import torch
 
-from ...util import append_zero
 from ...modules.diffusionmodules.util import make_beta_schedule
+from ...util import append_zero
 
 
 def generate_roughly_equally_spaced_steps(
diff --git a/sgm/modules/diffusionmodules/loss.py b/sgm/modules/diffusionmodules/loss.py
index 9dc986740..fcd10865b 100644
--- a/sgm/modules/diffusionmodules/loss.py
+++ b/sgm/modules/diffusionmodules/loss.py
@@ -3,9 +3,9 @@
 import torch
 import torch.nn as nn
 from omegaconf import ListConfig
-from .lpips import LPIPS
 
 from ...util import append_dims, instantiate_from_config
+from .lpips import LPIPS
 
 
 class StandardDiffusionLoss(nn.Module):
diff --git a/sgm/modules/diffusionmodules/lpips/lpips.py b/sgm/modules/diffusionmodules/lpips/lpips.py
index 3012914c7..2a35f92b0 100644
--- a/sgm/modules/diffusionmodules/lpips/lpips.py
+++ b/sgm/modules/diffusionmodules/lpips/lpips.py
@@ -1,9 +1,10 @@
 """Stripped version of https://github.com/richzhang/PerceptualSimilarity/tree/master/models"""
 
+from collections import namedtuple
+
 import torch
 import torch.nn as nn
 from torchvision import models
-from collections import namedtuple
 
 from .util import get_ckpt_path
 
@@ -26,7 +27,9 @@ def __init__(self, use_dropout=True):
 
     def load_from_pretrained(self, name="vgg_lpips"):
         ckpt = get_ckpt_path(name, "taming/modules/autoencoder/lpips")
-        self.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)
+        self.load_state_dict(
+            torch.load(ckpt, map_location=torch.device("cpu")), strict=False
+        )
         print("loaded pretrained LPIPS loss from {}".format(ckpt))
 
     @classmethod
@@ -35,7 +38,9 @@ def from_pretrained(cls, name="vgg_lpips"):
             raise NotImplementedError
         model = cls()
         ckpt = get_ckpt_path(name)
-        model.load_state_dict(torch.load(ckpt, map_location=torch.device("cpu")), strict=False)
+        model.load_state_dict(
+            torch.load(ckpt, map_location=torch.device("cpu")), strict=False
+        )
         return model
 
     def forward(self, input, target):
@@ -44,10 +49,15 @@ def forward(self, input, target):
         feats0, feats1, diffs = {}, {}, {}
         lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
         for kk in range(len(self.chns)):
-            feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(outs1[kk])
+            feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(
+                outs1[kk]
+            )
             diffs[kk] = (feats0[kk] - feats1[kk]) ** 2
 
-        res = [spatial_average(lins[kk].model(diffs[kk]), keepdim=True) for kk in range(len(self.chns))]
+        res = [
+            spatial_average(lins[kk].model(diffs[kk]), keepdim=True)
+            for kk in range(len(self.chns))
+        ]
         val = res[0]
         for l in range(1, len(self.chns)):
             val += res[l]
@@ -57,19 +67,32 @@ def forward(self, input, target):
 class ScalingLayer(nn.Module):
     def __init__(self):
         super(ScalingLayer, self).__init__()
-        self.register_buffer('shift', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
-        self.register_buffer('scale', torch.Tensor([.458, .448, .450])[None, :, None, None])
+        self.register_buffer(
+            "shift", torch.Tensor([-0.030, -0.088, -0.188])[None, :, None, None]
+        )
+        self.register_buffer(
+            "scale", torch.Tensor([0.458, 0.448, 0.450])[None, :, None, None]
+        )
 
     def forward(self, inp):
         return (inp - self.shift) / self.scale
 
 
 class NetLinLayer(nn.Module):
-    """ A single linear layer which does a 1x1 conv """
+    """A single linear layer which does a 1x1 conv"""
+
     def __init__(self, chn_in, chn_out=1, use_dropout=False):
         super(NetLinLayer, self).__init__()
-        layers = [nn.Dropout(), ] if (use_dropout) else []
-        layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False), ]
+        layers = (
+            [
+                nn.Dropout(),
+            ]
+            if (use_dropout)
+            else []
+        )
+        layers += [
+            nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),
+        ]
         self.model = nn.Sequential(*layers)
 
 
@@ -108,16 +131,17 @@ def forward(self, X):
         h_relu4_3 = h
         h = self.slice5(h)
         h_relu5_3 = h
-        vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
+        vgg_outputs = namedtuple(
+            "VggOutputs", ["relu1_2", "relu2_2", "relu3_3", "relu4_3", "relu5_3"]
+        )
         out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
         return out
 
 
-def normalize_tensor(x,eps=1e-10):
-    norm_factor = torch.sqrt(torch.sum(x**2,dim=1,keepdim=True))
-    return x/(norm_factor+eps)
+def normalize_tensor(x, eps=1e-10):
+    norm_factor = torch.sqrt(torch.sum(x**2, dim=1, keepdim=True))
+    return x / (norm_factor + eps)
 
 
 def spatial_average(x, keepdim=True):
-    return x.mean([2,3],keepdim=keepdim)
-
+    return x.mean([2, 3], keepdim=keepdim)
diff --git a/sgm/modules/diffusionmodules/lpips/model.py b/sgm/modules/diffusionmodules/lpips/model.py
index caafbbb0f..43e64ea7a 100644
--- a/sgm/modules/diffusionmodules/lpips/model.py
+++ b/sgm/modules/diffusionmodules/lpips/model.py
@@ -1,21 +1,24 @@
 import functools
+
 import torch.nn as nn
 
 from .util import ActNorm
 
+
 def weights_init(m):
     classname = m.__class__.__name__
-    if classname.find('Conv') != -1:
+    if classname.find("Conv") != -1:
         nn.init.normal_(m.weight.data, 0.0, 0.02)
-    elif classname.find('BatchNorm') != -1:
+    elif classname.find("BatchNorm") != -1:
         nn.init.normal_(m.weight.data, 1.0, 0.02)
         nn.init.constant_(m.bias.data, 0)
 
 
 class NLayerDiscriminator(nn.Module):
     """Defines a PatchGAN discriminator as in Pix2Pix
-        --> see https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/networks.py
+    --> see https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/master/models/networks.py
     """
+
     def __init__(self, input_nc=3, ndf=64, n_layers=3, use_actnorm=False):
         """Construct a PatchGAN discriminator
         Parameters:
@@ -29,35 +32,55 @@ def __init__(self, input_nc=3, ndf=64, n_layers=3, use_actnorm=False):
             norm_layer = nn.BatchNorm2d
         else:
             norm_layer = ActNorm
-        if type(norm_layer) == functools.partial:  # no need to use bias as BatchNorm2d has affine parameters
+        if (
+            type(norm_layer) == functools.partial
+        ):  # no need to use bias as BatchNorm2d has affine parameters
             use_bias = norm_layer.func != nn.BatchNorm2d
         else:
             use_bias = norm_layer != nn.BatchNorm2d
 
         kw = 4
         padw = 1
-        sequence = [nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw), nn.LeakyReLU(0.2, True)]
+        sequence = [
+            nn.Conv2d(input_nc, ndf, kernel_size=kw, stride=2, padding=padw),
+            nn.LeakyReLU(0.2, True),
+        ]
         nf_mult = 1
         nf_mult_prev = 1
         for n in range(1, n_layers):  # gradually increase the number of filters
             nf_mult_prev = nf_mult
-            nf_mult = min(2 ** n, 8)
+            nf_mult = min(2**n, 8)
             sequence += [
-                nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=2, padding=padw, bias=use_bias),
+                nn.Conv2d(
+                    ndf * nf_mult_prev,
+                    ndf * nf_mult,
+                    kernel_size=kw,
+                    stride=2,
+                    padding=padw,
+                    bias=use_bias,
+                ),
                 norm_layer(ndf * nf_mult),
-                nn.LeakyReLU(0.2, True)
+                nn.LeakyReLU(0.2, True),
             ]
 
         nf_mult_prev = nf_mult
-        nf_mult = min(2 ** n_layers, 8)
+        nf_mult = min(2**n_layers, 8)
         sequence += [
-            nn.Conv2d(ndf * nf_mult_prev, ndf * nf_mult, kernel_size=kw, stride=1, padding=padw, bias=use_bias),
+            nn.Conv2d(
+                ndf * nf_mult_prev,
+                ndf * nf_mult,
+                kernel_size=kw,
+                stride=1,
+                padding=padw,
+                bias=use_bias,
+            ),
             norm_layer(ndf * nf_mult),
-            nn.LeakyReLU(0.2, True)
+            nn.LeakyReLU(0.2, True),
         ]
 
         sequence += [
-            nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)]  # output 1 channel prediction map
+            nn.Conv2d(ndf * nf_mult, 1, kernel_size=kw, stride=1, padding=padw)
+        ]  # output 1 channel prediction map
         self.main = nn.Sequential(*sequence)
 
     def forward(self, input):
diff --git a/sgm/modules/diffusionmodules/lpips/util.py b/sgm/modules/diffusionmodules/lpips/util.py
index d52110572..49c76e370 100644
--- a/sgm/modules/diffusionmodules/lpips/util.py
+++ b/sgm/modules/diffusionmodules/lpips/util.py
@@ -1,21 +1,16 @@
-import torch
-import torch.nn as nn
+import hashlib
+import os
 
-import os, hashlib
 import requests
+import torch
+import torch.nn as nn
 from tqdm import tqdm
 
-URL_MAP = {
-    "vgg_lpips": "https://heibox.uni-heidelberg.de/f/607503859c864bc1b30b/?dl=1"
-}
+URL_MAP = {"vgg_lpips": "https://heibox.uni-heidelberg.de/f/607503859c864bc1b30b/?dl=1"}
 
-CKPT_MAP = {
-    "vgg_lpips": "vgg.pth"
-}
+CKPT_MAP = {"vgg_lpips": "vgg.pth"}
 
-MD5_MAP = {
-    "vgg_lpips": "d507d7349b931f0638a25a48a722f98a"
-}
+MD5_MAP = {"vgg_lpips": "d507d7349b931f0638a25a48a722f98a"}
 
 
 def download(url, local_path, chunk_size=1024):
@@ -48,8 +43,9 @@ def get_ckpt_path(name, root, check=False):
 
 
 class ActNorm(nn.Module):
-    def __init__(self, num_features, logdet=False, affine=True,
-                 allow_reverse_init=False):
+    def __init__(
+        self, num_features, logdet=False, affine=True, allow_reverse_init=False
+    ):
         assert affine
         super().__init__()
         self.logdet = logdet
@@ -57,7 +53,7 @@ def __init__(self, num_features, logdet=False, affine=True,
         self.scale = nn.Parameter(torch.ones(1, num_features, 1, 1))
         self.allow_reverse_init = allow_reverse_init
 
-        self.register_buffer('initialized', torch.tensor(0, dtype=torch.uint8))
+        self.register_buffer("initialized", torch.tensor(0, dtype=torch.uint8))
 
     def initialize(self, input):
         with torch.no_grad():
@@ -84,7 +80,7 @@ def forward(self, input, reverse=False):
         if reverse:
             return self.reverse(input)
         if len(input.shape) == 2:
-            input = input[:,:,None,None]
+            input = input[:, :, None, None]
             squeeze = True
         else:
             squeeze = False
@@ -102,7 +98,7 @@ def forward(self, input, reverse=False):
 
         if self.logdet:
             log_abs = torch.log(torch.abs(self.scale))
-            logdet = height*width*torch.sum(log_abs)
+            logdet = height * width * torch.sum(log_abs)
             logdet = logdet * torch.ones(input.shape[0]).to(input)
             return h, logdet
 
@@ -120,7 +116,7 @@ def reverse(self, output):
                 self.initialized.fill_(1)
 
         if len(output.shape) == 2:
-            output = output[:,:,None,None]
+            output = output[:, :, None, None]
             squeeze = True
         else:
             squeeze = False
@@ -129,4 +125,4 @@ def reverse(self, output):
 
         if squeeze:
             h = h.squeeze(-1).squeeze(-1)
-        return h
\ No newline at end of file
+        return h
diff --git a/sgm/modules/diffusionmodules/lpips/vqperceptual.py b/sgm/modules/diffusionmodules/lpips/vqperceptual.py
index 39dc35784..90a3323f2 100644
--- a/sgm/modules/diffusionmodules/lpips/vqperceptual.py
+++ b/sgm/modules/diffusionmodules/lpips/vqperceptual.py
@@ -11,31 +11,43 @@ def __init__(self):
         super().__init__()
 
 
-def adopt_weight(weight, global_step, threshold=0, value=0.):
+def adopt_weight(weight, global_step, threshold=0, value=0.0):
     if global_step < threshold:
         weight = value
     return weight
 
 
 def hinge_d_loss(logits_real, logits_fake):
-    loss_real = torch.mean(F.relu(1. - logits_real))
-    loss_fake = torch.mean(F.relu(1. + logits_fake))
+    loss_real = torch.mean(F.relu(1.0 - logits_real))
+    loss_fake = torch.mean(F.relu(1.0 + logits_fake))
     d_loss = 0.5 * (loss_real + loss_fake)
     return d_loss
 
 
 def vanilla_d_loss(logits_real, logits_fake):
     d_loss = 0.5 * (
-        torch.mean(torch.nn.functional.softplus(-logits_real)) +
-        torch.mean(torch.nn.functional.softplus(logits_fake)))
+        torch.mean(torch.nn.functional.softplus(-logits_real))
+        + torch.mean(torch.nn.functional.softplus(logits_fake))
+    )
     return d_loss
 
 
 class VQLPIPSWithDiscriminator(nn.Module):
-    def __init__(self, disc_start, codebook_weight=1.0, pixelloss_weight=1.0,
-                 disc_num_layers=3, disc_in_channels=3, disc_factor=1.0, disc_weight=1.0,
-                 perceptual_weight=1.0, use_actnorm=False, disc_conditional=False,
-                 disc_ndf=64, disc_loss="hinge"):
+    def __init__(
+        self,
+        disc_start,
+        codebook_weight=1.0,
+        pixelloss_weight=1.0,
+        disc_num_layers=3,
+        disc_in_channels=3,
+        disc_factor=1.0,
+        disc_weight=1.0,
+        perceptual_weight=1.0,
+        use_actnorm=False,
+        disc_conditional=False,
+        disc_ndf=64,
+        disc_loss="hinge",
+    ):
         super().__init__()
         assert disc_loss in ["hinge", "vanilla"]
         self.codebook_weight = codebook_weight
@@ -43,11 +55,12 @@ def __init__(self, disc_start, codebook_weight=1.0, pixelloss_weight=1.0,
         self.perceptual_loss = LPIPS().eval()
         self.perceptual_weight = perceptual_weight
 
-        self.discriminator = NLayerDiscriminator(input_nc=disc_in_channels,
-                                                 n_layers=disc_num_layers,
-                                                 use_actnorm=use_actnorm,
-                                                 ndf=disc_ndf
-                                                 ).apply(weights_init)
+        self.discriminator = NLayerDiscriminator(
+            input_nc=disc_in_channels,
+            n_layers=disc_num_layers,
+            use_actnorm=use_actnorm,
+            ndf=disc_ndf,
+        ).apply(weights_init)
         self.discriminator_iter_start = disc_start
         if disc_loss == "hinge":
             self.disc_loss = hinge_d_loss
@@ -65,25 +78,40 @@ def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None):
             nll_grads = torch.autograd.grad(nll_loss, last_layer, retain_graph=True)[0]
             g_grads = torch.autograd.grad(g_loss, last_layer, retain_graph=True)[0]
         else:
-            nll_grads = torch.autograd.grad(nll_loss, self.last_layer[0], retain_graph=True)[0]
-            g_grads = torch.autograd.grad(g_loss, self.last_layer[0], retain_graph=True)[0]
+            nll_grads = torch.autograd.grad(
+                nll_loss, self.last_layer[0], retain_graph=True
+            )[0]
+            g_grads = torch.autograd.grad(
+                g_loss, self.last_layer[0], retain_graph=True
+            )[0]
 
         d_weight = torch.norm(nll_grads) / (torch.norm(g_grads) + 1e-4)
         d_weight = torch.clamp(d_weight, 0.0, 1e4).detach()
         d_weight = d_weight * self.discriminator_weight
         return d_weight
 
-    def forward(self, codebook_loss, inputs, reconstructions, optimizer_idx,
-                global_step, last_layer=None, cond=None, split="train"):
+    def forward(
+        self,
+        codebook_loss,
+        inputs,
+        reconstructions,
+        optimizer_idx,
+        global_step,
+        last_layer=None,
+        cond=None,
+        split="train",
+    ):
         rec_loss = torch.abs(inputs.contiguous() - reconstructions.contiguous())
         if self.perceptual_weight > 0:
-            p_loss = self.perceptual_loss(inputs.contiguous(), reconstructions.contiguous())
+            p_loss = self.perceptual_loss(
+                inputs.contiguous(), reconstructions.contiguous()
+            )
             rec_loss = rec_loss + self.perceptual_weight * p_loss
         else:
             p_loss = torch.tensor([0.0])
 
         nll_loss = rec_loss
-        #nll_loss = torch.sum(nll_loss) / nll_loss.shape[0]
+        # nll_loss = torch.sum(nll_loss) / nll_loss.shape[0]
         nll_loss = torch.mean(nll_loss)
 
         # now the GAN part
@@ -94,27 +122,38 @@ def forward(self, codebook_loss, inputs, reconstructions, optimizer_idx,
                 logits_fake = self.discriminator(reconstructions.contiguous())
             else:
                 assert self.disc_conditional
-                logits_fake = self.discriminator(torch.cat((reconstructions.contiguous(), cond), dim=1))
+                logits_fake = self.discriminator(
+                    torch.cat((reconstructions.contiguous(), cond), dim=1)
+                )
             g_loss = -torch.mean(logits_fake)
 
             try:
-                d_weight = self.calculate_adaptive_weight(nll_loss, g_loss, last_layer=last_layer)
+                d_weight = self.calculate_adaptive_weight(
+                    nll_loss, g_loss, last_layer=last_layer
+                )
             except RuntimeError:
                 assert not self.training
                 d_weight = torch.tensor(0.0)
 
-            disc_factor = adopt_weight(self.disc_factor, global_step, threshold=self.discriminator_iter_start)
-            loss = nll_loss + d_weight * disc_factor * g_loss + self.codebook_weight * codebook_loss.mean()
-
-            log = {"{}/total_loss".format(split): loss.clone().detach().mean(),
-                   "{}/quant_loss".format(split): codebook_loss.detach().mean(),
-                   "{}/nll_loss".format(split): nll_loss.detach().mean(),
-                   "{}/rec_loss".format(split): rec_loss.detach().mean(),
-                   "{}/p_loss".format(split): p_loss.detach().mean(),
-                   "{}/d_weight".format(split): d_weight.detach(),
-                   "{}/disc_factor".format(split): torch.tensor(disc_factor),
-                   "{}/g_loss".format(split): g_loss.detach().mean(),
-                   }
+            disc_factor = adopt_weight(
+                self.disc_factor, global_step, threshold=self.discriminator_iter_start
+            )
+            loss = (
+                nll_loss
+                + d_weight * disc_factor * g_loss
+                + self.codebook_weight * codebook_loss.mean()
+            )
+
+            log = {
+                "{}/total_loss".format(split): loss.clone().detach().mean(),
+                "{}/quant_loss".format(split): codebook_loss.detach().mean(),
+                "{}/nll_loss".format(split): nll_loss.detach().mean(),
+                "{}/rec_loss".format(split): rec_loss.detach().mean(),
+                "{}/p_loss".format(split): p_loss.detach().mean(),
+                "{}/d_weight".format(split): d_weight.detach(),
+                "{}/disc_factor".format(split): torch.tensor(disc_factor),
+                "{}/g_loss".format(split): g_loss.detach().mean(),
+            }
             return loss, log
 
         if optimizer_idx == 1:
@@ -123,14 +162,21 @@ def forward(self, codebook_loss, inputs, reconstructions, optimizer_idx,
                 logits_real = self.discriminator(inputs.contiguous().detach())
                 logits_fake = self.discriminator(reconstructions.contiguous().detach())
             else:
-                logits_real = self.discriminator(torch.cat((inputs.contiguous().detach(), cond), dim=1))
-                logits_fake = self.discriminator(torch.cat((reconstructions.contiguous().detach(), cond), dim=1))
-
-            disc_factor = adopt_weight(self.disc_factor, global_step, threshold=self.discriminator_iter_start)
+                logits_real = self.discriminator(
+                    torch.cat((inputs.contiguous().detach(), cond), dim=1)
+                )
+                logits_fake = self.discriminator(
+                    torch.cat((reconstructions.contiguous().detach(), cond), dim=1)
+                )
+
+            disc_factor = adopt_weight(
+                self.disc_factor, global_step, threshold=self.discriminator_iter_start
+            )
             d_loss = disc_factor * self.disc_loss(logits_real, logits_fake)
 
-            log = {"{}/disc_loss".format(split): d_loss.clone().detach().mean(),
-                   "{}/logits_real".format(split): logits_real.detach().mean(),
-                   "{}/logits_fake".format(split): logits_fake.detach().mean()
-                   }
+            log = {
+                "{}/disc_loss".format(split): d_loss.clone().detach().mean(),
+                "{}/logits_real".format(split): logits_real.detach().mean(),
+                "{}/logits_fake".format(split): logits_fake.detach().mean(),
+            }
             return d_loss, log
diff --git a/sgm/modules/diffusionmodules/wrappers.py b/sgm/modules/diffusionmodules/wrappers.py
index 87ede6061..37449ea63 100644
--- a/sgm/modules/diffusionmodules/wrappers.py
+++ b/sgm/modules/diffusionmodules/wrappers.py
@@ -30,5 +30,5 @@ def forward(
             timesteps=t,
             context=c.get("crossattn", None),
             y=c.get("vector", None),
-            **kwargs
+            **kwargs,
         )
diff --git a/sgm/modules/distributions/distributions.py b/sgm/modules/distributions/distributions.py
index 0b61f0307..016be3552 100644
--- a/sgm/modules/distributions/distributions.py
+++ b/sgm/modules/distributions/distributions.py
@@ -1,5 +1,5 @@
-import torch
 import numpy as np
+import torch
 
 
 class AbstractDistribution:

From c74373109df0a443c15f4cb83bee82463f8e083f Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 14:14:08 +0000
Subject: [PATCH 04/11] mv lpips, license

---
 sgm/modules/autoencoding/losses/__init__.py   |  6 +-
 .../lpips/__init__.py                         |  0
 sgm/modules/autoencoding/lpips/loss/LICENSE   | 23 ++++++++
 .../autoencoding/lpips/loss/__init__.py       |  0
 .../lpips/loss}/lpips.py                      |  2 +-
 sgm/modules/autoencoding/lpips/model/LICENSE  | 58 +++++++++++++++++++
 .../autoencoding/lpips/model/__init__.py      |  0
 .../lpips/model}/model.py                     |  2 +-
 .../lpips/util.py                             |  0
 .../lpips/vqperceptual.py                     |  4 +-
 10 files changed, 88 insertions(+), 7 deletions(-)
 rename sgm/modules/{diffusionmodules => autoencoding}/lpips/__init__.py (100%)
 create mode 100644 sgm/modules/autoencoding/lpips/loss/LICENSE
 create mode 100644 sgm/modules/autoencoding/lpips/loss/__init__.py
 rename sgm/modules/{diffusionmodules/lpips => autoencoding/lpips/loss}/lpips.py (99%)
 create mode 100644 sgm/modules/autoencoding/lpips/model/LICENSE
 create mode 100644 sgm/modules/autoencoding/lpips/model/__init__.py
 rename sgm/modules/{diffusionmodules/lpips => autoencoding/lpips/model}/model.py (99%)
 rename sgm/modules/{diffusionmodules => autoencoding}/lpips/util.py (100%)
 rename sgm/modules/{diffusionmodules => autoencoding}/lpips/vqperceptual.py (98%)

diff --git a/sgm/modules/autoencoding/losses/__init__.py b/sgm/modules/autoencoding/losses/__init__.py
index 1de0d60ef..cc1bdb491 100644
--- a/sgm/modules/autoencoding/losses/__init__.py
+++ b/sgm/modules/autoencoding/losses/__init__.py
@@ -5,9 +5,9 @@
 from einops import rearrange
 
 from ....util import default, instantiate_from_config
-from ...diffusionmodules.lpips.lpips import LPIPS
-from ...diffusionmodules.lpips.model import NLayerDiscriminator, weights_init
-from ...diffusionmodules.lpips.vqperceptual import hinge_d_loss, vanilla_d_loss
+from ..lpips.loss.lpips import LPIPS
+from ..lpips.model.model import NLayerDiscriminator, weights_init
+from ..lpips.vqperceptual import hinge_d_loss, vanilla_d_loss
 
 
 def adopt_weight(weight, global_step, threshold=0, value=0.0):
diff --git a/sgm/modules/diffusionmodules/lpips/__init__.py b/sgm/modules/autoencoding/lpips/__init__.py
similarity index 100%
rename from sgm/modules/diffusionmodules/lpips/__init__.py
rename to sgm/modules/autoencoding/lpips/__init__.py
diff --git a/sgm/modules/autoencoding/lpips/loss/LICENSE b/sgm/modules/autoencoding/lpips/loss/LICENSE
new file mode 100644
index 000000000..924cfc85b
--- /dev/null
+++ b/sgm/modules/autoencoding/lpips/loss/LICENSE
@@ -0,0 +1,23 @@
+Copyright (c) 2018, Richard Zhang, Phillip Isola, Alexei A. Efros, Eli Shechtman, Oliver Wang
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/sgm/modules/autoencoding/lpips/loss/__init__.py b/sgm/modules/autoencoding/lpips/loss/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/sgm/modules/diffusionmodules/lpips/lpips.py b/sgm/modules/autoencoding/lpips/loss/lpips.py
similarity index 99%
rename from sgm/modules/diffusionmodules/lpips/lpips.py
rename to sgm/modules/autoencoding/lpips/loss/lpips.py
index 2a35f92b0..d4922258d 100644
--- a/sgm/modules/diffusionmodules/lpips/lpips.py
+++ b/sgm/modules/autoencoding/lpips/loss/lpips.py
@@ -6,7 +6,7 @@
 import torch.nn as nn
 from torchvision import models
 
-from .util import get_ckpt_path
+from ..util import get_ckpt_path
 
 
 class LPIPS(nn.Module):
diff --git a/sgm/modules/autoencoding/lpips/model/LICENSE b/sgm/modules/autoencoding/lpips/model/LICENSE
new file mode 100644
index 000000000..4b356e66b
--- /dev/null
+++ b/sgm/modules/autoencoding/lpips/model/LICENSE
@@ -0,0 +1,58 @@
+Copyright (c) 2017, Jun-Yan Zhu and Taesung Park
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+--------------------------- LICENSE FOR pix2pix --------------------------------
+BSD License
+
+For pix2pix software
+Copyright (c) 2016, Phillip Isola and Jun-Yan Zhu
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+----------------------------- LICENSE FOR DCGAN --------------------------------
+BSD License
+
+For dcgan.torch software
+
+Copyright (c) 2015, Facebook, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+Neither the name Facebook nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/sgm/modules/autoencoding/lpips/model/__init__.py b/sgm/modules/autoencoding/lpips/model/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/sgm/modules/diffusionmodules/lpips/model.py b/sgm/modules/autoencoding/lpips/model/model.py
similarity index 99%
rename from sgm/modules/diffusionmodules/lpips/model.py
rename to sgm/modules/autoencoding/lpips/model/model.py
index 43e64ea7a..66357d4e6 100644
--- a/sgm/modules/diffusionmodules/lpips/model.py
+++ b/sgm/modules/autoencoding/lpips/model/model.py
@@ -2,7 +2,7 @@
 
 import torch.nn as nn
 
-from .util import ActNorm
+from ..util import ActNorm
 
 
 def weights_init(m):
diff --git a/sgm/modules/diffusionmodules/lpips/util.py b/sgm/modules/autoencoding/lpips/util.py
similarity index 100%
rename from sgm/modules/diffusionmodules/lpips/util.py
rename to sgm/modules/autoencoding/lpips/util.py
diff --git a/sgm/modules/diffusionmodules/lpips/vqperceptual.py b/sgm/modules/autoencoding/lpips/vqperceptual.py
similarity index 98%
rename from sgm/modules/diffusionmodules/lpips/vqperceptual.py
rename to sgm/modules/autoencoding/lpips/vqperceptual.py
index 90a3323f2..2698648a7 100644
--- a/sgm/modules/diffusionmodules/lpips/vqperceptual.py
+++ b/sgm/modules/autoencoding/lpips/vqperceptual.py
@@ -2,8 +2,8 @@
 import torch.nn as nn
 import torch.nn.functional as F
 
-from .lpips import LPIPS
-from .model import NLayerDiscriminator, weights_init
+from .loss.lpips import LPIPS
+from .model.model import NLayerDiscriminator, weights_init
 
 
 class DummyLoss(nn.Module):

From 04e76dfb45fe9103f2e0dacf30aa6632fa9e640f Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 14:33:45 +0000
Subject: [PATCH 05/11] clean vq, fix path

---
 .../autoencoding/lpips/vqperceptual.py        | 165 ------------------
 sgm/modules/diffusionmodules/loss.py          |   2 +-
 2 files changed, 1 insertion(+), 166 deletions(-)

diff --git a/sgm/modules/autoencoding/lpips/vqperceptual.py b/sgm/modules/autoencoding/lpips/vqperceptual.py
index 2698648a7..6195f0a6e 100644
--- a/sgm/modules/autoencoding/lpips/vqperceptual.py
+++ b/sgm/modules/autoencoding/lpips/vqperceptual.py
@@ -1,21 +1,6 @@
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
 
-from .loss.lpips import LPIPS
-from .model.model import NLayerDiscriminator, weights_init
-
-
-class DummyLoss(nn.Module):
-    def __init__(self):
-        super().__init__()
-
-
-def adopt_weight(weight, global_step, threshold=0, value=0.0):
-    if global_step < threshold:
-        weight = value
-    return weight
-
 
 def hinge_d_loss(logits_real, logits_fake):
     loss_real = torch.mean(F.relu(1.0 - logits_real))
@@ -30,153 +15,3 @@ def vanilla_d_loss(logits_real, logits_fake):
         + torch.mean(torch.nn.functional.softplus(logits_fake))
     )
     return d_loss
-
-
-class VQLPIPSWithDiscriminator(nn.Module):
-    def __init__(
-        self,
-        disc_start,
-        codebook_weight=1.0,
-        pixelloss_weight=1.0,
-        disc_num_layers=3,
-        disc_in_channels=3,
-        disc_factor=1.0,
-        disc_weight=1.0,
-        perceptual_weight=1.0,
-        use_actnorm=False,
-        disc_conditional=False,
-        disc_ndf=64,
-        disc_loss="hinge",
-    ):
-        super().__init__()
-        assert disc_loss in ["hinge", "vanilla"]
-        self.codebook_weight = codebook_weight
-        self.pixel_weight = pixelloss_weight
-        self.perceptual_loss = LPIPS().eval()
-        self.perceptual_weight = perceptual_weight
-
-        self.discriminator = NLayerDiscriminator(
-            input_nc=disc_in_channels,
-            n_layers=disc_num_layers,
-            use_actnorm=use_actnorm,
-            ndf=disc_ndf,
-        ).apply(weights_init)
-        self.discriminator_iter_start = disc_start
-        if disc_loss == "hinge":
-            self.disc_loss = hinge_d_loss
-        elif disc_loss == "vanilla":
-            self.disc_loss = vanilla_d_loss
-        else:
-            raise ValueError(f"Unknown GAN loss '{disc_loss}'.")
-        print(f"VQLPIPSWithDiscriminator running with {disc_loss} loss.")
-        self.disc_factor = disc_factor
-        self.discriminator_weight = disc_weight
-        self.disc_conditional = disc_conditional
-
-    def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None):
-        if last_layer is not None:
-            nll_grads = torch.autograd.grad(nll_loss, last_layer, retain_graph=True)[0]
-            g_grads = torch.autograd.grad(g_loss, last_layer, retain_graph=True)[0]
-        else:
-            nll_grads = torch.autograd.grad(
-                nll_loss, self.last_layer[0], retain_graph=True
-            )[0]
-            g_grads = torch.autograd.grad(
-                g_loss, self.last_layer[0], retain_graph=True
-            )[0]
-
-        d_weight = torch.norm(nll_grads) / (torch.norm(g_grads) + 1e-4)
-        d_weight = torch.clamp(d_weight, 0.0, 1e4).detach()
-        d_weight = d_weight * self.discriminator_weight
-        return d_weight
-
-    def forward(
-        self,
-        codebook_loss,
-        inputs,
-        reconstructions,
-        optimizer_idx,
-        global_step,
-        last_layer=None,
-        cond=None,
-        split="train",
-    ):
-        rec_loss = torch.abs(inputs.contiguous() - reconstructions.contiguous())
-        if self.perceptual_weight > 0:
-            p_loss = self.perceptual_loss(
-                inputs.contiguous(), reconstructions.contiguous()
-            )
-            rec_loss = rec_loss + self.perceptual_weight * p_loss
-        else:
-            p_loss = torch.tensor([0.0])
-
-        nll_loss = rec_loss
-        # nll_loss = torch.sum(nll_loss) / nll_loss.shape[0]
-        nll_loss = torch.mean(nll_loss)
-
-        # now the GAN part
-        if optimizer_idx == 0:
-            # generator update
-            if cond is None:
-                assert not self.disc_conditional
-                logits_fake = self.discriminator(reconstructions.contiguous())
-            else:
-                assert self.disc_conditional
-                logits_fake = self.discriminator(
-                    torch.cat((reconstructions.contiguous(), cond), dim=1)
-                )
-            g_loss = -torch.mean(logits_fake)
-
-            try:
-                d_weight = self.calculate_adaptive_weight(
-                    nll_loss, g_loss, last_layer=last_layer
-                )
-            except RuntimeError:
-                assert not self.training
-                d_weight = torch.tensor(0.0)
-
-            disc_factor = adopt_weight(
-                self.disc_factor, global_step, threshold=self.discriminator_iter_start
-            )
-            loss = (
-                nll_loss
-                + d_weight * disc_factor * g_loss
-                + self.codebook_weight * codebook_loss.mean()
-            )
-
-            log = {
-                "{}/total_loss".format(split): loss.clone().detach().mean(),
-                "{}/quant_loss".format(split): codebook_loss.detach().mean(),
-                "{}/nll_loss".format(split): nll_loss.detach().mean(),
-                "{}/rec_loss".format(split): rec_loss.detach().mean(),
-                "{}/p_loss".format(split): p_loss.detach().mean(),
-                "{}/d_weight".format(split): d_weight.detach(),
-                "{}/disc_factor".format(split): torch.tensor(disc_factor),
-                "{}/g_loss".format(split): g_loss.detach().mean(),
-            }
-            return loss, log
-
-        if optimizer_idx == 1:
-            # second pass for discriminator update
-            if cond is None:
-                logits_real = self.discriminator(inputs.contiguous().detach())
-                logits_fake = self.discriminator(reconstructions.contiguous().detach())
-            else:
-                logits_real = self.discriminator(
-                    torch.cat((inputs.contiguous().detach(), cond), dim=1)
-                )
-                logits_fake = self.discriminator(
-                    torch.cat((reconstructions.contiguous().detach(), cond), dim=1)
-                )
-
-            disc_factor = adopt_weight(
-                self.disc_factor, global_step, threshold=self.discriminator_iter_start
-            )
-            d_loss = disc_factor * self.disc_loss(logits_real, logits_fake)
-
-            log = {
-                "{}/disc_loss".format(split): d_loss.clone().detach().mean(),
-                "{}/logits_real".format(split): logits_real.detach().mean(),
-                "{}/logits_fake".format(split): logits_fake.detach().mean(),
-            }
-            return d_loss, log
diff --git a/sgm/modules/diffusionmodules/loss.py b/sgm/modules/diffusionmodules/loss.py
index fcd10865b..508230c9f 100644
--- a/sgm/modules/diffusionmodules/loss.py
+++ b/sgm/modules/diffusionmodules/loss.py
@@ -5,7 +5,7 @@
 from omegaconf import ListConfig
 
 from ...util import append_dims, instantiate_from_config
-from .lpips import LPIPS
+from ...modules.autoencoding.lpips.loss.lpips import LPIPS
 
 
 class StandardDiffusionLoss(nn.Module):

From 1814ad246def3923f74c30b81a5611be5c4100a1 Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 15:13:03 +0000
Subject: [PATCH 06/11] fix loss path, gitignore

---
 sgm/modules/autoencoding/lpips/loss/.gitignore | 1 +
 sgm/modules/autoencoding/lpips/loss/lpips.py   | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 sgm/modules/autoencoding/lpips/loss/.gitignore

diff --git a/sgm/modules/autoencoding/lpips/loss/.gitignore b/sgm/modules/autoencoding/lpips/loss/.gitignore
new file mode 100644
index 000000000..a92958a1c
--- /dev/null
+++ b/sgm/modules/autoencoding/lpips/loss/.gitignore
@@ -0,0 +1 @@
+vgg.pth
\ No newline at end of file
diff --git a/sgm/modules/autoencoding/lpips/loss/lpips.py b/sgm/modules/autoencoding/lpips/loss/lpips.py
index d4922258d..3e34f3d08 100644
--- a/sgm/modules/autoencoding/lpips/loss/lpips.py
+++ b/sgm/modules/autoencoding/lpips/loss/lpips.py
@@ -26,7 +26,7 @@ def __init__(self, use_dropout=True):
             param.requires_grad = False
 
     def load_from_pretrained(self, name="vgg_lpips"):
-        ckpt = get_ckpt_path(name, "taming/modules/autoencoder/lpips")
+        ckpt = get_ckpt_path(name, "sgm/modules/autoencoding/lpips/loss")
         self.load_state_dict(
             torch.load(ckpt, map_location=torch.device("cpu")), strict=False
         )

From 800e8d39079386cbe453bc352e05712ced0ea612 Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Tue, 25 Jul 2023 16:40:20 +0000
Subject: [PATCH 07/11] tested requirements pt13

---
 requirements/pt13.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/requirements/pt13.txt b/requirements/pt13.txt
index ce91e506d..8bae6c3bc 100644
--- a/requirements/pt13.txt
+++ b/requirements/pt13.txt
@@ -8,7 +8,7 @@ invisible-watermark>=0.2.0
 kornia==0.6.9
 matplotlib>=3.7.2
 natsort>=8.4.0
-numpy>=1.25.1
+numpy>=1.24.4
 omegaconf>=2.3.0
 onnx<=1.12.0
 open-clip-torch>=2.20.0
@@ -18,8 +18,8 @@ pillow>=9.5.0
 pudb>=2022.1.3
 pytorch-lightning==1.8.5
 pyyaml>=6.0.1
-scipy>=1.11.1
-streamlit>=0.73.1
+scipy>=1.10.1
+streamlit>=1.25.0
 tensorboardx==2.5.1
 timm>=0.9.2
 tokenizers==0.12.1
@@ -36,4 +36,4 @@ urllib3<1.27,>=1.25.4
 wandb>=0.15.6
 webdataset>=0.2.33
 wheel>=0.41.0
-xformers==0.0.16
+xformers==0.0.16
\ No newline at end of file

From 4207a3ade75e164bc96e13d0c9ffcfe4a84dbde8 Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Wed, 26 Jul 2023 09:04:58 +0000
Subject: [PATCH 08/11] fix numpy req for python3.8, add tests

---
 .github/worflows/test-build.yaml | 26 ++++++++++++++++++++++++++
 requirements/pt2.txt             |  2 +-
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 .github/worflows/test-build.yaml

diff --git a/.github/worflows/test-build.yaml b/.github/worflows/test-build.yaml
new file mode 100644
index 000000000..ffbeff460
--- /dev/null
+++ b/.github/worflows/test-build.yaml
@@ -0,0 +1,26 @@
+name: Build package
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: ["3.8", "3.10"]
+        requirements-file: ["pt2", "pt13"]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements/${{ matrix.requirements-file }}.txt
+          pip install .
\ No newline at end of file
diff --git a/requirements/pt2.txt b/requirements/pt2.txt
index f41dbceb5..ce3b92510 100644
--- a/requirements/pt2.txt
+++ b/requirements/pt2.txt
@@ -10,7 +10,7 @@ kornia==0.6.9
 matplotlib>=3.7.2
 natsort>=8.4.0
 ninja>=1.11.1
-numpy>=1.25.1
+numpy>=1.24.4
 omegaconf>=2.3.0
 open-clip-torch>=2.20.0
 opencv-python==4.6.0.66

From a7d408afab28970b020c39179115ecd3d3975c9d Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Wed, 26 Jul 2023 09:08:50 +0000
Subject: [PATCH 09/11] fix name

---
 .github/{worflows => workflows}/test-build.yaml | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename .github/{worflows => workflows}/test-build.yaml (100%)

diff --git a/.github/worflows/test-build.yaml b/.github/workflows/test-build.yaml
similarity index 100%
rename from .github/worflows/test-build.yaml
rename to .github/workflows/test-build.yaml

From 77907260fef9e1c4ee102bf9b768b4d66dba3d45 Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Wed, 26 Jul 2023 09:10:45 +0000
Subject: [PATCH 10/11] fix dep scipy 3.8 pt2

---
 requirements/pt2.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/pt2.txt b/requirements/pt2.txt
index ce3b92510..0ef6ed184 100644
--- a/requirements/pt2.txt
+++ b/requirements/pt2.txt
@@ -19,7 +19,7 @@ pillow>=9.5.0
 pudb>=2022.1.3
 pytorch-lightning==2.0.1
 pyyaml>=6.0.1
-scipy>=1.11.1
+scipy>=1.10.1
 streamlit>=0.73.1
 tensorboardx==2.6
 timm>=0.9.2

From 52340147602295b0453f5bcc78c9cf67724ddd1c Mon Sep 17 00:00:00 2001
From: benjaminaubin <aubin.benjamin@gmail.com>
Date: Wed, 26 Jul 2023 09:53:12 +0000
Subject: [PATCH 11/11] add black test formatter

---
 .github/workflows/black.yml | 15 +++++++++++++++
 requirements/pt13.txt       |  1 +
 requirements/pt2.txt        |  1 +
 3 files changed, 17 insertions(+)
 create mode 100644 .github/workflows/black.yml

diff --git a/.github/workflows/black.yml b/.github/workflows/black.yml
new file mode 100644
index 000000000..80823b44b
--- /dev/null
+++ b/.github/workflows/black.yml
@@ -0,0 +1,15 @@
+name: Run black
+on: [push, pull_request]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install venv
+        run: |
+          sudo apt-get -y install python3.10-venv
+      - uses: psf/black@stable
+        with:
+          options: "--check --verbose -l88"
+          src: "./sgm ./scripts ./main.py"
diff --git a/requirements/pt13.txt b/requirements/pt13.txt
index 8bae6c3bc..b4f1272d8 100644
--- a/requirements/pt13.txt
+++ b/requirements/pt13.txt
@@ -1,3 +1,4 @@
+black==23.7.0
 chardet>=5.1.0
 clip @ git+https://github.com/openai/CLIP.git
 einops>=0.6.1
diff --git a/requirements/pt2.txt b/requirements/pt2.txt
index 0ef6ed184..003a5264d 100644
--- a/requirements/pt2.txt
+++ b/requirements/pt2.txt
@@ -35,4 +35,5 @@ triton==2.0.0
 urllib3<1.27,>=1.25.4
 wandb>=0.15.6
 webdataset>=0.2.33
+wheel>=0.41.0
 xformers>=0.0.20