From 3eeb507b229acdeb48e2781413dda5afbf21515d Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 18 Jul 2023 15:56:16 +0300
Subject: [PATCH 1/3] Remove old commented-out attention code and max_neg_value helper

---
 sgm/modules/attention.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/sgm/modules/attention.py b/sgm/modules/attention.py
index a17edda72..e1274afca 100644
--- a/sgm/modules/attention.py
+++ b/sgm/modules/attention.py
@@ -67,10 +67,6 @@ def default(val, d):
     return d() if isfunction(d) else d
 
 
-def max_neg_value(t):
-    return -torch.finfo(t.dtype).max
-
-
 def init_(tensor):
     dim = tensor.shape[-1]
     std = 1 / math.sqrt(dim)
@@ -251,23 +247,6 @@ def forward(
 
         q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v))
 
-        ## old
-        """
-        sim = einsum('b i d, b j d -> b i j', q, k) * self.scale
-        del q, k
-
-        if exists(mask):
-            mask = rearrange(mask, 'b ... -> b (...)')
-            max_neg_value = -torch.finfo(sim.dtype).max
-            mask = repeat(mask, 'b j -> (b h) () j', h=h)
-            sim.masked_fill_(~mask, max_neg_value)
-
-        # attention, what we cannot get enough of
-        sim = sim.softmax(dim=-1)
-
-        out = einsum('b i j, b j d -> b i d', sim, v)
-        """
-        ## new
         with sdp_kernel(**BACKEND_MAP[self.backend]):
             # print("dispatching into backend", self.backend, "q/k/v shape: ", q.shape, k.shape, v.shape)
             out = F.scaled_dot_product_attention(

From 358f1faca36826b5c8911e382f1c071dd95f6613 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 18 Jul 2023 15:57:19 +0300
Subject: [PATCH 2/3] Mark two functions as likely unused

---
 sgm/modules/attention.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sgm/modules/attention.py b/sgm/modules/attention.py
index e1274afca..9b0a4df93 100644
--- a/sgm/modules/attention.py
+++ b/sgm/modules/attention.py
@@ -57,7 +57,7 @@ def exists(val):
     return val is not None
 
 
-def uniq(arr):
+def uniq(arr):  # TODO: this seems unused
     return {el: True for el in arr}.keys()
 
 
@@ -67,7 +67,7 @@ def default(val, d):
     return d() if isfunction(d) else d
 
 
-def init_(tensor):
+def init_(tensor):  # TODO: this seems unused
     dim = tensor.shape[-1]
     std = 1 / math.sqrt(dim)
     tensor.uniform_(-std, std)

From 8448386e6a719452dbbe27e4a0e8e614d91de6b2 Mon Sep 17 00:00:00 2001
From: Aarni Koskela <akx@iki.fi>
Date: Tue, 18 Jul 2023 15:58:10 +0300
Subject: [PATCH 3/3] Use exists() and default() from sgm.util

---
 sgm/modules/attention.py | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/sgm/modules/attention.py b/sgm/modules/attention.py
index 9b0a4df93..88005b8e8 100644
--- a/sgm/modules/attention.py
+++ b/sgm/modules/attention.py
@@ -1,5 +1,4 @@
 import math
-from inspect import isfunction
 from typing import Any, Optional
 
 import torch
@@ -7,6 +6,8 @@
 from einops import rearrange, repeat
 from packaging import version
 from torch import nn
+from ..util import exists, default
+
 
 if version.parse(torch.__version__) >= version.parse("2.0.0"):
     SDP_IS_AVAILABLE = True
@@ -53,20 +54,10 @@
 from .diffusionmodules.util import checkpoint
 
 
-def exists(val):
-    return val is not None
-
-
 def uniq(arr):  # TODO: this seems unused
     return {el: True for el in arr}.keys()
 
 
-def default(val, d):
-    if exists(val):
-        return val
-    return d() if isfunction(d) else d
-
-
 def init_(tensor):  # TODO: this seems unused
     dim = tensor.shape[-1]
     std = 1 / math.sqrt(dim)