From 5da694a7505e03487b116f446de13570c8b7aa8d Mon Sep 17 00:00:00 2001
From: fszontagh
Date: Fri, 1 May 2026 17:14:22 +0200
Subject: [PATCH] Skip empty MultiLoraAdapter when no LoRAs target a model

apply_loras_at_runtime always wraps each model (cond_stage, diffusion,
first_stage) with a MultiLoraAdapter, even when no LoRA tensors match that
model's prefix. The empty adapter routes every linear/conv through
forward_with_lora() instead of the direct kernel path, adding an extra
pointer indirection and a no-op iteration over an empty lora_models vector
for every weighted op in the model.

Skip the wrap when the matching lora_models list is empty so unaffected
models keep the fast direct path. Also avoids attaching a stale adapter to
first_stage_model in the common case where the LoRA only targets the
diffusion model. set_weight_adapter(nullptr) is already called at the top
of apply_loras_at_runtime, so skipping the assignment leaves the adapter
correctly cleared.
---
 src/stable-diffusion.cpp | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
index 88102ff61..860cff85c 100644
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@@ -1104,8 +1104,13 @@ class StableDiffusionGGML {
                     cond_stage_lora_models.push_back(lora);
                 }
             }
-            auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(cond_stage_lora_models);
-            cond_stage_model->set_weight_adapter(multi_lora_adapter);
+            // Only attach the adapter when there are LoRAs targeting the cond_stage model.
+            // An empty MultiLoraAdapter still routes every linear/conv through
+            // forward_with_lora() instead of the direct kernel path — slower for no benefit.
+            if (!cond_stage_lora_models.empty()) {
+                auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(cond_stage_lora_models);
+                cond_stage_model->set_weight_adapter(multi_lora_adapter);
+            }
         }
         if (diffusion_model) {
             std::vector<std::shared_ptr<LoraModel>> lora_models;
@@ -1136,10 +1141,12 @@ class StableDiffusionGGML {
                     diffusion_lora_models.push_back(lora);
                 }
             }
-            auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(diffusion_lora_models);
-            diffusion_model->set_weight_adapter(multi_lora_adapter);
-            if (high_noise_diffusion_model) {
-                high_noise_diffusion_model->set_weight_adapter(multi_lora_adapter);
+            if (!diffusion_lora_models.empty()) {
+                auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(diffusion_lora_models);
+                diffusion_model->set_weight_adapter(multi_lora_adapter);
+                if (high_noise_diffusion_model) {
+                    high_noise_diffusion_model->set_weight_adapter(multi_lora_adapter);
+                }
             }
         }

@@ -1172,8 +1179,10 @@ class StableDiffusionGGML {
                     first_stage_lora_models.push_back(lora);
                 }
             }
-            auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(first_stage_lora_models);
-            first_stage_model->set_weight_adapter(multi_lora_adapter);
+            if (!first_stage_lora_models.empty()) {
+                auto multi_lora_adapter = std::make_shared<MultiLoraAdapter>(first_stage_lora_models);
+                first_stage_model->set_weight_adapter(multi_lora_adapter);
+            }
         }
     }