From 8d5a4461bde5e57ef5401aca91ef393e84c9e421 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Tue, 5 May 2026 18:34:00 -0300 Subject: [PATCH 1/6] refactor: merge Euler and Euler A implementations The sigma_to == 0 simplification is: d = (x - denoised) / sigma x = x + d * (sigma_to - sigma) = x + (x - denoised) / sigma * (0 - sigma) = x + (x - denoised) * -1 = denoised For eta == 0, sigma_down = sigma_to, and sigma_up = 0. The non-flow case is straightforward: x = x + d * (sigma_down - sigma) = x + d * (sigma_to - sigma) The flow case: sigma_ratio = sigma_down / sigma = sigma_to / sigma x = sigma_ratio * x + (1 - sigma_ratio) * denoised = x * sigma_ratio + denoised * (1 - sigma_ratio) = x * sigma_to / sigma - denoised * (sigma_to / sigma - 1) = x + x * sigma_to / sigma - x - denoised * sigma_to / sigma + denoised = x + (x - denoised) * (sigma_to / sigma - 1) = x + (x - denoised) / sigma * (sigma_to - sigma) = x + d * (sigma_to - sigma) --- src/denoiser.hpp | 78 ++++++++++++++++-------------------------------- 1 file changed, 25 insertions(+), 53 deletions(-) diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 831da2580..da4b60cca 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -823,63 +823,38 @@ static std::tuple get_ancestral_step(float sigma_from, static sd::Tensor sample_euler_ancestral(denoise_cb_t model, sd::Tensor x, const std::vector& sigmas, - std::shared_ptr rng, - float eta) { + std::shared_ptr rng = nullptr, + bool is_flow_denoiser = false, + float eta = 0.f) { int steps = static_cast(sigmas.size()) - 1; for (int i = 0; i < steps; i++) { float sigma = sigmas[i]; + float sigma_to = sigmas[i + 1]; auto denoised_opt = model(x, sigma, i + 1); if (denoised_opt.empty()) { return {}; } sd::Tensor denoised = std::move(denoised_opt); - sd::Tensor d = (x - denoised) / sigma; - auto [sigma_down, sigma_up] = get_ancestral_step(sigmas[i], sigmas[i + 1], eta); - x += d * (sigma_down - sigmas[i]); - if (sigmas[i + 1] > 0) { - x += sd::Tensor::randn_like(x, 
rng) * sigma_up; - } - } - return x; -} - -static sd::Tensor sample_euler_flow(denoise_cb_t model, - sd::Tensor x, - const std::vector& sigmas, - std::shared_ptr rng, - float eta) { - int steps = static_cast(sigmas.size()) - 1; - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - auto denoised_opt = model(x, sigma, i + 1); - if (denoised_opt.empty()) { - return {}; - } - sd::Tensor denoised = std::move(denoised_opt); - auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma, sigmas[i + 1], eta); - float sigma_ratio = sigma_down / sigma; - x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised; - - if (sigma_up > 0.0f) { - x = alpha_scale * x + sd::Tensor::randn_like(x, rng) * sigma_up; - } - } - return x; -} - -static sd::Tensor sample_euler(denoise_cb_t model, - sd::Tensor x, - const std::vector& sigmas) { - int steps = static_cast(sigmas.size()) - 1; - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - auto denoised_opt = model(x, sigma, i + 1); - if (denoised_opt.empty()) { - return {}; + if (sigma_to == 0.f) { + x = denoised; + } else if (eta == 0.f) { + sd::Tensor d = (x - denoised) / sigma; + x += d * (sigma_to - sigma); + } else if (is_flow_denoiser) { + auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma, sigma_to, eta); + float sigma_ratio = sigma_down / sigma; + x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised; + if (sigma_up > 0.f) { + x = alpha_scale * x + sd::Tensor::randn_like(x, rng) * sigma_up; + } + } else { + sd::Tensor d = (x - denoised) / sigma; + auto [sigma_down, sigma_up] = get_ancestral_step(sigma, sigma_to, eta); + x += d * (sigma_down - sigma); + if (sigma_up > 0.f) { + x += sd::Tensor::randn_like(x, rng) * sigma_up; + } } - sd::Tensor denoised = std::move(denoised_opt); - sd::Tensor d = (x - denoised) / sigma; - x += d * (sigmas[i + 1] - sigma); } return x; } @@ -1659,12 +1634,9 @@ static sd::Tensor sample_k_diffusion(sample_method_t method, bool is_flow_denoiser) { switch 
(method) { case EULER_A_SAMPLE_METHOD: - if (is_flow_denoiser) - return sample_euler_flow(model, std::move(x), sigmas, rng, eta); - else - return sample_euler_ancestral(model, std::move(x), sigmas, rng, eta); + return sample_euler_ancestral(model, std::move(x), sigmas, rng, is_flow_denoiser, eta); case EULER_SAMPLE_METHOD: - return sample_euler(model, std::move(x), sigmas); + return sample_euler_ancestral(model, std::move(x), sigmas); case HEUN_SAMPLE_METHOD: return sample_heun(model, std::move(x), sigmas); case DPM2_SAMPLE_METHOD: From 8c70013f7078ed48626e9047651aee85905dedeb Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Tue, 5 May 2026 18:34:52 -0300 Subject: [PATCH 2/6] refactor: merge Euler Ancestral calculations Euler Ancestral does: d = (x - denoised) / sigma x = x + d * (sigma_down - sigma) = x + (x - denoised) / sigma * (sigma_down - sigma) = x + (x - denoised) * (sigma_down / sigma - 1) = x + (x - denoised) * (sigma_ratio - 1) = x + x * sigma_ratio - x - denoised * sigma_ratio + denoised = x * sigma_ratio + denoised * (1 - sigma_ratio) The ancestral noise is also identical, except for the alpha_scale. I've kept the explicit test just to avoid an unnecessary tensor multiplication. Also, use the same calculation for the deterministic Euler implementation: it has one less tensor operation, and slightly better numerical stability. 
--- src/denoiser.hpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/src/denoiser.hpp b/src/denoiser.hpp index da4b60cca..91a0ee751 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -838,20 +838,16 @@ static sd::Tensor sample_euler_ancestral(denoise_cb_t model, if (sigma_to == 0.f) { x = denoised; } else if (eta == 0.f) { - sd::Tensor d = (x - denoised) / sigma; - x += d * (sigma_to - sigma); - } else if (is_flow_denoiser) { - auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step_flow(sigma, sigma_to, eta); + float sigma_ratio = sigma_to / sigma; + x = sigma_ratio * x + (1.0 - sigma_ratio) * denoised; + } else { + auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma, sigma_to, eta, is_flow_denoiser); float sigma_ratio = sigma_down / sigma; x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised; if (sigma_up > 0.f) { - x = alpha_scale * x + sd::Tensor::randn_like(x, rng) * sigma_up; - } - } else { - sd::Tensor d = (x - denoised) / sigma; - auto [sigma_down, sigma_up] = get_ancestral_step(sigma, sigma_to, eta); - x += d * (sigma_down - sigma); - if (sigma_up > 0.f) { + if (is_flow_denoiser) { + x *= alpha_scale; + } x += sd::Tensor::randn_like(x, rng) * sigma_up; } } From 7d692193bb66469015146330e08a46cf70eb5085 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Tue, 5 May 2026 18:36:35 -0300 Subject: [PATCH 3/6] refactor: simplify DDIM and TCD pred_original_sample MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have: model_output = (x - denoised) / sigma = d alpha_prod_t = 1 / (sigma² + 1) beta_prod_t = 1 - alpha_prod_t = sigma² / (sigma² + 1) Substitute alpha_prod_t: sqrt(1 / alpha_prod_t) = sqrt(sigma² + 1) sqrt(beta_prod_t) = sqrt(sigma² / (sigma² + 1)) = sigma / sqrt(sigma² + 1) Then: pred_original_sample = (x / sqrt(sigma² + 1) - sqrt(beta_prod_t) * d) * (1 / sqrt(alpha_prod_t)) = (x / sqrt(sigma² + 1) - (sigma / sqrt(sigma² + 1)) * d) * sqrt(sigma² + 
1) = x - sigma * d = x - sigma * ((x - denoised) / sigma) = x - (x - denoised) = denoised --- src/denoiser.hpp | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 91a0ee751..9a50551bf 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -1520,28 +1520,24 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, float sigma = sigmas[i]; float sigma_to = sigmas[i + 1]; - auto model_output_opt = model(x, sigma, i + 1); - if (model_output_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.empty()) { return {}; } - sd::Tensor model_output = std::move(model_output_opt); - model_output = (x - model_output) * (1.0f / sigma); + sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor d = (x - denoised) / sigma; float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f); float beta_prod_t = 1.0f - alpha_prod_t; - sd::Tensor pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) - - std::sqrt(beta_prod_t) * model_output) * - (1.0f / std::sqrt(alpha_prod_t)); - float beta_prod_t_prev = 1.0f - alpha_prod_t_prev; float variance = (beta_prod_t_prev / beta_prod_t) * (1.0f - alpha_prod_t / alpha_prod_t_prev); float std_dev_t = eta * std::sqrt(variance); - x = pred_original_sample + - std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * model_output; + x = denoised + + std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * d; if (eta > 0) { x += std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor::randn_like(x, rng); @@ -1592,12 +1588,12 @@ static sd::Tensor sample_tcd(denoise_cb_t model, int timestep_s = (int)floor((1 - eta) * prev_timestep); float sigma = sigmas[i]; - auto model_output_opt = model(x, sigma, i + 1); - if (model_output_opt.empty()) { + auto denoised_opt = model(x, sigma, i + 1); + if (denoised_opt.empty()) { return 
{}; } - sd::Tensor model_output = std::move(model_output_opt); - model_output = (x - model_output) * (1.0f / sigma); + sd::Tensor denoised = std::move(denoised_opt); + sd::Tensor d = (x - denoised) / sigma; float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); float beta_prod_t = 1.0f - alpha_prod_t; @@ -1605,12 +1601,8 @@ static sd::Tensor sample_tcd(denoise_cb_t model, float alpha_prod_s = static_cast(alphas_cumprod[timestep_s]); float beta_prod_s = 1.0f - alpha_prod_s; - sd::Tensor pred_original_sample = ((x / std::sqrt(sigma * sigma + 1)) - - std::sqrt(beta_prod_t) * model_output) * - (1.0f / std::sqrt(alpha_prod_t)); - - x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * pred_original_sample + - std::sqrt(beta_prod_s / alpha_prod_t_prev) * model_output; + x = std::sqrt(alpha_prod_s / alpha_prod_t_prev) * denoised + + std::sqrt(beta_prod_s / alpha_prod_t_prev) * d; if (eta > 0 && sigma_to > 0.0f) { x = std::sqrt(alpha_prod_t_prev / alpha_prod_s) * x + From 4294cb065dee49eadc5442f0a0cfa7354ed1d895 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Tue, 5 May 2026 18:36:56 -0300 Subject: [PATCH 4/6] refactor: simplify DDIM deterministic step calculation When eta = 0, std_dev_t = 0. 
The sqrt term becomes: sqrt((1 - alpha_prod_t_prev - std_dev_t^2) / alpha_prod_t_prev) = sqrt((1 - alpha_prod_t_prev) / alpha_prod_t_prev) = sqrt(beta_prod_t_prev / alpha_prod_t_prev) Given: alpha_prod_t = 1 / (sigma^2 + 1) beta_prod_t = sigma^2 / (sigma^2 + 1) alpha_prod_t_prev = 1 / (sigma_to^2 + 1) beta_prod_t_prev = sigma_to^2 / (sigma_to^2 + 1) sqrt(beta_prod_t_prev / alpha_prod_t_prev) = sqrt((sigma_to^2 / (sigma_to^2 + 1)) / (1 / (sigma_to^2 + 1))) = sqrt(sigma_to^2) = sigma_to So the deterministic step becomes: x = denoised + sigma_to * model_output = denoised + sigma_to * (x - denoised) / sigma = denoised + (x - denoised) * sigma_to / sigma = denoised + x * sigma_to / sigma - denoised * sigma_to / sigma = denoised * (1 - sigma_to / sigma) + x * sigma_to / sigma = x + denoised * (1 - sigma_to / sigma) + x * sigma_to / sigma - x = x + denoised * (1 - sigma_to / sigma) - x * (1 - sigma_to / sigma) = x + (denoised - x) * (1 - sigma_to / sigma) = x + (x - denoised) * (sigma_to / sigma - 1) = x + (x - denoised) / sigma * (sigma_to - sigma) = x + d * (sigma_to - sigma); --- src/denoiser.hpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 9a50551bf..1338edd24 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -1526,20 +1526,22 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, } sd::Tensor denoised = std::move(denoised_opt); sd::Tensor d = (x - denoised) / sigma; + if (eta == 0.f) { + x += d * (sigma_to - sigma); + } else { - float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); - float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f); - float beta_prod_t = 1.0f - alpha_prod_t; + float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); + float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f); + float beta_prod_t = 1.0f - alpha_prod_t; - float beta_prod_t_prev = 1.0f - alpha_prod_t_prev; - float variance = 
(beta_prod_t_prev / beta_prod_t) * - (1.0f - alpha_prod_t / alpha_prod_t_prev); - float std_dev_t = eta * std::sqrt(variance); + float beta_prod_t_prev = 1.0f - alpha_prod_t_prev; + float variance = (beta_prod_t_prev / beta_prod_t) * + (1.0f - alpha_prod_t / alpha_prod_t_prev); + float std_dev_t = eta * std::sqrt(variance); - x = denoised + - std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * d; + x = denoised + + std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * d; - if (eta > 0) { x += std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor::randn_like(x, rng); } } From a12283d32b07c0aeef515a7de9d8003139e315db Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Tue, 5 May 2026 18:37:25 -0300 Subject: [PATCH 5/6] refactor: simplify DDIM ancestral step calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From the DDIM definitions: alpha_prod_t = 1 / (sigma² + 1) beta_prod_t = 1 - alpha_prod_t = sigma² / (sigma² + 1) d = (x - denoised) / sigma We have the coefficient of d in the x update: coeff² = (1 - alpha_prod_t_prev - std_dev_t²) / alpha_prod_t_prev Where: std_dev_t² = eta² * variance variance = (beta_prod_t_prev / beta_prod_t) * (1 - alpha_prod_t / alpha_prod_t_prev) = sigma_to² (sigma² - sigma_to²) / (sigma² (sigma_to² + 1)) Substituting variance: coeff² = ( sigma_to² / (sigma_to² + 1) - eta² * sigma_to² (sigma² - sigma_to²) / (sigma² * (sigma_to² + 1)) ) * (sigma_to² + 1) = sigma_to² - eta² * sigma_to² * (sigma² - sigma_to²) / sigma² = sigma_to² * ( 1 - eta² * (sigma² - sigma_to²) / sigma² ) From get_ancestral_step: sigma_down² = sigma_to² - sigma_up² = sigma_to² - eta² * sigma_to² * (sigma² - sigma_to²) / sigma² = coeff² So coeff = sigma_down, and the x update becomes: x = denoised + sigma_down * d = denoised + sigma_down * (x - denoised) / sigma = denoised + (x - denoised) * sigma_ratio = x * sigma_ratio + denoised - denoised * sigma_ratio = x * 
sigma_ratio + denoised * (1 - sigma_ratio) And the noise coefficient: noise_coeff = std_dev_t / sqrt(alpha_prod_t_prev) = eta * sqrt( sigma_to² * (sigma² - sigma_to²) / (sigma² * (sigma_to² + 1)) ) * sqrt(sigma_to² + 1) = eta * sigma_to * sqrt( (sigma² - sigma_to²) / sigma² ) = sigma_up --- src/denoiser.hpp | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 1338edd24..5f1e166d5 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -1529,20 +1529,12 @@ static sd::Tensor sample_ddim_trailing(denoise_cb_t model, if (eta == 0.f) { x += d * (sigma_to - sigma); } else { - - float alpha_prod_t = 1.0f / (sigma * sigma + 1.0f); - float alpha_prod_t_prev = 1.0f / (sigma_to * sigma_to + 1.0f); - float beta_prod_t = 1.0f - alpha_prod_t; - - float beta_prod_t_prev = 1.0f - alpha_prod_t_prev; - float variance = (beta_prod_t_prev / beta_prod_t) * - (1.0f - alpha_prod_t / alpha_prod_t_prev); - float std_dev_t = eta * std::sqrt(variance); - - x = denoised + - std::sqrt((1.0f - alpha_prod_t_prev - std::pow(std_dev_t, 2)) / alpha_prod_t_prev) * d; - - x += std_dev_t / std::sqrt(alpha_prod_t_prev) * sd::Tensor::randn_like(x, rng); + auto [sigma_down, sigma_up] = get_ancestral_step(sigma, sigma_to, eta); + float sigma_ratio = sigma_down / sigma; + x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised; + if (sigma_up > 0.f) { + x += sd::Tensor::randn_like(x, rng) * sigma_up; + } } } return x; From b95fd291ef4cd847c3993a5584f55b340e8f504d Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Tue, 5 May 2026 18:37:54 -0300 Subject: [PATCH 6/6] refactor: remove DDIM standalone implementation It is equivalent to Euler Ancestral with the Simple scheduler. 
--- src/denoiser.hpp | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/src/denoiser.hpp b/src/denoiser.hpp index 5f1e166d5..c646ec707 100644 --- a/src/denoiser.hpp +++ b/src/denoiser.hpp @@ -1510,36 +1510,6 @@ static sd::Tensor sample_er_sde(denoise_cb_t model, return x; } -static sd::Tensor sample_ddim_trailing(denoise_cb_t model, - sd::Tensor x, - const std::vector& sigmas, - std::shared_ptr rng, - float eta) { - int steps = static_cast(sigmas.size()) - 1; - for (int i = 0; i < steps; i++) { - float sigma = sigmas[i]; - float sigma_to = sigmas[i + 1]; - - auto denoised_opt = model(x, sigma, i + 1); - if (denoised_opt.empty()) { - return {}; - } - sd::Tensor denoised = std::move(denoised_opt); - sd::Tensor d = (x - denoised) / sigma; - if (eta == 0.f) { - x += d * (sigma_to - sigma); - } else { - auto [sigma_down, sigma_up] = get_ancestral_step(sigma, sigma_to, eta); - float sigma_ratio = sigma_down / sigma; - x = sigma_ratio * x + (1.0f - sigma_ratio) * denoised; - if (sigma_up > 0.f) { - x += sd::Tensor::randn_like(x, rng) * sigma_up; - } - } - } - return x; -} - static sd::Tensor sample_tcd(denoise_cb_t model, sd::Tensor x, const std::vector& sigmas, @@ -1645,7 +1615,8 @@ static sd::Tensor sample_k_diffusion(sample_method_t method, case ER_SDE_SAMPLE_METHOD: return sample_er_sde(model, std::move(x), sigmas, rng, is_flow_denoiser, eta); case DDIM_TRAILING_SAMPLE_METHOD: - return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta); + // DDIM is equivalent to Euler Ancestral with the Simple scheduler + return sample_euler_ancestral(model, std::move(x), sigmas, rng, is_flow_denoiser, eta); case TCD_SAMPLE_METHOD: return sample_tcd(model, std::move(x), sigmas, rng, eta); default: