diff --git a/include/stable-diffusion.h b/include/stable-diffusion.h
index c4c14949c..377859238 100644
--- a/include/stable-diffusion.h
+++ b/include/stable-diffusion.h
@@ -51,6 +51,7 @@ enum sample_method_t {
     RES_MULTISTEP_SAMPLE_METHOD,
     RES_2S_SAMPLE_METHOD,
     ER_SDE_SAMPLE_METHOD,
+    EULER_GE_SAMPLE_METHOD,
     SAMPLE_METHOD_COUNT
 };
 
diff --git a/src/denoiser.hpp b/src/denoiser.hpp
index 831da2580..3a7489d13 100644
--- a/src/denoiser.hpp
+++ b/src/denoiser.hpp
@@ -1649,6 +1649,57 @@ static sd::Tensor<float> sample_tcd(denoise_cb_t model,
     return x;
 }
 
+// Gradient-estimation Euler sampler:
+// https://github.com/ToyotaResearchInstitute/gradient-estimation-sampler
+//
+// Each step forms d = (x - denoised) / sigma. Once a previous d (old_d) exists, the update
+// direction is d_bar = ge_gamma * d + (1 - ge_gamma) * old_d; before that it is plain d.
+// When get_ancestral_step() yields sigma_up > 0, noise scaled by sigma_up is added (after
+// scaling x by alpha_scale for flow denoisers). If the next sigma is 0, x becomes the
+// denoised output. Returns an empty tensor as soon as the model callback returns one.
+static sd::Tensor<float> sample_gradient_estimation(denoise_cb_t model,
+                                                    sd::Tensor<float> x,
+                                                    const std::vector<float>& sigmas,
+                                                    std::shared_ptr<RNG> rng = nullptr,
+                                                    bool is_flow_denoiser = false,
+                                                    float eta = 0.f,
+                                                    float ge_gamma = 2.0f) {
+    int steps = static_cast<int>(sigmas.size()) - 1;
+    sd::Tensor<float> old_d;
+    bool has_old_d = false;  // becomes true once a step with sigma_to != 0 has stored its d
+    for (int i = 0; i < steps; i++) {
+        float sigma = sigmas[i];
+        float sigma_to = sigmas[i + 1];
+        auto denoised_opt = model(x, sigma, i + 1);
+        if (denoised_opt.empty()) {
+            return {};
+        }
+        sd::Tensor<float> denoised = std::move(denoised_opt);
+        if (sigma_to == 0.f) {
+            x = std::move(denoised);  // denoised is not used again in this iteration
+        } else {
+            auto [sigma_down, sigma_up, alpha_scale] = get_ancestral_step(sigma, sigma_to, eta, is_flow_denoiser);
+            sd::Tensor<float> d = (x - denoised) / sigma;
+            float dt = sigma_down - sigma;
+            if (has_old_d) {
+                sd::Tensor<float> d_bar = d * ge_gamma + old_d * (1.0f - ge_gamma);
+                x += d_bar * dt;
+            } else {
+                x += d * dt;
+            }
+            old_d = std::move(d);
+            has_old_d = true;
+            if (sigma_up > 0.f) {
+                if (is_flow_denoiser) {
+                    x *= alpha_scale;
+                }
+                x += sd::Tensor<float>::randn_like(x, rng) * sigma_up;
+            }
+        }
+    }
+    return x;
+}
+
 // k diffusion reverse ODE: dx = (x - D(x;\sigma)) / \sigma dt; \sigma(t) = t
 static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
                                             denoise_cb_t model,
@@ -1694,6 +1745,8 @@ static sd::Tensor<float> sample_k_diffusion(sample_method_t method,
             return sample_ddim_trailing(model, std::move(x), sigmas, rng, eta);
         case TCD_SAMPLE_METHOD:
             return sample_tcd(model, std::move(x), sigmas, rng, eta);
+        case EULER_GE_SAMPLE_METHOD:
+            return sample_gradient_estimation(model, std::move(x), sigmas, rng, is_flow_denoiser, eta);
         default:
             return {};
     }
diff --git a/src/stable-diffusion.cpp b/src/stable-diffusion.cpp
index fd439ff1d..e9e26a504 100644
--- a/src/stable-diffusion.cpp
+++ b/src/stable-diffusion.cpp
@@ -73,6 +73,7 @@ const char* sampling_methods_str[] = {
     "Res Multistep",
     "Res 2s",
     "ER-SDE",
+    "Euler GE",
 };
 
 /*================================================== Helper Functions ================================================*/
@@ -1965,6 +1966,7 @@ const char* sample_method_to_str[] = {
     "res_multistep",
     "res_2s",
     "er_sde",
+    "euler_ge",
 };
 
 const char* sd_sample_method_name(enum sample_method_t sample_method) {