Skip to content

Commit e63daba

Browse files
authored
feat: add res_multistep, res_2s sampler and bong tangent scheduler (#1234)
1 parent 3959109 commit e63daba

7 files changed

Lines changed: 335 additions & 18 deletions

File tree

denoiser.hpp

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#ifndef __DENOISER_HPP__
22
#define __DENOISER_HPP__
33

4+
#include <cmath>
5+
46
#include "ggml_extend.hpp"
57
#include "gits_noise.inl"
68

@@ -351,6 +353,95 @@ struct SmoothStepScheduler : SigmaScheduler {
351353
}
352354
};
353355

356+
struct BongTangentScheduler : SigmaScheduler {
357+
static constexpr float kPi = 3.14159265358979323846f;
358+
359+
static std::vector<float> get_bong_tangent_sigmas(int steps, float slope, float pivot, float start, float end) {
360+
std::vector<float> sigmas;
361+
if (steps <= 0) {
362+
return sigmas;
363+
}
364+
365+
float smax = ((2.0f / kPi) * atanf(-slope * (0.0f - pivot)) + 1.0f) * 0.5f;
366+
float smin = ((2.0f / kPi) * atanf(-slope * ((float)(steps - 1) - pivot)) + 1.0f) * 0.5f;
367+
float srange = smax - smin;
368+
float sscale = start - end;
369+
370+
sigmas.reserve(steps);
371+
372+
if (fabsf(srange) < 1e-8f) {
373+
if (steps == 1) {
374+
sigmas.push_back(start);
375+
return sigmas;
376+
}
377+
for (int i = 0; i < steps; ++i) {
378+
float t = (float)i / (float)(steps - 1);
379+
sigmas.push_back(start + (end - start) * t);
380+
}
381+
return sigmas;
382+
}
383+
384+
float inv_srange = 1.0f / srange;
385+
for (int x = 0; x < steps; ++x) {
386+
float v = ((2.0f / kPi) * atanf(-slope * ((float)x - pivot)) + 1.0f) * 0.5f;
387+
float sigma = ((v - smin) * inv_srange) * sscale + end;
388+
sigmas.push_back(sigma);
389+
}
390+
391+
return sigmas;
392+
}
393+
394+
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t /*t_to_sigma*/) override {
395+
std::vector<float> result;
396+
if (n == 0) {
397+
return result;
398+
}
399+
400+
float start = sigma_max;
401+
float end = sigma_min;
402+
float middle = sigma_min + (sigma_max - sigma_min) * 0.5f;
403+
404+
float pivot_1 = 0.6f;
405+
float pivot_2 = 0.6f;
406+
float slope_1 = 0.2f;
407+
float slope_2 = 0.2f;
408+
409+
int steps = static_cast<int>(n) + 2;
410+
int midpoint = static_cast<int>(((float)steps * pivot_1 + (float)steps * pivot_2) * 0.5f);
411+
int pivot_1_i = static_cast<int>((float)steps * pivot_1);
412+
int pivot_2_i = static_cast<int>((float)steps * pivot_2);
413+
414+
float slope_scale = (float)steps / 40.0f;
415+
slope_1 = slope_1 / slope_scale;
416+
slope_2 = slope_2 / slope_scale;
417+
418+
int stage_2_len = steps - midpoint;
419+
int stage_1_len = steps - stage_2_len;
420+
421+
std::vector<float> sigmas_1 = get_bong_tangent_sigmas(stage_1_len, slope_1, (float)pivot_1_i, start, middle);
422+
std::vector<float> sigmas_2 = get_bong_tangent_sigmas(stage_2_len, slope_2, (float)(pivot_2_i - stage_1_len), middle, end);
423+
424+
if (!sigmas_1.empty()) {
425+
sigmas_1.pop_back();
426+
}
427+
428+
result.reserve(n + 1);
429+
result.insert(result.end(), sigmas_1.begin(), sigmas_1.end());
430+
result.insert(result.end(), sigmas_2.begin(), sigmas_2.end());
431+
432+
if (result.size() < n + 1) {
433+
while (result.size() < n + 1) {
434+
result.push_back(end);
435+
}
436+
} else if (result.size() > n + 1) {
437+
result.resize(n + 1);
438+
}
439+
440+
result[n] = 0.0f;
441+
return result;
442+
}
443+
};
444+
354445
struct KLOptimalScheduler : SigmaScheduler {
355446
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
356447
std::vector<float> sigmas;
@@ -431,6 +522,10 @@ struct Denoiser {
431522
LOG_INFO("get_sigmas with SmoothStep scheduler");
432523
scheduler = std::make_shared<SmoothStepScheduler>();
433524
break;
525+
case BONG_TANGENT_SCHEDULER:
526+
LOG_INFO("get_sigmas with bong_tangent scheduler");
527+
scheduler = std::make_shared<BongTangentScheduler>();
528+
break;
434529
case KL_OPTIMAL_SCHEDULER:
435530
LOG_INFO("get_sigmas with KL Optimal scheduler");
436531
scheduler = std::make_shared<KLOptimalScheduler>();
@@ -1634,6 +1729,216 @@ static bool sample_k_diffusion(sample_method_t method,
16341729
}
16351730
}
16361731
} break;
1732+
case RES_MULTISTEP_SAMPLE_METHOD: // Res Multistep sampler
1733+
{
1734+
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
1735+
struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x);
1736+
1737+
bool have_old_sigma = false;
1738+
float old_sigma_down = 0.0f;
1739+
1740+
auto t_fn = [](float sigma) -> float { return -logf(sigma); };
1741+
auto sigma_fn = [](float t) -> float { return expf(-t); };
1742+
auto phi1_fn = [](float t) -> float {
1743+
if (fabsf(t) < 1e-6f) {
1744+
return 1.0f + t * 0.5f + (t * t) / 6.0f;
1745+
}
1746+
return (expf(t) - 1.0f) / t;
1747+
};
1748+
auto phi2_fn = [&](float t) -> float {
1749+
if (fabsf(t) < 1e-6f) {
1750+
return 0.5f + t / 6.0f + (t * t) / 24.0f;
1751+
}
1752+
float phi1_val = phi1_fn(t);
1753+
return (phi1_val - 1.0f) / t;
1754+
};
1755+
1756+
for (int i = 0; i < steps; i++) {
1757+
ggml_tensor* denoised = model(x, sigmas[i], i + 1);
1758+
if (denoised == nullptr) {
1759+
return false;
1760+
}
1761+
1762+
float sigma_from = sigmas[i];
1763+
float sigma_to = sigmas[i + 1];
1764+
float sigma_up = 0.0f;
1765+
float sigma_down = sigma_to;
1766+
1767+
if (eta > 0.0f) {
1768+
float sigma_from_sq = sigma_from * sigma_from;
1769+
float sigma_to_sq = sigma_to * sigma_to;
1770+
if (sigma_from_sq > 0.0f) {
1771+
float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq;
1772+
if (term > 0.0f) {
1773+
sigma_up = eta * std::sqrt(term);
1774+
}
1775+
}
1776+
sigma_up = std::min(sigma_up, sigma_to);
1777+
float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up;
1778+
sigma_down = sigma_down_sq > 0.0f ? std::sqrt(sigma_down_sq) : 0.0f;
1779+
}
1780+
1781+
if (sigma_down == 0.0f || !have_old_sigma) {
1782+
float dt = sigma_down - sigma_from;
1783+
float* vec_x = (float*)x->data;
1784+
float* vec_denoised = (float*)denoised->data;
1785+
1786+
for (int j = 0; j < ggml_nelements(x); j++) {
1787+
float d = (vec_x[j] - vec_denoised[j]) / sigma_from;
1788+
vec_x[j] = vec_x[j] + d * dt;
1789+
}
1790+
} else {
1791+
float t = t_fn(sigma_from);
1792+
float t_old = t_fn(old_sigma_down);
1793+
float t_next = t_fn(sigma_down);
1794+
float t_prev = t_fn(sigmas[i - 1]);
1795+
float h = t_next - t;
1796+
float c2 = (t_prev - t_old) / h;
1797+
1798+
float phi1_val = phi1_fn(-h);
1799+
float phi2_val = phi2_fn(-h);
1800+
float b1 = phi1_val - phi2_val / c2;
1801+
float b2 = phi2_val / c2;
1802+
1803+
if (!std::isfinite(b1)) {
1804+
b1 = 0.0f;
1805+
}
1806+
if (!std::isfinite(b2)) {
1807+
b2 = 0.0f;
1808+
}
1809+
1810+
float sigma_h = sigma_fn(h);
1811+
float* vec_x = (float*)x->data;
1812+
float* vec_denoised = (float*)denoised->data;
1813+
float* vec_old_denoised = (float*)old_denoised->data;
1814+
1815+
for (int j = 0; j < ggml_nelements(x); j++) {
1816+
vec_x[j] = sigma_h * vec_x[j] + h * (b1 * vec_denoised[j] + b2 * vec_old_denoised[j]);
1817+
}
1818+
}
1819+
1820+
if (sigmas[i + 1] > 0 && sigma_up > 0.0f) {
1821+
ggml_ext_im_set_randn_f32(noise, rng);
1822+
float* vec_x = (float*)x->data;
1823+
float* vec_noise = (float*)noise->data;
1824+
1825+
for (int j = 0; j < ggml_nelements(x); j++) {
1826+
vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up;
1827+
}
1828+
}
1829+
1830+
float* vec_old_denoised = (float*)old_denoised->data;
1831+
float* vec_denoised = (float*)denoised->data;
1832+
for (int j = 0; j < ggml_nelements(x); j++) {
1833+
vec_old_denoised[j] = vec_denoised[j];
1834+
}
1835+
1836+
old_sigma_down = sigma_down;
1837+
have_old_sigma = true;
1838+
}
1839+
} break;
1840+
case RES_2S_SAMPLE_METHOD: // Res 2s sampler
1841+
{
1842+
struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x);
1843+
struct ggml_tensor* x0 = ggml_dup_tensor(work_ctx, x);
1844+
struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x);
1845+
1846+
const float c2 = 0.5f;
1847+
auto t_fn = [](float sigma) -> float { return -logf(sigma); };
1848+
auto phi1_fn = [](float t) -> float {
1849+
if (fabsf(t) < 1e-6f) {
1850+
return 1.0f + t * 0.5f + (t * t) / 6.0f;
1851+
}
1852+
return (expf(t) - 1.0f) / t;
1853+
};
1854+
auto phi2_fn = [&](float t) -> float {
1855+
if (fabsf(t) < 1e-6f) {
1856+
return 0.5f + t / 6.0f + (t * t) / 24.0f;
1857+
}
1858+
float phi1_val = phi1_fn(t);
1859+
return (phi1_val - 1.0f) / t;
1860+
};
1861+
1862+
for (int i = 0; i < steps; i++) {
1863+
float sigma_from = sigmas[i];
1864+
float sigma_to = sigmas[i + 1];
1865+
1866+
ggml_tensor* denoised = model(x, sigma_from, -(i + 1));
1867+
if (denoised == nullptr) {
1868+
return false;
1869+
}
1870+
1871+
float sigma_up = 0.0f;
1872+
float sigma_down = sigma_to;
1873+
if (eta > 0.0f) {
1874+
float sigma_from_sq = sigma_from * sigma_from;
1875+
float sigma_to_sq = sigma_to * sigma_to;
1876+
if (sigma_from_sq > 0.0f) {
1877+
float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq;
1878+
if (term > 0.0f) {
1879+
sigma_up = eta * std::sqrt(term);
1880+
}
1881+
}
1882+
sigma_up = std::min(sigma_up, sigma_to);
1883+
float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up;
1884+
sigma_down = sigma_down_sq > 0.0f ? std::sqrt(sigma_down_sq) : 0.0f;
1885+
}
1886+
1887+
float* vec_x = (float*)x->data;
1888+
float* vec_x0 = (float*)x0->data;
1889+
for (int j = 0; j < ggml_nelements(x); j++) {
1890+
vec_x0[j] = vec_x[j];
1891+
}
1892+
1893+
if (sigma_down == 0.0f || sigma_from == 0.0f) {
1894+
float* vec_denoised = (float*)denoised->data;
1895+
for (int j = 0; j < ggml_nelements(x); j++) {
1896+
vec_x[j] = vec_denoised[j];
1897+
}
1898+
} else {
1899+
float t = t_fn(sigma_from);
1900+
float t_next = t_fn(sigma_down);
1901+
float h = t_next - t;
1902+
1903+
float a21 = c2 * phi1_fn(-h * c2);
1904+
float phi1_val = phi1_fn(-h);
1905+
float phi2_val = phi2_fn(-h);
1906+
float b2 = phi2_val / c2;
1907+
float b1 = phi1_val - b2;
1908+
1909+
float sigma_c2 = expf(-(t + h * c2));
1910+
1911+
float* vec_denoised = (float*)denoised->data;
1912+
float* vec_x2 = (float*)x2->data;
1913+
for (int j = 0; j < ggml_nelements(x); j++) {
1914+
float eps1 = vec_denoised[j] - vec_x0[j];
1915+
vec_x2[j] = vec_x0[j] + h * a21 * eps1;
1916+
}
1917+
1918+
ggml_tensor* denoised2 = model(x2, sigma_c2, i + 1);
1919+
if (denoised2 == nullptr) {
1920+
return false;
1921+
}
1922+
float* vec_denoised2 = (float*)denoised2->data;
1923+
1924+
for (int j = 0; j < ggml_nelements(x); j++) {
1925+
float eps1 = vec_denoised[j] - vec_x0[j];
1926+
float eps2 = vec_denoised2[j] - vec_x0[j];
1927+
vec_x[j] = vec_x0[j] + h * (b1 * eps1 + b2 * eps2);
1928+
}
1929+
}
1930+
1931+
if (sigmas[i + 1] > 0 && sigma_up > 0.0f) {
1932+
ggml_ext_im_set_randn_f32(noise, rng);
1933+
float* vec_x = (float*)x->data;
1934+
float* vec_noise = (float*)noise->data;
1935+
1936+
for (int j = 0; j < ggml_nelements(x); j++) {
1937+
vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up;
1938+
}
1939+
}
1940+
}
1941+
} break;
16371942

16381943
default:
16391944
LOG_ERROR("Attempting to sample with nonexisting sample method %i", method);

examples/cli/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -107,14 +107,14 @@ Generation Options:
107107
medium
108108
--skip-layer-start <float> SLG enabling point (default: 0.01)
109109
--skip-layer-end <float> SLG disabling point (default: 0.2)
110-
--eta <float> eta in DDIM, only for DDIM and TCD (default: 0)
110+
--eta <float> eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
111111
--high-noise-cfg-scale <float> (high noise) unconditional guidance scale: (default: 7.0)
112112
--high-noise-img-cfg-scale <float> (high noise) image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)
113113
--high-noise-guidance <float> (high noise) distilled guidance scale for models with guidance input (default: 3.5)
114114
--high-noise-slg-scale <float> (high noise) skip layer guidance (SLG) scale, only for DiT models: (default: 0)
115115
--high-noise-skip-layer-start <float> (high noise) SLG enabling point (default: 0.01)
116116
--high-noise-skip-layer-end <float> (high noise) SLG disabling point (default: 0.2)
117-
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM and TCD (default: 0)
117+
--high-noise-eta <float> (high noise) eta in DDIM, only for DDIM/TCD/res_multistep/res_2s (default: 0)
118118
--strength <float> strength for noising/unnoising (default: 0.75)
119119
--pm-style-strength <float>
120120
--control-strength <float> strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image
@@ -123,12 +123,12 @@ Generation Options:
123123
--increase-ref-index automatically increase the indices of references images based on the order they are listed (starting with 1).
124124
--disable-auto-resize-ref-image disable auto resize of ref images
125125
-s, --seed RNG seed (default: 42, use random seed for < 0)
126-
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
127-
tcd] (default: euler for Flux/SD3/Wan, euler_a otherwise)
128-
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm,
129-
ddim_trailing, tcd] default: euler for Flux/SD3/Wan, euler_a otherwise
126+
--sampling-method sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd,
127+
res_multistep, res_2s] (default: euler for Flux/SD3/Wan, euler_a otherwise)
128+
--high-noise-sampling-method (high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing,
129+
tcd, res_multistep, res_2s] default: euler for Flux/SD3/Wan, euler_a otherwise
130130
--scheduler denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple,
131-
kl_optimal, lcm], default: discrete
131+
kl_optimal, lcm, bong_tangent], default: discrete
132132
--sigmas custom sigma values for the sampler, comma-separated (e.g., "14.61,7.8,3.5,0.0").
133133
--skip-layers layers to skip for SLG steps (default: [7,8,9])
134134
--high-noise-skip-layers (high noise) layers to skip for SLG steps (default: [7,8,9])

examples/common/common.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,17 +1478,17 @@ struct SDGenerationParams {
14781478
on_seed_arg},
14791479
{"",
14801480
"--sampling-method",
1481-
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd] "
1481+
"sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s] "
14821482
"(default: euler for Flux/SD3/Wan, euler_a otherwise)",
14831483
on_sample_method_arg},
14841484
{"",
14851485
"--high-noise-sampling-method",
1486-
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd]"
1486+
"(high noise) sampling method, one of [euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd, res_multistep, res_2s]"
14871487
" default: euler for Flux/SD3/Wan, euler_a otherwise",
14881488
on_high_noise_sample_method_arg},
14891489
{"",
14901490
"--scheduler",
1491-
"denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm], default: discrete",
1491+
"denoiser sigma scheduler, one of [discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple, kl_optimal, lcm, bong_tangent], default: discrete",
14921492
on_scheduler_arg},
14931493
{"",
14941494
"--sigmas",

0 commit comments

Comments
 (0)