|
1 | 1 | #ifndef __DENOISER_HPP__ |
2 | 2 | #define __DENOISER_HPP__ |
3 | 3 |
|
| 4 | +#include <cmath> |
| 5 | + |
4 | 6 | #include "ggml_extend.hpp" |
5 | 7 | #include "gits_noise.inl" |
6 | 8 |
|
@@ -351,6 +353,95 @@ struct SmoothStepScheduler : SigmaScheduler { |
351 | 353 | } |
352 | 354 | }; |
353 | 355 |
|
| 356 | +struct BongTangentScheduler : SigmaScheduler { |
| 357 | + static constexpr float kPi = 3.14159265358979323846f; |
| 358 | + |
| 359 | + static std::vector<float> get_bong_tangent_sigmas(int steps, float slope, float pivot, float start, float end) { |
| 360 | + std::vector<float> sigmas; |
| 361 | + if (steps <= 0) { |
| 362 | + return sigmas; |
| 363 | + } |
| 364 | + |
| 365 | + float smax = ((2.0f / kPi) * atanf(-slope * (0.0f - pivot)) + 1.0f) * 0.5f; |
| 366 | + float smin = ((2.0f / kPi) * atanf(-slope * ((float)(steps - 1) - pivot)) + 1.0f) * 0.5f; |
| 367 | + float srange = smax - smin; |
| 368 | + float sscale = start - end; |
| 369 | + |
| 370 | + sigmas.reserve(steps); |
| 371 | + |
| 372 | + if (fabsf(srange) < 1e-8f) { |
| 373 | + if (steps == 1) { |
| 374 | + sigmas.push_back(start); |
| 375 | + return sigmas; |
| 376 | + } |
| 377 | + for (int i = 0; i < steps; ++i) { |
| 378 | + float t = (float)i / (float)(steps - 1); |
| 379 | + sigmas.push_back(start + (end - start) * t); |
| 380 | + } |
| 381 | + return sigmas; |
| 382 | + } |
| 383 | + |
| 384 | + float inv_srange = 1.0f / srange; |
| 385 | + for (int x = 0; x < steps; ++x) { |
| 386 | + float v = ((2.0f / kPi) * atanf(-slope * ((float)x - pivot)) + 1.0f) * 0.5f; |
| 387 | + float sigma = ((v - smin) * inv_srange) * sscale + end; |
| 388 | + sigmas.push_back(sigma); |
| 389 | + } |
| 390 | + |
| 391 | + return sigmas; |
| 392 | + } |
| 393 | + |
| 394 | + std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t /*t_to_sigma*/) override { |
| 395 | + std::vector<float> result; |
| 396 | + if (n == 0) { |
| 397 | + return result; |
| 398 | + } |
| 399 | + |
| 400 | + float start = sigma_max; |
| 401 | + float end = sigma_min; |
| 402 | + float middle = sigma_min + (sigma_max - sigma_min) * 0.5f; |
| 403 | + |
| 404 | + float pivot_1 = 0.6f; |
| 405 | + float pivot_2 = 0.6f; |
| 406 | + float slope_1 = 0.2f; |
| 407 | + float slope_2 = 0.2f; |
| 408 | + |
| 409 | + int steps = static_cast<int>(n) + 2; |
| 410 | + int midpoint = static_cast<int>(((float)steps * pivot_1 + (float)steps * pivot_2) * 0.5f); |
| 411 | + int pivot_1_i = static_cast<int>((float)steps * pivot_1); |
| 412 | + int pivot_2_i = static_cast<int>((float)steps * pivot_2); |
| 413 | + |
| 414 | + float slope_scale = (float)steps / 40.0f; |
| 415 | + slope_1 = slope_1 / slope_scale; |
| 416 | + slope_2 = slope_2 / slope_scale; |
| 417 | + |
| 418 | + int stage_2_len = steps - midpoint; |
| 419 | + int stage_1_len = steps - stage_2_len; |
| 420 | + |
| 421 | + std::vector<float> sigmas_1 = get_bong_tangent_sigmas(stage_1_len, slope_1, (float)pivot_1_i, start, middle); |
| 422 | + std::vector<float> sigmas_2 = get_bong_tangent_sigmas(stage_2_len, slope_2, (float)(pivot_2_i - stage_1_len), middle, end); |
| 423 | + |
| 424 | + if (!sigmas_1.empty()) { |
| 425 | + sigmas_1.pop_back(); |
| 426 | + } |
| 427 | + |
| 428 | + result.reserve(n + 1); |
| 429 | + result.insert(result.end(), sigmas_1.begin(), sigmas_1.end()); |
| 430 | + result.insert(result.end(), sigmas_2.begin(), sigmas_2.end()); |
| 431 | + |
| 432 | + if (result.size() < n + 1) { |
| 433 | + while (result.size() < n + 1) { |
| 434 | + result.push_back(end); |
| 435 | + } |
| 436 | + } else if (result.size() > n + 1) { |
| 437 | + result.resize(n + 1); |
| 438 | + } |
| 439 | + |
| 440 | + result[n] = 0.0f; |
| 441 | + return result; |
| 442 | + } |
| 443 | +}; |
| 444 | + |
354 | 445 | struct KLOptimalScheduler : SigmaScheduler { |
355 | 446 | std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override { |
356 | 447 | std::vector<float> sigmas; |
@@ -431,6 +522,10 @@ struct Denoiser { |
431 | 522 | LOG_INFO("get_sigmas with SmoothStep scheduler"); |
432 | 523 | scheduler = std::make_shared<SmoothStepScheduler>(); |
433 | 524 | break; |
| 525 | + case BONG_TANGENT_SCHEDULER: |
| 526 | + LOG_INFO("get_sigmas with bong_tangent scheduler"); |
| 527 | + scheduler = std::make_shared<BongTangentScheduler>(); |
| 528 | + break; |
434 | 529 | case KL_OPTIMAL_SCHEDULER: |
435 | 530 | LOG_INFO("get_sigmas with KL Optimal scheduler"); |
436 | 531 | scheduler = std::make_shared<KLOptimalScheduler>(); |
@@ -1634,6 +1729,216 @@ static bool sample_k_diffusion(sample_method_t method, |
1634 | 1729 | } |
1635 | 1730 | } |
1636 | 1731 | } break; |
| 1732 | + case RES_MULTISTEP_SAMPLE_METHOD: // Res Multistep sampler |
| 1733 | + { |
| 1734 | + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); |
| 1735 | + struct ggml_tensor* old_denoised = ggml_dup_tensor(work_ctx, x); |
| 1736 | + |
| 1737 | + bool have_old_sigma = false; |
| 1738 | + float old_sigma_down = 0.0f; |
| 1739 | + |
| 1740 | + auto t_fn = [](float sigma) -> float { return -logf(sigma); }; |
| 1741 | + auto sigma_fn = [](float t) -> float { return expf(-t); }; |
| 1742 | + auto phi1_fn = [](float t) -> float { |
| 1743 | + if (fabsf(t) < 1e-6f) { |
| 1744 | + return 1.0f + t * 0.5f + (t * t) / 6.0f; |
| 1745 | + } |
| 1746 | + return (expf(t) - 1.0f) / t; |
| 1747 | + }; |
| 1748 | + auto phi2_fn = [&](float t) -> float { |
| 1749 | + if (fabsf(t) < 1e-6f) { |
| 1750 | + return 0.5f + t / 6.0f + (t * t) / 24.0f; |
| 1751 | + } |
| 1752 | + float phi1_val = phi1_fn(t); |
| 1753 | + return (phi1_val - 1.0f) / t; |
| 1754 | + }; |
| 1755 | + |
| 1756 | + for (int i = 0; i < steps; i++) { |
| 1757 | + ggml_tensor* denoised = model(x, sigmas[i], i + 1); |
| 1758 | + if (denoised == nullptr) { |
| 1759 | + return false; |
| 1760 | + } |
| 1761 | + |
| 1762 | + float sigma_from = sigmas[i]; |
| 1763 | + float sigma_to = sigmas[i + 1]; |
| 1764 | + float sigma_up = 0.0f; |
| 1765 | + float sigma_down = sigma_to; |
| 1766 | + |
| 1767 | + if (eta > 0.0f) { |
| 1768 | + float sigma_from_sq = sigma_from * sigma_from; |
| 1769 | + float sigma_to_sq = sigma_to * sigma_to; |
| 1770 | + if (sigma_from_sq > 0.0f) { |
| 1771 | + float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq; |
| 1772 | + if (term > 0.0f) { |
| 1773 | + sigma_up = eta * std::sqrt(term); |
| 1774 | + } |
| 1775 | + } |
| 1776 | + sigma_up = std::min(sigma_up, sigma_to); |
| 1777 | + float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up; |
| 1778 | + sigma_down = sigma_down_sq > 0.0f ? std::sqrt(sigma_down_sq) : 0.0f; |
| 1779 | + } |
| 1780 | + |
| 1781 | + if (sigma_down == 0.0f || !have_old_sigma) { |
| 1782 | + float dt = sigma_down - sigma_from; |
| 1783 | + float* vec_x = (float*)x->data; |
| 1784 | + float* vec_denoised = (float*)denoised->data; |
| 1785 | + |
| 1786 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1787 | + float d = (vec_x[j] - vec_denoised[j]) / sigma_from; |
| 1788 | + vec_x[j] = vec_x[j] + d * dt; |
| 1789 | + } |
| 1790 | + } else { |
| 1791 | + float t = t_fn(sigma_from); |
| 1792 | + float t_old = t_fn(old_sigma_down); |
| 1793 | + float t_next = t_fn(sigma_down); |
| 1794 | + float t_prev = t_fn(sigmas[i - 1]); |
| 1795 | + float h = t_next - t; |
| 1796 | + float c2 = (t_prev - t_old) / h; |
| 1797 | + |
| 1798 | + float phi1_val = phi1_fn(-h); |
| 1799 | + float phi2_val = phi2_fn(-h); |
| 1800 | + float b1 = phi1_val - phi2_val / c2; |
| 1801 | + float b2 = phi2_val / c2; |
| 1802 | + |
| 1803 | + if (!std::isfinite(b1)) { |
| 1804 | + b1 = 0.0f; |
| 1805 | + } |
| 1806 | + if (!std::isfinite(b2)) { |
| 1807 | + b2 = 0.0f; |
| 1808 | + } |
| 1809 | + |
| 1810 | + float sigma_h = sigma_fn(h); |
| 1811 | + float* vec_x = (float*)x->data; |
| 1812 | + float* vec_denoised = (float*)denoised->data; |
| 1813 | + float* vec_old_denoised = (float*)old_denoised->data; |
| 1814 | + |
| 1815 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1816 | + vec_x[j] = sigma_h * vec_x[j] + h * (b1 * vec_denoised[j] + b2 * vec_old_denoised[j]); |
| 1817 | + } |
| 1818 | + } |
| 1819 | + |
| 1820 | + if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { |
| 1821 | + ggml_ext_im_set_randn_f32(noise, rng); |
| 1822 | + float* vec_x = (float*)x->data; |
| 1823 | + float* vec_noise = (float*)noise->data; |
| 1824 | + |
| 1825 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1826 | + vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up; |
| 1827 | + } |
| 1828 | + } |
| 1829 | + |
| 1830 | + float* vec_old_denoised = (float*)old_denoised->data; |
| 1831 | + float* vec_denoised = (float*)denoised->data; |
| 1832 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1833 | + vec_old_denoised[j] = vec_denoised[j]; |
| 1834 | + } |
| 1835 | + |
| 1836 | + old_sigma_down = sigma_down; |
| 1837 | + have_old_sigma = true; |
| 1838 | + } |
| 1839 | + } break; |
| 1840 | + case RES_2S_SAMPLE_METHOD: // Res 2s sampler |
| 1841 | + { |
| 1842 | + struct ggml_tensor* noise = ggml_dup_tensor(work_ctx, x); |
| 1843 | + struct ggml_tensor* x0 = ggml_dup_tensor(work_ctx, x); |
| 1844 | + struct ggml_tensor* x2 = ggml_dup_tensor(work_ctx, x); |
| 1845 | + |
| 1846 | + const float c2 = 0.5f; |
| 1847 | + auto t_fn = [](float sigma) -> float { return -logf(sigma); }; |
| 1848 | + auto phi1_fn = [](float t) -> float { |
| 1849 | + if (fabsf(t) < 1e-6f) { |
| 1850 | + return 1.0f + t * 0.5f + (t * t) / 6.0f; |
| 1851 | + } |
| 1852 | + return (expf(t) - 1.0f) / t; |
| 1853 | + }; |
| 1854 | + auto phi2_fn = [&](float t) -> float { |
| 1855 | + if (fabsf(t) < 1e-6f) { |
| 1856 | + return 0.5f + t / 6.0f + (t * t) / 24.0f; |
| 1857 | + } |
| 1858 | + float phi1_val = phi1_fn(t); |
| 1859 | + return (phi1_val - 1.0f) / t; |
| 1860 | + }; |
| 1861 | + |
| 1862 | + for (int i = 0; i < steps; i++) { |
| 1863 | + float sigma_from = sigmas[i]; |
| 1864 | + float sigma_to = sigmas[i + 1]; |
| 1865 | + |
| 1866 | + ggml_tensor* denoised = model(x, sigma_from, -(i + 1)); |
| 1867 | + if (denoised == nullptr) { |
| 1868 | + return false; |
| 1869 | + } |
| 1870 | + |
| 1871 | + float sigma_up = 0.0f; |
| 1872 | + float sigma_down = sigma_to; |
| 1873 | + if (eta > 0.0f) { |
| 1874 | + float sigma_from_sq = sigma_from * sigma_from; |
| 1875 | + float sigma_to_sq = sigma_to * sigma_to; |
| 1876 | + if (sigma_from_sq > 0.0f) { |
| 1877 | + float term = sigma_to_sq * (sigma_from_sq - sigma_to_sq) / sigma_from_sq; |
| 1878 | + if (term > 0.0f) { |
| 1879 | + sigma_up = eta * std::sqrt(term); |
| 1880 | + } |
| 1881 | + } |
| 1882 | + sigma_up = std::min(sigma_up, sigma_to); |
| 1883 | + float sigma_down_sq = sigma_to_sq - sigma_up * sigma_up; |
| 1884 | + sigma_down = sigma_down_sq > 0.0f ? std::sqrt(sigma_down_sq) : 0.0f; |
| 1885 | + } |
| 1886 | + |
| 1887 | + float* vec_x = (float*)x->data; |
| 1888 | + float* vec_x0 = (float*)x0->data; |
| 1889 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1890 | + vec_x0[j] = vec_x[j]; |
| 1891 | + } |
| 1892 | + |
| 1893 | + if (sigma_down == 0.0f || sigma_from == 0.0f) { |
| 1894 | + float* vec_denoised = (float*)denoised->data; |
| 1895 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1896 | + vec_x[j] = vec_denoised[j]; |
| 1897 | + } |
| 1898 | + } else { |
| 1899 | + float t = t_fn(sigma_from); |
| 1900 | + float t_next = t_fn(sigma_down); |
| 1901 | + float h = t_next - t; |
| 1902 | + |
| 1903 | + float a21 = c2 * phi1_fn(-h * c2); |
| 1904 | + float phi1_val = phi1_fn(-h); |
| 1905 | + float phi2_val = phi2_fn(-h); |
| 1906 | + float b2 = phi2_val / c2; |
| 1907 | + float b1 = phi1_val - b2; |
| 1908 | + |
| 1909 | + float sigma_c2 = expf(-(t + h * c2)); |
| 1910 | + |
| 1911 | + float* vec_denoised = (float*)denoised->data; |
| 1912 | + float* vec_x2 = (float*)x2->data; |
| 1913 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1914 | + float eps1 = vec_denoised[j] - vec_x0[j]; |
| 1915 | + vec_x2[j] = vec_x0[j] + h * a21 * eps1; |
| 1916 | + } |
| 1917 | + |
| 1918 | + ggml_tensor* denoised2 = model(x2, sigma_c2, i + 1); |
| 1919 | + if (denoised2 == nullptr) { |
| 1920 | + return false; |
| 1921 | + } |
| 1922 | + float* vec_denoised2 = (float*)denoised2->data; |
| 1923 | + |
| 1924 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1925 | + float eps1 = vec_denoised[j] - vec_x0[j]; |
| 1926 | + float eps2 = vec_denoised2[j] - vec_x0[j]; |
| 1927 | + vec_x[j] = vec_x0[j] + h * (b1 * eps1 + b2 * eps2); |
| 1928 | + } |
| 1929 | + } |
| 1930 | + |
| 1931 | + if (sigmas[i + 1] > 0 && sigma_up > 0.0f) { |
| 1932 | + ggml_ext_im_set_randn_f32(noise, rng); |
| 1933 | + float* vec_x = (float*)x->data; |
| 1934 | + float* vec_noise = (float*)noise->data; |
| 1935 | + |
| 1936 | + for (int j = 0; j < ggml_nelements(x); j++) { |
| 1937 | + vec_x[j] = vec_x[j] + vec_noise[j] * sigma_up; |
| 1938 | + } |
| 1939 | + } |
| 1940 | + } |
| 1941 | + } break; |
1637 | 1942 |
|
1638 | 1943 | default: |
1639 | 1944 | LOG_ERROR("Attempting to sample with nonexisting sample method %i", method); |
|
0 commit comments