Skip to content

Commit 4d73bfb

Browse files
committed
fix: remove recursion in gamma function compiled to GLSL
1 parent 9651f39 commit 4d73bfb

3 files changed

Lines changed: 37 additions & 28 deletions

File tree

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
### [Unreleased]
22

3+
- **Fix recursive GLSL gamma function**: The `_gpu_gamma()` preamble in the GPU
4+
and interval-GLSL compilation targets used recursion for the reflection formula
5+
(z < 0.5), which is illegal in GLSL. Replaced with a non-recursive
6+
implementation that evaluates the Lanczos approximation once (at `1 - z` when z < 0.5) and then applies the reflection formula to that result.
7+
38
- **Non-strict parser supports exponents on bare functions**: In non-strict mode
49
(`strict: false`), bare function names like `sin`, `cos`, `tan` can now
510
include an exponent before the argument list. For example, `sin^2(x)` and

src/compute-engine/compilation/gpu-target.ts

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -570,21 +570,23 @@ export function compileGPUMatrix(
570570
export const GPU_GAMMA_PREAMBLE = `
571571
float _gpu_gamma(float z) {
572572
const float PI = 3.14159265358979;
573-
if (z < 0.5) {
574-
return PI / (sin(PI * z) * _gpu_gamma(1.0 - z));
575-
}
576-
z -= 1.0;
573+
// For z < 0.5, use reflection formula with inlined Lanczos (non-recursive)
574+
float w = z;
575+
if (z < 0.5) w = 1.0 - z;
576+
w -= 1.0;
577577
float x = 0.99999999999980993;
578-
x += 676.5203681218851 / (z + 1.0);
579-
x += -1259.1392167224028 / (z + 2.0);
580-
x += 771.32342877765313 / (z + 3.0);
581-
x += -176.61502916214059 / (z + 4.0);
582-
x += 12.507343278686905 / (z + 5.0);
583-
x += -0.13857109526572012 / (z + 6.0);
584-
x += 9.9843695780195716e-6 / (z + 7.0);
585-
x += 1.5056327351493116e-7 / (z + 8.0);
586-
float t = z + 7.5;
587-
return sqrt(2.0 * PI) * pow(t, z + 0.5) * exp(-t) * x;
578+
x += 676.5203681218851 / (w + 1.0);
579+
x += -1259.1392167224028 / (w + 2.0);
580+
x += 771.32342877765313 / (w + 3.0);
581+
x += -176.61502916214059 / (w + 4.0);
582+
x += 12.507343278686905 / (w + 5.0);
583+
x += -0.13857109526572012 / (w + 6.0);
584+
x += 9.9843695780195716e-6 / (w + 7.0);
585+
x += 1.5056327351493116e-7 / (w + 8.0);
586+
float t = w + 7.5;
587+
float g = sqrt(2.0 * PI) * pow(t, w + 0.5) * exp(-t) * x;
588+
if (z < 0.5) return PI / (sin(PI * z) * g);
589+
return g;
588590
}
589591
590592
float _gpu_gammaln(float z) {

src/compute-engine/compilation/interval-glsl-target.ts

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -990,21 +990,23 @@ IntervalResult ia_asech(IntervalResult x) {
990990
// Poles at non-positive integers; minimum at x ≈ 1.4616
991991
float _gpu_gamma(float z) {
992992
const float PI = 3.14159265358979;
993-
if (z < 0.5) {
994-
return PI / (sin(PI * z) * _gpu_gamma(1.0 - z));
995-
}
996-
z -= 1.0;
993+
// For z < 0.5, use reflection formula with inlined Lanczos (non-recursive)
994+
float w = z;
995+
if (z < 0.5) w = 1.0 - z;
996+
w -= 1.0;
997997
float x = 0.99999999999980993;
998-
x += 676.5203681218851 / (z + 1.0);
999-
x += -1259.1392167224028 / (z + 2.0);
1000-
x += 771.32342877765313 / (z + 3.0);
1001-
x += -176.61502916214059 / (z + 4.0);
1002-
x += 12.507343278686905 / (z + 5.0);
1003-
x += -0.13857109526572012 / (z + 6.0);
1004-
x += 9.9843695780195716e-6 / (z + 7.0);
1005-
x += 1.5056327351493116e-7 / (z + 8.0);
1006-
float t = z + 7.5;
1007-
return sqrt(2.0 * PI) * pow(t, z + 0.5) * exp(-t) * x;
998+
x += 676.5203681218851 / (w + 1.0);
999+
x += -1259.1392167224028 / (w + 2.0);
1000+
x += 771.32342877765313 / (w + 3.0);
1001+
x += -176.61502916214059 / (w + 4.0);
1002+
x += 12.507343278686905 / (w + 5.0);
1003+
x += -0.13857109526572012 / (w + 6.0);
1004+
x += 9.9843695780195716e-6 / (w + 7.0);
1005+
x += 1.5056327351493116e-7 / (w + 8.0);
1006+
float t = w + 7.5;
1007+
float g = sqrt(2.0 * PI) * pow(t, w + 0.5) * exp(-t) * x;
1008+
if (z < 0.5) return PI / (sin(PI * z) * g);
1009+
return g;
10081010
}
10091011
10101012
// Interval gamma function

0 commit comments

Comments
 (0)