diff --git a/pcg_basic.c b/pcg_basic.c index 8c2fd0d..9181c54 100644 --- a/pcg_basic.c +++ b/pcg_basic.c @@ -78,22 +78,6 @@ uint32_t pcg32_random() uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound) { - // To avoid bias, we need to make the range of the RNG a multiple of - // bound, which we do by dropping output less than a threshold. - // A naive scheme to calculate the threshold would be to do - // - // uint32_t threshold = 0x100000000ull % bound; - // - // but 64-bit div/mod is slower than 32-bit div/mod (especially on - // 32-bit platforms). In essence, we do - // - // uint32_t threshold = (0x100000000ull-bound) % bound; - // - // because this version will calculate the same modulus, but the LHS - // value is less than 2^32. - - uint32_t threshold = -bound % bound; - // Uniformity guarantees that this loop will terminate. In practice, it // should usually terminate quickly; on average (assuming all bounds are // equally likely), 82.25% of the time, we can expect it to require just @@ -103,8 +87,15 @@ uint32_t pcg32_boundedrand_r(pcg32_random_t* rng, uint32_t bound) // is eliminated. for (;;) { uint32_t r = pcg32_random_r(rng); - if (r >= threshold) - return r % bound; + + // Start of bound-multiple segment which holds r + uint32_t rint = r / bound * bound; + + // Check if the end of that segment do not overflow, + // i.e. rint + bound <= 2^32 or + // rint <= 2^32 - bound = (uint32_t)(-bound) + if (rint <= (uint32_t)(-bound)) + return r - rint; } }