X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Futil%2Ffast_idiv_by_const.h;h=638b52a3ffbb8428adc7066429ae87a363bf8dbd;hb=35938c15e22e3021f7693425f0d2134845c81f6b;hp=92a3ccdf2226a14e81e65f45b07a85fda666ec39;hpb=a9be8dddfedb1d19e43b900bdfd33731d3c390c4;p=mesa.git diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h index 92a3ccdf222..638b52a3ffb 100644 --- a/src/util/fast_idiv_by_const.h +++ b/src/util/fast_idiv_by_const.h @@ -135,7 +135,13 @@ static inline uint32_t util_fast_udiv32(uint32_t n, struct util_fast_udiv_info info) { n = n >> info.pre_shift; - /* For non-power-of-two divisors, use a 32-bit ADD that clamps to UINT_MAX. */ + /* If the divisor is not 1, you can instead use a 32-bit ADD that clamps + * to UINT_MAX. Dividing by 1 needs the full 64-bit ADD. + * + * If you have unsigned 64-bit MAD with 32-bit inputs, you can do: + * increment = increment ? multiplier : 0; // on the CPU + * (n * multiplier + increment) // on the GPU using unsigned 64-bit MAD + */ n = (((uint64_t)n + info.increment) * info.multiplier) >> 32; n = n >> info.post_shift; return n;