X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Futil%2Ffast_idiv_by_const.h;h=638b52a3ffbb8428adc7066429ae87a363bf8dbd;hb=a4c708dd24e5ba8ac381973c14db8d23f4ac97bf;hp=1ba9f9a20b8279d022f809885a497f9df115067f;hpb=64eb0738d4e35e9ceb4bf99b028bdd5e12c59c34;p=mesa.git diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h index 1ba9f9a20b8..638b52a3ffb 100644 --- a/src/util/fast_idiv_by_const.h +++ b/src/util/fast_idiv_by_const.h @@ -36,10 +36,6 @@ extern "C" { #endif -/* You can set these to different types to get different precision. */ -typedef int32_t sint_t; -typedef uint32_t uint_t; - /* Computes "magic info" for performing signed division by a fixed integer D. * The type 'sint_t' is assumed to be defined as a signed integer type large * enough to hold both the dividend and the divisor. @@ -68,19 +64,19 @@ typedef uint32_t uint_t; */ struct util_fast_sdiv_info { - sint_t multiplier; /* the "magic number" multiplier */ + int64_t multiplier; /* the "magic number" multiplier */ unsigned shift; /* shift for the dividend after multiplying */ }; struct util_fast_sdiv_info -util_compute_fast_sdiv_info(sint_t D); +util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS); /* Computes "magic info" for performing unsigned division by a fixed positive - * integer D. The type 'uint_t' is assumed to be defined as an unsigned - * integer type large enough to hold both the dividend and the divisor. - * num_bits can be set appropriately if n is known to be smaller than - * the largest uint_t; if this is not known then pass - * "(sizeof(uint_t) * CHAR_BIT)" for num_bits. + * integer D. UINT_BITS is the bit size at which the final "magic" + * calculation will be performed; it is assumed to be large enough to hold + * both the dividend and the divisor. num_bits can be set appropriately if n + * is known to fit in fewer than UINT_BITS bits; if this is not known then + * pass UINT_BITS for num_bits. 
* * Assume we have a hardware register of width UINT_BITS, a known constant D * which is not zero and not a power of 2, and a variable n of width num_bits @@ -102,8 +98,8 @@ util_compute_fast_sdiv_info(sint_t D); * emit("result >>>= UINT_BITS") * if m.post_shift > 0: emit("result >>>= m.post_shift") * - * The shifts by UINT_BITS may be "free" if the high half of the full multiply - * is put in a separate register. + * This second version works even if D is 1. The shifts by UINT_BITS may be + * "free" if the high half of the full multiply is put in a separate register. * * saturated_increment(n) means "increment n unless it would wrap to 0," i.e. * if n == (1 << UINT_BITS)-1: result = n @@ -120,7 +116,7 @@ util_compute_fast_sdiv_info(sint_t D); */ struct util_fast_udiv_info { - uint_t multiplier; /* the "magic number" multiplier */ + uint64_t multiplier; /* the "magic number" multiplier */ unsigned pre_shift; /* shift for the dividend before multiplying */ unsigned post_shift; /* shift for the dividend after multiplying */ int increment; /* 0 or 1; if set then increment the numerator, using one of @@ -128,7 +124,7 @@ struct util_fast_udiv_info { }; struct util_fast_udiv_info -util_compute_fast_udiv_info(uint_t D, unsigned num_bits); +util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS); /* Below are possible options for dividing by a uniform in a shader where * the divisor is constant but not known at compile time. @@ -139,7 +135,13 @@ static inline uint32_t util_fast_udiv32(uint32_t n, struct util_fast_udiv_info info) { n = n >> info.pre_shift; - /* For non-power-of-two divisors, use a 32-bit ADD that clamps to UINT_MAX. */ + /* If the divisor is not 1, you can instead use a 32-bit ADD that clamps + * to UINT_MAX. Dividing by 1 needs the full 64-bit ADD. + * + * If you have unsigned 64-bit MAD with 32-bit inputs, you can do: + * increment = increment ? 
multiplier : 0; // on the CPU + * (n * multiplier + increment) // on the GPU using unsigned 64-bit MAD + */ n = (((uint64_t)n + info.increment) * info.multiplier) >> 32; n = n >> info.post_shift; return n;