util: document a limitation of util_fast_udiv32

author Marek Olšák <marek.olsak@amd.com>

Sun, 23 Sep 2018 00:03:27 +0000 (20:03 -0400)

committer Marek Olšák <marek.olsak@amd.com>

Wed, 17 Oct 2018 16:27:58 +0000 (12:27 -0400)
author Marek Olšák <marek.olsak@amd.com>
Sun, 23 Sep 2018 00:03:27 +0000 (20:03 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 17 Oct 2018 16:27:58 +0000 (12:27 -0400)
diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h

index 92a3ccdf2226a14e81e65f45b07a85fda666ec39..638b52a3ffbb8428adc7066429ae87a363bf8dbd 100644 (file)
--- a/src/util/fast_idiv_by_const.h
+++ b/src/util/fast_idiv_by_const.h
@@ -135,7 +135,13 @@ static inline uint32_t
  util_fast_udiv32(uint32_t n, struct util_fast_udiv_info info)
  {
     n = n >> info.pre_shift;
-   /* For non-power-of-two divisors, use a 32-bit ADD that clamps to UINT_MAX. */
+   /* If the divisor is not 1, you can instead use a 32-bit ADD that clamps
+    * to UINT_MAX. Dividing by 1 needs the full 64-bit ADD.
+    *
+    * If you have an unsigned 64-bit MAD with 32-bit inputs, you can do:
+    *    increment = increment ? multiplier : 0; // on the CPU
+    *    (n * multiplier + increment) // on the GPU using unsigned 64-bit MAD
+    */
     n = (((uint64_t)n + info.increment) * info.multiplier) >> 32;
     n = n >> info.post_shift;
     return n;
author	Marek Olšák <marek.olsak@amd.com>
	Sun, 23 Sep 2018 00:03:27 +0000 (20:03 -0400)
committer	Marek Olšák <marek.olsak@amd.com>
	Wed, 17 Oct 2018 16:27:58 +0000 (12:27 -0400)