util: Add power-of-two divisor support to compute_fast_udiv_info

author Marek Olšák <marek.olsak@amd.com>

Sat, 6 Oct 2018 01:42:16 +0000 (20:42 -0500)

committer Jason Ekstrand <jason.ekstrand@intel.com>

Wed, 10 Oct 2018 18:13:12 +0000 (13:13 -0500)
author Marek Olšák <marek.olsak@amd.com>
Sat, 6 Oct 2018 01:42:16 +0000 (20:42 -0500)
committer Jason Ekstrand <jason.ekstrand@intel.com>
Wed, 10 Oct 2018 18:13:12 +0000 (13:13 -0500)
diff --git a/src/util/fast_idiv_by_const.c b/src/util/fast_idiv_by_const.c

index 65a9e640789dfe14de0616da1323a4eb476384ce..7b93316268c2a3e00827bf1829d3966aa6a4d39b 100644 (file)
--- a/src/util/fast_idiv_by_const.c
+++ b/src/util/fast_idiv_by_const.c
@@ -52,6 +52,27 @@ util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS)
     /* The eventual result */
     struct util_fast_udiv_info result;
  
+   if (util_is_power_of_two_or_zero64(D)) {
+      unsigned div_shift = util_logbase2_64(D);
+
+      if (div_shift) {
+         /* Dividing by a power of two. */
+         result.multiplier = 1ull << (UINT_BITS - div_shift);
+         result.pre_shift = 0;
+         result.post_shift = 0;
+         result.increment = 0;
+         return result;
+      } else {
+         /* Dividing by 1. */
+         /* Assuming: floor((num + 1) * (2^N - 1) / 2^N) = num, N = UINT_BITS */
+         result.multiplier = UINT_BITS == 64 ? UINT64_MAX :
+                                               (1ull << UINT_BITS) - 1;
+         result.pre_shift = 0;
+         result.post_shift = 0;
+         result.increment = 1;
+         return result;
+      }
+   }
  
     /* The extra shift implicit in the difference between UINT_BITS and num_bits
      */
diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h

index 231311f84be2d3d361364389c10075d123c7b3a5..92a3ccdf2226a14e81e65f45b07a85fda666ec39 100644 (file)
--- a/src/util/fast_idiv_by_const.h
+++ b/src/util/fast_idiv_by_const.h
@@ -98,8 +98,8 @@ util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS);
   *   emit("result >>>= UINT_BITS")
   *   if m.post_shift > 0: emit("result >>>= m.post_shift")
   *
- * The shifts by UINT_BITS may be "free" if the high half of the full multiply
- * is put in a separate register.
+ * This second version works even if D is 1.  The shifts by UINT_BITS may be
+ * "free" if the high half of the full multiply is put in a separate register.
   *
   * saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
   *   if n == (1 << UINT_BITS)-1: result = n
author	Marek Olšák <marek.olsak@amd.com>
	Sat, 6 Oct 2018 01:42:16 +0000 (20:42 -0500)
committer	Jason Ekstrand <jason.ekstrand@intel.com>
	Wed, 10 Oct 2018 18:13:12 +0000 (13:13 -0500)
src/util/fast_idiv_by_const.c		patch | blob | history
src/util/fast_idiv_by_const.h		patch | blob | history