util: Expose rgba unpack/fetch functions as external functions as well.

[mesa.git] / src / util / fast_idiv_by_const.h
diff --git a/src/util/fast_idiv_by_const.h b/src/util/fast_idiv_by_const.h

index ac10cf79ba8abd9b5b486cd637e691bcec453bbf..638b52a3ffbb8428adc7066429ae87a363bf8dbd 100644 (file)
--- a/src/util/fast_idiv_by_const.h
+++ b/src/util/fast_idiv_by_const.h
@@ -36,10 +36,6 @@
  extern "C" {
  #endif
  
-/* You can set these to different types to get different precision. */
-typedef int32_t sint_t;
-typedef uint32_t uint_t;
-
  /* Computes "magic info" for performing signed division by a fixed integer D.
   * The type 'sint_t' is assumed to be defined as a signed integer type large
   * enough to hold both the dividend and the divisor.
@@ -68,19 +64,19 @@ typedef uint32_t uint_t;
   */
  
  struct util_fast_sdiv_info {
-   sint_t multiplier; /* the "magic number" multiplier */
+   int64_t multiplier; /* the "magic number" multiplier */
     unsigned shift; /* shift for the dividend after multiplying */
  };
  
  struct util_fast_sdiv_info
-util_compute_fast_sdiv_info(sint_t D);
+util_compute_fast_sdiv_info(int64_t D, unsigned SINT_BITS);
  
  /* Computes "magic info" for performing unsigned division by a fixed positive
- * integer D. The type 'uint_t' is assumed to be defined as an unsigned
- * integer type large enough to hold both the dividend and the divisor.
- * num_bits can be set appropriately if n is known to be smaller than
- * the largest uint_t; if this is not known then pass
- * "(sizeof(uint_t) * CHAR_BIT)" for num_bits.
+ * integer D.  UINT_BITS is the bit size at which the final "magic"
+ * calculation will be performed; it is assumed to be large enough to hold
+ * both the dividend and the divisor.  num_bits can be set appropriately if n
+ * is known to fit in fewer than UINT_BITS bits; if this is not known then
+ * pass UINT_BITS for num_bits.
   *
   * Assume we have a hardware register of width UINT_BITS, a known constant D
   * which is not zero and not a power of 2, and a variable n of width num_bits
@@ -102,8 +98,8 @@ util_compute_fast_sdiv_info(sint_t D);
   *   emit("result >>>= UINT_BITS")
   *   if m.post_shift > 0: emit("result >>>= m.post_shift")
   *
- * The shifts by UINT_BITS may be "free" if the high half of the full multiply
- * is put in a separate register.
+ * This second version works even if D is 1.  The shifts by UINT_BITS may be
+ * "free" if the high half of the full multiply is put in a separate register.
   *
   * saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
   *   if n == (1 << UINT_BITS)-1: result = n
@@ -120,7 +116,7 @@ util_compute_fast_sdiv_info(sint_t D);
   */
  
  struct util_fast_udiv_info {
-   uint_t multiplier; /* the "magic number" multiplier */
+   uint64_t multiplier; /* the "magic number" multiplier */
     unsigned pre_shift; /* shift for the dividend before multiplying */
     unsigned post_shift; /* shift for the dividend after multiplying */
     int increment; /* 0 or 1; if set then increment the numerator, using one of
@@ -128,7 +124,57 @@ struct util_fast_udiv_info {
  };
  
  struct util_fast_udiv_info
-util_compute_fast_udiv_info(uint_t D, unsigned num_bits);
+util_compute_fast_udiv_info(uint64_t D, unsigned num_bits, unsigned UINT_BITS);
+
+/* Below are possible options for dividing by a uniform in a shader where
+ * the divisor is constant but not known at compile time.
+ */
+
+/* Full version. */
+static inline uint32_t
+util_fast_udiv32(uint32_t n, struct util_fast_udiv_info info)
+{
+   n = n >> info.pre_shift;
+   /* If the divisor is not 1, you can instead use a 32-bit ADD that clamps
+    * to UINT_MAX. Dividing by 1 needs the full 64-bit ADD.
+    *
+    * If you have unsigned 64-bit MAD with 32-bit inputs, you can do:
+    *    increment = increment ? multiplier : 0; // on the CPU
+    *    (n * multiplier + increment) // on the GPU using unsigned 64-bit MAD
+    */
+   n = (((uint64_t)n + info.increment) * info.multiplier) >> 32;
+   n = n >> info.post_shift;
+   return n;
+}
+
+/* A little more efficient version if n != UINT_MAX, i.e. no unsigned
+ * wraparound in the computation.
+ */
+static inline uint32_t
+util_fast_udiv32_nuw(uint32_t n, struct util_fast_udiv_info info)
+{
+   assert(n != UINT32_MAX);
+   n = n >> info.pre_shift;
+   n = n + info.increment;
+   n = ((uint64_t)n * info.multiplier) >> 32;
+   n = n >> info.post_shift;
+   return n;
+}
+
+/* Even faster version but both operands must be 31-bit unsigned integers
+ * and the divisor must be greater than 1.
+ *
+ * info must be computed with num_bits == 31.
+ */
+static inline uint32_t
+util_fast_udiv32_u31_d_not_one(uint32_t n, struct util_fast_udiv_info info)
+{
+   assert(info.pre_shift == 0);
+   assert(info.increment == 0);
+   n = ((uint64_t)n * info.multiplier) >> 32;
+   n = n >> info.post_shift;
+   return n;
+}
  
  #ifdef __cplusplus
  } /* extern C */