gallium: enable GL_AMD_depth_clamp_separate on r600, radeonsi

[mesa.git] / src / gallium / auxiliary / util / u_math.h
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h

index 4e58e505572e6d9280d3993d4225c373b6ab7cbf..712305c98a43e9f1a02dbc5bd2569cce73a68948 100644 (file)
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -46,14 +46,7 @@
  #include <float.h>
  #include <stdarg.h>
  
-#ifdef PIPE_OS_UNIX
-#include <strings.h> /* for ffs */
-#endif
-
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
-
+#include "util/bitscan.h"
  
  #ifdef __cplusplus
  extern "C" {
@@ -186,14 +179,6 @@ util_fast_pow(float x, float y)
     return util_fast_exp2(util_fast_log2(x) * y);
  }
  
-/* Note that this counts zero as a power of two.
- */
-static inline boolean
-util_is_power_of_two( unsigned v )
-{
-   return (v & (v-1)) == 0;
-}
-
  
  /**
   * Floor(x), returned as int.
@@ -353,198 +338,6 @@ util_half_inf_sign(int16_t x)
  }
  
  
-/**
- * Find first bit set in word.  Least significant bit is 1.
- * Return 0 if no bits set.
- */
-#ifndef FFS_DEFINED
-#define FFS_DEFINED 1
-
-#if defined(_MSC_VER) && (_M_IX86 || _M_AMD64 || _M_IA64)
-static inline
-unsigned long ffs( unsigned long u )
-{
-   unsigned long i;
-   if (_BitScanForward(&i, u))
-      return i + 1;
-   else
-      return 0;
-}
-#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static inline
-unsigned ffs( unsigned u )
-{
-   unsigned i;
-
-   if (u == 0) {
-      return 0;
-   }
-
-   __asm bsf eax, [u]
-   __asm inc eax
-   __asm mov [i], eax
-
-   return i;
-}
-#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) || \
-    defined(HAVE___BUILTIN_FFS)
-#define ffs __builtin_ffs
-#endif
-
-#ifdef HAVE___BUILTIN_FFSLL
-#define ffsll __builtin_ffsll
-#else
-static inline int
-ffsll(long long int val)
-{
-   int bit;
-
-   bit = ffs((unsigned) (val & 0xffffffff));
-   if (bit != 0)
-      return bit;
-
-   bit = ffs((unsigned) (val >> 32));
-   if (bit != 0)
-      return 32 + bit;
-
-   return 0;
-}
-#endif
-
-#endif /* FFS_DEFINED */
-
-/**
- * Find first bit set in long long.  Least significant bit is 1.
- * Return 0 if no bits set.
- */
-#ifndef FFSLL_DEFINED
-#define FFSLL_DEFINED 1
-
-#if defined(__MINGW32__) || defined(PIPE_OS_ANDROID) || \
-    defined(HAVE___BUILTIN_FFSLL)
-#define ffsll __builtin_ffsll
-#endif
-
-#endif /* FFSLL_DEFINED */
-
-/**
- * Find last bit set in a word.  The least significant bit is 1.
- * Return 0 if no bits are set.
- */
-static inline unsigned
-util_last_bit(unsigned u)
-{
-#if defined(HAVE___BUILTIN_CLZ)
-   return u == 0 ? 0 : 32 - __builtin_clz(u);
-#else
-   unsigned r = 0;
-   while (u) {
-       r++;
-       u >>= 1;
-   }
-   return r;
-#endif
-}
-
-/**
- * Find last bit set in a word.  The least significant bit is 1.
- * Return 0 if no bits are set.
- */
-static inline unsigned
-util_last_bit64(uint64_t u)
-{
-#if defined(HAVE___BUILTIN_CLZLL)
-   return u == 0 ? 0 : 64 - __builtin_clzll(u);
-#else
-   unsigned r = 0;
-   while (u) {
-       r++;
-       u >>= 1;
-   }
-   return r;
-#endif
-}
-
-/**
- * Find last bit in a word that does not match the sign bit. The least
- * significant bit is 1.
- * Return 0 if no bits are set.
- */
-static inline unsigned
-util_last_bit_signed(int i)
-{
-   if (i >= 0)
-      return util_last_bit(i);
-   else
-      return util_last_bit(~(unsigned)i);
-}
-
-/* Destructively loop over all of the bits in a mask as in:
- *
- * while (mymask) {
- *   int i = u_bit_scan(&mymask);
- *   ... process element i
- * }
- *
- */
-static inline int
-u_bit_scan(unsigned *mask)
-{
-   int i = ffs(*mask) - 1;
-   *mask &= ~(1u << i);
-   return i;
-}
-
-#ifndef _MSC_VER
-static inline int
-u_bit_scan64(uint64_t *mask)
-{
-   int i = ffsll(*mask) - 1;
-   *mask &= ~(1llu << i);
-   return i;
-}
-#endif
-
-/* For looping over a bitmask when you want to loop over consecutive bits
- * manually, for example:
- *
- * while (mask) {
- *    int start, count, i;
- *
- *    u_bit_scan_consecutive_range(&mask, &start, &count);
- *
- *    for (i = 0; i < count; i++)
- *       ... process element (start+i)
- * }
- */
-static inline void
-u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
-{
-   if (*mask == 0xffffffff) {
-      *start = 0;
-      *count = 32;
-      *mask = 0;
-      return;
-   }
-   *start = ffs(*mask) - 1;
-   *count = ffs(~(*mask >> *start)) - 1;
-   *mask &= ~(((1u << *count) - 1) << *start);
-}
-
-static inline void
-u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
-{
-   if (*mask == ~0llu) {
-      *start = 0;
-      *count = 64;
-      *mask = 0;
-      return;
-   }
-   *start = ffsll(*mask) - 1;
-   *count = ffsll(~(*mask >> *start)) - 1;
-   *mask &= ~(((1llu << *count) - 1) << *start);
-}
-
  /**
   * Return float bits.
   */
@@ -567,7 +360,6 @@ uif(uint32_t ui)
  
  /**
   * Convert ubyte to float in [0, 1].
- * XXX a 256-entry lookup table would be slightly faster.
   */
  static inline float
  ubyte_to_float(ubyte ub)
@@ -582,16 +374,16 @@ ubyte_to_float(ubyte ub)
  static inline ubyte
  float_to_ubyte(float f)
  {
-   union fi tmp;
-
-   tmp.f = f;
-   if (tmp.i < 0) {
+   /* return 0 for NaN too */
+   if (!(f > 0.0f)) {
        return (ubyte) 0;
     }
-   else if (tmp.i >= 0x3f800000 /* 1.0f */) {
+   else if (f >= 1.0f) {
        return (ubyte) 255;
     }
     else {
+      union fi tmp;
+      tmp.f = f;
        tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
        return (ubyte) tmp.i;
     }
@@ -628,6 +420,44 @@ util_logbase2(unsigned n)
  #endif
  }
  
+/** Floor of log2(n) for a 64-bit n; n == 0 yields 0, the same as n == 1. */
+static inline uint64_t
+util_logbase2_64(uint64_t n)
+{
+#if defined(HAVE___BUILTIN_CLZLL)
+   return 63 - __builtin_clzll(n | 1); /* "| 1" keeps the argument nonzero */
+#else
+   uint64_t result = 0;
+   unsigned step;
+
+   /* Probe the upper half of the remaining bits, halving the width each pass. */
+   for (step = 32; step != 0; step >>= 1)
+      if (n >> step) { n >>= step; result += step; }
+   return result;
+#endif
+}
+
+/**
+ * Ceiling of log2(n), i.e. the smallest x for which n <= 2**x.
+ * Both n == 0 and n == 1 give 0.
+ */
+static inline unsigned
+util_logbase2_ceil(unsigned n)
+{
+   /* n >= 2: one more than util_logbase2() of the predecessor. */
+   if (n > 1)
+      return util_logbase2(n - 1) + 1;
+   return 0;
+}
+
+/**
+ * 64-bit util_logbase2_ceil(): 0 for n <= 1, else util_logbase2_64(n - 1) + 1.
+ */
+static inline uint64_t
+util_logbase2_ceil64(uint64_t n)
+{
+   return (n > 1) ? 1ull + util_logbase2_64(n - 1) : 0;
+}
  
  /**
   * Returns the smallest power of two >= x
@@ -646,7 +476,7 @@ util_next_power_of_two(unsigned x)
     if (x <= 1)
        return 1;
  
-   if (util_is_power_of_two(x))
+   if (util_is_power_of_two_or_zero(x))
        return x;
  
     val--;
@@ -660,6 +490,35 @@ util_next_power_of_two(unsigned x)
  #endif
  }
  
+/** Smallest power of two >= x (1 for x <= 1); 0 when x > 2^63 (no 64-bit result). */
+static inline uint64_t
+util_next_power_of_two64(uint64_t x)
+{
+#if defined(HAVE___BUILTIN_CLZLL)
+   if (x <= 1)
+      return 1;
+   if (x > (1ull << 63))
+      return 0; /* matches the #else path; shifting by 64 would be undefined */
+   return (1ull << ((sizeof(uint64_t) * 8) - __builtin_clzll(x - 1)));
+#else
+   uint64_t val = x;
+
+   if (x <= 1)
+      return 1;
+   if (util_is_power_of_two_or_zero64(x))
+      return x;
+   val--; /* then copy the top set bit into every lower position */
+   val = (val >> 1)  | val;
+   val = (val >> 2)  | val;
+   val = (val >> 4)  | val;
+   val = (val >> 8)  | val;
+   val = (val >> 16) | val;
+   val = (val >> 32) | val;
+   val++;
+   return val;
+#endif
+}
+
  
  /**
   * Return number of bits set in n.
@@ -792,8 +651,9 @@ util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t
  /**
   * Clamp X to [MIN, MAX].
   * This is a macro to allow float, int, uint, etc. types.
+ * NaN becomes MIN: (X)>(MIN) is false for NaN, so the MIN arm is taken.
   */
-#define CLAMP( X, MIN, MAX )  ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) )
+#define CLAMP( X, MIN, MAX )  ( (X)>(MIN) ? ((X)>(MAX) ? (MAX) : (X)) : (MIN) )
  
  #define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
  #define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
@@ -817,7 +677,7 @@ align(int value, int alignment)
  static inline uint64_t
  align64(uint64_t value, unsigned alignment)
  {
-   return (value + alignment - 1) & ~(alignment - 1);
+   return (value + alignment - 1) & ~((uint64_t)alignment - 1); /* cast first: a narrower mask would clear value's high bits */
  }
  
  /**