X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Futil%2Fu_math.h;h=59266c16922824e25c4ca5c0c5b65af9a6e4b80a;hp=e7dbbe5ca220cbdaea7d8b4d255abdd194b3cea1;hb=38e68db778439c34ebb876273fcf9139764abc80;hpb=80825abb5d1a7491035880253ffd531c55acae6b

diff --git a/src/util/u_math.h b/src/util/u_math.h
index e7dbbe5ca22..59266c16922 100644
--- a/src/util/u_math.h
+++ b/src/util/u_math.h
@@ -39,14 +39,13 @@
 #define U_MATH_H
 
 
-#include "pipe/p_compiler.h"
-
 #include "c99_math.h"
 #include <assert.h>
 #include <float.h>
 #include <stdarg.h>
 
 #include "bitscan.h"
+#include "u_endian.h" /* for UTIL_ARCH_BIG_ENDIAN */
 
 #ifdef __cplusplus
 extern "C" {
@@ -186,6 +185,23 @@ util_fast_pow(float x, float y)
 static inline int
 util_ifloor(float f)
 {
+#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
+   /*
+    * IEEE floor for computers that round to nearest or even.
+    * 'f' must be between -4194304 and 4194303.
+    * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
+    * but uses some IEEE specific tricks for better speed.
+    * Contributed by Josh Vanderhoof
+    */
+   int ai, bi;
+   double af, bf;
+   af = (3 << 22) + 0.5 + (double)f;
+   bf = (3 << 22) + 0.5 - (double)f;
+   /* GCC generates an extra fstp/fld without this. */
+   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
+   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
+   return (ai - bi) >> 1;
+#else
    int ai, bi;
    double af, bf;
    union fi u;
@@ -194,6 +210,7 @@ util_ifloor(float f)
    u.f = (float) af;  ai = u.i;
    u.f = (float) bf;  bi = u.i;
    return (ai - bi) >> 1;
+#endif
 }
 
 
@@ -226,7 +243,7 @@ util_iround(float f)
 /**
  * Approximate floating point comparison
  */
-static inline boolean
+static inline bool
 util_is_approx(float a, float b, float tol)
 {
    return fabsf(b - a) <= tol;
@@ -245,7 +262,7 @@ util_is_approx(float a, float b, float tol)
 /**
  * Single-float
  */
-static inline boolean
+static inline bool
 util_is_inf_or_nan(float x)
 {
    union fi tmp;
@@ -254,7 +271,7 @@ util_is_inf_or_nan(float x)
 }
 
 
-static inline boolean
+static inline bool
 util_is_nan(float x)
 {
    union fi tmp;
@@ -279,7 +296,7 @@ util_inf_sign(float x)
 /**
  * Double-float
  */
-static inline boolean
+static inline bool
 util_is_double_inf_or_nan(double x)
 {
    union di tmp;
@@ -288,7 +305,7 @@ util_is_double_inf_or_nan(double x)
 }
 
 
-static inline boolean
+static inline bool
 util_is_double_nan(double x)
 {
    union di tmp;
@@ -313,14 +330,14 @@ util_double_inf_sign(double x)
 /**
  * Half-float
  */
-static inline boolean
+static inline bool
 util_is_half_inf_or_nan(int16_t x)
 {
    return (x & 0x7c00) == 0x7c00;
 }
 
 
-static inline boolean
+static inline bool
 util_is_half_nan(int16_t x)
 {
    return (x & 0x7fff) > 0x7c00;
@@ -359,33 +376,64 @@ uif(uint32_t ui)
 
 
 /**
- * Convert ubyte to float in [0, 1].
+ * Convert uint8_t to float in [0, 1].
  */
 static inline float
-ubyte_to_float(ubyte ub)
+ubyte_to_float(uint8_t ub)
 {
    return (float) ub * (1.0f / 255.0f);
 }
 
 
 /**
- * Convert float in [0,1] to ubyte in [0,255] with clamping.
+ * Convert float in [0,1] to uint8_t in [0,255] with clamping.
  */
-static inline ubyte
+static inline uint8_t
 float_to_ubyte(float f)
 {
    /* return 0 for NaN too */
    if (!(f > 0.0f)) {
-      return (ubyte) 0;
+      return (uint8_t) 0;
    }
    else if (f >= 1.0f) {
-      return (ubyte) 255;
+      return (uint8_t) 255;
    }
    else {
       union fi tmp;
       tmp.f = f;
       tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f;
-      return (ubyte) tmp.i;
+      return (uint8_t) tmp.i;
+   }
+}
+
+/** Map a uint16_t in [0, 65535] linearly onto a float in [0, 1]. */
+static inline float
+ushort_to_float(uint16_t us)
+{
+   const float value = (float) us;
+
+   return (1.0f / 65535.0f) * value;
+}
+
+
+/** Convert float in [0,1] to uint16_t in [0,65535] with clamping.  Adding
+ * 128.0f is presumably an IEEE-754 trick that leaves the rounded result in
+ * the low 16 bits of tmp.i (union fi is defined elsewhere; confirm). */
+static inline uint16_t
+float_to_ushort(float f)
+{
+   /* !(f > 0.0f) is also true for NaN, so NaN returns 0 as well */
+   if (!(f > 0.0f)) {
+      return (uint16_t) 0;
+   }
+   else if (f >= 1.0f) {
+      return (uint16_t) 65535;
+   }
+   else {
+      union fi tmp;
+      tmp.f = f;
+      tmp.f = tmp.f * (65535.0f/65536.0f) + 128.0f;
+      return (uint16_t) tmp.i;
    }
 }
 
@@ -519,42 +567,6 @@ util_next_power_of_two64(uint64_t x)
 #endif
 }
 
-
-/**
- * Return number of bits set in n.
- */
-static inline unsigned
-util_bitcount(unsigned n)
-{
-#if defined(HAVE___BUILTIN_POPCOUNT)
-   return __builtin_popcount(n);
-#else
-   /* K&R classic bitcount.
-    *
-    * For each iteration, clear the LSB from the bitfield.
-    * Requires only one iteration per set bit, instead of
-    * one iteration per bit less than highest set bit.
-    */
-   unsigned bits;
-   for (bits = 0; n; bits++) {
-      n &= n - 1;
-   }
-   return bits;
-#endif
-}
-
-
-static inline unsigned
-util_bitcount64(uint64_t n)
-{
-#ifdef HAVE___BUILTIN_POPCOUNTLL
-   return __builtin_popcountll(n);
-#else
-   return util_bitcount(n) + util_bitcount(n >> 32);
-#endif
-}
-
-
 /**
  * Reverse bits in n
  * Algorithm taken from:
@@ -575,7 +587,7 @@ util_bitreverse(unsigned n)
  * Convert from little endian to CPU byte order.
  */
 
-#ifdef PIPE_ARCH_BIG_ENDIAN
+#if UTIL_ARCH_BIG_ENDIAN
 #define util_le64_to_cpu(x) util_bswap64(x)
 #define util_le32_to_cpu(x) util_bswap32(x)
 #define util_le16_to_cpu(x) util_bswap16(x)
@@ -633,7 +645,7 @@ util_bswap16(uint16_t n)
 static inline void*
 util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
 {
-#ifdef PIPE_ARCH_BIG_ENDIAN
+#if UTIL_ARCH_BIG_ENDIAN
    size_t i, e;
    assert(n % 4 == 0);
 
@@ -665,6 +677,52 @@ util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t
 #define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))
 
 
+/**
+ * Round a value up to a multiple of a power-of-two alignment.
+ *
+ * A \c value that is already a multiple of \c alignment is returned as is.
+ *
+ * \param value  Value to be rounded
+ * \param alignment  Alignment to round to.  This must be a power of two.
+ *
+ * \sa ROUND_DOWN_TO()
+ */
+static inline uintptr_t
+ALIGN(uintptr_t value, int32_t alignment)
+{
+   assert(util_is_power_of_two_nonzero(alignment));
+   value += (uintptr_t) alignment - 1;
+   return value & ~((uintptr_t) alignment - 1);
+}
+
+/** Round \c value up to a multiple of \c alignment, which must be positive
+ *  but, unlike for ALIGN(), does not have to be a power of two. */
+static inline uintptr_t
+ALIGN_NPOT(uintptr_t value, int32_t alignment)
+{
+   assert(alignment > 0);
+   value += (uintptr_t) alignment - 1;
+   return value - value % (uintptr_t) alignment;
+}
+
+/**
+ * Round a value down to a multiple of a power-of-two alignment.
+ *
+ * A \c value that is already a multiple of \c alignment is returned as is.
+ *
+ * \param value  Value to be rounded
+ * \param alignment  Alignment to round to.  This must be a power of two.
+ *
+ * \sa ALIGN()
+ */
+static inline uintptr_t
+ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
+{
+   assert(util_is_power_of_two_nonzero(alignment));
+   value -= value & ((uintptr_t) alignment - 1);
+   return value;
+}
+
 /**
  * Align a value, only works pot alignemnts.
  */
@@ -743,7 +801,25 @@ util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
 void
 util_fpstate_set(unsigned fpstate);
 
-
+/**
+ * For indexed draw calls, return true if the vertex count to be drawn is
+ * much lower than the vertex count that has to be uploaded, meaning that
+ * the driver should flatten indices instead of uploading too big a range.
+ * The allowed upload/draw ratio is 4 above 1024 drawn vertices, 8 above
+ * 32, and 16 otherwise.  The product is formed in 64 bits so that huge
+ * draw counts cannot wrap around and report a bogus "too large".
+ *
+ * This is used by vertex upload code in u_vbuf and glthread.
+ */
+static inline bool
+util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count,
+                                   unsigned upload_vertex_count)
+{
+   const unsigned max_ratio = draw_vertex_count > 1024 ? 4 :
+                              draw_vertex_count > 32 ? 8 : 16;
+
+   return upload_vertex_count > (uint64_t) draw_vertex_count * max_ratio;
+}
 
 #ifdef __cplusplus
 }