v3d/tex: only look up the 2nd texture gather offset for 1d non-arrays

[mesa.git] / src / util / u_math.h
diff --git a/src/util/u_math.h b/src/util/u_math.h

index 4c3aafe80ab9e4d366da346d1784d8c440d67a76..59266c16922824e25c4ca5c0c5b65af9a6e4b80a 100644 (file)
--- a/src/util/u_math.h
+++ b/src/util/u_math.h
@@ -45,7 +45,7 @@
  #include <stdarg.h>
  
  #include "bitscan.h"
-#include "u_endian.h" /* for PIPE_ARCH_BIG_ENDIAN */
+#include "u_endian.h" /* for UTIL_ARCH_BIG_ENDIAN */
  
  #ifdef __cplusplus
  extern "C" {
@@ -185,6 +185,23 @@ util_fast_pow(float x, float y)
  static inline int
  util_ifloor(float f)
  {
+#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)
+   /*
+    * IEEE floor for computers that round to nearest or even.
+    * 'f' must be between -4194304 and 4194303.
+    * This floor operation is done by "(iround(f + .5) + iround(f - .5)) >> 1",
+    * but uses some IEEE specific tricks for better speed.
+    * Contributed by Josh Vanderhoof
+    */
+   int ai, bi;
+   double af, bf;
+   af = (3 << 22) + 0.5 + (double)f;
+   bf = (3 << 22) + 0.5 - (double)f;
+   /* GCC generates an extra fstp/fld without this. */
+   __asm__ ("fstps %0" : "=m" (ai) : "t" (af) : "st");
+   __asm__ ("fstps %0" : "=m" (bi) : "t" (bf) : "st");
+   return (ai - bi) >> 1;
+#else
     int ai, bi;
     double af, bf;
     union fi u;
@@ -193,6 +210,7 @@ util_ifloor(float f)
     u.f = (float) af;  ai = u.i;
     u.f = (float) bf;  bi = u.i;
     return (ai - bi) >> 1;
+#endif
  }
  
  
@@ -569,7 +587,7 @@ util_bitreverse(unsigned n)
   * Convert from little endian to CPU byte order.
   */
  
-#if PIPE_ARCH_BIG_ENDIAN
+#if UTIL_ARCH_BIG_ENDIAN
  #define util_le64_to_cpu(x) util_bswap64(x)
  #define util_le32_to_cpu(x) util_bswap32(x)
  #define util_le16_to_cpu(x) util_bswap16(x)
@@ -627,7 +645,7 @@ util_bswap16(uint16_t n)
  static inline void*
  util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n)
  {
-#if PIPE_ARCH_BIG_ENDIAN
+#if UTIL_ARCH_BIG_ENDIAN
     size_t i, e;
     assert(n % 4 == 0);
  
@@ -659,6 +677,52 @@ util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t
  #define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D))
  
  
+/**
+ * Round \c value up to a multiple of \c alignment.
+ *
+ * A \c value that is already a multiple of \c alignment is returned as is.
+ *
+ * \param value  Value to be rounded
+ * \param alignment  Alignment to round to.  This must be a power of two.
+ *
+ * \sa ROUND_DOWN_TO()
+ */
+static inline uintptr_t
+ALIGN(uintptr_t value, int32_t alignment)
+{
+   assert(util_is_power_of_two_nonzero(alignment));
+   const int32_t low_bits = alignment - 1;
+   return (value + low_bits) & ~low_bits;
+}
+
+/** Round \c value up to a multiple of \c alignment, which may be any positive
+ * integer rather than only a power of two as ALIGN() requires. */
+static inline uintptr_t
+ALIGN_NPOT(uintptr_t value, int32_t alignment)
+{
+   assert(alignment > 0);
+   const uintptr_t step = (uintptr_t)alignment;
+   return (value + step - 1) / step * step;
+}
+
+/**
+ * Round \c value down to a multiple of \c alignment.
+ *
+ * A \c value that is already a multiple of \c alignment is returned as is.
+ *
+ * \param value  Value to be rounded
+ * \param alignment  Alignment to round to.  This must be a power of two.
+ *
+ * \sa ALIGN()
+ */
+static inline uintptr_t
+ROUND_DOWN_TO(uintptr_t value, int32_t alignment)
+{
+   assert(util_is_power_of_two_nonzero(alignment));
+   const int32_t low_bits = alignment - 1;
+   return value & ~low_bits;
+}
+
  /**
   * Align a value, only works pot alignemnts.
   */
@@ -737,7 +801,25 @@ util_fpstate_set_denorms_to_zero(unsigned current_fpstate);
  void
  util_fpstate_set(unsigned fpstate);
  
-
+/**
+ * For indexed draw calls, return true if the vertex count to be drawn is
+ * much lower than the vertex count that has to be uploaded, meaning that
+ * the driver should flatten indices instead of uploading a too big range.
+ * "Much lower" means the upload count exceeds the draw count times 4, 8 or
+ * 16 for draws of more than 1024, more than 32, or at most 32 vertices.
+ * This is used by vertex upload code in u_vbuf and glthread.
+ */
+static inline bool
+util_is_vbo_upload_ratio_too_large(unsigned draw_vertex_count,
+                                   unsigned upload_vertex_count)
+{
+   if (draw_vertex_count > 1024) /* unbounded count: multiply in 64 bits */
+      return upload_vertex_count > (uint64_t)draw_vertex_count * 4;
+   else if (draw_vertex_count > 32)
+      return upload_vertex_count > draw_vertex_count * 8;
+   else
+      return upload_vertex_count > draw_vertex_count * 16;
+}
  
  #ifdef __cplusplus
  }