X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Futil%2Fu_math.h;h=2ab5f03a6628b79bc4801ad4b1f8d26c1a035aa7;hb=70c272004f727457e852ba5f2498754b07a7d995;hp=b76592d1ec61d2d99569d86a1d3c5c89231add20;hpb=eaa3a025da18a0b7f25460f86bb4afcd08017dfe;p=mesa.git diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b76592d1ec6..2ab5f03a662 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -40,148 +40,23 @@ #include "pipe/p_compiler.h" -#include "util/u_debug.h" +#include "c99_math.h" +#include +#include +#include + +#include "util/bitscan.h" #ifdef __cplusplus extern "C" { #endif -#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -__inline double ceil(double val) -{ - double ceil_val; - - if ((val - (long) val) == 0) { - ceil_val = val; - } - else { - if (val > 0) { - ceil_val = (long) val + 1; - } - else { - ceil_val = (long) val; - } - } - - return ceil_val; -} - -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL -__inline double floor(double val) -{ - double floor_val; - - if ((val - (long) val) == 0) { - floor_val = val; - } - else { - if (val > 0) { - floor_val = (long) val; - } - else { - floor_val = (long) val - 1; - } - } - - return floor_val; -} +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 #endif -#pragma function(pow) -__inline double __cdecl pow(double val, double exponent) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(log) -__inline double __cdecl log(double val) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(atan2) -__inline double __cdecl atan2(double val) -{ - /* XXX */ - assert(0); - return 0; -} -#else -#include -#include -#endif - - -#if defined(_MSC_VER) - -#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -static INLINE float cosf( float f ) -{ - return (float) cos( (double) f ); -} - -static INLINE float sinf( float f ) -{ - return (float) sin( (double) f ); -} - -static INLINE float ceilf( float f ) -{ - return (float) ceil( (double) f ); -} - -static INLINE float floorf( float f ) -{ - return (float) floor( (double) f ); -} - -static INLINE float powf( float f, float g ) -{ - return (float) pow( (double) f, (double) g ); -} - -static INLINE float sqrtf( float f ) -{ - return (float) sqrt( (double) f ); -} - -static INLINE float fabsf( float f ) -{ - return (float) fabs( (double) f ); -} - -static INLINE float logf( float f ) -{ - return (float) log( (double) f ); -} - -#else -/* Work-around an extra semi-colon in VS 2005 logf definition */ -#ifdef logf -#undef logf -#define logf(x) ((float)log((double)(x))) -#endif /* logf */ -#endif - -static INLINE double log2( double x ) -{ - const double invln2 = 1.442695041; - return log( x ) * invln2; -} - -#endif /* _MSC_VER */ - - - - - #define POW2_TABLE_SIZE_LOG2 9 #define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2) #define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2) @@ -204,6 +79,27 @@ union fi { }; +union di { + double d; + int64_t i; + uint64_t ui; +}; + + +/** + * Extract the IEEE float32 exponent. + */ +static inline signed +util_get_float32_exponent(float x) +{ + union fi f; + + f.f = x; + + return ((f.ui >> 23) & 0xff) - 127; +} + + /** * Fast version of 2^x * Identity: exp2(a + b) = exp2(a) * exp2(b) @@ -213,7 +109,7 @@ union fi { * Compute exp2(ipart) with i << ipart * Compute exp2(fpart) with lookup table. */ -static INLINE float +static inline float util_fast_exp2(float x) { int32_t ipart; @@ -244,7 +140,7 @@ util_fast_exp2(float x) /** * Fast approximation to exp(x). */ -static INLINE float +static inline float util_fast_exp(float x) { const float k = 1.44269f; /* = log2(e) */ @@ -261,7 +157,7 @@ extern float log2_table[LOG2_TABLE_SIZE]; /** * Fast approximation to log2(x). */ -static INLINE float +static inline float util_fast_log2(float x) { union fi num; @@ -277,7 +173,7 @@ util_fast_log2(float x) /** * Fast approximation to x^y. */ -static INLINE float +static inline float util_fast_pow(float x, float y) { return util_fast_exp2(util_fast_log2(x) * y); @@ -285,7 +181,7 @@ util_fast_pow(float x, float y) /* Note that this counts zero as a power of two. */ -static INLINE boolean +static inline boolean util_is_power_of_two( unsigned v ) { return (v & (v-1)) == 0; @@ -295,7 +191,7 @@ util_is_power_of_two( unsigned v ) /** * Floor(x), returned as int. */ -static INLINE int +static inline int util_ifloor(float f) { int ai, bi; @@ -312,7 +208,7 @@ util_ifloor(float f) /** * Round float to nearest int. */ -static INLINE int +static inline int util_iround(float f) { #if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) @@ -335,86 +231,125 @@ util_iround(float f) } +/** + * Approximate floating point comparison + */ +static inline boolean +util_is_approx(float a, float b, float tol) +{ + return fabsf(b - a) <= tol; +} + /** - * Test if x is NaN or +/- infinity. + * util_is_X_inf_or_nan = test if x is NaN or +/- Inf + * util_is_X_nan = test if x is NaN + * util_X_inf_sign = return +1 for +Inf, -1 for -Inf, or 0 for not Inf + * + * NaN can be checked with x != x, however this fails with the fast math flag + **/ + + +/** + * Single-float */ -static INLINE boolean +static inline boolean util_is_inf_or_nan(float x) { union fi tmp; tmp.f = x; - return !(int)((unsigned int)((tmp.i & 0x7fffffff)-0x7f800000) >> 31); + return (tmp.ui & 0x7f800000) == 0x7f800000; } -/** - * Test whether x is a power of two. - */ -static INLINE boolean -util_is_pot(unsigned x) +static inline boolean +util_is_nan(float x) { - return (x & (x - 1)) == 0; + union fi tmp; + tmp.f = x; + return (tmp.ui & 0x7fffffff) > 0x7f800000; +} + + +static inline int +util_inf_sign(float x) +{ + union fi tmp; + tmp.f = x; + if ((tmp.ui & 0x7fffffff) != 0x7f800000) { + return 0; + } + + return (x < 0) ? -1 : 1; } /** - * Find first bit set in word. Least significant bit is 1. - * Return 0 if no bits set. + * Double-float */ -#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64) -unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask); -#pragma intrinsic(_BitScanForward) -static INLINE -unsigned long ffs( unsigned long u ) -{ - unsigned long i; - if (_BitScanForward(&i, u)) - return i + 1; - else - return 0; +static inline boolean +util_is_double_inf_or_nan(double x) +{ + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7ff0000000000000ULL) == 0x7ff0000000000000ULL; } -#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) -static INLINE -unsigned ffs( unsigned u ) + + +static inline boolean +util_is_double_nan(double x) { - unsigned i; + union di tmp; + tmp.d = x; + return (tmp.ui & 0x7fffffffffffffffULL) > 0x7ff0000000000000ULL; +} + - if (u == 0) { +static inline int +util_double_inf_sign(double x) +{ + union di tmp; + tmp.d = x; + if ((tmp.ui & 0x7fffffffffffffffULL) != 0x7ff0000000000000ULL) { return 0; } - __asm bsf eax, [u] - __asm inc eax - __asm mov [i], eax + return (x < 0) ? -1 : 1; +} + - return i; +/** + * Half-float + */ +static inline boolean +util_is_half_inf_or_nan(int16_t x) +{ + return (x & 0x7c00) == 0x7c00; } -#elif defined(__MINGW32__) -#define ffs __builtin_ffs -#endif -#ifdef __MINGW32__ -#define ffs __builtin_ffs -#endif + +static inline boolean +util_is_half_nan(int16_t x) +{ + return (x & 0x7fff) > 0x7c00; +} -/* Could also binary search for the highest bit. - */ -static INLINE unsigned -util_unsigned_logbase2(unsigned n) +static inline int +util_half_inf_sign(int16_t x) { - unsigned log2 = 0; - while (n >>= 1) - ++log2; - return log2; + if ((x & 0x7fff) != 0x7c00) { + return 0; + } + + return (x < 0) ? -1 : 1; } /** * Return float bits. */ -static INLINE unsigned +static inline unsigned fui( float f ) { union fi fi; @@ -422,12 +357,20 @@ fui( float f ) return fi.ui; } +static inline float +uif(uint32_t ui) +{ + union fi fi; + fi.ui = ui; + return fi.f; +} + /** * Convert ubyte to float in [0, 1]. * XXX a 256-entry lookup table would be slightly faster. */ -static INLINE float +static inline float ubyte_to_float(ubyte ub) { return (float) ub * (1.0f / 255.0f); @@ -437,17 +380,16 @@ ubyte_to_float(ubyte ub) /** * Convert float in [0,1] to ubyte in [0,255] with clamping. */ -static INLINE ubyte +static inline ubyte float_to_ubyte(float f) { - const int ieee_0996 = 0x3f7f0000; /* 0.996 or so */ union fi tmp; tmp.f = f; if (tmp.i < 0) { return (ubyte) 0; } - else if (tmp.i >= ieee_0996) { + else if (tmp.i >= 0x3f800000 /* 1.0f */) { return (ubyte) 255; } else { @@ -456,47 +398,89 @@ float_to_ubyte(float f) } } +static inline float +byte_to_float_tex(int8_t b) +{ + return (b == -128) ? -1.0F : b * 1.0F / 127.0F; +} + +static inline int8_t +float_to_byte_tex(float f) +{ + return (int8_t) (127.0F * f); +} /** * Calc log base 2 */ -static INLINE unsigned +static inline unsigned util_logbase2(unsigned n) { - unsigned log2 = 0; - while (n >>= 1) - ++log2; - return log2; +#if defined(HAVE___BUILTIN_CLZ) + return ((sizeof(unsigned) * 8 - 1) - __builtin_clz(n | 1)); +#else + unsigned pos = 0; + if (n >= 1<<16) { n >>= 16; pos += 16; } + if (n >= 1<< 8) { n >>= 8; pos += 8; } + if (n >= 1<< 4) { n >>= 4; pos += 4; } + if (n >= 1<< 2) { n >>= 2; pos += 2; } + if (n >= 1<< 1) { pos += 1; } + return pos; +#endif } +/** + * Returns the ceiling of log n base 2, and 0 when n == 0. Equivalently, + * returns the smallest x such that n <= 2**x. + */ +static inline unsigned +util_logbase2_ceil(unsigned n) +{ + if (n <= 1) + return 0; + + return 1 + util_logbase2(n - 1); +} /** * Returns the smallest power of two >= x */ -static INLINE unsigned +static inline unsigned util_next_power_of_two(unsigned x) { - unsigned i; +#if defined(HAVE___BUILTIN_CLZ) + if (x <= 1) + return 1; - if (x == 0) - return 1; - - --x; + return (1 << ((sizeof(unsigned) * 8) - __builtin_clz(x - 1))); +#else + unsigned val = x; - for (i = 1; i < sizeof(unsigned) * 8; i <<= 1) - x |= x >> i; + if (x <= 1) + return 1; - return x + 1; + if (util_is_power_of_two(x)) + return x; + + val--; + val = (val >> 1) | val; + val = (val >> 2) | val; + val = (val >> 4) | val; + val = (val >> 8) | val; + val = (val >> 16) | val; + val++; + return val; +#endif } /** * Return number of bits set in n. */ -static INLINE unsigned +static inline unsigned util_bitcount(unsigned n) { -#if defined(PIPE_CC_GCC) +#if defined(HAVE___BUILTIN_POPCOUNT) return __builtin_popcount(n); #else /* K&R classic bitcount. @@ -505,8 +489,8 @@ util_bitcount(unsigned n) * Requires only one iteration per set bit, instead of * one iteration per bit less than highest set bit. */ - unsigned bits = 0; - for (bits; n; bits++) { + unsigned bits; + for (bits = 0; n; bits++) { n &= n - 1; } return bits; @@ -514,13 +498,58 @@ util_bitcount(unsigned n) } +static inline unsigned +util_bitcount64(uint64_t n) +{ +#ifdef HAVE___BUILTIN_POPCOUNTLL + return __builtin_popcountll(n); +#else + return util_bitcount(n) + util_bitcount(n >> 32); +#endif +} + + +/** + * Reverse bits in n + * Algorithm taken from: + * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer + */ +static inline unsigned +util_bitreverse(unsigned n) +{ + n = ((n >> 1) & 0x55555555u) | ((n & 0x55555555u) << 1); + n = ((n >> 2) & 0x33333333u) | ((n & 0x33333333u) << 2); + n = ((n >> 4) & 0x0f0f0f0fu) | ((n & 0x0f0f0f0fu) << 4); + n = ((n >> 8) & 0x00ff00ffu) | ((n & 0x00ff00ffu) << 8); + n = ((n >> 16) & 0xffffu) | ((n & 0xffffu) << 16); + return n; +} + +/** + * Convert from little endian to CPU byte order. + */ + +#ifdef PIPE_ARCH_BIG_ENDIAN +#define util_le64_to_cpu(x) util_bswap64(x) +#define util_le32_to_cpu(x) util_bswap32(x) +#define util_le16_to_cpu(x) util_bswap16(x) +#else +#define util_le64_to_cpu(x) (x) +#define util_le32_to_cpu(x) (x) +#define util_le16_to_cpu(x) (x) +#endif + +#define util_cpu_to_le64(x) util_le64_to_cpu(x) +#define util_cpu_to_le32(x) util_le32_to_cpu(x) +#define util_cpu_to_le16(x) util_le16_to_cpu(x) + /** * Reverse byte order of a 32 bit word. */ -static INLINE uint32_t +static inline uint32_t util_bswap32(uint32_t n) { -#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 403) +#if defined(HAVE___BUILTIN_BSWAP32) return __builtin_bswap32(n); #else return (n >> 24) | @@ -530,6 +559,48 @@ util_bswap32(uint32_t n) #endif } +/** + * Reverse byte order of a 64bit word. + */ +static inline uint64_t +util_bswap64(uint64_t n) +{ +#if defined(HAVE___BUILTIN_BSWAP64) + return __builtin_bswap64(n); +#else + return ((uint64_t)util_bswap32((uint32_t)n) << 32) | + util_bswap32((n >> 32)); +#endif +} + + +/** + * Reverse byte order of a 16 bit word. + */ +static inline uint16_t +util_bswap16(uint16_t n) +{ + return (n >> 8) | + (n << 8); +} + +static inline void* +util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, size_t n) +{ +#ifdef PIPE_ARCH_BIG_ENDIAN + size_t i, e; + assert(n % 4 == 0); + + for (i = 0, e = n / 4; i < e; i++) { + uint32_t * restrict d = (uint32_t* restrict)dest; + const uint32_t * restrict s = (const uint32_t* restrict)src; + d[i] = util_bswap32(s[i]); + } + return dest; +#else + return memcpy(dest, src, n); +#endif +} /** * Clamp X to [MIN, MAX]. @@ -540,17 +611,40 @@ util_bswap32(uint32_t n) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) -#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C ) -#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C ) +#define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) +#define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) + +#define MIN4( A, B, C, D ) ((A) < (B) ? MIN3(A, C, D) : MIN3(B, C, D)) +#define MAX4( A, B, C, D ) ((A) > (B) ? MAX3(A, C, D) : MAX3(B, C, D)) -static INLINE int +/** + * Align a value, only works pot alignemnts. + */ +static inline int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); } -static INLINE unsigned +static inline uint64_t +align64(uint64_t value, unsigned alignment) +{ + return (value + alignment - 1) & ~((uint64_t)alignment - 1); +} + +/** + * Works like align but on npot alignments. + */ +static inline size_t +util_align_npot(size_t value, size_t alignment) +{ + if (value % alignment) + return value + (alignment - (value % alignment)); + return value; +} + +static inline unsigned u_minify(unsigned value, unsigned levels) { return MAX2(1, value >> levels); @@ -583,6 +677,27 @@ do { \ #endif +static inline uint32_t +util_unsigned_fixed(float value, unsigned frac_bits) +{ + return value < 0 ? 0 : (uint32_t)(value * (1<