X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Futil%2Fbitscan.h;h=895a1e7a3721a2f90a22babbcd60eb9fabe1b034;hb=39006590517de05709d32378d6024de8041035d0;hp=611e812059696c10c2d82bb171c57343cff1dd71;hpb=aaee0d1bbf60de17c95c0e2bc5a905753e8deef6;p=mesa.git

diff --git a/src/util/bitscan.h b/src/util/bitscan.h
index 611e8120596..895a1e7a372 100644
--- a/src/util/bitscan.h
+++ b/src/util/bitscan.h
@@ -31,12 +31,17 @@
 
 #include <assert.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <string.h>
 
 #if defined(_MSC_VER)
 #include <intrin.h>
 #endif
 
+#if defined(__POPCNT__)
+#include <popcntintrin.h>
+#endif
+
 #include "c99_compat.h"
 
 #ifdef __cplusplus
@@ -67,7 +72,7 @@ int ffs(int i);
 
 #ifdef HAVE___BUILTIN_FFSLL
 #define ffsll __builtin_ffsll
-#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64)
+#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
 static inline int
 ffsll(long long int i)
 {
@@ -107,6 +112,52 @@ u_bit_scan64(uint64_t *mask)
    return i;
 }
 
+/* Determine if an unsigned value is a power of two.
+ *
+ * \note
+ * Zero is treated as a power of two.
+ */
+static inline bool
+util_is_power_of_two_or_zero(unsigned v)
+{
+   return (v & (v - 1)) == 0;
+}
+
+/* Determine if a uint64_t value is a power of two.
+ *
+ * \note
+ * Zero is treated as a power of two.
+ */
+static inline bool
+util_is_power_of_two_or_zero64(uint64_t v)
+{
+   return (v & (v - 1)) == 0;
+}
+
+/* Determine if an unsigned value is a power of two.
+ *
+ * \note
+ * Zero is \b not treated as a power of two.
+ */
+static inline bool
+util_is_power_of_two_nonzero(unsigned v)
+{
+   /* __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT. The latter
+    * indicates the existence of the __builtin_popcount function. The former
+    * indicates that _mm_popcnt_u32 exists and is a native instruction.
+    *
+    * The other alternative is to use SSE 4.2 compile-time flags. This has
+    * two drawbacks. First, there is currently no build infrastructure for
+    * SSE 4.2 (only 4.1), so that would have to be added. Second, some AMD
+    * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona).
+    */
+#ifdef __POPCNT__
+   return _mm_popcnt_u32(v) == 1;
+#else
+   return v != 0 && (v & (v - 1)) == 0;
+#endif
+}
+
 /* For looping over a bitmask when you want to loop over consecutive bits
  * manually, for example:
  *
@@ -184,7 +235,7 @@ util_last_bit64(uint64_t u)
 {
 #if defined(HAVE___BUILTIN_CLZLL)
    return u == 0 ? 0 : 64 - __builtin_clzll(u);
-#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64)
+#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
    unsigned long index;
    if (_BitScanReverse64(&index, u))
       return index + 1;
@@ -235,6 +286,38 @@ u_bit_consecutive64(unsigned start, unsigned count)
    return (((uint64_t)1 << count) - 1) << start;
 }
 
+/**
+ * Return number of bits set in n.
+ */
+static inline unsigned
+util_bitcount(unsigned n)
+{
+#if defined(HAVE___BUILTIN_POPCOUNT)
+   return __builtin_popcount(n);
+#else
+   /* K&R classic bitcount.
+    *
+    * For each iteration, clear the LSB from the bitfield.
+    * Requires only one iteration per set bit, instead of
+    * one iteration per bit less than highest set bit.
+    */
+   unsigned bits;
+   for (bits = 0; n; bits++) {
+      n &= n - 1;
+   }
+   return bits;
+#endif
+}
+
+static inline unsigned
+util_bitcount64(uint64_t n)
+{
+#ifdef HAVE___BUILTIN_POPCOUNTLL
+   return __builtin_popcountll(n);
+#else
+   return util_bitcount(n) + util_bitcount(n >> 32);
+#endif
+}
 
 #ifdef __cplusplus
 }
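
The power-of-two helpers added above all rest on the same bit trick: subtracting 1 from an unsigned value flips its lowest set bit and every bit below it, so v & (v - 1) clears exactly the lowest set bit. A power of two has a single bit set and therefore yields zero; zero itself also yields zero, which is why the patch offers both an _or_zero and a _nonzero variant. A standalone sketch of the two checks (not part of the patch; the function names and test values are made up for illustration):

#include <stdbool.h>
#include <stdio.h>

/* Same expression as util_is_power_of_two_or_zero():
 * clearing the lowest set bit of a power of two leaves nothing set,
 * and zero passes as well. */
static bool pot_or_zero(unsigned v)
{
   return (v & (v - 1)) == 0;
}

/* Same expression as the fallback branch of util_is_power_of_two_nonzero():
 * the extra v != 0 test rejects zero. */
static bool pot_nonzero(unsigned v)
{
   return v != 0 && (v & (v - 1)) == 0;
}

int main(void)
{
   printf("%d %d %d\n", pot_or_zero(0), pot_or_zero(64), pot_or_zero(96));  /* 1 1 0 */
   printf("%d %d\n", pot_nonzero(0), pot_nonzero(64));                      /* 0 1 */
   return 0;
}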
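
The comment inside util_is_power_of_two_nonzero() draws a distinction worth spelling out: __POPCNT__ means the compiler was told the POPCNT instruction is available, so _mm_popcnt_u32() from popcntintrin.h lowers to a single instruction, while HAVE___BUILTIN_POPCOUNT only says that __builtin_popcount() exists, whatever it compiles to. A minimal sketch of the same compile-time guard, assuming a GCC/Clang build where -mpopcnt (or an -march value that implies it) defines __POPCNT__:

#include <stdio.h>

#ifdef __POPCNT__
#include <popcntintrin.h>   /* _mm_popcnt_u32() */
#endif

/* Mirrors the #ifdef in the patch: with native POPCNT, "exactly one bit
 * set" is simply "population count equals 1"; otherwise fall back to the
 * v & (v - 1) trick with an explicit zero check. */
static int is_single_bit(unsigned v)
{
#ifdef __POPCNT__
   return _mm_popcnt_u32(v) == 1;
#else
   return v != 0 && (v & (v - 1)) == 0;
#endif
}

int main(void)
{
   printf("%d %d %d\n", is_single_bit(1u << 20), is_single_bit(3), is_single_bit(0));  /* 1 0 0 */
   return 0;
}

Building the same file with and without -mpopcnt exercises both branches; the result is identical, only the generated code differs.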
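
util_bitcount()'s fallback is the classic K&R loop: each n &= n - 1 clears exactly the lowest set bit, so the loop runs once per set bit rather than once per bit position. The util_bitcount64() fallback counts the two 32-bit halves; passing the uint64_t straight to util_bitcount(unsigned) keeps only the low 32 bits (with the usual 32-bit unsigned), and n >> 32 supplies the high half. A small sketch of both fallbacks (helper names and test values are illustrative only):

#include <stdint.h>
#include <stdio.h>

/* Fallback path of util_bitcount(): one loop iteration per set bit. */
static unsigned bitcount32(unsigned n)
{
   unsigned bits;
   for (bits = 0; n; bits++)
      n &= n - 1;            /* clear the lowest set bit */
   return bits;
}

/* Fallback path of util_bitcount64(): count the two 32-bit halves.
 * The explicit cast keeps only the low half, mirroring the implicit
 * truncation when a uint64_t is passed to util_bitcount(unsigned). */
static unsigned bitcount64(uint64_t n)
{
   return bitcount32((unsigned)n) + bitcount32((unsigned)(n >> 32));
}

int main(void)
{
   printf("%u\n", bitcount32(0xF0F0u));                 /* 8 */
   printf("%u\n", bitcount64(UINT64_C(0xFF000000FF)));  /* 16 */
   return 0;
}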