2 * Copyright 2017 Jacob Lifshay
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in all
12 * copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #ifndef UTIL_BIT_INTRINSICS_H_
25 #define UTIL_BIT_INTRINSICS_H_
// Decide whether the compiler's __builtin_clz / __builtin_ctz / __builtin_popcount
// families may be used inside constexpr functions. When they may, the macro
// VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED is defined to 1.
#if defined(__clang__)
#if defined(__apple_build_version__)
// Apple's clang is checked against a different minimum version than upstream clang.
#if __clang_major__ > 5 || (__clang_major__ == 5 && __clang_minor__ >= 1)
#define VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED 1
#endif
#else
#if __clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 4)
#define VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED 1
#endif
#endif
#elif defined(__INTEL_COMPILER)
#warning figure out icc version numbers for constexpr __builtin_clz and __builtin_ctz
#elif defined(__GNUC__)
// gcc supports constexpr __builtin_clz and __builtin_ctz before it supports c++14
#define VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED 1
#endif

// NOTE(review): the macro is removed again right after detection, so code guarded
// by it always takes the portable (non-builtin) path. The reason is not evident
// from this file -- confirm before deleting this override.
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
#undef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
#endif
/// Counts the leading zero bits of a 4-bit value.
///
/// @param v value to inspect; must be in the range 0..15 (the fallback table
///          has exactly 16 entries).
/// @return the number of zero bits above the highest set bit, counted within a
///         4-bit field; 4 when `v` is 0.
constexpr std::uint32_t clz4(std::uint8_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    // __builtin_clz is undefined for 0 and counts within unsigned int, so the
    // count for the 4-bit top bit (0x8) is subtracted to rebase the result.
    return v == 0 ? 4 : __builtin_clz(v) - __builtin_clz(0x8U);
#else
    constexpr std::uint8_t lookup_table[0x10] = {
        4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
    };
    return lookup_table[v];
#endif
}

/// Counts the leading zero bits of an 8-bit value.
///
/// @param v value to inspect.
/// @return the number of leading zero bits in `v`; 8 when `v` is 0.
constexpr std::uint32_t clz8(std::uint8_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return v == 0 ? 8 : __builtin_clz(v) - __builtin_clz(0x80U);
#else
    // If the high nibble is empty the answer is 4 plus the low nibble's count;
    // otherwise only the high nibble matters.
    return ((v & 0xF0) == 0) ? 4 + clz4(v) : clz4(static_cast<std::uint8_t>(v >> 4));
#endif
}

/// Counts the leading zero bits of a 16-bit value.
///
/// @param v value to inspect.
/// @return the number of leading zero bits in `v`; 16 when `v` is 0.
constexpr std::uint32_t clz16(std::uint16_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    // The builtin operates on unsigned int, so the excess width must be taken
    // from the *unsigned* type: numeric_limits<int>::digits omits the sign bit
    // and would make every result one too large.
    return v == 0 ? 16 : __builtin_clz(v) - (std::numeric_limits<unsigned int>::digits - 16);
#else
    return ((v & 0xFF00U) == 0) ? 8 + clz8(static_cast<std::uint8_t>(v))
                                : clz8(static_cast<std::uint8_t>(v >> 8));
#endif
}

/// Counts the leading zero bits of a 32-bit value.
///
/// @param v value to inspect.
/// @return the number of leading zero bits in `v`; 32 when `v` is 0.
constexpr std::uint32_t clz32(std::uint32_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    // __builtin_clzl works on unsigned long; use its full width (see clz16).
    return v == 0 ? 32 : __builtin_clzl(v) - (std::numeric_limits<unsigned long>::digits - 32);
#else
    return ((v & 0xFFFF0000UL) == 0) ? 16 + clz16(static_cast<std::uint16_t>(v))
                                     : clz16(static_cast<std::uint16_t>(v >> 16));
#endif
}

/// Counts the leading zero bits of a 64-bit value.
///
/// @param v value to inspect.
/// @return the number of leading zero bits in `v`; 64 when `v` is 0.
constexpr std::uint32_t clz64(std::uint64_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    // __builtin_clzll works on unsigned long long; use its full width (see clz16).
    return v == 0 ? 64
                  : __builtin_clzll(v) - (std::numeric_limits<unsigned long long>::digits - 64);
#else
    return ((v & 0xFFFFFFFF00000000ULL) == 0) ? 32 + clz32(static_cast<std::uint32_t>(v))
                                              : clz32(static_cast<std::uint32_t>(v >> 32));
#endif
}
/// Counts the trailing zero bits of a 4-bit value.
///
/// @param v value to inspect; must be in the range 0..15 (the fallback table
///          has exactly 16 entries).
/// @return the number of zero bits below the lowest set bit; 4 when `v` is 0.
constexpr std::uint32_t ctz4(std::uint8_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    // __builtin_ctz is undefined for 0, hence the explicit zero case.
    return v == 0 ? 4 : __builtin_ctz(v);
#else
    constexpr std::uint8_t lookup_table[0x10] = {
        4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    };
    return lookup_table[v];
#endif
}

/// Counts the trailing zero bits of an 8-bit value.
///
/// @param v value to inspect.
/// @return the number of trailing zero bits in `v`; 8 when `v` is 0.
constexpr std::uint32_t ctz8(std::uint8_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return v == 0 ? 8 : __builtin_ctz(v);
#else
    // Trailing zeros are decided by the LOW half: only when it is empty does
    // the high half contribute (plus the 4 skipped bits). Testing the high half
    // instead would report 4 for 0x11 and 4 for 0.
    return ((v & 0x0F) == 0) ? 4 + ctz4(static_cast<std::uint8_t>(v >> 4))
                             : ctz4(static_cast<std::uint8_t>(v & 0x0F));
#endif
}

/// Counts the trailing zero bits of a 16-bit value.
///
/// @param v value to inspect.
/// @return the number of trailing zero bits in `v`; 16 when `v` is 0.
constexpr std::uint32_t ctz16(std::uint16_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return v == 0 ? 16 : __builtin_ctz(v);
#else
    // Low byte decides; fall through to the high byte only when it is empty.
    return ((v & 0x00FFU) == 0) ? 8 + ctz8(static_cast<std::uint8_t>(v >> 8))
                                : ctz8(static_cast<std::uint8_t>(v & 0x00FFU));
#endif
}

/// Counts the trailing zero bits of a 32-bit value.
///
/// @param v value to inspect.
/// @return the number of trailing zero bits in `v`; 32 when `v` is 0.
constexpr std::uint32_t ctz32(std::uint32_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return v == 0 ? 32 : __builtin_ctzl(v);
#else
    // Low 16 bits decide; fall through to the high 16 only when they are empty.
    return ((v & 0x0000FFFFUL) == 0) ? 16 + ctz16(static_cast<std::uint16_t>(v >> 16))
                                     : ctz16(static_cast<std::uint16_t>(v & 0x0000FFFFUL));
#endif
}

/// Counts the trailing zero bits of a 64-bit value.
///
/// @param v value to inspect.
/// @return the number of trailing zero bits in `v`; 64 when `v` is 0.
constexpr std::uint32_t ctz64(std::uint64_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return v == 0 ? 64 : __builtin_ctzll(v);
#else
    // Low 32 bits decide; fall through to the high 32 only when they are empty.
    return ((v & 0x00000000FFFFFFFFULL) == 0)
               ? 32 + ctz32(static_cast<std::uint32_t>(v >> 32))
               : ctz32(static_cast<std::uint32_t>(v & 0x00000000FFFFFFFFULL));
#endif
}
/// Counts the set bits of an 8-bit value.
///
/// @param v value to inspect.
/// @return the number of 1 bits in `v` (0..8).
constexpr std::uint32_t popcount8(std::uint8_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return __builtin_popcount(v);
#else
    // Per-nibble bit counts; the byte's count is the sum of its two nibbles.
    constexpr std::uint8_t lookup_table[0x10] = {
        0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    };
    return lookup_table[v & 0xF] + lookup_table[v >> 4];
#endif
}
/// Counts the set bits of a 32-bit value.
///
/// @param v value to inspect.
/// @return the number of 1 bits in `v` (0..32).
constexpr std::uint32_t popcount32(std::uint32_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return __builtin_popcountl(v);
#else
    constexpr std::uint32_t m1 = 0x5555'5555UL; // every other bit
    constexpr std::uint32_t m2 = 0x3333'3333UL; // low 2 bits of each nibble
    constexpr std::uint32_t m4 = 0x0F0F'0F0FUL; // low nibble of each byte
    // Step 1: each 2-bit field becomes the count of its own two bits. Without
    // this step the later sums add raw bits instead of counts.
    v -= (v >> 1) & m1;
    // Step 2: each nibble becomes the sum of its two 2-bit counts.
    v = (v & m2) + ((v >> 2) & m2);
    // Step 3: each byte becomes the sum of its two nibble counts.
    v = (v & m4) + ((v >> 4) & m4);
    // The multiply accumulates all four byte counts into the top byte; the cast
    // truncates to 32 bits first in case the product was computed wider.
    return static_cast<std::uint32_t>(v * 0x0101'0101UL) >> 24;
#endif
}

/// Counts the set bits of a 16-bit value by forwarding to popcount32.
///
/// @param v value to inspect.
/// @return the number of 1 bits in `v` (0..16).
constexpr std::uint32_t popcount16(std::uint16_t v) noexcept
{
    return popcount32(v);
}
/// Counts the set bits of a 64-bit value.
///
/// @param v value to inspect.
/// @return the number of 1 bits in `v` (0..64).
constexpr std::uint32_t popcount64(std::uint64_t v) noexcept
{
#ifdef VULKAN_CPU_UTIL_CONSTEXPR_BUILTIN_CLZ_CTZ_POPCOUNT_SUPPORTED
    return __builtin_popcountll(v);
#else
    constexpr std::uint64_t m1 = 0x5555'5555'5555'5555ULL; // every other bit
    constexpr std::uint64_t m2 = 0x3333'3333'3333'3333ULL; // low 2 bits of each nibble
    constexpr std::uint64_t m4 = 0x0F0F'0F0F'0F0F'0F0FULL; // low nibble of each byte
    // Step 1: each 2-bit field becomes the count of its own two bits. Without
    // this step the later sums add raw bits instead of counts.
    v -= (v >> 1) & m1;
    // Step 2: each nibble becomes the sum of its two 2-bit counts.
    v = (v & m2) + ((v >> 2) & m2);
    // Step 3: each byte becomes the sum of its two nibble counts.
    v = (v & m4) + ((v >> 4) & m4);
    // The multiply accumulates all eight byte counts into the top byte.
    return static_cast<std::uint32_t>(
        static_cast<std::uint64_t>(v * 0x0101'0101'0101'0101ULL) >> 56);
#endif
}
206 #endif /* UTIL_BIT_INTRINSICS_H_ */