a8c58d9d4efa011cee0e8bb2ba6a9996ab34f786
1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Utilities used by SWR core.
27 ******************************************************************************/
31 #include <type_traits>
33 #include "common/os.h"
34 #include "common/intrin.h"
35 #include "common/swr_assert.h"
46 #if ENABLE_AVX512_SIMD16
//////////////////////////////////////////////////////////////////////////
/// Helper to unroll loops at compile time.
/// UnrollerL<Begin, End, Step>::step(func) calls func(Begin),
/// func(Begin + Step), ... and stops once the index equals End.
/// Note: End must be reachable from Begin in whole Steps, otherwise the
/// terminating specialization below is never selected.
//////////////////////////////////////////////////////////////////////////
template <int Begin, int End, int Step = 1>
struct UnrollerL
{
    template <typename Lambda>
    INLINE static void step(Lambda& func)
    {
        // The remaining iterations are a separate instantiation.
        using Remaining = UnrollerL<Begin + Step, End, Step>;

        func(Begin);
        Remaining::step(func);
    }
};

/// Recursion terminator: the index has reached End, nothing is invoked.
template <int End, int Step>
struct UnrollerL<End, End, Step>
{
    template <typename Lambda>
    static void step(Lambda&)
    {
    }
};
//////////////////////////////////////////////////////////////////////////
/// Helper to unroll loops at compile time, with a mask to skip specific
/// iterations.
/// UnrollerLMask<Begin, End, Step, Mask>::step(func) walks the indices
/// Begin, Begin + Step, ... (stopping once the index equals End) and
/// calls func(i) only for indices whose bit (1 << i) is set in Mask.
/// Indices that do not fit in the width of Mask are treated as masked out.
//////////////////////////////////////////////////////////////////////////
template <int Begin, int End, int Step = 1, int Mask = 0x7f>
struct UnrollerLMask
{
    enum { MASK_BITS = sizeof(int) * 8 };

    // True when this iteration's bit is set in Mask.  The range checks come
    // first so the shift amount is always valid.
    static const bool ITERATION_ENABLED =
        (Begin >= 0) && (Begin < MASK_BITS) &&
        (((static_cast<unsigned int>(Mask) >> (Begin & (MASK_BITS - 1))) & 1u) != 0u);

    template <typename Lambda>
    INLINE static void step(Lambda& func)
    {
        if (ITERATION_ENABLED)
        {
            func(Begin);
        }

        // Keep recursing through the masked unroller so Mask is applied to
        // every iteration; handing off to UnrollerL here would run all
        // remaining iterations unconditionally.
        UnrollerLMask<Begin + Step, End, Step, Mask>::step(func);
    }
};

/// Recursion terminator: the index has reached End, nothing is invoked.
template <int End, int Step, int Mask>
struct UnrollerLMask<End, End, Step, Mask>
{
    template <typename Lambda>
    static void step(Lambda&)
    {
    }
};
94 // general CRC compute
96 uint32_t ComputeCRC(uint32_t crc
, const void *pData
, uint32_t size
)
98 #if defined(_WIN64) || defined(__x86_64__)
99 uint32_t sizeInQwords
= size
/ sizeof(uint64_t);
100 uint32_t sizeRemainderBytes
= size
% sizeof(uint64_t);
101 uint64_t* pDataWords
= (uint64_t*)pData
;
102 for (uint32_t i
= 0; i
< sizeInQwords
; ++i
)
104 crc
= (uint32_t)_mm_crc32_u64(crc
, *pDataWords
++);
107 uint32_t sizeInDwords
= size
/ sizeof(uint32_t);
108 uint32_t sizeRemainderBytes
= size
% sizeof(uint32_t);
109 uint32_t* pDataWords
= (uint32_t*)pData
;
110 for (uint32_t i
= 0; i
< sizeInDwords
; ++i
)
112 crc
= _mm_crc32_u32(crc
, *pDataWords
++);
116 uint8_t* pRemainderBytes
= (uint8_t*)pDataWords
;
117 for (uint32_t i
= 0; i
< sizeRemainderBytes
; ++i
)
119 crc
= _mm_crc32_u8(crc
, *pRemainderBytes
++);
//////////////////////////////////////////////////////////////////////////
/// Add a byte offset to a pointer of any type.
/// @param p       pointer to offset.
/// @param offset  signed distance in bytes (not in units of sizeof(T)).
/// @return pointer of the same type whose address is p's plus offset.
//////////////////////////////////////////////////////////////////////////
template <typename T>
static T* PtrAdd(T* p, intptr_t offset)
{
    // Work on the integer form of the address so the offset counts bytes.
    const intptr_t address = reinterpret_cast<intptr_t>(p) + offset;
    return reinterpret_cast<T*>(address);
}
//////////////////////////////////////////////////////////////////////////
/// Is a power-of-2?
/// Returns true when value has at most one bit set.  Note that zero also
/// satisfies this test and is reported as true.
//////////////////////////////////////////////////////////////////////////
template <typename T>
static bool IsPow2(T value)
{
    // value & -value isolates the lowest set bit; it equals value only
    // when no other bit is set.
    const auto lowestSetBit = value & (T(0) - value);
    return lowestSetBit == value;
}
//////////////////////////////////////////////////////////////////////////
/// Align down to specified alignment
/// Note: IsPow2(alignment) MUST be true (checked with SWR_ASSERT)
/// @return value with the bits below the alignment cleared.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignDownPow2(T1 value, T2 alignment)
{
    SWR_ASSERT(IsPow2(alignment));

    // For a power-of-2 alignment, (alignment - 1) has exactly the low bits set.
    const T1 lowBits = T1(alignment - 1);
    return value & ~lowBits;
}
//////////////////////////////////////////////////////////////////////////
/// Align up to specified alignment
/// Note: IsPow2(alignment) MUST be true
/// @return the result of AlignDownPow2 applied to value + (alignment - 1).
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignUpPow2(T1 value, T2 alignment)
{
    // Bias the value by (alignment - 1), then round down.
    const T1 bias = T1(alignment - 1);
    return AlignDownPow2(value + bias, alignment);
}
//////////////////////////////////////////////////////////////////////////
/// Align up ptr to specified alignment
/// Note: IsPow2(alignment) MUST be true
/// The pointer is converted to uintptr_t, biased by (alignment - 1),
/// rounded down with AlignDownPow2, and converted back.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1* AlignUpPow2(T1* value, T2 alignment)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(value);
    const uintptr_t aligned = AlignDownPow2(address + uintptr_t(alignment - 1), alignment);
    return reinterpret_cast<T1*>(aligned);
}
//////////////////////////////////////////////////////////////////////////
/// Align down to specified alignment
/// Power-of-2 alignments (as reported by IsPow2) go through
/// AlignDownPow2; any other alignment subtracts value % alignment.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignDown(T1 value, T2 alignment)
{
    if (!IsPow2(alignment))
    {
        // General case: drop the remainder.
        return value - T1(value % alignment);
    }

    return AlignDownPow2(value, alignment);
}
//////////////////////////////////////////////////////////////////////////
/// Align down ptr to specified alignment
/// The pointer is converted to uintptr_t, rounded down with the integer
/// AlignDown, and converted back.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1* AlignDown(T1* value, T2 alignment)
{
    const uintptr_t address = reinterpret_cast<uintptr_t>(value);
    const uintptr_t aligned = AlignDown(address, alignment);
    return reinterpret_cast<T1*>(aligned);
}
//////////////////////////////////////////////////////////////////////////
/// Align up to specified alignment
/// Biases value by (alignment - 1) and rounds down with AlignDown.
/// NOTE(review): the previous header said IsPow2(alignment) MUST be true,
/// but this forwards to AlignDown, which also has a modulo path for other
/// alignments - confirm the intended contract.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignUp(T1 value, T2 alignment)
{
    const T1 bias = T1(alignment - 1);
    return AlignDown(value + bias, alignment);
}
//////////////////////////////////////////////////////////////////////////
/// Align up ptr to specified alignment
/// Advances the pointer by (alignment - 1) bytes with PtrAdd and rounds
/// the result down with the pointer AlignDown.
/// NOTE(review): the previous header said IsPow2(alignment) MUST be true,
/// but this forwards to AlignDown, which also has a modulo path for other
/// alignments - confirm the intended contract.
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1* AlignUp(T1* value, T2 alignment)
{
    T1* const biased = PtrAdd(value, alignment - 1);
    return AlignDown(biased, alignment);
}
224 //////////////////////////////////////////////////////////////////////////
225 /// Helper structure used to access an array of elements that don't
226 /// correspond to a typical word size.
227 //////////////////////////////////////////////////////////////////////////
228 template<typename T
, size_t BitsPerElementT
, size_t ArrayLenT
>
232 static const size_t BITS_PER_WORD
= sizeof(size_t) * 8;
233 static const size_t ELEMENTS_PER_WORD
= BITS_PER_WORD
/ BitsPerElementT
;
234 static const size_t NUM_WORDS
= (ArrayLenT
+ ELEMENTS_PER_WORD
- 1) / ELEMENTS_PER_WORD
;
235 static const size_t ELEMENT_MASK
= (size_t(1) << BitsPerElementT
) - 1;
237 static_assert(ELEMENTS_PER_WORD
* BitsPerElementT
== BITS_PER_WORD
,
238 "Element size must an integral fraction of pointer size");
240 size_t m_words
[NUM_WORDS
] = {};
244 T
operator[] (size_t elementIndex
) const
246 size_t word
= m_words
[elementIndex
/ ELEMENTS_PER_WORD
];
247 word
>>= ((elementIndex
% ELEMENTS_PER_WORD
) * BitsPerElementT
);
248 return T(word
& ELEMENT_MASK
);
// Ranged integer argument for TemplateArgUnroller.
// TMin / TMax are carried in the type; val holds the runtime value.
// NOTE(review): body reconstructed from how IntArg is used
// (IntArg<..>{iArg.val}, iArg.val == TMax) - confirm the member type.
template <uint32_t TMin, uint32_t TMax>
struct IntArg
{
    uint32_t val;
};
259 // Recursive template used to auto-nest conditionals. Converts dynamic boolean function
260 // arguments to static template arguments.
261 template <typename TermT
, typename
... ArgsB
>
262 struct TemplateArgUnroller
264 //-----------------------------------------
266 //-----------------------------------------
268 // Last Arg Terminator
269 static typename
TermT::FuncType
GetFunc(bool bArg
)
273 return TermT::template GetFunc
<ArgsB
..., std::true_type
>();
276 return TermT::template GetFunc
<ArgsB
..., std::false_type
>();
279 // Recursively parse args
280 template <typename
... TArgsT
>
281 static typename
TermT::FuncType
GetFunc(bool bArg
, TArgsT
... remainingArgs
)
285 return TemplateArgUnroller
<TermT
, ArgsB
..., std::true_type
>::GetFunc(remainingArgs
...);
288 return TemplateArgUnroller
<TermT
, ArgsB
..., std::false_type
>::GetFunc(remainingArgs
...);
291 //-----------------------------------------
292 // Integer value (within specified range)
293 //-----------------------------------------
295 // Last Arg Terminator
296 template <uint32_t TMin
, uint32_t TMax
>
297 static typename
TermT::FuncType
GetFunc(IntArg
<TMin
, TMax
> iArg
)
299 if (iArg
.val
== TMax
)
301 return TermT::template GetFunc
<ArgsB
..., std::integral_constant
<uint32_t, TMax
>>();
305 return TemplateArgUnroller
<TermT
, ArgsB
...>::GetFunc(IntArg
<TMin
, TMax
-1>{iArg
.val
});
307 SWR_ASSUME(false); return nullptr;
309 template <uint32_t TVal
>
310 static typename
TermT::FuncType
GetFunc(IntArg
<TVal
, TVal
> iArg
)
312 SWR_ASSERT(iArg
.val
== TVal
);
313 return TermT::template GetFunc
<ArgsB
..., std::integral_constant
<uint32_t, TVal
>>();
316 // Recursively parse args
317 template <uint32_t TMin
, uint32_t TMax
, typename
... TArgsT
>
318 static typename
TermT::FuncType
GetFunc(IntArg
<TMin
, TMax
> iArg
, TArgsT
... remainingArgs
)
320 if (iArg
.val
== TMax
)
322 return TemplateArgUnroller
<TermT
, ArgsB
..., std::integral_constant
<uint32_t, TMax
>>::GetFunc(remainingArgs
...);
326 return TemplateArgUnroller
<TermT
, ArgsB
...>::GetFunc(IntArg
<TMin
, TMax
- 1>{iArg
.val
}, remainingArgs
...);
328 SWR_ASSUME(false); return nullptr;
330 template <uint32_t TVal
, typename
... TArgsT
>
331 static typename
TermT::FuncType
GetFunc(IntArg
<TVal
, TVal
> iArg
, TArgsT
... remainingArgs
)
333 SWR_ASSERT(iArg
.val
== TVal
);
334 return TemplateArgUnroller
<TermT
, ArgsB
..., std::integral_constant
<uint32_t, TVal
>>::GetFunc(remainingArgs
...);
//////////////////////////////////////////////////////////////////////////
/// Helpers used to get / set environment variable
//////////////////////////////////////////////////////////////////////////

/// Read an environment variable.
/// @param variableName  name of the variable to look up.
/// @return the variable's value, or an empty string when it is not set.
static INLINE std::string GetEnv(const std::string& variableName)
{
    std::string output;
#if defined(_WIN32)
    // First call only queries the required buffer size.
    DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
    if (!valueSize) return output;

    output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
    GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
#else
    // getenv() returns a null pointer for an unset variable, and assigning
    // a null pointer to a std::string is undefined behaviour.
    if (const char* pValue = getenv(variableName.c_str()))
    {
        output = pValue;
    }
#endif

    return output;
}
/// Set an environment variable for the current process.
/// An existing value is replaced (setenv is called with overwrite enabled).
/// @param variableName  name of the variable to set.
/// @param value         new value.
static INLINE void SetEnv(const std::string& variableName, const std::string& value)
{
    const char* const pName  = variableName.c_str();
    const char* const pValue = value.c_str();

#if defined(_WIN32)
    SetEnvironmentVariableA(pName, pValue);
#else
    setenv(pName, pValue, true);
#endif
}