1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @brief Utilities used by SWR core.
27 ******************************************************************************/
31 #include <type_traits>
33 #include "common/os.h"
34 #include "common/intrin.h"
35 #include "common/swr_assert.h"
46 #if ENABLE_AVX512_SIMD16
// Template parameterised on a SIMD traits type SIMD_T. The four declarations
// below all have type SIMD_T::Integer and are named ymin, ymax, xmin, xmax.
// NOTE(review): the struct/class line that owns these members (and its braces)
// is not present in this view; the member names suggest per-lane bounding-box
// extents -- confirm against the full header.
56 template<typename SIMD_T
>
59 typename
SIMD_T::Integer ymin
;
60 typename
SIMD_T::Integer ymax
;
61 typename
SIMD_T::Integer xmin
;
62 typename
SIMD_T::Integer xmax
;
//////////////////////////////////////////////////////////////////////////
/// @brief Compile-time loop unroller.
///        UnrollerL<Begin, End, Step>::step(func) invokes func(Begin),
///        func(Begin + Step), ... and stops once the index equals End
///        (End itself is not visited).
/// @note  The index must land exactly on End (Begin + k * Step == End);
///        otherwise the terminating specialization never matches and
///        template instantiation recurses without bound.
/// @tparam Begin  first index passed to the callable
/// @tparam End    one-past-the-last index
/// @tparam Step   increment applied between iterations (default 1)
//////////////////////////////////////////////////////////////////////////
template <int Begin, int End, int Step = 1>
struct UnrollerL
{
    template <typename Lambda>
    INLINE static void step(Lambda& func)
    {
        func(Begin);
        UnrollerL<Begin + Step, End, Step>::step(func);
    }
};

/// Terminator: Begin has reached End, so there is nothing left to invoke.
template <int End, int Step>
struct UnrollerL<End, End, Step>
{
    template <typename Lambda>
    static void step(Lambda& func)
    {
    }
};
//////////////////////////////////////////////////////////////////////////
/// @brief Compile-time loop unroller with a per-iteration enable mask.
///        UnrollerLMask<Begin, End, Step, Mask>::step(func) walks the
///        indices Begin, Begin + Step, ... (stopping at End) and invokes
///        func(i) only for indices whose bit (1 << i) is set in Mask.
/// @note  The recursion stays inside UnrollerLMask so that Mask is honoured
///        for every iteration, not just the first one.
/// @note  The index must land exactly on End, and indices must be valid
///        shift counts for an int.
/// @tparam Begin  first index considered
/// @tparam End    one-past-the-last index
/// @tparam Step   increment applied between iterations (default 1)
/// @tparam Mask   bit i enables iteration i (default 0x7f)
//////////////////////////////////////////////////////////////////////////
template <int Begin, int End, int Step = 1, int Mask = 0x7f>
struct UnrollerLMask
{
    template <typename Lambda>
    INLINE static void step(Lambda& func)
    {
        if (Mask & (1 << Begin))
        {
            func(Begin);
        }
        UnrollerLMask<Begin + Step, End, Step, Mask>::step(func);
    }
};

/// Terminator: Begin has reached End, so there is nothing left to invoke.
template <int End, int Step, int Mask>
struct UnrollerLMask<End, End, Step, Mask>
{
    template <typename Lambda>
    static void step(Lambda& func)
    {
    }
};
102 // general CRC compute
104 uint32_t ComputeCRC(uint32_t crc
, const void *pData
, uint32_t size
)
106 #if defined(_WIN64) || defined(__x86_64__)
107 uint32_t sizeInQwords
= size
/ sizeof(uint64_t);
108 uint32_t sizeRemainderBytes
= size
% sizeof(uint64_t);
109 uint64_t* pDataWords
= (uint64_t*)pData
;
110 for (uint32_t i
= 0; i
< sizeInQwords
; ++i
)
112 crc
= (uint32_t)_mm_crc32_u64(crc
, *pDataWords
++);
115 uint32_t sizeInDwords
= size
/ sizeof(uint32_t);
116 uint32_t sizeRemainderBytes
= size
% sizeof(uint32_t);
117 uint32_t* pDataWords
= (uint32_t*)pData
;
118 for (uint32_t i
= 0; i
< sizeInDwords
; ++i
)
120 crc
= _mm_crc32_u32(crc
, *pDataWords
++);
124 uint8_t* pRemainderBytes
= (uint8_t*)pDataWords
;
125 for (uint32_t i
= 0; i
< sizeRemainderBytes
; ++i
)
127 crc
= _mm_crc32_u8(crc
, *pRemainderBytes
++);
//////////////////////////////////////////////////////////////////////////
/// @brief Check specified bit within a data word
/// @param word  value to inspect
/// @param bit   zero-based bit index; must be smaller than the bit width
///              of T for the shift to be well defined
/// @return true when the bit is set, false otherwise
//////////////////////////////////////////////////////////////////////////
template <typename T>
static bool CheckBit(T word, uint32_t bit)
{
    return 0 != (word & (T(1) << bit));
}
//////////////////////////////////////////////////////////////////////////
/// @brief Add byte offset to any-type pointer
/// @param p       base pointer
/// @param offset  signed offset in BYTES (not in units of sizeof(T))
/// @return pointer of the same type, moved by offset bytes
//////////////////////////////////////////////////////////////////////////
template <typename T>
static T* PtrAdd(T* p, intptr_t offset)
{
    // Go through an integer so the arithmetic is done in bytes.
    intptr_t intp = reinterpret_cast<intptr_t>(p);
    return reinterpret_cast<T*>(intp + offset);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Is the value a power of two?
/// @param value  value to test
/// @return true when at most one bit of value is set. Note that this
///         includes 0, which is therefore also reported as a power of two.
//////////////////////////////////////////////////////////////////////////
template <typename T>
static bool IsPow2(T value)
{
    // value & -value isolates the lowest set bit; it equals value only when
    // no other bit is set.
    return value == (value & (T(0) - value));
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align down to specified alignment
/// Note: IsPow2(alignment) MUST be true
/// @param value      value to round down
/// @param alignment  power-of-two alignment (checked with SWR_ASSERT)
/// @return value with the bits below the alignment cleared
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignDownPow2(T1 value, T2 alignment)
{
    SWR_ASSERT(IsPow2(alignment));
    return value & ~T1(alignment - 1);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align up to specified alignment
/// Note: IsPow2(alignment) MUST be true
/// @param value      value to round up
/// @param alignment  power-of-two alignment
/// @return value biased by (alignment - 1) and then aligned down with
///         AlignDownPow2
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignUpPow2(T1 value, T2 alignment)
{
    return AlignDownPow2(value + T1(alignment - 1), alignment);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align up ptr to specified alignment
/// Note: IsPow2(alignment) MUST be true
/// @param value      pointer to round up
/// @param alignment  power-of-two alignment in bytes
/// @return pointer whose address was biased by (alignment - 1) and then
///         aligned down with AlignDownPow2
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1* AlignUpPow2(T1* value, T2 alignment)
{
    // Do the arithmetic on the raw address, then convert back to a pointer.
    return reinterpret_cast<T1*>(
        AlignDownPow2(reinterpret_cast<uintptr_t>(value) + uintptr_t(alignment - 1), alignment));
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align down to specified alignment
/// @param value      value to round down
/// @param alignment  alignment; does not have to be a power of two
/// @return value rounded down to a multiple of alignment
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignDown(T1 value, T2 alignment)
{
    // Power-of-two alignments take the mask-based path.
    if (IsPow2(alignment))
    {
        return AlignDownPow2(value, alignment);
    }
    // General case: strip the remainder.
    return value - T1(value % alignment);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align pointer down to specified alignment
/// @param value      pointer to round down
/// @param alignment  alignment in bytes
/// @return pointer whose address was rounded down by the integer AlignDown
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1* AlignDown(T1* value, T2 alignment)
{
    // Align the raw address, then convert back to a pointer.
    return reinterpret_cast<T1*>(AlignDown(reinterpret_cast<uintptr_t>(value), alignment));
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align up to specified alignment
/// @param value      value to round up
/// @param alignment  alignment; this overload forwards to AlignDown, so it
///                   is not restricted to the power-of-two-only helpers
/// @return value biased by (alignment - 1) and then aligned down
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1 AlignUp(T1 value, T2 alignment)
{
    return AlignDown(value + T1(alignment - 1), alignment);
}
//////////////////////////////////////////////////////////////////////////
/// @brief Align pointer up to specified alignment
/// @param value      pointer to round up
/// @param alignment  alignment in bytes; this overload forwards to
///                   AlignDown, so it is not restricted to the
///                   power-of-two-only helpers
/// @return pointer advanced by (alignment - 1) bytes via PtrAdd and then
///         aligned down
//////////////////////////////////////////////////////////////////////////
template <typename T1, typename T2>
static T1* AlignUp(T1* value, T2 alignment)
{
    return AlignDown(PtrAdd(value, alignment - 1), alignment);
}
// NOTE(review): the struct line that names this helper, its braces and any
// access specifiers are not present in this view; the notes below describe
// only the visible members.
242 //////////////////////////////////////////////////////////////////////////
243 /// Helper structure used to access an array of elements that don't
244 /// correspond to a typical word size.
245 //////////////////////////////////////////////////////////////////////////
// T is the element type returned by operator[], BitsPerElementT the number of
// bits stored per element, ArrayLenT the number of elements.
246 template<typename T
, size_t BitsPerElementT
, size_t ArrayLenT
>
// Number of bits in one storage word (a size_t).
250 static const size_t BITS_PER_WORD
= sizeof(size_t) * 8;
// How many elements are packed into one storage word.
251 static const size_t ELEMENTS_PER_WORD
= BITS_PER_WORD
/ BitsPerElementT
;
// Storage words needed for ArrayLenT elements, rounded up.
252 static const size_t NUM_WORDS
= (ArrayLenT
+ ELEMENTS_PER_WORD
- 1) / ELEMENTS_PER_WORD
;
// Mask with the low BitsPerElementT bits set.
// NOTE(review): the shift is only well defined while BitsPerElementT is
// smaller than BITS_PER_WORD.
253 static const size_t ELEMENT_MASK
= (size_t(1) << BitsPerElementT
) - 1;
// Reject element widths that do not divide the word size evenly, so that no
// element straddles two words.
// NOTE(review): the message text reads "must an integral" (missing "be").
255 static_assert(ELEMENTS_PER_WORD
* BitsPerElementT
== BITS_PER_WORD
,
256 "Element size must an integral fraction of pointer size");
// Backing storage, zero-initialized.
258 size_t m_words
[NUM_WORDS
] = {};
// Read-only element access: pick the word holding the element, shift the
// element down to bit 0, mask it out and convert it to T.
// No bounds check on elementIndex is visible here.
262 T
operator[] (size_t elementIndex
) const
264 size_t word
= m_words
[elementIndex
/ ELEMENTS_PER_WORD
];
265 word
>>= ((elementIndex
% ELEMENTS_PER_WORD
) * BitsPerElementT
);
266 return T(word
& ELEMENT_MASK
);
//////////////////////////////////////////////////////////////////////////
/// @brief Ranged integer argument for TemplateArgUnroller
///        Carries a runtime value together with compile-time bounds so the
///        unroller can turn it into a std::integral_constant.
/// @tparam TMin  lower bound of the range
/// @tparam TMax  upper bound of the range
//////////////////////////////////////////////////////////////////////////
template <uint32_t TMin, uint32_t TMax>
struct IntArg
{
    uint32_t val; ///< runtime value, brace-initialized by callers
};
// TemplateArgUnroller<TermT, ArgsB...>: every GetFunc overload below returns
// TermT::FuncType. ArgsB... is the list of type arguments accumulated so far.
// Each overload consumes its first runtime argument, appends a matching
// std::true_type / std::false_type / std::integral_constant<uint32_t, N> to
// that list, and then either recurses on the remaining arguments or, for the
// last argument, calls TermT::template GetFunc<ArgsB..., NewArg>().
// NOTE(review): the braces and some control-flow lines of this struct are not
// present in this view; the notes below describe only the visible statements.
277 // Recursive template used to auto-nest conditionals. Converts dynamic boolean function
278 // arguments to static template arguments.
279 template <typename TermT
, typename
... ArgsB
>
280 struct TemplateArgUnroller
282 //-----------------------------------------
284 //-----------------------------------------
// Final bool argument: ends the recursion by asking TermT for the function
// with either std::true_type or std::false_type appended.
// NOTE(review): the statement selecting between the two returns is not
// visible here; presumably it branches on bArg -- confirm.
286 // Last Arg Terminator
287 static typename
TermT::FuncType
GetFunc(bool bArg
)
291 return TermT::template GetFunc
<ArgsB
..., std::true_type
>();
294 return TermT::template GetFunc
<ArgsB
..., std::false_type
>();
// Leading bool argument with more arguments to follow: append true_type or
// false_type and forward remainingArgs to the next unroller level.
// NOTE(review): as above, the branch on bArg is not visible -- confirm.
297 // Recursively parse args
298 template <typename
... TArgsT
>
299 static typename
TermT::FuncType
GetFunc(bool bArg
, TArgsT
... remainingArgs
)
303 return TemplateArgUnroller
<TermT
, ArgsB
..., std::true_type
>::GetFunc(remainingArgs
...);
306 return TemplateArgUnroller
<TermT
, ArgsB
..., std::false_type
>::GetFunc(remainingArgs
...);
309 //-----------------------------------------
310 // Integer value (within specified range)
311 //-----------------------------------------
// Final IntArg<TMin, TMax> argument: if the runtime value equals TMax, bind
// TMax as an integral_constant and finish.
313 // Last Arg Terminator
314 template <uint32_t TMin
, uint32_t TMax
>
315 static typename
TermT::FuncType
GetFunc(IntArg
<TMin
, TMax
> iArg
)
317 if (iArg
.val
== TMax
)
319 return TermT::template GetFunc
<ArgsB
..., std::integral_constant
<uint32_t, TMax
>>();
// Otherwise retry with the upper bound lowered by one, carrying the same
// runtime value.
// NOTE(review): the SWR_ASSUME(false) fallthrough after this return implies
// a guard around the recursion that is not visible here -- confirm.
323 return TemplateArgUnroller
<TermT
, ArgsB
...>::GetFunc(IntArg
<TMin
, TMax
-1>{iArg
.val
});
325 SWR_ASSUME(false); return nullptr;
// Final IntArg whose range has collapsed to the single value TVal: assert the
// runtime value matches and bind TVal.
327 template <uint32_t TVal
>
328 static typename
TermT::FuncType
GetFunc(IntArg
<TVal
, TVal
> iArg
)
330 SWR_ASSERT(iArg
.val
== TVal
);
331 return TermT::template GetFunc
<ArgsB
..., std::integral_constant
<uint32_t, TVal
>>();
// Leading IntArg<TMin, TMax> with more arguments to follow: on a match with
// TMax, append integral_constant<uint32_t, TMax> and forward remainingArgs.
334 // Recursively parse args
335 template <uint32_t TMin
, uint32_t TMax
, typename
... TArgsT
>
336 static typename
TermT::FuncType
GetFunc(IntArg
<TMin
, TMax
> iArg
, TArgsT
... remainingArgs
)
338 if (iArg
.val
== TMax
)
340 return TemplateArgUnroller
<TermT
, ArgsB
..., std::integral_constant
<uint32_t, TMax
>>::GetFunc(remainingArgs
...);
// Otherwise retry at the same unroller level with the upper bound lowered by
// one; remainingArgs are passed along unchanged.
// NOTE(review): a guard around this recursion is implied by the
// SWR_ASSUME(false) fallthrough but is not visible here -- confirm.
344 return TemplateArgUnroller
<TermT
, ArgsB
...>::GetFunc(IntArg
<TMin
, TMax
- 1>{iArg
.val
}, remainingArgs
...);
346 SWR_ASSUME(false); return nullptr;
// Leading IntArg whose range has collapsed to TVal, with more arguments to
// follow: assert the runtime value matches, append TVal and continue.
348 template <uint32_t TVal
, typename
... TArgsT
>
349 static typename
TermT::FuncType
GetFunc(IntArg
<TVal
, TVal
> iArg
, TArgsT
... remainingArgs
)
351 SWR_ASSERT(iArg
.val
== TVal
);
352 return TemplateArgUnroller
<TermT
, ArgsB
..., std::integral_constant
<uint32_t, TVal
>>::GetFunc(remainingArgs
...);
//////////////////////////////////////////////////////////////////////////
/// Helpers used to get / set environment variable
//////////////////////////////////////////////////////////////////////////

/// @brief Read an environment variable.
/// @param variableName  name of the variable to look up
/// @return the variable's value, or an empty string when it is not set
static INLINE std::string GetEnv(const std::string& variableName)
{
    std::string output;
#if defined(_WIN32)
    // First call reports the required size (0 means the lookup failed).
    DWORD valueSize = GetEnvironmentVariableA(variableName.c_str(), nullptr, 0);
    if (!valueSize)
    {
        return output;
    }
    output.resize(valueSize - 1); // valueSize includes null, output.resize() does not
    GetEnvironmentVariableA(variableName.c_str(), &output[0], valueSize);
#else
    // getenv() yields a null pointer for an unset variable, and assigning a
    // null pointer to std::string is undefined behavior, so only assign when
    // a value exists.
    if (const char* pValue = getenv(variableName.c_str()))
    {
        output = pValue;
    }
#endif

    return output;
}
/// @brief Set an environment variable for the current process, replacing
///        any existing value.
/// @param variableName  name of the variable to set
/// @param value         new value for the variable
static INLINE void SetEnv(const std::string& variableName, const std::string& value)
{
#if defined(_WIN32)
    SetEnvironmentVariableA(variableName.c_str(), value.c_str());
#else
    // Third argument non-zero: overwrite a value that is already present.
    setenv(variableName.c_str(), value.c_str(), true);
#endif
}