1 /****************************************************************************
2 * Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 ****************************************************************************/
28 #include "core/knobs.h"
// Windows branch of the platform selection: taken when FORCE_WINDOWS or _WIN32
// is defined and FORCE_LINUX is not.
30 #if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)
// Calling convention and export decoration used for SWR entry points.
32 #define SWR_API __cdecl
33 #define SWR_VISIBLE __declspec(dllexport)
45 #if defined(MemoryFence)
46 // Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence
// OSALIGN(decl, width): emit `decl` with an MSVC alignment specifier of `width` bytes.
50 #define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD
53 // We compile Debug builds with inline function expansion enabled. This allows
54 // functions compiled with __forceinline to be inlined even in Debug builds.
55 // The inline_depth(0) pragma below will disable inline function expansion for
56 // normal INLINE / inline functions, but not for __forceinline functions.
57 // Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in Debug builds.
60 #pragma inline_depth(0)
62 // Use of __forceinline increases compile time dramatically in release builds
63 // and provides almost 0 measurable benefit. Disable until we have a compelling use case.
65 // #define INLINE __forceinline
69 #define FORCEINLINE __forceinline
// DEBUGBREAK expands to the MSVC __debugbreak() intrinsic.
72 #define DEBUGBREAK __debugbreak()
// PRAGMA_WARNING_PUSH_DISABLE(ids...): push the warning state, then disable the
// listed warning ids. Pair each use with PRAGMA_WARNING_POP().
// NOTE(review): the expansion ends in ';' tokens of its own, so a use followed
// by another ';' yields an empty statement/declaration.
74 #define PRAGMA_WARNING_PUSH_DISABLE(...) \
75 __pragma(warning(push)); \
76 __pragma(warning(disable : __VA_ARGS__));
78 #define PRAGMA_WARNING_POP() __pragma(warning(pop))
80 static inline void* AlignedMalloc(size_t _Size
, size_t _Alignment
)
82 return _aligned_malloc(_Size
, _Alignment
);
85 static inline void AlignedFree(void* p
)
87 return _aligned_free(p
);
// size_t-width aliases for the bit-scan / popcount helpers. Two alternative sets
// follow: one mapping to the 64-bit names, one to the 32-bit names.
// NOTE(review): the conditional that selects between the two sets is not
// visible in this excerpt.
91 #define BitScanReverseSizeT BitScanReverse64
92 #define BitScanForwardSizeT BitScanForward64
93 #define _mm_popcount_sizeT _mm_popcnt_u64
95 #define BitScanReverseSizeT BitScanReverse
96 #define BitScanForwardSizeT BitScanForward
97 #define _mm_popcount_sizeT _mm_popcnt_u32
// Second platform branch: Apple, Linux, or a build that forces the Linux path.
100 #elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)
// Exported symbols use default ELF/Mach-O visibility on this branch.
103 #define SWR_VISIBLE __attribute__((visibility("default")))
107 #include <x86intrin.h>
109 #include <sys/types.h>
111 #include <sys/stat.h>
// Windows-style type names provided on the non-Windows branch so that shared
// code can keep using them.
// LPVOID and HANDLE are plain void pointers here.
116 typedef void* LPVOID
;
// UINT and DWORD are both `unsigned int` here.
118 typedef unsigned int UINT
;
119 typedef void* HANDLE
;
121 typedef unsigned int DWORD
;
// MAX_PATH maps onto PATH_MAX on this branch.
129 #define MAX_PATH PATH_MAX
// OSALIGN(decl, width): emit `decl` followed by a GCC-style aligned attribute.
131 #define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))
133 #define INLINE __inline
// FORCEINLINE is only an alias of INLINE here; nothing forces inlining.
136 #define FORCEINLINE INLINE
// DEBUGBREAK issues the x86 `int $3` breakpoint instruction via inline asm.
138 #define DEBUGBREAK asm("int $3")
140 #if !defined(__CYGWIN__)
// Emulate MSVC's __declspec(attr) on GCC: the token paste turns
// __declspec(align(16)) into __declspec_align(16), and each __declspec_<attr>
// macro below supplies the GCC spelling. Attributes defined with an empty
// replacement list simply disappear.
149 #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
150 #define __declspec(x) __declspec_##x
151 #define __declspec_align(y) __attribute__((aligned(y)))
152 #define __declspec_deprecated __attribute__((deprecated))
153 #define __declspec_dllexport
154 #define __declspec_dllimport
155 #define __declspec_noinline __attribute__((__noinline__))
156 #define __declspec_nothrow __attribute__((nothrow))
157 #define __declspec_novtable
158 #define __declspec_thread __thread
// Alternative definition: __declspec(...) expands to nothing.
160 #define __declspec(X)
// GCC_VERSION packs major/minor/patch as MMmmpp decimal digits, e.g. 4.5.0 -> 40500.
165 #define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
// Guard for the __rdtsc fallback: non-clang GCC older than 4.5.0.
167 #if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)
/// Fallback __rdtsc() built on inline assembly.
/// Executes the RDTSC instruction, capturing its two register outputs
/// ("=a" and "=d") and combining them into a single 64-bit value.
/// @returns (high << 32) | low
inline uint64_t __rdtsc()
{
    // NOTE(review): the declaration of these two locals was missing from the
    // listing this block was recovered from; 32-bit unsigned is assumed because
    // the result is assembled as two 32-bit halves - confirm.
    uint32_t low  = 0;
    uint32_t high = 0;

    asm volatile("rdtsc" : "=a"(low), "=d"(high));

    // Widen before shifting so the high half is not shifted out of a 32-bit type.
    return (uint64_t)low | ((uint64_t)high << 32);
}
// Intrinsic compatibility section for compilers that are neither clang nor the
// Intel compiler.
176 #if !defined(__clang__) && !defined(__INTEL_COMPILER)
177 // Intrinsic not defined in gcc < 10
// 100000 is GCC_VERSION's encoding of 10.0.0.
178 #if (__GNUC__) && (GCC_VERSION < 100000)
179 static INLINE
void _mm256_storeu2_m128i(__m128i
* hi
, __m128i
* lo
, __m256i a
)
181 _mm_storeu_si128((__m128i
*)lo
, _mm256_castsi256_si128(a
));
182 _mm_storeu_si128((__m128i
*)hi
, _mm256_extractf128_si256(a
, 0x1));
186 // gcc prior to 4.9 doesn't have _mm*_undefined_*
// NOTE(review): GCC_VERSION encodes 4.9.0 as 40900 (major * 10000 + minor * 100
// + patch). The threshold 409000 used below is ten times that, so the condition
// is also true for much newer GCC releases (e.g. 10.x -> 100000). Confirm
// whether 40900 was intended.
187 #if (__GNUC__) && (GCC_VERSION < 409000)
// Fall back to the zero-initialising intrinsics where the "undefined" ones are missing.
188 #define _mm_undefined_si128 _mm_setzero_si128
189 #define _mm256_undefined_ps _mm256_setzero_ps
/// GCC/Clang stand-in for the MSVC _BitScanForward intrinsic (unsigned long overload).
/// Finds the least-significant set bit of Mask.
/// @param Index - receives the zero-based position of that bit; left untouched
///                when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 when Mask has at least one bit set, 0 otherwise
/// NOTE(review): the return statement was missing from the listing this block
/// was recovered from; the 0/1 result follows the MSVC intrinsic's documented
/// contract - confirm against callers.
static inline unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask)
{
    if (Mask == 0)
    {
        // The count-trailing-zeros builtins are undefined for 0.
        return 0;
    }

    // __builtin_ctz takes unsigned int and would drop the upper bits of a wider
    // unsigned long; the 'l' variant scans the full width.
    *Index = (unsigned long)__builtin_ctzl(Mask);
    return 1;
}
/// GCC/Clang stand-in for the MSVC _BitScanForward intrinsic (unsigned int overload).
/// Finds the least-significant set bit of Mask.
/// @param Index - receives the zero-based position of that bit; left untouched
///                when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 when Mask has at least one bit set, 0 otherwise
/// NOTE(review): the return statement was missing from the listing this block
/// was recovered from; the 0/1 result follows the MSVC intrinsic's documented
/// contract - confirm against callers.
static inline unsigned char _BitScanForward(unsigned int* Index, unsigned int Mask)
{
    if (Mask == 0)
    {
        // __builtin_ctz(0) is undefined.
        return 0;
    }

    *Index = (unsigned int)__builtin_ctz(Mask);
    return 1;
}
/// GCC/Clang stand-in for the MSVC _BitScanReverse intrinsic (unsigned long overload).
/// Finds the most-significant set bit of Mask.
/// @param Index - receives the zero-based position of that bit; left untouched
///                when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 when Mask has at least one bit set, 0 otherwise
/// NOTE(review): the return statement was missing from the listing this block
/// was recovered from; the 0/1 result follows the MSVC intrinsic's documented
/// contract - confirm against callers.
static inline unsigned char _BitScanReverse(unsigned long* Index, unsigned long Mask)
{
    if (Mask == 0)
    {
        // The count-leading-zeros builtins are undefined for 0.
        return 0;
    }

    // Pair the width of unsigned long with the matching 'l' builtin. Mixing a
    // 64-bit bias (63) with the 32-bit __builtin_clz yields positions that are
    // off by 32 for small masks and ignores the upper half of wide ones.
    const unsigned long topBit = (unsigned long)(sizeof(unsigned long) * 8 - 1);
    *Index = topBit - (unsigned long)__builtin_clzl(Mask);
    return 1;
}
/// GCC/Clang stand-in for the MSVC _BitScanReverse intrinsic (unsigned int overload).
/// Finds the most-significant set bit of Mask.
/// @param Index - receives the zero-based position of that bit; left untouched
///                when Mask is 0
/// @param Mask  - value to scan
/// @returns 1 when Mask has at least one bit set, 0 otherwise
/// NOTE(review): the return statement was missing from the listing this block
/// was recovered from; the 0/1 result follows the MSVC intrinsic's documented
/// contract - confirm against callers.
static inline unsigned char _BitScanReverse(unsigned int* Index, unsigned int Mask)
{
    if (Mask == 0)
    {
        // __builtin_clz(0) is undefined.
        return 0;
    }

    const unsigned int topBit = (unsigned int)(sizeof(unsigned int) * 8 - 1);
    *Index = topBit - (unsigned int)__builtin_clz(Mask);
    return 1;
}
// The 64-bit intrinsic names are plain aliases of the functions above; which
// overload a call resolves to depends on the argument types at the call site.
217 #define _BitScanForward64 _BitScanForward
218 #define _BitScanReverse64 _BitScanReverse
/// Allocate memory with a caller-specified alignment (posix_memalign backend).
/// @param size      - number of bytes to allocate
/// @param alignment - alignment request, passed straight to posix_memalign
/// @returns the allocated block, or NULL when posix_memalign reports an error
/// NOTE(review): the local declaration and both return paths were missing from
/// the listing this block was recovered from; NULL-on-failure is assumed -
/// confirm against callers.
static inline void* AlignedMalloc(size_t size, size_t alignment)
{
    void* ret = NULL;

    // posix_memalign returns 0 on success and an error number otherwise.
    if (posix_memalign(&ret, alignment, size))
    {
        return NULL;
    }

    return ret;
}
/// Release a block obtained from AlignedMalloc().
/// @param p - block to release
/// NOTE(review): the body of this function was missing from the listing this
/// block was recovered from. free() is used because the AlignedMalloc() in this
/// branch allocates with posix_memalign - confirm.
static inline void AlignedFree(void* p)
{
    free(p);
}
// Element count of a true array. `a` must be an array, not a pointer.
235 #define _countof(a) (sizeof(a) / sizeof(*(a)))
// NOTE(review): sprintf_s is an object-like alias of sprintf, so no buffer size
// is checked and a call that passes an explicit size argument would hand that
// size to sprintf as the format parameter - confirm how callers use it.
237 #define sprintf_s sprintf
// NOTE(review): strncpy leaves dst without a terminating NUL when src holds
// `size` or more characters, unlike the strcpy_s it stands in for.
238 #define strcpy_s(dst, size, src) strncpy(dst, src, size)
239 #define GetCurrentProcessId getpid
// Win32 Interlocked* names mapped onto GCC __sync builtins.
// InterlockedCompareExchange swaps the last two arguments because the builtin
// takes (ptr, expected, desired) while the macro takes (Dest, Exchange, Comparand).
241 #define InterlockedCompareExchange(Dest, Exchange, Comparand) \
242 __sync_val_compare_and_swap(Dest, Comparand, Exchange)
// InterlockedExchangeAdd uses the fetch-then-add builtin (yields the prior
// value); the remaining macros use the op-then-fetch builtins (yield the
// updated value).
243 #define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)
244 #define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)
245 #define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)
246 #define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)
247 #define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)
248 #define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)
// Compiler-only barrier: an empty asm with a "memory" clobber.
249 #define _ReadWriteBarrier() asm volatile("" ::: "memory")
// The MSVC warning-control macros expand to nothing on this branch.
251 #define PRAGMA_WARNING_PUSH_DISABLE(...)
252 #define PRAGMA_WARNING_POP()
254 #define ZeroMemory(dst, size) memset(dst, 0, size)
// Reached when no platform branch matched.
257 #error Unsupported OS/system.
261 #define THREAD thread_local
// Array types whose sizeof is 1024, 1024 * 1024 and 1024 * 1024 * 1024 bytes.
264 typedef uint8_t KILOBYTE
[1024];
265 typedef KILOBYTE MEGABYTE
[1024];
266 typedef MEGABYTE GIGABYTE
[1024];
// Alignment shorthands: 64 bytes, KNOB_SIMD_BYTES and KNOB_SIMD16_BYTES.
268 #define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)
269 #define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)
270 #define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)
272 #include "common/swr_assert.h"
275 #define ATTR_UNUSED __attribute__((unused))
// SWR_FUNC(ret, name, args...): emits a function-pointer typedef named
// PFN<name> plus a prototype for <name>, both using the SWR_API convention.
280 #define SWR_FUNC(_retType, _funcName, /* args */...) \
281 typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \
282 _retType SWR_API _funcName(__VA_ARGS__);
// Prototypes only; no definitions of these functions are visible in this
// excerpt.
/// @param pThreadName - name to apply, as a C string
285 void SWR_API
SetCurrentThreadName(const char* pThreadName
);
/// @param path - directory path, passed by const reference
286 void SWR_API
CreateDirectoryPath(const std::string
& path
);
// NOTE(review): the line carrying ExecCmd's return type is not visible in this
// excerpt. Every parameter after `cmd` defaults to nullptr.
288 /// Execute Command (block until finished)
289 /// @returns process exit value
291 ExecCmd(const std::string
& cmd
, ///< (In) Command line string
292 const char* pOptEnvStrings
= nullptr, ///< (Optional In) Environment block for new process
293 std::string
* pOptStdOut
= nullptr, ///< (Optional Out) Standard Output text
294 std::string
* pOptStdErr
= nullptr, ///< (Optional Out) Standard Error text
295 const std::string
* pOptStdIn
= nullptr); ///< (Optional In) Standard Input text
298 /// Helper for setting up FP state
299 /// @returns old csr state
300 static INLINE
uint32_t SetOptimalVectorCSR()
302 uint32_t oldCSR
= _mm_getcsr();
304 uint32_t newCSR
= (oldCSR
& ~(_MM_ROUND_MASK
| _MM_DENORMALS_ZERO_MASK
| _MM_FLUSH_ZERO_MASK
));
305 newCSR
|= (_MM_ROUND_NEAREST
| _MM_FLUSH_ZERO_ON
| _MM_DENORMALS_ZERO_ON
);
311 /// Set Vector CSR state.
312 /// @param csrState - should be value returned from SetOptimalVectorCSR()
313 static INLINE
void RestoreVectorCSR(uint32_t csrState
)
315 _mm_setcsr(csrState
);
318 #endif //__SWR_OS_H__