From 6970f48b6eca3e7025d30209ad454f891e87d80c Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 18 Jul 2017 12:04:41 -0500 Subject: [PATCH] swr/rast: simdlib changes for clang/gcc Tested with clang-4.0 and gcc-6.3. Reviewed-by: Bruce Cherniak --- .../rasterizer/common/simdlib_512_avx512.inl | 43 +++++++++++++++---- .../swr/rasterizer/common/simdlib_types.hpp | 2 +- 2 files changed, 35 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 7d90b7d1b05..7447d35ee2f 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -24,6 +24,21 @@ #error Do not include this file directly, use "simdlib.hpp" instead. #endif +#if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER) +// gcc missing these intrinsics +#ifndef _mm512_cmpneq_ps_mask +#define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ) +#endif + +#ifndef _mm512_cmplt_ps_mask +#define _mm512_cmplt_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_LT_OS) +#endif + +#ifndef _mm512_cmplt_pd_mask +#define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS) +#endif +#endif + //============================================================================ // SIMD16 AVX512 (F) implementation // @@ -138,6 +153,17 @@ using SIMD256T = SIMD256Impl::AVX2Impl; } #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) +#define SIMD_EMU_IWRAPPER_2(op) \ + static SIMDINLINE \ + Integer SIMDCALL op(Integer a, Integer b)\ + {\ + return Integer\ + {\ + SIMD256T::op(a.v8[0], b.v8[0]),\ + SIMD256T::op(a.v8[1], b.v8[1]),\ + };\ + } + private: static SIMDINLINE Integer vmask(__mmask8 m) { @@ -234,14 +260,6 @@ SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT SIMD_IWRAPPER_2(sllv_epi32); SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32) SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32) -SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint) - -template // same as srli_si, but with Float cast to int -static SIMDINLINE Float SIMDCALL srlisi_ps(Float a) -{ - return castsi_ps(srli_si(castps_si(a))); -} - SIMD_IWRAPPER_2(srlv_epi32); //----------------------------------------------------------------------- @@ -443,10 +461,17 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b) return _mm512_inserti64x4(a, b, imm); } +#if !defined(AVX512F_STRICT) SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16 SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32 SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16 SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32 +#else +SIMD_EMU_IWRAPPER_2(packs_epi16) +SIMD_EMU_IWRAPPER_2(packs_epi32) +SIMD_EMU_IWRAPPER_2(packus_epi16) +SIMD_EMU_IWRAPPER_2(packus_epi32) +#endif static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float) { @@ -679,4 +704,4 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask) #undef SIMD_IWRAPPER_2 #undef SIMD_IWRAPPER_2_ #undef SIMD_IWRAPPER_2I - +#undef SIMD_EMU_IWRAPPER_2 diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp index 07775e7b83f..bc23867c7be 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp @@ -262,7 +262,7 @@ namespace SIMDImpl namespace SIMD512Impl { -#if !defined(_MM_K0_REG) +#if !defined(__AVX512F__) // Define AVX512 types if not included via immintrin.h. // All data members of these types are ONLY to viewed // in a debugger. Do NOT access them via code! -- 2.30.2