From 07062daae93b146458db55ba22a2e27d3d59552b Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Mon, 24 Jul 2017 16:13:12 -0500 Subject: [PATCH] swr/rast: simdlib better separation of core vs knights avx512 Reviewed-by: Bruce Cherniak --- src/gallium/drivers/swr/Makefile.am | 2 +- src/gallium/drivers/swr/Makefile.sources | 8 + .../drivers/swr/rasterizer/common/simdlib.hpp | 21 +- .../rasterizer/common/simdlib_128_avx512.inl | 108 +++------- .../common/simdlib_128_avx512_core.inl | 193 ++++++++++++++++++ .../common/simdlib_128_avx512_knights.inl | 35 ++++ .../rasterizer/common/simdlib_256_avx512.inl | 128 +++--------- .../common/simdlib_256_avx512_core.inl | 127 ++++++++++++ .../common/simdlib_256_avx512_knights.inl | 35 ++++ .../rasterizer/common/simdlib_512_avx512.inl | 79 +++---- .../common/simdlib_512_avx512_core.inl | 181 ++++++++++++++++ .../common/simdlib_512_avx512_knights.inl | 183 +++++++++++++++++ .../common/simdlib_512_avx512_masks_core.inl | 27 +++ .../simdlib_512_avx512_masks_knights.inl | 27 +++ .../swr/rasterizer/common/simdlib_types.hpp | 2 +- 15 files changed, 911 insertions(+), 245 deletions(-) create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl create mode 100644 src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl diff --git a/src/gallium/drivers/swr/Makefile.am b/src/gallium/drivers/swr/Makefile.am index 05fc3b3595b..73fe904a7d5 100644 --- a/src/gallium/drivers/swr/Makefile.am +++ b/src/gallium/drivers/swr/Makefile.am @@ -285,7 +285,7 @@ lib_LTLIBRARIES += libswrKNL.la libswrKNL_la_CXXFLAGS = \ $(PTHREAD_CFLAGS) \ $(SWR_KNL_CXXFLAGS) \ - -DKNOB_ARCH=KNOB_ARCH_AVX512 -DAVX512F_STRICT \ + -DKNOB_ARCH=KNOB_ARCH_AVX512 -DSIMD_ARCH_KNIGHTS \ $(COMMON_CXXFLAGS) libswrKNL_la_SOURCES = \ diff --git a/src/gallium/drivers/swr/Makefile.sources b/src/gallium/drivers/swr/Makefile.sources index 3c1118b1ad7..53f8bf011b4 100644 --- a/src/gallium/drivers/swr/Makefile.sources +++ b/src/gallium/drivers/swr/Makefile.sources @@ -69,11 +69,19 @@ COMMON_CXX_SOURCES := \ rasterizer/common/simdlib_128_avx.inl \ rasterizer/common/simdlib_128_avx2.inl \ rasterizer/common/simdlib_128_avx512.inl \ + rasterizer/common/simdlib_128_avx512_core.inl \ + rasterizer/common/simdlib_128_avx512_knights.inl \ rasterizer/common/simdlib_256_avx.inl \ rasterizer/common/simdlib_256_avx2.inl \ rasterizer/common/simdlib_256_avx512.inl \ + rasterizer/common/simdlib_256_avx512_core.inl \ + rasterizer/common/simdlib_256_avx512_knights.inl \ rasterizer/common/simdlib_512_avx512.inl \ + rasterizer/common/simdlib_512_avx512_core.inl \ + rasterizer/common/simdlib_512_avx512_knights.inl \ rasterizer/common/simdlib_512_avx512_masks.inl \ + rasterizer/common/simdlib_512_avx512_masks_core.inl \ + rasterizer/common/simdlib_512_avx512_masks_knights.inl \ rasterizer/common/simdlib_512_emu.inl \ rasterizer/common/simdlib_512_emu_masks.inl \ rasterizer/common/simdlib_interface.hpp \ diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp index fb1113204d5..0c79cdd6605 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp @@ -55,6 +55,11 @@ namespace SIMDImpl { #define __SIMD_LIB_AVX512_HPP__ #include "simdlib_128_avx512.inl" +#if defined(SIMD_ARCH_KNIGHTS) +#include "simdlib_128_avx512_knights.inl" +#else // optimize for core +#include "simdlib_128_avx512_core.inl" +#endif // defined(SIMD_ARCH_KNIGHTS) #undef __SIMD_LIB_AVX512_HPP__ }; // struct AVX2Impl #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512 @@ -105,6 +110,11 @@ namespace SIMDImpl { #define __SIMD_LIB_AVX512_HPP__ #include "simdlib_256_avx512.inl" +#if defined(SIMD_ARCH_KNIGHTS) +#include "simdlib_256_avx512_knights.inl" +#else // optimize for core +#include "simdlib_256_avx512_core.inl" +#endif // defined(SIMD_ARCH_KNIGHTS) #undef __SIMD_LIB_AVX512_HPP__ }; // struct AVX2Impl #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512 @@ -150,13 +160,20 @@ namespace SIMDImpl #if SIMD_ARCH >= SIMD_ARCH_AVX512 - struct AVX512Impl + struct AVX512Impl : AVXImplBase { #define __SIMD_LIB_AVX512_HPP__ #include "simdlib_512_avx512.inl" #include "simdlib_512_avx512_masks.inl" +#if defined(SIMD_ARCH_KNIGHTS) +#include "simdlib_512_avx512_knights.inl" +#include "simdlib_512_avx512_masks_knights.inl" +#else // optimize for core +#include "simdlib_512_avx512_core.inl" +#include "simdlib_512_avx512_masks_core.inl" +#endif // defined(SIMD_ARCH_KNIGHTS) #undef __SIMD_LIB_AVX512_HPP__ - }; // struct AVX512Impl + }; // struct AVX512ImplBase #endif // #if SIMD_ARCH >= SIMD_ARCH_AVX512 struct Traits : SIMDImpl::Traits diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl index 012f3105e9f..66e83096104 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512.inl @@ -78,34 +78,6 @@ public: } #define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf)) -#define SIMD_DWRAPPER_1_(op, intrin, mask) \ - static SIMDINLINE Double SIMDCALL op(Double a) \ - {\ - return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ - } -#if !defined(AVX512F_STRICT) -#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3)) -#endif - -#define SIMD_DWRAPPER_1I_(op, intrin, mask) \ - template \ - static SIMDINLINE Double SIMDCALL op(Double a) \ - {\ - return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ - } -#if !defined(AVX512F_STRICT) -#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3)) -#endif - -#define SIMD_DWRAPPER_2_(op, intrin, mask) \ - static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ - {\ - return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ - } -#if !defined(AVX512F_STRICT) -#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3)) -#endif - #define SIMD_DWRAPPER_2I(op) \ template\ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ @@ -119,11 +91,6 @@ public: return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ } #define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xf)) -#if !defined(AVX512F_STRICT) -#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull)) -#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff)) -#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3)) -#endif #define SIMD_IWRAPPER_1I_(op, intrin, mask) \ template \ @@ -132,11 +99,6 @@ public: return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ } #define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xf)) -#if !defined(AVX512F_STRICT) -#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull)) -#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff)) -#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3)) -#endif #define SIMD_IWRAPPER_2_(op, intrin, mask) \ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ @@ -144,11 +106,6 @@ public: return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ } #define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xf)) -#if !defined(AVX512F_STRICT) -#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull)) -#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff)) -#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3)) -#endif #define SIMD_IWRAPPER_2I(op) \ template\ @@ -182,12 +139,8 @@ SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32) SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32) SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32) -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) -SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) - -#endif +// SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) +// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) // return (a * b) & 0xFFFFFFFF // @@ -196,12 +149,8 @@ SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uin SIMD_IWRAPPER_2_32(mullo_epi32); SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32) -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) -SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) - -#endif +// SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) +// SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) //----------------------------------------------------------------------- // Logical operations @@ -253,14 +202,10 @@ SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32) //----------------------------------------------------------------------- // Blend / shuffle / permute operations //----------------------------------------------------------------------- -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 -SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 -SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 -SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 - -#endif +// SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 +// SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 +// SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 +// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 // SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32); //static SIMDINLINE Float SIMDCALL permute_ps(Float a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float) @@ -278,16 +223,12 @@ SIMD_IWRAPPER_1I_32(shuffle_epi32); SIMD_IWRAPPER_2_32(unpackhi_epi32); SIMD_IWRAPPER_2_32(unpacklo_epi32); -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_16(unpackhi_epi16); -SIMD_IWRAPPER_2_64(unpackhi_epi64); -SIMD_IWRAPPER_2_8(unpackhi_epi8); -SIMD_IWRAPPER_2_16(unpacklo_epi16); -SIMD_IWRAPPER_2_64(unpacklo_epi64); -SIMD_IWRAPPER_2_8(unpacklo_epi8); - -#endif +// SIMD_IWRAPPER_2_16(unpackhi_epi16); +// SIMD_IWRAPPER_2_64(unpackhi_epi64); +// SIMD_IWRAPPER_2_8(unpackhi_epi8); +// SIMD_IWRAPPER_2_16(unpacklo_epi16); +// SIMD_IWRAPPER_2_64(unpacklo_epi64); +// SIMD_IWRAPPER_2_8(unpacklo_epi8); //----------------------------------------------------------------------- // Load / store operations @@ -338,16 +279,12 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In static_cast(ScaleT))); } -#if !defined(AVX512F_STRICT) - -static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) -{ - __mmask64 m = 0xffffull; - return static_cast( - _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); -} - -#endif +// static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +// { +// __mmask64 m = 0xffffull; +// return static_cast( +// _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); +// } static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) { @@ -366,6 +303,11 @@ static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a _mm512_mask_storeu_epi32(p, __mmask16(0xf), __conv(a)); } +static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask) +{ + return castsi_ps(__conv(_mm512_maskz_set1_epi32(__mmask16(mask & 0xf), -1))); +} + //======================================================================= // Legacy interface (available only in SIMD256 width) //======================================================================= diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl new file mode 100644 index 00000000000..a4ecd09f164 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_core.inl @@ -0,0 +1,193 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD128 AVX (512) implementation +// +// Since this implementation inherits from the AVX (2) implementation, +// the only operations below ones that replace AVX (2) operations. +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + +#define SIMD_WRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_WRAPPER_1(op) SIMD_WRAPPER_1_(op, op, __mmask16(0xf)) + +#define SIMD_WRAPPER_1I_(op, intrin, mask) \ + template \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_WRAPPER_1I(op) SIMD_WRAPPER_1I_(op, op, __mmask16(0xf)) + +#define SIMD_WRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op, __mmask16(0xf)) + +#define SIMD_WRAPPER_2I(op) \ + template\ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\ + } + +#define SIMD_WRAPPER_3_(op, intrin, mask) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b), __conv(c)));\ + } +#define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xf)) + +#define SIMD_DWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0x3)) + +#define SIMD_DWRAPPER_1I_(op, intrin, mask) \ + template \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0x3)) + +#define SIMD_DWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0x3)) + +#define SIMD_DWRAPPER_2I(op) \ + template\ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return __conv(_mm512_maskz_##op(0x3, __conv(a), __conv(b), ImmT));\ + } + +#define SIMD_IWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffull)) +#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xff)) +#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0x3)) + +#define SIMD_IWRAPPER_1I_(op, intrin, mask) \ + template \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffull)) +#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xff)) +#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0x3)) + +#define SIMD_IWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffull)) +#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xff)) +#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0x3)) + +#define SIMD_IWRAPPER_2I(op) \ + template\ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return __conv(_mm512_maskz_##op(0xf, __conv(a), __conv(b), ImmT));\ + } + +SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) +SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) +SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) +SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) +SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 +SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 +SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 +SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 +SIMD_IWRAPPER_2_16(unpackhi_epi16); +SIMD_IWRAPPER_2_64(unpackhi_epi64); +SIMD_IWRAPPER_2_8(unpackhi_epi8); +SIMD_IWRAPPER_2_16(unpacklo_epi16); +SIMD_IWRAPPER_2_64(unpacklo_epi64); +SIMD_IWRAPPER_2_8(unpacklo_epi8); + +static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +{ + __mmask64 m = 0xffffull; + return static_cast( + _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); +} + +#undef SIMD_WRAPPER_1_ +#undef SIMD_WRAPPER_1 +#undef SIMD_WRAPPER_1I_ +#undef SIMD_WRAPPER_1I +#undef SIMD_WRAPPER_2_ +#undef SIMD_WRAPPER_2 +#undef SIMD_WRAPPER_2I +#undef SIMD_WRAPPER_3_ +#undef SIMD_WRAPPER_3 +#undef SIMD_DWRAPPER_1_ +#undef SIMD_DWRAPPER_1 +#undef SIMD_DWRAPPER_1I_ +#undef SIMD_DWRAPPER_1I +#undef SIMD_DWRAPPER_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_IWRAPPER_1_ +#undef SIMD_IWRAPPER_1_8 +#undef SIMD_IWRAPPER_1_16 +#undef SIMD_IWRAPPER_1_32 +#undef SIMD_IWRAPPER_1_64 +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_1I_8 +#undef SIMD_IWRAPPER_1I_16 +#undef SIMD_IWRAPPER_1I_32 +#undef SIMD_IWRAPPER_1I_64 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2_8 +#undef SIMD_IWRAPPER_2_16 +#undef SIMD_IWRAPPER_2_32 +#undef SIMD_IWRAPPER_2_64 +#undef SIMD_IWRAPPER_2I +//#undef SIMD_IWRAPPER_2I_8 +//#undef SIMD_IWRAPPER_2I_16 +//#undef SIMD_IWRAPPER_2I_32 +//#undef SIMD_IWRAPPER_2I_64 diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl new file mode 100644 index 00000000000..b0cae503419 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_128_avx512_knights.inl @@ -0,0 +1,35 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD128 AVX (512) implementation for Knights Family +// +// Since this implementation inherits from the AVX512Base implementation, +// the only operations below ones that replace AVX512F / AVX512CD operations +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl index a8d2a4b8bfd..3f93cfbd7f1 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512.inl @@ -78,34 +78,6 @@ public: } #define SIMD_WRAPPER_3(op) SIMD_WRAPPER_3_(op, op, __mmask16(0xff)) -#define SIMD_DWRAPPER_1_(op, intrin, mask) \ - static SIMDINLINE Double SIMDCALL op(Double a) \ - {\ - return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ - } -#if !defined(AVX512F_STRICT) -#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf)) -#endif - -#define SIMD_DWRAPPER_1I_(op, intrin, mask) \ - template \ - static SIMDINLINE Double SIMDCALL op(Double a) \ - {\ - return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ - } -#if !defined(AVX512F_STRICT) -#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf)) -#endif - -#define SIMD_DWRAPPER_2_(op, intrin, mask) \ - static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ - {\ - return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ - } -#if !defined(AVX512F_STRICT) -#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf)) -#endif - #define SIMD_DWRAPPER_2I(op) \ template\ static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ @@ -119,11 +91,6 @@ public: return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ } #define SIMD_IWRAPPER_1_32(op) SIMD_IWRAPPER_1_(op, op, __mmask16(0xff)) -#if !defined(AVX512F_STRICT) -#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull)) -#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff)) -#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf)) -#endif #define SIMD_IWRAPPER_1I_(op, intrin, mask) \ template \ @@ -132,11 +99,6 @@ public: return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ } #define SIMD_IWRAPPER_1I_32(op) SIMD_IWRAPPER_1I_(op, op, __mmask16(0xff)) -#if !defined(AVX512F_STRICT) -#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull)) -#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff)) -#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf)) -#endif #define SIMD_IWRAPPER_2_(op, intrin, mask) \ static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ @@ -144,11 +106,6 @@ public: return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ } #define SIMD_IWRAPPER_2_32(op) SIMD_IWRAPPER_2_(op, op, __mmask16(0xff)) -#if !defined(AVX512F_STRICT) -#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull)) -#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff)) -#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf)) -#endif #define SIMD_IWRAPPER_2I(op) \ template\ @@ -182,12 +139,8 @@ SIMD_IWRAPPER_2_32(min_epi32); // return (a < b) ? a : b (int32) SIMD_IWRAPPER_2_32(min_epu32); // return (a < b) ? a : b (uint32) SIMD_IWRAPPER_2_32(mul_epi32); // return a * b (int32) -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) -SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) - -#endif +// SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) +// SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) // return (a * b) & 0xFFFFFFFF // @@ -196,12 +149,8 @@ SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uin SIMD_IWRAPPER_2_32(mullo_epi32); SIMD_IWRAPPER_2_32(sub_epi32); // return a - b (int32) -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) -SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) - -#endif +// SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) +// SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) //----------------------------------------------------------------------- // Logical operations @@ -253,14 +202,10 @@ SIMD_IWRAPPER_2_32(srlv_epi32); // return a >> b (uint32) //----------------------------------------------------------------------- // Blend / shuffle / permute operations //----------------------------------------------------------------------- -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 -SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 -SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 -SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 - -#endif +// SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 +// SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 +// SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 +// SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 // SIMD_IWRAPPER_2_(permute_epi32, permutevar8x32_epi32); @@ -279,16 +224,12 @@ SIMD_IWRAPPER_1I_32(shuffle_epi32); SIMD_IWRAPPER_2_32(unpackhi_epi32); SIMD_IWRAPPER_2_32(unpacklo_epi32); -#if !defined(AVX512F_STRICT) - -SIMD_IWRAPPER_2_16(unpackhi_epi16); -SIMD_IWRAPPER_2_64(unpackhi_epi64); -SIMD_IWRAPPER_2_8(unpackhi_epi8); -SIMD_IWRAPPER_2_16(unpacklo_epi16); -SIMD_IWRAPPER_2_64(unpacklo_epi64); -SIMD_IWRAPPER_2_8(unpacklo_epi8); - -#endif +// SIMD_IWRAPPER_2_16(unpackhi_epi16); +// SIMD_IWRAPPER_2_64(unpackhi_epi64); +// SIMD_IWRAPPER_2_8(unpackhi_epi8); +// SIMD_IWRAPPER_2_16(unpacklo_epi16); +// SIMD_IWRAPPER_2_64(unpacklo_epi64); +// SIMD_IWRAPPER_2_8(unpacklo_epi8); //----------------------------------------------------------------------- // Load / store operations @@ -339,16 +280,12 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In static_cast(ScaleT))); } -#if !defined(AVX512F_STRICT) - -static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) -{ - __mmask64 m = 0xffffffffull; - return static_cast( - _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); -} - -#endif +// static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +// { +// __mmask64 m = 0xffffffffull; +// return static_cast( +// _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); +// } static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src) { @@ -367,6 +304,11 @@ static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a _mm512_mask_storeu_epi32(p, __mmask16(0xff), __conv(a)); } +static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask) +{ + return castsi_ps(__conv(_mm512_maskz_set1_epi32(__mmask16(mask & 0xff), -1))); +} + //======================================================================= // Legacy interface (available only in SIMD256 width) //======================================================================= @@ -380,30 +322,10 @@ static SIMDINLINE void SIMDCALL store_si(Integer *p, Integer a) // *p = a #undef SIMD_WRAPPER_2I #undef SIMD_WRAPPER_3_ #undef SIMD_WRAPPER_3 -#undef SIMD_DWRAPPER_1_ -#undef SIMD_DWRAPPER_1 -#undef SIMD_DWRAPPER_1I_ -#undef SIMD_DWRAPPER_1I -#undef SIMD_DWRAPPER_2_ -#undef SIMD_DWRAPPER_2 -#undef SIMD_DWRAPPER_2I #undef SIMD_IWRAPPER_1_ -#undef SIMD_IWRAPPER_1_8 -#undef SIMD_IWRAPPER_1_16 #undef SIMD_IWRAPPER_1_32 -#undef SIMD_IWRAPPER_1_64 #undef SIMD_IWRAPPER_1I_ -#undef SIMD_IWRAPPER_1I_8 -#undef SIMD_IWRAPPER_1I_16 #undef SIMD_IWRAPPER_1I_32 -#undef SIMD_IWRAPPER_1I_64 #undef SIMD_IWRAPPER_2_ -#undef SIMD_IWRAPPER_2_8 -#undef SIMD_IWRAPPER_2_16 #undef SIMD_IWRAPPER_2_32 -#undef SIMD_IWRAPPER_2_64 #undef SIMD_IWRAPPER_2I -//#undef SIMD_IWRAPPER_2I_8 -//#undef SIMD_IWRAPPER_2I_16 -//#undef SIMD_IWRAPPER_2I_32 -//#undef SIMD_IWRAPPER_2I_64 diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl new file mode 100644 index 00000000000..6ffe7c2a0f0 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_core.inl @@ -0,0 +1,127 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD256 AVX (512) implementation for Core processors +// +// Since this implementation inherits from the AVX (2) implementation, +// the only operations below ones that replace AVX (2) operations. +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + +#define SIMD_DWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_DWRAPPER_1(op) SIMD_DWRAPPER_1_(op, op, __mmask8(0xf)) + +#define SIMD_DWRAPPER_1I_(op, intrin, mask) \ + template \ + static SIMDINLINE Double SIMDCALL op(Double a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_DWRAPPER_1I(op) SIMD_DWRAPPER_1I_(op, op, __mmask8(0xf)) + +#define SIMD_DWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_DWRAPPER_2(op) SIMD_DWRAPPER_2_(op, op, __mmask8(0xf)) + +#define SIMD_IWRAPPER_1_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a)));\ + } +#define SIMD_IWRAPPER_1_8(op) SIMD_IWRAPPER_1_(op, op, __mmask64(0xffffffffull)) +#define SIMD_IWRAPPER_1_16(op) SIMD_IWRAPPER_1_(op, op, __mmask32(0xffff)) +#define SIMD_IWRAPPER_1_64(op) SIMD_IWRAPPER_1_(op, op, __mmask8(0xf)) + +#define SIMD_IWRAPPER_1I_(op, intrin, mask) \ + template \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), ImmT));\ + } +#define SIMD_IWRAPPER_1I_8(op) SIMD_IWRAPPER_1I_(op, op, __mmask64(0xffffffffull)) +#define SIMD_IWRAPPER_1I_16(op) SIMD_IWRAPPER_1I_(op, op, __mmask32(0xffff)) +#define SIMD_IWRAPPER_1I_64(op) SIMD_IWRAPPER_1I_(op, op, __mmask8(0xf)) + +#define SIMD_IWRAPPER_2_(op, intrin, mask) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return __conv(_mm512_maskz_##intrin((mask), __conv(a), __conv(b)));\ + } +#define SIMD_IWRAPPER_2_8(op) SIMD_IWRAPPER_2_(op, op, __mmask64(0xffffffffull)) +#define SIMD_IWRAPPER_2_16(op) SIMD_IWRAPPER_2_(op, op, __mmask32(0xffff)) +#define SIMD_IWRAPPER_2_64(op) SIMD_IWRAPPER_2_(op, op, __mmask8(0xf)) + + +SIMD_IWRAPPER_2_8(add_epi8); // return a + b (int8) +SIMD_IWRAPPER_2_8(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) +SIMD_IWRAPPER_2_64(sub_epi64); // return a - b (int64) +SIMD_IWRAPPER_2_8(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8) +SIMD_IWRAPPER_2_8(packs_epi16); // int16 --> int8 See documentation for _mm256_packs_epi16 and _mm512_packs_epi16 +SIMD_IWRAPPER_2_16(packs_epi32); // int32 --> int16 See documentation for _mm256_packs_epi32 and _mm512_packs_epi32 +SIMD_IWRAPPER_2_8(packus_epi16); // uint16 --> uint8 See documentation for _mm256_packus_epi16 and _mm512_packus_epi16 +SIMD_IWRAPPER_2_16(packus_epi32); // uint32 --> uint16 See documentation for _mm256_packus_epi32 and _mm512_packus_epi32 +SIMD_IWRAPPER_2_16(unpackhi_epi16); +SIMD_IWRAPPER_2_64(unpackhi_epi64); +SIMD_IWRAPPER_2_8(unpackhi_epi8); +SIMD_IWRAPPER_2_16(unpacklo_epi16); +SIMD_IWRAPPER_2_64(unpacklo_epi64); +SIMD_IWRAPPER_2_8(unpacklo_epi8); + +static SIMDINLINE uint32_t SIMDCALL movemask_epi8(Integer a) +{ + __mmask64 m = 0xffffffffull; + return static_cast( + _mm512_mask_test_epi8_mask(m, __conv(a), _mm512_set1_epi8(0x80))); +} + +#undef SIMD_DWRAPPER_1_ +#undef SIMD_DWRAPPER_1 +#undef SIMD_DWRAPPER_1I_ +#undef SIMD_DWRAPPER_1I +#undef SIMD_DWRAPPER_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_IWRAPPER_1_ +#undef SIMD_IWRAPPER_1_8 +#undef SIMD_IWRAPPER_1_16 +#undef SIMD_IWRAPPER_1_64 +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_1I_8 +#undef SIMD_IWRAPPER_1I_16 +#undef SIMD_IWRAPPER_1I_64 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2_8 +#undef SIMD_IWRAPPER_2_16 +#undef SIMD_IWRAPPER_2_64 diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl new file mode 100644 index 00000000000..acd8ffd9688 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_256_avx512_knights.inl @@ -0,0 +1,35 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD256 AVX (512) implementation for Knights Family +// +// Since this implementation inherits from the AVX (2) implementation, +// the only operations below ones that replace AVX (2) operations. +// These use native AVX512 instructions with masking to enable a larger +// register set. +//============================================================================ + diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl index 7447d35ee2f..1f93da7345f 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl @@ -25,7 +25,7 @@ #endif #if defined(__GNUC__) && !defined( __clang__) && !defined(__INTEL_COMPILER) -// gcc missing these intrinsics +// gcc as of 7.1 was missing these intrinsics #ifndef _mm512_cmpneq_ps_mask #define _mm512_cmpneq_ps_mask(a,b) _mm512_cmp_ps_mask((a),(b),_CMP_NEQ_UQ) #endif @@ -37,14 +37,13 @@ #ifndef _mm512_cmplt_pd_mask #define _mm512_cmplt_pd_mask(a,b) _mm512_cmp_pd_mask((a),(b),_CMP_LT_OS) #endif + #endif //============================================================================ -// SIMD16 AVX512 (F) implementation +// SIMD16 AVX512 (F) implementation (compatible with Knights and Core +// processors) // -// TODO: Optimize for KNL / KNH or for SKX?? -// For now probably optimizing more for KNL as that's where -// immediate customers are. //============================================================================ static const int TARGET_SIMD_WIDTH = 16; @@ -153,34 +152,11 @@ using SIMD256T = SIMD256Impl::AVX2Impl; } #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) -#define SIMD_EMU_IWRAPPER_2(op) \ - static SIMDINLINE \ - Integer SIMDCALL op(Integer a, Integer b)\ - {\ - return Integer\ - {\ - SIMD256T::op(a.v8[0], b.v8[0]),\ - SIMD256T::op(a.v8[1], b.v8[1]),\ - };\ - } - private: - static SIMDINLINE Integer vmask(__mmask8 m) - { - return _mm512_maskz_set1_epi64(m, -1LL); - } static SIMDINLINE Integer vmask(__mmask16 m) { return _mm512_maskz_set1_epi32(m, -1); } - static SIMDINLINE Integer vmask(__mmask32 m) - { - return _mm512_maskz_set1_epi16(m, -1); - } - static SIMDINLINE Integer vmask(__mmask64 m) - { - return _mm512_maskz_set1_epi8(m, -1); - } public: //----------------------------------------------------------------------- @@ -236,21 +212,10 @@ SIMD_IWRAPPER_2_(andnot_si, andnot_si512); // return (~a) & b (int) SIMD_IWRAPPER_2_(or_si, or_si512); // return a | b (int) SIMD_IWRAPPER_2_(xor_si, xor_si512); // return a ^ b (int) -#if defined(AVX512F_STRICT) - -SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int) -SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int) -SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int) -SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int) - -#else - -SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int) -SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int) -SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int) -SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int) - -#endif +// SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int) +// SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int) +// SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int) +// SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int) //----------------------------------------------------------------------- @@ -260,6 +225,17 @@ SIMD_IWRAPPER_1I(slli_epi32); // return a << ImmT SIMD_IWRAPPER_2(sllv_epi32); SIMD_IWRAPPER_1I(srai_epi32); // return a >> ImmT (int32) SIMD_IWRAPPER_1I(srli_epi32); // return a >> ImmT (uint32) + +#if 0 +SIMD_IWRAPPER_1I_(srli_si, srli_si512); // return a >> (ImmT*8) (uint) + +template // same as srli_si, but with Float cast to int +static SIMDINLINE Float SIMDCALL srlisi_ps(Float a) +{ + return castsi_ps(srli_si(castps_si(a))); +} +#endif + SIMD_IWRAPPER_2(srlv_epi32); //----------------------------------------------------------------------- @@ -461,17 +437,10 @@ static SIMDINLINE Integer SIMDCALL insert_si(Integer a, SIMD256Impl::Integer b) return _mm512_inserti64x4(a, b, imm); } -#if !defined(AVX512F_STRICT) -SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16 -SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32 -SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16 -SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32 -#else -SIMD_EMU_IWRAPPER_2(packs_epi16) -SIMD_EMU_IWRAPPER_2(packs_epi32) -SIMD_EMU_IWRAPPER_2(packus_epi16) -SIMD_EMU_IWRAPPER_2(packus_epi32) -#endif +// SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16 +// SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32 +// SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16 +// SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32 static SIMDINLINE Integer SIMDCALL permute_epi32(Integer a, Integer swiz) // return a[swiz[i]] for each 32-bit lane i (float) { @@ -704,4 +673,4 @@ static SIMDINLINE Float SIMDCALL vmask_ps(int32_t mask) #undef SIMD_IWRAPPER_2 #undef SIMD_IWRAPPER_2_ #undef SIMD_IWRAPPER_2I -#undef SIMD_EMU_IWRAPPER_2 + diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl new file mode 100644 index 00000000000..5063c529306 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl @@ -0,0 +1,181 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD16 AVX512 (F) implementation for Core processors +// +//============================================================================ + +#define SIMD_WRAPPER_1_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return intrin(a);\ + } + +#define SIMD_WRAPPER_1(op) \ + SIMD_WRAPPER_1_(op, _mm512_##op) + +#define SIMD_WRAPPER_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op) + +#define SIMD_WRAPPERI_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_castsi512_ps(_mm512_##intrin(\ + _mm512_castps_si512(a), _mm512_castps_si512(b)));\ + } + +#define SIMD_DWRAPPER_2(op) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##op(a, b);\ + } + +#define SIMD_WRAPPER_2I_(op, intrin) \ + template\ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op) + +#define SIMD_DWRAPPER_2I_(op, intrin) \ + template\ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op) + +#define SIMD_WRAPPER_3(op) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + {\ + return _mm512_##op(a, b, c);\ + } + +#define SIMD_IWRAPPER_1(op) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return _mm512_##op(a);\ + } +#define SIMD_IWRAPPER_1_8(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1_4(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1I_(op, intrin) \ + template \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return intrin(a, ImmT);\ + } +#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op) + +#define SIMD_IWRAPPER_2_(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op) + +#define SIMD_IWRAPPER_2_CMP(op, cmp) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return cmp(a, b);\ + } + +#define SIMD_IFWRAPPER_2(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\ + } + +#define SIMD_IWRAPPER_2I_(op, intrin) \ + template\ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) + +private: + static SIMDINLINE Integer vmask(__mmask8 m) + { + return _mm512_maskz_set1_epi64(m, -1LL); + } + static SIMDINLINE Integer vmask(__mmask32 m) + { + return _mm512_maskz_set1_epi16(m, -1); + } + static SIMDINLINE Integer vmask(__mmask64 m) + { + return _mm512_maskz_set1_epi8(m, -1); + } + +public: +SIMD_WRAPPER_2(and_ps); // return a & b (float treated as int) +SIMD_WRAPPER_2(andnot_ps); // return (~a) & b (float treated as int) +SIMD_WRAPPER_2(or_ps); // return a | b (float treated as int) +SIMD_WRAPPER_2(xor_ps); // return a ^ b (float treated as int) + +SIMD_IWRAPPER_2(packs_epi16); // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16 +SIMD_IWRAPPER_2(packs_epi32); // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32 +SIMD_IWRAPPER_2(packus_epi16); // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16 +SIMD_IWRAPPER_2(packus_epi32); // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32 + +#undef SIMD_WRAPPER_1_ +#undef SIMD_WRAPPER_1 +#undef SIMD_WRAPPER_2 +#undef SIMD_WRAPPER_2_ +#undef SIMD_WRAPPERI_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_WRAPPER_2I_ +#undef SIMD_WRAPPER_3_ +#undef SIMD_WRAPPER_2I +#undef SIMD_WRAPPER_3 +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_2 +#undef SIMD_IFWRAPPER_2 +#undef SIMD_IWRAPPER_2I +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_1I +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_2 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2I + diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl new file mode 100644 index 00000000000..310f1540065 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl @@ -0,0 +1,183 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +//============================================================================ +// SIMD16 AVX512 (F) implementation for Knights Family Processors +// +//============================================================================ + +static const int TARGET_SIMD_WIDTH = 16; +using SIMD256T = SIMD256Impl::AVX2Impl; + +#define SIMD_WRAPPER_1_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a) \ + {\ + return intrin(a);\ + } + +#define SIMD_WRAPPER_1(op) \ + SIMD_WRAPPER_1_(op, _mm512_##op) + +#define SIMD_WRAPPER_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_WRAPPER_2(op) SIMD_WRAPPER_2_(op, op) + +#define SIMD_WRAPPERI_2_(op, intrin) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_castsi512_ps(_mm512_##intrin(\ + _mm512_castps_si512(a), _mm512_castps_si512(b)));\ + } + +#define SIMD_DWRAPPER_2(op) \ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##op(a, b);\ + } + +#define SIMD_WRAPPER_2I_(op, intrin) \ + template\ + static SIMDINLINE Float SIMDCALL op(Float a, Float b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_WRAPPER_2I(op) SIMD_WRAPPER_2I_(op, op) + +#define SIMD_DWRAPPER_2I_(op, intrin) \ + template\ + static SIMDINLINE Double SIMDCALL op(Double a, Double b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_DWRAPPER_2I(op) SIMD_DWRAPPER_2I_(op, op) + +#define SIMD_WRAPPER_3(op) \ + static SIMDINLINE Float SIMDCALL op(Float a, Float b, Float c) \ + {\ + return _mm512_##op(a, b, c);\ + } + +#define SIMD_IWRAPPER_1(op) \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return _mm512_##op(a);\ + } +#define SIMD_IWRAPPER_1_8(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD256Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1_4(op) \ + static SIMDINLINE Integer SIMDCALL op(SIMD128Impl::Integer a) \ + {\ + return _mm512_##op(a);\ + } + +#define SIMD_IWRAPPER_1I_(op, intrin) \ + template \ + static SIMDINLINE Integer SIMDCALL op(Integer a) \ + {\ + return intrin(a, ImmT);\ + } +#define SIMD_IWRAPPER_1I(op) SIMD_IWRAPPER_1I_(op, _mm512_##op) + +#define SIMD_IWRAPPER_2_(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b);\ + } +#define SIMD_IWRAPPER_2(op) SIMD_IWRAPPER_2_(op, op) + +#define SIMD_IWRAPPER_2_CMP(op, cmp) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return cmp(a, b);\ + } + +#define SIMD_IFWRAPPER_2(op, intrin) \ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return castps_si(_mm512_##intrin(castsi_ps(a), castsi_ps(b)) );\ + } + +#define SIMD_IWRAPPER_2I_(op, intrin) \ + template\ + static SIMDINLINE Integer SIMDCALL op(Integer a, Integer b) \ + {\ + return _mm512_##intrin(a, b, ImmT);\ + } +#define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op) + +private: + static SIMDINLINE Integer vmask(__mmask8 m) + { + return _mm512_maskz_set1_epi64(m, -1LL); + } + static SIMDINLINE Integer vmask(__mmask16 m) + { + return _mm512_maskz_set1_epi32(m, -1); + } + static SIMDINLINE Integer vmask(__mmask32 m) + { + return _mm512_maskz_set1_epi16(m, -1); + } + static SIMDINLINE Integer vmask(__mmask64 m) + { + return _mm512_maskz_set1_epi8(m, -1); + } + +public: +SIMD_WRAPPERI_2_(and_ps, and_epi32); // return a & b (float treated as int) +SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32); // return (~a) & b (float treated as int) +SIMD_WRAPPERI_2_(or_ps, or_epi32); // return a | b (float treated as int) +SIMD_WRAPPERI_2_(xor_ps, xor_epi32); // return a ^ b (float treated as int) + +#undef SIMD_WRAPPER_1_ +#undef SIMD_WRAPPER_1 +#undef SIMD_WRAPPER_2 +#undef SIMD_WRAPPER_2_ +#undef SIMD_WRAPPERI_2_ +#undef SIMD_DWRAPPER_2 +#undef SIMD_DWRAPPER_2I +#undef SIMD_WRAPPER_2I_ +#undef SIMD_WRAPPER_3_ +#undef SIMD_WRAPPER_2I +#undef SIMD_WRAPPER_3 +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_2 +#undef SIMD_IFWRAPPER_2 +#undef SIMD_IWRAPPER_2I +#undef SIMD_IWRAPPER_1 +#undef SIMD_IWRAPPER_1I +#undef SIMD_IWRAPPER_1I_ +#undef SIMD_IWRAPPER_2 +#undef SIMD_IWRAPPER_2_ +#undef SIMD_IWRAPPER_2I + diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl new file mode 100644 index 00000000000..3e36ce5bd36 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_core.inl @@ -0,0 +1,27 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +// Implement mask-enabled SIMD functions diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl new file mode 100644 index 00000000000..3e36ce5bd36 --- /dev/null +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_masks_knights.inl @@ -0,0 +1,27 @@ +/**************************************************************************** +* Copyright (C) 2017 Intel Corporation. All Rights Reserved. +* +* Permission is hereby granted, free of charge, to any person obtaining a +* copy of this software and associated documentation files (the "Software"), +* to deal in the Software without restriction, including without limitation +* the rights to use, copy, modify, merge, publish, distribute, sublicense, +* and/or sell copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice (including the next +* paragraph) shall be included in all copies or substantial portions of the +* Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +* IN THE SOFTWARE. +****************************************************************************/ +#if !defined(__SIMD_LIB_AVX512_HPP__) +#error Do not include this file directly, use "simdlib.hpp" instead. +#endif + +// Implement mask-enabled SIMD functions diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp index bc23867c7be..236257fed84 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp +++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_types.hpp @@ -262,7 +262,7 @@ namespace SIMDImpl namespace SIMD512Impl { -#if !defined(__AVX512F__) +#if !(defined(__AVX512F__) || defined(_MM_K0_REG)) // Define AVX512 types if not included via immintrin.h. // All data members of these types are ONLY to viewed // in a debugger. Do NOT access them via code! -- 2.30.2