swr: [rasterizer] add support for building avx512 version
authorTim Rowley <timothy.o.rowley@intel.com>
Wed, 1 Jun 2016 02:01:40 +0000 (20:01 -0600)
committerTim Rowley <timothy.o.rowley@intel.com>
Thu, 23 Jun 2016 15:50:05 +0000 (10:50 -0500)
Currently, most code paths between AVX2 and AVX512 are identical
(see changes to knobs.h).

Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/common/simdintrin.h
src/gallium/drivers/swr/rasterizer/core/format_types.h
src/gallium/drivers/swr/rasterizer/core/knobs.h
src/gallium/drivers/swr/rasterizer/memory/Convert.h
src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp

index 5ec1f7193480e4d4ec4b02772632c6463dacb7a0..cc29b5d6a939f4e528d68ddbc8f6a5d45801dcbd 100644 (file)
@@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
 INLINE
 UINT pdep_u32(UINT a, UINT mask)
 {
-#if KNOB_ARCH==KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
     return _pdep_u32(a, mask);
 #else
     UINT result = 0;
@@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
 INLINE
 UINT pext_u32(UINT a, UINT mask)
 {
-#if KNOB_ARCH==KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
     return _pext_u32(a, mask);
 #else
     UINT result = 0;
index afb63378c0dc9ce12289c8cfbb82c4ddeda840ae..6612c83beb497494e9b6743f8b9fa4ae35c1fe7f 100644 (file)
@@ -98,7 +98,7 @@ struct PackTraits<8, false>
         __m256i result = _mm256_castsi128_si256(resLo);
         result = _mm256_insertf128_si256(result, resHi, 1);
         return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
         return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -161,7 +161,7 @@ struct PackTraits<8, true>
         __m256i result = _mm256_castsi128_si256(resLo);
         result = _mm256_insertf128_si256(result, resHi, 1);
         return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
         return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -223,7 +223,7 @@ struct PackTraits<16, false>
         __m256i result = _mm256_castsi128_si256(resLo);
         result = _mm256_insertf128_si256(result, resHi, 1);
         return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
         return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
@@ -285,7 +285,7 @@ struct PackTraits<16, true>
         __m256i result = _mm256_castsi128_si256(resLo);
         result = _mm256_insertf128_si256(result, resHi, 1);
         return _mm256_castsi256_ps(result);
-#elif KNOB_ARCH==KNOB_ARCH_AVX2
+#elif KNOB_ARCH>=KNOB_ARCH_AVX2
         return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
 #endif
 #else
index 55a22a67f4c6335a38d0dc2a7066eaf7134abbcd..2629276ed59c459a7a7cca1684291adf14f8e1f8 100644 (file)
 #define KNOB_SIMD_WIDTH 8
 #define KNOB_SIMD_BYTES 32
 #elif (KNOB_ARCH == KNOB_ARCH_AVX512)
-#define KNOB_ARCH_ISA AVX512F
-#define KNOB_ARCH_STR "AVX512"
-#define KNOB_SIMD_WIDTH 16
-#define KNOB_SIMD_BYTES 64
-#error "AVX512 not yet supported"
+#define KNOB_ARCH_ISA AVX2
+#define KNOB_ARCH_STR "AVX2"
+#define KNOB_SIMD_WIDTH 8
+#define KNOB_SIMD_BYTES 32
+// Disable AVX512 for now...
+//#define KNOB_ARCH_ISA AVX512F
+//#define KNOB_ARCH_STR "AVX512"
+//#define KNOB_SIMD_WIDTH 16
+//#define KNOB_SIMD_BYTES 64
+//#error "AVX512 not yet supported"
 #else
 #error "Unknown architecture"
 #endif
index 42b973c13fc6ba7dba963be333f97d81925b10aa..b790d35e49d264ad858bd6d9c3da61bb6da15bf8 100644 (file)
@@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
                 // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
                 // @todo 16bit float instruction support is orthogonal to avx support.  need to
                 // add check for F16C support instead.
-#if KNOB_ARCH == KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
                 __m128 src128 = _mm_set1_ps(src);
                 __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
                 UINT value = _mm_extract_epi16(srci128, 0);
@@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat(
             float dst;
             if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
             {
-#if KNOB_ARCH == KNOB_ARCH_AVX2
+#if KNOB_ARCH >= KNOB_ARCH_AVX2
                 // Convert from 16-bit float to 32-bit float using _mm_cvtph_ps
                 // @todo 16bit float instruction support is orthogonal to avx support.  need to
                 // add check for F16C support instead.
index 2ab29362a520cd5c13ca9af2f595df9ab82735a5..8a26ff63595245c9a3a7d621a2d64e0c6e4e3b06 100644 (file)
@@ -454,7 +454,7 @@ INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst
     __m256i final = _mm256_castsi128_si256(vRow00);
     final = _mm256_insertf128_si256(final, vRow10, 1);
 
-#elif KNOB_ARCH == KNOB_ARCH_AVX2
+#elif KNOB_ARCH >= KNOB_ARCH_AVX2
 
     // logic is as above, only wider
     src1 = _mm256_slli_si256(src1, 1);
@@ -542,7 +542,7 @@ INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_
     __m256i final = _mm256_castsi128_si256(vRow00);
     final = _mm256_insertf128_si256(final, vRow10, 1);
 
-#elif KNOB_ARCH == KNOB_ARCH_AVX2
+#elif KNOB_ARCH >= KNOB_ARCH_AVX2
 
                                               // logic is as above, only wider
     src1 = _mm256_slli_si256(src1, 1);