swr/rast: fix core / knights split of AVX512 intrinsics

author Tim Rowley <timothy.o.rowley@intel.com>

Thu, 27 Jul 2017 20:33:10 +0000 (15:33 -0500)

committer Tim Rowley <timothy.o.rowley@intel.com>

Wed, 2 Aug 2017 16:39:33 +0000 (11:39 -0500)
author Tim Rowley <timothy.o.rowley@intel.com>
Thu, 27 Jul 2017 20:33:10 +0000 (15:33 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Wed, 2 Aug 2017 16:39:33 +0000 (11:39 -0500)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp

index 22d7da42d0af1f84570a1d9e68caa917cffc9496..500cf8a87e3d6c8a9985a4b3c3e81bcc88b425b3 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
@@ -214,6 +214,8 @@ struct SIMDBase : Traits::IsaImpl
      using Vec4          = typename Traits::Vec4;
      using Mask          = typename Traits::Mask;
  
+    static const size_t VECTOR_BYTES = sizeof(Float);
+
      // Populates a SIMD Vec4 from a non-simd vector. So p = xyzw becomes xxxx yyyy zzzz wwww.
      static SIMDINLINE
      void vec4_load1_ps(Vec4& r, const float *p)
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl

index 1dbfff8c9c193d2f4cb077f908f1c1f0bc0455e9..95e4c319099f6c356b443863dcb8881de38275d8 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl
@@ -158,6 +158,11 @@ private:
          return _mm512_maskz_set1_epi32(m, -1);
      }
  
+    static SIMDINLINE Integer vmask(__mmask8 m)
+    {
+        return _mm512_maskz_set1_epi64(m, -1LL);
+    }
+
  public:
  //-----------------------------------------------------------------------
  // Single precision floating point arithmetic operations
@@ -187,8 +192,8 @@ static SIMDINLINE Float SIMDCALL floor_ps(Float a) { return round_ps<RoundMode::
  //-----------------------------------------------------------------------
  SIMD_IWRAPPER_1(abs_epi32); // return absolute_value(a) (int32)
  SIMD_IWRAPPER_2(add_epi32); // return a + b (int32)
-SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
-SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+//SIMD_IWRAPPER_2(add_epi8);  // return a + b (int8)
+//SIMD_IWRAPPER_2(adds_epu8); // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
  SIMD_IWRAPPER_2(max_epi32); // return (a > b) ? a : b (int32)
  SIMD_IWRAPPER_2(max_epu32); // return (a > b) ? a : b (uint32)
  SIMD_IWRAPPER_2(min_epi32); // return (a < b) ? a : b (int32)
@@ -202,7 +207,7 @@ SIMD_IWRAPPER_2(mul_epi32); // return a * b (int32)
  SIMD_IWRAPPER_2(mullo_epi32);
  SIMD_IWRAPPER_2(sub_epi32); // return a - b (int32)
  SIMD_IWRAPPER_2(sub_epi64); // return a - b (int64)
-SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
+//SIMD_IWRAPPER_2(subs_epu8); // return (b > a) ? 0 : (a - b) (uint8)
  
  //-----------------------------------------------------------------------
  // Logical operations
@@ -276,7 +281,7 @@ static SIMDINLINE Float SIMDCALL cvtepi32_ps(Integer a) // return (float)a    (i
      return _mm512_cvtepi32_ps(a);
  }
  
-SIMD_IWRAPPER_1_8(cvtepu8_epi16);     // return (int16)a    (uint8 --> int16)
+//SIMD_IWRAPPER_1_8(cvtepu8_epi16);     // return (int16)a    (uint8 --> int16)
  SIMD_IWRAPPER_1_4(cvtepu8_epi32);     // return (int32)a    (uint8 --> int32)
  SIMD_IWRAPPER_1_8(cvtepu16_epi32);    // return (int32)a    (uint16 --> int32)
  SIMD_IWRAPPER_1_4(cvtepu16_epi64);    // return (int64)a    (uint16 --> int64)
@@ -316,20 +321,6 @@ static SIMDINLINE Float SIMDCALL cmpeq_ps(Float a, Float b) { return cmp_ps<Comp
  static SIMDINLINE Float SIMDCALL cmpge_ps(Float a, Float b) { return cmp_ps<CompareType::GE_OQ>(a, b); }
  static SIMDINLINE Float SIMDCALL cmple_ps(Float a, Float b) { return cmp_ps<CompareType::LE_OQ>(a, b); }
  
-template<CompareTypeInt CmpTypeT>
-static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
-{
-    // Legacy vector mask generator
-    __mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT));
-    return vmask(result);
-}
-template<CompareTypeInt CmpTypeT>
-static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
-{
-    // Legacy vector mask generator
-    __mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast<const int>(CmpTypeT));
-    return vmask(result);
-}
  template<CompareTypeInt CmpTypeT>
  static SIMDINLINE Integer SIMDCALL cmp_epi32(Integer a, Integer b)
  {
@@ -345,12 +336,12 @@ static SIMDINLINE Integer SIMDCALL cmp_epi64(Integer a, Integer b)
      return vmask(result);
  }
  
-SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8<CompareTypeInt::EQ>);    // return a == b (int8)
-SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>);   // return a == b (int16)
+//SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8<CompareTypeInt::EQ>);    // return a == b (int8)
+//SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>);   // return a == b (int16)
  SIMD_IWRAPPER_2_CMP(cmpeq_epi32, cmp_epi32<CompareTypeInt::EQ>);   // return a == b (int32)
  SIMD_IWRAPPER_2_CMP(cmpeq_epi64, cmp_epi64<CompareTypeInt::EQ>);   // return a == b (int64)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8<CompareTypeInt::GT>);    // return a > b (int8)
-SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>);   // return a > b (int16)
+//SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8<CompareTypeInt::GT>);    // return a > b (int8)
+//SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>);   // return a > b (int16)
  SIMD_IWRAPPER_2_CMP(cmpgt_epi32, cmp_epi32<CompareTypeInt::GT>);   // return a > b (int32)
  SIMD_IWRAPPER_2_CMP(cmpgt_epi64, cmp_epi64<CompareTypeInt::GT>);   // return a > b (int64)
  SIMD_IWRAPPER_2_CMP(cmplt_epi32, cmp_epi32<CompareTypeInt::LT>);   // return a < b (int32)
@@ -458,7 +449,7 @@ SIMD_IWRAPPER_2I_(permute2f128_si, shuffle_i32x4);
  
  SIMD_IWRAPPER_1I(shuffle_epi32);
  
-SIMD_IWRAPPER_2(shuffle_epi8);
+//SIMD_IWRAPPER_2(shuffle_epi8);
  SIMD_DWRAPPER_2I(shuffle_pd);
  SIMD_WRAPPER_2I(shuffle_ps);
  
@@ -477,13 +468,13 @@ static SIMDINLINE Integer SIMDCALL unpackhi_epi32(Integer a, Integer b)
  }
  
  SIMD_IWRAPPER_2(unpackhi_epi64);
-SIMD_IWRAPPER_2(unpackhi_epi8);
+//SIMD_IWRAPPER_2(unpackhi_epi8);
  SIMD_DWRAPPER_2(unpackhi_pd);
  SIMD_WRAPPER_2(unpackhi_ps);
-SIMD_IWRAPPER_2(unpacklo_epi16);
+//SIMD_IWRAPPER_2(unpacklo_epi16);
  SIMD_IFWRAPPER_2(unpacklo_epi32, unpacklo_ps);
  SIMD_IWRAPPER_2(unpacklo_epi64);
-SIMD_IWRAPPER_2(unpacklo_epi8);
+//SIMD_IWRAPPER_2(unpacklo_epi8);
  SIMD_DWRAPPER_2(unpacklo_pd);
  SIMD_WRAPPER_2(unpacklo_ps);
  
@@ -546,11 +537,11 @@ static SIMDINLINE void SIMDCALL maskstore_ps(float *p, Integer mask, Float src)
      _mm512_mask_store_ps(p, m, src);
  }
  
-static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
-{
-    __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
-    return static_cast<uint64_t>(m);
-}
+//static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
+//{
+//    __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
+//    return static_cast<uint64_t>(m);
+//}
  
  static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a)
  {
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl

index 5063c5293068b98acd5e377f93e66b8cf20d5c35..fed6307f4bc3c4e9ac97c3d5c51f4edc1069422b 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl
@@ -133,10 +133,6 @@
  #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
  
  private:
-    static SIMDINLINE Integer vmask(__mmask8 m)
-    {
-        return _mm512_maskz_set1_epi64(m, -1LL);
-    }
      static SIMDINLINE Integer vmask(__mmask32 m)
      {
          return _mm512_maskz_set1_epi16(m, -1);
@@ -145,17 +141,57 @@ private:
      {
          return _mm512_maskz_set1_epi8(m, -1);
      }
-
  public:
+
+SIMD_IWRAPPER_2(add_epi8);                  // return a + b (int8)
+SIMD_IWRAPPER_2(adds_epu8);                 // return ((a + b) > 0xff) ? 0xff : (a + b) (uint8) 
+SIMD_IWRAPPER_2(subs_epu8);                 // return (b > a) ? 0 : (a - b) (uint8)
+
  SIMD_WRAPPER_2(and_ps);                     // return a & b       (float treated as int)
  SIMD_WRAPPER_2(andnot_ps);                  // return (~a) & b    (float treated as int)
  SIMD_WRAPPER_2(or_ps);                      // return a | b       (float treated as int)
  SIMD_WRAPPER_2(xor_ps);                     // return a ^ b       (float treated as int)
  
-SIMD_IWRAPPER_2(packs_epi16);   // See documentation for _mm512_packs_epi16 and _mm512_packs_epi16
-SIMD_IWRAPPER_2(packs_epi32);   // See documentation for _mm512_packs_epi32 and _mm512_packs_epi32
-SIMD_IWRAPPER_2(packus_epi16);  // See documentation for _mm512_packus_epi16 and _mm512_packus_epi16
-SIMD_IWRAPPER_2(packus_epi32);  // See documentation for _mm512_packus_epi32 and _mm512_packus_epi32
+SIMD_IWRAPPER_1_8(cvtepu8_epi16);           // return (int16)a    (uint8 --> int16)
+
+template<CompareTypeInt CmpTypeT>
+static SIMDINLINE Integer SIMDCALL cmp_epi8(Integer a, Integer b)
+{
+    // Legacy vector mask generator
+    __mmask64 result = _mm512_cmp_epi8_mask(a, b, static_cast<const int>(CmpTypeT));
+    return vmask(result);
+}
+template<CompareTypeInt CmpTypeT>
+static SIMDINLINE Integer SIMDCALL cmp_epi16(Integer a, Integer b)
+{
+    // Legacy vector mask generator
+    __mmask32 result = _mm512_cmp_epi16_mask(a, b, static_cast<const int>(CmpTypeT));
+    return vmask(result);
+}
+
+SIMD_IWRAPPER_2_CMP(cmpeq_epi8,  cmp_epi8<CompareTypeInt::EQ>);    // return a == b (int8)
+SIMD_IWRAPPER_2_CMP(cmpeq_epi16, cmp_epi16<CompareTypeInt::EQ>);   // return a == b (int16)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi8,  cmp_epi8<CompareTypeInt::GT>);    // return a > b (int8)
+SIMD_IWRAPPER_2_CMP(cmpgt_epi16, cmp_epi16<CompareTypeInt::GT>);   // return a > b (int16)
+
+SIMD_IWRAPPER_2(packs_epi16);               // See documentation for _mm512_packs_epi16
+SIMD_IWRAPPER_2(packs_epi32);               // See documentation for _mm512_packs_epi32
+SIMD_IWRAPPER_2(packus_epi16);              // See documentation for _mm512_packus_epi16
+SIMD_IWRAPPER_2(packus_epi32);              // See documentation for _mm512_packus_epi32
+
+SIMD_IWRAPPER_2(unpackhi_epi8);             // See documentation for _mm512_unpackhi_epi8
+SIMD_IWRAPPER_2(unpacklo_epi16);            // See documentation for _mm512_unpacklo_epi16
+SIMD_IWRAPPER_2(unpacklo_epi8);             // See documentation for _mm512_unpacklo_epi8
+
+SIMD_IWRAPPER_2(shuffle_epi8);
+
+static SIMDINLINE uint64_t SIMDCALL movemask_epi8(Integer a)
+{
+    __mmask64 m = _mm512_cmplt_epi8_mask(a, setzero_si());
+    return static_cast<uint64_t>(m);
+}
+
+
  
  #undef SIMD_WRAPPER_1_
  #undef SIMD_WRAPPER_1
diff --git a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl

index 2ee7639ccfa7bbf2d75a67e77d1577b68f9cdb11..690ab386b46e648a4ffee33e90ab166064120200 100644 (file)
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl
@@ -132,21 +132,6 @@
      }
  #define SIMD_IWRAPPER_2I(op) SIMD_IWRAPPER_2I_(op, op)
  
-private:
-    static SIMDINLINE Integer vmask(__mmask8 m)
-    {
-        return _mm512_mask_set1_epi64(_mm512_setzero_si512(), m, -1LL);
-    }
-    static SIMDINLINE Integer vmask(__mmask32 m)
-    {
-        return _mm512_mask_set1_epi16(_mm512_setzero_si512(), m, -1);
-    }
-    static SIMDINLINE Integer vmask(__mmask64 m)
-    {
-        return _mm512_mask_set1_epi8(_mm512_setzero_si512(), m, -1);
-    }
-
-public:
  SIMD_WRAPPERI_2_(and_ps, and_epi32);          // return a & b       (float treated as int)
  SIMD_WRAPPERI_2_(andnot_ps, andnot_epi32);    // return (~a) & b    (float treated as int)
  SIMD_WRAPPERI_2_(or_ps, or_epi32);            // return a | b       (float treated as int)
author	Tim Rowley <timothy.o.rowley@intel.com>
	Thu, 27 Jul 2017 20:33:10 +0000 (15:33 -0500)
committer	Tim Rowley <timothy.o.rowley@intel.com>
	Wed, 2 Aug 2017 16:39:33 +0000 (11:39 -0500)
src/gallium/drivers/swr/rasterizer/common/simdlib.hpp		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_core.inl		patch \| blob \| history
src/gallium/drivers/swr/rasterizer/common/simdlib_512_avx512_knights.inl		patch \| blob \| history