swr/rast: fix _simd16_movemask_(ps,pd) native AVX512 intrinsics
author Tim Rowley <timothy.o.rowley@intel.com>
Thu, 4 May 2017 23:40:35 +0000 (18:40 -0500)
committer Tim Rowley <timothy.o.rowley@intel.com>
Tue, 30 May 2017 22:20:51 +0000 (17:20 -0500)
Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
src/gallium/drivers/swr/rasterizer/common/simd16intrin.h

index aa4757428fa1a3be1cd5192d820f9b41dd58993c..2fe18f2828249bb3522dc8756a4f738123045401 100644 (file)
@@ -539,8 +539,6 @@ INLINE int SIMDAPI _simd16_testz_ps(simd16scalar a, simd16scalar b)
     return lo & hi;
 }
 
-#define _simd16_cmplt_epi32(a, b) _simd16_cmpgt_epi32(b, a)
-
 SIMD16_EMU_AVX512_2(simd16scalar, _simd16_unpacklo_ps, _simd_unpacklo_ps)
 SIMD16_EMU_AVX512_2(simd16scalar, _simd16_unpackhi_ps, _simd_unpackhi_ps)
 SIMD16_EMU_AVX512_2(simd16scalard, _simd16_unpacklo_pd, _simd_unpacklo_pd)
@@ -898,12 +896,14 @@ INLINE simd16scalari SIMDAPI _simd16_blendv_epi32(simd16scalari a, simd16scalari
 
 INLINE simd16mask SIMDAPI _simd16_movemask_ps(simd16scalar a)
 {
-    return  _simd16_scalari2mask(_mm512_castps_si512(a));
+    // movemask_ps only checks the top bit of the float single elements
+    return  _simd16_scalari2mask(_mm512_and_si512(_mm512_castps_si512(a), _mm512_set1_epi32(0x80000000)));
 }
 
 INLINE simd16mask SIMDAPI _simd16_movemask_pd(simd16scalard a)
 {
-    return  _simd16_scalard2mask(a);
+    // movemask_pd only checks the top bit of the float double elements
+    return  _simd16_scalard2mask(_mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(a), _mm512_set1_epi64(0x8000000000000000))));
 }
 
 #if 0