re PR target/72805 (AVX512: invalid code generation involving masks)
author: Uros Bizjak <ubizjak@gmail.com>
Thu, 4 Aug 2016 21:14:39 +0000 (23:14 +0200)
committer: Uros Bizjak <uros@gcc.gnu.org>
Thu, 4 Aug 2016 21:14:39 +0000 (23:14 +0200)
PR target/72805
* config/i386/avx512fintrin.h (_mm512_cmp_epi32_mask) [!__OPTIMIZE__]:
Cast builtin function result to __mmask16 instead of __mmask8.
(_mm512_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
(_mm512_mask_cmp_epi32_mask) [!__OPTIMIZE__]: Ditto.
(_mm512_mask_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.

From-SVN: r239152

gcc/ChangeLog
gcc/config/i386/avx512fintrin.h

index 025476c075900cba23901c1fe68493b660bd4f22..fd6fa781277b5c63d92c8cf516a89e9c6d6b5ffc 100644 (file)
@@ -1,3 +1,12 @@
+2016-08-04  Uros Bizjak  <ubizjak@gmail.com>
+
+       PR target/72805
+       * config/i386/avx512fintrin.h (_mm512_cmp_epi32_mask) [!__OPTIMIZE__]:
+       Cast builtin function result to __mmask16 instead of __mmask8.
+       (_mm512_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
+       (_mm512_mask_cmp_epi32_mask) [!__OPTIMIZE__]: Ditto.
+       (_mm512_mask_cmp_epu32_mask) [!__OPTIMIZE__]: Ditto.
+
 2016-08-04  David Malcolm  <dmalcolm@redhat.com>
 
        * selftest.h (ASSERT_TRUE): Reimplement in terms of...
index 2f51be995fa6820bb65281a0f8aa4aaaa38085b3..2b30eae8ce06062e2e0daef69ba09c20b42224cd 100644 (file)
@@ -9130,9 +9130,9 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
                                           (__mmask8)-1))
 
 #define _mm512_cmp_epi32_mask(X, Y, P)                                 \
-  ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
-                                          (__v16si)(__m512i)(Y), (int)(P),\
-                                          (__mmask16)-1))
+  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),     \
+                                           (__v16si)(__m512i)(Y), (int)(P), \
+                                           (__mmask16)-1))
 
 #define _mm512_cmp_epu64_mask(X, Y, P)                                 \
   ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),      \
@@ -9140,66 +9140,66 @@ _mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y,
                                            (__mmask8)-1))
 
 #define _mm512_cmp_epu32_mask(X, Y, P)                                 \
-  ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
-                                           (__v16si)(__m512i)(Y), (int)(P),\
-                                           (__mmask16)-1))
+  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),    \
+                                            (__v16si)(__m512i)(Y), (int)(P), \
+                                            (__mmask16)-1))
 
-#define _mm512_cmp_round_pd_mask(X, Y, P, R)                                   \
+#define _mm512_cmp_round_pd_mask(X, Y, P, R)                           \
   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),      \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)-1, R))
 
-#define _mm512_cmp_round_ps_mask(X, Y, P, R)                                   \
+#define _mm512_cmp_round_ps_mask(X, Y, P, R)                           \
   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),     \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)-1, R))
 
-#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                                 \
+#define _mm512_mask_cmp_epi64_mask(M, X, Y, P)                         \
   ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X),       \
                                           (__v8di)(__m512i)(Y), (int)(P),\
                                           (__mmask8)M))
 
-#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                                 \
-  ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),      \
-                                          (__v16si)(__m512i)(Y), (int)(P),\
-                                          (__mmask16)M))
+#define _mm512_mask_cmp_epi32_mask(M, X, Y, P)                         \
+  ((__mmask16) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X),     \
+                                           (__v16si)(__m512i)(Y), (int)(P), \
+                                           (__mmask16)M))
 
-#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                                 \
+#define _mm512_mask_cmp_epu64_mask(M, X, Y, P)                         \
   ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X),      \
                                            (__v8di)(__m512i)(Y), (int)(P),\
                                            (__mmask8)M))
 
-#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                                 \
-  ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),     \
-                                           (__v16si)(__m512i)(Y), (int)(P),\
-                                           (__mmask16)M))
+#define _mm512_mask_cmp_epu32_mask(M, X, Y, P)                         \
+  ((__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X),    \
+                                            (__v16si)(__m512i)(Y), (int)(P), \
+                                            (__mmask16)M))
 
-#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                                   \
+#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R)                   \
   ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X),      \
                                            (__v8df)(__m512d)(Y), (int)(P),\
                                            (__mmask8)M, R))
 
-#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                                   \
+#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R)                   \
   ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X),     \
                                             (__v16sf)(__m512)(Y), (int)(P),\
                                             (__mmask16)M, R))
 
-#define _mm_cmp_round_sd_mask(X, Y, P, R)                                      \
+#define _mm_cmp_round_sd_mask(X, Y, P, R)                              \
   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),         \
                                         (__v2df)(__m128d)(Y), (int)(P),\
                                         (__mmask8)-1, R))
 
-#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                                      \
+#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R)                      \
   ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X),         \
                                         (__v2df)(__m128d)(Y), (int)(P),\
                                         (M), R))
 
-#define _mm_cmp_round_ss_mask(X, Y, P, R)                                      \
+#define _mm_cmp_round_ss_mask(X, Y, P, R)                              \
   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),          \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         (__mmask8)-1, R))
 
-#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                                      \
+#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R)                      \
   ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X),          \
                                         (__v4sf)(__m128)(Y), (int)(P), \
                                         (M), R))