From 12d69dbfff9dd5ad4a30b20d1636f5cab6425e8c Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 15 Jul 2020 11:34:44 +0200 Subject: [PATCH] fix _mm512_{,mask_}cmp*_p[ds]_mask at -O0 [PR96174] The _mm512_{,mask_}cmp_p[ds]_mask and also _mm_{,mask_}cmp_s[ds]_mask intrinsics have an argument which must have a constant passed to it and so use an inline version only for ifdef __OPTIMIZE__ and have a #define for -O0. But the _mm512_{,mask_}cmp*_p[ds]_mask intrinsics don't need a constant argument, they are essentially the first set with the constant added to them implicitly based on the comparison name, and so there is no #define version for them (correctly). But their inline versions are defined in between the first and s[ds] set and so inside of ifdef __OPTIMIZE__, which means that with -O0 they aren't defined at all. This patch fixes that by moving those after the #ifdef __OPTIMIZE__ #else use #define #endif block. 2020-07-15 Jakub Jelinek PR target/96174 * config/i386/avx512fintrin.h (_mm512_cmpeq_pd_mask, _mm512_mask_cmpeq_pd_mask, _mm512_cmplt_pd_mask, _mm512_mask_cmplt_pd_mask, _mm512_cmple_pd_mask, _mm512_mask_cmple_pd_mask, _mm512_cmpunord_pd_mask, _mm512_mask_cmpunord_pd_mask, _mm512_cmpneq_pd_mask, _mm512_mask_cmpneq_pd_mask, _mm512_cmpnlt_pd_mask, _mm512_mask_cmpnlt_pd_mask, _mm512_cmpnle_pd_mask, _mm512_mask_cmpnle_pd_mask, _mm512_cmpord_pd_mask, _mm512_mask_cmpord_pd_mask, _mm512_cmpeq_ps_mask, _mm512_mask_cmpeq_ps_mask, _mm512_cmplt_ps_mask, _mm512_mask_cmplt_ps_mask, _mm512_cmple_ps_mask, _mm512_mask_cmple_ps_mask, _mm512_cmpunord_ps_mask, _mm512_mask_cmpunord_ps_mask, _mm512_cmpneq_ps_mask, _mm512_mask_cmpneq_ps_mask, _mm512_cmpnlt_ps_mask, _mm512_mask_cmpnlt_ps_mask, _mm512_cmpnle_ps_mask, _mm512_mask_cmpnle_ps_mask, _mm512_cmpord_ps_mask, _mm512_mask_cmpord_ps_mask): Move outside of __OPTIMIZE__ guarded section. * gcc.target/i386/avx512f-vcmppd-3.c: New test. * gcc.target/i386/avx512f-vcmpps-3.c: New test. 
--- gcc/config/i386/avx512fintrin.h | 164 +++++++++--------- .../gcc.target/i386/avx512f-vcmppd-3.c | 4 + .../gcc.target/i386/avx512f-vcmpps-3.c | 4 + 3 files changed, 90 insertions(+), 82 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vcmppd-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-vcmpps-3.c diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index 4bcd697387a..0d53dda3a27 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -15125,6 +15125,88 @@ _mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) _MM_FROUND_CUR_DIRECTION); } +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, + (__v2df) __Y, __P, + (__mmask8) __M, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +extern __inline __mmask8 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) +{ + return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, + (__v4sf) __Y, __P, + (__mmask8) __M, + _MM_FROUND_CUR_DIRECTION); +} + +#else +#define _mm512_cmp_pd_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), 
(int)(P),\ + (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm512_cmp_ps_mask(X, Y, P) \ + ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(P),\ + (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ + (__v8df)(__m512d)(Y), (int)(P),\ + (__mmask8)(M), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ + ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ + (__v16sf)(__m512)(Y), (int)(P),\ + (__mmask16)(M),_MM_FROUND_CUR_DIRECTION)) + +#define _mm_cmp_sd_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P),\ + (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_cmp_sd_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ + (__v2df)(__m128d)(Y), (int)(P),\ + M,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_cmp_ss_mask(X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ + ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ + (__v4sf)(__m128)(Y), (int)(P), \ + M,_MM_FROUND_CUR_DIRECTION)) +#endif + extern __inline __mmask8 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_cmpeq_pd_mask (__m512d __X, __m512d __Y) @@ -15445,88 +15527,6 @@ _mm512_mask_cmpord_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y) _MM_FROUND_CUR_DIRECTION); } -extern __inline __mmask8 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) -{ - return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, - (__v2df) __Y, __P, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __mmask8 -__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) -_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) -{ - return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, - (__v2df) __Y, __P, - (__mmask8) __M, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __mmask8 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) -{ - return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, - (__v4sf) __Y, __P, - (__mmask8) -1, - _MM_FROUND_CUR_DIRECTION); -} - -extern __inline __mmask8 -__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) -{ - return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, - (__v4sf) __Y, __P, - (__mmask8) __M, - _MM_FROUND_CUR_DIRECTION); -} - -#else -#define _mm512_cmp_pd_mask(X, Y, P) \ - ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ - (__v8df)(__m512d)(Y), (int)(P),\ - (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) - -#define _mm512_cmp_ps_mask(X, Y, P) \ - ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ - (__v16sf)(__m512)(Y), (int)(P),\ - (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ - ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ - (__v8df)(__m512d)(Y), (int)(P),\ - (__mmask8)(M), _MM_FROUND_CUR_DIRECTION)) - -#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ - ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ - (__v16sf)(__m512)(Y), (int)(P),\ - (__mmask16)(M),_MM_FROUND_CUR_DIRECTION)) - -#define _mm_cmp_sd_mask(X, Y, P) \ - ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P),\ - (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) - -#define _mm_mask_cmp_sd_mask(M, X, Y, P) \ - ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ - (__v2df)(__m128d)(Y), (int)(P),\ - M,_MM_FROUND_CUR_DIRECTION)) - -#define _mm_cmp_ss_mask(X, Y, P) \ - 
((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P), \ - (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) - -#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ - ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ - (__v4sf)(__m128)(Y), (int)(P), \ - M,_MM_FROUND_CUR_DIRECTION)) -#endif - extern __inline __mmask16 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_kmov (__mmask16 __A) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-3.c new file mode 100644 index 00000000000..3d36eca78fa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmppd-3.c @@ -0,0 +1,4 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512f" } */ + +#include "avx512f-vcmppd-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-3.c b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-3.c new file mode 100644 index 00000000000..af797aad2de --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vcmpps-3.c @@ -0,0 +1,4 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512f" } */ + +#include "avx512f-vcmpps-1.c" -- 2.30.2