From: Olga Makhotina Date: Mon, 12 Feb 2018 06:09:20 +0000 (+0000) Subject: Add missing mask[z]_scalef_round_s[d,s] intrinsics X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=158061a65bf2e68e44bc2f9622be41f57b0a47d0;p=gcc.git Add missing mask[z]_scalef_round_s[d,s] intrinsics gcc/ * config/i386/avx512fintrin.h (_mm_mask_scalef_round_sd, _mm_maskz_scalef_round_sd, _mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss): New intrinsics. (__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round): Fix. * config/i386/i386-builtin.def (__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round): Remove. (__builtin_ia32_scalefsd_mask_round, __builtin_ia32_scalefss_mask_round): New intrinsics. * config/i386/sse.md (vmscalef<mode><round_name>): Renamed to ... (vmscalef<mode><mask_scalar_name><round_scalar_name>): ... this. ((match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")): Changed to ... ((match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")): ... this. ("vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0| %0, %1, %2<round_op3>}"): Changed to ... ("vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"): ... this. * config/i386/subst.md (round_scalar_nimm_predicate): New. gcc/testsuite/ * gcc.target/i386/avx512f-vscalefsd-1.c (_mm_mask_scalef_round_sd, _mm_maskz_scalef_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vscalefsd-2.c (_mm_scalef_round_sd, _mm_mask_scalef_round_sd, _mm_maskz_scalef_round_sd): Test new intrinsics. * gcc.target/i386/avx512f-vscalefss-1.c (_mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss): Test new intrinsics. * gcc.target/i386/avx512f-vscalefss-2.c (_mm_scalef_round_ss, _mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss): Test new intrinsics. * gcc.target/i386/avx-1.c (__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round): Remove builtin. (__builtin_ia32_scalefsd_mask_round, __builtin_ia32_scalefss_mask_round): Test new builtin. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. 
From-SVN: r257578 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ac1803e9e4b..b29822b0ebd 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,26 @@ +2018-02-12 Olga Makhotina + + * config/i386/avx512fintrin.h (_mm_mask_scalef_round_sd, + _mm_maskz_scalef_round_sd, _mm_mask_scalef_round_ss, + _mm_maskz_scalef_round_ss): New intrinsics. + (__builtin_ia32_scalefsd_round, __builtin_ia32_scalefss_round): Fix. + * config/i386/i386-builtin.def (__builtin_ia32_scalefsd_round, + __builtin_ia32_scalefss_round): Remove. + (__builtin_ia32_scalefsd_mask_round, + __builtin_ia32_scalefss_mask_round): New intrinsics. + * config/i386/sse.md (vmscalef<mode><round_name>): Renamed to ... + (vmscalef<mode><mask_scalar_name><round_scalar_name>): ... this. + ((match_operand:VF_128 2 "<round_nimm_predicate>" + "<round_constraint>")): Changed to ... + ((match_operand:VF_128 2 "<round_scalar_nimm_predicate>" + "<round_scalar_constraint>")): ... this. + ("vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0| + %0, %1, %2<round_op3>}"): Changed to ... + ("vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, + %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, + %2<round_scalar_mask_op3>}"): ... this. + * config/i386/subst.md (round_scalar_nimm_predicate): New. + 2018-02-12 Olga Makhotina * config/i386/avx512fintrin.h (_mm_mask_sqrt_round_sd) diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index ffbb1d9c14c..ba65acadf8d 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -3103,18 +3103,67 @@ extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R) { - return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, - (__v2df) __B, - __R); + return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, __R); +} + +extern __inline __m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) __W, + (__mmask8) __U, __R); +} + +extern __inline 
__m128d +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_round_sd (__mmask8 __U, __m128d __A, __m128d __B, + const int __R) +{ + return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) __U, __R); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) { - return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, - (__v4sf) __B, - __R); + return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_scalef_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, + const int __R) +{ + return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) __W, + (__mmask8) __U, __R); +} + +extern __inline __m128 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) +{ + return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U, __R); } #else #define _mm512_scalef_round_pd(A, B, C) \ @@ -3136,10 +3185,12 @@ _mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) #define _mm_scalef_round_sd(A, B, C) \ - (__m128d)__builtin_ia32_scalefsd_round(A, B, C) + (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \ + (__v2df)_mm_setzero_pd (), -1, C) #define _mm_scalef_round_ss(A, B, C) \ - (__m128)__builtin_ia32_scalefss_round(A, B, C) + (__m128)__builtin_ia32_scalefss_mask_round (A, B, \ + (__v4sf)_mm_setzero_ps (), -1, C) #endif #ifdef __OPTIMIZE__ @@ -12182,18 +12233,24 @@ extern __inline __m128d __attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) _mm_scalef_sd (__m128d __A, __m128d __B) { - return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, - (__v2df) __B, - _MM_FROUND_CUR_DIRECTION); + return (__m128d) __builtin_ia32_scalefsd_mask_round ((__v2df) __A, + (__v2df) __B, + (__v2df) + _mm_setzero_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_scalef_ss (__m128 __A, __m128 __B) { - return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, - (__v4sf) __B, - _MM_FROUND_CUR_DIRECTION); + return (__m128) __builtin_ia32_scalefss_mask_round ((__v4sf) __A, + (__v4sf) __B, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); } extern __inline __m512d diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 506104221e7..169189d962c 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2718,8 +2718,8 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT) -BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT) -BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT) +BDESC 
(OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_mask_round, "__builtin_ia32_scalefsd_mask_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) +BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_mask_round, "__builtin_ia32_scalefss_mask_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT) BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_mask_round, "__builtin_ia32_sqrtsd_mask_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI_INT) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 47687a614fb..da9af2367ae 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -8318,17 +8318,17 @@ operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); }) -(define_insn "avx512f_vmscalef<mode><round_name>" +(define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>" [(set (match_operand:VF_128 0 "register_operand" "=v") (vec_merge:VF_128 (unspec:VF_128 [(match_operand:VF_128 1 "register_operand" "v") - (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")] + (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")] UNSPEC_SCALEF) (match_dup 1) (const_int 1)))] "TARGET_AVX512F" - "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}" + "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}" [(set_attr "prefix" "evex") (set_attr "mode" "<ssescalarmode>")]) diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md index 9ed2e8dd3d0..3f638650846 100644 --- a/gcc/config/i386/subst.md +++ b/gcc/config/i386/subst.md @@ -271,6 +271,7 @@ (define_subst_attr "round_scalar_mask_op3" "round_scalar" "" "<round_scalar_mask_operand3>") (define_subst_attr "round_scalar_constraint" "round_scalar" "vm" "v") (define_subst_attr "round_scalar_prefix" 
"round_scalar" "vex" "evex") +(define_subst_attr "round_scalar_nimm_predicate" "round_scalar" "vector_operand" "register_operand") (define_subst "round_scalar" [(set (match_operand:SUBST_V 0) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 9c53f99d3fd..1a76336d0d0 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2018-02-12 Olga Makhotina + + * gcc.target/i386/avx512f-vscalefsd-1.c (_mm_mask_scalef_round_sd, + _mm_maskz_scalef_round_sd): Test new intrinsics. + * gcc.target/i386/avx512f-vscalefsd-2.c (_mm_scalef_round_sd, + _mm_mask_scalef_round_sd, _mm_maskz_scalef_round_sd): Test new + intrinsics. + * gcc.target/i386/avx512f-vscalefss-1.c (_mm_mask_scalef_round_ss, + _mm_maskz_scalef_round_ss): Test new intrinsics. + * gcc.target/i386/avx512f-vscalefss-2.c (_mm_scalef_round_ss, + _mm_mask_scalef_round_ss, _mm_maskz_scalef_round_ss): Test new + intrinsics. + * gcc.target/i386/avx-1.c (__builtin_ia32_scalefsd_round, + __builtin_ia32_scalefss_round): Remove builtin. + (__builtin_ia32_scalefsd_mask_round, + __builtin_ia32_scalefss_mask_round): Test new builtin. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. 
+ 2018-02-12 Olga Makhotina * gcc.target/i386/avx512f-vsqrtsd-1.c (_mm_mask_sqrt_round_sd) diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index acfc85bfaa4..c877f9996b3 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -287,8 +287,8 @@ #define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4) #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8) #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8) -#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8) -#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8) +#define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8) +#define __builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8) #define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8) diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c index c883192db13..09bc5c63bb7 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-1.c @@ -1,14 +1,21 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefsd\[ 
\\t\]+\[^\n\]*\{rd-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefsd\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + #include <immintrin.h> volatile __m128d x; +volatile __mmask8 m; void extern avx512f_test (void) { x = _mm_scalef_sd (x, x); x = _mm_scalef_round_sd (x, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x = _mm_mask_scalef_round_sd (x, m, x, x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); + x = _mm_maskz_scalef_round_sd (m, x, x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c index 28738f77837..afe73dc5e83 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefsd-2.c @@ -6,6 +6,7 @@ #include "avx512f-check.h" #define SIZE (128 / 64) +#include "avx512f-mask-type.h" static void compute_scalefsd (double *s1, double *s2, double *r) @@ -17,20 +18,45 @@ compute_scalefsd (double *s1, double *s2, double *r) void static avx512f_test (void) { - union128d res1, s1, s2; + union128d res1, res2, res3, res4; + union128d s1, s2; double res_ref[SIZE]; + MASK_TYPE mask = MASK_VALUE; int i; for (i = 0; i < SIZE; i++) { s1.a[i] = 11.5 * (i + 1); s2.a[i] = 10.5 * (i + 1); + res_ref[i] = 9.5 * (i + 1); + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; } res1.x = _mm_scalef_sd (s1.x, s2.x); + res2.x = _mm_scalef_round_sd (s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res3.x = _mm_mask_scalef_round_sd (s1.x, mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res4.x = _mm_maskz_scalef_round_sd (mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); compute_scalefsd (s1.a, s2.a, res_ref); if (check_union128d 
(res1, res_ref)) abort (); + if (check_union128d (res2, res_ref)) + abort (); + + MASK_MERGE (d) (res_ref, mask, 1); + + if (check_union128d (res3, res_ref)) + abort (); + + MASK_ZERO (d) (res_ref, mask, 1); + + if (check_union128d (res4, res_ref)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c index f59525f9733..d1af336c267 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-1.c @@ -1,14 +1,20 @@ /* { dg-do compile } */ /* { dg-options "-mavx512f -O2" } */ /* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rn-sae\}\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{ru-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vscalefss\[ \\t\]+\[^\n\]*\{rz-sae\}\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include <immintrin.h> volatile __m128 x; +volatile __mmask8 m; void extern avx512f_test (void) { x = _mm_scalef_ss (x, x); x = _mm_scalef_round_ss (x, x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + x = _mm_mask_scalef_round_ss (x, m, x, x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); + x = _mm_maskz_scalef_round_ss (m, x, x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); } diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c index 935618438a1..811ff15e5e6 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c +++ b/gcc/testsuite/gcc.target/i386/avx512f-vscalefss-2.c @@ -6,6 +6,7 @@ #include "avx512f-check.h" #define SIZE (128 / 32) +#include "avx512f-mask-type.h" static void compute_scalefss (float *s1, float *s2, float 
*r) @@ -19,20 +20,45 @@ compute_scalefss (float *s1, float *s2, float *r) static void avx512f_test (void) { - union128 res1, s1, s2; + union128 res1, res2, res3, res4; + union128 s1, s2; float res_ref[SIZE]; + MASK_TYPE mask = MASK_VALUE; int i; for (i = 0; i < SIZE; i++) { s1.a[i] = 11.5 * (i + 1); s2.a[i] = 10.5 * (i + 1); + res_ref[i] = 9.5 * (i + 1); + res1.a[i] = DEFAULT_VALUE; + res2.a[i] = DEFAULT_VALUE; + res3.a[i] = DEFAULT_VALUE; + res4.a[i] = DEFAULT_VALUE; } res1.x = _mm_scalef_ss (s1.x, s2.x); + res2.x = _mm_scalef_round_ss (s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res3.x = _mm_mask_scalef_round_ss (s1.x, mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); + res4.x = _mm_maskz_scalef_round_ss (mask, s1.x, s2.x, + _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); compute_scalefss (s1.a, s2.a, res_ref); if (check_union128 (res1, res_ref)) abort (); + if (check_union128 (res2, res_ref)) + abort (); + + MASK_MERGE () (res_ref, mask, 1); + + if (check_union128 (res3, res_ref)) + abort (); + + MASK_ZERO () (res_ref, mask, 1); + + if (check_union128 (res4, res_ref)) + abort (); } diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 7b031998387..b43f903086e 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -304,8 +304,8 @@ #define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4) #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8) #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8) -#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8) -#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8) +#define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8) +#define 
__builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8) #define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 0b18eecf6b9..8f93d65f5c1 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -305,8 +305,8 @@ #define __builtin_ia32_rndscaless_round(A, B, C, D) __builtin_ia32_rndscaless_round(A, B, 1, 4) #define __builtin_ia32_scalefpd512_mask(A, B, C, D, E) __builtin_ia32_scalefpd512_mask(A, B, C, D, 8) #define __builtin_ia32_scalefps512_mask(A, B, C, D, E) __builtin_ia32_scalefps512_mask(A, B, C, D, 8) -#define __builtin_ia32_scalefsd_round(A, B, C) __builtin_ia32_scalefsd_round(A, B, 8) -#define __builtin_ia32_scalefss_round(A, B, C) __builtin_ia32_scalefss_round(A, B, 8) +#define __builtin_ia32_scalefsd_mask_round(A, B, C, D, E) __builtin_ia32_scalefsd_mask_round(A, B, C, D, 8) +#define __builtin_ia32_scalefss_mask_round(A, B, C, D, E) __builtin_ia32_scalefss_mask_round(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8df(A, B, C, D, F) __builtin_ia32_scatterdiv8df(A, B, C, D, 8) #define __builtin_ia32_scatterdiv8di(A, B, C, D, F) __builtin_ia32_scatterdiv8di(A, B, C, D, 8) #define __builtin_ia32_scatterdiv16sf(A, B, C, D, F) __builtin_ia32_scatterdiv16sf(A, B, C, D, 8)