(__v2df) __A);
}
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128d
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
+{
+ return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
+ (__v2df) __A,
+ (__v2df) _mm_setzero_pd (),
+ (__mmask8) __U);
+}
+
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
(__v4sf) __A);
}
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) __W,
+ (__mmask8) __U);
+}
+
+extern __inline __m128
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
+{
+ return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
+ (__v4sf) __A,
+ (__v4sf) _mm_setzero_ps (),
+ (__mmask8) __U);
+}
+
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v2df_mask, "__builtin_ia32_rsqrt14sd_mask", IX86_BUILTIN_RSQRT14SDMASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
+BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14_v4sf_mask, "__builtin_ia32_rsqrt14ss_mask", IX86_BUILTIN_RSQRT14SSMASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_UQI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_UHI)
(set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")])
+(define_insn "rsqrt14_<mode>_mask"
+ [(set (match_operand:VF_128 0 "register_operand" "=v")
+ (vec_merge:VF_128
+ (vec_merge:VF_128
+ (unspec:VF_128
+ [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
+ UNSPEC_RSQRT14)
+ (match_operand:VF_128 3 "vector_move_operand" "0C")
+ (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
+ (match_operand:VF_128 2 "register_operand" "v")
+ (const_int 1)))]
+ "TARGET_AVX512F"
+ "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
+ [(set_attr "type" "sse")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "sse_vmrsqrtv4sf2"
[(set (match_operand:V4SF 0 "register_operand" "=x,x")
(vec_merge:V4SF
+2017-05-11 Julia Koval <julia.koval@intel.com>
+
+ * gcc.target/i386/avx512f-vrsqrt14sd-1.c: Test new intrinsics.
+ * gcc.target/i386/avx512f-vrsqrt14sd-2.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14ss-1.c: Ditto.
+ * gcc.target/i386/avx512f-vrsqrt14ss-2.c: Ditto.
+
2017-05-11 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/70538
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14sd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128d x1, x2;
+volatile __m128d x1, x2, x3;
volatile __mmask8 m;
void extern
avx512f_test (void)
{
x1 = _mm_rsqrt14_sd (x1, x2);
+ x1 = _mm_mask_rsqrt14_sd (x3, m, x1, x2);
+ x1 = _mm_maskz_rsqrt14_sd (m, x1, x2);
}
#include <math.h>
#include "avx512f-check.h"
+#include "avx512f-helper.h"
static void
compute_vrsqrt14sd (double *s1, double *s2, double *r)
static void
avx512f_test (void)
{
- union128d s1, s2, res1;
+ union128d s1, s2, res1, res2, res3;
+ __mmask8 m = 0;
double res_ref[2];
s1.x = _mm_set_pd (-3.0, 111.111);
if (check_fp_union128d (res1, res_ref))
abort ();
+
+ res2.x = _mm_set_pd (-4.0, DEFAULT_VALUE);
+ res2.x = _mm_mask_rsqrt14_sd(res2.x, m, s1.x, s2.x);
+
+ MASK_MERGE (d) (res_ref, m, 1);
+ if (checkVd (res2.a, res_ref, 2))
+ abort();
+
+ res3.x = _mm_maskz_rsqrt14_sd(m, s1.x, s2.x);
+
+ MASK_ZERO (d) (res_ref, m, 1);
+ if (checkVd (res3.a, res_ref, 2))
+ abort();
}
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vrsqrt14ss\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
-volatile __m128 x1, x2;
+volatile __m128 x1, x2, x3;
+volatile __mmask8 m;
void extern
avx512f_test (void)
{
x1 = _mm_rsqrt14_ss (x1, x2);
+ x1 = _mm_mask_rsqrt14_ss (x3, m, x1, x2);
+ x1 = _mm_maskz_rsqrt14_ss (m, x1, x2);
}
#include <math.h>
#include "avx512f-check.h"
+#include "avx512f-helper.h"
static void
compute_vrsqrt14ss (float *s1, float *s2, float *r)
static void
avx512f_test (void)
{
- union128 s1, s2, res1;
+ union128 s1, s2, res1, res2, res3;
+ __mmask8 m = 0;
float res_ref[4];
s1.x = _mm_set_ps (-24.43, 68.346, -43.35, 546.46);
if (check_fp_union128 (res1, res_ref))
abort ();
+
+ res2.x = _mm_set_ps (5.0, 6.0, 7.0, DEFAULT_VALUE);
+ res2.x = _mm_mask_rsqrt14_ss(res2.x, m, s1.x, s2.x);
+
+ MASK_MERGE () (res_ref, m, 1);
+ if (checkVf (res2.a, res_ref, 4))
+ abort();
+
+ res3.x = _mm_maskz_rsqrt14_ss(m, s1.x, s2.x);
+
+ MASK_ZERO () (res_ref, m, 1);
+ if (checkVf (res3.a, res_ref, 4))
+ abort();
}