PR tree-optimization/90356
* match.pd ((X +/- 0.0) +/- 0.0): Optimize into X +/- 0.0 if possible.
+2019-05-07 Wei Xiao <wei3.xiao@intel.com>
+
+ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BF16_SET,
+ OPTION_MASK_ISA_AVX512BF16_UNSET, OPTION_MASK_ISA2_AVX512BW_UNSET): New.
+ (OPTION_MASK_ISA2_AVX512F_UNSET): Add OPTION_MASK_ISA_AVX512BF16_UNSET.
+ (ix86_handle_option): Handle -mavx512bf16.
+ * config.gcc: Add avx512bf16vlintrin.h and avx512bf16intrin.h
+ to extra_headers.
+ * config/i386/avx512bf16vlintrin.h: New.
+ * config/i386/avx512bf16intrin.h: New.
+ * config/i386/cpuid.h (bit_AVX512BF16): New.
+ * config/i386/driver-i386.c (host_detect_local_cpu): Detect BF16.
+ * config/i386/i386-builtin-types.def: Add new types.
+ * config/i386/i386-builtin.def: Add new builtins.
+ * config/i386/i386-c.c (ix86_target_macros_internal): Define
+ __AVX512BF16__.
+ * config/i386/i386-options.c (ix86_target_string): Add -mavx512bf16.
+ (ix86_option_override_internal): Handle BF16.
+ (ix86_valid_target_attribute_inner_p): Ditto.
+ * config/i386/i386-expand.c (ix86_expand_args_builtin): Ditto.
+ * config/i386/i386-builtins.c (enum processor_features): Add
+ F_AVX512BF16.
+ (static const _isa_names_table isa_names_table): Ditto.
+ * config/i386/i386.h (TARGET_AVX512BF16, TARGET_AVX512BF16_P): New.
+ (PTA_AVX512BF16): Ditto.
+ * config/i386/i386.opt: Add -mavx512bf16.
+ * config/i386/immintrin.h: Include avx512bf16intrin.h
+ and avx512bf16vlintrin.h.
+ * config/i386/sse.md (avx512f_cvtne2ps2bf16_<mode><mask_name>,
+ avx512f_cvtneps2bf16_<mode><mask_name>,
+ avx512f_dpbf16ps_<mode><maskz_half_name>): New define_insn patterns.
+ * config/i386/subst.md (maskz_half): Add new subst.
+ * doc/invoke.texi: Document -mavx512bf16.
+
2019-05-07 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000-protos.h (rs6000_legitimize_reload_address_ptr):
(OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512BITALG_SET \
(OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512BF16_SET OPTION_MASK_ISA_AVX512BF16
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
+#define OPTION_MASK_ISA_AVX512BF16_UNSET OPTION_MASK_ISA_AVX512BF16
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
| OPTION_MASK_ISA_SSE_UNSET)
#define OPTION_MASK_ISA2_AVX512F_UNSET \
- (OPTION_MASK_ISA_AVX5124FMAPS_UNSET | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
+ (OPTION_MASK_ISA_AVX512BF16_UNSET \
+ | OPTION_MASK_ISA_AVX5124FMAPS_UNSET \
+ | OPTION_MASK_ISA_AVX5124VNNIW_UNSET)
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
(OPTION_MASK_ISA2_AVX512F_UNSET)
+#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA_AVX512BF16_UNSET
+
/* Set 1 << value as value of -malign-FLAG option. */
static void
}
return true;
+ case OPT_mavx512bf16:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BF16_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BF16_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BF16_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BF16_UNSET;
+ }
+ return true;
+
case OPT_msgx:
if (value)
{
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512BW_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512BW_UNSET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512BW_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512BW_UNSET;
}
return true;
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
- waitpkgintrin.h cldemoteintrin.h"
+ waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h"
;;
x86_64-*-*)
cpu_type=i386
avx512vnnivlintrin.h vaesintrin.h vpclmulqdqintrin.h
avx512vpopcntdqvlintrin.h avx512bitalgintrin.h
pconfigintrin.h wbnoinvdintrin.h movdirintrin.h
- waitpkgintrin.h cldemoteintrin.h"
+ waitpkgintrin.h cldemoteintrin.h avx512bf16vlintrin.h avx512bf16intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
--- /dev/null
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bf16intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BF16INTRIN_H_INCLUDED
+#define _AVX512BF16INTRIN_H_INCLUDED
+
+#ifndef __AVX512BF16__
+#pragma GCC push_options
+#pragma GCC target("avx512bf16")
+#define __DISABLE_AVX512BF16__
+#endif /* __AVX512BF16__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v32bh __attribute__ ((__vector_size__ (64)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef short __m512bh __attribute__ ((__vector_size__ (64), __may_alias__));
+
+/* vcvtne2ps2bf16 */
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtne2ps_pbh (__m512 __A, __m512 __B)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi(__A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtne2ps_pbh (__m512bh __A, __mmask32 __B, __m512 __C, __m512 __D)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m512bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtne2ps_pbh (__mmask32 __A, __m512 __B, __m512 __C)
+{
+ return (__m512bh)__builtin_ia32_cvtne2ps2bf16_v32hi_maskz(__B, __C, __A);
+}
+
+/* vcvtneps2bf16 */
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtneps_pbh (__m512 __A)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf(__A);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtneps_pbh (__m256bh __A, __mmask16 __B, __m512 __C)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_mask(__C, __A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtneps_pbh (__mmask16 __A, __m512 __B)
+{
+ return (__m256bh)__builtin_ia32_cvtneps2bf16_v16sf_maskz(__B, __A);
+}
+
+/* vdpbf16ps */
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_dpbf16_ps (__m512 __A, __m512bh __B, __m512bh __C)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf(__A, __B, __C);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_dpbf16_ps (__m512 __A, __mmask16 __B, __m512bh __C, __m512bh __D)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m512
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_dpbf16_ps (__mmask16 __A, __m512 __B, __m512bh __C, __m512bh __D)
+{
+ return (__m512)__builtin_ia32_dpbf16ps_v16sf_maskz(__B, __C, __D, __A);
+}
+
+#ifdef __DISABLE_AVX512BF16__
+#undef __DISABLE_AVX512BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16__ */
+
+#endif /* _AVX512BF16INTRIN_H_INCLUDED */
--- /dev/null
+/* Copyright (C) 2019 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <avx512bf16vlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512BF16VLINTRIN_H_INCLUDED
+#define _AVX512BF16VLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VL__) || !defined(__AVX512BF16__)
+#pragma GCC push_options
+#pragma GCC target("avx512bf16,avx512vl")
+#define __DISABLE_AVX512BF16VL__
+#endif /* __AVX512VL__ || __AVX512BF16__ */
+
+/* Internal data types for implementing the intrinsics. */
+typedef short __v16bh __attribute__ ((__vector_size__ (32)));
+typedef short __v8bh __attribute__ ((__vector_size__ (16)));
+
+/* The Intel API is flexible enough that we must allow aliasing with other
+ vector types, and their scalar components. */
+typedef short __m256bh __attribute__ ((__vector_size__ (32), __may_alias__));
+typedef short __m128bh __attribute__ ((__vector_size__ (16), __may_alias__));
+
+/* vcvtne2ps2bf16 */
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtne2ps_pbh (__m256 __A, __m256 __B)
+{
+ return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi(__A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtne2ps_pbh (__m256bh __A, __mmask16 __B, __m256 __C, __m256 __D)
+{
+ return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m256bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtne2ps_pbh (__mmask16 __A, __m256 __B, __m256 __C)
+{
+ return (__m256bh)__builtin_ia32_cvtne2ps2bf16_v16hi_maskz(__B, __C, __A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtne2ps_pbh (__m128 __A, __m128 __B)
+{
+ return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi(__A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtne2ps_pbh (__m128bh __A, __mmask8 __B, __m128 __C, __m128 __D)
+{
+ return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_mask(__C, __D, __A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtne2ps_pbh (__mmask8 __A, __m128 __B, __m128 __C)
+{
+ return (__m128bh)__builtin_ia32_cvtne2ps2bf16_v8hi_maskz(__B, __C, __A);
+}
+
+/* vcvtneps2bf16 */
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtneps_pbh (__m256 __A)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf(__A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m256 __C)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_mask(__C, __A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtneps_pbh (__mmask8 __A, __m256 __B)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v8sf_maskz(__B, __A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtneps_pbh (__m128 __A)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf(__A);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtneps_pbh (__m128bh __A, __mmask8 __B, __m128 __C)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_mask(__C, __A, __B);
+}
+
+extern __inline __m128bh
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtneps_pbh (__mmask8 __A, __m128 __B)
+{
+ return (__m128bh)__builtin_ia32_cvtneps2bf16_v4sf_maskz(__B, __A);
+}
+
+/* vdpbf16ps */
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_dpbf16_ps (__m256 __A, __m256bh __B, __m256bh __C)
+{
+ return (__m256)__builtin_ia32_dpbf16ps_v8sf(__A, __B, __C);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_dpbf16_ps (__m256 __A, __mmask8 __B, __m256bh __C, __m256bh __D)
+{
+ return (__m256)__builtin_ia32_dpbf16ps_v8sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m256
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_dpbf16_ps (__mmask8 __A, __m256 __B, __m256bh __C, __m256bh __D)
+{
+ return (__m256)__builtin_ia32_dpbf16ps_v8sf_maskz(__B, __C, __D, __A);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_dpbf16_ps (__m128 __A, __m128bh __B, __m128bh __C)
+{
+ return (__m128)__builtin_ia32_dpbf16ps_v4sf(__A, __B, __C);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_dpbf16_ps (__m128 __A, __mmask8 __B, __m128bh __C, __m128bh __D)
+{
+ return (__m128)__builtin_ia32_dpbf16ps_v4sf_mask(__A, __C, __D, __B);
+}
+
+extern __inline __m128
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_dpbf16_ps (__mmask8 __A, __m128 __B, __m128bh __C, __m128bh __D)
+{
+ return (__m128)__builtin_ia32_dpbf16ps_v4sf_maskz(__B, __C, __D, __A);
+}
+
+#ifdef __DISABLE_AVX512BF16VL__
+#undef __DISABLE_AVX512BF16VL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BF16VL__ */
+
+#endif /* _AVX512BF16VLINTRIN_H_INCLUDED */
* <http://www.gnu.org/licenses/>.
*/
+/* %eax */
+#define bit_AVX512BF16 (1 << 5)
+
/* %ecx */
#define bit_SSE3 (1 << 0)
#define bit_PCLMUL (1 << 1)
unsigned int has_movdiri = 0, has_movdir64b = 0;
unsigned int has_waitpkg = 0;
unsigned int has_cldemote = 0;
+ unsigned int has_avx512bf16 = 0;
unsigned int has_ptwrite = 0;
has_shstk = ecx & bit_SHSTK;
has_pconfig = edx & bit_PCONFIG;
has_waitpkg = ecx & bit_WAITPKG;
+
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
+ has_avx512bf16 = eax & bit_AVX512BF16;
}
if (max_level >= 13)
const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
+  const char *avx512bf16
+    = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
- ptwrite,
+ ptwrite, avx512bf16,
NULL);
}
DEF_FUNCTION_TYPE (V4DI, V4DI)
DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI)
+
+# BF16 builtins
+DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF)
+DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF, V32HI, USI)
+DEF_FUNCTION_TYPE (V32HI, V16SF, V16SF, USI)
+DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF)
+DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16HI, V8SF, V8SF, UHI)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V4SF, UQI)
+DEF_FUNCTION_TYPE (V16HI, V16SF)
+DEF_FUNCTION_TYPE (V16HI, V16SF, V16HI, UHI)
+DEF_FUNCTION_TYPE (V16HI, V16SF, UHI)
+DEF_FUNCTION_TYPE (V8HI, V8SF)
+DEF_FUNCTION_TYPE (V8HI, V8SF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HI, V8SF, UQI)
+DEF_FUNCTION_TYPE (V8HI, V4SF)
+DEF_FUNCTION_TYPE (V8HI, V4SF, V8HI, UQI)
+DEF_FUNCTION_TYPE (V8HI, V4SF, UQI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V32HI, V32HI)
+DEF_FUNCTION_TYPE (V16SF, V16SF, V32HI, V32HI, UHI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI)
+DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI, UQI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI)
+DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI, UQI)
BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (0, OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+/* BF16 */
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi, "__builtin_ia32_cvtne2ps2bf16_v32hi", IX86_BUILTIN_CVTNE2PS2HI16_V32HI, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi_mask, "__builtin_ia32_cvtne2ps2bf16_v32hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF_V32HI_USI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v32hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v32hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V32HI_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V16SF_V16SF_USI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi, "__builtin_ia32_cvtne2ps2bf16_v16hi", IX86_BUILTIN_CVTNE2PS2HI16_V16HI, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi_mask, "__builtin_ia32_cvtne2ps2bf16_v16hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF_V16HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v16hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v16hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V16HI_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V8SF_V8SF_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi, "__builtin_ia32_cvtne2ps2bf16_v8hi", IX86_BUILTIN_CVTNE2PS2HI16_V8HI, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi_mask, "__builtin_ia32_cvtne2ps2bf16_v8hi_mask", IX86_BUILTIN_CVTNE2PS2HI16_V8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtne2ps2bf16_v8hi_maskz, "__builtin_ia32_cvtne2ps2bf16_v8hi_maskz", IX86_BUILTIN_CVTNE2PS2HI16_V8HI_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V4SF_V4SF_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf, "__builtin_ia32_cvtneps2bf16_v16sf", IX86_BUILTIN_CVTNEPS2HI16_V16SF, UNKNOWN, (int) V16HI_FTYPE_V16SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_mask, "__builtin_ia32_cvtneps2bf16_v16sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V16SF_MASK, UNKNOWN, (int) V16HI_FTYPE_V16SF_V16HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v16sf_maskz, "__builtin_ia32_cvtneps2bf16_v16sf_maskz", IX86_BUILTIN_CVTNEPS2HI16_V16SF_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16SF_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf, "__builtin_ia32_cvtneps2bf16_v8sf", IX86_BUILTIN_CVTNEPS2HI16_V8SF, UNKNOWN, (int) V8HI_FTYPE_V8SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_mask, "__builtin_ia32_cvtneps2bf16_v8sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V8SF_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v8sf_maskz, "__builtin_ia32_cvtneps2bf16_v8sf_maskz", IX86_BUILTIN_CVTNEPS2HI16_V8SF_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8SF_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf, "__builtin_ia32_cvtneps2bf16_v4sf", IX86_BUILTIN_CVTNEPS2HI16_V4SF, UNKNOWN, (int) V8HI_FTYPE_V4SF)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_mask, "__builtin_ia32_cvtneps2bf16_v4sf_mask", IX86_BUILTIN_CVTNEPS2HI16_V4SF_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_cvtneps2bf16_v4sf_maskz, "__builtin_ia32_cvtneps2bf16_v4sf_maskz", IX86_BUILTIN_CVTNEPS2HI16_V4SF_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V4SF_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf, "__builtin_ia32_dpbf16ps_v16sf", IX86_BUILTIN_DPHI16PS_V16SF, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_mask, "__builtin_ia32_dpbf16ps_v16sf_mask", IX86_BUILTIN_DPHI16PS_V16SF_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v16sf_maskz, "__builtin_ia32_dpbf16ps_v16sf_maskz", IX86_BUILTIN_DPHI16PS_V16SF_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V32HI_V32HI_UHI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf, "__builtin_ia32_dpbf16ps_v8sf", IX86_BUILTIN_DPHI16PS_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_mask, "__builtin_ia32_dpbf16ps_v8sf_mask", IX86_BUILTIN_DPHI16PS_V8SF_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v8sf_maskz, "__builtin_ia32_dpbf16ps_v8sf_maskz", IX86_BUILTIN_DPHI16PS_V8SF_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V16HI_V16HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf, "__builtin_ia32_dpbf16ps_v4sf", IX86_BUILTIN_DPHI16PS_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_mask, "__builtin_ia32_dpbf16ps_v4sf_mask", IX86_BUILTIN_DPHI16PS_V4SF_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
+BDESC (0, OPTION_MASK_ISA_AVX512BF16, CODE_FOR_avx512f_dpbf16ps_v4sf_maskz, "__builtin_ia32_dpbf16ps_v4sf_maskz", IX86_BUILTIN_DPHI16PS_V4SF_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V8HI_V8HI_UQI)
+
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
F_VPCLMULQDQ,
F_AVX512VNNI,
F_AVX512BITALG,
+ F_AVX512BF16,
F_MAX
};
{"gfni", F_GFNI, P_ZERO},
{"vpclmulqdq", F_VPCLMULQDQ, P_ZERO},
{"avx512vnni", F_AVX512VNNI, P_ZERO},
- {"avx512bitalg", F_AVX512BITALG, P_ZERO}
+ {"avx512bitalg", F_AVX512BITALG, P_ZERO},
+ {"avx512bf16", F_AVX512BF16, P_ZERO}
};
/* This parses the attribute arguments to target in DECL and determines
def_or_undef (parse_in, "__CLDEMOTE__");
if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
def_or_undef (parse_in, "__PTWRITE__");
+ if (isa_flag2 & OPTION_MASK_ISA_AVX512BF16)
+ def_or_undef (parse_in, "__AVX512BF16__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
case V8DF_FTYPE_V2DF:
case V8DF_FTYPE_V8DF:
case V4DI_FTYPE_V4DI:
+ case V16HI_FTYPE_V16SF:
+ case V8HI_FTYPE_V8SF:
+ case V8HI_FTYPE_V4SF:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
case USI_FTYPE_USI_USI:
case UDI_FTYPE_UDI_UDI:
case V16SI_FTYPE_V8DF_V8DF:
+ case V32HI_FTYPE_V16SF_V16SF:
+ case V16HI_FTYPE_V8SF_V8SF:
+ case V8HI_FTYPE_V4SF_V4SF:
+ case V16HI_FTYPE_V16SF_UHI:
+ case V8HI_FTYPE_V8SF_UQI:
+ case V8HI_FTYPE_V4SF_UQI:
nargs = 2;
break;
case V2DI_FTYPE_V2DI_INT_CONVERT:
case V16HI_FTYPE_V16HI_V16HI_V16HI:
case V8SI_FTYPE_V8SI_V8SI_V8SI:
case V8HI_FTYPE_V8HI_V8HI_V8HI:
+ case V32HI_FTYPE_V16SF_V16SF_USI:
+ case V16HI_FTYPE_V8SF_V8SF_UHI:
+ case V8HI_FTYPE_V4SF_V4SF_UQI:
+ case V16HI_FTYPE_V16SF_V16HI_UHI:
+ case V8HI_FTYPE_V8SF_V8HI_UQI:
+ case V8HI_FTYPE_V4SF_V8HI_UQI:
+ case V16SF_FTYPE_V16SF_V32HI_V32HI:
+ case V8SF_FTYPE_V8SF_V16HI_V16HI:
+ case V4SF_FTYPE_V4SF_V8HI_V8HI:
nargs = 3;
break;
case V32QI_FTYPE_V32QI_V32QI_INT:
case V16HI_FTYPE_V32QI_V32QI_V16HI_UHI:
case V8SI_FTYPE_V16HI_V16HI_V8SI_UQI:
case V4SI_FTYPE_V8HI_V8HI_V4SI_UQI:
+ case V32HI_FTYPE_V16SF_V16SF_V32HI_USI:
+ case V16HI_FTYPE_V8SF_V8SF_V16HI_UHI:
+ case V8HI_FTYPE_V4SF_V4SF_V8HI_UQI:
nargs = 4;
break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
break;
case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
+ case V16SF_FTYPE_V16SF_V32HI_V32HI_UHI:
+ case V8SF_FTYPE_V8SF_V16HI_V16HI_UQI:
+ case V4SF_FTYPE_V4SF_V8HI_V8HI_UQI:
nargs = 4;
break;
case UQI_FTYPE_V8DI_V8DI_INT_UQI:
{ "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B },
{ "-mwaitpkg", OPTION_MASK_ISA_WAITPKG },
{ "-mcldemote", OPTION_MASK_ISA_CLDEMOTE },
- { "-mptwrite", OPTION_MASK_ISA_PTWRITE }
+ { "-mptwrite", OPTION_MASK_ISA_PTWRITE },
+ { "-mavx512bf16", OPTION_MASK_ISA_AVX512BF16 }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
+ IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
&& !(opts->x_ix86_isa_flags_explicit
& OPTION_MASK_ISA_AVX512VPOPCNTDQ))
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VPOPCNTDQ;
+ if (((processor_alias_table[i].flags & PTA_AVX512BF16) != 0)
+ && !(opts->x_ix86_isa_flags2_explicit
+ & OPTION_MASK_ISA_AVX512BF16))
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BF16;
if (((processor_alias_table[i].flags & PTA_SGX) != 0)
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_SGX))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_SGX;
#define TARGET_CLDEMOTE_P(x) TARGET_ISA_CLDEMOTE_P(x)
#define TARGET_PTWRITE TARGET_ISA_PTWRITE
#define TARGET_PTWRITE_P(x) TARGET_ISA_PTWRITE_P(x)
+#define TARGET_AVX512BF16 TARGET_ISA_AVX512BF16
+#define TARGET_AVX512BF16_P(x) TARGET_ISA_AVX512BF16_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
+const wide_int_bitmask PTA_AVX512BF16 (0, HOST_WIDE_INT_1U << 11);
const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
mrecord-return
Target Report Var(ix86_flag_record_return) Init(0)
Generate a __return_loc section pointing to all return instrumentation code.
+
+mavx512bf16
+Target Report Mask(ISA_AVX512BF16) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and
+AVX512BF16 built-in functions and code generation.
#include <cldemoteintrin.h>
+/* avx512bf16vlintrin.h must precede avx512bf16intrin.h: the latter
+   uses the __m256bh type declared in the VL header.  */
+#include <avx512bf16vlintrin.h>
+
+#include <avx512bf16intrin.h>
+
#include <rdseedintrin.h>
#include <prfchwintrin.h>
;; For AVX512BITALG support
UNSPEC_VPSHUFBIT
+
+ ;; For AVX512BF16 support
+ UNSPEC_VCVTNE2PS2BF16
+ UNSPEC_VCVTNEPS2BF16
+ UNSPEC_VDPBF16PS
])
(define_c_enum "unspecv" [
(V16SF "hi") (V8SF "qi") (V4SF "qi")
(V8DF "qi") (V4DF "qi") (V2DF "qi")])
+;; Mapping of vector modes to corresponding mask half size
+(define_mode_attr avx512fmaskhalfmode
+ [(V64QI "SI") (V32QI "HI") (V16QI "QI")
+ (V32HI "HI") (V16HI "QI") (V8HI "QI") (V4HI "QI")
+ (V16SI "QI") (V8SI "QI") (V4SI "QI")
+ (V8DI "QI") (V4DI "QI") (V2DI "QI")
+ (V16SF "HI") (V8SF "QI") (V4SF "QI")
+ (V8DF "QI") (V4DF "QI") (V2DF "QI")])
+
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr sseintvecmode
[(V16SF "V16SI") (V8DF "V8DI")
"vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+
+(define_mode_iterator BF16 [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+;; Converting from BF to SF
+(define_mode_attr bf16_cvt_2sf
+ [(V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")])
+;; Converting from SF to BF
+(define_mode_attr sf_cvt_bf16
+ [(V4SF "V8HI") (V8SF "V8HI") (V16SF "V16HI")])
+;; Mapping from BF to SF
+(define_mode_attr sf_bf16
+ [(V4SF "V8HI") (V8SF "V16HI") (V16SF "V32HI")])
+
+(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
+ [(match_operand:BF16 0 "register_operand")
+ (match_operand:<bf16_cvt_2sf> 1 "register_operand")
+ (match_operand:<bf16_cvt_2sf> 2 "register_operand")
+ (match_operand:<avx512fmaskmode> 3 "register_operand")]
+ "TARGET_AVX512BF16"
+{
+ emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask(operands[0], operands[1],
+ operands[2], CONST0_RTX(<MODE>mode), operands[3]));
+ DONE;
+})
+
+(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
+ [(set (match_operand:BF16 0 "register_operand" "=v")
+ (unspec:BF16
+ [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
+ (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
+ UNSPEC_VCVTNE2PS2BF16))]
+ "TARGET_AVX512BF16"
+ "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
+
+(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
+ [(match_operand:<sf_cvt_bf16> 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:<avx512fmaskmode> 2 "register_operand")]
+ "TARGET_AVX512BF16"
+{
+ emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask(operands[0], operands[1],
+ CONST0_RTX(<sf_cvt_bf16>mode), operands[2]));
+ DONE;
+})
+
+(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
+ [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
+ (unspec:<sf_cvt_bf16>
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
+ UNSPEC_VCVTNEPS2BF16))]
+ "TARGET_AVX512BF16"
+ "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
+(define_expand "avx512f_dpbf16ps_<mode>_maskz"
+ [(match_operand:VF1_AVX512VL 0 "register_operand")
+ (match_operand:VF1_AVX512VL 1 "register_operand")
+ (match_operand:<sf_bf16> 2 "register_operand")
+ (match_operand:<sf_bf16> 3 "register_operand")
+ (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
+ "TARGET_AVX512BF16"
+{
+ emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1(operands[0], operands[1],
+ operands[2], operands[3], CONST0_RTX(<MODE>mode), operands[4]));
+ DONE;
+})
+
+(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:<sf_bf16> 2 "register_operand" "v")
+ (match_operand:<sf_bf16> 3 "register_operand" "v")]
+ UNSPEC_VDPBF16PS))]
+ "TARGET_AVX512BF16"
+ "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
+
+(define_insn "avx512f_dpbf16ps_<mode>_mask"
+ [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VF1_AVX512VL
+ (unspec:VF1_AVX512VL
+ [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
+ (match_operand:<sf_bf16> 2 "register_operand" "v")
+ (match_operand:<sf_bf16> 3 "register_operand" "v")]
+ UNSPEC_VDPBF16PS)
+ (match_dup 1)
+ (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
+ "TARGET_AVX512BF16"
+ "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
(const_int 1))
(match_operand:SI 3 "const48_operand")]
UNSPEC_EMBEDDED_ROUNDING))])
+
+(define_subst_attr "maskz_half_name" "maskz_half" "" "_maskz_1")
+(define_subst_attr "maskz_half_operand4" "maskz_half" "" "%{%5%}%N4")
+
+(define_subst "maskz_half"
+ [(set (match_operand:SUBST_V 0)
+ (match_operand:SUBST_V 1))]
+ ""
+ [(set (match_dup 0)
+ (vec_merge:SUBST_V
+ (match_dup 1)
+ (match_operand:SUBST_V 2 "const0_operand" "C")
+ (match_operand:<avx512fmaskhalfmode> 3 "register_operand" "Yk")))])
-msse4a -m3dnow -m3dnowa -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop @gol
-madx -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mhle -mlwp @gol
-mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes -mwaitpkg @gol
--mshstk -mmanual-endbr -mforce-indirect-call -mavx512vbmi2 @gol
+-mshstk -mmanual-endbr -mforce-indirect-call -mavx512vbmi2 -mavx512bf16 @gol
-mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx @gol
@itemx -mavx512vbmi2
@opindex mavx512vbmi2
@need 200
+@itemx -mavx512bf16
+@opindex mavx512bf16
+@need 200
@itemx -mgfni
@opindex mgfni
@need 200
WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
3DNow!@:, enhanced 3DNow!@:, POPCNT, ABM, ADX, BMI, BMI2, LZCNT, FXSR, XSAVE,
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
-GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
+GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, or CLDEMOTE
extended instruction sets. Each has a corresponding @option{-mno-} option to
disable use of these instructions.
* gcc.dg/tree-ssa/pr90356-3.c: New test.
* gcc.dg/tree-ssa/pr90356-4.c: New test.
+2019-05-07 Wei Xiao <wei3.xiao@intel.com>
+
+ * gcc.target/i386/avx512bf16-vcvtne2ps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16-vcvtneps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16-vdpbf16ps-1.c: New test.
+ * gcc.target/i386/avx512bf16-vdpbf16ps-2.c: New test.
+ * gcc.target/i386/avx512bf16vl-vcvtne2ps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16vl-vcvtneps2bf16-1.c: New test.
+ * gcc.target/i386/avx512bf16vl-vdpbf16ps-1.c: New test.
+ * gcc.target/i386/builtin_target.c: Handle avx512bf16.
+ * gcc.target/i386/sse-12.c: Add -mavx512bf16.
+ * gcc.target/i386/sse-13.c: Ditto.
+ * gcc.target/i386/sse-14.c: Ditto.
+ * gcc.target/i386/sse-22.c: Ditto.
+ * gcc.target/i386/sse-23.c: Ditto.
+ * g++.dg/other/i386-2.C: Ditto.
+ * g++.dg/other/i386-3.C: Ditto.
+
2019-05-07 Cherry Zhang <cherryyz@google.com>
* go.dg/arrayclear.go: New test.
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Volatile globals keep every intrinsic call below live, so the
+   scan-assembler-times counts above stay exact.  */
+volatile __m512bh res;
+volatile __m512 x1, x2;
+volatile __mmask32 m32;
+
+/* 512-bit VCVTNE2PS2BF16: unmasked, merge-masked and zero-masked.  */
+void extern
+avx512bf16_test (void)
+{
+ res = _mm512_cvtne2ps_pbh (x1, x2);
+ res = _mm512_mask_cvtne2ps_pbh (res, m32, x1, x2);
+ res = _mm512_maskz_cvtne2ps_pbh (m32, x1, x2);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Volatile globals keep every intrinsic call below live, so the
+   scan-assembler-times counts above stay exact.  Note the narrowing
+   conversion: a %zmm SF source produces a %ymm BF16 result.  */
+volatile __m256bh res;
+volatile __m512 x1;
+volatile __mmask16 m16;
+
+/* 512-bit VCVTNEPS2BF16: unmasked, merge-masked and zero-masked.  */
+void extern
+avx512bf16_test (void)
+{
+ res = _mm512_cvtneps_pbh (x1);
+ res = _mm512_mask_cvtneps_pbh (res, m16, x1);
+ res = _mm512_maskz_cvtneps_pbh (m16, x1);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Volatile globals keep every intrinsic call below live, so the
+   scan-assembler-times counts above stay exact.  */
+volatile __m512 res;
+volatile __m512bh x1, x2;
+volatile __mmask16 m16;
+
+/* 512-bit VDPBF16PS: unmasked, merge-masked and zero-masked.  */
+void extern
+avx512bf16_test (void)
+{
+ res = _mm512_dpbf16_ps (res, x1, x2);
+ res = _mm512_mask_dpbf16_ps (res, m16, x1, x2);
+ res = _mm512_maskz_dpbf16_ps (m16, res, x1, x2);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -O2" } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* abort is reached on a mismatch below; declare it so this
+   compile-only test does not trip the default
+   -Wimplicit-function-declaration warning, which dg would count as an
+   excess diagnostic and fail the test.  */
+extern void abort (void);
+
+typedef union
+{
+ __m512 x;
+ float a[16];
+} union512s;
+
+float res_ref[16];
+union512s res;
+__m512bh x1, x2;
+__mmask16 m16;
+
+/* Apply merge-masking semantics to the reference array: lanes whose
+   mask bit is clear are replaced by the sentinel 117.  */
+static void __attribute__((noinline, unused))
+merge_masking_s (float *arr, unsigned long long mask, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ arr[i] = (mask & (1LL << i)) ? arr[i] : 117;
+ }
+}
+
+/* Compare a union512s against a reference vector; returns the number
+   of mismatching lanes (0 on success).  */
+static int __attribute__((noinline, unused))
+check_union512s (union512s u, const float *v)
+{
+ int i;
+ int err = 0;
+ for (i = 0; i < (sizeof (u.a) / sizeof ((u.a)[0])); i++)
+ if (u.a[i] != v[i])
+ {
+ err++;
+ ;
+ }
+ return err;
+}
+
+/* dg-do compile: only instruction selection is actually verified --
+   the scan-assembler-times above requires exactly one merge-masked
+   vdpbf16ps for the _mm512_mask_dpbf16_ps call.  */
+void extern
+avx512bf16_test (void)
+{
+ res.x = _mm512_mask_dpbf16_ps (res.x, m16, x1, x2);
+ merge_masking_s (res_ref, m16, 16);
+ if (check_union512s (res, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtne2ps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Volatile globals keep every intrinsic call below live, so the
+   scan-assembler-times counts above stay exact.  */
+volatile __m128bh res1;
+volatile __m256bh res2;
+volatile __m128 x1, x2;
+volatile __m256 x3, x4;
+volatile __mmask8 m8;
+volatile __mmask16 m16;
+
+/* AVX512VL (128/256-bit) VCVTNE2PS2BF16: unmasked, merge-masked and
+   zero-masked forms at both vector lengths.  */
+void extern
+avx512bf16_test (void)
+{
+ res2 = _mm256_cvtne2ps_pbh (x3, x4);
+ res2 = _mm256_mask_cvtne2ps_pbh (res2, m16, x3, x4);
+ res2 = _mm256_maskz_cvtne2ps_pbh (m16, x3, x4);
+
+ res1 = _mm_cvtne2ps_pbh (x1, x2);
+ res1 = _mm_mask_cvtne2ps_pbh (res1, m8, x1, x2);
+ res1 = _mm_maskz_cvtne2ps_pbh (m8, x1, x2);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtneps2bf16\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Volatile globals keep every intrinsic call below live, so the
+   scan-assembler-times counts above stay exact.  The narrowing
+   conversion halves the vector width (%ymm SF -> %xmm BF16).  */
+volatile __m128bh res1, res2;
+volatile __m128 x1;
+volatile __m256 x2;
+volatile __mmask8 m8;
+
+/* AVX512VL (128/256-bit) VCVTNEPS2BF16: unmasked, merge-masked and
+   zero-masked forms at both vector lengths.  */
+void extern
+avx512bf16_test (void)
+{
+ res2 = _mm256_cvtneps_pbh (x2);
+ res2 = _mm256_mask_cvtneps_pbh (res2, m8, x2);
+ res2 = _mm256_maskz_cvtneps_pbh (m8, x2);
+
+ res1 = _mm_cvtneps_pbh (x1);
+ res1 = _mm_mask_cvtneps_pbh (res1, m8, x1);
+ res1 = _mm_maskz_cvtneps_pbh (m8, x1);
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bf16 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[0-9\]\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vdpbf16ps\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[0-9\]\}\{z\}\[^\n\r]*(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+/* Volatile globals keep every intrinsic call below live, so the
+   scan-assembler-times counts above stay exact.  */
+volatile __m256 res1;
+volatile __m256bh x1, x2;
+volatile __m128 res2;
+volatile __m128bh x3, x4;
+volatile __mmask8 m8;
+
+/* AVX512VL (128/256-bit) VDPBF16PS: unmasked, merge-masked and
+   zero-masked forms at both vector lengths.  */
+void extern
+avx512bf16_test (void)
+{
+ res1 = _mm256_dpbf16_ps (res1, x1, x2);
+ res1 = _mm256_mask_dpbf16_ps (res1, m8, x1, x2);
+ res1 = _mm256_maskz_dpbf16_ps (m8, res1, x1, x2);
+
+ res2 = _mm_dpbf16_ps (res2, x3, x4);
+ res2 = _mm_mask_dpbf16_ps (res2, m8, x3, x4);
+ res2 = _mm_maskz_dpbf16_ps (m8, res2, x3, x4);
+}
assert (__builtin_cpu_supports ("avx5124vnniw"));
if (edx & bit_AVX5124FMAPS)
assert (__builtin_cpu_supports ("avx5124fmaps"));
+
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
+ if (eax & bit_AVX512BF16)
+ assert (__builtin_cpu_supports ("avx512bf16"));
}
/* Check cpuid level of extended features. */
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
/* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512bf16" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16")
#include <x86intrin.h>
+2019-05-07 Hongtao Liu <hongtao.liu@intel.com>
+
+ * config/i386/cpuinfo.c (get_available_features): Detect BF16.
+ * config/i386/cpuinfo.h (enum processor_features): Add
+ FEATURE_AVX512BF16.
+
2019-04-23 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Bernd Edlinger <bernd.edlinger@hotmail.de>
Jakub Jelinek <jakub@redhat.com>
set_feature (FEATURE_FMA);
}
- /* Get Advanced Features at level 7 (eax = 7, ecx = 0). */
+ /* Get Advanced Features at level 7 (eax = 7, ecx = 0/1). */
if (max_cpuid_level >= 7)
{
__cpuid_count (7, 0, eax, ebx, ecx, edx);
set_feature (FEATURE_AVX5124VNNIW);
if (edx & bit_AVX5124FMAPS)
set_feature (FEATURE_AVX5124FMAPS);
+
+ __cpuid_count (7, 1, eax, ebx, ecx, edx);
+ if (eax & bit_AVX512BF16)
+ set_feature (FEATURE_AVX512BF16);
}
}
FEATURE_GFNI,
FEATURE_VPCLMULQDQ,
FEATURE_AVX512VNNI,
- FEATURE_AVX512BITALG
+ FEATURE_AVX512BITALG,
+ FEATURE_AVX512BF16
};
extern struct __processor_model