+2017-12-22 Julia Koval <julia.koval@intel.com>
+ Sebastian Peryt <sebastian.peryt@intel.com>
+
+ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET,
+ OPTION_MASK_ISA_AVX512BITALG_UNSET): New.
+ (ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting.
+ * config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h.
+ * config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16,
+ _mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16,
+ _mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8,
+ _mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8,
+ _mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16,
+ _mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask,
+ _mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8,
+ _mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8,
+ _mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics.
+ * config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64,
+ _mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32,
+ _mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64,
+ _mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64,
+ _mm256_maskz_popcnt_epi64): New intrinsics.
+ * config/i386/cpuid.h (bit_AVX512BITALG): New bit.
+ * config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg.
+ * config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI,
+ V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI,
+ V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types.
+ * config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di,
+ __builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di,
+ __builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si,
+ __builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si,
+ __builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi,
+ __builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi,
+ __builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi,
+ __builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi,
+ __builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi,
+ __builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi,
+ __builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask,
+ __builtin_ia32_vpshufbitqmb256_mask,
+ __builtin_ia32_vpshufbitqmb512_mask): New builtins.
+ * config/i386/i386-c.c (__AVX512BITALG__): New.
+ * config/i386/i386.c (isa2_opts): Add -mavx512bitalg.
+ (ix86_valid_target_attribute_inner_p): Ditto.
+ (ix86_expand_args_builtin): Handle new types.
+ * config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New.
+ * config/i386/i386.opt: Add -mavx512bitalg.
+ * config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and
+ avx512bitalgintrin.h.
+ * config/i386/sse.md (VI48_AVX512VLBW): New iterator.
+ (vpopcount<mode><mask_name>): Add more types.
+ (avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>): New.
+ * doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq.
+
2017-12-22 Igor Tsimbalist <igor.v.tsimbalist@intel.com>
* common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512VNNI_SET):
#define OPTION_MASK_ISA_AVX512VNNI_SET \
(OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET)
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET OPTION_MASK_ISA_AVX512VPOPCNTDQ
+#define OPTION_MASK_ISA_AVX512BITALG_SET OPTION_MASK_ISA_AVX512BITALG
#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
+#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
/* Turn off additional isa flags. */
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET;
- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET;
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BITALG_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX512BITALG_UNSET;
}
return true;
}
return true;
+ case OPT_mavx512bitalg:
+ if (value)
+ {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BITALG_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BITALG_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET;
+ }
+ else
+ {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BITALG_UNSET;
+ opts->x_ix86_isa_flags2_explicit
+ |= OPTION_MASK_ISA_AVX512BITALG_UNSET;
+ }
+ return true;
+
case OPT_msgx:
if (value)
{
gfniintrin.h cet.h avx512vbmi2intrin.h
avx512vbmi2vlintrin.h avx512vnniintrin.h
avx512vnnivlintrin.h gfniintrin.h vaesintrin.h
- vpclmulqdqintrin.h"
+ vpclmulqdqintrin.h avx512vpopcntdqvlintrin.h
+ avx512bitalgintrin.h"
;;
x86_64-*-*)
cpu_type=i386
gfniintrin.h cet.h avx512vbmi2intrin.h
avx512vbmi2vlintrin.h avx512vnniintrin.h
avx512vnnivlintrin.h gfniintrin.h vaesintrin.h
- vpclmulqdqintrin.h"
+ vpclmulqdqintrin.h gfniintrin.h
+ avx512vpopcntdqvlintrin.h avx512bitalgintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
--- /dev/null
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512bitalgintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _AVX512BITALGINTRIN_H_INCLUDED
+#define _AVX512BITALGINTRIN_H_INCLUDED
+
+#ifndef __AVX512BITALG__
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg")
+#define __DISABLE_AVX512BITALG__
+#endif /* __AVX512BITALG__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_popcnt_epi8 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_popcnt_epi16 (__m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A);
+}
+
+#ifdef __DISABLE_AVX512BITALG__
+#undef __DISABLE_AVX512BITALG__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALG__ */
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512bw")
+#define __DISABLE_AVX512BITALGBW__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
+ (__v64qi) __B,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
+ (__v32hi) __B,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A)
+{
+ return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+extern __inline __mmask64
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask64) -1);
+}
+
+extern __inline __mmask64
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B)
+{
+ return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A,
+ (__v8di) __B,
+ (__mmask64) __M);
+}
+
+#ifdef __DISABLE_AVX512BITALGBW__
+#undef __DISABLE_AVX512BITALGBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGBW__ */
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl,avx512bw")
+#define __DISABLE_AVX512BITALGVLBW__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi) __B,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask16) -1);
+}
+
+extern __inline __mmask16
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B)
+{
+ return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask16) __M);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask32) -1);
+}
+
+extern __inline __mmask32
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B)
+{
+ return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask32) __M);
+}
+
+#ifdef __DISABLE_AVX512BITALGVLBW__
+#undef __DISABLE_AVX512BITALGVLBW__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGVLBW__ */
+
+
+#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512bitalg,avx512vl")
+#define __DISABLE_AVX512BITALGVL__
+#endif /* __AVX512VLBW__ */
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi8 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi16 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi8 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi16 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi8 (__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi16 (__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+#ifdef __DISABLE_AVX512BITALGVL__
+#undef __DISABLE_AVX512BITALGVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512BITALGBW__ */
+
+#endif /* _AVX512BITALGINTRIN_H_INCLUDED */
--- /dev/null
+/* Copyright (C) 2017 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <avx512vpopcntdqvlintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
+#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED
+
+#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__)
+#pragma GCC push_options
+#pragma GCC target("avx512vpopcntdq,avx512vl")
+#define __DISABLE_AVX512VPOPCNTDQVL__
+#endif /* __AVX512VPOPCNTDQVL__ */
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi32 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi32 (__m128i __A, __mmask16 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
+ (__v4si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi32 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi32 (__m256i __A, __mmask16 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
+ (__v8si) __B,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_popcnt_epi64 (__m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_popcnt_epi64 (__m128i __A, __mmask8 __U, __m128i __B)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
+ (__v2di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A)
+{
+ return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_popcnt_epi64 (__m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_popcnt_epi64 (__m256i __A, __mmask8 __U, __m256i __B)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
+ (__v4di) __B,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A)
+{
+ return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+#ifdef __DISABLE_AVX512VPOPCNTDQVL__
+#undef __DISABLE_AVX512VPOPCNTDQVL__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */
+
+#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */
+
#define bit_VAES (1 << 9)
#define bit_AVX512VNNI (1 << 11)
#define bit_VPCLMULQDQ (1 << 10)
+#define bit_AVX512BITALG (1 << 12)
#define bit_AVX512VPOPCNTDQ (1 << 14)
#define bit_RDPID (1 << 22)
unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
+ unsigned int has_avx512bitalg = 0;
unsigned int has_ibt = 0, has_shstk = 0;
unsigned int has_avx512vnni = 0, has_vaes = 0;
unsigned int has_vpclmulqdq = 0;
has_gfni = ecx & bit_GFNI;
has_vaes = ecx & bit_VAES;
has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
+ has_avx512bitalg = ecx & bit_AVX512BITALG;
has_avx5124vnniw = edx & bit_AVX5124VNNIW;
has_avx5124fmaps = edx & bit_AVX5124FMAPS;
const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
+ const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
xsavec, xsaves, avx512dq, avx512bw, avx512vl,
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk,
- avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL);
+ avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
+ avx512bitalg, NULL);
}
done:
DEF_FUNCTION_TYPE (V16SI, V8SI)
DEF_FUNCTION_TYPE (V16SI, V16SF)
DEF_FUNCTION_TYPE (V16SI, V16SI)
+DEF_FUNCTION_TYPE (V32HI, V32HI)
+DEF_FUNCTION_TYPE (V64QI, V64QI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI)
DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI)
DEF_FUNCTION_TYPE (V8DI, PV8DI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI)
DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT)
+
+# BITALG builtins
+DEF_FUNCTION_TYPE (UHI, V2DI, V2DI, UHI)
+DEF_FUNCTION_TYPE (USI, V4DI, V4DI, USI)
+DEF_FUNCTION_TYPE (V4DI, V4DI)
+DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI)
+DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI)
BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di, "__builtin_ia32_vpopcountq_v2di", IX86_BUILTIN_VPOPCOUNTQV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di_mask, "__builtin_ia32_vpopcountq_v2di_mask", IX86_BUILTIN_VPOPCOUNTQV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si, "__builtin_ia32_vpopcountd_v4si", IX86_BUILTIN_VPOPCOUNTDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si_mask, "__builtin_ia32_vpopcountd_v4si_mask", IX86_BUILTIN_VPOPCOUNTDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si, "__builtin_ia32_vpopcountd_v8si", IX86_BUILTIN_VPOPCOUNTDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI)
+BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI)
+
+
/* RDPID */
BDESC (OPTION_MASK_ISA_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID)
BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+/* BITALG */
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI)
+
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI)
+
+BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv2di_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V2DI_V2DI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv4di_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V4DI_V4DI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_avx512vl_vpshufbitqmbv8di_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
BDESC_END (ARGS2, MPX)
/* Builtins for MPX. */
def_or_undef (parse_in, "__SGX__");
if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS)
def_or_undef (parse_in, "__AVX5124FMAPS__");
+ if (isa_flag2 & OPTION_MASK_ISA_AVX512BITALG)
+ def_or_undef (parse_in, "__AVX512BITALG__");
if (isa_flag2 & OPTION_MASK_ISA_AVX512VPOPCNTDQ)
def_or_undef (parse_in, "__AVX512VPOPCNTDQ__");
if (isa_flag & OPTION_MASK_ISA_FMA)
{ "-mhle", OPTION_MASK_ISA_HLE },
{ "-mmovbe", OPTION_MASK_ISA_MOVBE },
{ "-mclzero", OPTION_MASK_ISA_CLZERO },
- { "-mmwaitx", OPTION_MASK_ISA_MWAITX }
+ { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
+ { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG }
};
static struct ix86_target_opts isa_opts[] =
{
IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq),
IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2),
IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni),
+ IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg),
IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi),
IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma),
case V16SI_FTYPE_V4SI:
case V16SI_FTYPE_V16SF:
case V16SI_FTYPE_V16SI:
+ case V64QI_FTYPE_V64QI:
+ case V32HI_FTYPE_V32HI:
case V16SF_FTYPE_V16SF:
case V8DI_FTYPE_UQI:
case V8DI_FTYPE_V8DI:
case V8DF_FTYPE_V4DF:
case V8DF_FTYPE_V2DF:
case V8DF_FTYPE_V8DF:
+ case V4DI_FTYPE_V4DI:
nargs = 1;
break;
case V4SF_FTYPE_V4SF_VEC_MERGE:
case HI_FTYPE_V16SF_INT_UHI:
case QI_FTYPE_V8SF_INT_UQI:
case QI_FTYPE_V4SF_INT_UQI:
+ case UHI_FTYPE_V2DI_V2DI_UHI:
+ case USI_FTYPE_V4DI_V4DI_USI:
+ case V4SI_FTYPE_V4SI_V4SI_UHI:
+ case V8SI_FTYPE_V8SI_V8SI_UHI:
nargs = 3;
mask_pos = 1;
nargs_constant = 1;
#define TARGET_AVX512VPOPCNTDQ_P(x) TARGET_ISA_AVX512VPOPCNTDQ_P(x)
#define TARGET_AVX512VNNI TARGET_ISA_AVX512VNNI
#define TARGET_AVX512VNNI_P(x) TARGET_ISA_AVX512VNNI_P(x)
+#define TARGET_AVX512BITALG TARGET_ISA_AVX512BITALG
+#define TARGET_AVX512BITALG_P(x) TARGET_ISA_AVX512BITALG_P(x)
#define TARGET_FMA TARGET_ISA_FMA
#define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x)
#define TARGET_SSE4A TARGET_ISA_SSE4A
Target Report Mask(ISA_AVX512VNNI) Var(ix86_isa_flags) Save
Support AVX512VNNI built-in functions and code generation.
+mavx512bitalg
+Target Report Mask(ISA_AVX512BITALG) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512BITALG built-in functions and code generation.
+
mfma
Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation.
#include <avx512vnnivlintrin.h>
+#include <avx512vpopcntdqvlintrin.h>
+
+#include <avx512bitalgintrin.h>
+
#include <shaintrin.h>
#include <lzcntintrin.h>
;; For VPCLMULQDQ support
UNSPEC_VPCLMULQDQ
+
+ ;; For AVX512BITALG support
+ UNSPEC_VPSHUFBIT
])
(define_c_enum "unspecv" [
(V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
(V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
+(define_mode_iterator VI48_AVX512VLBW
+ [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX512VL")
+ (V2DI "TARGET_AVX512VL")])
+
(define_mode_attr avx512
[(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
(V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
(set_attr ("mode") ("TI"))])
(define_insn "vpopcount<mode><mask_name>"
- [(set (match_operand:VI48_512 0 "register_operand" "=v")
- (popcount:VI48_512
- (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))]
+ [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
"TARGET_AVX512VPOPCNTDQ"
"vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
"TARGET_SSE && TARGET_64BIT"
"jmp\t%P1")
+(define_insn "vpopcount<mode><mask_name>"
+ [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+ (popcount:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512BITALG"
+ "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
+
(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
[(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v")
(unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v")
"TARGET_VPCLMULQDQ"
"vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "mode" "DI")])
+
+(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI48_AVX512VLBW 1 "register_operand" "v")
+ (match_operand:VI48_AVX512VLBW 2 "nonimmediate_operand" "vm")]
+ UNSPEC_VPSHUFBIT))]
+ "TARGET_AVX512BITALG"
+ "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
+ [(set_attr "prefix" "evex")
+ (set_attr "mode" "<sseinsnmode>")])
-mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol
-mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes @gol
-mcet -mibt -mshstk -mforce-indirect-call -mavx512vbmi2 @gol
--mvpclmulqdq @gol
+-mvpclmulqdq -mavx512bitalg -mavx512vpopcntdq @gol
-mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
@need 200
@itemx -mvpclmulqdq
@opindex mvpclmulqdq
+@need 200
+@itemx -mavx512bitalg
+@opindex mavx512bitalg
+@need 200
+@itemx -mavx512vpopcntdq
+@opindex mavx512vpopcntdq
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES,
FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2,
-GFNI, VPCLMULQDQ, 3DNow!@: or enhanced 3DNow!@: extended instruction sets.
+GFNI, VPCLMULQDQ, AVX512BITALG, AVX512VPOPCNTDQ3DNow!@: or enhanced 3DNow!@:
+extended instruction sets.
Each has a corresponding @option{-mno-} option to disable use of these
instructions.
+2017-12-22 Julia Koval <julia.koval@intel.com>
+ Sebastian Peryt <sebastian.peryt@intel.com>
+
+ * g++.dg/other/i386-2.C: Add new options.
+ * g++.dg/other/i386-3.C: Ditto.
+ * gcc.target/i386/sse-12.c: Ditto.
+ * gcc.target/i386/sse-13.c: Ditto.
+ * gcc.target/i386/sse-22.c: Ditto.
+ * gcc.target/i386/sse-23.c: Ditto.
+ * gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG.
+ * gcc.target/i386/avx512bitalg-vpopcntb-1.c: New.
+ * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto.
+ * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto.
+ * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto.
+ * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto.
+ * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto.
+ * gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto.
+ * gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto.
+ * gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto.
+ * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto.
+
2017-12-22 Mike Stump <mikestump@comcast.net>
Eric Botcazou <ebotcazou@adacore.com>
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
-
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
- and mm_malloc.h.h are usable with -O -pedantic-errors. */
+ avx512bitalgintrin.h and mm_malloc.h.h are usable with -O
+ -pedantic-errors. */
#include <x86intrin.h>
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h,
- avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h and
- mm_malloc.h are usable with -O -fkeep-inline-functions. */
+ avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h
+ avx512bitalgintrin.h and mm_malloc.h are usable with -O
+ -fkeep-inline-functions. */
#include <x86intrin.h>
#ifdef AVX512VPOPCNTDQ
&& (ecx & bit_AVX512VPOPCNTDQ)
#endif
+#ifdef AVX512BITALG
+ && (ecx & bit_AVX512BITALG)
+#endif
#ifdef GFNI
&& (ecx & bit_GFNI)
#endif
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bitalg" } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#include "avx512f-helper.h"
+
+#define AVX512BITALG
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+#define TYPE char
+
+int
+CALC (TYPE v)
+{
+ int ret;
+ int i;
+
+ ret = 0;
+ for (i = 0; i < sizeof(v) * 8; i++)
+ if ((v & ((TYPE)1 << (TYPE) i)))
+ ret++;
+
+ return ret;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src, src0;
+ MASK_TYPE mask = MASK_VALUE;
+ TYPE res_ref[SIZE];
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res_ref[i] = CALC (src.a[i]);
+ src0.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_popcnt_epi8) (src.x);
+ res2.x = INTRINSIC (_mask_popcnt_epi8) (src.x, mask, src0.x);
+ res3.x = INTRINSIC (_maskz_popcnt_epi8) (mask, src.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_b) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m512i z, z1;
+
+int foo ()
+{
+ __mmask16 msk;
+ __m512i c = _mm512_popcnt_epi8 (z);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_mask_popcnt_epi8 (z, msk, z1);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_maskz_popcnt_epi8 (msk, z);
+ asm volatile ("" : "+v" (c));
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m256i y, y_1;
+extern __m128i x, x_1;
+
+int foo ()
+{
+ __mmask32 msk32;
+ __mmask16 msk16;
+ __m256i c256 = _mm256_popcnt_epi8 (y);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_mask_popcnt_epi8 (y, msk32, y_1);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_maskz_popcnt_epi8 (msk32, y);
+ asm volatile ("" : "+v" (c256));
+ __m128i c128 = _mm_popcnt_epi8 (x);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_mask_popcnt_epi8 (x, msk16, x_1);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_maskz_popcnt_epi8 (msk16, x);
+ asm volatile ("" : "+v" (c128));
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bitalg" } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#include "avx512f-helper.h"
+
+#define AVX512BITALG
+#define SIZE (AVX512F_LEN / 16)
+
+#include "avx512f-mask-type.h"
+
+#define TYPE short
+
+int
+CALC (TYPE v)
+{
+ int ret;
+ int i;
+
+ ret = 0;
+ for (i = 0; i < sizeof(v) * 8; i++)
+ if ((v & ((TYPE)1 << (TYPE) i)))
+ ret++;
+
+ return ret;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src, src0;
+ MASK_TYPE mask = MASK_VALUE;
+ TYPE res_ref[SIZE];
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
+ int i;
+
+ for (i = 0; i < SIZE; i++)
+ {
+ res_ref[i] = CALC (src.a[i]);
+ src0.a[i] = DEFAULT_VALUE;
+ }
+
+ res1.x = INTRINSIC (_popcnt_epi16) (src.x);
+ res2.x = INTRINSIC (_mask_popcnt_epi16) (src.x, mask, src0.x);
+ res3.x = INTRINSIC (_maskz_popcnt_epi16) (mask, src.x);
+
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
+ abort ();
+
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
+ abort ();
+
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
+ if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
+ abort ();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m512i z, z1;
+
+int foo ()
+{
+ __mmask16 msk;
+ __m512i c = _mm512_popcnt_epi16 (z);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_mask_popcnt_epi16 (z, msk, z1);
+ asm volatile ("" : "+v" (c));
+ c = _mm512_maskz_popcnt_epi16 (msk, z);
+ asm volatile ("" : "+v" (c));
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <x86intrin.h>
+
+extern __m256i y, y_1;
+extern __m128i x, x_1;
+
+int foo ()
+{
+ __mmask16 msk16;
+ __mmask8 msk8;
+ __m256i c256 = _mm256_popcnt_epi16 (y);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_mask_popcnt_epi16 (y, msk16, y_1);
+ asm volatile ("" : "+v" (c256));
+ c256 = _mm256_maskz_popcnt_epi16 (msk16, y);
+ asm volatile ("" : "+v" (c256));
+ __m128i c128 = _mm_popcnt_epi16 (x);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_mask_popcnt_epi16 (x, msk8, x_1);
+ asm volatile ("" : "+v" (c128));
+ c128 = _mm_maskz_popcnt_epi16 (msk8, x);
+ asm volatile ("" : "+v" (c128));
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512bitalg" } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#include "avx512f-helper.h"
+
+#define AVX512BITALG
+#define SIZE (AVX512F_LEN / 8)
+
+#include "avx512f-mask-type.h"
+
+#define TYPE unsigned long long
+
+unsigned char
+CALC (TYPE a, TYPE b)
+{
+ unsigned char res = 0;
+ for (int i = 0; i < 8; i++)
+ {
+ unsigned char m = (b >> (64 - ((i+1)*8))) & 0x3F;
+ unsigned char bit = (a >> m) & 1;
+ res |= (bit << (8 - i - 1));
+ }
+
+ return res;
+}
+
+void
+TEST (void)
+{
+ UNION_TYPE (AVX512F_LEN, i_q) src1, src2;
+ MASK_TYPE mask = MASK_VALUE;
+ TYPE res1, res2;
+ TYPE res_ref = 0;
+
+ src1.x = INTRINSIC (_set1_epi8) (0x13);
+ src2.x = INTRINSIC (_set1_epi8) (0x17);
+
+ src1.a[0] = 0xff;
+ src2.a[0] = 0xff;
+
+ for (int i = 0; i < SIZE/8; i++)
+ {
+ unsigned long long bit = CALC (src1.a[i], src2.a[i]);
+ res_ref |= ((unsigned long long)(CALC (src1.a[i], src2.a[i])) << (i*8));
+ }
+
+ res1 = INTRINSIC (_bitshuffle_epi64_mask) (src1.x, src2.x);
+ res2 = INTRINSIC (_mask_bitshuffle_epi64_mask) (mask, src1.x, src2.x);
+
+ if (res1 != res_ref)
+ abort();
+
+ for (int i = 0; i < SIZE; i++)
+ {
+ if (!((mask >> i) & 1))
+ res_ref &= ~((unsigned long long)1 <<i);
+ }
+ if (res2 != res_ref)
+ abort();
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-mavx512bitalg -mavx512vl -mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*k\[1-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*k\[1-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*k\[1-7\]\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpshufbitqmb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\]*k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+volatile __m128i x128;
+volatile __m256i x256;
+volatile __m512i x512;
+
+volatile __mmask16 m16;
+volatile __mmask32 m32;
+volatile __mmask64 m64;
+
+void extern
+avx512vl_test (void)
+{
+ m16 = _mm_bitshuffle_epi64_mask (x128, x128);
+ m32 = _mm256_bitshuffle_epi64_mask (x256, x256);
+ m64 = _mm512_bitshuffle_epi64_mask (x512, x512);
+ m16 = _mm_mask_bitshuffle_epi64_mask (m16, x128, x128);
+ m32 = _mm256_mask_bitshuffle_epi64_mask (m32, x256, x256);
+ m64 = _mm512_mask_bitshuffle_epi64_mask (m64, x512, x512);
+}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntb-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntb-1.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntw-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpopcntw-1.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512bitalg } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpshufbitqmb-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bitalg-vpshufbitqmb-1.c"
/* { dg-options "-O2 -mavx512vpopcntdq" } */
/* { dg-require-effective-target avx512vpopcntdq } */
-#define AVX512VPOPCNTDQ
#include "avx512f-helper.h"
+#define AVX512VPOPCNTDQ
#define SIZE (AVX512F_LEN / 32)
#include "avx512f-mask-type.h"
#define TYPE int
-static int
-compute_popcnt (TYPE v)
+int
+CALC (TYPE v)
{
int ret;
int i;
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src, src0;
MASK_TYPE mask = MASK_VALUE;
TYPE res_ref[SIZE];
- src.x = _mm512_set1_epi8 (0x3D);
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
int i;
for (i = 0; i < SIZE; i++)
{
- res_ref[i] = compute_popcnt (src.a[i]);
+ res_ref[i] = CALC (src.a[i]);
src0.a[i] = DEFAULT_VALUE;
}
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512vpopcntdq" } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
-extern __m512i z, z1;
+extern __m128i x, x_1;
+extern __m256i y, y_1;
+extern __m512i z, z_1;
int foo ()
{
__mmask16 msk;
+ __mmask8 msk8;
+ __m128i a = _mm_popcnt_epi32 (x);
+ asm volatile ("" : "+v" (a));
+ a = _mm_mask_popcnt_epi32 (x, msk8, x_1);
+ asm volatile ("" : "+v" (a));
+ a = _mm_maskz_popcnt_epi32 (msk8, x);
+ asm volatile ("" : "+v" (a));
+ __m256i b = _mm256_popcnt_epi32 (y);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_mask_popcnt_epi32 (y, msk8, y_1);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_maskz_popcnt_epi32 (msk8, y);
+ asm volatile ("" : "+v" (b));
__m512i c = _mm512_popcnt_epi32 (z);
asm volatile ("" : "+v" (c));
- c = _mm512_mask_popcnt_epi32 (z, msk, z1);
+ c = _mm512_mask_popcnt_epi32 (z, msk, z_1);
asm volatile ("" : "+v" (c));
c = _mm512_maskz_popcnt_epi32 (msk, z);
asm volatile ("" : "+v" (c));
/* { dg-options "-O2 -mavx512vpopcntdq" } */
/* { dg-require-effective-target avx512vpopcntdq } */
-#define AVX512VPOPCNTDQ
#include "avx512f-helper.h"
+#define AVX512VPOPCNTDQ
#define SIZE (AVX512F_LEN / 64)
#include "avx512f-mask-type.h"
#define TYPE long long
-static int
-compute_popcnt (TYPE v)
+int
+CALC (TYPE v)
{
int ret;
int i;
UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src, src0;
MASK_TYPE mask = MASK_VALUE;
TYPE res_ref[SIZE];
- src.x = _mm512_set1_epi8 (0x3D);
+ src.x = INTRINSIC (_set1_epi8) (0x3D);
int i;
for (i = 0; i < SIZE; i++)
{
- res_ref[i] = compute_popcnt (src.a[i]);
+ res_ref[i] = CALC (src.a[i]);
src0.a[i] = DEFAULT_VALUE;
}
/* { dg-do compile } */
-/* { dg-options "-O2 -mavx512vpopcntdq" } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
#include <x86intrin.h>
-extern __m512i z, z1;
+extern __m128i x, x_1;
+extern __m256i y, y_1;
+extern __m512i z, z_1;
int foo ()
{
__mmask8 msk;
+ __m128i a = _mm_popcnt_epi64 (x);
+ asm volatile ("" : "+v" (a));
+ a = _mm_mask_popcnt_epi64 (x, msk, x_1);
+ asm volatile ("" : "+v" (a));
+ a = _mm_maskz_popcnt_epi64 (msk, x);
+ asm volatile ("" : "+v" (a));
+ __m256i b = _mm256_popcnt_epi64 (y);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_mask_popcnt_epi64 (y, msk, y_1);
+ asm volatile ("" : "+v" (b));
+ b = _mm256_maskz_popcnt_epi64 (msk, y);
+ asm volatile ("" : "+v" (b));
__m512i c = _mm512_popcnt_epi64 (z);
asm volatile ("" : "+v" (c));
- c = _mm512_mask_popcnt_epi64 (z, msk, z1);
+ c = _mm512_mask_popcnt_epi64 (z, msk, z_1);
asm volatile ("" : "+v" (c));
- c = _mm512_maskz_popcnt_epi64 (msk, z);
+ c = _mm512_maskz_popcnt_epi64 (msk, z);
asm volatile ("" : "+v" (c));
}
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bw -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vpopcntdq } */
+/* { dg-require-effective-target avx512bw } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntd-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntd-1.c"
--- /dev/null
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512vpopcntdq" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vpopcntdq } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntq-1.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512vpopcntdq-vpopcntq-1.c"
} "-mvpclmulqdq -mavx512vl" ]
}
+# Return 1 if avx512_bitalg instructions can be compiled.
+proc check_effective_target_avx512bitalg { } {
+ return [check_no_compiler_messages avx512bitalg object {
+ typedef int __v32hi __attribute__ ((__vector_size__ (64)));
+
+ __v32hi
+ _mm512_popcnt_epi16 (__v32hi __A)
+ {
+ return (__v32hi) __builtin_ia32_vpopcountd_v32hi ((__v32hi) __A);
+ }
+ } "-mavx512bitalg" ]
+}
+
# If a testcase doesn't have special options, use these.
global DEFAULT_CFLAGS
if ![info exists DEFAULT_CFLAGS] then {
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
#include <x86intrin.h>
/* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h,
- avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h and
- mm_malloc.h that reference the proper builtin functions.
-
+ avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
+ avx512bitalgintrin.h and mm_malloc.h that reference the proper builtin
+ functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
#ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
#endif
/* Following intrinsics require immediate arguments. They
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni")
+#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)
are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h,
mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h,
tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h,
- avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h
- and mm_malloc.h that reference the proper builtin functions.
-
+ avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h,
+ avx512bitalgintrin.h and mm_malloc.h that reference the proper builtin
+ functions.
Defining away "extern" and "__inline" results in all of them being
compiled as proper functions. */
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg")
#include <x86intrin.h>