From: Julia Koval Date: Fri, 22 Dec 2017 12:37:16 +0000 (+0100) Subject: Enable AVX512BITALG X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e2a29465e91c75b337aabd5886af982653faf00e;p=gcc.git Enable AVX512BITALG gcc/ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET, OPTION_MASK_ISA_AVX512BITALG_UNSET): New. (ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting. * config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. * config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16, _mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16, _mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8, _mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8, _mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16, _mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask, _mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8, _mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8, _mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics. * config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64, _mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32, _mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64, _mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64, _mm256_maskz_popcnt_epi64): New intrinsics. * config/i386/cpuid.h (bit_AVX512BITALG): New bit. * config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg. * config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI, V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI, V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types. * config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di, __builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di, __builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si, __builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si, __builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi, __builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi, __builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi, __builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi, __builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi, __builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi, __builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask, __builtin_ia32_vpshufbitqmb256_mask, __builtin_ia32_vpshufbitqmb512_mask): New builtins. * config/i386/i386-c.c (__AVX512BITALG__): New. * config/i386/i386.c (isa2_opts): Add -mavx512bitalg. (ix86_valid_target_attribute_inner_p): Ditto. (ix86_expand_args_builtin): Handle new types. * config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New. * config/i386/i386.opt: Add -mavx512bitalg. * config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. * config/i386/sse.md (VI48_AVX512VLBW): New iterator. (vpopcount): Add more types. (avx512vl_vpshufbitqmb): New. * doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq. gcc/testsuite/ * g++.dg/other/i386-2.C: Add new options. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/sse-12.c: Ditto. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG. * gcc.target/i386/avx512bitalg-vpopcntb-1.c: New. * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto. * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto. * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto. * gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto. * gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto. * gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New. * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types. * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics. * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto. * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto. Co-Authored-By: Sebastian Peryt From-SVN: r255975 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index efef50ac25e..084718852be 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,55 @@ +2017-12-22 Julia Koval + Sebastian Peryt + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512BITALG_SET, + OPTION_MASK_ISA_AVX512BITALG_UNSET): New. + (ix86_handle_option): Handle -mavx512bitalg, fix 4VNNIW formatting. + * config.gcc: Add avx512vpopcntdqvlintrin.h and avx512bitalgintrin.h. + * config/i386/avx512bitalgintrin.h (_mm512_popcnt_epi8, _mm512_popcnt_epi16, + _mm512_mask_popcnt_epi8, _mm512_maskz_popcnt_epi8, _mm512_mask_popcnt_epi16, + _mm512_maskz_popcnt_epi16, _mm512_bitshuffle_epi64_mask, _mm256_popcnt_epi8, + _mm512_mask_bitshuffle_epi64_mask, _mm256_mask_popcnt_epi8, _mm_popcnt_epi8, + _mm256_maskz_popcnt_epi8, _mm_bitshuffle_epi64_mask, _mm256_popcnt_epi16, + _mm_mask_bitshuffle_epi64_mask, _mm256_bitshuffle_epi64_mask, + _mm256_mask_bitshuffle_epi64_mask, _mm_popcnt_epi16, _mm_maskz_popcnt_epi8, + _mm256_mask_popcnt_epi16, _mm256_maskz_popcnt_epi16, _mm_mask_popcnt_epi8, + _mm_mask_popcnt_epi16, _mm_maskz_popcnt_epi16): New intrinsics. + * config/i386/avx512vpopcntdqvlintrin.h (_mm_popcnt_epi32, _mm_popcnt_epi64, + _mm_mask_popcnt_epi32, _mm_maskz_popcnt_epi32, _mm256_popcnt_epi32, + _mm256_mask_popcnt_epi32, _mm256_maskz_popcnt_epi32, _mm_mask_popcnt_epi64, + _mm_maskz_popcnt_epi64, _mm256_popcnt_epi64, _mm256_mask_popcnt_epi64, + _mm256_maskz_popcnt_epi64): New intrinsics. + * config/i386/cpuid.h (bit_AVX512BITALG): New bit. + * config/i386/driver-i386.c (host_detect_local_cpu): Detect -mavx512bitalg. + * config/i386/i386-builtin-types.def (V64QI_FTYPE_V64QI, V64QI_FTYPE_V64QI, + V4DI_FTYPE_V4DI, UHI_FTYPE_V2DI_V2DI_UHI, USI_FTYPE_V4DI_V4DI_USI, + V4SI_FTYPE_V4SI_V4SI_UHI, V8SI_FTYPE_V8SI_V8SI_UHI): New types. + * config/i386/i386-builtin.def (__builtin_ia32_vpopcountq_v4di, + __builtin_ia32_vpopcountq_v4di_mask, __builtin_ia32_vpopcountq_v2di, + __builtin_ia32_vpopcountq_v2di_mask, __builtin_ia32_vpopcountd_v4si, + __builtin_ia32_vpopcountd_v4si_mask, __builtin_ia32_vpopcountd_v8si, + __builtin_ia32_vpopcountd_v8si_mask, __builtin_ia32_vpopcountb_v64qi, + __builtin_ia32_vpopcountb_v64qi_mask, __builtin_ia32_vpopcountb_v32qi, + __builtin_ia32_vpopcountb_v32qi_mask, __builtin_ia32_vpopcountb_v16qi, + __builtin_ia32_vpopcountb_v16qi_mask, __builtin_ia32_vpopcountw_v32hi, + __builtin_ia32_vpopcountw_v32hi_mask, __builtin_ia32_vpopcountw_v16hi, + __builtin_ia32_vpopcountw_v16hi_mask, __builtin_ia32_vpopcountw_v8hi, + __builtin_ia32_vpopcountw_v8hi_mask, __builtin_ia32_vpshufbitqmb128_mask, + __builtin_ia32_vpshufbitqmb256_mask, + __builtin_ia32_vpshufbitqmb512_mask): New builtins. + * config/i386/i386-c.c (__AVX512BITALG__): New. + * config/i386/i386.c (isa2_opts): Add -mavx512bitalg. + (ix86_valid_target_attribute_inner_p): Ditto. + (ix86_expand_args_builtin): Handle new types. + * config/i386/i386.h (TARGET_AVX512BITALG, TARGET_AVX512BITALG_P): New. + * config/i386/i386.opt: Add -mavx512bitalg. + * config/i386/immintrin.h: Add avx512vpopcntdqvlintrin.h and + avx512bitalgintrin.h. + * config/i386/sse.md (VI48_AVX512VLBW): New iterator. + (vpopcount): Add more types. + (avx512vl_vpshufbitqmb): New. + * doc/invoke.texi: Add -mavx512bitalg and -mavx512vpopcntdq. + 2017-12-22 Igor Tsimbalist * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512VNNI_SET): diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 421b450b207..73224f60e44 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -85,6 +85,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX512VNNI_SET \ (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET OPTION_MASK_ISA_AVX512VPOPCNTDQ +#define OPTION_MASK_ISA_AVX512BITALG_SET OPTION_MASK_ISA_AVX512BITALG #define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW #define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED @@ -201,6 +202,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2 #define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ +#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG #define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW #define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED @@ -444,11 +446,17 @@ ix86_handle_option (struct gcc_options *opts, /* Turn off additional isa flags. */ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124FMAPS_UNSET; - opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET; + opts->x_ix86_isa_flags2_explicit + |= OPTION_MASK_ISA_AVX5124FMAPS_UNSET; opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX5124VNNIW_UNSET; - opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET; + opts->x_ix86_isa_flags2_explicit + |= OPTION_MASK_ISA_AVX5124VNNIW_UNSET; opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET; - opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET; + opts->x_ix86_isa_flags2_explicit + |= OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BITALG_UNSET; + opts->x_ix86_isa_flags2_explicit + |= OPTION_MASK_ISA_AVX512BITALG_UNSET; } return true; @@ -643,6 +651,22 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavx512bitalg: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_AVX512BITALG_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_AVX512BITALG_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512F_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_AVX512BITALG_UNSET; + opts->x_ix86_isa_flags2_explicit + |= OPTION_MASK_ISA_AVX512BITALG_UNSET; + } + return true; + case OPT_msgx: if (value) { diff --git a/gcc/config.gcc b/gcc/config.gcc index 1a0d6765552..8dc83c8cb2f 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -382,7 +382,8 @@ i[34567]86-*-*) gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h avx512vnnivlintrin.h gfniintrin.h vaesintrin.h - vpclmulqdqintrin.h" + vpclmulqdqintrin.h avx512vpopcntdqvlintrin.h + avx512bitalgintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -410,7 +411,8 @@ x86_64-*-*) gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h avx512vnnivlintrin.h gfniintrin.h vaesintrin.h - vpclmulqdqintrin.h" + vpclmulqdqintrin.h gfniintrin.h + avx512vpopcntdqvlintrin.h avx512bitalgintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx512bitalgintrin.h b/gcc/config/i386/avx512bitalgintrin.h new file mode 100644 index 00000000000..b5077078b64 --- /dev/null +++ b/gcc/config/i386/avx512bitalgintrin.h @@ -0,0 +1,282 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#if !defined _IMMINTRIN_H_INCLUDED +# error "Never use directly; include instead." +#endif + +#ifndef _AVX512BITALGINTRIN_H_INCLUDED +#define _AVX512BITALGINTRIN_H_INCLUDED + +#ifndef __AVX512BITALG__ +#pragma GCC push_options +#pragma GCC target("avx512bitalg") +#define __DISABLE_AVX512BITALG__ +#endif /* __AVX512BITALG__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_popcnt_epi8 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountb_v64qi ((__v64qi) __A); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_popcnt_epi16 (__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountw_v32hi ((__v32hi) __A); +} + +#ifdef __DISABLE_AVX512BITALG__ +#undef __DISABLE_AVX512BITALG__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALG__ */ + +#if !defined(__AVX512BITALG__) || !defined(__AVX512BW__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512bw") +#define __DISABLE_AVX512BITALGBW__ +#endif /* __AVX512VLBW__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_popcnt_epi8 (__m512i __A, __mmask64 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A, + (__v64qi) __B, + (__mmask64) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_popcnt_epi8 (__mmask64 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountb_v64qi_mask ((__v64qi) __A, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) __U); +} +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_popcnt_epi16 (__m512i __A, __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A, + (__v32hi) __B, + (__mmask32) __U); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_popcnt_epi16 (__mmask32 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcountw_v32hi_mask ((__v32hi) __A, + (__v32hi) + _mm512_setzero_si512 (), + (__mmask32) __U); +} + +extern __inline __mmask64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_bitshuffle_epi64_mask (__m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A, + (__v8di) __B, + (__mmask64) -1); +} + +extern __inline __mmask64 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_bitshuffle_epi64_mask (__mmask8 __M, __m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask ((__v8di) __A, + (__v8di) __B, + (__mmask64) __M); +} + +#ifdef __DISABLE_AVX512BITALGBW__ +#undef __DISABLE_AVX512BITALGBW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGBW__ */ + +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) || !defined(__AVX512BW__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512vl,avx512bw") +#define __DISABLE_AVX512BITALGVLBW__ +#endif /* __AVX512VLBW__ */ + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi8 (__m256i __A, __mmask32 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, + (__v32qi) __B, + (__mmask32) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi8 (__mmask32 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi_mask ((__v32qi) __A, + (__v32qi) + _mm256_setzero_si256 (), + (__mmask32) __U); +} +extern __inline __mmask16 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_bitshuffle_epi64_mask (__m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A, + (__v2di) __B, + (__mmask16) -1); +} + +extern __inline __mmask16 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_bitshuffle_epi64_mask (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask ((__v2di) __A, + (__v2di) __B, + (__mmask16) __M); +} + +extern __inline __mmask32 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_bitshuffle_epi64_mask (__m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A, + (__v4di) __B, + (__mmask32) -1); +} + +extern __inline __mmask32 +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_bitshuffle_epi64_mask (__mmask32 __M, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask ((__v4di) __A, + (__v4di) __B, + (__mmask32) __M); +} + +#ifdef __DISABLE_AVX512BITALGVLBW__ +#undef __DISABLE_AVX512BITALGVLBW__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGVLBW__ */ + + +#if !defined(__AVX512BITALG__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("avx512bitalg,avx512vl") +#define __DISABLE_AVX512BITALGVL__ +#endif /* __AVX512VLBW__ */ + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi8 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountb_v32qi ((__v32qi) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi16 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi ((__v16hi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi ((__v16qi) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi ((__v8hi) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi16 (__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, + (__v16hi) __B, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi16 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountw_v16hi_mask ((__v16hi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi8 (__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, + (__v16qi) __B, + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi8 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountb_v16qi_mask ((__v16qi) __A, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __U); +} +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi16 (__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, + (__v8hi) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi16 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountw_v8hi_mask ((__v8hi) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __U); +} +#ifdef __DISABLE_AVX512BITALGVL__ +#undef __DISABLE_AVX512BITALGVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512BITALGBW__ */ + +#endif /* _AVX512BITALGINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512vpopcntdqvlintrin.h b/gcc/config/i386/avx512vpopcntdqvlintrin.h new file mode 100644 index 00000000000..c8f57170e01 --- /dev/null +++ b/gcc/config/i386/avx512vpopcntdqvlintrin.h @@ -0,0 +1,147 @@ +/* Copyright (C) 2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#if !defined _IMMINTRIN_H_INCLUDED +# error "Never use directly; include instead." +#endif + +#ifndef _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED +#define _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED + +#if !defined(__AVX512VPOPCNTDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("avx512vpopcntdq,avx512vl") +#define __DISABLE_AVX512VPOPCNTDQVL__ +#endif /* __AVX512VPOPCNTDQVL__ */ + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi32 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountd_v4si ((__v4si) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi32 (__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A, + (__v4si) __B, + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi32 (__mmask16 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountd_v4si_mask ((__v4si) __A, + (__v4si) + _mm_setzero_si128 (), + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi32 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountd_v8si ((__v8si) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi32 (__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A, + (__v8si) __B, + (__mmask16) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi32 (__mmask16 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountd_v8si_mask ((__v8si) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask16) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_popcnt_epi64 (__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountq_v2di ((__v2di) __A); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_popcnt_epi64 (__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A, + (__v2di) __B, + (__mmask8) __U); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_popcnt_epi64 (__mmask8 __U, __m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcountq_v2di_mask ((__v2di) __A, + (__v2di) + _mm_setzero_si128 (), + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_popcnt_epi64 (__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountq_v4di ((__v4di) __A); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_popcnt_epi64 (__m256i __A, __mmask8 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A, + (__v4di) __B, + (__mmask8) __U); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_popcnt_epi64 (__mmask8 __U, __m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcountq_v4di_mask ((__v4di) __A, + (__v4di) + _mm256_setzero_si256 (), + (__mmask8) __U); +} + +#ifdef __DISABLE_AVX512VPOPCNTDQVL__ +#undef __DISABLE_AVX512VPOPCNTDQVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512VPOPCNTDQVL__ */ + +#endif /* _AVX512VPOPCNTDQVLINTRIN_H_INCLUDED */ + diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 37f3e1a96bb..1660d26579a 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -103,6 +103,7 @@ #define bit_VAES (1 << 9) #define bit_AVX512VNNI (1 << 11) #define bit_VPCLMULQDQ (1 << 10) +#define bit_AVX512BITALG (1 << 12) #define bit_AVX512VPOPCNTDQ (1 << 14) #define bit_RDPID (1 << 22) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 99826fd651e..1e069368a91 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -418,6 +418,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0; unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0; unsigned int has_gfni = 0, has_avx512vbmi2 = 0; + unsigned int has_avx512bitalg = 0; unsigned int has_ibt = 0, has_shstk = 0; unsigned int has_avx512vnni = 0, has_vaes = 0; unsigned int has_vpclmulqdq = 0; @@ -515,6 +516,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_gfni = ecx & bit_GFNI; has_vaes = ecx & bit_VAES; has_vpclmulqdq = ecx & bit_VPCLMULQDQ; + has_avx512bitalg = ecx & bit_AVX512BITALG; has_avx5124vnniw = edx & bit_AVX5124VNNIW; has_avx5124fmaps = edx & bit_AVX5124FMAPS; @@ -1083,6 +1085,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk"; const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes"; const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq"; + const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, @@ -1093,7 +1096,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) xsavec, xsaves, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, - avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL); + avx512vbmi2, avx512vnni, vaes, vpclmulqdq, + avx512bitalg, NULL); } done: diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 1423f3ee8b7..9ecdcc08736 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -309,6 +309,8 @@ DEF_FUNCTION_TYPE (V16SI, V4SI) DEF_FUNCTION_TYPE (V16SI, V8SI) DEF_FUNCTION_TYPE (V16SI, V16SF) DEF_FUNCTION_TYPE (V16SI, V16SI) +DEF_FUNCTION_TYPE (V32HI, V32HI) +DEF_FUNCTION_TYPE (V64QI, V64QI) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI) DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI) DEF_FUNCTION_TYPE (V8DI, PV8DI) @@ -1256,3 +1258,10 @@ DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI, V4DI, INT) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI) DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, INT) DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI, V2DI, INT) + +# BITALG builtins +DEF_FUNCTION_TYPE (UHI, V2DI, V2DI, UHI) +DEF_FUNCTION_TYPE (USI, V4DI, V4DI, USI) +DEF_FUNCTION_TYPE (V4DI, V4DI) +DEF_FUNCTION_TYPE (V4SI, V4SI, V4SI, UHI) +DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI, UHI) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 3365cea46ab..2c6ea3c1042 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2788,6 +2788,16 @@ BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv16si_mask, "__builtin BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di, "__builtin_ia32_vpopcountq_v8di", IX86_BUILTIN_VPOPCOUNTQV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI) BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ, CODE_FOR_vpopcountv8di_mask, "__builtin_ia32_vpopcountq_v8di_mask", IX86_BUILTIN_VPOPCOUNTQV8DI_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di, "__builtin_ia32_vpopcountq_v4di", IX86_BUILTIN_VPOPCOUNTQV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4di_mask, "__builtin_ia32_vpopcountq_v4di_mask", IX86_BUILTIN_VPOPCOUNTQV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di, "__builtin_ia32_vpopcountq_v2di", IX86_BUILTIN_VPOPCOUNTQV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv2di_mask, "__builtin_ia32_vpopcountq_v2di_mask", IX86_BUILTIN_VPOPCOUNTQV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si, "__builtin_ia32_vpopcountd_v4si", IX86_BUILTIN_VPOPCOUNTDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv4si_mask, "__builtin_ia32_vpopcountd_v4si_mask", IX86_BUILTIN_VPOPCOUNTDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UHI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si, "__builtin_ia32_vpopcountd_v8si", IX86_BUILTIN_VPOPCOUNTDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpopcountv8si_mask, "__builtin_ia32_vpopcountd_v8si_mask", IX86_BUILTIN_VPOPCOUNTDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UHI) + + /* RDPID */ BDESC (OPTION_MASK_ISA_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_BUILTIN_RDPID, UNKNOWN, (int) UNSIGNED_FTYPE_VOID) @@ -2805,6 +2815,24 @@ BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenc BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) BDESC (OPTION_MASK_ISA_VAES, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) +/* BITALG */ +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi, "__builtin_ia32_vpopcountb_v64qi", IX86_BUILTIN_VPOPCOUNTBV64QI, UNKNOWN, (int) V64QI_FTYPE_V64QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv64qi_mask, "__builtin_ia32_vpopcountb_v64qi_mask", IX86_BUILTIN_VPOPCOUNTBV64QI_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_UDI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi, "__builtin_ia32_vpopcountb_v32qi", IX86_BUILTIN_VPOPCOUNTBV32QI, UNKNOWN, (int) V32QI_FTYPE_V32QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32qi_mask, "__builtin_ia32_vpopcountb_v32qi_mask", IX86_BUILTIN_VPOPCOUNTBV32QI_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi, "__builtin_ia32_vpopcountb_v16qi", IX86_BUILTIN_VPOPCOUNTBV16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16qi_mask, "__builtin_ia32_vpopcountb_v16qi_mask", IX86_BUILTIN_VPOPCOUNTBV16QI_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_UHI) + +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi, "__builtin_ia32_vpopcountw_v32hi", IX86_BUILTIN_VPOPCOUNTWV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv32hi_mask, "__builtin_ia32_vpopcountw_v32hi_mask", IX86_BUILTIN_VPOPCOUNTQV32HI_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi, "__builtin_ia32_vpopcountw_v16hi", IX86_BUILTIN_VPOPCOUNTWV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv16hi_mask, "__builtin_ia32_vpopcountw_v16hi_mask", IX86_BUILTIN_VPOPCOUNTQV16HI_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi, "__builtin_ia32_vpopcountw_v8hi", IX86_BUILTIN_VPOPCOUNTWV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_vpopcountv8hi_mask, "__builtin_ia32_vpopcountw_v8hi_mask", IX86_BUILTIN_VPOPCOUNTQV8HI_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_UQI) + +BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv2di_mask, "__builtin_ia32_vpshufbitqmb128_mask", IX86_BUILTIN_VPSHUFBITQMB128_MASK, UNKNOWN, (int) UHI_FTYPE_V2DI_V2DI_UHI) +BDESC (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpshufbitqmbv4di_mask, "__builtin_ia32_vpshufbitqmb256_mask", IX86_BUILTIN_VPSHUFBITQMB256_MASK, UNKNOWN, (int) USI_FTYPE_V4DI_V4DI_USI) +BDESC (OPTION_MASK_ISA_AVX512BITALG, CODE_FOR_avx512vl_vpshufbitqmbv8di_mask, "__builtin_ia32_vpshufbitqmb512_mask", IX86_BUILTIN_VPSHUFBITQMB512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI) BDESC_END (ARGS2, MPX) /* Builtins for MPX. */ diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index d9de37bfeff..dbd5f43b8ca 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -402,6 +402,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__SGX__"); if (isa_flag2 & OPTION_MASK_ISA_AVX5124FMAPS) def_or_undef (parse_in, "__AVX5124FMAPS__"); + if (isa_flag2 & OPTION_MASK_ISA_AVX512BITALG) + def_or_undef (parse_in, "__AVX512BITALG__"); if (isa_flag2 & OPTION_MASK_ISA_AVX512VPOPCNTDQ) def_or_undef (parse_in, "__AVX512VPOPCNTDQ__"); if (isa_flag & OPTION_MASK_ISA_FMA) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 575e75abe71..7b055d19fc1 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2763,7 +2763,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, { "-mhle", OPTION_MASK_ISA_HLE }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mclzero", OPTION_MASK_ISA_CLZERO }, - { "-mmwaitx", OPTION_MASK_ISA_MWAITX } + { "-mmwaitx", OPTION_MASK_ISA_MWAITX }, + { "-mavx512bitalg", OPTION_MASK_ISA_AVX512BITALG } }; static struct ix86_target_opts isa_opts[] = { @@ -5266,6 +5267,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("avx512vpopcntdq", OPT_mavx512vpopcntdq), IX86_ATTR_ISA ("avx512vbmi2", OPT_mavx512vbmi2), IX86_ATTR_ISA ("avx512vnni", OPT_mavx512vnni), + IX86_ATTR_ISA ("avx512bitalg", OPT_mavx512bitalg), IX86_ATTR_ISA ("avx512vbmi", OPT_mavx512vbmi), IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), @@ -33536,12 +33538,15 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16SI_FTYPE_V4SI: case V16SI_FTYPE_V16SF: case V16SI_FTYPE_V16SI: + case V64QI_FTYPE_V64QI: + case V32HI_FTYPE_V32HI: case V16SF_FTYPE_V16SF: case V8DI_FTYPE_UQI: case V8DI_FTYPE_V8DI: case V8DF_FTYPE_V4DF: case V8DF_FTYPE_V2DF: case V8DF_FTYPE_V8DF: + case V4DI_FTYPE_V4DI: nargs = 1; break; case V4SF_FTYPE_V4SF_VEC_MERGE: @@ -33918,6 +33923,10 @@ ix86_expand_args_builtin (const struct builtin_description *d, case HI_FTYPE_V16SF_INT_UHI: case QI_FTYPE_V8SF_INT_UQI: case QI_FTYPE_V4SF_INT_UQI: + case UHI_FTYPE_V2DI_V2DI_UHI: + case USI_FTYPE_V4DI_V4DI_USI: + case V4SI_FTYPE_V4SI_V4SI_UHI: + case V8SI_FTYPE_V8SI_V8SI_UHI: nargs = 3; mask_pos = 1; nargs_constant = 1; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 7da8573bc40..3b953de506a 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -91,6 +91,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_AVX512VPOPCNTDQ_P(x) TARGET_ISA_AVX512VPOPCNTDQ_P(x) #define TARGET_AVX512VNNI TARGET_ISA_AVX512VNNI #define TARGET_AVX512VNNI_P(x) TARGET_ISA_AVX512VNNI_P(x) +#define TARGET_AVX512BITALG TARGET_ISA_AVX512BITALG +#define TARGET_AVX512BITALG_P(x) TARGET_ISA_AVX512BITALG_P(x) #define TARGET_FMA TARGET_ISA_FMA #define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x) #define TARGET_SSE4A TARGET_ISA_SSE4A diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 9e7bccef4ce..01cdac80b19 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -745,6 +745,10 @@ mavx512vnni Target Report Mask(ISA_AVX512VNNI) Var(ix86_isa_flags) Save Support AVX512VNNI built-in functions and code generation. +mavx512bitalg +Target Report Mask(ISA_AVX512BITALG) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512BITALG built-in functions and code generation. + mfma Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 7fcaa695b0a..0a68501c127 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -82,6 +82,10 @@ #include +#include + +#include + #include #include diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 20e7b160ac9..f4f68eb5699 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -181,6 +181,9 @@ ;; For VPCLMULQDQ support UNSPEC_VPCLMULQDQ + + ;; For AVX512BITALG support + UNSPEC_VPSHUFBIT ]) (define_c_enum "unspecv" [ @@ -501,6 +504,10 @@ (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2") (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")]) +(define_mode_iterator VI48_AVX512VLBW + [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX512VL") + (V2DI "TARGET_AVX512VL")]) + (define_mode_attr avx512 [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw") (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw") @@ -20023,9 +20030,9 @@ (set_attr ("mode") ("TI"))]) (define_insn "vpopcount" - [(set (match_operand:VI48_512 0 "register_operand" "=v") - (popcount:VI48_512 - (match_operand:VI48_512 1 "nonimmediate_operand" "vm")))] + [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v") + (popcount:VI48_AVX512VL + (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))] "TARGET_AVX512VPOPCNTDQ" "vpopcnt\t{%1, %0|%0, %1}") @@ -20066,6 +20073,13 @@ "TARGET_SSE && TARGET_64BIT" "jmp\t%P1") +(define_insn "vpopcount" + [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") + (popcount:VI12_AVX512VL + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))] + "TARGET_AVX512BITALG" + "vpopcnt\t{%1, %0|%0, %1}") + (define_insn "vgf2p8affineinvqb_" [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,x,v") (unspec:VI1_AVX512F [(match_operand:VI1_AVX512F 1 "register_operand" "%0,x,v") @@ -20514,3 +20528,14 @@ "TARGET_VPCLMULQDQ" "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "mode" "DI")]) + +(define_insn "avx512vl_vpshufbitqmb" + [(set (match_operand: 0 "register_operand" "=Yk") + (unspec: + [(match_operand:VI48_AVX512VLBW 1 "register_operand" "v") + (match_operand:VI48_AVX512VLBW 2 "nonimmediate_operand" "vm")] + UNSPEC_VPSHUFBIT))] + "TARGET_AVX512BITALG" + "vpshufbitqmb\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "prefix" "evex") + (set_attr "mode" "")]) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 283eab82d05..b15cc447092 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1208,7 +1208,7 @@ See RS/6000 and PowerPC Options. -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol -mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes @gol -mcet -mibt -mshstk -mforce-indirect-call -mavx512vbmi2 @gol --mvpclmulqdq @gol +-mvpclmulqdq -mavx512bitalg -mavx512vpopcntdq @gol -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol @@ -26165,12 +26165,19 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mvpclmulqdq @opindex mvpclmulqdq +@need 200 +@itemx -mavx512bitalg +@opindex mavx512bitalg +@need 200 +@itemx -mavx512vpopcntdq +@opindex mavx512vpopcntdq These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES, FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2, -GFNI, VPCLMULQDQ, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. +GFNI, VPCLMULQDQ, AVX512BITALG, AVX512VPOPCNTDQ3DNow!@: or enhanced 3DNow!@: +extended instruction sets. Each has a corresponding @option{-mno-} option to disable use of these instructions. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 91d1102e206..fb9c339ea3f 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,32 @@ +2017-12-22 Julia Koval + Sebastian Peryt + + * g++.dg/other/i386-2.C: Add new options. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/sse-12.c: Ditto. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/avx512-check.h: Handle bit_AVX512BITALG. + * gcc.target/i386/avx512bitalg-vpopcntb-1.c: New. + * gcc.target/i386/avx512bitalg-vpopcntb.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntbvl.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntw-1.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntw.c: Ditto. + * gcc.target/i386/avx512bitalg-vpopcntwvl.c: Ditto. + * gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c: Ditto. + * gcc.target/i386/avx512bitalg-vpshufbitqmb.c: Ditto. + * gcc.target/i386/avx512bitalgvl-vpopcntb-1.c: Ditto. + * gcc.target/i386/avx512bitalgvl-vpopcntw-1.c: Ditto. + * gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c: Ditto. + * gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c: Ditto. + * gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c: Ditto. + * gcc.target/i386/i386.exp (check_effective_target_avx512bitalg): New. + * gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c: Add more types. + * gcc.target/i386/avx512vpopcntdq-vpopcntd.c: Handle new intrinsics. + * gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c: Ditto. + * gcc.target/i386/avx512vpopcntdq-vpopcntq.c: Ditto. + 2017-12-22 Mike Stump Eric Botcazou diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index 7e35e686cff..75a8c27bb16 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,12 +1,12 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */ - +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h - and mm_malloc.h.h are usable with -O -pedantic-errors. */ + avx512bitalgintrin.h and mm_malloc.h.h are usable with -O + -pedantic-errors. */ #include diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 7e44d47a93c..444c246aa32 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,10 +1,11 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, pkuintrin.h, avx5124fmapsintrin.h, - avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h and - mm_malloc.h are usable with -O -fkeep-inline-functions. */ + avx5124vnniwintrin.h, avx512vpopcntdqintrin.h gfniintrin.h + avx512bitalgintrin.h and mm_malloc.h are usable with -O + -fkeep-inline-functions. */ #include diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 2d174f9df5d..234e60c818b 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -75,6 +75,9 @@ main () #ifdef AVX512VPOPCNTDQ && (ecx & bit_AVX512VPOPCNTDQ) #endif +#ifdef AVX512BITALG + && (ecx & bit_AVX512BITALG) +#endif #ifdef GFNI && (ecx & bit_GFNI) #endif diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c new file mode 100644 index 00000000000..2c1a9a54ca6 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb-1.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bitalg" } */ +/* { dg-require-effective-target avx512bitalg } */ + +#include "avx512f-helper.h" + +#define AVX512BITALG +#define SIZE (AVX512F_LEN / 8) + +#include "avx512f-mask-type.h" + +#define TYPE char + +int +CALC (TYPE v) +{ + int ret; + int i; + + ret = 0; + for (i = 0; i < sizeof(v) * 8; i++) + if ((v & ((TYPE)1 << (TYPE) i))) + ret++; + + return ret; +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, i_b) res1, res2, res3, src, src0; + MASK_TYPE mask = MASK_VALUE; + TYPE res_ref[SIZE]; + src.x = INTRINSIC (_set1_epi8) (0x3D); + int i; + + for (i = 0; i < SIZE; i++) + { + res_ref[i] = CALC (src.a[i]); + src0.a[i] = DEFAULT_VALUE; + } + + res1.x = INTRINSIC (_popcnt_epi8) (src.x); + res2.x = INTRINSIC (_mask_popcnt_epi8) (src.x, mask, src0.x); + res3.x = INTRINSIC (_maskz_popcnt_epi8) (mask, src.x); + + if (UNION_CHECK (AVX512F_LEN, i_b) (res1, res_ref)) + abort (); + + MASK_MERGE (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res2, res_ref)) + abort (); + + MASK_ZERO (i_b) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c new file mode 100644 index 00000000000..b23da58dbaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntb.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +extern __m512i z, z1; + +int foo () +{ + __mmask16 msk; + __m512i c = _mm512_popcnt_epi8 (z); + asm volatile ("" : "+v" (c)); + c = _mm512_mask_popcnt_epi8 (z, msk, z1); + asm volatile ("" : "+v" (c)); + c = _mm512_maskz_popcnt_epi8 (msk, z); + asm volatile ("" : "+v" (c)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c new file mode 100644 index 00000000000..e6d60f7596c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntbvl.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntb\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +extern __m256i y, y_1; +extern __m128i x, x_1; + +int foo () +{ + __mmask32 msk32; + __mmask16 msk16; + __m256i c256 = _mm256_popcnt_epi8 (y); + asm volatile ("" : "+v" (c256)); + c256 = _mm256_mask_popcnt_epi8 (y, msk32, y_1); + asm volatile ("" : "+v" (c256)); + c256 = _mm256_maskz_popcnt_epi8 (msk32, y); + asm volatile ("" : "+v" (c256)); + __m128i c128 = _mm_popcnt_epi8 (x); + asm volatile ("" : "+v" (c128)); + c128 = _mm_mask_popcnt_epi8 (x, msk16, x_1); + asm volatile ("" : "+v" (c128)); + c128 = _mm_maskz_popcnt_epi8 (msk16, x); + asm volatile ("" : "+v" (c128)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c new file mode 100644 index 00000000000..500b7f3d1d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw-1.c @@ -0,0 +1,57 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bitalg" } */ +/* { dg-require-effective-target avx512bitalg } */ + +#include "avx512f-helper.h" + +#define AVX512BITALG +#define SIZE (AVX512F_LEN / 16) + +#include "avx512f-mask-type.h" + +#define TYPE short + +int +CALC (TYPE v) +{ + int ret; + int i; + + ret = 0; + for (i = 0; i < sizeof(v) * 8; i++) + if ((v & ((TYPE)1 << (TYPE) i))) + ret++; + + return ret; +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3, src, src0; + MASK_TYPE mask = MASK_VALUE; + TYPE res_ref[SIZE]; + src.x = INTRINSIC (_set1_epi8) (0x3D); + int i; + + for (i = 0; i < SIZE; i++) + { + res_ref[i] = CALC (src.a[i]); + src0.a[i] = DEFAULT_VALUE; + } + + res1.x = INTRINSIC (_popcnt_epi16) (src.x); + res2.x = INTRINSIC (_mask_popcnt_epi16) (src.x, mask, src0.x); + res3.x = INTRINSIC (_maskz_popcnt_epi16) (mask, src.x); + + if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref)) + abort (); + + MASK_MERGE (i_w) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref)) + abort (); + + MASK_ZERO (i_w) (res_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c new file mode 100644 index 00000000000..2c49583b597 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntw.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512bw" } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +extern __m512i z, z1; + +int foo () +{ + __mmask16 msk; + __m512i c = _mm512_popcnt_epi16 (z); + asm volatile ("" : "+v" (c)); + c = _mm512_mask_popcnt_epi16 (z, msk, z1); + asm volatile ("" : "+v" (c)); + c = _mm512_maskz_popcnt_epi16 (msk, z); + asm volatile ("" : "+v" (c)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c new file mode 100644 index 00000000000..b55adc6023a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpopcntwvl.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavx512bitalg -mavx512bw -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntw\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +extern __m256i y, y_1; +extern __m128i x, x_1; + +int foo () +{ + __mmask16 msk16; + __mmask8 msk8; + __m256i c256 = _mm256_popcnt_epi16 (y); + asm volatile ("" : "+v" (c256)); + c256 = _mm256_mask_popcnt_epi16 (y, msk16, y_1); + asm volatile ("" : "+v" (c256)); + c256 = _mm256_maskz_popcnt_epi16 (msk16, y); + asm volatile ("" : "+v" (c256)); + __m128i c128 = _mm_popcnt_epi16 (x); + asm volatile ("" : "+v" (c128)); + c128 = _mm_mask_popcnt_epi16 (x, msk8, x_1); + asm volatile ("" : "+v" (c128)); + c128 = _mm_maskz_popcnt_epi16 (msk8, x); + asm volatile ("" : "+v" (c128)); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c new file mode 100644 index 00000000000..2ee6ca64131 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalg-vpshufbitqmb-1.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bitalg" } */ +/* { dg-require-effective-target avx512bitalg } */ + +#include "avx512f-helper.h" + +#define AVX512BITALG +#define SIZE (AVX512F_LEN / 8) + +#include "avx512f-mask-type.h" + +#define TYPE unsigned long long + +unsigned char +CALC (TYPE a, TYPE b) +{ + unsigned char res = 0; + for (int i = 0; i < 8; i++) + { + unsigned char m = (b >> (64 - ((i+1)*8))) & 0x3F; + unsigned char bit = (a >> m) & 1; + res |= (bit << (8 - i - 1)); + } + + return res; +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, i_q) src1, src2; + MASK_TYPE mask = MASK_VALUE; + TYPE res1, res2; + TYPE res_ref = 0; + + src1.x = INTRINSIC (_set1_epi8) (0x13); + src2.x = INTRINSIC (_set1_epi8) (0x17); + + src1.a[0] = 0xff; + src2.a[0] = 0xff; + + for (int i = 0; i < SIZE/8; i++) + { + unsigned long long bit = CALC (src1.a[i], src2.a[i]); + res_ref |= ((unsigned long long)(CALC (src1.a[i], src2.a[i])) << (i*8)); + } + + res1 = INTRINSIC (_bitshuffle_epi64_mask) (src1.x, src2.x); + res2 = INTRINSIC (_mask_bitshuffle_epi64_mask) (mask, src1.x, src2.x); + + if (res1 != res_ref) + abort(); + + for (int i = 0; i < SIZE; i++) + { + if (!((mask >> i) & 1)) + res_ref &= ~((unsigned long long)1 < + +volatile __m128i x128; +volatile __m256i x256; +volatile __m512i x512; + +volatile __mmask16 m16; +volatile __mmask32 m32; +volatile __mmask64 m64; + +void extern +avx512vl_test (void) +{ + m16 = _mm_bitshuffle_epi64_mask (x128, x128); + m32 = _mm256_bitshuffle_epi64_mask (x256, x256); + m64 = _mm512_bitshuffle_epi64_mask (x512, x512); + m16 = _mm_mask_bitshuffle_epi64_mask (m16, x128, x128); + m32 = _mm256_mask_bitshuffle_epi64_mask (m32, x256, x256); + m64 = _mm512_mask_bitshuffle_epi64_mask (m64, x512, x512); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c new file mode 100644 index 00000000000..a4e9d63fc1c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntb-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bitalg } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512bitalg-vpopcntb-1.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512bitalg-vpopcntb-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c new file mode 100644 index 00000000000..55fa811fb46 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpopcntw-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bitalg } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512bitalg-vpopcntw-1.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512bitalg-vpopcntw-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c new file mode 100644 index 00000000000..497e369bf80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512bitalgvl-vpshufbitqmb-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512bitalg" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bitalg } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512bitalg-vpshufbitqmb-1.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512bitalg-vpshufbitqmb-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c index d9faf0adc4e..4fb949fb4c3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd-1.c @@ -2,17 +2,17 @@ /* { dg-options "-O2 -mavx512vpopcntdq" } */ /* { dg-require-effective-target avx512vpopcntdq } */ -#define AVX512VPOPCNTDQ #include "avx512f-helper.h" +#define AVX512VPOPCNTDQ #define SIZE (AVX512F_LEN / 32) #include "avx512f-mask-type.h" #define TYPE int -static int -compute_popcnt (TYPE v) +int +CALC (TYPE v) { int ret; int i; @@ -31,12 +31,12 @@ TEST (void) UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3, src, src0; MASK_TYPE mask = MASK_VALUE; TYPE res_ref[SIZE]; - src.x = _mm512_set1_epi8 (0x3D); + src.x = INTRINSIC (_set1_epi8) (0x3D); int i; for (i = 0; i < SIZE; i++) { - res_ref[i] = compute_popcnt (src.a[i]); + res_ref[i] = CALC (src.a[i]); src0.a[i] = DEFAULT_VALUE; } diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c index c55a05a25fb..c70f226824e 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntd.c @@ -1,19 +1,40 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512vpopcntdq" } */ +/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntd\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -extern __m512i z, z1; +extern __m128i x, x_1; +extern __m256i y, y_1; +extern __m512i z, z_1; int foo () { __mmask16 msk; + __mmask8 msk8; + __m128i a = _mm_popcnt_epi32 (x); + asm volatile ("" : "+v" (a)); + a = _mm_mask_popcnt_epi32 (x, msk8, x_1); + asm volatile ("" : "+v" (a)); + a = _mm_maskz_popcnt_epi32 (msk8, x); + asm volatile ("" : "+v" (a)); + __m256i b = _mm256_popcnt_epi32 (y); + asm volatile ("" : "+v" (b)); + b = _mm256_mask_popcnt_epi32 (y, msk8, y_1); + asm volatile ("" : "+v" (b)); + b = _mm256_maskz_popcnt_epi32 (msk8, y); + asm volatile ("" : "+v" (b)); __m512i c = _mm512_popcnt_epi32 (z); asm volatile ("" : "+v" (c)); - c = _mm512_mask_popcnt_epi32 (z, msk, z1); + c = _mm512_mask_popcnt_epi32 (z, msk, z_1); asm volatile ("" : "+v" (c)); c = _mm512_maskz_popcnt_epi32 (msk, z); asm volatile ("" : "+v" (c)); diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c index 5a628210f63..cc0d8b8e036 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq-1.c @@ -2,17 +2,17 @@ /* { dg-options "-O2 -mavx512vpopcntdq" } */ /* { dg-require-effective-target avx512vpopcntdq } */ -#define AVX512VPOPCNTDQ #include "avx512f-helper.h" +#define AVX512VPOPCNTDQ #define SIZE (AVX512F_LEN / 64) #include "avx512f-mask-type.h" #define TYPE long long -static int -compute_popcnt (TYPE v) +int +CALC (TYPE v) { int ret; int i; @@ -31,12 +31,12 @@ TEST (void) UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3, src, src0; MASK_TYPE mask = MASK_VALUE; TYPE res_ref[SIZE]; - src.x = _mm512_set1_epi8 (0x3D); + src.x = INTRINSIC (_set1_epi8) (0x3D); int i; for (i = 0; i < SIZE; i++) { - res_ref[i] = compute_popcnt (src.a[i]); + res_ref[i] = CALC (src.a[i]); src0.a[i] = DEFAULT_VALUE; } diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c index 2698ec370ad..9f400c005f3 100644 --- a/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdq-vpopcntq.c @@ -1,20 +1,40 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -mavx512vpopcntdq" } */ +/* { dg-options "-O2 -mavx512vpopcntdq -mavx512vl" } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ /* { dg-final { scan-assembler-times "vpopcntq\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ #include -extern __m512i z, z1; +extern __m128i x, x_1; +extern __m256i y, y_1; +extern __m512i z, z_1; int foo () { __mmask8 msk; + __m128i a = _mm_popcnt_epi64 (x); + asm volatile ("" : "+v" (a)); + a = _mm_mask_popcnt_epi64 (x, msk, x_1); + asm volatile ("" : "+v" (a)); + a = _mm_maskz_popcnt_epi64 (msk, x); + asm volatile ("" : "+v" (a)); + __m256i b = _mm256_popcnt_epi64 (y); + asm volatile ("" : "+v" (b)); + b = _mm256_mask_popcnt_epi64 (y, msk, y_1); + asm volatile ("" : "+v" (b)); + b = _mm256_maskz_popcnt_epi64 (msk, y); + asm volatile ("" : "+v" (b)); __m512i c = _mm512_popcnt_epi64 (z); asm volatile ("" : "+v" (c)); - c = _mm512_mask_popcnt_epi64 (z, msk, z1); + c = _mm512_mask_popcnt_epi64 (z, msk, z_1); asm volatile ("" : "+v" (c)); - c = _mm512_maskz_popcnt_epi64 (msk, z); + c = _mm512_maskz_popcnt_epi64 (msk, z); asm volatile ("" : "+v" (c)); } diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c new file mode 100644 index 00000000000..95e43ce1a98 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntd-1.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bw -mavx512vl" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512vpopcntdq } */ +/* { dg-require-effective-target avx512bw } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512vpopcntdq-vpopcntd-1.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512vpopcntdq-vpopcntd-1.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c new file mode 100644 index 00000000000..6e110e1b565 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vpopcntdqvl-vpopcntq-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512vl -mavx512vpopcntdq" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512vpopcntdq } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512vpopcntdq-vpopcntq-1.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512vpopcntdq-vpopcntq-1.c" diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index 0b53023945c..79d97c31530 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -483,6 +483,19 @@ proc check_effective_target_vpclmulqdq { } { } "-mvpclmulqdq -mavx512vl" ] } +# Return 1 if avx512_bitalg instructions can be compiled. +proc check_effective_target_avx512bitalg { } { + return [check_no_compiler_messages avx512bitalg object { + typedef int __v32hi __attribute__ ((__vector_size__ (64))); + + __v32hi + _mm512_popcnt_epi16 (__v32hi __A) + { + return (__v32hi) __builtin_ia32_vpopcountd_v32hi ((__v32hi) __A); + } + } "-mavx512bitalg" ] +} + # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 82f5d3c653b..cd4509699b7 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 62f87f00b07..cc9d00aa1db 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 3e64e2915ec..99af58a995d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -9,9 +9,9 @@ are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, - avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h and - mm_malloc.h that reference the proper builtin functions. - + avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h, + avx512bitalgintrin.h and mm_malloc.h that reference the proper builtin + functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -101,7 +101,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg") #endif /* Following intrinsics require immediate arguments. They @@ -218,7 +218,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 65f6ccffe25..00d30ba1cdf 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -8,9 +8,9 @@ are defined as inline functions in {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, popcntintrin.h, fmaintrin.h, - avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h - and mm_malloc.h that reference the proper builtin functions. - + avx5124fmapsintrin.h, avx5124vnniwintrin.h, avx512vpopcntdqintrin.h, + avx512bitalgintrin.h and mm_malloc.h that reference the proper builtin + functions. Defining away "extern" and "__inline" results in all of them being compiled as proper functions. */ @@ -676,6 +676,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg") #include