From ca813880dcaae71f664d8f386b1a584cfefbbd4b Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 10 Nov 2020 13:01:32 +0800 Subject: [PATCH] Support Intel AVX VNNI 2020-10-13 Hongtao Liu Hongyu Wang gcc/ * common/config/i386/cpuinfo.h (get_available_features): Detect AVXVNNI. * common/config/i386/i386-common.c (OPTION_MASK_ISA2_AVXVNNI_SET, OPTION_MASK_ISA2_AVXVNNI_UNSET): New. (OPTION_MASK_ISA2_AVX2_UNSET): Add AVXVNNI. (ix86_hanlde_option): Handle -mavxvnni, unset avxvnni when avx2 is disabled. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AVXVNNI. * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for avxvnni. * config.gcc: Add avxvnniintrin.h. * config/i386/avx512vnnivlintrin.h: Reimplement 128/256 bit non-mask intrinsics with macros to support unified interface. * config/i386/avxvnniintrin.h: New header file. * config/i386/cpuid.h (bit_AVXVNNI): New. * config/i386/i386-builtins.c (def_builtin): Handle AVXVNNI mask for unified builtin. * config/i386/i386-builtin.def (BDESC): Adjust AVX512VNNI builtins for AVXVNNI. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVXVNNI__. * config/i386/i386-expand.c (ix86_expand_builtin): Handle bisa for AVXVNNI to support unified intrinsic name, since there is no dependency between AVX512VNNI and AVXVNNI. * config/i386/i386-options.c (isa2_opts): Add -mavxvnni. (ix86_valid_target_attribute_inner_p): Handle avxnnni. (ix86_option_override_internal): Ditto. * config/i386/i386.h (TARGET_AVXVNNI, TARGET_AVXVNNI_P, TARGET_AVXVNNI_P, PTA_AVXVNNI): New. (PTA_SAPPHIRERAPIDS): Add AVX_VNNI. (PTA_ALDERLAKE): Likewise. * config/i386/i386.md ("isa"): Add avxvnni, avx512vnnivl. ("enabled"): Adjust for avxvnni and avx512vnnivl. * config/i386/i386.opt: Add option -mavxvnni. * config/i386/immintrin.h: Include avxvnniintrin.h. * config/i386/sse.md (vpdpbusd_): Adjust for AVXVNNI. (vpdpbusds_): Likewise. (vpdpwssd_): Likewise. (vpdpwssds_): Likewise. (vpdpbusd_v16si): New. (vpdpbusds_v16si): Likewise. (vpdpwssd_v16si): Likewise. (vpdpwssds_v16si): Likewise. * doc/invoke.texi: Document -mavxvnni. * doc/extend.texi: Document avxvnni. * doc/sourcebuild.texi: Document target avxvnni. gcc/testsuite/ * gcc.target/i386/avx512vl-vnni-1.c: Rename.. * gcc.target/i386/avx512vl-vnni-1a.c: To This. * gcc.target/i386/avx512vl-vnni-1b.c: New test. * gcc.target/i386/avx512vl-vnni-2.c: Ditto. * gcc.target/i386/avx512vl-vnni-3.c: Ditto. * gcc.target/i386/avx-vnni-1.c: Ditto. * gcc.target/i386/avx-vnni-2.c: Ditto. * gcc.target/i386/avx-vnni-3.c: Ditto. * gcc.target/i386/avx-vnni-4.c: Ditto. * gcc.target/i386/avx-vnni-5.c: Ditto. * gcc.target/i386/avx-vnni-6.c: Ditto. * gcc.target/i386/avx-vpdpbusd-2.c: Ditto. * gcc.target/i386/avx-vpdpbusds-2.c: Ditto. * gcc.target/i386/avx-vpdpwssd-2.c: Ditto. * gcc.target/i386/avx-vpdpwssds-2.c: Ditto. * gcc.target/i386/vnni_inline_error.c: Ditto. * gcc.target/i386/avx512vnnivl-builtin.c: Ditto. * gcc.target/i386/avxvnni-builtin.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/sse-12.c: Add -mavxvnni. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. * lib/target-supports.exp (check_effective_target_avxvnni): New proc. --- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.c | 20 ++- gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 4 +- gcc/config/i386/avx512vnnivlintrin.h | 88 +++++-------- gcc/config/i386/avxvnniintrin.h | 113 +++++++++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin.def | 18 +-- gcc/config/i386/i386-builtins.c | 4 + gcc/config/i386/i386-c.c | 2 + gcc/config/i386/i386-expand.c | 13 ++ gcc/config/i386/i386-options.c | 8 +- gcc/config/i386/i386.h | 7 +- gcc/config/i386/i386.md | 5 +- gcc/config/i386/i386.opt | 5 + gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md | 117 +++++++++++++----- gcc/doc/extend.texi | 5 + gcc/doc/invoke.texi | 11 +- gcc/doc/sourcebuild.texi | 3 + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/avx-vnni-1.c | 29 +++++ gcc/testsuite/gcc.target/i386/avx-vnni-2.c | 30 +++++ gcc/testsuite/gcc.target/i386/avx-vnni-3.c | 16 +++ gcc/testsuite/gcc.target/i386/avx-vnni-4.c | 16 +++ gcc/testsuite/gcc.target/i386/avx-vnni-5.c | 29 +++++ gcc/testsuite/gcc.target/i386/avx-vnni-6.c | 29 +++++ .../gcc.target/i386/avx-vpdpbusd-2.c | 74 +++++++++++ .../gcc.target/i386/avx-vpdpbusds-2.c | 74 +++++++++++ .../gcc.target/i386/avx-vpdpwssd-2.c | 70 +++++++++++ .../gcc.target/i386/avx-vpdpwssds-2.c | 70 +++++++++++ .../{avx512vl-vnni-1.c => avx512vl-vnni-1a.c} | 0 .../gcc.target/i386/avx512vl-vnni-1b.c | 69 +++++++++++ .../gcc.target/i386/avx512vl-vnni-2.c | 30 +++++ .../gcc.target/i386/avx512vl-vnni-3.c | 47 +++++++ .../gcc.target/i386/avx512vnnivl-builtin.c | 8 ++ .../gcc.target/i386/avxvnni-builtin.c | 8 ++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- .../gcc.target/i386/vnni_inline_error.c | 13 ++ gcc/testsuite/lib/target-supports.exp | 12 ++ 47 files changed, 956 insertions(+), 116 deletions(-) create mode 100644 gcc/config/i386/avxvnniintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-4.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-5.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vnni-6.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c rename gcc/testsuite/gcc.target/i386/{avx512vl-vnni-1.c => avx512vl-vnni-1a.c} (100%) create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c create mode 100644 gcc/testsuite/gcc.target/i386/avxvnni-builtin.c create mode 100644 gcc/testsuite/gcc.target/i386/vnni_inline_error.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 7a93e170608..41728a2fceb 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -713,6 +713,8 @@ get_available_features (struct __processor_model *cpu_model, set_feature (FEATURE_AVX512BF16); if (eax & bit_HRESET) set_feature (FEATURE_HRESET); + if (eax & bit_AVXVNNI) + set_feature (FEATURE_AVXVNNI); } } diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index e29320d68cc..8f809c1604c 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512VNNI_SET \ (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \ (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512BITALG_SET \ @@ -206,6 +207,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT #define OPTION_MASK_ISA_AVX2_UNSET \ (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET) +#define OPTION_MASK_ISA2_AVX2_UNSET \ + (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET) #define OPTION_MASK_ISA_AVX512F_UNSET \ (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ @@ -228,6 +231,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW #define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2 #define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI +#define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI #define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ #define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG #define OPTION_MASK_ISA2_AVX512BF16_UNSET OPTION_MASK_ISA2_AVX512BF16 @@ -310,7 +314,6 @@ along with GCC; see the file COPYING3. If not see | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET) #define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \ (OPTION_MASK_ISA2_AVX512F_UNSET) -#define OPTION_MASK_ISA2_AVX2_UNSET OPTION_MASK_ISA2_AVX512F_UNSET #define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET #define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET #define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET @@ -882,6 +885,21 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavxvnni: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNI_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXVNNI_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXVNNI_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXVNNI_UNSET; + } + return true; + case OPT_msgx: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 2138220aba2..af02be57812 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -224,6 +224,7 @@ enum processor_features FEATURE_KL, FEATURE_AESKLE, FEATURE_WIDEKL, + FEATURE_AVXVNNI, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index 921db066471..c4fd0363da2 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -168,4 +168,5 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("kl", FEATURE_KL, P_NONE, "-mkl") ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL) ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl") + ISA_NAMES_TABLE_ENTRY("avxvnni", FEATURE_AVXVNNI, P_NONE, "-mavxvnni") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index dc6d68bd4eb..15318b20ce0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -414,7 +414,7 @@ i[34567]86-*-*) avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h - hresetintrin.h keylockerintrin.h" + hresetintrin.h keylockerintrin.h avxvnniintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -451,7 +451,7 @@ x86_64-*-*) avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h - hresetintrin.h keylockerintrin.h" + hresetintrin.h keylockerintrin.h avxvnniintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx512vnnivlintrin.h b/gcc/config/i386/avx512vnnivlintrin.h index b4a6db37ba4..3845b038c9e 100644 --- a/gcc/config/i386/avx512vnnivlintrin.h +++ b/gcc/config/i386/avx512vnnivlintrin.h @@ -34,13 +34,10 @@ #define __DISABLE_AVX512VNNIVL__ #endif /* __AVX512VNNIVL__ */ -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpbusd_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} +#define _mm256_dpbusd_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -58,13 +55,10 @@ _mm256_maskz_dpbusd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpbusd_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} +#define _mm_dpbusd_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -82,13 +76,10 @@ _mm_maskz_dpbusd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v4si) __C, (__v4si) __D, (__mmask8)__A); } -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpbusds_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} +#define _mm256_dpbusds_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -107,13 +98,10 @@ _mm256_maskz_dpbusds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpbusds_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} +#define _mm_dpbusds_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -131,13 +119,10 @@ _mm_maskz_dpbusds_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v4si) __C, (__v4si) __D, (__mmask8)__A); } -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpwssd_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} +#define _mm256_dpwssd_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -155,13 +140,10 @@ _mm256_maskz_dpwssd_epi32 (__mmask8 __A, __m256i __B, __m256i __C, __m256i __D) (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpwssd_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} +#define _mm_dpwssd_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -179,13 +161,10 @@ _mm_maskz_dpwssd_epi32 (__mmask8 __A, __m128i __B, __m128i __C, __m128i __D) (__v4si) __C, (__v4si) __D, (__mmask8)__A); } -extern __inline __m256i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_dpwssds_epi32 (__m256i __A, __m256i __B, __m256i __C) -{ - return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si)__A, (__v8si) __B, - (__v8si) __C); -} +#define _mm256_dpwssds_epi32(A, B, C) \ + ((__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) (A), \ + (__v8si) (B), \ + (__v8si) (C))) extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -204,13 +183,10 @@ _mm256_maskz_dpwssds_epi32 (__mmask8 __A, __m256i __B, __m256i __C, (__v8si) __C, (__v8si) __D, (__mmask8)__A); } -extern __inline __m128i -__attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_dpwssds_epi32 (__m128i __A, __m128i __B, __m128i __C) -{ - return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si)__A, (__v4si) __B, - (__v4si) __C); -} +#define _mm_dpwssds_epi32(A, B, C) \ + ((__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) (A), \ + (__v4si) (B), \ + (__v4si) (C))) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/avxvnniintrin.h b/gcc/config/i386/avxvnniintrin.h new file mode 100644 index 00000000000..de7e6a906e7 --- /dev/null +++ b/gcc/config/i386/avxvnniintrin.h @@ -0,0 +1,113 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AVXVNNIINTRIN_H_INCLUDED +#define _AVXVNNIINTRIN_H_INCLUDED + +#if !defined(__AVXVNNI__) +#pragma GCC push_options +#pragma GCC target("avxvnni") +#define __DISABLE_AVXVNNIVL__ +#endif /* __AVXVNNIVL__ */ + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbusd_avx_epi32(__m256i __A, __m256i __B, __m256i __C) +{ + return (__m256i) __builtin_ia32_vpdpbusd_v8si ((__v8si) __A, + (__v8si) __B, + (__v8si) __C); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbusd_avx_epi32(__m128i __A, __m128i __B, __m128i __C) +{ + return (__m128i) __builtin_ia32_vpdpbusd_v4si ((__v4si) __A, + (__v4si) __B, + (__v4si) __C); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpbusds_avx_epi32(__m256i __A, __m256i __B, __m256i __C) +{ + return (__m256i) __builtin_ia32_vpdpbusds_v8si ((__v8si) __A, + (__v8si) __B, + (__v8si) __C); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpbusds_avx_epi32(__m128i __A,__m128i __B,__m128i __C) +{ + return (__m128i) __builtin_ia32_vpdpbusds_v4si ((__v4si) __A, + (__v4si) __B, + (__v4si) __C); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpwssd_avx_epi32(__m256i __A,__m256i __B,__m256i __C) +{ + return (__m256i) __builtin_ia32_vpdpwssd_v8si ((__v8si) __A, + (__v8si) __B, + (__v8si) __C); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpwssd_avx_epi32(__m128i __A,__m128i __B,__m128i __C) +{ + return (__m128i) __builtin_ia32_vpdpwssd_v4si ((__v4si) __A, + (__v4si) __B, + (__v4si) __C); +} + +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_dpwssds_avx_epi32(__m256i __A,__m256i __B,__m256i __C) +{ + return (__m256i) __builtin_ia32_vpdpwssds_v8si ((__v8si) __A, + (__v8si) __B, + (__v8si) __C); +} + +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_dpwssds_avx_epi32(__m128i __A,__m128i __B,__m128i __C) +{ + return (__m128i) __builtin_ia32_vpdpwssds_v4si ((__v4si) __A, + (__v4si) __B, + (__v4si) __C); +} + +#ifdef __DISABLE_AVXVNNIVL__ +#undef __DISABLE_AVXVNNIVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVXVNNIVL__ */ +#endif /* _AVXVNNIINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 595b4238ba5..d2d42f71a63 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -25,6 +25,7 @@ #define _CPUID_H_INCLUDED /* %eax */ +#define bit_AVXVNNI (1 << 4) #define bit_AVX512BF16 (1 << 5) #define bit_HRESET (1 << 22) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 4d38ceab087..67d5f2efc74 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2626,45 +2626,45 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512B BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vgf2p8mulb_v16qi_mask, "__builtin_ia32_vgf2p8mulb_v16qi_mask", IX86_BUILTIN_VGF2P8MULB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) -/* VNNI */ +/* AVX512_VNNI */ BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si, "__builtin_ia32_vpdpbusd_v16si", IX86_BUILTIN_VPDPBUSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_mask, "__builtin_ia32_vpdpbusd_v16si_mask", IX86_BUILTIN_VPDPBUSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusd_v16si_maskz, "__builtin_ia32_vpdpbusd_v16si_maskz", IX86_BUILTIN_VPDPBUSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v8si, "__builtin_ia32_vpdpbusd_v8si", IX86_BUILTIN_VPDPBUSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_mask, "__builtin_ia32_vpdpbusd_v8si_mask", IX86_BUILTIN_VPDPBUSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v8si_maskz, "__builtin_ia32_vpdpbusd_v8si_maskz", IX86_BUILTIN_VPDPBUSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusd_v4si, "__builtin_ia32_vpdpbusd_v4si", IX86_BUILTIN_VPDPBUSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_mask, "__builtin_ia32_vpdpbusd_v4si_mask", IX86_BUILTIN_VPDPBUSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusd_v4si_maskz, "__builtin_ia32_vpdpbusd_v4si_maskz", IX86_BUILTIN_VPDPBUSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si, "__builtin_ia32_vpdpbusds_v16si", IX86_BUILTIN_VPDPBUSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_mask, "__builtin_ia32_vpdpbusds_v16si_mask", IX86_BUILTIN_VPDPBUSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpbusds_v16si_maskz, "__builtin_ia32_vpdpbusds_v16si_maskz", IX86_BUILTIN_VPDPBUSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v8si, "__builtin_ia32_vpdpbusds_v8si", IX86_BUILTIN_VPDPBUSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_mask, "__builtin_ia32_vpdpbusds_v8si_mask", IX86_BUILTIN_VPDPBUSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v8si_maskz, "__builtin_ia32_vpdpbusds_v8si_maskz", IX86_BUILTIN_VPDPBUSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpbusds_v4si, "__builtin_ia32_vpdpbusds_v4si", IX86_BUILTIN_VPDPBUSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_mask, "__builtin_ia32_vpdpbusds_v4si_mask", IX86_BUILTIN_VPDPBUSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpbusds_v4si_maskz, "__builtin_ia32_vpdpbusds_v4si_maskz", IX86_BUILTIN_VPDPBUSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si, "__builtin_ia32_vpdpwssd_v16si", IX86_BUILTIN_VPDPWSSDV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_mask, "__builtin_ia32_vpdpwssd_v16si_mask", IX86_BUILTIN_VPDPWSSDV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssd_v16si_maskz, "__builtin_ia32_vpdpwssd_v16si_maskz", IX86_BUILTIN_VPDPWSSDV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v8si, "__builtin_ia32_vpdpwssd_v8si", IX86_BUILTIN_VPDPWSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_mask, "__builtin_ia32_vpdpwssd_v8si_mask", IX86_BUILTIN_VPDPWSSDV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v8si_maskz, "__builtin_ia32_vpdpwssd_v8si_maskz", IX86_BUILTIN_VPDPWSSDV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssd_v4si, "__builtin_ia32_vpdpwssd_v4si", IX86_BUILTIN_VPDPWSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_mask, "__builtin_ia32_vpdpwssd_v4si_mask", IX86_BUILTIN_VPDPWSSDV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssd_v4si_maskz, "__builtin_ia32_vpdpwssd_v4si_maskz", IX86_BUILTIN_VPDPWSSDV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si, "__builtin_ia32_vpdpwssds_v16si", IX86_BUILTIN_VPDPWSSDSV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_mask, "__builtin_ia32_vpdpwssds_v16si_mask", IX86_BUILTIN_VPDPWSSDSV16SI_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) BDESC (OPTION_MASK_ISA_AVX512VNNI, 0, CODE_FOR_vpdpwssds_v16si_maskz, "__builtin_ia32_vpdpwssds_v16si_maskz", IX86_BUILTIN_VPDPWSSDSV16SI_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v8si, "__builtin_ia32_vpdpwssds_v8si", IX86_BUILTIN_VPDPWSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_mask, "__builtin_ia32_vpdpwssds_v8si_mask", IX86_BUILTIN_VPDPWSSDSV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v8si_maskz, "__builtin_ia32_vpdpwssds_v8si_maskz", IX86_BUILTIN_VPDPWSSDSV8SI_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_UQI) -BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) +BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVXVNNI, CODE_FOR_vpdpwssds_v4si, "__builtin_ia32_vpdpwssds_v4si", IX86_BUILTIN_VPDPWSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI) diff --git a/gcc/config/i386/i386-builtins.c b/gcc/config/i386/i386-builtins.c index 504987a5410..d8ec1e5900a 100644 --- a/gcc/config/i386/i386-builtins.c +++ b/gcc/config/i386/i386-builtins.c @@ -274,6 +274,10 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2, if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0) && (mask == 0 || (mask & ix86_isa_flags) != 0)) || ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE) + /* "Unified" builtin used by either AVXVNNI intrinsics or AVX512VNNIVL + non-mask intrinsics should be defined whenever avxvnni + or avx512vnni && avx512vl exist. */ + || (mask2 == OPTION_MASK_ISA2_AVXVNNI) || (lang_hooks.builtin_function == lang_hooks.builtin_function_ext_scope)) { diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 3299a566746..87b3a2bf143 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -606,6 +606,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__KL__"); if (isa_flag2 & OPTION_MASK_ISA2_WIDEKL) def_or_undef (parse_in, "__WIDEKL__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI) + def_or_undef (parse_in, "__AVXVNNI__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c index 6f81b58a08e..795320b4557 100644 --- a/gcc/config/i386/i386-expand.c +++ b/gcc/config/i386/i386-expand.c @@ -11059,6 +11059,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4 + (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL) or + OPTION_MASK_ISA2_AVXVNNI where for each such pair it is sufficient if either of the ISAs is enabled, plus if it is ored with other options also those others. OPTION_MASK_ISA_MMX in bisa is satisfied also if TARGET_MMX_WITH_SSE. */ @@ -11077,6 +11079,17 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, && (isa & (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4)) != 0) isa |= (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4); + if ((((bisa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + || (bisa2 & OPTION_MASK_ISA2_AVXVNNI) != 0) + && (((isa & (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + == (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL)) + || (isa2 & OPTION_MASK_ISA2_AVXVNNI) != 0)) + { + isa |= OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL; + isa2 |= OPTION_MASK_ISA2_AVXVNNI; + } + if ((bisa & OPTION_MASK_ISA_MMX) && !TARGET_MMX && TARGET_MMX_WITH_SSE /* __builtin_ia32_maskmovq requires MMX registers. */ && fcode != IX86_BUILTIN_MASKMOVQ) diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 4128e933291..467bed82f47 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -216,7 +216,8 @@ static struct ix86_target_opts isa2_opts[] = { "-muintr", OPTION_MASK_ISA2_UINTR }, { "-mhreset", OPTION_MASK_ISA2_HRESET }, { "-mkl", OPTION_MASK_ISA2_KL }, - { "-mwidekl", OPTION_MASK_ISA2_WIDEKL } + { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, + { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI } }; static struct ix86_target_opts isa_opts[] = { @@ -1047,6 +1048,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("amx-int8", OPT_mamx_int8), IX86_ATTR_ISA ("amx-bf16", OPT_mamx_bf16), IX86_ATTR_ISA ("hreset", OPT_mhreset), + IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -2304,6 +2306,10 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_AMX_BF16)) opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AMX_BF16; + if (((processor_alias_table[i].flags & PTA_AVXVNNI) != 0) + && !(opts->x_ix86_isa_flags2_explicit + & OPTION_MASK_ISA2_AVXVNNI)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNI; if (((processor_alias_table[i].flags & PTA_MOVDIRI) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVDIRI)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVDIRI; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e882977f308..3be7551d6c3 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -217,6 +217,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_KL_P(x) TARGET_ISA2_KL_P(x) #define TARGET_WIDEKL TARGET_ISA2_WIDEKL #define TARGET_WIDEKL_P(x) TARGET_ISA2_WIDEKL_P(x) +#define TARGET_AVXVNNI TARGET_ISA2_AVXVNNI +#define TARGET_AVXVNNI_P(x) TARGET_ISA2_AVXVNNI_P(x) #define TARGET_LP64 TARGET_ABI_64 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x) @@ -2493,6 +2495,7 @@ const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22); const wide_int_bitmask PTA_HRESET (0, HOST_WIDE_INT_1U << 23); const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24); const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25); +const wide_int_bitmask PTA_AVXVNNI (0, HOST_WIDE_INT_1U << 26); const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; @@ -2536,9 +2539,9 @@ const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE - | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR; + | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI; const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE - | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL; + | PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL | PTA_AVXVNNI; const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD | PTA_PREFETCHWT1; const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 979e49d4723..80f1ccccf27 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -818,7 +818,8 @@ sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f, avx512bw,noavx512bw,avx512dq,noavx512dq, - avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw" + avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw, + avxvnni,avx512vnnivl" (const_string "base")) ;; Define instruction set of MMX instructions @@ -867,6 +868,8 @@ (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ") (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL") (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL") + (eq_attr "isa" "avxvnni") (symbol_ref "TARGET_AVXVNNI") + (eq_attr "isa" "avx512vnnivl") (symbol_ref "TARGET_AVX512VNNI && TARGET_AVX512VL") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 029cacb38e0..fac76e4a985 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1143,3 +1143,8 @@ Support KL built-in functions and code generation. mwidekl Target Report Mask(ISA2_WIDEKL) Var(ix86_isa_flags2) Save Support WIDEKL built-in functions and code generation. + +mavxvnni +Target Report Mask(ISA2_AVXVNNI) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and +AVXVNNI built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 0ce08e5b341..b7879678378 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -42,6 +42,8 @@ #include +#include + #include #include diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b153a87fb98..8437ad27087 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -22915,16 +22915,30 @@ [(set_attr ("prefix") ("evex")) (set_attr "mode" "")]) -(define_insn "vpdpbusd_" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] +(define_insn "vpdpbusd_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] UNSPEC_VPMADDUBSWACCD))] "TARGET_AVX512VNNI" - "vpdpbusd\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "vpdpbusd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) + +(define_insn "vpdpbusd_" + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] + UNSPEC_VPMADDUBSWACCD))] + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3} + vpdpbusd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpbusd__mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") @@ -22969,17 +22983,30 @@ "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }" [(set_attr ("prefix") ("evex"))]) +(define_insn "vpdpbusds_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + UNSPEC_VPMADDUBSWACCSSD))] + "TARGET_AVX512VNNI" + "vpdpbusds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) (define_insn "vpdpbusds_" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] UNSPEC_VPMADDUBSWACCSSD))] - "TARGET_AVX512VNNI" - "vpdpbusds\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3} + vpdpbusds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpbusds__mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") @@ -23024,17 +23051,30 @@ "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }" [(set_attr ("prefix") ("evex"))]) +(define_insn "vpdpwssd_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + UNSPEC_VPMADDWDACCD))] + "TARGET_AVX512VNNI" + "vpdpwssd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) (define_insn "vpdpwssd_" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] UNSPEC_VPMADDWDACCD))] - "TARGET_AVX512VNNI" - "vpdpwssd\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3} + vpdpwssd\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpwssd__mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") @@ -23079,17 +23119,30 @@ "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3 }" [(set_attr ("prefix") ("evex"))]) +(define_insn "vpdpwssds_v16si" + [(set (match_operand:V16SI 0 "register_operand" "=v") + (unspec:V16SI + [(match_operand:V16SI 1 "register_operand" "0") + (match_operand:V16SI 2 "register_operand" "v") + (match_operand:V16SI 3 "nonimmediate_operand" "vm")] + UNSPEC_VPMADDWDACCSSD))] + "TARGET_AVX512VNNI" + "vpdpwssds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("evex"))]) (define_insn "vpdpwssds_" - [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") - (unspec:VI4_AVX512VL - [(match_operand:VI4_AVX512VL 1 "register_operand" "0") - (match_operand:VI4_AVX512VL 2 "register_operand" "v") - (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")] + [(set (match_operand:VI4_AVX2 0 "register_operand" "=x,v") + (unspec:VI4_AVX2 + [(match_operand:VI4_AVX2 1 "register_operand" "0,0") + (match_operand:VI4_AVX2 2 "register_operand" "x,v") + (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] UNSPEC_VPMADDWDACCSSD))] - "TARGET_AVX512VNNI" - "vpdpwssds\t{%3, %2, %0|%0, %2, %3 }" - [(set_attr ("prefix") ("evex"))]) + "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" + "@ + %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3} + vpdpwssds\t{%3, %2, %0|%0, %2, %3}" + [(set_attr ("prefix") ("vex,evex")) + (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) (define_insn "vpdpwssds__mask" [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 5f1e3bf8a2e..420a14b66b0 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -6750,6 +6750,11 @@ Enable/disable the generation of the KEYLOCKER instructions. @cindex @code{target("widekl")} function attribute, x86 Enable/disable the generation of the WIDEKL instructions. +@item avxvnni +@itemx no-avxvnni +@cindex @code{target("avxvnni")} function attribute, x86 +Enable/disable the generation of the AVXVNNI instructions. + @item cld @itemx no-cld @cindex @code{target("cld")} function attribute, x86 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index d01beb248e1..18ca759c8be 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1367,7 +1367,7 @@ See RS/6000 and PowerPC Options. -mvpclmulqdq -mavx512bitalg -mmovdiri -mmovdir64b -mavx512vpopcntdq @gol -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol --mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset@gol +-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -30409,6 +30409,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @itemx -mavx512vnni @opindex mavx512vnni @need 200 +@itemx -mavxvnni +@opindex mavxvnni +@need 200 @itemx -mavx5124vnniw @opindex mavx5124vnniw @need 200 @@ -30443,9 +30446,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP, XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, -UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL or CLDEMOTE extended -instruction sets. Each has a corresponding @option{-mno-} option to disable -use of these instructions. +UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI or CLDEMOTE +extended instruction sets. Each has a corresponding @option{-mno-} option to +disable use of these instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index b3c5e530423..4822efe0a58 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2243,6 +2243,9 @@ Target supports compiling @code{avx2} instructions. @item avx2_runtime Target supports the execution of @code{avx2} instructions. +@item avxvnni +Target supports the execution of @code{avxvnni} instructions. + @item avx512f Target supports compiling @code{avx512f} instructions. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index b964248fc0d..62b2132957a 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 2f73de27c11..843aa2bdb2f 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-1.c b/gcc/testsuite/gcc.target/i386/avx-vnni-1.c new file mode 100644 index 00000000000..a22d12aa980 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-1.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxvnni -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-2.c b/gcc/testsuite/gcc.target/i386/avx-vnni-2.c new file mode 100644 index 00000000000..4ab6f0c8c1e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +__attribute__((target("avxvnni"))) +void +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-3.c b/gcc/testsuite/gcc.target/i386/avx-vnni-3.c new file mode 100644 index 00000000000..fdea7f95808 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-3.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +__attribute__ ((__gnu_inline__, __always_inline__, target("avxvnni"))) +inline int +foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +__attribute__ ((target("avx512vnni,avx512vl"))) +int +bar (void) +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-4.c b/gcc/testsuite/gcc.target/i386/avx-vnni-4.c new file mode 100644 index 00000000000..1ef3edc140e --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-4.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +__attribute__ ((__gnu_inline__, __always_inline__, target("avx512vnni,avx512vl"))) +inline int +foo (void) /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +__attribute__ ((target("avxvnni"))) +int +bar (void) +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-5.c b/gcc/testsuite/gcc.target/i386/avx-vnni-5.c new file mode 100644 index 00000000000..6556a323b1d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-5.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mavxvnni -mavx512vnni -mavx512vl" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vnni-6.c b/gcc/testsuite/gcc.target/i386/avx-vnni-6.c new file mode 100644 index 00000000000..2c4262788a7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vnni-6.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxvnni -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxvnni_test (void) +{ + x = _mm256_dpbusd_avx_epi32 (x, y, z); + x_ = _mm_dpbusd_avx_epi32 (x_, y_, z_); + x = _mm256_dpbusds_avx_epi32 (x, y, z); + x_ = _mm_dpbusds_avx_epi32 (x_, y_, z_); + x = _mm256_dpwssd_avx_epi32 (x, y, z); + x_ = _mm_dpwssd_avx_epi32 (x_, y_, z_); + x = _mm256_dpwssds_avx_epi32 (x, y, z); + x_ = _mm_dpwssds_avx_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c new file mode 100644 index 00000000000..d4b8d89710d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpbusd-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni" } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, unsigned char *s1, char *s2, int size) +{ + short tempres[32]; + for (int i = 0; i < size; i++) { + tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i])); + } + for (int i = 0; i < size / 4; i++) { + long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_b src2_256; + union256i_ub src1_256; + int res_ref_256[8]; + + if (!__builtin_cpu_supports ("avxvnni")) + return; + + for (i = 0; i < 32; i++) + { + int sign = i % 2 ? 1 : -1; + src1_256.a[i] = 10 + 3*i + sign; + src2_256.a[i] = sign*10*i*i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32); + res_256.x = _mm256_dpbusd_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_b src2_128; + union128i_ub src1_128; + int res_ref_128[4]; + + for (i = 0; i < 16; i++) + { + int sign = i % 2 ? 1 : -1; + src1_128.a[i] = 10 + 3*i*i + sign; + src2_128.a[i] = sign*10*i*i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16); + res_128.x = _mm_dpbusd_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c new file mode 100644 index 00000000000..5041ffe98b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpbusds-2.c @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni " } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, unsigned char *s1, char *s2, int size) +{ + short tempres[32]; + for (int i = 0; i < size; i++) { + tempres[i] = ((unsigned short)(s1[i]) * (short)(s2[i])); + } + for (int i = 0; i < size / 4; i++) { + long long test = (long long)dst[i] + tempres[i*4] + tempres[i*4 + 1] + tempres[i*4 + 2] + tempres[i*4 + 3]; + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_b src2_256; + union256i_ub src1_256; + int res_ref_256[8]; + + if (!__builtin_cpu_supports ("avxvnni")) + return; + + for (i = 0; i < 32; i++) + { + int sign = i % 2 ? 1 : -1; + src1_256.a[i] = 10 + 3*i*i + sign; + src2_256.a[i] = sign*10*i*i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32); + res_256.x = _mm256_dpbusds_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_b src2_128; + union128i_ub src1_128; + int res_ref_128[4]; + + for (i = 0; i < 16; i++) + { + int sign = i % 2 ? 1 : -1; + src1_128.a[i] = 10 + 3*i*i + sign; + src2_128.a[i] = sign*10*i*i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16); + res_128.x = _mm_dpbusds_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c new file mode 100644 index 00000000000..2630c97a4df --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpwssd-2.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni" } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, short *s1, short *s2, int size) +{ + short tempres[16]; + for (int i = 0; i < size; i++) { + tempres[i] = ((int)(s1[i]) * (int)(s2[i])); + } + for (int i = 0; i < size / 2; i++) { + long long test = (long long)dst[i] + tempres[i*2] + tempres[i*2 + 1]; + r[i] = test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_w src1_256, src2_256; + int res_ref_256[8]; + + if (!__builtin_cpu_supports ("avxvnni")) + return; + + for (i = 0; i < 16; i++) + { + src1_256.a[i] = 1 + i; + src2_256.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 16); + res_256.x = _mm256_dpwssd_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_w src1_128, src2_128; + int res_ref_128[4]; + + for (i = 0; i < 8; i++) + { + src1_128.a[i] = 1 + i; + src2_128.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 8); + res_128.x = _mm_dpwssd_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c b/gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c new file mode 100644 index 00000000000..bc4395a21f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vpdpwssds-2.c @@ -0,0 +1,70 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxvnni" } */ +/* { dg-require-effective-target avxvnni } */ + +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +static void +CALC (int *r, int *dst, short *s1, short *s2, int size) +{ + short tempres[16]; + for (int i = 0; i < size; i++) { + tempres[i] = ((int)(s1[i]) * (int)(s2[i])); + } + for (int i = 0; i < size / 2; i++) { + long long test = (long long)dst[i] + tempres[i*2] + tempres[i*2 + 1]; + r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test; + } +} + +void +TEST (void) +{ + int i; + union256i_d res_256; + union256i_w src1_256, src2_256; + int res_ref_256[8]; + + if (!__builtin_cpu_supports ("avxvnni")) + return; + + for (i = 0; i < 16; i++) + { + src1_256.a[i] = 1 + i; + src2_256.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 8; i++) + res_256.a[i] = 0x7fffffff; + + CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 16); + res_256.x = _mm256_dpwssds_avx_epi32 (res_256.x, src1_256.x, src2_256.x); + if (check_union256i_d (res_256, res_ref_256)) + abort (); + + union128i_d res_128; + union128i_w src1_128, src2_128; + int res_ref_128[4]; + + for (i = 0; i < 8; i++) + { + src1_128.a[i] = 1 + i; + src2_128.a[i] = 2 + 2*i + i * i; + } + + for (i = 0; i < 4; i++) + res_128.a[i] = 0x7fffffff; + + CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 8); + res_128.x = _mm_dpwssds_avx_epi32 (res_128.x, src1_128.x, src2_128.x); + if (check_union128i_d (res_128, res_ref_128)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1a.c similarity index 100% rename from gcc/testsuite/gcc.target/i386/avx512vl-vnni-1.c rename to gcc/testsuite/gcc.target/i386/avx512vl-vnni-1a.c diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c new file mode 100644 index 00000000000..067e631c89a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-1b.c @@ -0,0 +1,69 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512vl -mavx512vnni -mavx512bw -mavxvnni -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\\n\\r]*%ymm\[0-9\]+\[^\\n\\r\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\\n\\r]*%xmm\[0-9\]+\[^\\n\\r\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; +volatile __mmask32 m; + +void extern +avx512f_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x = _mm256_mask_dpbusd_epi32 (x, m, y, z); + x = _mm256_maskz_dpbusd_epi32 (m, x, y, z); + + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbusd_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbusd_epi32 (m, x_, y_, z_); + + x = _mm256_dpbusds_epi32 (x, y, z); + x = _mm256_mask_dpbusds_epi32 (x, m, y, z); + x = _mm256_maskz_dpbusds_epi32 (m, x, y, z); + + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpbusds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpbusds_epi32 (m, x_, y_, z_); + + x = _mm256_dpwssd_epi32 (x, y, z); + x = _mm256_mask_dpwssd_epi32 (x, m, y, z); + x = _mm256_maskz_dpwssd_epi32 (m, x, y, z); + + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x_ = _mm_mask_dpwssd_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpwssd_epi32 (m, x_, y_, z_); + + x = _mm256_dpwssds_epi32 (x, y, z); + x = _mm256_mask_dpwssds_epi32 (x, m, y, z); + x = _mm256_maskz_dpwssds_epi32 (m, x, y, z); + + x_ = _mm_dpwssds_epi32 (x_, y_, z_); + x_ = _mm_mask_dpwssds_epi32 (x_, m, y_, z_); + x_ = _mm_maskz_dpwssds_epi32 (m, x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c new file mode 100644 index 00000000000..d4b46356d80 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-2.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +__attribute__((target("avx512vnni,avx512vl"))) +void +avxvnni_test (void) +{ + x = _mm256_dpbusd_epi32 (x, y, z); + x_ = _mm_dpbusd_epi32 (x_, y_, z_); + x = _mm256_dpbusds_epi32 (x, y, z); + x_ = _mm_dpbusds_epi32 (x_, y_, z_); + x = _mm256_dpwssd_epi32 (x, y, z); + x_ = _mm_dpwssd_epi32 (x_, y_, z_); + x = _mm256_dpwssds_epi32 (x, y, z); + x_ = _mm_dpwssds_epi32 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c new file mode 100644 index 00000000000..15a95abe83a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vnni-3.c @@ -0,0 +1,47 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mavxvnni -mavx512vnni -mavx512vl" } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpbusds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "\\tvpdpwssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void +avxvnni_test (void) +{ + register __m256i a __asm ("xmm16"); + register __m128i a_ __asm ("xmm26"); + a = _mm256_dpbusd_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpbusd_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; + a = _mm256_dpbusds_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpbusds_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; + a = _mm256_dpwssd_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpwssd_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; + a = _mm256_dpwssds_epi32 (x, y, z); + asm volatile ("" : "+v" (a)); + x = a; + a_ = _mm_dpwssds_epi32 (x_, y_, z_); + asm volatile ("" : "+v" (a_)); + x_ = a_; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c b/gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c new file mode 100644 index 00000000000..97aaba0c3ef --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vnnivl-builtin.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mno-avxvnni -mavx512vnni -mavx512vl" } */ +typedef int v8si __attribute__ ((vector_size (32))); +v8si +foo (v8si a, v8si b, v8si c) +{ + return __builtin_ia32_vpdpbusd_v8si (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/avxvnni-builtin.c b/gcc/testsuite/gcc.target/i386/avxvnni-builtin.c new file mode 100644 index 00000000000..893a62ae185 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avxvnni-builtin.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavxvnni -mno-avx512vnni" } */ +typedef int v8si __attribute__ ((vector_size (32))); +v8si +foo (v8si a, v8si b, v8si c) +{ + return __builtin_ia32_vpdpbusd_v8si (a, b, c); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index b8e3b1f7dee..395a21c8668 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -78,6 +78,7 @@ extern void test_uintr (void) __attribute__((__target__("uintr"))); extern void test_hreset (void) __attribute__((__target__("hreset"))); extern void test_keylocker (void) __attribute__((__target__("kl"))); extern void test_widekl (void) __attribute__((__target__("widekl"))); +extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -157,6 +158,7 @@ extern void test_no_uintr (void) __attribute__((__target__("no-uintr"))); extern void test_no_hreset (void) __attribute__((__target__("no-hreset"))); extern void test_no_keylocker (void) __attribute__((__target__("no-kl"))); extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); +extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index f1e05e62d8a..375d4d1b4de 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 7f96331ffe9..7029771334b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index 27704c3e23b..4ce0ffffaf3 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 789c8bebbab..6e8b6f3fa1b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 3e5e3e98f24..7faa053ace8 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -708,6 +708,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni") #include diff --git a/gcc/testsuite/gcc.target/i386/vnni_inline_error.c b/gcc/testsuite/gcc.target/i386/vnni_inline_error.c new file mode 100644 index 00000000000..eaed9842604 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vnni_inline_error.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O0 -mavx512vnni -mavx512vl -mno-popcnt" } */ + +inline int __attribute__ ((__gnu_inline__, __always_inline__, target("popcnt"))) +foo () /* { dg-error "inlining failed in call to 'always_inline' .* target specific option mismatch" } */ +{ + return 0; +} + +int bar() +{ + return foo (); /* { dg-message "called from here" } */ +} diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 60ebbb39f9d..ceee78c26a9 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -8461,6 +8461,18 @@ proc check_effective_target_avx2 { } { } "-O0 -mavx2" ] } +# Return 1 if avxvnni instructions can be compiled. +proc check_effective_target_avxvnni { } { + return [check_no_compiler_messages avxvnni object { + typedef int __v8si __attribute__ ((__vector_size__ (32))); + __v8si + _mm256_dpbusd_epi32 (__v8si __A, __v8si __B, __v8si __C) + { + return __builtin_ia32_vpdpbusd_v8si (__A, __B, __C); + } + } "-mavxvnni" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.30.2