From: Ilya Tocar Date: Fri, 21 Nov 2014 15:28:24 +0000 (+0000) Subject: Add avx512ifma instructions. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4190ea38047f64e87940fc747d108a6bfdaa809f;p=gcc.git Add avx512ifma instructions. gcc/ * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512IFMA_SET, OPTION_MASK_ISA_AVX512IFMA_UNSET): New. (ix86_handle_option): Handle OPT_mavx512ifma. * config.gcc: Add avx512ifmaintrin.h, avx512ifmavlintrin.h. * config/i386/avx512ifmaintrin.h: New file. * config/i386/avx512ifmaivlntrin.h: Ditto. * config/i386/cpuid.h (bit_AVX512IFMA): New. * config/i386/driver-i386.c (host_detect_local_cpu): Detect avx512ifma. * config/i386/i386-c.c (ix86_target_macros_internal): Define __AVX512IFMA__. * config/i386/i386.c (ix86_target_string): Add -mavx512ifma. (PTA_AVX512IFMA): Define. (ix86_option_override_internal): Handle new options. (ix86_valid_target_attribute_inner_p): Add avx512ifma. (ix86_builtins): Add IX86_BUILTIN_VPMADD52LUQ512, IX86_BUILTIN_VPMADD52HUQ512, IX86_BUILTIN_VPMADD52LUQ256, IX86_BUILTIN_VPMADD52HUQ256, IX86_BUILTIN_VPMADD52LUQ128, IX86_BUILTIN_VPMADD52HUQ128, IX86_BUILTIN_VPMADD52LUQ512_MASKZ, IX86_BUILTIN_VPMADD52HUQ512_MASKZ, IX86_BUILTIN_VPMADD52LUQ256_MASKZ, IX86_BUILTIN_VPMADD52HUQ256_MASKZ, IX86_BUILTIN_VPMADD52LUQ128_MASKZ, IX86_BUILTIN_VPMADD52HUQ128_MASKZ. (bdesc_special_args): Add __builtin_ia32_vpmadd52luq512_mask, __builtin_ia32_vpmadd52luq512_maskz, __builtin_ia32_vpmadd52huq512_mask, __builtin_ia32_vpmadd52huq512_maskx, __builtin_ia32_vpmadd52luq256_mask, __builtin_ia32_vpmadd52luq256_maskz, __builtin_ia32_vpmadd52huq256_mask, __builtin_ia32_vpmadd52huq256_maskz, __builtin_ia32_vpmadd52luq128_mask, __builtin_ia32_vpmadd52luq128_maskz, __builtin_ia32_vpmadd52huq128_mask, __builtin_ia32_vpmadd52huq128_maskz, * config/i386/i386.h (TARGET_AVX512IFMA, TARGET_AVX512IFMA_P): Define. * config/i386/i386.opt: Add mavx512ifma. * config/i386/immintrin.h: Include avx512ifmaintrin.h, avx512ifmavlintrin.h. * config/i386/sse.md (unspec): Add UNSPEC_VPMADD52LUQ, UNSPEC_VPMADD52HUQ. (VPMADD52): New iterator. (vpmadd52type): New attribute. (vpamdd52huq_maskz): New. (vpamdd52luq_maskz): Ditto. (vpamdd52): Ditto. (vpamdd52_mask): Ditto. gcc/testsuite/ * g++.dg/other/i386-2.C: Add -mavx512ifma. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/avx512f-helper.h: Add avx512ifma-check.h. * gcc.target/i386/avx512ifma-check.h: New. * gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddhuq-2.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddluq-2.c: Ditto. * gcc.target/i386/avx512vl-vpmaddhuq-2.c: Ditto. * gcc.target/i386/avx512vl-vpmaddluq-2.c: Ditto. * gcc.target/i386/i386.exp (check_effective_target_avx512ifma): New. * gcc.target/i386/sse-12.c: Add new options. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. 2014-11-21 Georg-Johann Lay From-SVN: r217928 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9902e537dbb..4a4597877f3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,52 @@ +2014-11-21 Ilya Tocar + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_AVX512IFMA_SET, + OPTION_MASK_ISA_AVX512IFMA_UNSET): New. + (ix86_handle_option): Handle OPT_mavx512ifma. + * config.gcc: Add avx512ifmaintrin.h, avx512ifmavlintrin.h. + * config/i386/avx512ifmaintrin.h: New file. + * config/i386/avx512ifmaivlntrin.h: Ditto. + * config/i386/cpuid.h (bit_AVX512IFMA): New. + * config/i386/driver-i386.c (host_detect_local_cpu): Detect + avx512ifma. + * config/i386/i386-c.c (ix86_target_macros_internal): Define + __AVX512IFMA__. + * config/i386/i386.c (ix86_target_string): Add -mavx512ifma. + (PTA_AVX512IFMA): Define. + (ix86_option_override_internal): Handle new options. + (ix86_valid_target_attribute_inner_p): Add avx512ifma. + (ix86_builtins): Add IX86_BUILTIN_VPMADD52LUQ512, + IX86_BUILTIN_VPMADD52HUQ512, IX86_BUILTIN_VPMADD52LUQ256, + IX86_BUILTIN_VPMADD52HUQ256, IX86_BUILTIN_VPMADD52LUQ128, + IX86_BUILTIN_VPMADD52HUQ128, IX86_BUILTIN_VPMADD52LUQ512_MASKZ, + IX86_BUILTIN_VPMADD52HUQ512_MASKZ, IX86_BUILTIN_VPMADD52LUQ256_MASKZ, + IX86_BUILTIN_VPMADD52HUQ256_MASKZ, IX86_BUILTIN_VPMADD52LUQ128_MASKZ, + IX86_BUILTIN_VPMADD52HUQ128_MASKZ. + (bdesc_special_args): Add __builtin_ia32_vpmadd52luq512_mask, + __builtin_ia32_vpmadd52luq512_maskz, + __builtin_ia32_vpmadd52huq512_mask, + __builtin_ia32_vpmadd52huq512_maskx, + __builtin_ia32_vpmadd52luq256_mask, + __builtin_ia32_vpmadd52luq256_maskz, + __builtin_ia32_vpmadd52huq256_mask, + __builtin_ia32_vpmadd52huq256_maskz, + __builtin_ia32_vpmadd52luq128_mask, + __builtin_ia32_vpmadd52luq128_maskz, + __builtin_ia32_vpmadd52huq128_mask, + __builtin_ia32_vpmadd52huq128_maskz, + * config/i386/i386.h (TARGET_AVX512IFMA, TARGET_AVX512IFMA_P): Define. + * config/i386/i386.opt: Add mavx512ifma. + * config/i386/immintrin.h: Include avx512ifmaintrin.h, + avx512ifmavlintrin.h. + * config/i386/sse.md (unspec): Add UNSPEC_VPMADD52LUQ, + UNSPEC_VPMADD52HUQ. + (VPMADD52): New iterator. + (vpmadd52type): New attribute. + (vpamdd52huq_maskz): New. + (vpamdd52luq_maskz): Ditto. + (vpamdd52): Ditto. + (vpamdd52_mask): Ditto. + 2014-11-21 Alan Lawrence Revert: diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index da47e644b43..73044a07c25 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -71,6 +71,8 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512VL_SET \ (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA_AVX512IFMA_SET \ + (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW #define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED @@ -167,6 +169,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ #define OPTION_MASK_ISA_AVX512BW_UNSET OPTION_MASK_ISA_AVX512BW #define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL +#define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA #define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM #define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW #define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED @@ -443,6 +446,19 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavx512ifma: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512IFMA_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX512IFMA_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX512IFMA_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/config.gcc b/gcc/config.gcc index 88309b645e3..dbf4191c1ab 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -367,7 +367,8 @@ i[34567]86-*-*) avx512cdintrin.h avx512erintrin.h avx512pfintrin.h shaintrin.h clflushoptintrin.h xsavecintrin.h xsavesintrin.h avx512dqintrin.h avx512bwintrin.h - avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h" + avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h + avx512ifmaintrin.h avx512ifmavlintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -386,7 +387,8 @@ x86_64-*-*) avx512cdintrin.h avx512erintrin.h avx512pfintrin.h shaintrin.h clflushoptintrin.h xsavecintrin.h xsavesintrin.h avx512dqintrin.h avx512bwintrin.h - avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h" + avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h + avx512ifmaintrin.h avx512ifmavlintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avx512ifmaintrin.h b/gcc/config/i386/avx512ifmaintrin.h new file mode 100644 index 00000000000..45b08297e46 --- /dev/null +++ b/gcc/config/i386/avx512ifmaintrin.h @@ -0,0 +1,104 @@ +/* Copyright (C) 2013-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AVX512IFMAINTRIN_H_INCLUDED +#define _AVX512IFMAINTRIN_H_INCLUDED + +#ifndef __AVX512IFMA__ +#pragma GCC push_options +#pragma GCC target("avx512ifma") +#define __DISABLE_AVX512IFMA__ +#endif /* __AVX512IFMA__ */ + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_madd52lo_epu64 (__m512i __X, __m512i __Y, __m512i __Z) +{ + return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __Z, + (__mmask8) - 1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) +{ + return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __Z, + (__mmask8) - 1); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_madd52lo_epu64 (__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) +{ + return (__m512i) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __W, + (__v8di) __X, + (__v8di) __Y, + (__mmask8) __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_mask_madd52hi_epu64 (__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) +{ + return (__m512i) __builtin_ia32_vpmadd52huq512_mask ((__v8di) __W, + (__v8di) __X, + (__v8di) __Y, + (__mmask8) __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_madd52lo_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) +{ + return (__m512i) __builtin_ia32_vpmadd52luq512_maskz ((__v8di) __X, + (__v8di) __Y, + (__v8di) __Z, + (__mmask8) __M); +} + +extern __inline __m512i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_maskz_madd52hi_epu64 (__mmask8 __M, __m512i __X, __m512i __Y, __m512i __Z) +{ + return (__m512i) __builtin_ia32_vpmadd52huq512_maskz ((__v8di) __X, + (__v8di) __Y, + (__v8di) __Z, + (__mmask8) __M); +} + +#ifdef __DISABLE_AVX512IFMA__ +#undef __DISABLE_AVX512IFMA__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512IFMA__ */ + +#endif /* _AVX512IFMAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/avx512ifmavlintrin.h b/gcc/config/i386/avx512ifmavlintrin.h new file mode 100644 index 00000000000..7c858ba0fff --- /dev/null +++ b/gcc/config/i386/avx512ifmavlintrin.h @@ -0,0 +1,164 @@ +/* Copyright (C) 2013-2014 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AVX512IFMAVLINTRIN_H_INCLUDED +#define _AVX512IFMAVLINTRIN_H_INCLUDED + +#if !defined(__AVX512VL__) || !defined(__AVX512IFMA__) +#pragma GCC push_options +#pragma GCC target("avx512ifma,avx512vl") +#define __DISABLE_AVX512IFMAVL__ +#endif /* __AVX512IFMAVL__ */ + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z, + (__mmask8) - 1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z, + (__mmask8) - 1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z, + (__mmask8) - 1); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z, + (__mmask8) - 1); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmadd52luq128_mask ((__v2di) __W, + (__v2di) __X, + (__v2di) __Y, + (__mmask8) __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) +{ + return (__m128i) __builtin_ia32_vpmadd52huq128_mask ((__v2di) __W, + (__v2di) __X, + (__v2di) __Y, + (__mmask8) __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) +{ + return (__m256i) __builtin_ia32_vpmadd52luq256_mask ((__v4di) __W, + (__v4di) __X, + (__v4di) __Y, + (__mmask8) __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) +{ + return (__m256i) __builtin_ia32_vpmadd52huq256_mask ((__v4di) __W, + (__v4di) __X, + (__v4di) __Y, + (__mmask8) __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52luq128_maskz ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z, + (__mmask8) __M); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_vpmadd52huq128_maskz ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z, + (__mmask8) __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52luq256_maskz ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z, + (__mmask8) __M); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_vpmadd52huq256_maskz ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z, + (__mmask8) __M); +} + +#ifdef __DISABLE_AVX512IFMAVL__ +#undef __DISABLE_AVX512IFMAVL__ +#pragma GCC pop_options +#endif /* __DISABLE_AVX512IFMAVL__ */ + +#endif /* _AVX512IFMAVLINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 133e356c2eb..e3e1ed6ea55 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -76,6 +76,7 @@ #define bit_AVX512DQ (1 << 17) #define bit_RDSEED (1 << 18) #define bit_ADX (1 << 19) +#define bit_AVX512IFMA (1 << 21) #define bit_CLFLUSHOPT (1 << 23) #define bit_AVX512PF (1 << 26) #define bit_AVX512ER (1 << 27) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index c913113e1a1..cb82945f4b9 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -412,6 +412,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0; unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0; unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0; + unsigned int has_avx512ifma = 0; bool arch; @@ -493,6 +494,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_avx512dq = ebx & bit_AVX512DQ; has_avx512bw = ebx & bit_AVX512BW; has_avx512vl = ebx & bit_AVX512VL; + has_avx512vl = ebx & bit_AVX512IFMA; has_prefetchwt1 = ecx & bit_PREFETCHWT1; } @@ -925,6 +927,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq"; const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw"; const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl"; + const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, @@ -934,7 +937,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) fxsr, xsave, xsaveopt, avx512f, avx512er, avx512cd, avx512pf, prefetchwt1, clflushopt, xsavec, xsaves, avx512dq, avx512bw, avx512vl, - NULL); + avx512ifma, NULL); } done: diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 0a0775dbbcc..bf993d18570 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -351,6 +351,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AVX512BW__"); if (isa_flag & OPTION_MASK_ISA_AVX512VL) def_or_undef (parse_in, "__AVX512VL__"); + if (isa_flag & OPTION_MASK_ISA_AVX512IFMA) + def_or_undef (parse_in, "__AVX512IFMA__"); if (isa_flag & OPTION_MASK_ISA_FMA) def_or_undef (parse_in, "__FMA__"); if (isa_flag & OPTION_MASK_ISA_RTM) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index b017ace29fa..55c57dd1adf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2618,6 +2618,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch, { "-mavx512dq", OPTION_MASK_ISA_AVX512DQ }, { "-mavx512bw", OPTION_MASK_ISA_AVX512BW }, { "-mavx512vl", OPTION_MASK_ISA_AVX512VL }, + { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, { "-msse4a", OPTION_MASK_ISA_SSE4A }, { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, @@ -3153,6 +3154,7 @@ ix86_option_override_internal (bool main_args_p, #define PTA_AVX512DQ (HOST_WIDE_INT_1 << 50) #define PTA_AVX512BW (HOST_WIDE_INT_1 << 51) #define PTA_AVX512VL (HOST_WIDE_INT_1 << 52) +#define PTA_AVX512IFMA (HOST_WIDE_INT_1 << 53) #define PTA_CORE2 \ (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \ @@ -3733,6 +3735,9 @@ ix86_option_override_internal (bool main_args_p, if (processor_alias_table[i].flags & PTA_MPX && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX; + if (processor_alias_table[i].flags & PTA_AVX512IFMA + && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA)) + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA; if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) x86_prefetch_sse = true; @@ -4655,6 +4660,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("clflushopt", OPT_mclflushopt), IX86_ATTR_ISA ("xsavec", OPT_mxsavec), IX86_ATTR_ISA ("xsaves", OPT_mxsaves), + IX86_ATTR_ISA ("avx512ifma", OPT_mavx512ifma), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -30076,6 +30082,20 @@ enum ix86_builtins IX86_BUILTIN_RSQRT28SD, IX86_BUILTIN_RSQRT28SS, + /* AVX-512IFMA */ + IX86_BUILTIN_VPMADD52LUQ512, + IX86_BUILTIN_VPMADD52HUQ512, + IX86_BUILTIN_VPMADD52LUQ256, + IX86_BUILTIN_VPMADD52HUQ256, + IX86_BUILTIN_VPMADD52LUQ128, + IX86_BUILTIN_VPMADD52HUQ128, + IX86_BUILTIN_VPMADD52LUQ512_MASKZ, + IX86_BUILTIN_VPMADD52HUQ512_MASKZ, + IX86_BUILTIN_VPMADD52LUQ256_MASKZ, + IX86_BUILTIN_VPMADD52HUQ256_MASKZ, + IX86_BUILTIN_VPMADD52LUQ128_MASKZ, + IX86_BUILTIN_VPMADD52HUQ128_MASKZ, + /* SHA builtins. */ IX86_BUILTIN_SHA1MSG1, IX86_BUILTIN_SHA1MSG2, @@ -32746,6 +32766,21 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI }, { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI }, + + /* AVX512IFMA */ + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI }, + }; /* Builtins with rounding support. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 53dfd223cda..481d68c609e 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -77,6 +77,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_AVX512BW_P(x) TARGET_ISA_AVX512BW_P(x) #define TARGET_AVX512VL TARGET_ISA_AVX512VL #define TARGET_AVX512VL_P(x) TARGET_ISA_AVX512VL_P(x) +#define TARGET_AVX512IFMA TARGET_ISA_AVX512IFMA +#define TARGET_AVX512IFMA_P(x) TARGET_ISA_AVX512IFMA_P(x) #define TARGET_FMA TARGET_ISA_FMA #define TARGET_FMA_P(x) TARGET_ISA_FMA_P(x) #define TARGET_SSE4A TARGET_ISA_SSE4A diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 5dfa9bf8b96..012ff8dafc2 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -653,6 +653,10 @@ mavx512vl Target Report Mask(ISA_AVX512VL) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VL built-in functions and code generation +mavx512ifma +Target Report Mask(ISA_AVX512IFMA) Var(ix86_isa_flags) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512IFMA built-in functions and code generation + mfma Target Report Mask(ISA_FMA) Var(ix86_isa_flags) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 5d921822248..5f11432f07b 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -60,6 +60,10 @@ #include +#include + +#include + #include #include diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 13ddd290bdb..61cc904f372 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -141,6 +141,10 @@ UNSPEC_REDUCE UNSPEC_FPCLASS UNSPEC_RANGE + + ;; For AVX512IFMA support + UNSPEC_VPMADD52LUQ + UNSPEC_VPMADD52HUQ ]) (define_c_enum "unspecv" [ @@ -18450,3 +18454,68 @@ emit_move_insn (op0, op1); DONE; }) + +(define_int_iterator VPMADD52 + [UNSPEC_VPMADD52LUQ + UNSPEC_VPMADD52HUQ]) + +(define_int_attr vpmadd52type + [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")]) + +(define_expand "vpamdd52huq_maskz" + [(match_operand:VI8_AVX512VL 0 "register_operand") + (match_operand:VI8_AVX512VL 1 "register_operand") + (match_operand:VI8_AVX512VL 2 "register_operand") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand") + (match_operand: 4 "register_operand")] + "TARGET_AVX512IFMA" +{ + emit_insn (gen_vpamdd52huq_maskz_1 ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (mode), operands[4])); + DONE; +}) + +(define_expand "vpamdd52luq_maskz" + [(match_operand:VI8_AVX512VL 0 "register_operand") + (match_operand:VI8_AVX512VL 1 "register_operand") + (match_operand:VI8_AVX512VL 2 "register_operand") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand") + (match_operand: 4 "register_operand")] + "TARGET_AVX512IFMA" +{ + emit_insn (gen_vpamdd52luq_maskz_1 ( + operands[0], operands[1], operands[2], operands[3], + CONST0_RTX (mode), operands[4])); + DONE; +}) + +(define_insn "vpamdd52" + [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") + (unspec:VI8_AVX512VL + [(match_operand:VI8_AVX512VL 1 "register_operand" "0") + (match_operand:VI8_AVX512VL 2 "register_operand" "v") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] + VPMADD52))] + "TARGET_AVX512IFMA" + "vpmadd52\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + +(define_insn "vpamdd52_mask" + [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI8_AVX512VL + (unspec:VI8_AVX512VL + [(match_operand:VI8_AVX512VL 1 "register_operand" "0") + (match_operand:VI8_AVX512VL 2 "register_operand" "v") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] + VPMADD52) + (match_dup 1) + (match_operand: 4 "register_operand" "Yk")))] + "TARGET_AVX512IFMA" + "vpmadd52\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "prefix" "evex") + (set_attr "mode" "")]) + diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 0885198b953..4d4560be5e1 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,22 @@ +2014-11-21 Ilya Tocar + + * g++.dg/other/i386-2.C: Add -mavx512ifma. + * g++.dg/other/i386-3.C: Ditto. + * gcc.target/i386/avx512f-helper.h: Add avx512ifma-check.h. + * gcc.target/i386/avx512ifma-check.h: New. + * gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Ditto. + * gcc.target/i386/avx512ifma-vpmaddhuq-2.c: Ditto. + * gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto. + * gcc.target/i386/avx512ifma-vpmaddluq-2.c: Ditto. + * gcc.target/i386/avx512vl-vpmaddhuq-2.c: Ditto. + * gcc.target/i386/avx512vl-vpmaddluq-2.c: Ditto. + * gcc.target/i386/i386.exp (check_effective_target_avx512ifma): New. + * gcc.target/i386/sse-12.c: Add new options. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-14.c: Ditto. + * gcc.target/i386/sse-22.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + 2014-11-21 Georg-Johann Lay Forward-port from 2014-10-30 4_9-branch r216934 diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index d642accd441..a69a5e331d0 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 6d3e24f8262..d3a5bbd29ca 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/avx512f-helper.h b/gcc/testsuite/gcc.target/i386/avx512f-helper.h index 04a1a89da51..d1774291755 100644 --- a/gcc/testsuite/gcc.target/i386/avx512f-helper.h +++ b/gcc/testsuite/gcc.target/i386/avx512f-helper.h @@ -20,6 +20,8 @@ #include "avx512bw-check.h" #elif defined (AVX512VL) #include "avx512vl-check.h" +#elif defined (AVX512IFMA) +#include "avx512ifma-check.h" #endif /* Macros expansion. */ @@ -125,6 +127,9 @@ avx512bw_test (void) { test_512 (); } #elif defined (AVX512VL) void avx512vl_test (void) { test_256 (); test_128 (); } +#elif defined (AVX512IFMA) +void +avx512ifma_test (void) { test_512 (); } #endif #endif /* AVX512F_HELPER_INCLUDED */ diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-check.h b/gcc/testsuite/gcc.target/i386/avx512ifma-check.h new file mode 100644 index 00000000000..9c17a549ce7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-check.h @@ -0,0 +1,46 @@ +#include +#include "cpuid.h" +#include "m512-check.h" +#include "avx512f-os-support.h" + +static void avx512ifma_test (void); + +static void __attribute__ ((noinline)) do_test (void) +{ + avx512ifma_test (); +} + +int +main () +{ + unsigned int eax, ebx, ecx, edx; + + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) + return 0; + + if ((ecx & bit_OSXSAVE) == (bit_OSXSAVE)) + { + if (__get_cpuid_max (0, NULL) < 7) + return 0; + + __cpuid_count (7, 0, eax, ebx, ecx, edx); + + if ((avx512f_os_support ()) && ((ebx & bit_AVX512IFMA) == bit_AVX512IFMA)) + { + do_test (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + return 0; + } +#ifdef DEBUG + printf ("SKIPPED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c new file mode 100644 index 00000000000..5bc3311643a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ + +#include + +volatile __m512i _x1, _y1, _z1; +volatile __m256i _x2, _y2, _z2; +volatile __m128i _x3, _y3, _z3; + +void extern +avx512ifma_test (void) +{ + _x3 = _mm_madd52hi_epu64 (_x3, _y3, _z3); + _x3 = _mm_mask_madd52hi_epu64 (_x3, 2, _y3, _z3); + _x3 = _mm_maskz_madd52hi_epu64 (2, _x3, _y3, _z3); + _x2 = _mm256_madd52hi_epu64 (_x2, _y2, _z2); + _x2 = _mm256_mask_madd52hi_epu64 (_x2, 3, _y2, _z2); + _x2 = _mm256_maskz_madd52hi_epu64 (3, _x2, _y2, _z2); + _x1 = _mm512_madd52hi_epu64 (_x1, _y1, _z1); + _x1 = _mm512_mask_madd52hi_epu64 (_x1, 3, _y1, _z1); + _x1 = _mm512_maskz_madd52hi_epu64 (3, _x1, _y1, _z1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-2.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-2.c new file mode 100644 index 00000000000..edb28cb74fc --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-2.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512ifma -DAVX512IFMA" } */ +/* { dg-require-effective-target avx512ifma } */ + +#include "avx512f-helper.h" + +#define SIZE (AVX512F_LEN / 64) +#include "avx512f-mask-type.h" + +void +CALC (long long *r, long long *s1, long long *s2, long long *s3) +{ + int i; + long long a, b; + + for (i = 0; i < SIZE; i++) + { + /* Simulate higher 52 bits out of 104 bit, + by shifting opernads with 0 in lower 26 bits. */ + a = s2[i] >> 26; + b = s3[i] >> 26; + r[i] = a * b + s1[i]; + } +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, i_q) src1, src2, dst1, dst2, dst3; + long long dst_ref[SIZE]; + int i; + MASK_TYPE mask = MASK_VALUE; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 15 + 3467 * i; + src2.a[i] = 9217 + i; + src1.a[i] = src1.a[i] << 26; + src1.a[i] = src1.a[i] << 26; + src1.a[i] &= ((1LL << 52) - 1); + src2.a[i] &= ((1LL << 52) - 1); + dst1.a[i] = DEFAULT_VALUE; + dst2.a[i] = DEFAULT_VALUE; + dst3.a[i] = DEFAULT_VALUE; + } + + CALC (dst_ref, dst1.a, src1.a, src2.a); + dst1.x = INTRINSIC (_madd52hi_epu64) (dst1.x, src1.x, src2.x); + dst2.x = INTRINSIC (_mask_madd52hi_epu64) (dst2.x, mask, src1.x, src2.x); + dst3.x = INTRINSIC (_maskz_madd52hi_epu64) (mask, dst3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_q) (dst1, dst_ref)) + abort (); + + MASK_MERGE (i_q) (dst_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_q) (dst2, dst_ref)) + abort (); + + MASK_ZERO (i_q) (dst_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_q) (dst3, dst_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c new file mode 100644 index 00000000000..5a17cf1b4fb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -mavx512vl -O2" } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\[^\n\]*%xmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\[^\n\]*%ymm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\[^\n\]*%zmm\[0-9\]\{%k\[1-7\]\}\{z\}" 1 } } */ + +#include + +volatile __m512i _x1, _y1, _z1; +volatile __m256i _x2, _y2, _z2; +volatile __m128i _x3, _y3, _z3; + +void extern +avx512ifma_test (void) +{ + _x3 = _mm_madd52lo_epu64 (_x3, _y3, _z3); + _x3 = _mm_mask_madd52lo_epu64 (_x3, 2, _y3, _z3); + _x3 = _mm_maskz_madd52lo_epu64 (2, _x3, _y3, _z3); + _x2 = _mm256_madd52lo_epu64 (_x2, _y2, _z2); + _x2 = _mm256_mask_madd52lo_epu64 (_x2, 3, _y2, _z2); + _x2 = _mm256_maskz_madd52lo_epu64 (3, _x2, _y2, _z2); + _x1 = _mm512_madd52lo_epu64 (_x1, _y1, _z1); + _x1 = _mm512_mask_madd52lo_epu64 (_x1, 3, _y1, _z1); + _x1 = _mm512_maskz_madd52lo_epu64 (3, _x1, _y1, _z1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-2.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-2.c new file mode 100644 index 00000000000..6937d1334f1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-2.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512ifma -DAVX512IFMA" } */ +/* { dg-require-effective-target avx512ifma } */ + +#include "avx512f-helper.h" + +#define SIZE (AVX512F_LEN / 64) +#include "avx512f-mask-type.h" + +void +CALC (unsigned long long *r, unsigned long long *s1, + unsigned long long *s2, unsigned long long *s3) +{ + int i; + + /* Valid, because values are less than 1 << 26. */ + for (i = 0; i < SIZE; i++) + r[i] = s2[i] * s3[i] + s1[i]; +} + +void +TEST (void) +{ + UNION_TYPE (AVX512F_LEN, i_q) src1, src2, dst1, dst2, dst3; + unsigned long long dst_ref[SIZE]; + int i; + MASK_TYPE mask = MASK_VALUE; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = i + 50; + src2.a[i] = i + 100; + dst1.a[i] = DEFAULT_VALUE; + dst2.a[i] = DEFAULT_VALUE; + dst3.a[i] = DEFAULT_VALUE; + } + + CALC (dst_ref, dst1.a, src1.a, src2.a); + dst1.x = INTRINSIC (_madd52lo_epu64) (dst1.x, src1.x, src2.x); + dst2.x = INTRINSIC (_mask_madd52lo_epu64) (dst2.x, mask, src1.x, src2.x); + dst3.x = INTRINSIC (_maskz_madd52lo_epu64) (mask, dst3.x, src1.x, src2.x); + + if (UNION_CHECK (AVX512F_LEN, i_q) (dst1, dst_ref)) + abort (); + + MASK_MERGE (i_q) (dst_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_q) (dst2, dst_ref)) + abort (); + + MASK_ZERO (i_q) (dst_ref, mask, SIZE); + if (UNION_CHECK (AVX512F_LEN, i_q) (dst3, dst_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddhuq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddhuq-2.c new file mode 100644 index 00000000000..92d1bf7d1e0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddhuq-2.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512ifma -mavx512vl -DAVX512VL" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512ifma-vpmaddhuq-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512ifma-vpmaddhuq-2.c" diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddluq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddluq-2.c new file mode 100644 index 00000000000..6698ad2fb96 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpmaddluq-2.c @@ -0,0 +1,14 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512ifma -mavx512vl -DAVX512VL" } */ +/* { dg-require-effective-target avx512vl } */ + +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512ifma-vpmaddluq-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512ifma-vpmaddluq-2.c" diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index 4fcb8a6ab5e..060eed30c6b 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -350,6 +350,21 @@ proc check_effective_target_avx512bw { } { } "-mavx512bw" ] } +# Return 1 if avx512ifma instructions can be compiled. +proc check_effective_target_avx512ifma { } { + return [check_no_compiler_messages avx512ifma object { + typedef long long __v8di __attribute__ ((__vector_size__ (64))); + __v8di + _mm512_madd52lo_epu64 (__v8di __X, __v8di __Y, __v8di __Z) + { + return (__v8di) __builtin_ia32_vpmadd52luq512_mask ((__v8di) __X, + (__v8di) __Y, + (__v8di) __Z, + -1); + } + } "-mavx512ifma" ] +} + # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 67bcf48fbc0..1d8fa82b20b 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512ifma" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index ec8b56a4536..878c47577f7 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512ifma" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index a2a4f73cabb..4d3acb49304 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 1d1ed7b9d08..7861cf54f9e 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -100,7 +100,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512ifma") #endif /* Following intrinsics require immediate arguments. They @@ -215,7 +215,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f54f98d23dd..85d403e9f71 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -594,7 +594,7 @@ #define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D) #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512ifma") #include #include #include