From: Julia Koval Date: Wed, 20 Dec 2017 06:20:44 +0000 (+0100) Subject: Enable VPCLMULQDQ support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6557be99afd301b8d7f2b142b12fb47ae6cb823d;p=gcc.git Enable VPCLMULQDQ support gcc/ * common/config/i386/i386-common.c (OPTION_MASK_ISA_VPCLMULQDQ_SET, OPTION_MASK_ISA_VPCLMULQDQ_UNSET): New. (ix86_handle_option): Handle -mvpclmulqdq, move cx6 to flags2. * config.gcc: Include vpclmulqdqintrin.h. * config/i386/cpuid.h: Handle bit_VPCLMULQDQ. * config/i386/driver-i386.c (host_detect_local_cpu): Handle -mvpclmulqdq. * config/i386/i386-builtin.def (__builtin_ia32_vpclmulqdq_v2di, __builtin_ia32_vpclmulqdq_v4di, __builtin_ia32_vpclmulqdq_v8di): New. * config/i386/i386-c.c (__VPCLMULQDQ__): New. * config/i386/i386.c (isa2_opts): Add -mcx16. (isa_opts): Add -mpclmulqdq, remove -mcx16. (ix86_option_override_internal): Move mcx16 to flags2. (ix86_valid_target_attribute_inner_p): Add vpclmulqdq. (ix86_expand_builtin): Handle OPTION_MASK_ISA_VPCLMULQDQ. * config/i386/i386.h (TARGET_VPCLMULQDQ, TARGET_VPCLMULQDQ_P): New. * config/i386/i386.opt: Add mvpclmulqdq, move mcx16 to flags2. * config/i386/immintrin.h: Include vpclmulqdqintrin.h. * config/i386/sse.md (vpclmulqdq_): New pattern. * config/i386/vpclmulqdqintrin.h (_mm512_clmulepi64_epi128, _mm_clmulepi64_epi128, _mm256_clmulepi64_epi128): New intrinsics. * doc/invoke.texi: Add -mvpclmulqdq. gcc/testsuite/ * gcc.target/i386/avx-1.c: Handle new intrinsics. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/avx512-check.h: Handle bit_VPCLMULQDQ. * gcc.target/i386/avx512f-vpclmulqdq-2.c: New test. * gcc.target/i386/avx512vl-vpclmulqdq-2.c: Ditto. * gcc.target/i386/vpclmulqdq.c: Ditto. * gcc.target/i386/i386.exp (check_effective_target_vpclmulqdq): New. From-SVN: r255850 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9284a8c07fc..c90777035d0 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,28 @@ +2017-12-20 Julia Koval + + * common/config/i386/i386-common.c (OPTION_MASK_ISA_VPCLMULQDQ_SET, + OPTION_MASK_ISA_VPCLMULQDQ_UNSET): New. + (ix86_handle_option): Handle -mvpclmulqdq, move cx6 to flags2. + * config.gcc: Include vpclmulqdqintrin.h. + * config/i386/cpuid.h: Handle bit_VPCLMULQDQ. + * config/i386/driver-i386.c (host_detect_local_cpu): Handle + -mvpclmulqdq. + * config/i386/i386-builtin.def (__builtin_ia32_vpclmulqdq_v2di, + __builtin_ia32_vpclmulqdq_v4di, __builtin_ia32_vpclmulqdq_v8di): New. + * config/i386/i386-c.c (__VPCLMULQDQ__): New. + * config/i386/i386.c (isa2_opts): Add -mcx16. + (isa_opts): Add -mpclmulqdq, remove -mcx16. + (ix86_option_override_internal): Move mcx16 to flags2. + (ix86_valid_target_attribute_inner_p): Add vpclmulqdq. + (ix86_expand_builtin): Handle OPTION_MASK_ISA_VPCLMULQDQ. + * config/i386/i386.h (TARGET_VPCLMULQDQ, TARGET_VPCLMULQDQ_P): New. + * config/i386/i386.opt: Add mvpclmulqdq, move mcx16 to flags2. + * config/i386/immintrin.h: Include vpclmulqdqintrin.h. + * config/i386/sse.md (vpclmulqdq_): New pattern. + * config/i386/vpclmulqdqintrin.h (_mm512_clmulepi64_epi128, + _mm_clmulepi64_epi128, _mm256_clmulepi64_epi128): New intrinsics. + * doc/invoke.texi: Add -mvpclmulqdq. + 2017-12-20 Tom de Vries PR middle-end/83423 diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 575a914311f..00eb01754bf 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -143,6 +143,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_IBT_SET OPTION_MASK_ISA_IBT #define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA_VAES_SET OPTION_MASK_ISA_VAES +#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ /* Define a set of ISAs which aren't available when a given ISA is disabled. MMX and SSE ISAs are handled separately. */ @@ -214,6 +215,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_IBT_UNSET OPTION_MASK_ISA_IBT #define OPTION_MASK_ISA_SHSTK_UNSET OPTION_MASK_ISA_SHSTK #define OPTION_MASK_ISA_VAES_UNSET OPTION_MASK_ISA_VAES +#define OPTION_MASK_ISA_VPCLMULQDQ_UNSET OPTION_MASK_ISA_VPCLMULQDQ /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. */ @@ -554,6 +556,19 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mvpclmulqdq: + if (value) + { + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_VPCLMULQDQ_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_VPCLMULQDQ_SET; + } + else + { + opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_VPCLMULQDQ_UNSET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_VPCLMULQDQ_UNSET; + } + return true; + case OPT_mavx5124fmaps: if (value) { @@ -889,13 +904,13 @@ ix86_handle_option (struct gcc_options *opts, case OPT_mcx16: if (value) { - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_CX16_SET; } else { - opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; - opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_CX16_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_CX16_UNSET; } return true; diff --git a/gcc/config.gcc b/gcc/config.gcc index e208d00bd5b..4c2b3824f98 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -381,7 +381,8 @@ i[34567]86-*-*) clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h - avx512vnnivlintrin.h gfniintrin.h vaesintrin.h" + avx512vnnivlintrin.h gfniintrin.h vaesintrin.h + vpclmulqdqintrin.h" ;; x86_64-*-*) cpu_type=i386 @@ -408,7 +409,8 @@ x86_64-*-*) clzerointrin.h pkuintrin.h sgxintrin.h cetintrin.h gfniintrin.h cet.h avx512vbmi2intrin.h avx512vbmi2vlintrin.h avx512vnniintrin.h - avx512vnnivlintrin.h gfniintrin.h vaesintrin.h" + avx512vnnivlintrin.h gfniintrin.h vaesintrin.h + vpclmulqdqintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 41369c2e348..37f3e1a96bb 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -102,6 +102,7 @@ #define bit_GFNI (1 << 8) #define bit_VAES (1 << 9) #define bit_AVX512VNNI (1 << 11) +#define bit_VPCLMULQDQ (1 << 10) #define bit_AVX512VPOPCNTDQ (1 << 14) #define bit_RDPID (1 << 22) diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index 013107ae722..99826fd651e 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -420,6 +420,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) unsigned int has_gfni = 0, has_avx512vbmi2 = 0; unsigned int has_ibt = 0, has_shstk = 0; unsigned int has_avx512vnni = 0, has_vaes = 0; + unsigned int has_vpclmulqdq = 0; bool arch; @@ -513,6 +514,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) has_rdpid = ecx & bit_RDPID; has_gfni = ecx & bit_GFNI; has_vaes = ecx & bit_VAES; + has_vpclmulqdq = ecx & bit_VPCLMULQDQ; has_avx5124vnniw = edx & bit_AVX5124VNNIW; has_avx5124fmaps = edx & bit_AVX5124FMAPS; @@ -1080,6 +1082,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) const char *ibt = has_ibt ? " -mibt" : " -mno-ibt"; const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk"; const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes"; + const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq"; options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3, sse4a, cx16, sahf, movbe, aes, sha, pclmul, popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2, @@ -1090,7 +1093,7 @@ const char *host_detect_local_cpu (int argc, const char **argv) xsavec, xsaves, avx512dq, avx512bw, avx512vl, avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw, clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, - avx512vbmi2, avx512vnni, vaes, NULL); + avx512vbmi2, avx512vnni, vaes, vpclmulqdq, NULL); } done: diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index e3b12bdc318..7d65b0b3fc4 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2414,6 +2414,11 @@ BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, CODE_FOR_vgf2p8mulb_v32q BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_SSE, CODE_FOR_vgf2p8mulb_v16qi, "__builtin_ia32_vgf2p8mulb_v16qi", IX86_BUILTIN_VGF2P8MULB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) BDESC (OPTION_MASK_ISA_GFNI | OPTION_MASK_ISA_AVX512BW, CODE_FOR_vgf2p8mulb_v16qi_mask, "__builtin_ia32_vgf2p8mulb_v16qi_mask", IX86_BUILTIN_VGF2P8MULB128MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_UHI) +/* VPCLMULQDQ */ +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT) +BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512F, CODE_FOR_vpclmulqdq_v8di, "__builtin_ia32_vpclmulqdq_v8di", IX86_BUILTIN_VPCLMULQDQ8, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT) + /* Builtins with rounding support. */ BDESC_END (ARGS, ROUND_ARGS) diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 072e49bbc0f..de1b0e299b6 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -486,6 +486,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, } if (isa_flag2 & OPTION_MASK_ISA_VAES) def_or_undef (parse_in, "__VAES__"); + if (isa_flag & OPTION_MASK_ISA_VPCLMULQDQ) + def_or_undef (parse_in, "__VPCLMULQDQ__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 48d5640f84e..ef321d32c0b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2751,6 +2751,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, ISAs come first. Target string will be displayed in the same order. */ static struct ix86_target_opts isa2_opts[] = { + { "-mcx16", OPTION_MASK_ISA_CX16 }, { "-mmpx", OPTION_MASK_ISA_MPX }, { "-mavx512vbmi2", OPTION_MASK_ISA_AVX512VBMI2 }, { "-mavx512vnni", OPTION_MASK_ISA_AVX512VNNI }, @@ -2765,6 +2766,7 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, }; static struct ix86_target_opts isa_opts[] = { + { "-mvpclmulqdq", OPTION_MASK_ISA_VPCLMULQDQ }, { "-mgfni", OPTION_MASK_ISA_GFNI }, { "-mavx512vbmi", OPTION_MASK_ISA_AVX512VBMI }, { "-mavx512ifma", OPTION_MASK_ISA_AVX512IFMA }, @@ -2811,7 +2813,6 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2, { "-mlzcnt", OPTION_MASK_ISA_LZCNT }, { "-mtbm", OPTION_MASK_ISA_TBM }, { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, - { "-mcx16", OPTION_MASK_ISA_CX16 }, { "-msahf", OPTION_MASK_ISA_SAHF }, { "-mmovbe", OPTION_MASK_ISA_MOVBE }, { "-mcrc32", OPTION_MASK_ISA_CRC32 }, @@ -3998,8 +3999,8 @@ ix86_option_override_internal (bool main_args_p, && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2; if (processor_alias_table[i].flags & PTA_CX16 - && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) - opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16; + && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_CX16)) + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_CX16; if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; @@ -5330,6 +5331,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[], IX86_ATTR_ISA ("ibt", OPT_mibt), IX86_ATTR_ISA ("shstk", OPT_mshstk), IX86_ATTR_ISA ("vaes", OPT_mvaes), + IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), @@ -35376,10 +35378,12 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, at all, -m64 is a whole TU option. */ if (((ix86_builtins_isa[fcode].isa & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX - | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI)) + | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI + | OPTION_MASK_ISA_VPCLMULQDQ)) && !(ix86_builtins_isa[fcode].isa & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX - | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI) + | OPTION_MASK_ISA_64BIT | OPTION_MASK_ISA_GFNI + | OPTION_MASK_ISA_VPCLMULQDQ) & ix86_isa_flags)) || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL) && !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)) diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 01fd6ce52bb..7da8573bc40 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -111,6 +111,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define TARGET_GFNI_P(x) TARGET_ISA_GFNI_P(x) #define TARGET_VAES TARGET_ISA_VAES #define TARGET_VAES_P(x) TARGET_ISA_VAES_P(x) +#define TARGET_VPCLMULQDQ TARGET_ISA_VPCLMULQDQ +#define TARGET_VPCLMULQDQ_P(x) TARGET_ISA_VPCLMULQDQ_P(x) #define TARGET_BMI TARGET_ISA_BMI #define TARGET_BMI_P(x) TARGET_ISA_BMI_P(x) #define TARGET_BMI2 TARGET_ISA_BMI2 diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 04e391d311d..0e58d3862f5 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -789,6 +789,10 @@ mvaes Target Report Mask(ISA_VAES) Var(ix86_isa_flags2) Save Support VAES built-in functions and code generation. +mvpclmulqdq +Target Report Mask(ISA_VPCLMULQDQ) Var(ix86_isa_flags) Save +Support VPCLMULQDQ built-in functions and code generation. + mbmi Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save Support BMI built-in functions and code generation. @@ -854,7 +858,7 @@ Target Report Mask(ISA_TBM) Var(ix86_isa_flags) Save Support TBM built-in functions and code generation. mcx16 -Target Report Mask(ISA_CX16) Var(ix86_isa_flags) Save +Target Report Mask(ISA_CX16) Var(ix86_isa_flags2) Save Support code generation of cmpxchg16b instruction. msahf diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index a6e27dd43a3..7fcaa695b0a 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -104,6 +104,8 @@ #include +#include + #ifndef __RDRND__ #pragma GCC push_options #pragma GCC target("rdrnd") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index c1469f457f5..20e7b160ac9 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -178,6 +178,9 @@ UNSPEC_VAESDECLAST UNSPEC_VAESENC UNSPEC_VAESENCLAST + + ;; For VPCLMULQDQ support + UNSPEC_VPCLMULQDQ ]) (define_c_enum "unspecv" [ @@ -340,6 +343,9 @@ (define_mode_iterator VI8 [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI]) +(define_mode_iterator VI8_FVL + [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")]) + (define_mode_iterator VI8_AVX512VL [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) @@ -20498,3 +20504,13 @@ "TARGET_VAES" "vaesenclast\t{%2, %1, %0|%0, %1, %2}" ) + +(define_insn "vpclmulqdq_" + [(set (match_operand:VI8_FVL 0 "register_operand" "=v") + (unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v") + (match_operand:VI8_FVL 2 "vector_operand" "vm") + (match_operand:SI 3 "const_0_to_255_operand" "n")] + UNSPEC_VPCLMULQDQ))] + "TARGET_VPCLMULQDQ" + "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "mode" "DI")]) diff --git a/gcc/config/i386/vpclmulqdqintrin.h b/gcc/config/i386/vpclmulqdqintrin.h new file mode 100644 index 00000000000..483e1602c00 --- /dev/null +++ b/gcc/config/i386/vpclmulqdqintrin.h @@ -0,0 +1,108 @@ +/* Copyright (C) 2014-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _VPCLMULQDQINTRIN_H_INCLUDED +#define _VPCLMULQDQINTRIN_H_INCLUDED + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512F__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512f") +#define __DISABLE_VPCLMULQDQF__ +#endif /* __VPCLMULQDQF__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_clmulepi64_epi128 (__m512i __A, __m512i __B, const int __C) +{ + return (__m512i) __builtin_ia32_vpclmulqdq_v8di ((__v8di)__A, + (__v8di) __B, __C); +} +#else +#define _mm512_clmulepi64_epi128(A, B, C) \ + ((__m512i) __builtin_ia32_vpclmulqdq_v8di ((__v8di)(__m512i)(A), \ + (__v8di)(__m512i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQF__ +#undef __DISABLE_VPCLMULQDQF__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQF__ */ + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512vl") +#define __DISABLE_VPCLMULQDQVL__ +#endif /* __VPCLMULQDQVL__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m128i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_clmulepi64_epi128 (__m128i __A, __m128i __B, const int __C) +{ + return (__m128i) __builtin_ia32_vpclmulqdq_v2di ((__v2di)__A, + (__v2di) __B, __C); +} +#else +#define _mm_clmulepi64_epi128(A, B, C) \ + ((__m128i) __builtin_ia32_vpclmulqdq_v2di ((__v2di)(__m128i)(A), \ + (__v2di)(__m128i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQVL__ +#undef __DISABLE_VPCLMULQDQVL__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQVL__ */ + +#if !defined(__VPCLMULQDQ__) || !defined(__AVX512VL__) +#pragma GCC push_options +#pragma GCC target("vpclmulqdq,avx512vl") +#define __DISABLE_VPCLMULQDQ__ +#endif /* __VPCLMULQDQ__ */ + +#ifdef __OPTIMIZE__ +extern __inline __m256i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_clmulepi64_epi128 (__m256i __A, __m256i __B, const int __C) +{ + return (__m256i) __builtin_ia32_vpclmulqdq_v4di ((__v4di)__A, + (__v4di) __B, __C); +} +#else +#define _mm256_clmulepi64_epi128(A, B, C) \ + ((__m256i) __builtin_ia32_vpclmulqdq_v4di ((__v4di)(__m256i)(A), \ + (__v4di)(__m256i)(B), (int)(C))) +#endif + +#ifdef __DISABLE_VPCLMULQDQ__ +#undef __DISABLE_VPCLMULQDQ__ +#pragma GCC pop_options +#endif /* __DISABLE_VPCLMULQDQ__ */ + + +#endif /* _VPCLMULQDQINTRIN_H_INCLUDED */ + diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 2049c2789db..cde0c73f9e0 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1208,6 +1208,7 @@ See RS/6000 and PowerPC Options. -mlzcnt -mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx @gol -mmwaitx -mclzero -mpku -mthreads -mgfni -mvaes @gol -mcet -mibt -mshstk -mforce-indirect-call -mavx512vbmi2 @gol +-mvpclmulqdq @gol -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol @@ -26160,13 +26161,17 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mvaes @opindex mvaes +@need 200 +@itemx -mvpclmulqdq +@opindex mvpclmulqdq These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES, FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2, -GFNI, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. Each has a -corresponding @option{-mno-} option to disable use of these instructions. +GFNI, VPCLMULQDQ, 3DNow!@: or enhanced 3DNow!@: extended instruction sets. +Each has a corresponding @option{-mno-} option to disable use of these +instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cb9e01223d4..aaa8c0551fe 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2017-12-20 Julia Koval + + * gcc.target/i386/avx-1.c: Handle new intrinsics. + * gcc.target/i386/sse-13.c: Ditto. + * gcc.target/i386/sse-23.c: Ditto. + * gcc.target/i386/avx512-check.h: Handle bit_VPCLMULQDQ. + * gcc.target/i386/avx512f-vpclmulqdq-2.c: New test. + * gcc.target/i386/avx512vl-vpclmulqdq-2.c: Ditto. + * gcc.target/i386/vpclmulqdq.c: Ditto. + * gcc.target/i386/i386.exp (check_effective_target_vpclmulqdq): New. + 2017-12-19 Martin Sebor PR c++/83394 diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index bbb4ae2a24a..db772449c42 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -655,6 +655,11 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + #include #include #include diff --git a/gcc/testsuite/gcc.target/i386/avx512-check.h b/gcc/testsuite/gcc.target/i386/avx512-check.h index 8ea8751990e..2d174f9df5d 100644 --- a/gcc/testsuite/gcc.target/i386/avx512-check.h +++ b/gcc/testsuite/gcc.target/i386/avx512-check.h @@ -86,6 +86,9 @@ main () #endif #ifdef VAES && (ecx & bit_VAES) +#endif +#ifdef VPCLMULQDQ + && (ecx & bit_VPCLMULQDQ) #endif && avx512f_os_support ()) { diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c new file mode 100644 index 00000000000..fe746a1095c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-vpclmulqdq-2.c @@ -0,0 +1,60 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512f -mvpclmulqdq" } */ +/* { dg-require-effective-target avx512f } */ +/* { dg-require-effective-target vpclmulqdq } */ + +#define AVX512F + +#define VPCLMULQDQ +#include "avx512f-helper.h" + +#define SIZE (AVX512F_LEN / 64) + +#include "avx512f-mask-type.h" + +static void +CALC (unsigned long long *r, unsigned long long *s1, unsigned long long *s2, unsigned char imm) +{ + for (int len = 0; len < SIZE/2; len++) + { + unsigned long long src1, src2; + src1 = (imm & 1) ? s1[len*2 + 1] : s1[len*2]; + src2 = ((imm >> 4) & 1) ? s2[len*2 + 1] : s2[len*2]; + for (int i = 0; i < 64; i++) + { + if ((src1 >> i) & 1) + { + if (i) + r[len*2 + 1] ^= src2 >> (64 - i); + r[len*2] ^= src2 << i; + } + } + } +} + +void +TEST (void) +{ + int i; + UNION_TYPE (AVX512F_LEN, i_q) res, src1, src2; + unsigned long long res_ref[SIZE]; + unsigned char imm = 1; + + for (i = 0; i < SIZE; i++) + { + src1.a[i] = 0xFFFFFFFFF + i; + src2.a[i] = 0xFFFFFFFFF + i*i; + } + + for (i = 0; i < SIZE; i++) + { + res.a[i] = 0; + res_ref[i] = 0; + } + + CALC (res_ref, src1.a, src2.a, imm); + res.x = INTRINSIC (_clmulepi64_epi128) (src1.x, src2.x, imm); + + if (UNION_CHECK (AVX512F_LEN, i_q) (res, res_ref)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c new file mode 100644 index 00000000000..61288a00307 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vpclmulqdq-2.c @@ -0,0 +1,17 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavx512bw -mavx512vl -mvpclmulqdq" } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-require-effective-target avx512bw } */ +/* { dg-require-effective-target vpclmulqdq } */ + +#define AVX512VL +#define AVX512F_LEN 256 +#define AVX512F_LEN_HALF 128 +#include "avx512f-vpclmulqdq-2.c" + +#undef AVX512F_LEN +#undef AVX512F_LEN_HALF + +#define AVX512F_LEN 128 +#define AVX512F_LEN_HALF 128 +#include "avx512f-vpclmulqdq-2.c" diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp index bebc6dd7935..0b53023945c 100644 --- a/gcc/testsuite/gcc.target/i386/i386.exp +++ b/gcc/testsuite/gcc.target/i386/i386.exp @@ -470,6 +470,19 @@ proc check_effective_target_avx512vaes { } { } "-mvaes" ] } +# Return 1 if vpclmulqdq instructions can be compiled. +proc check_effective_target_vpclmulqdq { } { + return [check_no_compiler_messages vpclmulqdq object { + typedef long long __v4di __attribute__ ((__vector_size__ (32))); + + __v4di + _mm256_clmulepi64_epi128 (__v4di __A, __v4di __B) + { + return (__v4di) __builtin_ia32_vpclmulqdq_v4di (__A, __B, 0); + } + } "-mvpclmulqdq -mavx512vl" ] +} + # If a testcase doesn't have special options, use these. global DEFAULT_CFLAGS if ![info exists DEFAULT_CFLAGS] then { diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index 89feeca7d3f..62f87f00b07 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -672,4 +672,9 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + #include diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index c1ae48b87d9..65f6ccffe25 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -633,7 +633,6 @@ #define __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v32qi_mask(A, B, 1, D, E) #define __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, C, D, E) __builtin_ia32_vgf2p8affineqb_v64qi_mask(A, B, 1, D, E) - /* avx512vbmi2intrin.h */ #define __builtin_ia32_vpshrd_v32hi(A, B, C) __builtin_ia32_vpshrd_v32hi(A, B, 1) #define __builtin_ia32_vpshrd_v32hi_mask(A, B, C, D, E) __builtin_ia32_vpshrd_v32hi_mask(A, B, 1, D, E) @@ -672,6 +671,11 @@ #define __builtin_ia32_vpshld_v2di(A, B, C) __builtin_ia32_vpshld_v2di(A, B, 1) #define __builtin_ia32_vpshld_v2di_mask(A, B, C, D, E) __builtin_ia32_vpshld_v2di_mask(A, B, 1, D, E) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2") +/* vpclmulqdqintrin.h */ +#define __builtin_ia32_vpclmulqdq_v4di(A, B, C) __builtin_ia32_vpclmulqdq_v4di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) +#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) + +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq") #include diff --git a/gcc/testsuite/gcc.target/i386/vpclmulqdq.c b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c new file mode 100644 index 00000000000..0ce1a061298 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vpclmulqdq.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mvpclmulqdq -mavx512vl -mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ +/* { dg-final { scan-assembler-times "vpclmulqdq\[ \\t\]+\[^\{\n\]*\\\$3\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */ + +#include + +volatile __m512i x1, x2; +volatile __m256i x3, x4; +volatile __m128i x5, x6; + +void extern +avx512vl_test (void) +{ + x1 = _mm512_clmulepi64_epi128(x1, x2, 3); + x3 = _mm256_clmulepi64_epi128(x3, x4, 3); + x5 = _mm_clmulepi64_epi128(x5, x6, 3); +} +