From: Florian Weimer Date: Thu, 1 Oct 2020 08:08:24 +0000 (+0200) Subject: PR target/97250: i386: Add support for x86-64-v2, x86-64-v3, x86-64-v4 levels for... X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=324bec558e95584e8c1997575ae9d75978af59f1;p=gcc.git PR target/97250: i386: Add support for x86-64-v2, x86-64-v3, x86-64-v4 levels for x86-64 These micro-architecture levels are defined in the x86-64 psABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9 PTA_NO_TUNE is introduced so that the new processor alias table entries do not affect the CPU tuning setting in ix86_tune. The tests depend on the macros added in commit 92e652d8c21bd7e66cbb0f900 ("i386: Define __LAHF_SAHF__ and __MOVBE__ macros, based on ISA flags"). gcc/: PR target/97250 * config/i386/i386.h (PTA_NO_TUNE, PTA_X86_64_BASELINE) (PTA_X86_64_V2, PTA_X86_64_V3, PTA_X86_64_V4): New. * common/config/i386/i386-common.c (processor_alias_table): Add "x86-64-v2", "x86-64-v3", "x86-64-v4". * config/i386/i386-options.c (ix86_option_override_internal): Handle new PTA_NO_TUNE processor table entries. * doc/invoke.texi (x86 Options): Document new -march values. gcc/testsuite/: PR target/97250 * gcc.target/i386/x86-64-v2.c: New test. * gcc.target/i386/x86-64-v3.c: New test. * gcc.target/i386/x86-64-v3-haswell.c: New test. * gcc.target/i386/x86-64-v3-skylake.c: New test. * gcc.target/i386/x86-64-v4.c: New test. --- diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 10142149115..62a620b4430 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1795,9 +1795,13 @@ const pta processor_alias_table[] = PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR, 0, P_NONE}, {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_FXSR, 0, P_NONE}, - {"x86-64", PROCESSOR_K8, CPU_K8, - PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, - 0, P_NONE}, + {"x86-64", PROCESSOR_K8, CPU_K8, PTA_X86_64_BASELINE, 0, P_NONE}, + {"x86-64-v2", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V2 | PTA_NO_TUNE, + 0, P_NONE}, + {"x86-64-v3", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V3 | PTA_NO_TUNE, + 0, P_NONE}, + {"x86-64-v4", PROCESSOR_K8, CPU_GENERIC, PTA_X86_64_V4 | PTA_NO_TUNE, + 0, P_NONE}, {"eden-x2", PROCESSOR_K8, CPU_K8, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR, 0, P_NONE}, diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 597de533fbd..a59bd703880 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -2058,10 +2058,27 @@ ix86_option_override_internal (bool main_args_p, return false; } + /* The feature-only micro-architecture levels that use + PTA_NO_TUNE are only defined for the x86-64 psABI. */ + if ((processor_alias_table[i].flags & PTA_NO_TUNE) != 0 + && (!TARGET_64BIT_P (opts->x_ix86_isa_flags) + || opts->x_ix86_abi != SYSV_ABI)) + { + error (G_("%<%s%> architecture level is only defined" + " for the x86-64 psABI"), opts->x_ix86_arch_string); + return false; + } + ix86_schedule = processor_alias_table[i].schedule; ix86_arch = processor_alias_table[i].processor; - /* Default cpu tuning to the architecture. */ - ix86_tune = ix86_arch; + + /* Default cpu tuning to the architecture, unless the table + entry requests not to do this. Used by the x86-64 psABI + micro-architecture levels. */ + if ((processor_alias_table[i].flags & PTA_NO_TUNE) == 0) + ix86_tune = ix86_arch; + else + ix86_tune = PROCESSOR_GENERIC; if (((processor_alias_table[i].flags & PTA_MMX) != 0) && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) @@ -2384,7 +2401,8 @@ ix86_option_override_internal (bool main_args_p, ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); for (i = 0; i < pta_size; i++) - if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name)) + if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name) + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) { ix86_schedule = processor_alias_table[i].schedule; ix86_tune = processor_alias_table[i].processor; @@ -2428,8 +2446,9 @@ ix86_option_override_internal (bool main_args_p, auto_vec candidates; for (i = 0; i < pta_size; i++) - if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) - || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) + if ((!TARGET_64BIT_P (opts->x_ix86_isa_flags) + || ((processor_alias_table[i].flags & PTA_64BIT) != 0)) + && (processor_alias_table[i].flags & PTA_NO_TUNE) == 0) candidates.safe_push (processor_alias_table[i].name); #ifdef HAVE_LOCAL_CPU_DETECT diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a449653cc3e..9a5de6a0e9c 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2433,7 +2433,7 @@ const wide_int_bitmask PTA_AVX512F (HOST_WIDE_INT_1U << 40); const wide_int_bitmask PTA_AVX512ER (HOST_WIDE_INT_1U << 41); const wide_int_bitmask PTA_AVX512PF (HOST_WIDE_INT_1U << 42); const wide_int_bitmask PTA_AVX512CD (HOST_WIDE_INT_1U << 43); -/* Hole after PTA_MPX was removed. */ +const wide_int_bitmask PTA_NO_TUNE (HOST_WIDE_INT_1U << 44); const wide_int_bitmask PTA_SHA (HOST_WIDE_INT_1U << 45); const wide_int_bitmask PTA_PREFETCHWT1 (HOST_WIDE_INT_1U << 46); const wide_int_bitmask PTA_CLFLUSHOPT (HOST_WIDE_INT_1U << 47); @@ -2476,6 +2476,15 @@ const wide_int_bitmask PTA_AMX_TILE(0, HOST_WIDE_INT_1U << 19); const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20); const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21); +const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR; +const wide_int_bitmask PTA_X86_64_V2 = (PTA_X86_64_BASELINE & (~PTA_NO_SAHF)) + | PTA_CX16 | PTA_POPCNT | PTA_SSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_SSSE3; +const wide_int_bitmask PTA_X86_64_V3 = PTA_X86_64_V2 + | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_LZCNT + | PTA_MOVBE | PTA_XSAVE; +const wide_int_bitmask PTA_X86_64_V4 = PTA_X86_64_V3 + | PTA_AVX512F | PTA_AVX512BW | PTA_AVX512CD | PTA_AVX512DQ | PTA_AVX512VL; const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR; const wide_int_bitmask PTA_NEHALEM = PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index c0499324d5f..a5ecb1b4937 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -29244,7 +29244,7 @@ Generate instructions for the machine type @var{cpu-type}. In contrast to for the specified @var{cpu-type}, @option{-march=@var{cpu-type}} allows GCC to generate code that may not run at all on processors other than the one indicated. Specifying @option{-march=@var{cpu-type}} implies -@option{-mtune=@var{cpu-type}}. +@option{-mtune=@var{cpu-type}}, except where noted otherwise. The choices for @var{cpu-type} are: @@ -29260,6 +29260,19 @@ of the selected instruction set. @item x86-64 A generic CPU with 64-bit extensions. +@item x86-64-v2 +@itemx x86-64-v3 +@itemx x86-64-v4 +These choices for @var{cpu-type} select the corresponding +micro-architecture level from the x86-64 psABI. They are only available +when compiling for an x86-64 target that uses the System V psABI@. + +Since these @var{cpu-type} values do not have a corresponding +@option{-mtune} setting, using @option{-march} with these values enables +generic tuning. Specific tuning can be enabled using the +@option{-mtune=@var{other-cpu-type}} option with an appropriate +@var{other-cpu-type} value. + @item i386 Original Intel i386 CPU@. diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v2.c b/gcc/testsuite/gcc.target/i386/x86-64-v2.c new file mode 100644 index 00000000000..0f3df3605b5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v2.c @@ -0,0 +1,116 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=x86-64-v2" } */ + +/* Verify that the CPU features required by x86-64-v2 are enabled. */ + +#ifndef __MMX__ +# error __MMX__ not defined +#endif +#ifndef __SSE__ +# error __SSE__ not defined +#endif +#ifndef __SSE2__ +# error __SSE2__ not defined +#endif +#ifndef __LAHF_SAHF__ +# error __LAHF_SAHF__ not defined +#endif +#ifndef __POPCNT__ +# error __POPCNT__ not defined +#endif +#ifndef __SSE3__ +# error __SSE3__ not defined +#endif +#ifndef __SSE4_1__ +# error __SSE4_1__ not defined +#endif +#ifndef __SSE4_2__ +# error __SSE4_2__ not defined +#endif +#ifndef __SSSE3__ +# error __SSSE3__ not defined +#endif +#ifdef __SSE4A__ +# error __SSE4A__ defined +#endif +#ifdef __AVX__ +# error __AVX__ defined +#endif +#ifdef __AVX2__ +# error __AVX2__ defined +#endif +#ifdef __F16C__ +# error __F16C__ defined +#endif +#ifdef __FMA__ +# error __FMA__ defined +#endif +#ifdef __LZCNT__ +# error __LZCNT__ defined +#endif +#ifdef __MOVBE__ +# error __MOVBE__ defined +#endif +#ifdef __XSAVE__ +# error __XSAVE__ defined +#endif +#ifdef __XSAVEC__ +# error __XSAVEC__ defined +#endif +#ifdef __AVX512F__ +# error __AVX512F__ defined +#endif +#ifdef __AVX512BW__ +# error __AVX512BW__ defined +#endif +#ifdef __AVX512CD__ +# error __AVX512CD__ defined +#endif +#ifdef __AVX512DQ__ +# error __AVX512DQ__ defined +#endif +#ifdef __AVX512VL__ +# error __AVX512VL__ defined +#endif +#ifdef __AVX512PF__ +# error __AVX512PF__ defined +#endif +#ifdef __AVX512VBMI__ +# error __AVX512VBMI__ defined +#endif +#ifdef __AVX512IFMA__ +# error __AVX512IFMA__ defined +#endif +#ifdef __AVX512VNNIW__ +# error __AVX512VNNIW__ defined +#endif +#ifdef __AVX512VBMI2__ +# error __AVX512VBMI2__ defined +#endif +#ifdef __AVX5124FMAPS__ +# error __AVX5124FMAPS__ defined +#endif +#ifdef __AVX5124BITALG__ +# error __AVX5124BITALG__ defined +#endif +#ifdef __AVX5124VPOPCNTDQ__ +# error __AVX5124VPOPCNTDQ__ defined +#endif +#ifdef __AVX5124BF16__ +# error __AVX5124BF16__ defined +#endif +#ifdef __AVX512VP2INTERSECT__ +# error __AVX512VP2INTERSECT__ defined +#endif +#ifdef __AVX512VNNI__ +# error __AVX512VNNI__ defined +#endif +#ifdef __FMA4__ +# error __FMA4__ defined +#endif +#ifdef __3dNOW__ +# error __3dNOW__ defined +#endif +#ifdef __tune_k8__ +# error __tune_k8__ defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c b/gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c new file mode 100644 index 00000000000..216467e1376 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3-haswell.c @@ -0,0 +1,18 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -mtune=haswell -march=x86-64-v3" } */ + +/* Check that -march=x86-64-v3 preserves tuning. */ + +/* PCLMUL is not in x86-64-v3, but in -march=haswell. Make sure that + it is absent. */ +#ifdef __PCLMUL__ +# error __PCLMUL__ is defined +#endif + +/* -mtune=haswell tuning is preserved. */ +#ifndef __k8__ +# error __k8__ is not defined +#endif +#ifndef __tune_haswell__ +# error __tune_haswell__ is not defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c b/gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c new file mode 100644 index 00000000000..aa34862cc59 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3-skylake.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=skylake -march=x86-64-v3" } */ + +/* Check that -march=x86-64-v3 overrides a previous -march= setting. */ + +/* PCLMUL is not in x86-64-v3, but in -march=skylake. Make sure that + it is absent. */ +#ifdef __PCLMUL__ +# error __PCLMUL__ is defined +#endif + +/* -march=skylake tuning is deactivated. */ +#ifndef __k8__ +# error __k8__ is not defined +#endif +#ifdef __skylake__ +# error __skylake__ is defined +#endif +#ifdef __tune_skylake__ +# error __tune_skylake__ is defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v3.c b/gcc/testsuite/gcc.target/i386/x86-64-v3.c new file mode 100644 index 00000000000..16a94b18021 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v3.c @@ -0,0 +1,116 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=x86-64-v3" } */ + +/* Verify that the CPU features required by x86-64-v4 are enabled. */ + +#ifndef __MMX__ +# error __MMX__ not defined +#endif +#ifndef __SSE__ +# error __SSE__ not defined +#endif +#ifndef __SSE2__ +# error __SSE2__ not defined +#endif +#ifndef __LAHF_SAHF__ +# error __LAHF_SAHF__ not defined +#endif +#ifndef __POPCNT__ +# error __POPCNT__ not defined +#endif +#ifndef __SSE3__ +# error __SSE3__ not defined +#endif +#ifndef __SSE4_1__ +# error __SSE4_1__ not defined +#endif +#ifndef __SSE4_2__ +# error __SSE4_2__ not defined +#endif +#ifndef __SSSE3__ +# error __SSSE3__ not defined +#endif +#ifdef __SSE4A__ +# error __SSE4A__ defined +#endif +#ifndef __AVX__ +# error __AVX__ not defined +#endif +#ifndef __AVX2__ +# error __AVX2__ not defined +#endif +#ifndef __F16C__ +# error __F16C__ not defined +#endif +#ifndef __FMA__ +# error __FMA__ not defined +#endif +#ifndef __LZCNT__ +# error __LZCNT__ not defined +#endif +#ifndef __MOVBE__ +# error __MOVBE__ not defined +#endif +#ifndef __XSAVE__ +# error __XSAVE__ not defined +#endif +#ifdef __XSAVEC__ +# error __XSAVEC__ defined +#endif +#ifdef __AVX512F__ +# error __AVX512F__ defined +#endif +#ifdef __AVX512BW__ +# error __AVX512BW__ defined +#endif +#ifdef __AVX512CD__ +# error __AVX512CD__ defined +#endif +#ifdef __AVX512DQ__ +# error __AVX512DQ__ defined +#endif +#ifdef __AVX512VL__ +# error __AVX512VL__ defined +#endif +#ifdef __AVX512PF__ +# error __AVX512PF__ defined +#endif +#ifdef __AVX512VBMI__ +# error __AVX512VBMI__ defined +#endif +#ifdef __AVX512IFMA__ +# error __AVX512IFMA__ defined +#endif +#ifdef __AVX512VNNIW__ +# error __AVX512VNNIW__ defined +#endif +#ifdef __AVX512VBMI2__ +# error __AVX512VBMI2__ defined +#endif +#ifdef __AVX5124FMAPS__ +# error __AVX5124FMAPS__ defined +#endif +#ifdef __AVX5124BITALG__ +# error __AVX5124BITALG__ defined +#endif +#ifdef __AVX5124VPOPCNTDQ__ +# error __AVX5124VPOPCNTDQ__ defined +#endif +#ifdef __AVX5124BF16__ +# error __AVX5124BF16__ defined +#endif +#ifdef __AVX512VP2INTERSECT__ +# error __AVX512VP2INTERSECT__ defined +#endif +#ifdef __AVX512VNNI__ +# error __AVX512VNNI__ defined +#endif +#ifdef __FMA4__ +# error __FMA4__ defined +#endif +#ifdef __3dNOW__ +# error __3dNOW__ defined +#endif +#ifdef __tune_k8__ +# error __tune_k8__ defined +#endif diff --git a/gcc/testsuite/gcc.target/i386/x86-64-v4.c b/gcc/testsuite/gcc.target/i386/x86-64-v4.c new file mode 100644 index 00000000000..48e928c2955 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/x86-64-v4.c @@ -0,0 +1,116 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-mabi=sysv -march=x86-64-v4" } */ + +/* Verify that the CPU features required by x86-64-v4 are enabled. */ + +#ifndef __MMX__ +# error __MMX__ not defined +#endif +#ifndef __SSE__ +# error __SSE__ not defined +#endif +#ifndef __SSE2__ +# error __SSE2__ not defined +#endif +#ifndef __LAHF_SAHF__ +# error __LAHF_SAHF__ not defined +#endif +#ifndef __POPCNT__ +# error __POPCNT__ not defined +#endif +#ifndef __SSE3__ +# error __SSE3__ not defined +#endif +#ifndef __SSE4_1__ +# error __SSE4_1__ not defined +#endif +#ifndef __SSE4_2__ +# error __SSE4_2__ not defined +#endif +#ifndef __SSSE3__ +# error __SSSE3__ not defined +#endif +#ifdef __SSE4A__ +# error __SSE4A__ defined +#endif +#ifndef __AVX__ +# error __AVX__ not defined +#endif +#ifndef __AVX2__ +# error __AVX2__ not defined +#endif +#ifndef __F16C__ +# error __F16C__ not defined +#endif +#ifndef __FMA__ +# error __FMA__ not defined +#endif +#ifndef __LZCNT__ +# error __LZCNT__ not defined +#endif +#ifndef __MOVBE__ +# error __MOVBE__ not defined +#endif +#ifndef __XSAVE__ +# error __XSAVE__ not defined +#endif +#ifdef __XSAVEC__ +# error __XSAVEC__ defined +#endif +#ifndef __AVX512F__ +# error __AVX512F__ not defined +#endif +#ifndef __AVX512BW__ +# error __AVX512BW__ not defined +#endif +#ifndef __AVX512CD__ +# error __AVX512CD__ not defined +#endif +#ifndef __AVX512DQ__ +# error __AVX512DQ__ not defined +#endif +#ifndef __AVX512VL__ +# error __AVX512VL__ not defined +#endif +#ifdef __AVX512PF__ +# error __AVX512PF__ defined +#endif +#ifdef __AVX512VBMI__ +# error __AVX512VBMI__ defined +#endif +#ifdef __AVX512IFMA__ +# error __AVX512IFMA__ defined +#endif +#ifdef __AVX512VNNIW__ +# error __AVX512VNNIW__ defined +#endif +#ifdef __AVX512VBMI2__ +# error __AVX512VBMI2__ defined +#endif +#ifdef __AVX5124FMAPS__ +# error __AVX5124FMAPS__ defined +#endif +#ifdef __AVX5124BITALG__ +# error __AVX5124BITALG__ defined +#endif +#ifdef __AVX5124VPOPCNTDQ__ +# error __AVX5124VPOPCNTDQ__ defined +#endif +#ifdef __AVX5124BF16__ +# error __AVX5124BF16__ defined +#endif +#ifdef __AVX512VP2INTERSECT__ +# error __AVX512VP2INTERSECT__ defined +#endif +#ifdef __AVX512VNNI__ +# error __AVX512VNNI__ defined +#endif +#ifdef __FMA4__ +# error __FMA4__ defined +#endif +#ifdef __3dNOW__ +# error __3dNOW__ defined +#endif +#ifdef __tune_k8__ +# error __tune_k8__ defined +#endif