From 3e2ae3ee285a57455d5a23bd352a68c289130186 Mon Sep 17 00:00:00 2001 From: Venkataramanan Kumar Date: Sat, 5 Dec 2020 11:12:15 +0530 Subject: [PATCH] X86_64: Enable support for next generation AMD Zen3 CPU. 2020-12-03 Venkataramanan Kumar Sharavan Kumar gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_amd_cpu): Recognize znver3. * common/config/i386/i386-common.c (processor_names): Add znver3. (processor_alias_table): Add znver3 and AMDFAM19H entries. * common/config/i386/i386-cpuinfo.h (processor_types): Add AMDFAM19H. (processor_subtypes): Add AMDFAM19H_ZNVER3. * config.gcc (i[34567]86-*-linux* | ...): Add znver3. * config/i386/driver-i386.c (host_detect_local_cpu): Let -march=native recognize znver3 processors. * config/i386/i386-c.c (ix86_target_macros_internal): Add znver3. * config/i386/i386-options.c (m_znver3): New definition. (m_ZNVER): Include m_znver3. (processor_cost_table): Add znver3. * config/i386/i386.c (ix86_reassociation_width): Likewise. * config/i386/i386.h (TARGET_znver3): New definition. (enum processor_type): Add PROCESSOR_ZNVER3. * config/i386/i386.md (define_attr "cpu"): Add znver3. * config/i386/x86-tune-sched.c (ix86_issue_rate): Likewise. (ix86_adjust_cost): Likewise. * config/i386/x86-tune.def (X86_TUNE_AVOID_256FMA_CHAINS): Likewise. * config/i386/znver1.md: Add new reservations for znver3. * doc/extend.texi: Add details about znver3. * doc/invoke.texi: Likewise. gcc/testsuite/ChangeLog: * gcc.target/i386/funcspec-56.inc: Handle new march. * g++.target/i386/mv29.C: New file. 
--- gcc/common/config/i386/cpuinfo.h | 17 + gcc/common/config/i386/i386-common.c | 16 +- gcc/common/config/i386/i386-cpuinfo.h | 2 + gcc/config.gcc | 10 +- gcc/config/i386/driver-i386.c | 5 + gcc/config/i386/i386-c.c | 7 + gcc/config/i386/i386-options.c | 4 +- gcc/config/i386/i386.c | 5 +- gcc/config/i386/i386.h | 2 + gcc/config/i386/i386.md | 2 +- gcc/config/i386/x86-tune-sched.c | 2 + gcc/config/i386/x86-tune.def | 2 +- gcc/config/i386/znver1.md | 353 ++++++++++++------ gcc/doc/extend.texi | 6 + gcc/doc/invoke.texi | 7 + gcc/testsuite/g++.target/i386/mv29.C | 79 ++++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 6 + 17 files changed, 397 insertions(+), 128 deletions(-) create mode 100644 gcc/testsuite/g++.target/i386/mv29.C diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 41728a2fceb..4f1ab636807 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -241,6 +241,23 @@ get_amd_cpu (struct __processor_model *cpu_model, cpu_model->__cpu_subtype = AMDFAM17H_ZNVER1; } break; + case 0x19: + cpu_model->__cpu_type = AMDFAM19H; + /* AMD family 19h version 1. */ + if (model <= 0x0f) + { + cpu = "znver3"; + CHECK___builtin_cpu_is ("znver3"); + cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3; + } + else if (has_cpu_feature (cpu_model, cpu_features2, + FEATURE_VAES)) + { + cpu = "znver3"; + CHECK___builtin_cpu_is ("znver3"); + cpu_model->__cpu_subtype = AMDFAM19H_ZNVER3; + } + break; default: break; } diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 8f809c1604c..2a1d31f3a4b 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1762,7 +1762,8 @@ const char *const processor_names[] = "btver1", "btver2", "znver1", - "znver2" + "znver2", + "znver3" }; /* Guarantee that the array is aligned with enum processor_type. 
*/ @@ -2004,6 +2005,17 @@ const pta processor_alias_table[] = | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID | PTA_WBNOINVD, M_CPU_SUBTYPE (AMDFAM17H_ZNVER2), P_PROC_AVX2}, + {"znver3", PROCESSOR_ZNVER3, CPU_ZNVER3, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE + | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED + | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES + | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID + | PTA_WBNOINVD | PTA_VAES | PTA_VPCLMULQDQ | PTA_PKU, + M_CPU_SUBTYPE (AMDFAM19H_ZNVER3), P_PROC_AVX2}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW @@ -2030,6 +2042,8 @@ const pta processor_alias_table[] = M_CPU_TYPE (AMDFAM15H), P_NONE}, {"amdfam17h", PROCESSOR_GENERIC, CPU_GENERIC, 0, M_CPU_TYPE (AMDFAM17H), P_NONE}, + {"amdfam19h", PROCESSOR_GENERIC, CPU_GENERIC, 0, + M_CPU_TYPE (AMDFAM19H), P_NONE}, {"shanghai", PROCESSOR_GENERIC, CPU_GENERIC, 0, M_CPU_TYPE (AMDFAM10H_SHANGHAI), P_NONE}, {"istanbul", PROCESSOR_GENERIC, CPU_GENERIC, 0, diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index af02be57812..849e95a4d28 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -55,6 +55,7 @@ enum processor_types INTEL_GOLDMONT, INTEL_GOLDMONT_PLUS, INTEL_TREMONT, + AMDFAM19H, CPU_TYPE_MAX, BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX }; @@ -86,6 +87,7 @@ enum processor_subtypes INTEL_COREI7_COOPERLAKE, INTEL_COREI7_SAPPHIRERAPIDS, INTEL_COREI7_ALDERLAKE, + AMDFAM19H_ZNVER3, CPU_SUBTYPE_MAX }; diff --git a/gcc/config.gcc b/gcc/config.gcc index 7b138d1bee1..9c7604481f1 100644 --- a/gcc/config.gcc +++ 
b/gcc/config.gcc @@ -668,7 +668,7 @@ c7 esther" # 64-bit x86 processors supported by --with-arch=. Each processor # MUST be separated by exactly one space. x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ -bdver3 bdver4 znver1 znver2 btver1 btver2 k8 k8-sse3 opteron \ +bdver3 bdver4 znver1 znver2 znver3 btver1 btver2 k8 k8-sse3 opteron \ opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ @@ -3678,6 +3678,10 @@ case ${target} in arch=znver2 cpu=znver2 ;; + znver3-*) + arch=znver3 + cpu=znver3 + ;; bdver4-*) arch=bdver4 cpu=bdver4 @@ -3799,6 +3803,10 @@ case ${target} in arch=znver2 cpu=znver2 ;; + znver3-*) + arch=znver3 + cpu=znver3 + ;; bdver4-*) arch=bdver4 cpu=bdver4 diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index ecdad5765d5..2bfa037dd8b 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -455,6 +455,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) processor = PROCESSOR_GEODE; else if (has_feature (FEATURE_MOVBE) && family == 22) processor = PROCESSOR_BTVER2; + else if (has_feature (FEATURE_VAES)) + processor = PROCESSOR_ZNVER3; else if (has_feature (FEATURE_CLWB)) processor = PROCESSOR_ZNVER2; else if (has_feature (FEATURE_CLZERO)) @@ -753,6 +755,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) case PROCESSOR_ZNVER2: cpu = "znver2"; break; + case PROCESSOR_ZNVER3: + cpu = "znver3"; + break; case PROCESSOR_BTVER1: cpu = "btver1"; break; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 87b3a2bf143..6d690e01c3a 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -128,6 +128,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__znver2"); def_or_undef (parse_in, "__znver2__"); break; + case 
PROCESSOR_ZNVER3: + def_or_undef (parse_in, "__znver3"); + def_or_undef (parse_in, "__znver3__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__btver1"); def_or_undef (parse_in, "__btver1__"); @@ -315,6 +319,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_ZNVER2: def_or_undef (parse_in, "__tune_znver2__"); break; + case PROCESSOR_ZNVER3: + def_or_undef (parse_in, "__tune_znver3__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__tune_btver1__"); break; diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index dc07697f622..40714c8ad3b 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -147,11 +147,12 @@ along with GCC; see the file COPYING3. If not see #define m_BDVER4 (HOST_WIDE_INT_1U< + +int __attribute__ ((target("default"))) +foo () +{ + return 0; +} + +int __attribute__ ((target("arch=amdfam10"))) foo () { + return 1; +} + +int __attribute__ ((target("arch=btver1"))) foo () { + return 2; +} + +int __attribute__ ((target("arch=btver2"))) foo () { + return 3; +} + +int __attribute__ ((target("arch=bdver1"))) foo () { + return 4; +} + +int __attribute__ ((target("arch=bdver2"))) foo () { + return 5; +} + +int __attribute__ ((target("arch=bdver3"))) foo () { + return 6; +} + +int __attribute__ ((target("arch=znver1"))) foo () { + return 7; +} + +int __attribute__ ((target("arch=znver2"))) foo () { + return 8; +} + +int __attribute__ ((target("arch=znver3"))) foo () { + return 9; +} + + +int main () +{ + int val = foo (); + + if (__builtin_cpu_is ("amdfam10h")) + assert (val == 1); + else if (__builtin_cpu_is ("btver1")) + assert (val == 2); + else if (__builtin_cpu_is ("btver2")) + assert (val == 3); + else if (__builtin_cpu_is ("bdver1")) + assert (val == 4); + else if (__builtin_cpu_is ("bdver2")) + assert (val == 5); + else if (__builtin_cpu_is ("bdver3")) + assert (val == 6); + else if (__builtin_cpu_is ("znver1")) + assert (val == 7); + else if 
(__builtin_cpu_is ("znver2")) + assert (val == 8); + else if (__builtin_cpu_is ("znver3")) + assert (val == 9); + else + assert (val == 0); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 395a21c8668..5d4800f2802 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -193,6 +193,9 @@ extern void test_arch_barcelona (void) __attribute__((__target__("arch=barcelon extern void test_arch_bdver1 (void) __attribute__((__target__("arch=bdver1"))); extern void test_arch_bdver2 (void) __attribute__((__target__("arch=bdver2"))); extern void test_arch_bdver3 (void) __attribute__((__target__("arch=bdver3"))); +extern void test_arch_znver1 (void) __attribute__((__target__("arch=znver1"))); +extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2"))); +extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3"))); extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); @@ -212,6 +215,9 @@ extern void test_tune_bdver1 (void) __attribute__((__target__("tune=bdver1"))); extern void test_tune_bdver2 (void) __attribute__((__target__("tune=bdver2"))); extern void test_tune_bdver3 (void) __attribute__((__target__("tune=bdver3"))); extern void test_tune_generic (void) __attribute__((__target__("tune=generic"))); +extern void test_tune_znver1 (void) __attribute__((__target__("tune=znver1"))); +extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2"))); +extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3"))); extern void test_fpmath_sse (void) __attribute__((__target__("sse2,fpmath=sse"))); extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387"))); -- 2.30.2