[AArch64] Initial -mcpu=ares tuning
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Thu, 17 Jan 2019 10:08:55 +0000 (10:08 +0000)
committerKyrylo Tkachov <ktkachov@gcc.gnu.org>
Thu, 17 Jan 2019 10:08:55 +0000 (10:08 +0000)
This patch adds a tuning struct for the Arm Ares CPU and uses it for -m{cpu,tune}=ares.
The tunings are an initial attempt and may be improved upon in the future, but they serve
as a decent starting point for GCC 9.

With this I see a 1.3% improvement on SPEC2006 int and 0.3% on SPEC2006 fp with -mcpu=ares.
On SPEC2017 I see a 0.6% improvement in intrate and changes in the noise for fprate.

        * config/aarch64/aarch64.c (ares_tunings): Define.
        * config/aarch64/aarch64-cores.def (ares): Use the above.

From-SVN: r268015

gcc/ChangeLog
gcc/config/aarch64/aarch64-cores.def
gcc/config/aarch64/aarch64.c

index 7eeb89acd63fd13e447ebc9676d94edadba8e717..59f39740a7b940a3092a37d1576528fd0b8b6f4e 100644 (file)
@@ -1,3 +1,8 @@
+2019-01-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64.c (ares_tunings): Define.
+       * config/aarch64/aarch64-cores.def (ares): Use the above.
+
 2019-01-17  Wei Xiao  <wei3.xiao@intel.com>
 
        PR target/88794
index 70b076694d9901ccf15bfd26c950b6466d3d1cc2..7c4bd52049e5ae33241acce37414da91abaa989c 100644 (file)
@@ -100,7 +100,7 @@ AARCH64_CORE("thunderx2t99",  thunderx2t99,  thunderx2t99, 8_1A,  AARCH64_FL_FOR
 AARCH64_CORE("cortex-a55",  cortexa55, cortexa53, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
 AARCH64_CORE("cortex-a75",  cortexa75, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
 AARCH64_CORE("cortex-a76",  cortexa76, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa72, 0x41, 0xd0b, -1)
-AARCH64_CORE("ares",  ares, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, cortexa72, 0x41, 0xd0c, -1)
+AARCH64_CORE("ares",  ares, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, ares, 0x41, 0xd0c, -1)
 
 /* HiSilicon ('H') cores. */
 AARCH64_CORE("tsv110",  tsv110, cortexa57, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,   0x48, 0xd01, -1)
index da48c0969dc31d96a3c1b639babd65622929e321..3c1818ad96d9263d7b23bc7d9e917061f95352ec 100644 (file)
@@ -1083,6 +1083,32 @@ static const struct tune_params thunderx2t99_tunings =
   &thunderx2t99_prefetch_tune
 };
 
+static const struct tune_params ares_tunings =
+{
+  &cortexa57_extra_costs,
+  &generic_addrcost_table,
+  &generic_regmove_cost,
+  &cortexa57_vector_cost,
+  &generic_branch_cost,
+  &generic_approx_modes,
+  SVE_NOT_IMPLEMENTED, /* sve_width  */
+  4, /* memmov_cost  */
+  3, /* issue_rate  */
+  AARCH64_FUSE_AES_AESMC, /* fusible_ops  */
+  "32:16",     /* function_align.  */
+  "32:16",     /* jump_align.  */
+  "32:16",     /* loop_align.  */
+  2,   /* int_reassoc_width.  */
+  4,   /* fp_reassoc_width.  */
+  2,   /* vec_reassoc_width.  */
+  2,   /* min_div_recip_mul_sf.  */
+  2,   /* min_div_recip_mul_df.  */
+  0,   /* max_case_values.  */
+  tune_params::AUTOPREFETCHER_WEAK,    /* autoprefetcher_model.  */
+  (AARCH64_EXTRA_TUNE_NONE),   /* tune_flags.  */
+  &generic_prefetch_tune
+};
+
 /* Support for fine-grained override of the tuning structures.  */
 struct aarch64_tuning_override_function
 {