[AArch64][1/5] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P
authorKyrylo Tkachov <kyrylo.tkachov@arm.com>
Mon, 24 Nov 2014 10:37:30 +0000 (10:37 +0000)
committerKyrylo Tkachov <ktkachov@gcc.gnu.org>
Mon, 24 Nov 2014 10:37:30 +0000 (10:37 +0000)
* config/aarch64/aarch64-protos.h (struct tune_params): Add
fuseable_ops field.
* config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
(cortexa53_tunings): Likewise.
(cortexa57_tunings): Likewise.
(thunderx_tunings): Likewise.
(aarch64_macro_fusion_p): New function.
(aarch_macro_fusion_pair_p): Likewise.
(TARGET_SCHED_MACRO_FUSION_P): Define.
(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
(AARCH64_FUSE_MOV_MOVK): Likewise.
(AARCH64_FUSE_NOTHING): Likewise.

From-SVN: r218007

gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64.c

index 8f42d0902450ff2dcde71f68461fe2c88793ae28..562ef8a0582e0342dc54fea1df8118be50f0a6c0 100644 (file)
@@ -1,3 +1,18 @@
+2014-11-24  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+       * config/aarch64/aarch64-protos.h (struct tune_params): Add
+       fuseable_ops field.
+       * config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
+       (cortexa53_tunings): Likewise.
+       (cortexa57_tunings): Likewise.
+       (thunderx_tunings): Likewise.
+       (aarch64_macro_fusion_p): New function.
+       (aarch_macro_fusion_pair_p): Likewise.
+       (TARGET_SCHED_MACRO_FUSION_P): Define.
+       (TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
+       (AARCH64_FUSE_MOV_MOVK): Likewise.
+       (AARCH64_FUSE_NOTHING): Likewise.
+
 2014-11-24  Martin Liska  <mliska@suse.cz>
 
        PR lto/63968
index 52e97e745a01933beccecea0a40a51c6f5d2fe97..f5db563189d1c39fda14b8fca3022617fcb2e577 100644 (file)
@@ -170,6 +170,7 @@ struct tune_params
   const struct cpu_vector_cost *const vec_costs;
   const int memmov_cost;
   const int issue_rate;
+  const unsigned int fuseable_ops;
 };
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
index 180951366481ccb78a48eb22c3c2d464d1279408..e9be8bc95e53b7fca7def751a84660fdb0d3259f 100644 (file)
@@ -304,6 +304,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
   NAMED_PARAM (cond_not_taken_branch_cost, 1)
 };
 
+#define AARCH64_FUSE_NOTHING   (0)
+#define AARCH64_FUSE_MOV_MOVK  (1 << 0)
+
 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
 __extension__
 #endif
@@ -314,7 +317,8 @@ static const struct tune_params generic_tunings =
   &generic_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
 };
 
 static const struct tune_params cortexa53_tunings =
@@ -324,7 +328,8 @@ static const struct tune_params cortexa53_tunings =
   &cortexa53_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
 };
 
 static const struct tune_params cortexa57_tunings =
@@ -334,7 +339,8 @@ static const struct tune_params cortexa57_tunings =
   &cortexa57_regmove_cost,
   &cortexa57_vector_cost,
   NAMED_PARAM (memmov_cost, 4),
-  NAMED_PARAM (issue_rate, 3)
+  NAMED_PARAM (issue_rate, 3),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
 };
 
 static const struct tune_params thunderx_tunings =
@@ -344,7 +350,8 @@ static const struct tune_params thunderx_tunings =
   &thunderx_regmove_cost,
   &generic_vector_cost,
   NAMED_PARAM (memmov_cost, 6),
-  NAMED_PARAM (issue_rate, 2)
+  NAMED_PARAM (issue_rate, 2),
+  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
 };
 
 /* A processor implementing AArch64.  */
@@ -10370,6 +10377,59 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code)
 #undef TARGET_GEN_CCMP_NEXT
 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
 
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+aarch64_macro_fusion_p (void)
+{
+  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
+}
+
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx set_dest;
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!aarch64_macro_fusion_p ())
+    return false;
+
+  if (simple_sets_p
+      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
+    {
+      /* We are trying to match:
+         prev (mov)  == (set (reg r0) (const_int imm16))
+         curr (movk) == (set (zero_extract (reg r0)
+                                           (const_int 16)
+                                           (const_int 16))
+                             (const_int imm16_1))  */
+
+      set_dest = SET_DEST (curr_set);
+
+      if (GET_CODE (set_dest) == ZERO_EXTRACT
+          && CONST_INT_P (SET_SRC (curr_set))
+          && CONST_INT_P (SET_SRC (prev_set))
+          && CONST_INT_P (XEXP (set_dest, 2))
+          && INTVAL (XEXP (set_dest, 2)) == 16
+          && REG_P (XEXP (set_dest, 0))
+          && REG_P (SET_DEST (prev_set))
+          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
+        {
+          return true;
+        }
+    }
+
+  return false;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -10629,6 +10689,12 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code)
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
 
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
+
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"