[AArch64] Implement workaround for ARM Cortex-A53 erratum 835769

author Kyrylo Tkachov <kyrylo.tkachov@arm.com>

Fri, 10 Oct 2014 12:28:45 +0000 (12:28 +0000)

committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>

Fri, 10 Oct 2014 12:28:45 +0000 (12:28 +0000)
author Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Fri, 10 Oct 2014 12:28:45 +0000 (12:28 +0000)
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>
Fri, 10 Oct 2014 12:28:45 +0000 (12:28 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index f91582b93cebad9948781b9767404c8e352a0dde..87991acb90592f588d71162a2b7523169ee8df74 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2014-10-10  Kyrylo Tkachov<kyrylo.tkachov@arm.com>
+            Ramana Radhakrishnan<ramana.radhakrishnan@arm.com>
+
+       * config/aarch64/aarch64.h (FINAL_PRESCAN_INSN): Define.
+       (ADJUST_INSN_LENGTH): Define.
+       * config/aarch64/aarch64.opt (mfix-cortex-a53-835769): New option.
+       * config/aarch64/aarch64.c (is_mem_p): New function.
+       (is_memory_op): Likewise.
+       (aarch64_prev_real_insn): Likewise.
+       (is_madd_op): Likewise.
+       (dep_between_memop_and_curr): Likewise.
+       (aarch64_madd_needs_nop): Likewise.
+       (aarch64_final_prescan_insn): Likewise.
+       * doc/invoke.texi (AArch64 Options): Document -mfix-cortex-a53-835769
+       and -mno-fix-cortex-a53-835769 options.
+
  2014-10-10  Jakub Jelinek  <jakub@redhat.com>
  
         PR tree-optimization/63464
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index b5f53d21cdf3bc7672338b9e27dbe56242369a24..c57a467c32712cfa703261e1be3b8f77f06d4392 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -308,6 +308,8 @@ aarch64_builtin_vectorized_function (tree fndecl,
  
  extern void aarch64_split_combinev16qi (rtx operands[3]);
  extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
+extern bool aarch64_madd_needs_nop (rtx_insn *);
+extern void aarch64_final_prescan_insn (rtx_insn *);
  extern bool
  aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
  void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 5144c35e29f67d3ee76883f22eb71a3daa880c81..76a2480582a53ef0b465c1da8d7a20f9a2f51402 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7586,6 +7586,128 @@ aarch64_mangle_type (const_tree type)
    return NULL;
  }
  
+static int
+is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+  return MEM_P (*x);
+}
+
+static bool
+is_memory_op (rtx_insn *mem_insn)
+{
+   rtx pattern = PATTERN (mem_insn);
+   return for_each_rtx (&pattern, is_mem_p, NULL);
+}
+
+/* Find the first rtx_insn before insn that will generate an assembly
+   instruction.  */
+
+static rtx_insn *
+aarch64_prev_real_insn (rtx_insn *insn)
+{
+  if (!insn)
+    return NULL;
+
+  do
+    {
+      insn = prev_real_insn (insn);
+    }
+  while (insn && recog_memoized (insn) < 0);
+
+  return insn;
+}
+
+static bool
+is_madd_op (enum attr_type t1)
+{
+  unsigned int i;
+  /* A number of these may be AArch32 only.  */
+  enum attr_type mlatypes[] = {
+    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
+    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
+    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
+  };
+
+  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
+    {
+      if (t1 == mlatypes[i])
+       return true;
+    }
+
+  return false;
+}
+
+/* Check if there is a register dependency between a load and the insn
+   for which we hold recog_data.  */
+
+static bool
+dep_between_memop_and_curr (rtx memop)
+{
+  rtx load_reg;
+  int opno;
+
+  if (!memop)
+    return false;
+
+  if (!REG_P (SET_DEST (memop)))
+    return false;
+
+  load_reg = SET_DEST (memop);
+  for (opno = 0; opno < recog_data.n_operands; opno++)
+    {
+      rtx operand = recog_data.operand[opno];
+      if (REG_P (operand)
+          && reg_overlap_mentioned_p (load_reg, operand))
+        return true;
+
+    }
+  return false;
+}
+
+bool
+aarch64_madd_needs_nop (rtx_insn* insn)
+{
+  enum attr_type attr_type;
+  rtx_insn *prev;
+  rtx body;
+
+  if (!aarch64_fix_a53_err835769)
+    return false;
+
+  if (recog_memoized (insn) < 0)
+    return false;
+
+  attr_type = get_attr_type (insn);
+  if (!is_madd_op (attr_type))
+    return false;
+
+  prev = aarch64_prev_real_insn (insn);
+  if (!prev)
+    return false;
+
+  body = single_set (prev);
+
+  /* If the previous insn is a memory op and there is no dependency between
+     it and the madd, emit a nop between them.  If we know the previous insn is
+     a memory op but body is NULL, emit the nop to be safe, it's probably a
+     load/store pair insn.  */
+  if (is_memory_op (prev)
+      && GET_MODE (recog_data.operand[0]) == DImode
+      && (!dep_between_memop_and_curr (body)))
+    return true;
+
+  return false;
+
+}
+
+void
+aarch64_final_prescan_insn (rtx_insn *insn)
+{
+  if (aarch64_madd_needs_nop (insn))
+    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
+}
+
+
  /* Return the equivalent letter for size.  */
  static char
  sizetochar (int size)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h

index db950da8f5ec7f8447943533966fd90eeb133276..e9e5fd838fc8ce9d11bbff5287091813ec733401 100644 (file)
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -486,6 +486,15 @@ enum target_cpus
    (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6))
  #endif
  
+/* If inserting NOP before a mult-accumulate insn remember to adjust the
+   length so that conditional branching code is updated appropriately.  */
+#define ADJUST_INSN_LENGTH(insn, length)       \
+  if (aarch64_madd_needs_nop (insn))           \
+    length += 4;
+
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS)     \
+    aarch64_final_prescan_insn (INSN);                 \
+
  /* The processor for which instructions should be scheduled.  */
  extern enum aarch64_processor aarch64_tune;
  
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt

index f5a15b7298e903cd0a4b4b93a09b789ce6595470..77deb2e7d364208024404e82bbda30cdc1b32ed1 100644 (file)
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -67,6 +67,10 @@ mgeneral-regs-only
  Target Report RejectNegative Mask(GENERAL_REGS_ONLY)
  Generate code which uses only the general registers
  
+mfix-cortex-a53-835769
+Target Report Var(aarch64_fix_a53_err835769) Init(0)
+Workaround for ARM Cortex-A53 Erratum number 835769
+
  mlittle-endian
  Target Report RejectNegative InverseMask(BIG_END)
  Assume target CPU is configured as little endian
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 8f3eb16beb29ff18c0a0c49383be21d6bb4f8782..f1ba77bdabe2c6719ecfa343a2f273c4fa1da494 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -489,6 +489,7 @@ Objective-C and Objective-C++ Dialects}.
  -mstrict-align @gol
  -momit-leaf-frame-pointer  -mno-omit-leaf-frame-pointer @gol
  -mtls-dialect=desc  -mtls-dialect=traditional @gol
+-mfix-cortex-a53-835769  -mno-fix-cortex-a53-835769 @gol
  -march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}}
  
  @emph{Adapteva Epiphany Options}
@@ -11757,6 +11758,14 @@ of TLS variables.  This is the default.
  Use traditional TLS as the thread-local storage mechanism for dynamic accesses
  of TLS variables.
  
+@item -mfix-cortex-a53-835769
+@itemx -mno-fix-cortex-a53-835769
+@opindex -mfix-cortex-a53-835769
+@opindex -mno-fix-cortex-a53-835769
+Enable or disable the workaround for the ARM Cortex-A53 erratum number 835769.
+This will involve inserting a NOP instruction between memory instructions and
+64-bit integer multiply-accumulate instructions.
+
  @item -march=@var{name}
  @opindex march
  Specify the name of the target architecture, optionally suffixed by one or
author	Kyrylo Tkachov <kyrylo.tkachov@arm.com>
	Fri, 10 Oct 2014 12:28:45 +0000 (12:28 +0000)
committer	Kyrylo Tkachov <ktkachov@gcc.gnu.org>
	Fri, 10 Oct 2014 12:28:45 +0000 (12:28 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/config/aarch64/aarch64.h		patch \| blob \| history
gcc/config/aarch64/aarch64.opt		patch \| blob \| history
gcc/doc/invoke.texi		patch \| blob \| history