[AArch64] Allow shrink-wrapping of non-leaf vector PCS functions
author	Richard Sandiford <richard.sandiford@arm.com>
Mon, 30 Sep 2019 16:47:21 +0000 (16:47 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
Mon, 30 Sep 2019 16:47:21 +0000 (16:47 +0000)
With the new function ABI infrastructure in place, we can now support
shrink-wrapping of non-leaf vector PCS functions.  This is particularly
useful if the vector PCS function calls an ordinary function on an
error path, since we can then keep the extra saves and restores
specific to that path too.
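
As a rough sketch of the kind of function that benefits (this mirrors
the caller in the new simd-abi-9.c test below; the function names are
purely illustrative):

    int callee (void);   /* Ordinary, base-PCS function.  */

    int __attribute__ ((aarch64_vector_pcs))
    caller (int *x)
    {
      if (*x)
	/* Error path: callee may clobber the upper halves of q8-q15 and
	   all of q16-q23, so the vector PCS caller must save and restore
	   q8-q23, and shrink-wrapping keeps those saves on this path.  */
	return callee () + 1;
      else
	/* Common path: no extra Q-register saves or restores needed.  */
	return 0;
    }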

2019-09-30  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
Delete.
* config/aarch64/aarch64.c (aarch64_components_for_bb): Check
whether the block calls a function that clobbers more registers
than the current function is allowed to.
(aarch64_use_simple_return_insn_p): Delete.
* config/aarch64/aarch64.md (simple_return): Remove condition.

gcc/testsuite/
* gcc.target/aarch64/torture/simd-abi-9.c: New test.

From-SVN: r276340

gcc/ChangeLog
gcc/config/aarch64/aarch64-protos.h
gcc/config/aarch64/aarch64.c
gcc/config/aarch64/aarch64.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c [new file with mode: 0644]

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 28e411c9efe43e3826b6e879918c34f25b816c3e..7b0bcef0248d8a107f7a2be8a4943d033657411a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2019-09-30  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * config/aarch64/aarch64-protos.h (aarch64_use_simple_return_insn_p):
+       Delete.
+       * config/aarch64/aarch64.c (aarch64_components_for_bb): Check
+       whether the block calls a function that clobbers more registers
+       than the current function is allowed to.
+       (aarch64_use_simple_return_insn_p): Delete.
+       * config/aarch64/aarch64.md (simple_return): Remove condition.
+
 2019-09-30  Richard Sandiford  <richard.sandiford@arm.com>
 
        * function-abi.h (function_abi_aggregator): New class.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a870eb7713c4959ebc012cbb0439b2f95ab2787f..c9a342304b602a17877a5a4248463864067c89d3 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -531,7 +531,6 @@ bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
 bool aarch64_use_return_insn_p (void);
-bool aarch64_use_simple_return_insn_p (void);
 const char *aarch64_output_casesi (rtx *);
 
 enum aarch64_symbol_type aarch64_classify_symbol (rtx, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2d4cd3794ce5bed75f0a692b42a2c67793d35c5c..3da92a2d41aa573d747bfa5051604039d9159124 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5976,13 +5976,30 @@ aarch64_components_for_bb (basic_block bb)
   sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
   bitmap_clear (components);
 
+  /* Clobbered registers don't generate values in any meaningful sense,
+     since nothing after the clobber can rely on their value.  And we can't
+     say that partially-clobbered registers are unconditionally killed,
+     because whether they're killed or not depends on the mode of the
+     value they're holding.  Thus partially call-clobbered registers
+     appear in neither the kill set nor the gen set.
+
+     Check manually for any calls that clobber more of a register than the
+     current function can.  */
+  function_abi_aggregator callee_abis;
+  rtx_insn *insn;
+  FOR_BB_INSNS (bb, insn)
+    if (CALL_P (insn))
+      callee_abis.note_callee_abi (insn_callee_abi (insn));
+  HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
+
   /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.  */
   for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
     if ((!call_used_or_fixed_reg_p (regno)
        || (simd_function && FP_SIMD_SAVED_REGNUM_P (regno)))
-       && (bitmap_bit_p (in, regno)
-          || bitmap_bit_p (gen, regno)
-          || bitmap_bit_p (kill, regno)))
+       && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
+           || bitmap_bit_p (in, regno)
+           || bitmap_bit_p (gen, regno)
+           || bitmap_bit_p (kill, regno)))
       {
        unsigned regno2, offset, offset2;
        bitmap_set_bit (components, regno);
@@ -6648,19 +6665,6 @@ aarch64_use_return_insn_p (void)
   return known_eq (cfun->machine->frame.frame_size, 0);
 }
 
-/* Return false for non-leaf SIMD functions in order to avoid
-   shrink-wrapping them.  Doing this will lose the necessary
-   save/restore of FP registers.  */
-
-bool
-aarch64_use_simple_return_insn_p (void)
-{
-  if (aarch64_simd_decl_p (cfun->decl) && !crtl->is_leaf)
-    return false;
-
-  return true;
-}
-
 /* Generate the epilogue instructions for returning from a function.
    This is almost exactly the reverse of the prolog sequence, except
    that we need to insert barriers to avoid scheduling loads that read
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index edeaa6fe785fc13fef10319e783634799f48ec7e..fcba5ace8673667f8dadb29ff14c60957fe0a75f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
 
 (define_insn "simple_return"
   [(simple_return)]
-  "aarch64_use_simple_return_insn_p ()"
+  ""
   "ret"
   [(set_attr "type" "branch")]
 )
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2ac61fffaad33bd88fb447ef31ff6f27bc760526..ce3a967935631b3109fd9d4db1a3a3ba6f4b03d0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2019-09-30  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.target/aarch64/torture/simd-abi-9.c: New test.
+
 2019-09-30  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.target/aarch64/torture/simd-abi-8.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c
new file mode 100644
index 0000000..aaa0316
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-9.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-fshrink-wrap -ffat-lto-objects" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+int callee (void);
+
+/*
+** caller:
+**     ldr     (w[0-9]+), \[x0\]
+**     cbn?z   \1, [^\n]*
+**     ...
+**     ret
+*/
+int __attribute__ ((aarch64_vector_pcs))
+caller (int *x)
+{
+  if (*x)
+    return callee () + 1;
+  else
+    return 0;
+}
+
+/* { dg-final { scan-assembler {\sstp\tq8, q9} } } */
+/* { dg-final { scan-assembler {\sstp\tq10, q11} } } */
+/* { dg-final { scan-assembler {\sstp\tq12, q13} } } */
+/* { dg-final { scan-assembler {\sstp\tq14, q15} } } */
+/* { dg-final { scan-assembler {\sstp\tq16, q17} } } */
+/* { dg-final { scan-assembler {\sstp\tq18, q19} } } */
+/* { dg-final { scan-assembler {\sstp\tq20, q21} } } */
+/* { dg-final { scan-assembler {\sstp\tq22, q23} } } */
+/* { dg-final { scan-assembler {\sldp\tq8, q9} } } */
+/* { dg-final { scan-assembler {\sldp\tq10, q11} } } */
+/* { dg-final { scan-assembler {\sldp\tq12, q13} } } */
+/* { dg-final { scan-assembler {\sldp\tq14, q15} } } */
+/* { dg-final { scan-assembler {\sldp\tq16, q17} } } */
+/* { dg-final { scan-assembler {\sldp\tq18, q19} } } */
+/* { dg-final { scan-assembler {\sldp\tq20, q21} } } */
+/* { dg-final { scan-assembler {\sldp\tq22, q23} } } */
+
+/* { dg-final { scan-assembler-not {\tstp\tq[0-7],} } } */
+/* { dg-final { scan-assembler-not {\tldp\tq[0-7],} } } */
+/* { dg-final { scan-assembler-not {\tstp\tq2[4-9],} } } */
+/* { dg-final { scan-assembler-not {\tldp\tq2[4-9],} } } */
+/* { dg-final { scan-assembler-not {\tstp\td} } } */
+/* { dg-final { scan-assembler-not {\tldp\td} } } */
+/* { dg-final { scan-assembler-not {\tstr\tq} } } */
+/* { dg-final { scan-assembler-not {\tldr\tq} } } */