From 70b55b25aa14b60f0e0f0193f7178bae756076ad Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Wed, 8 Apr 2020 12:04:46 +0200 Subject: [PATCH] postreload: Fix autoinc handling in reload_cse_move2add [PR94516] The following testcase shows two separate issues caused by the cselib changes. One is that through the cselib sp tracking improvements on ... r12 = rsp; rsp -= 8; push cst1; push cst2; push cst3; call rsp += 32; rsp -= 8; push cst4; push cst5; push cst6; call rsp += 32; rsp -= 8; push cst7; push cst8; push cst9; call rsp += 32 reload_cse_simplify_set decides to optimize the rsp += 32 insns into rsp = r12 because cselib figures that the r12 register holds the right value. From the pure cost perspective that seems like a win and on its own at least for -Os that would be beneficial, except that there are those rsp -= 8 stack adjustments after it, where rsp += 32; rsp -= 8; is optimized into rsp += 24; by the csa pass, but rsp = r12; rsp -= 8 can't be. I don't know what to do about this part; the PR has a hack in a comment. Anyway, the following patch fixes the other part, which isn't a missed optimization, but a wrong-code issue. The problem is that the pushes of constants are represented on x86 through PRE_MODIFY, and while move2add_note_store has some code to handle {PRE,POST}_{INC,DEC} without REG_INC note, it doesn't handle {PRE,POST}_MODIFY (handling that would be enough to fix this testcase). But additionally it looks misplaced, because move2add_note_store is only called on the rtxes that are stored into, while RTX_AUTOINC can happen not just in those, but anywhere else in the instruction (e.g. pop insn can have autoinc in the SET_SRC MEM). A REG_INC note seems to be required for any autoinc except for stack pointer autoinc which doesn't have those notes, so this patch just handles the sp autoinc after the REG_INC note handling loop. 2020-04-08 Jakub Jelinek PR rtl-optimization/94516 * postreload.c: Include rtl-iter.h. 
(reload_cse_move2add): Handle SP autoinc here by FOR_EACH_SUBRTX_VAR looking for all MEMs with RTX_AUTOINC operand. (move2add_note_store): Remove {PRE,POST}_{INC,DEC} handling. * gcc.dg/torture/pr94516.c: New test. --- gcc/ChangeLog | 8 +++++++ gcc/postreload.c | 27 +++++++++++++--------- gcc/testsuite/ChangeLog | 5 +++++ gcc/testsuite/gcc.dg/torture/pr94516.c | 31 ++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr94516.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 07fff87d34d..d432912a973 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2020-04-08 Jakub Jelinek + + PR rtl-optimization/94516 + * postreload.c: Include rtl-iter.h. + (reload_cse_move2add): Handle SP autoinc here by FOR_EACH_SUBRTX_VAR + looking for all MEMs with RTX_AUTOINC operand. + (move2add_note_store): Remove {PRE,POST}_{INC,DEC} handling. + 2020-04-08 Tobias Burnus * omp-grid.c (grid_eliminate_combined_simd_part): Use diff --git a/gcc/postreload.c b/gcc/postreload.c index 7cd5c7fc55f..8849679ae0f 100644 --- a/gcc/postreload.c +++ b/gcc/postreload.c @@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-pass.h" #include "dbgcnt.h" #include "function-abi.h" +#include "rtl-iter.h" static int reload_cse_noop_set_p (rtx); static bool reload_cse_simplify (rtx_insn *, rtx); @@ -2090,6 +2091,21 @@ reload_cse_move2add (rtx_insn *first) } } } + + /* There are no REG_INC notes for SP autoinc. 
*/ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) + { + rtx mem = *iter; + if (mem + && MEM_P (mem) + && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC) + { + if (XEXP (XEXP (mem, 0), 0) == stack_pointer_rtx) + reg_mode[STACK_POINTER_REGNUM] = VOIDmode; + } + } + note_stores (insn, move2add_note_store, insn); /* If INSN is a conditional branch, we try to extract an @@ -2144,17 +2160,6 @@ move2add_note_store (rtx dst, const_rtx set, void *data) unsigned int regno = 0; scalar_int_mode mode; - /* Some targets do argument pushes without adding REG_INC notes. */ - - if (MEM_P (dst)) - { - dst = XEXP (dst, 0); - if (GET_CODE (dst) == PRE_INC || GET_CODE (dst) == POST_INC - || GET_CODE (dst) == PRE_DEC || GET_CODE (dst) == POST_DEC) - reg_mode[REGNO (XEXP (dst, 0))] = VOIDmode; - return; - } - if (GET_CODE (dst) == SUBREG) regno = subreg_regno (dst); else if (REG_P (dst)) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index e4e6ecf6786..f0d7dae5719 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-04-08 Jakub Jelinek + + PR rtl-optimization/94516 + * gcc.dg/torture/pr94516.c: New test. 
+ 2020-04-08 Tobias Burnus PR middle-end/94120 diff --git a/gcc/testsuite/gcc.dg/torture/pr94516.c b/gcc/testsuite/gcc.dg/torture/pr94516.c new file mode 100644 index 00000000000..b1b68ce5774 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr94516.c @@ -0,0 +1,31 @@ +/* PR rtl-optimization/94516 */ +/* { dg-do run } */ +/* { dg-additional-options "-fpie" { target pie } } */ + +struct S { unsigned char *a; unsigned int b; }; +typedef int V __attribute__((vector_size (sizeof (int) * 4))); + +__attribute__((noipa)) void +foo (const char *a, const char *b, const char *c, const struct S *d, int e, int f, int g, int h, int i) +{ + V v = { 1, 2, 3, 4 }; + asm volatile ("" : : "g" (&v) : "memory"); + v += (V) { 5, 6, 7, 8 }; + asm volatile ("" : : "g" (&v) : "memory"); +} + +__attribute__((noipa)) void +bar (void) +{ + const struct S s = { "foobarbaz", 9 }; + foo ("foo", (const char *) 0, "corge", &s, 0, 1, 0, -12, -31); + foo ("bar", "quux", "qux", &s, 0, 0, 9, 0, 0); + foo ("baz", (const char *) 0, "qux", &s, 1, 0, 0, -12, -32); +} + +int +main () +{ + bar (); + return 0; +} -- 2.30.2