From dbe4e3f44abdd929d7149ce596805f659114417c Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Fri, 1 Sep 2017 18:54:53 +0200 Subject: [PATCH] combine: Fix for PR82024 With the testcase in the PR, with all the command line options mentioned there, a (comparison) instruction becomes dead in fwprop1 but is not deleted until all the way in rtl_dce. Before combine this insn looks like: 20: flags:CC=cmp(r106:DI,0xffffffffffffffff) REG_DEAD r106:DI REG_UNUSED flags:CC REG_EQUAL cmp(0,0xffffffffffffffff) (note the only output is unused). Combining some earlier insns gives 13: r106:DI=0 14: r105:DI=r101:DI+r103:DI 14+13+20 then gives (parallel [ (set (reg:CC 17 flags) (compare:CC (const_int 0 [0]) (const_int -1 [0xffffffffffffffff]))) (set (reg:DI 105) (plus:DI (reg/v:DI 101 [ e ]) (reg:DI 103))) ]) which doesn't match; but the set of flags is dead, so combine makes the set of r105 the whole new instruction, which it then places at i3. But that is wrong, because r105 is used after i2 but before i3! We forget to check for that in this case. This patch fixes it. PR rtl-optimization/82024 * combine.c (try_combine): If the combination result is a PARALLEL, and we only need to retain the SET in there that would be placed at I2, check that we can place that at I3 instead, before doing so. From-SVN: r251607 --- gcc/ChangeLog | 7 +++++++ gcc/combine.c | 19 ++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 4cc3ac68d87..3f632ca31c2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2017-09-01 Segher Boessenkool + + PR rtl-optimization/82024 + * combine.c (try_combine): If the combination result is a PARALLEL, + and we only need to retain the SET in there that would be placed + at I2, check that we can place that at I3 instead, before doing so. 
+ 2017-09-01 Jakub Jelinek PR target/81766 diff --git a/gcc/combine.c b/gcc/combine.c index 86055385d78..c748c92d2ed 100644 --- a/gcc/combine.c +++ b/gcc/combine.c @@ -3488,7 +3488,10 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, i3, and one from i2. Combining then splitting the parallel results in the original i2 again plus an invalid insn (which we delete). The net effect is only to move instructions around, which makes - debug info less accurate. */ + debug info less accurate. + + If the remaining SET came from I2 its destination should not be used + between I2 and I3. See PR82024. */ if (!(added_sets_2 && i1 == 0) && is_parallel_of_n_reg_sets (newpat, 2) @@ -3517,11 +3520,17 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0, && insn_nothrow_p (i3) && !side_effects_p (SET_SRC (set0))) { - newpat = set1; - insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes); + rtx dest = SET_DEST (set1); + if (GET_CODE (dest) == SUBREG) + dest = SUBREG_REG (dest); + if (!reg_used_between_p (dest, i2, i3)) + { + newpat = set1; + insn_code_number = recog_for_combine (&newpat, i3, &new_i3_notes); - if (insn_code_number >= 0) - changed_i3_dest = 1; + if (insn_code_number >= 0) + changed_i3_dest = 1; + } } if (insn_code_number < 0) -- 2.30.2