From ebe8f3226c94173073958da74e9acb4e444e4097 Mon Sep 17 00:00:00 2001 From: James Greenhalgh Date: Thu, 21 Jul 2016 15:40:24 +0000 Subject: [PATCH] [Patch 2/2 ifcvt costs] Introduce a new cost model for ifcvt. gcc/ * ifcvt.c (noce_if_info): New fields: speed_p, original_cost, max_seq_cost. Removed fields: then_cost, else_cost, branch_cost. (noce_conversion_profitable_p): New. (noce_try_store_flag_constants): Use it. (noce_try_addcc): Likewise. (noce_try_store_flag_mask): Likewise. (noce_try_cmove): Likewise. (noce_try_cmove_arith): Likewise. (bb_valid_for_noce_process_p): Add to the cost parameter rather than overwriting it. (noce_convert_multiple_sets): Move cost model to here, from... (bb_ok_for_noce_convert_multiple_sets) ...here. (noce_process_if_block): Update calls for above changes. (noce_find_if_block): Record new noce_if_info parameters. gcc/testsuite/ * gcc.dg/ifcvt-2.c: Use parameter to guide if-conversion heuristics. * gcc.dg/ifcvt-3.c: Use parameter to guide if-conversion heuristics. * gcc.dg/pr68435.c: Use parameter to guide if-conversion heuristics. * gcc.dg/ifcvt-4.c: Use parameter to guide if-conversion heuristics. * gcc.dg/ifcvt-5.c: Use parameter to guide if-conversion heuristics. From-SVN: r238594 --- gcc/ChangeLog | 17 +++ gcc/ifcvt.c | 188 +++++++++++++++++---------------- gcc/testsuite/ChangeLog | 8 ++ gcc/testsuite/gcc.dg/ifcvt-2.c | 2 +- gcc/testsuite/gcc.dg/ifcvt-3.c | 2 +- gcc/testsuite/gcc.dg/ifcvt-4.c | 2 +- gcc/testsuite/gcc.dg/ifcvt-5.c | 5 +- gcc/testsuite/gcc.dg/pr68435.c | 2 +- 8 files changed, 127 insertions(+), 99 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3aebf5c3b9c..602c7b8eeaa 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,20 @@ +2016-07-21 James Greenhalgh + + * ifcvt.c (noce_if_info): New fields: speed_p, original_cost, + max_seq_cost. Removed fields: then_cost, else_cost, branch_cost. + (noce_conversion_profitable_p): New. + (noce_try_store_flag_constants): Use it. + (noce_try_addcc): Likewise. + (noce_try_store_flag_mask): Likewise. + (noce_try_cmove): Likewise. + (noce_try_cmove_arith): Likewise. + (bb_valid_for_noce_process_p): Add to the cost parameter rather than + overwriting it. + (noce_convert_multiple_sets): Move cost model to here, from... + (bb_ok_for_noce_convert_multiple_sets) ...here. + (noce_process_if_block): Update calls for above changes. + (noce_find_if_block): Record new noce_if_info parameters. + 2016-07-21 James Greenhalgh * target.def (max_noce_ifcvt_seq_cost): New. diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c index a92ab6dd105..ecdfc2e2bf3 100644 --- a/gcc/ifcvt.c +++ b/gcc/ifcvt.c @@ -807,12 +807,17 @@ struct noce_if_info bool then_simple; bool else_simple; - /* The total rtx cost of the instructions in then_bb and else_bb. */ - unsigned int then_cost; - unsigned int else_cost; + /* True if we're optimisizing the control block for speed, false if + we're optimizing for size. */ + bool speed_p; - /* Estimated cost of the particular branch instruction. */ - unsigned int branch_cost; + /* The combined cost of COND, JUMP and the costs for THEN_BB and + ELSE_BB. */ + unsigned int original_cost; + + /* Maximum permissible cost for the unconditional sequence we should + generate to replace this branch. */ + unsigned int max_seq_cost; /* The name of the noce transform that succeeded in if-converting this structure. Used for debugging. */ @@ -835,6 +840,25 @@ static int noce_try_minmax (struct noce_if_info *); static int noce_try_abs (struct noce_if_info *); static int noce_try_sign_mask (struct noce_if_info *); +/* Return TRUE if SEQ is a good candidate as a replacement for the + if-convertible sequence described in IF_INFO. */ + +inline static bool +noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info) +{ + bool speed_p = if_info->speed_p; + + /* Cost up the new sequence. */ + unsigned int cost = seq_cost (seq, speed_p); + + /* When compiling for size, we can make a reasonably accurately guess + at the size growth. */ + if (!speed_p) + return cost <= if_info->original_cost; + else + return cost <= if_info->max_seq_cost; +} + /* Helper function for noce_try_store_flag*. */ static rtx @@ -1319,8 +1343,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) registers where we handle overlap below. */ && (REG_P (XEXP (a, 0)) || (noce_operand_ok (XEXP (a, 0)) - && ! reg_overlap_mentioned_p (if_info->x, XEXP (a, 0)))) - && if_info->branch_cost >= 2) + && ! reg_overlap_mentioned_p (if_info->x, XEXP (a, 0))))) { common = XEXP (a, 0); a = XEXP (a, 1); @@ -1393,22 +1416,24 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) else gcc_unreachable (); } + /* Is this (cond) ? 2^n : 0? */ else if (ifalse == 0 && exact_log2 (itrue) >= 0 - && (STORE_FLAG_VALUE == 1 - || if_info->branch_cost >= 2)) + && STORE_FLAG_VALUE == 1) normalize = 1; + /* Is this (cond) ? 0 : 2^n? */ else if (itrue == 0 && exact_log2 (ifalse) >= 0 && can_reverse - && (STORE_FLAG_VALUE == 1 || if_info->branch_cost >= 2)) + && STORE_FLAG_VALUE == 1) { normalize = 1; reversep = true; } + /* Is this (cond) ? -1 : x? */ else if (itrue == -1 - && (STORE_FLAG_VALUE == -1 - || if_info->branch_cost >= 2)) + && STORE_FLAG_VALUE == -1) normalize = -1; + /* Is this (cond) ? x : -1? */ else if (ifalse == -1 && can_reverse - && (STORE_FLAG_VALUE == -1 || if_info->branch_cost >= 2)) + && STORE_FLAG_VALUE == -1) { normalize = -1; reversep = true; @@ -1497,7 +1522,7 @@ noce_try_store_flag_constants (struct noce_if_info *if_info) noce_emit_move_insn (if_info->x, target); seq = end_ifcvt_sequence (if_info); - if (!seq) + if (!seq || !noce_conversion_profitable_p (seq, if_info)) return FALSE; emit_insn_before_setloc (seq, if_info->jump, @@ -1551,7 +1576,7 @@ noce_try_addcc (struct noce_if_info *if_info) noce_emit_move_insn (if_info->x, target); seq = end_ifcvt_sequence (if_info); - if (!seq) + if (!seq || !noce_conversion_profitable_p (seq, if_info)) return FALSE; emit_insn_before_setloc (seq, if_info->jump, @@ -1564,10 +1589,10 @@ noce_try_addcc (struct noce_if_info *if_info) } /* If that fails, construct conditional increment or decrement using - setcc. */ - if (if_info->branch_cost >= 2 - && (XEXP (if_info->a, 1) == const1_rtx - || XEXP (if_info->a, 1) == constm1_rtx)) + setcc. We're changing a branch and an increment to a comparison and + an ADD/SUB. */ + if (XEXP (if_info->a, 1) == const1_rtx + || XEXP (if_info->a, 1) == constm1_rtx) { start_sequence (); if (STORE_FLAG_VALUE == INTVAL (XEXP (if_info->a, 1))) @@ -1593,7 +1618,7 @@ noce_try_addcc (struct noce_if_info *if_info) noce_emit_move_insn (if_info->x, target); seq = end_ifcvt_sequence (if_info); - if (!seq) + if (!seq || !noce_conversion_profitable_p (seq, if_info)) return FALSE; emit_insn_before_setloc (seq, if_info->jump, @@ -1621,15 +1646,14 @@ noce_try_store_flag_mask (struct noce_if_info *if_info) return FALSE; reversep = 0; - if ((if_info->branch_cost >= 2 - || STORE_FLAG_VALUE == -1) - && ((if_info->a == const0_rtx - && rtx_equal_p (if_info->b, if_info->x)) - || ((reversep = (reversed_comparison_code (if_info->cond, - if_info->jump) - != UNKNOWN)) - && if_info->b == const0_rtx - && rtx_equal_p (if_info->a, if_info->x)))) + + if ((if_info->a == const0_rtx + && rtx_equal_p (if_info->b, if_info->x)) + || ((reversep = (reversed_comparison_code (if_info->cond, + if_info->jump) + != UNKNOWN)) + && if_info->b == const0_rtx + && rtx_equal_p (if_info->a, if_info->x))) { start_sequence (); target = noce_emit_store_flag (if_info, @@ -1643,22 +1667,11 @@ noce_try_store_flag_mask (struct noce_if_info *if_info) if (target) { - int old_cost, new_cost, insn_cost; - int speed_p; - if (target != if_info->x) noce_emit_move_insn (if_info->x, target); seq = end_ifcvt_sequence (if_info); - if (!seq) - return FALSE; - - speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (if_info->insn_a)); - insn_cost = insn_rtx_cost (PATTERN (if_info->insn_a), speed_p); - old_cost = COSTS_N_INSNS (if_info->branch_cost) + insn_cost; - new_cost = seq_cost (seq, speed_p); - - if (new_cost > old_cost) + if (!seq || !noce_conversion_profitable_p (seq, if_info)) return FALSE; emit_insn_before_setloc (seq, if_info->jump, @@ -1827,9 +1840,7 @@ noce_try_cmove (struct noce_if_info *if_info) we don't know about, so give them a chance before trying this approach. */ else if (!targetm.have_conditional_execution () - && CONST_INT_P (if_info->a) && CONST_INT_P (if_info->b) - && ((if_info->branch_cost >= 2 && STORE_FLAG_VALUE == -1) - || if_info->branch_cost >= 3)) + && CONST_INT_P (if_info->a) && CONST_INT_P (if_info->b)) { machine_mode mode = GET_MODE (if_info->x); HOST_WIDE_INT ifalse = INTVAL (if_info->a); @@ -1865,7 +1876,7 @@ noce_try_cmove (struct noce_if_info *if_info) noce_emit_move_insn (if_info->x, target); seq = end_ifcvt_sequence (if_info); - if (!seq) + if (!seq || !noce_conversion_profitable_p (seq, if_info)) return FALSE; emit_insn_before_setloc (seq, if_info->jump, @@ -2078,11 +2089,9 @@ noce_try_cmove_arith (struct noce_if_info *if_info) conditional on their addresses followed by a load. Don't do this early because it'll screw alias analysis. Note that we've already checked for no side effects. */ - /* ??? FIXME: Magic number 5. */ if (cse_not_expected && MEM_P (a) && MEM_P (b) - && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b) - && if_info->branch_cost >= 5) + && MEM_ADDR_SPACE (a) == MEM_ADDR_SPACE (b)) { machine_mode address_mode = get_address_mode (a); @@ -2114,23 +2123,6 @@ noce_try_cmove_arith (struct noce_if_info *if_info) if (!can_conditionally_move_p (x_mode)) return FALSE; - unsigned int then_cost; - unsigned int else_cost; - if (insn_a) - then_cost = if_info->then_cost; - else - then_cost = 0; - - if (insn_b) - else_cost = if_info->else_cost; - else - else_cost = 0; - - /* We're going to execute one of the basic blocks anyway, so - bail out if the most expensive of the two blocks is unacceptable. */ - if (MAX (then_cost, else_cost) > COSTS_N_INSNS (if_info->branch_cost)) - return FALSE; - /* Possibly rearrange operands to make things come out more natural. */ if (reversed_comparison_code (if_info->cond, if_info->jump) != UNKNOWN) { @@ -2319,7 +2311,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info) noce_emit_move_insn (x, target); ifcvt_seq = end_ifcvt_sequence (if_info); - if (!ifcvt_seq) + if (!ifcvt_seq || !noce_conversion_profitable_p (ifcvt_seq, if_info)) return FALSE; emit_insn_before_setloc (ifcvt_seq, if_info->jump, @@ -2805,7 +2797,7 @@ noce_try_sign_mask (struct noce_if_info *if_info) && (if_info->insn_b == NULL_RTX || BLOCK_FOR_INSN (if_info->insn_b) == if_info->test_bb)); if (!(t_unconditional - || (set_src_cost (t, mode, optimize_bb_for_speed_p (if_info->test_bb)) + || (set_src_cost (t, mode, if_info->speed_p) < COSTS_N_INSNS (2)))) return FALSE; @@ -3034,8 +3026,8 @@ contains_mem_rtx_p (rtx x) x := a and all previous computations in TEST_BB don't produce any values that are live after TEST_BB. In other words, all the insns in TEST_BB are there only - to compute a value for x. Put the rtx cost of the insns - in TEST_BB into COST. Record whether TEST_BB is a single simple + to compute a value for x. Add the rtx cost of the insns + in TEST_BB to COST. Record whether TEST_BB is a single simple set instruction in SIMPLE_P. */ static bool @@ -3067,7 +3059,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, if (first_insn == last_insn) { *simple_p = noce_operand_ok (SET_DEST (first_set)); - *cost = insn_rtx_cost (first_set, speed_p); + *cost += insn_rtx_cost (first_set, speed_p); return *simple_p; } @@ -3114,7 +3106,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, goto free_bitmap_and_fail; BITMAP_FREE (test_bb_temps); - *cost = potential_cost; + *cost += potential_cost; *simple_p = false; return true; @@ -3290,9 +3282,15 @@ noce_convert_multiple_sets (struct noce_if_info *if_info) for (int i = 0; i < count; i++) noce_emit_move_insn (targets[i], temporaries[i]); - /* Actually emit the sequence. */ + /* Actually emit the sequence if it isn't too expensive. */ rtx_insn *seq = get_insns (); + if (!noce_conversion_profitable_p (seq, if_info)) + { + end_sequence (); + return FALSE; + } + for (insn = seq; insn; insn = NEXT_INSN (insn)) set_used_flags (insn); @@ -3342,22 +3340,16 @@ noce_convert_multiple_sets (struct noce_if_info *if_info) /* Return true iff basic block TEST_BB is comprised of only (SET (REG) (REG)) insns suitable for conversion to a series - of conditional moves. FORNOW: Use II to find the expected cost of - the branch into/over TEST_BB. - - TODO: This creates an implicit "magic number" for branch_cost. - II->branch_cost now guides the maximum number of set instructions in - a basic block which is considered profitable to completely - if-convert. */ + of conditional moves. Also check that we have more than one set + (other routines can handle a single set better than we would), and + fewer than PARAM_MAX_RTL_IF_CONVERSION_INSNS sets. */ static bool -bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, - struct noce_if_info *ii) +bb_ok_for_noce_convert_multiple_sets (basic_block test_bb) { rtx_insn *insn; unsigned count = 0; unsigned param = PARAM_VALUE (PARAM_MAX_RTL_IF_CONVERSION_INSNS); - unsigned limit = MIN (ii->branch_cost, param); FOR_BB_INSNS (test_bb, insn) { @@ -3393,14 +3385,15 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb, if (!can_conditionally_move_p (GET_MODE (dest))) return false; - /* FORNOW: Our cost model is a count of the number of instructions we - would if-convert. This is suboptimal, and should be improved as part - of a wider rework of branch_cost. */ - if (++count > limit) - return false; + count++; } - return count > 1; + /* If we would only put out one conditional move, the other strategies + this pass tries are better optimized and will be more appropriate. + Some targets want to strictly limit the number of conditional moves + that are emitted, they set this through PARAM, we need to respect + that. */ + return count > 1 && count <= param; } /* Given a simple IF-THEN-JOIN or IF-THEN-ELSE-JOIN block, attempt to convert @@ -3436,7 +3429,7 @@ noce_process_if_block (struct noce_if_info *if_info) if (!else_bb && HAVE_conditional_move && !HAVE_cc0 - && bb_ok_for_noce_convert_multiple_sets (then_bb, if_info)) + && bb_ok_for_noce_convert_multiple_sets (then_bb)) { if (noce_convert_multiple_sets (if_info)) { @@ -3447,12 +3440,12 @@ noce_process_if_block (struct noce_if_info *if_info) } } - if (! bb_valid_for_noce_process_p (then_bb, cond, &if_info->then_cost, + if (! bb_valid_for_noce_process_p (then_bb, cond, &if_info->original_cost, &if_info->then_simple)) return false; if (else_bb - && ! bb_valid_for_noce_process_p (else_bb, cond, &if_info->else_cost, + && ! bb_valid_for_noce_process_p (else_bb, cond, &if_info->original_cost, &if_info->else_simple)) return false; @@ -3983,6 +3976,7 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge, rtx cond; rtx_insn *cond_earliest; struct noce_if_info if_info; + bool speed_p = optimize_bb_for_speed_p (test_bb); /* We only ever should get here before reload. */ gcc_assert (!reload_completed); @@ -4074,8 +4068,16 @@ noce_find_if_block (basic_block test_bb, edge then_edge, edge else_edge, if_info.cond_earliest = cond_earliest; if_info.jump = jump; if_info.then_else_reversed = then_else_reversed; - if_info.branch_cost = BRANCH_COST (optimize_bb_for_speed_p (test_bb), - predictable_edge_p (then_edge)); + if_info.speed_p = speed_p; + if_info.max_seq_cost + = targetm.max_noce_ifcvt_seq_cost (then_edge); + /* We'll add in the cost of THEN_BB and ELSE_BB later, when we check + that they are valid to transform. We can't easily get back to the insn + for COND (and it may not exist if we had to canonicalize to get COND), + and jump_insns are always given a cost of 1 by seq_cost, so treat + both instructions as having cost COSTS_N_INSNS (1). */ + if_info.original_cost = COSTS_N_INSNS (2); + /* Do the real work. */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cb2f9e04103..d0b0c1d165d 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2016-07-21 James Greenhalgh + + * gcc.dg/ifcvt-2.c: Use parameter to guide if-conversion heuristics. + * gcc.dg/ifcvt-3.c: Use parameter to guide if-conversion heuristics. + * gcc.dg/pr68435.c: Use parameter to guide if-conversion heuristics. + * gcc.dg/ifcvt-4.c: Use parameter to guide if-conversion heuristics. + * gcc.dg/ifcvt-5.c: Use parameter to guide if-conversion heuristics. + 2016-07-21 Richard Biener PR tree-optimization/71947 diff --git a/gcc/testsuite/gcc.dg/ifcvt-2.c b/gcc/testsuite/gcc.dg/ifcvt-2.c index e0e1728a34c..73e0dccdce3 100644 --- a/gcc/testsuite/gcc.dg/ifcvt-2.c +++ b/gcc/testsuite/gcc.dg/ifcvt-2.c @@ -1,5 +1,5 @@ /* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */ -/* { dg-options "-fdump-rtl-ce1 -O2" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100" } */ typedef unsigned char uint8_t; diff --git a/gcc/testsuite/gcc.dg/ifcvt-3.c b/gcc/testsuite/gcc.dg/ifcvt-3.c index 44233d4df6b..b250bc15e08 100644 --- a/gcc/testsuite/gcc.dg/ifcvt-3.c +++ b/gcc/testsuite/gcc.dg/ifcvt-3.c @@ -1,5 +1,5 @@ /* { dg-do compile { target { { aarch64*-*-* i?86-*-* x86_64-*-* } && lp64 } } } */ -/* { dg-options "-fdump-rtl-ce1 -O2" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100" } */ typedef long long s64; diff --git a/gcc/testsuite/gcc.dg/ifcvt-4.c b/gcc/testsuite/gcc.dg/ifcvt-4.c index 319b583e4e2..0d1671c302a 100644 --- a/gcc/testsuite/gcc.dg/ifcvt-4.c +++ b/gcc/testsuite/gcc.dg/ifcvt-4.c @@ -1,4 +1,4 @@ -/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=3 --param max-rtl-if-conversion-unpredictable-cost=100" } */ /* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */ /* { dg-skip-if "Multiple set if-conversion not guaranteed on all subtargets" { "arm*-*-* hppa*64*-*-* visium-*-*" } } */ diff --git a/gcc/testsuite/gcc.dg/ifcvt-5.c b/gcc/testsuite/gcc.dg/ifcvt-5.c index 818099a0c44..d2a947646af 100644 --- a/gcc/testsuite/gcc.dg/ifcvt-5.c +++ b/gcc/testsuite/gcc.dg/ifcvt-5.c @@ -1,7 +1,8 @@ /* Check that multi-insn if-conversion is not done if the override - parameter would not allow it. */ + parameter would not allow it. Set the cost parameter very high + to ensure that the limiting factor is actually the count parameter. */ -/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=1" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-insns=1 --param max-rtl-if-conversion-unpredictable-cost=200" } */ typedef int word __attribute__((mode(word))); diff --git a/gcc/testsuite/gcc.dg/pr68435.c b/gcc/testsuite/gcc.dg/pr68435.c index 765699a19e7..f86b7f8da16 100644 --- a/gcc/testsuite/gcc.dg/pr68435.c +++ b/gcc/testsuite/gcc.dg/pr68435.c @@ -1,5 +1,5 @@ /* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */ -/* { dg-options "-fdump-rtl-ce1 -O2 -w" } */ +/* { dg-options "-fdump-rtl-ce1 -O2 -w --param max-rtl-if-conversion-unpredictable-cost=100" } */ typedef struct cpp_reader cpp_reader; enum cpp_ttype -- 2.30.2