From 4eb8f93d026eaa1de9b4820337069f3ce3465cd0 Mon Sep 17 00:00:00 2001 From: Roman Zhuykov Date: Sat, 5 Dec 2020 18:45:27 +0300 Subject: [PATCH] modulo-sched: Carefully process loop counter initialization [PR97421] Do not allow direct adjustment of pre-header initialization instruction for count register if is read in some instruction below in that basic block. gcc/ChangeLog: PR rtl-optimization/97421 * modulo-sched.c (generate_prolog_epilog): Remove forward declaration, adjust last argument name and type. (const_iteration_count): Add bool pointer parameter to return whether count register is read in pre-header after its initialization. (sms_schedule): Fix count register initialization adjustment procedure according to what const_iteration_count said. gcc/testsuite/ChangeLog: PR rtl-optimization/97421 * gcc.c-torture/execute/pr97421-1.c: New test. * gcc.c-torture/execute/pr97421-2.c: New test. * gcc.c-torture/execute/pr97421-3.c: New test. --- gcc/modulo-sched.c | 71 +++++++++++-------- .../gcc.c-torture/execute/pr97421-1.c | 23 ++++++ .../gcc.c-torture/execute/pr97421-2.c | 18 +++++ .../gcc.c-torture/execute/pr97421-3.c | 22 ++++++ 4 files changed, 103 insertions(+), 31 deletions(-) create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr97421-1.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr97421-2.c create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr97421-3.c diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c index 6f699a874e3..4568674aa6c 100644 --- a/gcc/modulo-sched.c +++ b/gcc/modulo-sched.c @@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *); static void set_node_sched_params (ddg_ptr); static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *); -static void generate_prolog_epilog (partial_schedule_ptr, class loop *, - rtx, rtx); static int calculate_stage_count (partial_schedule_ptr, int); static void calculate_must_precede_follow (ddg_node_ptr, int, int, int, int, sbitmap, sbitmap, sbitmap); @@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail) this constant. Otherwise return 0. */ static rtx_insn * const_iteration_count (rtx count_reg, basic_block pre_header, - int64_t * count) + int64_t *count, bool* adjust_inplace) { rtx_insn *insn; rtx_insn *head, *tail; + *adjust_inplace = false; + bool read_after = false; + if (! pre_header) return NULL; get_ebb_head_tail (pre_header, pre_header, &head, &tail); for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) - if (NONDEBUG_INSN_P (insn) && single_set (insn) && - rtx_equal_p (count_reg, SET_DEST (single_set (insn)))) + if (single_set (insn) && rtx_equal_p (count_reg, + SET_DEST (single_set (insn)))) { rtx pat = single_set (insn); if (CONST_INT_P (SET_SRC (pat))) { *count = INTVAL (SET_SRC (pat)); + *adjust_inplace = !read_after; return insn; } return NULL; } + else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn)) + { + read_after = true; + if (reg_set_p (count_reg, insn)) + break; + } return NULL; } @@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, /* Generate the instructions (including reg_moves) for prolog & epilog. */ static void generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, - rtx count_reg, rtx count_init) + rtx count_reg, bool adjust_init) { int i; int last_stage = PS_STAGE_COUNT (ps) - 1; @@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop, /* Generate the prolog, inserting its insns on the loop-entry edge. */ start_sequence (); - if (!count_init) + if (adjust_init) { /* Generate instructions at the beginning of the prolog to - adjust the loop count by STAGE_COUNT. If loop count is constant - (count_init), this constant is adjusted by STAGE_COUNT in - generate_prolog_epilog function. */ + adjust the loop count by STAGE_COUNT. If loop count is constant + and it not used anywhere in prologue, this constant is adjusted by + STAGE_COUNT outside of generate_prolog_epilog function. */ rtx sub_reg = NULL_RTX; sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg, @@ -1528,7 +1536,8 @@ sms_schedule (void) rtx_insn *count_init; int mii, rec_mii, stage_count, min_cycle; int64_t loop_count = 0; - bool opt_sc_p; + bool opt_sc_p, adjust_inplace = false; + basic_block pre_header; if (! (g = g_arr[loop->num])) continue; @@ -1569,19 +1578,13 @@ sms_schedule (void) } - /* In case of th loop have doloop register it gets special - handling. */ - count_init = NULL; - if ((count_reg = doloop_register_get (head, tail))) - { - basic_block pre_header; - - pre_header = loop_preheader_edge (loop)->src; - count_init = const_iteration_count (count_reg, pre_header, - &loop_count); - } + count_reg = doloop_register_get (head, tail); gcc_assert (count_reg); + pre_header = loop_preheader_edge (loop)->src; + count_init = const_iteration_count (count_reg, pre_header, &loop_count, + &adjust_inplace); + if (dump_file && count_init) { fprintf (dump_file, "SMS const-doloop "); @@ -1701,9 +1704,20 @@ sms_schedule (void) print_partial_schedule (ps, dump_file); } - /* case the BCT count is not known , Do loop-versioning */ - if (count_reg && ! count_init) + if (count_init) + { + if (adjust_inplace) + { + /* When possible, set new iteration count of loop kernel in + place. Otherwise, generate_prolog_epilog creates an insn + to adjust. */ + SET_SRC (single_set (count_init)) = GEN_INT (loop_count + - stage_count + 1); + } + } + else { + /* case the BCT count is not known , Do loop-versioning */ rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg, gen_int_mode (stage_count, GET_MODE (count_reg))); @@ -1713,12 +1727,7 @@ sms_schedule (void) loop_version (loop, comp_rtx, &condition_bb, prob, prob.invert (), prob, prob.invert (), true); - } - - /* Set new iteration count of loop kernel. */ - if (count_reg && count_init) - SET_SRC (single_set (count_init)) = GEN_INT (loop_count - - stage_count + 1); + } /* Now apply the scheduled kernel to the RTL of the loop. */ permute_partial_schedule (ps, g->closing_branch->first_note); @@ -1735,7 +1744,7 @@ sms_schedule (void) if (dump_file) print_node_sched_params (dump_file, g->num_nodes, ps); /* Generate prolog and epilog. */ - generate_prolog_epilog (ps, loop, count_reg, count_init); + generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace); break; } diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c new file mode 100644 index 00000000000..e32fb129f18 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-1.c @@ -0,0 +1,23 @@ +/* PR rtl-optimization/97421 */ +/* { dg-additional-options "-fmodulo-sched" } */ + +int a, b, d, e; +int *volatile c = &a; + +__attribute__((noinline)) +void f(void) +{ + for (int g = 2; g >= 0; g--) { + d = 0; + for (b = 0; b <= 2; b++) + ; + e = *c; + } +} + +int main(void) +{ + f(); + if (b != 3) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c new file mode 100644 index 00000000000..142bcbcee91 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-2.c @@ -0,0 +1,18 @@ +/* PR rtl-optimization/97421 */ +/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */ + +static int a, b, c; +int *d = &c; +int **e = &d; +int ***f = &e; +int main() +{ + int h; + for (a = 2; a; a--) + for (h = 0; h <= 2; h++) + for (b = 0; b <= 2; b++) + ***f = 6; + + if (b != 3) + __builtin_abort(); +} diff --git a/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c new file mode 100644 index 00000000000..3f1485a4a3d --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/execute/pr97421-3.c @@ -0,0 +1,22 @@ +/* PR rtl-optimization/97421 */ +/* { dg-additional-options "-fmodulo-sched" } */ + +int a, b, c; +short d; +void e(void) { + unsigned f = 0; + for (; f <= 2; f++) { + int g[1]; + int h = (long)g; + c = 0; + for (; c < 10; c++) + g[0] = a = 0; + for (; a <= 2; a++) + b = d; + } +} +int main(void) { + e(); + if (a != 3) + __builtin_abort(); +} -- 2.30.2