From d72372e46ac06b4db61143f91367a777089cc6f6 Mon Sep 17 00:00:00 2001
From: Mostafa Hagog
Date: Sun, 8 Aug 2004 21:35:53 +0000
Subject: [PATCH] common.opt (freschedule-modulo-scheduled-loops): New flag.

2004-08-08  Mostafa Hagog
            Ayal Zaks

        * common.opt (freschedule-modulo-scheduled-loops): New flag.
        * final.c (final_scan_insn): Handle NOTE_DISABLE_SCHED_OF_BLOCK.
        * modulo-sched.c (sms_schedule): Emit a note to disable scheduling
        when -freschedule-modulo-scheduled-loops flag is not specified.
        (sms_schedule_by_order, ps_insn_advance_column, add_node_to_ps,
        ps_has_conflicts, ps_add_node_check_conflicts): More accurate
        placing of insn in row of partial schedule.
        (ps_insn_find_column): New function.
        * rtl.h (NOTE_DISABLE_SCHED_OF_BLOCK): New note.
        * sched-rgn.c (sched_is_disabled_for_current_region_p): New.
        (schedule_region): Use sched_is_disabled_for_current_region_p.
        * doc/invoke.texi: Document -freschedule-modulo-scheduled-loops.

Co-Authored-By: Ayal Zaks

From-SVN: r85696
---
 gcc/ChangeLog       |  16 ++++
 gcc/common.opt      |   4 +
 gcc/doc/invoke.texi |  10 ++-
 gcc/final.c         |   1 +
 gcc/modulo-sched.c  | 193 ++++++++++++++++++++++++++++++++------------
 gcc/rtl.h           |   6 ++
 gcc/sched-rgn.c     |  37 +++++++++
 7 files changed, 212 insertions(+), 55 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index db17d3e15a5..d5efb3426e7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2004-08-08  Mostafa Hagog
+            Ayal Zaks
+
+        * common.opt (freschedule-modulo-scheduled-loops): New flag.
+        * final.c (final_scan_insn): Handle NOTE_DISABLE_SCHED_OF_BLOCK.
+        * modulo-sched.c (sms_schedule): Emit a note to disable scheduling
+        when -freschedule-modulo-scheduled-loops flag is not specified.
+        (sms_schedule_by_order, ps_insn_advance_column, add_node_to_ps,
+        ps_has_conflicts, ps_add_node_check_conflicts): More accurate
+        placing of insn in row of partial schedule.
+        (ps_insn_find_column): New function.
+        * rtl.h (NOTE_DISABLE_SCHED_OF_BLOCK): New note.
+        * sched-rgn.c (sched_is_disabled_for_current_region_p): New.
+        (schedule_region): Use sched_is_disabled_for_current_region_p.
+        * doc/invoke.texi: Document -freschedule-modulo-scheduled-loops.
+
 2004-08-07  H.J. Lu
 
         * config/i386/i386.c (ix86_expand_clrmem): Revert the last
diff --git a/gcc/common.opt b/gcc/common.opt
index 24310f19322..7694df121c6 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -543,6 +543,10 @@ fnew-ra
 Common Report Var(flag_new_regalloc)
 Use graph-coloring register allocation
 
+freschedule-modulo-scheduled-loops
+Common Report Var(flag_resched_modulo_sched)
+Enable/Disable the traditional scheduling in loops that already passed modulo scheduling
+
 fnon-call-exceptions
 Common Report Var(flag_non_call_exceptions)
 Support synchronous non-call exceptions
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f1988b0b856..0018fc60183 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -308,8 +308,8 @@ in the following sections.
 -fsched-spec-load-dangerous @gol
 -fsched-stalled-insns=@var{n} -sched-stalled-insns-dep=@var{n} @gol
 -fsched2-use-superblocks @gol
--fsched2-use-traces -fsignaling-nans @gol
--fsingle-precision-constant @gol
+-fsched2-use-traces -freschedule-modulo-scheduled-loops @gol
+-fsignaling-nans -fsingle-precision-constant @gol
 -fstrength-reduce -fstrict-aliasing -ftracer -fthread-jumps @gol
 -funroll-all-loops -funroll-loops -fpeel-loops @gol
 -funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
@@ -4393,6 +4393,12 @@ reality and hurt the performance.
 This only makes sense when scheduling after register allocation, i.e.@: with
 @option{-fschedule-insns2} or at @option{-O2} or higher.
 
+@item -freschedule-modulo-scheduled-loops
+@opindex freschedule-modulo-scheduled-loops
+The modulo scheduling comes before the traditional scheduling.  If a loop
+was modulo scheduled we may want to prevent the later scheduling passes
+from changing its schedule.  We use this option to control that.
+
 @item -fcaller-saves
 @opindex fcaller-saves
 Enable values to be allocated in registers that will be clobbered by
diff --git a/gcc/final.c b/gcc/final.c
index 623582ce707..90311bc5371 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -1704,6 +1704,7 @@ final_scan_insn (rtx insn, FILE *file, int optimize ATTRIBUTE_UNUSED,
     case NOTE_INSN_FUNCTION_END:
     case NOTE_INSN_REPEATED_LINE_NUMBER:
     case NOTE_INSN_EXPECTED_VALUE:
+    case NOTE_DISABLE_SCHED_OF_BLOCK:
       break;
 
     case NOTE_INSN_UNLIKELY_EXECUTED_CODE:
diff --git a/gcc/modulo-sched.c b/gcc/modulo-sched.c
index d9cb45c83c1..86e0f8c4640 100644
--- a/gcc/modulo-sched.c
+++ b/gcc/modulo-sched.c
@@ -152,7 +152,9 @@ void free_partial_schedule (partial_schedule_ptr);
 void reset_partial_schedule (partial_schedule_ptr, int new_ii);
 void print_partial_schedule (partial_schedule_ptr, FILE *);
 ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
-                                         ddg_node_ptr node, int cycle);
+                                         ddg_node_ptr node, int cycle,
+                                         sbitmap must_precede,
+                                         sbitmap must_follow);
 void rotate_partial_schedule (partial_schedule_ptr, int);
 void set_row_column_for_ps (partial_schedule_ptr);
 
@@ -874,7 +876,7 @@ sms_schedule (FILE *dump_file)
        continue;
 
      /* For debugging. */
-     if (passes++ > MAX_SMS_LOOP_NUMBER && MAX_SMS_LOOP_NUMBER != -1)
+     if ((passes++ > MAX_SMS_LOOP_NUMBER) && (MAX_SMS_LOOP_NUMBER != -1))
        {
          if (dump_file)
            fprintf (dump_file, "SMS reached MAX_PASSES... \n");
@@ -1086,8 +1088,8 @@ sms_schedule (FILE *dump_file)
          int i;
 
          start_sequence ();
-         /* Copy the original loop code before modifying it - so we can use
-            it later. */
+         /* Copy the original loop code before modifying it -
+            so we can use it later. */
          for (i = 0; i < ps->g->num_nodes; i++)
            duplicate_insn_chain (ps->g->nodes[i].first_note,
                                  ps->g->nodes[i].insn);
@@ -1106,6 +1108,13 @@ sms_schedule (FILE *dump_file)
          set_columns_for_ps (ps);
 
          permute_partial_schedule (ps, g->closing_branch->first_note);
+
+         /* Mark this loop as software pipelined so the later
+            scheduling passes don't touch it. */
+         if (! flag_resched_modulo_sched)
+           emit_note_before (NOTE_DISABLE_SCHED_OF_BLOCK,
+                             g->closing_branch->insn);
+
          generate_reg_moves (ps);
          if (dump_file)
            print_node_sched_params (dump_file, g->num_nodes);
@@ -1217,6 +1226,9 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
   sbitmap sched_nodes = sbitmap_alloc (num_nodes);
   sbitmap psp = sbitmap_alloc (num_nodes);
   sbitmap pss = sbitmap_alloc (num_nodes);
+  sbitmap must_precede = sbitmap_alloc (num_nodes);
+  sbitmap must_follow = sbitmap_alloc (num_nodes);
+
   partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);
 
   while (try_again_with_larger_ii && ii < maxii)
@@ -1258,9 +1270,11 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
              ddg_node_ptr v_node = e->src;
              if (TEST_BIT (sched_nodes, v_node->cuid))
                {
-                 early_start = MAX (early_start,
-                                    SCHED_TIME (v_node)
-                                    + e->latency - (e->distance * ii));
+                 int node_st = SCHED_TIME (v_node)
+                               + e->latency - (e->distance * ii);
+
+                 early_start = MAX (early_start, node_st);
+
                  if (e->data_type == MEM_DEP)
                    end = MIN (end, SCHED_TIME (v_node) + ii - 1);
                }
@@ -1341,11 +1355,31 @@ sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order, FILE *du
            fprintf(dump_file, "Trying to schedule node %d in (%d .. %d) step %d\n",
                    u, start, end, step);
 
+         /* Use must_follow & must_precede bitmaps to determine order
+            of nodes within the cycle. */
+         sbitmap_zero (must_precede);
+         sbitmap_zero (must_follow);
+         for (e = u_node->in; e != 0; e = e->next_in)
+           if (TEST_BIT (sched_nodes, e->src->cuid)
+               && e->latency == (ii * e->distance)
+               && start == SCHED_TIME (e->src))
+             SET_BIT (must_precede, e->src->cuid);
+
+         for (e = u_node->out; e != 0; e = e->next_out)
+           if (TEST_BIT (sched_nodes, e->dest->cuid)
+               && e->latency == (ii * e->distance)
+               && end == SCHED_TIME (e->dest))
+             SET_BIT (must_follow, e->dest->cuid);
+
          success = 0;
          if ((step > 0 && start < end) || (step < 0 && start > end))
            for (c = start; c != end; c += step)
              {
-               ps_insn_ptr psi = ps_add_node_check_conflicts (ps, u_node, c);
+               ps_insn_ptr psi;
+
+               psi = ps_add_node_check_conflicts (ps, u_node, c,
+                                                  must_precede,
+                                                  must_follow);
 
                if (psi)
                  {
@@ -1884,13 +1918,80 @@ remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
   return true;
 }
 
+/* Unlike what literature describes for modulo scheduling (which focuses
+   on VLIW machines), the order of the instructions inside a cycle is
+   important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
+   where the current instruction should go relative to the already
+   scheduled instructions in the given cycle. Go over these
+   instructions and find the first possible column to put it in. */
+static bool
+ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
+                     sbitmap must_precede, sbitmap must_follow)
+{
+  ps_insn_ptr next_ps_i;
+  ps_insn_ptr first_must_follow = NULL;
+  ps_insn_ptr last_must_precede = NULL;
+  int row;
+
+  if (! ps_i)
+    return false;
+
+  row = SMODULO (ps_i->cycle, ps->ii);
+
+  /* Find the first must follow and the last must precede
+     and insert the node immediately after the must precede,
+     but make sure that there is no must follow after it. */
+  for (next_ps_i = ps->rows[row];
+       next_ps_i;
+       next_ps_i = next_ps_i->next_in_row)
+    {
+      if (TEST_BIT (must_follow, next_ps_i->node->cuid)
+          && ! first_must_follow)
+        first_must_follow = next_ps_i;
+      if (TEST_BIT (must_precede, next_ps_i->node->cuid))
+        {
+          /* If we have already met a node that must follow, then
+             there is no possible column. */
+          if (first_must_follow)
+            return false;
+          else
+            last_must_precede = next_ps_i;
+        }
+    }
+
+  /* Now insert the node after LAST_MUST_PRECEDE, or at the head of the row. */
+
+  if (! last_must_precede)
+    {
+      ps_i->next_in_row = ps->rows[row];
+      ps_i->prev_in_row = NULL;
+      if (ps_i->next_in_row)
+        ps_i->next_in_row->prev_in_row = ps_i;
+      ps->rows[row] = ps_i;
+    }
+  else
+    {
+      ps_i->next_in_row = last_must_precede->next_in_row;
+      last_must_precede->next_in_row = ps_i;
+      ps_i->prev_in_row = last_must_precede;
+      if (ps_i->next_in_row)
+        ps_i->next_in_row->prev_in_row = ps_i;
+    }
+
+  return true;
+}
+
 /* Advances the PS_INSN one column in its current row; returns false
-   in failure and true in success. */
+   on failure and true on success. Bit N is set in MUST_FOLLOW if
+   the node with cuid N must come after the node pointed to by
+   PS_I when scheduled in the same cycle. */
 static int
-ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
+ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
+                        sbitmap must_follow)
 {
   ps_insn_ptr prev, next;
   int row;
+  ddg_node_ptr next_node;
 
   if (!ps || !ps_i)
     return false;
@@ -1900,17 +2001,12 @@ ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
   if (! ps_i->next_in_row)
     return false;
 
+  next_node = ps_i->next_in_row->node;
+
   /* Check if next_in_row is dependent on ps_i, both having same sched
      times (typically ANTI_DEP). If so, ps_i cannot skip over it. */
-  if (ps_i->cycle == ps_i->next_in_row->cycle)
-    {
-      ddg_edge_ptr e;
-      ddg_node_ptr next_node = ps_i->next_in_row->node;
-
-      for (e = ps_i->node->out; e; e = e->next_out)
-       if (e->dest == next_node)
-         return false;
-    }
+  if (TEST_BIT (must_follow, next_node->cuid))
+    return false;
 
   /* Advace PS_I over its next_in_row in the doubly linked list. */
   prev = ps_i->prev_in_row;
@@ -1935,14 +2031,17 @@ ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i)
 }
 
 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
-   Returns 0 if this is not possible and a PS_INSN otherwise. */
+   Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
+   set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must come
+   before/after (respectively) the node pointed to by PS_I when scheduled
+   in the same cycle. */
 static ps_insn_ptr
-add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
+add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
+                sbitmap must_precede, sbitmap must_follow)
 {
-  ps_insn_ptr ps_i, next_ps_i, advance_after;
+  ps_insn_ptr ps_i;
   int rest_count = 1;
   int row = SMODULO (cycle, ps->ii);
-  ddg_edge_ptr e;
 
   if (ps->rows[row]
       && ps->rows[row]->row_rest_count >= issue_rate)
@@ -1952,30 +2051,14 @@ add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle)
     rest_count += ps->rows[row]->row_rest_count;
 
   ps_i = create_ps_insn (node, rest_count, cycle);
-  ps_i->next_in_row = ps->rows[row];
-  ps_i->prev_in_row = NULL;
-  if (ps_i->next_in_row)
-    ps_i->next_in_row->prev_in_row = ps_i;
-  ps->rows[row] = ps_i;
-
-  /* Check if n is dependent on an insn already in row, having same cycle
-     (typically ANTI_DEP). If so, n must skip over it. */
-  advance_after = NULL;
-  for (next_ps_i = ps_i->next_in_row;
-       next_ps_i;
-       next_ps_i = next_ps_i->next_in_row)
-    if (next_ps_i->cycle == cycle)
-      for (e = node->in; e; e = e->next_in)
-       if (e->src == next_ps_i->node)
-         advance_after = next_ps_i;
-
-  if (advance_after)
-    while (ps_i->prev_in_row != advance_after)
-      if (!ps_insn_advance_column (ps, ps_i))
-       {
-         remove_node_from_ps (ps, ps_i);
-         return NULL;
-       }
+
+  /* Finds and inserts PS_I according to MUST_FOLLOW and
+     MUST_PRECEDE. */
+  if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
+    {
+      free (ps_i);
+      return NULL;
+    }
 
   return ps_i;
 }
@@ -2049,16 +2132,20 @@ ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
 
 /* Checks if the given node causes resource conflicts when added to PS at
    cycle C. If not the node is added to PS and returned; otherwise zero
-   is returned. */
+   is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
+   cuid N must come before/after (respectively) the node pointed to by
+   PS_I when scheduled in the same cycle. */
 ps_insn_ptr
-ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
+ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
+                             int c, sbitmap must_precede,
+                             sbitmap must_follow)
 {
   int has_conflicts = 0;
   ps_insn_ptr ps_i;
 
-  /* First add the node to the PS, if this succeeds check for conflicts,
-     trying different issue slots in the same row. */
-  if (! (ps_i = add_node_to_ps (ps, n, c)))
+  /* First add the node to the PS, if this succeeds check for
+     conflicts, trying different issue slots in the same row. */
+  if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
     return NULL; /* Failed to insert the node at the given cycle. */
 
   has_conflicts = ps_has_conflicts (ps, c, c)
@@ -2071,7 +2158,7 @@ ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, int c)
      scheduled in without conflicts. */
   while (has_conflicts)
     {
-      if (! ps_insn_advance_column (ps, ps_i))
+      if (! ps_insn_advance_column (ps, ps_i, must_follow))
        break;
       has_conflicts = ps_has_conflicts (ps, c, c)
                      || (ps->history > 0
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 81f0fa1e5db..8174b6e3c4a 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -983,6 +983,12 @@ enum insn_note
   /* Generated at the start of a duplicated exit test. */
   NOTE_INSN_LOOP_VTOP,
 
+  /* Mark that a block shouldn't be scheduled. This is currently
+     used in modulo scheduling. Modulo scheduling adds this note
+     to the blocks of the modulo-scheduled loops to disable scheduling
+     them in the later traditional scheduling passes. */
+  NOTE_DISABLE_SCHED_OF_BLOCK,
+
   /* This kind of note is generated at the end of the function body,
      just before the return insn or return label. In an optimizing
      compilation it is deleted by the first jump optimization, after
diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c
index 325e1693cd4..62d2f2343b7 100644
--- a/gcc/sched-rgn.c
+++ b/gcc/sched-rgn.c
@@ -117,6 +117,7 @@ static int *out_edges;
 static int is_cfg_nonregular (void);
 static int build_control_flow (struct edge_list *);
 static void new_edge (int, int);
+static bool sched_is_disabled_for_current_region_p (void);
 
 /* A region is the main entity for interblock scheduling: insns are
    allowed to move between blocks in the same region, along
@@ -2332,6 +2333,37 @@ debug_dependencies (void)
   fprintf (sched_dump, "\n");
 }
 
+/* Returns true if all the basic blocks of the current region have
+   NOTE_DISABLE_SCHED_OF_BLOCK, which means the region should not be scheduled. */
+static bool
+sched_is_disabled_for_current_region_p (void)
+{
+  rtx first_bb_insn, last_bb_insn, insn;
+  int bb;
+
+  for (bb = 0; bb < current_nr_blocks; bb++)
+    {
+      bool disable_sched = false;
+      /* Search for a NOTE_DISABLE_SCHED_OF_BLOCK note between the
+         start and end of the basic block. */
+      get_block_head_tail (BB_TO_BLOCK (bb), &first_bb_insn,
+                           &last_bb_insn);
+      for (insn = last_bb_insn; insn != NULL && insn != first_bb_insn;
+           insn = PREV_INSN (insn))
+        if (GET_CODE (insn) == NOTE
+            && (NOTE_LINE_NUMBER (insn)
+                == NOTE_DISABLE_SCHED_OF_BLOCK))
+          {
+            disable_sched = true;
+            break;
+          }
+      if (! disable_sched)
+        return false;
+    }
+
+  return true;
+}
+
 /* Schedule a region. A region is either an inner loop, a loop-free
    subroutine, or a single basic block. Each bb in the region is
    scheduled after its flow predecessors. */
@@ -2347,6 +2379,11 @@ schedule_region (int rgn)
   current_nr_blocks = RGN_NR_BLOCKS (rgn);
   current_blocks = RGN_BLOCKS (rgn);
 
+  /* Don't schedule a region that is marked by
+     NOTE_DISABLE_SCHED_OF_BLOCK. */
+  if (sched_is_disabled_for_current_region_p ())
+    return;
+
   init_deps_global ();
 
   /* Initializations for region data dependence analysis. */
-- 
2.30.2
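
A note on the within-row placement this patch introduces: unlike classic modulo scheduling for VLIW machines, the order of instructions inside a single cycle (a row of the partial schedule) matters here. The standalone C sketch below mirrors the idea behind ps_insn_find_column (): scan the row, remember the last instruction that must precede the new one and the first that must follow it, give up when a must-follow entry already sits ahead of a must-precede entry, and otherwise link the new instruction in right after the last must-precede entry (or at the head of the row). The types and names used here (row_insn, row_insert, MAX_NODES) are simplified stand-ins invented for illustration, not the partial_schedule_ptr/ps_insn_ptr/sbitmap types the pass really uses.

#include <stdbool.h>
#include <stdio.h>

#define MAX_NODES 64

/* One instruction already placed in a row of the partial schedule.  */
struct row_insn
{
  int node;                     /* instruction id (cuid in the real code) */
  struct row_insn *prev, *next;
};

/* Insert NEW_INSN into the row starting at *ROW so that every node marked
   in MUST_PRECEDE stays before it and every node marked in MUST_FOLLOW
   stays after it.  Returns false when a must-follow node already sits
   before a must-precede node, i.e. there is no legal column.  */
static bool
row_insert (struct row_insn **row, struct row_insn *new_insn,
            const bool must_precede[MAX_NODES],
            const bool must_follow[MAX_NODES])
{
  struct row_insn *p, *first_must_follow = NULL, *last_must_precede = NULL;

  for (p = *row; p; p = p->next)
    {
      if (must_follow[p->node] && !first_must_follow)
        first_must_follow = p;
      if (must_precede[p->node])
        {
          if (first_must_follow)
            return false;       /* a must-follow node is already ahead of it */
          last_must_precede = p;
        }
    }

  if (!last_must_precede)
    {
      /* No predecessor constraint in this row: put it at the head.  */
      new_insn->prev = NULL;
      new_insn->next = *row;
      if (*row)
        (*row)->prev = new_insn;
      *row = new_insn;
    }
  else
    {
      /* Right after the last node that must precede it.  */
      new_insn->next = last_must_precede->next;
      new_insn->prev = last_must_precede;
      last_must_precede->next = new_insn;
      if (new_insn->next)
        new_insn->next->prev = new_insn;
    }
  return true;
}

int
main (void)
{
  bool must_precede[MAX_NODES] = { false };
  bool must_follow[MAX_NODES] = { false };
  struct row_insn a = { 1, NULL, NULL };
  struct row_insn b = { 2, NULL, NULL };
  struct row_insn c = { 3, NULL, NULL };
  struct row_insn *row = &a, *p;

  a.next = &b;
  b.prev = &a;

  /* Within this cycle, node 3 must stay after node 1 and before node 2.  */
  must_precede[1] = true;
  must_follow[2] = true;

  if (row_insert (&row, &c, must_precede, must_follow))
    for (p = row; p; p = p->next)
      printf ("%d ", p->node);          /* prints: 1 3 2 */
  printf ("\n");
  return 0;
}

Placing the new instruction immediately after the last must-precede entry is the earliest column that can satisfy both sets, which is why the real function can simply fail once the two sets appear interleaved the wrong way round.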
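
For completeness, a sketch of how the two ordering sets themselves are derived. In sms_schedule_by_order () the patch marks an already scheduled neighbor as must-precede (or must-follow) when the connecting dependence has latency equal to ii times its distance and the neighbor's schedule time coincides with the window bound being tried; the fragment below checks against a single candidate cycle instead of the window bounds, which is a simplification. dep_edge, compute_row_order_constraints and MAX_NODES are made-up names for the example, not GCC interfaces.

#include <stdbool.h>
#include <stdio.h>

#define MAX_NODES 64

/* A data-dependence edge of the DDG, reduced to what the rule needs.  */
struct dep_edge
{
  int src, dst;     /* node ids (cuid in the real code) */
  int latency;      /* cycles the dependence needs */
  int distance;     /* iteration distance of the dependence */
};

/* SCHED_TIME[n] is the cycle node n is already scheduled in, or -1.
   U is the node about to be placed and C the cycle being tried; the real
   pass uses the bounds of the scheduling window rather than a single C.  */
static void
compute_row_order_constraints (const struct dep_edge *edges, int n_edges,
                               const int *sched_time, int ii, int u, int c,
                               bool must_precede[MAX_NODES],
                               bool must_follow[MAX_NODES])
{
  int i;

  for (i = 0; i < n_edges; i++)
    {
      const struct dep_edge *e = &edges[i];

      /* latency == ii * distance means the dependence tolerates both of
         its ends sharing a cycle; when they do share one, only the order
         inside the row keeps it honored.  */
      if (e->latency != ii * e->distance)
        continue;

      if (e->dst == u && sched_time[e->src] == c)
        must_precede[e->src] = true;    /* producer already in this cycle */
      if (e->src == u && sched_time[e->dst] == c)
        must_follow[e->dst] = true;     /* consumer already in this cycle */
    }
}

int
main (void)
{
  /* Node 2 is being placed at cycle 5 with ii = 4; nodes 0 and 1 already
     sit in cycle 5, connected by zero-latency, zero-distance dependences.  */
  struct dep_edge edges[] = { { 0, 2, 0, 0 }, { 2, 1, 0, 0 } };
  int sched_time[MAX_NODES] = { 5, 5, -1 };
  bool must_precede[MAX_NODES] = { false };
  bool must_follow[MAX_NODES] = { false };

  compute_row_order_constraints (edges, 2, sched_time, 4, 2, 5,
                                 must_precede, must_follow);
  printf ("node 0 must precede: %d, node 1 must follow: %d\n",
          must_precede[0], must_follow[1]);
  return 0;
}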
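
Finally, a sketch of the region-level gate added to sched-rgn.c: the later scheduling passes skip a region only when every basic block in it carries the disable note that modulo scheduling emitted; one unmarked block keeps the whole region schedulable. The block/insn_kind types below are simplified stand-ins for the rtx insn stream that the real sched_is_disabled_for_current_region_p () walks with GET_CODE and NOTE_LINE_NUMBER.

#include <stdbool.h>
#include <stdio.h>

enum insn_kind { INSN_NORMAL, INSN_NOTE_DISABLE_SCHED };

/* A basic block reduced to the sequence of things it contains.  */
struct block
{
  const enum insn_kind *insns;
  int n_insns;
};

static bool
block_has_disable_note (const struct block *bb)
{
  int i;

  /* Scan from the tail towards the head, like the pass does.  */
  for (i = bb->n_insns - 1; i >= 0; i--)
    if (bb->insns[i] == INSN_NOTE_DISABLE_SCHED)
      return true;
  return false;
}

/* True only if every block of the region carries the disable note.  */
static bool
sched_is_disabled_for_region (const struct block *blocks, int n_blocks)
{
  int b;

  for (b = 0; b < n_blocks; b++)
    if (!block_has_disable_note (&blocks[b]))
      return false;     /* one unmarked block keeps the region schedulable */
  return true;
}

int
main (void)
{
  enum insn_kind bb0[] = { INSN_NORMAL, INSN_NOTE_DISABLE_SCHED, INSN_NORMAL };
  enum insn_kind bb1[] = { INSN_NORMAL, INSN_NORMAL };
  struct block region[] = { { bb0, 3 }, { bb1, 2 } };

  /* Prints "schedule" because bb1 carries no disable note.  */
  printf ("%s\n", sched_is_disabled_for_region (region, 2)
                  ? "skip region" : "schedule");
  return 0;
}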