From f67d92e937b860e1f8d832c8de8249dbc7d657c3 Mon Sep 17 00:00:00 2001 From: Daniel Berlin Date: Thu, 16 Sep 2004 16:16:14 +0000 Subject: [PATCH] [multiple changes] 2004-09-16 Daniel Berlin * cfgloop.h (duplicate_loop): Add prototype. * cfgloopmanip.c (duplicate_loop): Make non-static. * lambda-code.c (perfect_nestify): Factor out test whether we can handle this loop into separate function. Call it. (can_convert_to_perfect_nest): New function. (replace_uses_of_x_with_y): Add modify_stmt call. * tree-loop-linear.c (linear_transform_loops): Call rewrite_into_loop_closed_ssa and free_df. 2004-09-16 Daniel Berlin * lambda-code.c (invariant_in_loop): is_gimple_min_invariant is loop invariant as well. (perfect_nestify): new function. (gcc_loop_to_lambda_loop): New parameters to track lower bounds, upper bounds, and steps. Set outerinductionvar properly. (gcc_loopnest_to_lambda_loopnest): Add loops and need_perfect parameters. Return NULL if we need a perfect loop and can't make one. (lambda_loopnest_to_gcc_loopnest): Correct algorithm. (not_interesting_stmt): New function. (phi_loop_edge_uses_def): Ditto. (stmt_uses_phi_result): Ditto. (stmt_is_bumper_for_loop): Ditto. (perfect_nest_p): Ditto. (nestify_update_pending_stmts): Ditto. (replace_uses_of_x_with_y): Ditto. (stmt_uses_op): Ditto. (perfect_nestify): Ditto. * lambda-mat.c (lambda_matrix_id_p): New function. * lambda-trans.c (lambda_trans_matrix_id_p): Ditto. * lambda.h: Update prototypes. * tree-loop-linear (linear_transform_loop): Use new perfect_nest_p. Detect and ignore identity transform. * tree-ssa-loop.c (pass_linear_transform): Use TODO_write_loop_closed. 2004-09-16 Sebastian Pop * tree-loop-linear.c (gather_interchange_stats): Add more comments. Gather also strides of accessed data. Pass in the data references array. (try_interchange_loops): Add a new heuristic for handling the temporal locality. Pass in the data references array. (linear_transform_loops): Pass the data references array to try_interchange_loops. From-SVN: r87607 --- gcc/ChangeLog | 50 ++++ gcc/cfgloop.h | 2 + gcc/cfgloopmanip.c | 4 +- gcc/lambda-code.c | 653 +++++++++++++++++++++++++++++++++++------ gcc/lambda-mat.c | 23 ++ gcc/lambda-trans.c | 13 +- gcc/lambda.h | 14 +- gcc/tree-loop-linear.c | 171 ++++++++--- 8 files changed, 786 insertions(+), 144 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 010eea9c5d3..265495f32c5 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,53 @@ +2004-09-16 Daniel Berlin + + * cfgloop.h (duplicate_loop): Add prototype. + * cfgloopmanip.c (duplicate_loop): Make non-static. + * lambda-code.c (perfect_nestify): Factor out test whether + we can handle this loop into separate function. + Call it. + (can_convert_to_perfect_nest): New function. + (replace_uses_of_x_with_y): Add modify_stmt call. + * tree-loop-linear.c (linear_transform_loops): Call + rewrite_into_loop_closed_ssa and free_df. + +2004-09-16 Daniel Berlin + + * lambda-code.c (invariant_in_loop): is_gimple_min_invariant is + loop invariant as well. + (perfect_nestify): new function. + (gcc_loop_to_lambda_loop): New parameters to track lower bounds, + upper bounds, and steps. + Set outerinductionvar properly. + (gcc_loopnest_to_lambda_loopnest): Add loops and need_perfect + parameters. + Return NULL if we need a perfect loop and can't make one. + (lambda_loopnest_to_gcc_loopnest): Correct algorithm. + (not_interesting_stmt): New function. + (phi_loop_edge_uses_def): Ditto. + (stmt_uses_phi_result): Ditto. + (stmt_is_bumper_for_loop): Ditto. + (perfect_nest_p): Ditto. + (nestify_update_pending_stmts): Ditto. + (replace_uses_of_x_with_y): Ditto. + (stmt_uses_op): Ditto. + (perfect_nestify): Ditto. + * lambda-mat.c (lambda_matrix_id_p): New function. + * lambda-trans.c (lambda_trans_matrix_id_p): Ditto. + * lambda.h: Update prototypes. + * tree-loop-linear (linear_transform_loop): Use new + perfect_nest_p. Detect and ignore identity transform. + * tree-ssa-loop.c (pass_linear_transform): Use TODO_write_loop_closed. + +2004-09-16 Sebastian Pop + + * tree-loop-linear.c (gather_interchange_stats): Add more comments. + Gather also strides of accessed data. Pass in the data references + array. + (try_interchange_loops): Add a new heuristic for handling the temporal + locality. Pass in the data references array. + (linear_transform_loops): Pass the data references array to + try_interchange_loops. + 2004-09-16 Kazu Hirata * doc/invoke.texi: Fix typos. Follow spelling conventions. diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 210d7b5d62e..cfa8e100078 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -308,6 +308,8 @@ extern bool can_duplicate_loop_p (struct loop *loop); #define DLTHE_FLAG_UPDATE_FREQ 1 /* Update frequencies in duplicate_loop_to_header_edge. */ +extern struct loop * duplicate_loop (struct loops *, struct loop *, + struct loop *); extern int duplicate_loop_to_header_edge (struct loop *, edge, struct loops *, unsigned, sbitmap, edge, edge *, unsigned *, int); diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c index e20433bcb4b..616909700db 100644 --- a/gcc/cfgloopmanip.c +++ b/gcc/cfgloopmanip.c @@ -29,8 +29,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "cfglayout.h" #include "output.h" -static struct loop * duplicate_loop (struct loops *, struct loop *, - struct loop *); static void duplicate_subloops (struct loops *, struct loop *, struct loop *); static void copy_loops_to (struct loops *, struct loop **, int, struct loop *); @@ -701,7 +699,7 @@ place_new_loop (struct loops *loops, struct loop *loop) /* Copies copy of LOOP as subloop of TARGET loop, placing newly created loop into LOOPS structure. */ -static struct loop * +struct loop * duplicate_loop (struct loops *loops, struct loop *loop, struct loop *target) { struct loop *cloop; diff --git a/gcc/lambda-code.c b/gcc/lambda-code.c index 21bea190574..f438fb6e27c 100644 --- a/gcc/lambda-code.c +++ b/gcc/lambda-code.c @@ -115,6 +115,13 @@ Fourier-Motzkin elimination is used to compute the bounds of the base space of the lattice. */ + + +DEF_VEC_GC_P(int); + +static bool perfect_nestify (struct loops *, + struct loop *, VEC (tree) *, + VEC (tree) *, VEC (int) *, VEC (tree) *); /* Lattice stuff that is internal to the code generation algorithm. */ typedef struct @@ -1160,20 +1167,23 @@ gcc_tree_to_linear_expression (int depth, tree expr, static bool invariant_in_loop (struct loop *loop, tree op) { + if (is_gimple_min_invariant (op)) + return true; if (loop->depth == 0) return true; if (TREE_CODE (op) == SSA_NAME) { + tree def; + def = SSA_NAME_DEF_STMT (op); if (TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL - && IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op))) + && IS_EMPTY_STMT (def)) return true; - if (IS_EMPTY_STMT (SSA_NAME_DEF_STMT (op))) + if (IS_EMPTY_STMT (def)) return false; - if (loop->outer) - if (!invariant_in_loop (loop->outer, op)) + if (loop->outer + && !invariant_in_loop (loop->outer, op)) return false; - return !flow_bb_inside_loop_p (loop, - bb_for_stmt (SSA_NAME_DEF_STMT (op))); + return !flow_bb_inside_loop_p (loop, bb_for_stmt (def)); } return false; } @@ -1190,7 +1200,10 @@ static lambda_loop gcc_loop_to_lambda_loop (struct loop *loop, int depth, VEC (tree) ** invariants, tree * ourinductionvar, - VEC (tree) * outerinductionvars) + VEC (tree) * outerinductionvars, + VEC (tree) ** lboundvars, + VEC (tree) ** uboundvars, + VEC (int) ** steps) { tree phi; tree exit_cond; @@ -1201,15 +1214,11 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, tree test; int stepint; int extra = 0; - tree uboundvar; + tree lboundvar, uboundvar; use_optype uses; - /* Find out induction var and set the pointer so that the caller can - append it to the outerinductionvars array later. */ - + /* Find out induction var and exit condition. */ inductionvar = find_induction_var_from_exit_cond (loop); - *ourinductionvar = inductionvar; - exit_cond = get_loop_exit_condition (loop); if (inductionvar == NULL || exit_cond == NULL) @@ -1260,7 +1269,9 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, } } - + /* The induction variable name/version we want to put in the array is the + result of the induction variable phi node. */ + *ourinductionvar = PHI_RESULT (phi); access_fn = instantiate_parameters (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); if (!access_fn) @@ -1316,14 +1327,20 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, } if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, 0)->src)) - - lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 1), - outerinductionvars, *invariants, - 0); + { + lboundvar = PHI_ARG_DEF (phi, 1); + lbound = gcc_tree_to_linear_expression (depth, lboundvar, + outerinductionvars, *invariants, + 0); + } else - lbound = gcc_tree_to_linear_expression (depth, PHI_ARG_DEF (phi, 0), - outerinductionvars, *invariants, - 0); + { + lboundvar = PHI_ARG_DEF (phi, 0); + lbound = gcc_tree_to_linear_expression (depth, lboundvar, + outerinductionvars, *invariants, + 0); + } + if (!lbound) { @@ -1368,6 +1385,11 @@ gcc_loop_to_lambda_loop (struct loop *loop, int depth, uboundvar, outerinductionvars, *invariants, extra); + VEC_safe_push (tree, *uboundvars, build (PLUS_EXPR, integer_type_node, + uboundvar, + build_int_cst (integer_type_node, extra))); + VEC_safe_push (tree, *lboundvars, lboundvar); + VEC_safe_push (int, *steps, stepint); if (!ubound) { @@ -1400,7 +1422,7 @@ find_induction_var_from_exit_cond (struct loop *loop) test = TREE_OPERAND (expr, 0); if (TREE_CODE_CLASS (TREE_CODE (test)) != '<') return NULL_TREE; - /* This is a guess. We say that for a <,!=,<= b, a is the induction + /* This is a guess. We say that for a <,!=,<= b, a is the induction variable. For >, >=, we guess b is the induction variable. If we are wrong, it'll fail the rest of the induction variable tests, and @@ -1433,15 +1455,20 @@ DEF_VEC_GC_P(lambda_loop); during this process. */ lambda_loopnest -gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest, +gcc_loopnest_to_lambda_loopnest (struct loops *currloops, + struct loop * loop_nest, VEC (tree) **inductionvars, - VEC (tree) **invariants) + VEC (tree) **invariants, + bool need_perfect_nest) { lambda_loopnest ret; struct loop *temp; int depth = 0; size_t i; VEC (lambda_loop) *loops; + VEC (tree) *uboundvars; + VEC (tree) *lboundvars; + VEC (int) *steps; lambda_loop newloop; tree inductionvar = NULL; @@ -1454,18 +1481,30 @@ gcc_loopnest_to_lambda_loopnest (struct loop * loop_nest, loops = VEC_alloc (lambda_loop, 1); *inductionvars = VEC_alloc (tree, 1); *invariants = VEC_alloc (tree, 1); + lboundvars = VEC_alloc (tree, 1); + uboundvars = VEC_alloc (tree, 1); + steps = VEC_alloc (int, 1); temp = loop_nest; while (temp) { newloop = gcc_loop_to_lambda_loop (temp, depth, invariants, - &inductionvar, *inductionvars); + &inductionvar, *inductionvars, + &lboundvars, &uboundvars, + &steps); if (!newloop) return NULL; VEC_safe_push (tree, *inductionvars, inductionvar); VEC_safe_push (lambda_loop, loops, newloop); temp = temp->inner; } - + if (need_perfect_nest + && !perfect_nestify (currloops, loop_nest, + lboundvars, uboundvars, steps, *inductionvars)) + { + if (dump_file) + fprintf (dump_file, "Not a perfect nest and couldn't convert to one.\n"); + return NULL; + } ret = lambda_loopnest_new (depth, 2 * depth); for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++) LN_LOOPS (ret)[i] = newloop; @@ -1489,7 +1528,7 @@ lbv_to_gcc_expression (lambda_body_vector lbv, /* Create a statement list and a linear expression temporary. */ stmts = alloc_stmt_list (); - resvar = create_tmp_var (integer_type_node, "lletmp"); + resvar = create_tmp_var (integer_type_node, "lbvtmp"); add_referenced_tmp_var (resvar); /* Start at 0. */ @@ -1767,7 +1806,6 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, size_t depth = 0; VEC(tree) *new_ivs; block_stmt_iterator bsi; - basic_block *bbs; if (dump_file) { @@ -1849,66 +1887,56 @@ lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, i++; temp = temp->inner; } - - /* Go through the loop and make iv replacements. */ - bbs = get_loop_body (old_loopnest); - for (i = 0; i < old_loopnest->num_nodes; i++) - for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi)) - { - tree stmt = bsi_stmt (bsi); - use_optype uses; - size_t j; - - get_stmt_operands (stmt); - uses = STMT_USE_OPS (stmt); - for (j = 0; j < NUM_USES (uses); j++) - { - size_t k; - use_operand_p use = USE_OP_PTR (uses, j); - for (k = 0; k < VEC_length (tree, old_ivs); k++) - { - tree oldiv = VEC_index (tree, old_ivs, k); - if (USE_FROM_PTR (use) == oldiv) - { - tree newiv, stmts; - lambda_body_vector lbv; - - /* Compute the new expression for the induction - variable. */ - depth = VEC_length (tree, new_ivs); - lbv = lambda_body_vector_new (depth); - LBV_COEFFICIENTS (lbv)[k] = 1; - lbv = lambda_body_vector_compute_new (transform, lbv); - newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts); - - /* Insert the statements to build that - expression. */ - bsi_insert_before (&bsi, stmts, BSI_SAME_STMT); - - /* Replace the use with the result of that - expression. */ - if (dump_file) - { - fprintf (dump_file, - "Replacing induction variable use of "); - print_generic_stmt (dump_file, USE_FROM_PTR (use), 0); - fprintf (dump_file, " with "); - print_generic_stmt (dump_file, newiv, 0); - fprintf (dump_file, "\n"); - } - SET_USE (use, newiv); - } - } - - } - } + + /* Rewrite uses of the old ivs so that they are now specified in terms of + the new ivs. */ + temp = old_loopnest; + for (i = 0; i < VEC_length (tree, old_ivs); i++) + { + int j; + tree oldiv = VEC_index (tree, old_ivs, i); + dataflow_t imm = get_immediate_uses (SSA_NAME_DEF_STMT (oldiv)); + for (j = 0; j < num_immediate_uses (imm); j++) + { + size_t k; + tree stmt = immediate_use (imm, j); + use_optype uses; + get_stmt_operands (stmt); + uses = STMT_USE_OPS (stmt); + for (k = 0; k < NUM_USES (uses); k++) + { + use_operand_p use = USE_OP_PTR (uses, k); + if (USE_FROM_PTR (use) == oldiv) + { + tree newiv, stmts; + lambda_body_vector lbv; + /* Compute the new expression for the induction + variable. */ + depth = VEC_length (tree, new_ivs); + lbv = lambda_body_vector_new (depth); + LBV_COEFFICIENTS (lbv)[i] = 1; + lbv = lambda_body_vector_compute_new (transform, lbv); + newiv = lbv_to_gcc_expression (lbv, new_ivs, &stmts); + bsi = stmt_for_bsi (stmt); + /* Insert the statements to build that + expression. */ + bsi_insert_before (&bsi, stmts, BSI_SAME_STMT); + SET_USE (use, newiv); + modify_stmt (stmt); + + } + } + } + } } + /* Returns true when the vector V is lexicographically positive, in other words, when the first non zero element is positive. */ static bool -lambda_vector_lexico_pos (lambda_vector v, unsigned n) +lambda_vector_lexico_pos (lambda_vector v, + unsigned n) { unsigned i; for (i = 0; i < n; i++) @@ -1923,6 +1951,442 @@ lambda_vector_lexico_pos (lambda_vector v, unsigned n) return true; } + +/* Return TRUE if this is not interesting statement from the perspective of + determining if we have a perfect loop nest. */ + +static bool +not_interesting_stmt (tree stmt) +{ + /* Note that COND_EXPR's aren't interesting because if they were exiting the + loop, we would have already failed the number of exits tests. */ + if (TREE_CODE (stmt) == LABEL_EXPR + || TREE_CODE (stmt) == GOTO_EXPR + || TREE_CODE (stmt) == COND_EXPR) + return true; + return false; +} + +/* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */ + +static bool +phi_loop_edge_uses_def (struct loop *loop, tree phi, tree def) +{ + int i; + for (i = 0; i < PHI_NUM_ARGS (phi); i++) + if (flow_bb_inside_loop_p (loop, PHI_ARG_EDGE (phi, i)->src)) + if (PHI_ARG_DEF (phi, i) == def) + return true; + return false; +} + +/* Return TRUE if STMT is a use of PHI_RESULT. */ + +static bool +stmt_uses_phi_result (tree stmt, tree phi_result) +{ + use_optype uses = STMT_USE_OPS (stmt); + + /* This is conservatively true, because we only want SIMPLE bumpers + of the form x +- constant for our pass. */ + if (NUM_USES (uses) != 1) + return false; + if (USE_OP (uses, 0) == phi_result) + return true; + + return false; +} + +/* STMT is a bumper stmt for LOOP if the version it defines is used in the + in-loop-edge in a phi node, and the operand it uses is the result of that + phi node. + I.E. i_29 = i_3 + 1 + i_3 = PHI (0, i_29); */ + +static bool +stmt_is_bumper_for_loop (struct loop *loop, tree stmt) +{ + tree use; + tree def; + def_optype defs = STMT_DEF_OPS (stmt); + dataflow_t imm; + int i; + + if (NUM_DEFS (defs) != 1) + return false; + def = DEF_OP (defs, 0); + imm = get_immediate_uses (stmt); + for (i = 0; i < num_immediate_uses (imm); i++) + { + use = immediate_use (imm, i); + if (TREE_CODE (use) == PHI_NODE) + { + if (phi_loop_edge_uses_def (loop, use, def)) + if (stmt_uses_phi_result (stmt, PHI_RESULT (use))) + return true; + } + } + return false; +} +/* Return true if LOOP is a perfect loop nest. + Perfect loop nests are those loop nests where all code occurs in the + innermost loop body. + If S is a program statement, then + + ie + DO I = 1, 20 + S1 + DO J = 1, 20 + ... + END DO + END DO + is not a perfect loop nest because of S1. + + DO I = 1, 20 + DO J = 1, 20 + S1 + ... + END DO + END DO + is a perfect loop nest. + + Since we don't have high level loops anymore, we basically have to walk our + statements and ignore those that are there because the loop needs them (IE + the induction variable increment, and jump back to the top of the loop). */ + +bool +perfect_nest_p (struct loop *loop) +{ + basic_block *bbs; + size_t i; + tree exit_cond; + + if (!loop->inner) + return true; + bbs = get_loop_body (loop); + exit_cond = get_loop_exit_condition (loop); + for (i = 0; i < loop->num_nodes; i++) + { + if (bbs[i]->loop_father == loop) + { + block_stmt_iterator bsi; + for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi)) + { + tree stmt = bsi_stmt (bsi); + if (stmt == exit_cond + || not_interesting_stmt (stmt) + || stmt_is_bumper_for_loop (loop, stmt)) + continue; + free (bbs); + return false; + } + } + } + free (bbs); + /* See if the inner loops are perfectly nested as well. */ + if (loop->inner) + return perfect_nest_p (loop->inner); + return true; +} + + +/* Add phi args using PENDINT_STMT list. */ + +static void +nestify_update_pending_stmts (edge e) +{ + basic_block dest; + tree phi, arg, def; + + if (!PENDING_STMT (e)) + return; + + dest = e->dest; + + for (phi = phi_nodes (dest), arg = PENDING_STMT (e); + phi; + phi = TREE_CHAIN (phi), arg = TREE_CHAIN (arg)) + { + def = TREE_VALUE (arg); + add_phi_arg (&phi, def, e); + } + + PENDING_STMT (e) = NULL; +} + +/* Replace the USES of tree X in STMT with tree Y */ + +static void +replace_uses_of_x_with_y (tree stmt, tree x, tree y) +{ + use_optype uses = STMT_USE_OPS (stmt); + size_t i; + for (i = 0; i < NUM_USES (uses); i++) + { + if (USE_OP (uses, i) == x) + SET_USE_OP (uses, i, y); + } +} + +/* Return TRUE if STMT uses tree OP in it's uses. */ + +static bool +stmt_uses_op (tree stmt, tree op) +{ + use_optype uses = STMT_USE_OPS (stmt); + size_t i; + for (i = 0; i < NUM_USES (uses); i++) + { + if (USE_OP (uses, i) == op) + return true; + } + return false; +} + +/* Return TRUE if LOOP is an imperfect nest that we can convert to a perfect + one. LOOPIVS is a vector of induction variables, one per loop. + ATM, we only handle imperfect nests of depth 2, where all of the statements + occur after the inner loop. */ + +static bool +can_convert_to_perfect_nest (struct loop *loop, + VEC (tree) *loopivs) +{ + basic_block *bbs; + tree exit_condition; + size_t i; + block_stmt_iterator bsi; + + /* Can't handle triply nested+ loops yet. */ + if (!loop->inner || loop->inner->inner) + return false; + + /* We only handle moving the after-inner-body statements right now, so make + sure all the statements we need to move are located in that position. */ + bbs = get_loop_body (loop); + exit_condition = get_loop_exit_condition (loop); + for (i = 0; i < loop->num_nodes; i++) + { + if (bbs[i]->loop_father == loop) + { + for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi); bsi_next (&bsi)) + { + size_t j; + tree stmt = bsi_stmt (bsi); + if (stmt == exit_condition + || not_interesting_stmt (stmt) + || stmt_is_bumper_for_loop (loop, stmt)) + continue; + /* If the statement uses inner loop ivs, we == screwed. */ + for (j = 1; j < VEC_length (tree, loopivs); j++) + if (stmt_uses_op (stmt, VEC_index (tree, loopivs, j))) + { + free (bbs); + return false; + } + + /* If the bb of a statement we care about isn't dominated by + the header of the inner loop, then we are also screwed. */ + if (!dominated_by_p (CDI_DOMINATORS, + bb_for_stmt (stmt), + loop->inner->header)) + { + free (bbs); + return false; + } + } + } + } + return true; +} + +/* Transform the loop nest into a perfect nest, if possible. + LOOPS is the current struct loops * + LOOP is the loop nest to transform into a perfect nest + LBOUNDS are the lower bounds for the loops to transform + UBOUNDS are the upper bounds for the loops to transform + STEPS is the STEPS for the loops to transform. + LOOPIVS is the induction variables for the loops to transform. + + Basically, for the case of + + FOR (i = 0; i < 50; i++) + { + FOR (j =0; j < 50; j++) + { + + } + + } + + This function will transform it into a perfect loop nest by splitting the + outer loop into two loops, like so: + + FOR (i = 0; i < 50; i++) + { + FOR (j = 0; j < 50; j++) + { + + } + } + + FOR (i = 0; i < 50; i ++) + { + + } + + Return FALSE if we can't make this loop into a perfect nest. */ +static bool +perfect_nestify (struct loops *loops, + struct loop *loop, + VEC (tree) *lbounds, + VEC (tree) *ubounds, + VEC (int) *steps, + VEC (tree) *loopivs) +{ + basic_block *bbs; + tree exit_condition; + tree then_label, else_label, cond_stmt; + basic_block preheaderbb, headerbb, bodybb, latchbb, olddest; + size_t i; + block_stmt_iterator bsi; + edge e; + struct loop *newloop; + tree phi; + tree uboundvar; + tree stmt; + tree ivvar, ivvarinced; + VEC (tree) *phis; + + if (!can_convert_to_perfect_nest (loop, loopivs)) + return false; + + phis = VEC_alloc (tree, 1); + + /* Create the new loop */ + + olddest = loop->single_exit->dest; + preheaderbb = loop_split_edge_with (loop->single_exit, NULL); + headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); + + /* This is done because otherwise, it will release the ssa_name too early + when the edge gets redirected and it will get reused, causing the use of + the phi node to get rewritten. */ + + for (phi = phi_nodes (olddest); phi; phi = PHI_CHAIN (phi)) + { + /* These should be simple exit phi copies. */ + if (PHI_NUM_ARGS (phi) != 1) + return false; + VEC_safe_push (tree, phis, PHI_RESULT (phi)); + VEC_safe_push (tree, phis, PHI_ARG_DEF (phi, 0)); + mark_for_rewrite (PHI_RESULT (phi)); + } + e = redirect_edge_and_branch (preheaderbb->succ, headerbb); + unmark_all_for_rewrite (); + bb_ann (olddest)->phi_nodes = NULL; + /* Add back the old exit phis. */ + while (VEC_length (tree, phis) != 0) + { + tree def; + tree phiname; + def = VEC_pop (tree, phis); + phiname = VEC_pop (tree, phis); + + phi = create_phi_node (phiname, preheaderbb); + add_phi_arg (&phi, def, preheaderbb->pred); + } + + nestify_update_pending_stmts (e); + bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); + latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); + make_edge (headerbb, bodybb, EDGE_FALLTHRU); + then_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (latchbb)); + else_label = build1 (GOTO_EXPR, void_type_node, tree_block_label (olddest)); + cond_stmt = build (COND_EXPR, void_type_node, + build (NE_EXPR, boolean_type_node, + integer_one_node, + integer_zero_node), + then_label, else_label); + bsi = bsi_start (bodybb); + bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT); + e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE); + make_edge (bodybb, latchbb, EDGE_TRUE_VALUE); + make_edge (latchbb, headerbb, EDGE_FALLTHRU); + + /* Update the loop structures. */ + newloop = duplicate_loop (loops, loop, olddest->loop_father); + newloop->header = headerbb; + newloop->latch = latchbb; + newloop->single_exit = e; + add_bb_to_loop (latchbb, newloop); + add_bb_to_loop (bodybb, newloop); + add_bb_to_loop (headerbb, newloop); + add_bb_to_loop (preheaderbb, olddest->loop_father); + set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb); + set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb); + set_immediate_dominator (CDI_DOMINATORS, preheaderbb, + loop->single_exit->src); + set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb); + set_immediate_dominator (CDI_DOMINATORS, olddest, bodybb); + /* Create the new iv. */ + ivvar = create_tmp_var (integer_type_node, "perfectiv"); + add_referenced_tmp_var (ivvar); + bsi = bsi_last (newloop->latch->pred->src); + create_iv (VEC_index (tree, lbounds, 0), + build_int_cst (integer_type_node, + VEC_index (int, steps, 0)), + ivvar, newloop, &bsi, false, &ivvar, &ivvarinced); + + /* Create the new upper bound. This may be not just a variable, so we copy + it to one just in case. */ + + exit_condition = get_loop_exit_condition (newloop); + uboundvar = create_tmp_var (integer_type_node, "uboundvar"); + add_referenced_tmp_var (uboundvar); + stmt = build (MODIFY_EXPR, void_type_node, uboundvar, + VEC_index (tree, ubounds, 0)); + uboundvar = make_ssa_name (uboundvar, stmt); + TREE_OPERAND (stmt, 0) = uboundvar; + bsi_insert_before (&bsi, stmt, BSI_SAME_STMT); + COND_EXPR_COND (exit_condition) = build (LE_EXPR, + boolean_type_node, + ivvarinced, + uboundvar); + + bbs = get_loop_body (loop); + /* Now replace the induction variable in the moved statements with the + correct loop induction variable. */ + for (i = 0; i < loop->num_nodes; i++) + { + block_stmt_iterator tobsi = bsi_last (bodybb); + if (bbs[i]->loop_father == loop) + { + /* Note that the bsi only needs to be explicitly incremented + when we don't move something, since it is automatically + incremented when we do. */ + for (bsi = bsi_start (bbs[i]); !bsi_end_p (bsi);) + { + tree stmt = bsi_stmt (bsi); + if (stmt == exit_condition + || not_interesting_stmt (stmt) + || stmt_is_bumper_for_loop (loop, stmt)) + { + bsi_next (&bsi); + continue; + } + replace_uses_of_x_with_y (stmt, + VEC_index (tree, loopivs, 0), + ivvar); + bsi_move_before (&bsi, &tobsi); + } + } + } + free (bbs); + flow_loops_find (loops, LOOP_ALL); + return perfect_nest_p (loop); +} + /* Return true if TRANS is a legal transformation matrix that respects the dependence vectors in DISTS and DIRS. The conservative answer is false. @@ -1931,27 +2395,29 @@ lambda_vector_lexico_pos (lambda_vector v, unsigned n) matrix T is legal when applied to a loop nest with a set of lexicographically non-negative distance vectors RDG if and only if for each vector d in RDG, (T.d >= 0) is lexicographically positive. - i.e.: if and only if it transforms the lexicographically positive + ie.: if and only if it transforms the lexicographically positive distance vectors to lexicographically positive vectors. Note that a unimodular matrix must transform the zero vector (and only it) to the zero vector." S.Muchnick. */ bool -lambda_transform_legal_p (lambda_trans_matrix trans, - int nb_loops, varray_type dependence_relations) +lambda_transform_legal_p (lambda_trans_matrix trans, + int nb_loops, + varray_type dependence_relations) { unsigned int i; lambda_vector distres; struct data_dependence_relation *ddr; #if defined ENABLE_CHECKING - gcc_assert (LTM_COLSIZE (trans) == nb_loops - && LTM_ROWSIZE (trans) == nb_loops); + if (LTM_COLSIZE (trans) != nb_loops + || LTM_ROWSIZE (trans) != nb_loops) + abort (); #endif /* When there is an unknown relation in the dependence_relations, we know that it is no worth looking at this loop nest: give up. */ - ddr = (struct data_dependence_relation *) + ddr = (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, 0); if (ddr == NULL) return true; @@ -1963,12 +2429,14 @@ lambda_transform_legal_p (lambda_trans_matrix trans, /* For each distance vector in the dependence graph. */ for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) { - ddr = (struct data_dependence_relation *) + ddr = (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, i); + + /* Don't care about relations for which we know that there is no - dependence, nor about read-read (aka. output-dependences): - these data accesses can happen in any order. */ + dependence, nor about read-read (aka. output-dependences): + these data accesses can happen in any order. */ if (DDR_ARE_DEPENDENT (ddr) == chrec_known || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))) continue; @@ -1977,12 +2445,11 @@ lambda_transform_legal_p (lambda_trans_matrix trans, return false; /* Compute trans.dist_vect */ - lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, + lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, DDR_DIST_VECT (ddr), distres); if (!lambda_vector_lexico_pos (distres, nb_loops)) return false; } - return true; } diff --git a/gcc/lambda-mat.c b/gcc/lambda-mat.c index 16fd65eb4d7..8f4cbd0e485 100644 --- a/gcc/lambda-mat.c +++ b/gcc/lambda-mat.c @@ -70,6 +70,29 @@ lambda_matrix_id (lambda_matrix mat, int size) mat[i][j] = (i == j) ? 1 : 0; } +/* Return true if MAT is the identity matrix of SIZE */ + +bool +lambda_matrix_id_p (lambda_matrix mat, int size) +{ + int i, j; + for (i = 0; i < size; i++) + for (j = 0; j < size; j++) + { + if (i == j) + { + if (mat[i][j] != 1) + return false; + } + else + { + if (mat[i][j] != 0) + return false; + } + } + return true; +} + /* Negate the elements of the M x N matrix MAT1 and store it in MAT2. */ void diff --git a/gcc/lambda-trans.c b/gcc/lambda-trans.c index 5195bb61c8f..2179c7f113e 100644 --- a/gcc/lambda-trans.c +++ b/gcc/lambda-trans.c @@ -45,7 +45,18 @@ lambda_trans_matrix_new (int colsize, int rowsize) return ret; } -/* Compute the inverse of the transformation. */ +/* Return true if MAT is an identity matrix. */ + +bool +lambda_trans_matrix_id_p (lambda_trans_matrix mat) +{ + if (LTM_ROWSIZE (mat) != LTM_COLSIZE (mat)) + return false; + return lambda_matrix_id_p (LTM_MATRIX (mat), LTM_ROWSIZE (mat)); +} + + +/* Compute the inverse of the transformation matrix MAT. */ lambda_trans_matrix lambda_trans_matrix_inverse (lambda_trans_matrix mat) diff --git a/gcc/lambda.h b/gcc/lambda.h index 36a9664e8c9..b7360241acc 100644 --- a/gcc/lambda.h +++ b/gcc/lambda.h @@ -105,7 +105,9 @@ typedef struct lambda_loopnest lambda_loopnest_new (int, int); lambda_loopnest lambda_loopnest_transform (lambda_loopnest, lambda_trans_matrix); - +struct loop; +struct loops; +bool perfect_nest_p (struct loop *); bool lambda_transform_legal_p (lambda_trans_matrix, int, varray_type); void print_lambda_loopnest (FILE *, lambda_loopnest, char); @@ -116,6 +118,7 @@ void print_lambda_loop (FILE *, lambda_loop, int, int, char); lambda_matrix lambda_matrix_new (int, int); void lambda_matrix_id (lambda_matrix, int); +bool lambda_matrix_id_p (lambda_matrix, int); void lambda_matrix_copy (lambda_matrix, lambda_matrix, int, int); void lambda_matrix_negate (lambda_matrix, lambda_matrix, int, int); void lambda_matrix_transpose (lambda_matrix, lambda_matrix, int, int); @@ -153,16 +156,17 @@ lambda_trans_matrix lambda_trans_matrix_inverse (lambda_trans_matrix); void print_lambda_trans_matrix (FILE *, lambda_trans_matrix); void lambda_matrix_vector_mult (lambda_matrix, int, int, lambda_vector, lambda_vector); +bool lambda_trans_matrix_id_p (lambda_trans_matrix); lambda_body_vector lambda_body_vector_new (int); lambda_body_vector lambda_body_vector_compute_new (lambda_trans_matrix, lambda_body_vector); void print_lambda_body_vector (FILE *, lambda_body_vector); -struct loop; - -lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loop *, +lambda_loopnest gcc_loopnest_to_lambda_loopnest (struct loops *, + struct loop *, + VEC(tree) **, VEC(tree) **, - VEC(tree) **); + bool); void lambda_loopnest_to_gcc_loopnest (struct loop *, VEC(tree) *, VEC(tree) *, lambda_loopnest, diff --git a/gcc/tree-loop-linear.c b/gcc/tree-loop-linear.c index 856867e9b66..de16a1ecf92 100644 --- a/gcc/tree-loop-linear.c +++ b/gcc/tree-loop-linear.c @@ -55,22 +55,56 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA transform matrix for locality purposes. TODO: Completion of partial transforms. */ -/* Gather statistics for loop interchange. Initializes SUM the sum of - all the data dependence distances carried by loop LOOP_NUMBER. - NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of - dependence relations for which the loop LOOP_NUMBER is not carrying - any dependence. */ +/* Gather statistics for loop interchange. LOOP_NUMBER is a relative + index in the considered loop nest. The first loop in the + considered loop nest is FIRST_LOOP, and consequently the index of + the considered loop is obtained by FIRST_LOOP + LOOP_NUMBER. + + Initializes: + - DEPENDENCE_STEPS the sum of all the data dependence distances + carried by loop LOOP_NUMBER, + + - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations + for which the loop LOOP_NUMBER is not carrying any dependence, + + - ACCESS_STRIDES the sum of all the strides in LOOP_NUMBER. + + Example: for the following loop, + + | loop_1 runs 1335 times + | loop_2 runs 1335 times + | A[{{0, +, 1}_1, +, 1335}_2] + | B[{{0, +, 1}_1, +, 1335}_2] + | endloop_2 + | A[{0, +, 1336}_1] + | endloop_1 + + gather_interchange_stats (in loop_1) will return + DEPENDENCE_STEPS = 3002 + NB_DEPS_NOT_CARRIED_BY_LOOP = 5 + ACCESS_STRIDES = 10694 + + gather_interchange_stats (in loop_2) will return + DEPENDENCE_STEPS = 3000 + NB_DEPS_NOT_CARRIED_BY_LOOP = 7 + ACCESS_STRIDES = 8010 + */ static void gather_interchange_stats (varray_type dependence_relations, + varray_type datarefs, unsigned int loop_number, - unsigned int *sum, - unsigned int *nb_deps_not_carried_by_loop) + unsigned int first_loop, + unsigned int *dependence_steps, + unsigned int *nb_deps_not_carried_by_loop, + unsigned int *access_strides) { unsigned int i; - *sum = 0; + *dependence_steps = 0; *nb_deps_not_carried_by_loop = 0; + *access_strides = 0; + for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++) { int dist; @@ -78,11 +112,11 @@ gather_interchange_stats (varray_type dependence_relations, (struct data_dependence_relation *) VARRAY_GENERIC_PTR (dependence_relations, i); + /* Compute the dependence strides. */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) { - /* Some constants will need tweaking, but not something that should - be user-accessible. Thus, no --param. */ - *sum += 100; + (*dependence_steps) += 0; continue; } @@ -90,34 +124,64 @@ gather_interchange_stats (varray_type dependence_relations, is no reuse of the data. */ if (DDR_ARE_DEPENDENT (ddr) == chrec_known) { - /* Ditto on the no --param here */ - *sum += 1000; + (*dependence_steps) += 0; continue; } dist = DDR_DIST_VECT (ddr)[loop_number]; if (dist == 0) - *nb_deps_not_carried_by_loop++; + (*nb_deps_not_carried_by_loop) += 1; else if (dist < 0) - *sum += -dist; + (*dependence_steps) += -dist; else - *sum += dist; + (*dependence_steps) += dist; + } + + /* Compute the access strides. */ + for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++) + { + unsigned int it; + struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i); + tree stmt = DR_STMT (dr); + struct loop *stmt_loop = loop_containing_stmt (stmt); + struct loop *inner_loop = current_loops->parray[first_loop + 1]; + + if (!flow_loop_nested_p (inner_loop, stmt_loop) + && inner_loop->num != stmt_loop->num) + continue; + + for (it = 0; it < DR_NUM_DIMENSIONS (dr); it++) + { + tree chrec = DR_ACCESS_FN (dr, it); + tree tstride = evolution_part_in_loop_num + (chrec, first_loop + loop_number); + + if (tstride == NULL_TREE + || TREE_CODE (tstride) != INTEGER_CST) + continue; + + (*access_strides) += int_cst_value (tstride); + } } } /* Apply to TRANS any loop interchange that minimize inner loop steps. - DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array - of dependence relations. Returns the new transform matrix. The smaller the reuse vector - distances in the inner loops, the fewer the cache misses. */ + distances in the inner loops, the fewer the cache misses. + FIRST_LOOP is the loop->num of the first loop in the analyzed loop + nest. */ + static lambda_trans_matrix try_interchange_loops (lambda_trans_matrix trans, unsigned int depth, - varray_type dependence_relations) + varray_type dependence_relations, + varray_type datarefs, + unsigned int first_loop) { unsigned int loop_i, loop_j; - unsigned int steps_i, steps_j; + unsigned int dependence_steps_i, dependence_steps_j; + unsigned int access_strides_i, access_strides_j; unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; struct data_dependence_relation *ddr; @@ -132,33 +196,40 @@ try_interchange_loops (lambda_trans_matrix trans, for (loop_j = 1; loop_j < depth; loop_j++) for (loop_i = 0; loop_i < loop_j; loop_i++) { - gather_interchange_stats (dependence_relations, loop_i, &steps_i, - &nb_deps_not_carried_by_i); - gather_interchange_stats (dependence_relations, loop_j, &steps_j, - &nb_deps_not_carried_by_j); + gather_interchange_stats (dependence_relations, datarefs, + loop_i, first_loop, + &dependence_steps_i, + &nb_deps_not_carried_by_i, + &access_strides_i); + gather_interchange_stats (dependence_relations, datarefs, + loop_j, first_loop, + &dependence_steps_j, + &nb_deps_not_carried_by_j, + &access_strides_j); /* Heuristics for loop interchange profitability: - 1. Inner loops should have smallest steps. - 2. Inner loops should contain more dependence relations not - carried by the loop. + + 1. (spatial locality) Inner loops should have smallest + dependence steps. + + 2. (spatial locality) Inner loops should contain more + dependence relations not carried by the loop. + + 3. (temporal locality) Inner loops should have smallest + array access strides. */ - if (steps_i < steps_j - || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j) + if (dependence_steps_i < dependence_steps_j + || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j + || access_strides_i < access_strides_j) { lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j); - /* Validate the resulting matrix. When the transformation - is not valid, reverse to the previous matrix. - - FIXME: In this case of transformation it could be - faster to verify the validity of the interchange - without applying the transform to the matrix. But for - the moment do it cleanly: this is just a prototype. */ + is not valid, reverse to the previous transformation. */ if (!lambda_transform_legal_p (trans, depth, dependence_relations)) lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j); } } - + return trans; } @@ -181,6 +252,7 @@ linear_transform_loops (struct loops *loops) lambda_loopnest before, after; lambda_trans_matrix trans; bool problem = false; + bool need_perfect_nest = false; /* If it's not a loop nest, we don't want it. We also don't handle sibling loops properly, which are loops of the following form: @@ -197,7 +269,8 @@ linear_transform_loops (struct loops *loops) } */ if (!loop_nest->inner) continue; - for (temp = loop_nest; temp; temp = temp->inner) + depth = 1; + for (temp = loop_nest->inner; temp; temp = temp->inner) { flow_loop_scan (temp, LOOP_ALL); /* If we have a sibling loop or multiple exit edges, jump ship. */ @@ -246,7 +319,15 @@ linear_transform_loops (struct loops *loops) /* Build the transformation matrix. */ trans = lambda_trans_matrix_new (depth, depth); lambda_matrix_id (LTM_MATRIX (trans), depth); - trans = try_interchange_loops (trans, depth, dependence_relations); + trans = try_interchange_loops (trans, depth, dependence_relations, + datarefs, loop_nest->num); + + if (lambda_trans_matrix_id_p (trans)) + { + if (dump_file) + fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); + continue; + } /* Check whether the transformation is legal. */ if (!lambda_transform_legal_p (trans, depth, dependence_relations)) @@ -255,8 +336,12 @@ linear_transform_loops (struct loops *loops) fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); continue; } - before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs, - &invariants); + if (!perfect_nest_p (loop_nest)) + need_perfect_nest = true; + before = gcc_loopnest_to_lambda_loopnest (loops, + loop_nest, &oldivs, + &invariants, + need_perfect_nest); if (!before) continue; @@ -279,4 +364,6 @@ linear_transform_loops (struct loops *loops) free_dependence_relations (dependence_relations); free_data_refs (datarefs); } + rewrite_into_loop_closed_ssa (); + free_df (); } -- 2.30.2