+2004-09-08 Daniel Berlin <dberlin@dberlin.org>
+
+ * Makefile.in (tree-loop-linear.o): Added.
+ (OBJS-common): Add tree-loop-linear.o
+ * common.opt: New flag, ftree-loop-linear.
+ * timevar.def: New timevar, TV_TREE_LOOP_LINEAR.
+ * tree-flow.h: Add prototype for linear_transform_loops.
+ * tree-optimize.c: Add linear transform after vectorization.
+ * tree-pass.h: Add struct pass_linear_transform.
+ * tree-ssa-loop.c: Add pass_linear_transform.
+ * tree-loop-linear.c: New file.
+ * lambda-code.c: gcc_assertify.
+ (gcc_loop_to_lambda_loop): Handle all exit tests.
+ Handle case where we have (invariant >= induction var).
+ (find_induction_var_from_exit_cond): Ditto.
+
2004-09-08 Jie Zhang <zhangjie@magima.com.cn>
* tree-ssa-alias.c (compute_flow_insensitive_aliasing): If type
varasm.o varray.o vec.o version.o vmsdbgout.o xcoffout.o alloc-pool.o \
et-forest.o cfghooks.o bt-load.o pretty-print.o $(GGC) web.o passes.o \
rtl-profile.o tree-profile.o rtlhooks.o cfgexpand.o lambda-mat.o \
- lambda-trans.o lambda-code.o
+ lambda-trans.o lambda-code.o tree-loop-linear.o
OBJS-md = $(out_object_file)
OBJS-archive = $(EXTRA_OBJS) $(host_hook_obj) tree-inline.o \
errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
tree-vectorizer.h tree-data-ref.h $(SCEV_H)
+tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ errors.h $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) diagnostic.h \
+ $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) cfgloop.h tree-pass.h \
+ $(TREE_DATA_REF_H) $(SCEV_H)
tree-gimple.o : tree-gimple.c $(CONFIG_H) $(SYSTEM_H) $(TREE_H) $(EXPR_H) \
$(RTL_H) $(TREE_GIMPLE_H) $(TM_H) coretypes.h bitmap.h $(GGC_H)
tree-mudflap.o : $(CONFIG_H) errors.h $(SYSTEM_H) $(TREE_H) tree-inline.h \
Common Report Var(flag_tree_loop_im) Init(1)
Enable loop invariant motion on trees
+ftree-loop-linear
+Common Report Var(flag_tree_loop_linear)
+Enable linear loop transforms on trees
+
ftree-loop-ivcanon
Common Report Var(flag_tree_loop_ivcanon)
Create canonical induction variables in loops
-funroll-all-loops -funroll-loops -fpeel-loops @gol
-funswitch-loops -fold-unroll-loops -fold-unroll-all-loops @gol
-ftree-pre -ftree-ccp -ftree-dce -ftree-loop-optimize @gol
--ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
+-ftree-loop-linear -ftree-loop-im -ftree-loop-ivcanon -fivopts @gol
-ftree-dominator-opts -ftree-dse -ftree-copyrename @gol
-ftree-ch -ftree-sra -ftree-ter -ftree-lrs -ftree-fre -ftree-vectorize @gol
--param @var{name}=@var{value}
Perform loop optimizations on trees. This flag is enabled by default at -O
and higher.
+@item -ftree-loop-linear
+Perform linear loop transformations on tree. This flag can improve cache
+performance and allow further loop optimizations to take place.
+
@item -ftree-lim
Perform loop invariant motion on trees. This pass moves only invartiants that
would be hard to handle on rtl level (function calls, operations that expand to
int depth;
/* Make sure the matrix is square. */
- if (LTM_ROWSIZE (transform) != LTM_COLSIZE (transform))
- abort ();
+ gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform));
depth = LTM_ROWSIZE (transform);
int step;
lambda_linear_expression expr;
- if (!loop)
- abort ();
+ gcc_assert (loop);
expr = LL_LINEAR_OFFSET (loop);
step = LL_STEP (loop);
for (i = 0; i < depth; i++)
{
loop = LN_LOOPS (nest)[i];
- if (!loop)
- abort ();
+ gcc_assert (loop);
step = LL_STEP (loop);
/* If we have a step of 1, then the base is one, and the
origin and invariant coefficients are 0. */
/* Otherwise, we need the lower bound expression (which must
be an affine function) to determine the base. */
expression = LL_LOWER_BOUND (loop);
- if (!expression
- || LLE_NEXT (expression) || LLE_DENOMINATOR (expression) != 1)
- abort ();
+ gcc_assert (expression && LLE_NEXT (expression)
+ && LLE_DENOMINATOR (expression) == 1);
/* The lower triangular portion of the base is going to be the
coefficient times the step */
size++;
/* Need to increase matrix sizes above. */
- if (size > 127)
- abort ();
+ gcc_assert (size <= 127);
+
}
/* Then do the exact same thing for the upper bounds. */
A[size][i] = LLE_DENOMINATOR (expression);
size++;
/* Need to increase matrix sizes above. */
- if (size > 127)
- abort ();
+ gcc_assert (size <= 127);
+
}
}
tree test;
int stepint;
int extra = 0;
-
+ tree uboundvar;
use_optype uses;
/* Find out induction var and set the pointer so that the caller can
}
test = TREE_OPERAND (exit_cond, 0);
- if (TREE_CODE (test) != LE_EXPR
- && TREE_CODE (test) != LT_EXPR && TREE_CODE (test) != NE_EXPR)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file,
- "Unable to convert loop: Loop exit test uses unhandled test condition:");
- print_generic_stmt (dump_file, test, 0);
- fprintf (dump_file, "\n");
- }
- return NULL;
- }
if (SSA_NAME_DEF_STMT (inductionvar) == NULL_TREE)
{
return NULL;
}
- if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME)
- if (invariant_in_loop (loop, TREE_OPERAND (test, 1)))
- VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
+ /* One part of the test may be a loop invariant tree. */
+ if (TREE_CODE (TREE_OPERAND (test, 1)) == SSA_NAME
+ && invariant_in_loop (loop, TREE_OPERAND (test, 1)))
+ VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 1));
+ else if (TREE_CODE (TREE_OPERAND (test, 0)) == SSA_NAME
+ && invariant_in_loop (loop, TREE_OPERAND (test, 0)))
+ VEC_safe_push (tree, *invariants, TREE_OPERAND (test, 0));
+
+ /* The non-induction variable part of the test is the upper bound variable.
+ */
+ if (TREE_OPERAND (test, 0) == inductionvar)
+ uboundvar = TREE_OPERAND (test, 1);
+ else
+ uboundvar = TREE_OPERAND (test, 0);
+
/* We only size the vectors assuming we have, at max, 2 times as many
invariants as we do loops (one for each bound).
This is just an arbitrary number, but it has to be matched against the
code below. */
- if (VEC_length (tree, *invariants) > (unsigned int) (2 * depth))
- abort ();
+ gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth));
+
/* We might have some leftover. */
if (TREE_CODE (test) == LT_EXPR)
extra = -1 * stepint;
else if (TREE_CODE (test) == NE_EXPR)
extra = -1 * stepint;
+ else if (TREE_CODE (test) == GT_EXPR)
+ extra = -1 * stepint;
ubound = gcc_tree_to_linear_expression (depth,
- TREE_OPERAND (test, 1),
+ uboundvar,
outerinductionvars,
*invariants, extra);
if (!ubound)
find_induction_var_from_exit_cond (struct loop *loop)
{
tree expr = get_loop_exit_condition (loop);
+ tree ivarop;
tree test;
if (expr == NULL_TREE)
return NULL_TREE;
test = TREE_OPERAND (expr, 0);
if (TREE_CODE_CLASS (TREE_CODE (test)) != '<')
return NULL_TREE;
- if (TREE_CODE (TREE_OPERAND (test, 0)) != SSA_NAME)
+ /* This is a guess. We say that for a <,!=,<= b, a is the induction
+ variable.
+ For >, >=, we guess b is the induction variable.
+ If we are wrong, it'll fail the rest of the induction variable tests, and
+ everything will be fine anyway. */
+ switch (TREE_CODE (test))
+ {
+ case LT_EXPR:
+ case LE_EXPR:
+ case NE_EXPR:
+ ivarop = TREE_OPERAND (test, 0);
+ break;
+ case GT_EXPR:
+ case GE_EXPR:
+ ivarop = TREE_OPERAND (test, 1);
+ break;
+ default:
+ gcc_unreachable();
+ }
+ if (TREE_CODE (ivarop) != SSA_NAME)
return NULL_TREE;
- return TREE_OPERAND (test, 0);
+ return ivarop;
}
DEF_VEC_GC_P(lambda_loop);
name, build_int_cst (integer_type_node,
LLE_DENOMINATOR (lle))));
else
- abort ();
+ gcc_unreachable();
/* name = {ceil, floor}(name/denominator) */
name = make_ssa_name (resvar, stmt);
/* Again, out of laziness, we don't handle this case yet. It's not
hard, it just hasn't occurred. */
- if (VEC_length (tree, results) > 2)
- abort ();
-
+ gcc_assert (VEC_length (tree, results) <= 2);
+
/* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */
if (VEC_length (tree, results) > 1)
{
cases for now. */
offset = LL_LINEAR_OFFSET (newloop);
- if (LLE_DENOMINATOR (offset) != 1
- || !lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth))
- abort ();
-
+ gcc_assert (LLE_DENOMINATOR (offset) == 1 &&
+ lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth));
+
/* Now build the new lower bounds, and insert the statements
necessary to generate it on the loop preheader. */
newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop),
struct data_dependence_relation *ddr;
#if defined ENABLE_CHECKING
- if (LTM_COLSIZE (trans) != nb_loops || LTM_ROWSIZE (trans) != nb_loops)
- abort ();
+ gcc_assert (LTM_COLSIZE (trans) == nb_loops
+ && LTM_ROWSIZE (trans) == nb_loops);
#endif
/* When there is an unknown relation in the dependence_relations, we
DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv creation")
DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling")
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree loop vectorization")
+DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear transforms")
DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization")
DEFTIMEVAR (TV_TREE_CH , "tree copy headers")
DEFTIMEVAR (TV_TREE_SSA_TO_NORMAL , "tree SSA to normal")
extern void build_ssa_operands (tree, stmt_ann_t, stmt_operands_p,
stmt_operands_p);
+/* In tree-loop-linear.c */
+extern void linear_transform_loops (struct loops *);
+
/* In gimplify.c */
tree force_gimple_operand (tree, tree *, bool, tree);
--- /dev/null
+/* Linear Loop transforms
+ Copyright (C) 2003, 2004 Free Software Foundation, Inc.
+ Contributed by Daniel Berlin <dberlin@dberlin.org>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "errors.h"
+#include "ggc.h"
+#include "tree.h"
+#include "target.h"
+
+#include "rtl.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "expr.h"
+#include "optabs.h"
+#include "tree-chrec.h"
+#include "tree-data-ref.h"
+#include "tree-scalar-evolution.h"
+#include "tree-pass.h"
+#include "varray.h"
+#include "lambda.h"
+
+/* Linear loop transforms include any composition of interchange,
+ scaling, skewing, and reversal. They are used to change the
+ iteration order of loop nests in order to optimize data locality of
+ traversals, or remove dependences that prevent
+ parallelization/vectorization/etc.
+
+ TODO: Determine reuse vectors/matrix and use it to determine optimal
+ transform matrix for locality purposes.
+ TODO: Completion of partial transforms. */
+
+/* Gather statistics for loop interchange. Initializes SUM the sum of
+ all the data dependence distances carried by loop LOOP_NUMBER.
+ NB_DEPS_NOT_CARRIED_BY_LOOP is initialized to the number of
+ dependence relations for which the loop LOOP_NUMBER is not carrying
+ any dependence. */
+
+static void
+gather_interchange_stats (varray_type dependence_relations,
+ unsigned int loop_number,
+ unsigned int *sum,
+ unsigned int *nb_deps_not_carried_by_loop)
+{
+ unsigned int i;
+
+ *sum = 0;
+ *nb_deps_not_carried_by_loop = 0;
+ for (i = 0; i < VARRAY_ACTIVE_SIZE (dependence_relations); i++)
+ {
+ int dist;
+ struct data_dependence_relation *ddr =
+ (struct data_dependence_relation *)
+ VARRAY_GENERIC_PTR (dependence_relations, i);
+
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ {
+ /* Some constants will need tweaking, but not something that should
+ be user-accessible. Thus, no --param. */
+ *sum += 100;
+ continue;
+ }
+
+ /* When we know that there is no dependence, we know that there
+ is no reuse of the data. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ {
+ /* Ditto on the no --param here */
+ *sum += 1000;
+ continue;
+ }
+
+ dist = DDR_DIST_VECT (ddr)[loop_number];
+ if (dist == 0)
+ *nb_deps_not_carried_by_loop++;
+ else if (dist < 0)
+ *sum += -dist;
+ else
+ *sum += dist;
+ }
+}
+
+/* Apply to TRANS any loop interchange that minimize inner loop steps.
+ DEPTH is the depth of the loop nest, and DEPENDENCE_RELATIONS is an array
+ of dependence relations.
+ Returns the new transform matrix. The smaller the reuse vector
+ distances in the inner loops, the fewer the cache misses. */
+
+static lambda_trans_matrix
+try_interchange_loops (lambda_trans_matrix trans,
+ unsigned int depth,
+ varray_type dependence_relations)
+{
+ unsigned int loop_i, loop_j;
+ unsigned int steps_i, steps_j;
+ unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j;
+ struct data_dependence_relation *ddr;
+
+ /* When there is an unknown relation in the dependence_relations, we
+ know that it is no worth looking at this loop nest: give up. */
+ ddr = (struct data_dependence_relation *)
+ VARRAY_GENERIC_PTR (dependence_relations, 0);
+ if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
+ return trans;
+
+ /* LOOP_I is always the outer loop. */
+ for (loop_j = 1; loop_j < depth; loop_j++)
+ for (loop_i = 0; loop_i < loop_j; loop_i++)
+ {
+ gather_interchange_stats (dependence_relations, loop_i, &steps_i,
+ &nb_deps_not_carried_by_i);
+ gather_interchange_stats (dependence_relations, loop_j, &steps_j,
+ &nb_deps_not_carried_by_j);
+
+ /* Heuristics for loop interchange profitability:
+ 1. Inner loops should have smallest steps.
+ 2. Inner loops should contain more dependence relations not
+ carried by the loop.
+ */
+ if (steps_i < steps_j
+ || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j)
+ {
+ lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
+
+ /* Validate the resulting matrix. When the transformation
+ is not valid, reverse to the previous matrix.
+
+ FIXME: In this case of transformation it could be
+ faster to verify the validity of the interchange
+ without applying the transform to the matrix. But for
+ the moment do it cleanly: this is just a prototype. */
+ if (!lambda_transform_legal_p (trans, depth, dependence_relations))
+ lambda_matrix_row_exchange (LTM_MATRIX (trans), loop_i, loop_j);
+ }
+ }
+
+ return trans;
+}
+
+/* Perform a set of linear transforms on LOOPS. */
+
+void
+linear_transform_loops (struct loops *loops)
+{
+ unsigned int i;
+
+ for (i = 1; i < loops->num; i++)
+ {
+ unsigned int depth = 0;
+ varray_type datarefs;
+ varray_type dependence_relations;
+ struct loop *loop_nest = loops->parray[i];
+ struct loop *temp;
+ VEC (tree) *oldivs;
+ VEC (tree) *invariants;
+ lambda_loopnest before, after;
+ lambda_trans_matrix trans;
+ bool problem = false;
+ /* If it's not a loop nest, we don't want it.
+ We also don't handle sibling loops properly,
+ which are loops of the following form:
+ for (i = 0; i < 50; i++)
+ {
+ for (j = 0; j < 50; j++)
+ {
+ ...
+ }
+ for (j = 0; j < 50; j++)
+ {
+ ...
+ }
+ } */
+ if (!loop_nest->inner)
+ continue;
+ for (temp = loop_nest; temp; temp = temp->inner)
+ {
+ flow_loop_scan (temp, LOOP_ALL);
+ /* If we have a sibling loop or multiple exit edges, jump ship. */
+ if (temp->next || temp->num_exits != 1)
+ {
+ problem = true;
+ break;
+ }
+ depth ++;
+ }
+ if (problem)
+ continue;
+
+ /* Analyze data references and dependence relations using scev. */
+
+ VARRAY_GENERIC_PTR_INIT (datarefs, 10, "datarefs");
+ VARRAY_GENERIC_PTR_INIT (dependence_relations, 10,
+ "dependence_relations");
+
+
+ compute_data_dependences_for_loop (depth, loop_nest,
+ &datarefs, &dependence_relations);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ unsigned int j;
+ for (j = 0; j < VARRAY_ACTIVE_SIZE (dependence_relations); j++)
+ {
+ struct data_dependence_relation *ddr =
+ (struct data_dependence_relation *)
+ VARRAY_GENERIC_PTR (dependence_relations, j);
+
+ if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
+ {
+ fprintf (dump_file, "DISTANCE_V (");
+ print_lambda_vector (dump_file, DDR_DIST_VECT (ddr),
+ loops->num);
+ fprintf (dump_file, ")\n");
+ fprintf (dump_file, "DIRECTION_V (");
+ print_lambda_vector (dump_file, DDR_DIR_VECT (ddr),
+ loops->num);
+ fprintf (dump_file, ")\n");
+ }
+ }
+ fprintf (dump_file, "\n\n");
+ }
+ /* Build the transformation matrix. */
+ trans = lambda_trans_matrix_new (depth, depth);
+ lambda_matrix_id (LTM_MATRIX (trans), depth);
+ trans = try_interchange_loops (trans, depth, dependence_relations);
+
+ /* Check whether the transformation is legal. */
+ if (!lambda_transform_legal_p (trans, depth, dependence_relations))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Can't transform loop, transform is illegal:\n");
+ continue;
+ }
+ before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs,
+ &invariants);
+ if (!before)
+ continue;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "Before:\n");
+ print_lambda_loopnest (dump_file, before, 'i');
+ }
+
+ after = lambda_loopnest_transform (before, trans);
+ if (dump_file)
+ {
+ fprintf (dump_file, "After:\n");
+ print_lambda_loopnest (dump_file, after, 'u');
+ }
+ lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants,
+ after, trans);
+ oldivs = NULL;
+ invariants = NULL;
+ free_dependence_relations (dependence_relations);
+ free_data_refs (datarefs);
+ }
+}
NEXT_PASS (pass_iv_canon);
NEXT_PASS (pass_if_conversion);
NEXT_PASS (pass_vectorize);
+ NEXT_PASS (pass_linear_transform);
NEXT_PASS (pass_complete_unroll);
NEXT_PASS (pass_iv_optimize);
NEXT_PASS (pass_loop_done);
extern struct tree_opt_pass pass_expand;
extern struct tree_opt_pass pass_rest_of_compilation;
extern struct tree_opt_pass pass_fre;
-
+extern struct tree_opt_pass pass_linear_transform;
#endif /* GCC_TREE_PASS_H */
0 /* letter */
};
+
+/* Loop nest optimizations. */
+
+static void
+tree_linear_transform (void)
+{
+ if (!current_loops)
+ return;
+
+ linear_transform_loops (current_loops);
+}
+
+static bool
+gate_tree_linear_transform (void)
+{
+ return flag_tree_loop_linear != 0;
+}
+
+struct tree_opt_pass pass_linear_transform =
+{
+ "ltrans", /* name */
+ gate_tree_linear_transform, /* gate */
+ tree_linear_transform, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_LINEAR_TRANSFORM, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func, /* todo_flags_finish */
+ 0 /* letter */
+};
+
/* Canonical induction variable creation pass. */
static void