invoke.texi (-fprefetch-loop-arrays, [...]): Document.
author Zdenek Dvorak <dvorakz@suse.cz>
Tue, 14 Feb 2006 12:22:11 +0000 (13:22 +0100)
committer Zdenek Dvorak <rakdver@gcc.gnu.org>
Tue, 14 Feb 2006 12:22:11 +0000 (12:22 +0000)
* doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
Document.
* tree-ssa-loop-niter.c (number_of_iterations_ne,
number_of_iterations_lt, number_of_iterations_cond): Remember the shape
of the ending condition.
* tree-ssa-loop-manip.c: Include params.h.
(build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
tree_unroll_loop): New functions.
* tree-pass.h (pass_loop_prefetch): Declare.
* loop.c (rest_of_handle_loop_optimize): Test for
-fprefetch-loop-arrays-rtl.
* tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
* timevar.def (TV_TREE_PREFETCH): New timevar.
* tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
pass_loop_prefetch): New.
* tree-cfgcleanup.c: Include tree-scalar-evolution.h.
(cleanup_tree_cfg_loop): Call scev_reset.
* common.opt (fprefetch-loop-arrays-rtl): Add.
* tree-ssa-loop-prefetch.c: New file.
* tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
(new_temp_expr_table): Initialize expr_vars.
(free_temp_expr_table): Cleanup expr_vars.
(check_replaceable, find_replaceable_in_bb): Prevent accumulating
expressions from being merged into one.
* tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
(struct tree_niter_desc): Add control, bound and cmp fields.
(tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
Declare.
* Makefile.in (tree-ssa-loop-prefetch.o): Add.
(tree-cfgcleanup.o): Add SCEV_H dependency.
(tree-ssa-loop-manip.o): Add PARAMS_H dependency.
* passes.c (init_optimization_passes): Add pass_loop_prefetch.

From-SVN: r110964

15 files changed:
gcc/ChangeLog
gcc/Makefile.in
gcc/common.opt
gcc/doc/invoke.texi
gcc/loop.c
gcc/passes.c
gcc/timevar.def
gcc/tree-cfgcleanup.c
gcc/tree-flow.h
gcc/tree-outof-ssa.c
gcc/tree-pass.h
gcc/tree-scalar-evolution.h
gcc/tree-ssa-loop-manip.c
gcc/tree-ssa-loop-niter.c
gcc/tree-ssa-loop.c

index 2811d2e9d7104e50bf62a480408aad5f27badf94..9aec203aa99a8dd05bef130be1f3c6bc23f668d5 100644 (file)
@@ -1,3 +1,38 @@
+2006-02-14  Zdenek Dvorak <dvorakz@suse.cz>
+
+       * doc/invoke.texi (-fprefetch-loop-arrays, -fprefetch-loop-arrays-rtl):
+       Document.
+       * tree-ssa-loop-niter.c (number_of_iterations_ne,
+       number_of_iterations_lt, number_of_iterations_cond): Remember the shape
+       of the ending condition.
+       * tree-ssa-loop-manip.c: Include params.h.
+       (build_if_stmt, can_unroll_loop_p, determine_exit_conditions,
+       tree_unroll_loop): New functions.
+       * tree-pass.h (pass_loop_prefetch): Declare.
+       * loop.c (rest_of_handle_loop_optimize): Test for
+       -fprefetch-loop-arrays-rtl.
+       * tree-scalar-evolution.h (affine_iv): Moved to tree-flow.h.
+       * timevar.def (TV_TREE_PREFETCH): New timevar.
+       * tree-ssa-loop.c (tree_ssa_loop_prefetch, gate_tree_ssa_loop_prefetch,
+       pass_loop_prefetch): New.
+       * tree-cfgcleanup.c: Include tree-scalar-evolution.h.
+       (cleanup_tree_cfg_loop): Call scev_reset.
+       * common.opt (fprefetch-loop-arrays-rtl): Add.
+       * tree-ssa-loop-prefetch.c: New file.
+       * tree-outof-ssa.c (struct value_expr_d): Add expr_vars field.
+       (new_temp_expr_table): Initialize expr_vars.
+       (free_temp_expr_table): Cleanup expr_vars.
+       (check_replaceable, find_replaceable_in_bb): Prevent accumulating
+       expressions from being merged into one.
+       * tree-flow.h (affine_iv): Moved from tree-scalar-evolution.h.
+       (struct tree_niter_desc): Add control, bound and cmp fields.
+       (tree_ssa_prefetch_arrays, can_unroll_loop_p, tree_unroll_loop):
+       Declare.
+       * Makefile.in (tree-ssa-loop-prefetch.o): Add.
+       (tree-cfgcleanup.o): Add SCEV_H dependency.
+       (tree-ssa-loop-manip.o): Add PARAMS_H dependency.
+       * passes.c (init_optimization_passes): Add pass_loop_prefetch.
+
 2006-02-14  Richard Guenther  <rguenther@suse.de>
 
        PR tree-optimization/26258
index 09786c990873d691207e7a37d841b138ecfea142..d6d79005cdd50b6f2d622871e6c3f7f8c0212b40 100644 (file)
@@ -963,7 +963,7 @@ OBJS-common = \
  tree-vect-generic.o tree-ssa-loop.o tree-ssa-loop-niter.o                \
  tree-ssa-loop-manip.o tree-ssa-threadupdate.o tree-ssa-threadedge.o      \
  tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o              \
- tree-vect-patterns.o                                                      \
+ tree-vect-patterns.o tree-ssa-loop-prefetch.o                            \
  tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-address.o          \
  tree-ssa-math-opts.o                                                     \
  tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o                   \
@@ -1975,6 +1975,12 @@ tree-ssa-loop-ch.o : tree-ssa-loop-ch.c $(TREE_FLOW_H) $(CONFIG_H) \
    $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(TREE_INLINE_H) \
    output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
    tree-pass.h $(FLAGS_H) $(BASIC_BLOCK_H) hard-reg-set.h
+tree-ssa-loop-prefetch.o: tree-ssa-loop-prefetch.c $(TREE_FLOW_H) $(CONFIG_H) \
+   $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
+   output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
+   tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
+   $(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
+   tree-chrec.h toplev.h langhooks.h
 tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
    $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) $(EXPR_H) \
    output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
@@ -1984,7 +1990,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
 tree-ssa-loop-manip.o : tree-ssa-loop-manip.c $(TREE_FLOW_H) $(CONFIG_H) \
    $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) \
    output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
-   tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h
+   tree-pass.h $(CFGLAYOUT_H) $(SCEV_H) $(BASIC_BLOCK_H) hard-reg-set.h \
+   $(PARAMS_H)
 tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
    $(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(CFGLOOP_H) domwalk.h \
    $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
index f8077ae37517036c0377bb0ee0598f69579a328f..c7fee2c9c090d3f5deb59cbc79b6408059283dca 100644 (file)
@@ -659,7 +659,11 @@ Common Report Var(flag_pie,1) VarExists
 Generate position-independent code for executables if possible (small mode)
 
 fprefetch-loop-arrays
-Common Report Var(flag_prefetch_loop_arrays)
+Common Report Var(flag_prefetch_loop_arrays,1)
+Generate prefetch instructions, if available, for arrays in loops
+
+fprefetch-loop-arrays-rtl
+Common Report Var(flag_prefetch_loop_arrays,2)
 Generate prefetch instructions, if available, for arrays in loops
 
 fprofile
index 6fb86e314e8ee7de176022048f0767fbdafe4606..5c7ff5ebeae90832de082467c8e136a2fc751764 100644 (file)
@@ -321,7 +321,7 @@ Objective-C and Objective-C++ Dialects}.
 -funsafe-math-optimizations  -funsafe-loop-optimizations  -ffinite-math-only @gol
 -fno-toplevel-reorder -fno-trapping-math  -fno-zero-initialized-in-bss @gol
 -fomit-frame-pointer  -foptimize-register-move @gol
--foptimize-sibling-calls  -fprefetch-loop-arrays @gol
+-foptimize-sibling-calls  -fprefetch-loop-arrays -fprefetch-loop-arrays-rtl @gol
 -fprofile-generate -fprofile-use @gol
 -fregmove  -frename-registers @gol
 -freorder-blocks  -freorder-blocks-and-partition -freorder-functions @gol
@@ -5171,7 +5171,9 @@ With this option, the compiler will create multiple copies of some
 local variables when unrolling a loop which can result in superior code.
 
 @item -fprefetch-loop-arrays
+@itemx -fprefetch-loop-arrays-rtl
 @opindex fprefetch-loop-arrays
+@opindex fprefetch-loop-arrays-rtl
 If supported by the target machine, generate instructions to prefetch
 memory to improve the performance of loops that access large arrays.
 
@@ -5709,7 +5711,9 @@ Move branches with loop invariant conditions out of the loop, with duplicates
 of the loop on both branches (modified according to result of the condition).
 
 @item -fprefetch-loop-arrays
+@itemx -fprefetch-loop-arrays-rtl
 @opindex fprefetch-loop-arrays
+@opindex fprefetch-loop-arrays-rtl
 If supported by the target machine, generate instructions to prefetch
 memory to improve the performance of loops that access large arrays.
 
index fcb7d1ab21f3cce31965f17c3816d2fc4bc9bd2f..1beb4dc16a6ae8ff37791a540426bcb03a1548b4 100644 (file)
@@ -11780,7 +11780,7 @@ rest_of_handle_loop_optimize (void)
   free_bb_for_insn ();
   profile_status = PROFILE_ABSENT;
   
-  do_prefetch = flag_prefetch_loop_arrays ? LOOP_PREFETCH : 0;
+  do_prefetch = flag_prefetch_loop_arrays == 2 ? LOOP_PREFETCH : 0;
   
   if (flag_rerun_loop_opt)
     {
index 5e026957307d261a6ce7065be5562e58cc86e32c..2e7f0b53b32edea3f6994ce70d624debfa439f0c 100644 (file)
@@ -601,6 +601,7 @@ init_optimization_passes (void)
      vectorizer creates alias relations that are not supported by
      pass_may_alias.  */
   NEXT_PASS (pass_complete_unroll);
+  NEXT_PASS (pass_loop_prefetch);
   NEXT_PASS (pass_iv_optimize);
   NEXT_PASS (pass_tree_loop_done);
   *p = NULL;
index e769cba2fe45ba033988c8e087034ad48131c9ec..d6065e7ca01b8e3f3f3af13b4bdf586b84ea18a5 100644 (file)
@@ -107,6 +107,7 @@ DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH    , "tree loop unswitching")
 DEFTIMEVAR (TV_COMPLETE_UNROLL       , "complete unrolling")
 DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
 DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
+DEFTIMEVAR (TV_TREE_PREFETCH        , "tree prefetching")
 DEFTIMEVAR (TV_TREE_LOOP_IVOPTS             , "tree iv optimization")
 DEFTIMEVAR (TV_TREE_LOOP_INIT       , "tree loop init")
 DEFTIMEVAR (TV_TREE_LOOP_FINI       , "tree loop fini")
index 4619d1dbad09d1abaaeb9ab5db3330bad4235340..76667a6edc17d6301898de34c772a0602093eb26 100644 (file)
@@ -45,6 +45,7 @@ Boston, MA 02110-1301, USA.  */
 #include "cfglayout.h"
 #include "hashtab.h"
 #include "tree-ssa-propagate.h"
+#include "tree-scalar-evolution.h"
 
 /* Remove any fallthru edge from EV.  Return true if an edge was removed.  */
 
@@ -559,23 +560,26 @@ cleanup_tree_cfg (void)
 void
 cleanup_tree_cfg_loop (void)
 {
-  bitmap changed_bbs = BITMAP_ALLOC (NULL);
+  bool changed = cleanup_tree_cfg ();
 
-  cleanup_tree_cfg ();
-
-  fix_loop_structure (current_loops, changed_bbs);
-  calculate_dominance_info (CDI_DOMINATORS);
+  if (changed)
+    {
+      bitmap changed_bbs = BITMAP_ALLOC (NULL);
+      fix_loop_structure (current_loops, changed_bbs);
+      calculate_dominance_info (CDI_DOMINATORS);
 
-  /* This usually does nothing.  But sometimes parts of cfg that originally
-     were inside a loop get out of it due to edge removal (since they
-     become unreachable by back edges from latch).  */
-  rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
+      /* This usually does nothing.  But sometimes parts of cfg that originally
+        were inside a loop get out of it due to edge removal (since they
+        become unreachable by back edges from latch).  */
+      rewrite_into_loop_closed_ssa (changed_bbs, TODO_update_ssa);
 
-  BITMAP_FREE (changed_bbs);
+      BITMAP_FREE (changed_bbs);
 
 #ifdef ENABLE_CHECKING
-  verify_loop_structure (current_loops);
+      verify_loop_structure (current_loops);
 #endif
+      scev_reset ();
+    }
 }
 
 /* Merge the PHI nodes at BB into those at BB's sole successor.  */
index 0e4824e93217c13bad0c6cdbb81ae473b9a0390d..7774c3b42b7dc88664be7b284280fdc9b8449e84 100644 (file)
@@ -667,6 +667,17 @@ extern void replace_exp (use_operand_p, tree);
 extern bool may_propagate_copy (tree, tree);
 extern bool may_propagate_copy_into_asm (tree);
 
+/* Affine iv.  */
+
+typedef struct
+{
+  /* Iv = BASE + STEP * i.  */
+  tree base, step;
+
+  /* True if this iv does not overflow.  */
+  bool no_overflow;
+} affine_iv;
+
 /* Description of number of iterations of a loop.  All the expressions inside
    the structure can be evaluated at the end of the loop's preheader
    (and due to ssa form, also anywhere inside the body of the loop).  */
@@ -697,6 +708,15 @@ struct tree_niter_desc
                           MAX_SIGNED_INT.  However if the (n <= 0) assumption
                           is eliminated (by looking at the guard on entry of
                           the loop), then the information would be lost.  */
+
+  /* The simplified shape of the exit condition.  The loop exits if
+     CONTROL CMP BOUND is false, where CMP is one of NE_EXPR,
+     LT_EXPR, or GT_EXPR, and step of CONTROL is positive if CMP is
+     LT_EXPR and negative if CMP is GT_EXPR.  This information is used
+     by loop unrolling.  */
+  affine_iv control;
+  tree bound;
+  enum tree_code cmp;
 };
 
 /* In tree-vectorizer.c */
@@ -711,6 +731,7 @@ void tree_ssa_lim (struct loops *);
 void tree_ssa_unswitch_loops (struct loops *);
 void canonicalize_induction_variables (struct loops *);
 void tree_unroll_loops_completely (struct loops *, bool);
+void tree_ssa_prefetch_arrays (struct loops *);
 void remove_empty_loops (struct loops *);
 void tree_ssa_iv_optimize (struct loops *);
 
@@ -748,6 +769,10 @@ struct loop *tree_ssa_loop_version (struct loops *, struct loop *, tree,
 tree expand_simple_operations (tree);
 void substitute_in_loop_info (struct loop *, tree, tree);
 edge single_dom_exit (struct loop *);
+bool can_unroll_loop_p (struct loop *loop, unsigned factor,
+                       struct tree_niter_desc *niter);
+void tree_unroll_loop (struct loops *, struct loop *, unsigned,
+                      edge, struct tree_niter_desc *);
 
 /* In tree-ssa-threadedge.c */
 extern bool potentially_threadable_block (basic_block);
index 78ec5e7b35645059ba8bb296383e45db6580047e..40d6c9305112c939689ebc0277cf449e374b4084 100644 (file)
@@ -1299,7 +1299,8 @@ typedef struct value_expr_d
 typedef struct temp_expr_table_d 
 {
   var_map map;
-  void **version_info;         
+  void **version_info;
+  bitmap *expr_vars;
   value_expr_p *partition_dep_list;
   bitmap replaceable;
   bool saw_replaceable;
@@ -1344,6 +1345,7 @@ new_temp_expr_table (var_map map)
   t->map = map;
 
   t->version_info = XCNEWVEC (void *, num_ssa_names + 1);
+  t->expr_vars = XCNEWVEC (bitmap, num_ssa_names + 1);
   t->partition_dep_list = XCNEWVEC (value_expr_p,
                                     num_var_partitions (map) + 1);
 
@@ -1367,6 +1369,7 @@ free_temp_expr_table (temp_expr_table_p t)
 {
   value_expr_p p;
   tree *ret = NULL;
+  unsigned i;
 
 #ifdef ENABLE_CHECKING
   unsigned x;
@@ -1383,6 +1386,11 @@ free_temp_expr_table (temp_expr_table_p t)
   BITMAP_FREE (t->partition_in_use);
   BITMAP_FREE (t->replaceable);
 
+  for (i = 0; i <= num_ssa_names; i++)
+    if (t->expr_vars[i])
+      BITMAP_FREE (t->expr_vars[i]);
+  free (t->expr_vars);
+
   free (t->partition_dep_list);
   if (t->saw_replaceable)
     ret = (tree *)t->version_info;
@@ -1545,11 +1553,12 @@ add_dependance (temp_expr_table_p tab, int version, tree var)
 static bool
 check_replaceable (temp_expr_table_p tab, tree stmt)
 {
-  tree var, def;
+  tree var, def, basevar;
   int version;
   var_map map = tab->map;
   ssa_op_iter iter;
   tree call_expr;
+  bitmap def_vars = BITMAP_ALLOC (NULL), use_vars;
 
   if (TREE_CODE (stmt) != MODIFY_EXPR)
     return false;
@@ -1580,12 +1589,19 @@ check_replaceable (temp_expr_table_p tab, tree stmt)
     }
 
   version = SSA_NAME_VERSION (def);
+  basevar = SSA_NAME_VAR (def);
+  bitmap_set_bit (def_vars, DECL_UID (basevar));
 
   /* Add this expression to the dependency list for each use partition.  */
   FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
     {
       add_dependance (tab, version, var);
+
+      use_vars = tab->expr_vars[SSA_NAME_VERSION (var)];
+      if (use_vars)
+       bitmap_ior_into (def_vars, use_vars);
     }
+  tab->expr_vars[version] = def_vars;
 
   /* If there are VUSES, add a dependence on virtual defs.  */
   if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE))
@@ -1704,7 +1720,7 @@ static void
 find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
 {
   block_stmt_iterator bsi;
-  tree stmt, def;
+  tree stmt, def, use;
   stmt_ann_t ann;
   int partition;
   var_map map = tab->map;
@@ -1717,30 +1733,34 @@ find_replaceable_in_bb (temp_expr_table_p tab, basic_block bb)
       ann = stmt_ann (stmt);
 
       /* Determine if this stmt finishes an existing expression.  */
-      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_USE)
+      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
        {
-         if (tab->version_info[SSA_NAME_VERSION (def)])
+         unsigned ver = SSA_NAME_VERSION (use);
+
+         if (tab->version_info[ver])
            {
              bool same_root_var = false;
-             tree def2;
              ssa_op_iter iter2;
+             bitmap vars = tab->expr_vars[ver];
 
              /* See if the root variables are the same.  If they are, we
                 do not want to do the replacement to avoid problems with
                 code size, see PR tree-optimization/17549.  */
-             FOR_EACH_SSA_TREE_OPERAND (def2, stmt, iter2, SSA_OP_DEF)
-               if (SSA_NAME_VAR (def) == SSA_NAME_VAR (def2))
-                 {
-                   same_root_var = true;
-                   break;
-                 }
+             FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter2, SSA_OP_DEF)
+               {
+                 if (bitmap_bit_p (vars, DECL_UID (SSA_NAME_VAR (def))))
+                   {
+                     same_root_var = true;
+                     break;
+                   }
+               }
 
              /* Mark expression as replaceable unless stmt is volatile
                 or DEF sets the same root variable as STMT.  */
              if (!ann->has_volatile_ops && !same_root_var)
-               mark_replaceable (tab, def);
+               mark_replaceable (tab, use);
              else
-               finish_expr (tab, SSA_NAME_VERSION (def), false);
+               finish_expr (tab, ver, false);
            }
        }
       
index 5d49b6c75695715323f46752b87b8f7ef113fdae..baa60da96e38dfab358ff838d8e57f06612d99de 100644 (file)
@@ -247,6 +247,7 @@ extern struct tree_opt_pass pass_record_bounds;
 extern struct tree_opt_pass pass_if_conversion;
 extern struct tree_opt_pass pass_vectorize;
 extern struct tree_opt_pass pass_complete_unroll;
+extern struct tree_opt_pass pass_loop_prefetch;
 extern struct tree_opt_pass pass_iv_optimize;
 extern struct tree_opt_pass pass_tree_loop_done;
 extern struct tree_opt_pass pass_ch;
index 0fecaee2390ec1fb2fe3d4754ce3f6ad5b207559..f7749545f9aa589cf2e5bb6f800ad7bb05ebb8a6 100644 (file)
@@ -34,17 +34,6 @@ extern void gather_stats_on_scev_database (void);
 extern void scev_analysis (void);
 void scev_const_prop (void);
 
-/* Affine iv.  */
-
-typedef struct
-{
-  /* Iv = BASE + STEP * i.  */
-  tree base, step;
-
-  /* True if this iv does not overflow.  */
-  bool no_overflow;
-} affine_iv;
-
 extern bool simple_iv (struct loop *, tree, tree, affine_iv *, bool);
 
 #endif  /* GCC_TREE_SCALAR_EVOLUTION_H  */
index ab9971dfabf4a877b075c79738c83352220c072c..21d1ea14b143e14d46d2712eb28bddc85529a53d 100644 (file)
@@ -36,6 +36,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
 #include "tree-pass.h"
 #include "cfglayout.h"
 #include "tree-scalar-evolution.h"
+#include "params.h"
 
 /* Creates an induction variable with value BASE + STEP * iteration in LOOP.
    It is expected that neither BASE nor STEP are shared with other expressions
@@ -618,3 +619,328 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
 
   return true;
 }
+
+/* Build if (COND) goto THEN_LABEL; else goto ELSE_LABEL;  */
+
+static tree
+build_if_stmt (tree cond, tree then_label, tree else_label)
+{
+  return build3 (COND_EXPR, void_type_node,
+                cond,
+                build1 (GOTO_EXPR, void_type_node, then_label),
+                build1 (GOTO_EXPR, void_type_node, else_label));
+}
+
+/* Returns true if we can unroll LOOP FACTOR times.  Number
+   of iterations of the loop is returned in NITER.  */
+
+bool
+can_unroll_loop_p (struct loop *loop, unsigned factor,
+                  struct tree_niter_desc *niter)
+{
+  edge exit;
+
+  /* Check whether unrolling is possible.  We only want to unroll loops
+     for which we are able to determine the number of iterations.  We
+     also want to split the extra iterations of the loop from its end,
+     therefore we require that the loop has precisely one
+     exit.  */
+
+  exit = single_dom_exit (loop);
+  if (!exit)
+    return false;
+
+  if (!number_of_iterations_exit (loop, exit, niter, false)
+      || niter->cmp == ERROR_MARK)
+    return false;
+
+  /* And of course, we must be able to duplicate the loop.  */
+  if (!can_duplicate_loop_p (loop))
+    return false;
+
+  /* The final loop should be small enough.  */
+  if (tree_num_loop_insns (loop) * factor
+      > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
+    return false;
+
+  return true;
+}
+
+/* Determines the conditions that control execution of LOOP unrolled FACTOR
+   times.  DESC is number of iterations of LOOP.  ENTER_COND is set to
+   condition that must be true if the main loop can be entered.
+   EXIT_BASE, EXIT_STEP, EXIT_CMP and EXIT_BOUND are set to values describing
+   how the exit from the unrolled loop should be controlled.  */
+
+static void
+determine_exit_conditions (struct loop *loop, struct tree_niter_desc *desc,
+                          unsigned factor, tree *enter_cond,
+                          tree *exit_base, tree *exit_step,
+                          enum tree_code *exit_cmp, tree *exit_bound)
+{
+  tree stmts;
+  tree base = desc->control.base;
+  tree step = desc->control.step;
+  tree bound = desc->bound;
+  tree type = TREE_TYPE (base);
+  tree bigstep, delta;
+  tree min = lower_bound_in_type (type, type);
+  tree max = upper_bound_in_type (type, type);
+  enum tree_code cmp = desc->cmp;
+  tree cond = boolean_true_node, assum;
+
+  *enter_cond = boolean_false_node;
+  *exit_base = NULL_TREE;
+  *exit_step = NULL_TREE;
+  *exit_cmp = ERROR_MARK;
+  *exit_bound = NULL_TREE;
+  gcc_assert (cmp != ERROR_MARK);
+
+  /* We only need to be correct when we answer question
+     "Do at least FACTOR more iterations remain?" in the unrolled loop.
+     Thus, transforming BASE + STEP * i <> BOUND to
+     BASE + STEP * i < BOUND is ok.  */
+  if (cmp == NE_EXPR)
+    {
+      if (tree_int_cst_sign_bit (step))
+       cmp = GT_EXPR;
+      else
+       cmp = LT_EXPR;
+    }
+  else if (cmp == LT_EXPR)
+    {
+      gcc_assert (!tree_int_cst_sign_bit (step));
+    }
+  else if (cmp == GT_EXPR)
+    {
+      gcc_assert (tree_int_cst_sign_bit (step));
+    }
+  else
+    gcc_unreachable ();
+
+  /* The main body of the loop may be entered iff:
+
+     1) desc->may_be_zero is false.
+     2) it is possible to check that there are at least FACTOR iterations
+       of the loop, i.e., BOUND - STEP * (FACTOR - 1) does not overflow.
+     3) # of iterations is at least FACTOR  */
+
+  if (!zero_p (desc->may_be_zero))
+    cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+                       invert_truthvalue (desc->may_be_zero),
+                       cond);
+
+  bigstep = fold_build2 (MULT_EXPR, type, step,
+                        build_int_cst_type (type, factor));
+  delta = fold_build2 (MINUS_EXPR, type, bigstep, step);
+  if (cmp == LT_EXPR)
+    assum = fold_build2 (GE_EXPR, boolean_type_node,
+                        bound,
+                        fold_build2 (PLUS_EXPR, type, min, delta));
+  else
+    assum = fold_build2 (LE_EXPR, boolean_type_node,
+                        bound,
+                        fold_build2 (PLUS_EXPR, type, max, delta));
+  cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
+
+  bound = fold_build2 (MINUS_EXPR, type, bound, delta);
+  assum = fold_build2 (cmp, boolean_type_node, base, bound);
+  cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, assum, cond);
+
+  cond = force_gimple_operand (unshare_expr (cond), &stmts, false, NULL_TREE);
+  if (stmts)
+    bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+  /* cond now may be a gimple comparison, which would be OK, but also any
+     other gimple rhs (say a && b).  In this case we need to force it to
+     operand.  */
+  if (!is_gimple_condexpr (cond))
+    {
+      cond = force_gimple_operand (cond, &stmts, true, NULL_TREE);
+      if (stmts)
+       bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+    }
+  *enter_cond = cond;
+
+  base = force_gimple_operand (unshare_expr (base), &stmts, true, NULL_TREE);
+  if (stmts)
+    bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+  bound = force_gimple_operand (unshare_expr (bound), &stmts, true, NULL_TREE);
+  if (stmts)
+    bsi_insert_on_edge_immediate_loop (loop_preheader_edge (loop), stmts);
+
+  *exit_base = base;
+  *exit_step = bigstep;
+  *exit_cmp = cmp;
+  *exit_bound = bound;
+}
+
+/* Unroll LOOP FACTOR times.  LOOPS is the loops tree.  DESC describes
+   the number of iterations of LOOP.  EXIT is the exit of the loop to
+   which DESC corresponds.
+   
+   If N is the number of iterations of the loop and MAY_BE_ZERO is the
+   condition under which the loop exits in the first iteration even if N != 0,
+   
+   while (1)
+     {
+       x = phi (init, next);
+
+       pre;
+       if (st)
+         break;
+       post;
+     }
+
+   becomes (with possibly the exit conditions formulated a bit differently,
+   avoiding the need to create a new iv):
+   
+   if (MAY_BE_ZERO || N < FACTOR)
+     goto rest;
+
+   do
+     {
+       x = phi (init, next);
+
+       pre;
+       post;
+       pre;
+       post;
+       ...
+       pre;
+       post;
+       N -= FACTOR;
+       
+     } while (N >= FACTOR);
+
+   rest:
+     init' = phi (init, x);
+
+   while (1)
+     {
+       x = phi (init', next);
+
+       pre;
+       if (st)
+         break;
+       post;
+     } */
+
+void
+tree_unroll_loop (struct loops *loops, struct loop *loop, unsigned factor,
+                 edge exit, struct tree_niter_desc *desc)
+{
+  tree dont_exit, exit_if, ctr_before, ctr_after;
+  tree enter_main_cond, exit_base, exit_step, exit_bound;
+  enum tree_code exit_cmp;
+  tree phi_old_loop, phi_new_loop, phi_rest, init, next, new_init, var;
+  struct loop *new_loop;
+  basic_block rest, exit_bb;
+  edge old_entry, new_entry, old_latch, precond_edge, new_exit;
+  edge nonexit, new_nonexit;
+  block_stmt_iterator bsi;
+  use_operand_p op;
+  bool ok;
+  unsigned est_niter;
+  sbitmap wont_exit;
+
+  est_niter = expected_loop_iterations (loop);
+  determine_exit_conditions (loop, desc, factor,
+                            &enter_main_cond, &exit_base, &exit_step,
+                            &exit_cmp, &exit_bound);
+
+  new_loop = loop_version (loops, loop, enter_main_cond, NULL, true);
+  gcc_assert (new_loop != NULL);
+  update_ssa (TODO_update_ssa);
+
+  /* Unroll the loop and remove the old exits.  */
+  dont_exit = ((exit->flags & EDGE_TRUE_VALUE)
+              ? boolean_false_node
+              : boolean_true_node);
+  if (exit == EDGE_SUCC (exit->src, 0))
+    nonexit = EDGE_SUCC (exit->src, 1);
+  else
+    nonexit = EDGE_SUCC (exit->src, 0);
+  nonexit->probability = REG_BR_PROB_BASE;
+  exit->probability = 0;
+  nonexit->count += exit->count;
+  exit->count = 0;
+  exit_if = last_stmt (exit->src);
+  COND_EXPR_COND (exit_if) = dont_exit;
+  update_stmt (exit_if);
+      
+  wont_exit = sbitmap_alloc (factor);
+  sbitmap_ones (wont_exit);
+  ok = tree_duplicate_loop_to_header_edge
+         (loop, loop_latch_edge (loop), loops, factor - 1,
+          wont_exit, NULL, NULL, NULL, DLTHE_FLAG_UPDATE_FREQ);
+  free (wont_exit);
+  gcc_assert (ok);
+  update_ssa (TODO_update_ssa);
+
+  /* Prepare the cfg and update the phi nodes.  */
+  rest = loop_preheader_edge (new_loop)->src;
+  precond_edge = single_pred_edge (rest);
+  loop_split_edge_with (loop_latch_edge (loop), NULL);
+  exit_bb = single_pred (loop->latch);
+
+  new_exit = make_edge (exit_bb, rest, EDGE_FALSE_VALUE);
+  new_exit->count = loop_preheader_edge (loop)->count;
+  est_niter = est_niter / factor + 1;
+  new_exit->probability = REG_BR_PROB_BASE / est_niter;
+
+  new_nonexit = single_pred_edge (loop->latch);
+  new_nonexit->flags = EDGE_TRUE_VALUE;
+  new_nonexit->probability = REG_BR_PROB_BASE - new_exit->probability;
+
+  old_entry = loop_preheader_edge (loop);
+  new_entry = loop_preheader_edge (new_loop);
+  old_latch = loop_latch_edge (loop);
+  for (phi_old_loop = phi_nodes (loop->header),
+       phi_new_loop = phi_nodes (new_loop->header);
+       phi_old_loop;
+       phi_old_loop = PHI_CHAIN (phi_old_loop),
+       phi_new_loop = PHI_CHAIN (phi_new_loop))
+    {
+      init = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_entry);
+      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_new_loop, new_entry);
+      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
+      next = PHI_ARG_DEF_FROM_EDGE (phi_old_loop, old_latch);
+
+      /* Prefer using original variable as a base for the new ssa name.
+        This is necessary for virtual ops, and useful in order to avoid
+        losing debug info for real ops.  */
+      if (TREE_CODE (next) == SSA_NAME)
+       var = SSA_NAME_VAR (next);
+      else if (TREE_CODE (init) == SSA_NAME)
+       var = SSA_NAME_VAR (init);
+      else
+       {
+         var = create_tmp_var (TREE_TYPE (init), "unrinittmp");
+         add_referenced_tmp_var (var);
+       }
+
+      new_init = make_ssa_name (var, NULL_TREE);
+      phi_rest = create_phi_node (new_init, rest);
+      SSA_NAME_DEF_STMT (new_init) = phi_rest;
+
+      add_phi_arg (phi_rest, init, precond_edge);
+      add_phi_arg (phi_rest, next, new_exit);
+      SET_USE (op, new_init);
+    }
+
+  /* Finally create the new counter for number of iterations and add the new
+     exit instruction.  */
+  bsi = bsi_last (exit_bb);
+  create_iv (exit_base, exit_step, NULL_TREE, loop,
+            &bsi, true, &ctr_before, &ctr_after);
+  exit_if = build_if_stmt (build2 (exit_cmp, boolean_type_node, ctr_after,
+                                  exit_bound),
+                          tree_block_label (loop->latch),
+                          tree_block_label (rest));
+  bsi_insert_after (&bsi, exit_if, BSI_NEW_STMT);
+
+  verify_flow_info ();
+  verify_dominators (CDI_DOMINATORS);
+  verify_loop_structure (loops);
+  verify_loop_closed_ssa ();
+}
index 7566e7cad49b5be9379951bcc49adcf34c5c7a93..f913df3141b989cffc4fc2e322870506979803be 100644 (file)
@@ -140,6 +140,10 @@ number_of_iterations_ne (tree type, affine_iv *iv, tree final,
   tree niter_type = unsigned_type_for (type);
   tree s, c, d, bits, assumption, tmp, bound;
 
+  niter->control = *iv;
+  niter->bound = final;
+  niter->cmp = NE_EXPR;
+
   /* Rearrange the terms so that we get inequality s * i <> c, with s
      positive.  Also cast everything to the unsigned type.  */
   if (tree_int_cst_sign_bit (iv->step))
@@ -410,6 +414,19 @@ number_of_iterations_lt (tree type, affine_iv *iv0, affine_iv *iv1,
   tree niter_type = unsigned_type_for (type);
   tree delta, step, s;
 
+  if (nonzero_p (iv0->step))
+    {
+      niter->control = *iv0;
+      niter->cmp = LT_EXPR;
+      niter->bound = iv1->base;
+    }
+  else
+    {
+      niter->control = *iv1;
+      niter->cmp = GT_EXPR;
+      niter->bound = iv0->base;
+    }
+
   delta = fold_build2 (MINUS_EXPR, niter_type,
                       fold_convert (niter_type, iv1->base),
                       fold_convert (niter_type, iv0->base));
@@ -543,6 +560,9 @@ number_of_iterations_cond (tree type, affine_iv *iv0, enum tree_code code,
   niter->niter = NULL_TREE;
   niter->additional_info = boolean_true_node;
 
+  niter->bound = NULL_TREE;
+  niter->cmp = ERROR_MARK;
+
   /* Make < comparison from > ones, and for NE_EXPR comparisons, ensure that
      the control variable is on lhs.  */
   if (code == GE_EXPR || code == GT_EXPR
index a735084803e2bfe9b25dff0c44442e59709aa7ee..60cdefcbe242bdcaab2b5fd1abfb0979a835bd17 100644 (file)
@@ -401,6 +401,40 @@ struct tree_opt_pass pass_complete_unroll =
   0                                    /* letter */
 };
 
+/* Prefetching.  */
+
+static void
+tree_ssa_loop_prefetch (void)
+{
+  if (!current_loops)
+    return;
+
+  tree_ssa_prefetch_arrays (current_loops);
+}
+
+static bool
+gate_tree_ssa_loop_prefetch (void)
+{
+  return flag_prefetch_loop_arrays == 1;
+}
+
+struct tree_opt_pass pass_loop_prefetch =
+{
+  "prefetch",                          /* name */
+  gate_tree_ssa_loop_prefetch,         /* gate */
+  tree_ssa_loop_prefetch,              /* execute */
+  NULL,                                        /* sub */
+  NULL,                                        /* next */
+  0,                                   /* static_pass_number */
+  TV_TREE_PREFETCH,                    /* tv_id */
+  PROP_cfg | PROP_ssa,                 /* properties_required */
+  0,                                   /* properties_provided */
+  0,                                   /* properties_destroyed */
+  0,                                   /* todo_flags_start */
+  TODO_dump_func | TODO_verify_loops,  /* todo_flags_finish */
+  0                                    /* letter */
+};
+
 /* Induction variable optimizations.  */
 
 static void