* doc/generic.texi (ANNOTATE_EXPR): Document 3rd operand.
* cfgloop.h (struct loop): Add unroll field.
* function.h (struct function): Add has_unroll bitfield.
* gimplify.c (gimple_boolify) <ANNOTATE_EXPR>: Deal with unroll kind.
(gimplify_expr) <ANNOTATE_EXPR>: Propagate 3rd operand.
* loop-init.c (pass_loop2::gate): Return true if cfun->has_unroll.
(pass_rtl_unroll_loops::gate): Likewise.
* loop-unroll.c (decide_unrolling): Tweak note message. Skip loops
for which loop->unroll==1.
(decide_unroll_constant_iterations): Use note for consistency and
take loop->unroll into account. Return early if loop->unroll is set.
Fix thinko in existing test.
(decide_unroll_runtime_iterations): Use note for consistency and
take loop->unroll into account.
(decide_unroll_stupid): Likewise.
* lto-streamer-in.c (input_cfg): Read loop->unroll.
* lto-streamer-out.c (output_cfg): Write loop->unroll.
* tree-cfg.c (replace_loop_annotate_in_block) <annot_expr_unroll_kind>:
New case.
(replace_loop_annotate) <annot_expr_unroll_kind>: Likewise.
(print_loop): Print loop->unroll if set.
* tree-core.h (enum annot_expr_kind): Add annot_expr_unroll_kind.
* tree-inline.c (copy_loops): Copy unroll and set cfun->has_unroll.
* tree-pretty-print.c (dump_generic_node) <annot_expr_unroll_kind>:
New case.
* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Bail out if
loop->unroll is set and smaller than the trip count. Otherwise bypass
entirely the heuristics if loop->unroll is set. Remove dead note.
Fix off-by-one bug in other note.
(try_peel_loop): Bail out if loop->unroll is set. Fix formatting.
(tree_unroll_loops_completely_1): Force unrolling if loop->unroll
is greater than 1.
(tree_unroll_loops_completely): Make static.
(pass_complete_unroll::execute): Use correct type for variable.
(pass_complete_unrolli::execute): Fix formatting.
* tree.def (ANNOTATE_EXPR): Add 3rd operand.
ada/
* gcc-interface/trans.c (gnat_gimplify_stmt) <LOOP_STMT>: Pass 3rd
operand to ANNOTATE_EXPR and also pass unrolling hints.
c/
* c-parser.c (c_parser_while_statement): Pass 3rd operand to
ANNOTATE_EXPR.
(c_parser_do_statement): Likewise.
(c_parser_for_statement): Likewise.
cp/
* pt.c (tsubst_expr) <ANNOTATE_EXPR>: Recurse on 3rd operand.
* semantics.c (finish_while_stmt_cond): Pass 3rd operand to
ANNOTATE_EXPR.
(finish_do_stmt): Likewise.
(finish_for_cond): Likewise.
fortran/
* trans-stmt.c (gfc_trans_forall_loop): Pass 3rd operand to
ANNOTATE_EXPR.
From-SVN: r255106
+2017-11-23 Mike Stump <mikestump@comcast.net>
+ Eric Botcazou <ebotcazou@adacore.com>
+
+ * doc/generic.texi (ANNOTATE_EXPR): Document 3rd operand.
+ * cfgloop.h (struct loop): Add unroll field.
+ * function.h (struct function): Add has_unroll bitfield.
+ * gimplify.c (gimple_boolify) <ANNOTATE_EXPR>: Deal with unroll kind.
+ (gimplify_expr) <ANNOTATE_EXPR>: Propagate 3rd operand.
+ * loop-init.c (pass_loop2::gate): Return true if cfun->has_unroll.
+ (pass_rtl_unroll_loops::gate): Likewise.
+ * loop-unroll.c (decide_unrolling): Tweak note message. Skip loops
+ for which loop->unroll==1.
+ (decide_unroll_constant_iterations): Use note for consistency and
+ take loop->unroll into account. Return early if loop->unroll is set.
+ Fix thinko in existing test.
+ (decide_unroll_runtime_iterations): Use note for consistency and
+ take loop->unroll into account.
+ (decide_unroll_stupid): Likewise.
+ * lto-streamer-in.c (input_cfg): Read loop->unroll.
+ * lto-streamer-out.c (output_cfg): Write loop->unroll.
+ * tree-cfg.c (replace_loop_annotate_in_block) <annot_expr_unroll_kind>:
+ New case.
+ (replace_loop_annotate) <annot_expr_unroll_kind>: Likewise.
+ (print_loop): Print loop->unroll if set.
+ * tree-core.h (enum annot_expr_kind): Add annot_expr_unroll_kind.
+ * tree-inline.c (copy_loops): Copy unroll and set cfun->has_unroll.
+ * tree-pretty-print.c (dump_generic_node) <annot_expr_unroll_kind>:
+ New case.
+ * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Bail out if
+ loop->unroll is set and smaller than the trip count. Otherwise bypass
+ entirely the heuristics if loop->unroll is set. Remove dead note.
+ Fix off-by-one bug in other note.
+ (try_peel_loop): Bail out if loop->unroll is set. Fix formatting.
+ (tree_unroll_loops_completely_1): Force unrolling if loop->unroll
+ is greater than 1.
+ (tree_unroll_loops_completely): Make static.
+ (pass_complete_unroll::execute): Use correct type for variable.
+ (pass_complete_unrolli::execute): Fix formatting.
+ * tree.def (ANNOTATE_EXPR): Add 3rd operand.
+
2017-11-23 Sergey Shalnov <Sergey.Shalnov@intel.com>
* config/i386/i386.h (TARGET_PREFER_AVX256): Also
+2017-11-23 Mike Stump <mikestump@comcast.net>
+ Eric Botcazou <ebotcazou@adacore.com>
+
+ * gcc-interface/trans.c (gnat_gimplify_stmt) <LOOP_STMT>: Pass 3rd
+ operand to ANNOTATE_EXPR and also pass unrolling hints.
+
2017-11-20 Eric Botcazou <ebotcazou@adacore.com>
PR ada/83016
{
/* Deal with the optimization hints. */
if (LOOP_STMT_IVDEP (stmt))
- gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+ gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
build_int_cst (integer_type_node,
- annot_expr_ivdep_kind));
+ annot_expr_ivdep_kind),
+ integer_zero_node);
+ if (LOOP_STMT_NO_UNROLL (stmt))
+ gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+ build_int_cst (integer_type_node,
+ annot_expr_unroll_kind),
+ integer_one_node);
+ if (LOOP_STMT_UNROLL (stmt))
+ gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+ build_int_cst (integer_type_node,
+ annot_expr_unroll_kind),
+ build_int_cst (NULL_TREE, USHRT_MAX));
if (LOOP_STMT_NO_VECTOR (stmt))
- gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+ gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
build_int_cst (integer_type_node,
- annot_expr_no_vector_kind));
+ annot_expr_no_vector_kind),
+ integer_zero_node);
if (LOOP_STMT_VECTOR (stmt))
- gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
+ gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond,
build_int_cst (integer_type_node,
- annot_expr_vector_kind));
+ annot_expr_vector_kind),
+ integer_zero_node);
gnu_cond
= build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE,
+2017-11-23 Mike Stump <mikestump@comcast.net>
+ Eric Botcazou <ebotcazou@adacore.com>
+
+ * c-parser.c (c_parser_while_statement): Pass 3rd operand to
+ ANNOTATE_EXPR.
+ (c_parser_do_statement): Likewise.
+ (c_parser_for_statement): Likewise.
+
2017-11-22 David Malcolm <dmalcolm@redhat.com>
PR c++/62170
"%<_Cilk_spawn%> statement cannot be used as a condition for while statement"))
cond = error_mark_node;
if (ivdep && cond != error_mark_node)
- cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
build_int_cst (integer_type_node,
- annot_expr_ivdep_kind));
+ annot_expr_ivdep_kind),
+ integer_zero_node);
save_break = c_break_label;
c_break_label = NULL_TREE;
save_cont = c_cont_label;
"%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement"))
cond = error_mark_node;
if (ivdep && cond != error_mark_node)
- cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
build_int_cst (integer_type_node,
- annot_expr_ivdep_kind));
+ annot_expr_ivdep_kind),
+ integer_zero_node);
if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>"))
c_parser_skip_to_end_of_block_or_statement (parser);
c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false);
"expected %<;%>");
}
if (ivdep && cond != error_mark_node)
- cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
build_int_cst (integer_type_node,
- annot_expr_ivdep_kind));
+ annot_expr_ivdep_kind),
+ integer_zero_node);
}
/* Parse the increment expression (the third expression in a
for-statement). In the case of a foreach-statement, this is
/* True if the loop is part of an oacc kernels region. */
unsigned in_oacc_kernels_region : 1;
+ /* The number of times to unroll the loop. 0 means no information
+ given, just do what we always do. A value of 1 means don't unroll
+ the loop. */
+ unsigned short unroll;
+
/* For SIMD loops, this is a unique identifier of the loop, referenced
by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE
builtins. */
+2017-11-23 Mike Stump <mikestump@comcast.net>
+ Eric Botcazou <ebotcazou@adacore.com>
+
+ * pt.c (tsubst_expr) <ANNOTATE_EXPR>: Recurse on 3rd operand.
+ * semantics.c (finish_while_stmt_cond): Pass 3rd operand to
+ ANNOTATE_EXPR.
+ (finish_do_stmt): Likewise.
+ (finish_for_cond): Likewise.
+
2017-11-22 Jakub Jelinek <jakub@redhat.com>
PR c++/82401
case ANNOTATE_EXPR:
tmp = RECUR (TREE_OPERAND (t, 0));
- RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
- TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1))));
+ RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR,
+ TREE_TYPE (tmp), tmp,
+ RECUR (TREE_OPERAND (t, 1)),
+ RECUR (TREE_OPERAND (t, 2))));
default:
gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t)));
finish_cond (&WHILE_COND (while_stmt), cond);
begin_maybe_infinite_loop (cond);
if (ivdep && cond != error_mark_node)
- WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR,
+ WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR,
TREE_TYPE (WHILE_COND (while_stmt)),
WHILE_COND (while_stmt),
build_int_cst (integer_type_node,
- annot_expr_ivdep_kind));
+ annot_expr_ivdep_kind),
+ integer_zero_node);
simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt));
}
cond = maybe_convert_cond (cond);
end_maybe_infinite_loop (cond);
if (ivdep && cond != error_mark_node)
- cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
- build_int_cst (integer_type_node, annot_expr_ivdep_kind));
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ build_int_cst (integer_type_node, annot_expr_ivdep_kind),
+ integer_zero_node);
DO_COND (do_stmt) = cond;
}
finish_cond (&FOR_COND (for_stmt), cond);
begin_maybe_infinite_loop (cond);
if (ivdep && cond != error_mark_node)
- FOR_COND (for_stmt) = build2 (ANNOTATE_EXPR,
+ FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR,
TREE_TYPE (FOR_COND (for_stmt)),
FOR_COND (for_stmt),
build_int_cst (integer_type_node,
- annot_expr_ivdep_kind));
+ annot_expr_ivdep_kind),
+ integer_zero_node);
simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt));
}
@item ANNOTATE_EXPR
This node is used to attach markers to an expression. The first operand
is the annotated expression, the second is an @code{INTEGER_CST} with
-a value from @code{enum annot_expr_kind}.
+a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}.
@end table
+2017-11-23 Mike Stump <mikestump@comcast.net>
+ Eric Botcazou <ebotcazou@adacore.com>
+
+ * trans-stmt.c (gfc_trans_forall_loop): Pass 3rd operand to
+ ANNOTATE_EXPR.
+
2017-11-23 Paul Thomas <pault@gcc.gnu.org>
PR fortran/82814
cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node,
count, build_int_cst (TREE_TYPE (count), 0));
if (forall_tmp->do_concurrent)
- cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
+ cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond,
build_int_cst (integer_type_node,
- annot_expr_parallel_kind));
+ annot_expr_parallel_kind),
+ integer_zero_node);
tmp = build1_v (GOTO_EXPR, exit_label);
tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node,
nonzero value in loop->simduid. */
unsigned int has_simduid_loops : 1;
- /* Set when the tail call has been identified. */
+ /* Nonzero when the tail call has been identified. */
unsigned int tail_call_marked : 1;
+
+ /* Nonzero if the current function contains a #pragma GCC unroll. */
+ unsigned int has_unroll : 1;
};
/* Add the decl D to the local_decls list of FUN. */
switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)))
{
case annot_expr_ivdep_kind:
+ case annot_expr_unroll_kind:
case annot_expr_no_vector_kind:
case annot_expr_vector_kind:
case annot_expr_parallel_kind:
{
tree cond = TREE_OPERAND (*expr_p, 0);
tree kind = TREE_OPERAND (*expr_p, 1);
+ tree data = TREE_OPERAND (*expr_p, 2);
tree type = TREE_TYPE (cond);
if (!INTEGRAL_TYPE_P (type))
{
tree tmp = create_tmp_var (type);
gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p));
gcall *call
- = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind);
+ = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data);
gimple_call_set_lhs (call, tmp);
gimplify_seq_add_stmt (pre_p, call);
*expr_p = tmp;
&& (flag_move_loop_invariants
|| flag_unswitch_loops
|| flag_unroll_loops
- || (flag_branch_on_count_reg
- && targetm.have_doloop_end ())))
+ || (flag_branch_on_count_reg && targetm.have_doloop_end ())
+ || cfun->has_unroll))
return true;
else
{
/* opt_pass methods: */
virtual bool gate (function *)
{
- return (flag_unroll_loops || flag_unroll_all_loops);
+ return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll);
}
virtual unsigned int execute (function *);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, locus,
- ";; *** Considering loop %d at BB %d for "
- "unrolling ***\n",
- loop->num, loop->header->index);
+ "considering unrolling loop %d at BB %d\n",
+ loop->num, loop->header->index);
+
+ if (loop->unroll == 1)
+ {
+ if (dump_file)
+ fprintf (dump_file,
+ ";; Not unrolling loop, user didn't want it unrolled\n");
+ continue;
+ }
/* Do not peel cold areas. */
if (optimize_loop_for_size_p (loop))
loop->ninsns = num_loop_insns (loop);
loop->av_ninsns = average_num_loop_insns (loop);
- /* Try transformations one by one in decreasing order of
- priority. */
-
+ /* Try transformations one by one in decreasing order of priority. */
decide_unroll_constant_iterations (loop, flags);
if (loop->lpt_decision.decision == LPT_NONE)
decide_unroll_runtime_iterations (loop, flags);
struct niter_desc *desc;
widest_int iterations;
- if (!(flags & UAP_UNROLL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
+ /* If we were not asked to unroll this loop, just return back silently. */
+ if (!(flags & UAP_UNROLL) && !loop->unroll)
+ return;
- if (dump_file)
- fprintf (dump_file,
- "\n;; Considering unrolling loop with constant "
- "number of iterations\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE,
+ "considering unrolling loop with constant "
+ "number of iterations\n");
/* nunroll = total number of copies of the original loop body in
- unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
+ unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */
nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns;
nunroll_by_av
= PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns;
return;
}
+ /* Check for an explicit unrolling factor. */
+ if (loop->unroll)
+ {
+ /* However we cannot unroll completely at the RTL level a loop with
+ constant number of iterations; it should have been peeled instead. */
+ if ((unsigned) loop->unroll - 1 > desc->niter - 2)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Loop should have been peeled\n");
+ }
+ else
+ {
+ loop->lpt_decision.decision = LPT_UNROLL_CONSTANT;
+ loop->lpt_decision.times = loop->unroll - 1;
+ }
+ return;
+ }
+
/* Check whether the loop rolls enough to consider.
Consult also loop bounds and profile; in the case the loop has more
than one exit it may well loop less than determined maximal number
best_copies = 2 * nunroll + 10;
i = 2 * nunroll + 2;
- if (i - 1 >= desc->niter)
+ if (i > desc->niter - 2)
i = desc->niter - 2;
for (; i >= nunroll - 1; i--)
struct niter_desc *desc;
widest_int iterations;
- if (!(flags & UAP_UNROLL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
+ /* If we were not asked to unroll this loop, just return back silently. */
+ if (!(flags & UAP_UNROLL) && !loop->unroll)
+ return;
- if (dump_file)
- fprintf (dump_file,
- "\n;; Considering unrolling loop with runtime "
- "computable number of iterations\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE,
+ "considering unrolling loop with runtime-"
+ "computable number of iterations\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
if (targetm.loop_unroll_adjust)
nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+ if (loop->unroll)
+ nunroll = loop->unroll;
+
/* Skip big loops. */
if (nunroll <= 1)
{
return;
}
- /* Success; now force nunroll to be power of 2, as we are unable to
- cope with overflows in computation of number of iterations. */
+ /* Success; now force nunroll to be a power of 2, as code-gen
+ requires it and we are unable to cope with overflows in the
+ computation of the number of iterations. */
for (i = 1; 2 * i <= nunroll; i *= 2)
continue;
return seq;
}
-/* Unroll LOOP for which we are able to count number of iterations in runtime
- LOOP->LPT_DECISION.TIMES times. The transformation does this (with some
- extra care for case n < 0):
+/* Unroll LOOP for which we are able to count number of iterations in
+ runtime LOOP->LPT_DECISION.TIMES times. The times value must be a
+ power of two. The transformation does this (with some extra care
+ for case n < 0):
for (i = 0; i < n; i++)
body;
struct niter_desc *desc;
widest_int iterations;
- if (!(flags & UAP_UNROLL_ALL))
- {
- /* We were not asked to, just return back silently. */
- return;
- }
+ /* If we were not asked to unroll this loop, just return back silently. */
+ if (!(flags & UAP_UNROLL_ALL) && !loop->unroll)
+ return;
- if (dump_file)
- fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n");
+ if (dump_enabled_p ())
+ dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n");
/* nunroll = total number of copies of the original loop body in
unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */
if (targetm.loop_unroll_adjust)
nunroll = targetm.loop_unroll_adjust (nunroll, loop);
+ if (loop->unroll)
+ nunroll = loop->unroll;
+
/* Skip big loops. */
if (nunroll <= 1)
{
if (desc->simple_p && !desc->assumptions)
{
if (dump_file)
- fprintf (dump_file, ";; The loop is simple\n");
+ fprintf (dump_file, ";; Loop is simple\n");
return;
}
/* Read OMP SIMD related info. */
loop->safelen = streamer_read_hwi (ib);
+ loop->unroll = streamer_read_hwi (ib);
loop->dont_vectorize = streamer_read_hwi (ib);
loop->force_vectorize = streamer_read_hwi (ib);
loop->simduid = stream_read_tree (ib, data_in);
/* Write OMP SIMD related info. */
streamer_write_hwi (ob, loop->safelen);
+ streamer_write_hwi (ob, loop->unroll);
streamer_write_hwi (ob, loop->dont_vectorize);
streamer_write_hwi (ob, loop->force_vectorize);
stream_write_tree (ob, loop->simduid, true);
+2017-11-23 Eric Botcazou <ebotcazou@adacore.com>
+
+ * gcc.dg/pr64277.c: Adjust scan.
+ * gcc.dg/tree-prof/unroll-1.c: Use detailed dump and adjust scan.
+ * gcc.dg/tree-ssa/cunroll-1.c: Adjust scan.
+ * gcc.dg/tree-ssa/cunroll-12.c: Likewise.
+ * gcc.dg/tree-ssa/cunroll-13.c: Likewise.
+ * gcc.dg/tree-ssa/cunroll-14.c: Likewise.
+ * gcc.dg/tree-ssa/cunroll-2.c: Likewise.
+ * gcc.dg/tree-ssa/cunroll-3.c: Likewise.
+ * gcc.dg/tree-ssa/cunroll-5.c: Likewise.
+ * gcc.dg/tree-ssa/loop-1.c: Likewise.
+ * gcc.dg/tree-ssa/loop-23.c: Likewise.
+ * gcc.dg/tree-ssa/pr61743-1.c: Likewise.
+ * gcc.dg/tree-ssa/pr61743-2.c: Likewise.
+ * gcc.dg/unroll-2.c (foo): Adjust message.
+ (foo2): Likewise.
+ * gcc.dg/unroll-3.c: Adjust scan.
+ * gcc.dg/unroll-4.c: Likewise.
+ * gcc.dg/unroll-5.c: Likewise.
+ * gcc.dg/unroll-7.c: Use detailed dump and adjust scan.
+ * gnat.dg/unroll1.ad[sb]: New test.
+ * gnat.dg/unroll2.ad[sb]: Likewise.
+ * gnat.dg/unroll3.ad[sb]: Likewise.
+
2017-11-23 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/ipa/inline-1.c: Update template.
/* PR tree-optimization/64277 */
/* { dg-do compile } */
/* { dg-options "-O3 -Wall -Werror -fdump-tree-cunroll-details" } */
+/* { dg-final { scan-tree-dump "loop with 4 iterations completely unrolled" "cunroll" } } */
/* { dg-final { scan-tree-dump "loop with 5 iterations completely unrolled" "cunroll" } } */
-/* { dg-final { scan-tree-dump "loop with 6 iterations completely unrolled" "cunroll" } } */
#if __SIZEOF_INT__ < 4
__extension__ typedef __INT32_TYPE__ int32_t;
-/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops -fno-peel-loops" } */
+/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */
void abort ();
int a[1000];
t();
return 0;
}
-/* { dg-final-use { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
+/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
a[i]=5;
}
/* Array bounds says the loop will not roll much. */
-/* { dg-final { scan-tree-dump "loop with 3 iterations completely unrolled" "cunrolli"} } */
+/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunrolli"} } */
/* { dg-final { scan-tree-dump "Last iteration exit edge was proved true." "cunrolli"} } */
for (int i=0;a->a[i];i++)
a->a[i]++;
}
-/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 6 iterations completely unrolled" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Loop 1 iterates 123454 times" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */
-/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 6 iterations completely unrolled" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
for (int i=0;i<5 && a->a[i];i++)
a->a[i]++;
}
-/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll" } } */
+/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Loop 1 iterates 4 times" 1 "cunroll" } } */
/* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */
}
}
/* We are not able to get rid of the final conditional because the loop has two exits. */
-/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunroll"} } */
+/* { dg-final { scan-tree-dump "loop with 1 iterations completely unrolled" "cunroll"} } */
}
/* If we start duplicating headers prior curoll, this loop will have 0 iterations. */
-/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunrolli"} } */
+/* { dg-final { scan-tree-dump "loop with 1 iterations completely unrolled" "cunrolli"} } */
a[i]=5;
}
/* Basic testcase for complette unrolling. */
-/* { dg-final { scan-tree-dump "loop with 6 iterations completely unrolled" "cunroll"} } */
+/* { dg-final { scan-tree-dump "loop with 5 iterations completely unrolled" "cunroll"} } */
/* { dg-final { scan-tree-dump "Exit condition of peeled iterations was eliminated." "cunroll"} } */
/* { dg-final { scan-tree-dump "Last iteration exit edge was proved true." "cunroll"} } */
/* We should be able to find out that the loop iterates four times and unroll it completely. */
/* { dg-final { scan-tree-dump-times "Added canonical iv to loop 1, 4 iterations" 1 "ivcanon"} } */
-/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll"} } */
+/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 1 "cunroll"} } */
/* { dg-final { scan-tree-dump-times "foo" 5 "optimized"} } */
/* Because hppa, ia64 and Windows targets include an external declaration
return sum;
}
-/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 1 "cunroll" } } */
-
+/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunroll" } } */
return 0;\r
}\r
\r
-/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 8 "cunroll" } } */\r
-/* { dg-final { scan-tree-dump-times "loop with 9 iterations completely unrolled" 2 "cunrolli" } } */\r
+/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 8 "cunroll" } } */\r
+/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunrolli" } } */\r
return 0;\r
}\r
\r
-/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 2 "cunroll" } } */\r
-/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunroll" } } */\r
+/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 2 "cunroll" } } */\r
+/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 2 "cunroll" } } */\r
{
int i;
bar();
- for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
+ for (i = 0; i < 2; i++) /* { dg-message "note: loop with 2 iterations completely unrolled" } */
{
a[i]= b[i] + 1;
}
int foo2(void)
{
int i;
- for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */
+ for (i = 0; i < 2; i++) /* { dg-message "note: loop with 2 iterations completely unrolled" } */
{
a[i]= b[i] + 1;
}
return 1;
}
-/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
return 1;
}
-/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
return 1;
}
-/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */
+/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-rtl-loop2_unroll -funroll-loops" } */
+/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details -funroll-loops" } */
/* { dg-require-effective-target int32plus } */
extern int *a;
/* { dg-final { scan-rtl-dump "number of iterations: .const_int 999999" "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump "upper bound: 999999" "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump "realistic bound: 999999" "loop2_unroll" } } */
-/* { dg-final { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
+/* { dg-final { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */
/* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */
--- /dev/null
+-- { dg-do compile }
+-- { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll-details -fdump-tree-cunrolli-details" }
+
+package body Unroll1 is
+
+ function "+" (X, Y : Sarray) return Sarray is
+ R : Sarray;
+ begin
+ for I in Sarray'Range loop
+ pragma Loop_Optimize (No_Unroll);
+ R(I) := X(I) + Y(I);
+ end loop;
+ return R;
+ end;
+
+ procedure Add (X, Y : Sarray; R : out Sarray) is
+ begin
+ for I in Sarray'Range loop
+ pragma Loop_Optimize (No_Unroll);
+ R(I) := X(I) + Y(I);
+ end loop;
+ end;
+
+end Unroll1;
+
+-- { dg-final { scan-tree-dump-times "Not unrolling loop .: user didn't want it unrolled completely" 2 "cunrolli" } }
+-- { dg-final { scan-rtl-dump-times "Not unrolling loop, user didn't want it unrolled" 2 "loop2_unroll" } }
--- /dev/null
+package Unroll1 is
+
+ type Sarray is array (1 .. 4) of Float;
+ for Sarray'Alignment use 16;
+
+ function "+" (X, Y : Sarray) return Sarray;
+ procedure Add (X, Y : Sarray; R : out Sarray);
+
+end Unroll1;
--- /dev/null
+-- { dg-do compile }
+-- { dg-options "-O2 -fdump-tree-cunrolli-details" }
+
+package body Unroll2 is
+
+ function "+" (X, Y : Sarray) return Sarray is
+ R : Sarray;
+ begin
+ for I in Sarray'Range loop
+ pragma Loop_Optimize (Unroll);
+ R(I) := X(I) + Y(I);
+ end loop;
+ return R;
+ end;
+
+ procedure Add (X, Y : Sarray; R : out Sarray) is
+ begin
+ for I in Sarray'Range loop
+ pragma Loop_Optimize (Unroll);
+ R(I) := X(I) + Y(I);
+ end loop;
+ end;
+
+end Unroll2;
+
+-- { dg-final { scan-tree-dump-times "note: loop with 3 iterations completely unrolled" 2 "cunrolli" } }
--- /dev/null
+package Unroll2 is
+
+ type Sarray is array (1 .. 4) of Float;
+ for Sarray'Alignment use 16;
+
+ function "+" (X, Y : Sarray) return Sarray;
+ procedure Add (X, Y : Sarray; R : out Sarray);
+
+end Unroll2;
--- /dev/null
+-- { dg-do compile }
+-- { dg-options "-O -fdump-tree-cunroll-details" }
+
+package body Unroll3 is
+
+ function "+" (X, Y : Sarray) return Sarray is
+ R : Sarray;
+ begin
+ for I in Sarray'Range loop
+ pragma Loop_Optimize (Unroll);
+ R(I) := X(I) + Y(I);
+ end loop;
+ return R;
+ end;
+
+ procedure Add (X, Y : Sarray; R : out Sarray) is
+ begin
+ for I in Sarray'Range loop
+ pragma Loop_Optimize (Unroll);
+ R(I) := X(I) + Y(I);
+ end loop;
+ end;
+
+end Unroll3;
+
+-- { dg-final { scan-tree-dump-times "note: loop with 3 iterations completely unrolled" 2 "cunroll" } }
--- /dev/null
+package Unroll3 is
+
+ type Sarray is array (1 .. 4) of Float;
+ for Sarray'Alignment use 16;
+
+ function "+" (X, Y : Sarray) return Sarray;
+ procedure Add (X, Y : Sarray; R : out Sarray);
+
+end Unroll3;
case annot_expr_ivdep_kind:
loop->safelen = INT_MAX;
break;
+ case annot_expr_unroll_kind:
+ loop->unroll
+ = (unsigned short) tree_to_shwi (gimple_call_arg (stmt, 2));
+ cfun->has_unroll = true;
+ break;
case annot_expr_no_vector_kind:
loop->dont_vectorize = true;
break;
switch ((annot_expr_kind) tree_to_shwi (gimple_call_arg (stmt, 1)))
{
case annot_expr_ivdep_kind:
+ case annot_expr_unroll_kind:
case annot_expr_no_vector_kind:
case annot_expr_vector_kind:
break;
fprintf (file, ", estimate = ");
print_decu (loop->nb_iterations_estimate, file);
}
+ if (loop->unroll)
+ fprintf (file, ", unroll = %d", loop->unroll);
fprintf (file, ")\n");
/* Print loop's body. */
enum annot_expr_kind {
annot_expr_ivdep_kind,
+ annot_expr_unroll_kind,
annot_expr_no_vector_kind,
annot_expr_vector_kind,
annot_expr_parallel_kind,
flow_loop_tree_node_add (dest_parent, dest_loop);
dest_loop->safelen = src_loop->safelen;
+ if (src_loop->unroll)
+ {
+ dest_loop->unroll = src_loop->unroll;
+ cfun->has_unroll = true;
+ }
dest_loop->dont_vectorize = src_loop->dont_vectorize;
if (src_loop->force_vectorize)
{
case annot_expr_ivdep_kind:
pp_string (pp, ", ivdep");
break;
+ case annot_expr_unroll_kind:
+ pp_printf (pp, ", unroll %d",
+ (int) TREE_INT_CST_LOW (TREE_OPERAND (node, 2)));
+ break;
case annot_expr_no_vector_kind:
pp_string (pp, ", no-vector");
break;
HOST_WIDE_INT maxiter,
location_t locus)
{
- unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns;
- struct loop_size size;
+ unsigned HOST_WIDE_INT n_unroll = 0;
bool n_unroll_found = false;
edge edge_to_cancel = NULL;
- dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS;
/* See if we proved number of iterations to be low constant.
if (!n_unroll_found)
return false;
- if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
+ if (!loop->unroll
+ && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Not unrolling loop %d "
if (n_unroll)
{
- bool large;
if (ul == UL_SINGLE_ITER)
return false;
- /* EXIT can be removed only if we are sure it passes first N_UNROLL
- iterations. */
- bool remove_exit = (exit && niter
- && TREE_CODE (niter) == INTEGER_CST
- && wi::leu_p (n_unroll, wi::to_widest (niter)));
-
- large = tree_estimate_loop_size
- (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
- PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
- ninsns = size.overall;
- if (large)
+ if (loop->unroll)
{
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
- loop->num);
- return false;
+ /* If the unrolling factor is too large, bail out. */
+ if (n_unroll > (unsigned)loop->unroll)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+ "Not unrolling loop %d: "
+ "user didn't want it unrolled completely.\n",
+ loop->num);
+ return false;
+ }
}
-
- unr_insns = estimated_unrolled_size (&size, n_unroll);
- if (dump_file && (dump_flags & TDF_DETAILS))
+ else
{
- fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
- fprintf (dump_file, " Estimated size after unrolling: %d\n",
- (int) unr_insns);
- }
+ struct loop_size size;
+ /* EXIT can be removed only if we are sure it passes first N_UNROLL
+ iterations. */
+ bool remove_exit = (exit && niter
+ && TREE_CODE (niter) == INTEGER_CST
+ && wi::leu_p (n_unroll, wi::to_widest (niter)));
+ bool large
+ = tree_estimate_loop_size
+ (loop, remove_exit ? exit : NULL, edge_to_cancel, &size,
+ PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS));
+ if (large)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: it is too large.\n",
+ loop->num);
+ return false;
+ }
- /* If the code is going to shrink, we don't need to be extra cautious
- on guessing if the unrolling is going to be profitable. */
- if (unr_insns
- /* If there is IV variable that will become constant, we save
- one instruction in the loop prologue we do not account
- otherwise. */
- <= ninsns + (size.constant_iv != false))
- ;
- /* We unroll only inner loops, because we do not consider it profitable
- otheriwse. We still can cancel loopback edge of not rolling loop;
- this is always a good idea. */
- else if (ul == UL_NO_GROWTH)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
- loop->num);
- return false;
- }
- /* Outer loops tend to be less interesting candidates for complete
- unrolling unless we can do a lot of propagation into the inner loop
- body. For now we disable outer loop unrolling when the code would
- grow. */
- else if (loop->inner)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "it is not innermost and code would grow.\n",
- loop->num);
- return false;
- }
- /* If there is call on a hot path through the loop, then
- there is most probably not much to optimize. */
- else if (size.num_non_pure_calls_on_hot_path)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "contains call and code would grow.\n",
- loop->num);
- return false;
- }
- /* If there is pure/const call in the function, then we
- can still optimize the unrolled loop body if it contains
- some other interesting code than the calls and code
- storing or cumulating the return value. */
- else if (size.num_pure_calls_on_hot_path
- /* One IV increment, one test, one ivtmp store
- and one useful stmt. That is about minimal loop
- doing pure call. */
- && (size.non_call_stmts_on_hot_path
- <= 3 + size.num_pure_calls_on_hot_path))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "contains just pure calls and code would grow.\n",
- loop->num);
- return false;
- }
- /* Complete unrolling is a major win when control flow is removed and
- one big basic block is created. If the loop contains control flow
- the optimization may still be a win because of eliminating the loop
- overhead but it also may blow the branch predictor tables.
- Limit number of branches on the hot path through the peeled
- sequence. */
- else if (size.num_branches_on_hot_path * (int)n_unroll
- > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
- {
+ unsigned HOST_WIDE_INT ninsns = size.overall;
+ unsigned HOST_WIDE_INT unr_insns
+ = estimated_unrolled_size (&size, n_unroll);
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- " number of branches on hot path in the unrolled sequence"
- " reach --param max-peel-branches limit.\n",
- loop->num);
- return false;
- }
- else if (unr_insns
- > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Not unrolling loop %d: "
- "(--param max-completely-peeled-insns limit reached).\n",
- loop->num);
- return false;
+ {
+ fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
+ fprintf (dump_file, " Estimated size after unrolling: %d\n",
+ (int) unr_insns);
+ }
+
+ /* If the code is going to shrink, we don't need to be extra
+ cautious on guessing if the unrolling is going to be
+ profitable. */
+ if (unr_insns
+ /* If there is IV variable that will become constant, we
+ save one instruction in the loop prologue we do not
+ account otherwise. */
+ <= ninsns + (size.constant_iv != false))
+ ;
+ /* We unroll only inner loops, because we do not consider it
+ profitable otherwise. We still can cancel loopback edge
+ of not rolling loop; this is always a good idea. */
+ else if (ul == UL_NO_GROWTH)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: size would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* Outer loops tend to be less interesting candidates for
+ complete unrolling unless we can do a lot of propagation
+ into the inner loop body. For now we disable outer loop
+ unrolling when the code would grow. */
+ else if (loop->inner)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "it is not innermost and code would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* If there is call on a hot path through the loop, then
+ there is most probably not much to optimize. */
+ else if (size.num_non_pure_calls_on_hot_path)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "contains call and code would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* If there is pure/const call in the function, then we can
+ still optimize the unrolled loop body if it contains some
+ other interesting code than the calls and code storing or
+ cumulating the return value. */
+ else if (size.num_pure_calls_on_hot_path
+ /* One IV increment, one test, one ivtmp store and
+ one useful stmt. That is about minimal loop
+ doing pure call. */
+ && (size.non_call_stmts_on_hot_path
+ <= 3 + size.num_pure_calls_on_hot_path))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "contains just pure calls and code would grow.\n",
+ loop->num);
+ return false;
+ }
+ /* Complete unrolling is a major win when control flow is
+ removed and one big basic block is created. If the loop
+ contains control flow the optimization may still be a win
+ because of eliminating the loop overhead but it also may
+ blow the branch predictor tables. Limit number of
+ branches on the hot path through the peeled sequence. */
+ else if (size.num_branches_on_hot_path * (int)n_unroll
+ > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "number of branches on hot path in the unrolled "
+ "sequence reaches --param max-peel-branches limit.\n",
+ loop->num);
+ return false;
+ }
+ else if (unr_insns
+ > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not unrolling loop %d: "
+ "number of insns in the unrolled sequence reaches "
+ "--param max-completely-peeled-insns limit.\n",
+ loop->num);
+ return false;
+ }
}
- if (!n_unroll)
- dump_printf_loc (report_flags, locus,
- "loop turned into non-loop; it never loops.\n");
initialize_original_copy_tables ();
auto_sbitmap wont_exit (n_unroll + 1);
else
gimple_cond_make_true (cond);
update_stmt (cond);
- /* Do not remove the path. Doing so may remove outer loop
- and confuse bookkeeping code in tree_unroll_loops_completelly. */
+ /* Do not remove the path, as doing so may remove outer loop and
+ confuse bookkeeping code in tree_unroll_loops_completely. */
}
/* Store the loop for later unlooping and exit removal. */
{
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus,
"loop with %d iterations completely unrolled",
- (int) (n_unroll + 1));
+ (int) n_unroll);
if (loop->header->count.initialized_p ())
dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS,
" (header execution count %d)",
struct loop_size size;
int peeled_size;
- if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
+ if (!flag_peel_loops
+ || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0
|| !peeled_loops)
return false;
return false;
}
+ /* We don't peel loops that will be unrolled as this can duplicate a
+ loop more times than the user requested. */
+ if (loop->unroll)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Not peeling: user didn't want it peeled.\n");
+ return false;
+ }
+
/* Peel only innermost loops.
While the code is perfectly capable of peeling non-innermost loops,
the heuristics would probably need some improvements. */
if (loop->inner)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: outer loop\n");
+ fprintf (dump_file, "Not peeling: outer loop\n");
return false;
}
if (!optimize_loop_for_speed_p (loop))
{
if (dump_file)
- fprintf (dump_file, "Not peeling: cold loop\n");
+ fprintf (dump_file, "Not peeling: cold loop\n");
return false;
}
if (maxiter >= 0 && maxiter <= npeel)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: upper bound is known so can "
+ fprintf (dump_file, "Not peeling: upper bound is known so can "
"unroll completely\n");
return false;
}
if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1)
{
if (dump_file)
- fprintf (dump_file, "Not peeling: rolls too much "
+ fprintf (dump_file, "Not peeling: rolls too much "
"(%i + 1 > --param max-peel-times)\n", (int) npeel);
return false;
}
> PARAM_VALUE (PARAM_MAX_PEELED_INSNS))
{
if (dump_file)
- fprintf (dump_file, "Not peeling: peeled sequence size is too large "
+ fprintf (dump_file, "Not peeling: peeled sequence size is too large "
"(%i insns > --param max-peel-insns)", peeled_size);
return false;
}
if (!loop_father)
return false;
- if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
+ if (loop->unroll > 1)
+ ul = UL_ALL;
+ else if (may_increase_size && optimize_loop_nest_for_speed_p (loop)
/* Unroll outermost loops only if asked to do so or they do
not cause code growth. */
&& (unroll_outer || loop_outer (loop_father)))
MAY_INCREASE_SIZE is true, perform the unrolling only if the
size of the code does not increase. */
-unsigned int
+static unsigned int
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
{
bitmap father_bbs = BITMAP_ALLOC (NULL);
re-peeling the same loop multiple times. */
if (flag_peel_loops)
peeled_loops = BITMAP_ALLOC (NULL);
- int val = tree_unroll_loops_completely (flag_unroll_loops
- || flag_peel_loops
- || optimize >= 3, true);
+ unsigned int val = tree_unroll_loops_completely (flag_unroll_loops
+ || flag_peel_loops
+ || optimize >= 3, true);
if (peeled_loops)
{
BITMAP_FREE (peeled_loops);
{
unsigned ret = 0;
- loop_optimizer_init (LOOPS_NORMAL
- | LOOPS_HAVE_RECORDED_EXITS);
+ loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
if (number_of_loops (fun) > 1)
{
scev_initialize ();
/* ANNOTATE_EXPR.
Operand 0 is the expression to be annotated.
- Operand 1 is the annotation kind. */
-DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2)
+ Operand 1 is the annotation kind.
+ Operand 2 is additional data. */
+DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3)
/* Cilk spawn statement
Operand 0 is the CALL_EXPR. */