From: Eric Botcazou Date: Thu, 23 Nov 2017 16:36:28 +0000 (+0000) Subject: generic.texi (ANNOTATE_EXPR): Document 3rd operand. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ac9effeda3bb29c0adcd8834b45b6e5613413049;p=gcc.git generic.texi (ANNOTATE_EXPR): Document 3rd operand. * doc/generic.texi (ANNOTATE_EXPR): Document 3rd operand. * cfgloop.h (struct loop): Add unroll field. * function.h (struct function): Add has_unroll bitfield. * gimplify.c (gimple_boolify) : Deal with unroll kind. (gimplify_expr) : Propagate 3rd operand. * loop-init.c (pass_loop2::gate): Return true if cfun->has_unroll. (pass_rtl_unroll_loops::gate): Likewise. * loop-unroll.c (decide_unrolling): Tweak note message. Skip loops for which loop->unroll==1. (decide_unroll_constant_iterations): Use note for consistency and take loop->unroll into account. Return early if loop->unroll is set. Fix thinko in existing test. (decide_unroll_runtime_iterations): Use note for consistency and take loop->unroll into account. (decide_unroll_stupid): Likewise. * lto-streamer-in.c (input_cfg): Read loop->unroll. * lto-streamer-out.c (output_cfg): Write loop->unroll. * tree-cfg.c (replace_loop_annotate_in_block) : New case. (replace_loop_annotate) : Likewise. (print_loop): Print loop->unroll if set. * tree-core.h (enum annot_expr_kind): Add annot_expr_unroll_kind. * tree-inline.c (copy_loops): Copy unroll and set cfun->has_unroll. * tree-pretty-print.c (dump_generic_node) : New case. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Bail out if loop->unroll is set and smaller than the trip count. Otherwise bypass entirely the heuristics if loop->unroll is set. Remove dead note. Fix off-by-one bug in other note. (try_peel_loop): Bail out if loop->unroll is set. Fix formatting. (tree_unroll_loops_completely_1): Force unrolling if loop->unroll is greater than 1. (tree_unroll_loops_completely): Make static. (pass_complete_unroll::execute): Use correct type for variable. 
(pass_complete_unrolli::execute): Fix formatting. * tree.def (ANNOTATE_EXPR): Add 3rd operand. ada/ * gcc-interface/trans.c (gnat_gimplify_stmt) : Pass 3rd operand to ANNOTATE_EXPR and also pass unrolling hints. c/ * c-parser.c (c_parser_while_statement): Pass 3rd operand to ANNOTATE_EXPR. (c_parser_do_statement): Likewise. (c_parser_for_statement): Likewise. cp/ * pt.c (tsubst_expr) : Recurse on 3rd operand. * semantics.c (finish_while_stmt_cond): Pass 3rd operand to ANNOTATE_EXPR. (finish_do_stmt): Likewise. (finish_for_cond): Likewise. fortran/ * trans-stmt.c (gfc_trans_forall_loop): Pass 3rd operand to ANNOTATE_EXPR. From-SVN: r255106 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 2cc6402df26..98932b4b7b3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,43 @@ +2017-11-23 Mike Stump + Eric Botcazou + + * doc/generic.texi (ANNOTATE_EXPR): Document 3rd operand. + * cfgloop.h (struct loop): Add unroll field. + * function.h (struct function): Add has_unroll bitfield. + * gimplify.c (gimple_boolify) : Deal with unroll kind. + (gimplify_expr) : Propagate 3rd operand. + * loop-init.c (pass_loop2::gate): Return true if cfun->has_unroll. + (pass_rtl_unroll_loops::gate): Likewise. + * loop-unroll.c (decide_unrolling): Tweak note message. Skip loops + for which loop->unroll==1. + (decide_unroll_constant_iterations): Use note for consistency and + take loop->unroll into account. Return early if loop->unroll is set. + Fix thinko in existing test. + (decide_unroll_runtime_iterations): Use note for consistency and + take loop->unroll into account. + (decide_unroll_stupid): Likewise. + * lto-streamer-in.c (input_cfg): Read loop->unroll. + * lto-streamer-out.c (output_cfg): Write loop->unroll. + * tree-cfg.c (replace_loop_annotate_in_block) : + New case. + (replace_loop_annotate) : Likewise. + (print_loop): Print loop->unroll if set. + * tree-core.h (enum annot_expr_kind): Add annot_expr_unroll_kind. 
+ * tree-inline.c (copy_loops): Copy unroll and set cfun->has_unroll. + * tree-pretty-print.c (dump_generic_node) : + New case. + * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Bail out if + loop->unroll is set and smaller than the trip count. Otherwise bypass + entirely the heuristics if loop->unroll is set. Remove dead note. + Fix off-by-one bug in other note. + (try_peel_loop): Bail out if loop->unroll is set. Fix formatting. + (tree_unroll_loops_completely_1): Force unrolling if loop->unroll + is greater than 1. + (tree_unroll_loops_completely): Make static. + (pass_complete_unroll::execute): Use correct type for variable. + (pass_complete_unrolli::execute): Fix formatting. + * tree.def (ANNOTATE_EXPR): Add 3rd operand. + 2017-11-23 Sergey Shalnov * config/i386/i386.h (TARGET_PREFER_AVX256): Also diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog index 1d829f2040a..5a40cc14e5a 100644 --- a/gcc/ada/ChangeLog +++ b/gcc/ada/ChangeLog @@ -1,3 +1,9 @@ +2017-11-23 Mike Stump + Eric Botcazou + + * gcc-interface/trans.c (gnat_gimplify_stmt) : Pass 3rd + operand to ANNOTATE_EXPR and also pass unrolling hints. + 2017-11-20 Eric Botcazou PR ada/83016 diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c index 25f12371999..2544b4c3498 100644 --- a/gcc/ada/gcc-interface/trans.c +++ b/gcc/ada/gcc-interface/trans.c @@ -8506,17 +8506,30 @@ gnat_gimplify_stmt (tree *stmt_p) { /* Deal with the optimization hints. 
*/ if (LOOP_STMT_IVDEP (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); + if (LOOP_STMT_NO_UNROLL (stmt)) + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + integer_one_node); + if (LOOP_STMT_UNROLL (stmt)) + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + build_int_cst (integer_type_node, + annot_expr_unroll_kind), + build_int_cst (NULL_TREE, USHRT_MAX)); if (LOOP_STMT_NO_VECTOR (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_no_vector_kind)); + annot_expr_no_vector_kind), + integer_zero_node); if (LOOP_STMT_VECTOR (stmt)) - gnu_cond = build2 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, + gnu_cond = build3 (ANNOTATE_EXPR, TREE_TYPE (gnu_cond), gnu_cond, build_int_cst (integer_type_node, - annot_expr_vector_kind)); + annot_expr_vector_kind), + integer_zero_node); gnu_cond = build3 (COND_EXPR, void_type_node, gnu_cond, NULL_TREE, diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 344eb262a1b..1d71829d919 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,11 @@ +2017-11-23 Mike Stump + Eric Botcazou + + * c-parser.c (c_parser_while_statement): Pass 3rd operand to + ANNOTATE_EXPR. + (c_parser_do_statement): Likewise. + (c_parser_for_statement): Likewise. 
+ 2017-11-22 David Malcolm PR c++/62170 diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 4afa9ceb5e9..7eb0f562518 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -6055,9 +6055,10 @@ c_parser_while_statement (c_parser *parser, bool ivdep, bool *if_p) "%<_Cilk_spawn%> statement cannot be used as a condition for while statement")) cond = error_mark_node; if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); save_break = c_break_label; c_break_label = NULL_TREE; save_cont = c_cont_label; @@ -6120,9 +6121,10 @@ c_parser_do_statement (c_parser *parser, bool ivdep) "%<_Cilk_spawn%> statement cannot be used as a condition for a do-while statement")) cond = error_mark_node; if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); if (!c_parser_require (parser, CPP_SEMICOLON, "expected %<;%>")) c_parser_skip_to_end_of_block_or_statement (parser); c_finish_loop (loc, cond, NULL, body, new_break, new_cont, false); @@ -6327,9 +6329,10 @@ c_parser_for_statement (c_parser *parser, bool ivdep, bool *if_p) "expected %<;%>"); } if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); } /* Parse the increment expression (the third expression in a for-statement). 
In the case of a foreach-statement, this is diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h index 0b164e97b1f..be2ba8cf1a1 100644 --- a/gcc/cfgloop.h +++ b/gcc/cfgloop.h @@ -221,6 +221,11 @@ struct GTY ((chain_next ("%h.next"))) loop { /* True if the loop is part of an oacc kernels region. */ unsigned in_oacc_kernels_region : 1; + /* The number of times to unroll the loop. 0 means no information + given, just do what we always do. A value of 1 means don't unroll + the loop. */ + unsigned short unroll; + /* For SIMD loops, this is a unique identifier of the loop, referenced by IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LANE and IFN_GOMP_SIMD_LAST_LANE builtins. */ diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 59e890f6efd..ac623f08f40 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,12 @@ +2017-11-23 Mike Stump + Eric Botcazou + + * pt.c (tsubst_expr) <ANNOTATE_EXPR>: Recurse on 3rd operand. + * semantics.c (finish_while_stmt_cond): Pass 3rd operand to + ANNOTATE_EXPR. + (finish_do_stmt): Likewise. + (finish_for_cond): Likewise. 
+ 2017-11-22 Jakub Jelinek PR c++/82401 diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index 2b70969999e..8bc9c221da7 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -16728,8 +16728,10 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, case ANNOTATE_EXPR: tmp = RECUR (TREE_OPERAND (t, 0)); - RETURN (build2_loc (EXPR_LOCATION (t), ANNOTATE_EXPR, - TREE_TYPE (tmp), tmp, RECUR (TREE_OPERAND (t, 1)))); + RETURN (build3_loc (EXPR_LOCATION (t), ANNOTATE_EXPR, + TREE_TYPE (tmp), tmp, + RECUR (TREE_OPERAND (t, 1)), + RECUR (TREE_OPERAND (t, 2)))); default: gcc_assert (!STATEMENT_CODE_P (TREE_CODE (t))); diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c index 51489d17ad5..c316ad29409 100644 --- a/gcc/cp/semantics.c +++ b/gcc/cp/semantics.c @@ -812,11 +812,12 @@ finish_while_stmt_cond (tree cond, tree while_stmt, bool ivdep) finish_cond (&WHILE_COND (while_stmt), cond); begin_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - WHILE_COND (while_stmt) = build2 (ANNOTATE_EXPR, + WHILE_COND (while_stmt) = build3 (ANNOTATE_EXPR, TREE_TYPE (WHILE_COND (while_stmt)), WHILE_COND (while_stmt), build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); simplify_loop_decl_cond (&WHILE_COND (while_stmt), WHILE_BODY (while_stmt)); } @@ -870,8 +871,9 @@ finish_do_stmt (tree cond, tree do_stmt, bool ivdep) cond = maybe_convert_cond (cond); end_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, - build_int_cst (integer_type_node, annot_expr_ivdep_kind)); + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + build_int_cst (integer_type_node, annot_expr_ivdep_kind), + integer_zero_node); DO_COND (do_stmt) = cond; } @@ -990,11 +992,12 @@ finish_for_cond (tree cond, tree for_stmt, bool ivdep) finish_cond (&FOR_COND (for_stmt), cond); begin_maybe_infinite_loop (cond); if (ivdep && cond != error_mark_node) - FOR_COND (for_stmt) = build2 
(ANNOTATE_EXPR, + FOR_COND (for_stmt) = build3 (ANNOTATE_EXPR, TREE_TYPE (FOR_COND (for_stmt)), FOR_COND (for_stmt), build_int_cst (integer_type_node, - annot_expr_ivdep_kind)); + annot_expr_ivdep_kind), + integer_zero_node); simplify_loop_decl_cond (&FOR_COND (for_stmt), FOR_BODY (for_stmt)); } diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi index b03970f74cc..1a37615dcc0 100644 --- a/gcc/doc/generic.texi +++ b/gcc/doc/generic.texi @@ -1695,7 +1695,7 @@ its sole argument yields the representation for @code{ap}. @item ANNOTATE_EXPR This node is used to attach markers to an expression. The first operand is the annotated expression, the second is an @code{INTEGER_CST} with -a value from @code{enum annot_expr_kind}. +a value from @code{enum annot_expr_kind}, the third is an @code{INTEGER_CST}. @end table diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 036bf7b2994..a05bf83ee67 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2017-11-23 Mike Stump + Eric Botcazou + + * trans-stmt.c (gfc_trans_forall_loop): Pass 3rd operand to + ANNOTATE_EXPR. 
+ 2017-11-23 Paul Thomas PR fortran/82814 diff --git a/gcc/fortran/trans-stmt.c b/gcc/fortran/trans-stmt.c index db913bfe515..96ca2175674 100644 --- a/gcc/fortran/trans-stmt.c +++ b/gcc/fortran/trans-stmt.c @@ -3459,9 +3459,10 @@ gfc_trans_forall_loop (forall_info *forall_tmp, tree body, cond = fold_build2_loc (input_location, LE_EXPR, logical_type_node, count, build_int_cst (TREE_TYPE (count), 0)); if (forall_tmp->do_concurrent) - cond = build2 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, + cond = build3 (ANNOTATE_EXPR, TREE_TYPE (cond), cond, build_int_cst (integer_type_node, - annot_expr_parallel_kind)); + annot_expr_parallel_kind), + integer_zero_node); tmp = build1_v (GOTO_EXPR, exit_label); tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, diff --git a/gcc/function.h b/gcc/function.h index 76434cd59a5..32c9893cd59 100644 --- a/gcc/function.h +++ b/gcc/function.h @@ -385,8 +385,11 @@ struct GTY(()) function { nonzero value in loop->simduid. */ unsigned int has_simduid_loops : 1; - /* Set when the tail call has been identified. */ + /* Nonzero when the tail call has been identified. */ unsigned int tail_call_marked : 1; + + /* Nonzero if the current function contains a #pragma GCC unroll. */ + unsigned int has_unroll : 1; }; /* Add the decl D to the local_decls list of FUN. 
*/ diff --git a/gcc/gimplify.c b/gcc/gimplify.c index d71bd2ab01e..13dbbe0538d 100644 --- a/gcc/gimplify.c +++ b/gcc/gimplify.c @@ -3747,6 +3747,7 @@ gimple_boolify (tree expr) switch ((enum annot_expr_kind) TREE_INT_CST_LOW (TREE_OPERAND (expr, 1))) { case annot_expr_ivdep_kind: + case annot_expr_unroll_kind: case annot_expr_no_vector_kind: case annot_expr_vector_kind: case annot_expr_parallel_kind: @@ -11390,6 +11391,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, { tree cond = TREE_OPERAND (*expr_p, 0); tree kind = TREE_OPERAND (*expr_p, 1); + tree data = TREE_OPERAND (*expr_p, 2); tree type = TREE_TYPE (cond); if (!INTEGRAL_TYPE_P (type)) { @@ -11400,7 +11402,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p, tree tmp = create_tmp_var (type); gimplify_arg (&cond, pre_p, EXPR_LOCATION (*expr_p)); gcall *call - = gimple_build_call_internal (IFN_ANNOTATE, 2, cond, kind); + = gimple_build_call_internal (IFN_ANNOTATE, 3, cond, kind, data); gimple_call_set_lhs (call, tmp); gimplify_seq_add_stmt (pre_p, call); *expr_p = tmp; diff --git a/gcc/loop-init.c b/gcc/loop-init.c index 609a804460d..fd5069caa29 100644 --- a/gcc/loop-init.c +++ b/gcc/loop-init.c @@ -361,8 +361,8 @@ pass_loop2::gate (function *fun) && (flag_move_loop_invariants || flag_unswitch_loops || flag_unroll_loops - || (flag_branch_on_count_reg - && targetm.have_doloop_end ()))) + || (flag_branch_on_count_reg && targetm.have_doloop_end ()) + || cfun->has_unroll)) return true; else { @@ -560,7 +560,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return (flag_unroll_loops || flag_unroll_all_loops); + return (flag_unroll_loops || flag_unroll_all_loops || cfun->has_unroll); } virtual unsigned int execute (function *); diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c index 91bf5dddeed..bbba35fbff1 100644 --- a/gcc/loop-unroll.c +++ b/gcc/loop-unroll.c @@ -224,9 +224,16 @@ decide_unrolling (int flags) if (dump_enabled_p ()) dump_printf_loc 
(MSG_NOTE, locus, - ";; *** Considering loop %d at BB %d for " - "unrolling ***\n", - loop->num, loop->header->index); + "considering unrolling loop %d at BB %d\n", + loop->num, loop->header->index); + + if (loop->unroll == 1) + { + if (dump_file) + fprintf (dump_file, + ";; Not unrolling loop, user didn't want it unrolled\n"); + continue; + } /* Do not peel cold areas. */ if (optimize_loop_for_size_p (loop)) @@ -256,9 +263,7 @@ decide_unrolling (int flags) loop->ninsns = num_loop_insns (loop); loop->av_ninsns = average_num_loop_insns (loop); - /* Try transformations one by one in decreasing order of - priority. */ - + /* Try transformations one by one in decreasing order of priority. */ decide_unroll_constant_iterations (loop, flags); if (loop->lpt_decision.decision == LPT_NONE) decide_unroll_runtime_iterations (loop, flags); @@ -347,19 +352,17 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) struct niter_desc *desc; widest_int iterations; - if (!(flags & UAP_UNROLL)) - { - /* We were not asked to, just return back silently. */ - return; - } + /* If we were not asked to unroll this loop, just return back silently. */ + if (!(flags & UAP_UNROLL) && !loop->unroll) + return; - if (dump_file) - fprintf (dump_file, - "\n;; Considering unrolling loop with constant " - "number of iterations\n"); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, + "considering unrolling loop with constant " + "number of iterations\n"); /* nunroll = total number of copies of the original loop body in - unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ + unrolled loop (i.e. if it is 2, we have to duplicate loop body once). */ nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; @@ -391,6 +394,24 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) return; } + /* Check for an explicit unrolling factor. 
*/ + if (loop->unroll) + { + /* However we cannot unroll completely at the RTL level a loop with + constant number of iterations; it should have been peeled instead. */ + if ((unsigned) loop->unroll - 1 > desc->niter - 2) + { + if (dump_file) + fprintf (dump_file, ";; Loop should have been peeled\n"); + } + else + { + loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; + loop->lpt_decision.times = loop->unroll - 1; + } + return; + } + /* Check whether the loop rolls enough to consider. Consult also loop bounds and profile; in the case the loop has more than one exit it may well loop less than determined maximal number @@ -412,7 +433,7 @@ decide_unroll_constant_iterations (struct loop *loop, int flags) best_copies = 2 * nunroll + 10; i = 2 * nunroll + 2; - if (i - 1 >= desc->niter) + if (i > desc->niter - 2) i = desc->niter - 2; for (; i >= nunroll - 1; i--) @@ -651,16 +672,14 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) struct niter_desc *desc; widest_int iterations; - if (!(flags & UAP_UNROLL)) - { - /* We were not asked to, just return back silently. */ - return; - } + /* If we were not asked to unroll this loop, just return back silently. */ + if (!(flags & UAP_UNROLL) && !loop->unroll) + return; - if (dump_file) - fprintf (dump_file, - "\n;; Considering unrolling loop with runtime " - "computable number of iterations\n"); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, + "considering unrolling loop with runtime-" + "computable number of iterations\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -674,6 +693,9 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) if (targetm.loop_unroll_adjust) nunroll = targetm.loop_unroll_adjust (nunroll, loop); + if (loop->unroll) + nunroll = loop->unroll; + /* Skip big loops. 
*/ if (nunroll <= 1) { @@ -712,8 +734,9 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) return; } - /* Success; now force nunroll to be power of 2, as we are unable to - cope with overflows in computation of number of iterations. */ + /* Success; now force nunroll to be power of 2, as code-gen + requires it and we are unable to cope with overflows in + computation of number of iterations. */ for (i = 1; 2 * i <= nunroll; i *= 2) continue; @@ -824,9 +847,10 @@ compare_and_jump_seq (rtx op0, rtx op1, enum rtx_code comp, return seq; } -/* Unroll LOOP for which we are able to count number of iterations in runtime - LOOP->LPT_DECISION.TIMES times. The transformation does this (with some - extra care for case n < 0): +/* Unroll LOOP for which we are able to count number of iterations in + runtime LOOP->LPT_DECISION.TIMES times. The times value must be a + power of two. The transformation does this (with some extra care + for case n < 0): for (i = 0; i < n; i++) body; @@ -1133,14 +1157,12 @@ decide_unroll_stupid (struct loop *loop, int flags) struct niter_desc *desc; widest_int iterations; - if (!(flags & UAP_UNROLL_ALL)) - { - /* We were not asked to, just return back silently. */ - return; - } + /* If we were not asked to unroll this loop, just return back silently. */ + if (!(flags & UAP_UNROLL_ALL) && !loop->unroll) + return; - if (dump_file) - fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n"); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "considering unrolling loop stupidly\n"); /* nunroll = total number of copies of the original loop body in unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ @@ -1155,6 +1177,9 @@ decide_unroll_stupid (struct loop *loop, int flags) if (targetm.loop_unroll_adjust) nunroll = targetm.loop_unroll_adjust (nunroll, loop); + if (loop->unroll) + nunroll = loop->unroll; + /* Skip big loops. 
*/ if (nunroll <= 1) { @@ -1170,7 +1195,7 @@ decide_unroll_stupid (struct loop *loop, int flags) if (desc->simple_p && !desc->assumptions) { if (dump_file) - fprintf (dump_file, ";; The loop is simple\n"); + fprintf (dump_file, ";; Loop is simple\n"); return; } diff --git a/gcc/lto-streamer-in.c b/gcc/lto-streamer-in.c index d852a3b3855..fd6bd06ffb2 100644 --- a/gcc/lto-streamer-in.c +++ b/gcc/lto-streamer-in.c @@ -825,6 +825,7 @@ input_cfg (struct lto_input_block *ib, struct data_in *data_in, /* Read OMP SIMD related info. */ loop->safelen = streamer_read_hwi (ib); + loop->unroll = streamer_read_hwi (ib); loop->dont_vectorize = streamer_read_hwi (ib); loop->force_vectorize = streamer_read_hwi (ib); loop->simduid = stream_read_tree (ib, data_in); diff --git a/gcc/lto-streamer-out.c b/gcc/lto-streamer-out.c index e127e508f97..0442ec72d06 100644 --- a/gcc/lto-streamer-out.c +++ b/gcc/lto-streamer-out.c @@ -1931,6 +1931,7 @@ output_cfg (struct output_block *ob, struct function *fn) /* Write OMP SIMD related info. */ streamer_write_hwi (ob, loop->safelen); + streamer_write_hwi (ob, loop->unroll); streamer_write_hwi (ob, loop->dont_vectorize); streamer_write_hwi (ob, loop->force_vectorize); stream_write_tree (ob, loop->simduid, true); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 341f3495114..34d31b88760 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,28 @@ +2017-11-23 Eric Botcazou + + * gcc.dg/pr64277.c: Adjust scan. + * gcc.dg/tree-prof/unroll-1.c: Use detailed dump and adjust scan. + * gcc.dg/tree-ssa/cunroll-1.c: Adjust scan. + * gcc.dg/tree-ssa/cunroll-12.c: Likewise. + * gcc.dg/tree-ssa/cunroll-13.c: Likewise. + * gcc.dg/tree-ssa/cunroll-14.c: Likewise. + * gcc.dg/tree-ssa/cunroll-2.c: Likewise. + * gcc.dg/tree-ssa/cunroll-3.c: Likewise. + * gcc.dg/tree-ssa/cunroll-5.c: Likewise. + * gcc.dg/tree-ssa/loop-1.c: Likewise. + * gcc.dg/tree-ssa/loop-23.c: Likewise. + * gcc.dg/tree-ssa/pr61743-1.c: Likewise. 
+ * gcc.dg/tree-ssa/pr61743-2.c: Likewise. + * gcc.dg/unroll-2.c (foo): Adjust message. + (foo2): Likewise. + * gcc.dg/unroll-3.c: Adjust scan. + * gcc.dg/unroll-4.c: Likewise. + * gcc.dg/unroll-5.c: Likewise. + * gcc.dg/unroll-7.c: Use detailed dump and adjust scan. + * gnat.dg/unroll1.ad[sb]: New test. + * gnat.dg/unroll2.ad[sb]: Likewise. + * gnat.dg/unroll3.ad[sb]: Likewise. + 2017-11-23 Jan Hubicka * gcc.dg/ipa/inline-1.c: Update template. diff --git a/gcc/testsuite/gcc.dg/pr64277.c b/gcc/testsuite/gcc.dg/pr64277.c index 813301f2d04..aebb0fe19d6 100644 --- a/gcc/testsuite/gcc.dg/pr64277.c +++ b/gcc/testsuite/gcc.dg/pr64277.c @@ -1,8 +1,8 @@ /* PR tree-optimization/64277 */ /* { dg-do compile } */ /* { dg-options "-O3 -Wall -Werror -fdump-tree-cunroll-details" } */ +/* { dg-final { scan-tree-dump "loop with 4 iterations completely unrolled" "cunroll" } } */ /* { dg-final { scan-tree-dump "loop with 5 iterations completely unrolled" "cunroll" } } */ -/* { dg-final { scan-tree-dump "loop with 6 iterations completely unrolled" "cunroll" } } */ #if __SIZEOF_INT__ < 4 __extension__ typedef __INT32_TYPE__ int32_t; diff --git a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c index 1e95c3d0f0a..3ad0cf019b3 100644 --- a/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c +++ b/gcc/testsuite/gcc.dg/tree-prof/unroll-1.c @@ -1,4 +1,4 @@ -/* { dg-options "-O3 -fdump-rtl-loop2_unroll -funroll-loops -fno-peel-loops" } */ +/* { dg-options "-O3 -fdump-rtl-loop2_unroll-details -funroll-loops -fno-peel-loops" } */ void abort (); int a[1000]; @@ -20,4 +20,4 @@ main() t(); return 0; } -/* { dg-final-use { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ +/* { dg-final-use { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c index 23cacab6f96..bcafbfe86b5 
100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-1.c @@ -9,5 +9,5 @@ test(int c) a[i]=5; } /* Array bounds says the loop will not roll much. */ -/* { dg-final { scan-tree-dump "loop with 3 iterations completely unrolled" "cunrolli"} } */ +/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunrolli"} } */ /* { dg-final { scan-tree-dump "Last iteration exit edge was proved true." "cunrolli"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c index 0cc44f42fd8..aa472a6f0ad 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-12.c @@ -7,5 +7,5 @@ t(struct a *a) for (int i=0;a->a[i];i++) a->a[i]++; } -/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 6 iterations completely unrolled" 1 "cunroll" } } */ /* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c index 904e6dc075b..dcc19f552d8 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-13.c @@ -19,5 +19,5 @@ t(struct a *a) /* { dg-final { scan-tree-dump-times "Loop 1 iterates 123454 times" 1 "cunroll" } } */ /* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */ /* { dg-final { scan-tree-dump-times "Exit condition of peeled iterations was eliminated" 1 "cunroll" } } */ -/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 6 iterations completely unrolled" 1 "cunroll" } } */ /* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c index 
4d27b185a86..5f112da310c 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-14.c @@ -7,7 +7,7 @@ t(struct a *a) for (int i=0;i<5 && a->a[i];i++) a->a[i]++; } -/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 1 "cunroll" } } */ /* { dg-final { scan-tree-dump-not "Invalid sum" "cunroll" } } */ /* { dg-final { scan-tree-dump-times "Loop 1 iterates 4 times" 1 "cunroll" } } */ /* { dg-final { scan-tree-dump-times "Last iteration exit edge was proved true" 1 "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c index 2e8be848175..b1d1c7d3d85 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-2.c @@ -14,4 +14,4 @@ test(int c) } } /* We are not able to get rid of the final conditional because the loop has two exits. */ -/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunroll"} } */ +/* { dg-final { scan-tree-dump "loop with 1 iterations completely unrolled" "cunroll"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-3.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-3.c index d95d4498b97..e25c638ac51 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-3.c @@ -12,4 +12,4 @@ test(int c) } /* If we start duplicating headers prior curoll, this loop will have 0 iterations. 
*/ -/* { dg-final { scan-tree-dump "loop with 2 iterations completely unrolled" "cunrolli"} } */ +/* { dg-final { scan-tree-dump "loop with 1 iterations completely unrolled" "cunrolli"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c index b61b570491a..8ec4a029b1d 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/cunroll-5.c @@ -9,6 +9,6 @@ test(int c) a[i]=5; } /* Basic testcase for complette unrolling. */ -/* { dg-final { scan-tree-dump "loop with 6 iterations completely unrolled" "cunroll"} } */ +/* { dg-final { scan-tree-dump "loop with 5 iterations completely unrolled" "cunroll"} } */ /* { dg-final { scan-tree-dump "Exit condition of peeled iterations was eliminated." "cunroll"} } */ /* { dg-final { scan-tree-dump "Last iteration exit edge was proved true." "cunroll"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c index 01c37a56671..18627505180 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-1.c @@ -34,7 +34,7 @@ int xxx(void) /* We should be able to find out that the loop iterates four times and unroll it completely. 
*/ /* { dg-final { scan-tree-dump-times "Added canonical iv to loop 1, 4 iterations" 1 "ivcanon"} } */ -/* { dg-final { scan-tree-dump-times "loop with 5 iterations completely unrolled" 1 "cunroll"} } */ +/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 1 "cunroll"} } */ /* { dg-final { scan-tree-dump-times "foo" 5 "optimized"} } */ /* Because hppa, ia64 and Windows targets include an external declaration diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c index 50ae1a8c1cf..4feada3e311 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-23.c @@ -24,5 +24,4 @@ int foo(void) return sum; } -/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 1 "cunroll" } } */ - +/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c index a5c83cf8ecf..f7cbda68324 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-1.c @@ -48,5 +48,5 @@ int foo1 (e_u8 a[4][N], int b1, int b2, e_u8 b[M+1][4][N]) return 0; } -/* { dg-final { scan-tree-dump-times "loop with 4 iterations completely unrolled" 8 "cunroll" } } */ -/* { dg-final { scan-tree-dump-times "loop with 9 iterations completely unrolled" 2 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 8 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c index 5bf38c5c743..b16e6312948 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr61743-2.c @@ -48,5 +48,5 @@ int foo1 (e_u8 a[4][N], int b1, int b2, e_u8 b[M+1][4][N]) return 0; } -/* { dg-final { 
scan-tree-dump-times "loop with 4 iterations completely unrolled" 2 "cunroll" } } */ -/* { dg-final { scan-tree-dump-times "loop with 8 iterations completely unrolled" 2 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 2 "cunroll" } } */ +/* { dg-final { scan-tree-dump-times "loop with 7 iterations completely unrolled" 2 "cunroll" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-2.c b/gcc/testsuite/gcc.dg/unroll-2.c index 05a141056d7..46126c30356 100644 --- a/gcc/testsuite/gcc.dg/unroll-2.c +++ b/gcc/testsuite/gcc.dg/unroll-2.c @@ -15,7 +15,7 @@ int foo(void) { int i; bar(); - for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */ + for (i = 0; i < 2; i++) /* { dg-message "note: loop with 2 iterations completely unrolled" } */ { a[i]= b[i] + 1; } @@ -25,7 +25,7 @@ int foo(void) int foo2(void) { int i; - for (i = 0; i < 2; i++) /* { dg-message "note: loop with 3 iterations completely unrolled" } */ + for (i = 0; i < 2; i++) /* { dg-message "note: loop with 2 iterations completely unrolled" } */ { a[i]= b[i] + 1; } diff --git a/gcc/testsuite/gcc.dg/unroll-3.c b/gcc/testsuite/gcc.dg/unroll-3.c index 9ea462e2a0b..10bf59b9a2e 100644 --- a/gcc/testsuite/gcc.dg/unroll-3.c +++ b/gcc/testsuite/gcc.dg/unroll-3.c @@ -28,4 +28,4 @@ int foo2(void) return 1; } -/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-4.c b/gcc/testsuite/gcc.dg/unroll-4.c index 242c416bb33..17f19421227 100644 --- a/gcc/testsuite/gcc.dg/unroll-4.c +++ b/gcc/testsuite/gcc.dg/unroll-4.c @@ -28,4 +28,4 @@ int foo2(void) return 1; } -/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 
"cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-5.c b/gcc/testsuite/gcc.dg/unroll-5.c index 329b9c07463..f3bdebe9882 100644 --- a/gcc/testsuite/gcc.dg/unroll-5.c +++ b/gcc/testsuite/gcc.dg/unroll-5.c @@ -28,4 +28,4 @@ int foo2(void) return 1; } -/* { dg-final { scan-tree-dump-times "loop with 3 iterations completely unrolled" 1 "cunrolli" } } */ +/* { dg-final { scan-tree-dump-times "loop with 2 iterations completely unrolled" 1 "cunrolli" } } */ diff --git a/gcc/testsuite/gcc.dg/unroll-7.c b/gcc/testsuite/gcc.dg/unroll-7.c index e76d4faf16c..055369bf8b1 100644 --- a/gcc/testsuite/gcc.dg/unroll-7.c +++ b/gcc/testsuite/gcc.dg/unroll-7.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -fdump-rtl-loop2_unroll -funroll-loops" } */ +/* { dg-options "-O2 -fdump-rtl-loop2_unroll-details -funroll-loops" } */ /* { dg-require-effective-target int32plus } */ extern int *a; @@ -14,5 +14,5 @@ int t(void) /* { dg-final { scan-rtl-dump "number of iterations: .const_int 999999" "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump "upper bound: 999999" "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump "realistic bound: 999999" "loop2_unroll" } } */ -/* { dg-final { scan-rtl-dump "Considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ +/* { dg-final { scan-rtl-dump "considering unrolling loop with constant number of iterations" "loop2_unroll" } } */ /* { dg-final { scan-rtl-dump-not "Invalid sum" "loop2_unroll" } } */ diff --git a/gcc/testsuite/gnat.dg/unroll1.adb b/gcc/testsuite/gnat.dg/unroll1.adb new file mode 100644 index 00000000000..ff9222de0e0 --- /dev/null +++ b/gcc/testsuite/gnat.dg/unroll1.adb @@ -0,0 +1,27 @@ +-- { dg-do compile } +-- { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll-details -fdump-tree-cunrolli-details" } + +package body Unroll1 is + + function "+" (X, Y : Sarray) return Sarray is + R : Sarray; + begin + for I in Sarray'Range loop + pragma Loop_Optimize (No_Unroll); + R(I) := X(I) + Y(I); 
+ end loop; + return R; + end; + + procedure Add (X, Y : Sarray; R : out Sarray) is + begin + for I in Sarray'Range loop + pragma Loop_Optimize (No_Unroll); + R(I) := X(I) + Y(I); + end loop; + end; + +end Unroll1; + +-- { dg-final { scan-tree-dump-times "Not unrolling loop .: user didn't want it unrolled completely" 2 "cunrolli" } } */ +-- { dg-final { scan-rtl-dump-times "Not unrolling loop, user didn't want it unrolled" 2 "loop2_unroll" } } */ diff --git a/gcc/testsuite/gnat.dg/unroll1.ads b/gcc/testsuite/gnat.dg/unroll1.ads new file mode 100644 index 00000000000..28dbea8a9c0 --- /dev/null +++ b/gcc/testsuite/gnat.dg/unroll1.ads @@ -0,0 +1,9 @@ +package Unroll1 is + + type Sarray is array (1 .. 4) of Float; + for Sarray'Alignment use 16; + + function "+" (X, Y : Sarray) return Sarray; + procedure Add (X, Y : Sarray; R : out Sarray); + +end Unroll1; diff --git a/gcc/testsuite/gnat.dg/unroll2.adb b/gcc/testsuite/gnat.dg/unroll2.adb new file mode 100644 index 00000000000..01af9d2b788 --- /dev/null +++ b/gcc/testsuite/gnat.dg/unroll2.adb @@ -0,0 +1,26 @@ +-- { dg-do compile } +-- { dg-options "-O2 -fdump-tree-cunrolli-details" } + +package body Unroll2 is + + function "+" (X, Y : Sarray) return Sarray is + R : Sarray; + begin + for I in Sarray'Range loop + pragma Loop_Optimize (Unroll); + R(I) := X(I) + Y(I); + end loop; + return R; + end; + + procedure Add (X, Y : Sarray; R : out Sarray) is + begin + for I in Sarray'Range loop + pragma Loop_Optimize (Unroll); + R(I) := X(I) + Y(I); + end loop; + end; + +end Unroll2; + +-- { dg-final { scan-tree-dump-times "note: loop with 3 iterations completely unrolled" 2 "cunrolli" } } */ diff --git a/gcc/testsuite/gnat.dg/unroll2.ads b/gcc/testsuite/gnat.dg/unroll2.ads new file mode 100644 index 00000000000..efae982c4c6 --- /dev/null +++ b/gcc/testsuite/gnat.dg/unroll2.ads @@ -0,0 +1,9 @@ +package Unroll2 is + + type Sarray is array (1 .. 
4) of Float; + for Sarray'Alignment use 16; + + function "+" (X, Y : Sarray) return Sarray; + procedure Add (X, Y : Sarray; R : out Sarray); + +end Unroll2; diff --git a/gcc/testsuite/gnat.dg/unroll3.adb b/gcc/testsuite/gnat.dg/unroll3.adb new file mode 100644 index 00000000000..3a0725b8093 --- /dev/null +++ b/gcc/testsuite/gnat.dg/unroll3.adb @@ -0,0 +1,26 @@ +-- { dg-do compile } +-- { dg-options "-O -fdump-tree-cunroll-details" } + +package body Unroll3 is + + function "+" (X, Y : Sarray) return Sarray is + R : Sarray; + begin + for I in Sarray'Range loop + pragma Loop_Optimize (Unroll); + R(I) := X(I) + Y(I); + end loop; + return R; + end; + + procedure Add (X, Y : Sarray; R : out Sarray) is + begin + for I in Sarray'Range loop + pragma Loop_Optimize (Unroll); + R(I) := X(I) + Y(I); + end loop; + end; + +end Unroll3; + +-- { dg-final { scan-tree-dump-times "note: loop with 3 iterations completely unrolled" 2 "cunroll" } } */ diff --git a/gcc/testsuite/gnat.dg/unroll3.ads b/gcc/testsuite/gnat.dg/unroll3.ads new file mode 100644 index 00000000000..8264fc7b489 --- /dev/null +++ b/gcc/testsuite/gnat.dg/unroll3.ads @@ -0,0 +1,9 @@ +package Unroll3 is + + type Sarray is array (1 .. 
4) of Float; + for Sarray'Alignment use 16; + + function "+" (X, Y : Sarray) return Sarray; + procedure Add (X, Y : Sarray; R : out Sarray); + +end Unroll3; diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c index bb30a921899..2835a4b722c 100644 --- a/gcc/tree-cfg.c +++ b/gcc/tree-cfg.c @@ -280,6 +280,11 @@ replace_loop_annotate_in_block (basic_block bb, struct loop *loop) case annot_expr_ivdep_kind: loop->safelen = INT_MAX; break; + case annot_expr_unroll_kind: + loop->unroll + = (unsigned short) tree_to_shwi (gimple_call_arg (stmt, 2)); + cfun->has_unroll = true; + break; case annot_expr_no_vector_kind: loop->dont_vectorize = true; break; @@ -338,6 +343,7 @@ replace_loop_annotate (void) switch ((annot_expr_kind) tree_to_shwi (gimple_call_arg (stmt, 1))) { case annot_expr_ivdep_kind: + case annot_expr_unroll_kind: case annot_expr_no_vector_kind: case annot_expr_vector_kind: break; @@ -8019,6 +8025,8 @@ print_loop (FILE *file, struct loop *loop, int indent, int verbosity) fprintf (file, ", estimate = "); print_decu (loop->nb_iterations_estimate, file); } + if (loop->unroll) + fprintf (file, ", unroll = %d", loop->unroll); fprintf (file, ")\n"); /* Print loop's body. 
*/ diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 4f9c6c7be63..a2633af478e 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -851,6 +851,7 @@ enum tree_node_kind { enum annot_expr_kind { annot_expr_ivdep_kind, + annot_expr_unroll_kind, annot_expr_no_vector_kind, annot_expr_vector_kind, annot_expr_parallel_kind, diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c index 5747785caea..ca98e43327e 100644 --- a/gcc/tree-inline.c +++ b/gcc/tree-inline.c @@ -2597,6 +2597,11 @@ copy_loops (copy_body_data *id, flow_loop_tree_node_add (dest_parent, dest_loop); dest_loop->safelen = src_loop->safelen; + if (src_loop->unroll) + { + dest_loop->unroll = src_loop->unroll; + cfun->has_unroll = true; + } dest_loop->dont_vectorize = src_loop->dont_vectorize; if (src_loop->force_vectorize) { diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c index c79b79f64e7..c9a3e6360e0 100644 --- a/gcc/tree-pretty-print.c +++ b/gcc/tree-pretty-print.c @@ -2633,6 +2633,10 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags, case annot_expr_ivdep_kind: pp_string (pp, ", ivdep"); break; + case annot_expr_unroll_kind: + pp_printf (pp, ", unroll %d", + (int) TREE_INT_CST_LOW (TREE_OPERAND (node, 2))); + break; case annot_expr_no_vector_kind: pp_string (pp, ", no-vector"); break; diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c index 8b1daa66b3e..a32e12bee07 100644 --- a/gcc/tree-ssa-loop-ivcanon.c +++ b/gcc/tree-ssa-loop-ivcanon.c @@ -681,11 +681,9 @@ try_unroll_loop_completely (struct loop *loop, HOST_WIDE_INT maxiter, location_t locus) { - unsigned HOST_WIDE_INT n_unroll = 0, ninsns, unr_insns; - struct loop_size size; + unsigned HOST_WIDE_INT n_unroll = 0; bool n_unroll_found = false; edge edge_to_cancel = NULL; - dump_flags_t report_flags = MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS; /* See if we proved number of iterations to be low constant. 
@@ -726,7 +724,8 @@ try_unroll_loop_completely (struct loop *loop, if (!n_unroll_found) return false; - if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) + if (!loop->unroll + && n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Not unrolling loop %d " @@ -740,121 +739,137 @@ try_unroll_loop_completely (struct loop *loop, if (n_unroll) { - bool large; if (ul == UL_SINGLE_ITER) return false; - /* EXIT can be removed only if we are sure it passes first N_UNROLL - iterations. */ - bool remove_exit = (exit && niter - && TREE_CODE (niter) == INTEGER_CST - && wi::leu_p (n_unroll, wi::to_widest (niter))); - - large = tree_estimate_loop_size - (loop, remove_exit ? exit : NULL, edge_to_cancel, &size, - PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); - ninsns = size.overall; - if (large) + if (loop->unroll) { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", - loop->num); - return false; + /* If the unrolling factor is too large, bail out. */ + if (n_unroll > (unsigned)loop->unroll) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + "Not unrolling loop %d: " + "user didn't want it unrolled completely.\n", + loop->num); + return false; + } } - - unr_insns = estimated_unrolled_size (&size, n_unroll); - if (dump_file && (dump_flags & TDF_DETAILS)) + else { - fprintf (dump_file, " Loop size: %d\n", (int) ninsns); - fprintf (dump_file, " Estimated size after unrolling: %d\n", - (int) unr_insns); - } + struct loop_size size; + /* EXIT can be removed only if we are sure it passes first N_UNROLL + iterations. */ + bool remove_exit = (exit && niter + && TREE_CODE (niter) == INTEGER_CST + && wi::leu_p (n_unroll, wi::to_widest (niter))); + bool large + = tree_estimate_loop_size + (loop, remove_exit ? 
exit : NULL, edge_to_cancel, &size, + PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)); + if (large) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: it is too large.\n", + loop->num); + return false; + } - /* If the code is going to shrink, we don't need to be extra cautious - on guessing if the unrolling is going to be profitable. */ - if (unr_insns - /* If there is IV variable that will become constant, we save - one instruction in the loop prologue we do not account - otherwise. */ - <= ninsns + (size.constant_iv != false)) - ; - /* We unroll only inner loops, because we do not consider it profitable - otheriwse. We still can cancel loopback edge of not rolling loop; - this is always a good idea. */ - else if (ul == UL_NO_GROWTH) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", - loop->num); - return false; - } - /* Outer loops tend to be less interesting candidates for complete - unrolling unless we can do a lot of propagation into the inner loop - body. For now we disable outer loop unrolling when the code would - grow. */ - else if (loop->inner) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "it is not innermost and code would grow.\n", - loop->num); - return false; - } - /* If there is call on a hot path through the loop, then - there is most probably not much to optimize. */ - else if (size.num_non_pure_calls_on_hot_path) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "contains call and code would grow.\n", - loop->num); - return false; - } - /* If there is pure/const call in the function, then we - can still optimize the unrolled loop body if it contains - some other interesting code than the calls and code - storing or cumulating the return value. 
*/ - else if (size.num_pure_calls_on_hot_path - /* One IV increment, one test, one ivtmp store - and one useful stmt. That is about minimal loop - doing pure call. */ - && (size.non_call_stmts_on_hot_path - <= 3 + size.num_pure_calls_on_hot_path)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "contains just pure calls and code would grow.\n", - loop->num); - return false; - } - /* Complete unrolling is a major win when control flow is removed and - one big basic block is created. If the loop contains control flow - the optimization may still be a win because of eliminating the loop - overhead but it also may blow the branch predictor tables. - Limit number of branches on the hot path through the peeled - sequence. */ - else if (size.num_branches_on_hot_path * (int)n_unroll - > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) - { + unsigned HOST_WIDE_INT ninsns = size.overall; + unsigned HOST_WIDE_INT unr_insns + = estimated_unrolled_size (&size, n_unroll); if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - " number of branches on hot path in the unrolled sequence" - " reach --param max-peel-branches limit.\n", - loop->num); - return false; - } - else if (unr_insns - > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Not unrolling loop %d: " - "(--param max-completely-peeled-insns limit reached).\n", - loop->num); - return false; + { + fprintf (dump_file, " Loop size: %d\n", (int) ninsns); + fprintf (dump_file, " Estimated size after unrolling: %d\n", + (int) unr_insns); + } + + /* If the code is going to shrink, we don't need to be extra + cautious on guessing if the unrolling is going to be + profitable. */ + if (unr_insns + /* If there is IV variable that will become constant, we + save one instruction in the loop prologue we do not + account otherwise. 
*/ + <= ninsns + (size.constant_iv != false)) + ; + /* We unroll only inner loops, because we do not consider it + profitable otherwise. We still can cancel loopback edge + of not rolling loop; this is always a good idea. */ + else if (ul == UL_NO_GROWTH) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: size would grow.\n", + loop->num); + return false; + } + /* Outer loops tend to be less interesting candidates for + complete unrolling unless we can do a lot of propagation + into the inner loop body. For now we disable outer loop + unrolling when the code would grow. */ + else if (loop->inner) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "it is not innermost and code would grow.\n", + loop->num); + return false; + } + /* If there is call on a hot path through the loop, then + there is most probably not much to optimize. */ + else if (size.num_non_pure_calls_on_hot_path) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "contains call and code would grow.\n", + loop->num); + return false; + } + /* If there is pure/const call in the function, then we can + still optimize the unrolled loop body if it contains some + other interesting code than the calls and code storing or + cumulating the return value. */ + else if (size.num_pure_calls_on_hot_path + /* One IV increment, one test, one ivtmp store and + one useful stmt. That is about minimal loop + doing pure call. */ + && (size.non_call_stmts_on_hot_path + <= 3 + size.num_pure_calls_on_hot_path)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "contains just pure calls and code would grow.\n", + loop->num); + return false; + } + /* Complete unrolling is a major win when control flow is + removed and one big basic block is created. 
If the loop + contains control flow the optimization may still be a win + because of eliminating the loop overhead but it also may + blow the branch predictor tables. Limit number of + branches on the hot path through the peeled sequence. */ + else if (size.num_branches_on_hot_path * (int)n_unroll + > PARAM_VALUE (PARAM_MAX_PEEL_BRANCHES)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "number of branches on hot path in the unrolled " + "sequence reaches --param max-peel-branches limit.\n", + loop->num); + return false; + } + else if (unr_insns + > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not unrolling loop %d: " + "number of insns in the unrolled sequence reaches " + "--param max-completely-peeled-insns limit.\n", + loop->num); + return false; + } } - if (!n_unroll) - dump_printf_loc (report_flags, locus, - "loop turned into non-loop; it never loops.\n"); initialize_original_copy_tables (); auto_sbitmap wont_exit (n_unroll + 1); @@ -898,8 +913,8 @@ try_unroll_loop_completely (struct loop *loop, else gimple_cond_make_true (cond); update_stmt (cond); - /* Do not remove the path. Doing so may remove outer loop - and confuse bookkeeping code in tree_unroll_loops_completelly. */ + /* Do not remove the path, as doing so may remove outer loop and + confuse bookkeeping code in tree_unroll_loops_completely. */ } /* Store the loop for later unlooping and exit removal. 
*/ @@ -915,7 +930,7 @@ try_unroll_loop_completely (struct loop *loop, { dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, locus, "loop with %d iterations completely unrolled", - (int) (n_unroll + 1)); + (int) n_unroll); if (loop->header->count.initialized_p ()) dump_printf (MSG_OPTIMIZED_LOCATIONS | TDF_DETAILS, " (header execution count %d)", @@ -963,7 +978,8 @@ try_peel_loop (struct loop *loop, struct loop_size size; int peeled_size; - if (!flag_peel_loops || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 + if (!flag_peel_loops + || PARAM_VALUE (PARAM_MAX_PEEL_TIMES) <= 0 || !peeled_loops) return false; @@ -974,20 +990,29 @@ try_peel_loop (struct loop *loop, return false; } + /* We don't peel loops that will be unrolled as this can duplicate a + loop more times than the user requested. */ + if (loop->unroll) + { + if (dump_file) + fprintf (dump_file, "Not peeling: user didn't want it peeled.\n"); + return false; + } + /* Peel only innermost loops. While the code is perfectly capable of peeling non-innermost loops, the heuristics would probably need some improvements. 
*/ if (loop->inner) { if (dump_file) - fprintf (dump_file, "Not peeling: outer loop\n"); + fprintf (dump_file, "Not peeling: outer loop\n"); return false; } if (!optimize_loop_for_speed_p (loop)) { if (dump_file) - fprintf (dump_file, "Not peeling: cold loop\n"); + fprintf (dump_file, "Not peeling: cold loop\n"); return false; } @@ -1005,7 +1030,7 @@ try_peel_loop (struct loop *loop, if (maxiter >= 0 && maxiter <= npeel) { if (dump_file) - fprintf (dump_file, "Not peeling: upper bound is known so can " + fprintf (dump_file, "Not peeling: upper bound is known so can " "unroll completely\n"); return false; } @@ -1016,7 +1041,7 @@ try_peel_loop (struct loop *loop, if (npeel > PARAM_VALUE (PARAM_MAX_PEEL_TIMES) - 1) { if (dump_file) - fprintf (dump_file, "Not peeling: rolls too much " + fprintf (dump_file, "Not peeling: rolls too much " "(%i + 1 > --param max-peel-times)\n", (int) npeel); return false; } @@ -1029,7 +1054,7 @@ try_peel_loop (struct loop *loop, > PARAM_VALUE (PARAM_MAX_PEELED_INSNS)) { if (dump_file) - fprintf (dump_file, "Not peeling: peeled sequence size is too large " + fprintf (dump_file, "Not peeling: peeled sequence size is too large " "(%i insns > --param max-peel-insns)", peeled_size); return false; } @@ -1317,7 +1342,9 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, if (!loop_father) return false; - if (may_increase_size && optimize_loop_nest_for_speed_p (loop) + if (loop->unroll > 1) + ul = UL_ALL; + else if (may_increase_size && optimize_loop_nest_for_speed_p (loop) /* Unroll outermost loops only if asked to do so or they do not cause code growth. */ && (unroll_outer || loop_outer (loop_father))) @@ -1345,7 +1372,7 @@ tree_unroll_loops_completely_1 (bool may_increase_size, bool unroll_outer, MAY_INCREASE_SIZE is true, perform the unrolling only if the size of the code does not increase. 
*/ -unsigned int +static unsigned int tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) { bitmap father_bbs = BITMAP_ALLOC (NULL); @@ -1522,9 +1549,9 @@ pass_complete_unroll::execute (function *fun) re-peeling the same loop multiple times. */ if (flag_peel_loops) peeled_loops = BITMAP_ALLOC (NULL); - int val = tree_unroll_loops_completely (flag_unroll_loops - || flag_peel_loops - || optimize >= 3, true); + unsigned int val = tree_unroll_loops_completely (flag_unroll_loops + || flag_peel_loops + || optimize >= 3, true); if (peeled_loops) { BITMAP_FREE (peeled_loops); @@ -1576,8 +1603,7 @@ pass_complete_unrolli::execute (function *fun) { unsigned ret = 0; - loop_optimizer_init (LOOPS_NORMAL - | LOOPS_HAVE_RECORDED_EXITS); + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); if (number_of_loops (fun) > 1) { scev_initialize (); diff --git a/gcc/tree.def b/gcc/tree.def index 063e90826a3..7523f5747bf 100644 --- a/gcc/tree.def +++ b/gcc/tree.def @@ -1406,8 +1406,9 @@ DEFTREECODE (TARGET_OPTION_NODE, "target_option_node", tcc_exceptional, 0) /* ANNOTATE_EXPR. Operand 0 is the expression to be annotated. - Operand 1 is the annotation kind. */ -DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 2) + Operand 1 is the annotation kind. + Operand 2 is additional data. */ +DEFTREECODE (ANNOTATE_EXPR, "annotate_expr", tcc_expression, 3) /* Cilk spawn statement Operand 0 is the CALL_EXPR. */