From a696bc4fec986318a1765c31ac9ee2db3849934a Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Thu, 21 Dec 2017 07:02:46 +0000 Subject: [PATCH] poly_int: loop versioning threshold This patch splits the loop versioning threshold out from the cost model threshold so that the former can become a poly_uint64. We still use a single test to enforce both limits where possible. 2017-12-21 Richard Sandiford Alan Hayward David Sherwood gcc/ * tree-vectorizer.h (_loop_vec_info): Add a versioning_threshold field. (LOOP_VINFO_VERSIONING_THRESHOLD): New macro. (vect_loop_versioning): Take the loop versioning threshold as a separate parameter. * tree-vect-loop-manip.c (vect_loop_versioning): Likewise. * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize versioning_threshold. (vect_analyze_loop_2): Compute the loop versioning threshold whenever loop versioning is needed, and store it in the new field rather than combining it with the cost model threshold. (vect_transform_loop): Update call to vect_loop_versioning. Try to combine the loop versioning and cost thresholds here. Co-Authored-By: Alan Hayward Co-Authored-By: David Sherwood From-SVN: r255934 --- gcc/ChangeLog | 18 ++++++++++++++++++ gcc/tree-vect-loop-manip.c | 14 +++++++++++++- gcc/tree-vect-loop.c | 25 +++++++++++++++++-------- gcc/tree-vectorizer.h | 10 +++++++++- 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 5f45bd15d9b..2a69053477c 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2017-12-21 Richard Sandiford + Alan Hayward + David Sherwood + + * tree-vectorizer.h (_loop_vec_info): Add a versioning_threshold + field. + (LOOP_VINFO_VERSIONING_THRESHOLD): New macro. + (vect_loop_versioning): Take the loop versioning threshold as a + separate parameter. + * tree-vect-loop-manip.c (vect_loop_versioning): Likewise. + * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize + versioning_threshold. 
+ (vect_analyze_loop_2): Compute the loop versioning threshold + whenever loop versioning is needed, and store it in the new + field rather than combining it with the cost model threshold. + (vect_transform_loop): Update call to vect_loop_versioning. + Try to combine the loop versioning and cost thresholds here. + 2017-12-21 Richard Sandiford Alan Hayward David Sherwood diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c index d56fbfcbde5..4b02a58313e 100644 --- a/gcc/tree-vect-loop-manip.c +++ b/gcc/tree-vect-loop-manip.c @@ -2154,7 +2154,8 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) void vect_loop_versioning (loop_vec_info loop_vinfo, - unsigned int th, bool check_profitability) + unsigned int th, bool check_profitability, + poly_uint64 versioning_threshold) { struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop; struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); @@ -2179,6 +2180,17 @@ vect_loop_versioning (loop_vec_info loop_vinfo, cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, build_int_cst (TREE_TYPE (scalar_loop_iters), th - 1)); + if (maybe_ne (versioning_threshold, 0U)) + { + tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), + versioning_threshold - 1)); + if (cond_expr) + cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node, + expr, cond_expr); + else + cond_expr = expr; + } if (version_niter) vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr); diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index 58e46a0f4d3..810fa5f3ce9 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -1112,6 +1112,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in) num_iters_unchanged (NULL_TREE), num_iters_assumptions (NULL_TREE), th (0), + versioning_threshold (0), vectorization_factor (0), max_vectorization_factor (0), unaligned_dr (NULL), @@ -2176,11 +2177,9 @@ start_over: 
enough for both peeled prolog loop and vector loop. This check can be merged along with threshold check of loop versioning, so increase threshold for this case if necessary. */ - if (LOOP_REQUIRES_VERSIONING (loop_vinfo) - && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) + if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) { - unsigned niters_th; + poly_uint64 niters_th; /* Niters for peeled prolog loop. */ if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) @@ -2197,9 +2196,8 @@ start_over: niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); /* One additional iteration because of peeling for gap. */ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) - niters_th++; - if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th) - LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th; + niters_th += 1; + LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; } gcc_assert (vectorization_factor @@ -2302,6 +2300,7 @@ again: LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; + LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; goto start_over; } @@ -7380,7 +7379,17 @@ vect_transform_loop (loop_vec_info loop_vinfo) if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) { - vect_loop_versioning (loop_vinfo, th, check_profitability); + poly_uint64 versioning_threshold + = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); + if (check_profitability + && ordered_p (poly_uint64 (th), versioning_threshold)) + { + versioning_threshold = ordered_max (poly_uint64 (th), + versioning_threshold); + check_profitability = false; + } + vect_loop_versioning (loop_vinfo, th, check_profitability, + versioning_threshold); check_profitability = false; } diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 56811e5e14b..8291a71a057 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -238,6 +238,12 @@ typedef struct _loop_vec_info : public vec_info 
{ PARAM_MIN_VECT_LOOP_BOUND. */ unsigned int th; + /* When applying loop versioning, the vector form should only be used + if the number of scalar iterations is >= this value, on top of all + the other requirements. Ignored when loop versioning is not being + used. */ + poly_uint64 versioning_threshold; + /* Unrolling factor */ int vectorization_factor; @@ -357,6 +363,7 @@ typedef struct _loop_vec_info : public vec_info { #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th +#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor @@ -1142,7 +1149,8 @@ extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, struct loop *, edge); -extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); +extern void vect_loop_versioning (loop_vec_info, unsigned int, bool, + poly_uint64); extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, tree *, int, bool, bool); extern source_location find_loop_location (struct loop *); -- 2.30.2