poly_int: loop versioning threshold
authorRichard Sandiford <richard.sandiford@linaro.org>
Thu, 21 Dec 2017 07:02:46 +0000 (07:02 +0000)
committerRichard Sandiford <rsandifo@gcc.gnu.org>
Thu, 21 Dec 2017 07:02:46 +0000 (07:02 +0000)
This patch splits the loop versioning threshold out from the
cost model threshold so that the former can become a poly_uint64.
We still use a single test to enforce both limits where possible.

2017-12-21  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/
* tree-vectorizer.h (_loop_vec_info): Add a versioning_threshold
field.
(LOOP_VINFO_VERSIONING_THRESHOLD): New macro
(vect_loop_versioning): Take the loop versioning threshold as a
separate parameter.
* tree-vect-loop-manip.c (vect_loop_versioning): Likewise.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
versioning_threshold.
(vect_analyze_loop_2): Compute the loop versioning threshold
whenever loop versioning is needed, and store it in the new
field rather than combining it with the cost model threshold.
(vect_transform_loop): Update call to vect_loop_versioning.
Try to combine the loop versioning and cost thresholds here.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r255934

gcc/ChangeLog
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c
gcc/tree-vectorizer.h

index 5f45bd15d9b4c2a81324b7d01349d3a904576180..2a69053477cd363d45c5c410e50a5e86187e8aff 100644 (file)
@@ -1,3 +1,21 @@
+2017-12-21  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * tree-vectorizer.h (_loop_vec_info): Add a versioning_threshold
+       field.
+       (LOOP_VINFO_VERSIONING_THRESHOLD): New macro
+       (vect_loop_versioning): Take the loop versioning threshold as a
+       separate parameter.
+       * tree-vect-loop-manip.c (vect_loop_versioning): Likewise.
+       * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
+       versioning_threshold.
+       (vect_analyze_loop_2): Compute the loop versioning threshold
+       whenever loop versioning is needed, and store it in the new
+       field rather than combining it with the cost model threshold.
+       (vect_transform_loop): Update call to vect_loop_versioning.
+       Try to combine the loop versioning and cost thresholds here.
+
 2017-12-21  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
index d56fbfcbde5a484ad3a7213ce529cdabcd0cf4ea..4b02a58313ef178875c8b753afae1985f4b6a7cb 100644 (file)
@@ -2154,7 +2154,8 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
 
 void
 vect_loop_versioning (loop_vec_info loop_vinfo,
-                     unsigned int th, bool check_profitability)
+                     unsigned int th, bool check_profitability,
+                     poly_uint64 versioning_threshold)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
   struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
@@ -2179,6 +2180,17 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
     cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
                             build_int_cst (TREE_TYPE (scalar_loop_iters),
                                            th - 1));
+  if (maybe_ne (versioning_threshold, 0U))
+    {
+      tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
+                              build_int_cst (TREE_TYPE (scalar_loop_iters),
+                                             versioning_threshold - 1));
+      if (cond_expr)
+       cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
+                                expr, cond_expr);
+      else
+       cond_expr = expr;
+    }
 
   if (version_niter)
     vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
index 58e46a0f4d30d9bdf3f92e8c32eef8d8c028f693..810fa5f3ce9f225d2e22b4372e39c4f4eff5a68f 100644 (file)
@@ -1112,6 +1112,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in)
     num_iters_unchanged (NULL_TREE),
     num_iters_assumptions (NULL_TREE),
     th (0),
+    versioning_threshold (0),
     vectorization_factor (0),
     max_vectorization_factor (0),
     unaligned_dr (NULL),
@@ -2176,11 +2177,9 @@ start_over:
      enough for both peeled prolog loop and vector loop.  This check
      can be merged along with threshold check of loop versioning, so
      increase threshold for this case if necessary.  */
-  if (LOOP_REQUIRES_VERSIONING (loop_vinfo)
-      && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
-         || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)))
+  if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     {
-      unsigned niters_th;
+      poly_uint64 niters_th;
 
       /* Niters for peeled prolog loop.  */
       if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
@@ -2197,9 +2196,8 @@ start_over:
       niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
       /* One additional iteration because of peeling for gap.  */
       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
-       niters_th++;
-      if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th)
-       LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th;
+       niters_th += 1;
+      LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
     }
 
   gcc_assert (vectorization_factor
@@ -2302,6 +2300,7 @@ again:
   LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
   LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
   LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
+  LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
 
   goto start_over;
 }
@@ -7380,7 +7379,17 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 
   if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
     {
-      vect_loop_versioning (loop_vinfo, th, check_profitability);
+      poly_uint64 versioning_threshold
+       = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
+      if (check_profitability
+         && ordered_p (poly_uint64 (th), versioning_threshold))
+       {
+         versioning_threshold = ordered_max (poly_uint64 (th),
+                                             versioning_threshold);
+         check_profitability = false;
+       }
+      vect_loop_versioning (loop_vinfo, th, check_profitability,
+                           versioning_threshold);
       check_profitability = false;
     }
 
index 56811e5e14b419f5196858fc36e2c907f85d9a3e..8291a71a0574a791a95706f99407f7ae64921804 100644 (file)
@@ -238,6 +238,12 @@ typedef struct _loop_vec_info : public vec_info {
      PARAM_MIN_VECT_LOOP_BOUND.  */
   unsigned int th;
 
+  /* When applying loop versioning, the vector form should only be used
+     if the number of scalar iterations is >= this value, on top of all
+     the other requirements.  Ignored when loop versioning is not being
+     used.  */
+  poly_uint64 versioning_threshold;
+
   /* Unrolling factor  */
   int vectorization_factor;
 
@@ -357,6 +363,7 @@ typedef struct _loop_vec_info : public vec_info {
 #define LOOP_VINFO_NITERS_UNCHANGED(L)     (L)->num_iters_unchanged
 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L)   (L)->num_iters_assumptions
 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
+#define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
 #define LOOP_VINFO_VECTORIZABLE_P(L)       (L)->vectorizable
 #define LOOP_VINFO_VECT_FACTOR(L)          (L)->vectorization_factor
 #define LOOP_VINFO_MAX_VECT_FACTOR(L)      (L)->max_vectorization_factor
@@ -1142,7 +1149,8 @@ extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
                                                     struct loop *, edge);
-extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
+extern void vect_loop_versioning (loop_vec_info, unsigned int, bool,
+                                 poly_uint64);
 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
                                     tree *, int, bool, bool);
 extern source_location find_loop_location (struct loop *);