re PR tree-optimization/84037 (Speed regression of polyhedron benchmark since r256644)
author Richard Biener <rguenther@suse.de>
Wed, 7 Feb 2018 15:46:17 +0000 (15:46 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Wed, 7 Feb 2018 15:46:17 +0000 (15:46 +0000)
2018-02-07  Richard Biener  <rguenther@suse.de>

PR tree-optimization/84037
* tree-vectorizer.h (struct _loop_vec_info): Add ivexpr_map member.
(cse_and_gimplify_to_preheader): Declare.
(vect_get_place_in_interleaving_chain): Likewise.
* tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
ivexpr_map.
(_loop_vec_info::~_loop_vec_info): Delete it.
(cse_and_gimplify_to_preheader): New function.
* tree-vect-slp.c (vect_get_place_in_interleaving_chain): Export.
* tree-vect-stmts.c (vectorizable_store): CSE base and steps.
(vectorizable_load): Likewise.  For grouped stores always base
the IV on the first element.
* tree-vect-loop-manip.c (vect_loop_versioning): Unshare versioning
condition before gimplifying.

From-SVN: r257453

gcc/ChangeLog
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

index 73c144f5f6ac7a96adfbcc56487335c1c9c6c467..9c4d0e87b87388ce78369e978b0d8ff6e3ef92c6 100644 (file)
@@ -1,3 +1,20 @@
+2018-02-07  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/84037
+       * tree-vectorizer.h (struct _loop_vec_info): Add ivexpr_map member.
+       (cse_and_gimplify_to_preheader): Declare.
+       (vect_get_place_in_interleaving_chain): Likewise.
+       * tree-vect-loop.c (_loop_vec_info::_loop_vec_info): Initialize
+       ivexpr_map.
+       (_loop_vec_info::~_loop_vec_info): Delete it.
+       (cse_and_gimplify_to_preheader): New function.
+       * tree-vect-slp.c (vect_get_place_in_interleaving_chain): Export.
+       * tree-vect-stmts.c (vectorizable_store): CSE base and steps.
+       (vectorizable_load): Likewise.  For grouped stores always base
+       the IV on the first element.
+       * tree-vect-loop-manip.c (vect_loop_versioning): Unshare versioning
+       condition before gimplifying.
+
 2018-02-07  Jakub Jelinek  <jakub@redhat.com>
 
        * tree-eh.c (operation_could_trap_helper_p): Ignore honor_trapv for
index 53684e57e9490f3ffef6eea46f40a08a9a1669ee..96d40c8c4b328145a469e997addaf99862dd55a9 100644 (file)
@@ -3015,7 +3015,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
     }
 
-  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
+  cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
+                                     &gimplify_stmt_list,
                                      is_gimple_condexpr, NULL_TREE);
   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 
index c5301684028562656c951ff1e0d7623ffc96c4ad..3a5114748131a6fdeb73ea1767612144960ea7ee 100644 (file)
@@ -1128,6 +1128,7 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in)
     unaligned_dr (NULL),
     peeling_for_alignment (0),
     ptr_mask (0),
+    ivexpr_map (NULL),
     slp_unrolling_factor (1),
     single_scalar_iteration_cost (0),
     vectorizable (false),
@@ -1251,10 +1252,38 @@ _loop_vec_info::~_loop_vec_info ()
   free (bbs);
 
   release_vec_loop_masks (&masks);
+  delete ivexpr_map;
 
   loop->aux = NULL;
 }
 
+/* Return an invariant or register for EXPR and emit necessary
+   computations in the LOOP_VINFO loop preheader.  */
+
+tree
+cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
+{
+  if (is_gimple_reg (expr)
+      || is_gimple_min_invariant (expr))
+    return expr;
+
+  if (! loop_vinfo->ivexpr_map)
+    loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
+  tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
+  if (! cached)
+    {
+      gimple_seq stmts = NULL;
+      cached = force_gimple_operand (unshare_expr (expr),
+                                    &stmts, true, NULL_TREE);
+      if (stmts)
+       {
+         edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
+         gsi_insert_seq_on_edge_immediate (e, stmts);
+       }
+    }
+  return cached;
+}
+
 /* Return true if we can use CMP_TYPE as the comparison type to produce
    all masks required to mask LOOP_VINFO.  */
 
index ca28632d6ba92b234860517d59a124535b67e527..c9f0feac76a032b365cec554556d6799fdb35584 100644 (file)
@@ -188,7 +188,7 @@ vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
 /* Find the place of the data-ref in STMT in the interleaving chain that starts
    from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
 
-static int
+int
 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
 {
   gimple *next_stmt = first_stmt;
index 0a5c4cbacdaf4de624437be259f0cfa2ca850989..64a728eb1294fe579491e95b6f031f5a50dcd4f9 100644 (file)
@@ -6455,7 +6455,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       tree offvar;
       tree ivstep;
       tree running_off;
-      gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       tree vec_oprnd;
       unsigned int g;
@@ -6467,11 +6466,11 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       stride_base
        = fold_build_pointer_plus
-           (unshare_expr (DR_BASE_ADDRESS (first_dr)),
+           (DR_BASE_ADDRESS (first_dr),
             size_binop (PLUS_EXPR,
-                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
+                        convert_to_ptrofftype (DR_OFFSET (first_dr)),
                         convert_to_ptrofftype (DR_INIT (first_dr))));
-      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
+      stride_step = fold_convert (sizetype, DR_STEP (first_dr));
 
       /* For a store with loop-invariant (but other than power-of-2)
          stride (i.e. not a grouped access) like so:
@@ -6563,15 +6562,15 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
+      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
+      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
       create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
-      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
-      if (stmts)
-       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
 
       prev_stmt_info = NULL;
       alias_off = build_int_cst (ref_type, 0);
@@ -7484,27 +7483,37 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       tree ivstep;
       tree running_off;
       vec<constructor_elt, va_gc> *v = NULL;
-      gimple_seq stmts = NULL;
       tree stride_base, stride_step, alias_off;
       /* Checked by get_load_store_type.  */
       unsigned int const_nunits = nunits.to_constant ();
+      unsigned HOST_WIDE_INT cst_offset = 0;
 
       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
       gcc_assert (!nested_in_vect_loop);
 
-      if (slp && grouped_load)
+      if (grouped_load)
        {
          first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
          first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
-         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
-         ref_type = get_group_alias_ptr_type (first_stmt);
        }
       else
        {
          first_stmt = stmt;
          first_dr = dr;
+       }
+      if (slp && grouped_load)
+       {
+         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+         ref_type = get_group_alias_ptr_type (first_stmt);
+       }
+      else
+       {
+         if (grouped_load)
+           cst_offset
+             = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
+                * vect_get_place_in_interleaving_chain (stmt, first_stmt));
          group_size = 1;
-         ref_type = reference_alias_ptr_type (DR_REF (first_dr));
+         ref_type = reference_alias_ptr_type (DR_REF (dr));
        }
 
       stride_base
@@ -7536,16 +7545,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
-      create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
+      stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
+      ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
+      create_iv (stride_base, ivstep, NULL,
                 loop, &incr_gsi, insert_after,
                 &offvar, NULL);
       incr = gsi_stmt (incr_gsi);
       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
 
-      stride_step = force_gimple_operand (unshare_expr (stride_step),
-                                         &stmts, true, NULL_TREE);
-      if (stmts)
-       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+      stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
 
       prev_stmt_info = NULL;
       running_off = offvar;
@@ -7634,7 +7642,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          for (i = 0; i < nloads; i++)
            {
              tree this_off = build_int_cst (TREE_TYPE (alias_off),
-                                            group_el * elsz);
+                                            group_el * elsz + cst_offset);
              new_stmt = gimple_build_assign (make_ssa_name (ltype),
                                              build2 (MEM_REF, ltype,
                                                      running_off, this_off));
index 56e875f20caaf8f576f803fe0a093ca9106e851d..33e6a915ea4b19946d6e8590e97688764a22a9f5 100644 (file)
@@ -440,6 +440,9 @@ typedef struct _loop_vec_info : public vec_info {
   /* Cost vector for a single scalar iteration.  */
   auto_vec<stmt_info_for_cost> scalar_cost_vec;
 
+  /* Map of IV base/step expressions to inserted name in the preheader.  */
+  hash_map<tree_operand_hash, tree> *ivexpr_map;
+
   /* The unrolling factor needed to SLP the loop. In case of that pure SLP is
      applied to the loop, i.e., no unrolling is needed, this is 1.  */
   poly_uint64 slp_unrolling_factor;
@@ -1544,6 +1547,7 @@ extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
                                        stmt_vector_for_cost *,
                                        stmt_vector_for_cost *,
                                        stmt_vector_for_cost *);
+extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
 
 /* In tree-vect-slp.c.  */
 extern void vect_free_slp_instance (slp_instance);
@@ -1564,6 +1568,7 @@ extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode,
                                            tree * = NULL, tree * = NULL);
 extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>,
                                      unsigned int, vec<tree> &);
+extern int vect_get_place_in_interleaving_chain (gimple *, gimple *);
 
 /* In tree-vect-patterns.c.  */
 /* Pattern recognition functions.