re PR tree-optimization/53342 (rnflow.f90 is ~5% slower after revision 187340)
author Richard Biener <rguenther@suse.de>
Tue, 5 Feb 2013 15:33:35 +0000 (15:33 +0000)
committer Richard Biener <rguenth@gcc.gnu.org>
Tue, 5 Feb 2013 15:33:35 +0000 (15:33 +0000)
2013-02-05  Richard Biener  <rguenther@suse.de>

PR tree-optimization/53342
PR tree-optimization/53185
* tree-vectorizer.h (vect_check_strided_load): Remove.
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do
not disallow peeling for vectorized strided loads.
(vect_check_strided_load): Make static and simplify.
(vect_analyze_data_refs): Adjust.
* tree-vect-stmts.c (vectorizable_load): Handle peeled loops
correctly when vectorizing strided loads.

* gcc.dg/vect/pr53185-2.c: New testcase.

From-SVN: r195759
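
Background: the affected loops contain strided loads, where a scalar
location is read with a loop-invariant stride, as in this minimal
sketch (illustrative only; it mirrors fn1 in the new testcase below):

  /* d[] is read at runtime stride c, so the vectorizer cannot use one
     contiguous vector load and instead assembles each vector from
     scalar element loads.  Before this patch such a load also forced
     do_peeling = false, disabling peeling for alignment entirely.  */
  void
  foo (int *__restrict f, int *__restrict d, int n, int c)
  {
    int i;
    for (i = 0; i < n; i++)
      f[i] = d[i * c];
  }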

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/pr53185-2.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4150bb821f90e8dbd90c529a689e6621f2e221a7..595dad4cf5b7d255ba3cecb042441c6bf5fe504b 100644
@@ -1,3 +1,15 @@
+2013-02-05  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/53342
+       PR tree-optimization/53185
+       * tree-vectorizer.h (vect_check_strided_load): Remove.
+       * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do
+       not disallow peeling for vectorized strided loads.
+       (vect_check_strided_load): Make static and simplify.
+       (vect_analyze_data_refs): Adjust.
+       * tree-vect-stmts.c (vectorizable_load): Handle peeled loops
+       correctly when vectorizing strided loads.
+
 2013-02-05  Richard Biener  <rguenther@suse.de>
 
        * doc/install.texi: Refer to ISL, not PPL.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a9216474cb8ad44f6d42f214d565af899728d1c6..a9d19c9cfab35797a3962015e80dde4f602954b1 100644
@@ -1,3 +1,9 @@
+2013-02-05  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/53342
+       PR tree-optimization/53185
+       * gcc.dg/vect/pr53185-2.c: New testcase.
+
 2013-02-05  Jan Hubicka  <jh@suse.cz>
 
        PR tree-optimization/55789
diff --git a/gcc/testsuite/gcc.dg/vect/pr53185-2.c b/gcc/testsuite/gcc.dg/vect/pr53185-2.c
new file mode 100644
index 0000000..2f9ea16
--- /dev/null
@@ -0,0 +1,27 @@
+void __attribute__((noinline,noclone))
+fn1 (int * __restrict f, int * __restrict d, unsigned short a, int c)
+{
+  unsigned short e;
+  for (e = 0; e < a; ++e)
+    f[e] = d[e * c];
+}
+
+extern void abort (void);
+
+int main ()
+{
+  int a[32], b[3 * 32];
+  int i, off;
+  for (i = 0; i < 3 * 32; ++i)
+    b[i] = i;
+  for (off = 0; off < 8; ++off)
+    {
+      fn1 (&a[off], &b[off], 32 - off, 3);
+      for (i = 0; i < 32 - off; ++i)
+       if (a[off+i] != b[off+i*3])
+         abort ();
+    }
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
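
Note on the testcase: calling fn1 at eight different start offsets
makes the data references begin at varying alignments, so peeling for
alignment (re-enabled by this patch) peels a varying number of
prologue iterations; the comparison loop aborts if the peeled strided
load used a stale base, the PR53185 failure mode.
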
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 8a772750c681c8d7156e3218bad2e3138fbd44db..74d8c3a41fa97dc230a3cd5e5bb564afe297a284 100644
@@ -1615,18 +1615,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
         continue;
 
-      /* FORNOW: Any strided load prevents peeling.  The induction
-         variable analysis will fail when the prologue loop is generated,
-        and so we can't generate the new base for the pointer.  */
-      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-       {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                             "strided load prevents peeling");
-         do_peeling = false;
-         break;
-       }
-
       /* For invariant accesses there is nothing to enhance.  */
       if (integer_zerop (DR_STEP (dr)))
        continue;
@@ -2890,9 +2878,8 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
    This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
    base pointer) only.  */
 
-bool
-vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
-                        tree *stepp)
+static bool
+vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
@@ -2925,10 +2912,6 @@ vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
       || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
     return false;
 
-  if (basep)
-    *basep = iv.base;
-  if (stepp)
-    *stepp = iv.step;
   return true;
 }
 
@@ -3473,8 +3456,7 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
        {
          bool strided_load = false;
          if (!nested_in_vect_loop_p (loop, stmt))
-           strided_load
-             = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
+           strided_load = vect_check_strided_load (stmt, loop_vinfo);
          if (!strided_load)
            {
              if (dump_enabled_p ())
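
With the tree * output parameters gone, vect_check_strided_load is now
a pure predicate used only during analysis.  The base and step of the
strided access are instead computed at transform time in
vectorizable_load (next diff) from the data-reference records, base =
DR_BASE_ADDRESS + DR_OFFSET + DR_INIT and step = DR_STEP, so the
transform no longer reruns simple_iv, the induction-variable analysis
that, per the removed FORNOW comment above, fails once a prologue loop
has been peeled.
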
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 1f7da00ffea7bd835ca5e8ead08201cedccb4621..1712d950e609b1356b4b2db0653d668be7de1801 100644
@@ -4353,7 +4353,6 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   tree aggr_type;
   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
-  tree stride_base, stride_step;
   int gather_scale = 1;
   enum vect_def_type gather_dt = vect_unknown_def_type;
 
@@ -4462,11 +4461,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
        }
     }
   else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
-    {
-      if (!vect_check_strided_load (stmt, loop_vinfo,
-                                   &stride_base, &stride_step))
-       return false;
-    }
+    ;
   else
     {
       negative = tree_int_cst_compare (nested_in_vect_loop
@@ -4674,13 +4669,21 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       bool insert_after;
       gimple incr;
       tree offvar;
-      tree ref = DR_REF (dr);
       tree ivstep;
       tree running_off;
       vec<constructor_elt, va_gc> *v = NULL;
       gimple_seq stmts = NULL;
+      tree stride_base, stride_step, alias_off;
+
+      gcc_assert (!nested_in_vect_loop);
 
-      gcc_assert (stride_base && stride_step);
+      stride_base
+       = fold_build_pointer_plus
+           (unshare_expr (DR_BASE_ADDRESS (dr)),
+            size_binop (PLUS_EXPR,
+                        convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
+                        convert_to_ptrofftype (DR_INIT(dr))));
+      stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
 
       /* For a load with loop-invariant (but other than power-of-2)
          stride (i.e. not a grouped access) like so:
@@ -4716,6 +4719,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
 
       prev_stmt_info = NULL;
       running_off = offvar;
+      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
       for (j = 0; j < ncopies; j++)
        {
          tree vec_inv;
@@ -4725,33 +4729,16 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
            {
              tree newref, newoff;
              gimple incr;
-             if (TREE_CODE (ref) == ARRAY_REF)
-               {
-                 newref = build4 (ARRAY_REF, TREE_TYPE (ref),
-                                  unshare_expr (TREE_OPERAND (ref, 0)),
-                                  running_off,
-                                  NULL_TREE, NULL_TREE);
-                 if (!useless_type_conversion_p (TREE_TYPE (vectype),
-                                                 TREE_TYPE (newref)))
-                   newref = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype),
-                                    newref);
-               }
-             else
-               newref = build2 (MEM_REF, TREE_TYPE (vectype),
-                                running_off,
-                                TREE_OPERAND (ref, 1));
+             newref = build2 (MEM_REF, TREE_TYPE (vectype),
+                              running_off, alias_off);
 
              newref = force_gimple_operand_gsi (gsi, newref, true,
                                                 NULL_TREE, true,
                                                 GSI_SAME_STMT);
              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
              newoff = copy_ssa_name (running_off, NULL);
-             if (POINTER_TYPE_P (TREE_TYPE (newoff)))
-               incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
-                                                    running_off, stride_step);
-             else
-               incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
-                                                    running_off, stride_step);
+             incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
+                                                  running_off, stride_step);
              vect_finish_stmt_generation (stmt, incr, gsi);
 
              running_off = newoff;
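
Schematically, the sequence vectorizable_load now emits for one vector
of a strided load looks like the following hand-written GNU C analogy
(a sketch only, assuming a four-element int vector; the names are
illustrative, not from the patch):

  typedef int v4si __attribute__ ((vector_size (16)));

  static v4si
  load_strided_v4si (const char *p, long step)
  {
    /* Scalar element loads through a running pointer advanced by the
       loop-invariant byte stride (running_off / stride_step in the
       patch), then gathered into one vector, corresponding to the
       CONSTRUCTOR built from the MEM_REFs above.  */
    int e0 = *(const int *) p;  p += step;
    int e1 = *(const int *) p;  p += step;
    int e2 = *(const int *) p;  p += step;
    int e3 = *(const int *) p;
    return (v4si) { e0, e1, e2, e3 };
  }
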
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6fb9cfd39d8534675b066365539097ac888dda8b..8d3a3def1f4425b12f254d10d66c41aabb9b64a3 100644
@@ -923,7 +923,6 @@ extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
 extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
 extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
                               int *);
-extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *);
 extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
 extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
                                      tree *, gimple_stmt_iterator *,
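
To observe the effect, the new testcase can also be built outside the
harness on a target with vector support, e.g.:

  gcc -O3 -fdump-tree-vect-details pr53185-2.c && ./a.out

With the fix, the vect dump should report the loop in fn1 as
vectorized even when peeling for alignment is applied, and the run
must not abort.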