+2015-06-18 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66510
+ * tree-vect-stmts.c (vectorizable_load): Properly compute the
+ number of vector loads for SLP permuted loads.
+ * tree-vect-data-refs.c (vect_compute_data_ref_alignment): Also
+ check the stride for loop vectorization.
+ (vect_enhance_data_refs_alignment): Deal with SLP adjusted
+ vectorization factor.
+ (vect_analyze_group_access): If the group size is not a power
+ of two require an epilogue loop.
+ * tree-vect-loop.c (vect_analyze_loop_2): Move alignment
+ computation and optimization and alias test pruning after the final
+ vectorization factor computation.
+ * tree-vect-slp.c (vect_build_slp_tree_1): Remove check on
+ vector alignment.
+ (vect_transform_slp_perm_load): Properly compute the original
+ number of vector load stmts.
+
2015-06-18 Uros Bizjak <ubizjak@gmail.com>
* doc/invoke.texi (-fsanitize-sections): Split @var to avoid
+2015-06-18 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66510
+ * gcc.dg/vect/slp-perm-12.c: New testcase.
+
2015-06-17 Uros Bizjak <ubizjak@gmail.com>
* gcc.target/i386/noplt-1.c (dg-do): Fix target selector.
--- /dev/null
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-additional-options "-msse4" { target { i?86-*-* x86_64-*-* } } } */
+
+#include "tree-vect.h"
+
+extern void abort (void);
+
+unsigned char a[64];
+short b[88];
+
+void __attribute__((noinline))
+test(unsigned char * __restrict__ dst, short * __restrict__ tptr)
+{
+ int i;
+ for (i = 0; i < 8; i++)
+ {
+ dst[0] = (tptr[0] - tptr[0 + 3]);
+ dst[1] = (tptr[1] - tptr[1 + 3]);
+ dst[2] = (tptr[2] - tptr[2 + 3]);
+ dst[3] = (tptr[3] - tptr[3 + 3]);
+ dst[4] = (tptr[4] - tptr[4 + 3]);
+ dst[5] = (tptr[5] - tptr[5 + 3]);
+ dst[6] = (tptr[6] - tptr[6 + 3]);
+ dst[7] = (tptr[7] - tptr[7 + 3]);
+ dst += 8;
+ tptr += 11;
+ }
+}
+
+int main()
+{
+ int i;
+
+ check_vect ();
+
+ for (i = 0; i < 88; ++i)
+ {
+ b[i] = i;
+ __asm__ volatile ("");
+ }
+
+ test (a, b);
+
+ for (i = 0; i < 64; ++i)
+ if (a[i] != 253)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
}
}
- /* Similarly, if we're doing basic-block vectorization, we can only use
- base and misalignment information relative to an innermost loop if the
- misalignment stays the same throughout the execution of the loop.
- As above, this is the case if the stride of the dataref evenly divides
- by the vector size. */
- if (!loop)
+ /* Similarly we can only use base and misalignment information relative to
+ an innermost loop if the misalignment stays the same throughout the
+ execution of the loop. As above, this is the case if the stride of
+ the dataref evenly divides by the vector size. */
+ else
{
tree step = DR_STEP (dr);
+ unsigned vf = loop ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
if (tree_fits_shwi_p (step)
- && tree_to_shwi (step) % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
+ && ((tree_to_shwi (step) * vf)
+ % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "SLP: step doesn't divide the vector-size.\n");
+ "step doesn't divide the vector-size.\n");
misalign = NULL_TREE;
}
}
We do this automtically for cost model, since we calculate cost
for every peeling option. */
if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
- possible_npeel_number = vf /nelements;
+ {
+ if (STMT_SLP_TYPE (stmt_info))
+ possible_npeel_number
+ = (vf * GROUP_SIZE (stmt_info)) / nelements;
+ else
+ possible_npeel_number = vf / nelements;
+ }
/* Handle the aligned case. We may decide to align some other
access, making DR unaligned. */
for (j = 0; j < possible_npeel_number; j++)
{
- gcc_assert (npeel_tmp <= vf);
vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
npeel_tmp += nelements;
}
BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
}
- /* There is a gap in the end of the group. */
- if (groupsize - last_accessed_element > 0 && loop_vinfo)
+ /* If there is a gap in the end of the group or the group size cannot
+ be made a multiple of the vector element count then we access excess
+ elements in the last iteration and thus need to peel that off. */
+ if (loop_vinfo
+ && (groupsize - last_accessed_element > 0
+ || exact_log2 (groupsize) == -1))
+
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
return false;
}
+ /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
+ ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
+ if (!ok)
+ return false;
+
+ /* If there are any SLP instances mark them as pure_slp. */
+ bool slp = vect_make_slp_decision (loop_vinfo);
+ if (slp)
+ {
+ /* Find stmts that need to be both vectorized and SLPed. */
+ vect_detect_hybrid_slp (loop_vinfo);
+
+ /* Update the vectorization factor based on the SLP decision. */
+ vect_update_vf_for_slp (loop_vinfo);
+ }
+
/* Analyze the alignment of the data-refs in the loop.
Fail if a data reference is found that cannot be vectorized. */
return false;
}
- /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
- ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
- if (ok)
+ if (slp)
{
- /* If there are any SLP instances mark them as pure_slp. */
- if (vect_make_slp_decision (loop_vinfo))
- {
- /* Find stmts that need to be both vectorized and SLPed. */
- vect_detect_hybrid_slp (loop_vinfo);
-
- /* Update the vectorization factor based on the SLP decision. */
- vect_update_vf_for_slp (loop_vinfo);
-
- /* Analyze operations in the SLP instances. Note this may
- remove unsupported SLP instances which makes the above
- SLP kind detection invalid. */
- unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
- vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
- LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
- if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
- return false;
- }
+ /* Analyze operations in the SLP instances. Note this may
+ remove unsupported SLP instances which makes the above
+ SLP kind detection invalid. */
+ unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
+ vect_slp_analyze_operations (LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
+ LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
+ if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
+ return false;
}
- else
- return false;
/* Scan all the remaining operations in the loop that are not subject
to SLP and make sure they are vectorizable. */
int icode;
machine_mode optab_op2_mode;
machine_mode vec_mode;
- struct data_reference *first_dr;
HOST_WIDE_INT dummy;
- gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
+ gimple first_load = NULL, prev_first_load = NULL;
tree cond;
/* For every stmt in NODE find its def stmt/s. */
return false;
}
- old_first_load = first_load;
first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
if (prev_first_load)
{
}
else
prev_first_load = first_load;
-
- /* In some cases a group of loads is just the same load
- repeated N times. Only analyze its cost once. */
- if (first_load == stmt && old_first_load != first_load)
- {
- first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
- if (vect_supportable_dr_alignment (first_dr, false)
- == dr_unaligned_unsupported)
- {
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_MISSED_OPTIMIZATION,
- vect_location,
- "Build SLP failed: unsupported "
- "unaligned load ");
- dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
- stmt, 0);
- dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
- }
- /* Fatal mismatch. */
- matches[0] = false;
- return false;
- }
- }
}
} /* Grouped access. */
else
bool needs_first_vector = false;
machine_mode mode;
+ if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
+ return false;
+
+ stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
+
mode = TYPE_MODE (vectype);
if (!can_vec_perm_p (mode, false, NULL))
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
unrolling factor. */
- orig_vec_stmts_num = group_size *
- SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
+ orig_vec_stmts_num
+ = (STMT_VINFO_GROUP_SIZE (stmt_info)
+ * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
+ + nunits - 1) / nunits;
if (orig_vec_stmts_num == 1)
only_one_vec = true;
relatively to SLP_NODE_INSTANCE unrolling factor. */
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
- return false;
-
- stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
-
/* Generate permutation masks for every NODE. Number of masks for each NODE
is equal to GROUP_SIZE.
E.g., we have a group of three nodes with three loads from the same
if (slp)
{
grouped_load = false;
- vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
+ /* For SLP permutation support we need to load the whole group,
+ not only the number of vector stmts the permutation result
+ fits in. */
+ if (slp_perm)
+ vec_num = (group_size * vf + nunits - 1) / nunits;
+ else
+ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
group_gap_adj = vf * group_size - nunits * vec_num;
}
else