[SLP] SLP vectorization: vectorize vector constructors
authorJoel Hutton <Joel.Hutton@arm.com>
Mon, 4 Nov 2019 16:39:32 +0000 (16:39 +0000)
committerAndre Vieira <avieira@gcc.gnu.org>
Mon, 4 Nov 2019 16:39:32 +0000 (16:39 +0000)
gcc/ChangeLog:
2019-11-04  Joel Hutton  <Joel.Hutton@arm.com>

* expr.c (store_constructor): Modify to handle single element vectors.
* tree-vect-slp.c (vect_analyze_slp_instance): Add case for vector
constructors.
(vect_slp_check_for_constructors): New function.
(vect_slp_analyze_bb_1): Call new function to check for vector
constructors.
(vectorize_slp_instance_root_stmt): New function.
(vect_schedule_slp): Call new function to vectorize root stmt of vector
constructors.
* tree-vectorizer.h (SLP_INSTANCE_ROOT_STMT): New field.

gcc/testsuite/ChangeLog:
2019-11-04  Joel Hutton  <Joel.Hutton@arm.com>

* gcc.dg/vect/bb-slp-40.c: New test.
* gcc.dg/vect/bb-slp-41.c: New test.

From-SVN: r277784

gcc/ChangeLog
gcc/expr.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-40.c [new file with mode: 0644]
gcc/testsuite/gcc.dg/vect/bb-slp-41.c [new file with mode: 0644]
gcc/tree-vect-slp.c
gcc/tree-vectorizer.h

index ae177e25f1f345ef1774ea4fd558b62be0f3e028..25af3fc51d694a3a2fa8deb2b6a8ccc33d740613 100644 (file)
@@ -1,3 +1,16 @@
+2019-11-04  Joel Hutton  <Joel.Hutton@arm.com>
+
+       * expr.c (store_constructor): Modify to handle single element vectors.
+       * tree-vect-slp.c (vect_analyze_slp_instance): Add case for vector
+       constructors.
+       (vect_slp_check_for_constructors): New function.
+       (vect_slp_analyze_bb_1): Call new function to check for vector
+       constructors.
+       (vectorize_slp_instance_root_stmt): New function.
+       (vect_schedule_slp): Call new function to vectorize root stmt of vector
+       constructors.
+       * tree-vectorizer.h (SLP_INSTANCE_ROOT_STMT): New field.
+
 2019-11-04  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/92345
index 476c6865f20828fc68f455e70d4874eaabd9d08d..12165af21cb0ded943b63e9e503e42e267de8ce5 100644 (file)
@@ -6809,6 +6809,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
            && n_elts.is_constant (&const_n_elts))
          {
            machine_mode emode = eltmode;
+           bool vector_typed_elts_p = false;
 
            if (CONSTRUCTOR_NELTS (exp)
                && (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value))
@@ -6819,13 +6820,14 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
                                      * TYPE_VECTOR_SUBPARTS (etype),
                                      n_elts));
                emode = TYPE_MODE (etype);
+               vector_typed_elts_p = true;
              }
            icode = convert_optab_handler (vec_init_optab, mode, emode);
            if (icode != CODE_FOR_nothing)
              {
                unsigned int n = const_n_elts;
 
-               if (emode != eltmode)
+               if (vector_typed_elts_p)
                  {
                    n = CONSTRUCTOR_NELTS (exp);
                    vec_vec_init_p = true;
index a44eca595dc10282ab123ac0caa7eeca57838193..2059b594e16b6c25cf66b26d7b0cbb1350e6c105 100644 (file)
@@ -1,3 +1,8 @@
+2019-11-04  Joel Hutton  <Joel.Hutton@arm.com>
+
+       * gcc.dg/vect/bb-slp-40.c: New test.
+       * gcc.dg/vect/bb-slp-41.c: New test.
+
 2019-11-04  Richard Biener  <rguenther@suse.de>
 
        PR tree-optimization/92345
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c
new file mode 100644 (file)
index 0000000..a1dd372
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slp-all" } */
+/* { dg-require-effective-target vect_int } */
+
+char g_d[1024], g_s1[1024], g_s2[1024];
+void foo(void)
+{
+    char *d = g_d, *s1 = g_s1, *s2 = g_s2;
+
+    for ( int y = 0; y < 128; y++ )
+    {
+      d[0 ] = s1[0 ] + s2[0 ];
+      d[1 ] = s1[1 ] + s2[1 ];
+      d[2 ] = s1[2 ] + s2[2 ];
+      d[3 ] = s1[3 ] + s2[3 ];
+      d[4 ] = s1[4 ] + s2[4 ];
+      d[5 ] = s1[5 ] + s2[5 ];
+      d[6 ] = s1[6 ] + s2[6 ];
+      d[7 ] = s1[7 ] + s2[7 ];
+      d[8 ] = s1[8 ] + s2[8 ];
+      d[9 ] = s1[9 ] + s2[9 ];
+      d[10] = s1[10] + s2[10];
+      d[11] = s1[11] + s2[11];
+      d[12] = s1[12] + s2[12];
+      d[13] = s1[13] + s2[13];
+      d[14] = s1[14] + s2[14];
+      d[15] = s1[15] + s2[15];
+      d += 16;
+    }
+}
+
+/* See that we vectorize an SLP instance.  */
+/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c
new file mode 100644 (file)
index 0000000..b4a7124
--- /dev/null
@@ -0,0 +1,61 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */
+/* { dg-require-effective-target vect_int } */
+
+#define ARR_SIZE 1000
+
+void foo (int *a, int *b)
+{
+  int i;
+  for (i = 0; i < (ARR_SIZE - 2); ++i)
+    a[i] = b[0] + b[1] + b[i+1] + b[i+2];
+}
+
+void bar (int *a, int *b)
+{
+  int i;
+  for (i = 0; i < (ARR_SIZE - 2); ++i)
+  {
+    a[i] = b[0];
+  }
+  for (i = 0; i < (ARR_SIZE - 2); ++i)
+  {
+    a[i] = a[i] + b[1];
+  }
+  for (i = 0; i < (ARR_SIZE - 2); ++i)
+  {
+    a[i] = a[i] + b[i+1];
+  }
+  for (i = 0; i < (ARR_SIZE - 2); ++i)
+  {
+    a[i] = a[i] + b[i+2];
+  }
+}
+
+int main ()
+{
+  int a1[ARR_SIZE];
+  int a2[ARR_SIZE];
+  int b[ARR_SIZE];
+  int i;
+
+  for (i = 0; i < ARR_SIZE; i++)
+  {
+    a1[i] = 0;
+    a2[i] = 0;
+    b[i]  = i;
+  }
+
+  foo (a1, b);
+  bar (a2, b);
+
+  for (i = 0; i < ARR_SIZE; i++)
+    if (a1[i] != a2[i])
+      return 1;
+
+  return 0;
+
+}
+/* See that we vectorize an SLP instance.  */
+/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */
index 4b1a231bb6ddf60b4e943fe9066fb87f97a4993a..5549d053bf3cd102a4c4fcdc2e890c596927bd55 100644 (file)
@@ -1993,6 +1993,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
   unsigned int i;
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   vec<stmt_vec_info> scalar_stmts;
+  bool constructor = false;
 
   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
     {
@@ -2006,6 +2007,13 @@ vect_analyze_slp_instance (vec_info *vinfo,
       vectype = STMT_VINFO_VECTYPE (stmt_info);
       group_size = REDUC_GROUP_SIZE (stmt_info);
     }
+  else if (is_gimple_assign (stmt_info->stmt)
+           && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
+    {
+      vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
+      group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
+      constructor = true;
+    }
   else
     {
       gcc_assert (is_a <loop_vec_info> (vinfo));
@@ -2053,6 +2061,25 @@ vect_analyze_slp_instance (vec_info *vinfo,
       STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
        = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
     }
+  else if (constructor)
+    {
+      tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
+      tree val;
+      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
+       {
+         if (TREE_CODE (val) == SSA_NAME)
+           {
+             gimple* def = SSA_NAME_DEF_STMT (val);
+             stmt_vec_info def_info = vinfo->lookup_stmt (def);
+             /* Value is defined in another basic block.  */
+             if (!def_info)
+               return false;
+             scalar_stmts.safe_push (def_info);
+           }
+         else
+           return false;
+       }
+    }
   else
     {
       /* Collect reduction statements.  */
@@ -2138,6 +2165,8 @@ vect_analyze_slp_instance (vec_info *vinfo,
          SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
          SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
          SLP_INSTANCE_LOADS (new_instance) = vNULL;
+         SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
+
          vect_gather_slp_loads (new_instance, node);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
@@ -2955,6 +2984,43 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
   return true;
 }
 
+/* Find any vectorizable constructors and add them to the grouped_store
+   array.  */
+
+static void
+vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = bb_vinfo->region_begin;
+      gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
+    {
+      gimple *stmt = gsi_stmt (gsi);
+
+      if (is_gimple_assign (stmt)
+         && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
+         && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
+         && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
+       {
+         tree rhs = gimple_assign_rhs1 (stmt);
+
+         if (CONSTRUCTOR_NELTS (rhs) == 0)
+           continue;
+
+         poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
+
+         if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
+           continue;
+
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Found vectorizable constructor: %G\n", stmt);
+         stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
+         BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
+       }
+    }
+}
+
 /* Check if the region described by BB_VINFO can be vectorized, returning
    true if so.  When returning false, set FATAL to true if the same failure
    would prevent vectorization at other vector sizes, false if it is still
@@ -3002,6 +3068,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
       return false;
     }
 
+  vect_slp_check_for_constructors (bb_vinfo);
+
   /* If there are no grouped stores in the region there is no need
      to continue with pattern recog as vect_analyze_slp will fail
      anyway.  */
@@ -3058,6 +3126,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
         relevant.  */
       vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
       vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
+      if (SLP_INSTANCE_ROOT_STMT (instance))
+       STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
 
       i++;
     }
@@ -4074,6 +4144,49 @@ vect_remove_slp_scalar_calls (slp_tree node)
   vect_remove_slp_scalar_calls (node, visited);
 }
 
+/* Vectorize the instance root.  */
+
+void
+vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
+{
+  gassign *rstmt;
+
+  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
+    {
+      stmt_vec_info child_stmt_info;
+      int j;
+
+      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
+       {
+         tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
+         tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
+         rstmt = gimple_build_assign (root_lhs, vect_lhs);
+         break;
+       }
+    }
+  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
+    {
+      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
+      stmt_vec_info child_stmt_info;
+      int j;
+      vec<constructor_elt, va_gc> *v;
+      vec_alloc (v, nelts);
+
+      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
+       {
+         CONSTRUCTOR_APPEND_ELT (v,
+                                 NULL_TREE,
+                                 gimple_get_lhs (child_stmt_info->stmt));
+       }
+      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
+      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
+      tree r_constructor = build_constructor (rtype, v);
+      rstmt = gimple_build_assign (lhs, r_constructor);
+    }
+    gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
+    gsi_replace (&rgsi, rstmt, true);
+}
+
 /* Generate vector code for all SLP instances in the loop/basic block.  */
 
 void
@@ -4088,9 +4201,13 @@ vect_schedule_slp (vec_info *vinfo)
   slp_instances = vinfo->slp_instances;
   FOR_EACH_VEC_ELT (slp_instances, i, instance)
     {
+      slp_tree node = SLP_INSTANCE_TREE (instance);
       /* Schedule the tree of INSTANCE.  */
-      vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
-                                 instance, bst_map);
+      vect_schedule_slp_instance (node, instance, bst_map);
+
+      if (SLP_INSTANCE_ROOT_STMT (instance))
+       vectorize_slp_instance_root_stmt (node, instance);
+
       if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                          "vectorizing stmts using SLP.\n");
@@ -4119,6 +4236,9 @@ vect_schedule_slp (vec_info *vinfo)
          if (!STMT_VINFO_DATA_REF (store_info))
            break;
 
+         if (SLP_INSTANCE_ROOT_STMT (instance))
+           continue;
+
          store_info = vect_orig_stmt (store_info);
          /* Free the attached stmt_vec_info and remove the stmt.  */
          vinfo->remove_stmt (store_info);
index 71b5f380e2c91a7a551f6e26920bb17809abedf0..0fa64357be2c154662378af85642632aac50c523 100644 (file)
@@ -152,6 +152,10 @@ public:
   /* The root of SLP tree.  */
   slp_tree root;
 
+  /* For vector constructors, the constructor stmt that the SLP tree is built
+     from, NULL otherwise.  */
+  stmt_vec_info root_stmt;
+
   /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s.  */
   unsigned int group_size;
 
@@ -171,6 +175,7 @@ public:
 #define SLP_INSTANCE_GROUP_SIZE(S)               (S)->group_size
 #define SLP_INSTANCE_UNROLLING_FACTOR(S)         (S)->unrolling_factor
 #define SLP_INSTANCE_LOADS(S)                    (S)->loads
+#define SLP_INSTANCE_ROOT_STMT(S)                (S)->root_stmt
 
 #define SLP_TREE_CHILDREN(S)                     (S)->children
 #define SLP_TREE_SCALAR_STMTS(S)                 (S)->stmts