Simplify SLP code wrt SLP_TREE_DEF_TYPE
authorRichard Biener <rguenther@suse.de>
Thu, 4 Jun 2020 12:17:21 +0000 (14:17 +0200)
committerRichard Biener <rguenther@suse.de>
Thu, 4 Jun 2020 12:38:09 +0000 (14:38 +0200)
The following removes the ugly pushing of SLP_TREE_DEF_TYPE to
stmt_infos and instead makes sure to handle invariants fully
in vect_is_simple_use plus adjusting a few places I refrained
from touching when enforcing vector types for them.

It also simplifies building SLP nodes with all external operands
from scalars by not doing that in the parent but instead not
building those from the start.  That also gets rid of
vect_update_all_shared_vectypes.

2020-06-04  Richard Biener  <rguenther@suse.de>

* tree-vect-slp.c (vect_update_all_shared_vectypes): Remove.
(vect_build_slp_tree_2): Simplify building all external op
nodes from scalars.
(vect_slp_analyze_node_operations): Remove push/pop of
STMT_VINFO_DEF_TYPE.
(vect_schedule_slp_instance): Likewise.
* tree-vect-stmts.c (ect_check_store_rhs): Pass in the
stmt_info, use the vect_is_simple_use overload combining
SLP and stmt_info analysis.
(vect_is_simple_cond): Likewise.
(vectorizable_store): Adjust.
(vectorizable_condition): Likewise.
(vect_is_simple_use): Fully handle invariant SLP nodes
here.  Amend stmt_info operand extraction with COND_EXPR
and masked stores.
* tree-vect-loop.c (vectorizable_reduction): Deal with
COND_EXPR representation ugliness.

gcc/tree-vect-loop.c
gcc/tree-vect-slp.c
gcc/tree-vect-stmts.c

index 3c5c0ea9ebcf853729886d5cbf15688b7a86784c..ad26663595c64f4ada8cd6b701613f3cd808b159 100644 (file)
@@ -6197,6 +6197,12 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       gcc_assert (SLP_TREE_REPRESENTATIVE (slp_for_stmt_info) == stmt_info);
     }
   slp_tree *slp_op = XALLOCAVEC (slp_tree, op_type);
+  /* We need to skip an extra operand for COND_EXPRs with embedded
+     comparison.  */
+  unsigned opno_adjust = 0;
+  if (code == COND_EXPR
+      && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
+    opno_adjust = 1;
   for (i = 0; i < op_type; i++)
     {
       /* The condition of COND_EXPR is checked in vectorizable_condition().  */
@@ -6207,7 +6213,7 @@ vectorizable_reduction (loop_vec_info loop_vinfo,
       enum vect_def_type dt;
       tree op;
       if (!vect_is_simple_use (loop_vinfo, stmt_info, slp_for_stmt_info,
-                              i, &op, &slp_op[i], &dt, &tem,
+                              i + opno_adjust, &op, &slp_op[i], &dt, &tem,
                               &def_stmt_info))
        {
          if (dump_enabled_p ())
index cc33b64454c0be974e77017ff32588b4da56f7a4..f8b12f0dae9e2275b1cdcce5bc252521825ec172 100644 (file)
@@ -616,26 +616,6 @@ vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
   return false;
 }
 
-/* Try to infer and assign a vector type to all the statements in STMTS.
-   Used only for BB vectorization.  */
-
-static bool
-vect_update_all_shared_vectypes (vec_info *vinfo, vec<stmt_vec_info> stmts)
-{
-  tree vectype, nunits_vectype;
-  if (!vect_get_vector_types_for_stmt (vinfo, stmts[0], &vectype,
-                                      &nunits_vectype, stmts.length ()))
-    return false;
-
-  stmt_vec_info stmt_info;
-  unsigned int i;
-  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
-    if (!vect_update_shared_vectype (stmt_info, vectype))
-      return false;
-
-  return true;
-}
-
 /* Return true if call statements CALL1 and CALL2 are similar enough
    to be combined into the same SLP group.  */
 
@@ -1349,7 +1329,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
   FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
     {
       slp_tree child;
-      unsigned old_tree_size = this_tree_size;
       unsigned int j;
 
       if (oprnd_info->first_dt == vect_uninitialized_def)
@@ -1376,45 +1355,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                                        matches, npermutes,
                                        &this_tree_size, bst_map)) != NULL)
        {
-         /* If we have all children of a non-unary child built up from
-            scalars then just throw that away and build it up this node
-            from scalars.  */
-         if (is_a <bb_vec_info> (vinfo)
-             && SLP_TREE_CHILDREN (child).length () > 1
-             /* ???  Rejecting patterns this way doesn't work.  We'd have to
-                do extra work to cancel the pattern so the uses see the
-                scalar version.  */
-             && !oprnd_info->any_pattern)
-           {
-             slp_tree grandchild;
-
-             FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
-               if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
-                 break;
-             if (!grandchild
-                 && vect_update_all_shared_vectypes (vinfo,
-                                                     oprnd_info->def_stmts))
-               {
-                 /* Roll back.  */
-                 this_tree_size = old_tree_size;
-                 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
-                   vect_free_slp_tree (grandchild, false);
-                 SLP_TREE_CHILDREN (child).truncate (0);
-
-                 if (dump_enabled_p ())
-                   dump_printf_loc (MSG_NOTE, vect_location,
-                                    "Building parent vector operands from "
-                                    "scalars instead\n");
-                 oprnd_info->def_stmts = vNULL;
-                 SLP_TREE_DEF_TYPE (child) = vect_external_def;
-                 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
-                 oprnd_info->ops = vNULL;
-                 ++this_tree_size;
-                 children.safe_push (child);
-                 continue;
-               }
-           }
-
          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
@@ -1434,16 +1374,13 @@ vect_build_slp_tree_2 (vec_info *vinfo,
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (stmt_info)
-         && !oprnd_info->any_pattern
-         && vect_update_all_shared_vectypes (vinfo, oprnd_info->def_stmts))
+         && !oprnd_info->any_pattern)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Building vector operands from scalars\n");
          this_tree_size++;
-         child = vect_create_new_slp_node (oprnd_info->def_stmts, 0);
-         SLP_TREE_DEF_TYPE (child) = vect_external_def;
-         SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
+         child = vect_create_new_slp_node (oprnd_info->ops);
          children.safe_push (child);
          oprnd_info->ops = vNULL;
          oprnd_info->def_stmts = vNULL;
@@ -1517,46 +1454,6 @@ vect_build_slp_tree_2 (vec_info *vinfo,
                                            tem, npermutes,
                                            &this_tree_size, bst_map)) != NULL)
            {
-             /* If we have all children of a non-unary child built up from
-                scalars then just throw that away and build it up this node
-                from scalars.  */
-             if (is_a <bb_vec_info> (vinfo)
-                 && SLP_TREE_CHILDREN (child).length () > 1
-                 /* ???  Rejecting patterns this way doesn't work.  We'd have
-                    to do extra work to cancel the pattern so the uses see the
-                    scalar version.  */
-                 && !oprnd_info->any_pattern)
-               {
-                 unsigned int j;
-                 slp_tree grandchild;
-
-                 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
-                   if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
-                     break;
-                 if (!grandchild
-                     && (vect_update_all_shared_vectypes
-                           (vinfo, oprnd_info->def_stmts)))
-                   {
-                     /* Roll back.  */
-                     this_tree_size = old_tree_size;
-                     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
-                       vect_free_slp_tree (grandchild, false);
-                     SLP_TREE_CHILDREN (child).truncate (0);
-
-                     if (dump_enabled_p ())
-                       dump_printf_loc (MSG_NOTE, vect_location,
-                                        "Building parent vector operands from "
-                                        "scalars instead\n");
-                     oprnd_info->def_stmts = vNULL;
-                     SLP_TREE_DEF_TYPE (child) = vect_external_def;
-                     SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
-                     oprnd_info->ops = vNULL;
-                     ++this_tree_size;
-                     children.safe_push (child);
-                     continue;
-                   }
-               }
-
              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
@@ -1575,6 +1472,35 @@ fail:
 
   vect_free_oprnd_info (oprnds_info);
 
+  /* If we have all children of a non-unary child built up from
+     scalars then just throw that away, causing it built up
+     from scalars.  */
+  if (nops > 1
+      && is_a <bb_vec_info> (vinfo)
+      /* ???  Rejecting patterns this way doesn't work.  We'd have to
+        do extra work to cancel the pattern so the uses see the
+        scalar version.  */
+      && !is_pattern_stmt_p (stmt_info))
+    {
+      slp_tree child;
+      unsigned j;
+      FOR_EACH_VEC_ELT (children, j, child)
+       if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
+         break;
+      if (!child)
+       {
+         /* Roll back.  */
+         FOR_EACH_VEC_ELT (children, j, child)
+           vect_free_slp_tree (child, false);
+
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_NOTE, vect_location,
+                            "Building parent vector operands from "
+                            "scalars instead\n");
+         return NULL;
+       }
+    }
+
   *tree_size += this_tree_size + 1;
   *max_nunits = this_max_nunits;
 
@@ -2817,51 +2743,8 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
                                           visited, lvisited, cost_vec))
       return false;
 
-  /* ???  We have to catch the case late where two first scalar stmts appear
-     in multiple SLP children with different def type and fail.  Remember
-     original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
-     match it when that is vect_internal_def.  */
-  auto_vec<vect_def_type, 4> dt;
-  dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
-      dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
-
-  /* Push SLP node def-type to stmt operands.  */
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
-       && SLP_TREE_SCALAR_STMTS (child).length () != 0)
-      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
-       = SLP_TREE_DEF_TYPE (child);
-
-  /* Check everything worked out.  */
-  bool res = true;
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-      if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
-       {
-         if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
-           {
-             if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
-                 != SLP_TREE_DEF_TYPE (child))
-               res = false;
-           }
-         else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
-                  != dt[j])
-           res = false;
-       }
-  if (!res && dump_enabled_p ())
-    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                    "not vectorized: same operand with different "
-                    "def type in stmt.\n");
-
-  if (res)
-    res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
-                                             cost_vec);
-
-  /* Restore def-types.  */
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
-    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
-      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
+  bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
+                                                cost_vec);
 
   /* When the node can be vectorized cost invariant nodes it references.
      This is not done in DFS order to allow the refering node
@@ -4038,7 +3921,7 @@ vect_schedule_slp_instance (vec_info *vinfo,
                            slp_tree node, slp_instance instance)
 {
   gimple_stmt_iterator si;
-  int i, j;
+  int i;
   slp_tree child;
 
   /* See if we have already vectorized the node in the graph of the
@@ -4065,15 +3948,6 @@ vect_schedule_slp_instance (vec_info *vinfo,
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
     vect_schedule_slp_instance (vinfo, child, instance);
 
-  /* Push SLP node def-type to stmts.  */
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
-      {
-       stmt_vec_info child_stmt_info;
-       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
-         STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
-      }
-
   stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
 
   /* VECTYPE is the type of the destination.  */
@@ -4172,15 +4046,6 @@ vect_schedule_slp_instance (vec_info *vinfo,
     }
   if (!done_p)
     vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
-
-  /* Restore stmt def-types.  */
-  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
-    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
-      {
-       stmt_vec_info child_stmt_info;
-       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
-         STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
-      }
 }
 
 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
index 5548f0d987f430afeefc4650750631ff07e1268c..4cca06ee96e2c7fa251e48d7f7dd27017c3f2236 100644 (file)
@@ -2573,7 +2573,8 @@ vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info, tree mask,
    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
 
 static bool
-vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
+vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
+                     slp_tree slp_node, tree rhs,
                      vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
                      vec_load_store_type *vls_type_out)
 {
@@ -2589,7 +2590,9 @@ vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info, tree rhs,
 
   enum vect_def_type rhs_dt;
   tree rhs_vectype;
-  if (!vect_is_simple_use (rhs, vinfo, &rhs_dt, &rhs_vectype))
+  slp_tree slp_op;
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+                          &rhs, &slp_op, &rhs_dt, &rhs_vectype))
     {
       if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7487,7 +7490,7 @@ vectorizable_store (vec_info *vinfo,
       return false;
     }
 
-  if (!vect_check_store_rhs (vinfo, stmt_info,
+  if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
                             op, &rhs_dt, &rhs_vectype, &vls_type))
     return false;
 
@@ -9977,18 +9980,20 @@ vectorizable_load (vec_info *vinfo,
    condition operands are supportable using vec_is_simple_use.  */
 
 static bool
-vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
-                    tree *comp_vectype, enum vect_def_type *dts,
-                    tree vectype)
+vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
+                    slp_tree slp_node, tree *comp_vectype,
+                    enum vect_def_type *dts, tree vectype)
 {
   tree lhs, rhs;
   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
+  slp_tree slp_op;
 
   /* Mask case.  */
   if (TREE_CODE (cond) == SSA_NAME
       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
     {
-      if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
+                              &slp_op, &dts[0], comp_vectype)
          || !*comp_vectype
          || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
        return false;
@@ -10003,7 +10008,8 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
 
   if (TREE_CODE (lhs) == SSA_NAME)
     {
-      if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
+                              &lhs, &slp_op, &dts[0], &vectype1))
        return false;
     }
   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
@@ -10014,7 +10020,8 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
 
   if (TREE_CODE (rhs) == SSA_NAME)
     {
-      if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
+      if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
+                              &rhs, &slp_op, &dts[1], &vectype2))
        return false;
     }
   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
@@ -10158,21 +10165,17 @@ vectorizable_condition (vec_info *vinfo,
 
   cond_expr = gimple_assign_rhs1 (stmt);
 
-  if (!vect_is_simple_cond (cond_expr, vinfo, slp_node,
+  if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
                            &comp_vectype, &dts[0], vectype)
       || !comp_vectype)
     return false;
 
-  unsigned slp_adjust = 0;
-  if (slp_node && SLP_TREE_CHILDREN (slp_node).length () == 4)
-    /* ???  Hack.  Hope for COND_EXPR GIMPLE sanitizing or refactor
-       things more...  */
-    slp_adjust = 1;
+  unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
   slp_tree then_slp_node, else_slp_node;
-  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + slp_adjust,
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
                           &then_clause, &then_slp_node, &dts[2], &vectype1))
     return false;
-  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + slp_adjust,
+  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
                           &else_clause, &else_slp_node, &dts[3], &vectype2))
     return false;
 
@@ -10301,7 +10304,7 @@ vectorizable_condition (vec_info *vinfo,
       if (slp_node
          && (!vect_maybe_update_slp_op_vectype
                 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
-             || (slp_adjust == 1
+             || (op_adjust == 1
                  && !vect_maybe_update_slp_op_vectype
                        (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
              || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
@@ -11853,19 +11856,40 @@ vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        *op = gimple_get_lhs (SLP_TREE_SCALAR_STMTS (child)[0]->stmt);
       else
-       *op = SLP_TREE_SCALAR_OPS (child)[0];
+       {
+         if (def_stmt_info_out)
+           *def_stmt_info_out = NULL;
+         *op = SLP_TREE_SCALAR_OPS (child)[0];
+         *dt = SLP_TREE_DEF_TYPE (child);
+         *vectype = SLP_TREE_VECTYPE (child);
+         return true;
+       }
     }
   else
     {
       if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
        {
-         *op = gimple_op (ass, operand + 1);
-         /* ???  Ick.  But it will vanish with SLP only.  */
-         if (TREE_CODE (*op) == VIEW_CONVERT_EXPR)
-           *op = TREE_OPERAND (*op, 0);
+         if (gimple_assign_rhs_code (ass) == COND_EXPR
+             && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
+           {
+             if (operand < 2)
+               *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
+             else
+               *op = gimple_op (ass, operand);
+           }
+         else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
+           *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
+         else
+           *op = gimple_op (ass, operand + 1);
        }
       else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
-       *op = gimple_call_arg (call, operand);
+       {
+         if (gimple_call_internal_p (call)
+             && internal_store_fn_p (gimple_call_internal_fn (call)))
+           operand = internal_fn_stored_value_index (gimple_call_internal_fn
+                                                                       (call));
+         *op = gimple_call_arg (call, operand);
+       }
       else
        gcc_unreachable ();
     }