fix scalar BB vectorization costing
author Richard Biener <rguenther@suse.de>
Fri, 3 Jul 2020 09:28:53 +0000 (11:28 +0200)
committer Richard Biener <rguenther@suse.de>
Fri, 3 Jul 2020 10:54:12 +0000 (12:54 +0200)
We were costing the scalar pattern stmts rather than the scalar
original stmt and also not appropriately looking at the pattern
stmt for whether the stmt is vectorized.

2020-07-03  Richard Biener  <rguenther@suse.de>

* tree-vect-slp.c (vect_bb_slp_scalar_cost): Cost the
original non-pattern stmts, look at the pattern stmt
vectorization status.

* gcc.dg/vect/costmodel/x86_64/costmodel-vect-slp-2.c: New
testcase.

gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-slp-2.c [new file with mode: 0644]
gcc/tree-vect-slp.c

diff --git a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-slp-2.c b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-slp-2.c
new file mode 100644
index 0000000..1b7ac34
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-vect-slp-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fdump-tree-slp-details" } */
+
+int a[4], b[4];
+void foo()
+{
+  a[0] = b[0] / 7;
+  a[1] = b[1] / 7;
+  a[2] = b[2] / 7;
+  a[3] = b[3] / 7;
+}
+
+/* We should cost the original division stmt, not the scalar pattern stmts.  */
+/* { dg-final { scan-tree-dump-times " / 7 1 times scalar_stmt costs" 4 "slp2" } } */
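diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 727eba0b12f7e3eb55de419a4c979a392229c720..33fc87a9f861d3e92c7f1c8ad5844ee782507ad9 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c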
@@ -3039,7 +3039,6 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
 
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
     {
-      gimple *stmt = stmt_info->stmt;
       ssa_op_iter op_iter;
       def_operand_p def_p;
 
@@ -3051,7 +3050,9 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
-      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
+      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
+      gimple *orig_stmt = orig_stmt_info->stmt;
+      FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
@@ -3059,7 +3060,8 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
            if (!is_gimple_debug (use_stmt))
              {
                stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
-               if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
+               if (!use_stmt_info
+                   || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
                  {
                    (*life)[i] = true;
                    BREAK_FROM_IMM_USE_STMT (use_iter);
@@ -3070,23 +3072,23 @@ vect_bb_slp_scalar_cost (vec_info *vinfo,
        continue;
 
       /* Count scalar stmts only once.  */
-      if (gimple_visited_p (stmt))
+      if (gimple_visited_p (orig_stmt))
        continue;
-      gimple_set_visited (stmt, true);
+      gimple_set_visited (orig_stmt, true);
 
       vect_cost_for_stmt kind;
-      if (STMT_VINFO_DATA_REF (stmt_info))
-        {
-          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
+      if (STMT_VINFO_DATA_REF (orig_stmt_info))
+       {
+         if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
            kind = scalar_load;
-          else
+         else
            kind = scalar_store;
-        }
-      else if (vect_nop_conversion_p (stmt_info))
+       }
+      else if (vect_nop_conversion_p (orig_stmt_info))
        continue;
       else
        kind = scalar_stmt;
-      record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
+      record_stmt_cost (cost_vec, 1, kind, orig_stmt_info, 0, vect_body);
     }
 
   auto_vec<bool, 20> subtree_life;