re PR tree-optimization/92645 (Hand written vector code is 450 times slower when...
authorRichard Biener <rguenther@suse.de>
Wed, 27 Nov 2019 08:56:23 +0000 (08:56 +0000)
committerRichard Biener <rguenth@gcc.gnu.org>
Wed, 27 Nov 2019 08:56:23 +0000 (08:56 +0000)
2019-11-27  Richard Biener  <rguenther@suse.de>

PR tree-optimization/92645
* tree-ssa-forwprop.c (simplify_vector_constructor): Handle
CTORs with just a subset of the original vectors.

* gcc.target/i386/pr92645-2.c: New testcase.
* gcc.target/i386/pr92645-3.c: Likewise.

From-SVN: r278758

gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/pr92645-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr92645-3.c [new file with mode: 0644]
gcc/tree-ssa-forwprop.c

index 1235b4411c25cdb6ddf6230a9c8429b2a279a819..78aa3ca818950f9439eedc9fe8b21f825f0f8a4f 100644 (file)
@@ -1,3 +1,9 @@
+2019-11-27  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/92645
+       * tree-ssa-forwprop.c (simplify_vector_constructor): Handle
+       CTORs with just a subset of the original vectors.
+
 2019-11-27  Richard Biener  <rguenther@suse.de>
 
        PR middle-end/92674
index 1b56973b06e44e53bb3d2e70069d91f078352d9d..df74ba61243539b5a0a2d8c1588ec09b92349b21 100644 (file)
@@ -1,3 +1,9 @@
+2019-11-27  Richard Biener  <rguenther@suse.de>
+
+       PR tree-optimization/92645
+       * gcc.target/i386/pr92645-2.c: New testcase.
+       * gcc.target/i386/pr92645-3.c: Likewise.
+
 2019-11-26  Paolo Carlini  <paolo.carlini@oracle.com>
 
        * g++.dg/cpp1z/bool-increment1.C: Test location(s) too.
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-2.c b/gcc/testsuite/gcc.target/i386/pr92645-2.c
new file mode 100644 (file)
index 0000000..d34ed3a
--- /dev/null
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -fdump-tree-cddce1" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v2si __attribute__((vector_size(8)));
+
+void low (v2si *dst, v4si *srcp)
+{
+  v4si src = *srcp;
+  *dst = (v2si) { src[0], src[1] };
+}
+
+void high (v2si *dst, v4si *srcp)
+{
+  v4si src = *srcp;
+  *dst = (v2si) { src[2], src[3] };
+}
+
+void even (v2si *dst, v4si *srcp)
+{
+  v4si src = *srcp;
+  *dst = (v2si) { src[0], src[2] };
+}
+
+void odd (v2si *dst, v4si *srcp)
+{
+  v4si src = *srcp;
+  *dst = (v2si) { src[1], src[3] };
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */
+/* Ideally highpart extraction would elide the permutation as well.  */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-3.c b/gcc/testsuite/gcc.target/i386/pr92645-3.c
new file mode 100644 (file)
index 0000000..9c08c9f
--- /dev/null
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx2 -fdump-tree-cddce1" } */
+
+typedef int v8si __attribute__((vector_size(32)));
+typedef float v4sf __attribute__((vector_size(16)));
+
+void low (v4sf *dst, v8si *srcp)
+{
+  v8si src = *srcp;
+  *dst = (v4sf) { src[0], src[1], src[2], src[3] };
+}
+
+void high (v4sf *dst, v8si *srcp)
+{
+  v8si src = *srcp;
+  *dst = (v4sf) { src[4], src[5], src[6], src[7] };
+}
+
+void even (v4sf *dst, v8si *srcp)
+{
+  v8si src = *srcp;
+  *dst = (v4sf) { src[0], src[2], src[4], src[6] };
+}
+
+void odd (v4sf *dst, v8si *srcp)
+{
+  v8si src = *srcp;
+  *dst = (v4sf) { src[1], src[3], src[5], src[7] };
+}
+
+/* { dg-final { scan-tree-dump-times "BIT_FIELD_REF" 4 "cddce1" } } */
+/* Four conversions, on the smaller vector type, to not convert excess
+   elements.  */
+/* { dg-final { scan-tree-dump-times " = \\\(vector\\\(4\\\) float\\\)" 4 "cddce1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "cddce1" } } */
+/* Ideally highpart extraction would elide the VEC_PERM_EXPR as well.  */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "cddce1" { xfail *-*-* } } } */
index 386406fbb38e46aded92f2bd84f287b0382523d3..7202d4ac554b9a2739687382911e3da475e581c4 100644 (file)
@@ -2037,6 +2037,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   tree op, op2, orig[2], type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
+  unsigned HOST_WIDE_INT refnelts;
   enum tree_code conv_code;
   constructor_elt *elt;
   bool maybe_ident;
@@ -2052,7 +2053,6 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   elem_type = TREE_TYPE (type);
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
-  vec_perm_builder sel (nelts, nelts, 1);
   orig[0] = NULL;
   orig[1] = NULL;
   conv_code = ERROR_MARK;
@@ -2061,6 +2061,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   tree one_nonconstant = NULL_TREE;
   auto_vec<tree> constants;
   constants.safe_grow_cleared (nelts);
+  auto_vec<std::pair<unsigned, unsigned>, 64> elts;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
     {
       tree ref, op1;
@@ -2079,7 +2080,8 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
                                        TREE_TYPE (TREE_TYPE (ref)))
          && known_eq (bit_field_size (op1), elem_size)
          && constant_multiple_p (bit_field_offset (op1),
-                                 elem_size, &elem))
+                                 elem_size, &elem)
+         && TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts))
        {
          unsigned int j;
          for (j = 0; j < 2; ++j)
@@ -2098,11 +2100,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
          if (j < 2)
            {
              orig[j] = ref;
-             if (j)
-               elem += nelts;
-             if (elem != i)
+             if (elem != i || j != 0)
                maybe_ident = false;
-             sel.quick_push (elem);
+             elts.safe_push (std::make_pair (j, elem));
              continue;
            }
          /* Else fallthru.  */
@@ -2131,28 +2131,41 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
          else if (!operand_equal_p (one_nonconstant, elt->value, 0))
            return false;
        }
-      sel.quick_push (i + nelts);
+      elts.safe_push (std::make_pair (1, i));
       maybe_ident = false;
     }
   if (i < nelts)
     return false;
 
   if (! orig[0]
-      || ! VECTOR_TYPE_P (TREE_TYPE (orig[0]))
-      || maybe_ne (TYPE_VECTOR_SUBPARTS (type),
-                  TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0]))))
-    return false;
-
-  tree tem;
-  if (conv_code != ERROR_MARK
-      && (! supportable_convert_operation (conv_code, type,
-                                          TREE_TYPE (orig[0]),
-                                          &tem, &conv_code)
-         || conv_code == CALL_EXPR))
+      || ! VECTOR_TYPE_P (TREE_TYPE (orig[0])))
     return false;
+  refnelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (orig[0])).to_constant ();
 
   if (maybe_ident)
     {
+      tree conv_src_type
+       = (nelts != refnelts
+          ? (conv_code != ERROR_MARK
+             ? build_vector_type (TREE_TYPE (TREE_TYPE (orig[0])), nelts)
+             : type)
+          : TREE_TYPE (orig[0]));
+      tree tem;
+      if (conv_code != ERROR_MARK
+         && (!supportable_convert_operation (conv_code, type, conv_src_type,
+                                             &tem, &conv_code)
+             || conv_code == CALL_EXPR))
+       return false;
+      if (nelts != refnelts)
+       {
+         gassign *lowpart
+           = gimple_build_assign (make_ssa_name (conv_src_type),
+                                  build3 (BIT_FIELD_REF, conv_src_type,
+                                          orig[0], TYPE_SIZE (conv_src_type),
+                                          bitsize_zero_node));
+         gsi_insert_before (gsi, lowpart, GSI_SAME_STMT);
+         orig[0] = gimple_assign_lhs (lowpart);
+       }
       if (conv_code == ERROR_MARK)
        gimple_assign_set_rhs_from_tree (gsi, orig[0]);
       else
@@ -2161,34 +2174,66 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
     }
   else
     {
-      tree mask_type;
+      tree mask_type, perm_type, conv_src_type;
+      if (orig[1] == error_mark_node && conv_code != ERROR_MARK)
+       {
+         /* ???  For subsetting a larger vector we need to permute the original
+            but then the constants are in the converted type already which is
+            why for that case we first convert and then permute.  */
+         if (nelts != refnelts)
+           return false;
+         conv_src_type = TREE_TYPE (orig[0]);
+         perm_type = type;
+       }
+      else
+       {
+         perm_type = TREE_TYPE (orig[0]);
+         conv_src_type = (nelts == refnelts
+                          ? perm_type
+                          : build_vector_type (TREE_TYPE (perm_type), nelts));
+       }
+      tree tem;
+      if (conv_code != ERROR_MARK
+         && (!supportable_convert_operation (conv_code, type, conv_src_type,
+                                             &tem, &conv_code)
+             || conv_code == CALL_EXPR))
+       return false;
 
-      vec_perm_indices indices (sel, orig[1] ? 2 : 1, nelts);
-      if (!can_vec_perm_const_p (TYPE_MODE (type), indices))
+      /* Now that we know the number of elements of the source build the
+        permute vector.  */
+      vec_perm_builder sel (refnelts, refnelts, 1);
+      for (i = 0; i < elts.length (); ++i)
+       sel.quick_push (elts[i].second + elts[i].first * refnelts);
+      /* And fill the tail with "something".  It's really don't care,
+         and ideally we'd allow VEC_PERM to have a smaller destination
+        vector.  */
+      for (; i < refnelts; ++i)
+       sel.quick_push (i - elts.length ());
+      vec_perm_indices indices (sel, orig[1] ? 2 : 1, refnelts);
+      if (!can_vec_perm_const_p (TYPE_MODE (perm_type), indices))
        return false;
       mask_type
        = build_vector_type (build_nonstandard_integer_type (elem_size, 1),
-                            nelts);
+                            refnelts);
       if (GET_MODE_CLASS (TYPE_MODE (mask_type)) != MODE_VECTOR_INT
          || maybe_ne (GET_MODE_SIZE (TYPE_MODE (mask_type)),
-                      GET_MODE_SIZE (TYPE_MODE (type))))
+                      GET_MODE_SIZE (TYPE_MODE (perm_type))))
        return false;
       op2 = vec_perm_indices_to_tree (mask_type, indices);
       bool convert_orig0 = false;
+      gimple_seq stmts = NULL;
       if (!orig[1])
        orig[1] = orig[0];
       else if (orig[1] == error_mark_node
               && one_nonconstant)
        {
-         gimple_seq seq = NULL;
-         orig[1] = gimple_build_vector_from_val (&seq, UNKNOWN_LOCATION,
-                                                 type, one_nonconstant);
-         gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
-         convert_orig0 = true;
+         orig[1] = gimple_build_vector_from_val (&stmts, UNKNOWN_LOCATION,
+                                                 perm_type, one_nonconstant);
+         convert_orig0 = conv_code != ERROR_MARK;
        }
       else if (orig[1] == error_mark_node)
        {
-         tree_vector_builder vec (type, nelts, 1);
+         tree_vector_builder vec (perm_type, nelts, 1);
          for (unsigned i = 0; i < nelts; ++i)
            if (constants[i])
              vec.quick_push (constants[i]);
@@ -2196,30 +2241,29 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
              /* ??? Push a don't-care value.  */
              vec.quick_push (one_constant);
          orig[1] = vec.build ();
-         convert_orig0 = true;
+         convert_orig0 = conv_code != ERROR_MARK;
        }
-      if (conv_code == ERROR_MARK)
-       gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0],
-                                       orig[1], op2);
-      else if (convert_orig0)
+      tree res;
+      if (convert_orig0)
        {
-         gimple *conv
-           = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]);
-         orig[0] = gimple_assign_lhs (conv);
-         gsi_insert_before (gsi, conv, GSI_SAME_STMT);
-         gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR,
-                                         orig[0], orig[1], op2);
+         gcc_assert (nelts == refnelts);
+         res = gimple_build (&stmts, conv_code, type, orig[0]);
+         res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
+                             res, orig[1], op2);
        }
       else
        {
-         gimple *perm
-           = gimple_build_assign (make_ssa_name (TREE_TYPE (orig[0])),
-                                  VEC_PERM_EXPR, orig[0], orig[1], op2);
-         orig[0] = gimple_assign_lhs (perm);
-         gsi_insert_before (gsi, perm, GSI_SAME_STMT);
-         gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
-                                         NULL_TREE, NULL_TREE);
+         res = gimple_build (&stmts, VEC_PERM_EXPR, perm_type,
+                             orig[0], orig[1], op2);
+         if (nelts != refnelts)
+           res = gimple_build (&stmts, BIT_FIELD_REF,
+                               conv_code != ERROR_MARK ? conv_src_type : type,
+                               res, TYPE_SIZE (type), bitsize_zero_node);
+         if (conv_code != ERROR_MARK)
+           res = gimple_build (&stmts, conv_code, type, res);
        }
+      gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+      gimple_assign_set_rhs_with_ops (gsi, SSA_NAME, res);
     }
   update_stmt (gsi_stmt (*gsi));
   return true;