tree-optimization/97334 - improve BB SLP discovery
author: Richard Biener <rguenther@suse.de>
Fri, 9 Oct 2020 10:24:46 +0000 (12:24 +0200)
committer: Richard Biener <rguenther@suse.de>
Fri, 9 Oct 2020 11:15:10 +0000 (13:15 +0200)
We're running into a multiplication with one unvectorizable
operand, which we expect to build from scalars, but SLP discovery
fatally fails the build of both operands since one stmt is commutated:

  _60 = _58 * _59;
  _63 = _59 * _62;
  _66 = _59 * _65;
...

where _59 is the "bad" operand.  The following patch makes the
case work where the first stmt has a good operand by not fatally
failing the SLP build for the operand but communicating upwards
how to commutate.

2020-10-09  Richard Biener  <rguenther@suse.de>

PR tree-optimization/97334
* tree-vect-slp.c (vect_build_slp_tree_1): Do not fatally
fail lanes other than zero when BB vectorizing.

* gcc.dg/vect/bb-slp-pr65935.c: Amend.

gcc/testsuite/gcc.dg/vect/bb-slp-pr65935.c
gcc/tree-vect-slp.c

index 4e3448eccd7cd4cdb6448aea5038a3e650f597c6..ea37e4e614cecbf4a13ea48f2b8cee8698f2ac62 100644 (file)
@@ -60,3 +60,6 @@ int main()
 /* We should also be able to use 2-lane SLP to initialize the real and
    imaginary components in the first loop of main.  */
 /* { dg-final { scan-tree-dump-times "optimized: basic block" 10 "slp1" } } */
+/* We should see the s->phase[dir] operand and only that operand built
+   from scalars.  See PR97334.  */
+/* { dg-final { scan-tree-dump-times "Building vector operands from scalars" 1 "slp1" } } */
index 479c3eeaec7c926d99f7fe89d32eecab108ecf58..495fb970e24c8c50685abf772db0692455e573dc 100644 (file)
@@ -773,6 +773,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
+         /* ???  For BB vectorization we want to commutate operands in a way
+            to shuffle all unvectorizable defs into one operand and have
+            the other still vectorized.  The following doesn't reliably
+            work for this, but it's the easiest we can do here.  */
+         if (is_a <bb_vec_info> (vinfo) && i != 0)
+           continue;
          /* Fatal mismatch.  */
          matches[0] = false;
           return false;
@@ -785,6 +791,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
+         if (is_a <bb_vec_info> (vinfo) && i != 0)
+           continue;
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
@@ -797,6 +805,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
              && !vect_record_max_nunits (vinfo, stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
+         if (is_a <bb_vec_info> (vinfo) && i != 0)
+           continue;
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
@@ -823,6 +833,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
+             if (is_a <bb_vec_info> (vinfo) && i != 0)
+               continue;
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
@@ -865,6 +877,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
+                     if (is_a <bb_vec_info> (vinfo) && i != 0)
+                       continue;
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
@@ -876,6 +890,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
+                     if (is_a <bb_vec_info> (vinfo) && i != 0)
+                       continue;
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
@@ -900,6 +916,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
              if (TREE_CODE (vec) != SSA_NAME
                  || !types_compatible_p (vectype, TREE_TYPE (vec)))
                {
+                 if (is_a <bb_vec_info> (vinfo) && i != 0)
+                   continue;
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: "
@@ -1048,6 +1066,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                                 "Build SLP failed: not grouped load %G", stmt);
 
              /* FORNOW: Not grouped loads are not supported.  */
+             if (is_a <bb_vec_info> (vinfo) && i != 0)
+               continue;
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
@@ -1066,6 +1086,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
+             if (is_a <bb_vec_info> (vinfo) && i != 0)
+               continue;
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;