middle-end: Move load/store-lanes check till late.
authorTamar Christina <tamar.christina@arm.com>
Wed, 4 Nov 2020 22:33:11 +0000 (22:33 +0000)
committerTamar Christina <tamar.christina@arm.com>
Wed, 4 Nov 2020 22:33:11 +0000 (22:33 +0000)
This moves the code that checks for load/store lanes further in the pipeline and
places it after slp_optimize.  This would allow us to perform optimizations on
the SLP tree and only bail out if we really have a permute.

With this change it allows us to handle permutes such as {1,1,1,1} which should
be handled by a load and replicate.

This change however makes it all or nothing. Either all instances can be handled
or none at all.  This is why some of the test cases have been adjusted.

gcc/ChangeLog:

* tree-vect-slp.c (vect_analyze_slp_instance): Moved load/store lanes
check to ...
* tree-vect-loop.c (vect_analyze_loop_2): ..Here

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-11b.c: Update output scan.
* gcc.dg/vect/slp-perm-6.c: Likewise.

gcc/testsuite/gcc.dg/vect/slp-11b.c
gcc/testsuite/gcc.dg/vect/slp-perm-6.c
gcc/tree-vect-loop.c
gcc/tree-vect-slp.c

index 0cc23770badf0e00ef98769a2dd14a92dca32cca..3f16c9cec3e0ab6672d321e1bb265e7853920341 100644 (file)
@@ -45,4 +45,4 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided4 && vect_int_mult } } } } */
 /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! { vect_strided4 && vect_int_mult } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" } } */
index 38489291a2659c989121d44c9e9e7bdfaa12f868..816486a050d4fbf7d3365c829d48175ff7ba7f60 100644 (file)
@@ -106,7 +106,7 @@ int main (int argc, const char* argv[])
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && { {! vect_load_lanes } && {! vect_partial_vectors_usage_1 } } } } } } */
 /* The epilogues are vectorized using partial vectors.  */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { vect_perm3_int && { {! vect_load_lanes } && vect_partial_vectors_usage_1 } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
-/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "Built SLP cancelled: can use load/store-lanes" "vect" { xfail { vect_perm3_int && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { xfail vect_load_lanes } } } */
+/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { xfail vect_load_lanes } } } */
index 5e7188ab87a0649a1cfba42916a05a816a30f11f..547894c39c8033ff7348718b118864a9ea763494 100644 (file)
@@ -2365,6 +2365,78 @@ start_over:
                                       "unsupported SLP instances\n");
          goto again;
        }
+
+      /* Check whether any load in ALL SLP instances is possibly permuted.  */
+      slp_tree load_node, slp_root;
+      unsigned i, x;
+      slp_instance instance;
+      bool can_use_lanes = true;
+      FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), x, instance)
+       {
+         slp_root = SLP_INSTANCE_TREE (instance);
+         int group_size = SLP_TREE_LANES (slp_root);
+         tree vectype = SLP_TREE_VECTYPE (slp_root);
+         bool loads_permuted = false;
+         FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+           {
+             if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
+               continue;
+             unsigned j;
+             stmt_vec_info load_info;
+             FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
+               if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
+                 {
+                   loads_permuted = true;
+                   break;
+                 }
+           }
+
+         /* If the loads and stores can be handled with load/store-lane
+            instructions record it and move on to the next instance.  */
+         if (loads_permuted
+             && vect_store_lanes_supported (vectype, group_size, false))
+           {
+             FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
+               {
+                 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
+                     (SLP_TREE_SCALAR_STMTS (load_node)[0]);
+                 /* Use SLP for strided accesses (or if we can't
+                    load-lanes).  */
+                 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
+                     || ! vect_load_lanes_supported
+                           (STMT_VINFO_VECTYPE (stmt_vinfo),
+                            DR_GROUP_SIZE (stmt_vinfo), false))
+                   break;
+               }
+
+             can_use_lanes
+               = can_use_lanes && i == SLP_INSTANCE_LOADS (instance).length ();
+
+             if (can_use_lanes && dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "SLP instance %p can use load/store-lanes\n",
+                                instance);
+           }
+         else
+           {
+             can_use_lanes = false;
+             break;
+           }
+       }
+
+      /* If all SLP instances can use load/store-lanes abort SLP and try again
+        with SLP disabled.  */
+      if (can_use_lanes)
+       {
+         ok = opt_result::failure_at (vect_location,
+                                      "Built SLP cancelled: can use "
+                                      "load/store-lanes\n");
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "Built SLP cancelled: all SLP instances support "
+                            "load/store-lanes\n");
+         goto again;
+       }
     }
 
   /* Dissolve SLP-only groups.  */
index 11fe685bab8f4099a1f4511dfbf8e64db026b4a9..d498cd466eb7e8d98230f6b662437cf4fa312f27 100644 (file)
@@ -2263,54 +2263,6 @@ vect_build_slp_instance (vec_info *vinfo,
                             "SLP size %u vs. limit %u.\n",
                             tree_size, max_tree_size);
 
-         /* Check whether any load is possibly permuted.  */
-         slp_tree load_node;
-         bool loads_permuted = false;
-         FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
-           {
-             if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
-               continue;
-             unsigned j;
-             stmt_vec_info load_info;
-             FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
-               if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
-                 {
-                   loads_permuted = true;
-                   break;
-                 }
-           }
-
-         /* If the loads and stores can be handled with load/store-lane
-            instructions do not generate this SLP instance.  */
-         if (is_a <loop_vec_info> (vinfo)
-             && loads_permuted
-             && kind == slp_inst_kind_store
-             && vect_store_lanes_supported
-                  (STMT_VINFO_VECTYPE (scalar_stmts[0]), group_size, false))
-           {
-             slp_tree load_node;
-             FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
-               {
-                 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
-                     (SLP_TREE_SCALAR_STMTS (load_node)[0]);
-                 /* Use SLP for strided accesses (or if we can't load-lanes).  */
-                 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
-                     || ! vect_load_lanes_supported
-                     (STMT_VINFO_VECTYPE (stmt_vinfo),
-                      DR_GROUP_SIZE (stmt_vinfo), false))
-                   break;
-               }
-             if (i == SLP_INSTANCE_LOADS (new_instance).length ())
-               {
-                 if (dump_enabled_p ())
-                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                    "Built SLP cancelled: can use "
-                                    "load/store-lanes\n");
-                 vect_free_slp_instance (new_instance);
-                 return false;
-               }
-           }
-
          /* Fixup SLP reduction chains.  */
          if (kind == slp_inst_kind_reduc_chain)
            {