middle-end: optimize slp simplify back to back permutes.
author Tamar Christina <tamar.christina@arm.com>
Thu, 5 Nov 2020 11:46:35 +0000 (11:46 +0000)
committer Tamar Christina <tamar.christina@arm.com>
Thu, 5 Nov 2020 11:46:35 +0000 (11:46 +0000)
This optimizes sequential permutes, i.e. if there are two permutes back to back,
this function applies the permute of the parent to the child and removes the
parent.

This relies on the materialization point calculation in optimize SLP.

This allows us to remove useless permutes such as

ldr     q0, [x0, x3]
ldr     q2, [x1, x3]
trn1    v1.4s, v0.4s, v0.4s
trn2    v0.4s, v0.4s, v0.4s
trn1    v0.4s, v1.4s, v0.4s
mov     v1.16b, v3.16b
fcmla   v1.4s, v0.4s, v2.4s, #0
fcmla   v1.4s, v0.4s, v2.4s, #90
str     q1, [x2, x3]

from the sequence the vectorizer puts out and give

ldr     q0, [x0, x3]
ldr     q2, [x1, x3]
mov     v1.16b, v3.16b
fcmla   v1.4s, v0.4s, v2.4s, #0
fcmla   v1.4s, v0.4s, v2.4s, #90
str     q1, [x2, x3]

instead.

gcc/ChangeLog:

* tree-vect-slp.c (vect_slp_tree_permute_noop_p): New.
(vect_optimize_slp): Optimize permutes.
(vectorizable_slp_permutation): Fix typo.

gcc/tree-vect-slp.c

index 420c3c93374b788d96779bf0b730d1bc47a98f58..016883a26553e90cd955cf5df6bb0585cb38063d 100644 (file)
@@ -2941,6 +2941,18 @@ vect_optimize_slp (vec_info *vinfo)
            /* For loads simply drop the permutation, the load permutation
               already performs the desired permutation.  */
            ;
+         else if (SLP_TREE_LANE_PERMUTATION (node).exists ())
+           {
+             /* If the node is already a permute node we just need to apply
+                the permutation to the permute node itself.  */
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_NOTE, vect_location,
+                                "simplifying permute node %p\n",
+                                node);
+
+             vect_slp_permute (perms[perm], SLP_TREE_LANE_PERMUTATION (node),
+                               true);
+           }
          else
            {
              if (dump_enabled_p ())
@@ -5152,7 +5164,7 @@ vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "permutation requires at "
-                            "least three vectors");
+                            "least three vectors\n");
          gcc_assert (!gsi);
          return false;
        }