SLP: fix SVE issues
author Martin Liska <mliska@suse.cz>
Mon, 12 Oct 2020 10:54:31 +0000 (12:54 +0200)
committer Martin Liska <mliska@suse.cz>
Mon, 12 Oct 2020 13:23:20 +0000 (15:23 +0200)
The patch fixes the following 2 issues:

  .MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12);

Here we fail to return the last argument as the stored value.

ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4];

Here a bail-out is missing in vect_recog_over_widening_pattern.

gcc/ChangeLog:

PR tree-optimization/97079
* internal-fn.c (internal_fn_stored_value_index): Handle also
.MASK_STORE_LANES.
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail
out for unsupported TREE_TYPE.

gcc/testsuite/ChangeLog:

PR tree-optimization/97079
* gcc.target/aarch64/sve/pr97079.c: New test.

gcc/internal-fn.c
gcc/testsuite/gcc.target/aarch64/sve/pr97079.c [new file with mode: 0644]
gcc/tree-vect-patterns.c

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 92cb3cd845a6078c6ec833e512ca54443d970fe5..792d2ca568a4a11c3e3ad8abf6443f173b0650a7 100644 (file)
@@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn)
   switch (fn)
     {
     case IFN_MASK_STORE:
+    case IFN_MASK_STORE_LANES:
     case IFN_SCATTER_STORE:
     case IFN_MASK_SCATTER_STORE:
     case IFN_LEN_STORE:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
new file mode 100644 (file)
index 0000000..06e6a7c
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+sve" } */
+
+void g(void);
+
+int a[8][3];
+int b;
+void c(void)
+{
+  int d[] = {7, 3};
+  int *e = a[0];
+  int f;
+  b = 0;
+  for (; b < 8; b++)
+    {
+      f = 0;
+      for (; f < 3; f++)
+       a[b][f] = 0;
+    }
+  g();
+  *e = (long)d;
+}
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index bc5f2e6500172ab74d6a9e518ab2bb153db9faaa..ac56acebe016058cbbc9599cef348ec4211c19d6 100644 (file)
@@ -1638,6 +1638,8 @@ vect_recog_over_widening_pattern (vec_info *vinfo,
              single_use_p |= op_single_use_p;
            }
        }
+      else
+       return NULL;
     }
 
   /* Although the operation could be done in operation_precision, we have