Vectorise conversions between differently-sized integer vectors
author     Richard Sandiford <richard.sandiford@arm.com>
           Thu, 14 Nov 2019 15:31:25 +0000 (15:31 +0000)
committer  Richard Sandiford <rsandifo@gcc.gnu.org>
           Thu, 14 Nov 2019 15:31:25 +0000 (15:31 +0000)
This patch adds AArch64 patterns for converting between 64-bit and
128-bit integer vectors, and makes the vectoriser and expand pass
use them.
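
As a sketch of the kind of loop this affects (taken from the new
gcc.target/aarch64/vect_mixed_sizes_5.c test below; the comments are
illustrative rather than part of the test):

  #include <stdint.h>

  void
  f (int64_t *x, int64_t *y, int32_t *z, int n)
  {
    for (int i = 0; i < n; ++i)
      {
        x[i] = z[i];       /* Extend a 64-bit V2SI input to a 128-bit V2DI result.  */
        y[i] += y[i - 2];  /* Keeps the loop operating on 128-bit vectors.  */
      }
  }

With -O2 -ftree-vectorize, the test expects the conversion to be implemented
by a single SXTL (the narrowing tests likewise expect a single XTN), matching
the new aarch64-simd.md patterns added below.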

2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
* tree-cfg.c (verify_gimple_assign_unary): Handle conversions
between vector types.
* tree-vect-stmts.c (vectorizable_conversion): Extend the
non-widening and non-narrowing path to handle standard
conversion codes, if the target supports them.
* expr.c (convert_move): Try using the extend and truncate optabs
for vectors.
* optabs-tree.c (supportable_convert_operation): Likewise.
* config/aarch64/iterators.md (Vnarrowq): New mode attribute.
* config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
(trunc<mode><Vnarrowq>2): New patterns.

gcc/testsuite/
* gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
to fail for aarch64 targets.
* gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
on aarch64 targets.
* gcc.dg/vect/vect-double-reduc-5.c: Likewise.
* gcc.dg/vect/vect-outer-4e.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
* gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.

From-SVN: r278245

21 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-simd.md
gcc/config/aarch64/iterators.md
gcc/expr.c
gcc/optabs-tree.c
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c
gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c
gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c
gcc/testsuite/gcc.dg/vect/vect-outer-4e.c
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c [new file with mode: 0644]
gcc/tree-cfg.c
gcc/tree-vect-stmts.c

index 6bb0a80e19db96916a9bcf3f5a4f00a5b2fc3552..2798f159cc0c2997b0db359fde6836a0651c3702 100644 (file)
@@ -1,3 +1,17 @@
+2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * tree-cfg.c (verify_gimple_assign_unary): Handle conversions
+       between vector types.
+       * tree-vect-stmts.c (vectorizable_conversion): Extend the
+       non-widening and non-narrowing path to handle standard
+       conversion codes, if the target supports them.
+       * expr.c (convert_move): Try using the extend and truncate optabs
+       for vectors.
+       * optabs-tree.c (supportable_convert_operation): Likewise.
+       * config/aarch64/iterators.md (Vnarrowq): New mode attribute.
+       * config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
+       (trunc<mode><Vnarrowq>2): New patterns.
+
 2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
 
        * tree-vect-stmts.c (vect_get_vector_types_for_stmt): Don't
index 6f7fb1c9b0c9dd3f618ea1c04d75072af5f28026..ad4676bc167f08951e693916c7ef796e3501762a 100644 (file)
   "pmull2\\t%0.1q, %1.2d, %2.2d"
   [(set_attr "type" "crypto_pmull")]
 )
+
+;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
+(define_insn "<optab><Vnarrowq><mode>2"
+  [(set (match_operand:VQN 0 "register_operand" "=w")
+       (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
+  [(set_attr "type" "neon_shift_imm_long")]
+)
+
+;; Truncate a 128-bit integer vector to a 64-bit vector.
+(define_insn "trunc<mode><Vnarrowq>2"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
+       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
+  "TARGET_SIMD"
+  "xtn\t%0.<Vntype>, %1.<Vtype>"
+  [(set_attr "type" "neon_shift_imm_narrow_q")]
+)
index 2fd5467d32fe44fedd0f45c561f5ceae6361aab9..3e37f80081df9899e103e19b8e2bc96454efd199 100644 (file)
                            (V2DI "V2SI")
                            (DI   "SI")   (SI   "HI")
                            (HI   "QI")])
+(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi")
+                           (V2DI "v2si")])
 
 ;; Narrowed quad-modes for VQN (Used for XTN2).
 (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
index 0fd5890f8b4484b74c7f690fb748ef414c812945..ed50586971f711552060be678c408709dbaf5154 100644 (file)
@@ -250,6 +250,31 @@ convert_move (rtx to, rtx from, int unsignedp)
 
   if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
     {
+      if (GET_MODE_UNIT_PRECISION (to_mode)
+         > GET_MODE_UNIT_PRECISION (from_mode))
+       {
+         optab op = unsignedp ? zext_optab : sext_optab;
+         insn_code icode = convert_optab_handler (op, to_mode, from_mode);
+         if (icode != CODE_FOR_nothing)
+           {
+             emit_unop_insn (icode, to, from,
+                             unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
+             return;
+           }
+       }
+
+      if (GET_MODE_UNIT_PRECISION (to_mode)
+         < GET_MODE_UNIT_PRECISION (from_mode))
+       {
+         insn_code icode = convert_optab_handler (trunc_optab,
+                                                  to_mode, from_mode);
+         if (icode != CODE_FOR_nothing)
+           {
+             emit_unop_insn (icode, to, from, TRUNCATE);
+             return;
+           }
+       }
+
       gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
                            GET_MODE_BITSIZE (to_mode)));
 
index a5ecbf075b5d0cce6f1dfbcc51399fae87f5e1be..97cc59255e081eded31c53280e5ac308d3d71fd3 100644 (file)
@@ -303,6 +303,20 @@ supportable_convert_operation (enum tree_code code,
       return true;
     }
 
+  if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
+      && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
+    {
+      *code1 = code;
+      return true;
+    }
+
+  if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
+      && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
+    {
+      *code1 = code;
+      return true;
+    }
+
   /* Now check for builtin.  */
   if (targetm.vectorize.builtin_conversion
       && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
index c1fa46589420d056523d7d607975addcb86090c0..29da1f347bd9869adf069f00657b2307e2181022 100644 (file)
@@ -1,3 +1,21 @@
+2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
+
+       * gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
+       to fail for aarch64 targets.
+       * gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
+       on aarch64 targets.
+       * gcc.dg/vect/vect-double-reduc-5.c: Likewise.
+       * gcc.dg/vect/vect-outer-4e.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
+       * gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
+       * gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.
+
 2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>
 
        * gcc.dg/vect/vect-outer-4f.c: Expect the test to pass on aarch64
index 85f9a02582f480fc92d4a2b4dea82a358d6e2bb3..813b1af089a81e941663a7219f95d3d5a61c236b 100644 (file)
@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2)
 }
 
 /* Disable for SVE because for long or variable-length vectors we don't
-   get an unrolled epilogue loop.  */
-/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */
+   get an unrolled epilogue loop.  Also disable for AArch64 Advanced SIMD,
+   because there we can vectorize the epilogue using mixed vector sizes.  */
+/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */
index e4202b10d064ff74350f629f7d9b8756bc9f8146..b5f8c3c88e4d0a6562ba867ae83c1ab120077111 100644 (file)
@@ -46,4 +46,4 @@ int main (void)
 }
 
 /* Until we support multiple types in the inner loop  */
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */
index 0ba33895592c46e2f0db495afd21c2fa13af3ae4..079704cee81cc17b882b476c42cbeee0280369cf 100644 (file)
@@ -52,5 +52,5 @@ int main ()
 
 /* Vectorization of loops with multiple types and double reduction is not 
    supported yet.  */       
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
       
index 13238dbe2f90d2ebc2389c3059bcea92748c1ebe..e65a092f5bfdcf86ac6bfb97aa4fb65c48920548 100644 (file)
@@ -23,4 +23,4 @@ foo (){
   return;
 }
 
-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c
new file mode 100644 (file)
index 0000000..81e77a8
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int16_t *y, uint8_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 8];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c
new file mode 100644 (file)
index 0000000..d9da6c1
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int64_t *y, int64_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c
new file mode 100644 (file)
index 0000000..80dab8b
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int32_t *y, int32_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 4];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c
new file mode 100644 (file)
index 0000000..655fa7d
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int8_t *x, int16_t *y, int16_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 8];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c
new file mode 100644 (file)
index 0000000..ca8a65a
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int64_t *x, int64_t *y, int32_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c
new file mode 100644 (file)
index 0000000..6c09b5b
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int32_t *y, int16_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 4];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c
new file mode 100644 (file)
index 0000000..94a66c5
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int16_t *x, int16_t *y, int8_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 8];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c
new file mode 100644 (file)
index 0000000..9531966
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int64_t *x, int64_t *y, uint32_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 2];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c
new file mode 100644 (file)
index 0000000..de8f698
--- /dev/null
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+#pragma GCC target "+nosve"
+
+#include <stdint.h>
+
+void
+f (int32_t *x, int32_t *y, uint16_t *z, int n)
+{
+  for (int i = 0; i < n; ++i)
+    {
+      x[i] = z[i];
+      y[i] += y[i - 4];
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
index b75fdb2e63fcd14a2196ec0ba739c5866b8cedba..eb268e32b2ddb605d1aa24f827db601758374429 100644 (file)
@@ -3553,6 +3553,24 @@ verify_gimple_assign_unary (gassign *stmt)
     {
     CASE_CONVERT:
       {
+       /* Allow conversions between vectors with the same number of elements,
+          provided that the conversion is OK for the element types too.  */
+       if (VECTOR_TYPE_P (lhs_type)
+           && VECTOR_TYPE_P (rhs1_type)
+           && known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
+                        TYPE_VECTOR_SUBPARTS (rhs1_type)))
+         {
+           lhs_type = TREE_TYPE (lhs_type);
+           rhs1_type = TREE_TYPE (rhs1_type);
+         }
+       else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
+         {
+           error ("invalid vector types in nop conversion");
+           debug_generic_expr (lhs_type);
+           debug_generic_expr (rhs1_type);
+           return true;
+         }
+
        /* Allow conversions from pointer type to integral type only if
           there is no sign or zero extension involved.
           For targets where the precision of ptrofftype doesn't match that
index 5f30d1e84f5fbb64a4fb3a72a9bf1d5ffb461805..a65b4cb6c1a16deefaf3ab1a8ee984bf7e73e446 100644 (file)
@@ -4861,7 +4861,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
   switch (modifier)
     {
     case NONE:
-      if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
+      if (code != FIX_TRUNC_EXPR
+         && code != FLOAT_EXPR
+         && !CONVERT_EXPR_CODE_P (code))
        return false;
       if (supportable_convert_operation (code, vectype_out, vectype_in,
                                         &decl1, &code1))