[AArch64] Use all SVE LD1RQ variants

author Richard Sandiford <richard.sandiford@linaro.org>

Thu, 1 Feb 2018 11:03:36 +0000 (11:03 +0000)

committer Richard Sandiford <rsandifo@gcc.gnu.org>

Thu, 1 Feb 2018 11:03:36 +0000 (11:03 +0000)
author Richard Sandiford <richard.sandiford@linaro.org>
Thu, 1 Feb 2018 11:03:36 +0000 (11:03 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Thu, 1 Feb 2018 11:03:36 +0000 (11:03 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index d665f3df3266015179f0780d66cdaae285d6fe1a..36d3f50ce9e3078abe3c78b9e2e6e38eae49573f 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * config/aarch64/aarch64-sve.md (sve_ld1rq): Replace with...
+       (*sve_ld1rq<Vesize>): ... this new pattern.  Handle all element sizes,
+       not just bytes.
+       * config/aarch64/aarch64.c (aarch64_expand_sve_widened_duplicate):
+       Remove BSWAP handling for big-endian targets and use the form of
+       LD1RQ appropriate for the mode.
+
  2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
  
         * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Handle
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md

index ee942dfad789175491b6dcb613181828e1dfe907..068fd8cbf81a15830e8d00541de078eae1303291 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -652,14 +652,14 @@
  ;; Load 128 bits from memory and duplicate to fill a vector.  Since there
  ;; are so few operations on 128-bit "elements", we don't define a VNx1TI
  ;; and simply use vectors of bytes instead.
-(define_insn "sve_ld1rq"
-  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
-       (unspec:VNx16QI
-         [(match_operand:VNx16BI 1 "register_operand" "Upl")
+(define_insn "*sve_ld1rq<Vesize>"
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+       (unspec:SVE_ALL
+         [(match_operand:<VPRED> 1 "register_operand" "Upl")
            (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
           UNSPEC_LD1RQ))]
    "TARGET_SVE"
-  "ld1rqb\t%0.b, %1/z, %2"
+  "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
  )
  
  ;; Implement a predicate broadcast by shifting the low bit of the scalar
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 1278f83aaa9092ab6b3bb999e61744989857e082..ae142b45bdf099ba48a2f1e9deb290e46e9ee0e8 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2787,16 +2787,7 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
        return true;
      }
  
-  /* The bytes are loaded in little-endian order, so do a byteswap on
-     big-endian targets.  */
-  if (BYTES_BIG_ENDIAN)
-    {
-      src = simplify_unary_operation (BSWAP, src_mode, src, src_mode);
-      if (!src)
-       return NULL_RTX;
-    }
-
-  /* Use LD1RQ to load the 128 bits from memory.  */
+  /* Use LD1RQ[BHWD] to load the 128 bits from memory.  */
    src = force_const_mem (src_mode, src);
    if (!src)
      return false;
@@ -2808,8 +2799,12 @@ aarch64_expand_sve_widened_duplicate (rtx dest, scalar_int_mode src_mode,
        src = replace_equiv_address (src, addr);
      }
  
-  rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
-  emit_insn (gen_sve_ld1rq (gen_lowpart (VNx16QImode, dest), ptrue, src));
+  machine_mode mode = GET_MODE (dest);
+  unsigned int elem_bytes = GET_MODE_UNIT_SIZE (mode);
+  machine_mode pred_mode = aarch64_sve_pred_mode (elem_bytes).require ();
+  rtx ptrue = force_reg (pred_mode, CONSTM1_RTX (pred_mode));
+  src = gen_rtx_UNSPEC (mode, gen_rtvec (2, ptrue, src), UNSPEC_LD1RQ);
+  emit_insn (gen_rtx_SET (dest, src));
    return true;
  }
  
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 92d013e48799382979322c0ae2c6373d640a4f7e..f440d1e42c8c5be7920fcccbc99c7a93d29f7c3a 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * gcc.target/aarch64/sve/slp_2.c: Expect LD1RQD rather than LD1RQB.
+       * gcc.target/aarch64/sve/slp_3.c: Expect LD1RQW rather than LD1RQB.
+       * gcc.target/aarch64/sve/slp_4.c: Expect LD1RQH rather than LD1RQB.
+
  2018-02-01  Jakub Jelinek  <jakub@redhat.com>
  
         PR tree-optimization/81661
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c

index 4a219f21155bb396aadaeef923a52087e65e0b18..657abb0e902ee99da281363da64a91a45ba218a5 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_2.c
@@ -32,7 +32,7 @@ TEST_ALL (VEC_PERM)
  /* { dg-final { scan-assembler-times {\tld1rh\tz[0-9]+\.h, } 2 } } */
  /* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 3 } } */
  /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 3 } } */
-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqd\tz[0-9]+\.d, } 3 } } */
  /* { dg-final { scan-assembler-not {\tzip1\t} } } */
  /* { dg-final { scan-assembler-not {\tzip2\t} } } */
  
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c

index cfe20a8991715f2db1f3c775961cd4c6fa207c4e..dd47502134b530a0ccb007758b5290f13783d412 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_3.c
@@ -36,7 +36,7 @@ TEST_ALL (VEC_PERM)
  /* 1 for each 16-bit type and 4 for double.  */
  /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 7 } } */
  /* 1 for each 32-bit type.  */
-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]+\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqw\tz[0-9]+\.s, } 3 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #41\n} 2 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #25\n} 2 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #31\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c

index 98ff68f68df6d8b086a7145ad2d447d74dcc1728..026fa8c82af0a10589cab5eeffb865cf3dfdba4f 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_4.c
@@ -38,7 +38,7 @@ TEST_ALL (VEC_PERM)
  /* 1 for each 8-bit type, 4 for each 32-bit type and 8 for double.  */
  /* { dg-final { scan-assembler-times {\tld1rd\tz[0-9]+\.d, } 22 } } */
  /* 1 for each 16-bit type.  */
-/* { dg-final { scan-assembler-times {\tld1rqb\tz[0-9]\.b, } 3 } } */
+/* { dg-final { scan-assembler-times {\tld1rqh\tz[0-9]\.h, } 3 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #99\n} 2 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #11\n} 2 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, #17\n} 2 } } */
author	Richard Sandiford <richard.sandiford@linaro.org>
	Thu, 1 Feb 2018 11:03:36 +0000 (11:03 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
	Thu, 1 Feb 2018 11:03:36 +0000 (11:03 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-sve.md		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/testsuite/ChangeLog		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/slp_2.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/slp_3.c		patch \| blob \| history
gcc/testsuite/gcc.target/aarch64/sve/slp_4.c		patch \| blob \| history