[SLP/AArch64] Fix unpack handling for big-endian SVE
author     Richard Sandiford <richard.sandiford@linaro.org>
Tue, 13 Mar 2018 15:13:37 +0000 (15:13 +0000)
committer  Richard Sandiford <rsandifo@gcc.gnu.org>
Tue, 13 Mar 2018 15:13:37 +0000 (15:13 +0000)
I hadn't realised that on big-endian targets, VEC_UNPACK*HI_EXPR unpacks
the low-numbered lanes and VEC_UNPACK*LO_EXPR unpacks the high-numbered
lanes.  This meant that both the SVE patterns and the handling of
fully-masked loops were wrong.
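
To make the lane numbering concrete, here is a small illustration (not part
of the patch); the helper below just models the convention in C, and its
name and signature are made up for the example:

    #include <stdbool.h>

    enum unpack_kind { UNPACK_LO, UNPACK_HI };

    /* Index of the first source lane that a vec_unpack*_{lo,hi} optab is
       defined to widen, for an NLANES-element vector: "hi" means the
       high-numbered lanes on little-endian but the low-numbered lanes on
       big-endian.  */
    static int
    first_unpacked_lane (enum unpack_kind kind, bool big_endian, int nlanes)
    {
      bool high_numbered = (kind == UNPACK_HI) != big_endian;
      return high_numbered ? nlanes / 2 : 0;
    }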

The patch deals with that by making sure that all vec_unpack* optabs
are define_expands, using BYTES_BIG_ENDIAN to choose the appropriate
define_insn.  This in turn means that we can get rid of the duplication
between the signed and unsigned patterns for predicates.  (We provide
implementations of both the signed and unsigned optabs because the sign
doesn't matter for predicates: every element contains only one
significant bit.)
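
As a rough sketch (not the patch itself), the C body of each new
define_expand reduces to the dispatch below; <hi_lanes_optab> is the new
iterators.md attribute (!BYTES_BIG_ENDIAN for the *_hi_* optabs,
BYTES_BIG_ENDIAN for the *_lo_* optabs), and gen_hi_insn/gen_lo_insn are
placeholders for the real mode-suffixed generators such as
gen_aarch64_sve_punpkhi_<mode>:

    /* Sketch only: choose the instruction that reads the lanes the optab
       is asking for.  */
    emit_insn ((<hi_lanes_optab>
                ? gen_hi_insn
                : gen_lo_insn) (operands[0], operands[1]));
    DONE;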

Also, the float unpacks need to unpack one half of the input vector,
but the unpacked upper bits are "don't care".  There are two obvious
ways of handling that: use an unpack (filling with zeros) or use a ZIP
(filling with a duplicate of the low bits).  The code previously used
unpacks, but the sequence involved a subreg that is semantically an
element reverse on big-endian targets.  Using the ZIP patterns avoids
that, and at the moment there's no performance reason to prefer one over
the other, so the patch switches to ZIP unconditionally.
As the comment says, it would be easy to optimise this later if UUNPK
turns out to be better for some implementations.
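
As an illustration (little-endian shown; big-endian just swaps which ZIP
is used, and the register names are arbitrary), unpacking and converting
the low half of a VNx4SF source z0 = { s0, s1, s2, s3, ... } now becomes:

    /* zip1  z1.s, z0.s, z0.s     ->  { s0, s0, s1, s1, ... }
       fcvt  z2.d, p0/m, z1.s     ->  { (double) s0, (double) s1, ... }
       Each 64-bit container holds a duplicate of the value rather than
       zeros in its unused half, and no bit-reinterpreting subreg (an
       element reverse on big-endian) is needed.  */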

2018-03-13  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
* tree-vect-loop-manip.c (vect_maybe_permute_loop_masks):
Reverse the choice between VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR
for big-endian.
* config/aarch64/iterators.md (hi_lanes_optab): New int attribute.
* config/aarch64/aarch64-sve.md
(*aarch64_sve_<perm_insn><perm_hilo><mode>): Rename to...
(aarch64_sve_<perm_insn><perm_hilo><mode>): ...this.
(*extend<mode><Vwide>2): Rename to...
(aarch64_sve_extend<mode><Vwide>2): ...this.
(vec_unpack<su>_<perm_hilo>_<mode>): Turn into a define_expand,
renaming the old pattern to...
(aarch64_sve_punpk<perm_hilo>_<mode>): ...this.  Only define
unsigned unpacks.
(vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>): Turn into a
define_expand, renaming the old pattern to...
(aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>): ...this.
(*vec_unpacku_<perm_hilo>_<mode>_no_convert): Delete.
(vec_unpacks_<perm_hilo>_<mode>): Take BYTES_BIG_ENDIAN into
account when deciding which SVE instruction the optab should use.
(vec_unpack<su_optab>_float_<perm_hilo>_vnx4si): Likewise.

gcc/testsuite/
* gcc.target/aarch64/sve/unpack_fcvt_signed_1.c: Expect zips rather
than unpacks.
* gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c: Likewise.
* gcc.target/aarch64/sve/unpack_float_1.c: Likewise.

From-SVN: r258489

gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/iterators.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c
gcc/tree-vect-loop-manip.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8e150de2a570ec4c4663e2b9a1ce6717cbcdc4b4..4a6f4d47b60d35cf62fc627f54836f6d528acd6d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2018-03-13  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * tree-vect-loop-manip.c (vect_maybe_permute_loop_masks):
+       Reverse the choice between VEC_UNPACK_LO_EXPR and VEC_UNPACK_HI_EXPR
+       for big-endian.
+       * config/aarch64/iterators.md (hi_lanes_optab): New int attribute.
+       * config/aarch64/aarch64-sve.md
+       (*aarch64_sve_<perm_insn><perm_hilo><mode>): Rename to...
+       (aarch64_sve_<perm_insn><perm_hilo><mode>): ...this.
+       (*extend<mode><Vwide>2): Rename to...
+       (aarch64_sve_extend<mode><Vwide>2): ...this.
+       (vec_unpack<su>_<perm_hilo>_<mode>): Turn into a define_expand,
+       renaming the old pattern to...
+       (aarch64_sve_punpk<perm_hilo>_<mode>): ...this.  Only define
+       unsigned unpacks.
+       (vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>): Turn into a
+       define_expand, renaming the old pattern to...
+       (aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>): ...this.
+       (*vec_unpacku_<perm_hilo>_<mode>_no_convert): Delete.
+       (vec_unpacks_<perm_hilo>_<mode>): Take BYTES_BIG_ENDIAN into
+       account when deciding which SVE instruction the optab should use.
+       (vec_unpack<su_optab>_float_<perm_hilo>_vnx4si): Likewise.
+
 2018-03-13  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * config/aarch64/aarch64.md (V4_REGNUM, V8_REGNUM, V12_REGNUM)
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 2e7f0a45f793081d80e43a68929a07abc3b44117..d88553405094853ab58ba1c55cd212092a14116c 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
   "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 )
 
-(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
+(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
                         (match_operand:SVE_ALL 2 "register_operand" "w")]
 )
 
 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
-(define_insn "*<optab><mode>vnx2df2"
+(define_insn "aarch64_sve_<optab><mode>vnx2df2"
   [(set (match_operand:VNx2DF 0 "register_operand" "=w")
        (unspec:VNx2DF
          [(match_operand:VNx2BI 1 "register_operand" "Upl")
 
 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
 ;; of SFs.
-(define_insn "*extend<mode><Vwide>2"
+(define_insn "aarch64_sve_extend<mode><Vwide>2"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE>
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
   "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
 )
 
+;; Unpack the low or high half of a predicate, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
+                  UNPACK)]
+  "TARGET_SVE"
+  {
+    emit_insn ((<hi_lanes_optab>
+               ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
+               : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
+              (operands[0], operands[1]));
+    DONE;
+  }
+)
+
 ;; PUNPKHI and PUNPKLO.
-(define_insn "vec_unpack<su>_<perm_hilo>_<mode>"
+(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
        (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
-                       UNPACK))]
+                       UNPACK_UNSIGNED))]
   "TARGET_SVE"
   "punpk<perm_hilo>\t%0.h, %1.b"
 )
 
+;; Unpack the low or high half of a vector, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
+  "TARGET_SVE"
+  {
+    emit_insn ((<hi_lanes_optab>
+               ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
+               : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
+              (operands[0], operands[1]));
+    DONE;
+  }
+)
+
 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
-(define_insn "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
+(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
                        UNPACK))]
   "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
 )
 
-;; Used by the vec_unpacks_<perm_hilo>_<mode> expander to unpack the bit
-;; representation of a VNx4SF or VNx8HF without conversion.  The choice
-;; between signed and unsigned isn't significant.
-(define_insn "*vec_unpacku_<perm_hilo>_<mode>_no_convert"
-  [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
-       (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand" "w")]
-                       UNPACK_UNSIGNED))]
-  "TARGET_SVE"
-  "uunpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
-)
-
 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
 ;; First unpack the source without conversion, then float-convert the
 ;; unpacked source.
 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
-  [(set (match_dup 2)
-       (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
-                       UNPACK_UNSIGNED))
-   (set (match_operand:<VWIDE> 0 "register_operand")
-       (unspec:<VWIDE> [(match_dup 3)
-                        (unspec:<VWIDE> [(match_dup 2)] UNSPEC_FLOAT_CONVERT)]
-                       UNSPEC_MERGE_PTRUE))]
-  "TARGET_SVE"
-  {
-    operands[2] = gen_reg_rtx (<MODE>mode);
-    operands[3] = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
+  [(match_operand:<VWIDE> 0 "register_operand")
+   (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
+                  UNPACK_UNSIGNED)]
+  "TARGET_SVE"
+  {
+    /* Use ZIP to do the unpack, since we don't care about the upper halves
+       and since it has the nice property of not needing any subregs.
+       If using UUNPK* turns out to be preferable, we could model it as
+       a ZIP whose first operand is zero.  */
+    rtx temp = gen_reg_rtx (<MODE>mode);
+    emit_insn ((<hi_lanes_optab>
+               ? gen_aarch64_sve_zip2<mode>
+               : gen_aarch64_sve_zip1<mode>)
+               (temp, operands[1], operands[1]));
+    rtx ptrue = force_reg (<VWIDE_PRED>mode, CONSTM1_RTX (<VWIDE_PRED>mode));
+    emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
+                                                    ptrue, temp));
+    DONE;
   }
 )
 
 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
 ;; unpacked VNx4SI to VNx2DF.
 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
-  [(set (match_dup 2)
-       (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
-                      UNPACK_UNSIGNED))
-   (set (match_operand:VNx2DF 0 "register_operand")
-       (unspec:VNx2DF [(match_dup 3)
-                       (FLOATUORS:VNx2DF (match_dup 4))]
-                      UNSPEC_MERGE_PTRUE))]
-  "TARGET_SVE"
-  {
-    operands[2] = gen_reg_rtx (VNx2DImode);
-    operands[3] = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
-    operands[4] = gen_rtx_SUBREG (VNx4SImode, operands[2], 0);
+  [(match_operand:VNx2DF 0 "register_operand")
+   (FLOATUORS:VNx2DF
+     (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
+                   UNPACK_UNSIGNED))]
+  "TARGET_SVE"
+  {
+    /* Use ZIP to do the unpack, since we don't care about the upper halves
+       and since it has the nice property of not needing any subregs.
+       If using UUNPK* turns out to be preferable, we could model it as
+       a ZIP whose first operand is zero.  */
+    rtx temp = gen_reg_rtx (VNx4SImode);
+    emit_insn ((<hi_lanes_optab>
+               ? gen_aarch64_sve_zip2vnx4si
+               : gen_aarch64_sve_zip1vnx4si)
+              (temp, operands[1], operands[1]));
+    rtx ptrue = force_reg (VNx2BImode, CONSTM1_RTX (VNx2BImode));
+    emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
+                                                              ptrue, temp));
+    DONE;
   }
 )
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a2945a81848b326dcc317c771ca83822bdd64fe5..fa181794392d4dc48e9a6df5cf5db14a9824cd2d 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
                            (UNSPEC_UNPACKSHI "hi") (UNSPEC_UNPACKUHI "hi")
                            (UNSPEC_UNPACKSLO "lo") (UNSPEC_UNPACKULO "lo")])
 
+;; Return true if the associated optab refers to the high-numbered lanes,
+;; false if it refers to the low-numbered lanes.  The convention is for
+;; "hi" to refer to the low-numbered lanes (the first ones in memory)
+;; for big-endian.
+(define_int_attr hi_lanes_optab [(UNSPEC_UNPACKSHI "!BYTES_BIG_ENDIAN")
+                                (UNSPEC_UNPACKUHI "!BYTES_BIG_ENDIAN")
+                                (UNSPEC_UNPACKSLO "BYTES_BIG_ENDIAN")
+                                (UNSPEC_UNPACKULO "BYTES_BIG_ENDIAN")])
+
 (define_int_attr frecp_suffix  [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
 
 (define_int_attr crc_variant [(UNSPEC_CRC32B "crc32b") (UNSPEC_CRC32H "crc32h")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 84c64b2e77b6456edf29686944d95ab248fa2d97..cee70dc0bc9b8f567c7bdfdfcbdb90c5907199d0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2018-03-13  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * gcc.target/aarch64/sve/unpack_fcvt_signed_1.c: Expect zips rather
+       than unpacks.
+       * gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c: Likewise.
+       * gcc.target/aarch64/sve/unpack_float_1.c: Likewise.
+
 2018-03-13  Richard Sandiford  <richard.sandiford@linaro.org>
 
        * gcc.target/aarch64/sve/tls_1.c: New test.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
index 83ffe8552c2091317191fbf550226fd09548bea6..0f96dc2ff007340541c2ba7d51e1ccfa0f3f2d39 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_signed_1.c
@@ -10,6 +10,6 @@ unpack_double_int_plus8 (double *d, int32_t *s, int size)
     d[i] = s[i] + 8;
 }
 
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tscvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
index e2f6b1a45ce239ea700b9d50de59cd5f3ac7553d..70465f91eba4f80140b2059481eb8f06bbc9ace7 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_fcvt_unsigned_1.c
@@ -10,6 +10,6 @@ unpack_double_int_plus9 (double *d, uint32_t *s, int size)
     d[i] = (double) (s[i] + 9);
 }
 
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tucvtf\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c
index 14a636b5fda120243efc4f6a257aba0864c619c9..deb4cf5e940b186fb11468fea27b697cc2737010 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/unpack_float_1.c
@@ -8,6 +8,6 @@ unpack_float_plus_7point9 (double *d, float *s, int size)
     d[i] = s[i] + 7.9;
 }
 
-/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tzip2\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tfcvt\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.s\n} 2 } } */
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 96d40c8c4b328145a469e997addaf99862dd55a9..1c43ed0becd58abc81ca12c527672ec55c4a0809 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -334,7 +334,8 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
        {
          tree src = src_rgm->masks[i / 2];
          tree dest = dest_rgm->masks[i];
-         tree_code code = (i & 1 ? VEC_UNPACK_HI_EXPR
+         tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
+                           ? VEC_UNPACK_HI_EXPR
                            : VEC_UNPACK_LO_EXPR);
          gassign *stmt;
          if (dest_masktype == unpack_masktype)