[AArch64] Handle SVE subregs that are effectively REVs

author Richard Sandiford <richard.sandiford@linaro.org>

Thu, 1 Feb 2018 11:04:28 +0000 (11:04 +0000)

committer Richard Sandiford <rsandifo@gcc.gnu.org>

Thu, 1 Feb 2018 11:04:28 +0000 (11:04 +0000)
author Richard Sandiford <richard.sandiford@linaro.org>
Thu, 1 Feb 2018 11:04:28 +0000 (11:04 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
Thu, 1 Feb 2018 11:04:28 +0000 (11:04 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index db763a636485adf0895d542a960c5cd6af72e32c..b655476aaac247dfe50fb8b002d545b34b907692 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
+
+       * config/aarch64/aarch64-protos.h (aarch64_split_sve_subreg_move)
+       (aarch64_maybe_expand_sve_subreg_move): Declare.
+       * config/aarch64/aarch64.md (UNSPEC_REV_SUBREG): New unspec.
+       * config/aarch64/predicates.md (aarch64_any_register_operand): New
+       predicate.
+       * config/aarch64/aarch64-sve.md (mov<mode>): Optimize subreg moves
+       that are semantically a reverse operation.
+       (*aarch64_sve_mov<mode>_subreg_be): New pattern.
+       * config/aarch64/aarch64.c (aarch64_maybe_expand_sve_subreg_move)
+       (aarch64_replace_reg_mode, aarch64_split_sve_subreg_move): New
+       functions.
+       (aarch64_can_change_mode_class): For big-endian, forbid changes
+       between two SVE modes if they have different element sizes.
+
  2018-02-01  Richard Sandiford  <richard.sandiford@linaro.org>
  
         * config/aarch64/aarch64.c (aarch64_expand_sve_const_vector): Prefer
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index ef1b0bc8e28582287c8141429644a5a40375c441..cda2895d28e7496f8fd6c1b365c4bb497b54c323 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -447,6 +447,8 @@ void aarch64_expand_epilogue (bool);
  void aarch64_expand_mov_immediate (rtx, rtx, rtx (*) (rtx, rtx) = 0);
  void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
  void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
+bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
+void aarch64_split_sve_subreg_move (rtx, rtx, rtx);
  void aarch64_expand_prologue (void);
  void aarch64_expand_vector_init (rtx, rtx);
  void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md

index 068fd8cbf81a15830e8d00541de078eae1303291..9140862d7473f88ac2fa6cd3de57e66ee3c7d3f9 100644 (file)
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -84,6 +84,32 @@
                                       gen_vec_duplicate<mode>);
         DONE;
        }
+
+    /* Optimize subregs on big-endian targets: we can use REV[BHW]
+       instead of going through memory.  */
+    if (BYTES_BIG_ENDIAN
+        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
+      DONE;
+  }
+)
+
+;; A pattern for optimizing SUBREGs that have a reinterpreting effect
+;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
+;; for details.  Operand 2 uses a special mode-agnostic predicate so
+;; that a single pattern per <mode> accepts a source of any mode.
+(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
+  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
+       (unspec:SVE_ALL
+          [(match_operand:VNx16BI 1 "register_operand" "Upl")
+          (match_operand 2 "aarch64_any_register_operand" "w")]
+         UNSPEC_REV_SUBREG))]
+  "TARGET_SVE && BYTES_BIG_ENDIAN"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+    DONE;
    }
  )
  
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 6296ffe959f62ac5515a3d32e617a909f829f090..7b34bdf5cf46443696318e148574bb58881d9dc3 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3074,6 +3074,120 @@ aarch64_expand_sve_mem_move (rtx dest, rtx src, machine_mode pred_mode)
    aarch64_emit_sve_pred_move (dest, ptrue, src);
  }
  
+/* Called only on big-endian targets.  See whether an SVE vector move
+   from SRC to DEST is effectively a REV[BHW] instruction, because at
+   least one operand is a subreg of an SVE vector that has wider or
+   narrower elements.  Return true and emit the instruction if so.
+
+   For example:
+
+     (set (reg:VNx16QI R1) (subreg:VNx16QI (reg:VNx8HI R2) 0))
+
+   represents a VIEW_CONVERT between the following vectors, viewed
+   in memory order:
+
+     R2: { [0].high, [0].low,  [1].high, [1].low, ... }
+     R1: { [0],      [1],      [2],      [3],     ... }
+
+   The high part of lane X in R2 should therefore correspond to lane X*2
+   of R1, but the register representations are:
+
+         msb                                      lsb
+     R2: ...... [1].high  [1].low   [0].high  [0].low
+     R1: ...... [3]       [2]       [1]       [0]
+
+   where the low part of lane X in R2 corresponds to lane X*2 in R1.
+   We therefore need a reverse operation to swap the high and low values
+   around.
+
+   This is purely an optimization.  Without it we would spill the
+   subreg operand to the stack in one mode and reload it in the
+   other mode, which has the same effect as the REV.  */
+
+bool
+aarch64_maybe_expand_sve_subreg_move (rtx dest, rtx src)
+{
+  gcc_assert (BYTES_BIG_ENDIAN);
+  if (GET_CODE (dest) == SUBREG)
+    dest = SUBREG_REG (dest);
+  if (GET_CODE (src) == SUBREG)
+    src = SUBREG_REG (src);
+
+  /* After stripping SUBREGs, the optimization only handles two single
+     SVE data REGs with different element sizes.  */
+  if (!REG_P (dest)
+      || !REG_P (src)
+      || aarch64_classify_vector_mode (GET_MODE (dest)) != VEC_SVE_DATA
+      || aarch64_classify_vector_mode (GET_MODE (src)) != VEC_SVE_DATA
+      || (GET_MODE_UNIT_SIZE (GET_MODE (dest))
+         == GET_MODE_UNIT_SIZE (GET_MODE (src))))
+    return false;
+
+  /* Generate *aarch64_sve_mov<mode>_subreg_be with an all-ones predicate.  */
+  rtx ptrue = force_reg (VNx16BImode, CONSTM1_RTX (VNx16BImode));
+  rtx unspec = gen_rtx_UNSPEC (GET_MODE (dest), gen_rtvec (2, ptrue, src),
+                              UNSPEC_REV_SUBREG);
+  emit_insn (gen_rtx_SET (dest, unspec));
+  return true;
+}
+
+/* Return X with mode MODE: X itself if it already has that mode, else a
+   shallow copy that keeps X's register number and other attributes.
+   Unlike gen_lowpart, this doesn't care whether the mode change is valid.  */
+
+static rtx
+aarch64_replace_reg_mode (rtx x, machine_mode mode)
+{
+  if (GET_MODE (x) == mode)
+    return x;
+
+  x = shallow_copy_rtx (x);
+  set_mode_and_regno (x, mode, REGNO (x));
+  return x;
+}
+
+/* Split a *aarch64_sve_mov<mode>_subreg_be pattern that sets DEST from
+   SRC under predicate PTRUE, emitting the equivalent predicated REV.  */
+
+void
+aarch64_split_sve_subreg_move (rtx dest, rtx ptrue, rtx src)
+{
+  /* Decide which REV operation we need.  The mode with narrower elements
+     determines the mode of the operands and the mode with the wider
+     elements determines the reverse width.  */
+  machine_mode mode_with_wider_elts = GET_MODE (dest);
+  machine_mode mode_with_narrower_elts = GET_MODE (src);
+  if (GET_MODE_UNIT_SIZE (mode_with_wider_elts)
+      < GET_MODE_UNIT_SIZE (mode_with_narrower_elts))
+    std::swap (mode_with_wider_elts, mode_with_narrower_elts);
+
+  unsigned int wider_bytes = GET_MODE_UNIT_SIZE (mode_with_wider_elts);
+  unsigned int unspec;
+  if (wider_bytes == 8)
+    unspec = UNSPEC_REV64;
+  else if (wider_bytes == 4)
+    unspec = UNSPEC_REV32;
+  else if (wider_bytes == 2)
+    unspec = UNSPEC_REV16;
+  else
+    gcc_unreachable ();
+  machine_mode pred_mode = aarch64_sve_pred_mode (wider_bytes).require ();
+
+  /* Emit:
+
+       (set DEST (unspec [PTRUE (unspec [SRC] UNSPEC_REV<nn>)]
+                        UNSPEC_MERGE_PTRUE))
+
+     with the appropriate modes.  */
+  ptrue = gen_lowpart (pred_mode, ptrue);
+  dest = aarch64_replace_reg_mode (dest, mode_with_narrower_elts);
+  src = aarch64_replace_reg_mode (src, mode_with_narrower_elts);
+  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (1, src), unspec);
+  src = gen_rtx_UNSPEC (mode_with_narrower_elts, gen_rtvec (2, ptrue, src),
+                       UNSPEC_MERGE_PTRUE);
+  emit_insn (gen_rtx_SET (dest, src));
+}
+
  static bool
  aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                                  tree exp ATTRIBUTE_UNUSED)
@@ -17197,10 +17311,27 @@ static bool
  aarch64_can_change_mode_class (machine_mode from,
                                machine_mode to, reg_class_t)
  {
-  /* See the comment at the head of aarch64-sve.md for details.  */
-  if (BYTES_BIG_ENDIAN
-      && (aarch64_sve_data_mode_p (from) != aarch64_sve_data_mode_p (to)))
-    return false;
+  if (BYTES_BIG_ENDIAN)
+    {
+      bool from_sve_p = aarch64_sve_data_mode_p (from);
+      bool to_sve_p = aarch64_sve_data_mode_p (to);
+
+      /* Don't allow changes between SVE data modes and non-SVE modes.
+        See the comment at the head of aarch64-sve.md for details.  */
+      if (from_sve_p != to_sve_p)
+       return false;
+
+      /* Don't allow changes in element size: lane 0 of the new vector
+        would not then be lane 0 of the old vector.  See the comment
+        above aarch64_maybe_expand_sve_subreg_move for a more detailed
+        description.
+
+        In the worst case, this forces a register to be spilled in
+        one mode and reloaded in the other, which handles the
+        endianness correctly.  */
+      if (from_sve_p && GET_MODE_UNIT_SIZE (from) != GET_MODE_UNIT_SIZE (to))
+       return false;
+    }
    return true;
  }
  
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

index 49095f8f3d995907903c11b68cae25a919204a76..5a2a9309a3bbbfad6fcb6db07422d774909f0ba1 100644 (file)
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -168,6 +168,7 @@
      UNSPEC_INSR
      UNSPEC_CLASTB
      UNSPEC_FADDA
+    UNSPEC_REV_SUBREG
  ])
  
  (define_c_enum "unspecv" [
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md

index 159e74a1ed737c49063620564c2125215c37a4b6..804be16a13a78e989b15023259872f2c81e2e037 100644 (file)
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -617,3 +617,7 @@
  (define_predicate "aarch64_gather_scale_operand_d"
    (and (match_code "const_int")
         (match_test "INTVAL (op) == 1 || INTVAL (op) == 8")))
+
+;; A special predicate that doesn't match a particular mode.
+(define_special_predicate "aarch64_any_register_operand"
+  (match_code "reg"))
author	Richard Sandiford <richard.sandiford@linaro.org>
	Thu, 1 Feb 2018 11:04:28 +0000 (11:04 +0000)
committer	Richard Sandiford <rsandifo@gcc.gnu.org>
	Thu, 1 Feb 2018 11:04:28 +0000 (11:04 +0000)
gcc/ChangeLog		patch \| blob \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| history
gcc/config/aarch64/aarch64-sve.md		patch \| blob \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| history
gcc/config/aarch64/aarch64.md		patch \| blob \| history
gcc/config/aarch64/predicates.md		patch \| blob \| history