;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-;; Note on the handling of big-endian SVE
-;; --------------------------------------
+;; The file is organised into the following sections (search for the full
+;; line):
+;;
+;; == General notes
+;; ---- Note on the handling of big-endian SVE
+;;
+;; == Moves
+;; ---- Moves of single vectors
+;; ---- Moves of multiple vectors
+;; ---- Moves of predicates
+;;
+;; == Loads
+;; ---- Normal contiguous loads
+;; ---- Normal gather loads
+;;
+;; == Stores
+;; ---- Normal contiguous stores
+;; ---- Normal scatter stores
+;;
+;; == Vector creation
+;; ---- [INT,FP] Duplicate element
+;; ---- [INT,FP] Initialize from individual elements
+;; ---- [INT] Linear series
+;; ---- [PRED] Duplicate element
+;;
+;; == Vector decomposition
+;; ---- [INT,FP] Extract index
+;; ---- [INT,FP] Extract active element
+;; ---- [PRED] Extract index
+;;
+;; == Unary arithmetic
+;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; ---- [FP] General unary arithmetic corresponding to rtx codes
+;; ---- [FP] Rounding
+;; ---- [PRED] Inverse
+;;
+;; == Binary arithmetic
+;; ---- [INT] General binary arithmetic corresponding to rtx codes
+;; ---- [INT] Addition
+;; ---- [INT] Subtraction
+;; ---- [INT] Absolute difference
+;; ---- [INT] Multiplication
+;; ---- [INT] Highpart multiplication
+;; ---- [INT] Division
+;; ---- [INT] Binary logical operations
+;; ---- [INT] Binary logical operations (inverted second input)
+;; ---- [INT] Shifts
+;; ---- [INT] Maximum and minimum
+;; ---- [FP] General binary arithmetic corresponding to rtx codes
+;; ---- [FP] General binary arithmetic corresponding to unspecs
+;; ---- [FP] Addition
+;; ---- [FP] Subtraction
+;; ---- [FP] Absolute difference
+;; ---- [FP] Multiplication
+;; ---- [FP] Division
+;; ---- [FP] Binary logical operations
+;; ---- [FP] Sign copying
+;; ---- [FP] Maximum and minimum
+;; ---- [PRED] Binary logical operations
+;; ---- [PRED] Binary logical operations (inverted second input)
+;; ---- [PRED] Binary logical operations (inverted result)
+;;
+;; == Ternary arithmetic
+;; ---- [INT] MLA and MAD
+;; ---- [INT] MLS and MSB
+;; ---- [INT] Dot product
+;; ---- [INT] Sum of absolute differences
+;; ---- [FP] General ternary arithmetic corresponding to unspecs
+;; ---- [FP] FMLA and FMAD
+;; ---- [FP] FMLS and FMSB
+;; ---- [FP] FNMLA and FNMAD
+;; ---- [FP] FNMLS and FNMSB
+;;
+;; == Comparisons and selects
+;; ---- [INT,FP] Select based on predicates
+;; ---- [INT,FP] Compare and select
+;; ---- [INT] Comparisons
+;; ---- [INT] While tests
+;; ---- [FP] Comparisons
+;; ---- [PRED] Test bits
+;;
+;; == Reductions
+;; ---- [INT,FP] Conditional reductions
+;; ---- [INT] Tree reductions
+;; ---- [FP] Tree reductions
+;; ---- [FP] Left-to-right reductions
+;;
+;; == Permutes
+;; ---- [INT,FP] General permutes
+;; ---- [INT,FP] Special-purpose unary permutes
+;; ---- [INT,FP] Special-purpose binary permutes
+;; ---- [PRED] Special-purpose binary permutes
+;;
+;; == Conversions
+;; ---- [INT<-INT] Packs
+;; ---- [INT<-INT] Unpacks
+;; ---- [INT<-FP] Conversions
+;; ---- [INT<-FP] Packs
+;; ---- [INT<-FP] Unpacks
+;; ---- [FP<-INT] Conversions
+;; ---- [FP<-INT] Packs
+;; ---- [FP<-INT] Unpacks
+;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Unpacks
+;; ---- [PRED<-PRED] Packs
+;; ---- [PRED<-PRED] Unpacks
+
+;; =========================================================================
+;; == General notes
+;; =========================================================================
+;;
+;; -------------------------------------------------------------------------
+;; ---- Note on the handling of big-endian SVE
+;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; reserve a predicate register.
-;; SVE data moves.
+;; =========================================================================
+;; == Moves
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Moves of single vectors
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV (including aliases)
+;; - LD1B (contiguous form)
+;; - LD1D ( " " )
+;; - LD1H ( " " )
+;; - LD1W ( " " )
+;; - LDR
+;; - ST1B (contiguous form)
+;; - ST1D ( " " )
+;; - ST1H ( " " )
+;; - ST1W ( " " )
+;; - STR
+;; -------------------------------------------------------------------------
+
(define_expand "mov<mode>"
[(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
(match_operand:SVE_ALL 1 "general_operand"))]
}
)
-;; A pattern for optimizing SUBREGs that have a reinterpreting effect
-;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
-;; for details. We use a special predicate for operand 2 to reduce
-;; the number of patterns.
-(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:VNx16BI 1 "register_operand" "Upl")
- (match_operand 2 "aarch64_any_register_operand" "w")]
- UNSPEC_REV_SUBREG))]
- "TARGET_SVE && BYTES_BIG_ENDIAN"
- "#"
- "&& reload_completed"
- [(const_int 0)]
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
+ (match_operand:SVE_ALL 1 "general_operand"))]
+ "TARGET_SVE"
{
- aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+    /* Equivalent to a normal move for our purposes. */
+ emit_move_insn (operands[0], operands[1]);
DONE;
}
)
}
)
-;; A predicated load or store for which the predicate is known to be
-;; all-true. Note that this pattern is generated directly by
-;; aarch64_emit_sve_pred_move, so changes to this pattern will
-;; need changes there as well.
+;; A predicated move in which the predicate is known to be all-true.
+;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
+;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
[(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
(unspec:SVE_ALL
[(set (match_dup 0) (match_dup 2))]
)
-(define_expand "movmisalign<mode>"
- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
- (match_operand:SVE_ALL 1 "general_operand"))]
- "TARGET_SVE"
- {
- /* Equivalent to a normal move for our purpooses. */
- emit_move_insn (operands[0], operands[1]);
- DONE;
- }
-)
-
-(define_insn "maskload<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+;; A pattern for optimizing SUBREGs that have a reinterpreting effect
+;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
+;; for details. We use a special predicate for operand 2 to reduce
+;; the number of patterns.
+(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
+ [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_ALL 1 "memory_operand" "m")]
- UNSPEC_LD1_SVE))]
- "TARGET_SVE"
- "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
-)
-
-(define_insn "maskstore<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
- (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_dup 0)]
- UNSPEC_ST1_SVE))]
- "TARGET_SVE"
- "st1<Vesize>\t%1.<Vetype>, %2, %0"
-)
-
-;; Unpredicated gather loads.
-(define_expand "gather_load<mode>"
- [(set (match_operand:SVE_SD 0 "register_operand")
- (unspec:SVE_SD
- [(match_dup 5)
- (match_operand:DI 1 "aarch64_reg_or_zero")
- (match_operand:<V_INT_EQUIV> 2 "register_operand")
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER))]
- "TARGET_SVE"
- {
- operands[5] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Predicated gather loads for 32-bit elements. Operand 3 is true for
-;; unsigned extension and false for signed extension.
-(define_insn "mask_gather_load<mode>"
- [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
- (unspec:SVE_S
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
- (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
- (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER))]
- "TARGET_SVE"
- "@
- ld1w\t%0.s, %5/z, [%2.s]
- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
-)
-
-;; Predicated gather loads for 64-bit elements. The value of operand 3
-;; doesn't matter in this case.
-(define_insn "mask_gather_load<mode>"
- [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
- (unspec:SVE_D
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER))]
- "TARGET_SVE"
- "@
- ld1d\t%0.d, %5/z, [%2.d]
- ld1d\t%0.d, %5/z, [%1, %2.d]
- ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
-)
-
-;; Unpredicated scatter store.
-(define_expand "scatter_store<mode>"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_dup 5)
- (match_operand:DI 0 "aarch64_reg_or_zero")
- (match_operand:<V_INT_EQUIV> 1 "register_operand")
- (match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
- (match_operand:SVE_SD 4 "register_operand")]
- UNSPEC_ST1_SCATTER))]
- "TARGET_SVE"
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 2 "aarch64_any_register_operand" "w")]
+ UNSPEC_REV_SUBREG))]
+ "TARGET_SVE && BYTES_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
{
- operands[5] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+ DONE;
}
)
-;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
-;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode>"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
- (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
- (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
- (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
- (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
- UNSPEC_ST1_SCATTER))]
- "TARGET_SVE"
- "@
- st1w\t%4.s, %5, [%1.s]
- st1w\t%4.s, %5, [%0, %1.s, sxtw]
- st1w\t%4.s, %5, [%0, %1.s, uxtw]
- st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
- st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
-)
-
-;; Predicated scatter stores for 64-bit elements. The value of operand 2
-;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode>"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
- (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
- (match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
- (match_operand:SVE_D 4 "register_operand" "w, w, w")]
- UNSPEC_ST1_SCATTER))]
- "TARGET_SVE"
- "@
- st1d\t%4.d, %5, [%1.d]
- st1d\t%4.d, %5, [%0, %1.d]
- st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
-)
+;; -------------------------------------------------------------------------
+;; ---- Moves of multiple vectors
+;; -------------------------------------------------------------------------
+;; All patterns in this section are synthetic and split to real
+;; instructions after reload.
+;; -------------------------------------------------------------------------
-;; SVE structure moves.
(define_expand "mov<mode>"
[(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
(match_operand:SVE_STRUCT 1 "general_operand"))]
;; Unpredicated structure moves (big-endian). Memory accesses require
;; secondary reloads.
-(define_insn "*aarch64_sve_mov<mode>_le"
+(define_insn "*aarch64_sve_mov<mode>_be"
[(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
(match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
"TARGET_SVE && BYTES_BIG_ENDIAN"
[(set_attr "length" "<insn_length>")]
)
+;; -------------------------------------------------------------------------
+;; ---- Moves of predicates
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV
+;; - LDR
+;; - PFALSE
+;; - PTRUE
+;; - STR
+;; -------------------------------------------------------------------------
+
(define_expand "mov<mode>"
[(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
(match_operand:PRED_ALL 1 "general_operand"))]
* return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)
-;; Handle extractions from a predicate by converting to an integer vector
-;; and extracting from there.
-(define_expand "vec_extract<vpred><Vel>"
- [(match_operand:<VEL> 0 "register_operand")
- (match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "nonmemory_operand")
- ;; Dummy operand to which we can attach the iterator.
- (reg:SVE_I V0_REGNUM)]
+;; =========================================================================
+;; == Loads
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Normal contiguous loads
+;; -------------------------------------------------------------------------
+;; Includes contiguous forms of:
+;; - LD1B
+;; - LD1D
+;; - LD1H
+;; - LD1W
+;; - LD2B
+;; - LD2D
+;; - LD2H
+;; - LD2W
+;; - LD3B
+;; - LD3D
+;; - LD3H
+;; - LD3W
+;; - LD4B
+;; - LD4D
+;; - LD4H
+;; - LD4W
+;; -------------------------------------------------------------------------
+
+;; Predicated LD1.
+(define_insn "maskload<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_ALL 1 "memory_operand" "m")]
+ UNSPEC_LD1_SVE))]
"TARGET_SVE"
- {
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
- CONST1_RTX (<MODE>mode),
- CONST0_RTX (<MODE>mode)));
- emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
- DONE;
- }
+ "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
-(define_expand "vec_extract<mode><Vel>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand")
- (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
+;; Unpredicated LD[234].
+(define_expand "vec_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "memory_operand")]
+ UNSPEC_LDN))]
"TARGET_SVE"
{
- poly_int64 val;
- if (poly_int_rtx_p (operands[2], &val)
- && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
- {
- /* The last element can be extracted with a LASTB and a false
- predicate. */
- rtx sel = aarch64_pfalse_reg (<VPRED>mode);
- emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
- DONE;
- }
- if (!CONST_INT_P (operands[2]))
- {
- /* Create an index with operand[2] as the base and -1 as the step.
- It will then be zero for the element we care about. */
- rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
- index = force_reg (<VEL_INT>mode, index);
- rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
- emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
-
- /* Get a predicate that is true for only that element. */
- rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
- rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
- rtx sel = gen_reg_rtx (<VPRED>mode);
- emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
-
- /* Select the element using LASTB. */
- emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
- DONE;
- }
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Extract element zero. This is a special case because we want to force
-;; the registers to be the same for the second alternative, and then
-;; split the instruction into nothing after RA.
-(define_insn_and_split "*vec_extract<mode><Vel>_0"
- [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
- (parallel [(const_int 0)])))]
+;; Predicated LD[234].
+(define_insn "vec_mask_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
+ UNSPEC_LDN))]
"TARGET_SVE"
- {
- operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
- switch (which_alternative)
- {
- case 0:
- return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
- case 1:
- return "#";
- case 2:
- return "st1\\t{%1.<Vetype>}[0], %0";
- default:
- gcc_unreachable ();
- }
- }
- "&& reload_completed
- && REG_P (operands[0])
- && REGNO (operands[0]) == REGNO (operands[1])"
- [(const_int 0)]
- {
- emit_note (NOTE_INSN_DELETED);
- DONE;
- }
- [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
+ "ld<vector_count><Vesize>\t%0, %2/z, %1"
)
-;; Extract an element from the Advanced SIMD portion of the register.
-;; We don't just reuse the aarch64-simd.md pattern because we don't
-;; want any change in lane number on big-endian targets.
-(define_insn "*vec_extract<mode><Vel>_v128"
- [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
- (parallel [(match_operand:SI 2 "const_int_operand")])))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
+;; -------------------------------------------------------------------------
+;; ---- Normal gather loads
+;; -------------------------------------------------------------------------
+;; Includes gather forms of:
+;; - LD1D
+;; - LD1W
+;; -------------------------------------------------------------------------
+
+;; Unpredicated gather loads.
+(define_expand "gather_load<mode>"
+ [(set (match_operand:SVE_SD 0 "register_operand")
+ (unspec:SVE_SD
+ [(match_dup 5)
+ (match_operand:DI 1 "aarch64_reg_or_zero")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ "TARGET_SVE"
{
- operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
- switch (which_alternative)
- {
- case 0:
- return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
- case 1:
- return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
- case 2:
- return "st1\\t{%1.<Vetype>}[%2], %0";
- default:
- gcc_unreachable ();
- }
+ operands[5] = aarch64_ptrue_reg (<VPRED>mode);
}
- [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)
-;; Extract an element in the range of DUP. This pattern allows the
-;; source and destination to be different.
-(define_insn "*vec_extract<mode><Vel>_dup"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "const_int_operand")])))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
+;; Predicated gather loads for 32-bit elements. Operand 3 is true for
+;; unsigned extension and false for signed extension.
+(define_insn "mask_gather_load<mode>"
+ [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
+ (unspec:SVE_S
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
+ (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ "TARGET_SVE"
+ "@
+ ld1w\t%0.s, %5/z, [%2.s]
+ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
+ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
+ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+)
+
+;; Predicated gather loads for 64-bit elements. The value of operand 3
+;; doesn't matter in this case.
+(define_insn "mask_gather_load<mode>"
+ [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
+ (unspec:SVE_D
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ "TARGET_SVE"
+ "@
+ ld1d\t%0.d, %5/z, [%2.d]
+ ld1d\t%0.d, %5/z, [%1, %2.d]
+ ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+)
+
+;; =========================================================================
+;; == Stores
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Normal contiguous stores
+;; -------------------------------------------------------------------------
+;; Includes contiguous forms of:
+;; - ST1B
+;; - ST1D
+;; - ST1H
+;; - ST1W
+;; - ST2B
+;; - ST2D
+;; - ST2H
+;; - ST2W
+;; - ST3B
+;; - ST3D
+;; - ST3H
+;; - ST3W
+;; - ST4B
+;; - ST4D
+;; - ST4H
+;; - ST4W
+;; -------------------------------------------------------------------------
+
+;; Predicated ST1.
+(define_insn "maskstore<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
+ (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE))]
+ "TARGET_SVE"
+ "st1<Vesize>\t%1.<Vetype>, %2, %0"
+)
+
+;; Unpredicated ST[234]. This is always a full update, so the dependence
+;; on the old value of the memory location (via (match_dup 0)) is redundant.
+;; There doesn't seem to be any obvious benefit to treating the all-true
+;; case differently though. In particular, it's very unlikely that we'll
+;; only find out during RTL that a store_lanes is dead.
+(define_expand "vec_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
{
- operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
- return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Extract an element outside the range of DUP. This pattern requires the
-;; source and destination to be the same.
-(define_insn "*vec_extract<mode><Vel>_ext"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "0")
- (parallel [(match_operand:SI 2 "const_int_operand")])))]
- "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
+;; Predicated ST[234].
+(define_insn "vec_mask_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
+ "st<vector_count><Vesize>\t%1, %2, %0"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Normal scatter stores
+;; -------------------------------------------------------------------------
+;; Includes scatter forms of:
+;; - ST1D
+;; - ST1W
+;; -------------------------------------------------------------------------
+
+;; Unpredicated scatter stores.
+(define_expand "scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_dup 5)
+ (match_operand:DI 0 "aarch64_reg_or_zero")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand")
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_SD 4 "register_operand")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
{
- operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
- operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
- return "ext\t%0.b, %0.b, %0.b, #%2";
+ operands[5] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Extract the last active element of operand 1 into operand 0.
-;; If no elements are active, extract the last inactive element instead.
-(define_insn "extract_last_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
- (unspec:<VEL>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (match_operand:SVE_ALL 2 "register_operand" "w, w")]
- UNSPEC_LASTB))]
+;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
+;; unsigned extension and false for signed extension.
+(define_insn "mask_scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
+ (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
+ (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
+ UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- lastb\t%<vwcore>0, %1, %2.<Vetype>
- lastb\t%<Vetype>0, %1, %2.<Vetype>"
+ st1w\t%4.s, %5, [%1.s]
+ st1w\t%4.s, %5, [%0, %1.s, sxtw]
+ st1w\t%4.s, %5, [%0, %1.s, uxtw]
+ st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+)
+
+;; Predicated scatter stores for 64-bit elements. The value of operand 2
+;; doesn't matter in this case.
+(define_insn "mask_scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
+ (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
+ (match_operand:SVE_D 4 "register_operand" "w, w, w")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ st1d\t%4.d, %5, [%1.d]
+ st1d\t%4.d, %5, [%0, %1.d]
+ st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Duplicate element
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV
+;; - LD1RB
+;; - LD1RD
+;; - LD1RH
+;; - LD1RW
+;; - LD1RQB
+;; - LD1RQD
+;; - LD1RQH
+;; - LD1RQW
+;; -------------------------------------------------------------------------
+
(define_expand "vec_duplicate<mode>"
[(parallel
[(set (match_operand:SVE_ALL 0 "register_operand")
"ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)
-;; Implement a predicate broadcast by shifting the low bit of the scalar
-;; input into the top bit and using a WHILELO. An alternative would be to
-;; duplicate the input and do a compare with zero.
-(define_expand "vec_duplicate<mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Initialize from individual elements
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - INSR
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_init<mode><Vel>"
+ [(match_operand:SVE_ALL 0 "register_operand")
+ (match_operand 1 "")]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ aarch64_sve_expand_vector_init (operands[0], operands[1]);
DONE;
}
)
-(define_insn "vec_series<mode>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
- (vec_series:SVE_I
- (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
- (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
- "TARGET_SVE"
- "@
- index\t%0.<Vetype>, #%1, %<vw>2
+;; Shift an SVE vector left and insert a scalar into element 0.
+(define_insn "vec_shl_insert_<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
+ (unspec:SVE_ALL
+ [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
+ (match_operand:<VEL> 2 "register_operand" "rZ, w")]
+ UNSPEC_INSR))]
+ "TARGET_SVE"
+ "@
+ insr\t%0.<Vetype>, %<vwcore>2
+ insr\t%0.<Vetype>, %<Vetype>2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Linear series
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - INDEX
+;; -------------------------------------------------------------------------
+
+(define_insn "vec_series<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
+ (vec_series:SVE_I
+ (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
+ (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
+ "TARGET_SVE"
+ "@
+ index\t%0.<Vetype>, #%1, %<vw>2
index\t%0.<Vetype>, %<vw>1, #%2
index\t%0.<Vetype>, %<vw>1, %<vw>2"
)
}
)
-;; Unpredicated LD[234].
-(define_expand "vec_load_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "register_operand")
- (unspec:SVE_STRUCT
- [(match_dup 2)
- (match_operand:SVE_STRUCT 1 "memory_operand")]
- UNSPEC_LDN))]
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Duplicate element
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Implement a predicate broadcast by shifting the low bit of the scalar
+;; input into the top bit and using a WHILELO. An alternative would be to
+;; duplicate the input and do a compare with zero.
+(define_expand "vec_duplicate<mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ rtx tmp = gen_reg_rtx (DImode);
+ rtx op1 = gen_lowpart (DImode, operands[1]);
+ emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
+ emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ DONE;
}
)
-;; Predicated LD[234].
-(define_insn "vec_mask_load_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
- UNSPEC_LDN))]
- "TARGET_SVE"
- "ld<vector_count><Vesize>\t%0, %2/z, %1"
-)
+;; =========================================================================
+;; == Vector decomposition
+;; =========================================================================
-;; Unpredicated ST[234]. This is always a full update, so the dependence
-;; on the old value of the memory location (via (match_dup 0)) is redundant.
-;; There doesn't seem to be any obvious benefit to treating the all-true
-;; case differently though. In particular, it's very unlikely that we'll
-;; only find out during RTL that a store_lanes is dead.
-(define_expand "vec_store_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "memory_operand")
- (unspec:SVE_STRUCT
- [(match_dup 2)
- (match_operand:SVE_STRUCT 1 "register_operand")
- (match_dup 0)]
- UNSPEC_STN))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract index
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - DUP (Advanced SIMD)
+;; - DUP (SVE)
+;; - EXT (SVE)
+;; - ST1 (Advanced SIMD)
+;; - UMOV (Advanced SIMD)
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_extract<mode><Vel>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand")
+ (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ poly_int64 val;
+ if (poly_int_rtx_p (operands[2], &val)
+ && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
+ {
+ /* The last element can be extracted with a LASTB and a false
+ predicate. */
+ rtx sel = aarch64_pfalse_reg (<VPRED>mode);
+ emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
+ DONE;
+ }
+ if (!CONST_INT_P (operands[2]))
+ {
+ /* Create an index with operand[2] as the base and -1 as the step.
+ It will then be zero for the element we care about. */
+ rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
+ index = force_reg (<VEL_INT>mode, index);
+ rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
+
+ /* Get a predicate that is true for only that element. */
+ rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
+ rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
+ rtx sel = gen_reg_rtx (<VPRED>mode);
+ emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
+
+ /* Select the element using LASTB. */
+ emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
+ DONE;
+ }
+ /* Other constant indices fall through; the vec_select is then matched
+ by the extraction insn patterns below. */
}
)
-;; Predicated ST[234].
-(define_insn "vec_mask_store_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "register_operand" "w")
- (match_dup 0)]
- UNSPEC_STN))]
+;; Extract element zero. This is a special case because we want to force
+;; the registers to be the same for the second alternative, and then
+;; split the instruction into nothing after RA.
+(define_insn_and_split "*vec_extract<mode><Vel>_0"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
+ (parallel [(const_int 0)])))]
"TARGET_SVE"
- "st<vector_count><Vesize>\t%1, %2, %0"
+ {
+ /* Element 0 lives in the low 128 bits, so operate on the Advanced
+ SIMD view of the same register. */
+ operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
+ switch (which_alternative)
+ {
+ case 0:
+ return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
+ case 1:
+ return "#";
+ case 2:
+ return "st1\\t{%1.<Vetype>}[0], %0";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ "&& reload_completed
+ && REG_P (operands[0])
+ && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+ {
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+ }
+ [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)
-(define_expand "vec_perm<mode>"
- [(match_operand:SVE_ALL 0 "register_operand")
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
- "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
+;; Extract an element from the Advanced SIMD portion of the register.
+;; We don't just reuse the aarch64-simd.md pattern because we don't
+;; want any change in lane number on big-endian targets.
+;; Byte offsets 1-15 only: offset 0 is handled by the "_0" pattern above.
+(define_insn "*vec_extract<mode><Vel>_v128"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
+ (parallel [(match_operand:SI 2 "const_int_operand")])))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
{
- aarch64_expand_sve_vec_perm (operands[0], operands[1],
- operands[2], operands[3]);
- DONE;
+ operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
+ switch (which_alternative)
+ {
+ case 0:
+ return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
+ case 1:
+ return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
+ case 2:
+ return "st1\\t{%1.<Vetype>}[%2], %0";
+ default:
+ gcc_unreachable ();
+ }
}
+ [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)
-(define_insn "*aarch64_sve_tbl<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
- UNSPEC_TBL))]
- "TARGET_SVE"
- "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+;; Extract an element in the range of DUP. This pattern allows the
+;; source and destination to be different.
+;; Covers byte offsets 16-63, i.e. elements beyond the low 128 bits.
+(define_insn "*vec_extract<mode><Vel>_dup"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "const_int_operand")])))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
+ {
+ operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
+ }
)
-(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
- (match_operand:PRED_ALL 2 "register_operand" "Upa")]
- PERMUTE))]
- "TARGET_SVE"
- "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+;; Extract an element outside the range of DUP. This pattern requires the
+;; source and destination to be the same.
+;; Operand 2 is scaled from an element index to the byte offset that
+;; EXT expects.
+(define_insn "*vec_extract<mode><Vel>_ext"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "0")
+ (parallel [(match_operand:SI 2 "const_int_operand")])))]
+ "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
+ {
+ operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
+ return "ext\t%0.b, %0.b, %0.b, #%2";
+ }
)
-(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
- PERMUTE))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract active element
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LASTB
+;; -------------------------------------------------------------------------
+
+;; Extract the last active element of operand 1 into operand 0.
+;; If no elements are active, extract the last inactive element instead.
+;; The first alternative returns the element in a general register,
+;; the second in a FP/SIMD register.
+(define_insn "extract_last_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
+ (unspec:<VEL>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_ALL 2 "register_operand" "w, w")]
+ UNSPEC_LASTB))]
"TARGET_SVE"
- "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ "@
+ lastb\t%<vwcore>0, %1, %2.<Vetype>
+ lastb\t%<Vetype>0, %1, %2.<Vetype>"
)
-(define_insn "*aarch64_sve_rev64<mode>"
- [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
- (unspec:SVE_BHS
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
- UNSPEC_REV64)]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Extract index
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Handle extractions from a predicate by converting to an integer vector
+;; and extracting from there.
+;; The intermediate vector is 1 in lanes where the predicate is true and
+;; 0 elsewhere, so the extracted element is the predicate bit as an
+;; integer.
+(define_expand "vec_extract<vpred><Vel>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ ;; Dummy operand to which we can attach the iterator.
+ (reg:SVE_I V0_REGNUM)]
"TARGET_SVE"
- "rev<Vesize>\t%0.d, %1/m, %2.d"
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
+ CONST1_RTX (<MODE>mode),
+ CONST0_RTX (<MODE>mode)));
+ emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
+ DONE;
+ }
)
-(define_insn "*aarch64_sve_rev32<mode>"
- [(set (match_operand:SVE_BH 0 "register_operand" "=w")
- (unspec:SVE_BH
- [(match_operand:VNx4BI 1 "register_operand" "Upl")
- (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
- UNSPEC_REV32)]
+;; =========================================================================
+;; == Unary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ABS
+;; - CNT (= popcount)
+;; - NEG
+;; - NOT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer unary arithmetic.
+;; Operand 2 becomes an all-true predicate; the real work is done by the
+;; predicated insn below.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_dup 2)
+ (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "rev<Vesize>\t%0.s, %1/m, %2.s"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-(define_insn "*aarch64_sve_rev16vnx16qi"
- [(set (match_operand:VNx16QI 0 "register_operand" "=w")
- (unspec:VNx16QI
- [(match_operand:VNx8BI 1 "register_operand" "Upl")
- (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
- UNSPEC_REV16)]
+;; Integer unary arithmetic predicated with a PTRUE.
+;; Since the governing predicate is all-true, every lane is written.
+(define_insn "*<optab><mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_UNARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "revb\t%0.h, %1/m, %2.h"
+ "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-(define_insn "@aarch64_sve_rev<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
- UNSPEC_REV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] General unary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FABS
+;; - FNEG
+;; - FSQRT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point unary operations.
+;; Operand 2 becomes an all-true predicate; the real work is done by the
+;; predicated insn below.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 2)
+ (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "rev\t%0.<Vetype>, %1.<Vetype>")
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
-(define_insn "*aarch64_sve_dup_lane<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (vec_duplicate:SVE_ALL
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "const_int_operand")]))))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
- "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
+;; Predicated floating-point unary operations.
+;; Since the governing predicate is all-true, every lane is written.
+(define_insn "*<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-;; Note that the immediate (third) operand is the lane index not
-;; the byte index.
-(define_insn "*aarch64_sve_ext<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
- (match_operand:SVE_ALL 2 "register_operand" "w")
- (match_operand:SI 3 "const_int_operand")]
- UNSPEC_EXT))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Rounding
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FRINTA
+;; - FRINTI
+;; - FRINTM
+;; - FRINTN
+;; - FRINTP
+;; - FRINTX
+;; - FRINTZ
+;; -------------------------------------------------------------------------
+
+;; Unpredicated FRINTy.
+;; Operand 2 becomes an all-true predicate; the real work is done by the
+;; predicated insn below.
+(define_expand "<frint_pattern><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 2)
+ (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
+ FRINT)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; FRINTy predicated with a PTRUE.
+;; Since the governing predicate is all-true, every lane is written.
+(define_insn "*<frint_pattern><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
+ FRINT)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Inverse
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - NOT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated predicate inverse.
+;; The (and ... ptrue) form matches the predicated NOT insn below.
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (and:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
+ (match_dup 2)))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<MODE>mode);
+ }
+)
+
+;; Predicated predicate inverse.
+;; Operand 1 is the governing predicate, used with /z so that inactive
+;; lanes are zeroed.
+(define_insn "*one_cmpl<mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "not\t%0.b, %1/z, %2.b"
+)
+
+;; =========================================================================
+;; == Binary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General binary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - ADD
+;; - AND
+;; - EOR
+;; - MUL
+;; - ORR
+;; - SMAX
+;; - SMIN
+;; - SUB
+;; - UMAX
+;; - UMIN
+;; -------------------------------------------------------------------------
+
+;; Predicated integer operations with merging.
+;; Operand 1 selects between the result of the operation (active lanes)
+;; and operand 4 (inactive lanes).
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "register_operand"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+)
+
+;; Predicated integer operations, merging with the first input.
+;; The second alternative uses MOVPRFX when operand 0 is not tied to
+;; operand 2.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer operations, merging with the second input.
+;; Uses the reversed form of the instruction so that the merged input
+;; can be operand 3.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "register_operand" "0, w"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer operations, merging with an independent value.
+;; For the last alternative the rewrite first merges operands 2 and 4
+;; into operand 0 under the predicate, then retargets the operation so
+;; that it merges with operand 0 itself.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
"TARGET_SVE
- && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
{
- operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
- return "ext\\t%0.b, %0.b, %2.b, #%3";
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
}
+ [(set_attr "movprfx" "yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Addition
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; - DECB
+;; - DECD
+;; - DECH
+;; - DECW
+;; - INCB
+;; - INCD
+;; - INCH
+;; - INCW
+;; - SUB
+;; -------------------------------------------------------------------------
+
(define_insn "add<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
(plus:SVE_I
add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Subtraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUB
+;; - SUBR
+;; -------------------------------------------------------------------------
+
(define_insn "sub<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w")
(minus:SVE_I
subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Absolute difference
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABD
+;; - UABD
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer absolute difference.
+;; The pattern body only serves to attach the USMAX iterator; the
+;; expansion always emits the predicated form with an all-true predicate.
+(define_expand "<su>abd<mode>_3"
+ [(use (match_operand:SVE_I 0 "register_operand"))
+ (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
+ "TARGET_SVE"
+ {
+ rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+ emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
+ operands[2]));
+ DONE;
+ }
+)
+
+;; Predicated integer absolute difference.
+;; The absolute difference is expressed as max (x, y) - min (x, y),
+;; with <max_opp> giving the operation opposite to the USMAX iterator.
+(define_insn "aarch64_<su>abd<mode>_3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (minus:SVE_I
+ (USMAX:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (<max_opp>:SVE_I
+ (match_dup 2)
+ (match_dup 3)))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MUL
+;; -------------------------------------------------------------------------
+
;; Unpredicated multiplication.
(define_expand "mul<mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
"mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)
-(define_insn "*madd<mode>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (plus:SVE_I
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
- "TARGET_SVE"
- "@
- mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; Merging forms are handled through SVE_INT_BINARY.
-(define_insn "*msub<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (minus:SVE_I
- (match_operand:SVE_I 4 "register_operand" "w, 0, w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
- UNSPEC_MERGE_PTRUE)))]
- "TARGET_SVE"
- "@
- msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Highpart multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMULH
+;; - UMULH
+;; -------------------------------------------------------------------------
;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
[(set_attr "movprfx" "*,yes")]
)
-;; Unpredicated division.
+;; -------------------------------------------------------------------------
+;; ---- [INT] Division
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDIV
+;; - SDIVR
+;; - UDIV
+;; - UDIVR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer division.
(define_expand "<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand")
(unspec:SVE_SDI
}
)
-;; Division predicated with a PTRUE.
+;; Integer division predicated with a PTRUE.
(define_insn "*<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_SDI
[(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated NEG, NOT and POPCOUNT.
-(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 2)
- (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer division with merging.
+;; Same structure as the generic cond_<optab> expander, but restricted
+;; to the S and D element sizes that SVE division supports.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_SDI 0 "register_operand")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand")
+ (match_operand:SVE_SDI 3 "register_operand"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- {
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
- }
)
-;; NEG, NOT and POPCOUNT predicated with a PTRUE.
-(define_insn "*<optab><mode>2"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (SVE_INT_UNARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer division, merging with the first input.
+;; The second alternative uses MOVPRFX when operand 0 is not tied to
+;; operand 2.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "0, w")
+ (match_operand:SVE_SDI 3 "register_operand" "w, w"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer division, merging with the second input.
+;; Uses the reversed form of the instruction so that the merged input
+;; can be operand 3.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "w, w")
+ (match_operand:SVE_SDI 3 "register_operand" "0, w"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer division, merging with an independent value.
+;; For the last alternative the rewrite first merges operands 2 and 4
+;; into operand 0 under the predicate, then retargets the operation so
+;; that it merges with operand 0 itself.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
)
-;; Vector AND, ORR and XOR.
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - EOR
+;; - ORR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer binary logical operations.
(define_insn "<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w")
(LOGICAL:SVE_I
<logical>\t%0.d, %1.d, %2.d"
)
-;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs
-;; by providing this, but we need to use UNSPECs since rtx logical ops
-;; aren't defined for floating-point modes.
-(define_insn "*<optab><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")]
- LOGICALF))]
- "TARGET_SVE"
- "<logicalf_op>\t%0.d, %1.d, %2.d"
-)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logical operations (inverted second input)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BIC
+;; -------------------------------------------------------------------------
;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
"bic\t%0.d, %2.d, %1.d"
)
-;; Predicate AND. We can reuse one of the inputs as the GP.
-(define_insn "and<mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
- (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
- "TARGET_SVE"
- "and\t%0.b, %1/z, %1.b, %2.b"
-)
-
-;; Unpredicated predicate ORR and XOR.
-(define_expand "<optab><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (and:PRED_ALL
- (LOGICAL_OR:PRED_ALL
- (match_operand:PRED_ALL 1 "register_operand")
- (match_operand:PRED_ALL 2 "register_operand"))
- (match_dup 3)))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<MODE>mode);
- }
-)
-
-;; Predicated predicate ORR and XOR.
-(define_insn "pred_<optab><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (LOGICAL:PRED_ALL
- (match_operand:PRED_ALL 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "<logical>\t%0.b, %1/z, %2.b, %3.b"
-)
-
-;; Perform a logical operation on operands 2 and 3, using operand 1 as
-;; the GP (which is known to be a PTRUE). Store the result in operand 0
-;; and set the flags in the same way as for PTEST. The (and ...) in the
-;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
-;; value is structurally equivalent to rhs of the second set.
-(define_insn "*<optab><mode>3_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 1 "register_operand" "Upa")
- (and:PRED_ALL
- (LOGICAL:PRED_ALL
- (match_operand:PRED_ALL 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "register_operand" "Upa"))
- (match_dup 1))]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
- (match_dup 1)))]
- "TARGET_SVE"
- "<logical>s\t%0.b, %1/z, %2.b, %3.b"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ASR
+;; - LSL
+;; - LSR
+;; -------------------------------------------------------------------------
-;; Unpredicated predicate inverse.
-(define_expand "one_cmpl<mode>2"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (and:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
- (match_dup 2)))]
+;; Unpredicated shift by a scalar, which expands into one of the vector
+;; shifts below.
+;; Constant amounts are duplicated into a vector, staying as an immediate
+;; vector when the shift-operand predicate accepts them; variable amounts
+;; are broadcast from a scalar register.
+(define_expand "<ASHIFT:optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:<VEL> 2 "general_operand")))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<MODE>mode);
+ rtx amount;
+ if (CONST_INT_P (operands[2]))
+ {
+ amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+ if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+ amount = force_reg (<MODE>mode, amount);
+ }
+ else
+ {
+ amount = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_vec_duplicate<mode> (amount,
+ convert_to_mode (<VEL>mode,
+ operands[2], 0)));
+ }
+ emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
+ DONE;
}
)
-;; Predicated predicate inverse.
-(define_insn "*one_cmpl<mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "not\t%0.b, %1/z, %2.b"
-)
-
-;; Predicated predicate BIC and ORN.
-(define_insn "*<nlogical><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (NLOGICAL:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (match_operand:PRED_ALL 3 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
-)
-
-;; Predicated predicate NAND and NOR.
-(define_insn "*<logical_nn><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (NLOGICAL:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
-)
-
-;; Unpredicated LSL, LSR and ASR by a vector.
+;; Unpredicated shift by a vector.
(define_expand "v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
}
)
-;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't
-;; actually need the predicate for the first alternative, but using Upa
-;; or X isn't likely to gain much and would make the instruction seem
-;; less uniform to the register allocator.
+;; Shift by a vector, predicated with a PTRUE. We don't actually need
+;; the predicate for the first alternative, but using Upa or X isn't
+;; likely to gain much and would make the instruction seem less uniform
+;; to the register allocator.
(define_insn_and_split "*v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_I
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
-;; LSL, LSR and ASR by a scalar, which expands into one of the vector
-;; shifts above.
-(define_expand "<ASHIFT:optab><mode>3"
+;; -------------------------------------------------------------------------
+;; ---- [INT] Maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMAX
+;; - SMIN
+;; - UMAX
+;; - UMIN
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer MAX/MIN.
+(define_expand "<su><maxmin><mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
- (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:<VEL> 2 "general_operand")))]
+ (unspec:SVE_I
+ [(match_dup 3)
+ (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- rtx amount;
- if (CONST_INT_P (operands[2]))
- {
- amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
- if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
- amount = force_reg (<MODE>mode, amount);
- }
- else
- {
- amount = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_vec_duplicate<mode> (amount,
- convert_to_mode (<VEL>mode,
- operands[2], 0)));
- }
- emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
-;;
-;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
-;; is a PTRUE even if the optimizers haven't yet been able to propagate
-;; the constant. We would use a separate unspec code for PTESTs involving
-;; GPs that might not be PTRUEs.
-(define_insn "ptest_ptrue<mode>"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 0 "register_operand" "Upa")
- (match_operand:PRED_ALL 1 "register_operand" "Upa")]
- UNSPEC_PTEST_PTRUE))]
+;; Integer MAX/MIN predicated with a PTRUE.
+(define_insn "*<su><maxmin><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "ptest\t%0, %1.b"
+ "@
+ <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Set element I of the result if operand1 + J < operand2 for all J in [0, I].
-;; with the comparison being unsigned.
-(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
- (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
- UNSPEC_WHILE_LO))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
-)
+;; Merging forms are handled through SVE_INT_BINARY.
-;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
-;; Handle the case in which both results are useful. The GP operand
-;; to the PTEST isn't needed, so we allow it to be anything.
-(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 1)
- (unspec:PRED_ALL
- [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
- (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
- UNSPEC_WHILE_LO)]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_dup 2)
- (match_dup 3)]
- UNSPEC_WHILE_LO))]
- "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
- ;; Force the compiler to drop the unused predicate operand, so that we
- ;; don't have an unnecessary PTRUE.
- "&& !CONSTANT_P (operands[1])"
- {
- operands[1] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] General binary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes post-RA forms of:
+;; - FADD
+;; - FMUL
+;; - FSUB
+;; -------------------------------------------------------------------------
-;; Integer comparisons predicated with a PTRUE.
-(define_insn "*cmp<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE))
- (clobber (reg:CC_NZC CC_REGNUM))]
+;; Unpredicated floating-point binary operations (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_<sve_fp_op><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_F
+ (match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")))]
+ "TARGET_SVE && reload_completed"
+ "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General binary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes merging forms of:
+;; - FADD
+;; - FDIV
+;; - FDIVR
+;; - FMAXNM
+;; - FMINNM
+;; - FMUL
+;; - FSUB
+;; - FSUBR
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Integer comparisons predicated with a PTRUE in which only the flags result
-;; is interesting.
-(define_insn "*cmp<cmp_op><mode>_ptest"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
+;; Predicated floating-point operations, merging with the first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)]
- UNSPEC_PTEST_PTRUE))
- (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
"TARGET_SVE"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Integer comparisons predicated with a PTRUE in which both the flag and
-;; predicate results are interesting.
-(define_insn "*cmp<cmp_op><mode>_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
+;; Predicated floating-point operations, merging with the second input.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))]
- UNSPEC_MERGE_PTRUE))]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "register_operand" "0, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 3)]
+ UNSPEC_SEL))]
"TARGET_SVE"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Predicated integer comparisons, formed by combining a PTRUE-predicated
-;; comparison with an AND. Split the instruction into its preferred form
-;; (below) at the earliest opportunity, in order to get rid of the
-;; redundant operand 1.
-(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
- "#"
- "&& 1"
- [(parallel
- [(set (match_dup 0)
- (and:<VPRED>
- (SVE_INT_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))
- (clobber (reg:CC_NZC CC_REGNUM))])]
+ <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer comparisons.
-(define_insn "*pred_cmp<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
+;; Predicated floating-point operations, merging with an independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
)
-;; Floating-point comparisons predicated with a PTRUE.
-(define_insn "*fcm<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_FP_CMP:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Addition
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADD
+;; - FSUB
+;; -------------------------------------------------------------------------
-(define_insn "*fcmuo<mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (unordered:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))]
+;; Unpredicated floating-point addition.
+(define_expand "add<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (plus:SVE_F
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
-;; with another predicate P. This does not have the same trapping behavior
-;; as predicating the comparison itself on P, but it's a legitimate fold,
-;; since we can drop any potentially-trapping operations whose results
-;; are not needed.
-;;
-;; Split the instruction into its preferred form (below) at the earliest
-;; opportunity, in order to get rid of the redundant operand 1.
-(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (SVE_FP_CMP
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
+;; Floating-point addition predicated with a PTRUE.
+(define_insn_and_split "*add<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (plus:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (and:<VPRED>
- (SVE_FP_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))]
+ "@
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
)
-(define_insn_and_split "*fcmuo<mode>_and_combine"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (unordered
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl")))]
- "TARGET_SVE"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (and:<VPRED>
- (unordered:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))]
-)
+;; Merging forms are handled through SVE_COND_FP_BINARY.
-;; Unpredicated floating-point comparisons, with the results ANDed
-;; with another predicate. This is a valid fold for the same reasons
-;; as above.
-(define_insn "*fcm<cmp_op><mode>_and"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (SVE_FP_CMP:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
- "TARGET_SVE"
- "@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Subtraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADD
+;; - FSUB
+;; - FSUBR
+;; -------------------------------------------------------------------------
-(define_insn "*fcmuo<mode>_and"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (and:<VPRED>
- (unordered:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl")))]
+;; Unpredicated floating-point subtraction.
+(define_expand "sub<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (minus:SVE_F
+ (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Predicated floating-point comparisons. We don't need a version
-;; of this for unordered comparisons.
-(define_insn "*pred_fcm<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
- SVE_COND_FP_CMP))]
- "TARGET_SVE"
+;; Floating-point subtraction predicated with a PTRUE.
+(define_insn_and_split "*sub<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (minus:SVE_F
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE
+ && (register_operand (operands[2], <MODE>mode)
+ || register_operand (operands[3], <MODE>mode))"
"@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[2], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
)
-;; vcond_mask operand order: true, false, mask
-;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
-;; SEL operand order: mask, true, false
-(define_insn "vcond_mask_<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand" "Upa")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
-)
+;; Merging forms are handled through SVE_COND_FP_BINARY.
-;; Selects between a duplicated immediate and zero.
-(define_insn "aarch64_sve_dup<mode>_const"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (unspec:SVE_I
+;; -------------------------------------------------------------------------
+;; ---- [FP] Absolute difference
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FABD
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point absolute difference.
+(define_insn "*fabd<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
- (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
- UNSPEC_SEL))]
+ (abs:SVE_F
+ (minus:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "0")
+ (match_operand:SVE_F 3 "register_operand" "w")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "mov\t%0.<Vetype>, %1/z, #%2"
+ "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)
-;; Integer (signed) vcond. Don't enforce an immediate range here, since it
-;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcond<mode><v_int_equiv>"
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (if_then_else:SVE_ALL
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_INT_EQUIV> 4 "register_operand")
- (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMUL
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point multiplication.
+(define_expand "mul<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (mult:SVE_F
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Integer vcondu. Don't enforce an immediate range here, since it
-;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcondu<mode><v_int_equiv>"
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (if_then_else:SVE_ALL
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_INT_EQUIV> 4 "register_operand")
- (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
+;; Floating-point multiplication predicated with a PTRUE.
+(define_insn_and_split "*mul<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (mult:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "%0, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
- DONE;
- }
+ "@
+ fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
)
-;; Floating-point vcond. All comparisons except FCMUO allow a zero
-;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
-;; with zero.
-(define_expand "vcond<mode><v_fp_equiv>"
- [(set (match_operand:SVE_SD 0 "register_operand")
- (if_then_else:SVE_SD
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_FP_EQUIV> 4 "register_operand")
- (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
- (match_operand:SVE_SD 1 "register_operand")
- (match_operand:SVE_SD 2 "register_operand")))]
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Division
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FDIV
+;; - FDIVR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point division.
+(define_expand "div<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (div:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Signed integer comparisons. Don't enforce an immediate range here, since
-;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
-;; instead.
-(define_expand "vec_cmp<mode><vpred>"
- [(parallel
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "nonmemory_operand")]))
- (clobber (reg:CC_NZC CC_REGNUM))])]
+;; Floating-point division predicated with a PTRUE.
+(define_insn "*div<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- DONE;
- }
+ "@
+ fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unsigned integer comparisons. Don't enforce an immediate range here, since
-;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
-;; instead.
-(define_expand "vec_cmpu<mode><vpred>"
- [(parallel
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "nonmemory_operand")]))
- (clobber (reg:CC_NZC CC_REGNUM))])]
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - EOR
+;; - ORR
+;; -------------------------------------------------------------------------
+
+;; Binary logical operations on floating-point modes. We avoid subregs
+;; by providing this, but we need to use UNSPECs since rtx logical ops
+;; aren't defined for floating-point modes.
+(define_insn "*<optab><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ LOGICALF))]
"TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- DONE;
- }
+ "<logicalf_op>\t%0.d, %1.d, %2.d"
)
-;; Floating-point comparisons. All comparisons except FCMUO allow a zero
-;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
-;; with zero.
-(define_expand "vec_cmp<mode><vpred>"
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Sign copying
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+(define_expand "copysign<mode>3"
+ [(match_operand:SVE_F 0 "register_operand")
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
"TARGET_SVE"
{
- aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false);
- DONE;
- }
-)
+ rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
-;; Branch based on predicate equality or inequality.
-(define_expand "cbranch<mode>4"
- [(set (pc)
- (if_then_else
- (match_operator 0 "aarch64_equality_operator"
- [(match_operand:PRED_ALL 1 "register_operand")
- (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
- (label_ref (match_operand 3 ""))
- (pc)))]
- ""
- {
- rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
- rtx pred;
- if (operands[2] == CONST0_RTX (<MODE>mode))
- pred = operands[1];
- else
- {
- pred = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
- operands[2]));
- }
- emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
- operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
- operands[2] = const0_rtx;
+ rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+
+ emit_insn (gen_and<v_int_equiv>3
+ (sign, arg2,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ HOST_WIDE_INT_M1U
+ << bits)));
+ emit_insn (gen_and<v_int_equiv>3
+ (mant, arg1,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ ~(HOST_WIDE_INT_M1U
+ << bits))));
+ emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ DONE;
}
)
-;; Unpredicated integer MIN/MAX.
-(define_expand "<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 3)
- (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+(define_expand "xorsign<mode>3"
+ [(match_operand:SVE_F 0 "register_operand")
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+ rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+
+ emit_insn (gen_and<v_int_equiv>3
+ (sign, arg2,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ HOST_WIDE_INT_M1U
+ << bits)));
+ emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ DONE;
}
)
-;; Integer MIN/MAX predicated with a PTRUE.
-(define_insn "*<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMAX
+;; - FMAXNM
+;; - FMIN
+;; - FMINNM
+;; -------------------------------------------------------------------------
-;; Unpredicated floating-point MIN/MAX.
+;; Unpredicated floating-point MAX/MIN.
(define_expand "<su><maxmin><mode>3"
[(set (match_operand:SVE_F 0 "register_operand")
(unspec:SVE_F
}
)
-;; Floating-point MIN/MAX predicated with a PTRUE.
+;; Floating-point MAX/MIN predicated with a PTRUE.
(define_insn "*<su><maxmin><mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_F
[(set_attr "movprfx" "*,yes")]
)
-;; Unpredicated fmin/fmax.
+;; Unpredicated fmax/fmin.
(define_expand "<maxmin_uns><mode>3"
[(set (match_operand:SVE_F 0 "register_operand")
(unspec:SVE_F
}
)
-;; fmin/fmax predicated with a PTRUE.
+;; fmax/fmin predicated with a PTRUE.
(define_insn "*<maxmin_uns><mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_F
[(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "register_operand"))
- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - ANDS
+;; - EOR
+;; - EORS
+;; - ORR
+;; - ORRS
+;; -------------------------------------------------------------------------
+
+;; Predicate AND. We can reuse one of the inputs as the GP.
+(define_insn "and<mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
"TARGET_SVE"
+ "and\t%0.b, %1/z, %1.b, %2.b"
)
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_SDI 0 "register_operand")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand")
- (match_operand:SVE_SDI 3 "register_operand"))
- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
+;; Unpredicated predicate EOR and ORR.
+(define_expand "<optab><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (and:PRED_ALL
+ (LOGICAL_OR:PRED_ALL
+ (match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "register_operand"))
+ (match_dup 3)))]
"TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<MODE>mode);
+ }
)
-;; Predicated integer operations with select matching the first operand.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))
- (match_dup 2)]
- UNSPEC_SEL))]
+;; Predicated predicate AND, EOR and ORR.
+(define_insn "pred_<optab><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (LOGICAL:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")
+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "<logical>\t%0.b, %1/z, %2.b, %3.b"
+)
+
+;; Perform a logical operation on operands 2 and 3, using operand 1 as
+;; the GP (which is known to be a PTRUE). Store the result in operand 0
+;; and set the flags in the same way as for PTEST. The (and ...) in the
+;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
+;; value is structurally equivalent to the rhs of the second set.
+(define_insn "*<optab><mode>3_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (and:PRED_ALL
+ (LOGICAL:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")
+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))
+ (match_dup 1))]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
+ (match_dup 1)))]
+ "TARGET_SVE"
+ "<logical>s\t%0.b, %1/z, %2.b, %3.b"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations (inverted second input)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BIC
+;; - ORN
+;; -------------------------------------------------------------------------
+
+;; Predicated predicate BIC and ORN.
+(define_insn "*<nlogical><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (NLOGICAL:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations (inverted result)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - NAND
+;; - NOR
+;; -------------------------------------------------------------------------
+
+;; Predicated predicate NAND and NOR.
+(define_insn "*<logical_nn><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (NLOGICAL:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
+)
+
+;; =========================================================================
+;; == Ternary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] MLA and MAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MAD
+;; - MLA
+;; -------------------------------------------------------------------------
+
+;; Predicated integer addition of product.
+(define_insn "*madd<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (plus:SVE_I
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
"TARGET_SVE"
"@
- <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
+ mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-(define_insn "*cond_<optab><mode>_2"
+;; -------------------------------------------------------------------------
+;; ---- [INT] MLS and MSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MLS
+;; - MSB
+;; -------------------------------------------------------------------------
+
+;; Predicated integer subtraction of product.
+(define_insn "*msub<mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (minus:SVE_I
+ (match_operand:SVE_I 4 "register_operand" "w, 0, w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
+ UNSPEC_MERGE_PTRUE)))]
+ "TARGET_SVE"
+ "@
+ msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDOT
+;; - UDOT
+;; -------------------------------------------------------------------------
+
+;; Four-element integer dot-product with accumulation.
+(define_insn "<sur>dot_prod<vsi2qi>"
[(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "0, w")
- (match_operand:SVE_SDI 3 "register_operand" "w, w"))
- (match_dup 2)]
- UNSPEC_SEL))]
+ (plus:SVE_SDI
+ (unspec:SVE_SDI
+ [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
+ DOTPROD)
+ (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
"TARGET_SVE"
"@
- <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
+ movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
[(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer operations with select matching the second operand.
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of absolute differences
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
+;; operands 1 and 2. The sequence also has to perform a widening reduction of
+;; the difference into a vector and accumulate that into operand 3 before
+;; copying that into the result operand 0.
+;; Perform that with a sequence of:
+;; MOV ones.b, #1
+;; [SU]ABD diff.b, p0/m, op1.b, op2.b
+;; MOVPRFX op0, op3 // If necessary
+;; UDOT op0.s, diff.b, ones.b
+(define_expand "<sur>sad<vsi2qi>"
+ [(use (match_operand:SVE_SDI 0 "register_operand"))
+ (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
+ (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
+ (use (match_operand:SVE_SDI 3 "register_operand"))]
+ "TARGET_SVE"
+ {
+ rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
+ rtx diff = gen_reg_rtx (<VSI2QI>mode);
+ emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
+ emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
+ DONE;
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General ternary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - FMAD
+;; - FMLA
+;; - FMLS
+;; - FMSB
+;; - FNMAD
+;; - FNMLA
+;; - FNMLS
+;; - FNMSB
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point ternary operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+{
+ /* Swap the multiplication operands if the fallback value is the
+ second of the two. */
+ if (rtx_equal_p (operands[3], operands[5]))
+ std::swap (operands[2], operands[3]);
+})
+
+;; Predicated floating-point ternary operations, merging with the
+;; first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "register_operand" "0, w"))
- (match_dup 3)]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE"
"@
- <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (unspec:SVE_SDI
+;; Predicated floating-point ternary operations, merging with the
+;; third input.
+(define_insn "*cond_<optab><mode>_4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "w, w")
- (match_operand:SVE_SDI 3 "register_operand" "0, w"))
- (match_dup 3)]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")
+ (match_operand:SVE_F 4 "register_operand" "0, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 4)]
UNSPEC_SEL))]
"TARGET_SVE"
"@
- <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer binary operations in which the values of inactive
-;; lanes are distinct from the other inputs.
+;; Predicated floating-point ternary operations, merging with an
+;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
UNSPEC_SEL))]
"TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
+ && !rtx_equal_p (operands[2], operands[5])
+ && !rtx_equal_p (operands[3], operands[5])
+ && !rtx_equal_p (operands[4], operands[5])"
"@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
#"
"&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
+ && !CONSTANT_P (operands[5])
+ && !rtx_equal_p (operands[0], operands[5])"
{
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+ operands[5], operands[1]));
+ operands[5] = operands[4] = operands[0];
}
[(set_attr "movprfx" "yes")]
)
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
+;; -------------------------------------------------------------------------
+;; ---- [FP] FMLA and FMAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMAD
+;; - FMLA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fma (%0 = (%1 * %2) + %3).
+(define_expand "fma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
{
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
- [(set_attr "movprfx" "yes")]
)
-;; Set operand 0 to the last active element in operand 3, or to tied
-;; operand 1 if no elements are active.
-(define_insn "fold_extract_last_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
- (unspec:<VEL>
- [(match_operand:<VEL> 1 "register_operand" "0, 0")
- (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
- (match_operand:SVE_ALL 3 "register_operand" "w, w")]
- UNSPEC_CLASTB))]
+;; fma predicated with a PTRUE.
+(define_insn "*fma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
- clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
+ fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated integer add reduction.
-(define_expand "reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- UNSPEC_ADDV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] FMLS and FMSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLS
+;; - FMSB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
+(define_expand "fnma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 1 "register_operand"))
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated integer add reduction. The result is always 64-bits.
-(define_insn "*reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- UNSPEC_ADDV))]
+;; fnma predicated with a PTRUE.
+(define_insn "*fnma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "uaddv\t%d0, %1, %2.<Vetype>"
+ "@
+ fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated floating-point add reduction.
-(define_expand "reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_F 1 "register_operand")]
- UNSPEC_FADDV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] FNMLA and FNMAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FNMAD
+;; - FNMLA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
+(define_expand "fnms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 1 "register_operand"))
+ (match_operand:SVE_F 2 "register_operand")
+ (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated floating-point add reduction.
-(define_insn "*reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_F 2 "register_operand" "w")]
- UNSPEC_FADDV))]
+;; fnms predicated with a PTRUE.
+(define_insn "*fnms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (neg:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "faddv\t%<Vetype>0, %1, %2.<Vetype>"
+ "@
+ fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated integer MIN/MAX reduction.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- MAXMINV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] FNMLS and FNMSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FNMLS
+;; - FNMSB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fms (%0 = (%1 * %2) - %3).
+(define_expand "fms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated integer MIN/MAX reduction.
-(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- MAXMINV))]
+;; fms predicated with a PTRUE.
+(define_insn "*fms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (neg:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
+ "@
+ fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated floating-point MIN/MAX reduction.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_F 1 "register_operand")]
- FMAXMINV))]
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on predicates
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - MOV
+;; - SEL
+;; -------------------------------------------------------------------------
+
+;; vcond_mask operand order: true, false, mask
+;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
+;; SEL operand order: mask, true, false
+(define_insn "vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 3 "register_operand" "Upa")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 2 "register_operand" "w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Selects between a duplicated immediate and zero.
+(define_insn "aarch64_sve_dup<mode>_const"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
+ (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- {
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "mov\t%0.<Vetype>, %1/z, #%2"
)
-;; Predicated floating-point MIN/MAX reduction.
-(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_F 2 "register_operand" "w")]
- FMAXMINV))]
- "TARGET_SVE"
- "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Compare and select
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
-(define_expand "reduc_<optab>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- BITWISEV))]
+;; Integer (signed) vcond. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
+(define_expand "vcond<mode><v_int_equiv>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (if_then_else:SVE_ALL
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_INT_EQUIV> 4 "register_operand")
+ (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+ DONE;
}
)
-(define_insn "*reduc_<optab>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- BITWISEV))]
+;; Integer vcondu. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
+(define_expand "vcondu<mode><v_int_equiv>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (if_then_else:SVE_ALL
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_INT_EQUIV> 4 "register_operand")
+ (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")))]
"TARGET_SVE"
- "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
+ {
+ aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+ DONE;
+ }
)
-;; Unpredicated in-order FP reductions.
-(define_expand "fold_left_plus_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 3)
- (match_operand:<VEL> 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
- UNSPEC_FADDA))]
+;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
+;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
+(define_expand "vcond<mode><v_fp_equiv>"
+ [(set (match_operand:SVE_SD 0 "register_operand")
+ (if_then_else:SVE_SD
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_FP_EQUIV> 4 "register_operand")
+ (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
+ (match_operand:SVE_SD 1 "register_operand")
+ (match_operand:SVE_SD 2 "register_operand")))]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
+ DONE;
}
)
-;; In-order FP reductions predicated with PTRUE.
-(define_insn "mask_fold_left_plus_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
- (match_operand:<VEL> 1 "register_operand" "0")
- (match_operand:SVE_F 2 "register_operand" "w")]
- UNSPEC_FADDA))]
- "TARGET_SVE"
- "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - CMPEQ
+;; - CMPGE
+;; - CMPGT
+;; - CMPHI
+;; - CMPHS
+;; - CMPLE
+;; - CMPLO
+;; - CMPLS
+;; - CMPLT
+;; - CMPNE
+;; -------------------------------------------------------------------------
-;; Predicated form of the above in-order reduction.
-(define_insn "*pred_fold_left_plus_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL>
- [(match_operand:<VEL> 1 "register_operand" "0")
- (unspec:SVE_F
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_F 3 "register_operand" "w")
- (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
- UNSPEC_SEL)]
- UNSPEC_FADDA))]
+;; Signed integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
+;; instead.
+(define_expand "vec_cmp<mode><vpred>"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "nonmemory_operand")]))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
"TARGET_SVE"
- "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
+ {
+ aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+ }
)
-;; Unpredicated floating-point addition.
-(define_expand "add<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (plus:SVE_F
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Unsigned integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
+;; instead.
+(define_expand "vec_cmpu<mode><vpred>"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "nonmemory_operand")]))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
}
)
-;; Floating-point addition predicated with a PTRUE.
-(define_insn_and_split "*add<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (plus:SVE_F
- (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Integer comparisons predicated with a PTRUE.
+(define_insn "*cmp<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE))
+ (clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
"@
- fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point subtraction.
-(define_expand "sub<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (minus:SVE_F
- (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
- (match_operand:SVE_F 2 "register_operand"))]
+;; Integer comparisons predicated with a PTRUE in which both the flags and
+;; predicate results are interesting.
+(define_insn "*cmp<cmp_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE)]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "@
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Floating-point subtraction predicated with a PTRUE.
-(define_insn_and_split "*sub<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
- (minus:SVE_F
- (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE
- && (register_operand (operands[2], <MODE>mode)
- || register_operand (operands[3], <MODE>mode))"
+;; Integer comparisons predicated with a PTRUE in which only the flags result
+;; is interesting.
+(define_insn "*cmp<cmp_op><mode>_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE)]
+ UNSPEC_PTEST_PTRUE))
+ (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
+ "TARGET_SVE"
"@
- fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[2], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point multiplication.
-(define_expand "mul<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (mult:SVE_F
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+;; Predicated integer comparisons, formed by combining a PTRUE-predicated
+;; comparison with an AND. Split the instruction into its preferred form
+;; (below) at the earliest opportunity, in order to get rid of the
+;; redundant operand 1.
+(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:<VPRED>
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
)
-;; Floating-point multiplication predicated with a PTRUE.
-(define_insn_and_split "*mul<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (mult:SVE_F
- (match_operand:SVE_F 2 "register_operand" "%0, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer comparisons.
+(define_insn "*pred_cmp<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
+ (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
"@
- fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point binary operations (post-RA only).
-;; These are generated by splitting a predicated instruction whose
-;; predicate is unused.
-(define_insn "*post_ra_<sve_fp_op><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (SVE_UNPRED_FP_BINARY:SVE_F
- (match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")))]
- "TARGET_SVE && reload_completed"
- "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
+;; -------------------------------------------------------------------------
+;; ---- [INT] While tests
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - WHILELO
+;; -------------------------------------------------------------------------
-;; Unpredicated fma (%0 = (%1 * %2) + %3).
-(define_expand "fma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
+;; with the comparison being unsigned.
+(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ UNSPEC_WHILE_LO))
+ (clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
-;; fma predicated with a PTRUE.
-(define_insn "*fma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; WHILELO sets the flags in the same way as a PTEST with a PTRUE
+;; governing predicate (GP). Handle the case in which both results are
+;; useful; the GP operand to the PTEST isn't needed, so it can be anything.
+(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 1)
+ (unspec:PRED_ALL
+ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
+ UNSPEC_WHILE_LO)]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_dup 2)
+ (match_dup 3)]
+ UNSPEC_WHILE_LO))]
"TARGET_SVE"
- "@
- fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& !CONSTANT_P (operands[1])"
+ {
+ operands[1] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
+ }
)
-;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
-(define_expand "fnma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 1 "register_operand"))
- (match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCMEQ
+;; - FCMGE
+;; - FCMGT
+;; - FCMLE
+;; - FCMLT
+;; - FCMNE
+;; - FCMUO
+;; -------------------------------------------------------------------------
+
+;; Floating-point comparisons. All comparisons except FCMUO allow a zero
+;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
+;; with zero.
+(define_expand "vec_cmp<mode><vpred>"
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
"TARGET_SVE"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
}
)
-;; fnma predicated with a PTRUE.
-(define_insn "*fnma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+;; Floating-point comparisons predicated with a PTRUE.
+(define_insn "*fcm<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_FP_CMP:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated fms (%0 = (%1 * %2) - %3).
-(define_expand "fms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")
- (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand")))]
+;; Same for unordered comparisons.
+(define_insn "*fcmuo<mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unordered:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; fms predicated with a PTRUE.
-(define_insn "*fms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (neg:SVE_F
- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
- UNSPEC_MERGE_PTRUE))]
+;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
+;; with another predicate P. This does not have the same trapping behavior
+;; as predicating the comparison itself on P, but it's a legitimate fold,
+;; since we can drop any potentially-trapping operations whose results
+;; are not needed.
+;;
+;; Split the instruction into its preferred form (below) at the earliest
+;; opportunity, in order to get rid of the redundant operand 1.
+(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1)
+ (SVE_FP_CMP
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
- "@
- fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (and:<VPRED>
+ (SVE_FP_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
)
-;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
-(define_expand "fnms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 1 "register_operand"))
- (match_operand:SVE_F 2 "register_operand")
- (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand")))]
- UNSPEC_MERGE_PTRUE))]
+;; Same for unordered comparisons.
+(define_insn_and_split "*fcmuo<mode>_and_combine"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1)
+ (unordered
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:<VPRED> 4 "register_operand" "Upl")))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (and:<VPRED>
+ (unordered:<VPRED>
+ (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
)
-;; fnms predicated with a PTRUE.
-(define_insn "*fnms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (neg:SVE_F
- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
- UNSPEC_MERGE_PTRUE))]
+;; Unpredicated floating-point comparisons, with the results ANDed with
+;; another predicate. This is a valid fold for the same reasons as above.
+(define_insn "*fcm<cmp_op><mode>_and"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (SVE_FP_CMP:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
+ (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
"@
- fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point division.
-(define_expand "div<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (div:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Same for unordered comparisons.
+(define_insn "*fcmuo<mode>_and"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (and:<VPRED>
+ (unordered:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w"))
+ (match_operand:<VPRED> 1 "register_operand" "Upl")))]
"TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Floating-point division predicated with a PTRUE.
-(define_insn "*div<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated floating-point comparisons. We don't need a version
+;; of this for unordered comparisons.
+(define_insn "*pred_fcm<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+ SVE_COND_FP_CMP))]
"TARGET_SVE"
"@
- fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated FNEG, FABS and FSQRT.
-(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Test bits
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PTEST
+;; -------------------------------------------------------------------------
+
+;; Branch based on predicate equality or inequality.
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ {
+ pred = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
+ operands[2]));
+ }
+ emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
+ operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ operands[2] = const0_rtx;
}
)
-;; FNEG, FABS and FSQRT predicated with a PTRUE.
-(define_insn "*<optab><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
+;;
+;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
+;; is a PTRUE even if the optimizers haven't yet been able to propagate
+;; the constant. We would use a separate unspec code for PTESTs involving
+;; GPs that might not be PTRUEs.
+(define_insn "ptest_ptrue<mode>"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 0 "register_operand" "Upa")
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")]
+ UNSPEC_PTEST_PTRUE))]
"TARGET_SVE"
- "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "ptest\t%0, %1.b"
)
-(define_insn "*fabd<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (abs:SVE_F
- (minus:SVE_F
- (match_operand:SVE_F 2 "register_operand" "0")
- (match_operand:SVE_F 3 "register_operand" "w")))]
- UNSPEC_MERGE_PTRUE))]
+;; =========================================================================
+;; == Reductions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Conditional reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CLASTB
+;; -------------------------------------------------------------------------
+
+;; Set operand 0 to the last active element in operand 3, or to tied
+;; operand 1 if no elements are active.
+(define_insn "fold_extract_last_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
+ (unspec:<VEL>
+ [(match_operand:<VEL> 1 "register_operand" "0, 0")
+ (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
+ (match_operand:SVE_ALL 3 "register_operand" "w, w")]
+ UNSPEC_CLASTB))]
"TARGET_SVE"
- "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ "@
+ clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
+ clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
)
-;; Unpredicated FRINTy.
-(define_expand "<frint_pattern><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
- FRINT)]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [INT] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ANDV
+;; - EORV
+;; - ORV
+;; - SMAXV
+;; - SMINV
+;; - UADDV
+;; - UMAXV
+;; - UMINV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer add reduction.
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ UNSPEC_ADDV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; FRINTy predicated with a PTRUE.
-(define_insn "*<frint_pattern><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
- FRINT)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer add reduction. The result is always 64 bits.
+(define_insn "*reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ UNSPEC_ADDV))]
"TARGET_SVE"
- "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "uaddv\t%d0, %1, %2.<Vetype>"
)
-;; Unpredicated conversion of floats to integers of the same size (HF to HI,
-;; SF to SI or DF to DI).
-(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
- [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
- (unspec:<V_INT_EQUIV>
- [(match_dup 2)
- (FIXUORS:<V_INT_EQUIV>
- (match_operand:SVE_F 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Unpredicated integer MAX/MIN reduction.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ MAXMINV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Conversion of SF to DI, SI or HI, predicated with a PTRUE.
-(define_insn "*<fix_trunc_optab>v16hsf<mode>2"
- [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
- (unspec:SVE_HSDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FIXUORS:SVE_HSDI
- (match_operand:VNx8HF 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer MAX/MIN reduction.
+(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ MAXMINV))]
"TARGET_SVE"
- "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
+ "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Conversion of SF to DI or SI, predicated with a PTRUE.
-(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FIXUORS:SVE_SDI
- (match_operand:VNx4SF 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+(define_expand "reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ BITWISEV))]
"TARGET_SVE"
- "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Conversion of DF to DI or SI, predicated with a PTRUE.
-(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
- (unspec:SVE_SDI
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (FIXUORS:SVE_SDI
- (match_operand:VNx2DF 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+(define_insn "*reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ BITWISEV))]
"TARGET_SVE"
- "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
+ "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Unpredicated conversion of integers to floats of the same size
-;; (HI to HF, SI to SF or DI to DF).
-(define_expand "<optab><v_int_equiv><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (FLOATUORS:SVE_F
- (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDV
+;; - FMAXNMV
+;; - FMAXV
+;; - FMINNMV
+;; - FMINV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point add reduction.
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_F 1 "register_operand")]
+ UNSPEC_FADDV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Conversion of DI, SI or HI to the same number of HFs, predicated
-;; with a PTRUE.
-(define_insn "*<optab><mode>vnx8hf2"
- [(set (match_operand:VNx8HF 0 "register_operand" "=w")
- (unspec:VNx8HF
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FLOATUORS:VNx8HF
- (match_operand:SVE_HSDI 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated floating-point add reduction.
+(define_insn "*reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ UNSPEC_FADDV))]
+ "TARGET_SVE"
+ "faddv\t%<Vetype>0, %1, %2.<Vetype>"
+)
+
+;; Unpredicated floating-point MAX/MIN reduction.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_F 1 "register_operand")]
+ FMAXMINV))]
"TARGET_SVE"
- "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
-(define_insn "*<optab><mode>vnx4sf2"
- [(set (match_operand:VNx4SF 0 "register_operand" "=w")
- (unspec:VNx4SF
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FLOATUORS:VNx4SF
- (match_operand:SVE_SDI 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated floating-point MAX/MIN reduction.
+(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ FMAXMINV))]
"TARGET_SVE"
- "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
+ "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Conversion of DI or SI to DF, predicated with a PTRUE.
-(define_insn "aarch64_sve_<optab><mode>vnx2df2"
- [(set (match_operand:VNx2DF 0 "register_operand" "=w")
- (unspec:VNx2DF
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (FLOATUORS:VNx2DF
- (match_operand:SVE_SDI 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Left-to-right reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated in-order FP reductions.
+(define_expand "fold_left_plus_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 3)
+ (match_operand:<VEL> 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
+ UNSPEC_FADDA))]
"TARGET_SVE"
- "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Conversion of DFs to the same number of SFs, or SFs to the same number
-;; of HFs.
-(define_insn "*trunc<Vwide><mode>2"
- [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
- (unspec:SVE_HSF
- [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
- (unspec:SVE_HSF
- [(match_operand:<VWIDE> 2 "register_operand" "w")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated in-order FP reductions.
+(define_insn "mask_fold_left_plus_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
+ (match_operand:<VEL> 1 "register_operand" "0")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ UNSPEC_FADDA))]
"TARGET_SVE"
- "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+ "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
)
-;; Conversion of SFs to the same number of DFs, or HFs to the same number
-;; of SFs.
-(define_insn "aarch64_sve_extend<mode><Vwide>2"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE>
- [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
- (unspec:<VWIDE>
- [(match_operand:SVE_HSF 2 "register_operand" "w")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated form of the above in-order reduction.
+(define_insn "*pred_fold_left_plus_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL>
+ [(match_operand:<VEL> 1 "register_operand" "0")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_F 3 "register_operand" "w")
+ (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL)]
+ UNSPEC_FADDA))]
"TARGET_SVE"
- "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
+ "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)
-;; Unpack the low or high half of a predicate, where "high" refers to
-;; the low-numbered lanes for big-endian and the high-numbered lanes
-;; for little-endian.
-(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
- UNPACK)]
- "TARGET_SVE"
+;; =========================================================================
+;; == Permutes
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] General permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TBL
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_perm<mode>"
+ [(match_operand:SVE_ALL 0 "register_operand")
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
+ "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
{
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
- : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
- (operands[0], operands[1]));
+ aarch64_expand_sve_vec_perm (operands[0], operands[1],
+ operands[2], operands[3]);
DONE;
}
)
-;; PUNPKHI and PUNPKLO.
-(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
- (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
- UNPACK_UNSIGNED))]
+(define_insn "*aarch64_sve_tbl<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
+ UNSPEC_TBL))]
"TARGET_SVE"
- "punpk<perm_hilo>\t%0.h, %1.b"
+ "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Unpack the low or high half of a vector, where "high" refers to
-;; the low-numbered lanes for big-endian and the high-numbered lanes
-;; for little-endian.
-(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Special-purpose unary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - DUP
+;; - REV
+;; - REVB
+;; - REVH
+;; - REVW
+;; -------------------------------------------------------------------------
+
+;; Duplicate one element of a vector.
+(define_insn "*aarch64_sve_dup_lane<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (vec_duplicate:SVE_ALL
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "const_int_operand")]))))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
+ "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
+)
+
+;; Reverse the order of elements within a full vector.
+(define_insn "@aarch64_sve_rev<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
+ UNSPEC_REV))]
"TARGET_SVE"
- {
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
- : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
- (operands[0], operands[1]));
- DONE;
- }
+ "rev\t%0.<Vetype>, %1.<Vetype>")
+
+;; Reverse the order of elements within a 64-bit container.
+(define_insn "*aarch64_sve_rev64<mode>"
+ [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
+ (unspec:SVE_BHS
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
+ UNSPEC_REV64)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "rev<Vesize>\t%0.d, %1/m, %2.d"
)
-;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
-(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
- UNPACK))]
+;; Reverse the order of elements within a 32-bit container.
+(define_insn "*aarch64_sve_rev32<mode>"
+ [(set (match_operand:SVE_BH 0 "register_operand" "=w")
+ (unspec:SVE_BH
+ [(match_operand:VNx4BI 1 "register_operand" "Upl")
+ (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
+ UNSPEC_REV32)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
+ "rev<Vesize>\t%0.s, %1/m, %2.s"
)
-;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
-;; First unpack the source without conversion, then float-convert the
-;; unpacked source.
-(define_expand "vec_unpacks_<perm_hilo>_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
- UNPACK_UNSIGNED)]
+;; Reverse the order of elements within a 16-bit container.
+(define_insn "*aarch64_sve_rev16vnx16qi"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(match_operand:VNx8BI 1 "register_operand" "Upl")
+ (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
+ UNSPEC_REV16)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- /* Use ZIP to do the unpack, since we don't care about the upper halves
- and since it has the nice property of not needing any subregs.
- If using UUNPK* turns out to be preferable, we could model it as
- a ZIP whose first operand is zero. */
- rtx temp = gen_reg_rtx (<MODE>mode);
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_zip2<mode>
- : gen_aarch64_sve_zip1<mode>)
- (temp, operands[1], operands[1]));
- rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
- emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
- ptrue, temp));
- DONE;
- }
+ "revb\t%0.h, %1/m, %2.h"
)
-;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
-;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
-;; unpacked VNx4SI to VNx2DF.
-(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
- [(match_operand:VNx2DF 0 "register_operand")
- (FLOATUORS:VNx2DF
- (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
- UNPACK_UNSIGNED))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Special-purpose binary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TRN1
+;; - TRN2
+;; - UZP1
+;; - UZP2
+;; - ZIP1
+;; - ZIP2
+;; -------------------------------------------------------------------------
+
+;; Permutes that take half the elements from one vector and half the
+;; elements from the other.
+(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 2 "register_operand" "w")]
+ PERMUTE))]
"TARGET_SVE"
+ "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Concatenate two vectors and extract a subvector. Note that the
+;; immediate (third) operand is the lane index, not the byte index.
+(define_insn "*aarch64_sve_ext<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
+ (match_operand:SVE_ALL 2 "register_operand" "w")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_EXT))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
{
- /* Use ZIP to do the unpack, since we don't care about the upper halves
- and since it has the nice property of not needing any subregs.
- If using UUNPK* turns out to be preferable, we could model it as
- a ZIP whose first operand is zero. */
- rtx temp = gen_reg_rtx (VNx4SImode);
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_zip2vnx4si
- : gen_aarch64_sve_zip1vnx4si)
- (temp, operands[1], operands[1]));
- rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
- emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
- ptrue, temp));
- DONE;
+ operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
+ return "ext\\t%0.b, %0.b, %2.b, #%3";
}
)
-;; Predicate pack. Use UZP1 on the narrower type, which discards
-;; the high part of each wide element.
-(define_insn "vec_pack_trunc_<Vwide>"
- [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
- (unspec:PRED_BHS
- [(match_operand:<VWIDE> 1 "register_operand" "Upa")
- (match_operand:<VWIDE> 2 "register_operand" "Upa")]
- UNSPEC_PACK))]
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Special-purpose binary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TRN1
+;; - TRN2
+;; - UZP1
+;; - UZP2
+;; - ZIP1
+;; - ZIP2
+;; -------------------------------------------------------------------------
+
+;; Permutes that take half the elements from one vector and half the
+;; elements from the other.
+(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")]
+ PERMUTE))]
"TARGET_SVE"
- "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; =========================================================================
+;; == Conversions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-INT] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - UZP1
+;; -------------------------------------------------------------------------
+
;; Integer pack. Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
"uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
-;; the results into a single vector.
-(define_expand "vec_pack_trunc_<Vwide>"
- [(set (match_dup 4)
- (unspec:SVE_HSF
- [(match_dup 3)
- (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))
- (set (match_dup 5)
- (unspec:SVE_HSF
- [(match_dup 3)
- (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))
- (set (match_operand:SVE_HSF 0 "register_operand")
- (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
+;; -------------------------------------------------------------------------
+;; ---- [INT<-INT] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUNPKHI
+;; - SUNPKLO
+;; - UUNPKHI
+;; - UUNPKLO
+;; -------------------------------------------------------------------------
+
+;; Unpack the low or high half of a vector, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
- operands[4] = gen_reg_rtx (<MODE>mode);
- operands[5] = gen_reg_rtx (<MODE>mode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
+ : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
+ (operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
+ UNPACK))]
+ "TARGET_SVE"
+ "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTZS
+;; - FCVTZU
+;; -------------------------------------------------------------------------
+
+;; Unpredicated conversion of floats to integers of the same size (HF to HI,
+;; SF to SI or DF to DI).
+(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
+ (unspec:<V_INT_EQUIV>
+ [(match_dup 2)
+ (FIXUORS:<V_INT_EQUIV>
+ (match_operand:SVE_F 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
+;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
+(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
+ [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_HSDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FIXUORS:SVE_HSDI
+ (match_operand:VNx8HF 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
+)
+
+;; Conversion of SF to DI or SI, predicated with a PTRUE.
+(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FIXUORS:SVE_SDI
+ (match_operand:VNx4SF 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
+)
+
+;; Conversion of DF to DI or SI, predicated with a PTRUE.
+(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
+ (unspec:SVE_SDI
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (FIXUORS:SVE_SDI
+ (match_operand:VNx2DF 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Packs
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
[(set (match_dup 4)
}
)
-;; Predicated floating-point operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand")]
- SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Unpacks
+;; -------------------------------------------------------------------------
+;; No patterns here yet!
+;; -------------------------------------------------------------------------
-;; Predicated floating-point operations with select matching first operand.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")]
- SVE_COND_FP_BINARY)
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SCVTF
+;; - UCVTF
+;; -------------------------------------------------------------------------
-;; Predicated floating-point operations with select matching second operand.
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+;; Unpredicated conversion of integers to floats of the same size
+;; (HI to HF, SI to SF or DI to DF).
+(define_expand "<optab><v_int_equiv><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
(unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "register_operand" "0, w")]
- SVE_COND_FP_BINARY)
- (match_dup 3)]
- UNSPEC_SEL))]
+ [(match_dup 2)
+ (FLOATUORS:SVE_F
+ (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point binary operations in which the values of
-;; inactive lanes are distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
- SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
{
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
- [(set_attr "movprfx" "yes")]
-)
-
-;; Predicated floating-point ternary operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand")
- (match_operand:SVE_F 4 "register_operand")]
- SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-{
- /* Swap the multiplication operands if the fallback value is the
- second of the two. */
- if (rtx_equal_p (operands[3], operands[5]))
- std::swap (operands[2], operands[3]);
-})
-
-;; Predicated floating-point ternary operations using the FMAD-like form.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w")]
- SVE_COND_FP_TERNARY)
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point ternary operations using the FMLA-like form.
-(define_insn "*cond_<optab><mode>_4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")
- (match_operand:SVE_F 4 "register_operand" "0, w")]
- SVE_COND_FP_TERNARY)
- (match_dup 4)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
)
-;; Predicated floating-point ternary operations in which the value for
-;; inactive lanes is distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")]
- SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[5])
- && !rtx_equal_p (operands[3], operands[5])
- && !rtx_equal_p (operands[4], operands[5])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && !CONSTANT_P (operands[5])
- && !rtx_equal_p (operands[0], operands[5])"
- {
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
- operands[5], operands[1]));
- operands[5] = operands[4] = operands[0];
- }
- [(set_attr "movprfx" "yes")]
+;; Conversion of DI, SI or HI to the same number of HFs, predicated
+;; with a PTRUE.
+(define_insn "*<optab><mode>vnx8hf2"
+ [(set (match_operand:VNx8HF 0 "register_operand" "=w")
+ (unspec:VNx8HF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FLOATUORS:VNx8HF
+ (match_operand:SVE_HSDI 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)
-;; Shift an SVE vector left and insert a scalar into element 0.
-(define_insn "vec_shl_insert_<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
- (unspec:SVE_ALL
- [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
- (match_operand:<VEL> 2 "register_operand" "rZ, w")]
- UNSPEC_INSR))]
+;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
+(define_insn "*<optab><mode>vnx4sf2"
+ [(set (match_operand:VNx4SF 0 "register_operand" "=w")
+ (unspec:VNx4SF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FLOATUORS:VNx4SF
+ (match_operand:SVE_SDI 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- insr\t%0.<Vetype>, %<vwcore>2
- insr\t%0.<Vetype>, %<Vetype>2"
+ "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)
-(define_expand "copysign<mode>3"
- [(match_operand:SVE_F 0 "register_operand")
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
+;; Conversion of DI or SI to DF, predicated with a PTRUE.
+(define_insn "aarch64_sve_<optab><mode>vnx2df2"
+ [(set (match_operand:VNx2DF 0 "register_operand" "=w")
+ (unspec:VNx2DF
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (FLOATUORS:VNx2DF
+ (match_operand:SVE_SDI 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
- int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+ "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
+)
- rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
- rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Packs
+;; -------------------------------------------------------------------------
+;; No patterns here yet!
+;; -------------------------------------------------------------------------
- emit_insn (gen_and<v_int_equiv>3
- (sign, arg2,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- HOST_WIDE_INT_M1U
- << bits)));
- emit_insn (gen_and<v_int_equiv>3
- (mant, arg1,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- ~(HOST_WIDE_INT_M1U
- << bits))));
- emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Unpacks
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
+;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
+;; unpacked VNx4SI to VNx2DF.
+(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
+ [(match_operand:VNx2DF 0 "register_operand")
+ (FLOATUORS:VNx2DF
+ (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
+ UNPACK_UNSIGNED))]
+ "TARGET_SVE"
+ {
+ /* Use ZIP to do the unpack, since we don't care about the upper halves
+ and since it has the nice property of not needing any subregs.
+ If using UUNPK* turns out to be preferable, we could model it as
+ a ZIP whose first operand is zero. */
+ rtx temp = gen_reg_rtx (VNx4SImode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_zip2vnx4si
+ : gen_aarch64_sve_zip1vnx4si)
+ (temp, operands[1], operands[1]));
+ rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
+ emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
+ ptrue, temp));
DONE;
}
)
-(define_expand "xorsign<mode>3"
- [(match_operand:SVE_F 0 "register_operand")
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
+;; the results into a single vector.
+(define_expand "vec_pack_trunc_<Vwide>"
+ [(set (match_dup 4)
+ (unspec:SVE_HSF
+ [(match_dup 3)
+ (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))
+ (set (match_dup 5)
+ (unspec:SVE_HSF
+ [(match_dup 3)
+ (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))
+ (set (match_operand:SVE_HSF 0 "register_operand")
+ (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
"TARGET_SVE"
{
- rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
- int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
-
- rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
- rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
-
- emit_insn (gen_and<v_int_equiv>3
- (sign, arg2,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- HOST_WIDE_INT_M1U
- << bits)));
- emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
}
)
-;; Unpredicated DOT product.
-(define_insn "<sur>dot_prod<vsi2qi>"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (plus:SVE_SDI
- (unspec:SVE_SDI
- [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
- (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
- DOTPROD)
- (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
+;; Conversion of DFs to the same number of SFs, or SFs to the same number
+;; of HFs.
+(define_insn "*trunc<Vwide><mode>2"
+ [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
+ (unspec:SVE_HSF
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (unspec:SVE_HSF
+ [(match_operand:<VWIDE> 2 "register_operand" "w")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
- movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
- [(set_attr "movprfx" "*,yes")]
+ "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
-;; Unpredicated integer absolute difference.
-(define_expand "<su>abd<mode>_3"
- [(use (match_operand:SVE_I 0 "register_operand"))
- (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
+;; First unpack the source without conversion, then float-convert the
+;; unpacked source.
+;; Operand 0 is the wide result vector; operand 1 is the narrow source.
+(define_expand "vec_unpacks_<perm_hilo>_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
+ UNPACK_UNSIGNED)]
 "TARGET_SVE"
 {
- rtx pred = aarch64_ptrue_reg (<VPRED>mode);
- emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
- operands[2]));
+ /* Use ZIP to do the unpack, since we don't care about the upper halves
+ and since it has the nice property of not needing any subregs.
+ If using UUNPK* turns out to be preferable, we could model it as
+ a ZIP whose first operand is zero. */
+ rtx temp = gen_reg_rtx (<MODE>mode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_zip2<mode>
+ : gen_aarch64_sve_zip1<mode>)
+ (temp, operands[1], operands[1]));
+ /* Widen the unconverted elements with an all-true merging FCVT.  */
+ rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
+ emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
+ ptrue, temp));
 DONE;
 }
)
-;; Predicated integer absolute difference.
-(define_insn "aarch64_<su>abd<mode>_3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (minus:SVE_I
- (USMAX:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))
- (<max_opp>:SVE_I
- (match_dup 2)
- (match_dup 3)))]
+;; Conversion of SFs to the same number of DFs, or HFs to the same number
+;; of SFs.
+;; Operand 1 is the governing predicate for the merging widening FCVT;
+;; UNSPEC_MERGE_PTRUE marks it as an all-true predicate.
+(define_insn "aarch64_sve_extend<mode><Vwide>2"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (unspec:<VWIDE>
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (unspec:<VWIDE>
+ [(match_operand:SVE_HSF 2 "register_operand" "w")]
+ UNSPEC_FLOAT_CONVERT)]
 UNSPEC_MERGE_PTRUE))]
 "TARGET_SVE"
- "@
- <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
+ "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
 )
-;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
-;; operands 1 and 2. The sequence also has to perform a widening reduction of
-;; the difference into a vector and accumulate that into operand 3 before
-;; copying that into the result operand 0.
-;; Perform that with a sequence of:
-;; MOV ones.b, #1
-;; [SU]ABD diff.b, p0/m, op1.b, op2.b
-;; MOVPRFX op0, op3 // If necessary
-;; UDOT op0.s, diff.b, ones.b
+;; -------------------------------------------------------------------------
+;; ---- [PRED<-PRED] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - UZP1
+;; -------------------------------------------------------------------------
+
-(define_expand "<sur>sad<vsi2qi>"
- [(use (match_operand:SVE_SDI 0 "register_operand"))
- (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
- (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
- (use (match_operand:SVE_SDI 3 "register_operand"))]
+;; Predicate pack. Use UZP1 on the narrower type, which discards
+;; the high part of each wide element.
+;; Operands 1 and 2 are the two wide predicate inputs; the result is
+;; a single predicate of the narrower element type.
+(define_insn "vec_pack_trunc_<Vwide>"
+ [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
+ (unspec:PRED_BHS
+ [(match_operand:<VWIDE> 1 "register_operand" "Upa")
+ (match_operand:<VWIDE> 2 "register_operand" "Upa")]
+ UNSPEC_PACK))]
 "TARGET_SVE"
- {
- rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
- rtx diff = gen_reg_rtx (<VSI2QI>mode);
- emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
- emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
- DONE;
- }
+ "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Standard pattern name vec_init<mode><Vel>.
-(define_expand "vec_init<mode><Vel>"
- [(match_operand:SVE_ALL 0 "register_operand")
- (match_operand 1 "" "")]
+;; -------------------------------------------------------------------------
+;; ---- [PRED<-PRED] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PUNPKHI
+;; - PUNPKLO
+;; -------------------------------------------------------------------------
+
+;; Unpack the low or high half of a predicate, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+;; Signedness does not matter when unpacking a predicate, so both <su>
+;; variants expand to the same (unsigned) PUNPK patterns below.
+(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
+ UNPACK)]
 "TARGET_SVE"
 {
- aarch64_sve_expand_vector_init (operands[0], operands[1]);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
+ : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
+ (operands[0], operands[1]));
 DONE;
 }
)
+
+;; Unpack the low or high half of predicate operand 1 into the wider
+;; predicate operand 0. The assembly template always uses the .h/.b
+;; suffixes, whatever the element size associated with the mode.
+(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
+ (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
+ UNPACK_UNSIGNED))]
+ "TARGET_SVE"
+ "punpk<perm_hilo>\t%0.h, %1.b"
+)