;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-;; Note on the handling of big-endian SVE
-;; --------------------------------------
+;; The file is organised into the following sections (search for the full
+;; line):
+;;
+;; == General notes
+;; ---- Note on the handling of big-endian SVE
+;;
+;; == Moves
+;; ---- Moves of single vectors
+;; ---- Moves of multiple vectors
+;; ---- Moves of predicates
+;;
+;; == Loads
+;; ---- Normal contiguous loads
+;; ---- Normal gather loads
+;;
+;; == Stores
+;; ---- Normal contiguous stores
+;; ---- Normal scatter stores
+;;
+;; == Vector creation
+;; ---- [INT,FP] Duplicate element
+;; ---- [INT,FP] Initialize from individual elements
+;; ---- [INT] Linear series
+;; ---- [PRED] Duplicate element
+;;
+;; == Vector decomposition
+;; ---- [INT,FP] Extract index
+;; ---- [INT,FP] Extract active element
+;; ---- [PRED] Extract index
+;;
+;; == Unary arithmetic
+;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; ---- [FP] General unary arithmetic corresponding to rtx codes
+;; ---- [FP] Rounding
+;; ---- [PRED] Inverse
+;;
+;; == Binary arithmetic
+;; ---- [INT] General binary arithmetic corresponding to rtx codes
+;; ---- [INT] Addition
+;; ---- [INT] Subtraction
+;; ---- [INT] Absolute difference
+;; ---- [INT] Multiplication
+;; ---- [INT] Highpart multiplication
+;; ---- [INT] Division
+;; ---- [INT] Binary logical operations
+;; ---- [INT] Binary logical operations (inverted second input)
+;; ---- [INT] Shifts
+;; ---- [INT] Maximum and minimum
+;; ---- [FP] General binary arithmetic corresponding to rtx codes
+;; ---- [FP] General binary arithmetic corresponding to unspecs
+;; ---- [FP] Addition
+;; ---- [FP] Subtraction
+;; ---- [FP] Absolute difference
+;; ---- [FP] Multiplication
+;; ---- [FP] Division
+;; ---- [FP] Binary logical operations
+;; ---- [FP] Sign copying
+;; ---- [FP] Maximum and minimum
+;; ---- [PRED] Binary logical operations
+;; ---- [PRED] Binary logical operations (inverted second input)
+;; ---- [PRED] Binary logical operations (inverted result)
+;;
+;; == Ternary arithmetic
+;; ---- [INT] MLA and MAD
+;; ---- [INT] MLS and MSB
+;; ---- [INT] Dot product
+;; ---- [INT] Sum of absolute differences
+;; ---- [FP] General ternary arithmetic corresponding to unspecs
+;; ---- [FP] FMLA and FMAD
+;; ---- [FP] FMLS and FMSB
+;; ---- [FP] FNMLA and FNMAD
+;; ---- [FP] FNMLS and FNMSB
+;;
+;; == Comparisons and selects
+;; ---- [INT,FP] Select based on predicates
+;; ---- [INT,FP] Compare and select
+;; ---- [INT] Comparisons
+;; ---- [INT] While tests
+;; ---- [FP] Comparisons
+;; ---- [PRED] Test bits
+;;
+;; == Reductions
+;; ---- [INT,FP] Conditional reductions
+;; ---- [INT] Tree reductions
+;; ---- [FP] Tree reductions
+;; ---- [FP] Left-to-right reductions
+;;
+;; == Permutes
+;; ---- [INT,FP] General permutes
+;; ---- [INT,FP] Special-purpose unary permutes
+;; ---- [INT,FP] Special-purpose binary permutes
+;; ---- [PRED] Special-purpose binary permutes
+;;
+;; == Conversions
+;; ---- [INT<-INT] Packs
+;; ---- [INT<-INT] Unpacks
+;; ---- [INT<-FP] Conversions
+;; ---- [INT<-FP] Packs
+;; ---- [INT<-FP] Unpacks
+;; ---- [FP<-INT] Conversions
+;; ---- [FP<-INT] Packs
+;; ---- [FP<-INT] Unpacks
+;; ---- [FP<-FP] Packs
+;; ---- [FP<-FP] Unpacks
+;; ---- [PRED<-PRED] Packs
+;; ---- [PRED<-PRED] Unpacks
+
+;; =========================================================================
+;; == General notes
+;; =========================================================================
+;;
+;; -------------------------------------------------------------------------
+;; ---- Note on the handling of big-endian SVE
+;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; reserve a predicate register.
-;; SVE data moves.
+;; =========================================================================
+;; == Moves
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Moves of single vectors
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV (including aliases)
+;; - LD1B (contiguous form)
+;; - LD1D ( " " )
+;; - LD1H ( " " )
+;; - LD1W ( " " )
+;; - LDR
+;; - ST1B (contiguous form)
+;; - ST1D ( " " )
+;; - ST1H ( " " )
+;; - ST1W ( " " )
+;; - STR
+;; -------------------------------------------------------------------------
+
(define_expand "mov<mode>"
[(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
(match_operand:SVE_ALL 1 "general_operand"))]
}
)
-;; A pattern for optimizing SUBREGs that have a reinterpreting effect
-;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
-;; for details. We use a special predicate for operand 2 to reduce
-;; the number of patterns.
-(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
- [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:VNx16BI 1 "register_operand" "Upl")
- (match_operand 2 "aarch64_any_register_operand" "w")]
- UNSPEC_REV_SUBREG))]
- "TARGET_SVE && BYTES_BIG_ENDIAN"
- "#"
- "&& reload_completed"
- [(const_int 0)]
+(define_expand "movmisalign<mode>"
+ [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
+ (match_operand:SVE_ALL 1 "general_operand"))]
+ "TARGET_SVE"
{
- aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+    /* Equivalent to a normal move for our purposes. */
+ emit_move_insn (operands[0], operands[1]);
DONE;
}
)
}
)
-;; A predicated load or store for which the predicate is known to be
-;; all-true. Note that this pattern is generated directly by
-;; aarch64_emit_sve_pred_move, so changes to this pattern will
-;; need changes there as well.
+;; A predicated move in which the predicate is known to be all-true.
+;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
+;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
[(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
(unspec:SVE_ALL
[(set (match_dup 0) (match_dup 2))]
)
-(define_expand "movmisalign<mode>"
- [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
- (match_operand:SVE_ALL 1 "general_operand"))]
- "TARGET_SVE"
- {
- /* Equivalent to a normal move for our purpooses. */
- emit_move_insn (operands[0], operands[1]);
- DONE;
- }
-)
-
-(define_insn "maskload<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+;; A pattern for optimizing SUBREGs that have a reinterpreting effect
+;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
+;; for details. We use a special predicate for operand 2 to reduce
+;; the number of patterns.
+(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
+ [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
(unspec:SVE_ALL
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_ALL 1 "memory_operand" "m")]
- UNSPEC_LD1_SVE))]
- "TARGET_SVE"
- "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
-)
-
-(define_insn "maskstore<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
- (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_dup 0)]
- UNSPEC_ST1_SVE))]
- "TARGET_SVE"
- "st1<Vesize>\t%1.<Vetype>, %2, %0"
-)
-
-;; Unpredicated gather loads.
-(define_expand "gather_load<mode>"
- [(set (match_operand:SVE_SD 0 "register_operand")
- (unspec:SVE_SD
- [(match_dup 5)
- (match_operand:DI 1 "aarch64_reg_or_zero")
- (match_operand:<V_INT_EQUIV> 2 "register_operand")
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER))]
- "TARGET_SVE"
- {
- operands[5] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Predicated gather loads for 32-bit elements. Operand 3 is true for
-;; unsigned extension and false for signed extension.
-(define_insn "mask_gather_load<mode>"
- [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
- (unspec:SVE_S
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
- (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
- (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER))]
- "TARGET_SVE"
- "@
- ld1w\t%0.s, %5/z, [%2.s]
- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
- ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
- ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
-)
-
-;; Predicated gather loads for 64-bit elements. The value of operand 3
-;; doesn't matter in this case.
-(define_insn "mask_gather_load<mode>"
- [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
- (unspec:SVE_D
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
- (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
- (match_operand:DI 3 "const_int_operand")
- (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
- (mem:BLK (scratch))]
- UNSPEC_LD1_GATHER))]
- "TARGET_SVE"
- "@
- ld1d\t%0.d, %5/z, [%2.d]
- ld1d\t%0.d, %5/z, [%1, %2.d]
- ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
-)
-
-;; Unpredicated scatter store.
-(define_expand "scatter_store<mode>"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_dup 5)
- (match_operand:DI 0 "aarch64_reg_or_zero")
- (match_operand:<V_INT_EQUIV> 1 "register_operand")
- (match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
- (match_operand:SVE_SD 4 "register_operand")]
- UNSPEC_ST1_SCATTER))]
- "TARGET_SVE"
+ [(match_operand:VNx16BI 1 "register_operand" "Upl")
+ (match_operand 2 "aarch64_any_register_operand" "w")]
+ UNSPEC_REV_SUBREG))]
+ "TARGET_SVE && BYTES_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
{
- operands[5] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
+ DONE;
}
)
-;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
-;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode>"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
- (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
- (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
- (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
- (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
- UNSPEC_ST1_SCATTER))]
- "TARGET_SVE"
- "@
- st1w\t%4.s, %5, [%1.s]
- st1w\t%4.s, %5, [%0, %1.s, sxtw]
- st1w\t%4.s, %5, [%0, %1.s, uxtw]
- st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
- st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
-)
-
-;; Predicated scatter stores for 64-bit elements. The value of operand 2
-;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode>"
- [(set (mem:BLK (scratch))
- (unspec:BLK
- [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
- (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
- (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
- (match_operand:DI 2 "const_int_operand")
- (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
- (match_operand:SVE_D 4 "register_operand" "w, w, w")]
- UNSPEC_ST1_SCATTER))]
- "TARGET_SVE"
- "@
- st1d\t%4.d, %5, [%1.d]
- st1d\t%4.d, %5, [%0, %1.d]
- st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
-)
+;; -------------------------------------------------------------------------
+;; ---- Moves of multiple vectors
+;; -------------------------------------------------------------------------
+;; All patterns in this section are synthetic and split to real
+;; instructions after reload.
+;; -------------------------------------------------------------------------
-;; SVE structure moves.
(define_expand "mov<mode>"
[(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
(match_operand:SVE_STRUCT 1 "general_operand"))]
;; Unpredicated structure moves (big-endian). Memory accesses require
;; secondary reloads.
-(define_insn "*aarch64_sve_mov<mode>_le"
+(define_insn "*aarch64_sve_mov<mode>_be"
[(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
(match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
"TARGET_SVE && BYTES_BIG_ENDIAN"
[(set_attr "length" "<insn_length>")]
)
+;; -------------------------------------------------------------------------
+;; ---- Moves of predicates
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV
+;; - LDR
+;; - PFALSE
+;; - PTRUE
+;; - STR
+;; -------------------------------------------------------------------------
+
(define_expand "mov<mode>"
[(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
(match_operand:PRED_ALL 1 "general_operand"))]
* return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
)
-;; Handle extractions from a predicate by converting to an integer vector
-;; and extracting from there.
-(define_expand "vec_extract<vpred><Vel>"
- [(match_operand:<VEL> 0 "register_operand")
- (match_operand:<VPRED> 1 "register_operand")
- (match_operand:SI 2 "nonmemory_operand")
- ;; Dummy operand to which we can attach the iterator.
- (reg:SVE_I V0_REGNUM)]
+;; =========================================================================
+;; == Loads
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Normal contiguous loads
+;; -------------------------------------------------------------------------
+;; Includes contiguous forms of:
+;; - LD1B
+;; - LD1D
+;; - LD1H
+;; - LD1W
+;; - LD2B
+;; - LD2D
+;; - LD2H
+;; - LD2W
+;; - LD3B
+;; - LD3D
+;; - LD3H
+;; - LD3W
+;; - LD4B
+;; - LD4D
+;; - LD4H
+;; - LD4W
+;; -------------------------------------------------------------------------
+
+;; Predicated LD1.
+(define_insn "maskload<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_ALL 1 "memory_operand" "m")]
+ UNSPEC_LD1_SVE))]
"TARGET_SVE"
- {
- rtx tmp = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
- CONST1_RTX (<MODE>mode),
- CONST0_RTX (<MODE>mode)));
- emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
- DONE;
- }
+ "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
-(define_expand "vec_extract<mode><Vel>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand")
- (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
+;; Unpredicated LD[234].
+(define_expand "vec_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "memory_operand")]
+ UNSPEC_LDN))]
"TARGET_SVE"
{
- poly_int64 val;
- if (poly_int_rtx_p (operands[2], &val)
- && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
- {
- /* The last element can be extracted with a LASTB and a false
- predicate. */
- rtx sel = aarch64_pfalse_reg (<VPRED>mode);
- emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
- DONE;
- }
- if (!CONST_INT_P (operands[2]))
- {
- /* Create an index with operand[2] as the base and -1 as the step.
- It will then be zero for the element we care about. */
- rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
- index = force_reg (<VEL_INT>mode, index);
- rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
- emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
-
- /* Get a predicate that is true for only that element. */
- rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
- rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
- rtx sel = gen_reg_rtx (<VPRED>mode);
- emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
-
- /* Select the element using LASTB. */
- emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
- DONE;
- }
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Extract element zero. This is a special case because we want to force
-;; the registers to be the same for the second alternative, and then
-;; split the instruction into nothing after RA.
-(define_insn_and_split "*vec_extract<mode><Vel>_0"
- [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
- (parallel [(const_int 0)])))]
+;; Predicated LD[234].
+(define_insn "vec_mask_load_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
+ UNSPEC_LDN))]
"TARGET_SVE"
- {
- operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
- switch (which_alternative)
- {
- case 0:
- return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
- case 1:
- return "#";
- case 2:
- return "st1\\t{%1.<Vetype>}[0], %0";
- default:
- gcc_unreachable ();
- }
- }
- "&& reload_completed
- && REG_P (operands[0])
- && REGNO (operands[0]) == REGNO (operands[1])"
- [(const_int 0)]
- {
- emit_note (NOTE_INSN_DELETED);
- DONE;
- }
- [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
+ "ld<vector_count><Vesize>\t%0, %2/z, %1"
)
-;; Extract an element from the Advanced SIMD portion of the register.
-;; We don't just reuse the aarch64-simd.md pattern because we don't
-;; want any change in lane number on big-endian targets.
-(define_insn "*vec_extract<mode><Vel>_v128"
- [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
- (parallel [(match_operand:SI 2 "const_int_operand")])))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
+;; -------------------------------------------------------------------------
+;; ---- Normal gather loads
+;; -------------------------------------------------------------------------
+;; Includes gather forms of:
+;; - LD1D
+;; - LD1W
+;; -------------------------------------------------------------------------
+
+;; Unpredicated gather loads.
+(define_expand "gather_load<mode>"
+ [(set (match_operand:SVE_SD 0 "register_operand")
+ (unspec:SVE_SD
+ [(match_dup 5)
+ (match_operand:DI 1 "aarch64_reg_or_zero")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ "TARGET_SVE"
{
- operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
- switch (which_alternative)
- {
- case 0:
- return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
- case 1:
- return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
- case 2:
- return "st1\\t{%1.<Vetype>}[%2], %0";
- default:
- gcc_unreachable ();
- }
+ operands[5] = aarch64_ptrue_reg (<VPRED>mode);
}
- [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)
-;; Extract an element in the range of DUP. This pattern allows the
-;; source and destination to be different.
-(define_insn "*vec_extract<mode><Vel>_dup"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "const_int_operand")])))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
+;; Predicated gather loads for 32-bit elements. Operand 3 is true for
+;; unsigned extension and false for signed extension.
+(define_insn "mask_gather_load<mode>"
+ [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
+ (unspec:SVE_S
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
+ (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ "TARGET_SVE"
+ "@
+ ld1w\t%0.s, %5/z, [%2.s]
+ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
+ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
+ ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+ ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+)
+
+;; Predicated gather loads for 64-bit elements. The value of operand 3
+;; doesn't matter in this case.
+(define_insn "mask_gather_load<mode>"
+ [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
+ (unspec:SVE_D
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
+ (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
+ (match_operand:DI 3 "const_int_operand")
+ (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
+ (mem:BLK (scratch))]
+ UNSPEC_LD1_GATHER))]
+ "TARGET_SVE"
+ "@
+ ld1d\t%0.d, %5/z, [%2.d]
+ ld1d\t%0.d, %5/z, [%1, %2.d]
+ ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+)
+
+;; =========================================================================
+;; == Stores
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Normal contiguous stores
+;; -------------------------------------------------------------------------
+;; Includes contiguous forms of:
+;; - ST1B
+;; - ST1D
+;; - ST1H
+;; - ST1W
+;; - ST2B
+;; - ST2D
+;; - ST2H
+;; - ST2W
+;; - ST3B
+;; - ST3D
+;; - ST3H
+;; - ST3W
+;; - ST4B
+;; - ST4D
+;; - ST4H
+;; - ST4W
+;; -------------------------------------------------------------------------
+
+;; Predicated ST1.
+(define_insn "maskstore<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
+ (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_ST1_SVE))]
+ "TARGET_SVE"
+ "st1<Vesize>\t%1.<Vetype>, %2, %0"
+)
+
+;; Unpredicated ST[234]. This is always a full update, so the dependence
+;; on the old value of the memory location (via (match_dup 0)) is redundant.
+;; There doesn't seem to be any obvious benefit to treating the all-true
+;; case differently though. In particular, it's very unlikely that we'll
+;; only find out during RTL that a store_lanes is dead.
+(define_expand "vec_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand")
+ (unspec:SVE_STRUCT
+ [(match_dup 2)
+ (match_operand:SVE_STRUCT 1 "register_operand")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
{
- operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
- return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Extract an element outside the range of DUP. This pattern requires the
-;; source and destination to be the same.
-(define_insn "*vec_extract<mode><Vel>_ext"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "0")
- (parallel [(match_operand:SI 2 "const_int_operand")])))]
- "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
+;; Predicated ST[234].
+(define_insn "vec_mask_store_lanes<mode><vsingle>"
+ [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
+ (unspec:SVE_STRUCT
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_STRUCT 1 "register_operand" "w")
+ (match_dup 0)]
+ UNSPEC_STN))]
+ "TARGET_SVE"
+ "st<vector_count><Vesize>\t%1, %2, %0"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- Normal scatter stores
+;; -------------------------------------------------------------------------
+;; Includes scatter forms of:
+;; - ST1D
+;; - ST1W
+;; -------------------------------------------------------------------------
+
+;; Unpredicated scatter stores.
+(define_expand "scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_dup 5)
+ (match_operand:DI 0 "aarch64_reg_or_zero")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand")
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
+ (match_operand:SVE_SD 4 "register_operand")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
{
- operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
- operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
- return "ext\t%0.b, %0.b, %0.b, #%2";
+ operands[5] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Extract the last active element of operand 1 into operand 0.
-;; If no elements are active, extract the last inactive element instead.
-(define_insn "extract_last_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
- (unspec:<VEL>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (match_operand:SVE_ALL 2 "register_operand" "w, w")]
- UNSPEC_LASTB))]
+;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
+;; unsigned extension and false for signed extension.
+(define_insn "mask_scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
+ (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
+ (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
+ UNSPEC_ST1_SCATTER))]
"TARGET_SVE"
"@
- lastb\t%<vwcore>0, %1, %2.<Vetype>
- lastb\t%<Vetype>0, %1, %2.<Vetype>"
+ st1w\t%4.s, %5, [%1.s]
+ st1w\t%4.s, %5, [%0, %1.s, sxtw]
+ st1w\t%4.s, %5, [%0, %1.s, uxtw]
+ st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
+ st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+)
+
+;; Predicated scatter stores for 64-bit elements. The value of operand 2
+;; doesn't matter in this case.
+(define_insn "mask_scatter_store<mode>"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
+ (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
+ (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
+ (match_operand:DI 2 "const_int_operand")
+ (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
+ (match_operand:SVE_D 4 "register_operand" "w, w, w")]
+ UNSPEC_ST1_SCATTER))]
+ "TARGET_SVE"
+ "@
+ st1d\t%4.d, %5, [%1.d]
+ st1d\t%4.d, %5, [%0, %1.d]
+ st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
)
+;; =========================================================================
+;; == Vector creation
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Duplicate element
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MOV
+;; - LD1RB
+;; - LD1RD
+;; - LD1RH
+;; - LD1RW
+;; - LD1RQB
+;; - LD1RQD
+;; - LD1RQH
+;; - LD1RQW
+;; -------------------------------------------------------------------------
+
(define_expand "vec_duplicate<mode>"
[(parallel
[(set (match_operand:SVE_ALL 0 "register_operand")
"ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
)
-;; Implement a predicate broadcast by shifting the low bit of the scalar
-;; input into the top bit and using a WHILELO. An alternative would be to
-;; duplicate the input and do a compare with zero.
-(define_expand "vec_duplicate<mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Initialize from individual elements
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - INSR
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_init<mode><Vel>"
+ [(match_operand:SVE_ALL 0 "register_operand")
+ (match_operand 1 "")]
"TARGET_SVE"
{
- rtx tmp = gen_reg_rtx (DImode);
- rtx op1 = gen_lowpart (DImode, operands[1]);
- emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
- emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ aarch64_sve_expand_vector_init (operands[0], operands[1]);
DONE;
}
)
-(define_insn "vec_series<mode>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
- (vec_series:SVE_I
- (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
- (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
- "TARGET_SVE"
- "@
- index\t%0.<Vetype>, #%1, %<vw>2
+;; Shift an SVE vector left and insert a scalar into element 0.
+(define_insn "vec_shl_insert_<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
+ (unspec:SVE_ALL
+ [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
+ (match_operand:<VEL> 2 "register_operand" "rZ, w")]
+ UNSPEC_INSR))]
+ "TARGET_SVE"
+ "@
+ insr\t%0.<Vetype>, %<vwcore>2
+ insr\t%0.<Vetype>, %<Vetype>2"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Linear series
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - INDEX
+;; -------------------------------------------------------------------------
+
+(define_insn "vec_series<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
+ (vec_series:SVE_I
+ (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
+ (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
+ "TARGET_SVE"
+ "@
+ index\t%0.<Vetype>, #%1, %<vw>2
index\t%0.<Vetype>, %<vw>1, #%2
index\t%0.<Vetype>, %<vw>1, %<vw>2"
)
}
)
-;; Unpredicated LD[234].
-(define_expand "vec_load_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "register_operand")
- (unspec:SVE_STRUCT
- [(match_dup 2)
- (match_operand:SVE_STRUCT 1 "memory_operand")]
- UNSPEC_LDN))]
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Duplicate element
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Implement a predicate broadcast by shifting the low bit of the scalar
+;; input into the top bit and using a WHILELO. An alternative would be to
+;; duplicate the input and do a compare with zero.
+(define_expand "vec_duplicate<mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ rtx tmp = gen_reg_rtx (DImode);
+ rtx op1 = gen_lowpart (DImode, operands[1]);
+ emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
+ emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
+ DONE;
}
)
-;; Predicated LD[234].
-(define_insn "vec_mask_load_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
- UNSPEC_LDN))]
- "TARGET_SVE"
- "ld<vector_count><Vesize>\t%0, %2/z, %1"
-)
+;; =========================================================================
+;; == Vector decomposition
+;; =========================================================================
-;; Unpredicated ST[234]. This is always a full update, so the dependence
-;; on the old value of the memory location (via (match_dup 0)) is redundant.
-;; There doesn't seem to be any obvious benefit to treating the all-true
-;; case differently though. In particular, it's very unlikely that we'll
-;; only find out during RTL that a store_lanes is dead.
-(define_expand "vec_store_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "memory_operand")
- (unspec:SVE_STRUCT
- [(match_dup 2)
- (match_operand:SVE_STRUCT 1 "register_operand")
- (match_dup 0)]
- UNSPEC_STN))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract index
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - DUP (Advanced SIMD)
+;; - DUP (SVE)
+;; - EXT (SVE)
+;; - ST1 (Advanced SIMD)
+;; - UMOV (Advanced SIMD)
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_extract<mode><Vel>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand")
+ (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ poly_int64 val;
+ if (poly_int_rtx_p (operands[2], &val)
+ && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
+ {
+ /* The last element can be extracted with a LASTB and a false
+ predicate. */
+ rtx sel = aarch64_pfalse_reg (<VPRED>mode);
+ emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
+ DONE;
+ }
+ if (!CONST_INT_P (operands[2]))
+ {
+ /* Create an index with operand[2] as the base and -1 as the step.
+ It will then be zero for the element we care about. */
+ rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
+ index = force_reg (<VEL_INT>mode, index);
+ rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
+ emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
+
+ /* Get a predicate that is true for only that element. */
+ rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
+ rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
+ rtx sel = gen_reg_rtx (<VPRED>mode);
+ emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
+
+ /* Select the element using LASTB. */
+ emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
+ DONE;
+ }
+ /* Other constant indices fall through; the vec_select is then matched
+ by the extraction insn patterns below. */
}
)
-;; Predicated ST[234].
-(define_insn "vec_mask_store_lanes<mode><vsingle>"
- [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
- (unspec:SVE_STRUCT
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_STRUCT 1 "register_operand" "w")
- (match_dup 0)]
- UNSPEC_STN))]
+;; Extract element zero. This is a special case because we want to force
+;; the registers to be the same for the second alternative, and then
+;; split the instruction into nothing after RA.
+(define_insn_and_split "*vec_extract<mode><Vel>_0"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
+ (parallel [(const_int 0)])))]
"TARGET_SVE"
- "st<vector_count><Vesize>\t%1, %2, %0"
+ {
+ /* Element 0 lives in the low 128 bits, so operate on the Advanced
+ SIMD view of the same register. */
+ operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
+ switch (which_alternative)
+ {
+ case 0:
+ return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
+ case 1:
+ return "#";
+ case 2:
+ return "st1\\t{%1.<Vetype>}[0], %0";
+ default:
+ gcc_unreachable ();
+ }
+ }
+ "&& reload_completed
+ && REG_P (operands[0])
+ && REGNO (operands[0]) == REGNO (operands[1])"
+ [(const_int 0)]
+ {
+ emit_note (NOTE_INSN_DELETED);
+ DONE;
+ }
+ [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)
-(define_expand "vec_perm<mode>"
- [(match_operand:SVE_ALL 0 "register_operand")
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
- "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
+;; Extract an element from the Advanced SIMD portion of the register.
+;; We don't just reuse the aarch64-simd.md pattern because we don't
+;; want any change in lane number on big-endian targets.
+;; Byte offsets 1-15 only: offset 0 is handled by the "_0" pattern above.
+(define_insn "*vec_extract<mode><Vel>_v128"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
+ (parallel [(match_operand:SI 2 "const_int_operand")])))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
{
- aarch64_expand_sve_vec_perm (operands[0], operands[1],
- operands[2], operands[3]);
- DONE;
+ operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
+ switch (which_alternative)
+ {
+ case 0:
+ return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
+ case 1:
+ return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
+ case 2:
+ return "st1\\t{%1.<Vetype>}[%2], %0";
+ default:
+ gcc_unreachable ();
+ }
}
+ [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)
-(define_insn "*aarch64_sve_tbl<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
- UNSPEC_TBL))]
- "TARGET_SVE"
- "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+;; Extract an element in the range of DUP. This pattern allows the
+;; source and destination to be different.
+;; Covers byte offsets 16-63, i.e. elements beyond the low 128 bits.
+(define_insn "*vec_extract<mode><Vel>_dup"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "const_int_operand")])))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
+ {
+ operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
+ }
)
-(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
- (match_operand:PRED_ALL 2 "register_operand" "Upa")]
- PERMUTE))]
- "TARGET_SVE"
- "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+;; Extract an element outside the range of DUP. This pattern requires the
+;; source and destination to be the same.
+;; Operand 2 is scaled from an element index to the byte offset that
+;; EXT expects.
+(define_insn "*vec_extract<mode><Vel>_ext"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "0")
+ (parallel [(match_operand:SI 2 "const_int_operand")])))]
+ "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
+ {
+ operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
+ operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
+ return "ext\t%0.b, %0.b, %0.b, #%2";
+ }
)
-(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
- PERMUTE))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract active element
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - LASTB
+;; -------------------------------------------------------------------------
+
+;; Extract the last active element of operand 1 into operand 0.
+;; If no elements are active, extract the last inactive element instead.
+;; The first alternative returns the element in a general register,
+;; the second in a FP/SIMD register.
+(define_insn "extract_last_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
+ (unspec:<VEL>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_ALL 2 "register_operand" "w, w")]
+ UNSPEC_LASTB))]
"TARGET_SVE"
- "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ "@
+ lastb\t%<vwcore>0, %1, %2.<Vetype>
+ lastb\t%<Vetype>0, %1, %2.<Vetype>"
)
-(define_insn "*aarch64_sve_rev64<mode>"
- [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
- (unspec:SVE_BHS
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
- UNSPEC_REV64)]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Extract index
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Handle extractions from a predicate by converting to an integer vector
+;; and extracting from there.
+;; The intermediate vector is 1 in lanes where the predicate is true and
+;; 0 elsewhere, so the extracted element is the predicate bit as an
+;; integer.
+(define_expand "vec_extract<vpred><Vel>"
+ [(match_operand:<VEL> 0 "register_operand")
+ (match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 2 "nonmemory_operand")
+ ;; Dummy operand to which we can attach the iterator.
+ (reg:SVE_I V0_REGNUM)]
"TARGET_SVE"
- "rev<Vesize>\t%0.d, %1/m, %2.d"
+ {
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
+ CONST1_RTX (<MODE>mode),
+ CONST0_RTX (<MODE>mode)));
+ emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
+ DONE;
+ }
)
-(define_insn "*aarch64_sve_rev32<mode>"
- [(set (match_operand:SVE_BH 0 "register_operand" "=w")
- (unspec:SVE_BH
- [(match_operand:VNx4BI 1 "register_operand" "Upl")
- (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
- UNSPEC_REV32)]
+;; =========================================================================
+;; == Unary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General unary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ABS
+;; - CNT (= popcount)
+;; - NEG
+;; - NOT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer unary arithmetic.
+;; Operand 2 becomes an all-true predicate; the real work is done by the
+;; predicated insn below.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_dup 2)
+ (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "rev<Vesize>\t%0.s, %1/m, %2.s"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-(define_insn "*aarch64_sve_rev16vnx16qi"
- [(set (match_operand:VNx16QI 0 "register_operand" "=w")
- (unspec:VNx16QI
- [(match_operand:VNx8BI 1 "register_operand" "Upl")
- (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
- UNSPEC_REV16)]
+;; Integer unary arithmetic predicated with a PTRUE.
+;; Since the governing predicate is all-true, every lane is written.
+(define_insn "*<optab><mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_INT_UNARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "revb\t%0.h, %1/m, %2.h"
+ "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-(define_insn "@aarch64_sve_rev<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
- UNSPEC_REV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] General unary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FABS
+;; - FNEG
+;; - FSQRT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point unary operations.
+;; Operand 2 becomes an all-true predicate; the real work is done by the
+;; predicated insn below.
+(define_expand "<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 2)
+ (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "rev\t%0.<Vetype>, %1.<Vetype>")
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
-(define_insn "*aarch64_sve_dup_lane<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (vec_duplicate:SVE_ALL
- (vec_select:<VEL>
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "const_int_operand")]))))]
- "TARGET_SVE
- && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
- "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
+;; Predicated floating-point unary operations.
+;; Since the governing predicate is all-true, every lane is written.
+(define_insn "*<optab><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
)
-;; Note that the immediate (third) operand is the lane index not
-;; the byte index.
-(define_insn "*aarch64_sve_ext<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
- (match_operand:SVE_ALL 2 "register_operand" "w")
- (match_operand:SI 3 "const_int_operand")]
- UNSPEC_EXT))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Rounding
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FRINTA
+;; - FRINTI
+;; - FRINTM
+;; - FRINTN
+;; - FRINTP
+;; - FRINTX
+;; - FRINTZ
+;; -------------------------------------------------------------------------
+
+;; Unpredicated FRINTy.
+;; Operand 2 becomes an all-true predicate; the real work is done by the
+;; predicated insn below.
+(define_expand "<frint_pattern><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 2)
+ (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
+ FRINT)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
+)
+
+;; FRINTy predicated with a PTRUE.
+;; Since the governing predicate is all-true, every lane is written.
+(define_insn "*<frint_pattern><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
+ FRINT)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Inverse
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - NOT
+;; -------------------------------------------------------------------------
+
+;; Unpredicated predicate inverse.
+;; The (and ... ptrue) form matches the predicated NOT insn below.
+(define_expand "one_cmpl<mode>2"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (and:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
+ (match_dup 2)))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<MODE>mode);
+ }
+)
+
+;; Predicated predicate inverse.
+;; Operand 1 is the governing predicate, used with /z so that inactive
+;; lanes are zeroed.
+(define_insn "*one_cmpl<mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "not\t%0.b, %1/z, %2.b"
+)
+
+;; =========================================================================
+;; == Binary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General binary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - ADD
+;; - AND
+;; - EOR
+;; - MUL
+;; - ORR
+;; - SMAX
+;; - SMIN
+;; - SUB
+;; - UMAX
+;; - UMIN
+;; -------------------------------------------------------------------------
+
+;; Predicated integer operations with merging.
+;; Operand 1 selects between the result of the operation (active lanes)
+;; and operand 4 (inactive lanes).
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "register_operand"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+)
+
+;; Predicated integer operations, merging with the first input.
+;; The second alternative uses MOVPRFX when operand 0 is not tied to
+;; operand 2.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer operations, merging with the second input.
+;; Uses the reversed form of the instruction so that the merged input
+;; can be operand 3.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "register_operand" "0, w"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer operations, merging with an independent value.
+;; For the last alternative the rewrite first merges operands 2 and 4
+;; into operand 0 under the predicate, then retargets the operation so
+;; that it merges with operand 0 itself.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
+ (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
"TARGET_SVE
- && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
{
- operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
- return "ext\\t%0.b, %0.b, %2.b, #%3";
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
}
+ [(set_attr "movprfx" "yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Addition
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ADD
+;; - DECB
+;; - DECD
+;; - DECH
+;; - DECW
+;; - INCB
+;; - INCD
+;; - INCH
+;; - INCW
+;; - SUB
+;; -------------------------------------------------------------------------
+
(define_insn "add<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
(plus:SVE_I
add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Subtraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUB
+;; - SUBR
+;; -------------------------------------------------------------------------
+
(define_insn "sub<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w")
(minus:SVE_I
subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Absolute difference
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SABD
+;; - UABD
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer absolute difference.
+;; The pattern body only serves to attach the USMAX iterator; the
+;; expansion always emits the predicated form with an all-true predicate.
+(define_expand "<su>abd<mode>_3"
+ [(use (match_operand:SVE_I 0 "register_operand"))
+ (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
+ "TARGET_SVE"
+ {
+ rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+ emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
+ operands[2]));
+ DONE;
+ }
+)
+
+;; Predicated integer absolute difference.
+;; The absolute difference is expressed as max (x, y) - min (x, y),
+;; with <max_opp> giving the operation opposite to the USMAX iterator.
+(define_insn "aarch64_<su>abd<mode>_3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (minus:SVE_I
+ (USMAX:SVE_I
+ (match_operand:SVE_I 2 "register_operand" "0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))
+ (<max_opp>:SVE_I
+ (match_dup 2)
+ (match_dup 3)))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "@
+ <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MUL
+;; -------------------------------------------------------------------------
+
;; Unpredicated multiplication.
(define_expand "mul<mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
"mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)
-(define_insn "*madd<mode>"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (plus:SVE_I
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
- "TARGET_SVE"
- "@
- mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; Merging forms are handled through SVE_INT_BINARY.
-(define_insn "*msub<mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
- (minus:SVE_I
- (match_operand:SVE_I 4 "register_operand" "w, 0, w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
- UNSPEC_MERGE_PTRUE)))]
- "TARGET_SVE"
- "@
- msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Highpart multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMULH
+;; - UMULH
+;; -------------------------------------------------------------------------
;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
[(set_attr "movprfx" "*,yes")]
)
-;; Unpredicated division.
+;; -------------------------------------------------------------------------
+;; ---- [INT] Division
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDIV
+;; - SDIVR
+;; - UDIV
+;; - UDIVR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer division.
(define_expand "<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand")
(unspec:SVE_SDI
}
)
-;; Division predicated with a PTRUE.
+;; Integer division predicated with a PTRUE.
(define_insn "*<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_SDI
[(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated NEG, NOT and POPCOUNT.
-(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 2)
- (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer division with merging.
+;; Same structure as the generic cond_<optab> expander, but restricted
+;; to the S and D element sizes that SVE division supports.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_SDI 0 "register_operand")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand")
+ (match_operand:SVE_SDI 3 "register_operand"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- {
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
- }
)
-;; NEG, NOT and POPCOUNT predicated with a PTRUE.
-(define_insn "*<optab><mode>2"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (SVE_INT_UNARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer division, merging with the first input.
+;; The second alternative uses MOVPRFX when operand 0 is not tied to
+;; operand 2.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "0, w")
+ (match_operand:SVE_SDI 3 "register_operand" "w, w"))
+ (match_dup 2)]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "@
+ <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer division, merging with the second input.
+;; Uses the reversed form of the instruction so that the merged input
+;; can be operand 3.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "w, w")
+ (match_operand:SVE_SDI 3 "register_operand" "0, w"))
+ (match_dup 3)]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "@
+ <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer division, merging with an independent value.
+;; For the last alternative the rewrite first merges operands 2 and 4
+;; into operand 0 under the predicate, then retargets the operation so
+;; that it merges with operand 0 itself.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (SVE_INT_BINARY_SD:SVE_SDI
+ (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
+ (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
+ "@
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
)
-;; Vector AND, ORR and XOR.
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - EOR
+;; - ORR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer binary logical operations.
(define_insn "<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w")
(LOGICAL:SVE_I
<logical>\t%0.d, %1.d, %2.d"
)
-;; Vector AND, ORR and XOR on floating-point modes. We avoid subregs
-;; by providing this, but we need to use UNSPECs since rtx logical ops
-;; aren't defined for floating-point modes.
-(define_insn "*<optab><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")]
- LOGICALF))]
- "TARGET_SVE"
- "<logicalf_op>\t%0.d, %1.d, %2.d"
-)
+;; Merging forms are handled through SVE_INT_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Binary logical operations (inverted second input)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BIC
+;; -------------------------------------------------------------------------
;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
;; this pattern even though the NOT instruction itself is predicated.
"bic\t%0.d, %2.d, %1.d"
)
-;; Predicate AND. We can reuse one of the inputs as the GP.
-(define_insn "and<mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
- (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
- "TARGET_SVE"
- "and\t%0.b, %1/z, %1.b, %2.b"
-)
-
-;; Unpredicated predicate ORR and XOR.
-(define_expand "<optab><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (and:PRED_ALL
- (LOGICAL_OR:PRED_ALL
- (match_operand:PRED_ALL 1 "register_operand")
- (match_operand:PRED_ALL 2 "register_operand"))
- (match_dup 3)))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<MODE>mode);
- }
-)
-
-;; Predicated predicate ORR and XOR.
-(define_insn "pred_<optab><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (LOGICAL:PRED_ALL
- (match_operand:PRED_ALL 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "<logical>\t%0.b, %1/z, %2.b, %3.b"
-)
-
-;; Perform a logical operation on operands 2 and 3, using operand 1 as
-;; the GP (which is known to be a PTRUE). Store the result in operand 0
-;; and set the flags in the same way as for PTEST. The (and ...) in the
-;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
-;; value is structurally equivalent to rhs of the second set.
-(define_insn "*<optab><mode>3_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 1 "register_operand" "Upa")
- (and:PRED_ALL
- (LOGICAL:PRED_ALL
- (match_operand:PRED_ALL 2 "register_operand" "Upa")
- (match_operand:PRED_ALL 3 "register_operand" "Upa"))
- (match_dup 1))]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
- (match_dup 1)))]
- "TARGET_SVE"
- "<logical>s\t%0.b, %1/z, %2.b, %3.b"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ASR
+;; - LSL
+;; - LSR
+;; -------------------------------------------------------------------------
-;; Unpredicated predicate inverse.
-(define_expand "one_cmpl<mode>2"
- [(set (match_operand:PRED_ALL 0 "register_operand")
- (and:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
- (match_dup 2)))]
+;; Unpredicated shift by a scalar, which expands into one of the vector
+;; shifts below.
+;; Constant amounts are duplicated into a vector, staying as an immediate
+;; vector when the shift-operand predicate accepts them; variable amounts
+;; are broadcast from a scalar register.
+(define_expand "<ASHIFT:optab><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:<VEL> 2 "general_operand")))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<MODE>mode);
+ rtx amount;
+ if (CONST_INT_P (operands[2]))
+ {
+ amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
+ if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
+ amount = force_reg (<MODE>mode, amount);
+ }
+ else
+ {
+ amount = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_vec_duplicate<mode> (amount,
+ convert_to_mode (<VEL>mode,
+ operands[2], 0)));
+ }
+ emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
+ DONE;
}
)
-;; Predicated predicate inverse.
-(define_insn "*one_cmpl<mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "not\t%0.b, %1/z, %2.b"
-)
-
-;; Predicated predicate BIC and ORN.
-(define_insn "*<nlogical><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (NLOGICAL:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (match_operand:PRED_ALL 3 "register_operand" "Upa"))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
-)
-
-;; Predicated predicate NAND and NOR.
-(define_insn "*<logical_nn><mode>3"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (and:PRED_ALL
- (NLOGICAL:PRED_ALL
- (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
- (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
- (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
- "TARGET_SVE"
- "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
-)
-
-;; Unpredicated LSL, LSR and ASR by a vector.
+;; Unpredicated shift by a vector.
(define_expand "v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
}
)
-;; LSL, LSR and ASR by a vector, predicated with a PTRUE. We don't
-;; actually need the predicate for the first alternative, but using Upa
-;; or X isn't likely to gain much and would make the instruction seem
-;; less uniform to the register allocator.
+;; Shift by a vector, predicated with a PTRUE. We don't actually need
+;; the predicate for the first alternative, but using Upa or X isn't
+;; likely to gain much and would make the instruction seem less uniform
+;; to the register allocator.
(define_insn_and_split "*v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_I
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
-;; LSL, LSR and ASR by a scalar, which expands into one of the vector
-;; shifts above.
-(define_expand "<ASHIFT:optab><mode>3"
+;; -------------------------------------------------------------------------
+;; ---- [INT] Maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMAX
+;; - SMIN
+;; - UMAX
+;; - UMIN
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer MAX/MIN.
+(define_expand "<su><maxmin><mode>3"
[(set (match_operand:SVE_I 0 "register_operand")
- (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:<VEL> 2 "general_operand")))]
+ (unspec:SVE_I
+ [(match_dup 3)
+ (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- rtx amount;
- if (CONST_INT_P (operands[2]))
- {
- amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
- if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
- amount = force_reg (<MODE>mode, amount);
- }
- else
- {
- amount = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_vec_duplicate<mode> (amount,
- convert_to_mode (<VEL>mode,
- operands[2], 0)));
- }
- emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
-;;
-;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
-;; is a PTRUE even if the optimizers haven't yet been able to propagate
-;; the constant. We would use a separate unspec code for PTESTs involving
-;; GPs that might not be PTRUEs.
-(define_insn "ptest_ptrue<mode>"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 0 "register_operand" "Upa")
- (match_operand:PRED_ALL 1 "register_operand" "Upa")]
- UNSPEC_PTEST_PTRUE))]
+;; Integer MAX/MIN predicated with a PTRUE.
+(define_insn "*<su><maxmin><mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "ptest\t%0, %1.b"
+ "@
+ <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Set element I of the result if operand1 + J < operand2 for all J in [0, I].
-;; with the comparison being unsigned.
-(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
- [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
- (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
- UNSPEC_WHILE_LO))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
-)
+;; Merging forms are handled through SVE_INT_BINARY.
-;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
-;; Handle the case in which both results are useful. The GP operand
-;; to the PTEST isn't needed, so we allow it to be anything.
-(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
- [(match_operand:PRED_ALL 1)
- (unspec:PRED_ALL
- [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
- (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
- UNSPEC_WHILE_LO)]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
- (unspec:PRED_ALL [(match_dup 2)
- (match_dup 3)]
- UNSPEC_WHILE_LO))]
- "TARGET_SVE"
- "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
- ;; Force the compiler to drop the unused predicate operand, so that we
- ;; don't have an unnecessary PTRUE.
- "&& !CONSTANT_P (operands[1])"
- {
- operands[1] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
- }
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] General binary arithmetic corresponding to rtx codes
+;; -------------------------------------------------------------------------
+;; Includes post-RA forms of:
+;; - FADD
+;; - FMUL
+;; - FSUB
+;; -------------------------------------------------------------------------
-;; Integer comparisons predicated with a PTRUE.
-(define_insn "*cmp<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE))
- (clobber (reg:CC_NZC CC_REGNUM))]
+;; Unpredicated floating-point binary operations (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+(define_insn "*post_ra_<sve_fp_op><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (SVE_UNPRED_FP_BINARY:SVE_F
+ (match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")))]
+ "TARGET_SVE && reload_completed"
+ "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General binary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes merging forms of:
+;; - FADD
+;; - FDIV
+;; - FDIVR
+;; - FMAXNM
+;; - FMINNM
+;; - FMUL
+;; - FSUB
+;; - FSUBR
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- "@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Integer comparisons predicated with a PTRUE in which only the flags result
-;; is interesting.
-(define_insn "*cmp<cmp_op><mode>_ptest"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
+;; Predicated floating-point operations, merging with the first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)]
- UNSPEC_PTEST_PTRUE))
- (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 2)]
+ UNSPEC_SEL))]
"TARGET_SVE"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Integer comparisons predicated with a PTRUE in which both the flag and
-;; predicate results are interesting.
-(define_insn "*cmp<cmp_op><mode>_cc"
- [(set (reg:CC_NZC CC_REGNUM)
- (unspec:CC_NZC
+;; Predicated floating-point operations, merging with the second input.
+(define_insn "*cond_<optab><mode>_3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)]
- UNSPEC_PTEST_PTRUE))
- (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_dup 1)
- (SVE_INT_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))]
- UNSPEC_MERGE_PTRUE))]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "register_operand" "0, w")]
+ SVE_COND_FP_BINARY)
+ (match_dup 3)]
+ UNSPEC_SEL))]
"TARGET_SVE"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
-
-;; Predicated integer comparisons, formed by combining a PTRUE-predicated
-;; comparison with an AND. Split the instruction into its preferred form
-;; (below) at the earliest opportunity, in order to get rid of the
-;; redundant operand 1.
-(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
- "#"
- "&& 1"
- [(parallel
- [(set (match_dup 0)
- (and:<VPRED>
- (SVE_INT_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))
- (clobber (reg:CC_NZC CC_REGNUM))])]
+ <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ [(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer comparisons.
-(define_insn "*pred_cmp<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (SVE_INT_CMP:<VPRED>
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
- (clobber (reg:CC_NZC CC_REGNUM))]
- "TARGET_SVE"
+;; Predicated floating-point operations, merging with an independent value.
+(define_insn_and_rewrite "*cond_<optab><mode>_any"
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
+ SVE_COND_FP_BINARY)
+ (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE
+ && !rtx_equal_p (operands[2], operands[4])
+ && !rtx_equal_p (operands[3], operands[4])"
"@
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
- cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ #"
+ "&& reload_completed
+ && register_operand (operands[4], <MODE>mode)
+ && !rtx_equal_p (operands[0], operands[4])"
+ {
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+ operands[4], operands[1]));
+ operands[4] = operands[2] = operands[0];
+ }
+ [(set_attr "movprfx" "yes")]
)
-;; Floating-point comparisons predicated with a PTRUE.
-(define_insn "*fcm<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_FP_CMP:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Addition
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADD
+;; - FSUB
+;; -------------------------------------------------------------------------
-(define_insn "*fcmuo<mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (unordered:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))]
+;; Unpredicated floating-point addition.
+(define_expand "add<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (plus:SVE_F
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
-;; with another predicate P. This does not have the same trapping behavior
-;; as predicating the comparison itself on P, but it's a legitimate fold,
-;; since we can drop any potentially-trapping operations whose results
-;; are not needed.
-;;
-;; Split the instruction into its preferred form (below) at the earliest
-;; opportunity, in order to get rid of the redundant operand 1.
-(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (SVE_FP_CMP
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
+;; Floating-point addition predicated with a PTRUE.
+(define_insn_and_split "*add<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (plus:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (and:<VPRED>
- (SVE_FP_CMP:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))]
+ "@
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
)
-(define_insn_and_split "*fcmuo<mode>_and_combine"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (and:<VPRED>
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1)
- (unordered
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE)
- (match_operand:<VPRED> 4 "register_operand" "Upl")))]
- "TARGET_SVE"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (and:<VPRED>
- (unordered:<VPRED>
- (match_dup 2)
- (match_dup 3))
- (match_dup 4)))]
-)
+;; Merging forms are handled through SVE_COND_FP_BINARY.
-;; Unpredicated floating-point comparisons, with the results ANDed
-;; with another predicate. This is a valid fold for the same reasons
-;; as above.
-(define_insn "*fcm<cmp_op><mode>_and"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (and:<VPRED>
- (SVE_FP_CMP:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
- "TARGET_SVE"
- "@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Subtraction
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADD
+;; - FSUB
+;; - FSUBR
+;; -------------------------------------------------------------------------
-(define_insn "*fcmuo<mode>_and"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
- (and:<VPRED>
- (unordered:<VPRED>
- (match_operand:SVE_F 2 "register_operand" "w")
- (match_operand:SVE_F 3 "register_operand" "w"))
- (match_operand:<VPRED> 1 "register_operand" "Upl")))]
+;; Unpredicated floating-point subtraction.
+(define_expand "sub<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (minus:SVE_F
+ (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Predicated floating-point comparisons. We don't need a version
-;; of this for unordered comparisons.
-(define_insn "*pred_fcm<cmp_op><mode>"
- [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
- (unspec:<VPRED>
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
- SVE_COND_FP_CMP))]
- "TARGET_SVE"
+;; Floating-point subtraction predicated with a PTRUE.
+(define_insn_and_split "*sub<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+ (minus:SVE_F
+ (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE
+ && (register_operand (operands[2], <MODE>mode)
+ || register_operand (operands[3], <MODE>mode))"
"@
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
- fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
+ fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
+ fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[2], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
)
-;; vcond_mask operand order: true, false, mask
-;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
-;; SEL operand order: mask, true, false
-(define_insn "vcond_mask_<mode><vpred>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
- (unspec:SVE_ALL
- [(match_operand:<VPRED> 3 "register_operand" "Upa")
- (match_operand:SVE_ALL 1 "register_operand" "w")
- (match_operand:SVE_ALL 2 "register_operand" "w")]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
-)
+;; Merging forms are handled through SVE_COND_FP_BINARY.
-;; Selects between a duplicated immediate and zero.
-(define_insn "aarch64_sve_dup<mode>_const"
- [(set (match_operand:SVE_I 0 "register_operand" "=w")
- (unspec:SVE_I
+;; -------------------------------------------------------------------------
+;; ---- [FP] Absolute difference
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FABD
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point absolute difference.
+(define_insn "*fabd<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
- (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
- UNSPEC_SEL))]
+ (abs:SVE_F
+ (minus:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "0")
+ (match_operand:SVE_F 3 "register_operand" "w")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "mov\t%0.<Vetype>, %1/z, #%2"
+ "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
)
-;; Integer (signed) vcond. Don't enforce an immediate range here, since it
-;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcond<mode><v_int_equiv>"
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (if_then_else:SVE_ALL
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_INT_EQUIV> 4 "register_operand")
- (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Multiplication
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMUL
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point multiplication.
+(define_expand "mul<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (mult:SVE_F
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Integer vcondu. Don't enforce an immediate range here, since it
-;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
-(define_expand "vcondu<mode><v_int_equiv>"
- [(set (match_operand:SVE_ALL 0 "register_operand")
- (if_then_else:SVE_ALL
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_INT_EQUIV> 4 "register_operand")
- (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
- (match_operand:SVE_ALL 1 "register_operand")
- (match_operand:SVE_ALL 2 "register_operand")))]
+;; Floating-point multiplication predicated with a PTRUE.
+(define_insn_and_split "*mul<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (mult:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "%0, w")
+ (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
- DONE;
- }
+ "@
+ fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+ #"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
)
-;; Floating-point vcond. All comparisons except FCMUO allow a zero
-;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
-;; with zero.
-(define_expand "vcond<mode><v_fp_equiv>"
- [(set (match_operand:SVE_SD 0 "register_operand")
- (if_then_else:SVE_SD
- (match_operator 3 "comparison_operator"
- [(match_operand:<V_FP_EQUIV> 4 "register_operand")
- (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
- (match_operand:SVE_SD 1 "register_operand")
- (match_operand:SVE_SD 2 "register_operand")))]
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Division
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FDIV
+;; - FDIVR
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point division.
+(define_expand "div<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 3)
+ (div:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Signed integer comparisons. Don't enforce an immediate range here, since
-;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
-;; instead.
-(define_expand "vec_cmp<mode><vpred>"
- [(parallel
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "nonmemory_operand")]))
- (clobber (reg:CC_NZC CC_REGNUM))])]
+;; Floating-point division predicated with a PTRUE.
+(define_insn "*div<mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- DONE;
- }
+ "@
+ fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+ movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unsigned integer comparisons. Don't enforce an immediate range here, since
-;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
-;; instead.
-(define_expand "vec_cmpu<mode><vpred>"
- [(parallel
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "nonmemory_operand")]))
- (clobber (reg:CC_NZC CC_REGNUM))])]
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - EOR
+;; - ORR
+;; -------------------------------------------------------------------------
+
+;; Binary logical operations on floating-point modes. We avoid subregs
+;; by providing this, but we need to use UNSPECs since rtx logical ops
+;; aren't defined for floating-point modes.
+(define_insn "*<optab><mode>3"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ LOGICALF))]
"TARGET_SVE"
- {
- aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3]);
- DONE;
- }
+ "<logicalf_op>\t%0.d, %1.d, %2.d"
)
-;; Floating-point comparisons. All comparisons except FCMUO allow a zero
-;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
-;; with zero.
-(define_expand "vec_cmp<mode><vpred>"
- [(set (match_operand:<VPRED> 0 "register_operand")
- (match_operator:<VPRED> 1 "comparison_operator"
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Sign copying
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+(define_expand "copysign<mode>3"
+ [(match_operand:SVE_F 0 "register_operand")
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
"TARGET_SVE"
{
- aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false);
- DONE;
- }
-)
+ rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
-;; Branch based on predicate equality or inequality.
-(define_expand "cbranch<mode>4"
- [(set (pc)
- (if_then_else
- (match_operator 0 "aarch64_equality_operator"
- [(match_operand:PRED_ALL 1 "register_operand")
- (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
- (label_ref (match_operand 3 ""))
- (pc)))]
- ""
- {
- rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
- rtx pred;
- if (operands[2] == CONST0_RTX (<MODE>mode))
- pred = operands[1];
- else
- {
- pred = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
- operands[2]));
- }
- emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
- operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
- operands[2] = const0_rtx;
+ rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+
+ emit_insn (gen_and<v_int_equiv>3
+ (sign, arg2,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ HOST_WIDE_INT_M1U
+ << bits)));
+ emit_insn (gen_and<v_int_equiv>3
+ (mant, arg1,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ ~(HOST_WIDE_INT_M1U
+ << bits))));
+ emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ DONE;
}
)
-;; Unpredicated integer MIN/MAX.
-(define_expand "<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_dup 3)
- (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+(define_expand "xorsign<mode>3"
+ [(match_operand:SVE_F 0 "register_operand")
+ (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
+ rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
+ int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+
+ rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+ rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+
+ emit_insn (gen_and<v_int_equiv>3
+ (sign, arg2,
+ aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ HOST_WIDE_INT_M1U
+ << bits)));
+ emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+ DONE;
}
)
-;; Integer MIN/MAX predicated with a PTRUE.
-(define_insn "*<su><maxmin><mode>3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- "@
- <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP] Maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMAX
+;; - FMAXNM
+;; - FMIN
+;; - FMINNM
+;; -------------------------------------------------------------------------
-;; Unpredicated floating-point MIN/MAX.
+;; Unpredicated floating-point MAX/MIN.
(define_expand "<su><maxmin><mode>3"
[(set (match_operand:SVE_F 0 "register_operand")
(unspec:SVE_F
}
)
-;; Floating-point MIN/MAX predicated with a PTRUE.
+;; Floating-point MAX/MIN predicated with a PTRUE.
(define_insn "*<su><maxmin><mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_F
[(set_attr "movprfx" "*,yes")]
)
-;; Unpredicated fmin/fmax.
+;; Unpredicated fmax/fmin.
(define_expand "<maxmin_uns><mode>3"
[(set (match_operand:SVE_F 0 "register_operand")
(unspec:SVE_F
}
)
-;; fmin/fmax predicated with a PTRUE.
+;; fmax/fmin predicated with a PTRUE.
(define_insn "*<maxmin_uns><mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_F
[(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_I 0 "register_operand")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand")
- (match_operand:SVE_I 3 "register_operand"))
- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
+;; Merging forms are handled through SVE_COND_FP_BINARY.
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - AND
+;; - ANDS
+;; - EOR
+;; - EORS
+;; - ORR
+;; - ORRS
+;; -------------------------------------------------------------------------
+
+;; Predicate AND. We can reuse one of the inputs as the GP.
+(define_insn "and<mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
"TARGET_SVE"
+ "and\t%0.b, %1/z, %1.b, %2.b"
)
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_SDI 0 "register_operand")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand")
- (match_operand:SVE_SDI 3 "register_operand"))
- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
+;; Unpredicated predicate EOR and ORR.
+(define_expand "<optab><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand")
+ (and:PRED_ALL
+ (LOGICAL_OR:PRED_ALL
+ (match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "register_operand"))
+ (match_dup 3)))]
"TARGET_SVE"
+ {
+ operands[3] = aarch64_ptrue_reg (<MODE>mode);
+ }
)
-;; Predicated integer operations with select matching the first operand.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))
- (match_dup 2)]
- UNSPEC_SEL))]
+;; Predicated predicate AND, EOR and ORR.
+(define_insn "pred_<optab><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (LOGICAL:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")
+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "<logical>\t%0.b, %1/z, %2.b, %3.b"
+)
+
+;; Perform a logical operation on operands 2 and 3, using operand 1 as
+;; the GP (which is known to be a PTRUE). Store the result in operand 0
+;; and set the flags in the same way as for PTEST. The (and ...) in the
+;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
+;; value is structurally equivalent to the rhs of the second set.
+(define_insn "*<optab><mode>3_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (and:PRED_ALL
+ (LOGICAL:PRED_ALL
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")
+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))
+ (match_dup 1))]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
+ (match_dup 1)))]
+ "TARGET_SVE"
+ "<logical>s\t%0.b, %1/z, %2.b, %3.b"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations (inverted second input)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BIC
+;; - ORN
+;; -------------------------------------------------------------------------
+
+;; Predicated predicate BIC and ORN.
+(define_insn "*<nlogical><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (NLOGICAL:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 3 "register_operand" "Upa"))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Binary logical operations (inverted result)
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - NAND
+;; - NOR
+;; -------------------------------------------------------------------------
+
+;; Predicated predicate NAND and NOR.
+(define_insn "*<logical_nn><mode>3"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (and:PRED_ALL
+ (NLOGICAL:PRED_ALL
+ (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
+ (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
+ "TARGET_SVE"
+ "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
+)
+
+;; =========================================================================
+;; == Ternary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] MLA and MAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MAD
+;; - MLA
+;; -------------------------------------------------------------------------
+
+;; Predicated integer addition of product.
+(define_insn "*madd<mode>"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (plus:SVE_I
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
"TARGET_SVE"
"@
- <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
+ mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-(define_insn "*cond_<optab><mode>_2"
+;; -------------------------------------------------------------------------
+;; ---- [INT] MLS and MSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - MLS
+;; - MSB
+;; -------------------------------------------------------------------------
+
+;; Predicated integer subtraction of product.
+(define_insn "*msub<mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
+ (minus:SVE_I
+ (match_operand:SVE_I 4 "register_operand" "w, 0, w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+ (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
+ UNSPEC_MERGE_PTRUE)))]
+ "TARGET_SVE"
+ "@
+ msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Dot product
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SDOT
+;; - UDOT
+;; -------------------------------------------------------------------------
+
+;; Four-element integer dot-product with accumulation.
+(define_insn "<sur>dot_prod<vsi2qi>"
[(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "0, w")
- (match_operand:SVE_SDI 3 "register_operand" "w, w"))
- (match_dup 2)]
- UNSPEC_SEL))]
+ (plus:SVE_SDI
+ (unspec:SVE_SDI
+ [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
+ (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
+ DOTPROD)
+ (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
"TARGET_SVE"
"@
- <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
+ movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
[(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer operations with select matching the second operand.
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
+;; -------------------------------------------------------------------------
+;; ---- [INT] Sum of absolute differences
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
+;; operands 1 and 2. The sequence also has to perform a widening reduction of
+;; the difference into a vector and accumulate that into operand 3 before
+;; copying that into the result operand 0.
+;; Perform that with a sequence of:
+;; MOV ones.b, #1
+;; [SU]ABD diff.b, p0/m, op1.b, op2.b
+;; MOVPRFX op0, op3 // If necessary
+;; UDOT op0.s, diff.b, ones.b
+(define_expand "<sur>sad<vsi2qi>"
+ [(use (match_operand:SVE_SDI 0 "register_operand"))
+ (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
+ (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
+ (use (match_operand:SVE_SDI 3 "register_operand"))]
+ "TARGET_SVE"
+ {
+ rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
+ rtx diff = gen_reg_rtx (<VSI2QI>mode);
+ emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
+ emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
+ DONE;
+ }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] General ternary arithmetic corresponding to unspecs
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - FMAD
+;; - FMLA
+;; - FMLS
+;; - FMSB
+;; - FNMAD
+;; - FNMLA
+;; - FNMLS
+;; - FNMSB
+;; -------------------------------------------------------------------------
+
+;; Predicated floating-point ternary operations with merging.
+(define_expand "cond_<optab><mode>"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand")
+ (match_operand:SVE_F 4 "register_operand")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+{
+ /* Swap the multiplication operands if the fallback value is the
+ second of the two. */
+ if (rtx_equal_p (operands[3], operands[5]))
+ std::swap (operands[2], operands[3]);
+})
+
+;; Predicated floating-point ternary operations, merging with the
+;; first input.
+(define_insn "*cond_<optab><mode>_2"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "w, w")
- (match_operand:SVE_I 3 "register_operand" "0, w"))
- (match_dup 3)]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "0, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE"
"@
- <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (unspec:SVE_SDI
+;; Predicated floating-point ternary operations, merging with the
+;; third input.
+(define_insn "*cond_<optab><mode>_4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+ (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "w, w")
- (match_operand:SVE_SDI 3 "register_operand" "0, w"))
- (match_dup 3)]
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w")
+ (match_operand:SVE_F 4 "register_operand" "0, w")]
+ SVE_COND_FP_TERNARY)
+ (match_dup 4)]
UNSPEC_SEL))]
"TARGET_SVE"
"@
- <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+ <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
[(set_attr "movprfx" "*,yes")]
)
-;; Predicated integer binary operations in which the values of inactive
-;; lanes are distinct from the other inputs.
+;; Predicated floating-point ternary operations, merging with an
+;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (SVE_INT_BINARY:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
- (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 2 "register_operand" "w, w, w")
+ (match_operand:SVE_F 3 "register_operand" "w, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")]
+ SVE_COND_FP_TERNARY)
+ (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
UNSPEC_SEL))]
"TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
+ && !rtx_equal_p (operands[2], operands[5])
+ && !rtx_equal_p (operands[3], operands[5])
+ && !rtx_equal_p (operands[4], operands[5])"
"@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+ movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
#"
"&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
+ && !CONSTANT_P (operands[5])
+ && !rtx_equal_p (operands[0], operands[5])"
{
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
+ emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+ operands[5], operands[1]));
+ operands[5] = operands[4] = operands[0];
}
[(set_attr "movprfx" "yes")]
)
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (SVE_INT_BINARY_SD:SVE_SDI
- (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
- (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
+;; -------------------------------------------------------------------------
+;; ---- [FP] FMLA and FMAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMAD
+;; - FMLA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fma (%0 = (%1 * %2) + %3).
+(define_expand "fma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
{
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
- [(set_attr "movprfx" "yes")]
)
-;; Set operand 0 to the last active element in operand 3, or to tied
-;; operand 1 if no elements are active.
-(define_insn "fold_extract_last_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
- (unspec:<VEL>
- [(match_operand:<VEL> 1 "register_operand" "0, 0")
- (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
- (match_operand:SVE_ALL 3 "register_operand" "w, w")]
- UNSPEC_CLASTB))]
+;; fma predicated with a PTRUE.
+(define_insn "*fma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
- clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
+ fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated integer add reduction.
-(define_expand "reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- UNSPEC_ADDV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] FMLS and FMSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FMLS
+;; - FMSB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
+(define_expand "fnma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 1 "register_operand"))
+ (match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated integer add reduction. The result is always 64-bits.
-(define_insn "*reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- UNSPEC_ADDV))]
+;; fnma predicated with a PTRUE.
+(define_insn "*fnma<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "uaddv\t%d0, %1, %2.<Vetype>"
+ "@
+ fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated floating-point add reduction.
-(define_expand "reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_F 1 "register_operand")]
- UNSPEC_FADDV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] FNMLA and FNMAD
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FNMAD
+;; - FNMLA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
+(define_expand "fnms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 1 "register_operand"))
+ (match_operand:SVE_F 2 "register_operand")
+ (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated floating-point add reduction.
-(define_insn "*reduc_plus_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_F 2 "register_operand" "w")]
- UNSPEC_FADDV))]
+;; fnms predicated with a PTRUE.
+(define_insn "*fnms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (neg:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "faddv\t%<Vetype>0, %1, %2.<Vetype>"
+ "@
+ fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated integer MIN/MAX reduction.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- MAXMINV))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] FNMLS and FNMSB
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FNMLS
+;; - FNMSB
+;; -------------------------------------------------------------------------
+
+;; Unpredicated fms (%0 = (%1 * %2) - %3).
+(define_expand "fms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand")
+ (unspec:SVE_F
+ [(match_dup 4)
+ (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")
+ (neg:SVE_F
+ (match_operand:SVE_F 3 "register_operand")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ operands[4] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Predicated integer MIN/MAX reduction.
-(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- MAXMINV))]
+;; fms predicated with a PTRUE.
+(define_insn "*fms<mode>4"
+ [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+ (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+ (match_operand:SVE_F 4 "register_operand" "w, w, w")
+ (neg:SVE_F
+ (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
+ "@
+ fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
+ fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+ movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+ [(set_attr "movprfx" "*,*,yes")]
)
-;; Unpredicated floating-point MIN/MAX reduction.
-(define_expand "reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_F 1 "register_operand")]
- FMAXMINV))]
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on predicates
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - MOV
+;; - SEL
+;; -------------------------------------------------------------------------
+
+;; vcond_mask operand order: true, false, mask
+;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
+;; SEL operand order: mask, true, false
+(define_insn "vcond_mask_<mode><vpred>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:<VPRED> 3 "register_operand" "Upa")
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 2 "register_operand" "w")]
+ UNSPEC_SEL))]
+ "TARGET_SVE"
+ "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Selects between a duplicated immediate and zero.
+(define_insn "aarch64_sve_dup<mode>_const"
+ [(set (match_operand:SVE_I 0 "register_operand" "=w")
+ (unspec:SVE_I
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
+ (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL))]
"TARGET_SVE"
- {
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "mov\t%0.<Vetype>, %1/z, #%2"
)
-;; Predicated floating-point MIN/MAX reduction.
-(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_F 2 "register_operand" "w")]
- FMAXMINV))]
- "TARGET_SVE"
- "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Compare and select
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
-(define_expand "reduc_<optab>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 2)
- (match_operand:SVE_I 1 "register_operand")]
- BITWISEV))]
+;; Integer (signed) vcond. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
+(define_expand "vcond<mode><v_int_equiv>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (if_then_else:SVE_ALL
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_INT_EQUIV> 4 "register_operand")
+ (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")))]
"TARGET_SVE"
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+ DONE;
}
)
-(define_insn "*reduc_<optab>_scal_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (match_operand:SVE_I 2 "register_operand" "w")]
- BITWISEV))]
+;; Integer vcondu. Don't enforce an immediate range here, since it
+;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
+(define_expand "vcondu<mode><v_int_equiv>"
+ [(set (match_operand:SVE_ALL 0 "register_operand")
+ (if_then_else:SVE_ALL
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_INT_EQUIV> 4 "register_operand")
+ (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")))]
"TARGET_SVE"
- "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
+ {
+ aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
+ DONE;
+ }
)
-;; Unpredicated in-order FP reductions.
-(define_expand "fold_left_plus_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand")
- (unspec:<VEL> [(match_dup 3)
- (match_operand:<VEL> 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
- UNSPEC_FADDA))]
+;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
+;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
+(define_expand "vcond<mode><v_fp_equiv>"
+ [(set (match_operand:SVE_SD 0 "register_operand")
+ (if_then_else:SVE_SD
+ (match_operator 3 "comparison_operator"
+ [(match_operand:<V_FP_EQUIV> 4 "register_operand")
+ (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
+ (match_operand:SVE_SD 1 "register_operand")
+ (match_operand:SVE_SD 2 "register_operand")))]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
+ DONE;
}
)
-;; In-order FP reductions predicated with PTRUE.
-(define_insn "mask_fold_left_plus_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
- (match_operand:<VEL> 1 "register_operand" "0")
- (match_operand:SVE_F 2 "register_operand" "w")]
- UNSPEC_FADDA))]
- "TARGET_SVE"
- "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - CMPEQ
+;; - CMPGE
+;; - CMPGT
+;; - CMPHI
+;; - CMPHS
+;; - CMPLE
+;; - CMPLO
+;; - CMPLS
+;; - CMPLT
+;; - CMPNE
+;; -------------------------------------------------------------------------
-;; Predicated form of the above in-order reduction.
-(define_insn "*pred_fold_left_plus_<mode>"
- [(set (match_operand:<VEL> 0 "register_operand" "=w")
- (unspec:<VEL>
- [(match_operand:<VEL> 1 "register_operand" "0")
- (unspec:SVE_F
- [(match_operand:<VPRED> 2 "register_operand" "Upl")
- (match_operand:SVE_F 3 "register_operand" "w")
- (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
- UNSPEC_SEL)]
- UNSPEC_FADDA))]
+;; Signed integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
+;; instead.
+(define_expand "vec_cmp<mode><vpred>"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "nonmemory_operand")]))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
"TARGET_SVE"
- "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
+ {
+ aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
+ }
)
-;; Unpredicated floating-point addition.
-(define_expand "add<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (plus:SVE_F
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Unsigned integer comparisons. Don't enforce an immediate range here, since
+;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
+;; instead.
+(define_expand "vec_cmpu<mode><vpred>"
+ [(parallel
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_I 2 "register_operand")
+ (match_operand:SVE_I 3 "nonmemory_operand")]))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ DONE;
}
)
-;; Floating-point addition predicated with a PTRUE.
-(define_insn_and_split "*add<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (plus:SVE_F
- (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Integer comparisons predicated with a PTRUE.
+(define_insn "*cmp<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE))
+ (clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
"@
- fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point subtraction.
-(define_expand "sub<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (minus:SVE_F
- (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
- (match_operand:SVE_F 2 "register_operand"))]
+;; Integer comparisons predicated with a PTRUE in which both the flags and
+;; predicate results are interesting.
+(define_insn "*cmp<cmp_op><mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE)]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "@
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Floating-point subtraction predicated with a PTRUE.
-(define_insn_and_split "*sub<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
- (minus:SVE_F
- (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE
- && (register_operand (operands[2], <MODE>mode)
- || register_operand (operands[3], <MODE>mode))"
+;; Integer comparisons predicated with a PTRUE in which only the flags result
+;; is interesting.
+(define_insn "*cmp<cmp_op><mode>_ptest"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (unspec:<VPRED>
+ [(match_dup 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE)]
+ UNSPEC_PTEST_PTRUE))
+ (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
+ "TARGET_SVE"
"@
- fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
- fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[2], <MODE>mode)
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point multiplication.
-(define_expand "mul<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (mult:SVE_F
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+;; Predicated integer comparisons, formed by combining a PTRUE-predicated
+;; comparison with an AND. Split the instruction into its preferred form
+;; (below) at the earliest opportunity, in order to get rid of the
+;; redundant operand 1.
+(define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1)
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
+ "TARGET_SVE"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:<VPRED>
+ (SVE_INT_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))
+ (clobber (reg:CC_NZC CC_REGNUM))])]
)
-;; Floating-point multiplication predicated with a PTRUE.
-(define_insn_and_split "*mul<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (mult:SVE_F
- (match_operand:SVE_F 2 "register_operand" "%0, w")
- (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer comparisons.
+(define_insn "*pred_cmp<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (SVE_INT_CMP:<VPRED>
+ (match_operand:SVE_I 2 "register_operand" "w, w")
+ (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
+ (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
+ (clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
"@
- fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
- #"
- ; Split the unpredicated form after reload, so that we don't have
- ; the unnecessary PTRUE.
- "&& reload_completed
- && register_operand (operands[3], <MODE>mode)"
- [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
+ cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point binary operations (post-RA only).
-;; These are generated by splitting a predicated instruction whose
-;; predicate is unused.
-(define_insn "*post_ra_<sve_fp_op><mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (SVE_UNPRED_FP_BINARY:SVE_F
- (match_operand:SVE_F 1 "register_operand" "w")
- (match_operand:SVE_F 2 "register_operand" "w")))]
- "TARGET_SVE && reload_completed"
- "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
+;; -------------------------------------------------------------------------
+;; ---- [INT] While tests
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - WHILELO
+;; -------------------------------------------------------------------------
-;; Unpredicated fma (%0 = (%1 * %2) + %3).
-(define_expand "fma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Set element I of the result if operand1 + J < operand2 for all J in [0, I],
+;; with the comparison being unsigned.
+(define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
+ UNSPEC_WHILE_LO))
+ (clobber (reg:CC_NZC CC_REGNUM))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
-;; fma predicated with a PTRUE.
-(define_insn "*fma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; WHILELO sets the flags in the same way as a PTEST with a PTRUE
+;; governing predicate (GP). Handle the case in which both results are
+;; useful; the GP operand to the PTEST isn't needed, so it can be anything.
+(define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 1)
+ (unspec:PRED_ALL
+ [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
+ (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
+ UNSPEC_WHILE_LO)]
+ UNSPEC_PTEST_PTRUE))
+ (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_dup 2)
+ (match_dup 3)]
+ UNSPEC_WHILE_LO))]
"TARGET_SVE"
- "@
- fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
+ ;; Force the compiler to drop the unused predicate operand, so that we
+ ;; don't have an unnecessary PTRUE.
+ "&& !CONSTANT_P (operands[1])"
+ {
+ operands[1] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
+ }
)
-;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
-(define_expand "fnma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 1 "register_operand"))
- (match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCMEQ
+;; - FCMGE
+;; - FCMGT
+;; - FCMLE
+;; - FCMLT
+;; - FCMNE
+;; - FCMUO
+;; -------------------------------------------------------------------------
+
+;; Floating-point comparisons. All comparisons except FCMUO allow a zero
+;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
+;; with zero.
+(define_expand "vec_cmp<mode><vpred>"
+ [(set (match_operand:<VPRED> 0 "register_operand")
+ (match_operator:<VPRED> 1 "comparison_operator"
+ [(match_operand:SVE_F 2 "register_operand")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
"TARGET_SVE"
{
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
+ aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false);
+ DONE;
}
)
-;; fnma predicated with a PTRUE.
-(define_insn "*fnma<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
+;; Floating-point comparisons predicated with a PTRUE.
+(define_insn "*fcm<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (SVE_FP_CMP:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated fms (%0 = (%1 * %2) - %3).
-(define_expand "fms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")
- (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand")))]
+;; Same for unordered comparisons.
+(define_insn "*fcmuo<mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (unordered:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; fms predicated with a PTRUE.
-(define_insn "*fms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (neg:SVE_F
- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
- UNSPEC_MERGE_PTRUE))]
+;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
+;; with another predicate P. This does not have the same trapping behavior
+;; as predicating the comparison itself on P, but it's a legitimate fold,
+;; since we can drop any potentially-trapping operations whose results
+;; are not needed.
+;;
+;; Split the instruction into its preferred form (below) at the earliest
+;; opportunity, in order to get rid of the redundant operand 1.
+(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1)
+ (SVE_FP_CMP
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
- "@
- fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (and:<VPRED>
+ (SVE_FP_CMP:<VPRED>
+ (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
)
-;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
-(define_expand "fnms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 4)
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 1 "register_operand"))
- (match_operand:SVE_F 2 "register_operand")
- (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand")))]
- UNSPEC_MERGE_PTRUE))]
+;; Same for unordered comparisons.
+(define_insn_and_split "*fcmuo<mode>_and_combine"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (and:<VPRED>
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1)
+ (unordered
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE)
+ (match_operand:<VPRED> 4 "register_operand" "Upl")))]
"TARGET_SVE"
- {
- operands[4] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (and:<VPRED>
+ (unordered:<VPRED>
+ (match_dup 2)
+ (match_dup 3))
+ (match_dup 4)))]
)
-;; fnms predicated with a PTRUE.
-(define_insn "*fnms<mode>4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (fma:SVE_F (neg:SVE_F
- (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
- (match_operand:SVE_F 4 "register_operand" "w, w, w")
- (neg:SVE_F
- (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
- UNSPEC_MERGE_PTRUE))]
+;; Unpredicated floating-point comparisons, with the results ANDed with
+;; another predicate. This is a valid fold for the same reasons as above.
+(define_insn "*fcm<cmp_op><mode>_and"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (and:<VPRED>
+ (SVE_FP_CMP:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
+ (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
"TARGET_SVE"
"@
- fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
- fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated floating-point division.
-(define_expand "div<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 3)
- (div:SVE_F (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Same for unordered comparisons.
+(define_insn "*fcmuo<mode>_and"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
+ (and:<VPRED>
+ (unordered:<VPRED>
+ (match_operand:SVE_F 2 "register_operand" "w")
+ (match_operand:SVE_F 3 "register_operand" "w"))
+ (match_operand:<VPRED> 1 "register_operand" "Upl")))]
"TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
+ "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Floating-point division predicated with a PTRUE.
-(define_insn "*div<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated floating-point comparisons. We don't need a version
+;; of this for unordered comparisons.
+(define_insn "*pred_fcm<cmp_op><mode>"
+ [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
+ (unspec:<VPRED>
+ [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+ (match_operand:SVE_F 2 "register_operand" "w, w")
+ (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
+ SVE_COND_FP_CMP))]
"TARGET_SVE"
"@
- fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,*,yes")]
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
+ fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
-;; Unpredicated FNEG, FABS and FSQRT.
-(define_expand "<optab><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
- "TARGET_SVE"
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Test bits
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PTEST
+;; -------------------------------------------------------------------------
+
+;; Branch based on predicate equality or inequality.
+(define_expand "cbranch<mode>4"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "aarch64_equality_operator"
+ [(match_operand:PRED_ALL 1 "register_operand")
+ (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
+ (label_ref (match_operand 3 ""))
+ (pc)))]
+ ""
{
- operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
+ rtx pred;
+ if (operands[2] == CONST0_RTX (<MODE>mode))
+ pred = operands[1];
+ else
+ {
+ pred = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
+ operands[2]));
+ }
+ emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
+ operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
+ operands[2] = const0_rtx;
}
)
-;; FNEG, FABS and FSQRT predicated with a PTRUE.
-(define_insn "*<optab><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Test all bits of operand 1. Operand 0 is a GP that is known to hold PTRUE.
+;;
+;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
+;; is a PTRUE even if the optimizers haven't yet been able to propagate
+;; the constant. We would use a separate unspec code for PTESTs involving
+;; GPs that might not be PTRUEs.
+(define_insn "ptest_ptrue<mode>"
+ [(set (reg:CC_NZC CC_REGNUM)
+ (unspec:CC_NZC
+ [(match_operand:PRED_ALL 0 "register_operand" "Upa")
+ (match_operand:PRED_ALL 1 "register_operand" "Upa")]
+ UNSPEC_PTEST_PTRUE))]
"TARGET_SVE"
- "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "ptest\t%0, %1.b"
)
-(define_insn "*fabd<mode>3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (abs:SVE_F
- (minus:SVE_F
- (match_operand:SVE_F 2 "register_operand" "0")
- (match_operand:SVE_F 3 "register_operand" "w")))]
- UNSPEC_MERGE_PTRUE))]
+;; =========================================================================
+;; == Reductions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Conditional reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - CLASTB
+;; -------------------------------------------------------------------------
+
+;; Set operand 0 to the last active element in operand 3, or to tied
+;; operand 1 if no elements are active.
+(define_insn "fold_extract_last_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
+ (unspec:<VEL>
+ [(match_operand:<VEL> 1 "register_operand" "0, 0")
+ (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
+ (match_operand:SVE_ALL 3 "register_operand" "w, w")]
+ UNSPEC_CLASTB))]
"TARGET_SVE"
- "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+ "@
+ clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
+ clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
)
-;; Unpredicated FRINTy.
-(define_expand "<frint_pattern><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
- FRINT)]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [INT] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - ANDV
+;; - EORV
+;; - ORV
+;; - SMAXV
+;; - SMINV
+;; - UADDV
+;; - UMAXV
+;; - UMINV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer add reduction.
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ UNSPEC_ADDV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; FRINTy predicated with a PTRUE.
-(define_insn "*<frint_pattern><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
- FRINT)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer add reduction. The result is always 64 bits.
+(define_insn "*reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ UNSPEC_ADDV))]
"TARGET_SVE"
- "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
+ "uaddv\t%d0, %1, %2.<Vetype>"
)
-;; Unpredicated conversion of floats to integers of the same size (HF to HI,
-;; SF to SI or DF to DI).
-(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
- [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
- (unspec:<V_INT_EQUIV>
- [(match_dup 2)
- (FIXUORS:<V_INT_EQUIV>
- (match_operand:SVE_F 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; Unpredicated integer MAX/MIN reduction.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ MAXMINV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Conversion of SF to DI, SI or HI, predicated with a PTRUE.
-(define_insn "*<fix_trunc_optab>v16hsf<mode>2"
- [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
- (unspec:SVE_HSDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FIXUORS:SVE_HSDI
- (match_operand:VNx8HF 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated integer MAX/MIN reduction.
+(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ MAXMINV))]
"TARGET_SVE"
- "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
+ "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Conversion of SF to DI or SI, predicated with a PTRUE.
-(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
- (unspec:SVE_SDI
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FIXUORS:SVE_SDI
- (match_operand:VNx4SF 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+(define_expand "reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_I 1 "register_operand")]
+ BITWISEV))]
"TARGET_SVE"
- "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Conversion of DF to DI or SI, predicated with a PTRUE.
-(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
- (unspec:SVE_SDI
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (FIXUORS:SVE_SDI
- (match_operand:VNx2DF 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+(define_insn "*reduc_<optab>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_I 2 "register_operand" "w")]
+ BITWISEV))]
"TARGET_SVE"
- "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
+ "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Unpredicated conversion of integers to floats of the same size
-;; (HI to HF, SI to SF or DI to DF).
-(define_expand "<optab><v_int_equiv><mode>2"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_dup 2)
- (FLOATUORS:SVE_F
- (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Tree reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDV
+;; - FMAXNMV
+;; - FMAXV
+;; - FMINNMV
+;; - FMINV
+;; -------------------------------------------------------------------------
+
+;; Unpredicated floating-point add reduction.
+(define_expand "reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_F 1 "register_operand")]
+ UNSPEC_FADDV))]
"TARGET_SVE"
{
operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
-;; Conversion of DI, SI or HI to the same number of HFs, predicated
-;; with a PTRUE.
-(define_insn "*<optab><mode>vnx8hf2"
- [(set (match_operand:VNx8HF 0 "register_operand" "=w")
- (unspec:VNx8HF
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FLOATUORS:VNx8HF
- (match_operand:SVE_HSDI 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated floating-point add reduction.
+(define_insn "*reduc_plus_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ UNSPEC_FADDV))]
+ "TARGET_SVE"
+ "faddv\t%<Vetype>0, %1, %2.<Vetype>"
+)
+
+;; Unpredicated floating-point MAX/MIN reduction.
+(define_expand "reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 2)
+ (match_operand:SVE_F 1 "register_operand")]
+ FMAXMINV))]
"TARGET_SVE"
- "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
-(define_insn "*<optab><mode>vnx4sf2"
- [(set (match_operand:VNx4SF 0 "register_operand" "=w")
- (unspec:VNx4SF
- [(match_operand:<VPRED> 1 "register_operand" "Upl")
- (FLOATUORS:VNx4SF
- (match_operand:SVE_SDI 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated floating-point MAX/MIN reduction.
+(define_insn "*reduc_<maxmin_uns>_scal_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ FMAXMINV))]
"TARGET_SVE"
- "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
+ "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
)
-;; Conversion of DI or SI to DF, predicated with a PTRUE.
-(define_insn "aarch64_sve_<optab><mode>vnx2df2"
- [(set (match_operand:VNx2DF 0 "register_operand" "=w")
- (unspec:VNx2DF
- [(match_operand:VNx2BI 1 "register_operand" "Upl")
- (FLOATUORS:VNx2DF
- (match_operand:SVE_SDI 2 "register_operand" "w"))]
- UNSPEC_MERGE_PTRUE))]
+;; -------------------------------------------------------------------------
+;; ---- [FP] Left-to-right reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FADDA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated in-order FP reductions.
+(define_expand "fold_left_plus_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand")
+ (unspec:<VEL> [(match_dup 3)
+ (match_operand:<VEL> 1 "register_operand")
+ (match_operand:SVE_F 2 "register_operand")]
+ UNSPEC_FADDA))]
"TARGET_SVE"
- "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
+ {
+ operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+ }
)
-;; Conversion of DFs to the same number of SFs, or SFs to the same number
-;; of HFs.
-(define_insn "*trunc<Vwide><mode>2"
- [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
- (unspec:SVE_HSF
- [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
- (unspec:SVE_HSF
- [(match_operand:<VWIDE> 2 "register_operand" "w")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated in-order FP reductions.
+(define_insn "mask_fold_left_plus_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
+ (match_operand:<VEL> 1 "register_operand" "0")
+ (match_operand:SVE_F 2 "register_operand" "w")]
+ UNSPEC_FADDA))]
"TARGET_SVE"
- "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
+ "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
)
-;; Conversion of SFs to the same number of DFs, or HFs to the same number
-;; of SFs.
-(define_insn "aarch64_sve_extend<mode><Vwide>2"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE>
- [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
- (unspec:<VWIDE>
- [(match_operand:SVE_HSF 2 "register_operand" "w")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))]
+;; Predicated form of the above in-order reduction.
+(define_insn "*pred_fold_left_plus_<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w")
+ (unspec:<VEL>
+ [(match_operand:<VEL> 1 "register_operand" "0")
+ (unspec:SVE_F
+ [(match_operand:<VPRED> 2 "register_operand" "Upl")
+ (match_operand:SVE_F 3 "register_operand" "w")
+ (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
+ UNSPEC_SEL)]
+ UNSPEC_FADDA))]
"TARGET_SVE"
- "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
+ "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)
-;; Unpack the low or high half of a predicate, where "high" refers to
-;; the low-numbered lanes for big-endian and the high-numbered lanes
-;; for little-endian.
-(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
- UNPACK)]
- "TARGET_SVE"
+;; =========================================================================
+;; == Permutes
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] General permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TBL
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_perm<mode>"
+ [(match_operand:SVE_ALL 0 "register_operand")
+ (match_operand:SVE_ALL 1 "register_operand")
+ (match_operand:SVE_ALL 2 "register_operand")
+ (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
+ "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
{
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
- : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
- (operands[0], operands[1]));
+ aarch64_expand_sve_vec_perm (operands[0], operands[1],
+ operands[2], operands[3]);
DONE;
}
)
-;; PUNPKHI and PUNPKLO.
-(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
- (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
- UNPACK_UNSIGNED))]
+(define_insn "*aarch64_sve_tbl<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL
+ [(match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
+ UNSPEC_TBL))]
"TARGET_SVE"
- "punpk<perm_hilo>\t%0.h, %1.b"
+ "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Unpack the low or high half of a vector, where "high" refers to
-;; the low-numbered lanes for big-endian and the high-numbered lanes
-;; for little-endian.
-(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Special-purpose unary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - DUP
+;; - REV
+;; - REVB
+;; - REVH
+;; - REVW
+;; -------------------------------------------------------------------------
+
+;; Duplicate one element of a vector.
+(define_insn "*aarch64_sve_dup_lane<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (vec_duplicate:SVE_ALL
+ (vec_select:<VEL>
+ (match_operand:SVE_ALL 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "const_int_operand")]))))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
+ "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
+)
+
+;; Reverse the order of elements within a full vector.
+(define_insn "@aarch64_sve_rev<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
+ UNSPEC_REV))]
"TARGET_SVE"
- {
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
- : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
- (operands[0], operands[1]));
- DONE;
- }
+ "rev\t%0.<Vetype>, %1.<Vetype>")
+
+;; Reverse the order of elements within a 64-bit container.
+(define_insn "*aarch64_sve_rev64<mode>"
+ [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
+ (unspec:SVE_BHS
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
+ UNSPEC_REV64)]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "rev<Vesize>\t%0.d, %1/m, %2.d"
)
-;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
-(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
- [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
- (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
- UNPACK))]
+;; Reverse the order of elements within a 32-bit container.
+(define_insn "*aarch64_sve_rev32<mode>"
+ [(set (match_operand:SVE_BH 0 "register_operand" "=w")
+ (unspec:SVE_BH
+ [(match_operand:VNx4BI 1 "register_operand" "Upl")
+ (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
+ UNSPEC_REV32)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
+ "rev<Vesize>\t%0.s, %1/m, %2.s"
)
-;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
-;; First unpack the source without conversion, then float-convert the
-;; unpacked source.
-(define_expand "vec_unpacks_<perm_hilo>_<mode>"
- [(match_operand:<VWIDE> 0 "register_operand")
- (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
- UNPACK_UNSIGNED)]
+;; Reverse the order of elements within a 16-bit container.
+(define_insn "*aarch64_sve_rev16vnx16qi"
+ [(set (match_operand:VNx16QI 0 "register_operand" "=w")
+ (unspec:VNx16QI
+ [(match_operand:VNx8BI 1 "register_operand" "Upl")
+ (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
+ UNSPEC_REV16)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- /* Use ZIP to do the unpack, since we don't care about the upper halves
- and since it has the nice property of not needing any subregs.
- If using UUNPK* turns out to be preferable, we could model it as
- a ZIP whose first operand is zero. */
- rtx temp = gen_reg_rtx (<MODE>mode);
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_zip2<mode>
- : gen_aarch64_sve_zip1<mode>)
- (temp, operands[1], operands[1]));
- rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
- emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
- ptrue, temp));
- DONE;
- }
+ "revb\t%0.h, %1/m, %2.h"
)
-;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
-;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
-;; unpacked VNx4SI to VNx2DF.
-(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
- [(match_operand:VNx2DF 0 "register_operand")
- (FLOATUORS:VNx2DF
- (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
- UNPACK_UNSIGNED))]
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Special-purpose binary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TRN1
+;; - TRN2
+;; - UZP1
+;; - UZP2
+;; - ZIP1
+;; - ZIP2
+;; -------------------------------------------------------------------------
+
+;; Permutes that take half the elements from one vector and half the
+;; elements from the other.
+(define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
+ (match_operand:SVE_ALL 2 "register_operand" "w")]
+ PERMUTE))]
"TARGET_SVE"
+ "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+)
+
+;; Concatenate two vectors and extract a subvector. Note that the
+;; immediate (third) operand is the lane index, not the byte index.
+(define_insn "*aarch64_sve_ext<mode>"
+ [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
+ (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
+ (match_operand:SVE_ALL 2 "register_operand" "w")
+ (match_operand:SI 3 "const_int_operand")]
+ UNSPEC_EXT))]
+ "TARGET_SVE
+ && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
{
- /* Use ZIP to do the unpack, since we don't care about the upper halves
- and since it has the nice property of not needing any subregs.
- If using UUNPK* turns out to be preferable, we could model it as
- a ZIP whose first operand is zero. */
- rtx temp = gen_reg_rtx (VNx4SImode);
- emit_insn ((<hi_lanes_optab>
- ? gen_aarch64_sve_zip2vnx4si
- : gen_aarch64_sve_zip1vnx4si)
- (temp, operands[1], operands[1]));
- rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
- emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
- ptrue, temp));
- DONE;
+ operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
+ return "ext\\t%0.b, %0.b, %2.b, #%3";
}
)
-;; Predicate pack. Use UZP1 on the narrower type, which discards
-;; the high part of each wide element.
-(define_insn "vec_pack_trunc_<Vwide>"
- [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
- (unspec:PRED_BHS
- [(match_operand:<VWIDE> 1 "register_operand" "Upa")
- (match_operand:<VWIDE> 2 "register_operand" "Upa")]
- UNSPEC_PACK))]
+;; -------------------------------------------------------------------------
+;; ---- [PRED] Special-purpose binary permutes
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - TRN1
+;; - TRN2
+;; - UZP1
+;; - UZP2
+;; - ZIP1
+;; - ZIP2
+;; -------------------------------------------------------------------------
+
+;; Permutes that take half the elements from one vector and half the
+;; elements from the other.
+(define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
+ [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
+ (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
+ (match_operand:PRED_ALL 2 "register_operand" "Upa")]
+ PERMUTE))]
"TARGET_SVE"
- "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
+ "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
+;; =========================================================================
+;; == Conversions
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-INT] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - UZP1
+;; -------------------------------------------------------------------------
+
;; Integer pack. Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
"uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
-;; the results into a single vector.
-(define_expand "vec_pack_trunc_<Vwide>"
- [(set (match_dup 4)
- (unspec:SVE_HSF
- [(match_dup 3)
- (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))
- (set (match_dup 5)
- (unspec:SVE_HSF
- [(match_dup 3)
- (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
- UNSPEC_FLOAT_CONVERT)]
- UNSPEC_MERGE_PTRUE))
- (set (match_operand:SVE_HSF 0 "register_operand")
- (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
+;; -------------------------------------------------------------------------
+;; ---- [INT<-INT] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SUNPKHI
+;; - SUNPKLO
+;; - UUNPKHI
+;; - UUNPKLO
+;; -------------------------------------------------------------------------
+
+;; Unpack the low or high half of a vector, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
"TARGET_SVE"
{
- operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
- operands[4] = gen_reg_rtx (<MODE>mode);
- operands[5] = gen_reg_rtx (<MODE>mode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
+ : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
+ (operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
+ UNPACK))]
+ "TARGET_SVE"
+ "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVTZS
+;; - FCVTZU
+;; -------------------------------------------------------------------------
+
+;; Unpredicated conversion of floats to integers of the same size (HF to HI,
+;; SF to SI or DF to DI).
+(define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
+ (unspec:<V_INT_EQUIV>
+ [(match_dup 2)
+ (FIXUORS:<V_INT_EQUIV>
+ (match_operand:SVE_F 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ {
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
)
+;; Conversion of HF to DI, SI or HI, predicated with a PTRUE.
+(define_insn "*<fix_trunc_optab>vnx8hf<mode>2"
+ [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
+ (unspec:SVE_HSDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FIXUORS:SVE_HSDI
+ (match_operand:VNx8HF 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
+)
+
+;; Conversion of SF to DI or SI, predicated with a PTRUE.
+(define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
+ (unspec:SVE_SDI
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FIXUORS:SVE_SDI
+ (match_operand:VNx4SF 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
+)
+
+;; Conversion of DF to DI or SI, predicated with a PTRUE.
+(define_insn "*<fix_trunc_optab>vnx2df<mode>2"
+ [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
+ (unspec:SVE_SDI
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (FIXUORS:SVE_SDI
+ (match_operand:VNx2DF 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Packs
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
[(set (match_dup 4)
}
)
-;; Predicated floating-point operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand")]
- SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT<-FP] Unpacks
+;; -------------------------------------------------------------------------
+;; No patterns here yet!
+;; -------------------------------------------------------------------------
-;; Predicated floating-point operations with select matching first operand.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")]
- SVE_COND_FP_BINARY)
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Conversions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SCVTF
+;; - UCVTF
+;; -------------------------------------------------------------------------
-;; Predicated floating-point operations with select matching second operand.
-(define_insn "*cond_<optab><mode>_3"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+;; Unpredicated conversion of integers to floats of the same size
+;; (HI to HF, SI to SF or DI to DF).
+(define_expand "<optab><v_int_equiv><mode>2"
+ [(set (match_operand:SVE_F 0 "register_operand")
(unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "register_operand" "0, w")]
- SVE_COND_FP_BINARY)
- (match_dup 3)]
- UNSPEC_SEL))]
+ [(match_dup 2)
+ (FLOATUORS:SVE_F
+ (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point binary operations in which the values of
-;; inactive lanes are distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
- SVE_COND_FP_BINARY)
- (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[4])
- && !rtx_equal_p (operands[3], operands[4])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
- movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && register_operand (operands[4], <MODE>mode)
- && !rtx_equal_p (operands[0], operands[4])"
{
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
- operands[4], operands[1]));
- operands[4] = operands[2] = operands[0];
+ operands[2] = aarch64_ptrue_reg (<VPRED>mode);
}
- [(set_attr "movprfx" "yes")]
-)
-
-;; Predicated floating-point ternary operations with select.
-(define_expand "cond_<optab><mode>"
- [(set (match_operand:SVE_F 0 "register_operand")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand")
- (match_operand:SVE_F 3 "register_operand")
- (match_operand:SVE_F 4 "register_operand")]
- SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-{
- /* Swap the multiplication operands if the fallback value is the
- second of the two. */
- if (rtx_equal_p (operands[3], operands[5]))
- std::swap (operands[2], operands[3]);
-})
-
-;; Predicated floating-point ternary operations using the FMAD-like form.
-(define_insn "*cond_<optab><mode>_2"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "0, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w")]
- SVE_COND_FP_TERNARY)
- (match_dup 2)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
- movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
-)
-
-;; Predicated floating-point ternary operations using the FMLA-like form.
-(define_insn "*cond_<optab><mode>_4"
- [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w")
- (match_operand:SVE_F 4 "register_operand" "0, w")]
- SVE_COND_FP_TERNARY)
- (match_dup 4)]
- UNSPEC_SEL))]
- "TARGET_SVE"
- "@
- <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
)
-;; Predicated floating-point ternary operations in which the value for
-;; inactive lanes is distinct from the other inputs.
-(define_insn_and_rewrite "*cond_<optab><mode>_any"
- [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
- (unspec:SVE_F
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
- (unspec:SVE_F
- [(match_operand:SVE_F 2 "register_operand" "w, w, w")
- (match_operand:SVE_F 3 "register_operand" "w, w, w")
- (match_operand:SVE_F 4 "register_operand" "w, w, w")]
- SVE_COND_FP_TERNARY)
- (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
- UNSPEC_SEL))]
- "TARGET_SVE
- && !rtx_equal_p (operands[2], operands[5])
- && !rtx_equal_p (operands[3], operands[5])
- && !rtx_equal_p (operands[4], operands[5])"
- "@
- movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
- #"
- "&& reload_completed
- && !CONSTANT_P (operands[5])
- && !rtx_equal_p (operands[0], operands[5])"
- {
- emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
- operands[5], operands[1]));
- operands[5] = operands[4] = operands[0];
- }
- [(set_attr "movprfx" "yes")]
+;; Conversion of DI, SI or HI to the same number of HFs, predicated
+;; with a PTRUE.
+(define_insn "*<optab><mode>vnx8hf2"
+ [(set (match_operand:VNx8HF 0 "register_operand" "=w")
+ (unspec:VNx8HF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FLOATUORS:VNx8HF
+ (match_operand:SVE_HSDI 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
+ "TARGET_SVE"
+ "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
)
-;; Shift an SVE vector left and insert a scalar into element 0.
-(define_insn "vec_shl_insert_<mode>"
- [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
- (unspec:SVE_ALL
- [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
- (match_operand:<VEL> 2 "register_operand" "rZ, w")]
- UNSPEC_INSR))]
+;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
+(define_insn "*<optab><mode>vnx4sf2"
+ [(set (match_operand:VNx4SF 0 "register_operand" "=w")
+ (unspec:VNx4SF
+ [(match_operand:<VPRED> 1 "register_operand" "Upl")
+ (FLOATUORS:VNx4SF
+ (match_operand:SVE_SDI 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- insr\t%0.<Vetype>, %<vwcore>2
- insr\t%0.<Vetype>, %<Vetype>2"
+ "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
)
-(define_expand "copysign<mode>3"
- [(match_operand:SVE_F 0 "register_operand")
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
+;; Conversion of DI or SI to DF, predicated with a PTRUE.
+(define_insn "aarch64_sve_<optab><mode>vnx2df2"
+ [(set (match_operand:VNx2DF 0 "register_operand" "=w")
+ (unspec:VNx2DF
+ [(match_operand:VNx2BI 1 "register_operand" "Upl")
+ (FLOATUORS:VNx2DF
+ (match_operand:SVE_SDI 2 "register_operand" "w"))]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- {
- rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
- int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
+ "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
+)
- rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
- rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Packs
+;; -------------------------------------------------------------------------
+;; No patterns here yet!
+;; -------------------------------------------------------------------------
- emit_insn (gen_and<v_int_equiv>3
- (sign, arg2,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- HOST_WIDE_INT_M1U
- << bits)));
- emit_insn (gen_and<v_int_equiv>3
- (mant, arg1,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- ~(HOST_WIDE_INT_M1U
- << bits))));
- emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+;; -------------------------------------------------------------------------
+;; ---- [FP<-INT] Unpacks
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
+;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
+;; unpacked VNx4SI to VNx2DF.
+(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
+ [(match_operand:VNx2DF 0 "register_operand")
+ (FLOATUORS:VNx2DF
+ (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
+ UNPACK_UNSIGNED))]
+ "TARGET_SVE"
+ {
+ /* Use ZIP to do the unpack, since we don't care about the upper halves
+ and since it has the nice property of not needing any subregs.
+ If using UUNPK* turns out to be preferable, we could model it as
+ a ZIP whose first operand is zero. */
+ rtx temp = gen_reg_rtx (VNx4SImode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_zip2vnx4si
+ : gen_aarch64_sve_zip1vnx4si)
+ (temp, operands[1], operands[1]));
+ rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
+ emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
+ ptrue, temp));
DONE;
}
)
-(define_expand "xorsign<mode>3"
- [(match_operand:SVE_F 0 "register_operand")
- (match_operand:SVE_F 1 "register_operand")
- (match_operand:SVE_F 2 "register_operand")]
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
+;; the results into a single vector.
+(define_expand "vec_pack_trunc_<Vwide>"
+ [(set (match_dup 4)
+ (unspec:SVE_HSF
+ [(match_dup 3)
+ (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))
+ (set (match_dup 5)
+ (unspec:SVE_HSF
+ [(match_dup 3)
+ (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))
+ (set (match_operand:SVE_HSF 0 "register_operand")
+ (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
"TARGET_SVE"
{
- rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
- rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
- int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
-
- rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
- rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
-
- emit_insn (gen_and<v_int_equiv>3
- (sign, arg2,
- aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
- HOST_WIDE_INT_M1U
- << bits)));
- emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
- emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
- DONE;
+ operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
+ operands[4] = gen_reg_rtx (<MODE>mode);
+ operands[5] = gen_reg_rtx (<MODE>mode);
}
)
-;; Unpredicated DOT product.
-(define_insn "<sur>dot_prod<vsi2qi>"
- [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
- (plus:SVE_SDI
- (unspec:SVE_SDI
- [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
- (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
- DOTPROD)
- (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
+;; Conversion of DFs to the same number of SFs, or SFs to the same number
+;; of HFs.
+(define_insn "*trunc<Vwide><mode>2"
+ [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
+ (unspec:SVE_HSF
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (unspec:SVE_HSF
+ [(match_operand:<VWIDE> 2 "register_operand" "w")]
+ UNSPEC_FLOAT_CONVERT)]
+ UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
- "@
- <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
- movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
- [(set_attr "movprfx" "*,yes")]
+ "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
)
-;; Unpredicated integer absolute difference.
-(define_expand "<su>abd<mode>_3"
- [(use (match_operand:SVE_I 0 "register_operand"))
- (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
- (match_operand:SVE_I 2 "register_operand"))]
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FCVT
+;; -------------------------------------------------------------------------
+
+;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
+;; First unpack the source without conversion, then float-convert the
+;; unpacked source.
+;; Operand 0 is the wide result vector; operand 1 is the narrow source.
+(define_expand "vec_unpacks_<perm_hilo>_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
+ UNPACK_UNSIGNED)]
 "TARGET_SVE"
 {
- rtx pred = aarch64_ptrue_reg (<VPRED>mode);
- emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
- operands[2]));
+ /* Use ZIP to do the unpack, since we don't care about the upper halves
+ and since it has the nice property of not needing any subregs.
+ If using UUNPK* turns out to be preferable, we could model it as
+ a ZIP whose first operand is zero. */
+ rtx temp = gen_reg_rtx (<MODE>mode);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_zip2<mode>
+ : gen_aarch64_sve_zip1<mode>)
+ (temp, operands[1], operands[1]));
+ /* Widen the unconverted elements with an all-true merging FCVT.  */
+ rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
+ emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
+ ptrue, temp));
 DONE;
 }
)
-;; Predicated integer absolute difference.
-(define_insn "aarch64_<su>abd<mode>_3"
- [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
- (unspec:SVE_I
- [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
- (minus:SVE_I
- (USMAX:SVE_I
- (match_operand:SVE_I 2 "register_operand" "0, w")
- (match_operand:SVE_I 3 "register_operand" "w, w"))
- (<max_opp>:SVE_I
- (match_dup 2)
- (match_dup 3)))]
+;; Conversion of SFs to the same number of DFs, or HFs to the same number
+;; of SFs.
+;; Operand 1 is the governing predicate for the merging widening FCVT;
+;; UNSPEC_MERGE_PTRUE marks it as an all-true predicate.
+(define_insn "aarch64_sve_extend<mode><Vwide>2"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (unspec:<VWIDE>
+ [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
+ (unspec:<VWIDE>
+ [(match_operand:SVE_HSF 2 "register_operand" "w")]
+ UNSPEC_FLOAT_CONVERT)]
 UNSPEC_MERGE_PTRUE))]
 "TARGET_SVE"
- "@
- <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
- movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
- [(set_attr "movprfx" "*,yes")]
+ "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
 )
-;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
-;; operands 1 and 2. The sequence also has to perform a widening reduction of
-;; the difference into a vector and accumulate that into operand 3 before
-;; copying that into the result operand 0.
-;; Perform that with a sequence of:
-;; MOV ones.b, #1
-;; [SU]ABD diff.b, p0/m, op1.b, op2.b
-;; MOVPRFX op0, op3 // If necessary
-;; UDOT op0.s, diff.b, ones.b
+;; -------------------------------------------------------------------------
+;; ---- [PRED<-PRED] Packs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - UZP1
+;; -------------------------------------------------------------------------
+
-(define_expand "<sur>sad<vsi2qi>"
- [(use (match_operand:SVE_SDI 0 "register_operand"))
- (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
- (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
- (use (match_operand:SVE_SDI 3 "register_operand"))]
+;; Predicate pack. Use UZP1 on the narrower type, which discards
+;; the high part of each wide element.
+;; Operands 1 and 2 are the two wide predicate inputs; the result is
+;; a single predicate of the narrower element type.
+(define_insn "vec_pack_trunc_<Vwide>"
+ [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
+ (unspec:PRED_BHS
+ [(match_operand:<VWIDE> 1 "register_operand" "Upa")
+ (match_operand:<VWIDE> 2 "register_operand" "Upa")]
+ UNSPEC_PACK))]
 "TARGET_SVE"
- {
- rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
- rtx diff = gen_reg_rtx (<VSI2QI>mode);
- emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
- emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
- DONE;
- }
+ "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
-;; Standard pattern name vec_init<mode><Vel>.
-(define_expand "vec_init<mode><Vel>"
- [(match_operand:SVE_ALL 0 "register_operand")
- (match_operand 1 "" "")]
+;; -------------------------------------------------------------------------
+;; ---- [PRED<-PRED] Unpacks
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - PUNPKHI
+;; - PUNPKLO
+;; -------------------------------------------------------------------------
+
+;; Unpack the low or high half of a predicate, where "high" refers to
+;; the low-numbered lanes for big-endian and the high-numbered lanes
+;; for little-endian.
+;; Signedness does not matter when unpacking a predicate, so both <su>
+;; variants expand to the same (unsigned) PUNPK patterns below.
+(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
+ [(match_operand:<VWIDE> 0 "register_operand")
+ (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
+ UNPACK)]
 "TARGET_SVE"
 {
- aarch64_sve_expand_vector_init (operands[0], operands[1]);
+ emit_insn ((<hi_lanes_optab>
+ ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
+ : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
+ (operands[0], operands[1]));
 DONE;
 }
)
+
+;; Unpack the low or high half of predicate operand 1 into the wider
+;; predicate operand 0. The assembly template always uses the .h/.b
+;; suffixes, whatever the element size associated with the mode.
+(define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
+ (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
+ UNPACK_UNSIGNED))]
+ "TARGET_SVE"
+ "punpk<perm_hilo>\t%0.h, %1.b"
+)