;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
+;; The file is organised into the following sections (search for the full
+;; line):
+;;
+;; == Uniform binary arithmetic
+;; ---- [INT] Scaled high-part multiplication
+;; ---- [INT] General binary arithmetic that maps to unspecs
+;;
+;; == Uniform ternary arithmetic
+;; ---- [INT] Ternary logic operations
+;; ---- [INT] Shift-and-accumulate operations
+;;
+;; == Extending arithmetic
+;; ---- [INT] Long binary arithmetic
+;;
+;; == Narrowing arithmetic
+;; ---- [INT] Narrowing right shifts
+;;
+;; == General
+;; ---- Check for aliases between pointers
+
+;; =========================================================================
+;; == Uniform binary arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Scaled high-part multiplication
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+;; Unpredicated integer multiply-high-with-(round-and-)scale.
+(define_expand "<su>mulh<r>s<mode>3"
+  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
+        (unspec:SVE_FULL_BHSI
+          [(match_dup 3)
+           (unspec:SVE_FULL_BHSI
+             [(match_operand:SVE_FULL_BHSI 1 "register_operand")
+              (match_operand:SVE_FULL_BHSI 2 "register_operand")]
+             MULHRS)]
+          UNSPEC_PRED_X))]
+  "TARGET_SVE2"
+  {
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+
+    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
+    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
+    emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2]));
+    emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2]));
+
+    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
+    emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift));
+    emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift));
+
+    DONE;
+  }
+)
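+
+;; As a rough scalar model (illustration only; 16-bit elements chosen
+;; arbitrarily), each element of the result above is computed as:
+;;
+;;   int32_t prod = (int32_t) a * (int32_t) b;
+;;   if (rounding)                    /* the <r> == "r" forms */
+;;     prod += 1 << 14;               /* performed by RSHRNB/RSHRNT */
+;;   int16_t res = prod >> 15;        /* shift by element size - 1 */
+;;
+;; with the B instructions covering the even-indexed elements and the
+;; T instructions the odd-indexed elements.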
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] General binary arithmetic that maps to unspecs
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SHADD
+;; - SHSUB
+;; - SRHADD
+;; - UHADD
+;; - UHSUB
+;; - URHADD
+;; -------------------------------------------------------------------------
+
;; Integer average (floor).
(define_expand "<u>avg<mode>3_floor"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(set_attr "movprfx" "*,yes")]
)
-;; Multiply long top / bottom.
-(define_insn "<su>mull<bt><Vwide>"
-  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
-        (unspec:<VWIDE>
-          [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
-           (match_operand:SVE_FULL_BHSI 2 "register_operand" "w")]
-          MULLBT))]
-  "TARGET_SVE2"
-  "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
-)
-
-;; (Rounding) Right shift narrow bottom.
-(define_insn "<r>shrnb<mode>"
-  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
-        (unspec:SVE_FULL_BHSI
-          [(match_operand:<VWIDE> 1 "register_operand" "w")
-           (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")]
-          SHRNB))]
-  "TARGET_SVE2"
-  "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2"
-)
-
-;; (Rounding) Right shift narrow top.
-(define_insn "<r>shrnt<mode>"
-  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
-        (unspec:SVE_FULL_BHSI
-          [(match_operand:SVE_FULL_BHSI 1 "register_operand" "0")
-           (match_operand:<VWIDE> 2 "register_operand" "w")
-           (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")]
-          SHRNT))]
-  "TARGET_SVE2"
-  "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3"
-)
-
-;; Unpredicated integer multiply-high-with-(round-and-)scale.
-(define_expand "<su>mulh<r>s<mode>3"
-  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
-        (unspec:SVE_FULL_BHSI
-          [(match_dup 3)
-           (unspec:SVE_FULL_BHSI
-             [(match_operand:SVE_FULL_BHSI 1 "register_operand")
-              (match_operand:SVE_FULL_BHSI 2 "register_operand")]
-             MULHRS)]
-          UNSPEC_PRED_X))]
-  "TARGET_SVE2"
-  {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
-
-    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
-    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
-    emit_insn (gen_<su>mullb<Vwide> (prod_b, operands[1], operands[2]));
-    emit_insn (gen_<su>mullt<Vwide> (prod_t, operands[1], operands[2]));
-
-    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
-    emit_insn (gen_<r>shrnb<mode> (operands[0], prod_b, shift));
-    emit_insn (gen_<r>shrnt<mode> (operands[0], operands[0], prod_t, shift));
+;; =========================================================================
+;; == Uniform ternary arithmetic
+;; =========================================================================
-    DONE;
-  }
-)
-
-;; Unpredicated signed / unsigned shift-right accumulate.
-(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
-  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
-        (plus:SVE_FULL_I
-          (unspec:SVE_FULL_I
-            [(match_operand 4)
-             (SHIFTRT:SVE_FULL_I
-               (match_operand:SVE_FULL_I 2 "register_operand" "w")
-               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm" "Dr"))]
-            UNSPEC_PRED_X)
-          (match_operand:SVE_FULL_I 1 "register_operand" "0")))]
-  "TARGET_SVE2"
-  "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
-  "&& !CONSTANT_P (operands[4])"
-  {
-    operands[4] = CONSTM1_RTX (<VPRED>mode);
-  }
-)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Ternary logic operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - BSL
+;; - BSL1N
+;; - BSL2N
+;; - EOR3
+;; - NBSL
+;; -------------------------------------------------------------------------
+
;; Unpredicated 3-way exclusive OR.
(define_insn "*aarch64_sve2_eor3<mode>"
[(set_attr "movprfx" "*,yes")]
)
+;; -------------------------------------------------------------------------
+;; ---- [INT] Shift-and-accumulate operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SSRA
+;; - USRA
+;; -------------------------------------------------------------------------
+
+;; Unpredicated signed / unsigned shift-right accumulate.
+(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
+  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
+        (plus:SVE_FULL_I
+          (unspec:SVE_FULL_I
+            [(match_operand 4)
+             (SHIFTRT:SVE_FULL_I
+               (match_operand:SVE_FULL_I 2 "register_operand" "w")
+               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm" "Dr"))]
+            UNSPEC_PRED_X)
+          (match_operand:SVE_FULL_I 1 "register_operand" "0")))]
+  "TARGET_SVE2"
+  "<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
+  "&& !CONSTANT_P (operands[4])"
+  {
+    operands[4] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
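+
+;; Element-wise this matches (illustration only):
+;;
+;;   dest = acc + (src >> shift)
+;;
+;; with an arithmetic shift for SSRA and a logical shift for USRA;
+;; operand 1 is the accumulator and is tied to the destination.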
+
+;; =========================================================================
+;; == Extending arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Long binary arithmetic
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - SMULLB
+;; - SMULLT
+;; - UMULLB
+;; - UMULLT
+;; -------------------------------------------------------------------------
+
+;; Multiply long top / bottom.
+(define_insn "<su>mull<bt><Vwide>"
+  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+        (unspec:<VWIDE>
+          [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
+           (match_operand:SVE_FULL_BHSI 2 "register_operand" "w")]
+          MULLBT))]
+  "TARGET_SVE2"
+  "<su>mull<bt>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
+)
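+
+;; Illustration only: the B form multiplies the even-indexed elements of
+;; the two inputs and the T form the odd-indexed elements, each producing
+;; a full vector of double-width products.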
+
+;; =========================================================================
+;; == Narrowing arithmetic
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Narrowing right shifts
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - RSHRNB
+;; - RSHRNT
+;; - SHRNB
+;; - SHRNT
+;; -------------------------------------------------------------------------
+
+;; (Rounding) Right shift narrow bottom.
+(define_insn "<r>shrnb<mode>"
+  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
+        (unspec:SVE_FULL_BHSI
+          [(match_operand:<VWIDE> 1 "register_operand" "w")
+           (match_operand 2 "aarch64_simd_shift_imm_offset_<Vel>" "")]
+          SHRNB))]
+  "TARGET_SVE2"
+  "<r>shrnb\t%0.<Vetype>, %1.<Vewtype>, #%2"
+)
+
+;; (Rounding) Right shift narrow top.
+(define_insn "<r>shrnt<mode>"
+  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
+        (unspec:SVE_FULL_BHSI
+          [(match_operand:SVE_FULL_BHSI 1 "register_operand" "0")
+           (match_operand:<VWIDE> 2 "register_operand" "w")
+           (match_operand 3 "aarch64_simd_shift_imm_offset_<Vel>" "i")]
+          SHRNT))]
+  "TARGET_SVE2"
+  "<r>shrnt\t%0.<Vetype>, %2.<Vewtype>, #%3"
+)
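+
+;; Illustration only: SHRNB writes the narrowed results to the even-indexed
+;; elements of the destination and zeroes the odd-indexed ones, while SHRNT
+;; writes the odd-indexed elements and leaves the even-indexed ones
+;; untouched (hence the tied operand 1 above).  The R forms round by adding
+;; 1 << (shift - 1) before shifting.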
+
+;; =========================================================================
+;; == General
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- Check for aliases between pointers
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic: WHILERW and WHILEWR are
+;; defined in aarch64-sve.md instead.
+;; -------------------------------------------------------------------------
+
;; Use WHILERW and WHILEWR to accelerate alias checks. This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
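+;;
+;; For example (illustration only, hypothetical loop):
+;;
+;;   for (int i = 0; i < count; ++i)
+;;     x[i] = y[i] + 1;
+;;
+;; typically needs a runtime check that x and y do not partially overlap
+;; before it can be vectorized; WHILEWR and WHILERW compute that kind of
+;; check directly from the two pointers.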