+2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
+
+ * config/aarch64/aarch64-protos.h
+ (aarch64_sve_scalar_inc_dec_immediate_p): Declare.
+ (aarch64_sve_inc_dec_immediate_p): Rename to...
+ (aarch64_sve_vector_inc_dec_immediate_p): ...this.
+ (aarch64_output_sve_addvl_addpl): Take a single rtx argument.
+ (aarch64_output_sve_scalar_inc_dec): Declare.
+ (aarch64_output_sve_inc_dec_immediate): Rename to...
+ (aarch64_output_sve_vector_inc_dec): ...this.
+ * config/aarch64/aarch64.c (aarch64_sve_scalar_inc_dec_immediate_p)
+ (aarch64_output_sve_scalar_inc_dec): New functions.
+	(aarch64_output_sve_addvl_addpl): Remove the dest and base
+ arguments. Only handle true ADDVL and ADDPL instructions;
+ don't emit an INC or DEC.
+ (aarch64_sve_inc_dec_immediate_p): Rename to...
+ (aarch64_sve_vector_inc_dec_immediate_p): ...this.
+ (aarch64_output_sve_inc_dec_immediate): Rename to...
+ (aarch64_output_sve_vector_inc_dec): ...this. Update call to
+ aarch64_sve_vector_inc_dec_immediate_p.
+ * config/aarch64/predicates.md (aarch64_sve_scalar_inc_dec_immediate)
+ (aarch64_sve_plus_immediate): New predicates.
+ (aarch64_pluslong_operand): Accept aarch64_sve_plus_immediate
+ rather than aarch64_sve_addvl_addpl_immediate.
+ (aarch64_sve_inc_dec_immediate): Rename to...
+ (aarch64_sve_vector_inc_dec_immediate): ...this. Update call to
+ aarch64_sve_vector_inc_dec_immediate_p.
+ (aarch64_sve_add_operand): Update accordingly.
+ * config/aarch64/constraints.md (Uai): New constraint.
+ (vsi): Update call to aarch64_sve_vector_inc_dec_immediate_p.
+ * config/aarch64/aarch64.md (add<GPI:mode>3): Don't force the second
+ operand into a register if it satisfies aarch64_sve_plus_immediate.
+ (*add<GPI:mode>3_aarch64, *add<GPI:mode>3_poly_1): Add an alternative
+ for Uai. Update calls to aarch64_output_sve_addvl_addpl.
+ * config/aarch64/aarch64-sve.md (add<mode>3): Call
+ aarch64_output_sve_vector_inc_dec instead of
+ aarch64_output_sve_inc_dec_immediate.
+
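The patch splits what used to be a single "ADDVL/ADDPL, or INC/DEC when
possible" output path into three separate cases.  In SVE assembly terms
(operands illustrative):

    incw   x0, all, mul #3    scalar INC/DEC:  x0 += 3 * <S elements per VL>
    addvl  x0, x1, #2         true ADDVL:      x0  = x1 + 2 * <VL in bytes>
    incw   z0.s               vector INC/DEC:  each S lane of z0 += <S elements per VL>

The first form is handled by the new aarch64_output_sve_scalar_inc_dec,
the second by the slimmed-down aarch64_output_sve_addvl_addpl, and the
third by the renamed aarch64_output_sve_vector_inc_dec.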
2019-08-15  Richard Sandiford  <richard.sandiford@arm.com>
* config/aarch64/iterators.md (UNSPEC_REVB, UNSPEC_REVH)
bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
bool aarch64_sve_cnt_immediate_p (rtx);
+bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
bool aarch64_sve_addvl_addpl_immediate_p (rtx);
-bool aarch64_sve_inc_dec_immediate_p (rtx);
+bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
int aarch64_add_offset_temporaries (rtx);
void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
bool aarch64_mov_operand_p (rtx, machine_mode);
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
bool aarch64_offset_9bit_signed_unscaled_p (machine_mode, poly_int64);
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
-char *aarch64_output_sve_addvl_addpl (rtx, rtx, rtx);
-char *aarch64_output_sve_inc_dec_immediate (const char *, rtx);
+char *aarch64_output_sve_scalar_inc_dec (rtx);
+char *aarch64_output_sve_addvl_addpl (rtx);
+char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
char *aarch64_output_simd_mov_immediate (rtx, unsigned,
enum simd_immediate_check w = AARCH64_CHECK_MOV);
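As a mental model of the scalar-side classification behind these
prototypes, the following standalone sketch mirrors the new predicates.
It is an approximation, not code from GCC: "poly" stands in for
poly_int64 (runtime value c0 + c1 * (VQ - 1), where VQ is the number of
128-bit granules in a vector), and the encodable ranges are assumptions
derived from the CNT/ADDVL/ADDPL immediate encodings.

    #include <stdbool.h>

    /* Toy stand-in for poly_int64: runtime value is c0 + c1 * (VQ - 1).  */
    struct poly { long c0, c1; };

    /* CNT-style immediate: a whole multiple of an element count per vector,
       so c0 == c1 and the factor decomposes as elt_count * mul with
       elt_count in {2, 4, 8, 16} (D/W/H/B per granule) and mul in [1, 16].  */
    static bool cnt_immediate_p (struct poly v)
    {
      long f = v.c0;
      return (v.c1 == f
              && f >= 2 && f <= 16 * 16
              && (f & 1) == 0
              && f <= 16 * (f & -f));  /* some element size gives mul <= 16 */
    }

    /* What aarch64_sve_scalar_inc_dec_immediate_p accepts: INC for
       positive offsets, DEC for negative ones.  */
    static bool scalar_inc_dec_immediate_p (struct poly v)
    {
      struct poly neg = { -v.c0, -v.c1 };
      return cnt_immediate_p (v) || cnt_immediate_p (neg);
    }

    /* What aarch64_sve_addvl_addpl_immediate_p accepts: a signed multiple
       of VL (16 units per granule) in [-32, 31] VLs, or of PL (2 units
       per granule) in [-32, 31] PLs.  */
    static bool addvl_addpl_immediate_p (struct poly v)
    {
      long f = v.c0;
      if (v.c1 != f || f == 0 || (f & 1) != 0)
        return false;
      long unit = (f & 15) == 0 ? 16 : 2;
      return f >= -32 * unit && f <= 31 * unit;
    }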
"@
add\t%0.<Vetype>, %0.<Vetype>, #%D2
sub\t%0.<Vetype>, %0.<Vetype>, #%N2
- * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
+ * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
value.coeffs[1], 0);
}
+/* Return true if we can add X using a single SVE INC or DEC instruction. */
+
+bool
+aarch64_sve_scalar_inc_dec_immediate_p (rtx x)
+{
+ poly_int64 value;
+ return (poly_int_rtx_p (x, &value)
+ && (aarch64_sve_cnt_immediate_p (value)
+ || aarch64_sve_cnt_immediate_p (-value)));
+}
+
+/* Return the asm string for adding SVE INC/DEC immediate OFFSET to
+ operand 0. */
+
+char *
+aarch64_output_sve_scalar_inc_dec (rtx offset)
+{
+ poly_int64 offset_value = rtx_to_poly_int64 (offset);
+ gcc_assert (offset_value.coeffs[0] == offset_value.coeffs[1]);
+ if (offset_value.coeffs[1] > 0)
+ return aarch64_output_sve_cnt_immediate ("inc", "%x0",
+ offset_value.coeffs[1], 0);
+ else
+ return aarch64_output_sve_cnt_immediate ("dec", "%x0",
+ -offset_value.coeffs[1], 0);
+}
+
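For concreteness, some example expansions of the new routine (the
spellings assume the documented INC/DEC syntax and the "smallest element
size, so that the multiplier is 1 where possible" choice made by the
nelts_per_vq == 0 case of aarch64_output_sve_cnt_immediate; treat them
as illustrative):

    offset = (const_poly_int [16, 16])   ->  "incb\tx0"                (one VL in bytes)
    offset = (const_poly_int [32, 32])   ->  "incb\tx0, all, mul #2"
    offset = (const_poly_int [-2, -2])   ->  "decd\tx0"                (one D element per granule)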
/* Return true if we can add VALUE to a register using a single ADDVL
or ADDPL instruction. */
&& aarch64_sve_addvl_addpl_immediate_p (value));
}
-/* Return the asm string for adding ADDVL or ADDPL immediate X to operand 1
- and storing the result in operand 0. */
+/* Return the asm string for adding ADDVL or ADDPL immediate OFFSET
+ to operand 1 and storing the result in operand 0. */
char *
-aarch64_output_sve_addvl_addpl (rtx dest, rtx base, rtx offset)
+aarch64_output_sve_addvl_addpl (rtx offset)
{
static char buffer[sizeof ("addpl\t%x0, %x1, #-") + 3 * sizeof (int)];
poly_int64 offset_value = rtx_to_poly_int64 (offset);
gcc_assert (aarch64_sve_addvl_addpl_immediate_p (offset_value));
- /* Use INC or DEC if possible. */
- if (rtx_equal_p (dest, base) && GP_REGNUM_P (REGNO (dest)))
- {
- if (aarch64_sve_cnt_immediate_p (offset_value))
- return aarch64_output_sve_cnt_immediate ("inc", "%x0",
- offset_value.coeffs[1], 0);
- if (aarch64_sve_cnt_immediate_p (-offset_value))
- return aarch64_output_sve_cnt_immediate ("dec", "%x0",
- -offset_value.coeffs[1], 0);
- }
-
int factor = offset_value.coeffs[1];
if ((factor & 15) == 0)
snprintf (buffer, sizeof (buffer), "addvl\t%%x0, %%x1, #%d", factor / 16);
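The factor counts bytes per 128-bit granule, so a vector length is 16
units and a predicate length (VL / 8) is 2 units; the fallthrough case
emits ADDPL with factor / 2, matching the "addpl" string the buffer is
sized for above.  Illustrative outputs:

    factor = 32   ->  "addvl\tx0, x1, #2"    (32 / 16 vector lengths)
    factor = 6    ->  "addpl\tx0, x1, #3"    (6 / 2 predicate lengths)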
factor in *FACTOR_OUT (if nonnull). */
bool
-aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
- unsigned int *nelts_per_vq_out)
+aarch64_sve_vector_inc_dec_immediate_p (rtx x, int *factor_out,
+ unsigned int *nelts_per_vq_out)
{
rtx elt;
poly_int64 value;
instruction. */
bool
-aarch64_sve_inc_dec_immediate_p (rtx x)
+aarch64_sve_vector_inc_dec_immediate_p (rtx x)
{
- return aarch64_sve_inc_dec_immediate_p (x, NULL, NULL);
+ return aarch64_sve_vector_inc_dec_immediate_p (x, NULL, NULL);
}
/* Return the asm template for an SVE vector INC or DEC instruction.
value of the vector count operand itself. */
char *
-aarch64_output_sve_inc_dec_immediate (const char *operands, rtx x)
+aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
{
int factor;
unsigned int nelts_per_vq;
- if (!aarch64_sve_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
+ if (!aarch64_sve_vector_inc_dec_immediate_p (x, &factor, &nelts_per_vq))
gcc_unreachable ();
if (factor < 0)
return aarch64_output_sve_cnt_immediate ("dec", operands, -factor,
/* If the constant is too large for a single instruction and isn't frame
based, split off the immediate so it is available for CSE. */
if (!aarch64_plus_immediate (operands[2], <MODE>mode)
+ && !(TARGET_SVE && aarch64_sve_plus_immediate (operands[2], <MODE>mode))
&& can_create_pseudo_p ()
&& (!REG_P (op1)
|| !REGNO_PTR_FRAME_P (REGNO (op1))))
(define_insn "*add<mode>3_aarch64"
[(set
- (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,rk")
+ (match_operand:GPI 0 "register_operand" "=rk,rk,w,rk,r,r,rk")
(plus:GPI
- (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,rk")
- (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uav")))]
+ (match_operand:GPI 1 "register_operand" "%rk,rk,w,rk,rk,0,rk")
+ (match_operand:GPI 2 "aarch64_pluslong_operand" "I,r,w,J,Uaa,Uai,Uav")))]
""
"@
add\\t%<w>0, %<w>1, %2
add\\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
sub\\t%<w>0, %<w>1, #%n2
#
- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);"
- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder.
- [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm")
- (set_attr "arch" "*,*,simd,*,*,*")]
+ * return aarch64_output_sve_scalar_inc_dec (operands[2]);
+ * return aarch64_output_sve_addvl_addpl (operands[2]);"
+ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
+ [(set_attr "type" "alu_imm,alu_sreg,neon_add,alu_imm,multiple,alu_imm,alu_imm")
+ (set_attr "arch" "*,*,simd,*,*,sve,sve")]
)
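With the Uai alternative in place, an increment by a whole number of
vector lengths now matches directly; schematically (hand-constructed,
with [32, 32] the two-VL byte offset in const_poly_int notation):

    (set (reg:DI x0)
         (plus:DI (reg:DI x0)
                  (const_poly_int:DI [32, 32])))
    ->  "incb\tx0, all, mul #2"

The "0" constraint on operand 1 in that alternative encodes the key
difference from ADDVL/ADDPL: INC and DEC have no separate base register,
which is why the old aarch64_output_sve_addvl_addpl could only fall back
to them when the destination and base were already the same register.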
;; zero_extend version of above
;; this pattern.
(define_insn_and_split "*add<mode>3_poly_1"
[(set
- (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,&r")
+ (match_operand:GPI 0 "register_operand" "=r,r,r,r,r,r,&r")
(plus:GPI
- (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,rk,rk")
- (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uav,Uat")))]
+     (match_operand:GPI 1 "register_operand" "%rk,rk,rk,rk,0,rk,rk")
+     (match_operand:GPI 2 "aarch64_pluslong_or_poly_operand" "I,r,J,Uaa,Uai,Uav,Uat")))]
"TARGET_SVE && operands[0] != stack_pointer_rtx"
"@
add\\t%<w>0, %<w>1, %2
add\\t%<w>0, %<w>1, %<w>2
sub\\t%<w>0, %<w>1, #%n2
#
- * return aarch64_output_sve_addvl_addpl (operands[0], operands[1], operands[2]);
+ * return aarch64_output_sve_scalar_inc_dec (operands[2]);
+ * return aarch64_output_sve_addvl_addpl (operands[2]);
#"
"&& epilogue_completed
&& !reg_overlap_mentioned_p (operands[0], operands[1])
operands[2], operands[0], NULL_RTX);
DONE;
}
- ;; The "alu_imm" type for ADDVL/ADDPL is just a placeholder.
- [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,multiple")]
+ ;; The "alu_imm" types for INC/DEC and ADDVL/ADDPL are just placeholders.
+ [(set_attr "type" "alu_imm,alu_sreg,alu_imm,multiple,alu_imm,alu_imm,multiple")]
)
(define_split
(and (match_code "const_int")
(match_test "aarch64_pluslong_strict_immedate (op, VOIDmode)")))
+(define_constraint "Uai"
+ "@internal
+ A constraint that matches a VG-based constant that can be added by
+ a single INC or DEC."
+ (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate"))
+
(define_constraint "Uav"
"@internal
A constraint that matches a VG-based constant that can be added by
"@internal
A constraint that matches a vector count operand valid for SVE INC and
DEC instructions."
- (match_operand 0 "aarch64_sve_inc_dec_immediate"))
+ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate"))
(define_constraint "vsn"
"@internal
(and (match_operand 0 "aarch64_pluslong_immediate")
(not (match_operand 0 "aarch64_plus_immediate"))))
+(define_predicate "aarch64_sve_scalar_inc_dec_immediate"
+ (and (match_code "const_poly_int")
+ (match_test "aarch64_sve_scalar_inc_dec_immediate_p (op)")))
+
(define_predicate "aarch64_sve_addvl_addpl_immediate"
(and (match_code "const_poly_int")
(match_test "aarch64_sve_addvl_addpl_immediate_p (op)")))
+(define_predicate "aarch64_sve_plus_immediate"
+ (ior (match_operand 0 "aarch64_sve_scalar_inc_dec_immediate")
+ (match_operand 0 "aarch64_sve_addvl_addpl_immediate")))
+
(define_predicate "aarch64_split_add_offset_immediate"
(and (match_code "const_poly_int")
(match_test "aarch64_add_offset_temporaries (op) == 1")))
(define_predicate "aarch64_pluslong_operand"
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_pluslong_immediate")
- (match_operand 0 "aarch64_sve_addvl_addpl_immediate")))
+ (and (match_test "TARGET_SVE")
+ (match_operand 0 "aarch64_sve_plus_immediate"))))
(define_predicate "aarch64_pluslong_or_poly_operand"
(ior (match_operand 0 "aarch64_pluslong_operand")
(and (match_code "const,const_vector")
(match_test "aarch64_sve_arith_immediate_p (op, true)")))
-(define_predicate "aarch64_sve_inc_dec_immediate"
+(define_predicate "aarch64_sve_vector_inc_dec_immediate"
(and (match_code "const,const_vector")
- (match_test "aarch64_sve_inc_dec_immediate_p (op)")))
+ (match_test "aarch64_sve_vector_inc_dec_immediate_p (op)")))
(define_predicate "aarch64_sve_uxtb_immediate"
(and (match_code "const_vector")
(define_predicate "aarch64_sve_add_operand"
(ior (match_operand 0 "aarch64_sve_arith_operand")
(match_operand 0 "aarch64_sve_sub_arith_immediate")
- (match_operand 0 "aarch64_sve_inc_dec_immediate")))
+ (match_operand 0 "aarch64_sve_vector_inc_dec_immediate")))
(define_predicate "aarch64_sve_pred_and_operand"
(ior (match_operand 0 "register_operand")