else
FAIL;
})
+
+;; Store variant of the adjusted-pair peephole for the VP_2E vector modes.
+;; Matches four consecutive stores of aarch64_reg_or_zero operands to
+;; memory, together with a DI scratch register (operand 8).
+;; The condition requires TARGET_SIMD and leaves the operand checks to
+;; aarch64_operands_adjust_ok_for_ldpstp; its second argument is false
+;; because these are stores rather than loads.
+;; The replacement template is only a placeholder: the preparation
+;; statements call aarch64_gen_adjusted_ldpstp and finish with DONE when it
+;; returns true, or FAIL (pattern not applied) otherwise.
+(define_peephole2
+ [(match_scratch:DI 8 "r")
+ (set (match_operand:VP_2E 0 "memory_operand" "")
+ (match_operand:VP_2E 1 "aarch64_reg_or_zero" ""))
+ (set (match_operand:VP_2E 2 "memory_operand" "")
+ (match_operand:VP_2E 3 "aarch64_reg_or_zero" ""))
+ (set (match_operand:VP_2E 4 "memory_operand" "")
+ (match_operand:VP_2E 5 "aarch64_reg_or_zero" ""))
+ (set (match_operand:VP_2E 6 "memory_operand" "")
+ (match_operand:VP_2E 7 "aarch64_reg_or_zero" ""))
+ (match_dup 8)]
+ "TARGET_SIMD
+ && aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
+ [(const_int 0)]
+{
+ if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
+ DONE;
+ else
+ FAIL;
+})
+
+;; Load variant of the adjusted-pair peephole for the VP_2E vector modes.
+;; Matches four consecutive loads from memory into registers, together
+;; with a DI scratch register (operand 8).
+;; The condition requires TARGET_SIMD and leaves the operand checks to
+;; aarch64_operands_adjust_ok_for_ldpstp; its second argument is true
+;; because these are loads.
+;; The replacement template is only a placeholder: the preparation
+;; statements call aarch64_gen_adjusted_ldpstp and finish with DONE when it
+;; returns true, or FAIL (pattern not applied) otherwise.
+(define_peephole2
+ [(match_scratch:DI 8 "r")
+ (set (match_operand:VP_2E 0 "register_operand" "")
+ (match_operand:VP_2E 1 "memory_operand" ""))
+ (set (match_operand:VP_2E 2 "register_operand" "")
+ (match_operand:VP_2E 3 "memory_operand" ""))
+ (set (match_operand:VP_2E 4 "register_operand" "")
+ (match_operand:VP_2E 5 "memory_operand" ""))
+ (set (match_operand:VP_2E 6 "register_operand" "")
+ (match_operand:VP_2E 7 "memory_operand" ""))
+ (match_dup 8)]
+ "TARGET_SIMD
+ && aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
+ [(const_int 0)]
+{
+ if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
+ DONE;
+ else
+ FAIL;
+})
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
-bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
+/* Takes machine_mode rather than scalar_mode so that vector modes can be
+   passed as well as scalar ones.  The bool selects load (true) or store
+   (false).  */
+bool aarch64_gen_adjusted_ldpstp (rtx *, bool, machine_mode, RTX_CODE);
void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
-bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, scalar_mode);
+/* Takes machine_mode rather than scalar_mode so that vector modes can be
+   passed as well as scalar ones.  The bool selects load (true) or store
+   (false).  */
+bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
void aarch64_swap_ldrstr_operands (rtx *, bool);
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
- scalar_mode mode)
+ machine_mode mode)
{
const int num_insns = 4;
enum reg_class rclass;
for (int i = 0; i < num_insns; i++)
offvals[i] = INTVAL (offset[i]);
- msize = GET_MODE_SIZE (mode);
+ msize = GET_MODE_SIZE (mode).to_constant ();
/* Check if the offsets can be put in the right order to do a ldp/stp. */
qsort (offvals, num_insns, sizeof (HOST_WIDE_INT),
bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
- scalar_mode mode, RTX_CODE code)
+ machine_mode mode, RTX_CODE code)
{
rtx base, offset_1, offset_3, t1, t2;
rtx mem_1, mem_2, mem_3, mem_4;
&& offset_3 != NULL_RTX);
/* Adjust offset so it can fit in LDP/STP instruction. */
- msize = GET_MODE_SIZE (mode);
+ msize = GET_MODE_SIZE (mode).to_constant();
stp_off_upper_limit = msize * (0x40 - 1);
stp_off_lower_limit = - msize * 0x40;
;; Copy of the above.
(define_mode_iterator DREG2 [V8QI V4HI V4HF V2SI V2SF DF])
+;; All two-element vector modes (integer and floating-point) suitable for
+;; storing/loading as a pair using STP/LDP.
+(define_mode_iterator VP_2E [V2SI V2SF V2DI V2DF])
+
;; Advanced SIMD, 64-bit container, all integer modes.
(define_mode_iterator VD_BHSI [V8QI V4HI V2SI])
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef float __attribute__((vector_size(8))) vec;
+
+/* Sum four adjacent 8-byte float vectors, v[110] through v[113] (byte
+   offsets 880 to 904 from v).  The dg-final directives below expect the
+   base to be adjusted by 880 and the four loads to be emitted as two ldp
+   instructions of d registers, with no ldr left over.  */
+vec
+load_long(vec *v) {
+ return v[110] + v[111] + v[112] + v[113];
+}
+
+/* { dg-final { scan-assembler {add\tx[0-9]+, x[0-9]+, 880} } } */
+/* { dg-final { scan-assembler {ldp\td[0-9]+, d[0-9]+, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {ldp\td[0-9]+, d[0-9]+, \[x[0-9]+, 16\]} } } */
+/* { dg-final { scan-assembler-not "ldr\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef int __attribute__((vector_size(8))) vec;
+
+/* Sum four adjacent 8-byte int vectors, v[110] through v[113] (byte
+   offsets 880 to 904 from v).  The dg-final directives below expect the
+   base to be adjusted by 880 and the four loads to be emitted as two ldp
+   instructions of d registers, with no ldr left over.  */
+vec
+load_long(vec *v) {
+ return v[110] + v[111] + v[112] + v[113];
+}
+
+/* { dg-final { scan-assembler {add\tx[0-9]+, x[0-9]+, 880} } } */
+/* { dg-final { scan-assembler {ldp\td[0-9]+, d[0-9]+, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {ldp\td[0-9]+, d[0-9]+, \[x[0-9]+, 16\]} } } */
+/* { dg-final { scan-assembler-not "ldr\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef double __attribute__((vector_size(16))) vec;
+
+/* Store four adjacent 16-byte double vectors to out[100] through out[103]
+   (byte offsets 1600 to 1648 from out).  The dg-final directives below
+   expect the base to be adjusted by 1600 and the four stores to be emitted
+   as two stp instructions of q registers, with no str left over.  */
+void
+store_adjusted(vec *out, vec x, vec y)
+{
+ out[100] = x;
+ out[101] = y;
+ out[102] = y;
+ out[103] = x;
+}
+
+/* { dg-final { scan-assembler {add\tx[0-9]+, x[0-9]+, 1600} } } */
+/* { dg-final { scan-assembler {stp\tq[0-9]+, q[0-9]+, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {stp\tq[0-9]+, q[0-9]+, \[x[0-9]+, 32\]} } } */
+/* { dg-final { scan-assembler-not "str\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef long long __attribute__((vector_size(16))) vec;
+
+/* Store four adjacent 16-byte long long vectors to out[100] through
+   out[103] (byte offsets 1600 to 1648 from out).  The dg-final directives
+   below expect the base to be adjusted by 1600 and the four stores to be
+   emitted as two stp instructions of q registers, with no str left over.  */
+void
+store_adjusted(vec *out, vec x, vec y)
+{
+ out[100] = x;
+ out[101] = y;
+ out[102] = y;
+ out[103] = x;
+}
+
+/* { dg-final { scan-assembler {add\tx[0-9]+, x[0-9]+, 1600} } } */
+/* { dg-final { scan-assembler {stp\tq[0-9]+, q[0-9]+, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {stp\tq[0-9]+, q[0-9]+, \[x[0-9]+, 32\]} } } */
+/* { dg-final { scan-assembler-not "str\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef float __attribute__((vector_size(8))) vec;
+
+/* Store four adjacent 8-byte float vectors to out[400] through out[403]
+   (byte offsets 3200 to 3224 from out).  The dg-final directives below
+   expect the base to be adjusted by 3200 and the four stores to be emitted
+   as two stp instructions of d registers, with no str left over.  */
+void
+store_adjusted(vec *out, vec x, vec y)
+{
+ out[400] = x;
+ out[401] = y;
+ out[402] = y;
+ out[403] = x;
+}
+
+/* { dg-final { scan-assembler {add\tx[0-9]+, x[0-9]+, 3200} } } */
+/* { dg-final { scan-assembler {stp\td[0-9]+, d[0-9]+, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {stp\td[0-9]+, d[0-9]+, \[x[0-9]+, 16\]} } } */
+/* { dg-final { scan-assembler-not "str\t" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef int __attribute__((vector_size(8))) vec;
+
+/* Store four adjacent 8-byte int vectors to out[400] through out[403]
+   (byte offsets 3200 to 3224 from out).  The dg-final directives below
+   expect the base to be adjusted by 3200 and the four stores to be emitted
+   as two stp instructions of d registers, with no str left over.  */
+void
+store_adjusted(vec *out, vec x, vec y)
+{
+ out[400] = x;
+ out[401] = y;
+ out[402] = y;
+ out[403] = x;
+}
+
+/* { dg-final { scan-assembler {add\tx[0-9]+, x[0-9]+, 3200} } } */
+/* { dg-final { scan-assembler {stp\td[0-9]+, d[0-9]+, \[x[0-9]+\]} } } */
+/* { dg-final { scan-assembler {stp\td[0-9]+, d[0-9]+, \[x[0-9]+, 16\]} } } */
+/* { dg-final { scan-assembler-not "str\t" } } */