+2019-02-07 Matthew Malcomson <matthew.malcomson@arm.com>
+ Jakub Jelinek <jakub@redhat.com>
+
+ PR bootstrap/88714
+ * config/arm/arm-protos.h (valid_operands_ldrd_strd,
+ arm_count_ldrdstrd_insns): New declarations.
+ * config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
+ MINUS.
+ (valid_operands_ldrd_strd): New function.
+ (arm_count_ldrdstrd_insns): New function.
+ * config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
+ sets instead of single DImode set and define new insns to match this.
+
2019-02-07 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_fcmla_lane_builtin_data):
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
+extern bool valid_operands_ldrd_strd (rtx *, bool);
extern int arm_gen_movmemqi (rtx *);
extern bool gen_movmem_ldrd_strd (rtx *);
extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
extern const char *output_move_double (rtx *, bool, int *count);
extern const char *output_move_quad (rtx *);
extern int arm_count_output_move_double_insns (rtx *);
+extern int arm_count_ldrdstrd_insns (rtx *, bool);
extern const char *output_move_vfp (rtx *operands);
extern const char *output_move_neon (rtx *operands);
extern int arm_attr_length_move_neon (rtx_insn *);
*base = addr;
return true;
}
- else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
+ else if (GET_CODE (addr) == PLUS)
{
*base = XEXP (addr, 0);
*offset = XEXP (addr, 1);
}
/* Make sure accesses are to consecutive memory locations. */
- if (gap != 4)
+ if (gap != GET_MODE_SIZE (SImode))
return false;
if (!align_ok_ldrd_strd (align[0], offset))
}
+/* Return true if the two word-size accesses described by OPERANDS could be
+   performed by a single LDRD/STRD instruction.  OPERANDS[0] and OPERANDS[1]
+   are the register operands and OPERANDS[2] and OPERANDS[3] are the
+   corresponding memory operands.  LOAD is true when the accesses are loads
+   and false when they are stores.  OPERANDS is only inspected here, never
+   modified.
+
+   The accesses qualify only when both memory operands pass
+   mem_ok_for_ldrd_strd with the same base register, neither register
+   operand is a SUBREG, the second address is exactly one SImode word above
+   the first, and both align_ok_ldrd_strd and operands_ok_ldrd_strd accept
+   the result.  */
+bool
+valid_operands_ldrd_strd (rtx *operands, bool load)
+{
+  int nops = 2;
+  HOST_WIDE_INT offsets[2], offset, align[2];
+  /* Keep the distance in HOST_WIDE_INT: narrowing it to int could make a
+     very large distance compare equal to the word size.  */
+  HOST_WIDE_INT gap;
+  rtx base = NULL_RTX;
+  rtx cur_base, cur_offset;
+  int i;
+
+  /* Check that the memory references are immediate offsets from the
+     same base register.  Extract the base register and the corresponding
+     memory offsets.  */
+  for (i = 0; i < nops; i++)
+    {
+      if (!mem_ok_for_ldrd_strd (operands[nops + i], &cur_base, &cur_offset,
+				 &align[i]))
+	return false;
+
+      if (i == 0)
+	base = cur_base;
+      else if (REGNO (base) != REGNO (cur_base))
+	return false;
+
+      offsets[i] = INTVAL (cur_offset);
+
+      /* A SUBREG register operand is rejected outright.  */
+      if (GET_CODE (operands[i]) == SUBREG)
+	return false;
+    }
+
+  /* The first access must be the lower-addressed one; this predicate does
+     not reorder anything.  */
+  if (offsets[0] > offsets[1])
+    return false;
+
+  gap = offsets[1] - offsets[0];
+  offset = offsets[0];
+
+  /* Make sure accesses are to consecutive memory locations.  */
+  if (gap != GET_MODE_SIZE (SImode))
+    return false;
+
+  if (!align_ok_ldrd_strd (align[0], offset))
+    return false;
+
+  /* NOTE(review): the fifth argument is passed as false; presumably it
+     selects writeback -- confirm against operands_ok_ldrd_strd.  */
+  return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
+				false, load);
+}
\f
/* Print a symbolic form of X to the debug file, F. */
return count;
}
+/* Like the counting routine above, but for an SImode register/memory pair
+   that is output as one DImode move.  OPERANDS[0] supplies the register
+   number and OPERANDS[2] the memory reference (the operand order provided
+   by the arm_{ldrd,strd} patterns).  LOAD selects whether the register is
+   the first (load) or the second (store) operand handed to
+   output_move_double.  Returns the count reported by output_move_double.  */
+int
+arm_count_ldrdstrd_insns (rtx *operands, bool load)
+{
+  /* For a load the register goes in slot 0 and the memory in slot 1; for a
+     store it is the other way round.  */
+  const int reg_slot = load ? 0 : 1;
+  const int mem_slot = 1 - reg_slot;
+  rtx ops[2];
+  int count;
+
+  ops[reg_slot] = gen_rtx_REG (DImode, REGNO (operands[0]));
+  ops[mem_slot] = adjust_address (operands[2], DImode, 0);
+
+  /* Only the count is wanted here; the returned template is ignored.  */
+  output_move_double (ops, false, &count);
+  return count;
+}
+
+
int
vfp3_const_double_for_fract_bits (rtx operand)
{
;; The following peephole optimizations identify consecutive memory
;; accesses, and try to rearrange the operands to enable generation of
;; ldrd/strd.
+;;
+;; In many cases they behave in the same way that patterns in ldmstm.md behave,
+;; but there is extra logic in gen_operands_ldrd_strd to try and ensure the
+;; registers used are an (r<N>, r<N + 1>) pair where N is even.
(define_peephole2 ; ldrd
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 2 "memory_operand" ""))
+ (match_operand:SI 2 "memory_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 3 "memory_operand" ""))]
+ (match_operand:SI 3 "memory_operand" ""))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(parallel [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))])]
{
if (!gen_operands_ldrd_strd (operands, true, false, false))
FAIL;
- else if (TARGET_ARM)
- {
- /* In ARM state, the destination registers of LDRD/STRD must be
- consecutive. We emit DImode access. */
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit [(set (match_dup 0) (match_dup 2))] */
- emit_insn (gen_rtx_SET (operands[0], operands[2]));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(parallel [(set (match_dup 0) (match_dup 2))
- (set (match_dup 1) (match_dup 3))])] */
- rtx t1 = gen_rtx_SET (operands[0], operands[2]);
- rtx t2 = gen_rtx_SET (operands[1], operands[3]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
(define_peephole2 ; strd
(set (match_operand:SI 3 "memory_operand" "")
(match_operand:SI 1 "arm_general_register_operand" ""))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(parallel [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 3) (match_dup 1))])]
{
if (!gen_operands_ldrd_strd (operands, false, false, false))
FAIL;
- else if (TARGET_ARM)
- {
- /* In ARM state, the destination registers of LDRD/STRD must be
- consecutive. We emit DImode access. */
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit [(set (match_dup 2) (match_dup 0))] */
- emit_insn (gen_rtx_SET (operands[2], operands[0]));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(parallel [(set (match_dup 2) (match_dup 0))
- (set (match_dup 3) (match_dup 1))])] */
- rtx t1 = gen_rtx_SET (operands[2], operands[0]);
- rtx t2 = gen_rtx_SET (operands[3], operands[1]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
;; The following peepholes reorder registers to enable LDRD/STRD.
(define_peephole2 ; strd of constants
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 4 "const_int_operand" ""))
(set (match_operand:SI 2 "memory_operand" "")
- (match_dup 0))
+ (match_dup 0))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 5 "const_int_operand" ""))
+ (match_operand:SI 5 "const_int_operand" ""))
(set (match_operand:SI 3 "memory_operand" "")
- (match_dup 1))]
+ (match_dup 1))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 1) (match_dup 5))
+ (parallel [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 3) (match_dup 1))])]
{
if (!gen_operands_ldrd_strd (operands, false, true, false))
FAIL;
- else if (TARGET_ARM)
- {
- rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit the pattern:
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (set (match_dup 2) tmp)] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- emit_insn (gen_rtx_SET (operands[2], tmp));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (parallel [(set (match_dup 2) (match_dup 0))
- (set (match_dup 3) (match_dup 1))])] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- rtx t1 = gen_rtx_SET (operands[2], operands[0]);
- rtx t2 = gen_rtx_SET (operands[3], operands[1]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
(define_peephole2 ; strd of constants
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 4 "const_int_operand" ""))
+ (match_operand:SI 4 "const_int_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 5 "const_int_operand" ""))
+ (match_operand:SI 5 "const_int_operand" ""))
(set (match_operand:SI 2 "memory_operand" "")
- (match_dup 0))
+ (match_dup 0))
(set (match_operand:SI 3 "memory_operand" "")
- (match_dup 1))]
+ (match_dup 1))]
"TARGET_LDRD"
- [(const_int 0)]
+ [(set (match_dup 0) (match_dup 4))
+ (set (match_dup 1) (match_dup 5))
+ (parallel [(set (match_dup 2) (match_dup 0))
+ (set (match_dup 3) (match_dup 1))])]
{
if (!gen_operands_ldrd_strd (operands, false, true, false))
FAIL;
- else if (TARGET_ARM)
- {
- rtx tmp = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- /* Emit the pattern
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (set (match_dup 2) tmp)] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- emit_insn (gen_rtx_SET (operands[2], tmp));
- DONE;
- }
- else if (TARGET_THUMB2)
- {
- /* Emit the pattern:
- [(set (match_dup 0) (match_dup 4))
- (set (match_dup 1) (match_dup 5))
- (parallel [(set (match_dup 2) (match_dup 0))
- (set (match_dup 3) (match_dup 1))])] */
- emit_insn (gen_rtx_SET (operands[0], operands[4]));
- emit_insn (gen_rtx_SET (operands[1], operands[5]));
- rtx t1 = gen_rtx_SET (operands[2], operands[0]);
- rtx t2 = gen_rtx_SET (operands[3], operands[1]);
- emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
- DONE;
- }
})
;; The following two peephole optimizations are only relevant for ARM
(define_peephole2 ; swap the destination registers of two loads
; before a commutative operation.
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 2 "memory_operand" ""))
+ (match_operand:SI 2 "memory_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 3 "memory_operand" ""))
+ (match_operand:SI 3 "memory_operand" ""))
(set (match_operand:SI 4 "arm_general_register_operand" "")
- (match_operator:SI 5 "commutative_binary_operator"
+ (match_operator:SI 5 "commutative_binary_operator"
[(match_operand 6 "arm_general_register_operand" "")
(match_operand 7 "arm_general_register_operand" "") ]))]
"TARGET_LDRD && TARGET_ARM
&& ( ((rtx_equal_p(operands[0], operands[6])) && (rtx_equal_p(operands[1], operands[7])))
- ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
+ ||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
&& (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
&& (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
- [(set (match_dup 0) (match_dup 2))
+ [(parallel [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))])
(set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))]
- {
- if (!gen_operands_ldrd_strd (operands, true, false, true))
- {
- FAIL;
- }
- else
- {
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- }
- }
-)
+{
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
+ FAIL;
+})
(define_peephole2 ; swap the destination registers of two loads
; before a commutative operation that sets the flags.
[(set (match_operand:SI 0 "arm_general_register_operand" "")
- (match_operand:SI 2 "memory_operand" ""))
+ (match_operand:SI 2 "memory_operand" ""))
(set (match_operand:SI 1 "arm_general_register_operand" "")
- (match_operand:SI 3 "memory_operand" ""))
+ (match_operand:SI 3 "memory_operand" ""))
(parallel
[(set (match_operand:SI 4 "arm_general_register_operand" "")
(match_operator:SI 5 "commutative_binary_operator"
||((rtx_equal_p(operands[0], operands[7])) && (rtx_equal_p(operands[1], operands[6]))))
&& (peep2_reg_dead_p (3, operands[0]) || rtx_equal_p (operands[0], operands[4]))
&& (peep2_reg_dead_p (3, operands[1]) || rtx_equal_p (operands[1], operands[4]))"
- [(set (match_dup 0) (match_dup 2))
+ [(parallel [(set (match_dup 0) (match_dup 2))
+ (set (match_dup 1) (match_dup 3))])
(parallel
[(set (match_dup 4)
(match_op_dup 5 [(match_dup 6) (match_dup 7)]))
(clobber (reg:CC CC_REGNUM))])]
- {
- if (!gen_operands_ldrd_strd (operands, true, false, true))
- {
- FAIL;
- }
- else
- {
- operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
- operands[2] = adjust_address (operands[2], DImode, 0);
- }
- }
-)
+{
+ if (!gen_operands_ldrd_strd (operands, true, false, true))
+ FAIL;
+})
;; TODO: Handle LDRD/STRD with writeback:
;; (a) memory operands can be POST_INC, POST_DEC, PRE_MODIFY, POST_MODIFY
;; (b) Patterns may be followed by an update of the base address.
+
+
+;; insns matching the LDRD/STRD patterns that will get created by the above
+;; peepholes.
+;; We use valid_operands_ldrd_strd() in the insn conditions; it only checks
+;; the operands and does not change them.
+;; Two SImode loads held in one PARALLEL.  Operands 0 and 1 are the
+;; destination registers; operands 2 and 3 are the memory sources.  The
+;; pattern only matches when TARGET_LDRD and TARGET_ARM hold, reload has
+;; completed, and valid_operands_ldrd_strd accepts the operands as a load.
+(define_insn "*arm_ldrd"
+ [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
+ (match_operand:SI 2 "memory_operand" "m"))
+ (set (match_operand:SI 1 "s_register_operand" "=r")
+ (match_operand:SI 3 "memory_operand" "m"))])]
+ "TARGET_LDRD && TARGET_ARM && reload_completed
+ && valid_operands_ldrd_strd (operands, true)"
+ {
+ /* Re-express the pair as one DImode move for output_move_double: the
+    register numbered as operand 0, and operand 2's address, both viewed
+    in DImode.  Operands 1 and 3 are not referenced here.  */
+ rtx op[2];
+ op[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ op[1] = adjust_address (operands[2], DImode, 0);
+ return output_move_double (op, true, NULL);
+ }
+ ;; "length" is 4 times the count returned by arm_count_ldrdstrd_insns, and
+ ;; "ce_count" is that length divided by 4 again.
+ [(set (attr "length")
+ (symbol_ref "arm_count_ldrdstrd_insns (operands, true) * 4"))
+ (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+ (set_attr "type" "load_8")
+ (set_attr "predicable" "yes")]
+)
+
+;; Two SImode stores held in one PARALLEL.  Operands 0 and 1 are the source
+;; registers; operands 2 and 3 are the memory destinations.  The pattern only
+;; matches when TARGET_LDRD and TARGET_ARM hold, reload has completed, and
+;; valid_operands_ldrd_strd accepts the operands as a store.
+(define_insn "*arm_strd"
+ [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
+ (match_operand:SI 0 "s_register_operand" "r"))
+ (set (match_operand:SI 3 "memory_operand" "=m")
+ (match_operand:SI 1 "s_register_operand" "r"))])]
+ "TARGET_LDRD && TARGET_ARM && reload_completed
+ && valid_operands_ldrd_strd (operands, false)"
+ {
+ /* Re-express the pair as one DImode move for output_move_double: operand
+    2's address first, then the register numbered as operand 0, both viewed
+    in DImode.  Operands 1 and 3 are not referenced here.  */
+ rtx op[2];
+ op[0] = adjust_address (operands[2], DImode, 0);
+ op[1] = gen_rtx_REG (DImode, REGNO (operands[0]));
+ return output_move_double (op, true, NULL);
+ }
+ ;; "length" is 4 times the count returned by arm_count_ldrdstrd_insns, and
+ ;; "ce_count" is that length divided by 4 again.
+ [(set (attr "length")
+ (symbol_ref "arm_count_ldrdstrd_insns (operands, false) * 4"))
+ (set (attr "ce_count") (symbol_ref "get_attr_length (insn) / 4"))
+ (set_attr "type" "store_8")
+ (set_attr "predicable" "yes")]
+)
+2019-02-07 Matthew Malcomson <matthew.malcomson@arm.com>
+ Jakub Jelinek <jakub@redhat.com>
+
+ PR bootstrap/88714
+ * gcc.c-torture/execute/pr88714.c: New test.
+ * gcc.dg/rtl/arm/ldrd-peepholes.c: New test.
+
2019-02-07 Tamar Christina <tamar.christina@arm.com>
PR/target 88850
--- /dev/null
+/* PR bootstrap/88714 */
+
+struct S { int a, b, c; int *d; };
+struct T { int *e, *f, *g; } *t = 0;
+int *o = 0;
+
+/* Checker called by foo.  Aborts unless the arguments match one of the two
+   calls foo makes: with W == -1, X, Y and Z must all be zero; otherwise W
+   and Y must be zero, X must equal t->g and Z must be 12.  */
+__attribute__((noipa))
+void bar (int *x, int y, int z, int w)
+{
+ if (w == -1)
+ {
+ if (x != 0 || y != 0 || z != 0)
+ __builtin_abort ();
+ }
+ else if (w != 0 || x != t->g || y != 0 || z != 12)
+ __builtin_abort ();
+}
+
+/* Function under test.  Stores W through o, calls bar with the W == -1
+   argument pattern when W is nonzero, stores Z into X->d, then, if Y->d is
+   non-null, adds Y->d[0] to Y->c, and finally passes Y->c to bar.  main
+   passes the same object for X and Y, so the load of Y->d has to observe
+   the preceding store to X->d for the checks to pass.  */
+__attribute__((noipa)) void
+foo (struct S *x, struct S *y, int *z, int w)
+{
+ *o = w;
+ if (w)
+ bar (0, 0, 0, -1);
+ /* X and Y may alias: this store feeds the Y->d load just below.  */
+ x->d = z;
+ if (y->d)
+ y->c = y->c + y->d[0];
+ bar (t->g, 0, y->c, 0);
+}
+
+/* Driver.  Calls foo with X and Y both pointing at s and Z pointing at
+   a[1], then checks that s.d was updated to &a[1] and that s.c became
+   3 + a[1] == 12, i.e. that foo read s.d after storing to it.  */
+int
+main ()
+{
+ int a[4] = { 8, 9, 10, 11 };
+ struct S s = { 1, 2, 3, &a[0] };
+ struct T u = { 0, 0, &a[3] };
+ o = &a[2];
+ t = &u;
+ foo (&s, &s, &a[1], 5);
+ /* Using the stale s.d (&a[0]) would give s.c == 3 + 8 == 11 instead.  */
+ if (s.c != 12 || s.d != &a[1])
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+/* { dg-do compile } */
+/* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* } { "-mthumb" } { "" } } */
+/* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */
+
+/*
+   This test file contains testcases that check:
+     1) Each peephole generates the expected patterns.
+     2) These patterns match the expected define_insns and generate ldrd/strd.
+     3) Memory alias information is not lost in the peephole transformation.
+
+ I don't check the peephole pass on most of the functions here but just check
+ the correct assembly is output. The ldrd/strd peepholes only generate a
+ different pattern to the ldm/stm peepholes in some specific cases, and those
+ are checked.
+
+ The exceptions are tested by the crafted testcases at the end of this file
+ that are named in the pattern foo_x[[:digit:]].
+
+ The first testcase (foo_mem_11) demonstrates bug 88714 is fixed by checking
+ that both alias sets in the RTL are preserved.
+
+ All other testcases are only checked to see that they generate a LDRD or
+ STRD instruction accordingly.
+ */
+
+
+/* Example of bugzilla 88714 -- memory aliasing info needs to be retained. */
+int __RTL (startwith ("peephole2")) foo_mem_11 (int *a, int *b)
+{
+(function "foo_mem_11"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 101 (set (reg:SI r2)
+ (mem/c:SI (reg:SI r0) [1 S4 A64])) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (reg:SI r3)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [2 S4 A32])) "/home/matmal01/test.c":18)
+ (cinsn 103 (set (reg:SI r0)
+ (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo_mem_11"
+}
+/* { dg-final { scan-rtl-dump {Function foo_mem_11.*\(mem/c:SI[^\n]*\[1.*\(mem/c:SI[^\n]*\n[^\n]*\[2.*Function foo11} "peephole2" } } */
+
+/* ldrd plain peephole2. */
+int __RTL (startwith ("peephole2")) foo11 (int *a)
+{
+(function "foo11"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 101 (set (reg:SI r2)
+ (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (reg:SI r3)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+ (cinsn 103 (set (reg:SI r0)
+ (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo11"
+}
+
+/* ldrd plain peephole2, which accepts insns initially out of order. */
+int __RTL (startwith ("peephole2")) foo11_alt (int *a)
+{
+(function "foo11_alt"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 102 (set (reg:SI r3)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (reg:SI r2)
+ (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+ (cinsn 103 (set (reg:SI r0)
+ (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo11_alt"
+}
+
+/* strd plain peephole2. */
+int __RTL (startwith ("peephole2")) foo12 (int *a)
+{
+(function "foo12"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+ (reg:SI r2)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+ (reg:SI r3)) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo12"
+}
+
+/* strd of constants -- store interleaved with constant move into register.
+ Use same register twice to ensure we use the relevant pattern. */
+int __RTL (startwith ("peephole2")) foo13 (int *a)
+{
+(function "foo13"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 99 (set (reg:SI r2)
+ (const_int 1)) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+ (reg:SI r2)) "/home/matmal01/test.c":18)
+ (cinsn 100 (set (reg:SI r2)
+ (const_int 0)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+ (reg:SI r2)) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo13"
+}
+
+/* strd of constants -- stores after constant moves into registers.
+   Using registers out of order is the only way to avoid the plain strd
+   peephole while hitting this pattern.  */
+int __RTL (startwith ("peephole2")) foo14 (int *a)
+{
+(function "foo14"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 99 (set (reg:SI r3)
+ (const_int 1)) "/home/matmal01/test.c":18)
+ (cinsn 100 (set (reg:SI r2)
+ (const_int 0)) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+ (reg:SI r3)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+ (reg:SI r2)) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo14"
+}
+
+/* swap the destination registers of two loads before a commutative operation.
+ Here the commutative operation is what the peephole uses to know it can
+ swap the register loads around. */
+int __RTL (startwith ("peephole2")) foo15 (int *a)
+{
+(function "foo15"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 100 (set (reg:SI r3)
+ (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (reg:SI r2)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (reg:SI r0)
+ (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18
+ (expr_list:REG_DEAD (reg:SI r2)
+ (expr_list:REG_DEAD (reg:SI r3)
+ (nil))))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo15"
+}
+
+
+/* swap the destination registers of two loads before a commutative operation
+ that sets the flags. */
+/*
+ NOTE Can't make a testcase for this pattern since there are no insn patterns
+ matching the parallel insn in the peephole.
+
+ i.e. until some define_insn is defined matching that insn that peephole can
+ never match in real code, and in artificial RTL code any pattern that can
+ match it will cause an ICE.
+
+int __RTL (startwith ("peephole2")) foo16 (int *a)
+{
+(function "foo16"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 100 (set (reg:SI r3)
+ (mem/c:SI (reg:SI r0) [0 S4 A64])) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (reg:SI r2)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])) "/home/matmal01/test.c":18)
+ (cinsn 103 (parallel
+ [(set (reg:SI r0)
+ (and:SI (reg:SI r3) (reg:SI r2)))
+ (clobber (reg:CC cc))]) "/home/matmal01/test.c":18
+ (expr_list:REG_DEAD (reg:SI r2)
+ (expr_list:REG_DEAD (reg:SI r3)
+ (nil))))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo16"
+}
+*/
+
+
+/* Making patterns that will behave differently between the LDM/STM peepholes
+ and LDRD/STRD peepholes.
+ gen_operands_ldrd_strd() uses peep2_find_free_register() to find spare
+ registers to use.
+ peep2_find_free_register() only ever returns registers marked in
+ call_used_regs, hence we make sure to leave register 2 and 3 available (as
+ they are always on in the defaults marked by CALL_USED_REGISTERS). */
+
+/* gen_operands_ldrd_strd() purposefully finds an even register to look at
+ which would treat the following pattern differently to the stm peepholes.
+ */
+int __RTL (startwith ("peephole2")) foo_x1 (int *a)
+{
+(function "foo_x1"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 99 (set (reg:SI r5)
+ (const_int 1)) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+ (reg:SI r5)) "/home/matmal01/test.c":18)
+ (cinsn 100 (set (reg:SI r5)
+ (const_int 0)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+ (reg:SI r5)) "/home/matmal01/test.c":18
+ (expr_list:REG_DEAD (reg:SI r5)
+ (nil)))
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo_x1"
+}
+/* Ensure we generated a parallel that started with a set from an even register.
+ i.e.
+ (parallel [
+ (set (mem
+ (reg:SI <even>
+ */
+/* { dg-final { scan-rtl-dump {Function foo_x1.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x2} "peephole2" } } */
+
+/* Like above gen_operands_ldrd_strd() would look to start with an even
+ register while gen_const_stm_seq() doesn't care. */
+int __RTL (startwith ("peephole2")) foo_x2 (int *a)
+{
+(function "foo_x2"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 99 (set (reg:SI r5)
+ (const_int 1)) "/home/matmal01/test.c":18)
+ (cinsn 100 (set (reg:SI r6)
+ (const_int 0)) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+ (reg:SI r5)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+ (reg:SI r6)) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo_x2"
+}
+/* Ensure generated parallel starts with a set from an even register (as foo_x1). */
+/* { dg-final { scan-rtl-dump {Function foo_x2.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x3} "peephole2" } } */
+
+/* When storing multiple values into a register that will be used later, ldrd
+ searches for another register to use instead of just giving up. */
+int __RTL (startwith ("peephole2")) foo_x3 (int *a)
+{
+(function "foo_x3"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 99 (set (reg:SI r3)
+ (const_int 1)) "/home/matmal01/test.c":18)
+ (cinsn 101 (set (mem/c:SI (reg:SI r0) [0 S4 A64])
+ (reg:SI r3)) "/home/matmal01/test.c":18)
+ (cinsn 100 (set (reg:SI r3)
+ (const_int 0)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 4)) [0 S4 A32])
+ (reg:SI r3)) "/home/matmal01/test.c":18)
+ (cinsn 103 (set (reg:SI r0)
+ (plus:SI (reg:SI r0) (reg:SI r3))) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo_x3"
+}
+/* Ensure generated parallel starts with a set from an even register (as foo_x1). */
+/* { dg-final { scan-rtl-dump {Function foo_x3.*\(parallel \[\n[^\n]*\(set \(mem[^\n]*\n[^\n]*\(reg:SI (?:[12])?[2468] r(?:[12])?[2468]\).*Function foo_x4} "peephole2" } } */
+
+/* ldrd gen_peephole2_11 but using plus 8 and plus 12 in the offsets. */
+int __RTL (startwith ("peephole2")) foo_x4 (int *a)
+{
+(function "foo_x4"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 101 (set (reg:SI r2)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (reg:SI r3)
+ (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])) "/home/matmal01/test.c":18)
+ (cinsn 103 (set (reg:SI r0)
+ (plus:SI (reg:SI r2) (reg:SI r3))) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo_x4"
+}
+/* Ensure generated parallel starts with a set from the appropriate offset from
+ register 0.
+(parallel [
+ (set (reg:SI ...
+ (mem/c:SI (plus:SI (reg:SI 0 r0)
+ (const_int 8 .*
+*/
+/* { dg-final { scan-rtl-dump {Function foo_x4.*\(parallel \[\n[^\n]*\(set \(reg:SI[^\n]*\n *\(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*Function foo_x5} "peephole2" } } */
+
+/* strd gen_peephole2_12 but using plus 8 and plus 12 in the offsets. */
+/* RTL fixture: two SImode stores of r2 and r3 to [r0, #8] and [r0, #12],
+   fed to the compiler starting at the peephole2 pass.  The RTL function
+   name is kept in step with the enclosing C function name.  */
+int __RTL (startwith ("peephole2")) foo_x5 (int *a)
+{
+(function "foo_x5"
+ (insn-chain
+ (cnote 1 NOTE_INSN_DELETED)
+ (block 2
+ (edge-from entry (flags "FALLTHRU"))
+ (cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
+ (cinsn 101 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 8)) [0 S4 A64])
+ (reg:SI r2)) "/home/matmal01/test.c":18)
+ (cinsn 102 (set (mem/c:SI (plus:SI (reg:SI r0) (const_int 12)) [0 S4 A32])
+ (reg:SI r3)) "/home/matmal01/test.c":18)
+ (edge-to exit (flags "FALLTHRU"))
+ ) ;; block 2
+ ) ;; insn-chain
+ (crtl
+ (return_rtx
+ (reg/i:SI r0)
+ ) ;; return_rtx
+ ) ;; crtl
+) ;; function "foo_x5"
+}
+/* Ensure generated parallel starts with a set to the appropriate offset from
+ register 0. */
+/* { dg-final { scan-rtl-dump {Function foo_x5.*\(parallel \[\n[^\n]*\(set \(mem/c:SI \(plus:SI \(reg:SI 0 r0\)\n *\(const_int 8.*$} "peephole2" } } */
+
+
+/* { dg-final { scan-assembler-not "ldm" } } */
+/* { dg-final { scan-assembler-not "stm" } } */
+/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0\]} 4 } } */
+/* { dg-final { scan-assembler-times {ldrd\tr[2468], \[r0, #8\]} 1 } } */
+/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0\]} 6 } } */
+/* { dg-final { scan-assembler-times {strd\tr[2468], \[r0, #8\]} 1 } } */