+2015-10-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * config/aarch64/aarch64.c (aarch64_mode_valid_for_sched_fusion_p):
+ New function.
+ (fusion_load_store): Use it.
+ * config/aarch64/aarch64-ldpstp.md: Add new peephole2s for
+ ldp and stp in VD modes.
+ * config/aarch64/aarch64-simd.md (load_pair<mode>, VD): New pattern.
+ (store_pair<mode>, VD): Likewise.
+
2015-10-20 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/67609
}
})
+;; Rewrite two VD-mode loads accepted by aarch64_operands_ok_for_ldpstp
+;; into a single parallel of both loads.  The TARGET_SIMD guard mirrors
+;; the one on the corresponding VD store peephole and on the
+;; load_pair<mode> insn pattern.
+(define_peephole2
+  [(set (match_operand:VD 0 "register_operand" "")
+        (match_operand:VD 1 "aarch64_mem_pair_operand" ""))
+   (set (match_operand:VD 2 "register_operand" "")
+        (match_operand:VD 3 "memory_operand" ""))]
+  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+              (set (match_dup 2) (match_dup 3))])]
+{
+  rtx base, offset_1, offset_2;
+
+  extract_base_offset_in_addr (operands[1], &base, &offset_1);
+  extract_base_offset_in_addr (operands[3], &base, &offset_2);
+  /* Emit the load with the lower offset first: swap both the
+     destination registers and the memory operands when the first load
+     has the higher offset.  */
+  if (INTVAL (offset_1) > INTVAL (offset_2))
+    {
+      std::swap (operands[0], operands[2]);
+      std::swap (operands[1], operands[3]);
+    }
+})
+
+;; Rewrite two VD-mode stores accepted by aarch64_operands_ok_for_ldpstp
+;; into a single parallel of both stores.
+(define_peephole2
+  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "")
+        (match_operand:VD 1 "register_operand" ""))
+   (set (match_operand:VD 2 "memory_operand" "")
+        (match_operand:VD 3 "register_operand" ""))]
+  "TARGET_SIMD && aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+              (set (match_dup 2) (match_dup 3))])]
+{
+  rtx addr_base, first_off, second_off;
+
+  extract_base_offset_in_addr (operands[0], &addr_base, &first_off);
+  extract_base_offset_in_addr (operands[2], &addr_base, &second_off);
+
+  /* Order the pair so that the store to the lower offset comes first;
+     the source registers are swapped together with their destinations.  */
+  const bool out_of_order = INTVAL (second_off) < INTVAL (first_off);
+  if (out_of_order)
+    {
+      std::swap (operands[1], operands[3]);
+      std::swap (operands[0], operands[2]);
+    }
+})
+
+
;; Handle sign/zero extended consecutive load/store.
(define_peephole2
(set_attr "length" "4,4,4,8,8,8,4")]
)
+;; Load two VD-mode registers (operands 0 and 2) from memory (operands 1
+;; and 3) with a single ldp.  The insn condition requires TARGET_SIMD and
+;; that the address of operand 3 equals the address of operand 1 plus
+;; GET_MODE_SIZE of the mode, i.e. the second location directly follows
+;; the first.
+(define_insn "load_pair<mode>"
+ [(set (match_operand:VD 0 "register_operand" "=w")
+ (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
+ (set (match_operand:VD 2 "register_operand" "=w")
+ (match_operand:VD 3 "memory_operand" "m"))]
+ "TARGET_SIMD
+ && rtx_equal_p (XEXP (operands[3], 0),
+ plus_constant (Pmode,
+ XEXP (operands[1], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
+ "ldp\\t%d0, %d2, %1"
+ [(set_attr "type" "neon_ldp")]
+)
+
+;; Store two VD-mode registers (operands 1 and 3) to memory (operands 0
+;; and 2) with a single stp.  The insn condition requires TARGET_SIMD and
+;; that the address of operand 2 equals the address of operand 0 plus
+;; GET_MODE_SIZE of the mode, i.e. the second location directly follows
+;; the first.
+(define_insn "store_pair<mode>"
+ [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
+ (match_operand:VD 1 "register_operand" "w"))
+ (set (match_operand:VD 2 "memory_operand" "=m")
+ (match_operand:VD 3 "register_operand" "w"))]
+ "TARGET_SIMD
+ && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<MODE>mode)))"
+ "stp\\t%d1, %d3, %0"
+ [(set_attr "type" "neon_stp")]
+)
+
(define_split
[(set (match_operand:VQ 0 "register_operand" "")
(match_operand:VQ 1 "register_operand" ""))]
&& offset % GET_MODE_SIZE (mode) == 0);
}
+/* Tell whether MODE is among the modes for which we support LDP/STP
+   operations: SImode, DImode, SFmode, DFmode, or a vector mode accepted
+   by aarch64_vector_mode_supported_p whose GET_MODE_SIZE is 8.  */
+
+static bool
+aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
+{
+  /* The scalar modes are always accepted.  */
+  if (mode == SImode || mode == DImode)
+    return true;
+
+  if (mode == SFmode || mode == DFmode)
+    return true;
+
+  /* Anything else must be a supported vector mode...  */
+  if (!aarch64_vector_mode_supported_p (mode))
+    return false;
+
+  /* ...and of size 8.  */
+  return GET_MODE_SIZE (mode) == 8;
+}
+
/* Return true if X is a valid address for machine mode MODE. If it is,
fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
effect. OUTER_CODE is PARALLEL for a load/store pair. */
src = SET_SRC (x);
dest = SET_DEST (x);
- if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
- && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
+ machine_mode dest_mode = GET_MODE (dest);
+
+ if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
return SCHED_FUSION_NONE;
if (GET_CODE (src) == SIGN_EXTEND)
+2015-10-20 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
+
+ * gcc.target/aarch64/stp_vec_64_1.c: New test.
+ * gcc.target/aarch64/ldp_vec_64_1.c: Likewise.
+
2015-10-20 Alan Lawrence <alan.lawrence@arm.com>
* lib/target-supports.exp (check_effective_target_vect64): Add AArch64.
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));
+
+/* Store into every other element of FOO the sum of two adjacent 8-byte
+   vectors from BAR.  The two adjacent loads in each iteration are what
+   the scan-assembler directive expects to see emitted as an ldp of D
+   registers.  */
+void
+foo (int32x2_t *foo, int32x2_t *bar)
+{
+  int i = 0;
+
+  for (i = 0; i < 1024; i+=2)
+    foo[i] = bar[i] + bar[i + 1];
+}
+
+/* { dg-final { scan-assembler "ldp\td\[0-9\]+, d\[0-9\]" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+
+typedef int int32x2_t __attribute__ ((__vector_size__ ((8))));
+
+/* Fill the first 256 elements of FOO with the constant vector { 3, 2 },
+   two elements per iteration.  */
+void
+bar (int32x2_t *foo)
+{
+ int i = 0;
+ int32x2_t val = { 3, 2 };
+
+ for (i = 0; i < 256; i+=2)
+ {
+ /* Two stores to adjacent 8-byte vector elements; the scan-assembler
+    directive of this test expects an stp of D registers.  */
+ foo[i] = val;
+ foo[i+1] = val;
+ }
+}
+
+/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]" } } */