From 798d3d0454c3f8de61f95f9d8ff53da3465f19fa Mon Sep 17 00:00:00 2001 From: Greta Yorsh Date: Thu, 16 May 2013 13:02:06 +0100 Subject: [PATCH] Internal memcpy using LDRD/STRD 2013-05-16 Greta Yorsh gcc/ * config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration. * config/arm/arm.c (next_consecutive_mem): New function. (gen_movmem_ldrd_strd): Likewise. * config/arm/arm.md (movmemqi): Update condition and code. (unaligned_loaddi, unaligned_storedi): New patterns. gcc/testsuite * gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output. * gcc.target/arm/unaligned-memcpy-3.c: Likewise. * gcc.target/arm/unaligned-memcpy-4.c: Likewise. From-SVN: r198970 --- gcc/ChangeLog | 8 ++ gcc/config/arm/arm-protos.h | 1 + gcc/config/arm/arm.c | 128 ++++++++++++++++++ gcc/config/arm/arm.md | 68 +++++++++- gcc/testsuite/ChangeLog | 6 + .../gcc.target/arm/unaligned-memcpy-2.c | 5 +- .../gcc.target/arm/unaligned-memcpy-3.c | 9 +- .../gcc.target/arm/unaligned-memcpy-4.c | 8 +- 8 files changed, 226 insertions(+), 7 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 343d3fc5b5a..15fe33b4c16 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2013-05-16 Greta Yorsh + + * config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration. + * config/arm/arm.c (next_consecutive_mem): New function. + (gen_movmem_ldrd_strd): Likewise. + * config/arm/arm.md (movmemqi): Update condition and code. + (unaligned_loaddi, unaligned_storedi): New patterns. + 2013-05-16 Rainer Orth * config.gcc: Obsolete *-*-solaris2.9*. 
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index a6af9275712..c791341f69b 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -120,6 +120,7 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); extern int arm_gen_movmemqi (rtx *); +extern bool gen_movmem_ldrd_strd (rtx *); extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, HOST_WIDE_INT); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 036db8ab6fb..c4f5c6907f8 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -11855,6 +11855,134 @@ arm_gen_movmemqi (rtx *operands) return 1; } +/* Helper for gen_movmem_ldrd_strd. Return MEM with its address advanced +by the size of its mode; the mode itself is unchanged. */ +inline static rtx +next_consecutive_mem (rtx mem) +{ + enum machine_mode mode = GET_MODE (mem); + HOST_WIDE_INT offset = GET_MODE_SIZE (mode); + rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset); + + return adjust_automodify_address (mem, mode, addr, offset); +} + +/* Expand a block copy of at most 64 bytes, using LDRD/STRD when possible. + OPERANDS are dst, src, length and alignment. Returns true upon success. */ +bool +gen_movmem_ldrd_strd (rtx *operands) +{ + unsigned HOST_WIDE_INT len; + HOST_WIDE_INT align; + rtx src, dst, base; + rtx reg0; + bool src_aligned, dst_aligned; + bool src_volatile, dst_volatile; + + gcc_assert (CONST_INT_P (operands[2])); + gcc_assert (CONST_INT_P (operands[3])); + + len = UINTVAL (operands[2]); + if (len > 64) + return false; + + /* Maximum alignment we can assume for both src and dst buffers. */ + align = INTVAL (operands[3]); + + if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0)) + return false; + + /* Record volatility and word alignment of dst and src, then copy + their addresses into registers and rebase each mem rtx on them. 
*/ + dst = operands[0]; + dst_volatile = MEM_VOLATILE_P (dst); + dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (dst, 0)); + dst = adjust_automodify_address (dst, VOIDmode, base, 0); + + src = operands[1]; + src_volatile = MEM_VOLATILE_P (src); + src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD; + base = copy_to_mode_reg (SImode, XEXP (src, 0)); + src = adjust_automodify_address (src, VOIDmode, base, 0); + + if (!unaligned_access && !(src_aligned && dst_aligned)) + return false; + + if (src_volatile || dst_volatile) + return false; + + /* Neither buffer is word-aligned, so no LDRD/STRD; try LDM/STM instead. */ + if (!(dst_aligned || src_aligned)) + return arm_gen_movmemqi (operands); + + src = adjust_address (src, DImode, 0); + dst = adjust_address (dst, DImode, 0); + while (len >= 8) + { + len -= 8; + reg0 = gen_reg_rtx (DImode); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loaddi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storedi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + } + + gcc_assert (len < 8); + if (len >= 4) + { + /* At least a word but less than a double-word remains. Copy a word. */ + reg0 = gen_reg_rtx (SImode); + src = adjust_address (src, SImode, 0); + dst = adjust_address (dst, SImode, 0); + if (src_aligned) + emit_move_insn (reg0, src); + else + emit_insn (gen_unaligned_loadsi (reg0, src)); + + if (dst_aligned) + emit_move_insn (dst, reg0); + else + emit_insn (gen_unaligned_storesi (dst, reg0)); + + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + len -= 4; + } + + if (len == 0) + return true; + + /* Copy the remaining 1-3 bytes: a halfword if possible, then a byte. 
*/ + if (len >= 2) + { + dst = adjust_address (dst, HImode, 0); + src = adjust_address (src, HImode, 0); + reg0 = gen_reg_rtx (SImode); + emit_insn (gen_unaligned_loadhiu (reg0, src)); + emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0))); + src = next_consecutive_mem (src); + dst = next_consecutive_mem (dst); + if (len == 2) + return true; + } + + dst = adjust_address (dst, QImode, 0); + src = adjust_address (src, QImode, 0); + reg0 = gen_reg_rtx (QImode); + emit_move_insn (reg0, src); + emit_move_insn (dst, reg0); + return true; +} + /* Select a dominance comparison mode if possible for a test of the general form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. COND_OR == DOM_CC_X_AND_Y => (X && Y) diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index d3bc760952d..4b45c984bf4 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -4421,6 +4421,64 @@ (set_attr "predicable" "yes") (set_attr "type" "store1")]) +;; Unaligned double-word load and store. +;; Split after reload into two unaligned single-word accesses. +;; It prevents lower_subreg from splitting some other aligned +;; double-word accesses too early. Used for internal memcpy. + +(define_insn_and_split "unaligned_loaddi" + [(set (match_operand:DI 0 "s_register_operand" "=l,r") + (unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")] + UNSPEC_UNALIGNED_LOAD))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + + /* If the first destination register overlaps with the address of the + first load, swap the order in which the two loads are emitted. 
*/ + if (reg_overlap_mentioned_p (operands[0], operands[1])) + { + rtx tmp = operands[1]; + operands[1] = operands[3]; + operands[3] = tmp; + tmp = operands[0]; + operands[0] = operands[2]; + operands[2] = tmp; + } + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "load2")]) + +(define_insn_and_split "unaligned_storedi" + [(set (match_operand:DI 0 "memory_operand" "=o,o") + (unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")] + UNSPEC_UNALIGNED_STORE))] + "unaligned_access && TARGET_32BIT" + "#" + "&& reload_completed" + [(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE)) + (set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))] + { + operands[2] = gen_highpart (SImode, operands[0]); + operands[0] = gen_lowpart (SImode, operands[0]); + operands[3] = gen_highpart (SImode, operands[1]); + operands[1] = gen_lowpart (SImode, operands[1]); + } + [(set_attr "arch" "t2,any") + (set_attr "length" "4,8") + (set_attr "predicable" "yes") + (set_attr "type" "store2")]) + + (define_insn "*extv_reg" [(set (match_operand:SI 0 "s_register_operand" "=r") (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") @@ -7374,10 +7432,18 @@ (match_operand:BLK 1 "general_operand" "") (match_operand:SI 2 "const_int_operand" "") (match_operand:SI 3 "const_int_operand" "")] - "TARGET_EITHER" + "" " if (TARGET_32BIT) { + if (TARGET_LDRD && current_tune->prefer_ldrd_strd + && !optimize_function_for_size_p (cfun)) + { + if (gen_movmem_ldrd_strd (operands)) + DONE; + FAIL; + } + if (arm_gen_movmemqi (operands)) DONE; FAIL; diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7bacbb58eea..879b9bc1fc7 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2013-05-16 Greta Yorsh + + * gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output. + * gcc.target/arm/unaligned-memcpy-3.c: Likewise. + * gcc.target/arm/unaligned-memcpy-4.c: Likewise. 
+ 2013-05-16 Nathan Sidwell * gcc.dg/visibility-21.c: New. diff --git a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c index c7d24c9c5c3..f7bc2f4f316 100644 --- a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c +++ b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-2.c @@ -14,7 +14,10 @@ void aligned_dest (char *src) /* Expect a multi-word store for the main part of the copy, but subword loads/stores for the remainder. */ -/* { dg-final { scan-assembler-times "stmia" 1 } } */ +/* { dg-final { scan-assembler-times "ldmia" 0 } } */ +/* { dg-final { scan-assembler-times "ldrd" 0 } } */ +/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */ /* { dg-final { scan-assembler-times "ldrh" 1 } } */ /* { dg-final { scan-assembler-times "strh" 1 } } */ /* { dg-final { scan-assembler-times "ldrb" 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c index 5f0413738c5..9e2d1641bbc 100644 --- a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c +++ b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-3.c @@ -14,8 +14,11 @@ void aligned_src (char *dest) /* Expect a multi-word load for the main part of the copy, but subword loads/stores for the remainder. */ -/* { dg-final { scan-assembler-times "ldmia" 1 } } */ -/* { dg-final { scan-assembler-times "ldrh" 1 } } */ +/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler-times "strd" 0 } } */ +/* { dg-final { scan-assembler-times "stm" 0 } } */ +/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! 
{ arm_prefer_ldrd_strd } } } } } */ /* { dg-final { scan-assembler-times "strh" 1 } } */ -/* { dg-final { scan-assembler-times "ldrb" 1 } } */ +/* { dg-final { scan-assembler-times "ldrb" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ /* { dg-final { scan-assembler-times "strb" 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c index 99957086e7e..4708c510140 100644 --- a/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c +++ b/gcc/testsuite/gcc.target/arm/unaligned-memcpy-4.c @@ -14,5 +14,9 @@ void aligned_both (void) /* We know both src and dest to be aligned: expect multiword loads/stores. */ -/* { dg-final { scan-assembler-times "ldmia" 1 } } */ -/* { dg-final { scan-assembler-times "stmia" 1 } } */ +/* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */ +/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */ +/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */ -- 2.30.2