From 06e972705d2459498212969adac45c592c7a02bb Mon Sep 17 00:00:00 2001 From: Oleg Endo Date: Wed, 14 Feb 2018 12:33:37 +0000 Subject: [PATCH] re PR target/83831 ([RX] Unused bclr,bnot,bset insns) gcc/ PR target/83831 * config/rx/rx-protos.h (rx_reg_dead_or_unused_after_insn, rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New declarations. (set_of_reg): New struct. (rx_find_set_of_reg, rx_find_use_of_reg): New functions. * config/rx/rx.c (rx_reg_dead_or_unused_after_insn, rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New functions. * config/rx/rx.md (andsi3, iorsi3, xorsi3): Convert to insn_and_split. Split into bitclr, bitset, bitinvert patterns if appropriate. (*bitset, *bitinvert, *bitclr): Convert to named insn_and_split and use rx_fuse_in_memory_bitop. (*bitset_in_memory, *bitinvert_in_memory, *bitclr_in_memory): Convert to named insn, correct maximum insn length. gcc/testsuite/ PR target/83831 * gcc.target/rx/pr83831.c: New tests. From-SVN: r257655 --- gcc/ChangeLog | 18 +++++ gcc/config/rx/rx-protos.h | 106 ++++++++++++++++++++++++ gcc/config/rx/rx.c | 82 +++++++++++++++++++ gcc/config/rx/rx.md | 112 ++++++++++++++++++++------ gcc/testsuite/ChangeLog | 5 ++ gcc/testsuite/gcc.target/rx/pr83831.c | 77 ++++++++++++++++++ 6 files changed, 377 insertions(+), 23 deletions(-) create mode 100644 gcc/testsuite/gcc.target/rx/pr83831.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bcaf137e461..22589babaa6 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,21 @@ +2018-02-14 Oleg Endo + + PR target/83831 + * config/rx/rx-protos.h (rx_reg_dead_or_unused_after_insn, + rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New + declarations. + (set_of_reg): New struct. + (rx_find_set_of_reg, rx_find_use_of_reg): New functions. + * config/rx/rx.c (rx_reg_dead_or_unused_after_insn, + rx_copy_reg_dead_or_unused_notes, rx_fuse_in_memory_bitop): New + functions. + * config/rx/rx.md (andsi3, iorsi3, xorsi3): Convert to insn_and_split. + Split into bitclr, bitset, bitinvert patterns if appropriate. + (*bitset, *bitinvert, *bitclr): Convert to named insn_and_split and + use rx_fuse_in_memory_bitop. + (*bitset_in_memory, *bitinvert_in_memory, *bitclr_in_memory): Convert + to named insn, correct maximum insn length. + 2018-02-14 Jozef Lawrynowicz PR target/79242 diff --git a/gcc/config/rx/rx-protos.h b/gcc/config/rx/rx-protos.h index b3c5bfc7a7f..0bb885d2da1 100644 --- a/gcc/config/rx/rx-protos.h +++ b/gcc/config/rx/rx-protos.h @@ -63,6 +63,112 @@ extern void rx_notice_update_cc (rtx, rtx); extern void rx_split_cbranch (machine_mode, enum rtx_code, rtx, rtx, rtx); extern machine_mode rx_select_cc_mode (enum rtx_code, rtx, rtx); + +extern bool rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno); +extern void rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, + rtx_insn* dst); + +extern bool rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn, + rtx (*gen_insn)(rtx, rtx)); + +/* Result value of rx_find_set_of_reg. */ +struct set_of_reg +{ + /* The insn where sh_find_set_of_reg stopped looking. + Can be NULL_RTX if the end of the insn list was reached. */ + rtx_insn* insn; + + /* The set rtx of the specified reg if found, NULL_RTX otherwise. */ + const_rtx set_rtx; + + /* The set source rtx of the specified reg if found, NULL_RTX otherwise. + Usually, this is the most interesting return value. */ + rtx set_src; +}; + +/* FIXME: Copy-pasta from SH. Move to rtl.h. + Given a reg rtx and a start insn, try to find the insn that sets + the specified reg by using the specified insn stepping function, + such as 'prev_nonnote_nondebug_insn_bb'. When the insn is found, + try to extract the rtx of the reg set. */ +template inline set_of_reg +rx_find_set_of_reg (rtx reg, rtx_insn* insn, F stepfunc, + bool ignore_reg_reg_copies = false) +{ + set_of_reg result; + result.insn = insn; + result.set_rtx = NULL_RTX; + result.set_src = NULL_RTX; + + if (!REG_P (reg) || insn == NULL_RTX) + return result; + + for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i)) + { + if (BARRIER_P (i)) + break; + if (!INSN_P (i) || DEBUG_INSN_P (i)) + continue; + if (reg_set_p (reg, i)) + { + if (CALL_P (i)) + break; + + result.insn = i; + result.set_rtx = set_of (reg, i); + + if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET) + break; + + result.set_src = XEXP (result.set_rtx, 1); + + if (ignore_reg_reg_copies && REG_P (result.set_src)) + { + reg = result.set_src; + continue; + } + if (ignore_reg_reg_copies && SUBREG_P (result.set_src) + && REG_P (SUBREG_REG (result.set_src))) + { + reg = SUBREG_REG (result.set_src); + continue; + } + + break; + } + } + + /* If the searched reg is found inside a (mem (post_inc:SI (reg))), set_of + will return NULL and set_rtx will be NULL. + In this case report a 'not found'. result.insn will always be non-null + at this point, so no need to check it. */ + if (result.set_src != NULL && result.set_rtx == NULL) + result.set_src = NULL; + + return result; +} + +/* FIXME: Move to rtlh.h. */ +template inline rtx_insn* +rx_find_use_of_reg (rtx reg, rtx_insn* insn, F stepfunc) +{ + if (!REG_P (reg) || insn == NULL_RTX) + return NULL; + + for (rtx_insn* i = stepfunc (insn); i != NULL_RTX; i = stepfunc (i)) + { + if (BARRIER_P (i)) + break; + if (!INSN_P (i) || DEBUG_INSN_P (i)) + continue; + if (reg_overlap_mentioned_p (reg, PATTERN (i)) + || (CALL_P (i) && find_reg_fusage (i, USE, reg))) + return i; + } + + return NULL; +} + #endif #endif /* GCC_RX_PROTOS_H */ diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index be8229818ae..0eaf418cd71 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -3439,6 +3439,88 @@ rx_atomic_sequence::~rx_atomic_sequence (void) emit_insn (gen_mvtc (GEN_INT (CTRLREG_PSW), m_prev_psw_reg)); } +/* Given an insn and a reg number, tell whether the reg dies or is unused + after the insn. */ +bool +rx_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno) +{ + return find_regno_note (i, REG_DEAD, regno) != NULL + || find_regno_note (i, REG_UNUSED, regno) != NULL; +} + +/* Copy dead and unused notes from SRC to DST for the specified REGNO. */ +void +rx_copy_reg_dead_or_unused_notes (rtx reg, const rtx_insn* src, rtx_insn* dst) +{ + int regno = REGNO (SUBREG_P (reg) ? SUBREG_REG (reg) : reg); + + if (rtx note = find_regno_note (src, REG_DEAD, regno)) + add_shallow_copy_of_reg_note (dst, note); + + if (rtx note = find_regno_note (src, REG_UNUSED, regno)) + add_shallow_copy_of_reg_note (dst, note); +} + +/* Try to fuse the current bit-operation insn with the surrounding memory load + and store. */ +bool +rx_fuse_in_memory_bitop (rtx* operands, rtx_insn* curr_insn, + rtx (*gen_insn)(rtx, rtx)) +{ + rtx op2_reg = SUBREG_P (operands[2]) ? SUBREG_REG (operands[2]) : operands[2]; + + set_of_reg op2_def = rx_find_set_of_reg (op2_reg, curr_insn, + prev_nonnote_nondebug_insn_bb); + if (op2_def.set_src == NULL_RTX + || !MEM_P (op2_def.set_src) + || GET_MODE (op2_def.set_src) != QImode + || !rx_is_restricted_memory_address (XEXP (op2_def.set_src, 0), + GET_MODE (op2_def.set_src)) + || reg_used_between_p (operands[2], op2_def.insn, curr_insn) + || !rx_reg_dead_or_unused_after_insn (curr_insn, REGNO (op2_reg)) + ) + return false; + + /* The register operand originates from a memory load and the memory load + could be fused with the bitop insn. + Look for the following memory store with the same memory operand. */ + rtx mem = op2_def.set_src; + + /* If the memory is an auto-mod address, it can't be fused. */ + if (GET_CODE (XEXP (mem, 0)) == POST_INC + || GET_CODE (XEXP (mem, 0)) == PRE_INC + || GET_CODE (XEXP (mem, 0)) == POST_DEC + || GET_CODE (XEXP (mem, 0)) == PRE_DEC) + return false; + + rtx_insn* op0_use = rx_find_use_of_reg (operands[0], curr_insn, + next_nonnote_nondebug_insn_bb); + if (op0_use == NULL + || !(GET_CODE (PATTERN (op0_use)) == SET + && RX_REG_P (XEXP (PATTERN (op0_use), 1)) + && reg_overlap_mentioned_p (operands[0], XEXP (PATTERN (op0_use), 1)) + && rtx_equal_p (mem, XEXP (PATTERN (op0_use), 0))) + || !rx_reg_dead_or_unused_after_insn (op0_use, REGNO (operands[0])) + || reg_set_between_p (operands[2], curr_insn, op0_use)) + return false; + + /* If the load-modify-store operation is fused it could potentially modify + load/store ordering if there are other memory accesses between the load + and the store for this insn. If there are volatile mems between the load + and store it's better not to change the ordering. If there is a call + between the load and store, it's also not safe to fuse it. */ + for (rtx_insn* i = next_nonnote_nondebug_insn_bb (op2_def.insn); + i != NULL && i != op0_use; + i = next_nonnote_nondebug_insn_bb (i)) + if (volatile_insn_p (PATTERN (i)) || CALL_P (i)) + return false; + + emit_insn (gen_insn (mem, operands[1])); + set_insn_deleted (op2_def.insn); + set_insn_deleted (op0_use); + return true; +} + /* Implement TARGET_HARD_REGNO_NREGS. */ static unsigned int diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index 3fb2ac854b0..116d4d4a939 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -1094,7 +1094,7 @@ DONE; }) -(define_insn "andsi3" +(define_insn_and_split "andsi3" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r") (and:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0") (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))) @@ -1110,6 +1110,21 @@ and\t%1, %0 and\t%2, %1, %0 and\t%Q2, %0" + "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2]) + && pow2p_hwi (~UINTVAL (operands[2]))" + [(const_int 0)] +{ + /* For negated single bit constants use the bclr insn for smaller code. */ + + if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG)) + FAIL; + + rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn, + emit_insn (gen_bitclr (operands[0], + GEN_INT (exact_log2 (~UINTVAL (operands[2]))), + operands[1]))); + DONE; +} [(set_attr "timings" "11,11,11,11,11,11,11,11,33") (set_attr "length" "2,2,3,4,5,6,2,5,5")] ) @@ -1383,7 +1398,7 @@ [(set_attr "length" "2,3")] ) -(define_insn "iorsi3" +(define_insn_and_split "iorsi3" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r,r,r,r") (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0") (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))) @@ -1399,6 +1414,21 @@ or\t%1, %0 or\t%2, %1, %0 or\t%Q2, %0" + "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2]) + && pow2p_hwi (UINTVAL (operands[2]))" + [(const_int 0)] +{ + /* For single bit constants use the bset insn for smaller code. */ + + if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG)) + FAIL; + + rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn, + emit_insn (gen_bitset (operands[0], + GEN_INT (exact_log2 (UINTVAL (operands[2]))), + operands[1]))); + DONE; +} [(set_attr "timings" "11,11,11,11,11,11,11,11,33") (set_attr "length" "2,2,3,4,5,6,2,3,5")] ) @@ -1704,7 +1734,7 @@ DONE; }) -(define_insn "xorsi3" +(define_insn_and_split "xorsi3" [(set (match_operand:SI 0 "register_operand" "=r,r,r,r,r,r") (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0") (match_operand:SI 2 "rx_source_operand" @@ -1712,6 +1742,21 @@ (clobber (reg:CC CC_REG))] "" "xor\t%Q2, %0" + "&& RX_REG_P (operands[1]) && CONST_INT_P (operands[2]) + && pow2p_hwi (UINTVAL (operands[2]))" + [(const_int 0)] +{ + /* For single bit constants use the bnot insn for smaller code. */ + + if (!rx_reg_dead_or_unused_after_insn (curr_insn, CC_REG)) + FAIL; + + rx_copy_reg_dead_or_unused_notes (operands[1], curr_insn, + emit_insn (gen_bitinvert (operands[0], + GEN_INT (exact_log2 (UINTVAL (operands[2]))), + operands[1]))); + DONE; +} [(set_attr "timings" "11,11,11,11,11,33") (set_attr "length" "3,4,5,6,7,6")] ) @@ -1960,50 +2005,63 @@ ;; Bit manipulation instructions. -;; ??? The *_in_memory patterns will not be matched without further help. -;; At one time we had the insv expander generate them, but I suspect that -;; in general we get better performance by exposing the register load to -;; the optimizers. -;; -;; An alternate solution would be to re-organize these patterns such -;; that allow both register and memory operands. This would allow the -;; register allocator to spill and not load the register operand. This -;; would be possible only for operations for which we have a constant -;; bit offset, so that we can adjust the address by ofs/8 and replace -;; the offset in the insn by ofs%8. - -(define_insn "*bitset" +;; The *_in_memory patterns will not be matched automatically, not even with +;; combiner bridge patterns. Especially when the memory operands have a +;; displacement, the resulting patterns look too complex. +;; Instead we manually look around the matched insn to see if there is a +;; preceeding memory load and a following memory store of the modified register +;; which can be fused into the single *_in_memory insn. +;; Do that before register allocation, as it can eliminate one temporary +;; register that needs to be allocated. + +(define_insn_and_split "bitset" [(set (match_operand:SI 0 "register_operand" "=r") (ior:SI (ashift:SI (const_int 1) (match_operand:SI 1 "rx_shift_operand" "ri")) (match_operand:SI 2 "register_operand" "0")))] "" "bset\t%1, %0" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitset_in_memory)) + DONE; + else + FAIL; +} [(set_attr "length" "3")] ) -(define_insn "*bitset_in_memory" +(define_insn "bitset_in_memory" [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") (ior:QI (ashift:QI (const_int 1) (match_operand:QI 1 "nonmemory_operand" "ri")) (match_dup 0)))] "" "bset\t%1, %0.B" - [(set_attr "length" "3") + [(set_attr "length" "5") (set_attr "timings" "33")] ) -(define_insn "*bitinvert" +(define_insn_and_split "bitinvert" [(set (match_operand:SI 0 "register_operand" "=r") (xor:SI (ashift:SI (const_int 1) (match_operand:SI 1 "rx_shift_operand" "ri")) (match_operand:SI 2 "register_operand" "0")))] "" "bnot\t%1, %0" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitinvert_in_memory)) + DONE; + else + FAIL; +} [(set_attr "length" "3")] ) -(define_insn "*bitinvert_in_memory" +(define_insn "bitinvert_in_memory" [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") (xor:QI (ashift:QI (const_int 1) (match_operand:QI 1 "nonmemory_operand" "ri")) @@ -2014,7 +2072,7 @@ (set_attr "timings" "33")] ) -(define_insn "*bitclr" +(define_insn_and_split "bitclr" [(set (match_operand:SI 0 "register_operand" "=r") (and:SI (not:SI (ashift:SI @@ -2023,10 +2081,18 @@ (match_operand:SI 2 "register_operand" "0")))] "" "bclr\t%1, %0" + "&& can_create_pseudo_p ()" + [(const_int 0)] +{ + if (rx_fuse_in_memory_bitop (operands, curr_insn, &gen_bitclr_in_memory)) + DONE; + else + FAIL; +} [(set_attr "length" "3")] ) -(define_insn "*bitclr_in_memory" +(define_insn "bitclr_in_memory" [(set (match_operand:QI 0 "rx_restricted_mem_operand" "+Q") (and:QI (not:QI (ashift:QI @@ -2035,7 +2101,7 @@ (match_dup 0)))] "" "bclr\t%1, %0.B" - [(set_attr "length" "3") + [(set_attr "length" "5") (set_attr "timings" "33")] ) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a8dc606ad35..776542aa5fa 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2018-02-14 Oleg Endo + + PR target/83831 + * gcc.target/rx/pr83831.c: New tests. + 2018-02-14 Jozef Lawrynowicz PR target/79242 diff --git a/gcc/testsuite/gcc.target/rx/pr83831.c b/gcc/testsuite/gcc.target/rx/pr83831.c new file mode 100644 index 00000000000..1bd1e2b429b --- /dev/null +++ b/gcc/testsuite/gcc.target/rx/pr83831.c @@ -0,0 +1,77 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ +/* { dg-final { scan-assembler-times "bclr" 6 } } */ +/* { dg-final { scan-assembler-times "bset" 6 } } */ +/* { dg-final { scan-assembler-times "bnot" 6 } } */ + +void +test_0 (char* x, unsigned int y) +{ + /* Expect 4x bclr here. */ + x[0] &= 0xFE; + x[1] = y & ~(1 << 1); + x[2] &= 0xFE; + x[65000] &= 0xFE; +} + +unsigned int +test_1 (unsigned int x, unsigned int y) +{ + /* Expect 1x bclr here. */ + return x & ~(1 << y); +} + +unsigned int +test_2 (unsigned int x) +{ + /* Expect 1x bclr here. */ + return x & ~(1 << 1); +} + +void +test_3 (char* x, unsigned int y) +{ + /* Expect 4x bset here. */ + x[0] |= 0x10; + x[1] = y | (1 << 1); + x[2] |= 0x10; + x[65000] |= 0x10; +} + +unsigned int +test_4 (unsigned int x, unsigned int y) +{ + /* Expect 1x bset here. */ + return x | (1 << y); +} + +unsigned int +test_5 (unsigned int x) +{ + /* Expect 1x bset here. */ + return x | (1 << 8); +} + +void +test_6 (char* x, unsigned int y) +{ + /* Expect 4x bnot here. */ + x[0] ^= 0x10; + x[1] = y ^ (1 << 1); + x[2] ^= 0x10; + x[65000] ^= 0x10; +} + +unsigned int +test_7 (unsigned int x, unsigned int y) +{ + /* Expect 1x bnot here. */ + return x ^ (1 << y); +} + +unsigned int +test_8 (unsigned int x) +{ + /* Expect 1x bnot here. */ + return x ^ (1 << 8); +} -- 2.30.2