From: Kito Cheng
Date: Sun, 3 Jun 2018 03:46:32 +0000 (+0000)
Subject: [NDS32] Implement peephole2 patterns for tuning code size.
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=432e70af3e01dd71912bd69ee6abb2a307d8622b;p=gcc.git

[NDS32] Implement peephole2 patterns for tuning code size.

gcc/
        * config/nds32/nds32-peephole2.md: Add new patterns for code size.

From-SVN: r261124
---

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1e80862db4b..0dc3091bf1c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2018-06-03 Kito Cheng
+
+        * config/nds32/nds32-peephole2.md: Add new patterns for code size.
+
 2018-06-03 Chung-Ju Wu
 
         * config/nds32/nds32-opts.h (nds32_arch_type): Add ARCH_V3J.
diff --git a/gcc/config/nds32/nds32-peephole2.md b/gcc/config/nds32/nds32-peephole2.md
index a5e77b1dcc7..033f62bae5a 100644
--- a/gcc/config/nds32/nds32-peephole2.md
+++ b/gcc/config/nds32/nds32-peephole2.md
@@ -22,3 +22,139 @@
 ;; Use define_peephole2 to handle possible target-specific optimization.
 
 ;; ------------------------------------------------------------------------
+;; Try to utilize 16-bit instructions by swapping operands if possible.
+;; ------------------------------------------------------------------------
+
+;; Try to make add as add45.
+;; After reload, when the destination is the same hard register as the
+;; second addend, differs from the first addend and is in MIDDLE_REGS,
+;; exchange the two addends so the destination appears as the first one.
+;; (Presumably this is the operand shape add45 requires -- confirm against
+;; the add insn patterns.)
+(define_peephole2
+  [(set (match_operand:QIHISI 0 "register_operand" "")
+        (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "")
+                     (match_operand:QIHISI 2 "register_operand" "")))]
+  "reload_completed
+   && TARGET_16_BIT
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && REGNO (operands[0]) != REGNO (operands[1])
+   && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))"
+  [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))])
+
+;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33.
+;; After reload, when the destination is the same hard register as the
+;; second source, differs from the first source, and both the destination
+;; and the first source are in LOW_REGS, exchange the two sources so the
+;; destination appears as the first one.
+;; NOTE(review): exchanging the sources is only valid for commutative
+;; operators; nds32_have_33_inst_operator is defined elsewhere -- confirm it
+;; accepts only such codes.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+        (match_operator:SI 1 "nds32_have_33_inst_operator"
+          [(match_operand:SI 2 "register_operand" "")
+           (match_operand:SI 3 "register_operand" "")]))]
+  "reload_completed
+   && TARGET_16_BIT
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0]))
+   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))])
+
+;; Emit a single movd44 for two consecutive SImode register moves.
+;; Requires TARGET_16_BIT and !TARGET_ISA_V2; operands 0 and 1 must be
+;; even-numbered GPRs, and the second move must go from the register right
+;; after operand 1 to the register right after operand 0.
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand" "")
+        (match_operand:SI 1 "register_operand" ""))
+   (set (match_operand:SI 2 "register_operand" "")
+        (match_operand:SI 3 "register_operand" ""))]
+  "TARGET_16_BIT
+   && !TARGET_ISA_V2
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
+   && ((REGNO (operands[0]) & 0x1) == 0)
+   && ((REGNO (operands[1]) & 0x1) == 0)
+   && (REGNO (operands[0]) + 1) == REGNO (operands[2])
+   && (REGNO (operands[1]) + 1) == REGNO (operands[3])"
+  "movd44\t%0, %1"
+  [(set_attr "type" "alu")
+   (set_attr "length" "2")])
+
+;; Merge two fcpyss into one fcpysd.
+;; Even/odd order: an SFmode move between operands matching
+;; float_even_register_operand, followed by the SFmode move between the
+;; registers numbered one higher on both sides, becomes one DFmode move
+;; that starts at the even registers.
+(define_peephole2
+  [(set (match_operand:SF 0 "float_even_register_operand" "")
+        (match_operand:SF 1 "float_even_register_operand" ""))
+   (set (match_operand:SF 2 "float_odd_register_operand" "")
+        (match_operand:SF 3 "float_odd_register_operand" ""))]
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+   && REGNO (operands[0]) == REGNO (operands[2]) - 1
+   && REGNO (operands[1]) == REGNO (operands[3]) - 1"
+  [(set (match_dup 4) (match_dup 5))]
+  {
+    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0]));
+    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1]));
+  })
+
+;; Odd/even order: the same merge when the move between the odd registers
+;; comes first; the DFmode move again starts at the even registers
+;; (operands 2 and 3 here).
+(define_peephole2
+  [(set (match_operand:SF 0 "float_odd_register_operand" "")
+        (match_operand:SF 1 "float_odd_register_operand" ""))
+   (set (match_operand:SF 2 "float_even_register_operand" "")
+        (match_operand:SF 3 "float_even_register_operand" ""))]
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+   && REGNO (operands[2]) == REGNO (operands[0]) - 1
+   && REGNO (operands[3]) == REGNO (operands[1]) - 1"
+  [(set (match_dup 4) (match_dup 5))]
+  {
+    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2]));
+    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3]));
+  })
+
+;; ------------------------------------------------------------------------
+;; GCC prefers [u]divmodsi3 over [u]divsi3 even when the remainder is
+;; unused, so we use splits to drop the mod operation and lower register
+;; pressure.
+
+;; Signed case: when operand 3 (the mod result) has a REG_UNUSED note on
+;; the insn and pseudos can still be created, keep only the div.
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+        (div:SI (match_operand:SI 1 "register_operand")
+                (match_operand:SI 2 "register_operand")))
+   (set (match_operand:SI 3 "register_operand")
+        (mod:SI (match_dup 1) (match_dup 2)))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+        (div:SI (match_dup 1)
+                (match_dup 2)))])
+
+;; Unsigned case: same as above for udiv/umod.
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+        (udiv:SI (match_operand:SI 1 "register_operand")
+                 (match_operand:SI 2 "register_operand")))
+   (set (match_operand:SI 3 "register_operand")
+        (umod:SI (match_dup 1) (match_dup 2)))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+        (udiv:SI (match_dup 1)
+                 (match_dup 2)))])
+
+;; With NDS32_EXT_DSP_P (), replace a sign-extending SImode x SImode ->
+;; DImode multiply by smulsi3_highpart when one register of the DImode
+;; result is reported dead by peep2_regno_dead_p at position 1:
+;; REGNO (operands[0]) + 1 if WORDS_BIG_ENDIAN, else REGNO (operands[0])
+;; (presumably the register holding the low word -- confirm).  Only the
+;; subreg returned by nds32_di_high_part_subreg is written.
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+        (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand"))
+                 (sign_extend:DI (match_operand:SI 2 "register_operand"))))]
+  "NDS32_EXT_DSP_P ()
+   && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? REGNO (operands[0]) + 1 : REGNO (operands[0]))"
+  [(const_int 1)]
+{
+  rtx highpart = nds32_di_high_part_subreg (operands[0]);
+  emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2]));
+  DONE;
+})
+
+;; Narrow a DImode register move to a single SImode move when either
+;; REGNO (operands[0]) or REGNO (operands[0]) + 1 has a REG_UNUSED note on
+;; the insn.
+;; NOTE(review): REGNO + 1 designates the second register of the pair only
+;; for hard registers, and the condition does not test reload_completed --
+;; confirm this split cannot match pseudo registers.
+(define_split
+  [(set (match_operand:DI 0 "nds32_general_register_operand" "")
+        (match_operand:DI 1 "nds32_general_register_operand" ""))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL
+   || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  /* DEAD_NOTE is non-null when REGNO (operands[0]) itself is the unused
+     register; in that case move the SImode part at byte offset 4,
+     otherwise the part at byte offset 0.  */
+  rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+  HOST_WIDE_INT offset;
+  if (dead_note == NULL_RTX)
+    offset = 0;
+  else
+    offset = 4;
+  operands[0] = simplify_gen_subreg (
+                  SImode, operands[0],
+                  DImode, offset);
+  operands[1] = simplify_gen_subreg (
+                  SImode, operands[1],
+                  DImode, offset);
+})