[NDS32] Implement peephole2 patterns for tuning code size.
authorKito Cheng <kito.cheng@gmail.com>
Sun, 3 Jun 2018 03:46:32 +0000 (03:46 +0000)
committerChung-Ju Wu <jasonwucj@gcc.gnu.org>
Sun, 3 Jun 2018 03:46:32 +0000 (03:46 +0000)
gcc/
* config/nds32/nds32-peephole2.md: Add new patterns for code size.

From-SVN: r261124

gcc/ChangeLog
gcc/config/nds32/nds32-peephole2.md

index 1e80862db4b29f8a99531edadb92b0d89d91b43e..0dc3091bf1c66c15d6b1a0983418b553fa91d049 100644 (file)
@@ -1,3 +1,7 @@
+2018-06-03  Kito Cheng  <kito.cheng@gmail.com>
+
+       * config/nds32/nds32-peephole2.md: Add new patterns for code size.
+
 2018-06-03  Chung-Ju Wu  <jasonwucj@gmail.com>
 
        * config/nds32/nds32-opts.h (nds32_arch_type): Add ARCH_V3J.
index a5e77b1dcc7050e609c7ea601db769f1b2624528..033f62bae5ad85461deb74a2887182b00d0f828c 100644 (file)
 ;; Use define_peephole2 to handle possible target-specific optimization.
 
 ;; ------------------------------------------------------------------------
+;; Try to utilize 16-bit instruction by swap operand if possible.
+;; ------------------------------------------------------------------------
+
+;; Try to make add as add45.
+(define_peephole2
+  [(set (match_operand:QIHISI 0 "register_operand"              "")
+       (plus:QIHISI (match_operand:QIHISI 1 "register_operand" "")
+                    (match_operand:QIHISI 2 "register_operand" "")))]
+  "reload_completed
+   && TARGET_16_BIT
+   && REGNO (operands[0]) == REGNO (operands[2])
+   && REGNO (operands[0]) != REGNO (operands[1])
+   && TEST_HARD_REG_BIT (reg_class_contents[MIDDLE_REGS], REGNO (operands[0]))"
+  [(set (match_dup 0) (plus:QIHISI (match_dup 2) (match_dup 1)))])
+
+;; Try to make xor/ior/and/mult as xor33/ior33/and33/mult33.
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand"    "")
+       (match_operator:SI 1 "nds32_have_33_inst_operator"
+         [(match_operand:SI 2 "register_operand" "")
+          (match_operand:SI 3 "register_operand" "")]))]
+  "reload_completed
+   && TARGET_16_BIT
+   && REGNO (operands[0]) == REGNO (operands[3])
+   && REGNO (operands[0]) != REGNO (operands[2])
+   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[0]))
+   && TEST_HARD_REG_BIT (reg_class_contents[LOW_REGS], REGNO (operands[2]))"
+  [(set (match_dup 0) (match_op_dup 1 [(match_dup 3) (match_dup 2)]))])
+
+(define_peephole
+  [(set (match_operand:SI 0 "register_operand" "")
+       (match_operand:SI 1 "register_operand" ""))
+   (set (match_operand:SI 2 "register_operand" "")
+       (match_operand:SI 3 "register_operand" ""))]
+  "TARGET_16_BIT
+   && !TARGET_ISA_V2
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[0]))
+   && NDS32_IS_GPR_REGNUM (REGNO (operands[1]))
+   && ((REGNO (operands[0]) & 0x1) == 0)
+   && ((REGNO (operands[1]) & 0x1) == 0)
+   && (REGNO (operands[0]) + 1) == REGNO (operands[2])
+   && (REGNO (operands[1]) + 1) == REGNO (operands[3])"
+  "movd44\t%0, %1"
+  [(set_attr "type"   "alu")
+   (set_attr "length" "2")])
+
+;; Merge two fcpyss to fcpysd.
+(define_peephole2
+  [(set (match_operand:SF 0 "float_even_register_operand" "")
+       (match_operand:SF 1 "float_even_register_operand" ""))
+   (set (match_operand:SF 2 "float_odd_register_operand"  "")
+       (match_operand:SF 3 "float_odd_register_operand"  ""))]
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+   && REGNO (operands[0]) == REGNO (operands[2]) - 1
+   && REGNO (operands[1]) == REGNO (operands[3]) - 1"
+  [(set (match_dup 4) (match_dup 5))]
+  {
+    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[0]));
+    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[1]));
+  })
+
+(define_peephole2
+  [(set (match_operand:SF 0 "float_odd_register_operand"  "")
+       (match_operand:SF 1 "float_odd_register_operand"  ""))
+   (set (match_operand:SF 2 "float_even_register_operand" "")
+       (match_operand:SF 3 "float_even_register_operand" ""))]
+  "(TARGET_FPU_SINGLE || TARGET_FPU_DOUBLE)
+   && REGNO (operands[2]) == REGNO (operands[0]) - 1
+   && REGNO (operands[3]) == REGNO (operands[1]) - 1"
+  [(set (match_dup 4) (match_dup 5))]
+  {
+    operands[4] = gen_rtx_REG (DFmode, REGNO (operands[2]));
+    operands[5] = gen_rtx_REG (DFmode, REGNO (operands[3]));
+  })
+
+;; ------------------------------------------------------------------------
+;; GCC will prefer [u]divmodsi3 rather than [u]divsi3 even remainder is
+;; unused, so we use split to drop mod operation for lower register pressure.
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+       (div:SI (match_operand:SI 1 "register_operand")
+               (match_operand:SI 2 "register_operand")))
+   (set (match_operand:SI 3 "register_operand")
+       (mod:SI (match_dup 1) (match_dup 2)))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+       (div:SI (match_dup 1)
+               (match_dup 2)))])
+
+(define_split
+  [(set (match_operand:SI 0 "register_operand")
+       (udiv:SI (match_operand:SI 1 "register_operand")
+                (match_operand:SI 2 "register_operand")))
+   (set (match_operand:SI 3 "register_operand")
+       (umod:SI (match_dup 1) (match_dup 2)))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[3])) != NULL
+   && can_create_pseudo_p ()"
+  [(set (match_dup 0)
+       (udiv:SI (match_dup 1)
+                (match_dup 2)))])
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand")
+       (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand"))
+                (sign_extend:DI (match_operand:SI 2 "register_operand"))))]
+  "NDS32_EXT_DSP_P ()
+   && peep2_regno_dead_p (1, WORDS_BIG_ENDIAN ? REGNO (operands[0]) + 1 : REGNO (operands[0]))"
+  [(const_int 1)]
+{
+  rtx highpart = nds32_di_high_part_subreg (operands[0]);
+  emit_insn (gen_smulsi3_highpart (highpart, operands[1], operands[2]));
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:DI 0 "nds32_general_register_operand" "")
+       (match_operand:DI 1 "nds32_general_register_operand" ""))]
+  "find_regno_note (insn, REG_UNUSED, REGNO (operands[0])) != NULL
+   || find_regno_note (insn, REG_UNUSED, REGNO (operands[0]) + 1) != NULL"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  rtx dead_note = find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
+  HOST_WIDE_INT offset;
+  if (dead_note == NULL_RTX)
+    offset = 0;
+  else
+    offset = 4;
+  operands[0] = simplify_gen_subreg (
+                 SImode, operands[0],
+                 DImode, offset);
+  operands[1] = simplify_gen_subreg (
+                 SImode, operands[1],
+                 DImode, offset);
+})