From: Claudiu Zissulescu Date: Tue, 13 Nov 2018 12:06:58 +0000 (+0100) Subject: [ARC] Add peephole rules to combine store/loads into double store/loads X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=8fa2c211f7852b1fe330423f4af096016bb3acbc;p=gcc.git [ARC] Add peephole rules to combine store/loads into double store/loads Simple peephole rules which combine multiple ld/st instructions into 64-bit load/store instructions. They only apply to architectures which have the double load/store option enabled. gcc/ Claudiu Zissulescu * config/arc/arc-protos.h (gen_operands_ldd_std): Add. * config/arc/arc.c (operands_ok_ldd_std): New function. (mem_ok_for_ldd_std): Likewise. (gen_operands_ldd_std): Likewise. * config/arc/arc.md: Add peephole2 rules for std/ldd. From-SVN: r266064 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index ce95921107b..cc2ac29b3be 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2018-11-13 Claudiu Zissulescu + + * config/arc/arc-protos.h (gen_operands_ldd_std): Add. + * config/arc/arc.c (operands_ok_ldd_std): New function. + (mem_ok_for_ldd_std): Likewise. + (gen_operands_ldd_std): Likewise. + * config/arc/arc.md: Add peephole2 rules for std/ldd. 
+ 2018-11-13 Eric Botcazou * toplev.c (output_stack_usage): Turn test on flag_stack_usage into diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index ce4b6f84749..55f8ed4c643 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -45,6 +45,8 @@ extern bool compact_memory_operand_p (rtx, machine_mode, bool, bool); extern int arc_return_address_register (unsigned int); extern unsigned int arc_compute_function_type (struct function *); extern bool arc_is_uncached_mem_p (rtx); +extern bool arc_lra_p (void); +extern bool gen_operands_ldd_std (rtx *operands, bool load, bool commute); #endif /* RTX_CODE */ extern unsigned int arc_compute_frame_size (int); diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c index 277b546d3d3..41157d41c4d 100644 --- a/gcc/config/arc/arc.c +++ b/gcc/config/arc/arc.c @@ -10880,6 +10880,167 @@ arc_cannot_substitute_mem_equiv_p (rtx) return true; } +/* Checks whether the operands are valid for use in an LDD/STD + instruction. Assumes that RT, and RT2 are REG. This is guaranteed + by the patterns. Assumes that the address in the base register RN + is word aligned. Pattern guarantees that both memory accesses use + the same base register, the offsets are constants within the range, + and the gap between the offsets is 4. If reload complete then + check that registers are legal. */ + +static bool +operands_ok_ldd_std (rtx rt, rtx rt2, HOST_WIDE_INT offset) +{ + unsigned int t, t2; + + if (!reload_completed) /* Until reload has completed, accept without any of the checks below.  */ + return true; + + if (!(SMALL_INT_RANGE (offset, (GET_MODE_SIZE (DImode) - 1) & (~0x03), + (offset & (GET_MODE_SIZE (DImode) - 1) & 3 + ? 0 : -(-GET_MODE_SIZE (DImode) | (~0x03)) >> 1)))) + return false; + + t = REGNO (rt); + t2 = REGNO (rt2); + + if ((t2 == PROGRAM_COUNTER_REGNO) + || (t % 2 != 0) /* First destination register is not even. */ + || (t2 != t + 1)) /* Second register is not the one right after the first.  */ + return false; + + return true; +} + +/* Helper for gen_operands_ldd_std. 
Returns true iff the memory + operand MEM's address contains an immediate offset from the base + register and has no side effects, in which case it sets BASE and + OFFSET accordingly. */ + +static bool +mem_ok_for_ldd_std (rtx mem, rtx *base, rtx *offset) +{ + rtx addr; + + gcc_assert (base != NULL && offset != NULL); + + /* TODO: Handle more general memory operand patterns, such as + PRE_DEC and PRE_INC. */ + + if (side_effects_p (mem)) + return false; + + /* Can't deal with subregs. */ + if (GET_CODE (mem) == SUBREG) + return false; + + gcc_assert (MEM_P (mem)); + + *offset = const0_rtx; /* Offset stays zero when the address is a bare register.  */ + + addr = XEXP (mem, 0); + + /* If addr isn't valid for DImode, then we can't handle it. */ + if (!arc_legitimate_address_p (DImode, addr, + reload_in_progress || reload_completed)) + return false; + + if (REG_P (addr)) + { + *base = addr; + return true; + } + else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) + { + *base = XEXP (addr, 0); + *offset = XEXP (addr, 1); /* NOTE(review): for MINUS the constant is handed back un-negated -- confirm that such addresses are rejected by the legitimacy check above.  */ + return (REG_P (*base) && CONST_INT_P (*offset)); + } + + return false; +} + +/* Called from peephole2 to replace two word-size accesses with a + single LDD/STD instruction. Returns true iff we can generate a new + instruction sequence. That is, both accesses use the same base + register and the gap between constant offsets is 4. OPERANDS are + the operands found by the peephole matcher; OPERANDS[0,1] are + register operands, and OPERANDS[2,3] are the corresponding memory + operands. LOAD indicates whether the access is load or store. */ + /* COMMUTE, when set together with LOAD, allows the two register operands to be exchanged if the original order is rejected by operands_ok_ldd_std.  On success OPERANDS may have been reordered in place.  */ +bool +gen_operands_ldd_std (rtx *operands, bool load, bool commute) +{ + int i, gap; /* NOTE(review): gap is int while the offsets are HOST_WIDE_INT -- confirm the difference always fits.  */ + HOST_WIDE_INT offsets[2], offset; + int nops = 2; /* Number of register/memory operand pairs; memory operand i lives at operands[nops+i].  */ + rtx cur_base, cur_offset, tmp; + rtx base = NULL_RTX; + + /* Check that the memory references are immediate offsets from the + same base register. Extract the base register, the destination + registers, and the corresponding memory offsets. 
*/ + for (i = 0; i < nops; i++) + { + if (!mem_ok_for_ldd_std (operands[nops+i], &cur_base, &cur_offset)) + return false; + + if (i == 0) + base = cur_base; + else if (REGNO (base) != REGNO (cur_base)) + return false; + + offsets[i] = INTVAL (cur_offset); + if (GET_CODE (operands[i]) == SUBREG) /* Replace a SUBREG operand by its inner register; the assert requires both to have the same mode.  */ + { + tmp = SUBREG_REG (operands[i]); + gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp)); + operands[i] = tmp; + } + } + + /* Make sure there is no dependency between the individual loads. */ + if (load && REGNO (operands[0]) == REGNO (base)) + return false; /* RAW. */ + + if (load && REGNO (operands[0]) == REGNO (operands[1])) + return false; /* WAW. */ + + /* Make sure the instructions are ordered with lower memory access first. */ + if (offsets[0] > offsets[1]) + { + gap = offsets[0] - offsets[1]; + offset = offsets[1]; + + /* Swap the instructions such that lower memory is accessed first. */ + std::swap (operands[0], operands[1]); + std::swap (operands[2], operands[3]); + } + else + { + gap = offsets[1] - offsets[0]; + offset = offsets[0]; + } + + /* Make sure accesses are to consecutive memory locations. */ + if (gap != 4) + return false; + + /* Make sure we generate legal instructions. */ + if (operands_ok_ldd_std (operands[0], operands[1], offset)) + return true; + + if (load && commute) /* Only loads are retried with the registers exchanged, and only when the caller asked for it.  */ + { + /* Try reordering registers. 
*/ + std::swap (operands[0], operands[1]); + if (operands_ok_ldd_std (operands[0], operands[1], offset)) + return true; + } + + return false; +} + #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 2c9de8d4fd6..24fcf6e5108 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -6363,6 +6363,75 @@ core_3, archs4x, archs4xd, archs4xd_slow" [(set (reg:CC CC_REG) (compare:CC (match_dup 3) (ashift:SI (match_dup 1) (match_dup 2))))]) +(define_peephole2 ; std + [(set (match_operand:SI 2 "memory_operand" "") + (match_operand:SI 0 "register_operand" "")) + (set (match_operand:SI 3 "memory_operand" "") + (match_operand:SI 1 "register_operand" ""))] + "TARGET_LL64" + [(const_int 0)] +{ + if (!gen_operands_ldd_std (operands, false, false)) + FAIL; + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + emit_insn (gen_rtx_SET (operands[2], operands[0])); + DONE; +}) + +(define_peephole2 ; ldd + [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "register_operand" "") + (match_operand:SI 3 "memory_operand" ""))] + "TARGET_LL64" + [(const_int 0)] +{ + if (!gen_operands_ldd_std (operands, true, false)) + FAIL; + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + emit_insn (gen_rtx_SET (operands[0], operands[2])); + DONE; +}) + +;; We require consecutive registers for LDD instruction. Check if we +;; can reorder them and use an LDD. + +(define_peephole2 ; swap the destination registers of two loads + ; before a commutative operation. 
+ [(set (match_operand:SI 0 "register_operand" "") + (match_operand:SI 2 "memory_operand" "")) + (set (match_operand:SI 1 "register_operand" "") + (match_operand:SI 3 "memory_operand" "")) + (set (match_operand:SI 4 "register_operand" "") + (match_operator:SI 5 "commutative_operator" + [(match_operand 6 "register_operand" "") + (match_operand 7 "register_operand" "") ]))] + "TARGET_LL64 + && (((rtx_equal_p (operands[0], operands[6])) + && (rtx_equal_p (operands[1], operands[7]))) + || ((rtx_equal_p (operands[0], operands[7])) + && (rtx_equal_p (operands[1], operands[6])))) + && (peep2_reg_dead_p (3, operands[0]) + || rtx_equal_p (operands[0], operands[4])) + && (peep2_reg_dead_p (3, operands[1]) + || rtx_equal_p (operands[1], operands[4]))" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 4) (match_op_dup 5 [(match_dup 6) (match_dup 7)]))] + { + if (!gen_operands_ldd_std (operands, true, true)) + { + FAIL; + } + else + { + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + operands[2] = adjust_address (operands[2], DImode, 0); + } + } +) + ;; include the arc-FPX instructions (include "fpx.md")