From: Craig Blackmore Date: Tue, 12 May 2020 21:41:08 +0000 (-0700) Subject: RISC-V: Add shorten_memrefs pass. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=de6320a81695800de0f0f5fc3e4c6487a52cd430;p=gcc.git RISC-V: Add shorten_memrefs pass. gcc/ * config.gcc: Add riscv-shorten-memrefs.o to extra_objs for riscv. * config/riscv/riscv-passes.def: New file. * config/riscv/riscv-protos.h (make_pass_shorten_memrefs): Declare. * config/riscv/riscv-shorten-memrefs.c: New file. * config/riscv/riscv.c (tree-pass.h): New include. (riscv_compressed_reg_p): New Function (riscv_compressed_lw_offset_p): Likewise. (riscv_compressed_lw_address_p): Likewise. (riscv_shorten_lw_offset): Likewise. (riscv_legitimize_address): Attempt to convert base + large_offset to compressible new_base + small_offset. (riscv_address_cost): Make anticipated compressed load/stores cheaper for code size than uncompressed load/stores. (riscv_register_priority): Move compressed register check to riscv_compressed_reg_p. * config/riscv/riscv.h (C_S_BITS): Define. (CSW_MAX_OFFSET): Define. * config/riscv/riscv.opt (mshorten-memefs): New option. * config/riscv/t-riscv (riscv-shorten-memrefs.o): New rule. (PASSES_EXTRA): Add riscv-passes.def. * doc/invoke.texi: Document -mshorten-memrefs. * config/riscv/riscv.c (riscv_new_address_profitable_p): New function. (TARGET_NEW_ADDRESS_PROFITABLE_P): Define. * doc/tm.texi: Regenerate. * doc/tm.texi.in (TARGET_NEW_ADDRESS_PROFITABLE_P): New hook. * sched-deps.c (attempt_change): Use old address if it is cheaper than new address. * target.def (new_address_profitable_p): New hook. * targhooks.c (default_new_address_profitable_p): New function. * targhooks.h (default_new_address_profitable_p): Declare. gcc/testsuite/ * gcc.target/riscv/shorten-memrefs-1.c: New test. * gcc.target/riscv/shorten-memrefs-2.c: New test. * gcc.target/riscv/shorten-memrefs-3.c: New test. * gcc.target/riscv/shorten-memrefs-4.c: New test. * gcc.target/riscv/shorten-memrefs-5.c: New test. * gcc.target/riscv/shorten-memrefs-6.c: New test. * gcc.target/riscv/shorten-memrefs-7.c: New test. --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6e4d3df3768..a360b5a9757 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,37 @@ +2020-05-12 Craig Blackmore + + * config.gcc: Add riscv-shorten-memrefs.o to extra_objs for riscv. + * config/riscv/riscv-passes.def: New file. + * config/riscv/riscv-protos.h (make_pass_shorten_memrefs): Declare. + * config/riscv/riscv-shorten-memrefs.c: New file. + * config/riscv/riscv.c (tree-pass.h): New include. + (riscv_compressed_reg_p): New Function + (riscv_compressed_lw_offset_p): Likewise. + (riscv_compressed_lw_address_p): Likewise. + (riscv_shorten_lw_offset): Likewise. + (riscv_legitimize_address): Attempt to convert base + large_offset + to compressible new_base + small_offset. + (riscv_address_cost): Make anticipated compressed load/stores + cheaper for code size than uncompressed load/stores. + (riscv_register_priority): Move compressed register check to + riscv_compressed_reg_p. + * config/riscv/riscv.h (C_S_BITS): Define. + (CSW_MAX_OFFSET): Define. + * config/riscv/riscv.opt (mshorten-memefs): New option. + * config/riscv/t-riscv (riscv-shorten-memrefs.o): New rule. + (PASSES_EXTRA): Add riscv-passes.def. + * doc/invoke.texi: Document -mshorten-memrefs. + + * config/riscv/riscv.c (riscv_new_address_profitable_p): New function. + (TARGET_NEW_ADDRESS_PROFITABLE_P): Define. + * doc/tm.texi: Regenerate. + * doc/tm.texi.in (TARGET_NEW_ADDRESS_PROFITABLE_P): New hook. + * sched-deps.c (attempt_change): Use old address if it is cheaper than + new address. + * target.def (new_address_profitable_p): New hook. + * targhooks.c (default_new_address_profitable_p): New function. + * targhooks.h (default_new_address_profitable_p): Declare. + 2020-05-12 Uroš Bizjak PR target/95046 diff --git a/gcc/config.gcc b/gcc/config.gcc index b7f16304712..f544932fc39 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -526,7 +526,7 @@ pru-*-*) ;; riscv*) cpu_type=riscv - extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o" + extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o" d_target_objs="riscv-d.o" ;; rs6000*-*-*) diff --git a/gcc/config/riscv/riscv-passes.def b/gcc/config/riscv/riscv-passes.def new file mode 100644 index 00000000000..8a4ea0918db --- /dev/null +++ b/gcc/config/riscv/riscv-passes.def @@ -0,0 +1,20 @@ +/* Declaration of target-specific passes for RISC-V. + Copyright (C) 2019 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + . */ + +INSERT_PASS_AFTER (pass_rtl_store_motion, 1, pass_shorten_memrefs); diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 8cf9137b5e7..72280ec1c76 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -91,4 +91,6 @@ extern std::string riscv_arch_str (); extern bool riscv_hard_regno_rename_ok (unsigned, unsigned); +rtl_opt_pass * make_pass_shorten_memrefs (gcc::context *ctxt); + #endif /* ! GCC_RISCV_PROTOS_H */ diff --git a/gcc/config/riscv/riscv-shorten-memrefs.c b/gcc/config/riscv/riscv-shorten-memrefs.c new file mode 100644 index 00000000000..3686005fe2e --- /dev/null +++ b/gcc/config/riscv/riscv-shorten-memrefs.c @@ -0,0 +1,200 @@ +/* Shorten memrefs pass for RISC-V. + Copyright (C) 2018-2019 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "backend.h" +#include "regs.h" +#include "target.h" +#include "memmodel.h" +#include "emit-rtl.h" +#include "df.h" +#include "predict.h" +#include "tree-pass.h" + +/* Try to make more use of compressed load and store instructions by replacing + a load/store at address BASE + LARGE_OFFSET with a new load/store at address + NEW BASE + SMALL OFFSET. If NEW BASE is stored in a compressed register, the + load/store can be compressed. Since creating NEW BASE incurs an overhead, + the change is only attempted when BASE is referenced by at least four + load/stores in the same basic block. */ + +namespace { + +const pass_data pass_data_shorten_memrefs = +{ + RTL_PASS, /* type */ + "shorten_memrefs", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_shorten_memrefs : public rtl_opt_pass +{ +public: + pass_shorten_memrefs (gcc::context *ctxt) + : rtl_opt_pass (pass_data_shorten_memrefs, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return TARGET_RVC && riscv_mshorten_memrefs && optimize > 0; + } + virtual unsigned int execute (function *); + +private: + typedef int_hash regno_hash; + typedef hash_map regno_map; + + regno_map * analyze (basic_block bb); + void transform (regno_map *m, basic_block bb); + bool get_si_mem_base_reg (rtx mem, rtx *addr); +}; // class pass_shorten_memrefs + +bool +pass_shorten_memrefs::get_si_mem_base_reg (rtx mem, rtx *addr) +{ + if (!MEM_P (mem) || GET_MODE (mem) != SImode) + return false; + *addr = XEXP (mem, 0); + return GET_CODE (*addr) == PLUS && REG_P (XEXP (*addr, 0)); +} + +/* Count how many times each regno is referenced as base address for a memory + access. */ + +pass_shorten_memrefs::regno_map * +pass_shorten_memrefs::analyze (basic_block bb) +{ + regno_map *m = hash_map::create_ggc (10); + rtx_insn *insn; + + regstat_init_n_sets_and_refs (); + + FOR_BB_INSNS (bb, insn) + { + if (!NONJUMP_INSN_P (insn)) + continue; + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != SET) + continue; + /* Analyze stores first then loads. */ + for (int i = 0; i < 2; i++) + { + rtx mem = XEXP (pat, i); + rtx addr; + if (get_si_mem_base_reg (mem, &addr)) + { + HOST_WIDE_INT regno = REGNO (XEXP (addr, 0)); + /* Do not count store zero as these cannot be compressed. */ + if (i == 0) + { + if (XEXP (pat, 1) == CONST0_RTX (GET_MODE (XEXP (pat, 1)))) + continue; + } + if (REG_N_REFS (regno) < 4) + continue; + m->get_or_insert (regno)++; + } + } + } + regstat_free_n_sets_and_refs (); + + return m; +} + +/* Convert BASE + LARGE_OFFSET to NEW_BASE + SMALL_OFFSET for each load/store + with a base reg referenced at least 4 times. */ + +void +pass_shorten_memrefs::transform (regno_map *m, basic_block bb) +{ + rtx_insn *insn; + FOR_BB_INSNS (bb, insn) + { + if (!NONJUMP_INSN_P (insn)) + continue; + rtx pat = PATTERN (insn); + if (GET_CODE (pat) != SET) + continue; + start_sequence (); + /* Transform stores first then loads. */ + for (int i = 0; i < 2; i++) + { + rtx mem = XEXP (pat, i); + rtx addr; + if (get_si_mem_base_reg (mem, &addr)) + { + HOST_WIDE_INT regno = REGNO (XEXP (addr, 0)); + /* Do not transform store zero as these cannot be compressed. */ + if (i == 0) + { + if (XEXP (pat, 1) == CONST0_RTX (GET_MODE (XEXP (pat, 1)))) + continue; + } + if (m->get_or_insert (regno) > 3) + { + addr + = targetm.legitimize_address (addr, addr, GET_MODE (mem)); + XEXP (pat, i) = replace_equiv_address (mem, addr); + df_insn_rescan (insn); + } + } + } + rtx_insn *seq = get_insns (); + end_sequence (); + emit_insn_before (seq, insn); + } +} + +unsigned int +pass_shorten_memrefs::execute (function *fn) +{ + basic_block bb; + + FOR_ALL_BB_FN (bb, fn) + { + regno_map *m; + if (optimize_bb_for_speed_p (bb)) + continue; + m = analyze (bb); + transform (m, bb); + } + + return 0; +} + +} // anon namespace + +rtl_opt_pass * +make_pass_shorten_memrefs (gcc::context *ctxt) +{ + return new pass_shorten_memrefs (ctxt); +} diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index 94b5ac01762..e4c08d780db 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -55,6 +55,7 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic.h" #include "builtins.h" #include "predict.h" +#include "tree-pass.h" /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ #define UNSPEC_ADDRESS_P(X) \ @@ -848,6 +849,52 @@ riscv_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) return riscv_classify_address (&addr, x, mode, strict_p); } +/* Return true if hard reg REGNO can be used in compressed instructions. */ + +static bool +riscv_compressed_reg_p (int regno) +{ + /* x8-x15/f8-f15 are compressible registers. */ + return (TARGET_RVC && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15) + || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15))); +} + +/* Return true if x is an unsigned 5-bit immediate scaled by 4. */ + +static bool +riscv_compressed_lw_offset_p (rtx x) +{ + return (CONST_INT_P (x) + && (INTVAL (x) & 3) == 0 + && IN_RANGE (INTVAL (x), 0, CSW_MAX_OFFSET)); +} + +/* Return true if load/store from/to address x can be compressed. */ + +static bool +riscv_compressed_lw_address_p (rtx x) +{ + struct riscv_address_info addr; + bool result = riscv_classify_address (&addr, x, GET_MODE (x), + reload_completed); + + /* Before reload, assuming all load/stores of valid addresses get compressed + gives better code size than checking if the address is reg + small_offset + early on. */ + if (result && !reload_completed) + return true; + + /* Return false if address is not compressed_reg + small_offset. */ + if (!result + || addr.type != ADDRESS_REG + || (!riscv_compressed_reg_p (REGNO (addr.reg)) + && addr.reg != stack_pointer_rtx) + || !riscv_compressed_lw_offset_p (addr.offset)) + return false; + + return result; +} + /* Return the number of instructions needed to load or store a value of mode MODE at address X. Return 0 if X isn't valid for MODE. Assume that multiword moves may need to be split into word moves @@ -1308,6 +1355,24 @@ riscv_force_address (rtx x, machine_mode mode) return x; } +/* Modify base + offset so that offset fits within a compressed load/store insn + and the excess is added to base. */ + +static rtx +riscv_shorten_lw_offset (rtx base, HOST_WIDE_INT offset) +{ + rtx addr, high; + /* Leave OFFSET as an unsigned 5-bit offset scaled by 4 and put the excess + into HIGH. */ + high = GEN_INT (offset & ~CSW_MAX_OFFSET); + offset &= CSW_MAX_OFFSET; + if (!SMALL_OPERAND (INTVAL (high))) + high = force_reg (Pmode, high); + base = force_reg (Pmode, gen_rtx_PLUS (Pmode, high, base)); + addr = plus_constant (Pmode, base, offset); + return addr; +} + /* This function is used to implement LEGITIMIZE_ADDRESS. If X can be legitimized in a way that the generic machinery might not expect, return a new address, otherwise return NULL. MODE is the mode of @@ -1326,7 +1391,7 @@ riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (riscv_split_symbol (NULL, x, mode, &addr, FALSE)) return riscv_force_address (addr, mode); - /* Handle BASE + OFFSET using riscv_add_offset. */ + /* Handle BASE + OFFSET. */ if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) != 0) { @@ -1335,7 +1400,14 @@ riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, if (!riscv_valid_base_register_p (base, mode, false)) base = copy_to_mode_reg (Pmode, base); - addr = riscv_add_offset (NULL, base, offset); + if (optimize_function_for_size_p (cfun) + && (strcmp (current_pass->name, "shorten_memrefs") == 0) + && mode == SImode) + /* Convert BASE + LARGE_OFFSET into NEW_BASE + SMALL_OFFSET to allow + possible compressed load/store. */ + addr = riscv_shorten_lw_offset (base, offset); + else + addr = riscv_add_offset (NULL, base, offset); return riscv_force_address (addr, mode); } @@ -1833,6 +1905,11 @@ riscv_address_cost (rtx addr, machine_mode mode, addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) { + /* When optimizing for size, make uncompressible 32-bit addresses more + * expensive so that compressible 32-bit addresses are preferred. */ + if (TARGET_RVC && !speed && riscv_mshorten_memrefs && mode == SImode + && !riscv_compressed_lw_address_p (addr)) + return riscv_address_insns (addr, mode, false) + 1; return riscv_address_insns (addr, mode, false); } @@ -4666,6 +4743,7 @@ riscv_option_override (void) error ("%<-mriscv-attribute%> RISC-V ELF attribute requires GNU as 2.32" " [%<-mriscv-attribute%>]"); #endif + } /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */ @@ -4705,9 +4783,9 @@ riscv_conditional_register_usage (void) static int riscv_register_priority (int regno) { - /* Favor x8-x15/f8-f15 to improve the odds of RVC instruction selection. */ - if (TARGET_RVC && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15) - || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15))) + /* Favor compressed registers to improve the odds of RVC instruction + selection. */ + if (riscv_compressed_reg_p (regno)) return 1; return 0; @@ -5049,6 +5127,19 @@ riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED, return !cfun->machine->interrupt_handler_p || df_regs_ever_live_p (to_regno); } +/* Implement TARGET_NEW_ADDRESS_PROFITABLE_P. */ + +bool +riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr) +{ + /* Prefer old address if it is less expensive. */ + addr_space_t as = MEM_ADDR_SPACE (memref); + bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); + int old_cost = address_cost (XEXP (memref, 0), GET_MODE (memref), as, speed); + int new_cost = address_cost (new_addr, GET_MODE (memref), as, speed); + return new_cost <= old_cost; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -5226,6 +5317,9 @@ riscv_hard_regno_rename_ok (unsigned from_regno ATTRIBUTE_UNUSED, #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG riscv_reorg +#undef TARGET_NEW_ADDRESS_PROFITABLE_P +#define TARGET_NEW_ADDRESS_PROFITABLE_P riscv_new_address_profitable_p + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-riscv.h" diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 567c23380fe..e6209ede9d6 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -920,6 +920,7 @@ extern unsigned riscv_stack_boundary; #define SHIFT_RS1 15 #define SHIFT_IMM 20 #define IMM_BITS 12 +#define C_S_BITS 5 #define C_SxSP_BITS 6 #define IMM_REACH (1LL << IMM_BITS) @@ -929,6 +930,10 @@ extern unsigned riscv_stack_boundary; #define SWSP_REACH (4LL << C_SxSP_BITS) #define SDSP_REACH (8LL << C_SxSP_BITS) +/* This is the maximum value that can be represented in a compressed load/store + offset (an unsigned 5-bit value scaled by 4). */ +#define CSW_MAX_OFFSET ((4LL << C_S_BITS) - 1) & ~3 + /* Called from RISCV_REORG, this is defined in riscv-sr.c. */ extern void riscv_remove_unneeded_save_restore_calls (void); diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 29de246759e..e4bfcb86f51 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -87,6 +87,12 @@ msave-restore Target Report Mask(SAVE_RESTORE) Use smaller but slower prologue and epilogue code. +mshorten-memrefs +Target Bool Var(riscv_mshorten_memrefs) Init(1) +Convert BASE + LARGE_OFFSET addresses to NEW_BASE + SMALL_OFFSET to allow more +memory accesses to be generated as compressed instructions. Currently targets +32-bit integer load/stores. + mcmodel= Target Report RejectNegative Joined Enum(code_model) Var(riscv_cmodel) Init(TARGET_DEFAULT_CMODEL) Specify the code model. diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 5ecb3c160a6..4820fb35d31 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -19,3 +19,8 @@ riscv-d.o: $(srcdir)/config/riscv/riscv-d.c $(COMPILE) $< $(POSTCOMPILE) +riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.c + $(COMPILE) $< + $(POSTCOMPILE) + +PASSES_EXTRA += $(srcdir)/config/riscv/riscv-passes.def diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 35e8242af5f..850aeac033d 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1129,6 +1129,7 @@ See RS/6000 and PowerPC Options. -mpreferred-stack-boundary=@var{num} @gol -msmall-data-limit=@var{N-bytes} @gol -msave-restore -mno-save-restore @gol +-mshorten-memrefs -mno-shorten-memrefs @gol -mstrict-align -mno-strict-align @gol -mcmodel=medlow -mcmodel=medany @gol -mexplicit-relocs -mno-explicit-relocs @gol @@ -25329,6 +25330,15 @@ Do or don't use smaller but slower prologue and epilogue code that uses library function calls. The default is to use fast inline prologues and epilogues. +@item -mshorten-memrefs +@itemx -mno-shorten-memrefs +@opindex mshorten-memrefs +Do or do not attempt to make more use of compressed load/store instructions by +replacing a load/store of 'base register + large offset' with a new load/store +of 'new base + small offset'. If the new base gets stored in a compressed +register, then the new load/store can be compressed. Currently targets 32-bit +integer load/stores only. + @item -mstrict-align @itemx -mno-strict-align @opindex mstrict-align diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 710f674860a..b8d64d136e0 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6967,6 +6967,13 @@ candidate as a replacement for the if-convertible sequence described in @code{if_info}. @end deftypefn +@deftypefn {Target Hook} bool TARGET_NEW_ADDRESS_PROFITABLE_P (rtx @var{memref}, rtx_insn * @var{insn}, rtx @var{new_addr}) +Return @code{true} if it is profitable to replace the address in +@var{memref} with @var{new_addr}. This allows targets to prevent the +scheduler from undoing address optimizations. The instruction containing the +memref is @var{insn}. The default implementation returns @code{true}. +@end deftypefn + @deftypefn {Target Hook} bool TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P (void) This predicate controls the use of the eager delay slot filler to disallow speculatively executed instructions being placed in delay slots. Targets diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index e31cdb251b5..3be984bbd5c 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4697,6 +4697,8 @@ Define this macro if a non-short-circuit operation produced by @hook TARGET_NOCE_CONVERSION_PROFITABLE_P +@hook TARGET_NEW_ADDRESS_PROFITABLE_P + @hook TARGET_NO_SPECULATION_IN_DELAY_SLOTS_P @hook TARGET_ESTIMATED_POLY_VALUE diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c index 331af5ffdb3..1bc75074e5d 100644 --- a/gcc/sched-deps.c +++ b/gcc/sched-deps.c @@ -4694,6 +4694,9 @@ attempt_change (struct mem_inc_info *mii, rtx new_addr) rtx mem = *mii->mem_loc; rtx new_mem; + if (!targetm.new_address_profitable_p (mem, mii->mem_insn, new_addr)) + return NULL_RTX; + /* Jump through a lot of hoops to keep the attributes up to date. We do not want to call one of the change address variants that take an offset even though we know the offset in many cases. These diff --git a/gcc/target.def b/gcc/target.def index f8d26e63021..f36aebb535c 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -3847,6 +3847,17 @@ candidate as a replacement for the if-convertible sequence described in\n\ bool, (rtx_insn *seq, struct noce_if_info *if_info), default_noce_conversion_profitable_p) +/* Return true if new_addr should be preferred over the existing address used by + memref in insn. */ +DEFHOOK +(new_address_profitable_p, + "Return @code{true} if it is profitable to replace the address in\n\ +@var{memref} with @var{new_addr}. This allows targets to prevent the\n\ +scheduler from undoing address optimizations. The instruction containing the\n\ +memref is @var{insn}. The default implementation returns @code{true}.", +bool, (rtx memref, rtx_insn * insn, rtx new_addr), +default_new_address_profitable_p) + DEFHOOK (estimated_poly_value, "Return an estimate of the runtime value of @var{val}, for use in\n\ diff --git a/gcc/targhooks.c b/gcc/targhooks.c index 4caab8cfbfa..e0982328d8e 100644 --- a/gcc/targhooks.c +++ b/gcc/targhooks.c @@ -1566,6 +1566,19 @@ default_mode_dependent_address_p (const_rtx addr ATTRIBUTE_UNUSED, return false; } +extern bool default_new_address_profitable_p (rtx, rtx); + + +/* The default implementation of TARGET_NEW_ADDRESS_PROFITABLE_P. */ + +bool +default_new_address_profitable_p (rtx memref ATTRIBUTE_UNUSED, + rtx_insn *insn ATTRIBUTE_UNUSED, + rtx new_addr ATTRIBUTE_UNUSED) +{ + return true; +} + bool default_target_option_valid_attribute_p (tree ARG_UNUSED (fndecl), tree ARG_UNUSED (name), diff --git a/gcc/targhooks.h b/gcc/targhooks.h index 9704d23f1db..9bab77bc638 100644 --- a/gcc/targhooks.h +++ b/gcc/targhooks.h @@ -186,6 +186,7 @@ extern tree default_emutls_var_init (tree, tree, tree); extern unsigned int default_hard_regno_nregs (unsigned int, machine_mode); extern bool default_hard_regno_scratch_ok (unsigned int); extern bool default_mode_dependent_address_p (const_rtx, addr_space_t); +extern bool default_new_address_profitable_p (rtx, rtx_insn *, rtx); extern bool default_target_option_valid_attribute_p (tree, tree, tree, int); extern bool default_target_option_pragma_parse (tree, tree); extern bool default_target_can_inline_p (tree, tree); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index b84acda5a67..9ad48bad031 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,13 @@ +2020-05-12 Craig Blackmore + + * gcc.target/riscv/shorten-memrefs-1.c: New test. + * gcc.target/riscv/shorten-memrefs-2.c: New test. + * gcc.target/riscv/shorten-memrefs-3.c: New test. + * gcc.target/riscv/shorten-memrefs-4.c: New test. + * gcc.target/riscv/shorten-memrefs-5.c: New test. + * gcc.target/riscv/shorten-memrefs-6.c: New test. + * gcc.target/riscv/shorten-memrefs-7.c: New test. + 2020-05-12 Nathan Sidwell PR preprocessor/95013 diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-1.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-1.c new file mode 100644 index 00000000000..958942a6f7f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-1.c @@ -0,0 +1,26 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */ + +/* These stores cannot be compressed because x0 is not a compressed reg. + Therefore the shorten_memrefs pass should not attempt to rewrite them into a + compressible format. */ + +void +store1z (int *array) +{ + array[200] = 0; + array[201] = 0; + array[202] = 0; + array[203] = 0; +} + +void +store2z (long long *array) +{ + array[200] = 0; + array[201] = 0; + array[202] = 0; + array[203] = 0; +} + +/* { dg-final { scan-assembler-not "store1z:\n\taddi" } } */ +/* { dg-final { scan-assembler-not "store2z:\n\taddi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-2.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-2.c new file mode 100644 index 00000000000..2c2f41548c6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-2.c @@ -0,0 +1,51 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */ + +/* shorten_memrefs should rewrite these load/stores into a compressible + format. */ + +void +store1a (int *array, int a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +void +store2a (long long *array, long long a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +int +load1r (int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +long long +load2r (long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +/* { dg-final { scan-assembler "store1a:\n\taddi" } } */ +/* The sd insns in store2a are not rewritten because shorten_memrefs currently + only optimizes lw and sw. +/* { dg-final { scan-assembler "store2a:\n\taddi" { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler "load1r:\n\taddi" } } */ +/* { dg-final { scan-assembler "load2r:\n\taddi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-3.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-3.c new file mode 100644 index 00000000000..2001fe871ee --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-3.c @@ -0,0 +1,39 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32" } */ + +/* These loads cannot be compressed because only one compressed reg is + available (since args are passed in a0-a4, that leaves a5-a7 available, of + which only a5 is a compressed reg). Therefore the shorten_memrefs pass should + not attempt to rewrite these loads into a compressible format. It may not + be possible to avoid this because shorten_memrefs happens before reg alloc. +*/ + +extern int sub1 (int, int, int, int, int, int, int); + +int +load1a (int a0, int a1, int a2, int a3, int a4, int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return sub1 (a0, a1, a2, a3, a4, 0, a); +} + +extern long long sub2 (long long, long long, long long, long long, long long, + long long, long long); + +long long +load2a (long long a0, long long a1, long long a2, long long a3, long long a4, + long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return sub2 (a0, a1, a2, a3, a4, 0, a); +} + +/* { dg-final { scan-assembler-not "load1a:\n\taddi" { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-not "load2a:\n.*addi\[ \t\]*\[at\]\[0-9\],\[at\]\[0-9\],\[0-9\]*" { xfail riscv*-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-4.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-4.c new file mode 100644 index 00000000000..cd4784913e4 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-4.c @@ -0,0 +1,26 @@ +/* { dg-options "-Os -march=rv64imc -mabi=lp64" } */ + +/* These stores cannot be compressed because x0 is not a compressed reg. + Therefore the shorten_memrefs pass should not attempt to rewrite them into a + compressible format. */ + +void +store1z (int *array) +{ + array[200] = 0; + array[201] = 0; + array[202] = 0; + array[203] = 0; +} + +void +store2z (long long *array) +{ + array[200] = 0; + array[201] = 0; + array[202] = 0; + array[203] = 0; +} + +/* { dg-final { scan-assembler-not "store1z:\n\taddi" } } */ +/* { dg-final { scan-assembler-not "store2z:\n\taddi" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-5.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-5.c new file mode 100644 index 00000000000..80b3897e4da --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-5.c @@ -0,0 +1,53 @@ +/* { dg-options "-Os -march=rv64imc -mabi=lp64" } */ + +/* shorten_memrefs should rewrite these load/stores into a compressible + format. */ + +void +store1a (int *array, int a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +void +store2a (long long *array, long long a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +int +load1r (int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +long long +load2r (long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +/* { dg-final { scan-assembler "store1a:\n\taddi" } } */ +/* The sd insns in store2a are not rewritten because shorten_memrefs currently + only optimizes lw and sw. +/* { dg-final { scan-assembler "store2a:\n\taddi" { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler "load1r:\n\taddi" } } */ +/* The ld insns in load2r are not rewritten because shorten_memrefs currently + only optimizes lw and sw. +/* { dg-final { scan-assembler "load2r:\n\taddi" { xfail riscv*-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-6.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-6.c new file mode 100644 index 00000000000..3403c7044df --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-6.c @@ -0,0 +1,39 @@ +/* { dg-options "-Os -march=rv64imc -mabi=lp64" } */ + +/* These loads cannot be compressed because only one compressed reg is + available (since args are passed in a0-a4, that leaves a5-a7 available, of + which only a5 is a compressed reg). Therefore the shorten_memrefs pass should + not attempt to rewrite these loads into a compressible format. It may not + be possible to avoid this because shorten_memrefs happens before reg alloc. +*/ + +extern int sub1 (int, int, int, int, int, int, int); + +int +load1a (int a0, int a1, int a2, int a3, int a4, int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return sub1 (a0, a1, a2, a3, a4, 0, a); +} + +extern long long sub2 (long long, long long, long long, long long, long long, + long long, long long); + +long long +load2a (long long a0, long long a1, long long a2, long long a3, long long a4, + long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return sub2 (a0, a1, a2, a3, a4, 0, a); +} + +/* { dg-final { scan-assembler-not "load1a:\n\taddi" { xfail riscv*-*-* } } } */ +/* { dg-final { scan-assembler-not "load2a:\n.*addi\[ \t\]*\[at\]\[0-9\],\[at\]\[0-9\],\[0-9\]*" } } */ diff --git a/gcc/testsuite/gcc.target/riscv/shorten-memrefs-7.c b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-7.c new file mode 100644 index 00000000000..a5833fd356d --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/shorten-memrefs-7.c @@ -0,0 +1,46 @@ +/* { dg-options "-Os -march=rv32imc -mabi=ilp32 -mno-shorten-memrefs" } */ + +/* Check that these load/stores do not get rewritten into a compressible format + when shorten_memrefs is disabled. */ + +void +store1a (int *array, int a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +void +store2a (long long *array, long long a) +{ + array[200] = a; + array[201] = a; + array[202] = a; + array[203] = a; +} + +int +load1r (int *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +long long +load2r (long long *array) +{ + int a = 0; + a += array[200]; + a += array[201]; + a += array[202]; + a += array[203]; + return a; +} + +/* { dg-final { scan-assembler-not "addi" } } */