From: Hans-Peter Nilsson Date: Sun, 12 Jul 2020 16:41:25 +0000 (+0200) Subject: cris: Add new pass eliminating compares after delay-slot-filling X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=9a2ae08b02d185a11e3e525e100ba637ce81c7ff;p=gcc.git cris: Add new pass eliminating compares after delay-slot-filling Delayed-branch-slot-filling a.k.a. reorg or dbr, often causes opportunities for more compare-elimination than were visible for the cmpelim pass. With cc0, these were caught by the elimination pass run in "final", thus the missed opportunities is a regression. A simple reorg-aware pass run just after reorg handles most of them, if not all. I chose to keep the "mach2" pass identifier string I copy-pasted from the SPARC port instead of inventing one like "postdbr_cmpelim". Note the gap in numbers in the test-case file names. gcc: PR target/93372 * config/cris/cris-passes.def: New file. * config/cris/t-cris (PASSES_EXTRA): Add cris-passes.def. * config/cris/cris.c: Add infrastructure bits and pass execute function cris_postdbr_cmpelim. * config/cris/cris-protos.h (make_pass_cris_postdbr_cmpelim): Declare. gcc/testsuite: * gcc.target/cris/pr93372-44.c, gcc.target/cris/pr93372-46.c: New. --- diff --git a/gcc/config/cris/cris-passes.def b/gcc/config/cris/cris-passes.def new file mode 100644 index 00000000000..db3c74d4978 --- /dev/null +++ b/gcc/config/cris/cris-passes.def @@ -0,0 +1,20 @@ +/* Description of target passes for Visium. + Copyright (C) 2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +INSERT_PASS_AFTER (pass_delay_slots, 1, pass_cris_postdbr_cmpelim); diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h index 2db1ea1b8bc..053bba90df4 100644 --- a/gcc/config/cris/cris-protos.h +++ b/gcc/config/cris/cris-protos.h @@ -60,3 +60,5 @@ extern int cris_fatal (char *); extern int cris_initial_elimination_offset (int, int); extern void cris_init_expanders (void); + +extern rtl_opt_pass *make_pass_cris_postdbr_cmpelim (gcc::context *); diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c index b26b9f2e883..59cbceef7a4 100644 --- a/gcc/config/cris/cris.c +++ b/gcc/config/cris/cris.c @@ -51,6 +51,8 @@ along with GCC; see the file COPYING3. If not see #include "output.h" #include "tm-constrs.h" #include "builtins.h" +#include "cfgrtl.h" +#include "tree-pass.h" /* This file should be included last. */ #include "target-def.h" @@ -129,6 +131,8 @@ static void cris_asm_output_mi_thunk static void cris_file_start (void); static void cris_init_libfuncs (void); +static unsigned int cris_postdbr_cmpelim (void); + static reg_class_t cris_preferred_reload_class (rtx, reg_class_t); static int cris_register_move_cost (machine_mode, reg_class_t, reg_class_t); @@ -298,6 +302,204 @@ int cris_cpu_version = CRIS_DEFAULT_CPU_VERSION; struct gcc_target targetm = TARGET_INITIALIZER; +namespace { + +const pass_data pass_data_cris_postdbr_cmpelim = +{ + RTL_PASS, /* type */ + "mach2", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_cris_postdbr_cmpelim : public rtl_opt_pass +{ +public: + pass_cris_postdbr_cmpelim (gcc::context *ctxt) + : rtl_opt_pass (pass_data_cris_postdbr_cmpelim, ctxt) + {} + + /* opt_pass methods: */ + virtual unsigned int execute (function *) + { + return cris_postdbr_cmpelim (); + } + + /* No use running this if reorg and cmpelim aren't both run. */ + virtual bool gate (function *) + { + return + optimize > 0 + && flag_delayed_branch + && flag_compare_elim_after_reload; + } +}; + +} // anon namespace + +rtl_opt_pass * +make_pass_cris_postdbr_cmpelim (gcc::context *ctxt) +{ + return new pass_cris_postdbr_cmpelim (ctxt); +} + +/* "Cheap version" of cmpelim, making use of the opportunities opened up + by reorg. + + Go through the insns of a function and look at each actual compare + insn; considering only those that compare a register to 0. If the + previous CC-affecting insn sets the compared register or if a move + reads from it, try to change that into a CC-setting move and try to + have it recognized. Bail at labels or non-matching insns that + clobber the compared register. If successful, delete the compare. + + Also, reorg isn't up to date regarding data-flow handling, so we + can't go beyond classic RTL scanning. */ + +static unsigned int +cris_postdbr_cmpelim () +{ + rtx_insn *insn; + rtx_insn *next; + rtx_insn *prev_cc_setter = 0; + rtx_insn *prev_cc_outer = 0; + rtx dccr = gen_rtx_REG (CCmode, CRIS_CC0_REGNUM); + + /* Now look for compares in the insn stream. */ + for (insn = get_insns (); insn; insn = next) + { + rtx_insn *outer_insn = insn; + rtx pat = PATTERN (insn); + + next = NEXT_INSN (outer_insn); + + /* Forget previous state when we see a label; we can't track or + merge its state. */ + if (LABEL_P (insn)) + { + prev_cc_setter = 0; + continue; + } + + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* Consider filled delay slots; there might be a comparison there. + It's only the second insn in a sequence that is interesting. */ + if (GET_CODE (pat) == SEQUENCE) + insn = as_a XVECEXP (pat, 0, 1); + /* The "else" eliminates temptations to consider an insn in a + delay slot for elimination; it can only be a prev_cc_setter. */ + else if (prev_cc_setter != 0 && GET_CODE (pat) == SET) + { + rtx dest = SET_DEST (pat); + rtx src = SET_SRC (pat); + rtx prev_set; + + if (REG_P (dest) + && REGNO (dest) == CRIS_CC0_REGNUM + && GET_CODE (src) == COMPARE + && REG_P (XEXP (src, 0)) + && XEXP (src, 1) == const0_rtx + && (prev_set = single_set (prev_cc_setter)) != 0) + { + /* We have a candidate, and a prev_cc_setter to inspect. */ + rtx reg = XEXP (src, 0); + rtx prev_dest = SET_DEST (prev_set); + rtx prev_src = SET_SRC (prev_set); + bool src_same = rtx_equal_p (prev_src, reg); + + /* If the prev_cc_setter isn't a simple SET, or if the + compared register is modified in prev_cc_setter without + being the destination, or if it's modified between + prev_cc_setter (equal to or contained in prev_cc_outer) + and this insn, then we can't use the flags result. And + of course, the SET_DEST of prev_cc_setter (the main + interest, not dccr) has to be the same register and + mode we're interested in - or the SET_SRC. We've + already checked that the compared register isn't + changed in-between. */ + if (REG_P (prev_dest) + && ! reg_set_p (reg, prev_src) + && ! reg_set_between_p (reg, prev_cc_outer, outer_insn) + && (src_same || rtx_equal_p (prev_dest, reg))) + { + machine_mode ccmode = GET_MODE (src); + rtx modeadjusted_dccr + = (ccmode == CCmode ? dccr + : gen_rtx_REG (CCmode, CRIS_CC0_REGNUM)); + rtx compare + /* We don't need to copy_rtx pat: we're going to + delete that insn. */ + = (src_same ? pat + : gen_rtx_SET (modeadjusted_dccr, + gen_rtx_COMPARE (ccmode, + copy_rtx (prev_src), + const0_rtx))); + + /* Replace tentatively, the prev_set combo that is + ((set d s) (clobber dccr)) with + ((cmp s 0) (set d s)) where (cmp s 0) is the + compare we're looking at, and validate it or fail + the whole thing. First replace the ((set d s) ...) + with ((cmp s 0) ...)). */ + validate_change (prev_cc_setter, + &XVECEXP (PATTERN (prev_cc_setter), + 0, 0), compare, true); + + /* Then the clobber with the (set d s). */ + validate_change (prev_cc_setter, + &XVECEXP (PATTERN (prev_cc_setter), + 0, 1), prev_set, true); + + if (apply_change_group ()) + { + delete_insn (insn); + + /* We eliminated the compare. Then we must go to + the next insn: we can't consider the eliminated + insn for the next prev_cc_setter. + + FIXME: if later insns still match, we could do + the delete_insn part only, for them. But, it + seems rare that reorg would manage to move a + second CC-clobber to another delay-slot, + leaving two identical compares (and presumably + users). */ + prev_cc_setter = 0; + continue; + } + } + } + } + + if (reg_set_p (dccr, insn)) + { + rtx pat = PATTERN (insn); + + prev_cc_setter = 0; + + /* Make sure we can use it later on, otherwise forget it. + Don't look too close, we're going to pass a lot of these. + Just make sure the structure is that we can work with. */ + if (GET_CODE (pat) == PARALLEL + && XVECLEN (pat, 0) == 2 + && GET_CODE (XVECEXP (pat, 0, 1)) == CLOBBER) + { + prev_cc_setter = insn; + prev_cc_outer = outer_insn; + } + } + } + + return 0; +} + /* Helper for cris_load_multiple_op and cris_ret_movem_op. */ bool diff --git a/gcc/config/cris/t-cris b/gcc/config/cris/t-cris index 97d6efa534b..eb4411ede56 100644 --- a/gcc/config/cris/t-cris +++ b/gcc/config/cris/t-cris @@ -24,3 +24,5 @@ # The makefile macros etc. are included in the order found in the # section "Target Fragment" in the gcc info-files (or the paper copy) of # "Using and Porting GCC" + +PASSES_EXTRA += $(srcdir)/config/cris/cris-passes.def diff --git a/gcc/testsuite/gcc.target/cris/pr93372-44.c b/gcc/testsuite/gcc.target/cris/pr93372-44.c new file mode 100644 index 00000000000..5bd35d12087 --- /dev/null +++ b/gcc/testsuite/gcc.target/cris/pr93372-44.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-not {\tcmp|\ttest} } } */ + +extern void foo(void); +unsigned int x (unsigned int a, unsigned int b, unsigned int *c) +{ + unsigned int y = a & 15; + unsigned int z = y + b; + if (y == 0) + *c = z; + return z; +} diff --git a/gcc/testsuite/gcc.target/cris/pr93372-46.c b/gcc/testsuite/gcc.target/cris/pr93372-46.c new file mode 100644 index 00000000000..5d7c0301318 --- /dev/null +++ b/gcc/testsuite/gcc.target/cris/pr93372-46.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=v10" } */ +/* { dg-final { scan-assembler-not {\tnop} } } */ +/* { dg-final { scan-assembler-times {\tcmp|\ttest|\tmove.d \$r10,\$r} 1 } } */ + +/* We either have a move from "a" to some other register or a compare. */ + +extern void foo(void); +unsigned int x (unsigned int a, unsigned int b, unsigned int *c, unsigned int *d) +{ + unsigned int z = __builtin_clz(b); + if (a != 0) + *c = a; + *d = a; + return z; +}