X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=gcc%2Fpostreload-gcse.c;h=713180f5a86a72ba3f1613031465376b51ac25b7;hb=f2e9fe5f97d88fc876c44e6ffa57a2e85150adf9;hp=f8a770e25f2872d60c58cafb2622f49a3edc67f1;hpb=ff56974436c9c8f994c9c80040ca026bde0965ee;p=gcc.git

diff --git a/gcc/postreload-gcse.c b/gcc/postreload-gcse.c
index f8a770e25f2..713180f5a86 100644
--- a/gcc/postreload-gcse.c
+++ b/gcc/postreload-gcse.c
@@ -1,5 +1,5 @@
 /* Post reload partially redundant load elimination
-   Copyright (C) 2004-2015 Free Software Foundation, Inc.
+   Copyright (C) 2004-2020 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -26,6 +26,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree.h"
 #include "predict.h"
 #include "df.h"
+#include "memmodel.h"
 #include "tm_p.h"
 #include "insn-config.h"
 #include "emit-rtl.h"
@@ -34,10 +35,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgrtl.h"
 #include "profile.h"
 #include "expr.h"
-#include "params.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "intl.h"
 #include "gcse-common.h"
+#include "gcse.h"
+#include "regs.h"
+#include "function-abi.h"
 
 /* The following code implements gcse after reload, the purpose of this
    pass is to cleanup redundant loads generated by reload and other
@@ -363,7 +367,7 @@ insert_expr_in_table (rtx x, rtx_insn *insn)
   int do_not_record_p;
   hashval_t hash;
   struct expr *cur_expr, **slot;
-  struct occr *avail_occr, *last_occr = NULL;
+  struct occr *avail_occr;
 
   hash = hash_expr (x, &do_not_record_p);
 
@@ -404,38 +408,22 @@ insert_expr_in_table (rtx x, rtx_insn *insn)
       cur_expr = *slot;
     }
 
-  /* Search for another occurrence in the same basic block.  */
+  /* Search for another occurrence in the same basic block.  We insert
+     insns blockwise from start to end, so keep appending to the
+     start of the list so we have to check only a single element.  */
   avail_occr = cur_expr->avail_occr;
-  while (avail_occr
-         && BLOCK_FOR_INSN (avail_occr->insn) != BLOCK_FOR_INSN (insn))
-    {
-      /* If an occurrence isn't found, save a pointer to the end of
-         the list.  */
-      last_occr = avail_occr;
-      avail_occr = avail_occr->next;
-    }
-
-  if (avail_occr)
-    /* Found another instance of the expression in the same basic block.
-       Prefer this occurrence to the currently recorded one.  We want
-       the last one in the block and the block is scanned from start
-       to end.  */
+  if (avail_occr
+      && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
     avail_occr->insn = insn;
   else
     {
       /* First occurrence of this expression in this basic block.  */
       avail_occr = (struct occr *) obstack_alloc (&occr_obstack,
                                                   sizeof (struct occr));
-
-      /* First occurrence of this expression in any block?  */
-      if (cur_expr->avail_occr == NULL)
-        cur_expr->avail_occr = avail_occr;
-      else
-        last_occr->next = avail_occr;
-
       avail_occr->insn = insn;
-      avail_occr->next = NULL;
+      avail_occr->next = cur_expr->avail_occr;
       avail_occr->deleted_p = 0;
+      cur_expr->avail_occr = avail_occr;
     }
 }
 
@@ -503,7 +491,7 @@ dump_hash_table (FILE *file)
            (long) expr_table->size (),
            (long) expr_table->elements (),
            expr_table->collisions ());
-  if (expr_table->elements () > 0)
+  if (!expr_table->is_empty ())
     {
       fprintf (file, "\n\ntable entries:\n");
       expr_table->traverse <FILE *, dump_expr_hash_table_entry> (file);
@@ -671,7 +659,7 @@ load_killed_in_block_p (int uid_limit, rtx x, bool after_insn)
          It will set mems_conflict_p to nonzero if there may be a
          conflict between X and SETTER.  */
       mems_conflict_p = 0;
-      note_stores (PATTERN (setter), find_mem_conflicts, x);
+      note_stores (setter, find_mem_conflicts, x);
       if (mems_conflict_p)
         return 1;
 
@@ -773,7 +761,7 @@ record_opr_changes (rtx_insn *insn)
   rtx note;
 
   /* Find all stores and record them.  */
-  note_stores (PATTERN (insn), record_last_set_info, insn);
+  note_stores (insn, record_last_set_info, insn);
 
   /* Also record autoincremented REGs for this insn as changed.  */
   for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
@@ -784,22 +772,14 @@ record_opr_changes (rtx_insn *insn)
   if (CALL_P (insn))
     {
       unsigned int regno;
-      rtx link, x;
       hard_reg_set_iterator hrsi;
-      EXECUTE_IF_SET_IN_HARD_REG_SET (regs_invalidated_by_call, 0, regno, hrsi)
+      /* We don't track modes of hard registers, so we need to be
+         conservative and assume that partial kills are full kills.  */
+      HARD_REG_SET callee_clobbers
+        = insn_callee_abi (insn).full_and_partial_reg_clobbers ();
+      EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
         record_last_reg_set_info_regno (insn, regno);
 
-      for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
-        if (GET_CODE (XEXP (link, 0)) == CLOBBER)
-          {
-            x = XEXP (XEXP (link, 0), 0);
-            if (REG_P (x))
-              {
-                gcc_assert (HARD_REGISTER_P (x));
-                record_last_reg_set_info (insn, x);
-              }
-          }
-
       if (! RTL_CONST_OR_PURE_CALL_P (insn))
         record_last_mem_set_info (insn);
     }
@@ -962,7 +942,9 @@ bb_has_well_behaved_predecessors (basic_block bb)
 
   FOR_EACH_EDGE (pred, ei, bb->preds)
     {
-      if ((pred->flags & EDGE_ABNORMAL) && EDGE_CRITICAL_P (pred))
+      /* commit_one_edge_insertion refuses to insert on abnormal edges even if
+         the source has only one successor so EDGE_CRITICAL_P is too weak.  */
+      if ((pred->flags & EDGE_ABNORMAL) && !single_pred_p (pred->dest))
         return false;
 
       if ((pred->flags & EDGE_ABNORMAL_CALL) && cfun->has_nonlocal_label)
@@ -989,7 +971,8 @@ get_bb_avail_insn (basic_block bb, struct occr *orig_occr, int bitmap_index)
   /* If we could not find an occurrence in BB, see if BB
      has a single predecessor with an occurrence that is
      transparent through BB.  */
-  if (single_pred_p (bb)
+  if (transp
+      && single_pred_p (bb)
       && bitmap_bit_p (transp[bb->index], bitmap_index)
       && (occr = get_bb_avail_insn (single_pred (bb), orig_occr, bitmap_index)))
     {
@@ -1042,14 +1025,16 @@ eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
   struct unoccr *occr, *avail_occrs = NULL;
   struct unoccr *unoccr, *unavail_occrs = NULL, *rollback_unoccr = NULL;
   int npred_ok = 0;
-  gcov_type ok_count = 0; /* Redundant load execution count.  */
-  gcov_type critical_count = 0; /* Execution count of critical edges.  */
+  profile_count ok_count = profile_count::zero ();
+                 /* Redundant load execution count.  */
+  profile_count critical_count = profile_count::zero ();
+                 /* Execution count of critical edges.  */
   edge_iterator ei;
   bool critical_edge_split = false;
 
   /* The execution count of the loads to be added to make the
      load fully redundant.  */
-  gcov_type not_ok_count = 0;
+  profile_count not_ok_count = profile_count::zero ();
   basic_block pred_bb;
 
   pat = PATTERN (insn);
@@ -1103,13 +1088,14 @@ eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
             avail_insn = NULL;
         }
 
-      if (EDGE_CRITICAL_P (pred))
-        critical_count += pred->count;
+      if (EDGE_CRITICAL_P (pred) && pred->count ().initialized_p ())
+        critical_count += pred->count ();
 
       if (avail_insn != NULL_RTX)
         {
           npred_ok++;
-          ok_count += pred->count;
+          if (pred->count ().initialized_p ())
+            ok_count = ok_count + pred->count ();
           if (! set_noop_p (PATTERN (gen_move_insn (copy_rtx (dest),
                                                     copy_rtx (avail_reg)))))
             {
@@ -1133,7 +1119,8 @@ eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
           /* Adding a load on a critical edge will cause a split.  */
           if (EDGE_CRITICAL_P (pred))
             critical_edge_split = true;
-          not_ok_count += pred->count;
+          if (pred->count ().initialized_p ())
+            not_ok_count = not_ok_count + pred->count ();
           unoccr = (struct unoccr *) obstack_alloc (&unoccr_obstack,
                                                     sizeof (struct unoccr));
           unoccr->insn = NULL;
@@ -1151,15 +1138,28 @@ eliminate_partially_redundant_load (basic_block bb, rtx_insn *insn,
       || (optimize_bb_for_size_p (bb) && npred_ok > 1)
       /* If we don't have profile information we cannot tell if splitting
          a critical edge is profitable or not so don't do it.  */
-      || ((! profile_info || ! flag_branch_probabilities
+      || ((!profile_info || profile_status_for_fn (cfun) != PROFILE_READ
            || targetm.cannot_modify_jumps_p ())
           && critical_edge_split))
     goto cleanup;
 
   /* Check if it's worth applying the partial redundancy elimination.  */
-  if (ok_count < GCSE_AFTER_RELOAD_PARTIAL_FRACTION * not_ok_count)
+  if (ok_count.to_gcov_type ()
+      < param_gcse_after_reload_partial_fraction * not_ok_count.to_gcov_type ())
     goto cleanup;
-  if (ok_count < GCSE_AFTER_RELOAD_CRITICAL_FRACTION * critical_count)
+
+  gcov_type threshold;
+#if (GCC_VERSION >= 5000)
+  if (__builtin_mul_overflow (param_gcse_after_reload_critical_fraction,
+                              critical_count.to_gcov_type (), &threshold))
+    threshold = profile_count::max_count;
+#else
+  threshold
+    = (param_gcse_after_reload_critical_fraction
+       * critical_count.to_gcov_type ());
+#endif
+
+  if (ok_count.to_gcov_type () < threshold)
     goto cleanup;
 
   /* Generate moves to the loaded register from where
@@ -1349,6 +1349,10 @@ delete_redundant_insns (void)
 static void
 gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
 {
+  /* Disable computing transparentness if it is too expensive.  */
+  bool do_transp
+    = !gcse_or_cprop_is_too_expensive (_("using simple load CSE after register "
+                                         "allocation"));
 
   memset (&stats, 0, sizeof (stats));
 
@@ -1364,21 +1368,27 @@ gcse_after_reload_main (rtx f ATTRIBUTE_UNUSED)
   if (dump_file)
     dump_hash_table (dump_file);
 
-  if (expr_table->elements () > 0)
+  if (!expr_table->is_empty ())
     {
       /* Knowing which MEMs are transparent through a block can signifiantly
          increase the number of redundant loads found.  So compute transparency
         information for each memory expression in the hash table.  */
       df_analyze ();
-      /* This can not be part of the normal allocation routine because
-         we have to know the number of elements in the hash table.  */
-      transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
-                                     expr_table->elements ());
-      bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
-      expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
+      if (do_transp)
+        {
+          /* This cannot be part of the normal allocation routine because
+             we have to know the number of elements in the hash table.  */
+          transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
+                                         expr_table->elements ());
+          bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
+          expr_table->traverse <FILE *, compute_expr_transp> (dump_file);
+        }
+      else
+        transp = NULL;
       eliminate_partially_redundant_loads ();
       delete_redundant_insns ();
-      sbitmap_vector_free (transp);
+      if (do_transp)
+        sbitmap_vector_free (transp);
 
       if (dump_file)
         {