From: Mark Mitchell Date: Wed, 3 Nov 1999 22:45:45 +0000 (+0000) Subject: gcse.c (struct null_pointer_info): New type. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=0511851c7bebdaee1226960fe405c9089a120a23;p=gcc.git gcse.c (struct null_pointer_info): New type. * gcse.c (struct null_pointer_info): New type. (get_bitmap_width): New function. (current_block): Remove. (nonnull_local): Likewise. (nonnull_killed): Likewise. (invalidate_nonnull_info): Take a null_pointer_info as input. (delete_null_pointer_checks_1): New function. (delete_null_pointer_checks): Use it. From-SVN: r30384 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index bdf63379f12..b090806ac41 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,14 @@ Wed Nov 3 14:51:59 1999 Mark P. Mitchell + * gcse.c (struct null_pointer_info): New type. + (get_bitmap_width): New function. + (current_block): Remove. + (nonnull_local): Likewise. + (nonnull_killed): Likewise. + (invalidate_nonnull_info): Take a null_pointer_info as input. + (delete_null_pointer_checks_1): New function. + (delete_null_pointer_checks): Use it. + * haifa-sched.c (find_rgns): Replace uses of alloca with xmalloc. (split_edges): Likewise. (schedule_block): Likewise. diff --git a/gcc/gcse.c b/gcc/gcse.c index e2d7cf7df70..067cfbfdd91 100644 --- a/gcc/gcse.c +++ b/gcc/gcse.c @@ -510,6 +510,18 @@ static sbitmap *rd_kill, *rd_gen, *reaching_defs, *rd_out; /* for available exprs */ static sbitmap *ae_kill, *ae_gen, *ae_in, *ae_out; +/* Objects of this type are passed around by the null-pointer check + removal routines. */ +struct null_pointer_info { + /* The basic block being processed. */ + int current_block; + /* The first register to be handled in this pass. */ + int min_reg; + /* One greater than the last register to be handled in this pass. */ + int max_reg; + sbitmap *nonnull_local; + sbitmap *nonnull_killed; +}; static void compute_can_copy PROTO ((void)); @@ -520,6 +532,7 @@ static void alloc_gcse_mem PROTO ((rtx)); static void free_gcse_mem PROTO ((void)); static void alloc_reg_set_mem PROTO ((int)); static void free_reg_set_mem PROTO ((void)); +static int get_bitmap_width PROTO ((int, int, int)); static void record_one_set PROTO ((int, rtx)); static void record_set_info PROTO ((rtx, rtx, void *)); static void compute_sets PROTO ((rtx)); @@ -622,6 +635,9 @@ static int handle_avail_expr PROTO ((rtx, struct expr *)); static int classic_gcse PROTO ((void)); static int one_classic_gcse_pass PROTO ((int)); static void invalidate_nonnull_info PROTO ((rtx, rtx, void *)); +static void delete_null_pointer_checks_1 PROTO ((int_list_ptr *, int *, + sbitmap *, sbitmap *, + struct null_pointer_info *)); static rtx process_insert_insn PROTO ((struct expr *)); static int pre_edge_insert PROTO ((struct edge_list *, struct expr **)); static int expr_reaches_here_p_work PROTO ((struct occr *, struct expr *, int, int, char *)); @@ -948,6 +964,50 @@ free_gcse_mem () free (mem_set_in_block); } +/* Many of the global optimization algorithms work by solving dataflow + equations for various expressions. Initially, some local value is + computed for each expression in each block. Then, the values + across the various blocks are combined (by following flow graph + edges) to arrive at global values. Conceptually, each set of + equations is independent. We may therefore solve all the equations + in parallel, solve them one at a time, or pick any intermediate + approach. + + When you're going to need N two-dimensional bitmaps, each X (say, + the number of blocks) by Y (say, the number of expressions), call + this function. It's not important what X and Y represent; only + that Y correspond to the things that can be done in parallel. This + function will return an appropriate chunking factor C; you should + solve C sets of equations in parallel. By going through this + function, we can easily trade space against time; by solving fewer + equations in parallel we use less space. */ + +static int +get_bitmap_width (n, x, y) + int n; + int x; + int y; +{ + /* It's not really worth figuring out *exactly* how much memory will + be used by a particular choice. The important thing is to get + something approximately right. */ + size_t max_bitmap_memory = 10 * 1024 * 1024; + + /* The number of bytes we'd use for a single column of minimum + width. */ + size_t column_size = n * x * sizeof (SBITMAP_ELT_TYPE); + + /* Often, it's reasonable just to solve all the equations in + parallel. */ + if (column_size * SBITMAP_SET_SIZE (y) <= max_bitmap_memory) + return y; + + /* Otherwise, pick the largest width we can, without going over the + limit. */ + return SBITMAP_ELT_BITS * ((max_bitmap_memory + column_size - 1) + / column_size); +} + /* Compute the local properties of each recorded expression. Local properties are those that are defined by the block, irrespective @@ -4923,23 +4983,19 @@ compute_transpout () /* Removal of useless null pointer checks */ -/* These need to be file static for communication between - invalidate_nonnull_info and delete_null_pointer_checks. */ -static int current_block; -static sbitmap *nonnull_local; -static sbitmap *nonnull_killed; - /* Called via note_stores. X is set by SETTER. If X is a register we must - invalidate nonnull_local and set nonnull_killed. + invalidate nonnull_local and set nonnull_killed. DATA is really a + `null_pointer_info *'. We ignore hard registers. */ static void invalidate_nonnull_info (x, setter, data) rtx x; rtx setter ATTRIBUTE_UNUSED; - void *data ATTRIBUTE_UNUSED; + void *data; { int offset, regno; + struct null_pointer_info* npi = (struct null_pointer_info *) data; offset = 0; while (GET_CODE (x) == SUBREG) @@ -4947,89 +5003,34 @@ invalidate_nonnull_info (x, setter, data) /* Ignore anything that is not a register or is a hard register. */ if (GET_CODE (x) != REG - || REGNO (x) < FIRST_PSEUDO_REGISTER) + || REGNO (x) < npi->min_reg + || REGNO (x) >= npi->max_reg) return; - regno = REGNO (x); + regno = REGNO (x) - npi->min_reg; - RESET_BIT (nonnull_local[current_block], regno); - SET_BIT (nonnull_killed[current_block], regno); - + RESET_BIT (npi->nonnull_local[npi->current_block], regno); + SET_BIT (npi->nonnull_killed[npi->current_block], regno); } -/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated - at compile time. - - This is conceptually similar to global constant/copy propagation and - classic global CSE (it even uses the same dataflow equations as cprop). - - If a register is used as memory address with the form (mem (reg)), then we - know that REG can not be zero at that point in the program. Any instruction - which sets REG "kills" this property. - - So, if every path leading to a conditional branch has an available memory - reference of that form, then we know the register can not have the value - zero at the conditional branch. - - So we merely need to compute the local properies and propagate that data - around the cfg, then optimize where possible. +/* Do null-pointer check elimination for the registers indicated in + NPI. NONNULL_AVIN and NONNULL_AVOUT are pre-allocated sbitmaps; + they are not our responsibility to free. */ - We run this pass two times. Once before CSE, then again after CSE. This - has proven to be the most profitable approach. It is rare for new - optimization opportunities of this nature to appear after the first CSE - pass. - - This could probably be integrated with global cprop with a little work. */ - -void -delete_null_pointer_checks (f) - rtx f; +static void +delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin, + nonnull_avout, npi) + int_list_ptr *s_preds; + int *block_reg; + sbitmap *nonnull_avin; + sbitmap *nonnull_avout; + struct null_pointer_info *npi; { - int_list_ptr *s_preds, *s_succs; - int *num_preds, *num_succs; int changed, bb; - sbitmap *nonnull_avin, *nonnull_avout; + int current_block; + sbitmap *nonnull_local = npi->nonnull_local; + sbitmap *nonnull_killed = npi->nonnull_killed; - /* First break the program into basic blocks. */ - find_basic_blocks (f, max_reg_num (), NULL, 1); - - /* If we have only a single block, then there's nothing to do. */ - if (n_basic_blocks <= 1) - { - /* Free storage allocated by find_basic_blocks. */ - free_basic_block_vars (0); - return; - } - - /* Trying to perform global optimizations on flow graphs which have - a high connectivity will take a long time and is unlikely to be - particularly useful. - - In normal circumstances a cfg should have about twice has many edges - as blocks. But we do not want to punish small functions which have - a couple switch statements. So we require a relatively large number - of basic blocks and the ratio of edges to blocks to be high. */ - if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20) - { - /* Free storage allocated by find_basic_blocks. */ - free_basic_block_vars (0); - return; - } - - /* We need predecessor/successor lists as well as pred/succ counts for - each basic block. */ - s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr)); - s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr)); - num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int)); - num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int)); - compute_preds_succs (s_preds, s_succs, num_preds, num_succs); - - /* Allocate bitmaps to hold local and global properties. */ - nonnull_local = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ()); - nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ()); - nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ()); - nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, max_reg_num ()); - /* Compute local properties, nonnull and killed. A register will have the nonnull property if at the end of the current block its value is known to be nonnull. The killed property indicates that somewhere in @@ -5044,6 +5045,9 @@ delete_null_pointer_checks (f) { rtx insn, stop_insn; + /* Set the current block for invalidate_nonnull_info. */ + npi->current_block = current_block; + /* Scan each insn in the basic block looking for memory references and register sets. */ stop_insn = NEXT_INSN (BLOCK_END (current_block)); @@ -5052,6 +5056,7 @@ delete_null_pointer_checks (f) insn = NEXT_INSN (insn)) { rtx set; + rtx reg; /* Ignore anything that is not a normal insn. */ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') @@ -5063,7 +5068,7 @@ delete_null_pointer_checks (f) set = single_set (insn); if (!set) { - note_stores (PATTERN (insn), invalidate_nonnull_info, NULL); + note_stores (PATTERN (insn), invalidate_nonnull_info, npi); continue; } @@ -5071,22 +5076,24 @@ delete_null_pointer_checks (f) in case it uses its address register as a dest (which kills the nonnull property). */ if (GET_CODE (SET_SRC (set)) == MEM - && GET_CODE (XEXP (SET_SRC (set), 0)) == REG - && REGNO (XEXP (SET_SRC (set), 0)) >= FIRST_PSEUDO_REGISTER) + && GET_CODE ((reg = XEXP (SET_SRC (set), 0))) == REG + && REGNO (reg) >= npi->min_reg + && REGNO (reg) < npi->max_reg) SET_BIT (nonnull_local[current_block], - REGNO (XEXP (SET_SRC (set), 0))); + REGNO (reg) - npi->min_reg); /* Now invalidate stuff clobbered by this insn. */ - note_stores (PATTERN (insn), invalidate_nonnull_info, NULL); + note_stores (PATTERN (insn), invalidate_nonnull_info, npi); /* And handle stores, we do these last since any sets in INSN can not kill the nonnull property if it is derived from a MEM appearing in a SET_DEST. */ if (GET_CODE (SET_DEST (set)) == MEM - && GET_CODE (XEXP (SET_DEST (set), 0)) == REG - && REGNO (XEXP (SET_DEST (set), 0)) >= FIRST_PSEUDO_REGISTER) + && GET_CODE ((reg = XEXP (SET_DEST (set), 0))) == REG + && REGNO (reg) >= npi->min_reg + && REGNO (reg) < npi->max_reg) SET_BIT (nonnull_local[current_block], - REGNO (XEXP (SET_DEST (set), 0))); + REGNO (reg) - npi->min_reg); } } @@ -5117,33 +5124,21 @@ delete_null_pointer_checks (f) for (bb = 0; bb < n_basic_blocks; bb++) { rtx last_insn = BLOCK_END (bb); - rtx condition, earliest, reg; + rtx condition, earliest; int compare_and_branch; - /* We only want conditional branches. */ - if (GET_CODE (last_insn) != JUMP_INSN - || !condjump_p (last_insn) - || simplejump_p (last_insn)) + /* Since MIN_REG is always at least FIRST_PSEUDO_REGISTER, and + since BLOCK_REG[BB] is zero if this block did not end with a + comparison against zero, this condition works. */ + if (block_reg[bb] < npi->min_reg + || block_reg[bb] >= npi->max_reg) continue; /* LAST_INSN is a conditional jump. Get its condition. */ condition = get_condition (last_insn, &earliest); - /* If we were unable to get the condition, or it is not a equality - comparison against zero then there's nothing we can do. */ - if (!condition - || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ) - || GET_CODE (XEXP (condition, 1)) != CONST_INT - || XEXP (condition, 1) != CONST0_RTX (GET_MODE (XEXP (condition, 0)))) - continue; - - /* We must be checking a register against zero. */ - reg = XEXP (condition, 0); - if (GET_CODE (reg) != REG) - continue; - /* Is the register known to have a nonzero value? */ - if (!TEST_BIT (nonnull_avout[bb], REGNO (reg))) + if (!TEST_BIT (nonnull_avout[bb], block_reg[bb] - npi->min_reg)) continue; /* Try to compute whether the compare/branch at the loop end is one or @@ -5170,6 +5165,139 @@ delete_null_pointer_checks (f) delete_insn (last_insn); if (compare_and_branch == 2) delete_insn (earliest); + + /* Don't check this block again. (Note that BLOCK_END is + invalid here; we deleted the last instruction in the + block.) */ + block_reg[bb] = 0; + } +} + +/* Find EQ/NE comparisons against zero which can be (indirectly) evaluated + at compile time. + + This is conceptually similar to global constant/copy propagation and + classic global CSE (it even uses the same dataflow equations as cprop). + + If a register is used as memory address with the form (mem (reg)), then we + know that REG can not be zero at that point in the program. Any instruction + which sets REG "kills" this property. + + So, if every path leading to a conditional branch has an available memory + reference of that form, then we know the register can not have the value + zero at the conditional branch. + + So we merely need to compute the local properies and propagate that data + around the cfg, then optimize where possible. + + We run this pass two times. Once before CSE, then again after CSE. This + has proven to be the most profitable approach. It is rare for new + optimization opportunities of this nature to appear after the first CSE + pass. + + This could probably be integrated with global cprop with a little work. */ + +void +delete_null_pointer_checks (f) + rtx f; +{ + int_list_ptr *s_preds, *s_succs; + int *num_preds, *num_succs; + sbitmap *nonnull_avin, *nonnull_avout; + int *block_reg; + int bb; + int reg; + int regs_per_pass; + int max_reg; + struct null_pointer_info npi; + + /* First break the program into basic blocks. */ + find_basic_blocks (f, max_reg_num (), NULL, 1); + + /* If we have only a single block, then there's nothing to do. */ + if (n_basic_blocks <= 1) + { + /* Free storage allocated by find_basic_blocks. */ + free_basic_block_vars (0); + return; + } + + /* Trying to perform global optimizations on flow graphs which have + a high connectivity will take a long time and is unlikely to be + particularly useful. + + In normal circumstances a cfg should have about twice has many edges + as blocks. But we do not want to punish small functions which have + a couple switch statements. So we require a relatively large number + of basic blocks and the ratio of edges to blocks to be high. */ + if (n_basic_blocks > 1000 && n_edges / n_basic_blocks >= 20) + { + /* Free storage allocated by find_basic_blocks. */ + free_basic_block_vars (0); + return; + } + + /* We need predecessor/successor lists as well as pred/succ counts for + each basic block. */ + s_preds = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr)); + s_succs = (int_list_ptr *) gmalloc (n_basic_blocks * sizeof (int_list_ptr)); + num_preds = (int *) gmalloc (n_basic_blocks * sizeof (int)); + num_succs = (int *) gmalloc (n_basic_blocks * sizeof (int)); + compute_preds_succs (s_preds, s_succs, num_preds, num_succs); + + /* We need four bitmaps, each with a bit for each register in each + basic block. */ + max_reg = max_reg_num (); + regs_per_pass = get_bitmap_width (4, n_basic_blocks, max_reg); + + /* Allocate bitmaps to hold local and global properties. */ + npi.nonnull_local = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass); + npi.nonnull_killed = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass); + nonnull_avin = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass); + nonnull_avout = sbitmap_vector_alloc (n_basic_blocks, regs_per_pass); + + /* Go through the basic blocks, seeing whether or not each block + ends with a conditional branch whose condition is a comparison + against zero. Record the register compared in BLOCK_REG. */ + block_reg = (int *) xcalloc (n_basic_blocks, sizeof (int)); + for (bb = 0; bb < n_basic_blocks; bb++) + { + rtx last_insn = BLOCK_END (bb); + rtx condition, earliest, reg; + + /* We only want conditional branches. */ + if (GET_CODE (last_insn) != JUMP_INSN + || !condjump_p (last_insn) + || simplejump_p (last_insn)) + continue; + + /* LAST_INSN is a conditional jump. Get its condition. */ + condition = get_condition (last_insn, &earliest); + + /* If we were unable to get the condition, or it is not a equality + comparison against zero then there's nothing we can do. */ + if (!condition + || (GET_CODE (condition) != NE && GET_CODE (condition) != EQ) + || GET_CODE (XEXP (condition, 1)) != CONST_INT + || (XEXP (condition, 1) + != CONST0_RTX (GET_MODE (XEXP (condition, 0))))) + continue; + + /* We must be checking a register against zero. */ + reg = XEXP (condition, 0); + if (GET_CODE (reg) != REG) + continue; + + block_reg[bb] = REGNO (reg); + } + + /* Go through the algorithm for each block of registers. */ + for (reg = FIRST_PSEUDO_REGISTER; reg < max_reg; reg += regs_per_pass) + { + npi.min_reg = reg; + npi.max_reg = MIN (reg + regs_per_pass, max_reg); + delete_null_pointer_checks_1 (s_preds, block_reg, nonnull_avin, + nonnull_avout, &npi); } /* Free storage allocated by find_basic_blocks. */ @@ -5181,9 +5309,12 @@ delete_null_pointer_checks (f) free (num_preds); free (num_succs); + /* Free the table of registers compared at the end of every block. */ + free (block_reg); + /* Free bitmaps. */ - free (nonnull_local); - free (nonnull_killed); + free (npi.nonnull_local); + free (npi.nonnull_killed); free (nonnull_avin); free (nonnull_avout); }