/* Global common subexpression elimination/Partial redundancy elimination
   and global constant/copy propagation for GNU compiler.
   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
   2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* TODO
   - reordering of memory allocation and freeing to be more space efficient
   - do a rough calculation of how many registers are needed in each block,
     and a rough calculation of how many registers are available in each
     class, and use that to throttle back the code in cases where RTX_COST
     is minimal.
   - a store to the same address as a load does not kill the load if the
     source of the store is also the destination of the load.  Handling this
     allows more load motion, particularly out of loops.
*/

/* References searched while implementing this.

   Compilers: Principles, Techniques, and Tools
   Aho, Sethi, Ullman
   Addison-Wesley, 1988

   Global Optimization by Suppression of Partial Redundancies
   E. Morel, C. Renvoise
   Communications of the ACM, Vol. 22, Num. 2, Feb. 1979

   A Portable Machine-Independent Global Optimizer - Design and Measurements
   Frederick Chow
   Stanford Ph.D. thesis, Dec. 1983

   A Fast Algorithm for Code Movement Optimization
   D.M. Dhamdhere
   SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988

   A Solution to a Problem with Morel and Renvoise's
   Global Optimization by Suppression of Partial Redundancies
   K-H Drechsler, M.P. Stadel
   ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988

   Practical Adaptation of the Global Optimization
   Algorithm of Morel and Renvoise
   D.M. Dhamdhere
   ACM TOPLAS, Vol. 13, Num. 2, Apr. 1991

   Efficiently Computing Static Single Assignment Form and the Control
   Dependence Graph
   R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
   ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991

   Lazy Code Motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   What's In a Region?  Or Computing Control Dependence Regions in Near-Linear
   Time for Reducible Flow Control
   Thomas Ball
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   An Efficient Representation for Sparse Sets
   Preston Briggs, Linda Torczon
   ACM Letters on Programming Languages and Systems,
   Vol. 2, Num. 1-4, Mar-Dec 1993

   A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
   K-H Drechsler, M.P. Stadel
   ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993

   Partial Dead Code Elimination
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Effective Partial Redundancy Elimination
   P. Briggs, K.D. Cooper
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   The Program Structure Tree: Computing Control Regions in Linear Time
   R. Johnson, D. Pearson, K. Pingali
   ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994

   Optimal Code Motion: Theory and Practice
   J. Knoop, O. Ruthing, B. Steffen
   ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994

   The power of assignment motion
   J. Knoop, O. Ruthing, B. Steffen
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Global code motion / global value numbering
   C. Click
   ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI

   Value Driven Redundancy Elimination
   L.T. Simpson
   Rice University Ph.D. thesis, Apr. 1996

   Value Numbering
   L.T. Simpson
   Massively Scalar Compiler Project, Rice University, Sep. 1996

   High Performance Compilers for Parallel Computing
   Michael Wolfe
   Addison-Wesley, 1996

   Advanced Compiler Design and Implementation
   Steven Muchnick
   Morgan Kaufmann, 1997

   Building an Optimizing Compiler
   Robert Morgan
   Digital Press, 1998

   People wishing to speed up the code here should read:
     Elimination Algorithms for Data Flow Analysis
     B.G. Ryder, M.C. Paull
     ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986

     How to Analyze Large Programs Efficiently and Informatively
     D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
     ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI

   People wishing to do something different can find various possibilities
   in the above papers and elsewhere.
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "diagnostic-core.h"
#include "toplev.h"

#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "flags.h"
#include "insn-config.h"
#include "recog.h"
#include "basic-block.h"
#include "output.h"
#include "function.h"
#include "expr.h"
#include "except.h"
#include "ggc.h"
#include "params.h"
#include "cselib.h"
#include "intl.h"
#include "obstack.h"
#include "timevar.h"
#include "tree-pass.h"
#include "hashtab.h"
#include "df.h"
#include "dbgcnt.h"
#include "target.h"
#include "gcse.h"

/* We support GCSE via Partial Redundancy Elimination.  PRE optimizations
   are a superset of those done by classic GCSE.

   We perform the following steps:

   1) Compute a table of places where registers are set.

   2) Perform copy/constant propagation.

   3) Perform global CSE using lazy code motion if not optimizing
      for size, or code hoisting if we are.

   4) Perform another pass of copy/constant propagation.  Try to bypass
      conditional jumps if the condition can be computed from a value
      available on an incoming edge.

   Two passes of copy/constant propagation are done because the first one
   enables more GCSE and the second one helps to clean up the copies that
   GCSE creates.  This is needed more for PRE than for classic GCSE because
   classic GCSE will try to use an existing register containing the common
   subexpression rather than create a new one.  This is harder to do for PRE
   because of the code motion (which classic GCSE doesn't do).

   Expressions we are interested in GCSE-ing are of the form
   (set (pseudo-reg) (expression)).
   Function want_to_gcse_p says what these are.

   In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
   This allows PRE to hoist expressions that are expressed in multiple insns,
   such as complex address calculations (e.g. for PIC code, or loads with a
   high part and a low part).

   PRE handles moving invariant expressions out of loops (by treating them as
   partially redundant).

   **********************

   We used to support multiple passes but there are diminishing returns in
   doing so.  The first pass usually makes 90% of the changes that are doable.
   A second pass can make a few more changes made possible by the first pass.
   Experiments show any further passes don't make enough changes to justify
   the expense.

   A study of SPEC92 using an unlimited number of passes:
   [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
   [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
   [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1

   It was found that doing copy propagation between each pass enables further
   substitutions.

   This study was done before expressions in REG_EQUAL notes were added as
   candidate expressions for optimization, and before the GIMPLE optimizers
   were added.  Multiple passes are probably even less efficient now than
   they were at the time of the study.

   PRE is quite expensive in complicated functions because the DFA can take
   a while to converge.  Hence we only perform one pass.

   **********************

   The steps for PRE are:

   1) Build the hash table of expressions we wish to GCSE (expr_hash_table).

   2) Perform the data flow analysis for PRE.

   3) Delete the redundant instructions.

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CPROP pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing
   register.  */

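/* For illustration only (this is not code from the pass): PRE makes a
   partially redundant computation fully redundant by inserting it on the
   paths where it was missing, then replacing the redundant evaluation with
   a copy from a new pseudo.  Using hypothetical source-level names:

	before PRE		after PRE

	if (c)			if (c)
	  x = a + b;		  { x = a + b; t = x; }
				else
				  t = a + b;
	y = a + b;		y = t;

   On the then-path A+B was already available, so only a copy into T is
   needed there; on the other path the computation is inserted (step 4
   above), and the second evaluation becomes a copy from T (steps 3/5).  */
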
\f
/* GCSE global vars.  */

struct target_gcse default_target_gcse;
#if SWITCHABLE_TARGET
struct target_gcse *this_target_gcse = &default_target_gcse;
#endif

/* Set to nonzero if CSE should run after all GCSE optimizations are done.  */
int flag_rerun_cse_after_global_opts;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

struct reg_use {rtx reg_rtx; };

/* Hash table of expressions.  */

struct expr
{
  /* The expression (SET_SRC for expressions, PATTERN for assignments).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  int max_distance;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or the first for anticipatable expressions].  */

struct occr
{
  /* Next occurrence of this expression.  */
  struct occr *next;
  /* The insn that computes the expression.  */
  rtx insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

/* Expression and copy propagation hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;

  /* Whether the table is the expression hash table or the copy
     propagation (set) hash table.  */
  int set_p;
};

/* Expression hash table.  */
static struct hash_table_d expr_hash_table;

/* Copy propagation hash table.  */
static struct hash_table_d set_hash_table;

/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by anything except stores
   to the same location, i.e. all loads and stores are to one single
   location.  We can then allow movement of these MEM refs with a little
   special allowance (all stores copy the same value to the reaching reg
   used for the loads).  This means all values used to store into memory
   must have no side effects, so we can re-issue the setter value.
   Store motion uses this structure as an expression table to track stores
   which look interesting, and might be movable towards the exit block.  */

struct ls_expr
{
  struct expr * expr;		/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  rtx loads;			/* INSN list of loads seen.  */
  rtx stores;			/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};

/* Array of implicit set patterns indexed by basic block index.  */
static rtx *implicit_sets;

/* Head of the list of load/store memory refs.  */
static struct ls_expr * pre_ldst_mems = NULL;

/* Hashtable for the load/store memory refs.  */
static htab_t pre_ldst_table = NULL;

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static regset reg_set_bitmap;

/* Array, indexed by basic block number, of lists of insns which modify
   memory within the block.  */
static rtx * modify_mem_list;
static bitmap modify_mem_list_set;

/* This array parallels modify_mem_list, but is kept canonicalized.  */
static rtx * canon_modify_mem_list;

/* Bitmap indexed by block numbers to record which blocks contain
   function calls.  */
static bitmap blocks_with_calls;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;
/* Number of local constants propagated.  */
static int local_const_prop_count;
/* Number of local copies propagated.  */
static int local_copy_prop_count;
/* Number of global constants propagated.  */
static int global_const_prop_count;
/* Number of global copies propagated.  */
static int global_copy_prop_count;
\f
/* Doing code hoisting.  */
static bool doing_code_hoisting_p = false;
\f
/* For available exprs.  */
static sbitmap *ae_kill;
\f
static void compute_can_copy (void);
static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
static void *gcse_alloc (unsigned long);
static void alloc_gcse_mem (void);
static void free_gcse_mem (void);
static void hash_scan_insn (rtx, struct hash_table_d *);
static void hash_scan_set (rtx, rtx, struct hash_table_d *);
static void hash_scan_clobber (rtx, rtx, struct hash_table_d *);
static void hash_scan_call (rtx, rtx, struct hash_table_d *);
static int want_to_gcse_p (rtx, int *);
static bool gcse_constant_p (const_rtx);
static int oprs_unchanged_p (const_rtx, const_rtx, int);
static int oprs_anticipatable_p (const_rtx, const_rtx);
static int oprs_available_p (const_rtx, const_rtx);
static void insert_expr_in_table (rtx, enum machine_mode, rtx, int, int, int,
				  struct hash_table_d *);
static void insert_set_in_table (rtx, rtx, struct hash_table_d *);
static unsigned int hash_expr (const_rtx, enum machine_mode, int *, int);
static unsigned int hash_set (int, int);
static int expr_equiv_p (const_rtx, const_rtx);
static void record_last_reg_set_info (rtx, int);
static void record_last_mem_set_info (rtx);
static void record_last_set_info (rtx, const_rtx, void *);
static void compute_hash_table (struct hash_table_d *);
static void alloc_hash_table (struct hash_table_d *, int);
static void free_hash_table (struct hash_table_d *);
static void compute_hash_table_work (struct hash_table_d *);
static void dump_hash_table (FILE *, const char *, struct hash_table_d *);
static struct expr *lookup_set (unsigned int, struct hash_table_d *);
static struct expr *next_set (unsigned int, struct expr *);
static void reset_opr_set_tables (void);
static int oprs_not_set_p (const_rtx, const_rtx);
static void mark_call (rtx);
static void mark_set (rtx, rtx);
static void mark_clobber (rtx, rtx);
static void mark_oprs_set (rtx);
static void alloc_cprop_mem (int, int);
static void free_cprop_mem (void);
static void compute_transp (const_rtx, int, sbitmap *, int);
static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
				      struct hash_table_d *);
static void compute_cprop_data (void);
static void find_used_regs (rtx *, void *);
static int try_replace_reg (rtx, rtx, rtx);
static struct expr *find_avail_set (int, rtx);
static int cprop_jump (basic_block, rtx, rtx, rtx, rtx);
static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
static void canon_list_insert (rtx, const_rtx, void *);
static int cprop_insn (rtx);
static void find_implicit_sets (void);
static int one_cprop_pass (void);
static bool constprop_register (rtx, rtx, rtx);
static struct expr *find_bypass_set (int, int);
static bool reg_killed_on_edge (const_rtx, const_edge);
static int bypass_block (basic_block, rtx, rtx);
static int bypass_conditional_jumps (void);
static void alloc_pre_mem (int, int);
static void free_pre_mem (void);
static void compute_pre_data (void);
static int pre_expr_reaches_here_p (basic_block, struct expr *,
				    basic_block);
static void insert_insn_end_basic_block (struct expr *, basic_block);
static void pre_insert_copy_insn (struct expr *, rtx);
static void pre_insert_copies (void);
static int pre_delete (void);
static int pre_gcse (void);
static int one_pre_gcse_pass (void);
static void add_label_notes (rtx, rtx);
static void alloc_code_hoist_mem (int, int);
static void free_code_hoist_mem (void);
static void compute_code_hoist_vbeinout (void);
static void compute_code_hoist_data (void);
static int hoist_expr_reaches_here_p (basic_block, int, basic_block, char *,
				      int, int *);
static int hoist_code (void);
static int one_code_hoisting_pass (void);
static rtx process_insert_insn (struct expr *);
static int pre_edge_insert (struct edge_list *, struct expr **);
static int pre_expr_reaches_here_p_work (basic_block, struct expr *,
					 basic_block, char *);
static struct ls_expr * ldst_entry (rtx);
static void free_ldst_entry (struct ls_expr *);
static void free_ldst_mems (void);
static void print_ldst_list (FILE *);
static struct ls_expr * find_rtx_in_ldst (rtx);
static inline struct ls_expr * first_ls_expr (void);
static inline struct ls_expr * next_ls_expr (struct ls_expr *);
static int simple_mem (const_rtx);
static void invalidate_any_buried_refs (rtx);
static void compute_ld_motion_mems (void);
static void trim_ld_motion_mems (void);
static void update_ld_motion_stores (struct expr *);
static void free_insn_expr_list_list (rtx *);
static void clear_modify_mem_tables (void);
static void free_modify_mem_tables (void);
static rtx gcse_emit_move_after (rtx, rtx, rtx);
static void local_cprop_find_used_regs (rtx *, void *);
static bool do_local_cprop (rtx, rtx);
static int local_cprop_pass (void);
static bool is_too_expensive (const char *);

#define GNEW(T)			((T *) gmalloc (sizeof (T)))
#define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))

#define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
#define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))

#define GNEWVAR(T, S)		((T *) gmalloc ((S)))
#define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))

#define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
#define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))
\f
/* Misc. utilities.  */

#define can_copy \
  (this_target_gcse->x_can_copy)
#define can_copy_init_p \
  (this_target_gcse->x_can_copy_init_p)

/* Compute which modes support reg/reg copy operations.  */

static void
compute_can_copy (void)
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg, insn;
#endif
  memset (can_copy, 0, NUM_MACHINE_MODES);

  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy[i] = 0;
#else
	reg = gen_rtx_REG ((enum machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (VOIDmode, reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy[i] = 1;
#endif
      }
    else
      can_copy[i] = 1;

  end_sequence ();
}

/* Returns whether the mode supports reg/reg copy operations.  */

bool
can_copy_p (enum machine_mode mode)
{
  if (! can_copy_init_p)
    {
      compute_can_copy ();
      can_copy_init_p = true;
    }

  return can_copy[mode] != 0;
}

\f
/* Cover function to xmalloc to record bytes allocated.  */

static void *
gmalloc (size_t size)
{
  bytes_used += size;
  return xmalloc (size);
}

/* Cover function to xcalloc to record bytes allocated.  */

static void *
gcalloc (size_t nelem, size_t elsize)
{
  bytes_used += nelem * elsize;
  return xcalloc (nelem, elsize);
}

/* Cover function to obstack_alloc.  */

static void *
gcse_alloc (unsigned long size)
{
  bytes_used += size;
  return obstack_alloc (&gcse_obstack, size);
}

/* Allocate memory for the reg/memory set tracking tables.
   This is called at the start of each pass.  */

static void
alloc_gcse_mem (void)
{
  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = ALLOC_REG_SET (NULL);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  */
  modify_mem_list = GCNEWVEC (rtx, last_basic_block);
  canon_modify_mem_list = GCNEWVEC (rtx, last_basic_block);
  modify_mem_list_set = BITMAP_ALLOC (NULL);
  blocks_with_calls = BITMAP_ALLOC (NULL);
}

/* Free memory allocated by alloc_gcse_mem.  */

static void
free_gcse_mem (void)
{
  free_modify_mem_tables ();
  BITMAP_FREE (modify_mem_list_set);
  BITMAP_FREE (blocks_with_calls);
}
\f
/* Compute the local properties of each recorded expression.

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and the expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and the expression would contain the same value if the
   computation was moved to the beginning of the block.

   We call this routine for cprop, pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   TABLE controls which hash table to look at.  If it is the set hash table,
   TRANSP is additionally computed as ~TRANSP, since this is really cprop's
   ABSALTERED.  */

static void
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
			  struct hash_table_d *table)
{
  unsigned int i;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      if (table->set_p)
	sbitmap_vector_zero (transp, last_basic_block);
      else
	sbitmap_vector_ones (transp, last_basic_block);
    }

  if (comp)
    sbitmap_vector_zero (comp, last_basic_block);
  if (antloc)
    sbitmap_vector_zero (antloc, last_basic_block);

  for (i = 0; i < table->size; i++)
    {
      struct expr *expr;

      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp, table->set_p);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to nonzero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to nonzero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		SET_BIT (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}
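
/* A small worked example of the properties above, with hypothetical
   pseudo-registers.  In a block containing

	r1 = r2 + r3;
	r2 = r4;

   the expression R2+R3 is locally anticipatable (ANTLOC): its first
   occurrence comes before any modification of R2 or R3 in the block.
   It is not locally available (COMP), because R2 is modified before the
   end of the block, and for the same reason it is not transparent
   (TRANSP) in this block.  */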
\f
/* Hash table support.  */

struct reg_avail_info
{
  basic_block last_bb;
  int first_set;
  int last_set;
};

static struct reg_avail_info *reg_avail_info;
static basic_block current_bb;

/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (rtx x, int *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use rtx_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     hoisting of constants leads to increased register pressure.

     RA can deal with this by rematerializing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload to rematerialize them under high register
     pressure, i.e., a pseudo register carrying a REG_EQUAL note for the
     constant is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      return 0;

    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_FIXED:
    case CONST_VECTOR:
      if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return 0;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* Compute MAX_DISTANCE only for code hoisting; PRE doesn't
	   implement the max_distance restriction.  */
	{
	  int cost;
	  int max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = rtx_cost (x, SET, 0);

	  if (cost < COSTS_N_INSNS (GCSE_UNRESTRICTED_COST))
	    {
	      max_distance = (GCSE_COST_DISTANCE_RATIO * cost) / 10;
	      if (max_distance == 0)
		return 0;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x);
    }
}
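
/* A worked example of the MAX_DISTANCE computation above, assuming the
   default parameter values gcse-cost-distance-ratio == 10 and
   gcse-unrestricted-cost == 3: an expression of cost COSTS_N_INSNS (1),
   i.e. 4, is below COSTS_N_INSNS (3) == 12, so it may travel at most
   (10 * 4) / 10 == 4 instructions; an expression of cost
   COSTS_N_INSNS (3) or more gets max_distance == 0, i.e. it may travel
   unrestricted.  These numbers only sketch the default tuning; the
   parameters are user-adjustable.  */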

/* Used internally by can_assign_to_reg_without_clobbers_p.  */

static GTY(()) rtx test_insn;

/* Return true if we can assign X to a pseudo register such that the
   resulting insn does not result in clobbering a hard register as a
   side-effect.

   Additionally, if the target requires it, check that the resulting insn
   can be copied.  If it cannot, this means that X is special and probably
   has hidden side-effects we don't want to mess with.

   This function is typically used by code motion passes, to verify
   that it is safe to insert an insn without worrying about clobbering
   maybe live hard regs.  */

bool
can_assign_to_reg_without_clobbers_p (rtx x)
{
  int num_clobbers = 0;
  int icode;

  /* If this is a valid operand, we are OK.  If it's VOIDmode, we aren't.  */
  if (general_operand (x, GET_MODE (x)))
    return true;
  else if (GET_MODE (x) == VOIDmode)
    return false;

  /* Otherwise, check if we can make a valid insn from it.  First initialize
     our test insn if we haven't already.  */
  if (test_insn == 0)
    {
      test_insn
	= make_insn_raw (gen_rtx_SET (VOIDmode,
				      gen_rtx_REG (word_mode,
						   FIRST_PSEUDO_REGISTER * 2),
				      const0_rtx));
      NEXT_INSN (test_insn) = PREV_INSN (test_insn) = 0;
    }

  /* Now make an insn like the one we would make when GCSE'ing and see if
     valid.  */
  PUT_MODE (SET_DEST (PATTERN (test_insn)), GET_MODE (x));
  SET_SRC (PATTERN (test_insn)) = x;

  icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
  if (icode < 0)
    return false;

  if (num_clobbers > 0 && added_clobbers_hard_reg_p (icode))
    return false;

  if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (test_insn))
    return false;

  return true;
}
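
/* A sketch of what the above accepts and rejects (the outcome is
   target-dependent, so this is purely illustrative): if
   (set (reg) X) recognizes with no extra clobbers, X is acceptable;
   if it only recognizes after an implicit clobber of a hard register
   is added (num_clobbers > 0 and added_clobbers_hard_reg_p), X is
   rejected, since inserting such an insn could clobber a live hard
   register; and if the target's cannot_copy_insn_p hook objects, X is
   likewise rejected.  */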

/* Return nonzero if the operands of expression X are unchanged from the
   start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
   or from INSN to the end of INSN's basic block (if AVAIL_P != 0).  */

static int
oprs_unchanged_p (const_rtx x, const_rtx insn, int avail_p)
{
  int i, j;
  enum rtx_code code;
  const char *fmt;

  if (x == 0)
    return 1;

  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	struct reg_avail_info *info = &reg_avail_info[REGNO (x)];

	if (info->last_bb != current_bb)
	  return 1;
	if (avail_p)
	  return info->last_set < DF_INSN_LUID (insn);
	else
	  return info->first_set >= DF_INSN_LUID (insn);
      }

    case MEM:
      if (load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
				  x, avail_p))
	return 0;
      else
	return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);

    case PRE_DEC:
    case PRE_INC:
    case POST_DEC:
    case POST_INC:
    case PRE_MODIFY:
    case POST_MODIFY:
      return 0;

    case PC:
    case CC0: /*FIXME*/
    case CONST:
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_FIXED:
    case CONST_VECTOR:
    case SYMBOL_REF:
    case LABEL_REF:
    case ADDR_VEC:
    case ADDR_DIFF_VEC:
      return 1;

    default:
      break;
    }

  for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
    {
      if (fmt[i] == 'e')
	{
	  /* If we are about to do the last recursive call needed at this
	     level, change it into iteration.  This function is called enough
	     to be worth it.  */
	  if (i == 0)
	    return oprs_unchanged_p (XEXP (x, i), insn, avail_p);

	  else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
	    return 0;
	}
      else if (fmt[i] == 'E')
	for (j = 0; j < XVECLEN (x, i); j++)
	  if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
	    return 0;
    }

  return 1;
}

/* Used for communication between mems_conflict_for_gcse_p and
   load_killed_in_block_p.  Nonzero if mems_conflict_for_gcse_p finds a
   conflict between two memory references.  */
static int gcse_mems_conflict_p;

/* Used for communication between mems_conflict_for_gcse_p and
   load_killed_in_block_p.  This is the memory reference of a load
   instruction; mems_conflict_for_gcse_p checks whether a memory store
   conflicts with this load.  */
static const_rtx gcse_mem_operand;

/* DEST is the output of an instruction.  If it is a memory reference and
   possibly conflicts with the load found in gcse_mem_operand, then set
   gcse_mems_conflict_p to a nonzero value.  */

static void
mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
			  void *data ATTRIBUTE_UNUSED)
{
  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with the load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */
  if (! MEM_P (dest))
    return;

  /* If we are setting a MEM in our list of specially recognized MEMs,
     don't mark as killed this time.  */

  if (expr_equiv_p (dest, gcse_mem_operand) && pre_ldst_mems != NULL)
    {
      if (!find_rtx_in_ldst (dest))
	gcse_mems_conflict_p = 1;
      return;
    }

  if (true_dependence (dest, GET_MODE (dest), gcse_mem_operand,
		       rtx_addr_varies_p))
    gcse_mems_conflict_p = 1;
}

/* Return nonzero if the expression in X (a memory reference) is killed
   in block BB before or after the insn with the LUID in UID_LIMIT.
   AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
   before UID_LIMIT.

   To check the entire block, set UID_LIMIT to max_uid + 1 and
   AVAIL_P to 0.  */

static int
load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
			int avail_p)
{
  rtx list_entry = modify_mem_list[bb->index];

  /* If this is a readonly MEM, we aren't going to be changing it.  */
  if (MEM_READONLY_P (x))
    return 0;

  while (list_entry)
    {
      rtx setter;
      /* Ignore entries in the list that do not apply.  */
      if ((avail_p
	   && DF_INSN_LUID (XEXP (list_entry, 0)) < uid_limit)
	  || (! avail_p
	      && DF_INSN_LUID (XEXP (list_entry, 0)) > uid_limit))
	{
	  list_entry = XEXP (list_entry, 1);
	  continue;
	}

      setter = XEXP (list_entry, 0);

      /* If SETTER is a call everything is clobbered.  Note that calls
	 to pure functions are never put on the list, so we need not
	 worry about them.  */
      if (CALL_P (setter))
	return 1;

      /* SETTER must be an INSN of some kind that sets memory.  Call
	 note_stores to examine each hunk of memory that is modified.

	 The note_stores interface is pretty limited, so we have to
	 communicate via global variables.  Yuk.  */
      gcse_mem_operand = x;
      gcse_mems_conflict_p = 0;
      note_stores (PATTERN (setter), mems_conflict_for_gcse_p, NULL);
      if (gcse_mems_conflict_p)
	return 1;
      list_entry = XEXP (list_entry, 1);
    }
  return 0;
}
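
/* For example, with hypothetical pseudos, in a block containing

	r10 = *p;	<- the load being tested (X)
	...
	*q = r11;	<- a later store recorded in modify_mem_list

   a query with AVAIL_P == 1 returns nonzero if true_dependence decides
   that *p and *q may overlap: the store would invalidate the loaded
   value before the end of the block.  If the two references are
   provably disjoint, the load is not killed.  */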

/* Return nonzero if the operands of expression X are unchanged from
   the start of INSN's basic block up to but not including INSN.  */

static int
oprs_anticipatable_p (const_rtx x, const_rtx insn)
{
  return oprs_unchanged_p (x, insn, 0);
}

/* Return nonzero if the operands of expression X are unchanged from
   INSN to the end of INSN's basic block.  */

static int
oprs_available_p (const_rtx x, const_rtx insn)
{
  return oprs_unchanged_p (x, insn, 1);
}

/* Hash expression X.

   MODE is only used if X is a CONST_INT.  DO_NOT_RECORD_P is a boolean
   indicating if a volatile operand is found or if the expression contains
   something we don't want to insert in the table.  HASH_TABLE_SIZE is
   the current size of the hash table to be probed.  */

static unsigned int
hash_expr (const_rtx x, enum machine_mode mode, int *do_not_record_p,
	   int hash_table_size)
{
  unsigned int hash;

  *do_not_record_p = 0;

  hash = hash_rtx (x, mode, do_not_record_p,
		   NULL, /*have_reg_qty=*/false);
  return hash % hash_table_size;
}

/* Hash a set of register REGNO.

   Sets are hashed on the register that is set.  This simplifies the PRE copy
   propagation code.

   ??? May need to make things more elaborate.  Later, as necessary.  */

static unsigned int
hash_set (int regno, int hash_table_size)
{
  unsigned int hash;

  hash = regno;
  return hash % hash_table_size;
}

/* Return nonzero if exp1 is equivalent to exp2.  */

static int
expr_equiv_p (const_rtx x, const_rtx y)
{
  return exp_equiv_p (x, y, 0, true);
}

/* Insert expression X in INSN in the hash TABLE.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is nonzero if X is an anticipatable expression.
   AVAIL_P is nonzero if X is an available expression.

   MAX_DISTANCE is the maximum distance in instructions this expression can
   be moved.  */

static void
insert_expr_in_table (rtx x, enum machine_mode mode, rtx insn, int antic_p,
		      int avail_p, int max_distance, struct hash_table_d *table)
{
  int found, do_not_record_p;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, &do_not_record_p, table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  cur_expr = table->table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = GOBNEW (struct expr);
      bytes_used += sizeof (struct expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      if (antic_occr
	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
	antic_occr = NULL;

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = GOBNEW (struct occr);
	  bytes_used += sizeof (struct occr);
	  antic_occr->insn = insn;
	  antic_occr->next = cur_expr->antic_occr;
	  antic_occr->deleted_p = 0;
	  cur_expr->antic_occr = antic_occr;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = GOBNEW (struct occr);
	  bytes_used += sizeof (struct occr);
	  avail_occr->insn = insn;
	  avail_occr->next = cur_expr->avail_occr;
	  avail_occr->deleted_p = 0;
	  cur_expr->avail_occr = avail_occr;
	}
    }
}

/* Insert pattern X in INSN in the hash table.
   X is a SET of a reg to either another reg or a constant.
   If it is already present, record it as the last occurrence in INSN's
   basic block.  */

static void
insert_set_in_table (rtx x, rtx insn, struct hash_table_d *table)
{
  int found;
  unsigned int hash;
  struct expr *cur_expr, *last_expr = NULL;
  struct occr *cur_occr;

  gcc_assert (GET_CODE (x) == SET && REG_P (SET_DEST (x)));

  hash = hash_set (REGNO (SET_DEST (x)), table->size);

  cur_expr = table->table[hash];
  found = 0;

  while (cur_expr && 0 == (found = expr_equiv_p (cur_expr->expr, x)))
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      cur_expr = GOBNEW (struct expr);
      bytes_used += sizeof (struct expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.
	 We must copy X because it can be modified when copy propagation is
	 performed on its operands.  */
      cur_expr->expr = copy_rtx (x);
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      /* Not used for set_p tables.  */
      cur_expr->max_distance = 0;
    }

  /* Now record the occurrence.  */
  cur_occr = cur_expr->avail_occr;

  if (cur_occr
      && BLOCK_FOR_INSN (cur_occr->insn) == BLOCK_FOR_INSN (insn))
    {
      /* Found another instance of the expression in the same basic block.
	 Prefer this occurrence to the currently recorded one.  We want
	 the last one in the block and the block is scanned from start
	 to end.  */
      cur_occr->insn = insn;
    }
  else
    {
      /* First occurrence of this expression in this basic block.  */
      cur_occr = GOBNEW (struct occr);
      bytes_used += sizeof (struct occr);
      cur_occr->insn = insn;
      cur_occr->next = cur_expr->avail_occr;
      cur_occr->deleted_p = 0;
      cur_expr->avail_occr = cur_occr;
    }
}

/* Determine whether the rtx X should be treated as a constant for
   the purposes of GCSE's constant propagation.  */

static bool
gcse_constant_p (const_rtx x)
{
  /* Consider a COMPARE of two integers constant.  */
  if (GET_CODE (x) == COMPARE
      && CONST_INT_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return true;

  /* Consider a COMPARE of the same registers constant if they are not
     floating point registers.  (A floating point register compared with
     itself is not constant: the result is unordered if the value is a
     NaN.)  */
  if (GET_CODE (x) == COMPARE
      && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1))
      && REGNO (XEXP (x, 0)) == REGNO (XEXP (x, 1))
      && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 0)))
      && ! FLOAT_MODE_P (GET_MODE (XEXP (x, 1))))
    return true;

  /* Since X might be inserted more than once we have to take care that it
     is sharable.  */
  return CONSTANT_P (x) && (GET_CODE (x) != CONST || shared_const_p (x));
}
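
/* Hypothetical examples of the classification above (register numbers
   are illustrative only):

	(compare (reg:SI 100) (reg:SI 100))  -> constant
	(compare (reg:SF 100) (reg:SF 100))  -> not constant; if the value
						is a NaN the comparison is
						unordered, not "equal"
	(const_int 42)			     -> constant  */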

/* Scan SET pattern PAT of INSN and add an entry to the hash TABLE (set or
   expression one).  */

static void
hash_scan_set (rtx pat, rtx insn, struct hash_table_d *table)
{
  rtx src = SET_SRC (pat);
  rtx dest = SET_DEST (pat);
  rtx note;

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      rtx tmp;
      int max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

	 This allows us to do a single GCSE pass and still eliminate
	 redundant constants, addresses or other expressions that are
	 constructed with multiple instructions.

	 However, keep the original SRC if INSN is a simple reg-reg move.
	 In this case, there will almost always be a REG_EQUAL note on the
	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
	 for INSN, we miss copy propagation opportunities and we perform the
	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
	 do more than one PRE GCSE pass.

	 Note that this does not impede profitable constant propagations.  We
	 "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
	  && REG_NOTE_KIND (note) == REG_EQUAL
	  && !REG_P (src)
	  && (table->set_p
	      ? gcse_constant_p (XEXP (note, 0))
	      : want_to_gcse_p (XEXP (note, 0), NULL)))
	src = XEXP (note, 0), pat = gen_rtx_SET (VOIDmode, dest, src);

      /* Only record sets of pseudo-regs in the hash table.  */
      if (! table->set_p
	  && regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  /* ??? We can now easily create new EH landing pads at the
	     gimple level, for splitting edges; there's no reason we
	     can't do the same thing at the rtl level.  */
	  && !can_throw_internal (insn)
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (src, &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (pat)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes, and if the argument slot is used somewhere
	     explicitly, it means the address of the parameter has been
	     taken, so we should not extend the lifetime of the pseudo.  */
	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn or if this is not the only SET in
	     this insn.  The latter condition does not have to mean that
	     SRC itself is not anticipatable, but we just will not be
	     able to handle code motion of insns with multiple sets.  */
	  int antic_p = oprs_anticipatable_p (src, insn)
			&& !multiple_sets (insn);
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  int avail_p = (oprs_available_p (src, insn)
			 && ! JUMP_P (insn));

	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
				max_distance, table);
	}

      /* Record sets for constant/copy propagation.  */
      else if (table->set_p
	       && regno >= FIRST_PSEUDO_REGISTER
	       && ((REG_P (src)
		    && REGNO (src) >= FIRST_PSEUDO_REGISTER
		    && can_copy_p (GET_MODE (dest))
		    && REGNO (src) != regno)
		   || gcse_constant_p (src))
	       /* A copy is not available if its src or dest is subsequently
		  modified.  Here we want to search from INSN+1 on, but
		  oprs_available_p searches from INSN on.  */
	       && (insn == BB_END (BLOCK_FOR_INSN (insn))
		   || (tmp = next_nonnote_insn (insn)) == NULL_RTX
		   || BLOCK_FOR_INSN (tmp) != BLOCK_FOR_INSN (insn)
		   || oprs_available_p (pat, tmp)))
	insert_set_in_table (pat, insn, table);
    }
  /* In the case of a store we want to consider the memory value as available
     in the REG stored to that memory.  This makes it possible to remove
     loads made redundant by stores to the same location.  */
  else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
    {
      unsigned int regno = REGNO (src);

      /* Do not do this for constant/copy propagation.  */
      if (! table->set_p
	  /* Only record sets of pseudo-regs in the hash table.  */
	  && regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (src))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  && !can_throw_internal (insn)
	  /* Is SET_DEST something we want to gcse?  */
	  && want_to_gcse_p (dest, NULL)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (pat)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes, and if the argument slot is used somewhere
	     explicitly, it means the address of the parameter has been
	     taken, so we should not extend the lifetime of the pseudo.  */
	  && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
	      || ! MEM_P (XEXP (note, 0))))
	{
	  /* Stores are never anticipatable.  */
	  int antic_p = 0;
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  int avail_p = oprs_available_p (dest, insn)
			&& ! JUMP_P (insn);

	  /* Record the memory expression (DEST) in the hash table, with
	     no max_distance restriction.  */
	  insert_expr_in_table (dest, GET_MODE (dest), insn,
				antic_p, avail_p, 0, table);
	}
    }
}
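
/* A hypothetical example of the REG_EQUAL handling above: a symbolic
   address materialized in two insns, e.g.

	(set (reg 200) (high (symbol_ref "var")))
	(set (reg 201) (lo_sum (reg 200) (symbol_ref "var")))
	   with a REG_EQUAL note (symbol_ref "var") on the second insn,

   is recorded in the hash table as the single expression
   (symbol_ref "var"), so PRE can treat the whole two-insn computation
   as one GCSE candidate even though no single insn computes it.  */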

static void
hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
		   struct hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}

static void
hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED,
		struct hash_table_d *table ATTRIBUTE_UNUSED)
{
  /* Currently nothing to do.  */
}

/* Process INSN and add hash table entries as appropriate.

   Only available expressions that set a single pseudo-reg are recorded.

   Single sets in a PARALLEL could be handled, but it's an extra complication
   that isn't dealt with right now.  The trick is handling the CLOBBERs that
   are also in the PARALLEL.  Later.

   If TABLE->set_p is nonzero, this is for the assignment hash table,
   otherwise it is for the expression hash table.  */

static void
hash_scan_insn (rtx insn, struct hash_table_d *table)
{
  rtx pat = PATTERN (insn);
  int i;

  /* Pick out the sets of INSN and for other forms of instructions record
     what's been modified.  */

  if (GET_CODE (pat) == SET)
    hash_scan_set (pat, insn, table);
  else if (GET_CODE (pat) == PARALLEL)
    for (i = 0; i < XVECLEN (pat, 0); i++)
      {
	rtx x = XVECEXP (pat, 0, i);

	if (GET_CODE (x) == SET)
	  hash_scan_set (x, insn, table);
	else if (GET_CODE (x) == CLOBBER)
	  hash_scan_clobber (x, insn, table);
	else if (GET_CODE (x) == CALL)
	  hash_scan_call (x, insn, table);
      }

  else if (GET_CODE (pat) == CLOBBER)
    hash_scan_clobber (pat, insn, table);
  else if (GET_CODE (pat) == CALL)
    hash_scan_call (pat, insn, table);
}

static void
dump_hash_table (FILE *file, const char *name, struct hash_table_d *table)
{
  int i;
  /* Flattened out table, so it's printed in proper order.  */
  struct expr **flat_table;
  unsigned int *hash_val;
  struct expr *expr;

  flat_table = XCNEWVEC (struct expr *, table->n_elems);
  hash_val = XNEWVEC (unsigned int, table->n_elems);

  for (i = 0; i < (int) table->size; i++)
    for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
      {
	flat_table[expr->bitmap_index] = expr;
	hash_val[expr->bitmap_index] = i;
      }

  fprintf (file, "%s hash table (%d buckets, %d entries)\n",
	   name, table->size, table->n_elems);

  for (i = 0; i < (int) table->n_elems; i++)
    if (flat_table[i] != 0)
      {
	expr = flat_table[i];
	fprintf (file, "Index %d (hash value %d; max distance %d)\n  ",
		 expr->bitmap_index, hash_val[i], expr->max_distance);
	print_rtl (file, expr->expr);
	fprintf (file, "\n");
      }

  fprintf (file, "\n");

  free (flat_table);
  free (hash_val);
}

/* Record register first/last/block set information for REGNO in INSN.

   first_set records the first place in the block where the register
   is set and is used to compute "anticipatability".

   last_set records the last place in the block where the register
   is set and is used to compute "availability".

   last_bb records the block for which first_set and last_set are
   valid, as a quick test to invalidate them.  */

static void
record_last_reg_set_info (rtx insn, int regno)
{
  struct reg_avail_info *info = &reg_avail_info[regno];
  int luid = DF_INSN_LUID (insn);

  info->last_set = luid;
  if (info->last_bb != current_bb)
    {
      info->last_bb = current_bb;
      info->first_set = luid;
    }
}


/* Record all of the canonicalized MEMs of record_last_mem_set_info's insn.
   Note we store a pair of elements in the list, so they have to be
   taken off pairwise.  */

static void
canon_list_insert (rtx dest ATTRIBUTE_UNUSED, const_rtx unused1 ATTRIBUTE_UNUSED,
		   void * v_insn)
{
  rtx dest_addr, insn;
  int bb;

  while (GET_CODE (dest) == SUBREG
	 || GET_CODE (dest) == ZERO_EXTRACT
	 || GET_CODE (dest) == STRICT_LOW_PART)
    dest = XEXP (dest, 0);

  /* If DEST is not a MEM, then it will not conflict with a load.  Note
     that function calls are assumed to clobber memory, but are handled
     elsewhere.  */

  if (! MEM_P (dest))
    return;

  dest_addr = get_addr (XEXP (dest, 0));
  dest_addr = canon_rtx (dest_addr);
  insn = (rtx) v_insn;
  bb = BLOCK_FOR_INSN (insn)->index;

  canon_modify_mem_list[bb] =
    alloc_EXPR_LIST (VOIDmode, dest_addr, canon_modify_mem_list[bb]);
  canon_modify_mem_list[bb] =
    alloc_EXPR_LIST (VOIDmode, dest, canon_modify_mem_list[bb]);
}
1660
1661 /* Record memory modification information for INSN. We do not actually care
1662 about the memory location(s) that are set, or even how they are set (consider
1663 a CALL_INSN). We merely need to record which insns modify memory. */
1664
1665 static void
1666 record_last_mem_set_info (rtx insn)
1667 {
1668 int bb = BLOCK_FOR_INSN (insn)->index;
1669
1670 /* load_killed_in_block_p will handle the case of calls clobbering
1671 everything. */
1672 modify_mem_list[bb] = alloc_INSN_LIST (insn, modify_mem_list[bb]);
1673 bitmap_set_bit (modify_mem_list_set, bb);
1674
1675 if (CALL_P (insn))
1676 {
1677 /* Note that traversals of this list (other than for freeing)
1678 will stop after encountering a CALL_INSN. So, there's no
1679 need to insert a pair of items, as canon_list_insert does. */
1680 canon_modify_mem_list[bb] =
1681 alloc_INSN_LIST (insn, canon_modify_mem_list[bb]);
1682 bitmap_set_bit (blocks_with_calls, bb);
1683 }
1684 else
1685 note_stores (PATTERN (insn), canon_list_insert, (void*) insn);
1686 }
1687
1688 /* Called from compute_hash_table via note_stores to handle one
1689 SET or CLOBBER in an insn. DATA is really the instruction in which
1690 the SET is taking place. */
1691
1692 static void
1693 record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1694 {
1695 rtx last_set_insn = (rtx) data;
1696
1697 if (GET_CODE (dest) == SUBREG)
1698 dest = SUBREG_REG (dest);
1699
1700 if (REG_P (dest))
1701 record_last_reg_set_info (last_set_insn, REGNO (dest));
1702 else if (MEM_P (dest)
1703 /* Ignore pushes, they clobber nothing. */
1704 && ! push_operand (dest, GET_MODE (dest)))
1705 record_last_mem_set_info (last_set_insn);
1706 }
1707
1708 /* Top level function to create an expression or assignment hash table.
1709
1710 Expression entries are placed in the hash table if
1711 - they are of the form (set (pseudo-reg) src),
1712 - src is something we want to perform GCSE on,
1713 - none of the operands are subsequently modified in the block
1714
1715 Assignment entries are placed in the hash table if
1716 - they are of the form (set (pseudo-reg) src),
1717 - src is something we want to perform const/copy propagation on,
1718 - none of the operands or target are subsequently modified in the block
1719
1720 Currently src must be a pseudo-reg or a const_int.
1721
1722 TABLE is the table computed. */
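/* For example, (set (reg 60) (plus (reg 61) (const_int 4))) can create
an expression entry for (plus (reg 61) (const_int 4)), provided reg 61
is not subsequently modified in the block. */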
1723
1724 static void
1725 compute_hash_table_work (struct hash_table_d *table)
1726 {
1727 int i;
1728
1729 /* Return any INSN_LIST/EXPR_LIST nodes we have allocated to their free lists. */
1730 clear_modify_mem_tables ();
1731 /* Some working arrays used to track first and last set in each block. */
1732 reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());
1733
1734 for (i = 0; i < max_reg_num (); ++i)
1735 reg_avail_info[i].last_bb = NULL;
1736
1737 FOR_EACH_BB (current_bb)
1738 {
1739 rtx insn;
1740 unsigned int regno;
1741
1742 /* First pass over the instructions records information used to
1743 determine when registers and memory are first and last set. */
1744 FOR_BB_INSNS (current_bb, insn)
1745 {
1746 if (! INSN_P (insn))
1747 continue;
1748
1749 if (CALL_P (insn))
1750 {
1751 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1752 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, regno))
1753 record_last_reg_set_info (insn, regno);
1754
1755 mark_call (insn);
1756 }
1757
1758 note_stores (PATTERN (insn), record_last_set_info, insn);
1759 }
1760
1761 /* Insert implicit sets in the hash table. */
1762 if (table->set_p
1763 && implicit_sets[current_bb->index] != NULL_RTX)
1764 hash_scan_set (implicit_sets[current_bb->index],
1765 BB_HEAD (current_bb), table);
1766
1767 /* The next pass builds the hash table. */
1768 FOR_BB_INSNS (current_bb, insn)
1769 if (INSN_P (insn))
1770 hash_scan_insn (insn, table);
1771 }
1772
1773 free (reg_avail_info);
1774 reg_avail_info = NULL;
1775 }
1776
1777 /* Allocate space for the set/expr hash TABLE.
1778 The function's maximum insn count is used to determine
1779 the number of buckets to use. SET_P determines whether
1780 a set or an expression table will be created. */
1781
1782 static void
1783 alloc_hash_table (struct hash_table_d *table, int set_p)
1784 {
1785 int n;
1786
1787 n = get_max_insn_count ();
1788
1789 table->size = n / 4;
1790 if (table->size < 11)
1791 table->size = 11;
1792
1793 /* Attempt to maintain efficient use of hash table.
1794 Making it an odd number is simplest for now.
1795 ??? Later take some measurements. */
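/* E.g. a function with 1000 insns gets 1000/4 = 250 buckets, bumped
to the odd value 251 by the statement below. */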
1796 table->size |= 1;
1797 n = table->size * sizeof (struct expr *);
1798 table->table = GNEWVAR (struct expr *, n);
1799 table->set_p = set_p;
1800 }
1801
1802 /* Free things allocated by alloc_hash_table. */
1803
1804 static void
1805 free_hash_table (struct hash_table_d *table)
1806 {
1807 free (table->table);
1808 }
1809
1810 /* Compute the hash TABLE, either the set hash table for copy/const
1811 propagation or the expression hash table. */
1812
1813 static void
1814 compute_hash_table (struct hash_table_d *table)
1815 {
1816 /* Initialize count of number of entries in hash table. */
1817 table->n_elems = 0;
1818 memset (table->table, 0, table->size * sizeof (struct expr *));
1819
1820 compute_hash_table_work (table);
1821 }
1822 \f
1823 /* Expression tracking support. */
1824
1825 /* Lookup REGNO in the set TABLE. The result is a pointer to the
1826 table entry, or NULL if not found. */
1827
1828 static struct expr *
1829 lookup_set (unsigned int regno, struct hash_table_d *table)
1830 {
1831 unsigned int hash = hash_set (regno, table->size);
1832 struct expr *expr;
1833
1834 expr = table->table[hash];
1835
1836 while (expr && REGNO (SET_DEST (expr->expr)) != regno)
1837 expr = expr->next_same_hash;
1838
1839 return expr;
1840 }
1841
1842 /* Return the next entry for REGNO in list EXPR. */
1843
1844 static struct expr *
1845 next_set (unsigned int regno, struct expr *expr)
1846 {
1847 do
1848 expr = expr->next_same_hash;
1849 while (expr && REGNO (SET_DEST (expr->expr)) != regno);
1850
1851 return expr;
1852 }
1853
1854 /* Like free_INSN_LIST_list or free_EXPR_LIST_list, except that the node
1855 types may be mixed. */
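/* This mixing occurs in canon_modify_mem_list: calls are recorded as
single INSN_LIST nodes while ordinary stores are recorded as EXPR_LIST
pairs; see record_last_mem_set_info and canon_list_insert. */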
1856
1857 static void
1858 free_insn_expr_list_list (rtx *listp)
1859 {
1860 rtx list, next;
1861
1862 for (list = *listp; list ; list = next)
1863 {
1864 next = XEXP (list, 1);
1865 if (GET_CODE (list) == EXPR_LIST)
1866 free_EXPR_LIST_node (list);
1867 else
1868 free_INSN_LIST_node (list);
1869 }
1870
1871 *listp = NULL;
1872 }
1873
1874 /* Clear canon_modify_mem_list and modify_mem_list tables. */
1875 static void
1876 clear_modify_mem_tables (void)
1877 {
1878 unsigned i;
1879 bitmap_iterator bi;
1880
1881 EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1882 {
1883 free_INSN_LIST_list (modify_mem_list + i);
1884 free_insn_expr_list_list (canon_modify_mem_list + i);
1885 }
1886 bitmap_clear (modify_mem_list_set);
1887 bitmap_clear (blocks_with_calls);
1888 }
1889
1890 /* Release memory used by modify_mem_list_set. */
1891
1892 static void
1893 free_modify_mem_tables (void)
1894 {
1895 clear_modify_mem_tables ();
1896 free (modify_mem_list);
1897 free (canon_modify_mem_list);
1898 modify_mem_list = 0;
1899 canon_modify_mem_list = 0;
1900 }
1901
1902 /* Reset tables used to keep track of what's still available [since the
1903 start of the block]. */
1904
1905 static void
1906 reset_opr_set_tables (void)
1907 {
1908 /* Maintain a bitmap of which regs have been set since beginning of
1909 the block. */
1910 CLEAR_REG_SET (reg_set_bitmap);
1911
1912 /* Also keep a record of the last instruction to modify memory.
1913 For now this is very trivial, we only record whether any memory
1914 location has been modified. */
1915 clear_modify_mem_tables ();
1916 }
1917
1918 /* Return nonzero if the operands of X are not set before INSN in
1919 INSN's basic block. */
1920
1921 static int
1922 oprs_not_set_p (const_rtx x, const_rtx insn)
1923 {
1924 int i, j;
1925 enum rtx_code code;
1926 const char *fmt;
1927
1928 if (x == 0)
1929 return 1;
1930
1931 code = GET_CODE (x);
1932 switch (code)
1933 {
1934 case PC:
1935 case CC0:
1936 case CONST:
1937 case CONST_INT:
1938 case CONST_DOUBLE:
1939 case CONST_FIXED:
1940 case CONST_VECTOR:
1941 case SYMBOL_REF:
1942 case LABEL_REF:
1943 case ADDR_VEC:
1944 case ADDR_DIFF_VEC:
1945 return 1;
1946
1947 case MEM:
1948 if (load_killed_in_block_p (BLOCK_FOR_INSN (insn),
1949 DF_INSN_LUID (insn), x, 0))
1950 return 0;
1951 else
1952 return oprs_not_set_p (XEXP (x, 0), insn);
1953
1954 case REG:
1955 return ! REGNO_REG_SET_P (reg_set_bitmap, REGNO (x));
1956
1957 default:
1958 break;
1959 }
1960
1961 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
1962 {
1963 if (fmt[i] == 'e')
1964 {
1965 /* If we are about to do the last recursive call
1966 needed at this level, change it into iteration.
1967 This function is called enough to be worth it. */
1968 if (i == 0)
1969 return oprs_not_set_p (XEXP (x, i), insn);
1970
1971 if (! oprs_not_set_p (XEXP (x, i), insn))
1972 return 0;
1973 }
1974 else if (fmt[i] == 'E')
1975 for (j = 0; j < XVECLEN (x, i); j++)
1976 if (! oprs_not_set_p (XVECEXP (x, i, j), insn))
1977 return 0;
1978 }
1979
1980 return 1;
1981 }
1982
1983 /* Mark things set by a CALL. */
1984
1985 static void
1986 mark_call (rtx insn)
1987 {
1988 if (! RTL_CONST_OR_PURE_CALL_P (insn))
1989 record_last_mem_set_info (insn);
1990 }
1991
1992 /* Mark things set by a SET. */
1993
1994 static void
1995 mark_set (rtx pat, rtx insn)
1996 {
1997 rtx dest = SET_DEST (pat);
1998
1999 while (GET_CODE (dest) == SUBREG
2000 || GET_CODE (dest) == ZERO_EXTRACT
2001 || GET_CODE (dest) == STRICT_LOW_PART)
2002 dest = XEXP (dest, 0);
2003
2004 if (REG_P (dest))
2005 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (dest));
2006 else if (MEM_P (dest))
2007 record_last_mem_set_info (insn);
2008
2009 if (GET_CODE (SET_SRC (pat)) == CALL)
2010 mark_call (insn);
2011 }
2012
2013 /* Record things set by a CLOBBER. */
2014
2015 static void
2016 mark_clobber (rtx pat, rtx insn)
2017 {
2018 rtx clob = XEXP (pat, 0);
2019
2020 while (GET_CODE (clob) == SUBREG || GET_CODE (clob) == STRICT_LOW_PART)
2021 clob = XEXP (clob, 0);
2022
2023 if (REG_P (clob))
2024 SET_REGNO_REG_SET (reg_set_bitmap, REGNO (clob));
2025 else
2026 record_last_mem_set_info (insn);
2027 }
2028
2029 /* Record things set by INSN.
2030 This data is used by oprs_not_set_p. */
2031
2032 static void
2033 mark_oprs_set (rtx insn)
2034 {
2035 rtx pat = PATTERN (insn);
2036 int i;
2037
2038 if (GET_CODE (pat) == SET)
2039 mark_set (pat, insn);
2040 else if (GET_CODE (pat) == PARALLEL)
2041 for (i = 0; i < XVECLEN (pat, 0); i++)
2042 {
2043 rtx x = XVECEXP (pat, 0, i);
2044
2045 if (GET_CODE (x) == SET)
2046 mark_set (x, insn);
2047 else if (GET_CODE (x) == CLOBBER)
2048 mark_clobber (x, insn);
2049 else if (GET_CODE (x) == CALL)
2050 mark_call (insn);
2051 }
2052
2053 else if (GET_CODE (pat) == CLOBBER)
2054 mark_clobber (pat, insn);
2055 else if (GET_CODE (pat) == CALL)
2056 mark_call (insn);
2057 }
2058
2059 \f
2060 /* Compute copy/constant propagation working variables. */
2061
2062 /* Local properties of assignments. */
2063 static sbitmap *cprop_pavloc;
2064 static sbitmap *cprop_absaltered;
2065
2066 /* Global properties of assignments (computed from the local properties). */
2067 static sbitmap *cprop_avin;
2068 static sbitmap *cprop_avout;
2069
2070 /* Allocate vars used for copy/const propagation. N_BLOCKS is the number of
2071 basic blocks. N_SETS is the number of sets. */
2072
2073 static void
2074 alloc_cprop_mem (int n_blocks, int n_sets)
2075 {
2076 cprop_pavloc = sbitmap_vector_alloc (n_blocks, n_sets);
2077 cprop_absaltered = sbitmap_vector_alloc (n_blocks, n_sets);
2078
2079 cprop_avin = sbitmap_vector_alloc (n_blocks, n_sets);
2080 cprop_avout = sbitmap_vector_alloc (n_blocks, n_sets);
2081 }
2082
2083 /* Free vars used by copy/const propagation. */
2084
2085 static void
2086 free_cprop_mem (void)
2087 {
2088 sbitmap_vector_free (cprop_pavloc);
2089 sbitmap_vector_free (cprop_absaltered);
2090 sbitmap_vector_free (cprop_avin);
2091 sbitmap_vector_free (cprop_avout);
2092 }
2093
2094 /* For each block, compute whether X is transparent. X is either an
2095 expression or an assignment [though we don't care which, for this context
2096 an assignment is treated as an expression]. For each block where an
2097 element of X is modified, set (SET_P == 1) or reset (SET_P == 0) the INDX
2098 bit in BMAP. */
2099
2100 static void
2101 compute_transp (const_rtx x, int indx, sbitmap *bmap, int set_p)
2102 {
2103 int i, j;
2104 enum rtx_code code;
2105 const char *fmt;
2106
2107 /* repeat is used to turn tail-recursion into iteration since GCC
2108 can't do it when there's no return value. */
2109 repeat:
2110
2111 if (x == 0)
2112 return;
2113
2114 code = GET_CODE (x);
2115 switch (code)
2116 {
2117 case REG:
2118 if (set_p)
2119 {
2120 df_ref def;
2121 for (def = DF_REG_DEF_CHAIN (REGNO (x));
2122 def;
2123 def = DF_REF_NEXT_REG (def))
2124 SET_BIT (bmap[DF_REF_BB (def)->index], indx);
2125 }
2126 else
2127 {
2128 df_ref def;
2129 for (def = DF_REG_DEF_CHAIN (REGNO (x));
2130 def;
2131 def = DF_REF_NEXT_REG (def))
2132 RESET_BIT (bmap[DF_REF_BB (def)->index], indx);
2133 }
2134
2135 return;
2136
2137 case MEM:
2138 if (! MEM_READONLY_P (x))
2139 {
2140 bitmap_iterator bi;
2141 unsigned bb_index;
2142
2143 /* First handle all the blocks with calls. We don't need to
2144 do any list walking for them. */
2145 EXECUTE_IF_SET_IN_BITMAP (blocks_with_calls, 0, bb_index, bi)
2146 {
2147 if (set_p)
2148 SET_BIT (bmap[bb_index], indx);
2149 else
2150 RESET_BIT (bmap[bb_index], indx);
2151 }
2152
2153 /* Now iterate over the blocks which have memory modifications
2154 but which do not have any calls. */
2155 EXECUTE_IF_AND_COMPL_IN_BITMAP (modify_mem_list_set,
2156 blocks_with_calls,
2157 0, bb_index, bi)
2158 {
2159 rtx list_entry = canon_modify_mem_list[bb_index];
2160
2161 while (list_entry)
2162 {
2163 rtx dest, dest_addr;
2164
2165 /* LIST_ENTRY holds a (DEST, DEST_ADDR) pair recorded by
2166 canon_list_insert. Examine each hunk of memory that is modified. */
2167
2168 dest = XEXP (list_entry, 0);
2169 list_entry = XEXP (list_entry, 1);
2170 dest_addr = XEXP (list_entry, 0);
2171
2172 if (canon_true_dependence (dest, GET_MODE (dest), dest_addr,
2173 x, NULL_RTX, rtx_addr_varies_p))
2174 {
2175 if (set_p)
2176 SET_BIT (bmap[bb_index], indx);
2177 else
2178 RESET_BIT (bmap[bb_index], indx);
2179 break;
2180 }
2181 list_entry = XEXP (list_entry, 1);
2182 }
2183 }
2184 }
2185
2186 x = XEXP (x, 0);
2187 goto repeat;
2188
2189 case PC:
2190 case CC0: /*FIXME*/
2191 case CONST:
2192 case CONST_INT:
2193 case CONST_DOUBLE:
2194 case CONST_FIXED:
2195 case CONST_VECTOR:
2196 case SYMBOL_REF:
2197 case LABEL_REF:
2198 case ADDR_VEC:
2199 case ADDR_DIFF_VEC:
2200 return;
2201
2202 default:
2203 break;
2204 }
2205
2206 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2207 {
2208 if (fmt[i] == 'e')
2209 {
2210 /* If we are about to do the last recursive call
2211 needed at this level, change it into iteration.
2212 This function is called enough to be worth it. */
2213 if (i == 0)
2214 {
2215 x = XEXP (x, i);
2216 goto repeat;
2217 }
2218
2219 compute_transp (XEXP (x, i), indx, bmap, set_p);
2220 }
2221 else if (fmt[i] == 'E')
2222 for (j = 0; j < XVECLEN (x, i); j++)
2223 compute_transp (XVECEXP (x, i, j), indx, bmap, set_p);
2224 }
2225 }
2226
2227 /* Top level routine to do the dataflow analysis needed by copy/const
2228 propagation. */
2229
2230 static void
2231 compute_cprop_data (void)
2232 {
2233 compute_local_properties (cprop_absaltered, cprop_pavloc, NULL, &set_hash_table);
2234 compute_available (cprop_pavloc, cprop_absaltered,
2235 cprop_avout, cprop_avin);
2236 }
2237 \f
2238 /* Copy/constant propagation. */
2239
2240 /* Maximum number of register uses in an insn that we handle. */
2241 #define MAX_USES 8
2242
2243 /* Table of uses found in an insn.
2244 Allocated statically to avoid alloc/free complexity and overhead. */
2245 static struct reg_use reg_use_table[MAX_USES];
2246
2247 /* Index into `reg_use_table' while building it. */
2248 static int reg_use_count;
2249
2250 /* Set up a list of register numbers used in INSN. The found uses are stored
2251 in `reg_use_table'. `reg_use_count' is initialized to zero before entry,
2252 and contains the number of uses in the table upon exit.
2253
2254 ??? If a register appears multiple times we will record it multiple times.
2255 This doesn't hurt anything but it will slow things down. */
2256
2257 static void
2258 find_used_regs (rtx *xptr, void *data ATTRIBUTE_UNUSED)
2259 {
2260 int i, j;
2261 enum rtx_code code;
2262 const char *fmt;
2263 rtx x = *xptr;
2264
2265 /* repeat is used to turn tail-recursion into iteration since GCC
2266 can't do it when there's no return value. */
2267 repeat:
2268 if (x == 0)
2269 return;
2270
2271 code = GET_CODE (x);
2272 if (REG_P (x))
2273 {
2274 if (reg_use_count == MAX_USES)
2275 return;
2276
2277 reg_use_table[reg_use_count].reg_rtx = x;
2278 reg_use_count++;
2279 }
2280
2281 /* Recursively scan the operands of this expression. */
2282
2283 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2284 {
2285 if (fmt[i] == 'e')
2286 {
2287 /* If we are about to do the last recursive call
2288 needed at this level, change it into iteration.
2289 This function is called enough to be worth it. */
2290 if (i == 0)
2291 {
2292 x = XEXP (x, 0);
2293 goto repeat;
2294 }
2295
2296 find_used_regs (&XEXP (x, i), data);
2297 }
2298 else if (fmt[i] == 'E')
2299 for (j = 0; j < XVECLEN (x, i); j++)
2300 find_used_regs (&XVECEXP (x, i, j), data);
2301 }
2302 }
2303
2304 /* Try to replace all non-SET_DEST occurrences of FROM in INSN with TO.
2305 Returns nonzero if successful. */
2306
2307 static int
2308 try_replace_reg (rtx from, rtx to, rtx insn)
2309 {
2310 rtx note = find_reg_equal_equiv_note (insn);
2311 rtx src = 0;
2312 int success = 0;
2313 rtx set = single_set (insn);
2314
2315 /* Usually we substitute easy stuff, so we won't copy everything.
2316 We however need to take care to not duplicate non-trivial CONST
2317 expressions. */
2318 to = copy_rtx (to);
2319
2320 validate_replace_src_group (from, to, insn);
2321 if (num_changes_pending () && apply_change_group ())
2322 success = 1;
2323
2324 /* Try to simplify SET_SRC if we have substituted a constant. */
2325 if (success && set && CONSTANT_P (to))
2326 {
2327 src = simplify_rtx (SET_SRC (set));
2328
2329 if (src)
2330 validate_change (insn, &SET_SRC (set), src, 0);
2331 }
2332
2333 /* If there is already a REG_EQUAL note, update the expression in it
2334 with our replacement. */
2335 if (note != 0 && REG_NOTE_KIND (note) == REG_EQUAL)
2336 set_unique_reg_note (insn, REG_EQUAL,
2337 simplify_replace_rtx (XEXP (note, 0), from, to));
2338 if (!success && set && reg_mentioned_p (from, SET_SRC (set)))
2339 {
2340 /* If above failed and this is a single set, try to simplify the source of
2341 the set given our substitution. We could perhaps try this for multiple
2342 SETs, but it probably won't buy us anything. */
2343 src = simplify_replace_rtx (SET_SRC (set), from, to);
2344
2345 if (!rtx_equal_p (src, SET_SRC (set))
2346 && validate_change (insn, &SET_SRC (set), src, 0))
2347 success = 1;
2348
2349 /* If we've failed to do replacement, have a single SET, don't already
2350 have a note, and have no special SET, add a REG_EQUAL note to not
2351 lose information. */
2352 if (!success && note == 0 && set != 0
2353 && GET_CODE (SET_DEST (set)) != ZERO_EXTRACT
2354 && GET_CODE (SET_DEST (set)) != STRICT_LOW_PART)
2355 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
2356 }
2357
2358 /* The REG_EQUAL note may get simplified into a plain register, which
2359 we don't allow; remove the note in that case. This ought not to
2360 happen, because previous code ought to have synthesized a reg-reg
2361 move, but be on the safe side. */
2362 if (note && REG_NOTE_KIND (note) == REG_EQUAL && REG_P (XEXP (note, 0)))
2363 remove_note (insn, note);
2364
2365 return success;
2366 }
2367
2368 /* Find a set of REGNO that is available on entry to INSN's block.
2369 Returns NULL if no such set is found. */
2370
2371 static struct expr *
2372 find_avail_set (int regno, rtx insn)
2373 {
2374 /* SET1 contains the last set found that can be returned to the caller for
2375 use in a substitution. */
2376 struct expr *set1 = 0;
2377
2378 /* Loops are not possible here. To get a loop we would need two sets
2379 available at the start of the block containing INSN. i.e. we would
2380 need two sets like this available at the start of the block:
2381
2382 (set (reg X) (reg Y))
2383 (set (reg Y) (reg X))
2384
2385 This can not happen since the set of (reg Y) would have killed the
2386 set of (reg X) making it unavailable at the start of this block. */
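/* For example, if (set (reg 2) (reg 3)) and (set (reg 3) (const_int 4))
are both available on entry, looking up reg 2 follows the chain through
reg 3 and returns the constant set, so uses of reg 2 can be replaced
with (const_int 4). */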
2387 while (1)
2388 {
2389 rtx src;
2390 struct expr *set = lookup_set (regno, &set_hash_table);
2391
2392 /* Find a set that is available at the start of the block
2393 which contains INSN. */
2394 while (set)
2395 {
2396 if (TEST_BIT (cprop_avin[BLOCK_FOR_INSN (insn)->index],
2397 set->bitmap_index))
2398 break;
2399 set = next_set (regno, set);
2400 }
2401
2402 /* If no available set was found we've reached the end of the
2403 (possibly empty) copy chain. */
2404 if (set == 0)
2405 break;
2406
2407 gcc_assert (GET_CODE (set->expr) == SET);
2408
2409 src = SET_SRC (set->expr);
2410
2411 /* We know the set is available.
2412 Now check that SRC is ANTLOC (i.e. none of the source operands
2413 have changed since the start of the block).
2414
2415 If the source operand changed, we may still use it for the next
2416 iteration of this loop, but we may not use it for substitutions. */
2417
2418 if (gcse_constant_p (src) || oprs_not_set_p (src, insn))
2419 set1 = set;
2420
2421 /* If the source of the set is anything except a register, then
2422 we have reached the end of the copy chain. */
2423 if (! REG_P (src))
2424 break;
2425
2426 /* Follow the copy chain, i.e. start another iteration of the loop
2427 and see if we have an available copy into SRC. */
2428 regno = REGNO (src);
2429 }
2430
2431 /* SET1 holds the last set that was available and anticipatable at
2432 INSN. */
2433 return set1;
2434 }
2435
2436 /* Subroutine of cprop_insn that tries to propagate constants into
2437 JUMP_INSNS. JUMP must be a conditional jump. If SETCC is non-NULL
2438 it is the instruction that immediately precedes JUMP, and must be a
2439 single SET of a register. FROM is what we will try to replace,
2440 SRC is the constant we will try to substitute for it. Returns nonzero
2441 if a change was made. */
2442
2443 static int
2444 cprop_jump (basic_block bb, rtx setcc, rtx jump, rtx from, rtx src)
2445 {
2446 rtx new_rtx, set_src, note_src;
2447 rtx set = pc_set (jump);
2448 rtx note = find_reg_equal_equiv_note (jump);
2449
2450 if (note)
2451 {
2452 note_src = XEXP (note, 0);
2453 if (GET_CODE (note_src) == EXPR_LIST)
2454 note_src = NULL_RTX;
2455 }
2456 else note_src = NULL_RTX;
2457
2458 /* Prefer REG_EQUAL notes except those containing EXPR_LISTs. */
2459 set_src = note_src ? note_src : SET_SRC (set);
2460
2461 /* First substitute the SETCC condition into the JUMP instruction,
2462 then substitute the given values into this expanded JUMP. */
2463 if (setcc != NULL_RTX
2464 && !modified_between_p (from, setcc, jump)
2465 && !modified_between_p (src, setcc, jump))
2466 {
2467 rtx setcc_src;
2468 rtx setcc_set = single_set (setcc);
2469 rtx setcc_note = find_reg_equal_equiv_note (setcc);
2470 setcc_src = (setcc_note && GET_CODE (XEXP (setcc_note, 0)) != EXPR_LIST)
2471 ? XEXP (setcc_note, 0) : SET_SRC (setcc_set);
2472 set_src = simplify_replace_rtx (set_src, SET_DEST (setcc_set),
2473 setcc_src);
2474 }
2475 else
2476 setcc = NULL_RTX;
2477
2478 new_rtx = simplify_replace_rtx (set_src, from, src);
2479
2480 /* If no simplification can be made, then try the next register. */
2481 if (rtx_equal_p (new_rtx, SET_SRC (set)))
2482 return 0;
2483
2484 /* If this is now a no-op delete it, otherwise this must be a valid insn. */
2485 if (new_rtx == pc_rtx)
2486 delete_insn (jump);
2487 else
2488 {
2489 /* Ensure the value computed inside the jump insn is equivalent
2490 to the one computed by setcc. */
2491 if (setcc && modified_in_p (new_rtx, setcc))
2492 return 0;
2493 if (! validate_unshare_change (jump, &SET_SRC (set), new_rtx, 0))
2494 {
2495 /* When (some) constants are not valid in a comparison, and there
2496 are two registers to be replaced by constants before the entire
2497 comparison can be folded into a constant, we need to keep
2498 intermediate information in REG_EQUAL notes. For targets with
2499 separate compare insns, such notes are added by try_replace_reg.
2500 When we have a combined compare-and-branch instruction, however,
2501 we need to attach a note to the branch itself to make this
2502 optimization work. */
2503
2504 if (!rtx_equal_p (new_rtx, note_src))
2505 set_unique_reg_note (jump, REG_EQUAL, copy_rtx (new_rtx));
2506 return 0;
2507 }
2508
2509 /* Remove REG_EQUAL note after simplification. */
2510 if (note_src)
2511 remove_note (jump, note);
2512 }
2513
2514 #ifdef HAVE_cc0
2515 /* Delete the cc0 setter. */
2516 if (setcc != NULL && CC0_P (SET_DEST (single_set (setcc))))
2517 delete_insn (setcc);
2518 #endif
2519
2520 global_const_prop_count++;
2521 if (dump_file != NULL)
2522 {
2523 fprintf (dump_file,
2524 "GLOBAL CONST-PROP: Replacing reg %d in jump_insn %d with constant ",
2525 REGNO (from), INSN_UID (jump));
2526 print_rtl (dump_file, src);
2527 fprintf (dump_file, "\n");
2528 }
2529 purge_dead_edges (bb);
2530
2531 /* If a conditional jump has been changed into an unconditional jump, remove
2532 the jump and make the edge fallthru - this is always called in
2533 cfglayout mode. */
2534 if (new_rtx != pc_rtx && simplejump_p (jump))
2535 {
2536 edge e;
2537 edge_iterator ei;
2538
2539 for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); ei_next (&ei))
2540 if (e->dest != EXIT_BLOCK_PTR
2541 && BB_HEAD (e->dest) == JUMP_LABEL (jump))
2542 {
2543 e->flags |= EDGE_FALLTHRU;
2544 break;
2545 }
2546 delete_insn (jump);
2547 }
2548
2549 return 1;
2550 }
2551
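/* Try to replace all uses of FROM in INSN with TO. Handles a reg or
cc0 setter followed by a conditional branch, a normal insn, and a lone
conditional jump. Returns nonzero if a change was made. */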
2552 static bool
2553 constprop_register (rtx insn, rtx from, rtx to)
2554 {
2555 rtx sset;
2556
2557 /* Check for reg or cc0 setting instructions followed by
2558 conditional branch instructions first. */
2559 if ((sset = single_set (insn)) != NULL
2560 && NEXT_INSN (insn)
2561 && any_condjump_p (NEXT_INSN (insn)) && onlyjump_p (NEXT_INSN (insn)))
2562 {
2563 rtx dest = SET_DEST (sset);
2564 if ((REG_P (dest) || CC0_P (dest))
2565 && cprop_jump (BLOCK_FOR_INSN (insn), insn, NEXT_INSN (insn), from, to))
2566 return 1;
2567 }
2568
2569 /* Handle normal insns next. */
2570 if (NONJUMP_INSN_P (insn)
2571 && try_replace_reg (from, to, insn))
2572 return 1;
2573
2574 /* Try to propagate a CONST_INT into a conditional jump.
2575 We're pretty specific about what we will handle in this
2576 code, we can extend this as necessary over time.
2577
2578 Right now the insn in question must look like
2579 (set (pc) (if_then_else ...)) */
2580 else if (any_condjump_p (insn) && onlyjump_p (insn))
2581 return cprop_jump (BLOCK_FOR_INSN (insn), NULL, insn, from, to);
2582 return 0;
2583 }
2584
2585 /* Perform constant and copy propagation on INSN.
2586 The result is nonzero if a change was made. */
2587
2588 static int
2589 cprop_insn (rtx insn)
2590 {
2591 struct reg_use *reg_used;
2592 int changed = 0;
2593 rtx note;
2594
2595 if (!INSN_P (insn))
2596 return 0;
2597
2598 reg_use_count = 0;
2599 note_uses (&PATTERN (insn), find_used_regs, NULL);
2600
2601 note = find_reg_equal_equiv_note (insn);
2602
2603 /* We may win even when propagating constants into notes. */
2604 if (note)
2605 find_used_regs (&XEXP (note, 0), NULL);
2606
2607 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2608 reg_used++, reg_use_count--)
2609 {
2610 unsigned int regno = REGNO (reg_used->reg_rtx);
2611 rtx pat, src;
2612 struct expr *set;
2613
2614 /* If the register has already been set in this block, there's
2615 nothing we can do. */
2616 if (! oprs_not_set_p (reg_used->reg_rtx, insn))
2617 continue;
2618
2619 /* Find an assignment that sets reg_used and is available
2620 at the start of the block. */
2621 set = find_avail_set (regno, insn);
2622 if (! set)
2623 continue;
2624
2625 pat = set->expr;
2626 /* ??? We might be able to handle PARALLELs. Later. */
2627 gcc_assert (GET_CODE (pat) == SET);
2628
2629 src = SET_SRC (pat);
2630
2631 /* Constant propagation. */
2632 if (gcse_constant_p (src))
2633 {
2634 if (constprop_register (insn, reg_used->reg_rtx, src))
2635 {
2636 changed = 1;
2637 global_const_prop_count++;
2638 if (dump_file != NULL)
2639 {
2640 fprintf (dump_file, "GLOBAL CONST-PROP: Replacing reg %d in ", regno);
2641 fprintf (dump_file, "insn %d with constant ", INSN_UID (insn));
2642 print_rtl (dump_file, src);
2643 fprintf (dump_file, "\n");
2644 }
2645 if (INSN_DELETED_P (insn))
2646 return 1;
2647 }
2648 }
2649 else if (REG_P (src)
2650 && REGNO (src) >= FIRST_PSEUDO_REGISTER
2651 && REGNO (src) != regno)
2652 {
2653 if (try_replace_reg (reg_used->reg_rtx, src, insn))
2654 {
2655 changed = 1;
2656 global_copy_prop_count++;
2657 if (dump_file != NULL)
2658 {
2659 fprintf (dump_file, "GLOBAL COPY-PROP: Replacing reg %d in insn %d",
2660 regno, INSN_UID (insn));
2661 fprintf (dump_file, " with reg %d\n", REGNO (src));
2662 }
2663
2664 /* The original insn setting reg_used may or may not now be
2665 deletable. We leave the deletion to flow. */
2666 /* FIXME: If it turns out that the insn isn't deletable,
2667 then we may have unnecessarily extended register lifetimes
2668 and made things worse. */
2669 }
2670 }
2671 }
2672
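/* A replacement made only inside a debug insn must not be reported as
a real change, lest code generation come to depend on -g. */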
2673 if (changed && DEBUG_INSN_P (insn))
2674 return 0;
2675
2676 return changed;
2677 }
2678
2679 /* Like find_used_regs, but avoid recording uses that appear in
2680 input-output contexts such as zero_extract or pre_dec. This
2681 restricts the cases we consider to those for which local cprop
2682 can legitimately make replacements. */
2683
2684 static void
2685 local_cprop_find_used_regs (rtx *xptr, void *data)
2686 {
2687 rtx x = *xptr;
2688
2689 if (x == 0)
2690 return;
2691
2692 switch (GET_CODE (x))
2693 {
2694 case ZERO_EXTRACT:
2695 case SIGN_EXTRACT:
2696 case STRICT_LOW_PART:
2697 return;
2698
2699 case PRE_DEC:
2700 case PRE_INC:
2701 case POST_DEC:
2702 case POST_INC:
2703 case PRE_MODIFY:
2704 case POST_MODIFY:
2705 /* Can only legitimately appear this early in the context of
2706 stack pushes for function arguments, but handle all of the
2707 codes nonetheless. */
2708 return;
2709
2710 case SUBREG:
2711 /* Setting a subreg of a register larger than word_mode leaves
2712 the non-written words unchanged. */
2713 if (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (x))) > BITS_PER_WORD)
2714 return;
2715 break;
2716
2717 default:
2718 break;
2719 }
2720
2721 find_used_regs (xptr, data);
2722 }
2723
2724 /* Try to perform local const/copy propagation on X in INSN. */
2725
2726 static bool
2727 do_local_cprop (rtx x, rtx insn)
2728 {
2729 rtx newreg = NULL, newcnst = NULL;
2730
2731 /* Rule out USE instructions and ASM statements as we don't want to
2732 change the hard registers mentioned. */
2733 if (REG_P (x)
2734 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
2735 || (GET_CODE (PATTERN (insn)) != USE
2736 && asm_noperands (PATTERN (insn)) < 0)))
2737 {
2738 cselib_val *val = cselib_lookup (x, GET_MODE (x), 0);
2739 struct elt_loc_list *l;
2740
2741 if (!val)
2742 return false;
2743 for (l = val->locs; l; l = l->next)
2744 {
2745 rtx this_rtx = l->loc;
2746 rtx note;
2747
2748 if (gcse_constant_p (this_rtx))
2749 newcnst = this_rtx;
2750 if (REG_P (this_rtx) && REGNO (this_rtx) >= FIRST_PSEUDO_REGISTER
2751 /* Don't copy propagate if the register has a REG_EQUIV note
2752 attached. At this point only function parameters should have
2753 REG_EQUIV notes, and if the argument slot is used somewhere
2754 explicitly, it means the address of the parameter has been taken,
2755 so we should not extend the lifetime of the pseudo. */
2756 && (!(note = find_reg_note (l->setting_insn, REG_EQUIV, NULL_RTX))
2757 || ! MEM_P (XEXP (note, 0))))
2758 newreg = this_rtx;
2759 }
2760 if (newcnst && constprop_register (insn, x, newcnst))
2761 {
2762 if (dump_file != NULL)
2763 {
2764 fprintf (dump_file, "LOCAL CONST-PROP: Replacing reg %d in ",
2765 REGNO (x));
2766 fprintf (dump_file, "insn %d with constant ",
2767 INSN_UID (insn));
2768 print_rtl (dump_file, newcnst);
2769 fprintf (dump_file, "\n");
2770 }
2771 local_const_prop_count++;
2772 return true;
2773 }
2774 else if (newreg && newreg != x && try_replace_reg (x, newreg, insn))
2775 {
2776 if (dump_file != NULL)
2777 {
2778 fprintf (dump_file,
2779 "LOCAL COPY-PROP: Replacing reg %d in insn %d",
2780 REGNO (x), INSN_UID (insn));
2781 fprintf (dump_file, " with reg %d\n", REGNO (newreg));
2782 }
2783 local_copy_prop_count++;
2784 return true;
2785 }
2786 }
2787 return false;
2788 }
2789
2790 /* Do local const/copy propagation (i.e. within each basic block). */
2791
2792 static int
2793 local_cprop_pass (void)
2794 {
2795 basic_block bb;
2796 rtx insn;
2797 struct reg_use *reg_used;
2798 bool changed = false;
2799
2800 cselib_init (0);
2801 FOR_EACH_BB (bb)
2802 {
2803 FOR_BB_INSNS (bb, insn)
2804 {
2805 if (INSN_P (insn))
2806 {
2807 rtx note = find_reg_equal_equiv_note (insn);
2808 do
2809 {
2810 reg_use_count = 0;
2811 note_uses (&PATTERN (insn), local_cprop_find_used_regs,
2812 NULL);
2813 if (note)
2814 local_cprop_find_used_regs (&XEXP (note, 0), NULL);
2815
2816 for (reg_used = &reg_use_table[0]; reg_use_count > 0;
2817 reg_used++, reg_use_count--)
2818 {
2819 if (do_local_cprop (reg_used->reg_rtx, insn))
2820 {
2821 changed = true;
2822 break;
2823 }
2824 }
2825 if (INSN_DELETED_P (insn))
2826 break;
2827 }
2828 while (reg_use_count);
2829 }
2830 cselib_process_insn (insn);
2831 }
2832
2833 /* Forget everything at the end of a basic block. */
2834 cselib_clear_table ();
2835 }
2836
2837 cselib_finish ();
2838
2839 return changed;
2840 }
2841
2842 /* Similar to get_condition, only the resulting condition must be
2843 valid at JUMP, instead of at EARLIEST.
2844
2845 This differs from noce_get_condition in ifcvt.c in that we prefer not to
2846 settle for the condition variable in the jump instruction being integral.
2847 We prefer to be able to record the value of a user variable, rather than
2848 the value of a temporary used in a condition. This could be solved by
2849 recording the value of *every* register scanned by canonicalize_condition,
2850 but this would require some code reorganization. */
2851
2852 rtx
2853 fis_get_condition (rtx jump)
2854 {
2855 return get_condition (jump, NULL, false, true);
2856 }
2857
2858 /* Check the comparison COND to see if we can safely form an implicit set from
2859 it. COND is either an EQ or NE comparison. */
2860
2861 static bool
2862 implicit_set_cond_p (const_rtx cond)
2863 {
2864 const enum machine_mode mode = GET_MODE (XEXP (cond, 0));
2865 const_rtx cst = XEXP (cond, 1);
2866
2867 /* We can't perform this optimization if either operand might be or might
2868 contain a signed zero. */
2869 if (HONOR_SIGNED_ZEROS (mode))
2870 {
2871 /* It is sufficient to check if CST is or contains a zero. We must
2872 handle float, complex, and vector. If any subpart is a zero, then
2873 the optimization can't be performed. */
2874 /* ??? The complex and vector checks are not implemented yet. We just
2875 always return zero for them. */
2876 if (GET_CODE (cst) == CONST_DOUBLE)
2877 {
2878 REAL_VALUE_TYPE d;
2879 REAL_VALUE_FROM_CONST_DOUBLE (d, cst);
2880 if (REAL_VALUES_EQUAL (d, dconst0))
2881 return 0;
2882 }
2883 else
2884 return 0;
2885 }
2886
2887 return gcse_constant_p (cst);
2888 }
2889
2890 /* Find the implicit sets of a function. An "implicit set" is a constraint
2891 on the value of a variable, implied by a conditional jump. For example,
2892 following "if (x == 2)", the then branch may be optimized as though the
2893 conditional performed an "explicit set", in this example, "x = 2". This
2894 function records the set patterns that are implicit at the start of each
2895 basic block.
2896
2897 FIXME: This would be more effective if critical edges are pre-split. As
2898 it is now, we can't record implicit sets for blocks that have
2899 critical successor edges. This results in missed optimizations
2900 and in more (unnecessary) work in cfgcleanup.c:thread_jump(). */
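/* E.g. for "if (x == 2)" with x in (say) reg 60, the pattern recorded
for the block reached when the condition holds is
(set (reg 60) (const_int 2)). */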
2901
2902 static void
2903 find_implicit_sets (void)
2904 {
2905 basic_block bb, dest;
2906 unsigned int count;
2907 rtx cond, new_rtx;
2908
2909 count = 0;
2910 FOR_EACH_BB (bb)
2911 /* Check for more than one successor. */
2912 if (EDGE_COUNT (bb->succs) > 1)
2913 {
2914 cond = fis_get_condition (BB_END (bb));
2915
2916 if (cond
2917 && (GET_CODE (cond) == EQ || GET_CODE (cond) == NE)
2918 && REG_P (XEXP (cond, 0))
2919 && REGNO (XEXP (cond, 0)) >= FIRST_PSEUDO_REGISTER
2920 && implicit_set_cond_p (cond))
2921 {
2922 dest = GET_CODE (cond) == EQ ? BRANCH_EDGE (bb)->dest
2923 : FALLTHRU_EDGE (bb)->dest;
2924
2925 if (dest
2926 /* Record nothing for a critical edge. */
2927 && single_pred_p (dest)
2928 && dest != EXIT_BLOCK_PTR)
2929 {
2930 new_rtx = gen_rtx_SET (VOIDmode, XEXP (cond, 0),
2931 XEXP (cond, 1));
2932 implicit_sets[dest->index] = new_rtx;
2933 if (dump_file)
2934 {
2935 fprintf (dump_file, "Implicit set of reg %d in ",
2936 REGNO (XEXP (cond, 0)));
2937 fprintf (dump_file, "basic block %d\n", dest->index);
2938 }
2939 count++;
2940 }
2941 }
2942 }
2943
2944 if (dump_file)
2945 fprintf (dump_file, "Found %d implicit sets\n", count);
2946 }
2947
2948 /* Bypass conditional jumps. */
2949
2950 /* The value of last_basic_block at the beginning of the jump_bypass
2951 pass. The use of redirect_edge_and_branch_force may introduce new
2952 basic blocks, but the data flow analysis is only valid for basic
2953 block indices less than bypass_last_basic_block. */
2954
2955 static int bypass_last_basic_block;
2956
2957 /* Find a set of REGNO to a constant that is available at the end of basic
2958 block BB. Returns NULL if no such set is found. Based heavily upon
2959 find_avail_set. */
2960
2961 static struct expr *
2962 find_bypass_set (int regno, int bb)
2963 {
2964 struct expr *result = 0;
2965
2966 for (;;)
2967 {
2968 rtx src;
2969 struct expr *set = lookup_set (regno, &set_hash_table);
2970
2971 while (set)
2972 {
2973 if (TEST_BIT (cprop_avout[bb], set->bitmap_index))
2974 break;
2975 set = next_set (regno, set);
2976 }
2977
2978 if (set == 0)
2979 break;
2980
2981 gcc_assert (GET_CODE (set->expr) == SET);
2982
2983 src = SET_SRC (set->expr);
2984 if (gcse_constant_p (src))
2985 result = set;
2986
2987 if (! REG_P (src))
2988 break;
2989
2990 regno = REGNO (src);
2991 }
2992 return result;
2993 }
2994
2995
2996 /* Subroutine of bypass_block that checks whether a pseudo is killed by
2997 any of the instructions inserted on an edge. Jump bypassing places
2998 condition code setters on CFG edges using insert_insn_on_edge. This
2999 function is required to check that our data flow analysis is still
3000 valid prior to commit_edge_insertions. */
3001
3002 static bool
3003 reg_killed_on_edge (const_rtx reg, const_edge e)
3004 {
3005 rtx insn;
3006
3007 for (insn = e->insns.r; insn; insn = NEXT_INSN (insn))
3008 if (INSN_P (insn) && reg_set_p (reg, insn))
3009 return true;
3010
3011 return false;
3012 }
3013
3014 /* Subroutine of bypass_conditional_jumps that attempts to bypass the given
3015 basic block BB which has more than one predecessor. If not NULL, SETCC
3016 is the first instruction of BB, which is immediately followed by JUMP_INSN
3017 JUMP. Otherwise, SETCC is NULL, and JUMP is the first insn of BB.
3018 Returns nonzero if a change was made.
3019
3020 During the jump bypassing pass, we may place copies of SETCC instructions
3021 on CFG edges. The following routine must be careful to pay attention to
3022 these inserted insns when performing its transformations. */
3023
3024 static int
3025 bypass_block (basic_block bb, rtx setcc, rtx jump)
3026 {
3027 rtx insn, note;
3028 edge e, edest;
3029 int i, change;
3030 int may_be_loop_header;
3031 unsigned removed_p;
3032 edge_iterator ei;
3033
3034 insn = (setcc != NULL) ? setcc : jump;
3035
3036 /* Determine set of register uses in INSN. */
3037 reg_use_count = 0;
3038 note_uses (&PATTERN (insn), find_used_regs, NULL);
3039 note = find_reg_equal_equiv_note (insn);
3040 if (note)
3041 find_used_regs (&XEXP (note, 0), NULL);
3042
3043 may_be_loop_header = false;
3044 FOR_EACH_EDGE (e, ei, bb->preds)
3045 if (e->flags & EDGE_DFS_BACK)
3046 {
3047 may_be_loop_header = true;
3048 break;
3049 }
3050
3051 change = 0;
3052 for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
3053 {
3054 removed_p = 0;
3055
3056 if (e->flags & EDGE_COMPLEX)
3057 {
3058 ei_next (&ei);
3059 continue;
3060 }
3061
3062 /* We can't redirect edges from new basic blocks. */
3063 if (e->src->index >= bypass_last_basic_block)
3064 {
3065 ei_next (&ei);
3066 continue;
3067 }
3068
3069 /* Redirecting edges entering the loop from outside could create
3070 irreducible loops, which would decrease the effectiveness of some of
3071 the following optimizations, so prevent this. */
3072 if (may_be_loop_header
3073 && !(e->flags & EDGE_DFS_BACK))
3074 {
3075 ei_next (&ei);
3076 continue;
3077 }
3078
3079 for (i = 0; i < reg_use_count; i++)
3080 {
3081 struct reg_use *reg_used = &reg_use_table[i];
3082 unsigned int regno = REGNO (reg_used->reg_rtx);
3083 basic_block dest, old_dest;
3084 struct expr *set;
3085 rtx src, new_rtx;
3086
3087 set = find_bypass_set (regno, e->src->index);
3088
3089 if (! set)
3090 continue;
3091
3092 /* Check the data flow is valid after edge insertions. */
3093 if (e->insns.r && reg_killed_on_edge (reg_used->reg_rtx, e))
3094 continue;
3095
3096 src = SET_SRC (pc_set (jump));
3097
3098 if (setcc != NULL)
3099 src = simplify_replace_rtx (src,
3100 SET_DEST (PATTERN (setcc)),
3101 SET_SRC (PATTERN (setcc)));
3102
3103 new_rtx = simplify_replace_rtx (src, reg_used->reg_rtx,
3104 SET_SRC (set->expr));
3105
3106 /* Jump bypassing may have already placed instructions on
3107 edges of the CFG. We can't bypass an outgoing edge that
3108 has instructions associated with it, as these insns won't
3109 get executed if the incoming edge is redirected. */
3110
3111 if (new_rtx == pc_rtx)
3112 {
3113 edest = FALLTHRU_EDGE (bb);
3114 dest = edest->insns.r ? NULL : edest->dest;
3115 }
3116 else if (GET_CODE (new_rtx) == LABEL_REF)
3117 {
3118 dest = BLOCK_FOR_INSN (XEXP (new_rtx, 0));
3119 /* Don't bypass edges containing instructions. */
3120 edest = find_edge (bb, dest);
3121 if (edest && edest->insns.r)
3122 dest = NULL;
3123 }
3124 else
3125 dest = NULL;
3126
3127 /* Avoid unification of the edge with other edges from the original
3128 branch. We would end up emitting the instruction on "both"
3129 edges. */
3130
3131 if (dest && setcc && !CC0_P (SET_DEST (PATTERN (setcc)))
3132 && find_edge (e->src, dest))
3133 dest = NULL;
3134
3135 old_dest = e->dest;
3136 if (dest != NULL
3137 && dest != old_dest
3138 && dest != EXIT_BLOCK_PTR)
3139 {
3140 redirect_edge_and_branch_force (e, dest);
3141
3142 /* Copy the register setter to the redirected edge.
3143 Don't copy CC0 setters, as CC0 is dead after jump. */
3144 if (setcc)
3145 {
3146 rtx pat = PATTERN (setcc);
3147 if (!CC0_P (SET_DEST (pat)))
3148 insert_insn_on_edge (copy_insn (pat), e);
3149 }
3150
3151 if (dump_file != NULL)
3152 {
3153 fprintf (dump_file, "JUMP-BYPASS: Proved reg %d "
3154 "in jump_insn %d equals constant ",
3155 regno, INSN_UID (jump));
3156 print_rtl (dump_file, SET_SRC (set->expr));
3157 fprintf (dump_file, "\nBypass edge from %d->%d to %d\n",
3158 e->src->index, old_dest->index, dest->index);
3159 }
3160 change = 1;
3161 removed_p = 1;
3162 break;
3163 }
3164 }
3165 if (!removed_p)
3166 ei_next (&ei);
3167 }
3168 return change;
3169 }
3170
3171 /* Find basic blocks with more than one predecessor that only contain a
3172 single conditional jump. If the result of the comparison is known at
3173 compile-time from any incoming edge, redirect that edge to the
3174 appropriate target. Returns nonzero if a change was made.
3175
3176 This function is now mis-named, because we also handle indirect jumps. */
3177
3178 static int
3179 bypass_conditional_jumps (void)
3180 {
3181 basic_block bb;
3182 int changed;
3183 rtx setcc;
3184 rtx insn;
3185 rtx dest;
3186
3187 /* Note we start at block 1. */
3188 if (ENTRY_BLOCK_PTR->next_bb == EXIT_BLOCK_PTR)
3189 return 0;
3190
3191 bypass_last_basic_block = last_basic_block;
3192 mark_dfs_back_edges ();
3193
3194 changed = 0;
3195 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb,
3196 EXIT_BLOCK_PTR, next_bb)
3197 {
3198 /* Check for more than one predecessor. */
3199 if (!single_pred_p (bb))
3200 {
3201 setcc = NULL_RTX;
3202 FOR_BB_INSNS (bb, insn)
3203 if (DEBUG_INSN_P (insn))
3204 continue;
3205 else if (NONJUMP_INSN_P (insn))
3206 {
3207 if (setcc)
3208 break;
3209 if (GET_CODE (PATTERN (insn)) != SET)
3210 break;
3211
3212 dest = SET_DEST (PATTERN (insn));
3213 if (REG_P (dest) || CC0_P (dest))
3214 setcc = insn;
3215 else
3216 break;
3217 }
3218 else if (JUMP_P (insn))
3219 {
3220 if ((any_condjump_p (insn) || computed_jump_p (insn))
3221 && onlyjump_p (insn))
3222 changed |= bypass_block (bb, setcc, insn);
3223 break;
3224 }
3225 else if (INSN_P (insn))
3226 break;
3227 }
3228 }
3229
3230 /* If we bypassed any register setting insns, we inserted a
3231 copy on the redirected edge. These need to be committed. */
3232 if (changed)
3233 commit_edge_insertions ();
3234
3235 return changed;
3236 }
3237 \f
3238 /* Compute PRE+LCM working variables. */
3239
3240 /* Local properties of expressions. */
3241 /* Nonzero for expressions that are transparent in the block. */
3242 static sbitmap *transp;
3243
3244 /* Nonzero for expressions that are computed (available) in the block. */
3245 static sbitmap *comp;
3246
3247 /* Nonzero for expressions that are locally anticipatable in the block. */
3248 static sbitmap *antloc;
3249
3250 /* Nonzero for expressions where this block is an optimal computation
3251 point. */
3252 static sbitmap *pre_optimal;
3253
3254 /* Nonzero for expressions which are redundant in a particular block. */
3255 static sbitmap *pre_redundant;
3256
3257 /* Nonzero for expressions which should be inserted on a specific edge. */
3258 static sbitmap *pre_insert_map;
3259
3260 /* Nonzero for expressions which should be deleted in a specific block. */
3261 static sbitmap *pre_delete_map;
3262
3263 /* Contains the edge_list returned by pre_edge_lcm. */
3264 static struct edge_list *edge_list;
3265
3266 /* Allocate vars used for PRE analysis. */
3267
3268 static void
3269 alloc_pre_mem (int n_blocks, int n_exprs)
3270 {
3271 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
3272 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
3273 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
3274
3275 pre_optimal = NULL;
3276 pre_redundant = NULL;
3277 pre_insert_map = NULL;
3278 pre_delete_map = NULL;
3279 ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
3280
3281 /* pre_insert and pre_delete are allocated later. */
3282 }
3283
3284 /* Free vars used for PRE analysis. */
3285
3286 static void
3287 free_pre_mem (void)
3288 {
3289 sbitmap_vector_free (transp);
3290 sbitmap_vector_free (comp);
3291
3292 /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
3293
3294 if (pre_optimal)
3295 sbitmap_vector_free (pre_optimal);
3296 if (pre_redundant)
3297 sbitmap_vector_free (pre_redundant);
3298 if (pre_insert_map)
3299 sbitmap_vector_free (pre_insert_map);
3300 if (pre_delete_map)
3301 sbitmap_vector_free (pre_delete_map);
3302
3303 transp = comp = NULL;
3304 pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
3305 }
3306
3307 /* Remove certain expressions from anticipatable and transparent
3308 sets of basic blocks that have incoming abnormal edge.
3309 For PRE remove potentially trapping expressions to avoid placing
3310 them on abnormal edges. For hoisting remove memory references that
3311 can be clobbered by calls. */
3312
3313 static void
3314 prune_expressions (bool pre_p)
3315 {
3316 sbitmap prune_exprs;
3317 unsigned int ui;
3318 basic_block bb;
3319
3320 prune_exprs = sbitmap_alloc (expr_hash_table.n_elems);
3321 sbitmap_zero (prune_exprs);
3322 for (ui = 0; ui < expr_hash_table.size; ui++)
3323 {
3324 struct expr *e;
3325 for (e = expr_hash_table.table[ui]; e != NULL; e = e->next_same_hash)
3326 {
3327 /* Note potentially trapping expressions. */
3328 if (may_trap_p (e->expr))
3329 {
3330 SET_BIT (prune_exprs, e->bitmap_index);
3331 continue;
3332 }
3333
3334 if (!pre_p && MEM_P (e->expr))
3335 /* Note memory references that can be clobbered by a call.
3336 We do not split abnormal edges in hoisting, so if a
3337 memory reference were hoisted along an abnormal edge,
3338 it would be placed /before/ the call. Therefore, only
3339 constant memory references can be hoisted along abnormal
3340 edges. */
3341 {
3342 if (GET_CODE (XEXP (e->expr, 0)) == SYMBOL_REF
3343 && CONSTANT_POOL_ADDRESS_P (XEXP (e->expr, 0)))
3344 continue;
3345
3346 if (MEM_READONLY_P (e->expr)
3347 && !MEM_VOLATILE_P (e->expr)
3348 && MEM_NOTRAP_P (e->expr))
3349 /* Constant memory reference, e.g., a PIC address. */
3350 continue;
3351
3352 /* ??? Optimally, we would use interprocedural alias
3353 analysis to determine if this mem is actually killed
3354 by this call. */
3355
3356 SET_BIT (prune_exprs, e->bitmap_index);
3357 }
3358 }
3359 }
3360
3361 FOR_EACH_BB (bb)
3362 {
3363 edge e;
3364 edge_iterator ei;
3365
3366 /* If the current block is the destination of an abnormal edge, we
3367 kill all trapping (for PRE) and memory (for hoist) expressions
3368 because we won't be able to properly place the instruction on
3369 the edge. So make them neither anticipatable nor transparent.
3370 This is fairly conservative.
3371
3372 ??? For hoisting it may be necessary to check for set-and-jump
3373 instructions here, not just for abnormal edges. The general problem
3374 is that when an expression cannot be placed right at the end of
3375 a basic block, we should account for any side-effects of a subsequent
3376 jump instruction that could clobber the expression. It would
3377 be best to implement this check along the lines of
3378 hoist_expr_reaches_here_p where the target block is already known
3379 and, hence, there's no need to conservatively prune expressions on
3380 "intermediate" set-and-jump instructions. */
3381 FOR_EACH_EDGE (e, ei, bb->preds)
3382 if ((e->flags & EDGE_ABNORMAL)
3383 && (pre_p || CALL_P (BB_END (e->src))))
3384 {
3385 sbitmap_difference (antloc[bb->index],
3386 antloc[bb->index], prune_exprs);
3387 sbitmap_difference (transp[bb->index],
3388 transp[bb->index], prune_exprs);
3389 break;
3390 }
3391 }
3392
3393 sbitmap_free (prune_exprs);
3394 }
3395
3396 /* Top level routine to do the dataflow analysis needed by PRE. */
3397
3398 static void
3399 compute_pre_data (void)
3400 {
3401 basic_block bb;
3402
3403 compute_local_properties (transp, comp, antloc, &expr_hash_table);
3404 prune_expressions (true);
3405 sbitmap_vector_zero (ae_kill, last_basic_block);
3406
3407 /* Compute ae_kill for each basic block using:
3408
3409 ~(TRANSP | COMP)
3410 */
3411
3412 FOR_EACH_BB (bb)
3413 {
3414 sbitmap_a_or_b (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
3415 sbitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
3416 }
3417
3418 edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
3419 ae_kill, &pre_insert_map, &pre_delete_map);
3420 sbitmap_vector_free (antloc);
3421 antloc = NULL;
3422 sbitmap_vector_free (ae_kill);
3423 ae_kill = NULL;
3424 }
3425 \f
3426 /* PRE utilities */
3427
3428 /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
3429 block BB.
3430
3431 VISITED is a pointer to a working buffer for tracking which BB's have
3432 been visited. It is NULL for the top-level call.
3433
3434 We treat reaching expressions that go through blocks containing the same
3435 reaching expression as "not reaching". E.g. if EXPR is generated in blocks
3436 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
3437 2 as not reaching. The intent is to improve the probability of finding
3438 only one reaching expression and to reduce register lifetimes by picking
3439 the closest such expression. */
3440
3441 static int
3442 pre_expr_reaches_here_p_work (basic_block occr_bb, struct expr *expr, basic_block bb, char *visited)
3443 {
3444 edge pred;
3445 edge_iterator ei;
3446
3447 FOR_EACH_EDGE (pred, ei, bb->preds)
3448 {
3449 basic_block pred_bb = pred->src;
3450
3451 if (pred->src == ENTRY_BLOCK_PTR
3452 /* Has this predecessor already been visited? */
3453 || visited[pred_bb->index])
3454 ;/* Nothing to do. */
3455
3456 /* Does this predecessor generate this expression? */
3457 else if (TEST_BIT (comp[pred_bb->index], expr->bitmap_index))
3458 {
3459 /* Is this the occurrence we're looking for?
3460 Note that there's only one generating occurrence per block
3461 so we just need to check the block number. */
3462 if (occr_bb == pred_bb)
3463 return 1;
3464
3465 visited[pred_bb->index] = 1;
3466 }
3467 /* Ignore this predecessor if it kills the expression. */
3468 else if (! TEST_BIT (transp[pred_bb->index], expr->bitmap_index))
3469 visited[pred_bb->index] = 1;
3470
3471 /* Neither gen nor kill. */
3472 else
3473 {
3474 visited[pred_bb->index] = 1;
3475 if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
3476 return 1;
3477 }
3478 }
3479
3480 /* All paths have been checked. */
3481 return 0;
3482 }
3483
3484 /* The wrapper for pre_expr_reaches_here_p_work that ensures that any
3485 memory allocated for that function is returned. */
3486
3487 static int
3488 pre_expr_reaches_here_p (basic_block occr_bb, struct expr *expr, basic_block bb)
3489 {
3490 int rval;
3491 char *visited = XCNEWVEC (char, last_basic_block);
3492
3493 rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
3494
3495 free (visited);
3496 return rval;
3497 }
3498 \f
3499
3500 /* Given an expr, generate RTL which we can insert at the end of a BB,
3501 or on an edge, computing the expression into its reaching
3502 register. */
3503
3504 static rtx
3505 process_insert_insn (struct expr *expr)
3506 {
3507 rtx reg = expr->reaching_reg;
3508 rtx exp = copy_rtx (expr->expr);
3509 rtx pat;
3510
3511 start_sequence ();
3512
3513 /* If the expression is something that's an operand, like a constant,
3514 just copy it to a register. */
3515 if (general_operand (exp, GET_MODE (reg)))
3516 emit_move_insn (reg, exp);
3517
3518 /* Otherwise, make a new insn to compute this expression and make sure the
3519 insn will be recognized (this also adds any needed CLOBBERs). Copy the
3520 expression to make sure we don't have any sharing issues. */
3521 else
3522 {
3523 rtx insn = emit_insn (gen_rtx_SET (VOIDmode, reg, exp));
3524
3525 if (insn_invalid_p (insn))
3526 gcc_unreachable ();
3527 }
3528
3530 pat = get_insns ();
3531 end_sequence ();
3532
3533 return pat;
3534 }
3535
3536 /* Add EXPR to the end of basic block BB.
3537
3538 This is used by both PRE and code hoisting. */
3539
3540 static void
3541 insert_insn_end_basic_block (struct expr *expr, basic_block bb)
3542 {
3543 rtx insn = BB_END (bb);
3544 rtx new_insn;
3545 rtx reg = expr->reaching_reg;
3546 int regno = REGNO (reg);
3547 rtx pat, pat_end;
3548
3549 pat = process_insert_insn (expr);
3550 gcc_assert (pat && INSN_P (pat));
3551
3552 pat_end = pat;
3553 while (NEXT_INSN (pat_end) != NULL_RTX)
3554 pat_end = NEXT_INSN (pat_end);
3555
3556 /* If the last insn is a jump, insert EXPR in front [taking care to
3557 handle cc0, etc. properly]. Similarly we need to take care of
3558 trapping instructions in the presence of non-call exceptions. */
3559
3560 if (JUMP_P (insn)
3561 || (NONJUMP_INSN_P (insn)
3562 && (!single_succ_p (bb)
3563 || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
3564 {
3565 #ifdef HAVE_cc0
3566 rtx note;
3567 #endif
3568
3569 /* If this is a jump table, then we can't insert stuff here. Since
3570 we know the previous real insn must be the tablejump, we insert
3571 the new instruction just before the tablejump. */
3572 if (GET_CODE (PATTERN (insn)) == ADDR_VEC
3573 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
3574 insn = prev_real_insn (insn);
3575
3576 #ifdef HAVE_cc0
3577 /* FIXME: 'twould be nice to call prev_cc0_setter here but it aborts
3578 if cc0 isn't set. */
3579 note = find_reg_note (insn, REG_CC_SETTER, NULL_RTX);
3580 if (note)
3581 insn = XEXP (note, 0);
3582 else
3583 {
3584 rtx maybe_cc0_setter = prev_nonnote_insn (insn);
3585 if (maybe_cc0_setter
3586 && INSN_P (maybe_cc0_setter)
3587 && sets_cc0_p (PATTERN (maybe_cc0_setter)))
3588 insn = maybe_cc0_setter;
3589 }
3590 #endif
3591 /* FIXME: What if something in cc0/jump uses value set in new insn? */
3592 new_insn = emit_insn_before_noloc (pat, insn, bb);
3593 }
3594
3595 /* Likewise if the last insn is a call, as will happen in the presence
3596 of exception handling. */
3597 else if (CALL_P (insn)
3598 && (!single_succ_p (bb)
3599 || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
3600 {
3601 /* Keeping in mind targets with small register classes and parameters
3602 in registers, we search backward and place the instructions before
3603 the first parameter is loaded. Do this for all targets for consistency,
3604 on the presumption that we'll get better code elsewhere as well. */
3605
3606 /* Since different machines initialize their parameter registers
3607 in different orders, assume nothing. Collect the set of all
3608 parameter registers. */
3609 insn = find_first_parameter_load (insn, BB_HEAD (bb));
3610
3611 /* If we found all the parameter loads, then we want to insert
3612 before the first parameter load.
3613
3614 If we did not find all the parameter loads, then we might have
3615 stopped on the head of the block, which could be a CODE_LABEL.
3616 If we inserted before the CODE_LABEL, then we would be putting
3617 the insn in the wrong basic block. In that case, put the insn
3618 after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. */
3619 while (LABEL_P (insn)
3620 || NOTE_INSN_BASIC_BLOCK_P (insn))
3621 insn = NEXT_INSN (insn);
3622
3623 new_insn = emit_insn_before_noloc (pat, insn, bb);
3624 }
3625 else
3626 new_insn = emit_insn_after_noloc (pat, insn, bb);
3627
3628 while (1)
3629 {
3630 if (INSN_P (pat))
3631 add_label_notes (PATTERN (pat), new_insn);
3632 if (pat == pat_end)
3633 break;
3634 pat = NEXT_INSN (pat);
3635 }
3636
3637 gcse_create_count++;
3638
3639 if (dump_file)
3640 {
3641 fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
3642 bb->index, INSN_UID (new_insn));
3643 fprintf (dump_file, "copying expression %d to reg %d\n",
3644 expr->bitmap_index, regno);
3645 }
3646 }
3647
3648 /* Insert partially redundant expressions on edges in the CFG to make
3649 the expressions fully redundant. */
3650
3651 static int
3652 pre_edge_insert (struct edge_list *edge_list, struct expr **index_map)
3653 {
3654 int e, i, j, num_edges, set_size, did_insert = 0;
3655 sbitmap *inserted;
3656
3657 /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
3658 if it reaches any of the deleted expressions. */
3659
3660 set_size = pre_insert_map[0]->size;
3661 num_edges = NUM_EDGES (edge_list);
3662 inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
3663 sbitmap_vector_zero (inserted, num_edges);
3664
3665 for (e = 0; e < num_edges; e++)
3666 {
3667 int indx;
3668 basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);
3669
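/* Walk the insertion map for edge E one SBITMAP element at a time;
   INDX is the expression index of the element's low-order bit, so J
   below is the expression index of each set bit we find.  */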
3670 for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
3671 {
3672 SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];
3673
3674 for (j = indx; insert && j < (int) expr_hash_table.n_elems; j++, insert >>= 1)
3675 if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
3676 {
3677 struct expr *expr = index_map[j];
3678 struct occr *occr;
3679
3680 /* Now look at each deleted occurrence of this expression. */
3681 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3682 {
3683 if (! occr->deleted_p)
3684 continue;
3685
3686 /* Insert this expression on this edge if it would
3687 reach the deleted occurrence in BB. */
3688 if (!TEST_BIT (inserted[e], j))
3689 {
3690 rtx insn;
3691 edge eg = INDEX_EDGE (edge_list, e);
3692
3693 /* We can't insert anything on an abnormal and
3694 critical edge, so we insert the insn at the end of
3695 the previous block. There are several alternatives
3696 detailed in Morgan's book, p. 277 (sec. 10.5), for
3697 handling this situation. This one is easiest for
3698 now. */
3699
3700 if (eg->flags & EDGE_ABNORMAL)
3701 insert_insn_end_basic_block (index_map[j], bb);
3702 else
3703 {
3704 insn = process_insert_insn (index_map[j]);
3705 insert_insn_on_edge (insn, eg);
3706 }
3707
3708 if (dump_file)
3709 {
3710 fprintf (dump_file, "PRE: edge (%d,%d), ",
3711 bb->index,
3712 INDEX_EDGE_SUCC_BB (edge_list, e)->index);
3713 fprintf (dump_file, "copy expression %d\n",
3714 expr->bitmap_index);
3715 }
3716
3717 update_ld_motion_stores (expr);
3718 SET_BIT (inserted[e], j);
3719 did_insert = 1;
3720 gcse_create_count++;
3721 }
3722 }
3723 }
3724 }
3725 }
3726
3727 sbitmap_vector_free (inserted);
3728 return did_insert;
3729 }
3730
3731 /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
3732 Given "old_reg <- expr" (INSN), instead of adding after it
3733 reaching_reg <- old_reg
3734 it's better to do the following:
3735 reaching_reg <- expr
3736 old_reg <- reaching_reg
3737 because this way copy propagation can discover additional PRE
3738 opportunities. But if this fails, we try the old way.
3739 When "expr" is a store, i.e.
3740 given "MEM <- old_reg", instead of adding after it
3741 reaching_reg <- old_reg
3742 it's better to add it before as follows:
3743 reaching_reg <- old_reg
3744 MEM <- reaching_reg. */
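
/* For example (an illustrative sketch, with made-up register numbers):
   given

       INSN: (set (reg 100) (plus:SI (reg 1) (reg 2)))

   and reaching register (reg 105), we first try to turn INSN itself into

       (set (reg 105) (plus:SI (reg 1) (reg 2)))

   followed by (set (reg 100) (reg 105)); only if validating that change
   fails do we fall back to emitting (set (reg 105) (reg 100)) after the
   unmodified INSN.  */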
3745
3746 static void
3747 pre_insert_copy_insn (struct expr *expr, rtx insn)
3748 {
3749 rtx reg = expr->reaching_reg;
3750 int regno = REGNO (reg);
3751 int indx = expr->bitmap_index;
3752 rtx pat = PATTERN (insn);
3753 rtx set, first_set, new_insn;
3754 rtx old_reg;
3755 int i;
3756
3757 /* This block matches the logic in hash_scan_insn. */
3758 switch (GET_CODE (pat))
3759 {
3760 case SET:
3761 set = pat;
3762 break;
3763
3764 case PARALLEL:
3765 /* Search through the parallel looking for the set whose
3766 source was the expression that we're interested in. */
3767 first_set = NULL_RTX;
3768 set = NULL_RTX;
3769 for (i = 0; i < XVECLEN (pat, 0); i++)
3770 {
3771 rtx x = XVECEXP (pat, 0, i);
3772 if (GET_CODE (x) == SET)
3773 {
3774 /* If the source was a REG_EQUAL or REG_EQUIV note, we
3775 may not find an equivalent expression, but in this
3776 case the PARALLEL will have a single set. */
3777 if (first_set == NULL_RTX)
3778 first_set = x;
3779 if (expr_equiv_p (SET_SRC (x), expr->expr))
3780 {
3781 set = x;
3782 break;
3783 }
3784 }
3785 }
3786
3787 gcc_assert (first_set);
3788 if (set == NULL_RTX)
3789 set = first_set;
3790 break;
3791
3792 default:
3793 gcc_unreachable ();
3794 }
3795
3796 if (REG_P (SET_DEST (set)))
3797 {
3798 old_reg = SET_DEST (set);
3799 /* Check if we can modify the set destination in the original insn. */
3800 if (validate_change (insn, &SET_DEST (set), reg, 0))
3801 {
3802 new_insn = gen_move_insn (old_reg, reg);
3803 new_insn = emit_insn_after (new_insn, insn);
3804 }
3805 else
3806 {
3807 new_insn = gen_move_insn (reg, old_reg);
3808 new_insn = emit_insn_after (new_insn, insn);
3809 }
3810 }
3811 else /* This is possible only in the case of a store to memory. */
3812 {
3813 old_reg = SET_SRC (set);
3814 new_insn = gen_move_insn (reg, old_reg);
3815
3816 /* Check if we can modify the set source in the original insn. */
3817 if (validate_change (insn, &SET_SRC (set), reg, 0))
3818 new_insn = emit_insn_before (new_insn, insn);
3819 else
3820 new_insn = emit_insn_after (new_insn, insn);
3821 }
3822
3823 gcse_create_count++;
3824
3825 if (dump_file)
3826 fprintf (dump_file,
3827 "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
3828 BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
3829 INSN_UID (insn), regno);
3830 }
3831
3832 /* Copy available expressions that reach the redundant expression
3833 to `reaching_reg'. */
3834
3835 static void
3836 pre_insert_copies (void)
3837 {
3838 unsigned int i, added_copy;
3839 struct expr *expr;
3840 struct occr *occr;
3841 struct occr *avail;
3842
3843 /* For each available expression in the table, copy the result to
3844 `reaching_reg' if the expression reaches a deleted one.
3845
3846 ??? The current algorithm is rather brute force.
3847 Need to do some profiling. */
3848
3849 for (i = 0; i < expr_hash_table.size; i++)
3850 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
3851 {
3852 /* If the basic block isn't reachable, PPOUT will be TRUE. However,
3853 we don't want to insert a copy here because the expression may not
3854 really be redundant. So only insert an insn if the expression was
3855 deleted. This test also avoids further processing if the
3856 expression wasn't deleted anywhere. */
3857 if (expr->reaching_reg == NULL)
3858 continue;
3859
3860 /* Set when we add a copy for that expression. */
3861 added_copy = 0;
3862
3863 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3864 {
3865 if (! occr->deleted_p)
3866 continue;
3867
3868 for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
3869 {
3870 rtx insn = avail->insn;
3871
3872 /* No need to handle this one if handled already. */
3873 if (avail->copied_p)
3874 continue;
3875
3876 /* Don't handle this one if it's a redundant one. */
3877 if (INSN_DELETED_P (insn))
3878 continue;
3879
3880 /* Or if the expression doesn't reach the deleted one. */
3881 if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
3882 expr,
3883 BLOCK_FOR_INSN (occr->insn)))
3884 continue;
3885
3886 added_copy = 1;
3887
3888 /* Copy the result of avail to reaching_reg. */
3889 pre_insert_copy_insn (expr, insn);
3890 avail->copied_p = 1;
3891 }
3892 }
3893
3894 if (added_copy)
3895 update_ld_motion_stores (expr);
3896 }
3897 }
3898
3899 /* Emit a move from SRC to DEST, noting the equivalence with the expression
3900 computed in INSN. */
3901 static rtx
3902 gcse_emit_move_after (rtx src, rtx dest, rtx insn)
3903 {
3904 rtx new_rtx;
3905 rtx set = single_set (insn), set2;
3906 rtx note;
3907 rtx eqv;
3908
3909 /* This should never fail since we're creating a reg->reg copy
3910 we've verified to be valid. */
3911
3912 new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
3913
3914 /* Note the equivalence for the local CSE pass. */
3915 set2 = single_set (new_rtx);
3916 if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
3917 return new_rtx;
3918 if ((note = find_reg_equal_equiv_note (insn)))
3919 eqv = XEXP (note, 0);
3920 else
3921 eqv = SET_SRC (set);
3922
3923 set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
3924
3925 return new_rtx;
3926 }
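
/* E.g. (a sketch): when pre_delete replaces the redundant insn
   (set (reg 100) (plus:SI (reg 1) (reg 2))), the move emitted here is

       (set (reg 100) (reg 105))

   where (reg 105) is EXPR->reaching_reg, and it carries a REG_EQUAL note
   for (plus:SI (reg 1) (reg 2)) so that a later CSE pass still knows
   which value reg 100 holds.  */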
3927
3928 /* Delete redundant computations.
3929 Deletion is done by changing the insn to copy the `reaching_reg' of
3930 the expression into the result of the SET. It is left to later passes
3931 (cprop, cse2, flow, combine, regmove) to propagate the copy or eliminate it.
3932
3933 Returns nonzero if a change is made. */
3934
3935 static int
3936 pre_delete (void)
3937 {
3938 unsigned int i;
3939 int changed;
3940 struct expr *expr;
3941 struct occr *occr;
3942
3943 changed = 0;
3944 for (i = 0; i < expr_hash_table.size; i++)
3945 for (expr = expr_hash_table.table[i];
3946 expr != NULL;
3947 expr = expr->next_same_hash)
3948 {
3949 int indx = expr->bitmap_index;
3950
3951 /* We only need to search antic_occr since we require
3952 ANTLOC != 0. */
3953
3954 for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
3955 {
3956 rtx insn = occr->insn;
3957 rtx set;
3958 basic_block bb = BLOCK_FOR_INSN (insn);
3959
3960 /* We only delete insns that have a single_set. */
3961 if (TEST_BIT (pre_delete_map[bb->index], indx)
3962 && (set = single_set (insn)) != 0
3963 && dbg_cnt (pre_insn))
3964 {
3965 /* Create a pseudo-reg to store the result of reaching
3966 expressions into. Get the mode for the new pseudo from
3967 the mode of the original destination pseudo. */
3968 if (expr->reaching_reg == NULL)
3969 expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
3970
3971 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
3972 delete_insn (insn);
3973 occr->deleted_p = 1;
3974 changed = 1;
3975 gcse_subst_count++;
3976
3977 if (dump_file)
3978 {
3979 fprintf (dump_file,
3980 "PRE: redundant insn %d (expression %d) in ",
3981 INSN_UID (insn), indx);
3982 fprintf (dump_file, "bb %d, reaching reg is %d\n",
3983 bb->index, REGNO (expr->reaching_reg));
3984 }
3985 }
3986 }
3987 }
3988
3989 return changed;
3990 }
3991
3992 /* Perform GCSE optimizations using PRE.
3993 This is called by one_pre_gcse_pass after all the dataflow analysis
3994 has been done.
3995
3996 This is based on the original Morel-Renvoise paper, Fred Chow's thesis, and
3997 lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
3998 Compiler Design and Implementation.
3999
4000 ??? A new pseudo reg is created to hold the reaching expression. The nice
4001 thing about the classical approach is that it would try to use an existing
4002 reg. If the register can't be adequately optimized [i.e. we introduce
4003 reload problems], one could add a pass here to propagate the new register
4004 through the block.
4005
4006 ??? We don't handle single sets in PARALLELs because we're [currently] not
4007 able to copy the rest of the parallel when we insert copies to create full
4008 redundancies from partial redundancies. However, there's no reason why we
4009 can't handle PARALLELs in the cases where there are no partial
4010 redundancies. */
4011
4012 static int
4013 pre_gcse (void)
4014 {
4015 unsigned int i;
4016 int did_insert, changed;
4017 struct expr **index_map;
4018 struct expr *expr;
4019
4020 /* Compute a mapping from expression number (`bitmap_index') to
4021 hash table entry. */
4022
4023 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4024 for (i = 0; i < expr_hash_table.size; i++)
4025 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4026 index_map[expr->bitmap_index] = expr;
4027
4028 /* Delete the redundant insns first so that
4029 - we know what register to use for the new insns and for the other
4030 ones with reaching expressions
4031 - we know which insns are redundant when we go to create copies */
4032
4033 changed = pre_delete ();
4034 did_insert = pre_edge_insert (edge_list, index_map);
4035
4036 /* In other places with reaching expressions, copy the expression to the
4037 specially allocated pseudo-reg that reaches the redundant expr. */
4038 pre_insert_copies ();
4039 if (did_insert)
4040 {
4041 commit_edge_insertions ();
4042 changed = 1;
4043 }
4044
4045 free (index_map);
4046 return changed;
4047 }
4048
4049 /* Top level routine to perform one PRE GCSE pass.
4050
4051 Return nonzero if a change was made. */
4052
4053 static int
4054 one_pre_gcse_pass (void)
4055 {
4056 int changed = 0;
4057
4058 gcse_subst_count = 0;
4059 gcse_create_count = 0;
4060
4061 /* Return if there's nothing to do, or it is too expensive. */
4062 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4063 || is_too_expensive (_("PRE disabled")))
4064 return 0;
4065
4066 /* We need alias. */
4067 init_alias_analysis ();
4068
4069 bytes_used = 0;
4070 gcc_obstack_init (&gcse_obstack);
4071 alloc_gcse_mem ();
4072
4073 alloc_hash_table (&expr_hash_table, 0);
4074 add_noreturn_fake_exit_edges ();
4075 if (flag_gcse_lm)
4076 compute_ld_motion_mems ();
4077
4078 compute_hash_table (&expr_hash_table);
4079 trim_ld_motion_mems ();
4080 if (dump_file)
4081 dump_hash_table (dump_file, "Expression", &expr_hash_table);
4082
4083 if (expr_hash_table.n_elems > 0)
4084 {
4085 alloc_pre_mem (last_basic_block, expr_hash_table.n_elems);
4086 compute_pre_data ();
4087 changed |= pre_gcse ();
4088 free_edge_list (edge_list);
4089 free_pre_mem ();
4090 }
4091
4092 free_ldst_mems ();
4093 remove_fake_exit_edges ();
4094 free_hash_table (&expr_hash_table);
4095
4096 free_gcse_mem ();
4097 obstack_free (&gcse_obstack, NULL);
4098
4099 /* We are finished with alias. */
4100 end_alias_analysis ();
4101
4102 if (dump_file)
4103 {
4104 fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
4105 current_function_name (), n_basic_blocks, bytes_used);
4106 fprintf (dump_file, "%d substs, %d insns created\n",
4107 gcse_subst_count, gcse_create_count);
4108 }
4109
4110 return changed;
4111 }
4112 \f
4113 /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
4114 to INSN. If such notes are added to an insn which references a
4115 CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
4116 those notes, because the following loop optimization pass requires
4117 them. */
4118
4119 /* ??? If there was a jump optimization pass after gcse and before loop,
4120 then we would not need to do this here, because jump would add the
4121 necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
4122
4123 static void
4124 add_label_notes (rtx x, rtx insn)
4125 {
4126 enum rtx_code code = GET_CODE (x);
4127 int i, j;
4128 const char *fmt;
4129
4130 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
4131 {
4132 /* This code used to ignore labels that referred to dispatch tables to
4133 avoid flow generating (slightly) worse code.
4134
4135 We no longer ignore such label references (see LABEL_REF handling in
4136 mark_jump_label for additional information). */
4137
4138 /* There's no reason for current users to emit jump-insns with
4139 such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
4140 notes. */
4141 gcc_assert (!JUMP_P (insn));
4142 add_reg_note (insn, REG_LABEL_OPERAND, XEXP (x, 0));
4143
4144 if (LABEL_P (XEXP (x, 0)))
4145 LABEL_NUSES (XEXP (x, 0))++;
4146
4147 return;
4148 }
4149
4150 for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
4151 {
4152 if (fmt[i] == 'e')
4153 add_label_notes (XEXP (x, i), insn);
4154 else if (fmt[i] == 'E')
4155 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4156 add_label_notes (XVECEXP (x, i, j), insn);
4157 }
4158 }
4159
4160 /* Code Hoisting variables and subroutines. */
4161
4162 /* Very busy expressions. */
4163 static sbitmap *hoist_vbein;
4164 static sbitmap *hoist_vbeout;
4165
4166 /* Hoistable expressions. */
4167 static sbitmap *hoist_exprs;
4168
4169 /* ??? We could compute post dominators and run this algorithm in
4170 reverse to perform tail merging; doing so would probably be
4171 more effective than the tail merging code in jump.c.
4172
4173 It's unclear if tail merging could be run in parallel with
4174 code hoisting. It would be nice. */
4175
4176 /* Allocate vars used for code hoisting analysis. */
4177
4178 static void
4179 alloc_code_hoist_mem (int n_blocks, int n_exprs)
4180 {
4181 antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
4182 transp = sbitmap_vector_alloc (n_blocks, n_exprs);
4183 comp = sbitmap_vector_alloc (n_blocks, n_exprs);
4184
4185 hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
4186 hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
4187 hoist_exprs = sbitmap_vector_alloc (n_blocks, n_exprs);
4188 }
4189
4190 /* Free vars used for code hoisting analysis. */
4191
4192 static void
4193 free_code_hoist_mem (void)
4194 {
4195 sbitmap_vector_free (antloc);
4196 sbitmap_vector_free (transp);
4197 sbitmap_vector_free (comp);
4198
4199 sbitmap_vector_free (hoist_vbein);
4200 sbitmap_vector_free (hoist_vbeout);
4201 sbitmap_vector_free (hoist_exprs);
4202
4203 free_dominance_info (CDI_DOMINATORS);
4204 }
4205
4206 /* Compute the very busy expressions at entry/exit from each block.
4207
4208 An expression is very busy if all paths from a given point
4209 compute the expression. */
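
/* In terms of the local properties, the fixpoint solved by the loop
   below is (informally):

       VBEout(bb) = COMP(bb) | (intersection over successors s of VBEin(s))
       VBEin(bb)  = ANTLOC(bb) | (VBEout(bb) & TRANSP(bb))  */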
4210
4211 static void
4212 compute_code_hoist_vbeinout (void)
4213 {
4214 int changed, passes;
4215 basic_block bb;
4216
4217 sbitmap_vector_zero (hoist_vbeout, last_basic_block);
4218 sbitmap_vector_zero (hoist_vbein, last_basic_block);
4219
4220 passes = 0;
4221 changed = 1;
4222
4223 while (changed)
4224 {
4225 changed = 0;
4226
4227 /* We scan the blocks in reverse order to speed up
4228 convergence. */
4229 FOR_EACH_BB_REVERSE (bb)
4230 {
4231 if (bb->next_bb != EXIT_BLOCK_PTR)
4232 {
4233 sbitmap_intersection_of_succs (hoist_vbeout[bb->index],
4234 hoist_vbein, bb->index);
4235
4236 /* Include expressions in VBEout that are calculated
4237 in BB and available at its end. */
4238 sbitmap_a_or_b (hoist_vbeout[bb->index],
4239 hoist_vbeout[bb->index], comp[bb->index]);
4240 }
4241
4242 changed |= sbitmap_a_or_b_and_c_cg (hoist_vbein[bb->index],
4243 antloc[bb->index],
4244 hoist_vbeout[bb->index],
4245 transp[bb->index]);
4246 }
4247
4248 passes++;
4249 }
4250
4251 if (dump_file)
4252 fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
4253 }
4254
4255 /* Top level routine to do the dataflow analysis needed by code hoisting. */
4256
4257 static void
4258 compute_code_hoist_data (void)
4259 {
4260 compute_local_properties (transp, comp, antloc, &expr_hash_table);
4261 prune_expressions (false);
4262 compute_code_hoist_vbeinout ();
4263 calculate_dominance_info (CDI_DOMINATORS);
4264 if (dump_file)
4265 fprintf (dump_file, "\n");
4266 }
4267
4268 /* Determine if the expression identified by EXPR_INDEX would
4269 reach BB unimpaired if it was placed at the end of EXPR_BB.
4270 Stop the search if the expression would need to be moved more
4271 than DISTANCE instructions.
4272
4273 It's unclear exactly what Muchnick meant by "unimpaired". It seems
4274 to me that the expression must either be computed or transparent in
4275 *every* block in the path(s) from EXPR_BB to BB. Any other definition
4276 would allow the expression to be hoisted out of loops, even if
4277 the expression wasn't a loop invariant.
4278
4279 Contrast this to reachability for PRE where an expression is
4280 considered reachable if *any* path reaches instead of *all*
4281 paths. */
4282
4283 static int
4284 hoist_expr_reaches_here_p (basic_block expr_bb, int expr_index, basic_block bb,
4285 char *visited, int distance, int *bb_size)
4286 {
4287 edge pred;
4288 edge_iterator ei;
4289 int visited_allocated_locally = 0;
4290
4291 /* Terminate the search if the distance that EXPR is allowed to move
4292 has been exhausted. */
4293 if (distance > 0)
4294 {
4295 distance -= bb_size[bb->index];
4296
4297 if (distance <= 0)
4298 return 0;
4299 }
4300 else
4301 gcc_assert (distance == 0);
4302
4303 if (visited == NULL)
4304 {
4305 visited_allocated_locally = 1;
4306 visited = XCNEWVEC (char, last_basic_block);
4307 }
4308
4309 FOR_EACH_EDGE (pred, ei, bb->preds)
4310 {
4311 basic_block pred_bb = pred->src;
4312
4313 if (pred->src == ENTRY_BLOCK_PTR)
4314 break;
4315 else if (pred_bb == expr_bb)
4316 continue;
4317 else if (visited[pred_bb->index])
4318 continue;
4319
4320 else if (! TEST_BIT (transp[pred_bb->index], expr_index))
4321 break;
4322
4323 /* Not killed. */
4324 else
4325 {
4326 visited[pred_bb->index] = 1;
4327 if (! hoist_expr_reaches_here_p (expr_bb, expr_index, pred_bb,
4328 visited, distance, bb_size))
4329 break;
4330 }
4331 }
4332 if (visited_allocated_locally)
4333 free (visited);
4334
4335 return (pred == NULL);
4336 }
4337 \f
4338 /* Find an occurrence in BB from the list starting at OCCR. */
4339 static struct occr *
4340 find_occr_in_bb (struct occr *occr, basic_block bb)
4341 {
4342 /* Find the right occurrence of this expression. */
4343 while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
4344 occr = occr->next;
4345
4346 return occr;
4347 }
4348
4349 /* Actually perform code hoisting. */
4350
4351 static int
4352 hoist_code (void)
4353 {
4354 basic_block bb, dominated;
4355 VEC (basic_block, heap) *domby;
4356 unsigned int i,j;
4357 struct expr **index_map;
4358 struct expr *expr;
4359 int *to_bb_head;
4360 int *bb_size;
4361 int changed = 0;
4362
4363 sbitmap_vector_zero (hoist_exprs, last_basic_block);
4364
4365 /* Compute a mapping from expression number (`bitmap_index') to
4366 hash table entry. */
4367
4368 index_map = XCNEWVEC (struct expr *, expr_hash_table.n_elems);
4369 for (i = 0; i < expr_hash_table.size; i++)
4370 for (expr = expr_hash_table.table[i]; expr != NULL; expr = expr->next_same_hash)
4371 index_map[expr->bitmap_index] = expr;
4372
4373 /* Calculate sizes of basic blocks and note how far
4374 each instruction is from the start of its block. We then use this
4375 data to restrict the distance an expression can travel.
4376
4377 to_bb_head = XCNEWVEC (int, get_max_uid ());
4378 bb_size = XCNEWVEC (int, last_basic_block);
4379
4380 FOR_EACH_BB (bb)
4381 {
4382 rtx insn;
4383 rtx bb_end;
4384 int to_head;
4385
4386 insn = BB_HEAD (bb);
4387 bb_end = BB_END (bb);
4388 to_head = 0;
4389
4390 while (insn != bb_end)
4391 {
4392 /* Don't count debug instructions to avoid them affecting
4393 hoisting decisions. */
4394 if (NONDEBUG_INSN_P (insn))
4395 to_bb_head[INSN_UID (insn)] = to_head++;
4396
4397 insn = NEXT_INSN (insn);
4398 }
4399
4400 bb_size[bb->index] = to_head;
4401 }
4402
4403 /* Walk over each basic block looking for potentially hoistable
4404 expressions; nothing gets hoisted from the entry block. */
4405 FOR_EACH_BB (bb)
4406 {
4407 int found = 0;
4408 int insn_inserted_p;
4409
4410 domby = get_dominated_by (CDI_DOMINATORS, bb);
4411 /* Examine each expression that is very busy at the exit of this
4412 block. These are the potentially hoistable expressions. */
4413 for (i = 0; i < hoist_vbeout[bb->index]->n_bits; i++)
4414 {
4415 int hoistable = 0;
4416
4417 if (TEST_BIT (hoist_vbeout[bb->index], i))
4418 {
4419 /* If an expression is computed in BB and is available at end of
4420 BB, hoist all occurrences dominated by BB to BB. */
4421 if (TEST_BIT (comp[bb->index], i))
4422 hoistable++;
4423
4424 /* We've found a potentially hoistable expression; now
4425 we look at every block BB dominates to see if it
4426 computes the expression. */
4427 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4428 {
4429 struct expr *expr = index_map[i];
4430 struct occr *occr = NULL;
4431 int max_distance;
4432
4433 /* Ignore self dominance. */
4434 if (bb == dominated)
4435 continue;
4436 /* We've found a dominated block; now see if it computes
4437 the busy expression and whether or not moving that
4438 expression to the "beginning" of that block is safe. */
4439 if (!TEST_BIT (antloc[dominated->index], i))
4440 continue;
4441
4442 max_distance = expr->max_distance;
4443 if (max_distance > 0)
4444 {
4445 struct occr *occr;
4446
4447 occr = find_occr_in_bb (expr->antic_occr, dominated);
4448 gcc_assert (occr);
4449
4450 gcc_assert (NONDEBUG_INSN_P (occr->insn));
4451
4452 /* Adjust MAX_DISTANCE to account for the fact that
4453 OCCR won't have to travel all of DOMINATED, but
4454 only part of it. */
4455 max_distance += (bb_size[dominated->index]
4456 - to_bb_head[INSN_UID (occr->insn)]);
4457 }
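
/* E.g. (hypothetical numbers): if DOMINATED holds 10 insns and 3 of
   them precede OCCR, we add 10 - 3 = 7 here; combined with the
   blanket subtraction of all 10 in hoist_expr_reaches_here_p, only
   the 3 insns actually traversed are charged against MAX_DISTANCE.  */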
4458
4459 /* Note if the expression would reach the dominated block
4460 unimpaired if it was placed at the end of BB.
4461
4462 Keep track of how many times this expression is hoistable
4463 from a dominated block into BB. */
4464 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
4465 max_distance, bb_size))
4466 hoistable++;
4467 }
4468
4469 /* If we found more than one hoistable occurrence of this
4470 expression, then note it in the bitmap of expressions to
4471 hoist. It makes no sense to hoist things which are computed
4472 in only one BB, and doing so tends to pessimize register
4473 allocation. One could increase this value to try harder
4474 to avoid any possible code expansion due to register
4475 allocation issues; however experiments have shown that
4476 the vast majority of hoistable expressions are only movable
4477 from two successors, so raising this threshold is likely
4478 to nullify any benefit we get from code hoisting. */
4479 if (hoistable > 1 && dbg_cnt (hoist_insn))
4480 {
4481 SET_BIT (hoist_exprs[bb->index], i);
4482 found = 1;
4483 }
4484 }
4485 }
4486 /* If we found nothing to hoist, then quit now. */
4487 if (! found)
4488 {
4489 VEC_free (basic_block, heap, domby);
4490 continue;
4491 }
4492
4493 /* Loop over all the hoistable expressions. */
4494 for (i = 0; i < hoist_exprs[bb->index]->n_bits; i++)
4495 {
4496 /* We want to insert the expression into BB only once, so
4497 note when we've inserted it. */
4498 insn_inserted_p = 0;
4499
4500 /* These tests should be the same as the tests above. */
4501 if (TEST_BIT (hoist_exprs[bb->index], i))
4502 {
4503 /* We've found a potentially hoistable expression; now
4504 we look at every block BB dominates to see if it
4505 computes the expression. */
4506 for (j = 0; VEC_iterate (basic_block, domby, j, dominated); j++)
4507 {
4508 struct expr *expr = index_map[i];
4509 int max_distance;
4510
4511 /* Ignore self dominance. */
4512 if (bb == dominated)
4513 continue;
4514
4515 /* We've found a dominated block; now see if it computes
4516 the busy expression and whether or not moving that
4517 expression to the "beginning" of that block is safe. */
4518 if (!TEST_BIT (antloc[dominated->index], i))
4519 continue;
4520
4521 max_distance = expr->max_distance;
4522 if (max_distance > 0)
4523 {
4524 occr = find_occr_in_bb (expr->antic_occr, dominated);
4525 gcc_assert (occr);
4526
4527 gcc_assert (NONDEBUG_INSN_P (occr->insn));
4528
4529 /* Adjust MAX_DISTANCE to account for the fact that
4530 OCCR won't have to travel all of DOMINATED, but
4531 only part of it. */
4532 max_distance += (bb_size[dominated->index]
4533 - to_bb_head[INSN_UID (occr->insn)]);
4534 }
4535
4536 /* The expression is computed in the dominated block and
4537 it would be safe to compute it at the start of the
4538 dominated block. Now we have to determine if the
4539 expression would reach the dominated block if it was
4540 placed at the end of BB.
4541 Note: the fact that hoist_exprs has the i-th bit set means
4542 that /some/, not necessarily all, occurrences from
4543 the dominated blocks can be hoisted to BB. Here we check
4544 if a specific occurrence can be hoisted to BB. */
4545 if (hoist_expr_reaches_here_p (bb, i, dominated, NULL,
4546 max_distance, bb_size))
4547 {
4548 rtx insn;
4549 rtx set;
4550
4551 if (!occr)
4552 {
4553 occr = find_occr_in_bb (expr->antic_occr, dominated);
4554 gcc_assert (occr);
4555 }
4556
4557 insn = occr->insn;
4558 set = single_set (insn);
4559 gcc_assert (set);
4560
4561 /* Create a pseudo-reg to store the result of reaching
4562 expressions into. Get the mode for the new pseudo
4563 from the mode of the original destination pseudo.
4564
4565 It is important to use new pseudos whenever we
4566 emit a set. This will allow reload to use
4567 rematerialization for such registers. */
4568 if (!insn_inserted_p)
4569 expr->reaching_reg
4570 = gen_reg_rtx_and_attrs (SET_DEST (set));
4571
4572 gcse_emit_move_after (expr->reaching_reg, SET_DEST (set), insn);
4573 delete_insn (insn);
4574 occr->deleted_p = 1;
4575 changed = 1;
4576 gcse_subst_count++;
4577
4578 if (!insn_inserted_p)
4579 {
4580 insert_insn_end_basic_block (index_map[i], bb);
4581 insn_inserted_p = 1;
4582 }
4583 }
4584 }
4585 }
4586 }
4587 VEC_free (basic_block, heap, domby);
4588 }
4589
4590 free (bb_size);
4591 free (to_bb_head);
4592 free (index_map);
4593
4594 return changed;
4595 }
4596
4597 /* Top level routine to perform one code hoisting (aka unification) pass.
4598
4599 Return nonzero if a change was made. */
4600
4601 static int
4602 one_code_hoisting_pass (void)
4603 {
4604 int changed = 0;
4605
4606 gcse_subst_count = 0;
4607 gcse_create_count = 0;
4608
4609 /* Return if there's nothing to do, or it is too expensive. */
4610 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
4611 || is_too_expensive (_("GCSE disabled")))
4612 return 0;
4613
4614 doing_code_hoisting_p = true;
4615
4616 /* We need alias. */
4617 init_alias_analysis ();
4618
4619 bytes_used = 0;
4620 gcc_obstack_init (&gcse_obstack);
4621 alloc_gcse_mem ();
4622
4623 alloc_hash_table (&expr_hash_table, 0);
4624 compute_hash_table (&expr_hash_table);
4625 if (dump_file)
4626 dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);
4627
4628 if (expr_hash_table.n_elems > 0)
4629 {
4630 alloc_code_hoist_mem (last_basic_block, expr_hash_table.n_elems);
4631 compute_code_hoist_data ();
4632 changed = hoist_code ();
4633 free_code_hoist_mem ();
4634 }
4635
4636 free_hash_table (&expr_hash_table);
4637 free_gcse_mem ();
4638 obstack_free (&gcse_obstack, NULL);
4639
4640 /* We are finished with alias. */
4641 end_alias_analysis ();
4642
4643 if (dump_file)
4644 {
4645 fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
4646 current_function_name (), n_basic_blocks, bytes_used);
4647 fprintf (dump_file, "%d substs, %d insns created\n",
4648 gcse_subst_count, gcse_create_count);
4649 }
4650
4651 doing_code_hoisting_p = false;
4652
4653 return changed;
4654 }
4655 \f
4656 /* Here we provide the things required to do store motion towards
4657 the exit. In order for this to be effective, gcse also needed to
4658 be taught how to move a load when it is killed only by a store to itself.
4659
4660 int i;
4661 float a[10];
4662
4663 void foo(float scale)
4664 {
4665 for (i=0; i<10; i++)
4666 a[i] *= scale;
4667 }
4668
4669 'i' is both loaded and stored to in the loop. Normally, gcse cannot move
4670 the load out since it's live around the loop, and stored at the bottom
4671 of the loop.
4672
4673 The 'Load Motion' referred to and implemented in this file is
4674 an enhancement to gcse which, when using edge-based LCM, recognizes
4675 this situation and allows gcse to move the load out of the loop.
4676
4677 Once gcse has hoisted the load, store motion can then push this
4678 load towards the exit, and we end up with no loads or stores of 'i'
4679 in the loop. */
4680
4681 static hashval_t
4682 pre_ldst_expr_hash (const void *p)
4683 {
4684 int do_not_record_p = 0;
4685 const struct ls_expr *const x = (const struct ls_expr *) p;
4686 return hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
4687 }
4688
4689 static int
4690 pre_ldst_expr_eq (const void *p1, const void *p2)
4691 {
4692 const struct ls_expr *const ptr1 = (const struct ls_expr *) p1,
4693 *const ptr2 = (const struct ls_expr *) p2;
4694 return expr_equiv_p (ptr1->pattern, ptr2->pattern);
4695 }
4696
4697 /* This will search the ldst list for a matching expression. If it
4698 doesn't find one, we create one and initialize it. */
4699
4700 static struct ls_expr *
4701 ldst_entry (rtx x)
4702 {
4703 int do_not_record_p = 0;
4704 struct ls_expr * ptr;
4705 unsigned int hash;
4706 void **slot;
4707 struct ls_expr e;
4708
4709 hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
4710 NULL, /*have_reg_qty=*/false);
4711
4712 e.pattern = x;
4713 slot = htab_find_slot_with_hash (pre_ldst_table, &e, hash, INSERT);
4714 if (*slot)
4715 return (struct ls_expr *)*slot;
4716
4717 ptr = XNEW (struct ls_expr);
4718
4719 ptr->next = pre_ldst_mems;
4720 ptr->expr = NULL;
4721 ptr->pattern = x;
4722 ptr->pattern_regs = NULL_RTX;
4723 ptr->loads = NULL_RTX;
4724 ptr->stores = NULL_RTX;
4725 ptr->reaching_reg = NULL_RTX;
4726 ptr->invalid = 0;
4727 ptr->index = 0;
4728 ptr->hash_index = hash;
4729 pre_ldst_mems = ptr;
4730 *slot = ptr;
4731
4732 return ptr;
4733 }
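
/* Typical use (a sketch; compare compute_ld_motion_mems below): for a
   simple load (set (reg) (mem)) the caller records the insn on the
   entry's load list,

       ptr = ldst_entry (src);
       ptr->loads = alloc_INSN_LIST (insn, ptr->loads);

   and sets ptr->invalid when the pattern cannot be handled.  */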
4734
4735 /* Free up an individual ldst entry. */
4736
4737 static void
4738 free_ldst_entry (struct ls_expr * ptr)
4739 {
4740 free_INSN_LIST_list (& ptr->loads);
4741 free_INSN_LIST_list (& ptr->stores);
4742
4743 free (ptr);
4744 }
4745
4746 /* Free up all memory associated with the ldst list. */
4747
4748 static void
4749 free_ldst_mems (void)
4750 {
4751 if (pre_ldst_table)
4752 htab_delete (pre_ldst_table);
4753 pre_ldst_table = NULL;
4754
4755 while (pre_ldst_mems)
4756 {
4757 struct ls_expr * tmp = pre_ldst_mems;
4758
4759 pre_ldst_mems = pre_ldst_mems->next;
4760
4761 free_ldst_entry (tmp);
4762 }
4763
4764 pre_ldst_mems = NULL;
4765 }
4766
4767 /* Dump debugging info about the ldst list. */
4768
4769 static void
4770 print_ldst_list (FILE * file)
4771 {
4772 struct ls_expr * ptr;
4773
4774 fprintf (file, "LDST list: \n");
4775
4776 for (ptr = first_ls_expr (); ptr != NULL; ptr = next_ls_expr (ptr))
4777 {
4778 fprintf (file, " Pattern (%3d): ", ptr->index);
4779
4780 print_rtl (file, ptr->pattern);
4781
4782 fprintf (file, "\n Loads : ");
4783
4784 if (ptr->loads)
4785 print_rtl (file, ptr->loads);
4786 else
4787 fprintf (file, "(nil)");
4788
4789 fprintf (file, "\n Stores : ");
4790
4791 if (ptr->stores)
4792 print_rtl (file, ptr->stores);
4793 else
4794 fprintf (file, "(nil)");
4795
4796 fprintf (file, "\n\n");
4797 }
4798
4799 fprintf (file, "\n");
4800 }
4801
4802 /* Return X's entry in the list of ldst only expressions, or NULL if
4803 there is none or the entry has been invalidated. */
4803
4804 static struct ls_expr *
4805 find_rtx_in_ldst (rtx x)
4806 {
4807 struct ls_expr e;
4808 void **slot;
4809 if (!pre_ldst_table)
4810 return NULL;
4811 e.pattern = x;
4812 slot = htab_find_slot (pre_ldst_table, &e, NO_INSERT);
4813 if (!slot || ((struct ls_expr *)*slot)->invalid)
4814 return NULL;
4815 return (struct ls_expr *) *slot;
4816 }
4817
4818 /* Return first item in the list. */
4819
4820 static inline struct ls_expr *
4821 first_ls_expr (void)
4822 {
4823 return pre_ldst_mems;
4824 }
4825
4826 /* Return the next item in the list after the specified one. */
4827
4828 static inline struct ls_expr *
4829 next_ls_expr (struct ls_expr * ptr)
4830 {
4831 return ptr->next;
4832 }
4833 \f
4834 /* Load Motion for loads which only kill themselves. */
4835
4836 /* Return true if x is a simple MEM operation, with no registers or
4837 side effects. These are the types of loads we consider for the
4838 ld_motion list, otherwise we let the usual aliasing take care of it. */
4839
4840 static int
4841 simple_mem (const_rtx x)
4842 {
4843 if (! MEM_P (x))
4844 return 0;
4845
4846 if (MEM_VOLATILE_P (x))
4847 return 0;
4848
4849 if (GET_MODE (x) == BLKmode)
4850 return 0;
4851
4852 /* If we are handling exceptions, we must be careful with memory references
4853 that may trap. If we are not, the behavior is undefined, so we may just
4854 continue. */
4855 if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
4856 return 0;
4857
4858 if (side_effects_p (x))
4859 return 0;
4860
4861 /* Do not consider function arguments passed on the stack. */
4862 if (reg_mentioned_p (stack_pointer_rtx, x))
4863 return 0;
4864
4865 if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
4866 return 0;
4867
4868 return 1;
4869 }
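
/* Illustratively, (mem:SI (symbol_ref ("x"))) is simple, while a
   volatile MEM, a BLKmode MEM, a MEM that may trap under non-call
   exceptions, or a MEM mentioning the stack pointer is not.  */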
4870
4871 /* Make sure there isn't a buried reference in this pattern anywhere.
4872 If there is, invalidate the entry for it since we're not capable
4873 of fixing it up just yet. We have to be sure we know about ALL
4874 loads since the aliasing code will allow all entries in the
4875 ld_motion list to not alias themselves. If we miss a load, we will get
4876 the wrong value since gcse might common it and we won't know to
4877 fix it up. */
4878
4879 static void
4880 invalidate_any_buried_refs (rtx x)
4881 {
4882 const char * fmt;
4883 int i, j;
4884 struct ls_expr * ptr;
4885
4886 /* Invalidate it in the list. */
4887 if (MEM_P (x) && simple_mem (x))
4888 {
4889 ptr = ldst_entry (x);
4890 ptr->invalid = 1;
4891 }
4892
4893 /* Recursively process the insn. */
4894 fmt = GET_RTX_FORMAT (GET_CODE (x));
4895
4896 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4897 {
4898 if (fmt[i] == 'e')
4899 invalidate_any_buried_refs (XEXP (x, i));
4900 else if (fmt[i] == 'E')
4901 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4902 invalidate_any_buried_refs (XVECEXP (x, i, j));
4903 }
4904 }
4905
4906 /* Find all the 'simple' MEMs which are used in LOADs and STORES. Simple
4907 being defined as MEM loads and stores to symbols, with no side effects
4908 and no registers in the expression. For a MEM destination, we also
4909 check that the insn is still valid if we replace the destination with a
4910 REG, as is done in update_ld_motion_stores. If there are any uses/defs
4911 which don't match these criteria, they are invalidated and trimmed out
4912 later. */
4913
4914 static void
4915 compute_ld_motion_mems (void)
4916 {
4917 struct ls_expr * ptr;
4918 basic_block bb;
4919 rtx insn;
4920
4921 pre_ldst_mems = NULL;
4922 pre_ldst_table = htab_create (13, pre_ldst_expr_hash,
4923 pre_ldst_expr_eq, NULL);
4924
4925 FOR_EACH_BB (bb)
4926 {
4927 FOR_BB_INSNS (bb, insn)
4928 {
4929 if (NONDEBUG_INSN_P (insn))
4930 {
4931 if (GET_CODE (PATTERN (insn)) == SET)
4932 {
4933 rtx src = SET_SRC (PATTERN (insn));
4934 rtx dest = SET_DEST (PATTERN (insn));
4935
4936 /* Check for a simple LOAD... */
4937 if (MEM_P (src) && simple_mem (src))
4938 {
4939 ptr = ldst_entry (src);
4940 if (REG_P (dest))
4941 ptr->loads = alloc_INSN_LIST (insn, ptr->loads);
4942 else
4943 ptr->invalid = 1;
4944 }
4945 else
4946 {
4947 /* Make sure there isn't a buried load somewhere. */
4948 invalidate_any_buried_refs (src);
4949 }
4950
4951 /* Check for stores. Don't worry about aliased ones; they
4952 will block any movement we might do later. We only care
4953 about this exact pattern since those are the only
4954 circumstances in which we will ignore the aliasing info. */
4955 if (MEM_P (dest) && simple_mem (dest))
4956 {
4957 ptr = ldst_entry (dest);
4958
4959 if (! MEM_P (src)
4960 && GET_CODE (src) != ASM_OPERANDS
4961 /* Check for REG manually since want_to_gcse_p
4962 returns 0 for all REGs. */
4963 && can_assign_to_reg_without_clobbers_p (src))
4964 ptr->stores = alloc_INSN_LIST (insn, ptr->stores);
4965 else
4966 ptr->invalid = 1;
4967 }
4968 }
4969 else
4970 invalidate_any_buried_refs (PATTERN (insn));
4971 }
4972 }
4973 }
4974 }
4975
4976 /* Remove any references that have been either invalidated or are not in the
4977 expression list for pre gcse. */
4978
4979 static void
4980 trim_ld_motion_mems (void)
4981 {
4982 struct ls_expr * * last = & pre_ldst_mems;
4983 struct ls_expr * ptr = pre_ldst_mems;
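
/* LAST always points at the link that points at PTR, so unlinking PTR
   from the singly linked list is simply *LAST = ptr->next.  */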
4984
4985 while (ptr != NULL)
4986 {
4987 struct expr * expr;
4988
4989 /* Delete if entry has been made invalid. */
4990 if (! ptr->invalid)
4991 {
4992 /* Delete if we cannot find this mem in the expression list. */
4993 unsigned int hash = ptr->hash_index % expr_hash_table.size;
4994
4995 for (expr = expr_hash_table.table[hash];
4996 expr != NULL;
4997 expr = expr->next_same_hash)
4998 if (expr_equiv_p (expr->expr, ptr->pattern))
4999 break;
5000 }
5001 else
5002 expr = (struct expr *) 0;
5003
5004 if (expr)
5005 {
5006 /* Set the expression field if we are keeping it. */
5007 ptr->expr = expr;
5008 last = & ptr->next;
5009 ptr = ptr->next;
5010 }
5011 else
5012 {
5013 *last = ptr->next;
5014 htab_remove_elt_with_hash (pre_ldst_table, ptr, ptr->hash_index);
5015 free_ldst_entry (ptr);
5016 ptr = * last;
5017 }
5018 }
5019
5020 /* Show the world what we've found. */
5021 if (dump_file && pre_ldst_mems != NULL)
5022 print_ldst_list (dump_file);
5023 }
5024
5025 /* This routine will take an expression which we are replacing with
5026 a reaching register, and update any stores that are needed if
5027 that expression is in the ld_motion list. Stores are updated by
5028 copying their SRC to the reaching register, and then storing
5029 the reaching register into the store location. This keeps the
5030 correct value in the reaching register for the loads. */
5031
5032 static void
5033 update_ld_motion_stores (struct expr * expr)
5034 {
5035 struct ls_expr * mem_ptr;
5036
5037 if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
5038 {
5039 /* We could try to find just the REACHED stores, but it shouldn't
5040 hurt to set the reaching reg everywhere... some might be
5041 dead and should be eliminated later. */
5042
5043 /* We replace (set mem expr) with (set reg expr) (set mem reg)
5044 where reg is the reaching reg used in the load. We checked in
5045 compute_ld_motion_mems that we can replace (set mem expr) with
5046 (set reg expr) in that insn. */
5047 rtx list = mem_ptr->stores;
5048
5049 for ( ; list != NULL_RTX; list = XEXP (list, 1))
5050 {
5051 rtx insn = XEXP (list, 0);
5052 rtx pat = PATTERN (insn);
5053 rtx src = SET_SRC (pat);
5054 rtx reg = expr->reaching_reg;
5055 rtx copy;
5056
5057 /* If we've already copied it, continue. */
5058 if (expr->reaching_reg == src)
5059 continue;
5060
5061 if (dump_file)
5062 {
5063 fprintf (dump_file, "PRE: store updated with reaching reg ");
5064 print_rtl (dump_file, expr->reaching_reg);
5065 fprintf (dump_file, ":\n ");
5066 print_inline_rtx (dump_file, insn, 8);
5067 fprintf (dump_file, "\n");
5068 }
5069
5070 copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
5071 emit_insn_before (copy, insn);
5072 SET_SRC (pat) = reg;
5073 df_insn_rescan (insn);
5074
5075 /* Un-recognize this pattern since it's probably different now. */
5076 INSN_CODE (insn) = -1;
5077 gcse_create_count++;
5078 }
5079 }
5080 }
5081 \f
5082 /* Return true if the graph is too expensive to optimize. PASS is the
5083 optimization about to be performed. */
5084
5085 static bool
5086 is_too_expensive (const char *pass)
5087 {
5088 /* Trying to perform global optimizations on flow graphs which have
5089 a high connectivity will take a long time and is unlikely to be
5090 particularly useful.
5091
5092 In normal circumstances a cfg should have about twice as many
5093 edges as blocks. But we do not want to punish small functions
5094 which have a couple of switch statements. Rather than simply
5095 thresholding the number of blocks, we use something with a more
5096 graceful degradation. */
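
/* For example, a function with 1000 basic blocks is allowed up to
   20000 + 4 * 1000 = 24000 edges before the pass is disabled.  */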
5097 if (n_edges > 20000 + n_basic_blocks * 4)
5098 {
5099 warning (OPT_Wdisabled_optimization,
5100 "%s: %d basic blocks and %d edges/basic block",
5101 pass, n_basic_blocks, n_edges / n_basic_blocks);
5102
5103 return true;
5104 }
5105
5106 /* If allocating memory for the cprop bitmap would take up too much
5107 storage it's better just to disable the optimization. */
5108 if ((n_basic_blocks
5109 * SBITMAP_SET_SIZE (max_reg_num ())
5110 * sizeof (SBITMAP_ELT_TYPE)) > MAX_GCSE_MEMORY)
5111 {
5112 warning (OPT_Wdisabled_optimization,
5113 "%s: %d basic blocks and %d registers",
5114 pass, n_basic_blocks, max_reg_num ());
5115
5116 return true;
5117 }
5118
5119 return false;
5120 }
5121
5122 \f
5123 /* Main function for the CPROP pass. */
5124
5125 static int
5126 one_cprop_pass (void)
5127 {
5128 int changed = 0;
5129
5130 /* Return if there's nothing to do, or it is too expensive. */
5131 if (n_basic_blocks <= NUM_FIXED_BLOCKS + 1
5132 || is_too_expensive (_("const/copy propagation disabled")))
5133 return 0;
5134
5135 global_const_prop_count = local_const_prop_count = 0;
5136 global_copy_prop_count = local_copy_prop_count = 0;
5137
5138 bytes_used = 0;
5139 gcc_obstack_init (&gcse_obstack);
5140 alloc_gcse_mem ();
5141
5142 /* Do a local const/copy propagation pass first. The global pass
5143 only handles global opportunities.
5144 If the local pass changes something, remove any unreachable blocks
5145 because the CPROP global dataflow analysis may get into infinite
5146 loops for CFGs with unreachable blocks.
5147
5148 FIXME: This local pass should not be necessary after CSE (but for
5149 some reason it still is). It is also (proven) not necessary
5150 to run the local pass right after FWPROP.
5151
5152 FIXME: The global analysis would not get into infinite loops if it
5153 would use the DF solver (via df_simple_dataflow) instead of
5154 the solver implemented in this file. */
5155 if (local_cprop_pass ())
5156 {
5157 delete_unreachable_blocks ();
5158 df_analyze ();
5159 }
5160
5161 /* Determine implicit sets. */
5162 implicit_sets = XCNEWVEC (rtx, last_basic_block);
5163 find_implicit_sets ();
5164
5165 alloc_hash_table (&set_hash_table, 1);
5166 compute_hash_table (&set_hash_table);
5167
5168 /* Free implicit_sets before peak usage. */
5169 free (implicit_sets);
5170 implicit_sets = NULL;
5171
5172 if (dump_file)
5173 dump_hash_table (dump_file, "SET", &set_hash_table);
5174 if (set_hash_table.n_elems > 0)
5175 {
5176 basic_block bb;
5177 rtx insn;
5178
5179 alloc_cprop_mem (last_basic_block, set_hash_table.n_elems);
5180 compute_cprop_data ();
5181
5182 FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR->next_bb->next_bb, EXIT_BLOCK_PTR, next_bb)
5183 {
5184 /* Reset tables used to keep track of what's still valid [since
5185 the start of the block]. */
5186 reset_opr_set_tables ();
5187
5188 FOR_BB_INSNS (bb, insn)
5189 if (INSN_P (insn))
5190 {
5191 changed |= cprop_insn (insn);
5192
5193 /* Keep track of everything modified by this insn. */
5194 /* ??? Need to be careful w.r.t. mods done to INSN.
5195 Don't call mark_oprs_set if we turned the
5196 insn into a NOTE. */
5197 if (! NOTE_P (insn))
5198 mark_oprs_set (insn);
5199 }
5200 }
5201
5202 changed |= bypass_conditional_jumps ();
5203 free_cprop_mem ();
5204 }
5205
5206 free_hash_table (&set_hash_table);
5207 free_gcse_mem ();
5208 obstack_free (&gcse_obstack, NULL);
5209
5210 if (dump_file)
5211 {
5212 fprintf (dump_file, "CPROP of %s, %d basic blocks, %d bytes needed, ",
5213 current_function_name (), n_basic_blocks, bytes_used);
5214 fprintf (dump_file, "%d local const props, %d local copy props, ",
5215 local_const_prop_count, local_copy_prop_count);
5216 fprintf (dump_file, "%d global const props, %d global copy props\n\n",
5217 global_const_prop_count, global_copy_prop_count);
5218 }
5219
5220 return changed;
5221 }
5222
5223 \f
5224 /* All the passes implemented in this file. Each pass has its
5225 own gate and execute function, and at the end of the file a
5226 pass definition for passes.c.
5227
5228 We do not construct an accurate cfg in functions which call
5229 setjmp, so none of these passes runs if the function calls
5230 setjmp.
5231 FIXME: Should just handle setjmp via REG_SETJMP notes. */
5232
5233 static bool
5234 gate_rtl_cprop (void)
5235 {
5236 return optimize > 0 && flag_gcse
5237 && !cfun->calls_setjmp
5238 && dbg_cnt (cprop);
5239 }
5240
5241 static unsigned int
5242 execute_rtl_cprop (void)
5243 {
5244 delete_unreachable_blocks ();
5245 df_set_flags (DF_LR_RUN_DCE);
5246 df_analyze ();
5247 flag_rerun_cse_after_global_opts |= one_cprop_pass ();
5248 return 0;
5249 }
5250
5251 static bool
5252 gate_rtl_pre (void)
5253 {
5254 return optimize > 0 && flag_gcse
5255 && !cfun->calls_setjmp
5256 && optimize_function_for_speed_p (cfun)
5257 && dbg_cnt (pre);
5258 }
5259
5260 static unsigned int
5261 execute_rtl_pre (void)
5262 {
5263 delete_unreachable_blocks ();
5264 df_analyze ();
5265 flag_rerun_cse_after_global_opts |= one_pre_gcse_pass ();
5266 return 0;
5267 }
5268
5269 static bool
5270 gate_rtl_hoist (void)
5271 {
5272 return optimize > 0 && flag_gcse
5273 && !cfun->calls_setjmp
5274 /* It does not make sense to run code hoisting unless we are optimizing
5275 for code size -- it rarely makes programs faster, and can make them
5276 bigger if we did PRE (when optimizing for space, we don't run PRE). */
5277 && optimize_function_for_size_p (cfun)
5278 && dbg_cnt (hoist);
5279 }
5280
5281 static unsigned int
5282 execute_rtl_hoist (void)
5283 {
5284 delete_unreachable_blocks ();
5285 df_analyze ();
5286 flag_rerun_cse_after_global_opts |= one_code_hoisting_pass ();
5287 return 0;
5288 }
5289
5290 struct rtl_opt_pass pass_rtl_cprop =
5291 {
5292 {
5293 RTL_PASS,
5294 "cprop", /* name */
5295 gate_rtl_cprop, /* gate */
5296 execute_rtl_cprop, /* execute */
5297 NULL, /* sub */
5298 NULL, /* next */
5299 0, /* static_pass_number */
5300 TV_CPROP, /* tv_id */
5301 PROP_cfglayout, /* properties_required */
5302 0, /* properties_provided */
5303 0, /* properties_destroyed */
5304 0, /* todo_flags_start */
5305 TODO_df_finish | TODO_verify_rtl_sharing |
5306 TODO_dump_func |
5307 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
5308 }
5309 };
5310
5311 struct rtl_opt_pass pass_rtl_pre =
5312 {
5313 {
5314 RTL_PASS,
5315 "rtl pre", /* name */
5316 gate_rtl_pre, /* gate */
5317 execute_rtl_pre, /* execute */
5318 NULL, /* sub */
5319 NULL, /* next */
5320 0, /* static_pass_number */
5321 TV_PRE, /* tv_id */
5322 PROP_cfglayout, /* properties_required */
5323 0, /* properties_provided */
5324 0, /* properties_destroyed */
5325 0, /* todo_flags_start */
5326 TODO_df_finish | TODO_verify_rtl_sharing |
5327 TODO_dump_func |
5328 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
5329 }
5330 };
5331
5332 struct rtl_opt_pass pass_rtl_hoist =
5333 {
5334 {
5335 RTL_PASS,
5336 "hoist", /* name */
5337 gate_rtl_hoist, /* gate */
5338 execute_rtl_hoist, /* execute */
5339 NULL, /* sub */
5340 NULL, /* next */
5341 0, /* static_pass_number */
5342 TV_HOIST, /* tv_id */
5343 PROP_cfglayout, /* properties_required */
5344 0, /* properties_provided */
5345 0, /* properties_destroyed */
5346 0, /* todo_flags_start */
5347 TODO_df_finish | TODO_verify_rtl_sharing |
5348 TODO_dump_func |
5349 TODO_verify_flow | TODO_ggc_collect /* todo_flags_finish */
5350 }
5351 };
5352
5353 #include "gt-gcse.h"