gcc/fwprop.c

   1 /* RTL-based forward propagation pass for GNU compiler.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
   3    Free Software Foundation, Inc.
   4    Contributed by Paolo Bonzini and Steven Bosscher.
   5
   6 This file is part of GCC.
   7
   8 GCC is free software; you can redistribute it and/or modify it under
   9 the terms of the GNU General Public License as published by the Free
  10 Software Foundation; either version 3, or (at your option) any later
  11 version.
  12
  13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  16 for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GCC; see the file COPYING3.  If not see
  20 <http://www.gnu.org/licenses/>.  */
  21
  22 #include "config.h"
  23 #include "system.h"
  24 #include "coretypes.h"
  25 #include "tm.h"
  26 #include "toplev.h"
  27
  28 #include "timevar.h"
  29 #include "rtl.h"
  30 #include "tm_p.h"
  31 #include "insn-config.h"
  32 #include "recog.h"
  33 #include "flags.h"
  34 #include "obstack.h"
  35 #include "basic-block.h"
  36 #include "output.h"
  37 #include "df.h"
  38 #include "target.h"
  39 #include "cfgloop.h"
  40 #include "tree-pass.h"
  41 #include "domwalk.h"
  42 #include "emit-rtl.h"
  43
  44
  45 /* This pass does simple forward propagation and simplification when an
  46    operand of an insn can only come from a single def.  This pass uses
  47    df.c, so it is global.  However, we only do limited analysis of
  48    available expressions.
  49
  50    1) The pass tries to propagate the source of the def into the use,
  51    and checks if the result is independent of the substituted value.
  52    For example, the high word of a (zero_extend:DI (reg:SI M)) is always
  53    zero, independent of the source register.
  54
  55    In particular, we propagate constants into the use site.  Sometimes
  56    RTL expansion did not put the constant in the same insn on purpose,
  57    to satisfy a predicate, and the result will fail to be recognized;
  58    but this happens rarely and in this case we can still create a
  59    REG_EQUAL note.  For multi-word operations, this
  60
  61       (set (subreg:SI (reg:DI 120) 0) (const_int 0))
  62       (set (subreg:SI (reg:DI 120) 4) (const_int -1))
  63       (set (subreg:SI (reg:DI 122) 0)
  64          (ior:SI (subreg:SI (reg:DI 119) 0) (subreg:SI (reg:DI 120) 0)))
  65       (set (subreg:SI (reg:DI 122) 4)
  66          (ior:SI (subreg:SI (reg:DI 119) 4) (subreg:SI (reg:DI 120) 4)))
  67
  68    can be simplified to the much simpler
  69
  70       (set (subreg:SI (reg:DI 122) 0) (subreg:SI (reg:DI 119)))
  71       (set (subreg:SI (reg:DI 122) 4) (const_int -1))
  72
  73    This particular propagation is also effective at putting together
  74    complex addressing modes.  We are more aggressive inside MEMs, in
  75    that all definitions are propagated if the use is in a MEM; if the
  76    result is a valid memory address we check address_cost to decide
  77    whether the substitution is worthwhile.
  78
  79    2) The pass propagates register copies.  This is not as effective as
  80    the copy propagation done by CSE's canon_reg, which works by walking
  81    the instruction chain, it can help the other transformations.
  82
  83    We should consider removing this optimization, and instead reorder the
  84    RTL passes, because GCSE does this transformation too.  With some luck,
  85    the CSE pass at the end of rest_of_handle_gcse could also go away.
  86
  87    3) The pass looks for paradoxical subregs that are actually unnecessary.
  88    Things like this:
  89
  90      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
  91      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
  92      (set (reg:SI 122) (plus:SI (subreg:SI (reg:QI 120) 0)
  93                                 (subreg:SI (reg:QI 121) 0)))
  94
  95    are very common on machines that can only do word-sized operations.
  96    For each use of a paradoxical subreg (subreg:WIDER (reg:NARROW N) 0),
  97    if it has a single def and it is (subreg:NARROW (reg:WIDE M) 0),
  98    we can replace the paradoxical subreg with simply (reg:WIDE M).  The
  99    above will simplify this to
 100
 101      (set (reg:QI 120) (subreg:QI (reg:SI 118) 0))
 102      (set (reg:QI 121) (subreg:QI (reg:SI 119) 0))
 103      (set (reg:SI 122) (plus:SI (reg:SI 118) (reg:SI 119)))
 104
 105    where the first two insns are now dead.
 106
 107    We used to use reaching definitions to find which uses have a
 108    single reaching definition (sounds obvious...), but this is too
 109    complex a problem in nasty testcases like PR33928.  Now we use the
 110    multiple definitions problem in df-problems.c.  The similarity
 111    between that problem and SSA form creation is taken further, in
 112    that fwprop does a dominator walk to create its chains; however,
 113    instead of creating a PHI function where multiple definitions meet
 114    I just punt and record only singleton use-def chains, which is
 115    all that is needed by fwprop.  */
 116
 117
 118 static int num_changes;
 119
 120 DEF_VEC_P(df_ref);
 121 DEF_VEC_ALLOC_P(df_ref,heap);
 122 static VEC(df_ref,heap) *use_def_ref;
 123 static VEC(df_ref,heap) *reg_defs;
 124 static VEC(df_ref,heap) *reg_defs_stack;
 125
 126 /* The MD bitmaps are trimmed to include only live registers to cut
 127    memory usage on testcases like insn-recog.c.  Track live registers
 128    in the basic block and do not perform forward propagation if the
 129    destination is a dead pseudo occurring in a note.  */
 130 static bitmap local_md;
 131 static bitmap local_lr;
 132
 133 /* Return the only def in USE's use-def chain, or NULL if there is
 134    more than one def in the chain.  */
 135
 136 static inline df_ref
 137 get_def_for_use (df_ref use)
 138 {
 139   return VEC_index (df_ref, use_def_ref, DF_REF_ID (use));
 140 }
 141
 142
 143 /* Update the reg_defs vector with non-partial definitions in DEF_REC.
 144    TOP_FLAG says which artificials uses should be used, when DEF_REC
 145    is an artificial def vector.  LOCAL_MD is modified as after a
 146    df_md_simulate_* function; we do more or less the same processing
 147    done there, so we do not use those functions.  */
 148
 149 #define DF_MD_GEN_FLAGS \
 150         (DF_REF_PARTIAL | DF_REF_CONDITIONAL | DF_REF_MAY_CLOBBER)
 151
 152 static void
 153 process_defs (df_ref *def_rec, int top_flag)
 154 {
 155   df_ref def;
 156   while ((def = *def_rec++) != NULL)
 157     {
 158       df_ref curr_def = VEC_index (df_ref, reg_defs, DF_REF_REGNO (def));
 159       unsigned int dregno;
 160
 161       if ((DF_REF_FLAGS (def) & DF_REF_AT_TOP) != top_flag)
 162         continue;
 163
 164       dregno = DF_REF_REGNO (def);
 165       if (curr_def)
 166         VEC_safe_push (df_ref, heap, reg_defs_stack, curr_def);
 167       else
 168         {
 169           /* Do not store anything if "transitioning" from NULL to NULL.  But
 170              otherwise, push a special entry on the stack to tell the
 171              leave_block callback that the entry in reg_defs was NULL.  */
 172           if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 173             ;
 174           else
 175             VEC_safe_push (df_ref, heap, reg_defs_stack, def);
 176         }
 177
 178       if (DF_REF_FLAGS (def) & DF_MD_GEN_FLAGS)
 179         {
 180           bitmap_set_bit (local_md, dregno);
 181           VEC_replace (df_ref, reg_defs, dregno, NULL);
 182         }
 183       else
 184         {
 185           bitmap_clear_bit (local_md, dregno);
 186           VEC_replace (df_ref, reg_defs, dregno, def);
 187         }
 188     }
 189 }
 190
 191
 192 /* Fill the use_def_ref vector with values for the uses in USE_REC,
 193    taking reaching definitions info from LOCAL_MD and REG_DEFS.
 194    TOP_FLAG says which artificials uses should be used, when USE_REC
 195    is an artificial use vector.  */
 196
 197 static void
 198 process_uses (df_ref *use_rec, int top_flag)
 199 {
 200   df_ref use;
 201   while ((use = *use_rec++) != NULL)
 202     if ((DF_REF_FLAGS (use) & DF_REF_AT_TOP) == top_flag)
 203       {
 204         unsigned int uregno = DF_REF_REGNO (use);
 205         if (VEC_index (df_ref, reg_defs, uregno)
 206             && !bitmap_bit_p (local_md, uregno)
 207             && bitmap_bit_p (local_lr, uregno))
 208           VEC_replace (df_ref, use_def_ref, DF_REF_ID (use),
 209                        VEC_index (df_ref, reg_defs, uregno));
 210       }
 211 }
 212
 213
 214 static void
 215 single_def_use_enter_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 216                             basic_block bb)
 217 {
 218   int bb_index = bb->index;
 219   struct df_md_bb_info *md_bb_info = df_md_get_bb_info (bb_index);
 220   struct df_lr_bb_info *lr_bb_info = df_lr_get_bb_info (bb_index);
 221   rtx insn;
 222
 223   bitmap_copy (local_md, &md_bb_info->in);
 224   bitmap_copy (local_lr, &lr_bb_info->in);
 225
 226   /* Push a marker for the leave_block callback.  */
 227   VEC_safe_push (df_ref, heap, reg_defs_stack, NULL);
 228
 229   process_uses (df_get_artificial_uses (bb_index), DF_REF_AT_TOP);
 230   process_defs (df_get_artificial_defs (bb_index), DF_REF_AT_TOP);
 231
 232   /* We don't call df_simulate_initialize_forwards, as it may overestimate
 233      the live registers if there are unused artificial defs.  We prefer
 234      liveness to be underestimated.  */
 235
 236   FOR_BB_INSNS (bb, insn)
 237     if (INSN_P (insn))
 238       {
 239         unsigned int uid = INSN_UID (insn);
 240         process_uses (DF_INSN_UID_USES (uid), 0);
 241         process_uses (DF_INSN_UID_EQ_USES (uid), 0);
 242         process_defs (DF_INSN_UID_DEFS (uid), 0);
 243         df_simulate_one_insn_forwards (bb, insn, local_lr);
 244       }
 245
 246   process_uses (df_get_artificial_uses (bb_index), 0);
 247   process_defs (df_get_artificial_defs (bb_index), 0);
 248 }
 249
 250 /* Pop the definitions created in this basic block when leaving its
 251    dominated parts.  */
 252
 253 static void
 254 single_def_use_leave_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
 255                             basic_block bb ATTRIBUTE_UNUSED)
 256 {
 257   df_ref saved_def;
 258   while ((saved_def = VEC_pop (df_ref, reg_defs_stack)) != NULL)
 259     {
 260       unsigned int dregno = DF_REF_REGNO (saved_def);
 261
 262       /* See also process_defs.  */
 263       if (saved_def == VEC_index (df_ref, reg_defs, dregno))
 264         VEC_replace (df_ref, reg_defs, dregno, NULL);
 265       else
 266         VEC_replace (df_ref, reg_defs, dregno, saved_def);
 267     }
 268 }
 269
 270
 271 /* Build a vector holding the reaching definitions of uses reached by a
 272    single dominating definition.  */
 273
 274 static void
 275 build_single_def_use_links (void)
 276 {
 277   struct dom_walk_data walk_data;
 278
 279   /* We use the multiple definitions problem to compute our restricted
 280      use-def chains.  */
 281   df_set_flags (DF_EQ_NOTES);
 282   df_md_add_problem ();
 283   df_note_add_problem ();
 284   df_analyze ();
 285   df_maybe_reorganize_use_refs (DF_REF_ORDER_BY_INSN_WITH_NOTES);
 286
 287   use_def_ref = VEC_alloc (df_ref, heap, DF_USES_TABLE_SIZE ());
 288   VEC_safe_grow_cleared (df_ref, heap, use_def_ref, DF_USES_TABLE_SIZE ());
 289
 290   reg_defs = VEC_alloc (df_ref, heap, max_reg_num ());
 291   VEC_safe_grow_cleared (df_ref, heap, reg_defs, max_reg_num ());
 292
 293   reg_defs_stack = VEC_alloc (df_ref, heap, n_basic_blocks * 10);
 294   local_md = BITMAP_ALLOC (NULL);
 295   local_lr = BITMAP_ALLOC (NULL);
 296
 297   /* Walk the dominator tree looking for single reaching definitions
 298      dominating the uses.  This is similar to how SSA form is built.  */
 299   walk_data.dom_direction = CDI_DOMINATORS;
 300   walk_data.initialize_block_local_data = NULL;
 301   walk_data.before_dom_children = single_def_use_enter_block;
 302   walk_data.after_dom_children = single_def_use_leave_block;
 303
 304   init_walk_dominator_tree (&walk_data);
 305   walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
 306   fini_walk_dominator_tree (&walk_data);
 307
 308   BITMAP_FREE (local_lr);
 309   BITMAP_FREE (local_md);
 310   VEC_free (df_ref, heap, reg_defs);
 311   VEC_free (df_ref, heap, reg_defs_stack);
 312 }
 313
 314 \f
 315 /* Do not try to replace constant addresses or addresses of local and
 316    argument slots.  These MEM expressions are made only once and inserted
 317    in many instructions, as well as being used to control symbol table
 318    output.  It is not safe to clobber them.
 319
 320    There are some uncommon cases where the address is already in a register
 321    for some reason, but we cannot take advantage of that because we have
 322    no easy way to unshare the MEM.  In addition, looking up all stack
 323    addresses is costly.  */
 324
 325 static bool
 326 can_simplify_addr (rtx addr)
 327 {
 328   rtx reg;
 329
 330   if (CONSTANT_ADDRESS_P (addr))
 331     return false;
 332
 333   if (GET_CODE (addr) == PLUS)
 334     reg = XEXP (addr, 0);
 335   else
 336     reg = addr;
 337
 338   return (!REG_P (reg)
 339           || (REGNO (reg) != FRAME_POINTER_REGNUM
 340               && REGNO (reg) != HARD_FRAME_POINTER_REGNUM
 341               && REGNO (reg) != ARG_POINTER_REGNUM));
 342 }
 343
 344 /* Returns a canonical version of X for the address, from the point of view,
 345    that all multiplications are represented as MULT instead of the multiply
 346    by a power of 2 being represented as ASHIFT.
 347
 348    Every ASHIFT we find has been made by simplify_gen_binary and was not
 349    there before, so it is not shared.  So we can do this in place.  */
 350
 351 static void
 352 canonicalize_address (rtx x)
 353 {
 354   for (;;)
 355     switch (GET_CODE (x))
 356       {
 357       case ASHIFT:
 358         if (CONST_INT_P (XEXP (x, 1))
 359             && INTVAL (XEXP (x, 1)) < GET_MODE_BITSIZE (GET_MODE (x))
 360             && INTVAL (XEXP (x, 1)) >= 0)
 361           {
 362             HOST_WIDE_INT shift = INTVAL (XEXP (x, 1));
 363             PUT_CODE (x, MULT);
 364             XEXP (x, 1) = gen_int_mode ((HOST_WIDE_INT) 1 << shift,
 365                                         GET_MODE (x));
 366           }
 367
 368         x = XEXP (x, 0);
 369         break;
 370
 371       case PLUS:
 372         if (GET_CODE (XEXP (x, 0)) == PLUS
 373             || GET_CODE (XEXP (x, 0)) == ASHIFT
 374             || GET_CODE (XEXP (x, 0)) == CONST)
 375           canonicalize_address (XEXP (x, 0));
 376
 377         x = XEXP (x, 1);
 378         break;
 379
 380       case CONST:
 381         x = XEXP (x, 0);
 382         break;
 383
 384       default:
 385         return;
 386       }
 387 }
 388
 389 /* OLD is a memory address.  Return whether it is good to use NEW instead,
 390    for a memory access in the given MODE.  */
 391
 392 static bool
 393 should_replace_address (rtx old_rtx, rtx new_rtx, enum machine_mode mode,
 394                         addr_space_t as, bool speed)
 395 {
 396   int gain;
 397
 398   if (rtx_equal_p (old_rtx, new_rtx)
 399       || !memory_address_addr_space_p (mode, new_rtx, as))
 400     return false;
 401
 402   /* Copy propagation is always ok.  */
 403   if (REG_P (old_rtx) && REG_P (new_rtx))
 404     return true;
 405
 406   /* Prefer the new address if it is less expensive.  */
 407   gain = (address_cost (old_rtx, mode, as, speed)
 408           - address_cost (new_rtx, mode, as, speed));
 409
 410   /* If the addresses have equivalent cost, prefer the new address
 411      if it has the highest `rtx_cost'.  That has the potential of
 412      eliminating the most insns without additional costs, and it
 413      is the same that cse.c used to do.  */
 414   if (gain == 0)
 415     gain = rtx_cost (new_rtx, SET, speed) - rtx_cost (old_rtx, SET, speed);
 416
 417   return (gain > 0);
 418 }
 419
 420
 421 /* Flags for the last parameter of propagate_rtx_1.  */
 422
 423 enum {
 424   /* If PR_CAN_APPEAR is true, propagate_rtx_1 always returns true;
 425      if it is false, propagate_rtx_1 returns false if, for at least
 426      one occurrence OLD, it failed to collapse the result to a constant.
 427      For example, (mult:M (reg:M A) (minus:M (reg:M B) (reg:M A))) may
 428      collapse to zero if replacing (reg:M B) with (reg:M A).
 429
 430      PR_CAN_APPEAR is disregarded inside MEMs: in that case,
 431      propagate_rtx_1 just tries to make cheaper and valid memory
 432      addresses.  */
 433   PR_CAN_APPEAR = 1,
 434
 435   /* If PR_HANDLE_MEM is not set, propagate_rtx_1 won't attempt any replacement
 436      outside memory addresses.  This is needed because propagate_rtx_1 does
 437      not do any analysis on memory; thus it is very conservative and in general
 438      it will fail if non-read-only MEMs are found in the source expression.
 439
 440      PR_HANDLE_MEM is set when the source of the propagation was not
 441      another MEM.  Then, it is safe not to treat non-read-only MEMs as
 442      ``opaque'' objects.  */
 443   PR_HANDLE_MEM = 2,
 444
 445   /* Set when costs should be optimized for speed.  */
 446   PR_OPTIMIZE_FOR_SPEED = 4
 447 };
 448
 449
 450 /* Replace all occurrences of OLD in *PX with NEW and try to simplify the
 451    resulting expression.  Replace *PX with a new RTL expression if an
 452    occurrence of OLD was found.
 453
 454    This is only a wrapper around simplify-rtx.c: do not add any pattern
 455    matching code here.  (The sole exception is the handling of LO_SUM, but
 456    that is because there is no simplify_gen_* function for LO_SUM).  */
 457
 458 static bool
 459 propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags)
 460 {
 461   rtx x = *px, tem = NULL_RTX, op0, op1, op2;
 462   enum rtx_code code = GET_CODE (x);
 463   enum machine_mode mode = GET_MODE (x);
 464   enum machine_mode op_mode;
 465   bool can_appear = (flags & PR_CAN_APPEAR) != 0;
 466   bool valid_ops = true;
 467
 468   if (!(flags & PR_HANDLE_MEM) && MEM_P (x) && !MEM_READONLY_P (x))
 469     {
 470       /* If unsafe, change MEMs to CLOBBERs or SCRATCHes (to preserve whether
 471          they have side effects or not).  */
 472       *px = (side_effects_p (x)
 473              ? gen_rtx_CLOBBER (GET_MODE (x), const0_rtx)
 474              : gen_rtx_SCRATCH (GET_MODE (x)));
 475       return false;
 476     }
 477
 478   /* If X is OLD_RTX, return NEW_RTX.  But not if replacing only within an
 479      address, and we are *not* inside one.  */
 480   if (x == old_rtx)
 481     {
 482       *px = new_rtx;
 483       return can_appear;
 484     }
 485
 486   /* If this is an expression, try recursive substitution.  */
 487   switch (GET_RTX_CLASS (code))
 488     {
 489     case RTX_UNARY:
 490       op0 = XEXP (x, 0);
 491       op_mode = GET_MODE (op0);
 492       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 493       if (op0 == XEXP (x, 0))
 494         return true;
 495       tem = simplify_gen_unary (code, mode, op0, op_mode);
 496       break;
 497
 498     case RTX_BIN_ARITH:
 499     case RTX_COMM_ARITH:
 500       op0 = XEXP (x, 0);
 501       op1 = XEXP (x, 1);
 502       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 503       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 504       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 505         return true;
 506       tem = simplify_gen_binary (code, mode, op0, op1);
 507       break;
 508
 509     case RTX_COMPARE:
 510     case RTX_COMM_COMPARE:
 511       op0 = XEXP (x, 0);
 512       op1 = XEXP (x, 1);
 513       op_mode = GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
 514       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 515       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 516       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 517         return true;
 518       tem = simplify_gen_relational (code, mode, op_mode, op0, op1);
 519       break;
 520
 521     case RTX_TERNARY:
 522     case RTX_BITFIELD_OPS:
 523       op0 = XEXP (x, 0);
 524       op1 = XEXP (x, 1);
 525       op2 = XEXP (x, 2);
 526       op_mode = GET_MODE (op0);
 527       valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 528       valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 529       valid_ops &= propagate_rtx_1 (&op2, old_rtx, new_rtx, flags);
 530       if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1) && op2 == XEXP (x, 2))
 531         return true;
 532       if (op_mode == VOIDmode)
 533         op_mode = GET_MODE (op0);
 534       tem = simplify_gen_ternary (code, mode, op_mode, op0, op1, op2);
 535       break;
 536
 537     case RTX_EXTRA:
 538       /* The only case we try to handle is a SUBREG.  */
 539       if (code == SUBREG)
 540         {
 541           op0 = XEXP (x, 0);
 542           valid_ops &= propagate_rtx_1 (&op0, old_rtx, new_rtx, flags);
 543           if (op0 == XEXP (x, 0))
 544             return true;
 545           tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)),
 546                                      SUBREG_BYTE (x));
 547         }
 548       break;
 549
 550     case RTX_OBJ:
 551       if (code == MEM && x != new_rtx)
 552         {
 553           rtx new_op0;
 554           op0 = XEXP (x, 0);
 555
 556           /* There are some addresses that we cannot work on.  */
 557           if (!can_simplify_addr (op0))
 558             return true;
 559
 560           op0 = new_op0 = targetm.delegitimize_address (op0);
 561           valid_ops &= propagate_rtx_1 (&new_op0, old_rtx, new_rtx,
 562                                         flags | PR_CAN_APPEAR);
 563
 564           /* Dismiss transformation that we do not want to carry on.  */
 565           if (!valid_ops
 566               || new_op0 == op0
 567               || !(GET_MODE (new_op0) == GET_MODE (op0)
 568                    || GET_MODE (new_op0) == VOIDmode))
 569             return true;
 570
 571           canonicalize_address (new_op0);
 572
 573           /* Copy propagations are always ok.  Otherwise check the costs.  */
 574           if (!(REG_P (old_rtx) && REG_P (new_rtx))
 575               && !should_replace_address (op0, new_op0, GET_MODE (x),
 576                                           MEM_ADDR_SPACE (x),
 577                                           flags & PR_OPTIMIZE_FOR_SPEED))
 578             return true;
 579
 580           tem = replace_equiv_address_nv (x, new_op0);
 581         }
 582
 583       else if (code == LO_SUM)
 584         {
 585           op0 = XEXP (x, 0);
 586           op1 = XEXP (x, 1);
 587
 588           /* The only simplification we do attempts to remove references to op0
 589              or make it constant -- in both cases, op0's invalidity will not
 590              make the result invalid.  */
 591           propagate_rtx_1 (&op0, old_rtx, new_rtx, flags | PR_CAN_APPEAR);
 592           valid_ops &= propagate_rtx_1 (&op1, old_rtx, new_rtx, flags);
 593           if (op0 == XEXP (x, 0) && op1 == XEXP (x, 1))
 594             return true;
 595
 596           /* (lo_sum (high x) x) -> x  */
 597           if (GET_CODE (op0) == HIGH && rtx_equal_p (XEXP (op0, 0), op1))
 598             tem = op1;
 599           else
 600             tem = gen_rtx_LO_SUM (mode, op0, op1);
 601
 602           /* OP1 is likely not a legitimate address, otherwise there would have
 603              been no LO_SUM.  We want it to disappear if it is invalid, return
 604              false in that case.  */
 605           return memory_address_p (mode, tem);
 606         }
 607
 608       else if (code == REG)
 609         {
 610           if (rtx_equal_p (x, old_rtx))
 611             {
 612               *px = new_rtx;
 613               return can_appear;
 614             }
 615         }
 616       break;
 617
 618     default:
 619       break;
 620     }
 621
 622   /* No change, no trouble.  */
 623   if (tem == NULL_RTX)
 624     return true;
 625
 626   *px = tem;
 627
 628   /* The replacement we made so far is valid, if all of the recursive
 629      replacements were valid, or we could simplify everything to
 630      a constant.  */
 631   return valid_ops || can_appear || CONSTANT_P (tem);
 632 }
 633
 634
 635 /* for_each_rtx traversal function that returns 1 if BODY points to
 636    a non-constant mem.  */
 637
 638 static int
 639 varying_mem_p (rtx *body, void *data ATTRIBUTE_UNUSED)
 640 {
 641   rtx x = *body;
 642   return MEM_P (x) && !MEM_READONLY_P (x);
 643 }
 644
 645
 646 /* Replace all occurrences of OLD in X with NEW and try to simplify the
 647    resulting expression (in mode MODE).  Return a new expression if it is
 648    a constant, otherwise X.
 649
 650    Simplifications where occurrences of NEW collapse to a constant are always
 651    accepted.  All simplifications are accepted if NEW is a pseudo too.
 652    Otherwise, we accept simplifications that have a lower or equal cost.  */
 653
 654 static rtx
 655 propagate_rtx (rtx x, enum machine_mode mode, rtx old_rtx, rtx new_rtx,
 656                bool speed)
 657 {
 658   rtx tem;
 659   bool collapsed;
 660   int flags;
 661
 662   if (REG_P (new_rtx) && REGNO (new_rtx) < FIRST_PSEUDO_REGISTER)
 663     return NULL_RTX;
 664
 665   flags = 0;
 666   if (REG_P (new_rtx) || CONSTANT_P (new_rtx))
 667     flags |= PR_CAN_APPEAR;
 668   if (!for_each_rtx (&new_rtx, varying_mem_p, NULL))
 669     flags |= PR_HANDLE_MEM;
 670
 671   if (speed)
 672     flags |= PR_OPTIMIZE_FOR_SPEED;
 673
 674   tem = x;
 675   collapsed = propagate_rtx_1 (&tem, old_rtx, copy_rtx (new_rtx), flags);
 676   if (tem == x || !collapsed)
 677     return NULL_RTX;
 678
 679   /* gen_lowpart_common will not be able to process VOIDmode entities other
 680      than CONST_INTs.  */
 681   if (GET_MODE (tem) == VOIDmode && !CONST_INT_P (tem))
 682     return NULL_RTX;
 683
 684   if (GET_MODE (tem) == VOIDmode)
 685     tem = rtl_hooks.gen_lowpart_no_emit (mode, tem);
 686   else
 687     gcc_assert (GET_MODE (tem) == mode);
 688
 689   return tem;
 690 }
 691
 692
 693 \f
 694
 695 /* Return true if the register from reference REF is killed
 696    between FROM to (but not including) TO.  */
 697
 698 static bool
 699 local_ref_killed_between_p (df_ref ref, rtx from, rtx to)
 700 {
 701   rtx insn;
 702
 703   for (insn = from; insn != to; insn = NEXT_INSN (insn))
 704     {
 705       df_ref *def_rec;
 706       if (!INSN_P (insn))
 707         continue;
 708
 709       for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
 710         {
 711           df_ref def = *def_rec;
 712           if (DF_REF_REGNO (ref) == DF_REF_REGNO (def))
 713             return true;
 714         }
 715     }
 716   return false;
 717 }
 718
 719
 720 /* Check if the given DEF is available in INSN.  This would require full
 721    computation of available expressions; we check only restricted conditions:
 722    - if DEF is the sole definition of its register, go ahead;
 723    - in the same basic block, we check for no definitions killing the
 724      definition of DEF_INSN;
 725    - if USE's basic block has DEF's basic block as the sole predecessor,
 726      we check if the definition is killed after DEF_INSN or before
 727      TARGET_INSN insn, in their respective basic blocks.  */
 728 static bool
 729 use_killed_between (df_ref use, rtx def_insn, rtx target_insn)
 730 {
 731   basic_block def_bb = BLOCK_FOR_INSN (def_insn);
 732   basic_block target_bb = BLOCK_FOR_INSN (target_insn);
 733   int regno;
 734   df_ref def;
 735
 736   /* We used to have a def reaching a use that is _before_ the def,
 737      with the def not dominating the use even though the use and def
 738      are in the same basic block, when a register may be used
 739      uninitialized in a loop.  This should not happen anymore since
 740      we do not use reaching definitions, but still we test for such
 741      cases and assume that DEF is not available.  */
 742   if (def_bb == target_bb
 743       ? DF_INSN_LUID (def_insn) >= DF_INSN_LUID (target_insn)
 744       : !dominated_by_p (CDI_DOMINATORS, target_bb, def_bb))
 745     return true;
 746
 747   /* Check if the reg in USE has only one definition.  We already
 748      know that this definition reaches use, or we wouldn't be here.
 749      However, this is invalid for hard registers because if they are
 750      live at the beginning of the function it does not mean that we
 751      have an uninitialized access.  */
 752   regno = DF_REF_REGNO (use);
 753   def = DF_REG_DEF_CHAIN (regno);
 754   if (def
 755       && DF_REF_NEXT_REG (def) == NULL
 756       && regno >= FIRST_PSEUDO_REGISTER)
 757     return false;
 758
 759   /* Check locally if we are in the same basic block.  */
 760   if (def_bb == target_bb)
 761     return local_ref_killed_between_p (use, def_insn, target_insn);
 762
 763   /* Finally, if DEF_BB is the sole predecessor of TARGET_BB.  */
 764   if (single_pred_p (target_bb)
 765       && single_pred (target_bb) == def_bb)
 766     {
 767       df_ref x;
 768
 769       /* See if USE is killed between DEF_INSN and the last insn in the
 770          basic block containing DEF_INSN.  */
 771       x = df_bb_regno_last_def_find (def_bb, regno);
 772       if (x && DF_INSN_LUID (DF_REF_INSN (x)) >= DF_INSN_LUID (def_insn))
 773         return true;
 774
 775       /* See if USE is killed between TARGET_INSN and the first insn in the
 776          basic block containing TARGET_INSN.  */
 777       x = df_bb_regno_first_def_find (target_bb, regno);
 778       if (x && DF_INSN_LUID (DF_REF_INSN (x)) < DF_INSN_LUID (target_insn))
 779         return true;
 780
 781       return false;
 782     }
 783
 784   /* Otherwise assume the worst case.  */
 785   return true;
 786 }
 787
 788
 789 /* Check if all uses in DEF_INSN can be used in TARGET_INSN.  This
 790    would require full computation of available expressions;
 791    we check only restricted conditions, see use_killed_between.  */
 792 static bool
 793 all_uses_available_at (rtx def_insn, rtx target_insn)
 794 {
 795   df_ref *use_rec;
 796   struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
 797   rtx def_set = single_set (def_insn);
 798
 799   gcc_assert (def_set);
 800
 801   /* If target_insn comes right after def_insn, which is very common
 802      for addresses, we can use a quicker test.  */
 803   if (NEXT_INSN (def_insn) == target_insn
 804       && REG_P (SET_DEST (def_set)))
 805     {
 806       rtx def_reg = SET_DEST (def_set);
 807
 808       /* If the insn uses the reg that it defines, the substitution is
 809          invalid.  */
 810       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 811         {
 812           df_ref use = *use_rec;
 813           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 814             return false;
 815         }
 816       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 817         {
 818           df_ref use = *use_rec;
 819           if (rtx_equal_p (DF_REF_REG (use), def_reg))
 820             return false;
 821         }
 822     }
 823   else
 824     {
 825       rtx def_reg = REG_P (SET_DEST (def_set)) ? SET_DEST (def_set) : NULL_RTX;
 826
 827       /* Look at all the uses of DEF_INSN, and see if they are not
 828          killed between DEF_INSN and TARGET_INSN.  */
 829       for (use_rec = DF_INSN_INFO_USES (insn_info); *use_rec; use_rec++)
 830         {
 831           df_ref use = *use_rec;
 832           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 833             return false;
 834           if (use_killed_between (use, def_insn, target_insn))
 835             return false;
 836         }
 837       for (use_rec = DF_INSN_INFO_EQ_USES (insn_info); *use_rec; use_rec++)
 838         {
 839           df_ref use = *use_rec;
 840           if (def_reg && rtx_equal_p (DF_REF_REG (use), def_reg))
 841             return false;
 842           if (use_killed_between (use, def_insn, target_insn))
 843             return false;
 844         }
 845     }
 846
 847   return true;
 848 }
 849
 850 \f
 851 struct find_occurrence_data
 852 {
 853   rtx find;
 854   rtx *retval;
 855 };
 856
 857 /* Callback for for_each_rtx, used in find_occurrence.
 858    See if PX is the rtx we have to find.  Return 1 to stop for_each_rtx
 859    if successful, or 0 to continue traversing otherwise.  */
 860
 861 static int
 862 find_occurrence_callback (rtx *px, void *data)
 863 {
 864   struct find_occurrence_data *fod = (struct find_occurrence_data *) data;
 865   rtx x = *px;
 866   rtx find = fod->find;
 867
 868   if (x == find)
 869     {
 870       fod->retval = px;
 871       return 1;
 872     }
 873
 874   return 0;
 875 }
 876
 877 /* Return a pointer to one of the occurrences of register FIND in *PX.  */
 878
 879 static rtx *
 880 find_occurrence (rtx *px, rtx find)
 881 {
 882   struct find_occurrence_data data;
 883
 884   gcc_assert (REG_P (find)
 885               || (GET_CODE (find) == SUBREG
 886                   && REG_P (SUBREG_REG (find))));
 887
 888   data.find = find;
 889   data.retval = NULL;
 890   for_each_rtx (px, find_occurrence_callback, &data);
 891   return data.retval;
 892 }
 893
 894 \f
 895 /* Inside INSN, the expression rooted at *LOC has been changed, moving some
 896    uses from USE_VEC.  Find those that are present, and create new items
 897    in the data flow object of the pass.  Mark any new uses as having the
 898    given TYPE.  */
 899 static void
 900 update_df (rtx insn, rtx *loc, df_ref *use_rec, enum df_ref_type type,
 901            int new_flags)
 902 {
 903   bool changed = false;
 904
 905   /* Add a use for the registers that were propagated.  */
 906   while (*use_rec)
 907     {
 908       df_ref use = *use_rec;
 909       df_ref orig_use = use, new_use;
 910       int width = -1;
 911       int offset = -1;
 912       enum machine_mode mode = VOIDmode;
 913       rtx *new_loc = find_occurrence (loc, DF_REF_REG (orig_use));
 914       use_rec++;
 915
 916       if (!new_loc)
 917         continue;
 918
 919       if (DF_REF_FLAGS_IS_SET (orig_use, DF_REF_SIGN_EXTRACT | DF_REF_ZERO_EXTRACT))
 920         {
 921           width = DF_REF_EXTRACT_WIDTH (orig_use);
 922           offset = DF_REF_EXTRACT_OFFSET (orig_use);
 923           mode = DF_REF_EXTRACT_MODE (orig_use);
 924         }
 925
 926       /* Add a new insn use.  Use the original type, because it says if the
 927          use was within a MEM.  */
 928       new_use = df_ref_create (DF_REF_REG (orig_use), new_loc,
 929                                insn, BLOCK_FOR_INSN (insn),
 930                                type, DF_REF_FLAGS (orig_use) | new_flags,
 931                                width, offset, mode);
 932
 933       /* Set up the use-def chain.  */
 934       gcc_assert (DF_REF_ID (new_use) == (int) VEC_length (df_ref, use_def_ref));
 935       VEC_safe_push (df_ref, heap, use_def_ref, get_def_for_use (orig_use));
 936       changed = true;
 937     }
 938   if (changed)
 939     df_insn_rescan (insn);
 940 }
 941
 942
 943 /* Try substituting NEW into LOC, which originated from forward propagation
 944    of USE's value from DEF_INSN.  SET_REG_EQUAL says whether we are
 945    substituting the whole SET_SRC, so we can set a REG_EQUAL note if the
 946    new insn is not recognized.  Return whether the substitution was
 947    performed.  */
 948
 949 static bool
 950 try_fwprop_subst (df_ref use, rtx *loc, rtx new_rtx, rtx def_insn, bool set_reg_equal)
 951 {
 952   rtx insn = DF_REF_INSN (use);
 953   enum df_ref_type type = DF_REF_TYPE (use);
 954   int flags = DF_REF_FLAGS (use);
 955   rtx set = single_set (insn);
 956   bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn));
 957   int old_cost = 0;
 958   bool ok;
 959
 960   /* forward_propagate_subreg may be operating on an instruction with
 961      multiple sets.  If so, assume the cost of the new instruction is
 962      not greater than the old one.  */
 963   if (set)
 964     old_cost = rtx_cost (SET_SRC (set), SET, speed);
 965   if (dump_file)
 966     {
 967       fprintf (dump_file, "\nIn insn %d, replacing\n ", INSN_UID (insn));
 968       print_inline_rtx (dump_file, *loc, 2);
 969       fprintf (dump_file, "\n with ");
 970       print_inline_rtx (dump_file, new_rtx, 2);
 971       fprintf (dump_file, "\n");
 972     }
 973
 974   validate_unshare_change (insn, loc, new_rtx, true);
 975   if (!verify_changes (0))
 976     {
 977       if (dump_file)
 978         fprintf (dump_file, "Changes to insn %d not recognized\n",
 979                  INSN_UID (insn));
 980       ok = false;
 981     }
 982
 983   else if (DF_REF_TYPE (use) == DF_REF_REG_USE
 984            && set
 985            && rtx_cost (SET_SRC (set), SET, speed) > old_cost)
 986     {
 987       if (dump_file)
 988         fprintf (dump_file, "Changes to insn %d not profitable\n",
 989                  INSN_UID (insn));
 990       ok = false;
 991     }
 992
 993   else
 994     {
 995       if (dump_file)
 996         fprintf (dump_file, "Changed insn %d\n", INSN_UID (insn));
 997       ok = true;
 998     }
 999
1000   if (ok)
1001     {
1002       confirm_change_group ();
1003       num_changes++;
1004
1005       df_ref_remove (use);
1006       if (!CONSTANT_P (new_rtx))
1007         {
1008           struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
1009           update_df (insn, loc, DF_INSN_INFO_USES (insn_info), type, flags);
1010           update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info), type, flags);
1011         }
1012     }
1013   else
1014     {
1015       cancel_changes (0);
1016
1017       /* Can also record a simplified value in a REG_EQUAL note,
1018          making a new one if one does not already exist.  */
1019       if (set_reg_equal)
1020         {
1021           if (dump_file)
1022             fprintf (dump_file, " Setting REG_EQUAL note\n");
1023
1024           set_unique_reg_note (insn, REG_EQUAL, copy_rtx (new_rtx));
1025
1026           /* ??? Is this still necessary if we add the note through
1027              set_unique_reg_note?  */
1028           if (!CONSTANT_P (new_rtx))
1029             {
1030               struct df_insn_info *insn_info = DF_INSN_INFO_GET (def_insn);
1031               update_df (insn, loc, DF_INSN_INFO_USES (insn_info),
1032                          type, DF_REF_IN_NOTE);
1033               update_df (insn, loc, DF_INSN_INFO_EQ_USES (insn_info),
1034                          type, DF_REF_IN_NOTE);
1035             }
1036         }
1037     }
1038
1039   return ok;
1040 }
1041
1042 /* For the given single_set INSN, containing SRC known to be a
1043    ZERO_EXTEND or SIGN_EXTEND of a register, return true if INSN
1044    is redundant due to the register being set by a LOAD_EXTEND_OP
1045    load from memory.  */
1046
1047 static bool
1048 free_load_extend (rtx src, rtx insn)
1049 {
1050   rtx reg;
1051   df_ref *use_vec;
1052   df_ref use = 0, def;
1053
1054   reg = XEXP (src, 0);
1055 #ifdef LOAD_EXTEND_OP
1056   if (LOAD_EXTEND_OP (GET_MODE (reg)) != GET_CODE (src))
1057 #endif
1058     return false;
1059
1060   for (use_vec = DF_INSN_USES (insn); *use_vec; use_vec++)
1061     {
1062       use = *use_vec;
1063
1064       if (!DF_REF_IS_ARTIFICIAL (use)
1065           && DF_REF_TYPE (use) == DF_REF_REG_USE
1066           && DF_REF_REG (use) == reg)
1067         break;
1068     }
1069   if (!use)
1070     return false;
1071
1072   def = get_def_for_use (use);
1073   if (!def)
1074     return false;
1075
1076   if (DF_REF_IS_ARTIFICIAL (def))
1077     return false;
1078
1079   if (NONJUMP_INSN_P (DF_REF_INSN (def)))
1080     {
1081       rtx patt = PATTERN (DF_REF_INSN (def));
1082
1083       if (GET_CODE (patt) == SET
1084           && GET_CODE (SET_SRC (patt)) == MEM
1085           && rtx_equal_p (SET_DEST (patt), reg))
1086         return true;
1087     }
1088   return false;
1089 }
1090
1091 /* If USE is a subreg, see if it can be replaced by a pseudo.  */
1092
1093 static bool
1094 forward_propagate_subreg (df_ref use, rtx def_insn, rtx def_set)
1095 {
1096   rtx use_reg = DF_REF_REG (use);
1097   rtx use_insn, src;
1098
1099   /* Only consider subregs... */
1100   enum machine_mode use_mode = GET_MODE (use_reg);
1101   if (GET_CODE (use_reg) != SUBREG
1102       || !REG_P (SET_DEST (def_set)))
1103     return false;
1104
1105   /* If this is a paradoxical SUBREG...  */
1106   if (GET_MODE_SIZE (use_mode)
1107       > GET_MODE_SIZE (GET_MODE (SUBREG_REG (use_reg))))
1108     {
1109       /* If this is a paradoxical SUBREG, we have no idea what value the
1110          extra bits would have.  However, if the operand is equivalent to
1111          a SUBREG whose operand is the same as our mode, and all the modes
1112          are within a word, we can just use the inner operand because
1113          these SUBREGs just say how to treat the register.  */
1114       use_insn = DF_REF_INSN (use);
1115       src = SET_SRC (def_set);
1116       if (GET_CODE (src) == SUBREG
1117           && REG_P (SUBREG_REG (src))
1118           && GET_MODE (SUBREG_REG (src)) == use_mode
1119           && subreg_lowpart_p (src)
1120           && all_uses_available_at (def_insn, use_insn))
1121         return try_fwprop_subst (use, DF_REF_LOC (use), SUBREG_REG (src),
1122                                  def_insn, false);
1123     }
1124
1125   /* If this is a SUBREG of a ZERO_EXTEND or SIGN_EXTEND, and the SUBREG
1126      is the low part of the reg being extended then just use the inner
1127      operand.  Don't do this if the ZERO_EXTEND or SIGN_EXTEND insn will
1128      be removed due to it matching a LOAD_EXTEND_OP load from memory.  */
1129   else if (subreg_lowpart_p (use_reg))
1130     {
1131       use_insn = DF_REF_INSN (use);
1132       src = SET_SRC (def_set);
1133       if ((GET_CODE (src) == ZERO_EXTEND
1134            || GET_CODE (src) == SIGN_EXTEND)
1135           && REG_P (XEXP (src, 0))
1136           && GET_MODE (XEXP (src, 0)) == use_mode
1137           && !free_load_extend (src, def_insn)
1138           && all_uses_available_at (def_insn, use_insn))
1139         return try_fwprop_subst (use, DF_REF_LOC (use), XEXP (src, 0),
1140                                  def_insn, false);
1141     }
1142
1143   return false;
1144 }
1145
1146 /* Try to replace USE with SRC (defined in DEF_INSN) in __asm.  */
1147
1148 static bool
1149 forward_propagate_asm (df_ref use, rtx def_insn, rtx def_set, rtx reg)
1150 {
1151   rtx use_insn = DF_REF_INSN (use), src, use_pat, asm_operands, new_rtx, *loc;
1152   int speed_p, i;
1153   df_ref *use_vec;
1154
1155   gcc_assert ((DF_REF_FLAGS (use) & DF_REF_IN_NOTE) == 0);
1156
1157   src = SET_SRC (def_set);
1158   use_pat = PATTERN (use_insn);
1159
1160   /* In __asm don't replace if src might need more registers than
1161      reg, as that could increase register pressure on the __asm.  */
1162   use_vec = DF_INSN_USES (def_insn);
1163   if (use_vec[0] && use_vec[1])
1164     return false;
1165
1166   speed_p = optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn));
1167   asm_operands = NULL_RTX;
1168   switch (GET_CODE (use_pat))
1169     {
1170     case ASM_OPERANDS:
1171       asm_operands = use_pat;
1172       break;
1173     case SET:
1174       if (MEM_P (SET_DEST (use_pat)))
1175         {
1176           loc = &SET_DEST (use_pat);
1177           new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1178           if (new_rtx)
1179             validate_unshare_change (use_insn, loc, new_rtx, true);
1180         }
1181       asm_operands = SET_SRC (use_pat);
1182       break;
1183     case PARALLEL:
1184       for (i = 0; i < XVECLEN (use_pat, 0); i++)
1185         if (GET_CODE (XVECEXP (use_pat, 0, i)) == SET)
1186           {
1187             if (MEM_P (SET_DEST (XVECEXP (use_pat, 0, i))))
1188               {
1189                 loc = &SET_DEST (XVECEXP (use_pat, 0, i));
1190                 new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg,
1191                                          src, speed_p);
1192                 if (new_rtx)
1193                   validate_unshare_change (use_insn, loc, new_rtx, true);
1194               }
1195             asm_operands = SET_SRC (XVECEXP (use_pat, 0, i));
1196           }
1197         else if (GET_CODE (XVECEXP (use_pat, 0, i)) == ASM_OPERANDS)
1198           asm_operands = XVECEXP (use_pat, 0, i);
1199       break;
1200     default:
1201       gcc_unreachable ();
1202     }
1203
1204   gcc_assert (asm_operands && GET_CODE (asm_operands) == ASM_OPERANDS);
1205   for (i = 0; i < ASM_OPERANDS_INPUT_LENGTH (asm_operands); i++)
1206     {
1207       loc = &ASM_OPERANDS_INPUT (asm_operands, i);
1208       new_rtx = propagate_rtx (*loc, GET_MODE (*loc), reg, src, speed_p);
1209       if (new_rtx)
1210         validate_unshare_change (use_insn, loc, new_rtx, true);
1211     }
1212
1213   if (num_changes_pending () == 0 || !apply_change_group ())
1214     return false;
1215
1216   num_changes++;
1217   return true;
1218 }
1219
1220 /* Try to replace USE with SRC (defined in DEF_INSN) and simplify the
1221    result.  */
1222
1223 static bool
1224 forward_propagate_and_simplify (df_ref use, rtx def_insn, rtx def_set)
1225 {
1226   rtx use_insn = DF_REF_INSN (use);
1227   rtx use_set = single_set (use_insn);
1228   rtx src, reg, new_rtx, *loc;
1229   bool set_reg_equal;
1230   enum machine_mode mode;
1231   int asm_use = -1;
1232
1233   if (INSN_CODE (use_insn) < 0)
1234     asm_use = asm_noperands (PATTERN (use_insn));
1235
1236   if (!use_set && asm_use < 0 && !DEBUG_INSN_P (use_insn))
1237     return false;
1238
1239   /* Do not propagate into PC, CC0, etc.  */
1240   if (use_set && GET_MODE (SET_DEST (use_set)) == VOIDmode)
1241     return false;
1242
1243   /* If def and use are subreg, check if they match.  */
1244   reg = DF_REF_REG (use);
1245   if (GET_CODE (reg) == SUBREG
1246       && GET_CODE (SET_DEST (def_set)) == SUBREG
1247       && (SUBREG_BYTE (SET_DEST (def_set)) != SUBREG_BYTE (reg)
1248           || GET_MODE (SET_DEST (def_set)) != GET_MODE (reg)))
1249     return false;
1250
1251   /* Check if the def had a subreg, but the use has the whole reg.  */
1252   if (REG_P (reg) && GET_CODE (SET_DEST (def_set)) == SUBREG)
1253     return false;
1254
1255   /* Check if the use has a subreg, but the def had the whole reg.  Unlike the
1256      previous case, the optimization is possible and often useful indeed.  */
1257   if (GET_CODE (reg) == SUBREG && REG_P (SET_DEST (def_set)))
1258     reg = SUBREG_REG (reg);
1259
1260   /* Check if the substitution is valid (last, because it's the most
1261      expensive check!).  */
1262   src = SET_SRC (def_set);
1263   if (!CONSTANT_P (src) && !all_uses_available_at (def_insn, use_insn))
1264     return false;
1265
1266   /* Check if the def is loading something from the constant pool; in this
1267      case we would undo optimization such as compress_float_constant.
1268      Still, we can set a REG_EQUAL note.  */
1269   if (MEM_P (src) && MEM_READONLY_P (src))
1270     {
1271       rtx x = avoid_constant_pool_reference (src);
1272       if (x != src && use_set)
1273         {
1274           rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1275           rtx old_rtx = note ? XEXP (note, 0) : SET_SRC (use_set);
1276           rtx new_rtx = simplify_replace_rtx (old_rtx, src, x);
1277           if (old_rtx != new_rtx)
1278             set_unique_reg_note (use_insn, REG_EQUAL, copy_rtx (new_rtx));
1279         }
1280       return false;
1281     }
1282
1283   if (asm_use >= 0)
1284     return forward_propagate_asm (use, def_insn, def_set, reg);
1285
1286   /* Else try simplifying.  */
1287
1288   if (DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE)
1289     {
1290       loc = &SET_DEST (use_set);
1291       set_reg_equal = false;
1292     }
1293   else if (!use_set)
1294     {
1295       loc = &INSN_VAR_LOCATION_LOC (use_insn);
1296       set_reg_equal = false;
1297     }
1298   else
1299     {
1300       rtx note = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1301       if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1302         loc = &XEXP (note, 0);
1303       else
1304         loc = &SET_SRC (use_set);
1305
1306       /* Do not replace an existing REG_EQUAL note if the insn is not
1307          recognized.  Either we're already replacing in the note, or
1308          we'll separately try plugging the definition in the note and
1309          simplifying.  */
1310       set_reg_equal = (note == NULL_RTX);
1311     }
1312
1313   if (GET_MODE (*loc) == VOIDmode)
1314     mode = GET_MODE (SET_DEST (use_set));
1315   else
1316     mode = GET_MODE (*loc);
1317
1318   new_rtx = propagate_rtx (*loc, mode, reg, src,
1319                            optimize_bb_for_speed_p (BLOCK_FOR_INSN (use_insn)));
1320
1321   if (!new_rtx)
1322     return false;
1323
1324   return try_fwprop_subst (use, loc, new_rtx, def_insn, set_reg_equal);
1325 }
1326
1327
1328 /* Given a use USE of an insn, if it has a single reaching
1329    definition, try to forward propagate it into that insn.  */
1330
1331 static void
1332 forward_propagate_into (df_ref use)
1333 {
1334   df_ref def;
1335   rtx def_insn, def_set, use_insn;
1336   rtx parent;
1337
1338   if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE)
1339     return;
1340   if (DF_REF_IS_ARTIFICIAL (use))
1341     return;
1342
1343   /* Only consider uses that have a single definition.  */
1344   def = get_def_for_use (use);
1345   if (!def)
1346     return;
1347   if (DF_REF_FLAGS (def) & DF_REF_READ_WRITE)
1348     return;
1349   if (DF_REF_IS_ARTIFICIAL (def))
1350     return;
1351
1352   /* Do not propagate loop invariant definitions inside the loop.  */
1353   if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father)
1354     return;
1355
1356   /* Check if the use is still present in the insn!  */
1357   use_insn = DF_REF_INSN (use);
1358   if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE)
1359     parent = find_reg_note (use_insn, REG_EQUAL, NULL_RTX);
1360   else
1361     parent = PATTERN (use_insn);
1362
1363   if (!reg_mentioned_p (DF_REF_REG (use), parent))
1364     return;
1365
1366   def_insn = DF_REF_INSN (def);
1367   if (multiple_sets (def_insn))
1368     return;
1369   def_set = single_set (def_insn);
1370   if (!def_set)
1371     return;
1372
1373   /* Only try one kind of propagation.  If two are possible, we'll
1374      do it on the following iterations.  */
1375   if (!forward_propagate_and_simplify (use, def_insn, def_set))
1376     forward_propagate_subreg (use, def_insn, def_set);
1377 }
1378
1379 \f
1380 static void
1381 fwprop_init (void)
1382 {
1383   num_changes = 0;
1384   calculate_dominance_info (CDI_DOMINATORS);
1385
1386   /* We do not always want to propagate into loops, so we have to find
1387      loops and be careful about them.  But we have to call flow_loops_find
1388      before df_analyze, because flow_loops_find may introduce new jump
1389      insns (sadly) if we are not working in cfglayout mode.  */
1390   loop_optimizer_init (0);
1391
1392   build_single_def_use_links ();
1393   df_set_flags (DF_DEFER_INSN_RESCAN);
1394 }
1395
1396 static void
1397 fwprop_done (void)
1398 {
1399   loop_optimizer_finalize ();
1400
1401   VEC_free (df_ref, heap, use_def_ref);
1402   free_dominance_info (CDI_DOMINATORS);
1403   cleanup_cfg (0);
1404   delete_trivially_dead_insns (get_insns (), max_reg_num ());
1405
1406   if (dump_file)
1407     fprintf (dump_file,
1408              "\nNumber of successful forward propagations: %d\n\n",
1409              num_changes);
1410 }
1411
1412
1413 /* Main entry point.  */
1414
1415 static bool
1416 gate_fwprop (void)
1417 {
1418   return optimize > 0 && flag_forward_propagate;
1419 }
1420
1421 static unsigned int
1422 fwprop (void)
1423 {
1424   unsigned i;
1425
1426   fwprop_init ();
1427
1428   /* Go through all the uses.  update_df will create new ones at the
1429      end, and we'll go through them as well.
1430
1431      Do not forward propagate addresses into loops until after unrolling.
1432      CSE did so because it was able to fix its own mess, but we are not.  */
1433
1434   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1435     {
1436       df_ref use = DF_USES_GET (i);
1437       if (use)
1438         if (DF_REF_TYPE (use) == DF_REF_REG_USE
1439             || DF_REF_BB (use)->loop_father == NULL
1440             /* The outer most loop is not really a loop.  */
1441             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
1442           forward_propagate_into (use);
1443     }
1444
1445   fwprop_done ();
1446   return 0;
1447 }
1448
1449 struct rtl_opt_pass pass_rtl_fwprop =
1450 {
1451  {
1452   RTL_PASS,
1453   "fwprop1",                            /* name */
1454   gate_fwprop,                          /* gate */
1455   fwprop,                               /* execute */
1456   NULL,                                 /* sub */
1457   NULL,                                 /* next */
1458   0,                                    /* static_pass_number */
1459   TV_FWPROP,                            /* tv_id */
1460   0,                                    /* properties_required */
1461   0,                                    /* properties_provided */
1462   0,                                    /* properties_destroyed */
1463   0,                                    /* todo_flags_start */
1464   TODO_df_finish | TODO_verify_rtl_sharing |
1465   TODO_dump_func                        /* todo_flags_finish */
1466  }
1467 };
1468
1469 static unsigned int
1470 fwprop_addr (void)
1471 {
1472   unsigned i;
1473   fwprop_init ();
1474
1475   /* Go through all the uses.  update_df will create new ones at the
1476      end, and we'll go through them as well.  */
1477   for (i = 0; i < DF_USES_TABLE_SIZE (); i++)
1478     {
1479       df_ref use = DF_USES_GET (i);
1480       if (use)
1481         if (DF_REF_TYPE (use) != DF_REF_REG_USE
1482             && DF_REF_BB (use)->loop_father != NULL
1483             /* The outer most loop is not really a loop.  */
1484             && loop_outer (DF_REF_BB (use)->loop_father) != NULL)
1485           forward_propagate_into (use);
1486     }
1487
1488   fwprop_done ();
1489
1490   return 0;
1491 }
1492
1493 struct rtl_opt_pass pass_rtl_fwprop_addr =
1494 {
1495  {
1496   RTL_PASS,
1497   "fwprop2",                            /* name */
1498   gate_fwprop,                          /* gate */
1499   fwprop_addr,                          /* execute */
1500   NULL,                                 /* sub */
1501   NULL,                                 /* next */
1502   0,                                    /* static_pass_number */
1503   TV_FWPROP,                            /* tv_id */
1504   0,                                    /* properties_required */
1505   0,                                    /* properties_provided */
1506   0,                                    /* properties_destroyed */
1507   0,                                    /* todo_flags_start */
1508   TODO_df_finish | TODO_verify_rtl_sharing |
1509   TODO_dump_func                        /* todo_flags_finish */
1510  }
1511 };