gcc/tree-ssa-math-opts.c

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* The first mini-pass in this file tries to CSE reciprocal
  22    operations.  These are common in sequences such as this one:
  23
  24         modulus = sqrt(x*x + y*y + z*z);
  25         x = x / modulus;
  26         y = y / modulus;
  27         z = z / modulus;
  28
  29    that can be optimized to
  30
  31         modulus = sqrt(x*x + y*y + z*z);
  32         rmodulus = 1.0 / modulus;
  33         x = x * rmodulus;
  34         y = y * rmodulus;
  35         z = z * rmodulus;
  36
  37    We do this for loop invariant divisors, and with this pass whenever
  38    we notice that a division has the same divisor multiple times.
  39
  40    Of course, like in PRE, we don't insert a division if a dominator
  41    already has one.  However, this cannot be done as an extension of
  42    PRE for several reasons.
  43
  44    First of all, with some experiments it was found out that the
  45    transformation is not always useful if there are only two divisions
  46    by the same divisor.  This is probably because modern processors
  47    can pipeline the divisions; on older, in-order processors it should
  48    still be effective to optimize two divisions by the same number.
  49    We make this a param, and it shall be called N in the remainder of
  50    this comment.
  51
  52    Second, if trapping math is active, we have less freedom on where
  53    to insert divisions: we can only do so in basic blocks that already
  54    contain one.  (If divisions don't trap, instead, we can insert
  55    divisions elsewhere, which will be in blocks that are common dominators
  56    of those that have the division).
  57
  58    We really don't want to compute the reciprocal unless a division will
  59    be found.  To do this, we won't insert the division in a basic block
  60    that has less than N divisions *post-dominating* it.
  61
  62    The algorithm constructs a subset of the dominator tree, holding the
  63    blocks containing the divisions and the common dominators to them,
  64    and walks it twice.  The first walk is in post-order, and it annotates
  65    each block with the number of divisions that post-dominate it: this
  66    gives information on where divisions can be inserted profitably.
  67    The second walk is in pre-order, and it inserts divisions as explained
  68    above, and replaces divisions by multiplications.
  69
  70    In the best case, the cost of the pass is O(n_statements).  In the
  71    worst-case, the cost is due to creating the dominator tree subset,
  72    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  73    for n_statements / n_basic_blocks statements.  So, the amortized cost
  74    of creating the dominator tree subset is O(n_basic_blocks) and the
  75    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  76
  77    More practically, the cost will be small because there are few
  78    divisions, and they tend to be in the same basic block, so insert_bb
  79    is called very few times.
  80
  81    If we did this using domwalk.c, an efficient implementation would have
  82    to work on all the variables in a single pass, because we could not
  83    work on just a subset of the dominator tree, as we do now, and the
  84    cost would also be something like O(n_statements * n_basic_blocks).
  85    The data structures would be more complex in order to work on all the
  86    variables in a single pass.  */
  87
  88 #include "config.h"
  89 #include "system.h"
  90 #include "coretypes.h"
  91 #include "tm.h"
  92 #include "flags.h"
  93 #include "tree.h"
  94 #include "tree-flow.h"
  95 #include "tree-pass.h"
  96 #include "alloc-pool.h"
  97 #include "basic-block.h"
  98 #include "target.h"
  99 #include "gimple-pretty-print.h"
 100
 101 /* FIXME: RTL headers have to be included here for optabs.  */
 102 #include "rtl.h"                /* Because optabs.h wants enum rtx_code.  */
 103 #include "expr.h"               /* Because optabs.h wants sepops.  */
 104 #include "optabs.h"
 105
 106 /* This structure represents one basic block that either computes a
 107    division, or is a common dominator for basic block that compute a
 108    division.  */
 109 struct occurrence {
 110   /* The basic block represented by this structure.  */
 111   basic_block bb;
 112
 113   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 114      inserted in BB.  */
 115   tree recip_def;
 116
 117   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 118      was inserted in BB.  */
 119   gimple recip_def_stmt;
 120
 121   /* Pointer to a list of "struct occurrence"s for blocks dominated
 122      by BB.  */
 123   struct occurrence *children;
 124
 125   /* Pointer to the next "struct occurrence"s in the list of blocks
 126      sharing a common dominator.  */
 127   struct occurrence *next;
 128
 129   /* The number of divisions that are in BB before compute_merit.  The
 130      number of divisions that are in BB or post-dominate it after
 131      compute_merit.  */
 132   int num_divisions;
 133
 134   /* True if the basic block has a division, false if it is a common
 135      dominator for basic blocks that do.  If it is false and trapping
 136      math is active, BB is not a candidate for inserting a reciprocal.  */
 137   bool bb_has_division;
 138 };
 139
 140 static struct
 141 {
 142   /* Number of 1.0/X ops inserted.  */
 143   int rdivs_inserted;
 144
 145   /* Number of 1.0/FUNC ops inserted.  */
 146   int rfuncs_inserted;
 147 } reciprocal_stats;
 148
 149 static struct
 150 {
 151   /* Number of cexpi calls inserted.  */
 152   int inserted;
 153 } sincos_stats;
 154
 155 static struct
 156 {
 157   /* Number of hand-written 16-bit bswaps found.  */
 158   int found_16bit;
 159
 160   /* Number of hand-written 32-bit bswaps found.  */
 161   int found_32bit;
 162
 163   /* Number of hand-written 64-bit bswaps found.  */
 164   int found_64bit;
 165 } bswap_stats;
 166
 167 static struct
 168 {
 169   /* Number of widening multiplication ops inserted.  */
 170   int widen_mults_inserted;
 171
 172   /* Number of integer multiply-and-accumulate ops inserted.  */
 173   int maccs_inserted;
 174
 175   /* Number of fp fused multiply-add ops inserted.  */
 176   int fmas_inserted;
 177 } widen_mul_stats;
 178
 179 /* The instance of "struct occurrence" representing the highest
 180    interesting block in the dominator tree.  */
 181 static struct occurrence *occ_head;
 182
 183 /* Allocation pool for getting instances of "struct occurrence".  */
 184 static alloc_pool occ_pool;
 185
 186
 187
 188 /* Allocate and return a new struct occurrence for basic block BB, and
 189    whose children list is headed by CHILDREN.  */
 190 static struct occurrence *
 191 occ_new (basic_block bb, struct occurrence *children)
 192 {
 193   struct occurrence *occ;
 194
 195   bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
 196   memset (occ, 0, sizeof (struct occurrence));
 197
 198   occ->bb = bb;
 199   occ->children = children;
 200   return occ;
 201 }
 202
 203
 204 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 205    list of "struct occurrence"s, one per basic block, having IDOM as
 206    their common dominator.
 207
 208    We try to insert NEW_OCC as deep as possible in the tree, and we also
 209    insert any other block that is a common dominator for BB and one
 210    block already in the tree.  */
 211
 212 static void
 213 insert_bb (struct occurrence *new_occ, basic_block idom,
 214            struct occurrence **p_head)
 215 {
 216   struct occurrence *occ, **p_occ;
 217
 218   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 219     {
 220       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 221       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 222       if (dom == bb)
 223         {
 224           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 225              from its list.  */
 226           *p_occ = occ->next;
 227           occ->next = new_occ->children;
 228           new_occ->children = occ;
 229
 230           /* Try the next block (it may as well be dominated by BB).  */
 231         }
 232
 233       else if (dom == occ_bb)
 234         {
 235           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 236           insert_bb (new_occ, dom, &occ->children);
 237           return;
 238         }
 239
 240       else if (dom != idom)
 241         {
 242           gcc_assert (!dom->aux);
 243
 244           /* There is a dominator between IDOM and BB, add it and make
 245              two children out of NEW_OCC and OCC.  First, remove OCC from
 246              its list.  */
 247           *p_occ = occ->next;
 248           new_occ->next = occ;
 249           occ->next = NULL;
 250
 251           /* None of the previous blocks has DOM as a dominator: if we tail
 252              recursed, we would reexamine them uselessly. Just switch BB with
 253              DOM, and go on looking for blocks dominated by DOM.  */
 254           new_occ = occ_new (dom, new_occ);
 255         }
 256
 257       else
 258         {
 259           /* Nothing special, go on with the next element.  */
 260           p_occ = &occ->next;
 261         }
 262     }
 263
 264   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 265   new_occ->next = *p_head;
 266   *p_head = new_occ;
 267 }
 268
 269 /* Register that we found a division in BB.  */
 270
 271 static inline void
 272 register_division_in (basic_block bb)
 273 {
 274   struct occurrence *occ;
 275
 276   occ = (struct occurrence *) bb->aux;
 277   if (!occ)
 278     {
 279       occ = occ_new (bb, NULL);
 280       insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
 281     }
 282
 283   occ->bb_has_division = true;
 284   occ->num_divisions++;
 285 }
 286
 287
 288 /* Compute the number of divisions that postdominate each block in OCC and
 289    its children.  */
 290
 291 static void
 292 compute_merit (struct occurrence *occ)
 293 {
 294   struct occurrence *occ_child;
 295   basic_block dom = occ->bb;
 296
 297   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 298     {
 299       basic_block bb;
 300       if (occ_child->children)
 301         compute_merit (occ_child);
 302
 303       if (flag_exceptions)
 304         bb = single_noncomplex_succ (dom);
 305       else
 306         bb = dom;
 307
 308       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 309         occ->num_divisions += occ_child->num_divisions;
 310     }
 311 }
 312
 313
 314 /* Return whether USE_STMT is a floating-point division by DEF.  */
 315 static inline bool
 316 is_division_by (gimple use_stmt, tree def)
 317 {
 318   return is_gimple_assign (use_stmt)
 319          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 320          && gimple_assign_rhs2 (use_stmt) == def
 321          /* Do not recognize x / x as valid division, as we are getting
 322             confused later by replacing all immediate uses x in such
 323             a stmt.  */
 324          && gimple_assign_rhs1 (use_stmt) != def;
 325 }
 326
 327 /* Walk the subset of the dominator tree rooted at OCC, setting the
 328    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 329    the given basic block.  The field may be left NULL, of course,
 330    if it is not possible or profitable to do the optimization.
 331
 332    DEF_BSI is an iterator pointing at the statement defining DEF.
 333    If RECIP_DEF is set, a dominator already has a computation that can
 334    be used.  */
 335
 336 static void
 337 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 338                     tree def, tree recip_def, int threshold)
 339 {
 340   tree type;
 341   gimple new_stmt;
 342   gimple_stmt_iterator gsi;
 343   struct occurrence *occ_child;
 344
 345   if (!recip_def
 346       && (occ->bb_has_division || !flag_trapping_math)
 347       && occ->num_divisions >= threshold)
 348     {
 349       /* Make a variable with the replacement and substitute it.  */
 350       type = TREE_TYPE (def);
 351       recip_def = create_tmp_reg (type, "reciptmp");
 352       new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
 353                                                build_one_cst (type), def);
 354
 355       if (occ->bb_has_division)
 356         {
 357           /* Case 1: insert before an existing division.  */
 358           gsi = gsi_after_labels (occ->bb);
 359           while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
 360             gsi_next (&gsi);
 361
 362           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 363         }
 364       else if (def_gsi && occ->bb == def_gsi->bb)
 365         {
 366           /* Case 2: insert right after the definition.  Note that this will
 367              never happen if the definition statement can throw, because in
 368              that case the sole successor of the statement's basic block will
 369              dominate all the uses as well.  */
 370           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 371         }
 372       else
 373         {
 374           /* Case 3: insert in a basic block not containing defs/uses.  */
 375           gsi = gsi_after_labels (occ->bb);
 376           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 377         }
 378
 379       reciprocal_stats.rdivs_inserted++;
 380
 381       occ->recip_def_stmt = new_stmt;
 382     }
 383
 384   occ->recip_def = recip_def;
 385   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 386     insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
 387 }
 388
 389
 390 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 391    possible.  */
 392
 393 static inline void
 394 replace_reciprocal (use_operand_p use_p)
 395 {
 396   gimple use_stmt = USE_STMT (use_p);
 397   basic_block bb = gimple_bb (use_stmt);
 398   struct occurrence *occ = (struct occurrence *) bb->aux;
 399
 400   if (optimize_bb_for_speed_p (bb)
 401       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 402     {
 403       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 404       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 405       SET_USE (use_p, occ->recip_def);
 406       fold_stmt_inplace (&gsi);
 407       update_stmt (use_stmt);
 408     }
 409 }
 410
 411
 412 /* Free OCC and return one more "struct occurrence" to be freed.  */
 413
 414 static struct occurrence *
 415 free_bb (struct occurrence *occ)
 416 {
 417   struct occurrence *child, *next;
 418
 419   /* First get the two pointers hanging off OCC.  */
 420   next = occ->next;
 421   child = occ->children;
 422   occ->bb->aux = NULL;
 423   pool_free (occ_pool, occ);
 424
 425   /* Now ensure that we don't recurse unless it is necessary.  */
 426   if (!child)
 427     return next;
 428   else
 429     {
 430       while (next)
 431         next = free_bb (next);
 432
 433       return child;
 434     }
 435 }
 436
 437
 438 /* Look for floating-point divisions among DEF's uses, and try to
 439    replace them by multiplications with the reciprocal.  Add
 440    as many statements computing the reciprocal as needed.
 441
 442    DEF must be a GIMPLE register of a floating-point type.  */
 443
 444 static void
 445 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 446 {
 447   use_operand_p use_p;
 448   imm_use_iterator use_iter;
 449   struct occurrence *occ;
 450   int count = 0, threshold;
 451
 452   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
 453
 454   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 455     {
 456       gimple use_stmt = USE_STMT (use_p);
 457       if (is_division_by (use_stmt, def))
 458         {
 459           register_division_in (gimple_bb (use_stmt));
 460           count++;
 461         }
 462     }
 463
 464   /* Do the expensive part only if we can hope to optimize something.  */
 465   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 466   if (count >= threshold)
 467     {
 468       gimple use_stmt;
 469       for (occ = occ_head; occ; occ = occ->next)
 470         {
 471           compute_merit (occ);
 472           insert_reciprocals (def_gsi, occ, def, NULL, threshold);
 473         }
 474
 475       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 476         {
 477           if (is_division_by (use_stmt, def))
 478             {
 479               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 480                 replace_reciprocal (use_p);
 481             }
 482         }
 483     }
 484
 485   for (occ = occ_head; occ; )
 486     occ = free_bb (occ);
 487
 488   occ_head = NULL;
 489 }
 490
 491 static bool
 492 gate_cse_reciprocals (void)
 493 {
 494   return optimize && flag_reciprocal_math;
 495 }
 496
 497 /* Go through all the floating-point SSA_NAMEs, and call
 498    execute_cse_reciprocals_1 on each of them.  */
 499 static unsigned int
 500 execute_cse_reciprocals (void)
 501 {
 502   basic_block bb;
 503   tree arg;
 504
 505   occ_pool = create_alloc_pool ("dominators for recip",
 506                                 sizeof (struct occurrence),
 507                                 n_basic_blocks / 3 + 1);
 508
 509   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 510   calculate_dominance_info (CDI_DOMINATORS);
 511   calculate_dominance_info (CDI_POST_DOMINATORS);
 512
 513 #ifdef ENABLE_CHECKING
 514   FOR_EACH_BB (bb)
 515     gcc_assert (!bb->aux);
 516 #endif
 517
 518   for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
 519     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 520         && is_gimple_reg (arg))
 521       {
 522         tree name = ssa_default_def (cfun, arg);
 523         if (name)
 524           execute_cse_reciprocals_1 (NULL, name);
 525       }
 526
 527   FOR_EACH_BB (bb)
 528     {
 529       gimple_stmt_iterator gsi;
 530       gimple phi;
 531       tree def;
 532
 533       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 534         {
 535           phi = gsi_stmt (gsi);
 536           def = PHI_RESULT (phi);
 537           if (! virtual_operand_p (def)
 538               && FLOAT_TYPE_P (TREE_TYPE (def)))
 539             execute_cse_reciprocals_1 (NULL, def);
 540         }
 541
 542       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 543         {
 544           gimple stmt = gsi_stmt (gsi);
 545
 546           if (gimple_has_lhs (stmt)
 547               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 548               && FLOAT_TYPE_P (TREE_TYPE (def))
 549               && TREE_CODE (def) == SSA_NAME)
 550             execute_cse_reciprocals_1 (&gsi, def);
 551         }
 552
 553       if (optimize_bb_for_size_p (bb))
 554         continue;
 555
 556       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 557       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
 558         {
 559           gimple stmt = gsi_stmt (gsi);
 560           tree fndecl;
 561
 562           if (is_gimple_assign (stmt)
 563               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 564             {
 565               tree arg1 = gimple_assign_rhs2 (stmt);
 566               gimple stmt1;
 567
 568               if (TREE_CODE (arg1) != SSA_NAME)
 569                 continue;
 570
 571               stmt1 = SSA_NAME_DEF_STMT (arg1);
 572
 573               if (is_gimple_call (stmt1)
 574                   && gimple_call_lhs (stmt1)
 575                   && (fndecl = gimple_call_fndecl (stmt1))
 576                   && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
 577                       || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
 578                 {
 579                   enum built_in_function code;
 580                   bool md_code, fail;
 581                   imm_use_iterator ui;
 582                   use_operand_p use_p;
 583
 584                   code = DECL_FUNCTION_CODE (fndecl);
 585                   md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
 586
 587                   fndecl = targetm.builtin_reciprocal (code, md_code, false);
 588                   if (!fndecl)
 589                     continue;
 590
 591                   /* Check that all uses of the SSA name are divisions,
 592                      otherwise replacing the defining statement will do
 593                      the wrong thing.  */
 594                   fail = false;
 595                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
 596                     {
 597                       gimple stmt2 = USE_STMT (use_p);
 598                       if (is_gimple_debug (stmt2))
 599                         continue;
 600                       if (!is_gimple_assign (stmt2)
 601                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
 602                           || gimple_assign_rhs1 (stmt2) == arg1
 603                           || gimple_assign_rhs2 (stmt2) != arg1)
 604                         {
 605                           fail = true;
 606                           break;
 607                         }
 608                     }
 609                   if (fail)
 610                     continue;
 611
 612                   gimple_replace_lhs (stmt1, arg1);
 613                   gimple_call_set_fndecl (stmt1, fndecl);
 614                   update_stmt (stmt1);
 615                   reciprocal_stats.rfuncs_inserted++;
 616
 617                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
 618                     {
 619                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
 620                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
 621                       fold_stmt_inplace (&gsi);
 622                       update_stmt (stmt);
 623                     }
 624                 }
 625             }
 626         }
 627     }
 628
 629   statistics_counter_event (cfun, "reciprocal divs inserted",
 630                             reciprocal_stats.rdivs_inserted);
 631   statistics_counter_event (cfun, "reciprocal functions inserted",
 632                             reciprocal_stats.rfuncs_inserted);
 633
 634   free_dominance_info (CDI_DOMINATORS);
 635   free_dominance_info (CDI_POST_DOMINATORS);
 636   free_alloc_pool (occ_pool);
 637   return 0;
 638 }
 639
 640 struct gimple_opt_pass pass_cse_reciprocals =
 641 {
 642  {
 643   GIMPLE_PASS,
 644   "recip",                              /* name */
 645   OPTGROUP_NONE,                        /* optinfo_flags */
 646   gate_cse_reciprocals,                 /* gate */
 647   execute_cse_reciprocals,              /* execute */
 648   NULL,                                 /* sub */
 649   NULL,                                 /* next */
 650   0,                                    /* static_pass_number */
 651   TV_NONE,                              /* tv_id */
 652   PROP_ssa,                             /* properties_required */
 653   0,                                    /* properties_provided */
 654   0,                                    /* properties_destroyed */
 655   0,                                    /* todo_flags_start */
 656   TODO_update_ssa | TODO_verify_ssa
 657     | TODO_verify_stmts                /* todo_flags_finish */
 658  }
 659 };
 660
 661 /* Records an occurrence at statement USE_STMT in the vector of trees
 662    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
 663    is not yet initialized.  Returns true if the occurrence was pushed on
 664    the vector.  Adjusts *TOP_BB to be the basic block dominating all
 665    statements in the vector.  */
 666
 667 static bool
 668 maybe_record_sincos (VEC(gimple, heap) **stmts,
 669                      basic_block *top_bb, gimple use_stmt)
 670 {
 671   basic_block use_bb = gimple_bb (use_stmt);
 672   if (*top_bb
 673       && (*top_bb == use_bb
 674           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
 675     VEC_safe_push (gimple, heap, *stmts, use_stmt);
 676   else if (!*top_bb
 677            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
 678     {
 679       VEC_safe_push (gimple, heap, *stmts, use_stmt);
 680       *top_bb = use_bb;
 681     }
 682   else
 683     return false;
 684
 685   return true;
 686 }
 687
 688 /* Look for sin, cos and cexpi calls with the same argument NAME and
 689    create a single call to cexpi CSEing the result in this case.
 690    We first walk over all immediate uses of the argument collecting
 691    statements that we can CSE in a vector and in a second pass replace
 692    the statement rhs with a REALPART or IMAGPART expression on the
 693    result of the cexpi call we insert before the use statement that
 694    dominates all other candidates.  */
 695
 696 static bool
 697 execute_cse_sincos_1 (tree name)
 698 {
 699   gimple_stmt_iterator gsi;
 700   imm_use_iterator use_iter;
 701   tree fndecl, res, type;
 702   gimple def_stmt, use_stmt, stmt;
 703   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
 704   VEC(gimple, heap) *stmts = NULL;
 705   basic_block top_bb = NULL;
 706   int i;
 707   bool cfg_changed = false;
 708
 709   type = TREE_TYPE (name);
 710   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
 711     {
 712       if (gimple_code (use_stmt) != GIMPLE_CALL
 713           || !gimple_call_lhs (use_stmt)
 714           || !(fndecl = gimple_call_fndecl (use_stmt))
 715           || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
 716         continue;
 717
 718       switch (DECL_FUNCTION_CODE (fndecl))
 719         {
 720         CASE_FLT_FN (BUILT_IN_COS):
 721           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 722           break;
 723
 724         CASE_FLT_FN (BUILT_IN_SIN):
 725           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 726           break;
 727
 728         CASE_FLT_FN (BUILT_IN_CEXPI):
 729           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
 730           break;
 731
 732         default:;
 733         }
 734     }
 735
 736   if (seen_cos + seen_sin + seen_cexpi <= 1)
 737     {
 738       VEC_free(gimple, heap, stmts);
 739       return false;
 740     }
 741
 742   /* Simply insert cexpi at the beginning of top_bb but not earlier than
 743      the name def statement.  */
 744   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
 745   if (!fndecl)
 746     return false;
 747   stmt = gimple_build_call (fndecl, 1, name);
 748   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
 749   gimple_call_set_lhs (stmt, res);
 750
 751   def_stmt = SSA_NAME_DEF_STMT (name);
 752   if (!SSA_NAME_IS_DEFAULT_DEF (name)
 753       && gimple_code (def_stmt) != GIMPLE_PHI
 754       && gimple_bb (def_stmt) == top_bb)
 755     {
 756       gsi = gsi_for_stmt (def_stmt);
 757       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
 758     }
 759   else
 760     {
 761       gsi = gsi_after_labels (top_bb);
 762       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 763     }
 764   sincos_stats.inserted++;
 765
 766   /* And adjust the recorded old call sites.  */
 767   for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
 768     {
 769       tree rhs = NULL;
 770       fndecl = gimple_call_fndecl (use_stmt);
 771
 772       switch (DECL_FUNCTION_CODE (fndecl))
 773         {
 774         CASE_FLT_FN (BUILT_IN_COS):
 775           rhs = fold_build1 (REALPART_EXPR, type, res);
 776           break;
 777
 778         CASE_FLT_FN (BUILT_IN_SIN):
 779           rhs = fold_build1 (IMAGPART_EXPR, type, res);
 780           break;
 781
 782         CASE_FLT_FN (BUILT_IN_CEXPI):
 783           rhs = res;
 784           break;
 785
 786         default:;
 787           gcc_unreachable ();
 788         }
 789
 790         /* Replace call with a copy.  */
 791         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
 792
 793         gsi = gsi_for_stmt (use_stmt);
 794         gsi_replace (&gsi, stmt, true);
 795         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
 796           cfg_changed = true;
 797     }
 798
 799   VEC_free(gimple, heap, stmts);
 800
 801   return cfg_changed;
 802 }
 803
 804 /* To evaluate powi(x,n), the floating point value x raised to the
 805    constant integer exponent n, we use a hybrid algorithm that
 806    combines the "window method" with look-up tables.  For an
 807    introduction to exponentiation algorithms and "addition chains",
 808    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
 809    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
 810    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
 811    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
 812
 813 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
 814    multiplications to inline before calling the system library's pow
 815    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
 816    so this default never requires calling pow, powf or powl.  */
 817
 818 #ifndef POWI_MAX_MULTS
 819 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
 820 #endif
 821
 822 /* The size of the "optimal power tree" lookup table.  All
 823    exponents less than this value are simply looked up in the
 824    powi_table below.  This threshold is also used to size the
 825    cache of pseudo registers that hold intermediate results.  */
 826 #define POWI_TABLE_SIZE 256
 827
 828 /* The size, in bits of the window, used in the "window method"
 829    exponentiation algorithm.  This is equivalent to a radix of
 830    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
 831 #define POWI_WINDOW_SIZE 3
 832
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".

   The routines below recurse on both j and i-j, so for every i >= 2
   the entry must satisfy 0 < j < i.  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
 875
 876
 877 /* Return the number of multiplications required to calculate
 878    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
 879    subroutine of powi_cost.  CACHE is an array indicating
 880    which exponents have already been calculated.  */
 881
 882 static int
 883 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
 884 {
 885   /* If we've already calculated this exponent, then this evaluation
 886      doesn't require any additional multiplications.  */
 887   if (cache[n])
 888     return 0;
 889
 890   cache[n] = true;
 891   return powi_lookup_cost (n - powi_table[n], cache)
 892          + powi_lookup_cost (powi_table[n], cache) + 1;
 893 }
 894
 895 /* Return the number of multiplications required to calculate
 896    powi(x,n) for an arbitrary x, given the exponent N.  This
 897    function needs to be kept in sync with powi_as_mults below.  */
 898
 899 static int
 900 powi_cost (HOST_WIDE_INT n)
 901 {
 902   bool cache[POWI_TABLE_SIZE];
 903   unsigned HOST_WIDE_INT digit;
 904   unsigned HOST_WIDE_INT val;
 905   int result;
 906
 907   if (n == 0)
 908     return 0;
 909
 910   /* Ignore the reciprocal when calculating the cost.  */
 911   val = (n < 0) ? -n : n;
 912
 913   /* Initialize the exponent cache.  */
 914   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
 915   cache[1] = true;
 916
 917   result = 0;
 918
 919   while (val >= POWI_TABLE_SIZE)
 920     {
 921       if (val & 1)
 922         {
 923           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
 924           result += powi_lookup_cost (digit, cache)
 925                     + POWI_WINDOW_SIZE + 1;
 926           val >>= POWI_WINDOW_SIZE;
 927         }
 928       else
 929         {
 930           val >>= 1;
 931           result++;
 932         }
 933     }
 934
 935   return result + powi_lookup_cost (val, cache);
 936 }
 937
 938 /* Recursive subroutine of powi_as_mults.  This function takes the
 939    array, CACHE, of already calculated exponents and an exponent N and
 940    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
 941
 942 static tree
 943 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
 944                  HOST_WIDE_INT n, tree *cache)
 945 {
 946   tree op0, op1, ssa_target;
 947   unsigned HOST_WIDE_INT digit;
 948   gimple mult_stmt;
 949
 950   if (n < POWI_TABLE_SIZE && cache[n])
 951     return cache[n];
 952
 953   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
 954
 955   if (n < POWI_TABLE_SIZE)
 956     {
 957       cache[n] = ssa_target;
 958       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
 959       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
 960     }
 961   else if (n & 1)
 962     {
 963       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
 964       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
 965       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
 966     }
 967   else
 968     {
 969       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
 970       op1 = op0;
 971     }
 972
 973   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
 974   gimple_set_location (mult_stmt, loc);
 975   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
 976
 977   return ssa_target;
 978 }
 979
 980 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
 981    This function needs to be kept in sync with powi_cost above.  */
 982
 983 static tree
 984 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
 985                tree arg0, HOST_WIDE_INT n)
 986 {
 987   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
 988   gimple div_stmt;
 989   tree target;
 990
 991   if (n == 0)
 992     return build_real (type, dconst1);
 993
 994   memset (cache, 0,  sizeof (cache));
 995   cache[1] = arg0;
 996
 997   result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
 998   if (n >= 0)
 999     return result;
1000
1001   /* If the original exponent was negative, reciprocate the result.  */
1002   target = make_temp_ssa_name (type, NULL, "powmult");
1003   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
1004                                            build_real (type, dconst1),
1005                                            result);
1006   gimple_set_location (div_stmt, loc);
1007   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1008
1009   return target;
1010 }
1011
1012 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1013    location info LOC.  If the arguments are appropriate, create an
1014    equivalent sequence of statements prior to GSI using an optimal
1015    number of multiplications, and return an expession holding the
1016    result.  */
1017
1018 static tree
1019 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1020                             tree arg0, HOST_WIDE_INT n)
1021 {
1022   /* Avoid largest negative number.  */
1023   if (n != -n
1024       && ((n >= -1 && n <= 2)
1025           || (optimize_function_for_speed_p (cfun)
1026               && powi_cost (n) <= POWI_MAX_MULTS)))
1027     return powi_as_mults (gsi, loc, arg0, n);
1028
1029   return NULL_TREE;
1030 }
1031
1032 /* Build a gimple call statement that calls FN with argument ARG.
1033    Set the lhs of the call statement to a fresh SSA name.  Insert the
1034    statement prior to GSI's current position, and return the fresh
1035    SSA name.  */
1036
1037 static tree
1038 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1039                        tree fn, tree arg)
1040 {
1041   gimple call_stmt;
1042   tree ssa_target;
1043
1044   call_stmt = gimple_build_call (fn, 1, arg);
1045   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1046   gimple_set_lhs (call_stmt, ssa_target);
1047   gimple_set_location (call_stmt, loc);
1048   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1049
1050   return ssa_target;
1051 }
1052
1053 /* Build a gimple binary operation with the given CODE and arguments
1054    ARG0, ARG1, assigning the result to a new SSA name for variable
1055    TARGET.  Insert the statement prior to GSI's current position, and
1056    return the fresh SSA name.*/
1057
1058 static tree
1059 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1060                         const char *name, enum tree_code code,
1061                         tree arg0, tree arg1)
1062 {
1063   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1064   gimple stmt = gimple_build_assign_with_ops (code, result, arg0, arg1);
1065   gimple_set_location (stmt, loc);
1066   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1067   return result;
1068 }
1069
1070 /* Build a gimple reference operation with the given CODE and argument
1071    ARG, assigning the result to a new SSA name of TYPE with NAME.
1072    Insert the statement prior to GSI's current position, and return
1073    the fresh SSA name.  */
1074
1075 static inline tree
1076 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1077                       const char *name, enum tree_code code, tree arg0)
1078 {
1079   tree result = make_temp_ssa_name (type, NULL, name);
1080   gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1081   gimple_set_location (stmt, loc);
1082   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1083   return result;
1084 }
1085
1086 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1087    prior to GSI's current position, and return the fresh SSA name.  */
1088
1089 static tree
1090 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1091                        tree type, tree val)
1092 {
1093   tree result = make_ssa_name (type, NULL);
1094   gimple stmt = gimple_build_assign_with_ops (NOP_EXPR, result, val, NULL_TREE);
1095   gimple_set_location (stmt, loc);
1096   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1097   return result;
1098 }
1099
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  Return NULL_TREE when ARG1 is not
   a REAL_CST or when none of the transformations below applies.

   The cases are tried in this order: integral exponents, 0.5, 0.25,
   0.75, 1/3, 1/6, exponents that are a multiple of 1/2, and
   exponents that are a multiple of 1/3.  */

static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
                           tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_4, dconst3_4, dconst1_3, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, sqrt_sqrt, result, cbrt_x, powi_cbrt_x;
  enum machine_mode mode;
  bool hw_sqrt_exists;

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  CINT is N converted
     back to a real, so comparing it with C tells whether C is
     integral.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);

  /* Note that the result of gimple_expand_builtin_powi is returned
     even when it is NULL_TREE; the root-based cases below are not
     tried for such exponents.  */
  if (real_identical (&c, &cint)
      && ((n >= -1 && n <= 2)
          || (flag_unsafe_math_optimizations
              && optimize_insn_for_speed_p ()
              && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && REAL_VALUES_EQUAL (c, dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  Assume on most machines that
     a builtin sqrt instruction is smaller than a call to pow with 0.25,
     so do this optimization even if -Os.  Don't do this optimization
     if we don't have a hardware sqrt insn.  The constant 0.25 is
     formed from 1.0 by lowering its exponent by two.  */
  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && REAL_VALUES_EQUAL (c, dconst1_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)) unless we are
     optimizing for space.  Don't do this optimization if we don't have
     a hardware sqrt insn.  The constant 0.75 is formed from 3.0 by
     lowering its exponent by two.  */
  real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
  SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && optimize_function_for_speed_p (cfun)
      && REAL_VALUES_EQUAL (c, dconst3_4)
      && hw_sqrt_exists)
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* sqrt(sqrt(x))  */
      sqrt_sqrt = build_and_insert_call (gsi, loc, sqrtfn, sqrt_arg0);

      /* sqrt(x) * sqrt(sqrt(x))  */
      return build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
                                     sqrt_arg0, sqrt_sqrt);
    }

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && REAL_VALUES_EQUAL (c, dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  The constant is formed
     from the truncated 1./3. by lowering its exponent by one.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && optimize_function_for_speed_p (cfun)
      && hw_sqrt_exists
      && REAL_VALUES_EQUAL (c, dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Optimize pow(x,c), where n = 2c for some nonzero integer n, into

       sqrt(x) * powi(x, n/2),                n > 0;
       1.0 / (sqrt(x) * powi(x, abs(n/2))),   n < 0.

     Do not calculate the powi factor when n/2 = 0.  From here on N
     holds 2c rather than c.  */
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && real_identical (&c2, &cint))
    {
      tree powi_x_ndiv2 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/2)) into multiplies.  If not
         possible or profitable, give up.  Skip the degenerate case when
         n is 1 or -1, where the result is always 1.  */
      if (absu_hwi (n) != 1)
        {
          powi_x_ndiv2 = gimple_expand_builtin_powi (gsi, loc, arg0,
                                                     abs_hwi (n / 2));
          if (!powi_x_ndiv2)
            return NULL_TREE;
        }

      /* Calculate sqrt(x).  When n is not 1 or -1, multiply it by the
         result of the optimal multiply sequence just calculated.  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      if (absu_hwi (n) == 1)
        result = sqrt_arg0;
      else
        result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
                                         sqrt_arg0, powi_x_ndiv2);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
        result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
                                         build_real (type, dconst1), result);
      return result;
    }

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.

     Since 3c need not be exactly integral, it is rounded to N first;
     N/3 is then recomputed in MODE into C2 and compared with C
     below.  */
  real_from_integer (&dconst3, VOIDmode, 3, 0, 0);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, n < 0 ? -1 : 0, 0);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && real_identical (&c2, &c)
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
         possible or profitable, give up.  Skip the degenerate case when
         abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
        {
          powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
                                                     abs_hwi (n / 3));
          if (!powi_x_ndiv3)
            return NULL_TREE;
        }

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
         as that creates an unnecessary variable.  Instead, just produce
         either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
        powi_cbrt_x = cbrt_x;
      else
        powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
                                              cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
        result = powi_cbrt_x;
      else
        result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
                                         powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
        result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
                                         build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
1335
1336 /* ARG is the argument to a cabs builtin call in GSI with location info
1337    LOC.  Create a sequence of statements prior to GSI that calculates
1338    sqrt(R*R + I*I), where R and I are the real and imaginary components
1339    of ARG, respectively.  Return an expression holding the result.  */
1340
1341 static tree
1342 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1343 {
1344   tree real_part, imag_part, addend1, addend2, sum, result;
1345   tree type = TREE_TYPE (TREE_TYPE (arg));
1346   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1347   enum machine_mode mode = TYPE_MODE (type);
1348
1349   if (!flag_unsafe_math_optimizations
1350       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1351       || !sqrtfn
1352       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1353     return NULL_TREE;
1354
1355   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1356                                     REALPART_EXPR, arg);
1357   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1358                                     real_part, real_part);
1359   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1360                                     IMAGPART_EXPR, arg);
1361   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1362                                     imag_part, imag_part);
1363   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1364   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1365
1366   return result;
1367 }
1368
1369 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1370    on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
1371    an optimal number of multiplies, when n is a constant.  */
1372
1373 static unsigned int
1374 execute_cse_sincos (void)
1375 {
1376   basic_block bb;
1377   bool cfg_changed = false;
1378
1379   calculate_dominance_info (CDI_DOMINATORS);
1380   memset (&sincos_stats, 0, sizeof (sincos_stats));
1381
1382   FOR_EACH_BB (bb)
1383     {
1384       gimple_stmt_iterator gsi;
1385       bool cleanup_eh = false;
1386
1387       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1388         {
1389           gimple stmt = gsi_stmt (gsi);
1390           tree fndecl;
1391
1392           /* Only the last stmt in a bb could throw, no need to call
1393              gimple_purge_dead_eh_edges if we change something in the middle
1394              of a basic block.  */
1395           cleanup_eh = false;
1396
1397           if (is_gimple_call (stmt)
1398               && gimple_call_lhs (stmt)
1399               && (fndecl = gimple_call_fndecl (stmt))
1400               && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1401             {
1402               tree arg, arg0, arg1, result;
1403               HOST_WIDE_INT n;
1404               location_t loc;
1405
1406               switch (DECL_FUNCTION_CODE (fndecl))
1407                 {
1408                 CASE_FLT_FN (BUILT_IN_COS):
1409                 CASE_FLT_FN (BUILT_IN_SIN):
1410                 CASE_FLT_FN (BUILT_IN_CEXPI):
1411                   /* Make sure we have either sincos or cexp.  */
1412                   if (!TARGET_HAS_SINCOS && !TARGET_C99_FUNCTIONS)
1413                     break;
1414
1415                   arg = gimple_call_arg (stmt, 0);
1416                   if (TREE_CODE (arg) == SSA_NAME)
1417                     cfg_changed |= execute_cse_sincos_1 (arg);
1418                   break;
1419
1420                 CASE_FLT_FN (BUILT_IN_POW):
1421                   arg0 = gimple_call_arg (stmt, 0);
1422                   arg1 = gimple_call_arg (stmt, 1);
1423
1424                   loc = gimple_location (stmt);
1425                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1426
1427                   if (result)
1428                     {
1429                       tree lhs = gimple_get_lhs (stmt);
1430                       gimple new_stmt = gimple_build_assign (lhs, result);
1431                       gimple_set_location (new_stmt, loc);
1432                       unlink_stmt_vdef (stmt);
1433                       gsi_replace (&gsi, new_stmt, true);
1434                       cleanup_eh = true;
1435                       if (gimple_vdef (stmt))
1436                         release_ssa_name (gimple_vdef (stmt));
1437                     }
1438                   break;
1439
1440                 CASE_FLT_FN (BUILT_IN_POWI):
1441                   arg0 = gimple_call_arg (stmt, 0);
1442                   arg1 = gimple_call_arg (stmt, 1);
1443                   if (!host_integerp (arg1, 0))
1444                     break;
1445
1446                   n = TREE_INT_CST_LOW (arg1);
1447                   loc = gimple_location (stmt);
1448                   result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1449
1450                   if (result)
1451                     {
1452                       tree lhs = gimple_get_lhs (stmt);
1453                       gimple new_stmt = gimple_build_assign (lhs, result);
1454                       gimple_set_location (new_stmt, loc);
1455                       unlink_stmt_vdef (stmt);
1456                       gsi_replace (&gsi, new_stmt, true);
1457                       cleanup_eh = true;
1458                       if (gimple_vdef (stmt))
1459                         release_ssa_name (gimple_vdef (stmt));
1460                     }
1461                   break;
1462
1463                 CASE_FLT_FN (BUILT_IN_CABS):
1464                   arg0 = gimple_call_arg (stmt, 0);
1465                   loc = gimple_location (stmt);
1466                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1467
1468                   if (result)
1469                     {
1470                       tree lhs = gimple_get_lhs (stmt);
1471                       gimple new_stmt = gimple_build_assign (lhs, result);
1472                       gimple_set_location (new_stmt, loc);
1473                       unlink_stmt_vdef (stmt);
1474                       gsi_replace (&gsi, new_stmt, true);
1475                       cleanup_eh = true;
1476                       if (gimple_vdef (stmt))
1477                         release_ssa_name (gimple_vdef (stmt));
1478                     }
1479                   break;
1480
1481                 default:;
1482                 }
1483             }
1484         }
1485       if (cleanup_eh)
1486         cfg_changed |= gimple_purge_dead_eh_edges (bb);
1487     }
1488
1489   statistics_counter_event (cfun, "sincos statements inserted",
1490                             sincos_stats.inserted);
1491
1492   free_dominance_info (CDI_DOMINATORS);
1493   return cfg_changed ? TODO_cleanup_cfg : 0;
1494 }
1495
1496 static bool
1497 gate_cse_sincos (void)
1498 {
1499   /* We no longer require either sincos or cexp, since powi expansion
1500      piggybacks on this pass.  */
1501   return optimize;
1502 }
1503
/* Descriptor of the "sincos" GIMPLE pass.  The pass is gated by
   gate_cse_sincos and performed by execute_cse_sincos; it requires
   SSA form and asks for an SSA update and for SSA and statement
   verification once it has finished.  */

struct gimple_opt_pass pass_cse_sincos =
{
 {
  GIMPLE_PASS,
  "sincos",                             /* name */
  OPTGROUP_NONE,                        /* optinfo_flags */
  gate_cse_sincos,                      /* gate */
  execute_cse_sincos,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_NONE,                              /* tv_id */
  PROP_ssa,                             /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts                 /* todo_flags_finish */
 }
};
1524
/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of byte size markers:

   0    - byte has the value 0
   1..size - byte contains the content of the byte
   number indexed with that value minus one  */

struct symbolic_number {
  /* The byte markers described above, one marker per byte.  */
  unsigned HOST_WIDEST_INT n;
  /* Size of the tracked value, in units of BITS_PER_UNIT bits.  */
  int size;
};
1537
1538 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1539    number N.  Return false if the requested operation is not permitted
1540    on a symbolic number.  */
1541
1542 static inline bool
1543 do_shift_rotate (enum tree_code code,
1544                  struct symbolic_number *n,
1545                  int count)
1546 {
1547   if (count % 8 != 0)
1548     return false;
1549
1550   /* Zero out the extra bits of N in order to avoid them being shifted
1551      into the significant bits.  */
1552   if (n->size < (int)sizeof (HOST_WIDEST_INT))
1553     n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
1554
1555   switch (code)
1556     {
1557     case LSHIFT_EXPR:
1558       n->n <<= count;
1559       break;
1560     case RSHIFT_EXPR:
1561       n->n >>= count;
1562       break;
1563     case LROTATE_EXPR:
1564       n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
1565       break;
1566     case RROTATE_EXPR:
1567       n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
1568       break;
1569     default:
1570       return false;
1571     }
1572   /* Zero unused bits for size.  */
1573   if (n->size < (int)sizeof (HOST_WIDEST_INT))
1574     n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
1575   return true;
1576 }
1577
1578 /* Perform sanity checking for the symbolic number N and the gimple
1579    statement STMT.  */
1580
1581 static inline bool
1582 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
1583 {
1584   tree lhs_type;
1585
1586   lhs_type = gimple_expr_type (stmt);
1587
1588   if (TREE_CODE (lhs_type) != INTEGER_TYPE)
1589     return false;
1590
1591   if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
1592     return false;
1593
1594   return true;
1595 }
1596
1597 /* find_bswap_1 invokes itself recursively with N and tries to perform
1598    the operation given by the rhs of STMT on the result.  If the
1599    operation could successfully be executed the function returns the
1600    tree expression of the source operand and NULL otherwise.  */
1601
1602 static tree
1603 find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
1604 {
1605   enum tree_code code;
1606   tree rhs1, rhs2 = NULL;
1607   gimple rhs1_stmt, rhs2_stmt;
1608   tree source_expr1;
1609   enum gimple_rhs_class rhs_class;
1610
1611   if (!limit || !is_gimple_assign (stmt))
1612     return NULL_TREE;
1613
1614   rhs1 = gimple_assign_rhs1 (stmt);
1615
1616   if (TREE_CODE (rhs1) != SSA_NAME)
1617     return NULL_TREE;
1618
1619   code = gimple_assign_rhs_code (stmt);
1620   rhs_class = gimple_assign_rhs_class (stmt);
1621   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
1622
1623   if (rhs_class == GIMPLE_BINARY_RHS)
1624     rhs2 = gimple_assign_rhs2 (stmt);
1625
1626   /* Handle unary rhs and binary rhs with integer constants as second
1627      operand.  */
1628
1629   if (rhs_class == GIMPLE_UNARY_RHS
1630       || (rhs_class == GIMPLE_BINARY_RHS
1631           && TREE_CODE (rhs2) == INTEGER_CST))
1632     {
1633       if (code != BIT_AND_EXPR
1634           && code != LSHIFT_EXPR
1635           && code != RSHIFT_EXPR
1636           && code != LROTATE_EXPR
1637           && code != RROTATE_EXPR
1638           && code != NOP_EXPR
1639           && code != CONVERT_EXPR)
1640         return NULL_TREE;
1641
1642       source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);
1643
1644       /* If find_bswap_1 returned NULL STMT is a leaf node and we have
1645          to initialize the symbolic number.  */
1646       if (!source_expr1)
1647         {
1648           /* Set up the symbolic number N by setting each byte to a
1649              value between 1 and the byte size of rhs1.  The highest
1650              order byte is set to n->size and the lowest order
1651              byte to 1.  */
1652           n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
1653           if (n->size % BITS_PER_UNIT != 0)
1654             return NULL_TREE;
1655           n->size /= BITS_PER_UNIT;
1656           n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1657                   (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);
1658
1659           if (n->size < (int)sizeof (HOST_WIDEST_INT))
1660             n->n &= ((unsigned HOST_WIDEST_INT)1 <<
1661                      (n->size * BITS_PER_UNIT)) - 1;
1662
1663           source_expr1 = rhs1;
1664         }
1665
1666       switch (code)
1667         {
1668         case BIT_AND_EXPR:
1669           {
1670             int i;
1671             unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
1672             unsigned HOST_WIDEST_INT tmp = val;
1673
1674             /* Only constants masking full bytes are allowed.  */
1675             for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
1676               if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
1677                 return NULL_TREE;
1678
1679             n->n &= val;
1680           }
1681           break;
1682         case LSHIFT_EXPR:
1683         case RSHIFT_EXPR:
1684         case LROTATE_EXPR:
1685         case RROTATE_EXPR:
1686           if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
1687             return NULL_TREE;
1688           break;
1689         CASE_CONVERT:
1690           {
1691             int type_size;
1692
1693             type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1694             if (type_size % BITS_PER_UNIT != 0)
1695               return NULL_TREE;
1696
1697             if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
1698               {
1699                 /* If STMT casts to a smaller type mask out the bits not
1700                    belonging to the target type.  */
1701                 n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
1702               }
1703             n->size = type_size / BITS_PER_UNIT;
1704           }
1705           break;
1706         default:
1707           return NULL_TREE;
1708         };
1709       return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
1710     }
1711
1712   /* Handle binary rhs.  */
1713
1714   if (rhs_class == GIMPLE_BINARY_RHS)
1715     {
1716       struct symbolic_number n1, n2;
1717       tree source_expr2;
1718
1719       if (code != BIT_IOR_EXPR)
1720         return NULL_TREE;
1721
1722       if (TREE_CODE (rhs2) != SSA_NAME)
1723         return NULL_TREE;
1724
1725       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
1726
1727       switch (code)
1728         {
1729         case BIT_IOR_EXPR:
1730           source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);
1731
1732           if (!source_expr1)
1733             return NULL_TREE;
1734
1735           source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);
1736
1737           if (source_expr1 != source_expr2
1738               || n1.size != n2.size)
1739             return NULL_TREE;
1740
1741           n->size = n1.size;
1742           n->n = n1.n | n2.n;
1743
1744           if (!verify_symbolic_number_p (n, stmt))
1745             return NULL_TREE;
1746
1747           break;
1748         default:
1749           return NULL_TREE;
1750         }
1751       return source_expr1;
1752     }
1753   return NULL_TREE;
1754 }
1755
1756 /* Check if STMT completes a bswap implementation consisting of ORs,
1757    SHIFTs and ANDs.  Return the source tree expression on which the
1758    byte swap is performed and NULL if no bswap was found.  */
1759
1760 static tree
1761 find_bswap (gimple stmt)
1762 {
1763 /* The number which the find_bswap result should match in order to
1764    have a full byte swap.  The number is shifted to the left according
1765    to the size of the symbolic number before using it.  */
1766   unsigned HOST_WIDEST_INT cmp =
1767     sizeof (HOST_WIDEST_INT) < 8 ? 0 :
1768     (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;
1769
1770   struct symbolic_number n;
1771   tree source_expr;
1772   int limit;
1773
1774   /* The last parameter determines the depth search limit.  It usually
1775      correlates directly to the number of bytes to be touched.  We
1776      increase that number by three  here in order to also
1777      cover signed -> unsigned converions of the src operand as can be seen
1778      in libgcc, and for initial shift/and operation of the src operand.  */
1779   limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
1780   limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
1781   source_expr =  find_bswap_1 (stmt, &n, limit);
1782
1783   if (!source_expr)
1784     return NULL_TREE;
1785
1786   /* Zero out the extra bits of N and CMP.  */
1787   if (n.size < (int)sizeof (HOST_WIDEST_INT))
1788     {
1789       unsigned HOST_WIDEST_INT mask =
1790         ((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;
1791
1792       n.n &= mask;
1793       cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
1794     }
1795
1796   /* A complete byte swap should make the symbolic number to start
1797      with the largest digit in the highest order byte.  */
1798   if (cmp != n.n)
1799     return NULL_TREE;
1800
1801   return source_expr;
1802 }
1803
1804 /* Find manual byte swap implementations and turn them into a bswap
1805    builtin invokation.  */
1806
1807 static unsigned int
1808 execute_optimize_bswap (void)
1809 {
1810   basic_block bb;
1811   bool bswap16_p, bswap32_p, bswap64_p;
1812   bool changed = false;
1813   tree bswap16_type = NULL_TREE, bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1814
1815   if (BITS_PER_UNIT != 8)
1816     return 0;
1817
1818   if (sizeof (HOST_WIDEST_INT) < 8)
1819     return 0;
1820
1821   bswap16_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP16)
1822                && optab_handler (bswap_optab, HImode) != CODE_FOR_nothing);
1823   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1824                && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1825   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1826                && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1827                    || (bswap32_p && word_mode == SImode)));
1828
1829   if (!bswap16_p && !bswap32_p && !bswap64_p)
1830     return 0;
1831
1832   /* Determine the argument type of the builtins.  The code later on
1833      assumes that the return and argument type are the same.  */
1834   if (bswap16_p)
1835     {
1836       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
1837       bswap16_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1838     }
1839
1840   if (bswap32_p)
1841     {
1842       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1843       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1844     }
1845
1846   if (bswap64_p)
1847     {
1848       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1849       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1850     }
1851
1852   memset (&bswap_stats, 0, sizeof (bswap_stats));
1853
1854   FOR_EACH_BB (bb)
1855     {
1856       gimple_stmt_iterator gsi;
1857
1858       /* We do a reverse scan for bswap patterns to make sure we get the
1859          widest match. As bswap pattern matching doesn't handle
1860          previously inserted smaller bswap replacements as sub-
1861          patterns, the wider variant wouldn't be detected.  */
1862       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1863         {
1864           gimple stmt = gsi_stmt (gsi);
1865           tree bswap_src, bswap_type;
1866           tree bswap_tmp;
1867           tree fndecl = NULL_TREE;
1868           int type_size;
1869           gimple call;
1870
1871           if (!is_gimple_assign (stmt)
1872               || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
1873             continue;
1874
1875           type_size = TYPE_PRECISION (gimple_expr_type (stmt));
1876
1877           switch (type_size)
1878             {
1879             case 16:
1880               if (bswap16_p)
1881                 {
1882                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP16);
1883                   bswap_type = bswap16_type;
1884                 }
1885               break;
1886             case 32:
1887               if (bswap32_p)
1888                 {
1889                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1890                   bswap_type = bswap32_type;
1891                 }
1892               break;
1893             case 64:
1894               if (bswap64_p)
1895                 {
1896                   fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1897                   bswap_type = bswap64_type;
1898                 }
1899               break;
1900             default:
1901               continue;
1902             }
1903
1904           if (!fndecl)
1905             continue;
1906
1907           bswap_src = find_bswap (stmt);
1908
1909           if (!bswap_src)
1910             continue;
1911
1912           changed = true;
1913           if (type_size == 16)
1914             bswap_stats.found_16bit++;
1915           else if (type_size == 32)
1916             bswap_stats.found_32bit++;
1917           else
1918             bswap_stats.found_64bit++;
1919
1920           bswap_tmp = bswap_src;
1921
1922           /* Convert the src expression if necessary.  */
1923           if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1924             {
1925               gimple convert_stmt;
1926               bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1927               convert_stmt = gimple_build_assign_with_ops
1928                                 (NOP_EXPR, bswap_tmp, bswap_src, NULL);
1929               gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1930             }
1931
1932           call = gimple_build_call (fndecl, 1, bswap_tmp);
1933
1934           bswap_tmp = gimple_assign_lhs (stmt);
1935
1936           /* Convert the result if necessary.  */
1937           if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
1938             {
1939               gimple convert_stmt;
1940               bswap_tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1941               convert_stmt = gimple_build_assign_with_ops
1942                         (NOP_EXPR, gimple_assign_lhs (stmt), bswap_tmp, NULL);
1943               gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1944             }
1945
1946           gimple_call_set_lhs (call, bswap_tmp);
1947
1948           if (dump_file)
1949             {
1950               fprintf (dump_file, "%d bit bswap implementation found at: ",
1951                        (int)type_size);
1952               print_gimple_stmt (dump_file, stmt, 0, 0);
1953             }
1954
1955           gsi_insert_after (&gsi, call, GSI_SAME_STMT);
1956           gsi_remove (&gsi, true);
1957         }
1958     }
1959
1960   statistics_counter_event (cfun, "16-bit bswap implementations found",
1961                             bswap_stats.found_16bit);
1962   statistics_counter_event (cfun, "32-bit bswap implementations found",
1963                             bswap_stats.found_32bit);
1964   statistics_counter_event (cfun, "64-bit bswap implementations found",
1965                             bswap_stats.found_64bit);
1966
1967   return (changed ? TODO_update_ssa | TODO_verify_ssa
1968           | TODO_verify_stmts : 0);
1969 }
1970
1971 static bool
1972 gate_optimize_bswap (void)
1973 {
1974   return flag_expensive_optimizations && optimize;
1975 }
1976
1977 struct gimple_opt_pass pass_optimize_bswap =
1978 {
1979  {
1980   GIMPLE_PASS,
1981   "bswap",                              /* name */
1982   OPTGROUP_NONE,                        /* optinfo_flags */
1983   gate_optimize_bswap,                  /* gate */
1984   execute_optimize_bswap,               /* execute */
1985   NULL,                                 /* sub */
1986   NULL,                                 /* next */
1987   0,                                    /* static_pass_number */
1988   TV_NONE,                              /* tv_id */
1989   PROP_ssa,                             /* properties_required */
1990   0,                                    /* properties_provided */
1991   0,                                    /* properties_destroyed */
1992   0,                                    /* todo_flags_start */
1993   0                                     /* todo_flags_finish */
1994  }
1995 };
1996
1997 /* Return true if stmt is a type conversion operation that can be stripped
1998    when used in a widening multiply operation.  */
1999 static bool
2000 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2001 {
2002   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2003
2004   if (TREE_CODE (result_type) == INTEGER_TYPE)
2005     {
2006       tree op_type;
2007       tree inner_op_type;
2008
2009       if (!CONVERT_EXPR_CODE_P (rhs_code))
2010         return false;
2011
2012       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2013
2014       /* If the type of OP has the same precision as the result, then
2015          we can strip this conversion.  The multiply operation will be
2016          selected to create the correct extension as a by-product.  */
2017       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2018         return true;
2019
2020       /* We can also strip a conversion if it preserves the signed-ness of
2021          the operation and doesn't narrow the range.  */
2022       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2023
2024       /* If the inner-most type is unsigned, then we can strip any
2025          intermediate widening operation.  If it's signed, then the
2026          intermediate widening operation must also be signed.  */
2027       if ((TYPE_UNSIGNED (inner_op_type)
2028            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2029           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2030         return true;
2031
2032       return false;
2033     }
2034
2035   return rhs_code == FIXED_CONVERT_EXPR;
2036 }
2037
2038 /* Return true if RHS is a suitable operand for a widening multiplication,
2039    assuming a target type of TYPE.
2040    There are two cases:
2041
2042      - RHS makes some value at least twice as wide.  Store that value
2043        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2044
2045      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2046        but leave *TYPE_OUT untouched.  */
2047
2048 static bool
2049 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2050                         tree *new_rhs_out)
2051 {
2052   gimple stmt;
2053   tree type1, rhs1;
2054
2055   if (TREE_CODE (rhs) == SSA_NAME)
2056     {
2057       stmt = SSA_NAME_DEF_STMT (rhs);
2058       if (is_gimple_assign (stmt))
2059         {
2060           if (! widening_mult_conversion_strippable_p (type, stmt))
2061             rhs1 = rhs;
2062           else
2063             {
2064               rhs1 = gimple_assign_rhs1 (stmt);
2065
2066               if (TREE_CODE (rhs1) == INTEGER_CST)
2067                 {
2068                   *new_rhs_out = rhs1;
2069                   *type_out = NULL;
2070                   return true;
2071                 }
2072             }
2073         }
2074       else
2075         rhs1 = rhs;
2076
2077       type1 = TREE_TYPE (rhs1);
2078
2079       if (TREE_CODE (type1) != TREE_CODE (type)
2080           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2081         return false;
2082
2083       *new_rhs_out = rhs1;
2084       *type_out = type1;
2085       return true;
2086     }
2087
2088   if (TREE_CODE (rhs) == INTEGER_CST)
2089     {
2090       *new_rhs_out = rhs;
2091       *type_out = NULL;
2092       return true;
2093     }
2094
2095   return false;
2096 }
2097
2098 /* Return true if STMT performs a widening multiplication, assuming the
2099    output type is TYPE.  If so, store the unwidened types of the operands
2100    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2101    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2102    and *TYPE2_OUT would give the operands of the multiplication.  */
2103
2104 static bool
2105 is_widening_mult_p (gimple stmt,
2106                     tree *type1_out, tree *rhs1_out,
2107                     tree *type2_out, tree *rhs2_out)
2108 {
2109   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2110
2111   if (TREE_CODE (type) != INTEGER_TYPE
2112       && TREE_CODE (type) != FIXED_POINT_TYPE)
2113     return false;
2114
2115   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2116                                rhs1_out))
2117     return false;
2118
2119   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2120                                rhs2_out))
2121     return false;
2122
2123   if (*type1_out == NULL)
2124     {
2125       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2126         return false;
2127       *type1_out = *type2_out;
2128     }
2129
2130   if (*type2_out == NULL)
2131     {
2132       if (!int_fits_type_p (*rhs2_out, *type1_out))
2133         return false;
2134       *type2_out = *type1_out;
2135     }
2136
2137   /* Ensure that the larger of the two operands comes first. */
2138   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2139     {
2140       tree tmp;
2141       tmp = *type1_out;
2142       *type1_out = *type2_out;
2143       *type2_out = tmp;
2144       tmp = *rhs1_out;
2145       *rhs1_out = *rhs2_out;
2146       *rhs2_out = tmp;
2147     }
2148
2149   return true;
2150 }
2151
2152 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2153    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2154    value is true iff we converted the statement.  */
2155
2156 static bool
2157 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
2158 {
2159   tree lhs, rhs1, rhs2, type, type1, type2;
2160   enum insn_code handler;
2161   enum machine_mode to_mode, from_mode, actual_mode;
2162   optab op;
2163   int actual_precision;
2164   location_t loc = gimple_location (stmt);
2165   bool from_unsigned1, from_unsigned2;
2166
2167   lhs = gimple_assign_lhs (stmt);
2168   type = TREE_TYPE (lhs);
2169   if (TREE_CODE (type) != INTEGER_TYPE)
2170     return false;
2171
2172   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2173     return false;
2174
2175   to_mode = TYPE_MODE (type);
2176   from_mode = TYPE_MODE (type1);
2177   from_unsigned1 = TYPE_UNSIGNED (type1);
2178   from_unsigned2 = TYPE_UNSIGNED (type2);
2179
2180   if (from_unsigned1 && from_unsigned2)
2181     op = umul_widen_optab;
2182   else if (!from_unsigned1 && !from_unsigned2)
2183     op = smul_widen_optab;
2184   else
2185     op = usmul_widen_optab;
2186
2187   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2188                                                   0, &actual_mode);
2189
2190   if (handler == CODE_FOR_nothing)
2191     {
2192       if (op != smul_widen_optab)
2193         {
2194           /* We can use a signed multiply with unsigned types as long as
2195              there is a wider mode to use, or it is the smaller of the two
2196              types that is unsigned.  Note that type1 >= type2, always.  */
2197           if ((TYPE_UNSIGNED (type1)
2198                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2199               || (TYPE_UNSIGNED (type2)
2200                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2201             {
2202               from_mode = GET_MODE_WIDER_MODE (from_mode);
2203               if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2204                 return false;
2205             }
2206
2207           op = smul_widen_optab;
2208           handler = find_widening_optab_handler_and_mode (op, to_mode,
2209                                                           from_mode, 0,
2210                                                           &actual_mode);
2211
2212           if (handler == CODE_FOR_nothing)
2213             return false;
2214
2215           from_unsigned1 = from_unsigned2 = false;
2216         }
2217       else
2218         return false;
2219     }
2220
2221   /* Ensure that the inputs to the handler are in the correct precison
2222      for the opcode.  This will be the full mode size.  */
2223   actual_precision = GET_MODE_PRECISION (actual_mode);
2224   if (2 * actual_precision > TYPE_PRECISION (type))
2225     return false;
2226   if (actual_precision != TYPE_PRECISION (type1)
2227       || from_unsigned1 != TYPE_UNSIGNED (type1))
2228     rhs1 = build_and_insert_cast (gsi, loc,
2229                                   build_nonstandard_integer_type
2230                                     (actual_precision, from_unsigned1), rhs1);
2231   if (actual_precision != TYPE_PRECISION (type2)
2232       || from_unsigned2 != TYPE_UNSIGNED (type2))
2233     rhs2 = build_and_insert_cast (gsi, loc,
2234                                   build_nonstandard_integer_type
2235                                     (actual_precision, from_unsigned2), rhs2);
2236
2237   /* Handle constants.  */
2238   if (TREE_CODE (rhs1) == INTEGER_CST)
2239     rhs1 = fold_convert (type1, rhs1);
2240   if (TREE_CODE (rhs2) == INTEGER_CST)
2241     rhs2 = fold_convert (type2, rhs2);
2242
2243   gimple_assign_set_rhs1 (stmt, rhs1);
2244   gimple_assign_set_rhs2 (stmt, rhs2);
2245   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2246   update_stmt (stmt);
2247   widen_mul_stats.widen_mults_inserted++;
2248   return true;
2249 }
2250
2251 /* Process a single gimple statement STMT, which is found at the
2252    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2253    rhs (given by CODE), and try to convert it into a
2254    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2255    is true iff we converted the statement.  */
2256
2257 static bool
2258 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
2259                             enum tree_code code)
2260 {
2261   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
2262   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
2263   tree type, type1, type2, optype;
2264   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2265   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2266   optab this_optab;
2267   enum tree_code wmult_code;
2268   enum insn_code handler;
2269   enum machine_mode to_mode, from_mode, actual_mode;
2270   location_t loc = gimple_location (stmt);
2271   int actual_precision;
2272   bool from_unsigned1, from_unsigned2;
2273
2274   lhs = gimple_assign_lhs (stmt);
2275   type = TREE_TYPE (lhs);
2276   if (TREE_CODE (type) != INTEGER_TYPE
2277       && TREE_CODE (type) != FIXED_POINT_TYPE)
2278     return false;
2279
2280   if (code == MINUS_EXPR)
2281     wmult_code = WIDEN_MULT_MINUS_EXPR;
2282   else
2283     wmult_code = WIDEN_MULT_PLUS_EXPR;
2284
2285   rhs1 = gimple_assign_rhs1 (stmt);
2286   rhs2 = gimple_assign_rhs2 (stmt);
2287
2288   if (TREE_CODE (rhs1) == SSA_NAME)
2289     {
2290       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2291       if (is_gimple_assign (rhs1_stmt))
2292         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2293     }
2294
2295   if (TREE_CODE (rhs2) == SSA_NAME)
2296     {
2297       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2298       if (is_gimple_assign (rhs2_stmt))
2299         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2300     }
2301
2302   /* Allow for one conversion statement between the multiply
2303      and addition/subtraction statement.  If there are more than
2304      one conversions then we assume they would invalidate this
2305      transformation.  If that's not the case then they should have
2306      been folded before now.  */
2307   if (CONVERT_EXPR_CODE_P (rhs1_code))
2308     {
2309       conv1_stmt = rhs1_stmt;
2310       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2311       if (TREE_CODE (rhs1) == SSA_NAME)
2312         {
2313           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2314           if (is_gimple_assign (rhs1_stmt))
2315             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2316         }
2317       else
2318         return false;
2319     }
2320   if (CONVERT_EXPR_CODE_P (rhs2_code))
2321     {
2322       conv2_stmt = rhs2_stmt;
2323       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2324       if (TREE_CODE (rhs2) == SSA_NAME)
2325         {
2326           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2327           if (is_gimple_assign (rhs2_stmt))
2328             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2329         }
2330       else
2331         return false;
2332     }
2333
2334   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2335      is_widening_mult_p, but we still need the rhs returns.
2336
2337      It might also appear that it would be sufficient to use the existing
2338      operands of the widening multiply, but that would limit the choice of
2339      multiply-and-accumulate instructions.  */
2340   if (code == PLUS_EXPR
2341       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2342     {
2343       if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2344                                &type2, &mult_rhs2))
2345         return false;
2346       add_rhs = rhs2;
2347       conv_stmt = conv1_stmt;
2348     }
2349   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2350     {
2351       if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2352                                &type2, &mult_rhs2))
2353         return false;
2354       add_rhs = rhs1;
2355       conv_stmt = conv2_stmt;
2356     }
2357   else
2358     return false;
2359
2360   to_mode = TYPE_MODE (type);
2361   from_mode = TYPE_MODE (type1);
2362   from_unsigned1 = TYPE_UNSIGNED (type1);
2363   from_unsigned2 = TYPE_UNSIGNED (type2);
2364   optype = type1;
2365
2366   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2367   if (from_unsigned1 != from_unsigned2)
2368     {
2369       if (!INTEGRAL_TYPE_P (type))
2370         return false;
2371       /* We can use a signed multiply with unsigned types as long as
2372          there is a wider mode to use, or it is the smaller of the two
2373          types that is unsigned.  Note that type1 >= type2, always.  */
2374       if ((from_unsigned1
2375            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2376           || (from_unsigned2
2377               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2378         {
2379           from_mode = GET_MODE_WIDER_MODE (from_mode);
2380           if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2381             return false;
2382         }
2383
2384       from_unsigned1 = from_unsigned2 = false;
2385       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2386                                                false);
2387     }
2388
2389   /* If there was a conversion between the multiply and addition
2390      then we need to make sure it fits a multiply-and-accumulate.
2391      The should be a single mode change which does not change the
2392      value.  */
2393   if (conv_stmt)
2394     {
2395       /* We use the original, unmodified data types for this.  */
2396       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
2397       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
2398       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
2399       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
2400
2401       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
2402         {
2403           /* Conversion is a truncate.  */
2404           if (TYPE_PRECISION (to_type) < data_size)
2405             return false;
2406         }
2407       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
2408         {
2409           /* Conversion is an extend.  Check it's the right sort.  */
2410           if (TYPE_UNSIGNED (from_type) != is_unsigned
2411               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
2412             return false;
2413         }
2414       /* else convert is a no-op for our purposes.  */
2415     }
2416
2417   /* Verify that the machine can perform a widening multiply
2418      accumulate in this mode/signedness combination, otherwise
2419      this transformation is likely to pessimize code.  */
2420   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
2421   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
2422                                                   from_mode, 0, &actual_mode);
2423
2424   if (handler == CODE_FOR_nothing)
2425     return false;
2426
2427   /* Ensure that the inputs to the handler are in the correct precison
2428      for the opcode.  This will be the full mode size.  */
2429   actual_precision = GET_MODE_PRECISION (actual_mode);
2430   if (actual_precision != TYPE_PRECISION (type1)
2431       || from_unsigned1 != TYPE_UNSIGNED (type1))
2432     mult_rhs1 = build_and_insert_cast (gsi, loc,
2433                                        build_nonstandard_integer_type
2434                                          (actual_precision, from_unsigned1),
2435                                        mult_rhs1);
2436   if (actual_precision != TYPE_PRECISION (type2)
2437       || from_unsigned2 != TYPE_UNSIGNED (type2))
2438     mult_rhs2 = build_and_insert_cast (gsi, loc,
2439                                        build_nonstandard_integer_type
2440                                          (actual_precision, from_unsigned2),
2441                                        mult_rhs2);
2442
2443   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
2444     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
2445
2446   /* Handle constants.  */
2447   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
2448     mult_rhs1 = fold_convert (type1, mult_rhs1);
2449   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
2450     mult_rhs2 = fold_convert (type2, mult_rhs2);
2451
2452   gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code, mult_rhs1, mult_rhs2,
2453                                     add_rhs);
2454   update_stmt (gsi_stmt (*gsi));
2455   widen_mul_stats.maccs_inserted++;
2456   return true;
2457 }
2458
2459 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
2460    with uses in additions and subtractions to form fused multiply-add
2461    operations.  Returns true if successful and MUL_STMT should be removed.  */
2462
2463 static bool
2464 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
2465 {
2466   tree mul_result = gimple_get_lhs (mul_stmt);
2467   tree type = TREE_TYPE (mul_result);
2468   gimple use_stmt, neguse_stmt, fma_stmt;
2469   use_operand_p use_p;
2470   imm_use_iterator imm_iter;
2471
2472   if (FLOAT_TYPE_P (type)
2473       && flag_fp_contract_mode == FP_CONTRACT_OFF)
2474     return false;
2475
2476   /* We don't want to do bitfield reduction ops.  */
2477   if (INTEGRAL_TYPE_P (type)
2478       && (TYPE_PRECISION (type)
2479           != GET_MODE_PRECISION (TYPE_MODE (type))))
2480     return false;
2481
2482   /* If the target doesn't support it, don't generate it.  We assume that
2483      if fma isn't available then fms, fnma or fnms are not either.  */
2484   if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
2485     return false;
2486
2487   /* If the multiplication has zero uses, it is kept around probably because
2488      of -fnon-call-exceptions.  Don't optimize it away in that case,
2489      it is DCE job.  */
2490   if (has_zero_uses (mul_result))
2491     return false;
2492
2493   /* Make sure that the multiplication statement becomes dead after
2494      the transformation, thus that all uses are transformed to FMAs.
2495      This means we assume that an FMA operation has the same cost
2496      as an addition.  */
2497   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
2498     {
2499       enum tree_code use_code;
2500       tree result = mul_result;
2501       bool negate_p = false;
2502
2503       use_stmt = USE_STMT (use_p);
2504
2505       if (is_gimple_debug (use_stmt))
2506         continue;
2507
2508       /* For now restrict this operations to single basic blocks.  In theory
2509          we would want to support sinking the multiplication in
2510          m = a*b;
2511          if ()
2512            ma = m + c;
2513          else
2514            d = m;
2515          to form a fma in the then block and sink the multiplication to the
2516          else block.  */
2517       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2518         return false;
2519
2520       if (!is_gimple_assign (use_stmt))
2521         return false;
2522
2523       use_code = gimple_assign_rhs_code (use_stmt);
2524
2525       /* A negate on the multiplication leads to FNMA.  */
2526       if (use_code == NEGATE_EXPR)
2527         {
2528           ssa_op_iter iter;
2529           use_operand_p usep;
2530
2531           result = gimple_assign_lhs (use_stmt);
2532
2533           /* Make sure the negate statement becomes dead with this
2534              single transformation.  */
2535           if (!single_imm_use (gimple_assign_lhs (use_stmt),
2536                                &use_p, &neguse_stmt))
2537             return false;
2538
2539           /* Make sure the multiplication isn't also used on that stmt.  */
2540           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
2541             if (USE_FROM_PTR (usep) == mul_result)
2542               return false;
2543
2544           /* Re-validate.  */
2545           use_stmt = neguse_stmt;
2546           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
2547             return false;
2548           if (!is_gimple_assign (use_stmt))
2549             return false;
2550
2551           use_code = gimple_assign_rhs_code (use_stmt);
2552           negate_p = true;
2553         }
2554
2555       switch (use_code)
2556         {
2557         case MINUS_EXPR:
2558           if (gimple_assign_rhs2 (use_stmt) == result)
2559             negate_p = !negate_p;
2560           break;
2561         case PLUS_EXPR:
2562           break;
2563         default:
2564           /* FMA can only be formed from PLUS and MINUS.  */
2565           return false;
2566         }
2567
2568       /* We can't handle a * b + a * b.  */
2569       if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
2570         return false;
2571
2572       /* While it is possible to validate whether or not the exact form
2573          that we've recognized is available in the backend, the assumption
2574          is that the transformation is never a loss.  For instance, suppose
2575          the target only has the plain FMA pattern available.  Consider
2576          a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
2577          is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
2578          still have 3 operations, but in the FMA form the two NEGs are
2579          independent and could be run in parallel.  */
2580     }
2581
2582   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
2583     {
2584       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
2585       enum tree_code use_code;
2586       tree addop, mulop1 = op1, result = mul_result;
2587       bool negate_p = false;
2588
2589       if (is_gimple_debug (use_stmt))
2590         continue;
2591
2592       use_code = gimple_assign_rhs_code (use_stmt);
2593       if (use_code == NEGATE_EXPR)
2594         {
2595           result = gimple_assign_lhs (use_stmt);
2596           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
2597           gsi_remove (&gsi, true);
2598           release_defs (use_stmt);
2599
2600           use_stmt = neguse_stmt;
2601           gsi = gsi_for_stmt (use_stmt);
2602           use_code = gimple_assign_rhs_code (use_stmt);
2603           negate_p = true;
2604         }
2605
2606       if (gimple_assign_rhs1 (use_stmt) == result)
2607         {
2608           addop = gimple_assign_rhs2 (use_stmt);
2609           /* a * b - c -> a * b + (-c)  */
2610           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2611             addop = force_gimple_operand_gsi (&gsi,
2612                                               build1 (NEGATE_EXPR,
2613                                                       type, addop),
2614                                               true, NULL_TREE, true,
2615                                               GSI_SAME_STMT);
2616         }
2617       else
2618         {
2619           addop = gimple_assign_rhs1 (use_stmt);
2620           /* a - b * c -> (-b) * c + a */
2621           if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2622             negate_p = !negate_p;
2623         }
2624
2625       if (negate_p)
2626         mulop1 = force_gimple_operand_gsi (&gsi,
2627                                            build1 (NEGATE_EXPR,
2628                                                    type, mulop1),
2629                                            true, NULL_TREE, true,
2630                                            GSI_SAME_STMT);
2631
2632       fma_stmt = gimple_build_assign_with_ops (FMA_EXPR,
2633                                                gimple_assign_lhs (use_stmt),
2634                                                mulop1, op2,
2635                                                addop);
2636       gsi_replace (&gsi, fma_stmt, true);
2637       widen_mul_stats.fmas_inserted++;
2638     }
2639
2640   return true;
2641 }
2642
2643 /* Find integer multiplications where the operands are extended from
2644    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
2645    where appropriate.  */
2646
2647 static unsigned int
2648 execute_optimize_widening_mul (void)
2649 {
2650   basic_block bb;
2651   bool cfg_changed = false;
2652
2653   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
2654
2655   FOR_EACH_BB (bb)
2656     {
2657       gimple_stmt_iterator gsi;
2658
2659       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
2660         {
2661           gimple stmt = gsi_stmt (gsi);
2662           enum tree_code code;
2663
2664           if (is_gimple_assign (stmt))
2665             {
2666               code = gimple_assign_rhs_code (stmt);
2667               switch (code)
2668                 {
2669                 case MULT_EXPR:
2670                   if (!convert_mult_to_widen (stmt, &gsi)
2671                       && convert_mult_to_fma (stmt,
2672                                               gimple_assign_rhs1 (stmt),
2673                                               gimple_assign_rhs2 (stmt)))
2674                     {
2675                       gsi_remove (&gsi, true);
2676                       release_defs (stmt);
2677                       continue;
2678                     }
2679                   break;
2680
2681                 case PLUS_EXPR:
2682                 case MINUS_EXPR:
2683                   convert_plusminus_to_widen (&gsi, stmt, code);
2684                   break;
2685
2686                 default:;
2687                 }
2688             }
2689           else if (is_gimple_call (stmt)
2690                    && gimple_call_lhs (stmt))
2691             {
2692               tree fndecl = gimple_call_fndecl (stmt);
2693               if (fndecl
2694                   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
2695                 {
2696                   switch (DECL_FUNCTION_CODE (fndecl))
2697                     {
2698                       case BUILT_IN_POWF:
2699                       case BUILT_IN_POW:
2700                       case BUILT_IN_POWL:
2701                         if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
2702                             && REAL_VALUES_EQUAL
2703                                  (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
2704                                   dconst2)
2705                             && convert_mult_to_fma (stmt,
2706                                                     gimple_call_arg (stmt, 0),
2707                                                     gimple_call_arg (stmt, 0)))
2708                           {
2709                             unlink_stmt_vdef (stmt);
2710                             if (gsi_remove (&gsi, true)
2711                                 && gimple_purge_dead_eh_edges (bb))
2712                               cfg_changed = true;
2713                             release_defs (stmt);
2714                             continue;
2715                           }
2716                           break;
2717
2718                       default:;
2719                     }
2720                 }
2721             }
2722           gsi_next (&gsi);
2723         }
2724     }
2725
2726   statistics_counter_event (cfun, "widening multiplications inserted",
2727                             widen_mul_stats.widen_mults_inserted);
2728   statistics_counter_event (cfun, "widening maccs inserted",
2729                             widen_mul_stats.maccs_inserted);
2730   statistics_counter_event (cfun, "fused multiply-adds inserted",
2731                             widen_mul_stats.fmas_inserted);
2732
2733   return cfg_changed ? TODO_cleanup_cfg : 0;
2734 }
2735
2736 static bool
2737 gate_optimize_widening_mul (void)
2738 {
2739   return flag_expensive_optimizations && optimize;
2740 }
2741
2742 struct gimple_opt_pass pass_optimize_widening_mul =
2743 {
2744  {
2745   GIMPLE_PASS,
2746   "widening_mul",                       /* name */
2747   OPTGROUP_NONE,                        /* optinfo_flags */
2748   gate_optimize_widening_mul,           /* gate */
2749   execute_optimize_widening_mul,        /* execute */
2750   NULL,                                 /* sub */
2751   NULL,                                 /* next */
2752   0,                                    /* static_pass_number */
2753   TV_NONE,                              /* tv_id */
2754   PROP_ssa,                             /* properties_required */
2755   0,                                    /* properties_provided */
2756   0,                                    /* properties_destroyed */
2757   0,                                    /* todo_flags_start */
2758   TODO_verify_ssa
2759   | TODO_verify_stmts
2760   | TODO_update_ssa                     /* todo_flags_finish */
2761  }
2762 };