gcc/tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
22
23 modulus = sqrt(x*x + y*y + z*z);
24 x = x / modulus;
25 y = y / modulus;
26 z = z / modulus;
27
28 that can be optimized to
29
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
32 x = x * rmodulus;
33 y = y * rmodulus;
34 z = z * rmodulus;
35
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
38
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
42
43 First of all, experiments showed that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
49 this comment.
50
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, we can instead insert
54 divisions elsewhere, in blocks that are common dominators of
55 those that have the division.)
56
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has fewer than N divisions *post-dominating* it.
60
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and their common dominators,
63 and walks it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
68
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
75
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
79
80 If we did this using domwalk.c, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
86
87 #include "config.h"
88 #include "system.h"
89 #include "coretypes.h"
90 #include "tm.h"
91 #include "flags.h"
92 #include "hash-set.h"
93 #include "vec.h"
94 #include "input.h"
95 #include "alias.h"
96 #include "symtab.h"
97 #include "inchash.h"
98 #include "tree.h"
99 #include "fold-const.h"
100 #include "predict.h"
101 #include "hard-reg-set.h"
102 #include "function.h"
103 #include "dominance.h"
104 #include "cfg.h"
105 #include "basic-block.h"
106 #include "tree-ssa-alias.h"
107 #include "internal-fn.h"
108 #include "gimple-fold.h"
109 #include "gimple-expr.h"
110 #include "is-a.h"
111 #include "gimple.h"
112 #include "gimple-iterator.h"
113 #include "gimplify.h"
114 #include "gimplify-me.h"
115 #include "stor-layout.h"
116 #include "gimple-ssa.h"
117 #include "tree-cfg.h"
118 #include "tree-phinodes.h"
119 #include "ssa-iterators.h"
120 #include "stringpool.h"
121 #include "tree-ssanames.h"
122 #include "hashtab.h"
123 #include "rtl.h"
124 #include "statistics.h"
125 #include "insn-config.h"
126 #include "expmed.h"
127 #include "dojump.h"
128 #include "explow.h"
129 #include "calls.h"
130 #include "emit-rtl.h"
131 #include "varasm.h"
132 #include "stmt.h"
133 #include "expr.h"
134 #include "tree-dfa.h"
135 #include "tree-ssa.h"
136 #include "tree-pass.h"
137 #include "alloc-pool.h"
138 #include "target.h"
139 #include "gimple-pretty-print.h"
140 #include "builtins.h"
141 #include "params.h"
142
143 /* FIXME: RTL headers have to be included here for optabs. */
144 #include "rtl.h" /* Because optabs.h wants enum rtx_code. */
145 #include "expr.h" /* Because optabs.h wants sepops. */
146 #include "insn-codes.h"
147 #include "optabs.h"
148
149 /* This structure represents one basic block that either computes a
150 division, or is a common dominator for basic blocks that compute a
151 division. */
152 struct occurrence {
153 /* The basic block represented by this structure. */
154 basic_block bb;
155
156 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
157 inserted in BB. */
158 tree recip_def;
159
160 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
161 was inserted in BB. */
162 gimple recip_def_stmt;
163
164 /* Pointer to a list of "struct occurrence"s for blocks dominated
165 by BB. */
166 struct occurrence *children;
167
168 Pointer to the next "struct occurrence" in the list of blocks
169 sharing a common dominator. */
170 struct occurrence *next;
171
172 /* The number of divisions that are in BB before compute_merit. The
173 number of divisions that are in BB or post-dominate it after
174 compute_merit. */
175 int num_divisions;
176
177 /* True if the basic block has a division, false if it is a common
178 dominator for basic blocks that do. If it is false and trapping
179 math is active, BB is not a candidate for inserting a reciprocal. */
180 bool bb_has_division;
181 };
182
183 static struct
184 {
185 /* Number of 1.0/X ops inserted. */
186 int rdivs_inserted;
187
188 /* Number of 1.0/FUNC ops inserted. */
189 int rfuncs_inserted;
190 } reciprocal_stats;
191
192 static struct
193 {
194 /* Number of cexpi calls inserted. */
195 int inserted;
196 } sincos_stats;
197
198 static struct
199 {
200 /* Number of hand-written 16-bit nop / bswaps found. */
201 int found_16bit;
202
203 /* Number of hand-written 32-bit nop / bswaps found. */
204 int found_32bit;
205
206 /* Number of hand-written 64-bit nop / bswaps found. */
207 int found_64bit;
208 } nop_stats, bswap_stats;
209
210 static struct
211 {
212 /* Number of widening multiplication ops inserted. */
213 int widen_mults_inserted;
214
215 /* Number of integer multiply-and-accumulate ops inserted. */
216 int maccs_inserted;
217
218 /* Number of fp fused multiply-add ops inserted. */
219 int fmas_inserted;
220 } widen_mul_stats;
221
222 /* The instance of "struct occurrence" representing the highest
223 interesting block in the dominator tree. */
224 static struct occurrence *occ_head;
225
226 /* Allocation pool for getting instances of "struct occurrence". */
227 static pool_allocator<occurrence> *occ_pool;
228
229
230
231 /* Allocate and return a new struct occurrence for basic block BB, and
232 whose children list is headed by CHILDREN. */
233 static struct occurrence *
234 occ_new (basic_block bb, struct occurrence *children)
235 {
236 struct occurrence *occ;
237
238 bb->aux = occ = occ_pool->allocate ();
239 memset (occ, 0, sizeof (struct occurrence));
240
241 occ->bb = bb;
242 occ->children = children;
243 return occ;
244 }
245
246
247 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
248 list of "struct occurrence"s, one per basic block, having IDOM as
249 their common dominator.
250
251 We try to insert NEW_OCC as deep as possible in the tree, and we also
252 insert any other block that is a common dominator for BB and one
253 block already in the tree. */
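/* Illustrative sketch (hypothetical blocks, not taken from the code below):
   suppose *P_HEAD currently holds an occurrence for block B2, and NEW_OCC is
   created for block B3, where neither of B2 and B3 dominates the other but
   both are dominated by a block B1 strictly below IDOM.  The loop below then
   computes DOM == B1, creates an occurrence for B1 with B3 and B2 as its
   children, keeps scanning the list for further blocks dominated by B1, and
   finally adds B1's occurrence to the list at *P_HEAD.  */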
254
255 static void
256 insert_bb (struct occurrence *new_occ, basic_block idom,
257 struct occurrence **p_head)
258 {
259 struct occurrence *occ, **p_occ;
260
261 for (p_occ = p_head; (occ = *p_occ) != NULL; )
262 {
263 basic_block bb = new_occ->bb, occ_bb = occ->bb;
264 basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
265 if (dom == bb)
266 {
267 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
268 from its list. */
269 *p_occ = occ->next;
270 occ->next = new_occ->children;
271 new_occ->children = occ;
272
273 /* Try the next block (it may also be dominated by BB). */
274 }
275
276 else if (dom == occ_bb)
277 {
278 /* OCC_BB dominates BB. Tail recurse to look deeper. */
279 insert_bb (new_occ, dom, &occ->children);
280 return;
281 }
282
283 else if (dom != idom)
284 {
285 gcc_assert (!dom->aux);
286
287 /* There is a dominator between IDOM and BB, add it and make
288 two children out of NEW_OCC and OCC. First, remove OCC from
289 its list. */
290 *p_occ = occ->next;
291 new_occ->next = occ;
292 occ->next = NULL;
293
294 /* None of the previous blocks has DOM as a dominator: if we tail
295 recursed, we would reexamine them uselessly. Just switch BB with
296 DOM, and go on looking for blocks dominated by DOM. */
297 new_occ = occ_new (dom, new_occ);
298 }
299
300 else
301 {
302 /* Nothing special, go on with the next element. */
303 p_occ = &occ->next;
304 }
305 }
306
307 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
308 new_occ->next = *p_head;
309 *p_head = new_occ;
310 }
311
312 /* Register that we found a division in BB. */
313
314 static inline void
315 register_division_in (basic_block bb)
316 {
317 struct occurrence *occ;
318
319 occ = (struct occurrence *) bb->aux;
320 if (!occ)
321 {
322 occ = occ_new (bb, NULL);
323 insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
324 }
325
326 occ->bb_has_division = true;
327 occ->num_divisions++;
328 }
329
330
331 /* Compute the number of divisions that postdominate each block in OCC and
332 its children. */
333
334 static void
335 compute_merit (struct occurrence *occ)
336 {
337 struct occurrence *occ_child;
338 basic_block dom = occ->bb;
339
340 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
341 {
342 basic_block bb;
343 if (occ_child->children)
344 compute_merit (occ_child);
345
346 if (flag_exceptions)
347 bb = single_noncomplex_succ (dom);
348 else
349 bb = dom;
350
351 if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
352 occ->num_divisions += occ_child->num_divisions;
353 }
354 }
355
356
357 /* Return whether USE_STMT is a floating-point division by DEF. */
358 static inline bool
359 is_division_by (gimple use_stmt, tree def)
360 {
361 return is_gimple_assign (use_stmt)
362 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
363 && gimple_assign_rhs2 (use_stmt) == def
364 /* Do not recognize x / x as valid division, as we are getting
365 confused later by replacing all immediate uses of x in such
366 a stmt. */
367 && gimple_assign_rhs1 (use_stmt) != def;
368 }
369
370 /* Walk the subset of the dominator tree rooted at OCC, setting the
371 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
372 the given basic block. The field may be left NULL, of course,
373 if it is not possible or profitable to do the optimization.
374
375 DEF_GSI is an iterator pointing at the statement defining DEF.
376 If RECIP_DEF is set, a dominator already has a computation that can
377 be used. */
378
379 static void
380 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
381 tree def, tree recip_def, int threshold)
382 {
383 tree type;
384 gassign *new_stmt;
385 gimple_stmt_iterator gsi;
386 struct occurrence *occ_child;
387
388 if (!recip_def
389 && (occ->bb_has_division || !flag_trapping_math)
390 && occ->num_divisions >= threshold)
391 {
392 /* Make a variable with the replacement and substitute it. */
393 type = TREE_TYPE (def);
394 recip_def = create_tmp_reg (type, "reciptmp");
395 new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
396 build_one_cst (type), def);
397
398 if (occ->bb_has_division)
399 {
400 /* Case 1: insert before an existing division. */
401 gsi = gsi_after_labels (occ->bb);
402 while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
403 gsi_next (&gsi);
404
405 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
406 }
407 else if (def_gsi && occ->bb == def_gsi->bb)
408 {
409 /* Case 2: insert right after the definition. Note that this will
410 never happen if the definition statement can throw, because in
411 that case the sole successor of the statement's basic block will
412 dominate all the uses as well. */
413 gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
414 }
415 else
416 {
417 /* Case 3: insert in a basic block not containing defs/uses. */
418 gsi = gsi_after_labels (occ->bb);
419 gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
420 }
421
422 reciprocal_stats.rdivs_inserted++;
423
424 occ->recip_def_stmt = new_stmt;
425 }
426
427 occ->recip_def = recip_def;
428 for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
429 insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
430 }
431
432
433 /* Replace the division at USE_P with a multiplication by the reciprocal, if
434 possible. */
435
436 static inline void
437 replace_reciprocal (use_operand_p use_p)
438 {
439 gimple use_stmt = USE_STMT (use_p);
440 basic_block bb = gimple_bb (use_stmt);
441 struct occurrence *occ = (struct occurrence *) bb->aux;
442
443 if (optimize_bb_for_speed_p (bb)
444 && occ->recip_def && use_stmt != occ->recip_def_stmt)
445 {
446 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
447 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
448 SET_USE (use_p, occ->recip_def);
449 fold_stmt_inplace (&gsi);
450 update_stmt (use_stmt);
451 }
452 }
453
454
455 /* Free OCC and return one more "struct occurrence" to be freed. */
456
457 static struct occurrence *
458 free_bb (struct occurrence *occ)
459 {
460 struct occurrence *child, *next;
461
462 /* First get the two pointers hanging off OCC. */
463 next = occ->next;
464 child = occ->children;
465 occ->bb->aux = NULL;
466 occ_pool->remove (occ);
467
468 /* Now ensure that we don't recurse unless it is necessary. */
469 if (!child)
470 return next;
471 else
472 {
473 while (next)
474 next = free_bb (next);
475
476 return child;
477 }
478 }
479
480
481 /* Look for floating-point divisions among DEF's uses, and try to
482 replace them by multiplications with the reciprocal. Add
483 as many statements computing the reciprocal as needed.
484
485 DEF must be a GIMPLE register of a floating-point type. */
486
487 static void
488 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
489 {
490 use_operand_p use_p;
491 imm_use_iterator use_iter;
492 struct occurrence *occ;
493 int count = 0, threshold;
494
495 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
496
497 FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
498 {
499 gimple use_stmt = USE_STMT (use_p);
500 if (is_division_by (use_stmt, def))
501 {
502 register_division_in (gimple_bb (use_stmt));
503 count++;
504 }
505 }
506
507 /* Do the expensive part only if we can hope to optimize something. */
508 threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
509 if (count >= threshold)
510 {
511 gimple use_stmt;
512 for (occ = occ_head; occ; occ = occ->next)
513 {
514 compute_merit (occ);
515 insert_reciprocals (def_gsi, occ, def, NULL, threshold);
516 }
517
518 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
519 {
520 if (is_division_by (use_stmt, def))
521 {
522 FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
523 replace_reciprocal (use_p);
524 }
525 }
526 }
527
528 for (occ = occ_head; occ; )
529 occ = free_bb (occ);
530
531 occ_head = NULL;
532 }
533
534 /* Go through all the floating-point SSA_NAMEs, and call
535 execute_cse_reciprocals_1 on each of them. */
536 namespace {
537
538 const pass_data pass_data_cse_reciprocals =
539 {
540 GIMPLE_PASS, /* type */
541 "recip", /* name */
542 OPTGROUP_NONE, /* optinfo_flags */
543 TV_NONE, /* tv_id */
544 PROP_ssa, /* properties_required */
545 0, /* properties_provided */
546 0, /* properties_destroyed */
547 0, /* todo_flags_start */
548 TODO_update_ssa, /* todo_flags_finish */
549 };
550
551 class pass_cse_reciprocals : public gimple_opt_pass
552 {
553 public:
554 pass_cse_reciprocals (gcc::context *ctxt)
555 : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
556 {}
557
558 /* opt_pass methods: */
559 virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
560 virtual unsigned int execute (function *);
561
562 }; // class pass_cse_reciprocals
563
564 unsigned int
565 pass_cse_reciprocals::execute (function *fun)
566 {
567 basic_block bb;
568 tree arg;
569
570 occ_pool = new pool_allocator<occurrence>
571 ("dominators for recip", n_basic_blocks_for_fn (fun) / 3 + 1);
572
573 memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
574 calculate_dominance_info (CDI_DOMINATORS);
575 calculate_dominance_info (CDI_POST_DOMINATORS);
576
577 #ifdef ENABLE_CHECKING
578 FOR_EACH_BB_FN (bb, fun)
579 gcc_assert (!bb->aux);
580 #endif
581
582 for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
583 if (FLOAT_TYPE_P (TREE_TYPE (arg))
584 && is_gimple_reg (arg))
585 {
586 tree name = ssa_default_def (fun, arg);
587 if (name)
588 execute_cse_reciprocals_1 (NULL, name);
589 }
590
591 FOR_EACH_BB_FN (bb, fun)
592 {
593 tree def;
594
595 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
596 gsi_next (&gsi))
597 {
598 gphi *phi = gsi.phi ();
599 def = PHI_RESULT (phi);
600 if (! virtual_operand_p (def)
601 && FLOAT_TYPE_P (TREE_TYPE (def)))
602 execute_cse_reciprocals_1 (NULL, def);
603 }
604
605 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
606 gsi_next (&gsi))
607 {
608 gimple stmt = gsi_stmt (gsi);
609
610 if (gimple_has_lhs (stmt)
611 && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
612 && FLOAT_TYPE_P (TREE_TYPE (def))
613 && TREE_CODE (def) == SSA_NAME)
614 execute_cse_reciprocals_1 (&gsi, def);
615 }
616
617 if (optimize_bb_for_size_p (bb))
618 continue;
619
620 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
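      /* Sketch of the rewrite (hypothetical SSA names; RFUNC stands for
         whatever reciprocal builtin targetm.builtin_reciprocal returns):

           t_1 = __builtin_sqrt (b_2);          t_1 = RFUNC (b_2);
           x_3 = a_4 / t_1;              ==>    x_3 = a_4 * t_1;

         i.e. the call defining the divisor is rewritten in place and every
         division by its result becomes a multiplication.  */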
621 for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
622 gsi_next (&gsi))
623 {
624 gimple stmt = gsi_stmt (gsi);
625 tree fndecl;
626
627 if (is_gimple_assign (stmt)
628 && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
629 {
630 tree arg1 = gimple_assign_rhs2 (stmt);
631 gimple stmt1;
632
633 if (TREE_CODE (arg1) != SSA_NAME)
634 continue;
635
636 stmt1 = SSA_NAME_DEF_STMT (arg1);
637
638 if (is_gimple_call (stmt1)
639 && gimple_call_lhs (stmt1)
640 && (fndecl = gimple_call_fndecl (stmt1))
641 && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
642 || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
643 {
644 enum built_in_function code;
645 bool md_code, fail;
646 imm_use_iterator ui;
647 use_operand_p use_p;
648
649 code = DECL_FUNCTION_CODE (fndecl);
650 md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;
651
652 fndecl = targetm.builtin_reciprocal (code, md_code, false);
653 if (!fndecl)
654 continue;
655
656 /* Check that all uses of the SSA name are divisions,
657 otherwise replacing the defining statement will do
658 the wrong thing. */
659 fail = false;
660 FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
661 {
662 gimple stmt2 = USE_STMT (use_p);
663 if (is_gimple_debug (stmt2))
664 continue;
665 if (!is_gimple_assign (stmt2)
666 || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
667 || gimple_assign_rhs1 (stmt2) == arg1
668 || gimple_assign_rhs2 (stmt2) != arg1)
669 {
670 fail = true;
671 break;
672 }
673 }
674 if (fail)
675 continue;
676
677 gimple_replace_ssa_lhs (stmt1, arg1);
678 gimple_call_set_fndecl (stmt1, fndecl);
679 update_stmt (stmt1);
680 reciprocal_stats.rfuncs_inserted++;
681
682 FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
683 {
684 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
685 gimple_assign_set_rhs_code (stmt, MULT_EXPR);
686 fold_stmt_inplace (&gsi);
687 update_stmt (stmt);
688 }
689 }
690 }
691 }
692 }
693
694 statistics_counter_event (fun, "reciprocal divs inserted",
695 reciprocal_stats.rdivs_inserted);
696 statistics_counter_event (fun, "reciprocal functions inserted",
697 reciprocal_stats.rfuncs_inserted);
698
699 free_dominance_info (CDI_DOMINATORS);
700 free_dominance_info (CDI_POST_DOMINATORS);
701 delete occ_pool;
702 return 0;
703 }
704
705 } // anon namespace
706
707 gimple_opt_pass *
708 make_pass_cse_reciprocals (gcc::context *ctxt)
709 {
710 return new pass_cse_reciprocals (ctxt);
711 }
712
713 /* Records an occurrence at statement USE_STMT in the vector STMTS if
714 USE_STMT's basic block is dominated by *TOP_BB, dominates *TOP_BB, or
715 if *TOP_BB is not yet initialized. Returns true if the occurrence was pushed on
716 the vector. Adjusts *TOP_BB to be the basic block dominating all
717 statements in the vector. */
718
719 static bool
720 maybe_record_sincos (vec<gimple> *stmts,
721 basic_block *top_bb, gimple use_stmt)
722 {
723 basic_block use_bb = gimple_bb (use_stmt);
724 if (*top_bb
725 && (*top_bb == use_bb
726 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
727 stmts->safe_push (use_stmt);
728 else if (!*top_bb
729 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
730 {
731 stmts->safe_push (use_stmt);
732 *top_bb = use_bb;
733 }
734 else
735 return false;
736
737 return true;
738 }
739
740 /* Look for sin, cos and cexpi calls with the same argument NAME and
741 create a single call to cexpi CSEing the result in this case.
742 We first walk over all immediate uses of the argument collecting
743 statements that we can CSE in a vector and in a second pass replace
744 the statement rhs with a REALPART or IMAGPART expression on the
745 result of the cexpi call we insert before the use statement that
746 dominates all other candidates. */
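/* Illustrative sketch of the rewrite (GIMPLE shown informally, SSA names
   hypothetical):

     c_1 = __builtin_cos (x_2);
     s_3 = __builtin_sin (x_2);

   becomes

     sincostmp_4 = __builtin_cexpi (x_2);
     c_1 = REALPART_EXPR <sincostmp_4>;
     s_3 = IMAGPART_EXPR <sincostmp_4>;  */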
747
748 static bool
749 execute_cse_sincos_1 (tree name)
750 {
751 gimple_stmt_iterator gsi;
752 imm_use_iterator use_iter;
753 tree fndecl, res, type;
754 gimple def_stmt, use_stmt, stmt;
755 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
756 auto_vec<gimple> stmts;
757 basic_block top_bb = NULL;
758 int i;
759 bool cfg_changed = false;
760
761 type = TREE_TYPE (name);
762 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
763 {
764 if (gimple_code (use_stmt) != GIMPLE_CALL
765 || !gimple_call_lhs (use_stmt)
766 || !(fndecl = gimple_call_fndecl (use_stmt))
767 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
768 continue;
769
770 switch (DECL_FUNCTION_CODE (fndecl))
771 {
772 CASE_FLT_FN (BUILT_IN_COS):
773 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
774 break;
775
776 CASE_FLT_FN (BUILT_IN_SIN):
777 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
778 break;
779
780 CASE_FLT_FN (BUILT_IN_CEXPI):
781 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
782 break;
783
784 default:;
785 }
786 }
787
788 if (seen_cos + seen_sin + seen_cexpi <= 1)
789 return false;
790
791 /* Simply insert cexpi at the beginning of top_bb but not earlier than
792 the name def statement. */
793 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
794 if (!fndecl)
795 return false;
796 stmt = gimple_build_call (fndecl, 1, name);
797 res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
798 gimple_call_set_lhs (stmt, res);
799
800 def_stmt = SSA_NAME_DEF_STMT (name);
801 if (!SSA_NAME_IS_DEFAULT_DEF (name)
802 && gimple_code (def_stmt) != GIMPLE_PHI
803 && gimple_bb (def_stmt) == top_bb)
804 {
805 gsi = gsi_for_stmt (def_stmt);
806 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
807 }
808 else
809 {
810 gsi = gsi_after_labels (top_bb);
811 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
812 }
813 sincos_stats.inserted++;
814
815 /* And adjust the recorded old call sites. */
816 for (i = 0; stmts.iterate (i, &use_stmt); ++i)
817 {
818 tree rhs = NULL;
819 fndecl = gimple_call_fndecl (use_stmt);
820
821 switch (DECL_FUNCTION_CODE (fndecl))
822 {
823 CASE_FLT_FN (BUILT_IN_COS):
824 rhs = fold_build1 (REALPART_EXPR, type, res);
825 break;
826
827 CASE_FLT_FN (BUILT_IN_SIN):
828 rhs = fold_build1 (IMAGPART_EXPR, type, res);
829 break;
830
831 CASE_FLT_FN (BUILT_IN_CEXPI):
832 rhs = res;
833 break;
834
835 default:;
836 gcc_unreachable ();
837 }
838
839 /* Replace call with a copy. */
840 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
841
842 gsi = gsi_for_stmt (use_stmt);
843 gsi_replace (&gsi, stmt, true);
844 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
845 cfg_changed = true;
846 }
847
848 return cfg_changed;
849 }
850
851 /* To evaluate powi(x,n), the floating point value x raised to the
852 constant integer exponent n, we use a hybrid algorithm that
853 combines the "window method" with look-up tables. For an
854 introduction to exponentiation algorithms and "addition chains",
855 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
856 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
857 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
858 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
859
860 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
861 multiplications to inline before calling the system library's pow
862 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
863 so this default never requires calling pow, powf or powl. */
864
865 #ifndef POWI_MAX_MULTS
866 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
867 #endif
868
869 /* The size of the "optimal power tree" lookup table. All
870 exponents less than this value are simply looked up in the
871 powi_table below. This threshold is also used to size the
872 cache of pseudo registers that hold intermediate results. */
873 #define POWI_TABLE_SIZE 256
874
875 /* The size, in bits, of the window used in the "window method"
876 exponentiation algorithm. This is equivalent to a radix of
877 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
878 #define POWI_WINDOW_SIZE 3
879
880 /* The following table is an efficient representation of an
881 "optimal power tree". For each value, i, the corresponding
882 value, j, in the table states that an optimal evaluation
883 sequence for calculating pow(x,i) can be found by evaluating
884 pow(x,j)*pow(x,i-j). An optimal power tree for the first
885 100 integers is given in Knuth's "Seminumerical algorithms". */
886
887 static const unsigned char powi_table[POWI_TABLE_SIZE] =
888 {
889 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
890 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
891 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
892 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
893 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
894 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
895 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
896 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
897 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
898 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
899 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
900 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
901 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
902 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
903 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
904 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
905 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
906 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
907 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
908 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
909 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
910 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
911 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
912 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
913 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
914 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
915 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
916 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
917 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
918 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
919 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
920 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
921 };
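/* Worked example (illustration only): for a small exponent such as 7,
   powi_table[7] == 4, so x**7 is evaluated as x**4 * x**3; in turn
   powi_table[4] == 2 and powi_table[3] == 2 give x**4 = x**2 * x**2 and
   x**3 = x**2 * x, four multiplications in all.  For an exponent of at
   least POWI_TABLE_SIZE, say 259, the window method peels off the low
   POWI_WINDOW_SIZE bits when the exponent is odd (x**259 = x**256 * x**3)
   and halves even exponents by squaring (x**256 = (x**128)**2) until the
   table can take over.  */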
922
923
924 /* Return the number of multiplications required to calculate
925 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
926 subroutine of powi_cost. CACHE is an array indicating
927 which exponents have already been calculated. */
928
929 static int
930 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
931 {
932 /* If we've already calculated this exponent, then this evaluation
933 doesn't require any additional multiplications. */
934 if (cache[n])
935 return 0;
936
937 cache[n] = true;
938 return powi_lookup_cost (n - powi_table[n], cache)
939 + powi_lookup_cost (powi_table[n], cache) + 1;
940 }
941
942 /* Return the number of multiplications required to calculate
943 powi(x,n) for an arbitrary x, given the exponent N. This
944 function needs to be kept in sync with powi_as_mults below. */
945
946 static int
947 powi_cost (HOST_WIDE_INT n)
948 {
949 bool cache[POWI_TABLE_SIZE];
950 unsigned HOST_WIDE_INT digit;
951 unsigned HOST_WIDE_INT val;
952 int result;
953
954 if (n == 0)
955 return 0;
956
957 /* Ignore the reciprocal when calculating the cost. */
958 val = (n < 0) ? -n : n;
959
960 /* Initialize the exponent cache. */
961 memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
962 cache[1] = true;
963
964 result = 0;
965
966 while (val >= POWI_TABLE_SIZE)
967 {
968 if (val & 1)
969 {
970 digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
971 result += powi_lookup_cost (digit, cache)
972 + POWI_WINDOW_SIZE + 1;
973 val >>= POWI_WINDOW_SIZE;
974 }
975 else
976 {
977 val >>= 1;
978 result++;
979 }
980 }
981
982 return result + powi_lookup_cost (val, cache);
983 }
984
985 /* Recursive subroutine of powi_as_mults. This function takes the
986 array, CACHE, of already calculated exponents and an exponent N and
987 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
988
989 static tree
990 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
991 HOST_WIDE_INT n, tree *cache)
992 {
993 tree op0, op1, ssa_target;
994 unsigned HOST_WIDE_INT digit;
995 gassign *mult_stmt;
996
997 if (n < POWI_TABLE_SIZE && cache[n])
998 return cache[n];
999
1000 ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1001
1002 if (n < POWI_TABLE_SIZE)
1003 {
1004 cache[n] = ssa_target;
1005 op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1006 op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1007 }
1008 else if (n & 1)
1009 {
1010 digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1011 op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1012 op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1013 }
1014 else
1015 {
1016 op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1017 op1 = op0;
1018 }
1019
1020 mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1021 gimple_set_location (mult_stmt, loc);
1022 gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1023
1024 return ssa_target;
1025 }
1026
1027 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1028 This function needs to be kept in sync with powi_cost above. */
1029
1030 static tree
1031 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1032 tree arg0, HOST_WIDE_INT n)
1033 {
1034 tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1035 gassign *div_stmt;
1036 tree target;
1037
1038 if (n == 0)
1039 return build_real (type, dconst1);
1040
1041 memset (cache, 0, sizeof (cache));
1042 cache[1] = arg0;
1043
1044 result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
1045 if (n >= 0)
1046 return result;
1047
1048 /* If the original exponent was negative, reciprocate the result. */
1049 target = make_temp_ssa_name (type, NULL, "powmult");
1050 div_stmt = gimple_build_assign (target, RDIV_EXPR,
1051 build_real (type, dconst1), result);
1052 gimple_set_location (div_stmt, loc);
1053 gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1054
1055 return target;
1056 }
1057
1058 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1059 location info LOC. If the arguments are appropriate, create an
1060 equivalent sequence of statements prior to GSI using an optimal
1061 number of multiplications, and return an expression holding the
1062 result. */
1063
1064 static tree
1065 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1066 tree arg0, HOST_WIDE_INT n)
1067 {
1068 /* Avoid largest negative number. */
1069 if (n != -n
1070 && ((n >= -1 && n <= 2)
1071 || (optimize_function_for_speed_p (cfun)
1072 && powi_cost (n) <= POWI_MAX_MULTS)))
1073 return powi_as_mults (gsi, loc, arg0, n);
1074
1075 return NULL_TREE;
1076 }
1077
1078 /* Build a gimple call statement that calls FN with argument ARG.
1079 Set the lhs of the call statement to a fresh SSA name. Insert the
1080 statement prior to GSI's current position, and return the fresh
1081 SSA name. */
1082
1083 static tree
1084 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1085 tree fn, tree arg)
1086 {
1087 gcall *call_stmt;
1088 tree ssa_target;
1089
1090 call_stmt = gimple_build_call (fn, 1, arg);
1091 ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1092 gimple_set_lhs (call_stmt, ssa_target);
1093 gimple_set_location (call_stmt, loc);
1094 gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1095
1096 return ssa_target;
1097 }
1098
1099 /* Build a gimple binary operation with the given CODE and arguments
1100 ARG0, ARG1, assigning the result to a new SSA name built from NAME.
1101 Insert the statement prior to GSI's current position, and
1102 return the fresh SSA name. */
1103
1104 static tree
1105 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1106 const char *name, enum tree_code code,
1107 tree arg0, tree arg1)
1108 {
1109 tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1110 gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1111 gimple_set_location (stmt, loc);
1112 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1113 return result;
1114 }
1115
1116 /* Build a gimple reference operation with the given CODE and argument
1117 ARG, assigning the result to a new SSA name of TYPE with NAME.
1118 Insert the statement prior to GSI's current position, and return
1119 the fresh SSA name. */
1120
1121 static inline tree
1122 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1123 const char *name, enum tree_code code, tree arg0)
1124 {
1125 tree result = make_temp_ssa_name (type, NULL, name);
1126 gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
1127 gimple_set_location (stmt, loc);
1128 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1129 return result;
1130 }
1131
1132 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1133 prior to GSI's current position, and return the fresh SSA name. */
1134
1135 static tree
1136 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1137 tree type, tree val)
1138 {
1139 tree result = make_ssa_name (type);
1140 gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1141 gimple_set_location (stmt, loc);
1142 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1143 return result;
1144 }
1145
1146 struct pow_synth_sqrt_info
1147 {
1148 bool *factors;
1149 unsigned int deepest;
1150 unsigned int num_mults;
1151 };
1152
1153 /* Return true iff the real value C can be represented as a
1154 sum of powers of 0.5 up to N. That is:
1155 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1156 Record in INFO the various parameters of the synthesis algorithm such
1157 as the factors a[i], the maximum 0.5 power and the number of
1158 multiplications that will be required. */
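/* For example (illustration only): C = 0.625 = 0.5**1 + 0.5**3, so with
   N >= 3 this returns true with factors = {1, 0, 1}, deepest = 3 and
   num_mults = 1, corresponding to sqrt (x) * sqrt (sqrt (sqrt (x))).  */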
1159
1160 bool
1161 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1162 struct pow_synth_sqrt_info *info)
1163 {
1164 REAL_VALUE_TYPE factor = dconsthalf;
1165 REAL_VALUE_TYPE remainder = c;
1166
1167 info->deepest = 0;
1168 info->num_mults = 0;
1169 memset (info->factors, 0, n * sizeof (bool));
1170
1171 for (unsigned i = 0; i < n; i++)
1172 {
1173 REAL_VALUE_TYPE res;
1174
1175 /* If something inexact happened bail out now. */
1176 if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
1177 return false;
1178
1179 /* We have hit zero. The number is representable as a sum
1180 of powers of 0.5. */
1181 if (REAL_VALUES_EQUAL (res, dconst0))
1182 {
1183 info->factors[i] = true;
1184 info->deepest = i + 1;
1185 return true;
1186 }
1187 else if (!REAL_VALUE_NEGATIVE (res))
1188 {
1189 remainder = res;
1190 info->factors[i] = true;
1191 info->num_mults++;
1192 }
1193 else
1194 info->factors[i] = false;
1195
1196 REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
1197 }
1198 return false;
1199 }
1200
1201 /* Return the tree corresponding to FN being applied
1202 to ARG N times at GSI and LOC.
1203 Look up previous results from CACHE if need be.
1204 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1205
1206 static tree
1207 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1208 tree fn, location_t loc, tree *cache)
1209 {
1210 tree res = cache[n];
1211 if (!res)
1212 {
1213 tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1214 res = build_and_insert_call (gsi, loc, fn, prev);
1215 cache[n] = res;
1216 }
1217
1218 return res;
1219 }
1220
1221 /* Print to STREAM the repeated application of function FNAME to ARG
1222 N times. So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1223 "foo (foo (x))". */
1224
1225 static void
1226 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1227 unsigned int n)
1228 {
1229 if (n == 0)
1230 fprintf (stream, "%s", arg);
1231 else
1232 {
1233 fprintf (stream, "%s (", fname);
1234 print_nested_fn (stream, fname, arg, n - 1);
1235 fprintf (stream, ")");
1236 }
1237 }
1238
1239 /* Print to STREAM the fractional sequence of sqrt chains
1240 applied to ARG, described by INFO. Used for the dump file. */
1241
1242 static void
1243 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1244 struct pow_synth_sqrt_info *info)
1245 {
1246 for (unsigned int i = 0; i < info->deepest; i++)
1247 {
1248 bool is_set = info->factors[i];
1249 if (is_set)
1250 {
1251 print_nested_fn (stream, "sqrt", arg, i + 1);
1252 if (i != info->deepest - 1)
1253 fprintf (stream, " * ");
1254 }
1255 }
1256 }
1257
1258 /* Print to STREAM a representation of raising ARG to an integer
1259 power N. Used for the dump file. */
1260
1261 static void
1262 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1263 {
1264 if (n > 1)
1265 fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1266 else if (n == 1)
1267 fprintf (stream, "%s", arg);
1268 }
1269
1270 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1271 square roots. Place at GSI and LOC. Limit the maximum depth
1272 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1273 result of the expanded sequence or NULL_TREE if the expansion failed.
1274
1275 This routine assumes that ARG1 is a real number with a fractional part
1276 (the integer exponent case will have been handled earlier in
1277 gimple_expand_builtin_pow).
1278
1279 For ARG1 > 0.0:
1280 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1281 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1282 FRAC_PART == ARG1 - WHOLE_PART:
1283 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1284 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1285 if it can be expressed as such, that is if FRAC_PART satisfies:
1286 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1287 where integer a[i] is either 0 or 1.
1288
1289 Example:
1290 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1291 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1292
1293 For ARG1 < 0.0 there are two approaches:
1294 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1295 is calculated as above.
1296
1297 Example:
1298 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1299 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1300
1301 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1302 FRAC_PART := ARG1 - WHOLE_PART
1303 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1304 Example:
1305 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1306 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1307
1308 For ARG1 < 0.0 we choose between (A) and (B) depending on
1309 how many multiplications we'd have to do.
1310 So, for the example in (B): POW (x, -5.875), if we were to
1311 follow algorithm (A) we would produce:
1312 1.0 / (POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X))))
1313 which contains more multiplications than approach (B).
1314
1315 Hopefully, this approach will eliminate potentially expensive POW library
1316 calls when unsafe floating point math is enabled and allow the compiler to
1317 further optimise the multiplies, square roots and divides produced by this
1318 function. */
1319
1320 static tree
1321 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1322 tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1323 {
1324 tree type = TREE_TYPE (arg0);
1325 machine_mode mode = TYPE_MODE (type);
1326 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1327 bool one_over = true;
1328
1329 if (!sqrtfn)
1330 return NULL_TREE;
1331
1332 if (TREE_CODE (arg1) != REAL_CST)
1333 return NULL_TREE;
1334
1335 REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1336
1337 gcc_assert (max_depth > 0);
1338 tree *cache = XALLOCAVEC (tree, max_depth + 1);
1339
1340 struct pow_synth_sqrt_info synth_info;
1341 synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1342 synth_info.deepest = 0;
1343 synth_info.num_mults = 0;
1344
1345 bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1346 REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1347
1348 /* The whole and fractional parts of exp. */
1349 REAL_VALUE_TYPE whole_part;
1350 REAL_VALUE_TYPE frac_part;
1351
1352 real_floor (&whole_part, mode, &exp);
1353 REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);
1354
1355
1356 REAL_VALUE_TYPE ceil_whole = dconst0;
1357 REAL_VALUE_TYPE ceil_fract = dconst0;
1358
1359 if (neg_exp)
1360 {
1361 real_ceil (&ceil_whole, mode, &exp);
1362 REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
1363 }
1364
1365 if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1366 return NULL_TREE;
1367
1368 /* Check whether it's more profitable to not use 1.0 / ... */
1369 if (neg_exp)
1370 {
1371 struct pow_synth_sqrt_info alt_synth_info;
1372 alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1373 alt_synth_info.deepest = 0;
1374 alt_synth_info.num_mults = 0;
1375
1376 if (representable_as_half_series_p (ceil_fract, max_depth,
1377 &alt_synth_info)
1378 && alt_synth_info.deepest <= synth_info.deepest
1379 && alt_synth_info.num_mults < synth_info.num_mults)
1380 {
1381 whole_part = ceil_whole;
1382 frac_part = ceil_fract;
1383 synth_info.deepest = alt_synth_info.deepest;
1384 synth_info.num_mults = alt_synth_info.num_mults;
1385 memcpy (synth_info.factors, alt_synth_info.factors,
1386 (max_depth + 1) * sizeof (bool));
1387 one_over = false;
1388 }
1389 }
1390
1391 HOST_WIDE_INT n = real_to_integer (&whole_part);
1392 REAL_VALUE_TYPE cint;
1393 real_from_integer (&cint, VOIDmode, n, SIGNED);
1394
1395 if (!real_identical (&whole_part, &cint))
1396 return NULL_TREE;
1397
1398 if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1399 return NULL_TREE;
1400
1401 memset (cache, 0, (max_depth + 1) * sizeof (tree));
1402
1403 tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1404
1405 /* Calculate the integer part of the exponent. */
1406 if (n > 1)
1407 {
1408 integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1409 if (!integer_res)
1410 return NULL_TREE;
1411 }
1412
1413 if (dump_file)
1414 {
1415 char string[64];
1416
1417 real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1418 fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1419
1420 if (neg_exp)
1421 {
1422 if (one_over)
1423 {
1424 fprintf (dump_file, "1.0 / (");
1425 dump_integer_part (dump_file, "x", n);
1426 if (n > 0)
1427 fprintf (dump_file, " * ");
1428 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1429 fprintf (dump_file, ")");
1430 }
1431 else
1432 {
1433 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1434 fprintf (dump_file, " / (");
1435 dump_integer_part (dump_file, "x", n);
1436 fprintf (dump_file, ")");
1437 }
1438 }
1439 else
1440 {
1441 dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1442 if (n > 0)
1443 fprintf (dump_file, " * ");
1444 dump_integer_part (dump_file, "x", n);
1445 }
1446
1447 fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1448 }
1449
1450
1451 tree fract_res = NULL_TREE;
1452 cache[0] = arg0;
1453
1454 /* Calculate the fractional part of the exponent. */
1455 for (unsigned i = 0; i < synth_info.deepest; i++)
1456 {
1457 if (synth_info.factors[i])
1458 {
1459 tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1460
1461 if (!fract_res)
1462 fract_res = sqrt_chain;
1463
1464 else
1465 fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1466 fract_res, sqrt_chain);
1467 }
1468 }
1469
1470 tree res = NULL_TREE;
1471
1472 if (neg_exp)
1473 {
1474 if (one_over)
1475 {
1476 if (n > 0)
1477 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1478 fract_res, integer_res);
1479 else
1480 res = fract_res;
1481
1482 res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1483 build_real (type, dconst1), res);
1484 }
1485 else
1486 {
1487 res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1488 fract_res, integer_res);
1489 }
1490 }
1491 else
1492 res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1493 fract_res, integer_res);
1494 return res;
1495 }
1496
1497 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1498 with location info LOC. If possible, create an equivalent and
1499 less expensive sequence of statements prior to GSI, and return an
1500 expression holding the result. */
1501
1502 static tree
1503 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
1504 tree arg0, tree arg1)
1505 {
1506 REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
1507 REAL_VALUE_TYPE c2, dconst3;
1508 HOST_WIDE_INT n;
1509 tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
1510 machine_mode mode;
1511 bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
1512 bool hw_sqrt_exists, c_is_int, c2_is_int;
1513
1514 dconst1_4 = dconst1;
1515 SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
1516
1517 /* If the exponent isn't a constant, there's nothing of interest
1518 to be done. */
1519 if (TREE_CODE (arg1) != REAL_CST)
1520 return NULL_TREE;
1521
1522 /* If the exponent is equivalent to an integer, expand to an optimal
1523 multiplication sequence when profitable. */
1524 c = TREE_REAL_CST (arg1);
1525 n = real_to_integer (&c);
1526 real_from_integer (&cint, VOIDmode, n, SIGNED);
1527 c_is_int = real_identical (&c, &cint);
1528
1529 if (c_is_int
1530 && ((n >= -1 && n <= 2)
1531 || (flag_unsafe_math_optimizations
1532 && speed_p
1533 && powi_cost (n) <= POWI_MAX_MULTS)))
1534 return gimple_expand_builtin_powi (gsi, loc, arg0, n);
1535
1536 /* Attempt various optimizations using sqrt and cbrt. */
1537 type = TREE_TYPE (arg0);
1538 mode = TYPE_MODE (type);
1539 sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1540
1541 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
1542 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
1543 sqrt(-0) = -0. */
1544 if (sqrtfn
1545 && REAL_VALUES_EQUAL (c, dconsthalf)
1546 && !HONOR_SIGNED_ZEROS (mode))
1547 return build_and_insert_call (gsi, loc, sqrtfn, arg0);
1548
1549 hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
1550
1551 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
1552 optimizations since 1./3. is not exactly representable. If x
1553 is negative and finite, the correct value of pow(x,1./3.) is
1554 a NaN with the "invalid" exception raised, because the value
1555 of 1./3. actually has an even denominator. The correct value
1556 of cbrt(x) is a negative real value. */
1557 cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
1558 dconst1_3 = real_value_truncate (mode, dconst_third ());
1559
1560 if (flag_unsafe_math_optimizations
1561 && cbrtfn
1562 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1563 && REAL_VALUES_EQUAL (c, dconst1_3))
1564 return build_and_insert_call (gsi, loc, cbrtfn, arg0);
1565
1566 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
1567 if we don't have a hardware sqrt insn. */
1568 dconst1_6 = dconst1_3;
1569 SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
1570
1571 if (flag_unsafe_math_optimizations
1572 && sqrtfn
1573 && cbrtfn
1574 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1575 && speed_p
1576 && hw_sqrt_exists
1577 && REAL_VALUES_EQUAL (c, dconst1_6))
1578 {
1579 /* sqrt(x) */
1580 sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
1581
1582 /* cbrt(sqrt(x)) */
1583 return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
1584 }
1585
1586
1587 /* Attempt to expand the POW as a product of square root chains.
1588 Expand the 0.25 case even when optimising for size. */
1589 if (flag_unsafe_math_optimizations
1590 && sqrtfn
1591 && hw_sqrt_exists
1592 && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
1593 && !HONOR_SIGNED_ZEROS (mode))
1594 {
1595 unsigned int max_depth = speed_p
1596 ? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
1597 : 2;
1598
1599 tree expand_with_sqrts
1600 = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
1601
1602 if (expand_with_sqrts)
1603 return expand_with_sqrts;
1604 }
1605
1606 real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
1607 n = real_to_integer (&c2);
1608 real_from_integer (&cint, VOIDmode, n, SIGNED);
1609 c2_is_int = real_identical (&c2, &cint);
1610
1611 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
1612
1613 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
1614 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
1615
1616 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
1617 different from pow(x, 1./3.) due to rounding and behavior with
1618 negative x, we need to constrain this transformation to unsafe
1619 math and positive x or finite math. */
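  /* For instance (illustration only): c = 5./3. gives n = 5, and the
     formula above yields powi (x, 1) * powi (cbrt (x), 2), i.e.
     x * cbrt (x) * cbrt (x), in place of the pow library call.  */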
1620 real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
1621 real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
1622 real_round (&c2, mode, &c2);
1623 n = real_to_integer (&c2);
1624 real_from_integer (&cint, VOIDmode, n, SIGNED);
1625 real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
1626 real_convert (&c2, mode, &c2);
1627
1628 if (flag_unsafe_math_optimizations
1629 && cbrtfn
1630 && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
1631 && real_identical (&c2, &c)
1632 && !c2_is_int
1633 && optimize_function_for_speed_p (cfun)
1634 && powi_cost (n / 3) <= POWI_MAX_MULTS)
1635 {
1636 tree powi_x_ndiv3 = NULL_TREE;
1637
1638 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
1639 possible or profitable, give up. Skip the degenerate case when
1640 abs(n) < 3, where the result is always 1. */
1641 if (absu_hwi (n) >= 3)
1642 {
1643 powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
1644 abs_hwi (n / 3));
1645 if (!powi_x_ndiv3)
1646 return NULL_TREE;
1647 }
1648
1649 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
1650 as that creates an unnecessary variable. Instead, just produce
1651 either cbrt(x) or cbrt(x) * cbrt(x). */
1652 cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
1653
1654 if (absu_hwi (n) % 3 == 1)
1655 powi_cbrt_x = cbrt_x;
1656 else
1657 powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1658 cbrt_x, cbrt_x);
1659
1660 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
1661 if (absu_hwi (n) < 3)
1662 result = powi_cbrt_x;
1663 else
1664 result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1665 powi_x_ndiv3, powi_cbrt_x);
1666
1667 /* If n is negative, reciprocate the result. */
1668 if (n < 0)
1669 result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1670 build_real (type, dconst1), result);
1671
1672 return result;
1673 }
1674
1675 /* No optimizations succeeded. */
1676 return NULL_TREE;
1677 }
1678
1679 /* ARG is the argument to a cabs builtin call in GSI with location info
1680 LOC. Create a sequence of statements prior to GSI that calculates
1681 sqrt(R*R + I*I), where R and I are the real and imaginary components
1682 of ARG, respectively. Return an expression holding the result. */
1683
1684 static tree
1685 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
1686 {
1687 tree real_part, imag_part, addend1, addend2, sum, result;
1688 tree type = TREE_TYPE (TREE_TYPE (arg));
1689 tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1690 machine_mode mode = TYPE_MODE (type);
1691
1692 if (!flag_unsafe_math_optimizations
1693 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
1694 || !sqrtfn
1695 || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
1696 return NULL_TREE;
1697
1698 real_part = build_and_insert_ref (gsi, loc, type, "cabs",
1699 REALPART_EXPR, arg);
1700 addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1701 real_part, real_part);
1702 imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
1703 IMAGPART_EXPR, arg);
1704 addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
1705 imag_part, imag_part);
1706 sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
1707 result = build_and_insert_call (gsi, loc, sqrtfn, sum);
1708
1709 return result;
1710 }
1711
1712 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
1713 on the SSA_NAME argument of each of them. Also expand powi(x,n) into
1714 an optimal number of multiplies, when n is a constant. */
1715
1716 namespace {
1717
1718 const pass_data pass_data_cse_sincos =
1719 {
1720 GIMPLE_PASS, /* type */
1721 "sincos", /* name */
1722 OPTGROUP_NONE, /* optinfo_flags */
1723 TV_NONE, /* tv_id */
1724 PROP_ssa, /* properties_required */
1725 0, /* properties_provided */
1726 0, /* properties_destroyed */
1727 0, /* todo_flags_start */
1728 TODO_update_ssa, /* todo_flags_finish */
1729 };
1730
1731 class pass_cse_sincos : public gimple_opt_pass
1732 {
1733 public:
1734 pass_cse_sincos (gcc::context *ctxt)
1735 : gimple_opt_pass (pass_data_cse_sincos, ctxt)
1736 {}
1737
1738 /* opt_pass methods: */
1739 virtual bool gate (function *)
1740 {
1741 /* We no longer require either sincos or cexp, since powi expansion
1742 piggybacks on this pass. */
1743 return optimize;
1744 }
1745
1746 virtual unsigned int execute (function *);
1747
1748 }; // class pass_cse_sincos
1749
1750 unsigned int
1751 pass_cse_sincos::execute (function *fun)
1752 {
1753 basic_block bb;
1754 bool cfg_changed = false;
1755
1756 calculate_dominance_info (CDI_DOMINATORS);
1757 memset (&sincos_stats, 0, sizeof (sincos_stats));
1758
1759 FOR_EACH_BB_FN (bb, fun)
1760 {
1761 gimple_stmt_iterator gsi;
1762 bool cleanup_eh = false;
1763
1764 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1765 {
1766 gimple stmt = gsi_stmt (gsi);
1767 tree fndecl;
1768
1769 /* Only the last stmt in a bb could throw, no need to call
1770 gimple_purge_dead_eh_edges if we change something in the middle
1771 of a basic block. */
1772 cleanup_eh = false;
1773
1774 if (is_gimple_call (stmt)
1775 && gimple_call_lhs (stmt)
1776 && (fndecl = gimple_call_fndecl (stmt))
1777 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
1778 {
1779 tree arg, arg0, arg1, result;
1780 HOST_WIDE_INT n;
1781 location_t loc;
1782
1783 switch (DECL_FUNCTION_CODE (fndecl))
1784 {
1785 CASE_FLT_FN (BUILT_IN_COS):
1786 CASE_FLT_FN (BUILT_IN_SIN):
1787 CASE_FLT_FN (BUILT_IN_CEXPI):
1788 /* Make sure we have either sincos or cexp. */
1789 if (!targetm.libc_has_function (function_c99_math_complex)
1790 && !targetm.libc_has_function (function_sincos))
1791 break;
1792
1793 arg = gimple_call_arg (stmt, 0);
1794 if (TREE_CODE (arg) == SSA_NAME)
1795 cfg_changed |= execute_cse_sincos_1 (arg);
1796 break;
1797
1798 CASE_FLT_FN (BUILT_IN_POW):
1799 arg0 = gimple_call_arg (stmt, 0);
1800 arg1 = gimple_call_arg (stmt, 1);
1801
1802 loc = gimple_location (stmt);
1803 result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
1804
1805 if (result)
1806 {
1807 tree lhs = gimple_get_lhs (stmt);
1808 gassign *new_stmt = gimple_build_assign (lhs, result);
1809 gimple_set_location (new_stmt, loc);
1810 unlink_stmt_vdef (stmt);
1811 gsi_replace (&gsi, new_stmt, true);
1812 cleanup_eh = true;
1813 if (gimple_vdef (stmt))
1814 release_ssa_name (gimple_vdef (stmt));
1815 }
1816 break;
1817
1818 CASE_FLT_FN (BUILT_IN_POWI):
1819 arg0 = gimple_call_arg (stmt, 0);
1820 arg1 = gimple_call_arg (stmt, 1);
1821 loc = gimple_location (stmt);
1822
1823 if (real_minus_onep (arg0))
1824 {
1825 tree t0, t1, cond, one, minus_one;
1826 gassign *stmt;
1827
1828 t0 = TREE_TYPE (arg0);
1829 t1 = TREE_TYPE (arg1);
1830 one = build_real (t0, dconst1);
1831 minus_one = build_real (t0, dconstm1);
1832
1833 cond = make_temp_ssa_name (t1, NULL, "powi_cond");
1834 stmt = gimple_build_assign (cond, BIT_AND_EXPR,
1835 arg1, build_int_cst (t1, 1));
1836 gimple_set_location (stmt, loc);
1837 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1838
1839 result = make_temp_ssa_name (t0, NULL, "powi");
1840 stmt = gimple_build_assign (result, COND_EXPR, cond,
1841 minus_one, one);
1842 gimple_set_location (stmt, loc);
1843 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1844 }
1845 else
1846 {
1847 if (!tree_fits_shwi_p (arg1))
1848 break;
1849
1850 n = tree_to_shwi (arg1);
1851 result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
1852 }
1853
1854 if (result)
1855 {
1856 tree lhs = gimple_get_lhs (stmt);
1857 gassign *new_stmt = gimple_build_assign (lhs, result);
1858 gimple_set_location (new_stmt, loc);
1859 unlink_stmt_vdef (stmt);
1860 gsi_replace (&gsi, new_stmt, true);
1861 cleanup_eh = true;
1862 if (gimple_vdef (stmt))
1863 release_ssa_name (gimple_vdef (stmt));
1864 }
1865 break;
1866
1867 CASE_FLT_FN (BUILT_IN_CABS):
1868 arg0 = gimple_call_arg (stmt, 0);
1869 loc = gimple_location (stmt);
1870 result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
1871
1872 if (result)
1873 {
1874 tree lhs = gimple_get_lhs (stmt);
1875 gassign *new_stmt = gimple_build_assign (lhs, result);
1876 gimple_set_location (new_stmt, loc);
1877 unlink_stmt_vdef (stmt);
1878 gsi_replace (&gsi, new_stmt, true);
1879 cleanup_eh = true;
1880 if (gimple_vdef (stmt))
1881 release_ssa_name (gimple_vdef (stmt));
1882 }
1883 break;
1884
1885 default:;
1886 }
1887 }
1888 }
1889 if (cleanup_eh)
1890 cfg_changed |= gimple_purge_dead_eh_edges (bb);
1891 }
1892
1893 statistics_counter_event (fun, "sincos statements inserted",
1894 sincos_stats.inserted);
1895
1896 free_dominance_info (CDI_DOMINATORS);
1897 return cfg_changed ? TODO_cleanup_cfg : 0;
1898 }
1899
1900 } // anon namespace
1901
1902 gimple_opt_pass *
1903 make_pass_cse_sincos (gcc::context *ctxt)
1904 {
1905 return new pass_cse_sincos (ctxt);
1906 }
1907
1908 /* A symbolic number is used to detect byte permutation and selection
1909 patterns. Therefore the field N contains an artificial number
1910 consisting of octet sized markers:
1911
1912 0 - target byte has the value 0
1913 FF - target byte has an unknown value (e.g. due to sign extension)
1914 1..size - marker value is the index of the source byte, counting from 1 at the lsb.
1915
1916 To detect permutations on memory sources (arrays and structures), a symbolic
1917 number is also associated with a base address (the array or structure the load
1918 is made from), an offset from that base address, and a range, which gives the
1919 distance between the highest and lowest memory locations accessed to build
1920 such a symbolic number. The range is thus different from size, which reflects
1921 the size of the type of the current expression. Note that for a non-memory
1922 source, range holds the same value as size.
1923
1924 For instance, for an array char a[], (short) a[0] | (short) a[3] would have
1925 a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
1926 still have a size of 2 but this time a range of 1. */
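/* For example, a plain 32-bit value is described by the symbolic number
   0x04030201: the least significant target byte holds source byte 1, the
   next one source byte 2, and so on.  After a full byte swap of that
   value the markers appear in reverse order, giving 0x01020304.  */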
1927
1928 struct symbolic_number {
1929 uint64_t n;
1930 tree type;
1931 tree base_addr;
1932 tree offset;
1933 HOST_WIDE_INT bytepos;
1934 tree alias_set;
1935 tree vuse;
1936 unsigned HOST_WIDE_INT range;
1937 };
1938
1939 #define BITS_PER_MARKER 8
1940 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
1941 #define MARKER_BYTE_UNKNOWN MARKER_MASK
1942 #define HEAD_MARKER(n, size) \
1943 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
1944
1945 /* The number which the find_bswap_or_nop_1 result should match in
1946 order to have a nop. The number is masked according to the size of
1947 the symbolic number before using it. */
1948 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
1949 (uint64_t)0x08070605 << 32 | 0x04030201)
1950
1951 /* The number which the find_bswap_or_nop_1 result should match in
1952 order to have a byte swap. The number is shifted right according to the
1953 size of the symbolic number before using it. */
1954 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
1955 (uint64_t)0x01020304 << 32 | 0x05060708)
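/* When the symbolic number covers fewer than 8 bytes, these constants are
   reduced accordingly before the comparison: for a 4 byte value CMPNOP is
   masked down to 0x04030201 and CMPXCHG is shifted down to 0x01020304,
   and for a 2 byte value they become 0x0201 and 0x0102 respectively.  */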
1956
1957 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
1958 number N. Return false if the requested operation is not permitted
1959 on a symbolic number. */
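/* For instance, on the 4 byte symbolic number 0x04030201 a left shift by
   8 bits yields 0x03020100 (the vacated low byte is known to be zero),
   while an arithmetic right shift by 8 bits of a signed value whose head
   marker is set yields 0xFF040302, the new high byte being marked unknown
   because it depends on the sign.  */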
1960
1961 static inline bool
1962 do_shift_rotate (enum tree_code code,
1963 struct symbolic_number *n,
1964 int count)
1965 {
1966 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
1967 unsigned head_marker;
1968
1969 if (count % BITS_PER_UNIT != 0)
1970 return false;
1971 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
1972
1973 /* Zero out the extra bits of N in order to avoid them being shifted
1974 into the significant bits. */
1975 if (size < 64 / BITS_PER_MARKER)
1976 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
1977
1978 switch (code)
1979 {
1980 case LSHIFT_EXPR:
1981 n->n <<= count;
1982 break;
1983 case RSHIFT_EXPR:
1984 head_marker = HEAD_MARKER (n->n, size);
1985 n->n >>= count;
1986 /* Arithmetic shift of signed type: result is dependent on the value. */
1987 if (!TYPE_UNSIGNED (n->type) && head_marker)
1988 for (i = 0; i < count / BITS_PER_MARKER; i++)
1989 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
1990 << ((size - 1 - i) * BITS_PER_MARKER);
1991 break;
1992 case LROTATE_EXPR:
1993 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
1994 break;
1995 case RROTATE_EXPR:
1996 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
1997 break;
1998 default:
1999 return false;
2000 }
2001 /* Zero unused bits for size. */
2002 if (size < 64 / BITS_PER_MARKER)
2003 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2004 return true;
2005 }
2006
2007 /* Perform sanity checking for the symbolic number N and the gimple
2008 statement STMT. */
2009
2010 static inline bool
2011 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
2012 {
2013 tree lhs_type;
2014
2015 lhs_type = gimple_expr_type (stmt);
2016
2017 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
2018 return false;
2019
2020 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
2021 return false;
2022
2023 return true;
2024 }
2025
2026 /* Initialize the symbolic number N for the bswap pass from the base element
2027 SRC manipulated by the bitwise OR expression. */
2028
2029 static bool
2030 init_symbolic_number (struct symbolic_number *n, tree src)
2031 {
2032 int size;
2033
2034 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
2035
2036 /* Set up the symbolic number N by setting each byte marker to a value between
2037 1 and the byte size of SRC. The highest order byte is set to that size and
2038 the lowest order byte to 1. */
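/* For a 32-bit SRC, for instance, this leaves n->n equal to 0x04030201
   and n->range equal to 4.  */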
2039 n->type = TREE_TYPE (src);
2040 size = TYPE_PRECISION (n->type);
2041 if (size % BITS_PER_UNIT != 0)
2042 return false;
2043 size /= BITS_PER_UNIT;
2044 if (size > 64 / BITS_PER_MARKER)
2045 return false;
2046 n->range = size;
2047 n->n = CMPNOP;
2048
2049 if (size < 64 / BITS_PER_MARKER)
2050 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
2051
2052 return true;
2053 }
2054
2055 /* Check whether STMT might be a byte swap or a nop from a memory source and
2056 return the answer. If so, REF is that memory source, and the base of the
2057 memory area accessed and the offset of the access from that base are recorded in N. */
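/* For example, a load of a[3] from "char a[]" records a bytepos of 3 and
   an initial range of 1 byte; merging with loads of neighbouring elements
   later grows the range accordingly.  */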
2058
2059 bool
2060 find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
2061 {
2062 /* The leaf node is an array or component ref. Memorize its base and
2063 offset from the base to compare against other such leaf nodes. */
2064 HOST_WIDE_INT bitsize, bitpos;
2065 machine_mode mode;
2066 int unsignedp, volatilep;
2067 tree offset, base_addr;
2068
2069 /* Not prepared to handle PDP endian. */
2070 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
2071 return false;
2072
2073 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
2074 return false;
2075
2076 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
2077 &unsignedp, &volatilep, false);
2078
2079 if (TREE_CODE (base_addr) == MEM_REF)
2080 {
2081 offset_int bit_offset = 0;
2082 tree off = TREE_OPERAND (base_addr, 1);
2083
2084 if (!integer_zerop (off))
2085 {
2086 offset_int boff, coff = mem_ref_offset (base_addr);
2087 boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
2088 bit_offset += boff;
2089 }
2090
2091 base_addr = TREE_OPERAND (base_addr, 0);
2092
2093 /* Avoid returning a negative bitpos as this may wreak havoc later. */
2094 if (wi::neg_p (bit_offset))
2095 {
2096 offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
2097 offset_int tem = bit_offset.and_not (mask);
2098 /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
2099 Subtract it from BIT_OFFSET and add it (scaled) to OFFSET. */
2100 bit_offset -= tem;
2101 tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
2102 if (offset)
2103 offset = size_binop (PLUS_EXPR, offset,
2104 wide_int_to_tree (sizetype, tem));
2105 else
2106 offset = wide_int_to_tree (sizetype, tem);
2107 }
2108
2109 bitpos += bit_offset.to_shwi ();
2110 }
2111
2112 if (bitpos % BITS_PER_UNIT)
2113 return false;
2114 if (bitsize % BITS_PER_UNIT)
2115 return false;
2116
2117 if (!init_symbolic_number (n, ref))
2118 return false;
2119 n->base_addr = base_addr;
2120 n->offset = offset;
2121 n->bytepos = bitpos / BITS_PER_UNIT;
2122 n->alias_set = reference_alias_ptr_type (ref);
2123 n->vuse = gimple_vuse (stmt);
2124 return true;
2125 }
2126
2127 /* Compute the symbolic number N representing the result of a bitwise OR on
2128 two symbolic numbers N1 and N2 whose source statements are respectively
2129 SOURCE_STMT1 and SOURCE_STMT2. */
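/* For instance, if N1 describes a 2 byte load at byte position 0 of some
   array and N2 a 2 byte load at byte position 2 of the same array, the
   merged number covers a range of 4 bytes and combines the markers of
   both operands, provided no target byte receives two different source
   bytes.  */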
2130
2131 static gimple
2132 perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
2133 gimple source_stmt2, struct symbolic_number *n2,
2134 struct symbolic_number *n)
2135 {
2136 int i, size;
2137 uint64_t mask;
2138 gimple source_stmt;
2139 struct symbolic_number *n_start;
2140
2141 /* The sources are different; cancel the bswap if they are not memory
2142 locations with the same base (array, structure, ...). */
2143 if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
2144 {
2145 int64_t inc;
2146 HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
2147 struct symbolic_number *toinc_n_ptr, *n_end;
2148
2149 if (!n1->base_addr || !n2->base_addr
2150 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
2151 return NULL;
2152
2153 if (!n1->offset != !n2->offset
2154 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
2155 return NULL;
2156
2157 if (n1->bytepos < n2->bytepos)
2158 {
2159 n_start = n1;
2160 start_sub = n2->bytepos - n1->bytepos;
2161 source_stmt = source_stmt1;
2162 }
2163 else
2164 {
2165 n_start = n2;
2166 start_sub = n1->bytepos - n2->bytepos;
2167 source_stmt = source_stmt2;
2168 }
2169
2170 /* Find the highest address at which a load is performed and
2171 compute related info. */
2172 end1 = n1->bytepos + (n1->range - 1);
2173 end2 = n2->bytepos + (n2->range - 1);
2174 if (end1 < end2)
2175 {
2176 end = end2;
2177 end_sub = end2 - end1;
2178 }
2179 else
2180 {
2181 end = end1;
2182 end_sub = end1 - end2;
2183 }
2184 n_end = (end2 > end1) ? n2 : n1;
2185
2186 /* Find the symbolic number whose lsb is the most significant. */
2187 if (BYTES_BIG_ENDIAN)
2188 toinc_n_ptr = (n_end == n1) ? n2 : n1;
2189 else
2190 toinc_n_ptr = (n_start == n1) ? n2 : n1;
2191
2192 n->range = end - n_start->bytepos + 1;
2193
2194 /* Check that the range of memory covered can be represented by
2195 a symbolic number. */
2196 if (n->range > 64 / BITS_PER_MARKER)
2197 return NULL;
2198
2199 /* Reinterpret the byte marks in the symbolic number holding the value of
2200 bigger weight, according to the target endianness. */
2201 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
2202 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
2203 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
2204 {
2205 unsigned marker
2206 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
2207 if (marker && marker != MARKER_BYTE_UNKNOWN)
2208 toinc_n_ptr->n += inc;
2209 }
2210 }
2211 else
2212 {
2213 n->range = n1->range;
2214 n_start = n1;
2215 source_stmt = source_stmt1;
2216 }
2217
2218 if (!n1->alias_set
2219 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
2220 n->alias_set = n1->alias_set;
2221 else
2222 n->alias_set = ptr_type_node;
2223 n->vuse = n_start->vuse;
2224 n->base_addr = n_start->base_addr;
2225 n->offset = n_start->offset;
2226 n->bytepos = n_start->bytepos;
2227 n->type = n_start->type;
2228 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2229
2230 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
2231 {
2232 uint64_t masked1, masked2;
2233
2234 masked1 = n1->n & mask;
2235 masked2 = n2->n & mask;
2236 if (masked1 && masked2 && masked1 != masked2)
2237 return NULL;
2238 }
2239 n->n = n1->n | n2->n;
2240
2241 return source_stmt;
2242 }
2243
2244 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
2245 the operation given by the rhs of STMT on the result. If the operation
2246 can successfully be executed, the function returns a gimple stmt whose
2247 rhs's first tree is the expression of the source operand; otherwise it
2248 returns NULL. */
2249
2250 static gimple
2251 find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
2252 {
2253 enum tree_code code;
2254 tree rhs1, rhs2 = NULL;
2255 gimple rhs1_stmt, rhs2_stmt, source_stmt1;
2256 enum gimple_rhs_class rhs_class;
2257
2258 if (!limit || !is_gimple_assign (stmt))
2259 return NULL;
2260
2261 rhs1 = gimple_assign_rhs1 (stmt);
2262
2263 if (find_bswap_or_nop_load (stmt, rhs1, n))
2264 return stmt;
2265
2266 if (TREE_CODE (rhs1) != SSA_NAME)
2267 return NULL;
2268
2269 code = gimple_assign_rhs_code (stmt);
2270 rhs_class = gimple_assign_rhs_class (stmt);
2271 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2272
2273 if (rhs_class == GIMPLE_BINARY_RHS)
2274 rhs2 = gimple_assign_rhs2 (stmt);
2275
2276 /* Handle unary rhs and binary rhs with integer constants as second
2277 operand. */
2278
2279 if (rhs_class == GIMPLE_UNARY_RHS
2280 || (rhs_class == GIMPLE_BINARY_RHS
2281 && TREE_CODE (rhs2) == INTEGER_CST))
2282 {
2283 if (code != BIT_AND_EXPR
2284 && code != LSHIFT_EXPR
2285 && code != RSHIFT_EXPR
2286 && code != LROTATE_EXPR
2287 && code != RROTATE_EXPR
2288 && !CONVERT_EXPR_CODE_P (code))
2289 return NULL;
2290
2291 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
2292
2293 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
2294 we have to initialize the symbolic number. */
2295 if (!source_stmt1)
2296 {
2297 if (gimple_assign_load_p (stmt)
2298 || !init_symbolic_number (n, rhs1))
2299 return NULL;
2300 source_stmt1 = stmt;
2301 }
2302
2303 switch (code)
2304 {
2305 case BIT_AND_EXPR:
2306 {
2307 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2308 uint64_t val = int_cst_value (rhs2), mask = 0;
2309 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
2310
2311 /* Only constants masking full bytes are allowed. */
2312 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
2313 if ((val & tmp) != 0 && (val & tmp) != tmp)
2314 return NULL;
2315 else if (val & tmp)
2316 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
2317
2318 n->n &= mask;
2319 }
2320 break;
2321 case LSHIFT_EXPR:
2322 case RSHIFT_EXPR:
2323 case LROTATE_EXPR:
2324 case RROTATE_EXPR:
2325 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
2326 return NULL;
2327 break;
2328 CASE_CONVERT:
2329 {
2330 int i, type_size, old_type_size;
2331 tree type;
2332
2333 type = gimple_expr_type (stmt);
2334 type_size = TYPE_PRECISION (type);
2335 if (type_size % BITS_PER_UNIT != 0)
2336 return NULL;
2337 type_size /= BITS_PER_UNIT;
2338 if (type_size > 64 / BITS_PER_MARKER)
2339 return NULL;
2340
2341 /* Sign extension: result is dependent on the value. */
2342 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
2343 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
2344 && HEAD_MARKER (n->n, old_type_size))
2345 for (i = 0; i < type_size - old_type_size; i++)
2346 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
2347 << ((type_size - 1 - i) * BITS_PER_MARKER);
2348
2349 if (type_size < 64 / BITS_PER_MARKER)
2350 {
2351 /* If STMT casts to a smaller type mask out the bits not
2352 belonging to the target type. */
2353 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
2354 }
2355 n->type = type;
2356 if (!n->base_addr)
2357 n->range = type_size;
2358 }
2359 break;
2360 default:
2361 return NULL;
2362 };
2363 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
2364 }
2365
2366 /* Handle binary rhs. */
2367
2368 if (rhs_class == GIMPLE_BINARY_RHS)
2369 {
2370 struct symbolic_number n1, n2;
2371 gimple source_stmt, source_stmt2;
2372
2373 if (code != BIT_IOR_EXPR)
2374 return NULL;
2375
2376 if (TREE_CODE (rhs2) != SSA_NAME)
2377 return NULL;
2378
2379 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2380
2381 switch (code)
2382 {
2383 case BIT_IOR_EXPR:
2384 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
2385
2386 if (!source_stmt1)
2387 return NULL;
2388
2389 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
2390
2391 if (!source_stmt2)
2392 return NULL;
2393
2394 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
2395 return NULL;
2396
2397 if (!n1.vuse != !n2.vuse
2398 || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
2399 return NULL;
2400
2401 source_stmt
2402 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
2403
2404 if (!source_stmt)
2405 return NULL;
2406
2407 if (!verify_symbolic_number_p (n, stmt))
2408 return NULL;
2409
2410 break;
2411 default:
2412 return NULL;
2413 }
2414 return source_stmt;
2415 }
2416 return NULL;
2417 }
2418
2419 /* Check whether STMT completes a bswap implementation or a read in a given
2420 endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
2421 accordingly. It also sets N to describe the operations
2422 performed: the size of the resulting expression and whether it works on
2423 a memory source, and if so its alias set and vuse. Finally, the
2424 function returns a stmt whose rhs's first tree is the source
2425 expression. */
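/* For instance, for an unsigned 32-bit X the classic open-coded swap

     (X >> 24) | ((X >> 8) & 0xff00) | ((X << 8) & 0xff0000) | (X << 24)

   walks down to the symbolic number 0x01020304, which matches CMPXCHG for
   a 4 byte value, so *BSWAP is set to true.  An expression that merely
   reassembles the bytes in their original order matches CMPNOP instead
   and *BSWAP is set to false.  */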
2426
2427 static gimple
2428 find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
2429 {
2430 /* The number which the find_bswap_or_nop_1 result should match in order
2431 to have a full byte swap. The number is shifted to the right
2432 according to the size of the symbolic number before using it. */
2433 uint64_t cmpxchg = CMPXCHG;
2434 uint64_t cmpnop = CMPNOP;
2435
2436 gimple source_stmt;
2437 int limit;
2438
2439 /* The last parameter determines the search depth limit. It usually
2440 correlates directly to the number n of bytes to be touched. We
2441 increase that number by log2(n) + 1 here in order to also
2442 cover signed -> unsigned conversions of the src operand as can be seen
2443 in libgcc, and for the initial shift/and operation of the src operand. */
2444 limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
2445 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
2446 source_stmt = find_bswap_or_nop_1 (stmt, n, limit);
2447
2448 if (!source_stmt)
2449 return NULL;
2450
2451 /* Find real size of result (highest non-zero byte). */
2452 if (n->base_addr)
2453 {
2454 int rsize;
2455 uint64_t tmpn;
2456
2457 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
2458 n->range = rsize;
2459 }
2460
2461 /* Zero out the extra bits of N and CMP*. */
2462 if (n->range < (int) sizeof (int64_t))
2463 {
2464 uint64_t mask;
2465
2466 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
2467 cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
2468 cmpnop &= mask;
2469 }
2470
2471 /* A complete byte swap should make the symbolic number start with
2472 the largest digit in the highest order byte. An unchanged symbolic
2473 number indicates a read with the same endianness as the target architecture. */
2474 if (n->n == cmpnop)
2475 *bswap = false;
2476 else if (n->n == cmpxchg)
2477 *bswap = true;
2478 else
2479 return NULL;
2480
2481 /* Useless bit manipulation performed by code. */
2482 if (!n->base_addr && n->n == cmpnop)
2483 return NULL;
2484
2485 n->range *= BITS_PER_UNIT;
2486 return source_stmt;
2487 }
2488
2489 namespace {
2490
2491 const pass_data pass_data_optimize_bswap =
2492 {
2493 GIMPLE_PASS, /* type */
2494 "bswap", /* name */
2495 OPTGROUP_NONE, /* optinfo_flags */
2496 TV_NONE, /* tv_id */
2497 PROP_ssa, /* properties_required */
2498 0, /* properties_provided */
2499 0, /* properties_destroyed */
2500 0, /* todo_flags_start */
2501 0, /* todo_flags_finish */
2502 };
2503
2504 class pass_optimize_bswap : public gimple_opt_pass
2505 {
2506 public:
2507 pass_optimize_bswap (gcc::context *ctxt)
2508 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
2509 {}
2510
2511 /* opt_pass methods: */
2512 virtual bool gate (function *)
2513 {
2514 return flag_expensive_optimizations && optimize;
2515 }
2516
2517 virtual unsigned int execute (function *);
2518
2519 }; // class pass_optimize_bswap
2520
2521 /* Perform the bswap optimization: replace the expression computed in the rhs
2522 of CUR_STMT by an equivalent bswap, load or load + bswap expression.
2523 Which of these alternatives replaces the rhs is given by N->base_addr (non
2524 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
2525 load to perform are also given in N, while the builtin bswap to invoke is
2526 given in FNDECL. Finally, if a load is involved, SRC_STMT refers to one of
2527 the load statements used to construct the rhs in CUR_STMT and N->range gives
2528 the size of the rhs expression for maintaining some statistics.
2529
2530 Note that if the replacement involves a load, CUR_STMT is moved just after
2531 SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
2532 changing basic block. */
2533
2534 static bool
2535 bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
2536 tree load_type, struct symbolic_number *n, bool bswap)
2537 {
2538 gimple_stmt_iterator gsi;
2539 tree src, tmp, tgt;
2540 gimple bswap_stmt;
2541
2542 gsi = gsi_for_stmt (cur_stmt);
2543 src = gimple_assign_rhs1 (src_stmt);
2544 tgt = gimple_assign_lhs (cur_stmt);
2545
2546 /* Need to load the value from memory first. */
2547 if (n->base_addr)
2548 {
2549 gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
2550 tree addr_expr, addr_tmp, val_expr, val_tmp;
2551 tree load_offset_ptr, aligned_load_type;
2552 gimple addr_stmt, load_stmt;
2553 unsigned align;
2554 HOST_WIDE_INT load_offset = 0;
2555
2556 align = get_object_alignment (src);
2557 /* If the new access is smaller than the original one, we need
2558 to perform a big-endian adjustment. */
2559 if (BYTES_BIG_ENDIAN)
2560 {
2561 HOST_WIDE_INT bitsize, bitpos;
2562 machine_mode mode;
2563 int unsignedp, volatilep;
2564 tree offset;
2565
2566 get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
2567 &unsignedp, &volatilep, false);
2568 if (n->range < (unsigned HOST_WIDE_INT) bitsize)
2569 {
2570 load_offset = (bitsize - n->range) / BITS_PER_UNIT;
2571 unsigned HOST_WIDE_INT l
2572 = (load_offset * BITS_PER_UNIT) & (align - 1);
2573 if (l)
2574 align = l & -l;
2575 }
2576 }
2577
2578 if (bswap
2579 && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
2580 && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
2581 return false;
2582
2583 /* Move cur_stmt just before one of the loads of the original
2584 sequence to ensure it has the same VUSE. See PR61517 for what
2585 could go wrong. */
2586 gsi_move_before (&gsi, &gsi_ins);
2587 gsi = gsi_for_stmt (cur_stmt);
2588
2589 /* Compute address to load from and cast according to the size
2590 of the load. */
2591 addr_expr = build_fold_addr_expr (unshare_expr (src));
2592 if (is_gimple_mem_ref_addr (addr_expr))
2593 addr_tmp = addr_expr;
2594 else
2595 {
2596 addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
2597 "load_src");
2598 addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
2599 gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
2600 }
2601
2602 /* Perform the load. */
2603 aligned_load_type = load_type;
2604 if (align < TYPE_ALIGN (load_type))
2605 aligned_load_type = build_aligned_type (load_type, align);
2606 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
2607 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
2608 load_offset_ptr);
2609
2610 if (!bswap)
2611 {
2612 if (n->range == 16)
2613 nop_stats.found_16bit++;
2614 else if (n->range == 32)
2615 nop_stats.found_32bit++;
2616 else
2617 {
2618 gcc_assert (n->range == 64);
2619 nop_stats.found_64bit++;
2620 }
2621
2622 /* Convert the result of load if necessary. */
2623 if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
2624 {
2625 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
2626 "load_dst");
2627 load_stmt = gimple_build_assign (val_tmp, val_expr);
2628 gimple_set_vuse (load_stmt, n->vuse);
2629 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2630 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
2631 }
2632 else
2633 {
2634 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
2635 gimple_set_vuse (cur_stmt, n->vuse);
2636 }
2637 update_stmt (cur_stmt);
2638
2639 if (dump_file)
2640 {
2641 fprintf (dump_file,
2642 "%d bit load in target endianness found at: ",
2643 (int) n->range);
2644 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2645 }
2646 return true;
2647 }
2648 else
2649 {
2650 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
2651 load_stmt = gimple_build_assign (val_tmp, val_expr);
2652 gimple_set_vuse (load_stmt, n->vuse);
2653 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
2654 }
2655 src = val_tmp;
2656 }
2657
2658 if (n->range == 16)
2659 bswap_stats.found_16bit++;
2660 else if (n->range == 32)
2661 bswap_stats.found_32bit++;
2662 else
2663 {
2664 gcc_assert (n->range == 64);
2665 bswap_stats.found_64bit++;
2666 }
2667
2668 tmp = src;
2669
2670 /* Convert the src expression if necessary. */
2671 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
2672 {
2673 gimple convert_stmt;
2674
2675 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
2676 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
2677 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
2678 }
2679
2680 /* The canonical form for a 16-bit bswap is a rotate expression. Only 16-bit
2681 values are handled this way because rotating a 2N-bit value by N bits is
2682 generally not equivalent to a bswap. Consider for instance 0x01020304 r>> 16,
2683 which gives 0x03040102 while a bswap of that value is 0x04030201. */
2684 if (bswap && n->range == 16)
2685 {
2686 tree count = build_int_cst (NULL, BITS_PER_UNIT);
2687 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
2688 bswap_stmt = gimple_build_assign (NULL, src);
2689 }
2690 else
2691 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
2692
2693 tmp = tgt;
2694
2695 /* Convert the result if necessary. */
2696 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
2697 {
2698 gimple convert_stmt;
2699
2700 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
2701 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
2702 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
2703 }
2704
2705 gimple_set_lhs (bswap_stmt, tmp);
2706
2707 if (dump_file)
2708 {
2709 fprintf (dump_file, "%d bit bswap implementation found at: ",
2710 (int) n->range);
2711 print_gimple_stmt (dump_file, cur_stmt, 0, 0);
2712 }
2713
2714 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
2715 gsi_remove (&gsi, true);
2716 return true;
2717 }
2718
2719 /* Find manual byte swap implementations as well as loads in a given
2720 endianness. Byte swaps are turned into a bswap builtin invocation,
2721 while endian loads are converted to a bswap builtin invocation or a
2722 simple load according to the target endianness. */
2723
2724 unsigned int
2725 pass_optimize_bswap::execute (function *fun)
2726 {
2727 basic_block bb;
2728 bool bswap32_p, bswap64_p;
2729 bool changed = false;
2730 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
2731
2732 if (BITS_PER_UNIT != 8)
2733 return 0;
2734
2735 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2736 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
2737 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2738 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
2739 || (bswap32_p && word_mode == SImode)));
2740
2741 /* Determine the argument type of the builtins. The code later on
2742 assumes that the return and argument type are the same. */
2743 if (bswap32_p)
2744 {
2745 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2746 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2747 }
2748
2749 if (bswap64_p)
2750 {
2751 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2752 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
2753 }
2754
2755 memset (&nop_stats, 0, sizeof (nop_stats));
2756 memset (&bswap_stats, 0, sizeof (bswap_stats));
2757
2758 FOR_EACH_BB_FN (bb, fun)
2759 {
2760 gimple_stmt_iterator gsi;
2761
2762 /* We do a reverse scan for bswap patterns to make sure we get the
2763 widest match. As bswap pattern matching doesn't handle previously
2764 inserted smaller bswap replacements as sub-patterns, the wider
2765 variant wouldn't be detected. */
2766 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
2767 {
2768 gimple src_stmt, cur_stmt = gsi_stmt (gsi);
2769 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
2770 enum tree_code code;
2771 struct symbolic_number n;
2772 bool bswap;
2773
2774 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
2775 might be moved to a different basic block by bswap_replace and gsi
2776 must not point to it in that case. Doing the gsi_prev here makes
2777 sure that gsi points to the statement previous to
2778 cur_stmt while still making sure that all statements in
2779 this basic block are considered. */
2780 gsi_prev (&gsi);
2781
2782 if (!is_gimple_assign (cur_stmt))
2783 continue;
2784
2785 code = gimple_assign_rhs_code (cur_stmt);
2786 switch (code)
2787 {
2788 case LROTATE_EXPR:
2789 case RROTATE_EXPR:
2790 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
2791 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
2792 % BITS_PER_UNIT)
2793 continue;
2794 /* Fall through. */
2795 case BIT_IOR_EXPR:
2796 break;
2797 default:
2798 continue;
2799 }
2800
2801 src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
2802
2803 if (!src_stmt)
2804 continue;
2805
2806 switch (n.range)
2807 {
2808 case 16:
2809 /* Already in canonical form, nothing to do. */
2810 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2811 continue;
2812 load_type = bswap_type = uint16_type_node;
2813 break;
2814 case 32:
2815 load_type = uint32_type_node;
2816 if (bswap32_p)
2817 {
2818 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
2819 bswap_type = bswap32_type;
2820 }
2821 break;
2822 case 64:
2823 load_type = uint64_type_node;
2824 if (bswap64_p)
2825 {
2826 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
2827 bswap_type = bswap64_type;
2828 }
2829 break;
2830 default:
2831 continue;
2832 }
2833
2834 if (bswap && !fndecl && n.range != 16)
2835 continue;
2836
2837 if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
2838 &n, bswap))
2839 changed = true;
2840 }
2841 }
2842
2843 statistics_counter_event (fun, "16-bit nop implementations found",
2844 nop_stats.found_16bit);
2845 statistics_counter_event (fun, "32-bit nop implementations found",
2846 nop_stats.found_32bit);
2847 statistics_counter_event (fun, "64-bit nop implementations found",
2848 nop_stats.found_64bit);
2849 statistics_counter_event (fun, "16-bit bswap implementations found",
2850 bswap_stats.found_16bit);
2851 statistics_counter_event (fun, "32-bit bswap implementations found",
2852 bswap_stats.found_32bit);
2853 statistics_counter_event (fun, "64-bit bswap implementations found",
2854 bswap_stats.found_64bit);
2855
2856 return (changed ? TODO_update_ssa : 0);
2857 }
2858
2859 } // anon namespace
2860
2861 gimple_opt_pass *
2862 make_pass_optimize_bswap (gcc::context *ctxt)
2863 {
2864 return new pass_optimize_bswap (ctxt);
2865 }
2866
2867 /* Return true if stmt is a type conversion operation that can be stripped
2868 when used in a widening multiply operation. */
2869 static bool
2870 widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
2871 {
2872 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2873
2874 if (TREE_CODE (result_type) == INTEGER_TYPE)
2875 {
2876 tree op_type;
2877 tree inner_op_type;
2878
2879 if (!CONVERT_EXPR_CODE_P (rhs_code))
2880 return false;
2881
2882 op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2883
2884 /* If the type of OP has the same precision as the result, then
2885 we can strip this conversion. The multiply operation will be
2886 selected to create the correct extension as a by-product. */
2887 if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2888 return true;
2889
2890 /* We can also strip a conversion if it preserves the signed-ness of
2891 the operation and doesn't narrow the range. */
2892 inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2893
2894 /* If the inner-most type is unsigned, then we can strip any
2895 intermediate widening operation. If it's signed, then the
2896 intermediate widening operation must also be signed. */
2897 if ((TYPE_UNSIGNED (inner_op_type)
2898 || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2899 && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2900 return true;
2901
2902 return false;
2903 }
2904
2905 return rhs_code == FIXED_CONVERT_EXPR;
2906 }
2907
2908 /* Return true if RHS is a suitable operand for a widening multiplication,
2909 assuming a target type of TYPE.
2910 There are two cases:
2911
2912 - RHS makes some value at least twice as wide. Store that value
2913 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2914
2915 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2916 but leave *TYPE_OUT untouched. */
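/* For instance, with TYPE a 32-bit int and RHS defined by a statement of
   the form "tmp = (int) s" for a 16-bit short S, *NEW_RHS_OUT is set to S
   and *TYPE_OUT to the 16-bit type, since the conversion can be stripped
   and recreated by the widening multiply itself.  */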
2917
2918 static bool
2919 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2920 tree *new_rhs_out)
2921 {
2922 gimple stmt;
2923 tree type1, rhs1;
2924
2925 if (TREE_CODE (rhs) == SSA_NAME)
2926 {
2927 stmt = SSA_NAME_DEF_STMT (rhs);
2928 if (is_gimple_assign (stmt))
2929 {
2930 if (! widening_mult_conversion_strippable_p (type, stmt))
2931 rhs1 = rhs;
2932 else
2933 {
2934 rhs1 = gimple_assign_rhs1 (stmt);
2935
2936 if (TREE_CODE (rhs1) == INTEGER_CST)
2937 {
2938 *new_rhs_out = rhs1;
2939 *type_out = NULL;
2940 return true;
2941 }
2942 }
2943 }
2944 else
2945 rhs1 = rhs;
2946
2947 type1 = TREE_TYPE (rhs1);
2948
2949 if (TREE_CODE (type1) != TREE_CODE (type)
2950 || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2951 return false;
2952
2953 *new_rhs_out = rhs1;
2954 *type_out = type1;
2955 return true;
2956 }
2957
2958 if (TREE_CODE (rhs) == INTEGER_CST)
2959 {
2960 *new_rhs_out = rhs;
2961 *type_out = NULL;
2962 return true;
2963 }
2964
2965 return false;
2966 }
2967
2968 /* Return true if STMT performs a widening multiplication, assuming the
2969 output type is TYPE. If so, store the unwidened types of the operands
2970 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2971 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2972 and *TYPE2_OUT would give the operands of the multiplication. */
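/* The typical case, on a target with 32-bit int and 64-bit long long, is
   "(long long) a * (long long) b" with int operands A and B feeding a
   long long result: both conversions are stripped and *TYPE1_OUT and
   *TYPE2_OUT are set to the 32-bit type.  */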
2973
2974 static bool
2975 is_widening_mult_p (gimple stmt,
2976 tree *type1_out, tree *rhs1_out,
2977 tree *type2_out, tree *rhs2_out)
2978 {
2979 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2980
2981 if (TREE_CODE (type) != INTEGER_TYPE
2982 && TREE_CODE (type) != FIXED_POINT_TYPE)
2983 return false;
2984
2985 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2986 rhs1_out))
2987 return false;
2988
2989 if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2990 rhs2_out))
2991 return false;
2992
2993 if (*type1_out == NULL)
2994 {
2995 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2996 return false;
2997 *type1_out = *type2_out;
2998 }
2999
3000 if (*type2_out == NULL)
3001 {
3002 if (!int_fits_type_p (*rhs2_out, *type1_out))
3003 return false;
3004 *type2_out = *type1_out;
3005 }
3006
3007 /* Ensure that the larger of the two operands comes first. */
3008 if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
3009 {
3010 std::swap (*type1_out, *type2_out);
3011 std::swap (*rhs1_out, *rhs2_out);
3012 }
3013
3014 return true;
3015 }
3016
3017 /* Process a single gimple statement STMT, which has a MULT_EXPR as
3018 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
3019 value is true iff we converted the statement. */
3020
3021 static bool
3022 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
3023 {
3024 tree lhs, rhs1, rhs2, type, type1, type2;
3025 enum insn_code handler;
3026 machine_mode to_mode, from_mode, actual_mode;
3027 optab op;
3028 int actual_precision;
3029 location_t loc = gimple_location (stmt);
3030 bool from_unsigned1, from_unsigned2;
3031
3032 lhs = gimple_assign_lhs (stmt);
3033 type = TREE_TYPE (lhs);
3034 if (TREE_CODE (type) != INTEGER_TYPE)
3035 return false;
3036
3037 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
3038 return false;
3039
3040 to_mode = TYPE_MODE (type);
3041 from_mode = TYPE_MODE (type1);
3042 from_unsigned1 = TYPE_UNSIGNED (type1);
3043 from_unsigned2 = TYPE_UNSIGNED (type2);
3044
3045 if (from_unsigned1 && from_unsigned2)
3046 op = umul_widen_optab;
3047 else if (!from_unsigned1 && !from_unsigned2)
3048 op = smul_widen_optab;
3049 else
3050 op = usmul_widen_optab;
3051
3052 handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
3053 0, &actual_mode);
3054
3055 if (handler == CODE_FOR_nothing)
3056 {
3057 if (op != smul_widen_optab)
3058 {
3059 /* We can use a signed multiply with unsigned types as long as
3060 there is a wider mode to use, or it is the smaller of the two
3061 types that is unsigned. Note that type1 >= type2, always. */
3062 if ((TYPE_UNSIGNED (type1)
3063 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3064 || (TYPE_UNSIGNED (type2)
3065 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3066 {
3067 from_mode = GET_MODE_WIDER_MODE (from_mode);
3068 if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
3069 return false;
3070 }
3071
3072 op = smul_widen_optab;
3073 handler = find_widening_optab_handler_and_mode (op, to_mode,
3074 from_mode, 0,
3075 &actual_mode);
3076
3077 if (handler == CODE_FOR_nothing)
3078 return false;
3079
3080 from_unsigned1 = from_unsigned2 = false;
3081 }
3082 else
3083 return false;
3084 }
3085
3086 /* Ensure that the inputs to the handler are in the correct precision
3087 for the opcode. This will be the full mode size. */
3088 actual_precision = GET_MODE_PRECISION (actual_mode);
3089 if (2 * actual_precision > TYPE_PRECISION (type))
3090 return false;
3091 if (actual_precision != TYPE_PRECISION (type1)
3092 || from_unsigned1 != TYPE_UNSIGNED (type1))
3093 rhs1 = build_and_insert_cast (gsi, loc,
3094 build_nonstandard_integer_type
3095 (actual_precision, from_unsigned1), rhs1);
3096 if (actual_precision != TYPE_PRECISION (type2)
3097 || from_unsigned2 != TYPE_UNSIGNED (type2))
3098 rhs2 = build_and_insert_cast (gsi, loc,
3099 build_nonstandard_integer_type
3100 (actual_precision, from_unsigned2), rhs2);
3101
3102 /* Handle constants. */
3103 if (TREE_CODE (rhs1) == INTEGER_CST)
3104 rhs1 = fold_convert (type1, rhs1);
3105 if (TREE_CODE (rhs2) == INTEGER_CST)
3106 rhs2 = fold_convert (type2, rhs2);
3107
3108 gimple_assign_set_rhs1 (stmt, rhs1);
3109 gimple_assign_set_rhs2 (stmt, rhs2);
3110 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
3111 update_stmt (stmt);
3112 widen_mul_stats.widen_mults_inserted++;
3113 return true;
3114 }
3115
3116 /* Process a single gimple statement STMT, which is found at the
3117 iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
3118 rhs (given by CODE), and try to convert it into a
3119 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
3120 is true iff we converted the statement. */
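/* For example, "acc + (long long) a * (long long) b" with 32-bit operands
   becomes WIDEN_MULT_PLUS_EXPR <a, b, acc> when the target provides a
   suitable widening multiply-accumulate pattern; MINUS_EXPR is handled
   analogously with WIDEN_MULT_MINUS_EXPR.  */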
3121
3122 static bool
3123 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
3124 enum tree_code code)
3125 {
3126 gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
3127 gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
3128 tree type, type1, type2, optype;
3129 tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
3130 enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
3131 optab this_optab;
3132 enum tree_code wmult_code;
3133 enum insn_code handler;
3134 machine_mode to_mode, from_mode, actual_mode;
3135 location_t loc = gimple_location (stmt);
3136 int actual_precision;
3137 bool from_unsigned1, from_unsigned2;
3138
3139 lhs = gimple_assign_lhs (stmt);
3140 type = TREE_TYPE (lhs);
3141 if (TREE_CODE (type) != INTEGER_TYPE
3142 && TREE_CODE (type) != FIXED_POINT_TYPE)
3143 return false;
3144
3145 if (code == MINUS_EXPR)
3146 wmult_code = WIDEN_MULT_MINUS_EXPR;
3147 else
3148 wmult_code = WIDEN_MULT_PLUS_EXPR;
3149
3150 rhs1 = gimple_assign_rhs1 (stmt);
3151 rhs2 = gimple_assign_rhs2 (stmt);
3152
3153 if (TREE_CODE (rhs1) == SSA_NAME)
3154 {
3155 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3156 if (is_gimple_assign (rhs1_stmt))
3157 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3158 }
3159
3160 if (TREE_CODE (rhs2) == SSA_NAME)
3161 {
3162 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3163 if (is_gimple_assign (rhs2_stmt))
3164 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3165 }
3166
3167 /* Allow for one conversion statement between the multiply
3168 and addition/subtraction statement. If there is more than
3169 one conversion then we assume they would invalidate this
3170 transformation. If that's not the case then they should have
3171 been folded before now. */
3172 if (CONVERT_EXPR_CODE_P (rhs1_code))
3173 {
3174 conv1_stmt = rhs1_stmt;
3175 rhs1 = gimple_assign_rhs1 (rhs1_stmt);
3176 if (TREE_CODE (rhs1) == SSA_NAME)
3177 {
3178 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
3179 if (is_gimple_assign (rhs1_stmt))
3180 rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
3181 }
3182 else
3183 return false;
3184 }
3185 if (CONVERT_EXPR_CODE_P (rhs2_code))
3186 {
3187 conv2_stmt = rhs2_stmt;
3188 rhs2 = gimple_assign_rhs1 (rhs2_stmt);
3189 if (TREE_CODE (rhs2) == SSA_NAME)
3190 {
3191 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
3192 if (is_gimple_assign (rhs2_stmt))
3193 rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
3194 }
3195 else
3196 return false;
3197 }
3198
3199 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
3200 is_widening_mult_p, but we still need the rhs values it returns.
3201
3202 It might also appear that it would be sufficient to use the existing
3203 operands of the widening multiply, but that would limit the choice of
3204 multiply-and-accumulate instructions.
3205
3206 If the widened-multiplication result has more than one use, it is
3207 probably wiser not to do the conversion. */
3208 if (code == PLUS_EXPR
3209 && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
3210 {
3211 if (!has_single_use (rhs1)
3212 || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
3213 &type2, &mult_rhs2))
3214 return false;
3215 add_rhs = rhs2;
3216 conv_stmt = conv1_stmt;
3217 }
3218 else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
3219 {
3220 if (!has_single_use (rhs2)
3221 || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
3222 &type2, &mult_rhs2))
3223 return false;
3224 add_rhs = rhs1;
3225 conv_stmt = conv2_stmt;
3226 }
3227 else
3228 return false;
3229
3230 to_mode = TYPE_MODE (type);
3231 from_mode = TYPE_MODE (type1);
3232 from_unsigned1 = TYPE_UNSIGNED (type1);
3233 from_unsigned2 = TYPE_UNSIGNED (type2);
3234 optype = type1;
3235
3236 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
3237 if (from_unsigned1 != from_unsigned2)
3238 {
3239 if (!INTEGRAL_TYPE_P (type))
3240 return false;
3241 /* We can use a signed multiply with unsigned types as long as
3242 there is a wider mode to use, or it is the smaller of the two
3243 types that is unsigned. Note that type1 >= type2, always. */
3244 if ((from_unsigned1
3245 && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
3246 || (from_unsigned2
3247 && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
3248 {
3249 from_mode = GET_MODE_WIDER_MODE (from_mode);
3250 if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
3251 return false;
3252 }
3253
3254 from_unsigned1 = from_unsigned2 = false;
3255 optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
3256 false);
3257 }
3258
3259 /* If there was a conversion between the multiply and addition
3260 then we need to make sure it fits a multiply-and-accumulate.
3261 There should be a single mode change which does not change the
3262 value. */
3263 if (conv_stmt)
3264 {
3265 /* We use the original, unmodified data types for this. */
3266 tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3267 tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3268 int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3269 bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3270
3271 if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3272 {
3273 /* Conversion is a truncate. */
3274 if (TYPE_PRECISION (to_type) < data_size)
3275 return false;
3276 }
3277 else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3278 {
3279 /* Conversion is an extend. Check it's the right sort. */
3280 if (TYPE_UNSIGNED (from_type) != is_unsigned
3281 && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3282 return false;
3283 }
3284 /* else convert is a no-op for our purposes. */
3285 }
3286
3287 /* Verify that the machine can perform a widening multiply
3288 accumulate in this mode/signedness combination, otherwise
3289 this transformation is likely to pessimize code. */
3290 this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3291 handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3292 from_mode, 0, &actual_mode);
3293
3294 if (handler == CODE_FOR_nothing)
3295 return false;
3296
3297 /* Ensure that the inputs to the handler are in the correct precision
3298 for the opcode. This will be the full mode size. */
3299 actual_precision = GET_MODE_PRECISION (actual_mode);
3300 if (actual_precision != TYPE_PRECISION (type1)
3301 || from_unsigned1 != TYPE_UNSIGNED (type1))
3302 mult_rhs1 = build_and_insert_cast (gsi, loc,
3303 build_nonstandard_integer_type
3304 (actual_precision, from_unsigned1),
3305 mult_rhs1);
3306 if (actual_precision != TYPE_PRECISION (type2)
3307 || from_unsigned2 != TYPE_UNSIGNED (type2))
3308 mult_rhs2 = build_and_insert_cast (gsi, loc,
3309 build_nonstandard_integer_type
3310 (actual_precision, from_unsigned2),
3311 mult_rhs2);
3312
3313 if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3314 add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3315
3316 /* Handle constants. */
3317 if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3318 mult_rhs1 = fold_convert (type1, mult_rhs1);
3319 if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3320 mult_rhs2 = fold_convert (type2, mult_rhs2);
3321
3322 gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3323 add_rhs);
3324 update_stmt (gsi_stmt (*gsi));
3325 widen_mul_stats.maccs_inserted++;
3326 return true;
3327 }
3328
3329 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3330 with uses in additions and subtractions to form fused multiply-add
3331 operations. Returns true if successful and MUL_STMT should be removed. */
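/* For example, given

     tmp = a * b;
     x = tmp + c;
     y = d - tmp;

   both uses can be rewritten, producing x = FMA <a, b, c> and, via a
   separately inserted negation of A, y = FMA <-a, b, d>, after which the
   multiplication is dead and can be removed by the caller.  */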
3332
3333 static bool
3334 convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
3335 {
3336 tree mul_result = gimple_get_lhs (mul_stmt);
3337 tree type = TREE_TYPE (mul_result);
3338 gimple use_stmt, neguse_stmt;
3339 gassign *fma_stmt;
3340 use_operand_p use_p;
3341 imm_use_iterator imm_iter;
3342
3343 if (FLOAT_TYPE_P (type)
3344 && flag_fp_contract_mode == FP_CONTRACT_OFF)
3345 return false;
3346
3347 /* We don't want to do bitfield reduction ops. */
3348 if (INTEGRAL_TYPE_P (type)
3349 && (TYPE_PRECISION (type)
3350 != GET_MODE_PRECISION (TYPE_MODE (type))))
3351 return false;
3352
3353 /* If the target doesn't support it, don't generate it. We assume that
3354 if fma isn't available then fms, fnma or fnms are not either. */
3355 if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
3356 return false;
3357
3358 /* If the multiplication has zero uses, it is kept around probably because
3359 of -fnon-call-exceptions. Don't optimize it away in that case,
3360 it is DCE job. */
3361 if (has_zero_uses (mul_result))
3362 return false;
3363
3364 /* Make sure that the multiplication statement becomes dead after
3365 the transformation, i.e. that all uses are transformed to FMAs.
3366 This means we assume that an FMA operation has the same cost
3367 as an addition. */
3368 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3369 {
3370 enum tree_code use_code;
3371 tree result = mul_result;
3372 bool negate_p = false;
3373
3374 use_stmt = USE_STMT (use_p);
3375
3376 if (is_gimple_debug (use_stmt))
3377 continue;
3378
3379 /* For now restrict this operation to single basic blocks. In theory
3380 we would want to support sinking the multiplication in
3381 m = a*b;
3382 if ()
3383 ma = m + c;
3384 else
3385 d = m;
3386 to form a fma in the then block and sink the multiplication to the
3387 else block. */
3388 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3389 return false;
3390
3391 if (!is_gimple_assign (use_stmt))
3392 return false;
3393
3394 use_code = gimple_assign_rhs_code (use_stmt);
3395
3396 /* A negate on the multiplication leads to FNMA. */
3397 if (use_code == NEGATE_EXPR)
3398 {
3399 ssa_op_iter iter;
3400 use_operand_p usep;
3401
3402 result = gimple_assign_lhs (use_stmt);
3403
3404 /* Make sure the negate statement becomes dead with this
3405 single transformation. */
3406 if (!single_imm_use (gimple_assign_lhs (use_stmt),
3407 &use_p, &neguse_stmt))
3408 return false;
3409
3410 /* Make sure the multiplication isn't also used on that stmt. */
3411 FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3412 if (USE_FROM_PTR (usep) == mul_result)
3413 return false;
3414
3415 /* Re-validate. */
3416 use_stmt = neguse_stmt;
3417 if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3418 return false;
3419 if (!is_gimple_assign (use_stmt))
3420 return false;
3421
3422 use_code = gimple_assign_rhs_code (use_stmt);
3423 negate_p = true;
3424 }
3425
3426 switch (use_code)
3427 {
3428 case MINUS_EXPR:
3429 if (gimple_assign_rhs2 (use_stmt) == result)
3430 negate_p = !negate_p;
3431 break;
3432 case PLUS_EXPR:
3433 break;
3434 default:
3435 /* FMA can only be formed from PLUS and MINUS. */
3436 return false;
3437 }
3438
3439 /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
3440 by a MULT_EXPR that we'll visit later, we might be able to
3441 get a more profitable match with fnma.
3442 OTOH, if we don't, a negate / fma pair has likely lower latency
3443 than a mult / subtract pair. */
3444 if (use_code == MINUS_EXPR && !negate_p
3445 && gimple_assign_rhs1 (use_stmt) == result
3446 && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
3447 && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
3448 {
3449 tree rhs2 = gimple_assign_rhs2 (use_stmt);
3450
3451 if (TREE_CODE (rhs2) == SSA_NAME)
3452 {
3453 gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
3454 if (has_single_use (rhs2)
3455 && is_gimple_assign (stmt2)
3456 && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3457 return false;
3458 }
3459 }
3460
3461 /* We can't handle a * b + a * b. */
3462 if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
3463 return false;
3464
3465 /* While it is possible to validate whether or not the exact form
3466 that we've recognized is available in the backend, the assumption
3467 is that the transformation is never a loss. For instance, suppose
3468 the target only has the plain FMA pattern available. Consider
3469 a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
3470 is still two operations. Consider -(a*b)-c -> fma(-a,b,-c): we
3471 still have 3 operations, but in the FMA form the two NEGs are
3472 independent and could be run in parallel. */
3473 }
3474
3475 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3476 {
3477 gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3478 enum tree_code use_code;
3479 tree addop, mulop1 = op1, result = mul_result;
3480 bool negate_p = false;
3481
3482 if (is_gimple_debug (use_stmt))
3483 continue;
3484
3485 use_code = gimple_assign_rhs_code (use_stmt);
3486 if (use_code == NEGATE_EXPR)
3487 {
3488 result = gimple_assign_lhs (use_stmt);
3489 single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3490 gsi_remove (&gsi, true);
3491 release_defs (use_stmt);
3492
3493 use_stmt = neguse_stmt;
3494 gsi = gsi_for_stmt (use_stmt);
3495 use_code = gimple_assign_rhs_code (use_stmt);
3496 negate_p = true;
3497 }
3498
3499 if (gimple_assign_rhs1 (use_stmt) == result)
3500 {
3501 addop = gimple_assign_rhs2 (use_stmt);
3502 /* a * b - c -> a * b + (-c) */
3503 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3504 addop = force_gimple_operand_gsi (&gsi,
3505 build1 (NEGATE_EXPR,
3506 type, addop),
3507 true, NULL_TREE, true,
3508 GSI_SAME_STMT);
3509 }
3510 else
3511 {
3512 addop = gimple_assign_rhs1 (use_stmt);
3513 /* a - b * c -> (-b) * c + a */
3514 if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3515 negate_p = !negate_p;
3516 }
3517
3518 if (negate_p)
3519 mulop1 = force_gimple_operand_gsi (&gsi,
3520 build1 (NEGATE_EXPR,
3521 type, mulop1),
3522 true, NULL_TREE, true,
3523 GSI_SAME_STMT);
3524
3525 fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
3526 FMA_EXPR, mulop1, op2, addop);
3527 gsi_replace (&gsi, fma_stmt, true);
3528 widen_mul_stats.fmas_inserted++;
3529 }
3530
3531 return true;
3532 }
3533
3534 /* Find integer multiplications where the operands are extended from
3535 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
3536 where appropriate. */
3537
3538 namespace {
3539
3540 const pass_data pass_data_optimize_widening_mul =
3541 {
3542 GIMPLE_PASS, /* type */
3543 "widening_mul", /* name */
3544 OPTGROUP_NONE, /* optinfo_flags */
3545 TV_NONE, /* tv_id */
3546 PROP_ssa, /* properties_required */
3547 0, /* properties_provided */
3548 0, /* properties_destroyed */
3549 0, /* todo_flags_start */
3550 TODO_update_ssa, /* todo_flags_finish */
3551 };
3552
3553 class pass_optimize_widening_mul : public gimple_opt_pass
3554 {
3555 public:
3556 pass_optimize_widening_mul (gcc::context *ctxt)
3557 : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
3558 {}
3559
3560 /* opt_pass methods: */
3561 virtual bool gate (function *)
3562 {
3563 return flag_expensive_optimizations && optimize;
3564 }
3565
3566 virtual unsigned int execute (function *);
3567
3568 }; // class pass_optimize_widening_mul
3569
3570 unsigned int
3571 pass_optimize_widening_mul::execute (function *fun)
3572 {
3573 basic_block bb;
3574 bool cfg_changed = false;
3575
3576 memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
3577
3578 FOR_EACH_BB_FN (bb, fun)
3579 {
3580 gimple_stmt_iterator gsi;
3581
3582 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
3583 {
3584 gimple stmt = gsi_stmt (gsi);
3585 enum tree_code code;
3586
3587 if (is_gimple_assign (stmt))
3588 {
3589 code = gimple_assign_rhs_code (stmt);
3590 switch (code)
3591 {
3592 case MULT_EXPR:
3593 if (!convert_mult_to_widen (stmt, &gsi)
3594 && convert_mult_to_fma (stmt,
3595 gimple_assign_rhs1 (stmt),
3596 gimple_assign_rhs2 (stmt)))
3597 {
3598 gsi_remove (&gsi, true);
3599 release_defs (stmt);
3600 continue;
3601 }
3602 break;
3603
3604 case PLUS_EXPR:
3605 case MINUS_EXPR:
3606 convert_plusminus_to_widen (&gsi, stmt, code);
3607 break;
3608
3609 default:;
3610 }
3611 }
3612 else if (is_gimple_call (stmt)
3613 && gimple_call_lhs (stmt))
3614 {
3615 tree fndecl = gimple_call_fndecl (stmt);
3616 if (fndecl
3617 && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
3618 {
3619 switch (DECL_FUNCTION_CODE (fndecl))
3620 {
3621 case BUILT_IN_POWF:
3622 case BUILT_IN_POW:
3623 case BUILT_IN_POWL:
3624 if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
3625 && REAL_VALUES_EQUAL
3626 (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
3627 dconst2)
3628 && convert_mult_to_fma (stmt,
3629 gimple_call_arg (stmt, 0),
3630 gimple_call_arg (stmt, 0)))
3631 {
3632 unlink_stmt_vdef (stmt);
3633 if (gsi_remove (&gsi, true)
3634 && gimple_purge_dead_eh_edges (bb))
3635 cfg_changed = true;
3636 release_defs (stmt);
3637 continue;
3638 }
3639 break;
3640
3641 default:;
3642 }
3643 }
3644 }
3645 gsi_next (&gsi);
3646 }
3647 }
3648
3649 statistics_counter_event (fun, "widening multiplications inserted",
3650 widen_mul_stats.widen_mults_inserted);
3651 statistics_counter_event (fun, "widening maccs inserted",
3652 widen_mul_stats.maccs_inserted);
3653 statistics_counter_event (fun, "fused multiply-adds inserted",
3654 widen_mul_stats.fmas_inserted);
3655
3656 return cfg_changed ? TODO_cleanup_cfg : 0;
3657 }
3658
3659 } // anon namespace
3660
3661 gimple_opt_pass *
3662 make_pass_optimize_widening_mul (gcc::context *ctxt)
3663 {
3664 return new pass_optimize_widening_mul (ctxt);
3665 }