* jvspec.c (jvgenmain_spec): Don't handle -fnew-verifier.
[gcc.git] / gcc / tree-ssa-math-opts.c
1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* Currently, the only mini-pass in this file tries to CSE reciprocal
22 operations. These are common in sequences such as this one:
23
24 modulus = sqrt(x*x + y*y + z*z);
25 x = x / modulus;
26 y = y / modulus;
27 z = z / modulus;
28
29 that can be optimized to
30
31 modulus = sqrt(x*x + y*y + z*z);
32 rmodulus = 1.0 / modulus;
33 x = x * rmodulus;
34 y = y * rmodulus;
35 z = z * rmodulus;
36
37 We do this for loop invariant divisors, and with this pass whenever
38 we notice that a division has the same divisor multiple times.
39
40 Of course, like in PRE, we don't insert a division if a dominator
41 already has one. However, this cannot be done as an extension of
42 PRE for several reasons.
43
44 First of all, with some experiments it was found out that the
45 transformation is not always useful if there are only two divisions
 46       by the same divisor.  This is probably because modern processors
47 can pipeline the divisions; on older, in-order processors it should
48 still be effective to optimize two divisions by the same number.
49 We make this a param, and it shall be called N in the remainder of
50 this comment.
51
52 Second, if trapping math is active, we have less freedom on where
53 to insert divisions: we can only do so in basic blocks that already
54 contain one. (If divisions don't trap, instead, we can insert
55 divisions elsewhere, which will be in blocks that are common dominators
56 of those that have the division).
57
58 We really don't want to compute the reciprocal unless a division will
59 be found. To do this, we won't insert the division in a basic block
60 that has less than N divisions *post-dominating* it.
61
62 The algorithm constructs a subset of the dominator tree, holding the
63 blocks containing the divisions and the common dominators to them,
 64       and walks it twice.  The first walk is in post-order, and it annotates
65 each block with the number of divisions that post-dominate it: this
66 gives information on where divisions can be inserted profitably.
67 The second walk is in pre-order, and it inserts divisions as explained
68 above, and replaces divisions by multiplications.
69
70 In the best case, the cost of the pass is O(n_statements). In the
71 worst-case, the cost is due to creating the dominator tree subset,
72 with a cost of O(n_basic_blocks ^ 2); however this can only happen
73 for n_statements / n_basic_blocks statements. So, the amortized cost
74 of creating the dominator tree subset is O(n_basic_blocks) and the
75 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76
77 More practically, the cost will be small because there are few
78 divisions, and they tend to be in the same basic block, so insert_bb
79 is called very few times.
80
81 If we did this using domwalk.c, an efficient implementation would have
82 to work on all the variables in a single pass, because we could not
83 work on just a subset of the dominator tree, as we do now, and the
84 cost would also be something like O(n_statements * n_basic_blocks).
85 The data structures would be more complex in order to work on all the
86 variables in a single pass. */
87
88 #include "config.h"
89 #include "system.h"
90 #include "coretypes.h"
91 #include "tm.h"
92 #include "flags.h"
93 #include "tree.h"
94 #include "tree-flow.h"
95 #include "timevar.h"
96 #include "tree-pass.h"
97 #include "alloc-pool.h"
98 #include "basic-block.h"
99 #include "target.h"
100 #include "gimple-pretty-print.h"
101
102 /* FIXME: RTL headers have to be included here for optabs. */
103 #include "rtl.h" /* Because optabs.h wants enum rtx_code. */
104 #include "expr.h" /* Because optabs.h wants sepops. */
105 #include "optabs.h"
106
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  One node exists per interesting block; nodes are linked
   into a subset of the dominator tree via the CHILDREN/NEXT fields and
   are looked up through bb->aux.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
140
141
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence";
   created and destroyed once per invocation of the recip pass.  */
static alloc_pool occ_pool;
148
149
150
151 /* Allocate and return a new struct occurrence for basic block BB, and
152 whose children list is headed by CHILDREN. */
153 static struct occurrence *
154 occ_new (basic_block bb, struct occurrence *children)
155 {
156 struct occurrence *occ;
157
158 bb->aux = occ = (struct occurrence *) pool_alloc (occ_pool);
159 memset (occ, 0, sizeof (struct occurrence));
160
161 occ->bb = bb;
162 occ->children = children;
163 return occ;
164 }
165
166
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  /* Walk the current siblings under IDOM, classifying each against
     NEW_OCC by their nearest common dominator.  P_OCC is the link that
     points at OCC, so unlinking is done by rewriting *P_OCC.  */
  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  /* DOM is strictly between IDOM and both blocks, so it cannot
	     already have an occurrence node of its own.  */
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly. Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
231
232 /* Register that we found a division in BB. */
233
234 static inline void
235 register_division_in (basic_block bb)
236 {
237 struct occurrence *occ;
238
239 occ = (struct occurrence *) bb->aux;
240 if (!occ)
241 {
242 occ = occ_new (bb, NULL);
243 insert_bb (occ, ENTRY_BLOCK_PTR, &occ_head);
244 }
245
246 occ->bb_has_division = true;
247 occ->num_divisions++;
248 }
249
250
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  After this walk, OCC->num_divisions counts divisions in
   OCC->bb itself plus those of children whose divisions post-dominate it.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      /* Depth-first: children's counts must be final before we add
	 them into OCC's.  */
      if (occ_child->children)
	compute_merit (occ_child);

      /* With -fexceptions a statement in DOM may throw; use DOM's single
	 non-EH successor as the reference point for the post-dominance
	 test instead of DOM itself.  */
      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      /* Only divisions guaranteed to execute whenever BB executes
	 contribute to BB's merit.  */
      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}
275
276
277 /* Return whether USE_STMT is a floating-point division by DEF. */
278 static inline bool
279 is_division_by (gimple use_stmt, tree def)
280 {
281 return is_gimple_assign (use_stmt)
282 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
283 && gimple_assign_rhs2 (use_stmt) == def
284 /* Do not recognize x / x as valid division, as we are getting
285 confused later by replacing all immediate uses x in such
286 a stmt. */
287 && gimple_assign_rhs1 (use_stmt) != def;
288 }
289
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  THRESHOLD is the minimum number of post-dominating
   divisions that makes inserting a reciprocal worthwhile.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gimple new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  /* Insert a new reciprocal only if no dominator provides one already
     (recip_def == NULL), the block is legal for insertion (it has a
     division, or trapping math is off), and enough divisions benefit.  */
  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = make_rename_temp (type, "reciptmp");
      new_stmt = gimple_build_assign_with_ops (RDIV_EXPR, recip_def,
					       build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      occ->recip_def_stmt = new_stmt;
    }

  /* Propagate RECIP_DEF (ours or a dominator's) down the tree so that
     dominated blocks reuse it instead of inserting their own.  */
  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}
349
350
351 /* Replace the division at USE_P with a multiplication by the reciprocal, if
352 possible. */
353
354 static inline void
355 replace_reciprocal (use_operand_p use_p)
356 {
357 gimple use_stmt = USE_STMT (use_p);
358 basic_block bb = gimple_bb (use_stmt);
359 struct occurrence *occ = (struct occurrence *) bb->aux;
360
361 if (optimize_bb_for_speed_p (bb)
362 && occ->recip_def && use_stmt != occ->recip_def_stmt)
363 {
364 gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
365 SET_USE (use_p, occ->recip_def);
366 fold_stmt_inplace (use_stmt);
367 update_stmt (use_stmt);
368 }
369 }
370
371
372 /* Free OCC and return one more "struct occurrence" to be freed. */
373
374 static struct occurrence *
375 free_bb (struct occurrence *occ)
376 {
377 struct occurrence *child, *next;
378
379 /* First get the two pointers hanging off OCC. */
380 next = occ->next;
381 child = occ->children;
382 occ->bb->aux = NULL;
383 pool_free (occ_pool, occ);
384
385 /* Now ensure that we don't recurse unless it is necessary. */
386 if (!child)
387 return next;
388 else
389 {
390 while (next)
391 next = free_bb (next);
392
393 return child;
394 }
395 }
396
397
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  DEF_GSI, if
   non-NULL, points at the statement defining DEF.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  /* Pass 1: count divisions by DEF and record their blocks in the
     occurrence tree.  */
  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple use_stmt;
      /* Annotate the tree with post-dominating division counts, then
	 insert reciprocal computations where profitable.  */
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      /* Pass 2: rewrite each division by DEF into a multiplication by
	 the reciprocal available in its block.  */
      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  /* Tear down the per-DEF occurrence tree regardless of success.  */
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
450
451 static bool
452 gate_cse_reciprocals (void)
453 {
454 return optimize && flag_reciprocal_math;
455 }
456
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  Also converts a/func(b)
   into a*rfunc(b) when the target provides a reciprocal builtin.
   Always returns 0 (no extra TODO flags).  */
static unsigned int
execute_cse_reciprocals (void)
{
  basic_block bb;
  tree arg;

  /* Rough sizing heuristic: about one occurrence node per three blocks.  */
  occ_pool = create_alloc_pool ("dominators for recip",
				sizeof (struct occurrence),
				n_basic_blocks / 3 + 1);

  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

#ifdef ENABLE_CHECKING
  /* This pass stores its per-block state in bb->aux; make sure no
     previous pass left stale pointers there.  */
  FOR_EACH_BB (bb)
    gcc_assert (!bb->aux);
#endif

  /* Process the default definitions of floating-point parameters.  */
  for (arg = DECL_ARGUMENTS (cfun->decl); arg; arg = DECL_CHAIN (arg))
    if (gimple_default_def (cfun, arg)
	&& FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      execute_cse_reciprocals_1 (NULL, gimple_default_def (cfun, arg));

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;
      gimple phi;
      tree def;

      /* Floating-point PHI results are candidate divisors too.  */
      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  phi = gsi_stmt (gsi);
	  def = PHI_RESULT (phi);
	  if (FLOAT_TYPE_P (TREE_TYPE (def))
	      && is_gimple_reg (def))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      /* ... as is every floating-point SSA name defined by a statement.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      /* The a/func(b) -> a*rfunc(b) transform trades accuracy for speed,
	 so skip it in blocks optimized for size.  */
      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  /* Ask the target for a reciprocal variant of the
		     builtin, e.g. rsqrt for sqrt.  */
		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  /* Retarget the call to the reciprocal builtin, then turn
		     every division by its result into a multiplication.  */
		  gimple_replace_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (stmt);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  free_alloc_pool (occ_pool);
  return 0;
}
588
/* Pass descriptor for the reciprocal CSE pass ("recip"); gated on
   -freciprocal-math, requires SSA form.  */
struct gimple_opt_pass pass_cse_reciprocals =
{
 {
  GIMPLE_PASS,
  "recip",				/* name */
  gate_cse_reciprocals,			/* gate */
  execute_cse_reciprocals,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};
608
609 /* Records an occurrence at statement USE_STMT in the vector of trees
610 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
611 is not yet initialized. Returns true if the occurrence was pushed on
612 the vector. Adjusts *TOP_BB to be the basic block dominating all
613 statements in the vector. */
614
615 static bool
616 maybe_record_sincos (VEC(gimple, heap) **stmts,
617 basic_block *top_bb, gimple use_stmt)
618 {
619 basic_block use_bb = gimple_bb (use_stmt);
620 if (*top_bb
621 && (*top_bb == use_bb
622 || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
623 VEC_safe_push (gimple, heap, *stmts, use_stmt);
624 else if (!*top_bb
625 || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
626 {
627 VEC_safe_push (gimple, heap, *stmts, use_stmt);
628 *top_bb = use_bb;
629 }
630 else
631 return false;
632
633 return true;
634 }
635
636 /* Look for sin, cos and cexpi calls with the same argument NAME and
637 create a single call to cexpi CSEing the result in this case.
638 We first walk over all immediate uses of the argument collecting
639 statements that we can CSE in a vector and in a second pass replace
640 the statement rhs with a REALPART or IMAGPART expression on the
641 result of the cexpi call we insert before the use statement that
642 dominates all other candidates. */
643
644 static bool
645 execute_cse_sincos_1 (tree name)
646 {
647 gimple_stmt_iterator gsi;
648 imm_use_iterator use_iter;
649 tree fndecl, res, type;
650 gimple def_stmt, use_stmt, stmt;
651 int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
652 VEC(gimple, heap) *stmts = NULL;
653 basic_block top_bb = NULL;
654 int i;
655 bool cfg_changed = false;
656
657 type = TREE_TYPE (name);
658 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
659 {
660 if (gimple_code (use_stmt) != GIMPLE_CALL
661 || !gimple_call_lhs (use_stmt)
662 || !(fndecl = gimple_call_fndecl (use_stmt))
663 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
664 continue;
665
666 switch (DECL_FUNCTION_CODE (fndecl))
667 {
668 CASE_FLT_FN (BUILT_IN_COS):
669 seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
670 break;
671
672 CASE_FLT_FN (BUILT_IN_SIN):
673 seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
674 break;
675
676 CASE_FLT_FN (BUILT_IN_CEXPI):
677 seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
678 break;
679
680 default:;
681 }
682 }
683
684 if (seen_cos + seen_sin + seen_cexpi <= 1)
685 {
686 VEC_free(gimple, heap, stmts);
687 return false;
688 }
689
690 /* Simply insert cexpi at the beginning of top_bb but not earlier than
691 the name def statement. */
692 fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
693 if (!fndecl)
694 return false;
695 res = create_tmp_reg (TREE_TYPE (TREE_TYPE (fndecl)), "sincostmp");
696 stmt = gimple_build_call (fndecl, 1, name);
697 res = make_ssa_name (res, stmt);
698 gimple_call_set_lhs (stmt, res);
699
700 def_stmt = SSA_NAME_DEF_STMT (name);
701 if (!SSA_NAME_IS_DEFAULT_DEF (name)
702 && gimple_code (def_stmt) != GIMPLE_PHI
703 && gimple_bb (def_stmt) == top_bb)
704 {
705 gsi = gsi_for_stmt (def_stmt);
706 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
707 }
708 else
709 {
710 gsi = gsi_after_labels (top_bb);
711 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
712 }
713 update_stmt (stmt);
714
715 /* And adjust the recorded old call sites. */
716 for (i = 0; VEC_iterate(gimple, stmts, i, use_stmt); ++i)
717 {
718 tree rhs = NULL;
719 fndecl = gimple_call_fndecl (use_stmt);
720
721 switch (DECL_FUNCTION_CODE (fndecl))
722 {
723 CASE_FLT_FN (BUILT_IN_COS):
724 rhs = fold_build1 (REALPART_EXPR, type, res);
725 break;
726
727 CASE_FLT_FN (BUILT_IN_SIN):
728 rhs = fold_build1 (IMAGPART_EXPR, type, res);
729 break;
730
731 CASE_FLT_FN (BUILT_IN_CEXPI):
732 rhs = res;
733 break;
734
735 default:;
736 gcc_unreachable ();
737 }
738
739 /* Replace call with a copy. */
740 stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
741
742 gsi = gsi_for_stmt (use_stmt);
743 gsi_replace (&gsi, stmt, true);
744 if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
745 cfg_changed = true;
746 }
747
748 VEC_free(gimple, heap, stmts);
749
750 return cfg_changed;
751 }
752
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Returns TODO_cleanup_cfg
   when any replacement removed EH edges, 0 otherwise.  */

static unsigned int
execute_cse_sincos (void)
{
  basic_block bb;
  bool cfg_changed = false;

  /* execute_cse_sincos_1 relies on dominance queries.  */
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt)
	      && (fndecl = gimple_call_fndecl (stmt))
	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
	    {
	      tree arg;

	      switch (DECL_FUNCTION_CODE (fndecl))
		{
		CASE_FLT_FN (BUILT_IN_COS):
		CASE_FLT_FN (BUILT_IN_SIN):
		CASE_FLT_FN (BUILT_IN_CEXPI):
		  /* The CSE only applies to SSA_NAME arguments; constants
		     are folded elsewhere.  */
		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		default:;
		}
	    }
	}
    }

  free_dominance_info (CDI_DOMINATORS);
  return cfg_changed ? TODO_cleanup_cfg : 0;
}
799
800 static bool
801 gate_cse_sincos (void)
802 {
803 /* Make sure we have either sincos or cexp. */
804 return (TARGET_HAS_SINCOS
805 || TARGET_C99_FUNCTIONS)
806 && optimize;
807 }
808
/* Pass descriptor for the sin/cos/cexpi CSE pass ("sincos"); gated on
   target sincos/cexp availability, requires SSA form.  */
struct gimple_opt_pass pass_cse_sincos =
{
 {
  GIMPLE_PASS,
  "sincos",				/* name */
  gate_cse_sincos,			/* gate */
  execute_cse_sincos,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};
828
/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of byte size markers:

   0    - byte has the value 0
   1..size - byte contains the content of the byte
   number indexed with that value minus one  */

struct symbolic_number {
  /* One marker byte (as described above) per byte of the tracked value,
     packed little-endian into a host-wide integer.  */
  unsigned HOST_WIDEST_INT n;
  /* Size of the tracked value in bytes.  */
  int size;
};
841
842 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
843 number N. Return false if the requested operation is not permitted
844 on a symbolic number. */
845
846 static inline bool
847 do_shift_rotate (enum tree_code code,
848 struct symbolic_number *n,
849 int count)
850 {
851 if (count % 8 != 0)
852 return false;
853
854 /* Zero out the extra bits of N in order to avoid them being shifted
855 into the significant bits. */
856 if (n->size < (int)sizeof (HOST_WIDEST_INT))
857 n->n &= ((unsigned HOST_WIDEST_INT)1 << (n->size * BITS_PER_UNIT)) - 1;
858
859 switch (code)
860 {
861 case LSHIFT_EXPR:
862 n->n <<= count;
863 break;
864 case RSHIFT_EXPR:
865 n->n >>= count;
866 break;
867 case LROTATE_EXPR:
868 n->n = (n->n << count) | (n->n >> ((n->size * BITS_PER_UNIT) - count));
869 break;
870 case RROTATE_EXPR:
871 n->n = (n->n >> count) | (n->n << ((n->size * BITS_PER_UNIT) - count));
872 break;
873 default:
874 return false;
875 }
876 return true;
877 }
878
879 /* Perform sanity checking for the symbolic number N and the gimple
880 statement STMT. */
881
882 static inline bool
883 verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
884 {
885 tree lhs_type;
886
887 lhs_type = gimple_expr_type (stmt);
888
889 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
890 return false;
891
892 if (TYPE_PRECISION (lhs_type) != n->size * BITS_PER_UNIT)
893 return false;
894
895 return true;
896 }
897
/* find_bswap_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the
   operation could successfully be executed the function returns the
   tree expression of the source operand and NULL otherwise.  LIMIT
   bounds the recursion depth.  */

static tree
find_bswap_1 (gimple stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple rhs1_stmt, rhs2_stmt;
  tree source_expr1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL_TREE;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && code != NOP_EXPR
	  && code != CONVERT_EXPR)
	return NULL_TREE;

      source_expr1 = find_bswap_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_1 returned NULL STMT is a leaf node and we have
	 to initialize the symbolic number.  */
      if (!source_expr1)
	{
	  /* Set up the symbolic number N by setting each byte to a
	     value between 1 and the byte size of rhs1.  The highest
	     order byte is set to n->size and the lowest order
	     byte to 1.  */
	  n->size = TYPE_PRECISION (TREE_TYPE (rhs1));
	  if (n->size % BITS_PER_UNIT != 0)
	    return NULL_TREE;
	  n->size /= BITS_PER_UNIT;
	  n->n = (sizeof (HOST_WIDEST_INT) < 8 ? 0 :
		  (unsigned HOST_WIDEST_INT)0x08070605 << 32 | 0x04030201);

	  /* Keep only the markers for the bytes the type actually has.  */
	  if (n->size < (int)sizeof (HOST_WIDEST_INT))
	    n->n &= ((unsigned HOST_WIDEST_INT)1 <<
		     (n->size * BITS_PER_UNIT)) - 1;

	  source_expr1 = rhs1;
	}

      /* Apply STMT's operation to the symbolic number.  */
      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i;
	    unsigned HOST_WIDEST_INT val = widest_int_cst_value (rhs2);
	    unsigned HOST_WIDEST_INT tmp = val;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < n->size; i++, tmp >>= BITS_PER_UNIT)
	      if ((tmp & 0xff) != 0 && (tmp & 0xff) != 0xff)
		return NULL_TREE;

	    n->n &= val;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int)TREE_INT_CST_LOW (rhs2)))
	    return NULL_TREE;
	  break;
	CASE_CONVERT:
	  {
	    int type_size;

	    type_size = TYPE_PRECISION (gimple_expr_type (stmt));
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL_TREE;

	    if (type_size / BITS_PER_UNIT < (int)(sizeof (HOST_WIDEST_INT)))
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((unsigned HOST_WIDEST_INT)1 << type_size) - 1;
	      }
	    n->size = type_size / BITS_PER_UNIT;
	  }
	  break;
	default:
	  return NULL_TREE;
	};
      return verify_symbolic_number_p (n, stmt) ? source_expr1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      tree source_expr2;

      if (code != BIT_IOR_EXPR)
	return NULL_TREE;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL_TREE;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  /* Both operands must trace back to the same source expression
	     with equal-sized symbolic numbers; the OR then merges their
	     byte markers.  */
	  source_expr1 = find_bswap_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_expr1)
	    return NULL_TREE;

	  source_expr2 = find_bswap_1 (rhs2_stmt, &n2, limit - 1);

	  if (source_expr1 != source_expr2
	      || n1.size != n2.size)
	    return NULL_TREE;

	  n->size = n1.size;
	  n->n = n1.n | n2.n;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL_TREE;

	  break;
	default:
	  return NULL_TREE;
	}
      return source_expr1;
    }
  return NULL_TREE;
}
1056
/* Check if STMT completes a bswap implementation consisting of ORs,
   SHIFTs and ANDs.  Return the source tree expression on which the
   byte swap is performed and NULL if no bswap was found.  */

static tree
find_bswap (gimple stmt)
{
  /* The number which the find_bswap result should match in order to
     have a full byte swap.  The number is shifted to the left according
     to the size of the symbolic number before using it.  */
  unsigned HOST_WIDEST_INT cmp =
    sizeof (HOST_WIDEST_INT) < 8 ? 0 :
    (unsigned HOST_WIDEST_INT)0x01020304 << 32 | 0x05060708;

  struct symbolic_number n;
  tree source_expr;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number of bytes to be touched.  We
     increase that number by one here in order to also cover signed ->
     unsigned conversions of the src operand as can be seen in
     libgcc.  */
  source_expr =  find_bswap_1 (stmt, &n,
			       TREE_INT_CST_LOW (
				 TYPE_SIZE_UNIT (gimple_expr_type (stmt))) + 1);

  if (!source_expr)
    return NULL_TREE;

  /* Zero out the extra bits of N and CMP.  For sub-word sizes the
     reference pattern CMP must be narrowed to the same width as N.  */
  if (n.size < (int)sizeof (HOST_WIDEST_INT))
    {
      unsigned HOST_WIDEST_INT mask =
	((unsigned HOST_WIDEST_INT)1 << (n.size * BITS_PER_UNIT)) - 1;

      n.n &= mask;
      cmp >>= (sizeof (HOST_WIDEST_INT) - n.size) * BITS_PER_UNIT;
    }

  /* A complete byte swap should make the symbolic number to start
     with the largest digit in the highest order byte.  */
  if (cmp != n.n)
    return NULL_TREE;

  return source_expr;
}
1103
/* Find manual byte swap implementations and turn them into a bswap
   builtin invokation.  Returns the TODO flags to run after the pass
   when anything changed, 0 otherwise.  */

static unsigned int
execute_optimize_bswap (void)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  /* The symbolic-number machinery models one byte per 8-bit unit;
     bail out on targets where that does not hold.  */
  if (BITS_PER_UNIT != 8)
    return 0;

  /* find_bswap needs a 64-bit HOST_WIDEST_INT to hold the symbolic
     number for a 64-bit swap.  */
  if (sizeof (HOST_WIDEST_INT) < 8)
    return 0;

  /* A builtin is usable only when both its decl exists and the target
     has a matching bswap instruction.  A 64-bit swap can also be
     emitted as two 32-bit swaps when the word size is 32 bits.  */
  bswap32_p = (built_in_decls[BUILT_IN_BSWAP32]
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (built_in_decls[BUILT_IN_BSWAP64]
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  if (!bswap32_p && !bswap64_p)
    return 0;

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = built_in_decls[BUILT_IN_BSWAP32];
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = built_in_decls[BUILT_IN_BSWAP64];
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  FOR_EACH_BB (bb)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
	  gimple stmt = gsi_stmt (gsi);
	  tree bswap_src, bswap_type;
	  tree bswap_tmp;
	  tree fndecl = NULL_TREE;
	  int type_size;
	  gimple call;

	  /* The root of a manual bswap is always the final OR that
	     combines the shifted pieces.  */
	  if (!is_gimple_assign (stmt)
	      || gimple_assign_rhs_code (stmt) != BIT_IOR_EXPR)
	    continue;

	  type_size = TYPE_PRECISION (gimple_expr_type (stmt));

	  /* Select the builtin matching the width of the expression,
	     if one is available on this target.  */
	  switch (type_size)
	    {
	    case 32:
	      if (bswap32_p)
		{
		  fndecl = built_in_decls[BUILT_IN_BSWAP32];
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      if (bswap64_p)
		{
		  fndecl = built_in_decls[BUILT_IN_BSWAP64];
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (!fndecl)
	    continue;

	  bswap_src = find_bswap (stmt);

	  if (!bswap_src)
	    continue;

	  changed = true;

	  bswap_tmp = bswap_src;

	  /* Convert the src expression if necessary.  */
	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
	    {
	      gimple convert_stmt;

	      bswap_tmp = create_tmp_var (bswap_type, "bswapsrc");
	      add_referenced_var (bswap_tmp);
	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);

	      convert_stmt = gimple_build_assign_with_ops (
			       CONVERT_EXPR, bswap_tmp, bswap_src, NULL);
	      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
	    }

	  call = gimple_build_call (fndecl, 1, bswap_tmp);

	  bswap_tmp = gimple_assign_lhs (stmt);

	  /* Convert the result if necessary.  The conversion statement
	     feeds the original LHS from a fresh temporary that will
	     become the call's LHS below.  */
	  if (!useless_type_conversion_p (TREE_TYPE (bswap_tmp), bswap_type))
	    {
	      gimple convert_stmt;

	      bswap_tmp = create_tmp_var (bswap_type, "bswapdst");
	      add_referenced_var (bswap_tmp);
	      bswap_tmp = make_ssa_name (bswap_tmp, NULL);
	      convert_stmt = gimple_build_assign_with_ops (
			       CONVERT_EXPR, gimple_assign_lhs (stmt),
			       bswap_tmp, NULL);
	      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
	    }

	  gimple_call_set_lhs (call, bswap_tmp);

	  if (dump_file)
	    {
	      fprintf (dump_file, "%d bit bswap implementation found at: ",
		       (int)type_size);
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  /* Insert the call before the (possible) result conversion,
	     then delete the original OR statement.  */
	  gsi_insert_after (&gsi, call, GSI_SAME_STMT);
	  gsi_remove (&gsi, true);
	}
    }

  return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
	  | TODO_verify_stmts : 0);
}
1243
1244 static bool
1245 gate_optimize_bswap (void)
1246 {
1247 return flag_expensive_optimizations && optimize;
1248 }
1249
/* Pass descriptor for the byte-swap recognition pass above.  */

struct gimple_opt_pass pass_optimize_bswap =
{
 {
  GIMPLE_PASS,
  "bswap",				/* name */
  gate_optimize_bswap,			/* gate */
  execute_optimize_bswap,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0					/* todo_flags_finish */
 }
};
1268
1269 /* Return true if RHS is a suitable operand for a widening multiplication.
1270 There are two cases:
1271
1272 - RHS makes some value twice as wide. Store that value in *NEW_RHS_OUT
1273 if so, and store its type in *TYPE_OUT.
1274
1275 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
1276 but leave *TYPE_OUT untouched. */
1277
1278 static bool
1279 is_widening_mult_rhs_p (tree rhs, tree *type_out, tree *new_rhs_out)
1280 {
1281 gimple stmt;
1282 tree type, type1, rhs1;
1283 enum tree_code rhs_code;
1284
1285 if (TREE_CODE (rhs) == SSA_NAME)
1286 {
1287 type = TREE_TYPE (rhs);
1288 stmt = SSA_NAME_DEF_STMT (rhs);
1289 if (!is_gimple_assign (stmt))
1290 return false;
1291
1292 rhs_code = gimple_assign_rhs_code (stmt);
1293 if (TREE_CODE (type) == INTEGER_TYPE
1294 ? !CONVERT_EXPR_CODE_P (rhs_code)
1295 : rhs_code != FIXED_CONVERT_EXPR)
1296 return false;
1297
1298 rhs1 = gimple_assign_rhs1 (stmt);
1299 type1 = TREE_TYPE (rhs1);
1300 if (TREE_CODE (type1) != TREE_CODE (type)
1301 || TYPE_PRECISION (type1) * 2 != TYPE_PRECISION (type))
1302 return false;
1303
1304 *new_rhs_out = rhs1;
1305 *type_out = type1;
1306 return true;
1307 }
1308
1309 if (TREE_CODE (rhs) == INTEGER_CST)
1310 {
1311 *new_rhs_out = rhs;
1312 *type_out = NULL;
1313 return true;
1314 }
1315
1316 return false;
1317 }
1318
1319 /* Return true if STMT performs a widening multiplication. If so,
1320 store the unwidened types of the operands in *TYPE1_OUT and *TYPE2_OUT
1321 respectively. Also fill *RHS1_OUT and *RHS2_OUT such that converting
1322 those operands to types *TYPE1_OUT and *TYPE2_OUT would give the
1323 operands of the multiplication. */
1324
1325 static bool
1326 is_widening_mult_p (gimple stmt,
1327 tree *type1_out, tree *rhs1_out,
1328 tree *type2_out, tree *rhs2_out)
1329 {
1330 tree type;
1331
1332 type = TREE_TYPE (gimple_assign_lhs (stmt));
1333 if (TREE_CODE (type) != INTEGER_TYPE
1334 && TREE_CODE (type) != FIXED_POINT_TYPE)
1335 return false;
1336
1337 if (!is_widening_mult_rhs_p (gimple_assign_rhs1 (stmt), type1_out, rhs1_out))
1338 return false;
1339
1340 if (!is_widening_mult_rhs_p (gimple_assign_rhs2 (stmt), type2_out, rhs2_out))
1341 return false;
1342
1343 if (*type1_out == NULL)
1344 {
1345 if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
1346 return false;
1347 *type1_out = *type2_out;
1348 }
1349
1350 if (*type2_out == NULL)
1351 {
1352 if (!int_fits_type_p (*rhs2_out, *type1_out))
1353 return false;
1354 *type2_out = *type1_out;
1355 }
1356
1357 return true;
1358 }
1359
1360 /* Process a single gimple statement STMT, which has a MULT_EXPR as
1361 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
1362 value is true iff we converted the statement. */
1363
1364 static bool
1365 convert_mult_to_widen (gimple stmt)
1366 {
1367 tree lhs, rhs1, rhs2, type, type1, type2;
1368 enum insn_code handler;
1369
1370 lhs = gimple_assign_lhs (stmt);
1371 type = TREE_TYPE (lhs);
1372 if (TREE_CODE (type) != INTEGER_TYPE)
1373 return false;
1374
1375 if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
1376 return false;
1377
1378 if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2))
1379 handler = optab_handler (umul_widen_optab, TYPE_MODE (type));
1380 else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2))
1381 handler = optab_handler (smul_widen_optab, TYPE_MODE (type));
1382 else
1383 handler = optab_handler (usmul_widen_optab, TYPE_MODE (type));
1384
1385 if (handler == CODE_FOR_nothing)
1386 return false;
1387
1388 gimple_assign_set_rhs1 (stmt, fold_convert (type1, rhs1));
1389 gimple_assign_set_rhs2 (stmt, fold_convert (type2, rhs2));
1390 gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
1391 update_stmt (stmt);
1392 return true;
1393 }
1394
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
			    enum tree_code code)
{
  gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
  tree type, type1, type2;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  /* Fetch the rhs codes of the statements defining each operand, so
     we can look for a multiplication feeding the addition.  Both
     operands must be SSA names for the transformation to apply.  */
  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
	rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }
  else
    return false;

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
	rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }
  else
    return false;

  /* Four shapes are recognized: a widening MULT_EXPR or an explicit
     WIDEN_MULT_EXPR on either operand.  For MINUS_EXPR only the
     second operand may be the multiplication (a - b*c), since
     b*c - a has no widening form.  */
  if (code == PLUS_EXPR && rhs1_code == MULT_EXPR)
    {
      if (!is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
			       &type2, &mult_rhs2))
	return false;
      add_rhs = rhs2;
    }
  else if (rhs2_code == MULT_EXPR)
    {
      if (!is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
			       &type2, &mult_rhs2))
	return false;
      add_rhs = rhs1;
    }
  else if (code == PLUS_EXPR && rhs1_code == WIDEN_MULT_EXPR)
    {
      mult_rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      mult_rhs2 = gimple_assign_rhs2 (rhs1_stmt);
      type1 = TREE_TYPE (mult_rhs1);
      type2 = TREE_TYPE (mult_rhs2);
      add_rhs = rhs2;
    }
  else if (rhs2_code == WIDEN_MULT_EXPR)
    {
      mult_rhs1 = gimple_assign_rhs1 (rhs2_stmt);
      mult_rhs2 = gimple_assign_rhs2 (rhs2_stmt);
      type1 = TREE_TYPE (mult_rhs1);
      type2 = TREE_TYPE (mult_rhs2);
      add_rhs = rhs1;
    }
  else
    return false;

  /* The multiply-accumulate optabs only exist for matching
     signedness of the two multiplication operands.  */
  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return false;

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, type1, optab_default);
  if (optab_handler (this_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* ??? May need some type verification here?  */

  gimple_assign_set_rhs_with_ops_1 (gsi, wmult_code,
				    fold_convert (type1, mult_rhs1),
				    fold_convert (type2, mult_rhs2),
				    add_rhs);
  update_stmt (gsi_stmt (*gsi));
  return true;
}
1496
1497 /* Find integer multiplications where the operands are extended from
1498 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
1499 where appropriate. */
1500
1501 static unsigned int
1502 execute_optimize_widening_mul (void)
1503 {
1504 bool changed = false;
1505 basic_block bb;
1506
1507 FOR_EACH_BB (bb)
1508 {
1509 gimple_stmt_iterator gsi;
1510
1511 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1512 {
1513 gimple stmt = gsi_stmt (gsi);
1514 enum tree_code code;
1515
1516 if (!is_gimple_assign (stmt))
1517 continue;
1518
1519 code = gimple_assign_rhs_code (stmt);
1520 if (code == MULT_EXPR)
1521 changed |= convert_mult_to_widen (stmt);
1522 else if (code == PLUS_EXPR || code == MINUS_EXPR)
1523 changed |= convert_plusminus_to_widen (&gsi, stmt, code);
1524 }
1525 }
1526
1527 return (changed ? TODO_dump_func | TODO_update_ssa | TODO_verify_ssa
1528 | TODO_verify_stmts : 0);
1529 }
1530
1531 static bool
1532 gate_optimize_widening_mul (void)
1533 {
1534 return flag_expensive_optimizations && optimize;
1535 }
1536
/* Pass descriptor for the widening-multiply recognition pass above.  */

struct gimple_opt_pass pass_optimize_widening_mul =
{
 {
  GIMPLE_PASS,
  "widening_mul",			/* name */
  gate_optimize_widening_mul,		/* gate */
  execute_optimize_widening_mul,	/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_NONE,				/* tv_id */
  PROP_ssa,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  0					/* todo_flags_finish */
 }
};