/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.
   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   effective to optimize two divisions by the same number.  We make
   this a param, and it shall be called N in the remainder of this
   comment.
   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, we can instead insert
   divisions elsewhere, in blocks that are common dominators of those
   that contain one.)

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has fewer than N divisions *post-dominating* it.
   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.
   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.
   If we did this using domwalk.c, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
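
/* For illustration, a sketch (not from the original sources) of the
   transformation when the same divisor is used twice in one block,
   assuming N == 2:

	x = a / d;			recip = 1.0 / d;
	y = b / d;		-->	x = a * recip;
					y = b * recip;

   The reciprocal is inserted right before the first division it
   replaces (case 1 in insert_reciprocals below).  */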
#include "coretypes.h"
#include "fold-const.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "statistics.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "gimple-pretty-print.h"
#include "builtins.h"

/* FIXME: RTL headers have to be included here for optabs.  */
#include "rtl.h"		/* Because optabs.h wants enum rtx_code.  */
#include "expr.h"		/* Because optabs.h wants sepops.  */
#include "insn-codes.h"
#include "optabs.h"
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence {
  /* The basic block represented by this structure.  */
  basic_block bb;

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def;

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple recip_def_stmt;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division;
};
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;
} sincos_stats;

static struct
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;
} widen_mul_stats;
/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static pool_allocator<occurrence> *occ_pool;
/* Allocate and return a new struct occurrence for basic block BB, and
   whose children list is headed by CHILDREN.  */
static struct occurrence *
occ_new (basic_block bb, struct occurrence *children)
{
  struct occurrence *occ;

  bb->aux = occ = occ_pool->allocate ();
  memset (occ, 0, sizeof (struct occurrence));

  occ->bb = bb;
  occ->children = children;
  return occ;
}
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = occ_new (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
/* Register that we found a division in BB.  */

static inline void
register_division_in (basic_block bb)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = occ_new (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions++;
}
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}
/* Return whether USE_STMT is a floating-point division by DEF.  */

static inline bool
is_division_by (gimple use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, int threshold)
{
  tree type;
  gassign *new_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      && occ->num_divisions >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == def_gsi->bb)
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
}
/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  occ->bb->aux = NULL;
  occ_pool->remove (occ);

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p;
  imm_use_iterator use_iter;
  struct occurrence *occ;
  int count = 0, threshold;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt));
	  count++;
	}
    }

  /* Do the expensive part only if we can hope to optimize something.  */
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
  if (count >= threshold)
    {
      gimple use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	}
    }

  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */

namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return optimize && flag_reciprocal_math; }
  virtual unsigned int execute (function *);

}; // class pass_cse_reciprocals
unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new pool_allocator<occurrence>
    ("dominators for recip", n_basic_blocks_for_fn (fun) / 3 + 1);

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

#ifdef ENABLE_CHECKING
  FOR_EACH_BB_FN (bb, fun)
    gcc_assert (!bb->aux);
#endif

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    execute_cse_reciprocals_1 (&gsi, def);
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
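      /* For instance, on a target whose targetm.builtin_reciprocal hook
	 returns a reciprocal square root builtin, a / sqrt (b) can be
	 rewritten as a * rsqrt (b) (an illustrative sketch; the actual
	 builtin chosen is entirely target-defined).  */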
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1)
		  && (fndecl = gimple_call_fndecl (stmt1))
		  && (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
		      || DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD))
		{
		  enum built_in_function code;
		  bool md_code, fail;
		  imm_use_iterator ui;
		  use_operand_p use_p;

		  code = DECL_FUNCTION_CODE (fndecl);
		  md_code = DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD;

		  fndecl = targetm.builtin_reciprocal (code, md_code, false);
		  if (!fndecl)
		    continue;

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  fail = false;
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (stmt1, arg1);
		  gimple_call_set_fndecl (stmt1, fndecl);
		  update_stmt (stmt1);
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}
} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple> *stmts,
		     basic_block *top_bb, gimple use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */
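
/* For example, a sketch of the intended effect (not literal GIMPLE
   syntax):

	s = sin (x);			t = cexpi (x);
	c = cos (x);		-->	s = IMAGPART_EXPR <t>;
					c = REALPART_EXPR <t>;

   The two library calls are replaced by component extractions from a
   single cexpi result.  */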
static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type;
  gimple def_stmt, use_stmt, stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  type = TREE_TYPE (name);
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt)
	  || !(fndecl = gimple_call_fndecl (use_stmt))
	  || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
	continue;

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:;
	}
    }

  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL_TREE;
      fndecl = gimple_call_fndecl (use_stmt);

      switch (DECL_FUNCTION_CODE (fndecl))
	{
	CASE_FLT_FN (BUILT_IN_COS):
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_SIN):
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_FLT_FN (BUILT_IN_CEXPI):
	  rhs = res;
	  break;

	default:;
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif
/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3

/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
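
/* Worked example of reading the table above: powi_table[15] == 9, so
   x**15 is evaluated as x**9 * x**6; recursing on 9 and 6 gives the
   chain

	x2 = x*x;  x3 = x2*x;  x6 = x3*x3;  x9 = x6*x3;  x15 = x9*x6;

   i.e. five multiplications in total.  */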
/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}
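
/* For instance, powi_lookup_cost (5, cache) with a fresh cache (only
   cache[1] set, as powi_cost arranges) returns 3: powi_table[5] == 3
   splits 5 into 3 + 2, and evaluating x2 = x*x, x3 = x2*x and
   x5 = x3*x2 costs one multiplication each.  */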
/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = (n < 0) ? -n : n;

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result++;
	}
    }

  return result + powi_lookup_cost (val, cache);
}
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_real (type, dconst1);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  /* Avoid largest negative number.  */
  if (n != -n
      && ((n >= -1 && n <= 2)
	  || (optimize_function_for_speed_p (cfun)
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name for variable
   TARGET.  Insert the statement prior to GSI's current position, and
   return the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
/* Build a gimple reference operation with the given CODE and argument
   ARG, assigning the result to a new SSA name of TYPE with NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static inline tree
build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
		      const char *name, enum tree_code code, tree arg0)
{
  tree result = make_temp_ssa_name (type, NULL, name);
  gimple stmt = gimple_build_assign (result, build1 (code, type, arg0));
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type);
  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
struct pow_synth_sqrt_info
{
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};
/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */
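
/* For example, C == 0.625 == 0.5**1 + 0.5**3, so for N >= 3 the
   factors are a[1] = 1, a[2] = 0, a[3] = 1, the deepest 0.5 power is
   3, and combining the two terms costs one multiplication:
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */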
static bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (REAL_ARITHMETIC (res, MINUS_EXPR, remainder, factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
	 of powers of 0.5.  */
      if (REAL_VALUES_EQUAL (res, dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      REAL_ARITHMETIC (factor, MULT_EXPR, factor, dconsthalf);
    }
  return false;
}
/* Return the tree corresponding to FN being applied
   to ARG N times at GSI and LOC.
   Look up previous results from CACHE if need be.
   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */

static tree
get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
	      tree fn, location_t loc, tree *cache)
{
  tree res = cache[n];
  if (!res)
    {
      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
      res = build_and_insert_call (gsi, loc, fn, prev);
      cache[n] = res;
    }

  return res;
}
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}
/* Print to STREAM the fractional sequence of sqrt chains
   applied to ARG, described by INFO.  Used for the dump file.  */

static void
dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
			       struct pow_synth_sqrt_info *info)
{
  for (unsigned int i = 0; i < info->deepest; i++)
    {
      bool is_set = info->factors[i];
      if (is_set)
	{
	  print_nested_fn (stream, "sqrt", arg, i + 1);
	  if (i != info->deepest - 1)
	    fprintf (stream, " * ");
	}
    }
}
/* Print to STREAM a representation of raising ARG to an integer
   power N.  Used for the dump file.  */

static void
dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
{
  if (n > 1)
    fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
  else if (n == 1)
    fprintf (stream, "%s", arg);
}
/* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
   square roots.  Place at GSI and LOC.  Limit the maximum depth
   of the sqrt chains to MAX_DEPTH.  Return the tree holding the
   result of the expanded sequence or NULL_TREE if the expansion failed.

   This routine assumes that ARG1 is a real number with a fractional part
   (the integer exponent case will have been handled earlier in
   gimple_expand_builtin_pow).

   For ARG1 > 0.0:
   * For ARG1 composed of a whole part WHOLE_PART and a fractional part
     FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
     FRAC_PART == ARG1 - WHOLE_PART:
     Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
     POW (ARG0, FRAC_PART) is expanded as a product of square root chains
     if it can be expressed as such, that is if FRAC_PART satisfies:
     FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
     where integer a[i] is either 0 or 1.

     Example:
	POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
	  --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))

   For ARG1 < 0.0 there are two approaches:
   * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
	 is calculated as above.

     Example:
	POW (x, -5.625) == 1.0 / POW (x, 5.625)
	  --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))

   * (B) : WHOLE_PART := - ceil (abs (ARG1))
	   FRAC_PART  := ARG1 - WHOLE_PART
	 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).

     Example:
	POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
	  --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))

   For ARG1 < 0.0 we choose between (A) and (B) depending on
   how many multiplications we'd have to do.
   So, for the example in (B): POW (x, -5.875), if we were to
   follow algorithm (A) we would produce:
   1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
   which contains more multiplications than approach (B).

   Hopefully, this approach will eliminate potentially expensive POW library
   calls when unsafe floating point math is enabled and allow the compiler to
   further optimise the multiplies, square roots and divides produced by this
   function.  */
static tree
expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
		     tree arg0, tree arg1, HOST_WIDE_INT max_depth)
{
  tree type = TREE_TYPE (arg0);
  machine_mode mode = TYPE_MODE (type);
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  bool one_over = true;

  if (!sqrtfn)
    return NULL_TREE;

  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);

  gcc_assert (max_depth > 0);
  tree *cache = XALLOCAVEC (tree, max_depth + 1);

  struct pow_synth_sqrt_info synth_info;
  synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
  synth_info.deepest = 0;
  synth_info.num_mults = 0;

  bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
  REAL_VALUE_TYPE exp = real_value_abs (&exp_init);

  /* The whole and fractional parts of exp.  */
  REAL_VALUE_TYPE whole_part;
  REAL_VALUE_TYPE frac_part;

  real_floor (&whole_part, mode, &exp);
  REAL_ARITHMETIC (frac_part, MINUS_EXPR, exp, whole_part);

  REAL_VALUE_TYPE ceil_whole = dconst0;
  REAL_VALUE_TYPE ceil_fract = dconst0;

  if (neg_exp)
    {
      real_ceil (&ceil_whole, mode, &exp);
      REAL_ARITHMETIC (ceil_fract, MINUS_EXPR, ceil_whole, exp);
    }

  if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
    return NULL_TREE;

  /* Check whether it's more profitable to not use 1.0 / ...  */
  if (neg_exp)
    {
      struct pow_synth_sqrt_info alt_synth_info;
      alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
      alt_synth_info.deepest = 0;
      alt_synth_info.num_mults = 0;

      if (representable_as_half_series_p (ceil_fract, max_depth,
					  &alt_synth_info)
	  && alt_synth_info.deepest <= synth_info.deepest
	  && alt_synth_info.num_mults < synth_info.num_mults)
	{
	  whole_part = ceil_whole;
	  frac_part = ceil_fract;
	  synth_info.deepest = alt_synth_info.deepest;
	  synth_info.num_mults = alt_synth_info.num_mults;
	  memcpy (synth_info.factors, alt_synth_info.factors,
		  (max_depth + 1) * sizeof (bool));
	  one_over = false;
	}
    }

  HOST_WIDE_INT n = real_to_integer (&whole_part);
  REAL_VALUE_TYPE cint;
  real_from_integer (&cint, VOIDmode, n, SIGNED);

  if (!real_identical (&whole_part, &cint))
    return NULL_TREE;

  if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
    return NULL_TREE;

  memset (cache, 0, (max_depth + 1) * sizeof (tree));

  tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;

  /* Calculate the integer part of the exponent.  */
  if (n > 1)
    {
      integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
      if (!integer_res)
	return NULL_TREE;
    }

  if (dump_file)
    {
      char string[64];

      real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
      fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);

      if (neg_exp)
	{
	  if (one_over)
	    {
	      fprintf (dump_file, "1.0 / (");
	      dump_integer_part (dump_file, "x", n);
	      if (n > 0)
		fprintf (dump_file, " * ");
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, ")");
	    }
	  else
	    {
	      dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	      fprintf (dump_file, " / (");
	      dump_integer_part (dump_file, "x", n);
	      fprintf (dump_file, ")");
	    }
	}
      else
	{
	  dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
	  if (n > 0)
	    fprintf (dump_file, " * ");
	  dump_integer_part (dump_file, "x", n);
	}

      fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
    }

  tree fract_res = NULL_TREE;
  cache[0] = arg0;

  /* Calculate the fractional part of the exponent.  */
  for (unsigned i = 0; i < synth_info.deepest; i++)
    {
      if (synth_info.factors[i])
	{
	  tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);

	  if (!fract_res)
	    fract_res = sqrt_chain;
	  else
	    fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
						fract_res, sqrt_chain);
	}
    }

  tree res = NULL_TREE;

  if (neg_exp)
    {
      if (one_over)
	{
	  if (n > 0)
	    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					  fract_res, integer_res);
	  else
	    res = fract_res;

	  res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
					build_real (type, dconst1), res);
	}
      else
	res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
				      fract_res, integer_res);
    }
  else
    res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
				  fract_res, integer_res);
  return res;
}
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */
static tree
gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
			   tree arg0, tree arg1)
{
  REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
  REAL_VALUE_TYPE c2, dconst3;
  HOST_WIDE_INT n;
  tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
  machine_mode mode;
  bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
  bool hw_sqrt_exists, c_is_int, c2_is_int;

  dconst1_4 = dconst1;
  SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);

  /* If the exponent isn't a constant, there's nothing of interest
     to be done.  */
  if (TREE_CODE (arg1) != REAL_CST)
    return NULL_TREE;

  /* If the exponent is equivalent to an integer, expand to an optimal
     multiplication sequence when profitable.  */
  c = TREE_REAL_CST (arg1);
  n = real_to_integer (&c);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c_is_int = real_identical (&c, &cint);

  if (c_is_int
      && ((n >= -1 && n <= 2)
	  || (flag_unsafe_math_optimizations
	      && speed_p
	      && powi_cost (n) <= POWI_MAX_MULTS)))
    return gimple_expand_builtin_powi (gsi, loc, arg0, n);

  /* Attempt various optimizations using sqrt and cbrt.  */
  type = TREE_TYPE (arg0);
  mode = TYPE_MODE (type);
  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);

  /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
     unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
     sqrt(-0) = -0.  */
  if (sqrtfn
      && REAL_VALUES_EQUAL (c, dconsthalf)
      && !HONOR_SIGNED_ZEROS (mode))
    return build_and_insert_call (gsi, loc, sqrtfn, arg0);

  hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;

  /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
     optimizations since 1./3. is not exactly representable.  If x
     is negative and finite, the correct value of pow(x,1./3.) is
     a NaN with the "invalid" exception raised, because the value
     of 1./3. actually has an even denominator.  The correct value
     of cbrt(x) is a negative real value.  */
  cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
  dconst1_3 = real_value_truncate (mode, dconst_third ());

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && REAL_VALUES_EQUAL (c, dconst1_3))
    return build_and_insert_call (gsi, loc, cbrtfn, arg0);

  /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
     if we don't have a hardware sqrt insn.  */
  dconst1_6 = dconst1_3;
  SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);

  if (flag_unsafe_math_optimizations
      && sqrtfn
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && speed_p
      && hw_sqrt_exists
      && REAL_VALUES_EQUAL (c, dconst1_6))
    {
      /* sqrt(x)  */
      sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);

      /* cbrt(sqrt(x))  */
      return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
    }

  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
  if (flag_unsafe_math_optimizations
      && sqrtfn
      && hw_sqrt_exists
      && (speed_p || REAL_VALUES_EQUAL (c, dconst1_4))
      && !HONOR_SIGNED_ZEROS (mode))
    {
      unsigned int max_depth = speed_p
				? PARAM_VALUE (PARAM_MAX_POW_SQRT_DEPTH)
				: 2;

      tree expand_with_sqrts
	= expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);

      if (expand_with_sqrts)
	return expand_with_sqrts;
    }

  real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  c2_is_int = real_identical (&c2, &cint);

  /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into

     powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
     1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.

     Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
     different from pow(x, 1./3.) due to rounding and behavior with
     negative x, we need to constrain this transformation to unsafe
     math and positive x or finite math.  */
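
  /* For example (a sketch): c == 5./3. gives n == 5, so pow (x, 5./3.)
     becomes powi (x, 5/3) * powi (cbrt (x), 5%3), i.e.
     x * cbrt (x) * cbrt (x).  */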
  real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
  real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
  real_round (&c2, mode, &c2);
  n = real_to_integer (&c2);
  real_from_integer (&cint, VOIDmode, n, SIGNED);
  real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
  real_convert (&c2, mode, &c2);

  if (flag_unsafe_math_optimizations
      && cbrtfn
      && (gimple_val_nonnegative_real_p (arg0) || !HONOR_NANS (mode))
      && real_identical (&c2, &c)
      && !c2_is_int
      && optimize_function_for_speed_p (cfun)
      && powi_cost (n / 3) <= POWI_MAX_MULTS)
    {
      tree powi_x_ndiv3 = NULL_TREE;

      /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
	 possible or profitable, give up.  Skip the degenerate case when
	 abs(n) < 3, where the result is always 1.  */
      if (absu_hwi (n) >= 3)
	{
	  powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
						     abs_hwi (n / 3));
	  if (!powi_x_ndiv3)
	    return NULL_TREE;
	}

      /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
	 as that creates an unnecessary variable.  Instead, just produce
	 either cbrt(x) or cbrt(x) * cbrt(x).  */
      cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);

      if (absu_hwi (n) % 3 == 1)
	powi_cbrt_x = cbrt_x;
      else
	powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					      cbrt_x, cbrt_x);

      /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
      if (absu_hwi (n) < 3)
	result = powi_cbrt_x;
      else
	result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
					 powi_x_ndiv3, powi_cbrt_x);

      /* If n is negative, reciprocate the result.  */
      if (n < 0)
	result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
					 build_real (type, dconst1), result);

      return result;
    }

  /* No optimizations succeeded.  */
  return NULL_TREE;
}
/* ARG is the argument to a cabs builtin call in GSI with location info
   LOC.  Create a sequence of statements prior to GSI that calculates
   sqrt(R*R + I*I), where R and I are the real and imaginary components
   of ARG, respectively.  Return an expression holding the result.  */
static tree
gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
{
  tree real_part, imag_part, addend1, addend2, sum, result;
  tree type = TREE_TYPE (TREE_TYPE (arg));
  tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
  machine_mode mode = TYPE_MODE (type);

  if (!flag_unsafe_math_optimizations
      || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
      || !sqrtfn
      || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
    return NULL_TREE;

  real_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    REALPART_EXPR, arg);
  addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    real_part, real_part);
  imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
				    IMAGPART_EXPR, arg);
  addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
				    imag_part, imag_part);
  sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
  result = build_and_insert_call (gsi, loc, sqrtfn, sum);

  return result;
}
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
   on the SSA_NAME argument of each of them.  Also expand powi(x,n) into
   an optimal number of multiplies, when n is a constant.  */
namespace {

const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
class pass_cse_sincos : public gimple_opt_pass
{
public:
  pass_cse_sincos (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_sincos, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* We no longer require either sincos or cexp, since powi expansion
	 piggybacks on this pass.  */
      return true;
    }

  virtual unsigned int execute (function *);

}; // class pass_cse_sincos
unsigned int
pass_cse_sincos::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  calculate_dominance_info (CDI_DOMINATORS);
  memset (&sincos_stats, 0, sizeof (sincos_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      bool cleanup_eh = false;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple stmt = gsi_stmt (gsi);
	  tree fndecl;

	  /* Only the last stmt in a bb could throw, no need to call
	     gimple_purge_dead_eh_edges if we change something in the middle
	     of a basic block.  */
	  cleanup_eh = false;

	  if (is_gimple_call (stmt)
	      && gimple_call_lhs (stmt)
	      && (fndecl = gimple_call_fndecl (stmt))
	      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
	    {
	      tree arg, arg0, arg1, result;
	      HOST_WIDE_INT n;
	      location_t loc;

	      switch (DECL_FUNCTION_CODE (fndecl))
		{
		CASE_FLT_FN (BUILT_IN_COS):
		CASE_FLT_FN (BUILT_IN_SIN):
		CASE_FLT_FN (BUILT_IN_CEXPI):
		  /* Make sure we have either sincos or cexp.  */
		  if (!targetm.libc_has_function (function_c99_math_complex)
		      && !targetm.libc_has_function (function_sincos))
		    break;

		  arg = gimple_call_arg (stmt, 0);
		  if (TREE_CODE (arg) == SSA_NAME)
		    cfg_changed |= execute_cse_sincos_1 (arg);
		  break;

		CASE_FLT_FN (BUILT_IN_POW):
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);

		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_FLT_FN (BUILT_IN_POWI):
		  arg0 = gimple_call_arg (stmt, 0);
		  arg1 = gimple_call_arg (stmt, 1);
		  loc = gimple_location (stmt);
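
		  /* powi (-1, k) is 1.0 when k is even and -1.0 when k
		     is odd, so it can be expanded to a conditional on
		     the low bit of k rather than a multiplication
		     chain.  */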
		  if (real_minus_onep (arg0))
		    {
		      tree t0, t1, cond, one, minus_one;
		      gassign *stmt;

		      t0 = TREE_TYPE (arg0);
		      t1 = TREE_TYPE (arg1);
		      one = build_real (t0, dconst1);
		      minus_one = build_real (t0, dconstm1);

		      cond = make_temp_ssa_name (t1, NULL, "powi_cond");
		      stmt = gimple_build_assign (cond, BIT_AND_EXPR,
						  arg1, build_int_cst (t1, 1));
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

		      result = make_temp_ssa_name (t0, NULL, "powi");
		      stmt = gimple_build_assign (result, COND_EXPR, cond,
						  minus_one, one);
		      gimple_set_location (stmt, loc);
		      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		    }
		  else
		    {
		      if (!tree_fits_shwi_p (arg1))
			break;

		      n = tree_to_shwi (arg1);
		      result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
		    }

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		CASE_FLT_FN (BUILT_IN_CABS):
		  arg0 = gimple_call_arg (stmt, 0);
		  loc = gimple_location (stmt);
		  result = gimple_expand_builtin_cabs (&gsi, loc, arg0);

		  if (result)
		    {
		      tree lhs = gimple_get_lhs (stmt);
		      gassign *new_stmt = gimple_build_assign (lhs, result);
		      gimple_set_location (new_stmt, loc);
		      unlink_stmt_vdef (stmt);
		      gsi_replace (&gsi, new_stmt, true);
		      cleanup_eh = true;
		      if (gimple_vdef (stmt))
			release_ssa_name (gimple_vdef (stmt));
		    }
		  break;

		default:;
		}
	    }
	}
      if (cleanup_eh)
	cfg_changed |= gimple_purge_dead_eh_edges (bb);
    }

  statistics_counter_event (fun, "sincos statements inserted",
			    sincos_stats.inserted);

  free_dominance_info (CDI_DOMINATORS);
  return cfg_changed ? TODO_cleanup_cfg : 0;
}

} // anon namespace

gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
/* A symbolic number is used to detect byte permutation and selection
   patterns.  Therefore the field N contains an artificial number
   consisting of octet sized markers:

   0	    - target byte has the value 0
   FF	    - target byte has an unknown value (eg. due to sign extension)
   1..size  - marker value is the target byte index minus one.

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with a base address (the array or structure the
   load is made from), an offset from the base address and a range which gives
   the difference between the highest and lowest accessed memory location to
   make such a symbolic number.  The range is thus different from size which
   reflects the size of the type of the current expression.  Note that for a
   non-memory source, range holds the same value as size.

   For instance, for an array char a[], (short) a[0] | (short) a[3] would have
   a size of 2 but a range of 4 while (short) a[0] | ((short) a[0] << 1) would
   still have a size of 2 but this time a range of 1.  */
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  HOST_WIDE_INT bytepos;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
};
#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
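
/* For example, a 4-byte symbolic number that still equals CMPNOP masked
   to 4 bytes (0x04030201) means the expression reproduces its source
   bytes in order, i.e. a nop; if it instead equals CMPXCHG masked
   likewise (0x01020304), the bytes are fully reversed and the
   expression is a byte swap.  */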
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

static inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
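
/* As an illustration, shifting the 4-byte symbolic number 0x04030201
   right by 8 bits yields 0x00040302 for an unsigned type, but
   0xff040302 for a signed type whose head marker is set, since the
   bytes shifted in by an arithmetic shift depend on the sign bit.  */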
/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

static inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}
/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

static bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source; the base of the memory area
   accessed and the offset of the access from that base are recorded in N.  */

bool
find_bswap_or_nop_load (gimple stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf nodes.  */
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                                   &unsignedp, &volatilep, false);

  if (TREE_CODE (base_addr) == MEM_REF)
    {
      offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
        {
          offset_int boff, coff = mem_ref_offset (base_addr);
          boff = wi::lshift (coff, LOG2_BITS_PER_UNIT);
          bit_offset += boff;
        }

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (wi::neg_p (bit_offset))
        {
          offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
          offset_int tem = bit_offset.and_not (mask);
          /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
             Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
          bit_offset -= tem;
          tem = wi::arshift (tem, LOG2_BITS_PER_UNIT);
          if (offset)
            offset = size_binop (PLUS_EXPR, offset,
                                 wide_int_to_tree (sizetype, tem));
          else
            offset = wide_int_to_tree (sizetype, tem);
        }

      bitpos += bit_offset.to_shwi ();
    }

  if (bitpos % BITS_PER_UNIT)
    return false;
  if (bitsize % BITS_PER_UNIT)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bitpos / BITS_PER_UNIT;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}
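
/* Example (a sketch): for a one-byte load such as
   x = ((unsigned char *) p)[2], this records the pointer p as the
   base and n->bytepos = 2, along with the alias set and VUSE of the
   access.  Two such records sharing the same base can later be
   combined by perform_symbolic_merge below into one wider load.  */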
/* Compute the symbolic number N representing the result of a bitwise OR on
   two symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

static gimple
perform_symbolic_merge (gimple source_stmt1, struct symbolic_number *n1,
                        gimple source_stmt2, struct symbolic_number *n2,
                        struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple source_stmt;
  struct symbolic_number *n_start;

  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
  if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
    {
      uint64_t inc;
      HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;

      if (!n1->base_addr || !n2->base_addr
          || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
        return NULL;

      if (!n1->offset != !n2->offset
          || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
        return NULL;

      if (n1->bytepos < n2->bytepos)
        {
          n_start = n1;
          start_sub = n2->bytepos - n1->bytepos;
          source_stmt = source_stmt1;
        }
      else
        {
          n_start = n2;
          start_sub = n1->bytepos - n2->bytepos;
          source_stmt = source_stmt2;
        }

      /* Find the highest address at which a load is performed and
         compute related info.  */
      end1 = n1->bytepos + (n1->range - 1);
      end2 = n2->bytepos + (n2->range - 1);
      if (end1 < end2)
        {
          end = end2;
          end_sub = end2 - end1;
        }
      else
        {
          end = end1;
          end_sub = end1 - end2;
        }
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
        toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
        toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - n_start->bytepos + 1;

      /* Check that the range of memory covered can be represented by
         a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
        return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
         bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
        {
          unsigned marker
            = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
          if (marker && marker != MARKER_BYTE_UNKNOWN)
            toinc_n_ptr->n += inc;
        }
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      /* If both operands define the same byte with different markers the
         result is unpredictable: cancel the optimization.  */
      if (masked1 && masked2 && masked1 != masked2)
        return NULL;
    }
  n->n = n1->n | n2->n;

  return source_stmt;
}
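
/* Worked example: on a little-endian target, merging the symbolic
   numbers for a[0] (markers 0x01, bytepos 0) and a[1] << 8 (markers
   0x0100, bytepos 1) increments the markers of the higher load to
   0x0200 and ORs the two, giving 0x0201 with range 2.  That matches
   CMPNOP masked to two bytes, i.e. the whole expression is a plain
   16-bit load of a[0..1].  */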
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed, the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand, and NULL
   otherwise.  */

static gimple
find_bswap_or_nop_1 (gimple stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple rhs1_stmt, rhs2_stmt, source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
          && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
          && code != LSHIFT_EXPR
          && code != RSHIFT_EXPR
          && code != LROTATE_EXPR
          && code != RROTATE_EXPR
          && !CONVERT_EXPR_CODE_P (code))
        return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
         we have to initialize the symbolic number.  */
      if (!source_stmt1)
        {
          if (gimple_assign_load_p (stmt)
              || !init_symbolic_number (n, rhs1))
            return NULL;
          source_stmt1 = stmt;
        }

      switch (code)
        {
        case BIT_AND_EXPR:
          {
            int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            uint64_t val = int_cst_value (rhs2), mask = 0;
            uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

            /* Only constants masking full bytes are allowed.  */
            for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
              if ((val & tmp) != 0 && (val & tmp) != tmp)
                return NULL;
              else if (val & tmp)
                mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

            n->n &= mask;
          }
          break;
        case LSHIFT_EXPR:
        case RSHIFT_EXPR:
        case LROTATE_EXPR:
        case RROTATE_EXPR:
          if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
            return NULL;
          break;
        CASE_CONVERT:
          {
            int i, type_size, old_type_size;
            tree type;

            type = gimple_expr_type (stmt);
            type_size = TYPE_PRECISION (type);
            if (type_size % BITS_PER_UNIT != 0)
              return NULL;
            type_size /= BITS_PER_UNIT;
            if (type_size > 64 / BITS_PER_MARKER)
              return NULL;

            /* Sign extension: result is dependent on the value.  */
            old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
                && HEAD_MARKER (n->n, old_type_size))
              for (i = 0; i < type_size - old_type_size; i++)
                n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
                        << ((type_size - 1 - i) * BITS_PER_MARKER);

            if (type_size < 64 / BITS_PER_MARKER)
              {
                /* If STMT casts to a smaller type mask out the bits not
                   belonging to the target type.  */
                n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
              }
            n->type = type;
            if (!n->base_addr)
              n->range = type_size;
          }
          break;
        default:
          return NULL;
        };
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple source_stmt, source_stmt2;

      if (code != BIT_IOR_EXPR)
        return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
        return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
        {
        case BIT_IOR_EXPR:
          source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

          if (!source_stmt1)
            return NULL;

          source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

          if (!source_stmt2)
            return NULL;

          if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
            return NULL;

          if (!n1.vuse != !n2.vuse
              || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
            return NULL;

          source_stmt
            = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

          if (!source_stmt)
            return NULL;

          if (!verify_symbolic_number_p (n, stmt))
            return NULL;

          break;
        default:
          return NULL;
        }
      return source_stmt;
    }
  return NULL;
}
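
/* Example of the recursion: for the 16-bit swap of an unsigned x,
   y = (x >> 8) | (x << 8), the BIT_IOR_EXPR case recurses into both
   shift statements.  Each leaf initializes markers 0x0201 for x; the
   shifts permute them to 0x0002 and 0x0100 respectively, and the
   merge yields 0x0102, which equals CMPXCHG masked to two bytes,
   i.e. a byte swap.  */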
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  Finally, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

static gimple
find_bswap_or_nop (gimple stmt, struct symbolic_number *n, bool *bswap)
{
/* The number which the find_bswap_or_nop_1 result should match in order
   to have a full byte swap.  The number is shifted to the right
   according to the size of the symbolic number before using it.  */
  uint64_t cmpxchg = CMPXCHG;
  uint64_t cmpnop = CMPNOP;

  gimple source_stmt;
  int limit;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by log2(n) + 1 here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
  source_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!source_stmt)
    return NULL;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    {
      int rsize;
      uint64_t tmpn;

      for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
      n->range = rsize;
    }

  /* Zero out the extra bits of N and CMP*.  */
  if (n->range < (int) sizeof (int64_t))
    {
      uint64_t mask;

      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      cmpnop &= mask;
    }

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop)
    return NULL;

  n->range *= BITS_PER_UNIT;
  return source_stmt;
}
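
/* For illustration, one shape of manual 32-bit byte swap that this
   detection is meant to catch (a sketch; any equivalent shift/mask/or
   formulation works):

     uint32_t
     swap32 (uint32_t x)
     {
       return ((x & 0x000000ff) << 24)
              | ((x & 0x0000ff00) << 8)
              | ((x & 0x00ff0000) >> 8)
              | ((x & 0xff000000) >> 24);
     }

   find_bswap_or_nop walks the OR tree, ends up with symbolic number
   0x01020304 and reports a bswap with range 32.  */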
const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
   of CUR_STMT by an equivalent bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
   load to perform are also given in N while the builtin bswap invocation is
   given in FNDECL.  Finally, if a load is involved, SRC_STMT refers to one of
   the load statements involved to construct the rhs in CUR_STMT and N->range
   gives the size of the rhs expression for maintaining some statistics.

   Note that if the replacement involves a load, CUR_STMT is moved just after
   SRC_STMT to do the load with the same VUSE, which can lead to CUR_STMT
   changing basic block.  */

static bool
bswap_replace (gimple cur_stmt, gimple src_stmt, tree fndecl, tree bswap_type,
               tree load_type, struct symbolic_number *n, bool bswap)
{
  gimple_stmt_iterator gsi;
  tree src, tmp, tgt;
  gimple bswap_stmt;

  gsi = gsi_for_stmt (cur_stmt);
  src = gimple_assign_rhs1 (src_stmt);
  tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi_for_stmt (src_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple addr_stmt, load_stmt;
      unsigned align;
      HOST_WIDE_INT load_offset = 0;

      align = get_object_alignment (src);
      /* If the new access is smaller than the original one, we need
         to perform big endian adjustment.  */
      if (BYTES_BIG_ENDIAN)
        {
          HOST_WIDE_INT bitsize, bitpos;
          machine_mode mode;
          int unsignedp, volatilep;
          tree offset;

          get_inner_reference (src, &bitsize, &bitpos, &offset, &mode,
                               &unsignedp, &volatilep, false);
          if (n->range < (unsigned HOST_WIDE_INT) bitsize)
            {
              load_offset = (bitsize - n->range) / BITS_PER_UNIT;
              unsigned HOST_WIDE_INT l
                = (load_offset * BITS_PER_UNIT) & (align - 1);
              if (l)
                align = l & -l;
            }
        }

      if (bswap
          && align < GET_MODE_ALIGNMENT (TYPE_MODE (load_type))
          && SLOW_UNALIGNED_ACCESS (TYPE_MODE (load_type), align))
        return false;

      /* Move cur_stmt just before one of the loads of the original
         to ensure it has the same VUSE.  See PR61517 for what could
         go wrong.  */
      gsi_move_before (&gsi, &gsi_ins);
      gsi = gsi_for_stmt (cur_stmt);

      /* Compute address to load from and cast according to the size
         of the load.  */
      addr_expr = build_fold_addr_expr (unshare_expr (src));
      if (is_gimple_mem_ref_addr (addr_expr))
        addr_tmp = addr_expr;
      else
        {
          addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
                                         "load_src");
          addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
          gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
        }

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
        aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
                              load_offset_ptr);

      if (!bswap)
        {
          if (n->range == 16)
            nop_stats.found_16bit++;
          else if (n->range == 32)
            nop_stats.found_32bit++;
          else
            {
              gcc_assert (n->range == 64);
              nop_stats.found_64bit++;
            }

          /* Convert the result of load if necessary.  */
          if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
            {
              val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
                                            "load_dst");
              load_stmt = gimple_build_assign (val_tmp, val_expr);
              gimple_set_vuse (load_stmt, n->vuse);
              gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
            }
          else
            {
              gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
            }
          update_stmt (cur_stmt);

          if (dump_file)
            {
              fprintf (dump_file,
                       "%d bit load in target endianness found at: ",
                       (int) n->range);
              print_gimple_stmt (dump_file, cur_stmt, 0, 0);
            }
          return true;
        }
      else
        {
          val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
          load_stmt = gimple_build_assign (val_tmp, val_expr);
          gimple_set_vuse (load_stmt, n->vuse);
          gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
          src = val_tmp;
        }
    }

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
     are considered as rotation of 2N bit values by N bits is generally not
     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
               (int) n->range);
      print_gimple_stmt (dump_file, cur_stmt, 0, 0);
    }

  gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
  gsi_remove (&gsi, true);
  return true;
}
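
/* A sketch of the replacement on a little-endian target: the
   big-endian 16-bit read

     b = data[1] | (data[0] << 8);

   becomes

     load_dst = MEM[&data[0]];    <- single 16-bit load
     b = load_dst r<< 8;          <- canonical 16-bit bswap (rotate)

   while the 32- and 64-bit cases emit calls to __builtin_bswap32 and
   __builtin_bswap64 instead.  */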
/* Find manual byte swap implementations as well as load in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or
   simple load according to the target endianness.  */

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  if (BITS_PER_UNIT != 8)
    return 0;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
         widest match.  As bswap pattern matching doesn't handle previously
         inserted smaller bswap replacements as sub-patterns, the wider
         variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
        {
          gimple src_stmt, cur_stmt = gsi_stmt (gsi);
          tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
          enum tree_code code;
          struct symbolic_number n;
          bool bswap;

          /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
             might be moved to a different basic block by bswap_replace and gsi
             must not point to it if that's the case.  Doing the gsi_prev
             here makes sure that gsi points to the statement previous to
             cur_stmt while still making sure that all statements are
             considered in this basic block.  */
          gsi_prev (&gsi);

          if (!is_gimple_assign (cur_stmt))
            continue;

          code = gimple_assign_rhs_code (cur_stmt);
          switch (code)
            {
            case LROTATE_EXPR:
            case RROTATE_EXPR:
              if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
                  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
                     % BITS_PER_UNIT)
                continue;
              /* Fall through.  */
            case BIT_IOR_EXPR:
              break;
            default:
              continue;
            }

          src_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

          if (!src_stmt)
            continue;

          switch (n.range)
            {
            case 16:
              /* Already in canonical form, nothing to do.  */
              if (code == LROTATE_EXPR || code == RROTATE_EXPR)
                continue;
              load_type = bswap_type = uint16_type_node;
              break;
            case 32:
              load_type = uint32_type_node;
              if (bswap32_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
                  bswap_type = bswap32_type;
                }
              break;
            case 64:
              load_type = uint64_type_node;
              if (bswap64_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
                  bswap_type = bswap64_type;
                }
              break;
            default:
              continue;
            }

          if (bswap && !fndecl && n.range != 16)
            continue;

          if (bswap_replace (cur_stmt, src_stmt, fndecl, bswap_type, load_type,
                             &n, bswap))
            changed = true;
        }
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
                            nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
                            nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
                            nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
                            bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
                            bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
                            bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}
/* Return true if stmt is a type conversion operation that can be stripped
   when used in a widening multiply operation.  */

static bool
widening_mult_conversion_strippable_p (tree result_type, gimple stmt)
{
  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

  if (TREE_CODE (result_type) == INTEGER_TYPE)
    {
      tree op_type;
      tree inner_op_type;

      if (!CONVERT_EXPR_CODE_P (rhs_code))
        return false;

      op_type = TREE_TYPE (gimple_assign_lhs (stmt));

      /* If the type of OP has the same precision as the result, then
         we can strip this conversion.  The multiply operation will be
         selected to create the correct extension as a by-product.  */
      if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
        return true;

      /* We can also strip a conversion if it preserves the signed-ness of
         the operation and doesn't narrow the range.  */
      inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      /* If the inner-most type is unsigned, then we can strip any
         intermediate widening operation.  If it's signed, then the
         intermediate widening operation must also be signed.  */
      if ((TYPE_UNSIGNED (inner_op_type)
           || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
          && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
        return true;

      return false;
    }

  return rhs_code == FIXED_CONVERT_EXPR;
}
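
/* Example: given

     short a, b;
     int c = (int) a * (int) b;

   each cast converts to the full result precision, so it is
   strippable; is_widening_mult_rhs_p below then sees the narrow short
   operands and the multiplication becomes a candidate for
   WIDEN_MULT_EXPR.  */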
/* Return true if RHS is a suitable operand for a widening multiplication,
   assuming a target type of TYPE.
   There are two cases:

     - RHS makes some value at least twice as wide.  Store that value
       in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.

     - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
       but leave *TYPE_OUT untouched.  */

static bool
is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
                        tree *new_rhs_out)
{
  gimple stmt;
  tree type1, rhs1;

  if (TREE_CODE (rhs) == SSA_NAME)
    {
      stmt = SSA_NAME_DEF_STMT (rhs);
      if (is_gimple_assign (stmt))
        {
          if (! widening_mult_conversion_strippable_p (type, stmt))
            rhs1 = rhs;
          else
            {
              rhs1 = gimple_assign_rhs1 (stmt);

              if (TREE_CODE (rhs1) == INTEGER_CST)
                {
                  *new_rhs_out = rhs1;
                  *type_out = NULL;
                  return true;
                }
            }
        }
      else
        rhs1 = rhs;

      type1 = TREE_TYPE (rhs1);

      if (TREE_CODE (type1) != TREE_CODE (type)
          || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
        return false;

      *new_rhs_out = rhs1;
      *type_out = type1;
      return true;
    }

  if (TREE_CODE (rhs) == INTEGER_CST)
    {
      *new_rhs_out = rhs;
      *type_out = NULL;
      return true;
    }

  return false;
}
/* Return true if STMT performs a widening multiplication, assuming the
   output type is TYPE.  If so, store the unwidened types of the operands
   in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
   *RHS2_OUT such that converting those operands to types *TYPE1_OUT
   and *TYPE2_OUT would give the operands of the multiplication.  */

static bool
is_widening_mult_p (gimple stmt,
                    tree *type1_out, tree *rhs1_out,
                    tree *type2_out, tree *rhs2_out)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));

  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
                               rhs1_out))
    return false;

  if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
                               rhs2_out))
    return false;

  if (*type1_out == NULL)
    {
      if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
        return false;
      *type1_out = *type2_out;
    }

  if (*type2_out == NULL)
    {
      if (!int_fits_type_p (*rhs2_out, *type1_out))
        return false;
      *type2_out = *type1_out;
    }

  /* Ensure that the larger of the two operands comes first.  */
  if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
    {
      std::swap (*type1_out, *type2_out);
      std::swap (*rhs1_out, *rhs2_out);
    }

  return true;
}
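
/* Example with a constant operand: for short a and

     int c = (int) a * 1000;

   the second operand is an INTEGER_CST, so *TYPE2_OUT is initially
   left NULL; the code above then checks that 1000 fits in the type of
   A and treats the constant as a second short operand of the widening
   multiply.  */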
/* Process a single gimple statement STMT, which has a MULT_EXPR as
   its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
   value is true iff we converted the statement.  */

static bool
convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
{
  tree lhs, rhs1, rhs2, type, type1, type2;
  enum insn_code handler;
  machine_mode to_mode, from_mode, actual_mode;
  optab op;
  int actual_precision;
  location_t loc = gimple_location (stmt);
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE)
    return false;

  if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
    return false;

  to_mode = TYPE_MODE (type);
  from_mode = TYPE_MODE (type1);
  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);

  if (from_unsigned1 && from_unsigned2)
    op = umul_widen_optab;
  else if (!from_unsigned1 && !from_unsigned2)
    op = smul_widen_optab;
  else
    op = usmul_widen_optab;

  handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
                                                  0, &actual_mode);

  if (handler == CODE_FOR_nothing)
    {
      if (op != smul_widen_optab)
        {
          /* We can use a signed multiply with unsigned types as long as
             there is a wider mode to use, or it is the smaller of the two
             types that is unsigned.  Note that type1 >= type2, always.  */
          if ((TYPE_UNSIGNED (type1)
               && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
              || (TYPE_UNSIGNED (type2)
                  && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
            {
              from_mode = GET_MODE_WIDER_MODE (from_mode);
              if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
                return false;
            }

          op = smul_widen_optab;
          handler = find_widening_optab_handler_and_mode (op, to_mode,
                                                          from_mode, 0,
                                                          &actual_mode);

          if (handler == CODE_FOR_nothing)
            return false;

          from_unsigned1 = from_unsigned2 = false;
        }
      else
        return false;
    }

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (2 * actual_precision > TYPE_PRECISION (type))
    return false;
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    rhs1 = build_and_insert_cast (gsi, loc,
                                  build_nonstandard_integer_type
                                    (actual_precision, from_unsigned1), rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    rhs2 = build_and_insert_cast (gsi, loc,
                                  build_nonstandard_integer_type
                                    (actual_precision, from_unsigned2), rhs2);

  /* Handle constants.  */
  if (TREE_CODE (rhs1) == INTEGER_CST)
    rhs1 = fold_convert (type1, rhs1);
  if (TREE_CODE (rhs2) == INTEGER_CST)
    rhs2 = fold_convert (type2, rhs2);

  gimple_assign_set_rhs1 (stmt, rhs1);
  gimple_assign_set_rhs2 (stmt, rhs2);
  gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
  update_stmt (stmt);
  widen_mul_stats.widen_mults_inserted++;
  return true;
}
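
/* Example of the mixed-sign fallback above: for unsigned short us,
   signed short ss and int c = us * ss, the ideal handler is
   usmul_widen_optab.  If the target lacks it, the smaller-operand
   rule applies; when both operands occupy the full source mode the
   code moves to the next wider mode and retries with
   smul_widen_optab, giving up if the source would become as wide as
   the destination.  */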
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */

static bool
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
                            enum tree_code code)
{
  gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
  gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
  tree type, type1, type2, optype;
  tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
  enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
  optab this_optab;
  enum tree_code wmult_code;
  enum insn_code handler;
  machine_mode to_mode, from_mode, actual_mode;
  location_t loc = gimple_location (stmt);
  int actual_precision;
  bool from_unsigned1, from_unsigned2;

  lhs = gimple_assign_lhs (stmt);
  type = TREE_TYPE (lhs);
  if (TREE_CODE (type) != INTEGER_TYPE
      && TREE_CODE (type) != FIXED_POINT_TYPE)
    return false;

  if (code == MINUS_EXPR)
    wmult_code = WIDEN_MULT_MINUS_EXPR;
  else
    wmult_code = WIDEN_MULT_PLUS_EXPR;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (rhs1) == SSA_NAME)
    {
      rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
      if (is_gimple_assign (rhs1_stmt))
        rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
    }

  if (TREE_CODE (rhs2) == SSA_NAME)
    {
      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
      if (is_gimple_assign (rhs2_stmt))
        rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
    }

  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there are more than
     one conversions then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
  if (CONVERT_EXPR_CODE_P (rhs1_code))
    {
      conv1_stmt = rhs1_stmt;
      rhs1 = gimple_assign_rhs1 (rhs1_stmt);
      if (TREE_CODE (rhs1) == SSA_NAME)
        {
          rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
          if (is_gimple_assign (rhs1_stmt))
            rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
        }
      else
        return false;
    }
  if (CONVERT_EXPR_CODE_P (rhs2_code))
    {
      conv2_stmt = rhs2_stmt;
      rhs2 = gimple_assign_rhs1 (rhs2_stmt);
      if (TREE_CODE (rhs2) == SSA_NAME)
        {
          rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
          if (is_gimple_assign (rhs2_stmt))
            rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
        }
      else
        return false;
    }

  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  */
  if (code == PLUS_EXPR
      && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
    {
      if (!has_single_use (rhs1)
          || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
                                  &type2, &mult_rhs2))
        return false;
      add_rhs = rhs2;
      conv_stmt = conv1_stmt;
    }
  else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
    {
      if (!has_single_use (rhs2)
          || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
                                  &type2, &mult_rhs2))
        return false;
      add_rhs = rhs1;
      conv_stmt = conv2_stmt;
    }
  else
    return false;

  to_mode = TYPE_MODE (type);
  from_mode = TYPE_MODE (type1);
  from_unsigned1 = TYPE_UNSIGNED (type1);
  from_unsigned2 = TYPE_UNSIGNED (type2);
  optype = type1;

  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
  if (from_unsigned1 != from_unsigned2)
    {
      if (!INTEGRAL_TYPE_P (type))
        return false;
      /* We can use a signed multiply with unsigned types as long as
         there is a wider mode to use, or it is the smaller of the two
         types that is unsigned.  Note that type1 >= type2, always.  */
      if ((from_unsigned1
           && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
          || (from_unsigned2
              && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
        {
          from_mode = GET_MODE_WIDER_MODE (from_mode);
          if (GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
            return false;
        }

      from_unsigned1 = from_unsigned2 = false;
      optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
                                               false);
    }

  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
  if (conv_stmt)
    {
      /* We use the original, unmodified data types for this.  */
      tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
      tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
      int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
      bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);

      if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
        {
          /* Conversion is a truncate.  */
          if (TYPE_PRECISION (to_type) < data_size)
            return false;
        }
      else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
        {
          /* Conversion is an extend.  Check it's the right sort.  */
          if (TYPE_UNSIGNED (from_type) != is_unsigned
              && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
            return false;
        }
      /* else convert is a no-op for our purposes.  */
    }

  /* Verify that the machine can perform a widening multiply
     accumulate in this mode/signedness combination, otherwise
     this transformation is likely to pessimize code.  */
  this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
  handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
                                                  from_mode, 0, &actual_mode);

  if (handler == CODE_FOR_nothing)
    return false;

  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
  actual_precision = GET_MODE_PRECISION (actual_mode);
  if (actual_precision != TYPE_PRECISION (type1)
      || from_unsigned1 != TYPE_UNSIGNED (type1))
    mult_rhs1 = build_and_insert_cast (gsi, loc,
                                       build_nonstandard_integer_type
                                         (actual_precision, from_unsigned1),
                                       mult_rhs1);
  if (actual_precision != TYPE_PRECISION (type2)
      || from_unsigned2 != TYPE_UNSIGNED (type2))
    mult_rhs2 = build_and_insert_cast (gsi, loc,
                                       build_nonstandard_integer_type
                                         (actual_precision, from_unsigned2),
                                       mult_rhs2);

  if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
    add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);

  /* Handle constants.  */
  if (TREE_CODE (mult_rhs1) == INTEGER_CST)
    mult_rhs1 = fold_convert (type1, mult_rhs1);
  if (TREE_CODE (mult_rhs2) == INTEGER_CST)
    mult_rhs2 = fold_convert (type2, mult_rhs2);

  gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
                                  add_rhs);
  update_stmt (gsi_stmt (*gsi));
  widen_mul_stats.maccs_inserted++;
  return true;
}
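
/* Example: with short a, b and int acc, the sequence

     tmp = (int) a * (int) b;
     acc = acc + tmp;

   is rewritten into acc = WIDEN_MULT_PLUS_EXPR <a, b, acc> provided
   the target advertises a widening multiply-accumulate for this
   mode/signedness combination (the optab probed above).  */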
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
   with uses in additions and subtractions to form fused multiply-add
   operations.  Returns true if successful and MUL_STMT should be removed.  */

static bool
convert_mult_to_fma (gimple mul_stmt, tree op1, tree op2)
{
  tree mul_result = gimple_get_lhs (mul_stmt);
  tree type = TREE_TYPE (mul_result);
  gimple use_stmt, neguse_stmt;
  gassign *fma_stmt;
  use_operand_p use_p;
  imm_use_iterator imm_iter;

  if (FLOAT_TYPE_P (type)
      && flag_fp_contract_mode == FP_CONTRACT_OFF)
    return false;

  /* We don't want to do bitfield reduction ops.  */
  if (INTEGRAL_TYPE_P (type)
      && (TYPE_PRECISION (type)
          != GET_MODE_PRECISION (TYPE_MODE (type))))
    return false;

  /* If the target doesn't support it, don't generate it.  We assume that
     if fma isn't available then fms, fnma or fnms are not either.  */
  if (optab_handler (fma_optab, TYPE_MODE (type)) == CODE_FOR_nothing)
    return false;

  /* If the multiplication has zero uses, it is kept around probably because
     of -fnon-call-exceptions.  Don't optimize it away in that case,
     it is DCE's job.  */
  if (has_zero_uses (mul_result))
    return false;

  /* Make sure that the multiplication statement becomes dead after
     the transformation, i.e. that all uses are transformed to FMAs.
     This means we assume that an FMA operation has the same cost
     as an addition.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
    {
      enum tree_code use_code;
      tree result = mul_result;
      bool negate_p = false;

      use_stmt = USE_STMT (use_p);

      if (is_gimple_debug (use_stmt))
        continue;

      /* For now restrict this operation to single basic blocks.  In theory
         we would want to support sinking the multiplication in
         m = a*b;
         if ()
           ma = m + c;
         else
           d = m;
         to form a fma in the then block and sink the multiplication to the
         else block.  */
      if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
        return false;

      if (!is_gimple_assign (use_stmt))
        return false;

      use_code = gimple_assign_rhs_code (use_stmt);

      /* A negate on the multiplication leads to FNMA.  */
      if (use_code == NEGATE_EXPR)
        {
          ssa_op_iter iter;
          use_operand_p usep;

          result = gimple_assign_lhs (use_stmt);

          /* Make sure the negate statement becomes dead with this
             single transformation.  */
          if (!single_imm_use (gimple_assign_lhs (use_stmt),
                               &use_p, &neguse_stmt))
            return false;

          /* Make sure the multiplication isn't also used on that stmt.  */
          FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
            if (USE_FROM_PTR (usep) == mul_result)
              return false;

          /* Re-validate.  */
          use_stmt = neguse_stmt;
          if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
            return false;
          if (!is_gimple_assign (use_stmt))
            return false;

          use_code = gimple_assign_rhs_code (use_stmt);
          negate_p = true;
        }

      switch (use_code)
        {
        case MINUS_EXPR:
          if (gimple_assign_rhs2 (use_stmt) == result)
            negate_p = !negate_p;
          break;
        case PLUS_EXPR:
          break;
        default:
          /* FMA can only be formed from PLUS and MINUS.  */
          return false;
        }

      /* If the subtrahend (gimple_assign_rhs2 (use_stmt)) is computed
         by a MULT_EXPR that we'll visit later, we might be able to
         get a more profitable match with fnma.
         OTOH, if we don't, a negate / fma pair has likely lower latency
         than a mult / subtract pair.  */
      if (use_code == MINUS_EXPR && !negate_p
          && gimple_assign_rhs1 (use_stmt) == result
          && optab_handler (fms_optab, TYPE_MODE (type)) == CODE_FOR_nothing
          && optab_handler (fnma_optab, TYPE_MODE (type)) != CODE_FOR_nothing)
        {
          tree rhs2 = gimple_assign_rhs2 (use_stmt);

          if (TREE_CODE (rhs2) == SSA_NAME)
            {
              gimple stmt2 = SSA_NAME_DEF_STMT (rhs2);
              if (has_single_use (rhs2)
                  && is_gimple_assign (stmt2)
                  && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
                return false;
            }
        }

      /* We can't handle a * b + a * b.  */
      if (gimple_assign_rhs1 (use_stmt) == gimple_assign_rhs2 (use_stmt))
        return false;

      /* While it is possible to validate whether or not the exact form
         that we've recognized is available in the backend, the assumption
         is that the transformation is never a loss.  For instance, suppose
         the target only has the plain FMA pattern available.  Consider
         a*b-c -> fma(a,b,-c): we've exchanged MUL+SUB for FMA+NEG, which
         is still two operations.  Consider -(a*b)-c -> fma(-a,b,-c): we
         still have 3 operations, but in the FMA form the two NEGs are
         independent and could be run in parallel.  */
    }

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      enum tree_code use_code;
      tree addop, mulop1 = op1, result = mul_result;
      bool negate_p = false;

      if (is_gimple_debug (use_stmt))
        continue;

      use_code = gimple_assign_rhs_code (use_stmt);
      if (use_code == NEGATE_EXPR)
        {
          result = gimple_assign_lhs (use_stmt);
          single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
          gsi_remove (&gsi, true);
          release_defs (use_stmt);

          use_stmt = neguse_stmt;
          gsi = gsi_for_stmt (use_stmt);
          use_code = gimple_assign_rhs_code (use_stmt);
          negate_p = true;
        }

      if (gimple_assign_rhs1 (use_stmt) == result)
        {
          addop = gimple_assign_rhs2 (use_stmt);
          /* a * b - c -> a * b + (-c)  */
          if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
            addop = force_gimple_operand_gsi (&gsi,
                                              build1 (NEGATE_EXPR,
                                                      type, addop),
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
        }
      else
        {
          addop = gimple_assign_rhs1 (use_stmt);
          /* a - b * c -> (-b) * c + a */
          if (gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
            negate_p = !negate_p;
        }

      if (negate_p)
        mulop1 = force_gimple_operand_gsi (&gsi,
                                           build1 (NEGATE_EXPR,
                                                   type, mulop1),
                                           true, NULL_TREE, true,
                                           GSI_SAME_STMT);

      fma_stmt = gimple_build_assign (gimple_assign_lhs (use_stmt),
                                      FMA_EXPR, mulop1, op2, addop);
      gsi_replace (&gsi, fma_stmt, true);
      widen_mul_stats.fmas_inserted++;
    }

  return true;
}
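
/* Example: with FP contraction enabled, for

     double m = a * b;
     double x = m + c;
     double y = d - m;

   both uses are rewritten -- x = FMA_EXPR <a, b, c> and
   y = FMA_EXPR <-a, b, d>, the negation being folded into the first
   multiplicand -- after which the multiplication statement is dead.  */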
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   where appropriate.  */

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_widening_mul
unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  basic_block bb;
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
        {
          gimple stmt = gsi_stmt (gsi);
          enum tree_code code;

          if (is_gimple_assign (stmt))
            {
              code = gimple_assign_rhs_code (stmt);
              switch (code)
                {
                case MULT_EXPR:
                  if (!convert_mult_to_widen (stmt, &gsi)
                      && convert_mult_to_fma (stmt,
                                              gimple_assign_rhs1 (stmt),
                                              gimple_assign_rhs2 (stmt)))
                    {
                      gsi_remove (&gsi, true);
                      release_defs (stmt);
                      continue;
                    }
                  break;

                case PLUS_EXPR:
                case MINUS_EXPR:
                  convert_plusminus_to_widen (&gsi, stmt, code);
                  break;

                default:;
                }
            }
          else if (is_gimple_call (stmt)
                   && gimple_call_lhs (stmt))
            {
              tree fndecl = gimple_call_fndecl (stmt);
              if (fndecl
                  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
                {
                  switch (DECL_FUNCTION_CODE (fndecl))
                    {
                    case BUILT_IN_POWF:
                    case BUILT_IN_POW:
                    case BUILT_IN_POWL:
                      if (TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
                          && REAL_VALUES_EQUAL
                               (TREE_REAL_CST (gimple_call_arg (stmt, 1)),
                                dconst2)
                          && convert_mult_to_fma (stmt,
                                                  gimple_call_arg (stmt, 0),
                                                  gimple_call_arg (stmt, 0)))
                        {
                          unlink_stmt_vdef (stmt);
                          if (gsi_remove (&gsi, true)
                              && gimple_purge_dead_eh_edges (bb))
                            cfg_changed = true;
                          release_defs (stmt);
                          continue;
                        }
                      break;

                    default:;
                    }
                }
            }
          gsi_next (&gsi);
        }
    }

  statistics_counter_event (fun, "widening multiplications inserted",
                            widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
                            widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
                            widen_mul_stats.fmas_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}