1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
86 #include "pointer-set.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
104 /* Representation of the induction variable. */
107 tree base
; /* Initial value of the iv. */
108 tree base_object
; /* A memory object to that the induction variable points. */
109 tree step
; /* Step of the iv (constant only). */
110 tree ssa_name
; /* The ssa name with the value. */
111 bool biv_p
; /* Is it a biv? */
112 bool have_use_for
; /* Do we already have a use for it? */
113 unsigned use_id
; /* The identifier in the use if it is the case. */
116 /* Per-ssa version information (induction variable descriptions, etc.). */
119 tree name
; /* The ssa name. */
120 struct iv
*iv
; /* Induction variable description. */
121 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id
; /* Id of an invariant. */
124 bool preserve_biv
; /* For the original biv, whether to preserve it. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
135 /* Cost of a computation. */
138 int cost
; /* The runtime cost. */
139 unsigned complexity
; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
145 static const comp_cost zero_cost
= {0, 0};
146 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
148 /* The candidate - cost pair. */
151 struct iv_cand
*cand
; /* The candidate. */
152 comp_cost cost
; /* The cost. */
153 bitmap depends_on
; /* The list of invariants that have to be
155 tree value
; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
163 unsigned id
; /* The id of the use. */
164 enum use_type type
; /* Type of the use. */
165 struct iv
*iv
; /* The induction variable it is based on. */
166 gimple stmt
; /* Statement in that it occurs. */
167 tree
*op_p
; /* The place where it occurs. */
168 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
171 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
172 struct cost_pair
*cost_map
;
173 /* The costs wrto the iv candidates. */
175 struct iv_cand
*selected
;
176 /* The selected candidate. */
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
189 /* The induction variable candidate. */
192 unsigned id
; /* The number of the candidate. */
193 bool important
; /* Whether this is an "important" candidate, i.e. such
194 that it should be considered by all uses. */
195 enum iv_position pos
; /* Where it is computed. */
196 gimple incremented_at
;/* For original biv, the statement where it is
198 tree var_before
; /* The variable used for it before increment. */
199 tree var_after
; /* The variable used for it after increment. */
200 struct iv
*iv
; /* The value of the candidate. NULL for
201 "pseudocandidate" used to indicate the possibility
202 to replace the final value of an iv by direct
203 computation of the value. */
204 unsigned cost
; /* Cost of the candidate. */
205 unsigned cost_step
; /* Cost of the candidate's increment operation. */
206 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207 where it is incremented. */
208 bitmap depends_on
; /* The list of invariants that are used in step of the
212 /* The data used by the induction variable optimizations. */
214 typedef struct iv_use
*iv_use_p
;
216 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
218 typedef struct iv_cand
*iv_cand_p
;
219 DEF_VEC_P(iv_cand_p
);
220 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
224 /* The currently optimized loop. */
225 struct loop
*current_loop
;
227 /* Numbers of iterations for all exits of the current loop. */
228 struct pointer_map_t
*niters
;
230 /* Number of registers used in it. */
233 /* The size of version_info array allocated. */
234 unsigned version_info_size
;
236 /* The array of information for the ssa names. */
237 struct version_info
*version_info
;
239 /* The bitmap of indices in version_info whose value was changed. */
242 /* The uses of induction variables. */
243 VEC(iv_use_p
,heap
) *iv_uses
;
245 /* The candidates. */
246 VEC(iv_cand_p
,heap
) *iv_candidates
;
248 /* A bitmap of important candidates. */
249 bitmap important_candidates
;
251 /* The maximum invariant id. */
254 /* Whether to consider just related and important candidates when replacing a
256 bool consider_all_candidates
;
258 /* Are we optimizing for speed? */
262 /* An assignment of iv candidates to uses. */
266 /* The number of uses covered by the assignment. */
269 /* Number of uses that cannot be expressed by the candidates in the set. */
272 /* Candidate assigned to a use, together with the related costs. */
273 struct cost_pair
**cand_for_use
;
275 /* Number of times each candidate is used. */
276 unsigned *n_cand_uses
;
278 /* The candidates used. */
281 /* The number of candidates in the set. */
284 /* Total number of registers needed. */
287 /* Total cost of expressing uses. */
288 comp_cost cand_use_cost
;
290 /* Total cost of candidates. */
293 /* Number of times each invariant is used. */
294 unsigned *n_invariant_uses
;
296 /* Total cost of the assignment. */
300 /* Difference of two iv candidate assignments. */
307 /* An old assignment (for rollback purposes). */
308 struct cost_pair
*old_cp
;
310 /* A new assignment. */
311 struct cost_pair
*new_cp
;
313 /* Next change in the list. */
314 struct iv_ca_delta
*next_change
;
317 /* Bound on number of candidates below that all candidates are considered. */
319 #define CONSIDER_ALL_CANDIDATES_BOUND \
320 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
322 /* If there are more iv occurrences, we just give up (it is quite unlikely that
323 optimizing such a loop would help, and it would take ages). */
325 #define MAX_CONSIDERED_USES \
326 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
328 /* If there are at most this number of ivs in the set, try removing unnecessary
329 ivs from the set always. */
331 #define ALWAYS_PRUNE_CAND_SET_BOUND \
332 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
334 /* The list of trees for that the decl_rtl field must be reset is stored
337 static VEC(tree
,heap
) *decl_rtl_to_reset
;
339 /* Number of uses recorded in DATA. */
341 static inline unsigned
342 n_iv_uses (struct ivopts_data
*data
)
344 return VEC_length (iv_use_p
, data
->iv_uses
);
347 /* Ith use recorded in DATA. */
349 static inline struct iv_use
*
350 iv_use (struct ivopts_data
*data
, unsigned i
)
352 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
355 /* Number of candidates recorded in DATA. */
357 static inline unsigned
358 n_iv_cands (struct ivopts_data
*data
)
360 return VEC_length (iv_cand_p
, data
->iv_candidates
);
363 /* Ith candidate recorded in DATA. */
365 static inline struct iv_cand
*
366 iv_cand (struct ivopts_data
*data
, unsigned i
)
368 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
371 /* The single loop exit if it dominates the latch, NULL otherwise. */
374 single_dom_exit (struct loop
*loop
)
376 edge exit
= single_exit (loop
);
381 if (!just_once_each_iteration_p (loop
, exit
->src
))
387 /* Dumps information about the induction variable IV to FILE. */
389 extern void dump_iv (FILE *, struct iv
*);
391 dump_iv (FILE *file
, struct iv
*iv
)
395 fprintf (file
, "ssa name ");
396 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
397 fprintf (file
, "\n");
400 fprintf (file
, " type ");
401 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
402 fprintf (file
, "\n");
406 fprintf (file
, " base ");
407 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
408 fprintf (file
, "\n");
410 fprintf (file
, " step ");
411 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
412 fprintf (file
, "\n");
416 fprintf (file
, " invariant ");
417 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
418 fprintf (file
, "\n");
423 fprintf (file
, " base object ");
424 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
425 fprintf (file
, "\n");
429 fprintf (file
, " is a biv\n");
432 /* Dumps information about the USE to FILE. */
434 extern void dump_use (FILE *, struct iv_use
*);
436 dump_use (FILE *file
, struct iv_use
*use
)
438 fprintf (file
, "use %d\n", use
->id
);
442 case USE_NONLINEAR_EXPR
:
443 fprintf (file
, " generic\n");
447 fprintf (file
, " address\n");
451 fprintf (file
, " compare\n");
458 fprintf (file
, " in statement ");
459 print_gimple_stmt (file
, use
->stmt
, 0, 0);
460 fprintf (file
, "\n");
462 fprintf (file
, " at position ");
464 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
465 fprintf (file
, "\n");
467 dump_iv (file
, use
->iv
);
469 if (use
->related_cands
)
471 fprintf (file
, " related candidates ");
472 dump_bitmap (file
, use
->related_cands
);
476 /* Dumps information about the uses to FILE. */
478 extern void dump_uses (FILE *, struct ivopts_data
*);
480 dump_uses (FILE *file
, struct ivopts_data
*data
)
485 for (i
= 0; i
< n_iv_uses (data
); i
++)
487 use
= iv_use (data
, i
);
489 dump_use (file
, use
);
490 fprintf (file
, "\n");
494 /* Dumps information about induction variable candidate CAND to FILE. */
496 extern void dump_cand (FILE *, struct iv_cand
*);
498 dump_cand (FILE *file
, struct iv_cand
*cand
)
500 struct iv
*iv
= cand
->iv
;
502 fprintf (file
, "candidate %d%s\n",
503 cand
->id
, cand
->important
? " (important)" : "");
505 if (cand
->depends_on
)
507 fprintf (file
, " depends on ");
508 dump_bitmap (file
, cand
->depends_on
);
513 fprintf (file
, " final value replacement\n");
520 fprintf (file
, " incremented before exit test\n");
524 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
528 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
532 fprintf (file
, " incremented at end\n");
536 fprintf (file
, " original biv\n");
543 /* Returns the info for ssa version VER. */
545 static inline struct version_info
*
546 ver_info (struct ivopts_data
*data
, unsigned ver
)
548 return data
->version_info
+ ver
;
551 /* Returns the info for ssa name NAME. */
553 static inline struct version_info
*
554 name_info (struct ivopts_data
*data
, tree name
)
556 return ver_info (data
, SSA_NAME_VERSION (name
));
559 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
563 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
565 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
569 if (sbb
== loop
->latch
)
575 return stmt
== last_stmt (bb
);
578 /* Returns true if STMT if after the place where the original induction
579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
583 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
585 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
586 basic_block stmt_bb
= gimple_bb (stmt
);
588 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
591 if (stmt_bb
!= cand_bb
)
595 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
597 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
600 /* Returns true if STMT if after the place where the induction variable
601 CAND is incremented in LOOP. */
604 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
612 return stmt_after_ip_normal_pos (loop
, stmt
);
616 return stmt_after_inc_pos (cand
, stmt
, false);
619 return stmt_after_inc_pos (cand
, stmt
, true);
626 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
629 abnormal_ssa_name_p (tree exp
)
634 if (TREE_CODE (exp
) != SSA_NAME
)
637 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
640 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
641 abnormal phi node. Callback for for_each_index. */
644 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
645 void *data ATTRIBUTE_UNUSED
)
647 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
649 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
651 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
655 return !abnormal_ssa_name_p (*index
);
658 /* Returns true if EXPR contains a ssa name that occurs in an
659 abnormal phi node. */
662 contains_abnormal_ssa_name_p (tree expr
)
665 enum tree_code_class codeclass
;
670 code
= TREE_CODE (expr
);
671 codeclass
= TREE_CODE_CLASS (code
);
673 if (code
== SSA_NAME
)
674 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
676 if (code
== INTEGER_CST
677 || is_gimple_min_invariant (expr
))
680 if (code
== ADDR_EXPR
)
681 return !for_each_index (&TREE_OPERAND (expr
, 0),
682 idx_contains_abnormal_ssa_name_p
,
689 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
694 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
706 /* Returns tree describing number of iterations determined from
707 EXIT of DATA->current_loop, or NULL if something goes wrong. */
710 niter_for_exit (struct ivopts_data
*data
, edge exit
)
712 struct tree_niter_desc desc
;
718 data
->niters
= pointer_map_create ();
722 slot
= pointer_map_contains (data
->niters
, exit
);
726 /* Try to determine number of iterations. We must know it
727 unconditionally (i.e., without possibility of # of iterations
728 being zero). Also, we cannot safely work with ssa names that
729 appear in phi nodes on abnormal edges, so that we do not create
730 overlapping life ranges for them (PR 27283). */
731 if (number_of_iterations_exit (data
->current_loop
,
733 && integer_zerop (desc
.may_be_zero
)
734 && !contains_abnormal_ssa_name_p (desc
.niter
))
739 *pointer_map_insert (data
->niters
, exit
) = niter
;
742 niter
= (tree
) *slot
;
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
752 niter_for_single_dom_exit (struct ivopts_data
*data
)
754 edge exit
= single_dom_exit (data
->current_loop
);
759 return niter_for_exit (data
, exit
);
762 /* Initializes data structures used by the iv optimization pass, stored
766 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
768 data
->version_info_size
= 2 * num_ssa_names
;
769 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
770 data
->relevant
= BITMAP_ALLOC (NULL
);
771 data
->important_candidates
= BITMAP_ALLOC (NULL
);
772 data
->max_inv_id
= 0;
774 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
775 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
776 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
783 determine_base_object (tree expr
)
785 enum tree_code code
= TREE_CODE (expr
);
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (CONVERT_EXPR_P (expr
))
792 return determine_base_object (TREE_OPERAND (expr
, 0));
794 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
803 obj
= TREE_OPERAND (expr
, 0);
804 base
= get_base_address (obj
);
809 if (TREE_CODE (base
) == INDIRECT_REF
)
810 return determine_base_object (TREE_OPERAND (base
, 0));
812 return fold_convert (ptr_type_node
,
813 build_fold_addr_expr (base
));
815 case POINTER_PLUS_EXPR
:
816 return determine_base_object (TREE_OPERAND (expr
, 0));
820 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
824 return fold_convert (ptr_type_node
, expr
);
828 /* Allocates an induction variable with given initial value BASE and step STEP
832 alloc_iv (tree base
, tree step
)
834 struct iv
*iv
= XCNEW (struct iv
);
835 gcc_assert (step
!= NULL_TREE
);
838 iv
->base_object
= determine_base_object (base
);
841 iv
->have_use_for
= false;
843 iv
->ssa_name
= NULL_TREE
;
848 /* Sets STEP and BASE for induction variable IV. */
851 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
853 struct version_info
*info
= name_info (data
, iv
);
855 gcc_assert (!info
->iv
);
857 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
858 info
->iv
= alloc_iv (base
, step
);
859 info
->iv
->ssa_name
= iv
;
862 /* Finds induction variable declaration for VAR. */
865 get_iv (struct ivopts_data
*data
, tree var
)
868 tree type
= TREE_TYPE (var
);
870 if (!POINTER_TYPE_P (type
)
871 && !INTEGRAL_TYPE_P (type
))
874 if (!name_info (data
, var
)->iv
)
876 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
879 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
880 set_iv (data
, var
, var
, build_int_cst (type
, 0));
883 return name_info (data
, var
)->iv
;
886 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
887 not define a simple affine biv with nonzero step. */
890 determine_biv_step (gimple phi
)
892 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
893 tree name
= PHI_RESULT (phi
);
896 if (!is_gimple_reg (name
))
899 if (!simple_iv (loop
, loop
, name
, &iv
, true))
902 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
905 /* Finds basic ivs. */
908 find_bivs (struct ivopts_data
*data
)
911 tree step
, type
, base
;
913 struct loop
*loop
= data
->current_loop
;
914 gimple_stmt_iterator psi
;
916 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
918 phi
= gsi_stmt (psi
);
920 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
923 step
= determine_biv_step (phi
);
927 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
928 base
= expand_simple_operations (base
);
929 if (contains_abnormal_ssa_name_p (base
)
930 || contains_abnormal_ssa_name_p (step
))
933 type
= TREE_TYPE (PHI_RESULT (phi
));
934 base
= fold_convert (type
, base
);
937 if (POINTER_TYPE_P (type
))
938 step
= fold_convert (sizetype
, step
);
940 step
= fold_convert (type
, step
);
943 set_iv (data
, PHI_RESULT (phi
), base
, step
);
950 /* Marks basic ivs. */
953 mark_bivs (struct ivopts_data
*data
)
957 struct iv
*iv
, *incr_iv
;
958 struct loop
*loop
= data
->current_loop
;
960 gimple_stmt_iterator psi
;
962 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
964 phi
= gsi_stmt (psi
);
966 iv
= get_iv (data
, PHI_RESULT (phi
));
970 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
971 incr_iv
= get_iv (data
, var
);
975 /* If the increment is in the subloop, ignore it. */
976 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
977 if (incr_bb
->loop_father
!= data
->current_loop
978 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
982 incr_iv
->biv_p
= true;
986 /* Checks whether STMT defines a linear induction variable and stores its
990 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
993 struct loop
*loop
= data
->current_loop
;
995 iv
->base
= NULL_TREE
;
996 iv
->step
= NULL_TREE
;
998 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1001 lhs
= gimple_assign_lhs (stmt
);
1002 if (TREE_CODE (lhs
) != SSA_NAME
)
1005 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1007 iv
->base
= expand_simple_operations (iv
->base
);
1009 if (contains_abnormal_ssa_name_p (iv
->base
)
1010 || contains_abnormal_ssa_name_p (iv
->step
))
1016 /* Finds general ivs in statement STMT. */
1019 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1023 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1026 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1029 /* Finds general ivs in basic block BB. */
1032 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1034 gimple_stmt_iterator bsi
;
1036 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1037 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1040 /* Finds general ivs. */
1043 find_givs (struct ivopts_data
*data
)
1045 struct loop
*loop
= data
->current_loop
;
1046 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1049 for (i
= 0; i
< loop
->num_nodes
; i
++)
1050 find_givs_in_bb (data
, body
[i
]);
1054 /* For each ssa name defined in LOOP determines whether it is an induction
1055 variable and if so, its initial value and step. */
1058 find_induction_variables (struct ivopts_data
*data
)
1063 if (!find_bivs (data
))
1069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1071 tree niter
= niter_for_single_dom_exit (data
);
1075 fprintf (dump_file
, " number of iterations ");
1076 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1077 fprintf (dump_file
, "\n\n");
1080 fprintf (dump_file
, "Induction variables:\n\n");
1082 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1084 if (ver_info (data
, i
)->iv
)
1085 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1092 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1094 static struct iv_use
*
1095 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1096 gimple stmt
, enum use_type use_type
)
1098 struct iv_use
*use
= XCNEW (struct iv_use
);
1100 use
->id
= n_iv_uses (data
);
1101 use
->type
= use_type
;
1105 use
->related_cands
= BITMAP_ALLOC (NULL
);
1107 /* To avoid showing ssa name in the dumps, if it was not reset by the
1109 iv
->ssa_name
= NULL_TREE
;
1111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1112 dump_use (dump_file
, use
);
1114 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1119 /* Checks whether OP is a loop-level invariant and if so, records it.
1120 NONLINEAR_USE is true if the invariant is used in a way we do not
1121 handle specially. */
1124 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1127 struct version_info
*info
;
1129 if (TREE_CODE (op
) != SSA_NAME
1130 || !is_gimple_reg (op
))
1133 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1135 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1138 info
= name_info (data
, op
);
1140 info
->has_nonlin_use
|= nonlinear_use
;
1142 info
->inv_id
= ++data
->max_inv_id
;
1143 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1146 /* Checks whether the use OP is interesting and if so, records it. */
1148 static struct iv_use
*
1149 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1156 if (TREE_CODE (op
) != SSA_NAME
)
1159 iv
= get_iv (data
, op
);
1163 if (iv
->have_use_for
)
1165 use
= iv_use (data
, iv
->use_id
);
1167 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1171 if (integer_zerop (iv
->step
))
1173 record_invariant (data
, op
, true);
1176 iv
->have_use_for
= true;
1178 civ
= XNEW (struct iv
);
1181 stmt
= SSA_NAME_DEF_STMT (op
);
1182 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1183 || is_gimple_assign (stmt
));
1185 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1186 iv
->use_id
= use
->id
;
1191 /* Given a condition in statement STMT, checks whether it is a compare
1192 of an induction variable and an invariant. If this is the case,
1193 CONTROL_VAR is set to location of the iv, BOUND to the location of
1194 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195 induction variable descriptions, and true is returned. If this is not
1196 the case, CONTROL_VAR and BOUND are set to the arguments of the
1197 condition and false is returned. */
1200 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1201 tree
**control_var
, tree
**bound
,
1202 struct iv
**iv_var
, struct iv
**iv_bound
)
1204 /* The objects returned when COND has constant operands. */
1205 static struct iv const_iv
;
1207 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1208 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1211 if (gimple_code (stmt
) == GIMPLE_COND
)
1213 op0
= gimple_cond_lhs_ptr (stmt
);
1214 op1
= gimple_cond_rhs_ptr (stmt
);
1218 op0
= gimple_assign_rhs1_ptr (stmt
);
1219 op1
= gimple_assign_rhs2_ptr (stmt
);
1222 zero
= integer_zero_node
;
1223 const_iv
.step
= integer_zero_node
;
1225 if (TREE_CODE (*op0
) == SSA_NAME
)
1226 iv0
= get_iv (data
, *op0
);
1227 if (TREE_CODE (*op1
) == SSA_NAME
)
1228 iv1
= get_iv (data
, *op1
);
1230 /* Exactly one of the compared values must be an iv, and the other one must
1235 if (integer_zerop (iv0
->step
))
1237 /* Control variable may be on the other side. */
1238 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1239 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1241 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1245 *control_var
= op0
;;
1256 /* Checks whether the condition in STMT is interesting and if so,
1260 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1262 tree
*var_p
, *bound_p
;
1263 struct iv
*var_iv
, *civ
;
1265 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1267 find_interesting_uses_op (data
, *var_p
);
1268 find_interesting_uses_op (data
, *bound_p
);
1272 civ
= XNEW (struct iv
);
1274 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1277 /* Returns true if expression EXPR is obviously invariant in LOOP,
1278 i.e. if all its operands are defined outside of the LOOP. LOOP
1279 should not be the function body. */
1282 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1287 gcc_assert (loop_depth (loop
) > 0);
1289 if (is_gimple_min_invariant (expr
))
1292 if (TREE_CODE (expr
) == SSA_NAME
)
1294 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1296 && flow_bb_inside_loop_p (loop
, def_bb
))
1305 len
= TREE_OPERAND_LENGTH (expr
);
1306 for (i
= 0; i
< len
; i
++)
1307 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1313 /* Returns true if statement STMT is obviously invariant in LOOP,
1314 i.e. if all its operands on the RHS are defined outside of the LOOP.
1315 LOOP should not be the function body. */
1318 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1323 gcc_assert (loop_depth (loop
) > 0);
1325 lhs
= gimple_get_lhs (stmt
);
1326 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1328 tree op
= gimple_op (stmt
, i
);
1329 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1336 /* Cumulates the steps of indices into DATA and replaces their values with the
1337 initial ones. Returns false when the value of the index cannot be determined.
1338 Callback for for_each_index. */
1340 struct ifs_ivopts_data
1342 struct ivopts_data
*ivopts_data
;
1348 idx_find_step (tree base
, tree
*idx
, void *data
)
1350 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1352 tree step
, iv_base
, iv_step
, lbound
, off
;
1353 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1355 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1356 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1359 /* If base is a component ref, require that the offset of the reference
1361 if (TREE_CODE (base
) == COMPONENT_REF
)
1363 off
= component_ref_field_offset (base
);
1364 return expr_invariant_in_loop_p (loop
, off
);
1367 /* If base is array, first check whether we will be able to move the
1368 reference out of the loop (in order to take its address in strength
1369 reduction). In order for this to work we need both lower bound
1370 and step to be loop invariants. */
1371 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1373 /* Moreover, for a range, the size needs to be invariant as well. */
1374 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1375 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1378 step
= array_ref_element_size (base
);
1379 lbound
= array_ref_low_bound (base
);
1381 if (!expr_invariant_in_loop_p (loop
, step
)
1382 || !expr_invariant_in_loop_p (loop
, lbound
))
1386 if (TREE_CODE (*idx
) != SSA_NAME
)
1389 iv
= get_iv (dta
->ivopts_data
, *idx
);
1393 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1394 *&x[0], which is not folded and does not trigger the
1395 ARRAY_REF path below. */
1398 if (integer_zerop (iv
->step
))
1401 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1403 step
= array_ref_element_size (base
);
1405 /* We only handle addresses whose step is an integer constant. */
1406 if (TREE_CODE (step
) != INTEGER_CST
)
1410 /* The step for pointer arithmetics already is 1 byte. */
1411 step
= build_int_cst (sizetype
, 1);
1415 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1416 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1419 /* The index might wrap. */
1423 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1424 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1429 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1430 object is passed to it in DATA. */
1433 idx_record_use (tree base
, tree
*idx
,
1436 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1437 find_interesting_uses_op (data
, *idx
);
1438 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1440 find_interesting_uses_op (data
, array_ref_element_size (base
));
1441 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1446 /* If we can prove that TOP = cst * BOT for some constant cst,
1447 store cst to MUL and return true. Otherwise return false.
1448 The returned value is always sign-extended, regardless of the
1449 signedness of TOP and BOT. */
1452 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1455 enum tree_code code
;
1456 double_int res
, p0
, p1
;
1457 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1462 if (operand_equal_p (top
, bot
, 0))
1464 *mul
= double_int_one
;
1468 code
= TREE_CODE (top
);
1472 mby
= TREE_OPERAND (top
, 1);
1473 if (TREE_CODE (mby
) != INTEGER_CST
)
1476 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1479 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1485 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1486 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1489 if (code
== MINUS_EXPR
)
1490 p1
= double_int_neg (p1
);
1491 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1495 if (TREE_CODE (bot
) != INTEGER_CST
)
1498 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1499 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1500 if (double_int_zero_p (p1
))
1502 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1504 return double_int_zero_p (res
);
1511 /* Returns true if memory reference REF with step STEP may be unaligned. */
1514 may_be_unaligned_p (tree ref
, tree step
)
1518 HOST_WIDE_INT bitsize
;
1519 HOST_WIDE_INT bitpos
;
1521 enum machine_mode mode
;
1522 int unsignedp
, volatilep
;
1523 unsigned base_align
;
1525 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526 thus they are not misaligned. */
1527 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1530 /* The test below is basically copy of what expr.c:normal_inner_ref
1531 does to check whether the object must be loaded by parts when
1532 STRICT_ALIGNMENT is true. */
1533 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1534 &unsignedp
, &volatilep
, true);
1535 base_type
= TREE_TYPE (base
);
1536 base_align
= TYPE_ALIGN (base_type
);
1538 if (mode
!= BLKmode
)
1541 tree al
= build_int_cst (TREE_TYPE (step
),
1542 GET_MODE_ALIGNMENT (mode
) / BITS_PER_UNIT
);
1544 if (base_align
< GET_MODE_ALIGNMENT (mode
)
1545 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1546 || bitpos
% BITS_PER_UNIT
!= 0)
1549 if (!constant_multiple_of (step
, al
, &mul
))
1556 /* Return true if EXPR may be non-addressable. */
1559 may_be_nonaddressable_p (tree expr
)
1561 switch (TREE_CODE (expr
))
1563 case TARGET_MEM_REF
:
1564 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1565 target, thus they are always addressable. */
1569 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1570 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1572 case VIEW_CONVERT_EXPR
:
1573 /* This kind of view-conversions may wrap non-addressable objects
1574 and make them look addressable. After some processing the
1575 non-addressability may be uncovered again, causing ADDR_EXPRs
1576 of inappropriate objects to be built. */
1577 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1578 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1581 /* ... fall through ... */
1584 case ARRAY_RANGE_REF
:
1585 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1597 /* Finds addresses in *OP_P inside STMT. */
1600 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1602 tree base
= *op_p
, step
= build_int_cst (sizetype
, 0);
1604 struct ifs_ivopts_data ifs_ivopts_data
;
1606 /* Do not play with volatile memory references. A bit too conservative,
1607 perhaps, but safe. */
1608 if (gimple_has_volatile_ops (stmt
))
1611 /* Ignore bitfields for now. Not really something terribly complicated
1613 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1616 base
= unshare_expr (base
);
1618 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1620 tree type
= build_pointer_type (TREE_TYPE (base
));
1624 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1626 civ
= get_iv (data
, TMR_BASE (base
));
1630 TMR_BASE (base
) = civ
->base
;
1633 if (TMR_INDEX (base
)
1634 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1636 civ
= get_iv (data
, TMR_INDEX (base
));
1640 TMR_INDEX (base
) = civ
->base
;
1645 if (TMR_STEP (base
))
1646 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1648 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1652 if (integer_zerop (step
))
1654 base
= tree_mem_ref_addr (type
, base
);
1658 ifs_ivopts_data
.ivopts_data
= data
;
1659 ifs_ivopts_data
.stmt
= stmt
;
1660 ifs_ivopts_data
.step
= build_int_cst (sizetype
, 0);
1661 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1662 || integer_zerop (ifs_ivopts_data
.step
))
1664 step
= ifs_ivopts_data
.step
;
1666 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1667 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1669 /* Check that the base expression is addressable. This needs
1670 to be done after substituting bases of IVs into it. */
1671 if (may_be_nonaddressable_p (base
))
1674 /* Moreover, on strict alignment platforms, check that it is
1675 sufficiently aligned. */
1676 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1679 base
= build_fold_addr_expr (base
);
1681 /* Substituting bases of IVs into the base expression might
1682 have caused folding opportunities. */
1683 if (TREE_CODE (base
) == ADDR_EXPR
)
1685 tree
*ref
= &TREE_OPERAND (base
, 0);
1686 while (handled_component_p (*ref
))
1687 ref
= &TREE_OPERAND (*ref
, 0);
1688 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1689 *ref
= fold_indirect_ref (*ref
);
1693 civ
= alloc_iv (base
, step
);
1694 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1698 for_each_index (op_p
, idx_record_use
, data
);
1701 /* Finds and records invariants used in STMT. */
1704 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1707 use_operand_p use_p
;
1710 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1712 op
= USE_FROM_PTR (use_p
);
1713 record_invariant (data
, op
, false);
1717 /* Finds interesting uses of induction variables in the statement STMT. */
1720 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1723 tree op
, *lhs
, *rhs
;
1725 use_operand_p use_p
;
1726 enum tree_code code
;
1728 find_invariants_stmt (data
, stmt
);
1730 if (gimple_code (stmt
) == GIMPLE_COND
)
1732 find_interesting_uses_cond (data
, stmt
);
1736 if (is_gimple_assign (stmt
))
1738 lhs
= gimple_assign_lhs_ptr (stmt
);
1739 rhs
= gimple_assign_rhs1_ptr (stmt
);
1741 if (TREE_CODE (*lhs
) == SSA_NAME
)
1743 /* If the statement defines an induction variable, the uses are not
1744 interesting by themselves. */
1746 iv
= get_iv (data
, *lhs
);
1748 if (iv
&& !integer_zerop (iv
->step
))
1752 code
= gimple_assign_rhs_code (stmt
);
1753 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1754 && (REFERENCE_CLASS_P (*rhs
)
1755 || is_gimple_val (*rhs
)))
1757 if (REFERENCE_CLASS_P (*rhs
))
1758 find_interesting_uses_address (data
, stmt
, rhs
);
1760 find_interesting_uses_op (data
, *rhs
);
1762 if (REFERENCE_CLASS_P (*lhs
))
1763 find_interesting_uses_address (data
, stmt
, lhs
);
1766 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1768 find_interesting_uses_cond (data
, stmt
);
1772 /* TODO -- we should also handle address uses of type
1774 memory = call (whatever);
1781 if (gimple_code (stmt
) == GIMPLE_PHI
1782 && gimple_bb (stmt
) == data
->current_loop
->header
)
1784 iv
= get_iv (data
, PHI_RESULT (stmt
));
1786 if (iv
&& !integer_zerop (iv
->step
))
1790 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1792 op
= USE_FROM_PTR (use_p
);
1794 if (TREE_CODE (op
) != SSA_NAME
)
1797 iv
= get_iv (data
, op
);
1801 find_interesting_uses_op (data
, op
);
1805 /* Finds interesting uses of induction variables outside of loops
1806 on loop exit edge EXIT. */
1809 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1812 gimple_stmt_iterator psi
;
1815 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1817 phi
= gsi_stmt (psi
);
1818 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1819 if (is_gimple_reg (def
))
1820 find_interesting_uses_op (data
, def
);
1824 /* Finds uses of the induction variables that are interesting. */
1827 find_interesting_uses (struct ivopts_data
*data
)
1830 gimple_stmt_iterator bsi
;
1831 basic_block
*body
= get_loop_body (data
->current_loop
);
1833 struct version_info
*info
;
1836 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1837 fprintf (dump_file
, "Uses:\n\n");
1839 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1844 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1845 if (e
->dest
!= EXIT_BLOCK_PTR
1846 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1847 find_interesting_uses_outside (data
, e
);
1849 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1850 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1851 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1852 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1855 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1859 fprintf (dump_file
, "\n");
1861 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1863 info
= ver_info (data
, i
);
1866 fprintf (dump_file
, " ");
1867 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1868 fprintf (dump_file
, " is invariant (%d)%s\n",
1869 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1873 fprintf (dump_file
, "\n");
1879 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1880 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1881 we are at the top-level of the processed address. */
1884 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1885 unsigned HOST_WIDE_INT
*offset
)
1887 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1888 enum tree_code code
;
1889 tree type
, orig_type
= TREE_TYPE (expr
);
1890 unsigned HOST_WIDE_INT off0
, off1
, st
;
1891 tree orig_expr
= expr
;
1895 type
= TREE_TYPE (expr
);
1896 code
= TREE_CODE (expr
);
1902 if (!cst_and_fits_in_hwi (expr
)
1903 || integer_zerop (expr
))
1906 *offset
= int_cst_value (expr
);
1907 return build_int_cst (orig_type
, 0);
1909 case POINTER_PLUS_EXPR
:
1912 op0
= TREE_OPERAND (expr
, 0);
1913 op1
= TREE_OPERAND (expr
, 1);
1915 op0
= strip_offset_1 (op0
, false, false, &off0
);
1916 op1
= strip_offset_1 (op1
, false, false, &off1
);
1918 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
1919 if (op0
== TREE_OPERAND (expr
, 0)
1920 && op1
== TREE_OPERAND (expr
, 1))
1923 if (integer_zerop (op1
))
1925 else if (integer_zerop (op0
))
1927 if (code
== MINUS_EXPR
)
1928 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1933 expr
= fold_build2 (code
, type
, op0
, op1
);
1935 return fold_convert (orig_type
, expr
);
1938 op1
= TREE_OPERAND (expr
, 1);
1939 if (!cst_and_fits_in_hwi (op1
))
1942 op0
= TREE_OPERAND (expr
, 0);
1943 op0
= strip_offset_1 (op0
, false, false, &off0
);
1944 if (op0
== TREE_OPERAND (expr
, 0))
1947 *offset
= off0
* int_cst_value (op1
);
1948 if (integer_zerop (op0
))
1951 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
1953 return fold_convert (orig_type
, expr
);
1956 case ARRAY_RANGE_REF
:
1960 step
= array_ref_element_size (expr
);
1961 if (!cst_and_fits_in_hwi (step
))
1964 st
= int_cst_value (step
);
1965 op1
= TREE_OPERAND (expr
, 1);
1966 op1
= strip_offset_1 (op1
, false, false, &off1
);
1967 *offset
= off1
* st
;
1970 && integer_zerop (op1
))
1972 /* Strip the component reference completely. */
1973 op0
= TREE_OPERAND (expr
, 0);
1974 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1984 tmp
= component_ref_field_offset (expr
);
1986 && cst_and_fits_in_hwi (tmp
))
1988 /* Strip the component reference completely. */
1989 op0
= TREE_OPERAND (expr
, 0);
1990 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1991 *offset
= off0
+ int_cst_value (tmp
);
1997 op0
= TREE_OPERAND (expr
, 0);
1998 op0
= strip_offset_1 (op0
, true, true, &off0
);
2001 if (op0
== TREE_OPERAND (expr
, 0))
2004 expr
= build_fold_addr_expr (op0
);
2005 return fold_convert (orig_type
, expr
);
2008 inside_addr
= false;
2015 /* Default handling of expressions for that we want to recurse into
2016 the first operand. */
2017 op0
= TREE_OPERAND (expr
, 0);
2018 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2021 if (op0
== TREE_OPERAND (expr
, 0)
2022 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2025 expr
= copy_node (expr
);
2026 TREE_OPERAND (expr
, 0) = op0
;
2028 TREE_OPERAND (expr
, 1) = op1
;
2030 /* Inside address, we might strip the top level component references,
2031 thus changing type of the expression. Handling of ADDR_EXPR
2033 expr
= fold_convert (orig_type
, expr
);
2038 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2041 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2043 return strip_offset_1 (expr
, false, false, offset
);
2046 /* Returns variant of TYPE that can be used as base for different uses.
2047 We return unsigned type with the same precision, which avoids problems
2051 generic_type_for (tree type
)
2053 if (POINTER_TYPE_P (type
))
2054 return unsigned_type_for (type
);
2056 if (TYPE_UNSIGNED (type
))
2059 return unsigned_type_for (type
);
2062 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2063 the bitmap to that we should store it. */
2065 static struct ivopts_data
*fd_ivopts_data
;
2067 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2069 bitmap
*depends_on
= (bitmap
*) data
;
2070 struct version_info
*info
;
2072 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2074 info
= name_info (fd_ivopts_data
, *expr_p
);
2076 if (!info
->inv_id
|| info
->has_nonlin_use
)
2080 *depends_on
= BITMAP_ALLOC (NULL
);
2081 bitmap_set_bit (*depends_on
, info
->inv_id
);
2086 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2087 position to POS. If USE is not NULL, the candidate is set as related to
2088 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2089 replacement of the final value of the iv by a direct computation. */
2091 static struct iv_cand
*
2092 add_candidate_1 (struct ivopts_data
*data
,
2093 tree base
, tree step
, bool important
, enum iv_position pos
,
2094 struct iv_use
*use
, gimple incremented_at
)
2097 struct iv_cand
*cand
= NULL
;
2098 tree type
, orig_type
;
2102 orig_type
= TREE_TYPE (base
);
2103 type
= generic_type_for (orig_type
);
2104 if (type
!= orig_type
)
2106 base
= fold_convert (type
, base
);
2107 step
= fold_convert (type
, step
);
2111 for (i
= 0; i
< n_iv_cands (data
); i
++)
2113 cand
= iv_cand (data
, i
);
2115 if (cand
->pos
!= pos
)
2118 if (cand
->incremented_at
!= incremented_at
2119 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2120 && cand
->ainc_use
!= use
))
2134 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2135 && operand_equal_p (step
, cand
->iv
->step
, 0))
2139 if (i
== n_iv_cands (data
))
2141 cand
= XCNEW (struct iv_cand
);
2147 cand
->iv
= alloc_iv (base
, step
);
2150 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2152 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2153 cand
->var_after
= cand
->var_before
;
2155 cand
->important
= important
;
2156 cand
->incremented_at
= incremented_at
;
2157 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2160 && TREE_CODE (step
) != INTEGER_CST
)
2162 fd_ivopts_data
= data
;
2163 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2166 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2167 cand
->ainc_use
= use
;
2169 cand
->ainc_use
= NULL
;
2171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2172 dump_cand (dump_file
, cand
);
2175 if (important
&& !cand
->important
)
2177 cand
->important
= true;
2178 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2179 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2184 bitmap_set_bit (use
->related_cands
, i
);
2185 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2186 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2193 /* Returns true if incrementing the induction variable at the end of the LOOP
2196 The purpose is to avoid splitting latch edge with a biv increment, thus
2197 creating a jump, possibly confusing other optimization passes and leaving
2198 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2199 is not available (so we do not have a better alternative), or if the latch
2200 edge is already nonempty. */
2203 allow_ip_end_pos_p (struct loop
*loop
)
2205 if (!ip_normal_pos (loop
))
2208 if (!empty_block_p (ip_end_pos (loop
)))
2214 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2215 Important field is set to IMPORTANT. */
2218 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2219 bool important
, struct iv_use
*use
)
2221 basic_block use_bb
= gimple_bb (use
->stmt
);
2222 enum machine_mode mem_mode
;
2223 unsigned HOST_WIDE_INT cstepi
;
2225 /* If we insert the increment in any position other than the standard
2226 ones, we must ensure that it is incremented once per iteration.
2227 It must not be in an inner nested loop, or one side of an if
2229 if (use_bb
->loop_father
!= data
->current_loop
2230 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2231 || stmt_could_throw_p (use
->stmt
)
2232 || !cst_and_fits_in_hwi (step
))
2235 cstepi
= int_cst_value (step
);
2237 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2238 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2239 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2241 enum tree_code code
= MINUS_EXPR
;
2243 tree new_step
= step
;
2245 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2247 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2248 code
= POINTER_PLUS_EXPR
;
2251 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2252 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2253 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2256 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2257 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2259 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2264 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2265 position to POS. If USE is not NULL, the candidate is set as related to
2266 it. The candidate computation is scheduled on all available positions. */
2269 add_candidate (struct ivopts_data
*data
,
2270 tree base
, tree step
, bool important
, struct iv_use
*use
)
2272 if (ip_normal_pos (data
->current_loop
))
2273 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2274 if (ip_end_pos (data
->current_loop
)
2275 && allow_ip_end_pos_p (data
->current_loop
))
2276 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2278 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2279 add_autoinc_candidates (data
, base
, step
, important
, use
);
2282 /* Add a standard "0 + 1 * iteration" iv candidate for a
2283 type with SIZE bits. */
2286 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2289 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2290 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2294 /* Adds standard iv candidates. */
2297 add_standard_iv_candidates (struct ivopts_data
*data
)
2299 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2301 /* The same for a double-integer type if it is still fast enough. */
2302 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2303 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2307 /* Adds candidates bases on the old induction variable IV. */
2310 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2314 struct iv_cand
*cand
;
2316 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2318 /* The same, but with initial value zero. */
2319 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2320 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2322 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2323 iv
->step
, true, NULL
);
2325 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2326 if (gimple_code (phi
) == GIMPLE_PHI
)
2328 /* Additionally record the possibility of leaving the original iv
2330 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2331 cand
= add_candidate_1 (data
,
2332 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2333 SSA_NAME_DEF_STMT (def
));
2334 cand
->var_before
= iv
->ssa_name
;
2335 cand
->var_after
= def
;
2339 /* Adds candidates based on the old induction variables. */
2342 add_old_ivs_candidates (struct ivopts_data
*data
)
2348 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2350 iv
= ver_info (data
, i
)->iv
;
2351 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2352 add_old_iv_candidates (data
, iv
);
2356 /* Adds candidates based on the value of the induction variable IV and USE. */
2359 add_iv_value_candidates (struct ivopts_data
*data
,
2360 struct iv
*iv
, struct iv_use
*use
)
2362 unsigned HOST_WIDE_INT offset
;
2366 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2368 /* The same, but with initial value zero. Make such variable important,
2369 since it is generic enough so that possibly many uses may be based
2371 basetype
= TREE_TYPE (iv
->base
);
2372 if (POINTER_TYPE_P (basetype
))
2373 basetype
= sizetype
;
2374 add_candidate (data
, build_int_cst (basetype
, 0),
2375 iv
->step
, true, use
);
2377 /* Third, try removing the constant offset. Make sure to even
2378 add a candidate for &a[0] vs. (T *)&a. */
2379 base
= strip_offset (iv
->base
, &offset
);
2381 || base
!= iv
->base
)
2382 add_candidate (data
, base
, iv
->step
, false, use
);
2385 /* Adds candidates based on the uses. */
2388 add_derived_ivs_candidates (struct ivopts_data
*data
)
2392 for (i
= 0; i
< n_iv_uses (data
); i
++)
2394 struct iv_use
*use
= iv_use (data
, i
);
2401 case USE_NONLINEAR_EXPR
:
2404 /* Just add the ivs based on the value of the iv used here. */
2405 add_iv_value_candidates (data
, use
->iv
, use
);
2414 /* Record important candidates and add them to related_cands bitmaps
2418 record_important_candidates (struct ivopts_data
*data
)
2423 for (i
= 0; i
< n_iv_cands (data
); i
++)
2425 struct iv_cand
*cand
= iv_cand (data
, i
);
2427 if (cand
->important
)
2428 bitmap_set_bit (data
->important_candidates
, i
);
2431 data
->consider_all_candidates
= (n_iv_cands (data
)
2432 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2434 if (data
->consider_all_candidates
)
2436 /* We will not need "related_cands" bitmaps in this case,
2437 so release them to decrease peak memory consumption. */
2438 for (i
= 0; i
< n_iv_uses (data
); i
++)
2440 use
= iv_use (data
, i
);
2441 BITMAP_FREE (use
->related_cands
);
2446 /* Add important candidates to the related_cands bitmaps. */
2447 for (i
= 0; i
< n_iv_uses (data
); i
++)
2448 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2449 data
->important_candidates
);
2453 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2454 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2455 we allocate a simple list to every use. */
2458 alloc_use_cost_map (struct ivopts_data
*data
)
2460 unsigned i
, size
, s
, j
;
2462 for (i
= 0; i
< n_iv_uses (data
); i
++)
2464 struct iv_use
*use
= iv_use (data
, i
);
2467 if (data
->consider_all_candidates
)
2468 size
= n_iv_cands (data
);
2472 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2477 /* Round up to the power of two, so that moduling by it is fast. */
2478 for (size
= 1; size
< s
; size
<<= 1)
2482 use
->n_map_members
= size
;
2483 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2487 /* Returns description of computation cost of expression whose runtime
2488 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2491 new_cost (unsigned runtime
, unsigned complexity
)
2495 cost
.cost
= runtime
;
2496 cost
.complexity
= complexity
;
2501 /* Adds costs COST1 and COST2. */
2504 add_costs (comp_cost cost1
, comp_cost cost2
)
2506 cost1
.cost
+= cost2
.cost
;
2507 cost1
.complexity
+= cost2
.complexity
;
2511 /* Subtracts costs COST1 and COST2. */
2514 sub_costs (comp_cost cost1
, comp_cost cost2
)
2516 cost1
.cost
-= cost2
.cost
;
2517 cost1
.complexity
-= cost2
.complexity
;
2522 /* Returns a negative number if COST1 < COST2, a positive number if
2523 COST1 > COST2, and 0 if COST1 = COST2. */
2526 compare_costs (comp_cost cost1
, comp_cost cost2
)
2528 if (cost1
.cost
== cost2
.cost
)
2529 return cost1
.complexity
- cost2
.complexity
;
2531 return cost1
.cost
- cost2
.cost
;
2534 /* Returns true if COST is infinite. */
2537 infinite_cost_p (comp_cost cost
)
2539 return cost
.cost
== INFTY
;
2542 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2543 on invariants DEPENDS_ON and that the value used in expressing it
2547 set_use_iv_cost (struct ivopts_data
*data
,
2548 struct iv_use
*use
, struct iv_cand
*cand
,
2549 comp_cost cost
, bitmap depends_on
, tree value
)
2553 if (infinite_cost_p (cost
))
2555 BITMAP_FREE (depends_on
);
2559 if (data
->consider_all_candidates
)
2561 use
->cost_map
[cand
->id
].cand
= cand
;
2562 use
->cost_map
[cand
->id
].cost
= cost
;
2563 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2564 use
->cost_map
[cand
->id
].value
= value
;
2568 /* n_map_members is a power of two, so this computes modulo. */
2569 s
= cand
->id
& (use
->n_map_members
- 1);
2570 for (i
= s
; i
< use
->n_map_members
; i
++)
2571 if (!use
->cost_map
[i
].cand
)
2573 for (i
= 0; i
< s
; i
++)
2574 if (!use
->cost_map
[i
].cand
)
2580 use
->cost_map
[i
].cand
= cand
;
2581 use
->cost_map
[i
].cost
= cost
;
2582 use
->cost_map
[i
].depends_on
= depends_on
;
2583 use
->cost_map
[i
].value
= value
;
2586 /* Gets cost of (USE, CANDIDATE) pair. */
2588 static struct cost_pair
*
2589 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2590 struct iv_cand
*cand
)
2593 struct cost_pair
*ret
;
2598 if (data
->consider_all_candidates
)
2600 ret
= use
->cost_map
+ cand
->id
;
2607 /* n_map_members is a power of two, so this computes modulo. */
2608 s
= cand
->id
& (use
->n_map_members
- 1);
2609 for (i
= s
; i
< use
->n_map_members
; i
++)
2610 if (use
->cost_map
[i
].cand
== cand
)
2611 return use
->cost_map
+ i
;
2613 for (i
= 0; i
< s
; i
++)
2614 if (use
->cost_map
[i
].cand
== cand
)
2615 return use
->cost_map
+ i
;
2620 /* Returns estimate on cost of computing SEQ. */
2623 seq_cost (rtx seq
, bool speed
)
2628 for (; seq
; seq
= NEXT_INSN (seq
))
2630 set
= single_set (seq
);
2632 cost
+= rtx_cost (set
, SET
,speed
);
2640 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2642 produce_memory_decl_rtl (tree obj
, int *regno
)
2647 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2649 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2650 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2651 SET_SYMBOL_REF_DECL (x
, obj
);
2652 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2653 targetm
.encode_section_info (obj
, x
, true);
2657 x
= gen_raw_REG (Pmode
, (*regno
)++);
2658 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2664 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2665 walk_tree. DATA contains the actual fake register number. */
2668 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2670 tree obj
= NULL_TREE
;
2672 int *regno
= (int *) data
;
2674 switch (TREE_CODE (*expr_p
))
2677 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2678 handled_component_p (*expr_p
);
2679 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2682 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2683 x
= produce_memory_decl_rtl (obj
, regno
);
2688 obj
= SSA_NAME_VAR (*expr_p
);
2689 if (!DECL_RTL_SET_P (obj
))
2690 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2699 if (DECL_RTL_SET_P (obj
))
2702 if (DECL_MODE (obj
) == BLKmode
)
2703 x
= produce_memory_decl_rtl (obj
, regno
);
2705 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2715 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2716 SET_DECL_RTL (obj
, x
);
2722 /* Determines cost of the computation of EXPR. */
2725 computation_cost (tree expr
, bool speed
)
2728 tree type
= TREE_TYPE (expr
);
2730 /* Avoid using hard regs in ways which may be unsupported. */
2731 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2732 enum function_frequency real_frequency
= cfun
->function_frequency
;
2734 cfun
->function_frequency
= FUNCTION_FREQUENCY_NORMAL
;
2735 crtl
->maybe_hot_insn_p
= speed
;
2736 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2738 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2741 default_rtl_profile ();
2742 cfun
->function_frequency
= real_frequency
;
2744 cost
= seq_cost (seq
, speed
);
2746 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
), speed
);
2751 /* Returns variable containing the value of candidate CAND at statement AT. */
2754 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2756 if (stmt_after_increment (loop
, cand
, stmt
))
2757 return cand
->var_after
;
2759 return cand
->var_before
;
2762 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2763 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2766 tree_int_cst_sign_bit (const_tree t
)
2768 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2769 unsigned HOST_WIDE_INT w
;
2771 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2772 w
= TREE_INT_CST_LOW (t
);
2775 w
= TREE_INT_CST_HIGH (t
);
2776 bitno
-= HOST_BITS_PER_WIDE_INT
;
2779 return (w
>> bitno
) & 1;
2782 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2783 same precision that is at least as wide as the precision of TYPE, stores
2784 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2788 determine_common_wider_type (tree
*a
, tree
*b
)
2790 tree wider_type
= NULL
;
2792 tree atype
= TREE_TYPE (*a
);
2794 if (CONVERT_EXPR_P (*a
))
2796 suba
= TREE_OPERAND (*a
, 0);
2797 wider_type
= TREE_TYPE (suba
);
2798 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2804 if (CONVERT_EXPR_P (*b
))
2806 subb
= TREE_OPERAND (*b
, 0);
2807 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2818 /* Determines the expression by that USE is expressed from induction variable
2819 CAND at statement AT in LOOP. The expression is stored in a decomposed
2820 form into AFF. Returns false if USE cannot be expressed using CAND. */
/* NOTE(review): damaged extraction -- return type, braces, several local
   declarations (uutype, rat, cstep_aff) and some statement bodies are
   missing; text kept byte-for-byte.  */
2823 get_computation_aff (struct loop
*loop
,
2824 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2825 struct affine_tree_combination
*aff
)
2827 tree ubase
= use
->iv
->base
;
2828 tree ustep
= use
->iv
->step
;
2829 tree cbase
= cand
->iv
->base
;
2830 tree cstep
= cand
->iv
->step
, cstep_common
;
2831 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2832 tree common_type
, var
;
2834 aff_tree cbase_aff
, var_aff
;
/* USE cannot be expressed when its type is wider than the candidate's.  */
2837 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2839 /* We do not have a precision to express the values of use. */
2843 var
= var_at_stmt (loop
, cand
, at
);
2844 uutype
= unsigned_type_for (utype
);
2846 /* If the conversion is not noop, perform it. */
2847 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2849 cstep
= fold_convert (uutype
, cstep
);
2850 cbase
= fold_convert (uutype
, cbase
);
2851 var
= fold_convert (uutype
, var
);
/* RAT becomes the ratio ustep / cstep; failure means USE is not
   expressible from CAND.  */
2854 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2857 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2858 type, we achieve better folding by computing their difference in this
2859 wider type, and cast the result to UUTYPE. We do not need to worry about
2860 overflows, as all the arithmetics will in the end be performed in UUTYPE
2862 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2864 /* use = ubase - ratio * cbase + ratio * var. */
2865 tree_to_aff_combination (ubase
, common_type
, aff
);
2866 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2867 tree_to_aff_combination (var
, uutype
, &var_aff
);
2869 /* We need to shift the value if we are after the increment. */
2870 if (stmt_after_increment (loop
, cand
, at
))
2874 if (common_type
!= uutype
)
2875 cstep_common
= fold_convert (common_type
, cstep
);
2877 cstep_common
= cstep
;
2879 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2880 aff_combination_add (&cbase_aff
, &cstep_aff
);
/* Accumulate -ratio * cbase into AFF, then convert back to UUTYPE.  */
2883 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2884 aff_combination_add (aff
, &cbase_aff
);
2885 if (common_type
!= uutype
)
2886 aff_combination_convert (aff
, uutype
);
/* Finally add ratio * var.  */
2888 aff_combination_scale (&var_aff
, rat
);
2889 aff_combination_add (aff
, &var_aff
);
2894 /* Determines the expression by that USE is expressed from induction variable
2895 CAND at statement AT in LOOP. The computation is unshared. */
/* NOTE(review): damaged extraction -- return type, braces, the `aff`
   declaration and the failure return are missing; text kept byte-for-byte.
   Visible flow: decompose via get_computation_aff, unshare, fold back to
   a tree of the use's type.  */
2898 get_computation_at (struct loop
*loop
,
2899 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
2902 tree type
= TREE_TYPE (use
->iv
->base
);
2904 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
2906 unshare_aff_combination (&aff
);
2907 return fold_convert (type
, aff_combination_to_tree (&aff
));
2910 /* Determines the expression by that USE is expressed from induction variable
2911 CAND in LOOP. The computation is unshared. */
/* NOTE(review): thin wrapper -- delegates to get_computation_at at the
   use's own statement.  Return type and braces lost in extraction.  */
2914 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
2916 return get_computation_at (loop
, use
, cand
, use
->stmt
);
2919 /* Returns cost of addition in MODE. */
/* NOTE(review): damaged extraction -- return type, braces, the memoization
   lookup/store into `costs`, the start/end-sequence calls and the final
   return are missing; text kept byte-for-byte.  Visible logic: emit an RTL
   PLUS of two fresh pseudo registers and price the resulting sequence.  */
2922 add_cost (enum machine_mode mode
, bool speed
)
/* Per-mode memo table; presumably indexed by MODE -- TODO confirm against
   the undamaged original.  */
2924 static unsigned costs
[NUM_MACHINE_MODES
];
2932 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
2933 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
2934 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
2939 cost
= seq_cost (seq
, speed
);
2945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2946 fprintf (dump_file
, "Addition in %s costs %d\n",
2947 GET_MODE_NAME (mode
), cost
);
2951 /* Entry in a hashtable of already known costs for multiplication. */
/* NOTE(review): the `struct mbc_entry {` line and closing brace were lost
   in extraction; only the three members survive.  */
2954 HOST_WIDE_INT cst
; /* The constant to multiply by. */
2955 enum machine_mode mode
; /* In mode. */
2956 unsigned cost
; /* The cost. */
2959 /* Counts hash value for the ENTRY. */
/* NOTE(review): htab_hash callback for the multiply-by-cost table; mixes
   the machine mode and the multiplier constant.  Return type and braces
   lost in extraction.  */
2962 mbc_entry_hash (const void *entry
)
2964 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
2966 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
2969 /* Compares the hash table entries ENTRY1 and ENTRY2. */
/* NOTE(review): htab_eq callback paired with mbc_entry_hash -- entries are
   equal iff both mode and multiplier constant match.  Return type and
   braces lost in extraction.  */
2972 mbc_entry_eq (const void *entry1
, const void *entry2
)
2974 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
2975 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
2977 return (e1
->mode
== e2
->mode
2978 && e1
->cst
== e2
->cst
);
2981 /* Returns cost of multiplication by constant CST in MODE. */
/* NOTE(review): damaged extraction -- return type, braces, lazy-init guard,
   the `act` field assignments before lookup, start/end-sequence calls and
   the final return are missing; text kept byte-for-byte.  Visible logic:
   memoize via a lazily created hash table, otherwise expand the multiply
   to RTL and price the sequence.  */
2984 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
, bool speed
)
2986 static htab_t costs
;
2987 struct mbc_entry
**cached
, act
;
/* Lazily create the memo table; `free` releases entries on deletion.  */
2992 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
2996 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
/* Cache hit path.  */
2998 return (*cached
)->cost
;
/* Cache miss: allocate and fill a fresh entry.  */
3000 *cached
= XNEW (struct mbc_entry
);
3001 (*cached
)->mode
= mode
;
3002 (*cached
)->cst
= cst
;
3005 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3006 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3010 cost
= seq_cost (seq
, speed
);
3012 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3013 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3014 (int) cst
, GET_MODE_NAME (mode
), cost
);
3016 (*cached
)->cost
= cost
;
3021 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3022 validity for a memory reference accessing memory of mode MODE. */
/* NOTE(review): damaged extraction -- return type, braces, the `addr`/`i`
   declarations and the early `return false` for out-of-range ratios are
   missing; text kept byte-for-byte.  Visible logic: on first query for a
   mode, probe every multiplier in [-MAX_RATIO, MAX_RATIO] with
   memory_address_p and cache the answers in a per-mode sbitmap.  */
3025 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
)
3027 #define MAX_RATIO 128
3028 static sbitmap valid_mult
[MAX_MACHINE_MODE
];
3030 if (!valid_mult
[mode
])
3032 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
/* Bitmap index is biased by MAX_RATIO so negative ratios fit.  */
3036 valid_mult
[mode
] = sbitmap_alloc (2 * MAX_RATIO
+ 1);
3037 sbitmap_zero (valid_mult
[mode
]);
3038 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3039 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3041 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3042 if (memory_address_p (mode
, addr
))
3043 SET_BIT (valid_mult
[mode
], i
+ MAX_RATIO
);
3046 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3048 fprintf (dump_file
, " allowed multipliers:");
3049 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3050 if (TEST_BIT (valid_mult
[mode
], i
+ MAX_RATIO
))
3051 fprintf (dump_file
, " %d", (int) i
);
3052 fprintf (dump_file
, "\n");
3053 fprintf (dump_file
, "\n");
/* Ratios outside the probed window are never considered valid.  */
3057 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3060 return TEST_BIT (valid_mult
[mode
], ratio
+ MAX_RATIO
);
3063 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3064 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3065 variable is omitted. Compute the cost for a memory reference that accesses
3066 a memory location of mode MEM_MODE.
3068 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3069 size of MEM_MODE / RATIO) is available. To make this determination, we
3070 look at the size of the increment to be made, which is given in CSTEP.
3071 CSTEP may be zero if the step is unknown.
3072 STMT_AFTER_INC is true iff the statement we're looking at is after the
3073 increment of the original biv.
3075 TODO -- there must be some better way. This all is quite crude. */
/* NOTE(review): damaged extraction -- return type, braces, loop variable
   declarations, several condition bodies and the start/end-sequence calls
   around the probed address are missing; text kept byte-for-byte.  Visible
   logic: on first call for MEM_MODE, probe the target's addressing
   capabilities (offset range, pre/post inc/dec, all 16 combinations of
   sym/var/offset/ratio) and cache per-mode costs; then classify the
   requested address shape and combine the cached cost with extra
   multiply/add costs for parts that do not fit the addressing mode.  */
3078 get_address_cost (bool symbol_present
, bool var_present
,
3079 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3080 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
, bool speed
,
3081 bool stmt_after_inc
, bool *may_autoinc
)
3083 static bool initialized
[MAX_MACHINE_MODE
];
3084 static HOST_WIDE_INT rat
[MAX_MACHINE_MODE
], off
[MAX_MACHINE_MODE
];
3085 static HOST_WIDE_INT min_offset
[MAX_MACHINE_MODE
], max_offset
[MAX_MACHINE_MODE
];
3086 static unsigned costs
[MAX_MACHINE_MODE
][2][2][2][2];
3087 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3088 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3089 unsigned cost
, acost
, complexity
;
3090 bool offset_p
, ratio_p
, autoinc
;
3091 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3092 unsigned HOST_WIDE_INT mask
;
/* One-time per-mode probing of the target's addressing modes.  */
3095 if (!initialized
[mem_mode
])
3098 HOST_WIDE_INT start
= BIGGEST_ALIGNMENT
/ BITS_PER_UNIT
;
3099 int old_cse_not_expected
;
3100 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3101 rtx seq
, addr
, base
;
3104 initialized
[mem_mode
] = true;
3106 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
/* Find the largest valid positive offset by doubling until the address
   is rejected.  */
3108 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3109 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3111 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3112 if (!memory_address_p (mem_mode
, addr
))
3115 max_offset
[mem_mode
] = i
== start
? 0 : i
>> 1;
3116 off
[mem_mode
] = max_offset
[mem_mode
];
/* Likewise for the most negative offset.  */
3118 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3120 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3121 if (!memory_address_p (mem_mode
, addr
))
3124 min_offset
[mem_mode
] = i
== start
? 0 : -(i
>> 1);
3126 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3128 fprintf (dump_file
, "get_address_cost:\n");
3129 fprintf (dump_file
, " min offset %s %d\n",
3130 GET_MODE_NAME (mem_mode
),
3131 (int) min_offset
[mem_mode
]);
3132 fprintf (dump_file
, " max offset %s %d\n",
3133 GET_MODE_NAME (mem_mode
),
3134 (int) max_offset
[mem_mode
]);
/* Pick some representative allowed multiplier for the probes below.  */
3138 for (i
= 2; i
<= MAX_RATIO
; i
++)
3139 if (multiplier_allowed_in_address_p (i
, mem_mode
))
3145 /* Compute the cost of various addressing modes. */
3147 reg0
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3148 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3150 if (HAVE_PRE_DECREMENT
)
3152 addr
= gen_rtx_PRE_DEC (Pmode
, reg0
);
3153 has_predec
[mem_mode
] = memory_address_p (mem_mode
, addr
);
3155 if (HAVE_POST_DECREMENT
)
3157 addr
= gen_rtx_POST_DEC (Pmode
, reg0
);
3158 has_postdec
[mem_mode
] = memory_address_p (mem_mode
, addr
);
3160 if (HAVE_PRE_INCREMENT
)
3162 addr
= gen_rtx_PRE_INC (Pmode
, reg0
);
3163 has_preinc
[mem_mode
] = memory_address_p (mem_mode
, addr
);
3165 if (HAVE_POST_INCREMENT
)
3167 addr
= gen_rtx_POST_INC (Pmode
, reg0
);
3168 has_postinc
[mem_mode
] = memory_address_p (mem_mode
, addr
);
/* Enumerate all 16 sym/var/off/rat combinations; bit i of the loop index
   selects each component.  */
3170 for (i
= 0; i
< 16; i
++)
3173 var_p
= (i
>> 1) & 1;
3174 off_p
= (i
>> 2) & 1;
3175 rat_p
= (i
>> 3) & 1;
3179 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
,
3180 gen_int_mode (rat
[mem_mode
], Pmode
));
3183 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3187 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3188 /* ??? We can run into trouble with some backends by presenting
3189 it with symbols which haven't been properly passed through
3190 targetm.encode_section_info. By setting the local bit, we
3191 enhance the probability of things working. */
3192 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3195 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3196 gen_rtx_fmt_ee (PLUS
, Pmode
,
3198 gen_int_mode (off
[mem_mode
],
3202 base
= gen_int_mode (off
[mem_mode
], Pmode
);
3207 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3210 /* To avoid splitting addressing modes, pretend that no cse will
3212 old_cse_not_expected
= cse_not_expected
;
3213 cse_not_expected
= true;
3214 addr
= memory_address (mem_mode
, addr
);
3215 cse_not_expected
= old_cse_not_expected
;
3219 acost
= seq_cost (seq
, speed
);
3220 acost
+= address_cost (addr
, mem_mode
, speed
);
3224 costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
] = acost
;
3227 /* On some targets, it is quite expensive to load symbol to a register,
3228 which makes addresses that contain symbols look much more expensive.
3229 However, the symbol will have to be loaded in any case before the
3230 loop (and quite likely we have it in register already), so it does not
3231 make much sense to penalize them too heavily. So make some final
3232 tweaks for the SYMBOL_PRESENT modes:
3234 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3235 var is cheaper, use this mode with small penalty.
3236 If VAR_PRESENT is true, try whether the mode with
3237 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3238 if this is the case, use it. */
3239 add_c
= add_cost (Pmode
, speed
);
3240 for (i
= 0; i
< 8; i
++)
3243 off_p
= (i
>> 1) & 1;
3244 rat_p
= (i
>> 2) & 1;
3246 acost
= costs
[mem_mode
][0][1][off_p
][rat_p
] + 1;
3250 if (acost
< costs
[mem_mode
][1][var_p
][off_p
][rat_p
])
3251 costs
[mem_mode
][1][var_p
][off_p
][rat_p
] = acost
;
3254 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3256 fprintf (dump_file
, "Address costs:\n");
3258 for (i
= 0; i
< 16; i
++)
3261 var_p
= (i
>> 1) & 1;
3262 off_p
= (i
>> 2) & 1;
3263 rat_p
= (i
>> 3) & 1;
3265 fprintf (dump_file
, " ");
3267 fprintf (dump_file
, "sym + ");
3269 fprintf (dump_file
, "var + ");
3271 fprintf (dump_file
, "cst + ");
3273 fprintf (dump_file
, "rat * ");
3275 acost
= costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
];
3276 fprintf (dump_file
, "index costs %d\n", acost
);
3278 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3279 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3280 fprintf (dump_file
, " May include autoinc/dec\n");
3281 fprintf (dump_file
, "\n");
/* Sign-extend OFFSET from the width of Pmode.  */
3285 bits
= GET_MODE_BITSIZE (Pmode
);
3286 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3288 if ((offset
>> (bits
- 1) & 1))
/* Decide whether an autoinc/dec addressing form could cover this
   access; only plain offset==step shapes qualify.  */
3293 msize
= GET_MODE_SIZE (mem_mode
);
3294 autoinc_offset
= offset
;
3296 autoinc_offset
+= ratio
* cstep
;
3297 if (symbol_present
|| var_present
|| ratio
!= 1)
3299 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3301 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3303 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3305 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3306 && msize
== -cstep
))
/* The offset/ratio are usable directly only inside the probed ranges.  */
3310 offset_p
= (s_offset
!= 0
3311 && min_offset
[mem_mode
] <= s_offset
3312 && s_offset
<= max_offset
[mem_mode
]);
3313 ratio_p
= (ratio
!= 1
3314 && multiplier_allowed_in_address_p (ratio
, mem_mode
));
/* Parts that cannot be folded into the address pay for explicit
   multiply/add instructions.  */
3316 if (ratio
!= 1 && !ratio_p
)
3317 cost
+= multiply_by_cost (ratio
, Pmode
, speed
);
3319 if (s_offset
&& !offset_p
&& !symbol_present
)
3320 cost
+= add_cost (Pmode
, speed
);
3323 *may_autoinc
= autoinc
;
3324 acost
= costs
[mem_mode
][symbol_present
][var_present
][offset_p
][ratio_p
];
3325 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3326 return new_cost (cost
+ acost
, complexity
);
3329 /* Estimates cost of forcing expression EXPR into a variable. */
/* NOTE(review): damaged extraction -- return type, braces, several
   declarations (var, x, addr, i, op0, op1), some switch cases/labels and
   the final return are missing; text kept byte-for-byte.  Visible logic:
   lazily measure baseline costs (integer constant, symbol address,
   address arithmetic) for both size and speed, then price EXPR by its
   tree code, recursing on operands and capping at the spill cost.  */
3332 force_expr_to_var_cost (tree expr
, bool speed
)
3334 static bool costs_initialized
= false;
3335 static unsigned integer_cost
[2];
3336 static unsigned symbol_cost
[2];
3337 static unsigned address_cost
[2];
3339 comp_cost cost0
, cost1
, cost
;
3340 enum machine_mode mode
;
/* One-time measurement of baseline costs using a synthetic static var.  */
3342 if (!costs_initialized
)
3344 tree type
= build_pointer_type (integer_type_node
);
3349 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3350 TREE_STATIC (var
) = 1;
3351 x
= produce_memory_decl_rtl (var
, NULL
);
3352 SET_DECL_RTL (var
, x
);
3354 addr
= build1 (ADDR_EXPR
, type
, var
);
/* i == 0 measures size costs, i == 1 speed costs.  */
3357 for (i
= 0; i
< 2; i
++)
3359 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3362 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3365 = computation_cost (build2 (POINTER_PLUS_EXPR
, type
,
3367 build_int_cst (sizetype
, 2000)), i
) + 1;
3368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3370 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3371 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3372 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3373 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3374 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3375 fprintf (dump_file
, "\n");
3379 costs_initialized
= true;
/* SSA names / variables are already in a register: cheap.  */
3384 if (SSA_VAR_P (expr
))
/* Invariants: constants, symbol addresses, and general addresses each
   use their measured baseline.  */
3387 if (is_gimple_min_invariant (expr
))
3389 if (TREE_CODE (expr
) == INTEGER_CST
)
3390 return new_cost (integer_cost
[speed
], 0);
3392 if (TREE_CODE (expr
) == ADDR_EXPR
)
3394 tree obj
= TREE_OPERAND (expr
, 0);
3396 if (TREE_CODE (obj
) == VAR_DECL
3397 || TREE_CODE (obj
) == PARM_DECL
3398 || TREE_CODE (obj
) == RESULT_DECL
)
3399 return new_cost (symbol_cost
[speed
], 0);
3402 return new_cost (address_cost
[speed
], 0);
/* Recurse into binary/unary arithmetic operands.  */
3405 switch (TREE_CODE (expr
))
3407 case POINTER_PLUS_EXPR
:
3411 op0
= TREE_OPERAND (expr
, 0);
3412 op1
= TREE_OPERAND (expr
, 1);
3416 if (is_gimple_val (op0
))
3419 cost0
= force_expr_to_var_cost (op0
, speed
);
3421 if (is_gimple_val (op1
))
3424 cost1
= force_expr_to_var_cost (op1
, speed
);
3429 op0
= TREE_OPERAND (expr
, 0);
3433 if (is_gimple_val (op0
))
3436 cost0
= force_expr_to_var_cost (op0
, speed
);
3442 /* Just an arbitrary value, FIXME. */
3443 return new_cost (target_spill_cost
[speed
], 0);
/* Price the operation itself in the expression's machine mode.  */
3446 mode
= TYPE_MODE (TREE_TYPE (expr
));
3447 switch (TREE_CODE (expr
))
3449 case POINTER_PLUS_EXPR
:
3453 cost
= new_cost (add_cost (mode
, speed
), 0);
3457 if (cst_and_fits_in_hwi (op0
))
3458 cost
= new_cost (multiply_by_cost (int_cst_value (op0
), mode
, speed
), 0);
3459 else if (cst_and_fits_in_hwi (op1
))
3460 cost
= new_cost (multiply_by_cost (int_cst_value (op1
), mode
, speed
), 0);
3462 return new_cost (target_spill_cost
[speed
], 0);
3469 cost
= add_costs (cost
, cost0
);
3470 cost
= add_costs (cost
, cost1
);
3472 /* Bound the cost by target_spill_cost. The parts of complicated
3473 computations often are either loop invariant or at least can
3474 be shared between several iv uses, so letting this grow without
3475 limits would not give reasonable results. */
3476 if (cost
.cost
> (int) target_spill_cost
[speed
])
3477 cost
.cost
= target_spill_cost
[speed
];
3482 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3483 invariants the computation depends on. */
/* NOTE(review): damaged extraction -- return type, braces and the guard
   around the depends_on walk are missing.  Visible logic: record invariant
   dependencies via walk_tree, then delegate the pricing.  */
3486 force_var_cost (struct ivopts_data
*data
,
3487 tree expr
, bitmap
*depends_on
)
/* fd_ivopts_data is presumably file-scope state read by find_depends --
   TODO confirm in the undamaged original.  */
3491 fd_ivopts_data
= data
;
3492 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3495 return force_expr_to_var_cost (expr
, data
->speed
);
3498 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3499 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3500 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3501 invariants the computation depends on. */
/* NOTE(review): damaged extraction -- return type, braces, the
   core/toffset declarations, part of the bail-out condition and the final
   zero-cost returns are missing; text kept byte-for-byte.  */
3504 split_address_cost (struct ivopts_data
*data
,
3505 tree addr
, bool *symbol_present
, bool *var_present
,
3506 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3509 HOST_WIDE_INT bitsize
;
3510 HOST_WIDE_INT bitpos
;
3512 enum machine_mode mode
;
3513 int unsignedp
, volatilep
;
/* Decompose ADDR into its base object plus bit offset.  */
3515 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3516 &unsignedp
, &volatilep
, false);
/* Non-byte-aligned or non-VAR_DECL bases cannot be split; fall back to
   pricing the whole address as a variable.  */
3519 || bitpos
% BITS_PER_UNIT
!= 0
3520 || TREE_CODE (core
) != VAR_DECL
)
3522 *symbol_present
= false;
3523 *var_present
= true;
3524 fd_ivopts_data
= data
;
3525 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3526 return new_cost (target_spill_cost
[data
->speed
], 0);
3529 *offset
+= bitpos
/ BITS_PER_UNIT
;
/* Static/external decls become the symbol part; others the var part.  */
3530 if (TREE_STATIC (core
)
3531 || DECL_EXTERNAL (core
))
3533 *symbol_present
= true;
3534 *var_present
= false;
3538 *symbol_present
= false;
3539 *var_present
= true;
3543 /* Estimates cost of expressing difference of addresses E1 - E2 as
3544 var + symbol + offset. The value of offset is added to OFFSET,
3545 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3546 part is missing. DEPENDS_ON is a set of the invariants the computation
/* NOTE(review): damaged extraction -- return type, braces, the `type`
   declaration and the constant-difference return path body are missing;
   text kept byte-for-byte.  */
3550 ptr_difference_cost (struct ivopts_data
*data
,
3551 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3552 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3554 HOST_WIDE_INT diff
= 0;
3555 aff_tree aff_e1
, aff_e2
;
3558 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
/* Constant pointer difference: fold it entirely into *OFFSET.  */
3560 if (ptr_difference_const (e1
, e2
, &diff
))
3563 *symbol_present
= false;
3564 *var_present
= false;
/* E2 == 0: just split E1's address.  */
3568 if (integer_zerop (e2
))
3569 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3570 symbol_present
, var_present
, offset
, depends_on
);
3572 *symbol_present
= false;
3573 *var_present
= true;
/* General case: price E1 - E2 computed in affine form.  */
3575 type
= signed_type_for (TREE_TYPE (e1
));
3576 tree_to_aff_combination (e1
, type
, &aff_e1
);
3577 tree_to_aff_combination (e2
, type
, &aff_e2
);
3578 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3579 aff_combination_add (&aff_e1
, &aff_e2
);
3581 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3584 /* Estimates cost of expressing difference E1 - E2 as
3585 var + symbol + offset. The value of offset is added to OFFSET,
3586 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3587 part is missing. DEPENDS_ON is a set of the invariants the computation
/* NOTE(review): damaged extraction -- return type, braces, the `type`
   declaration and several zero-cost return bodies are missing; text kept
   byte-for-byte.  */
3591 difference_cost (struct ivopts_data
*data
,
3592 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3593 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3595 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3596 unsigned HOST_WIDE_INT off1
, off2
;
3597 aff_tree aff_e1
, aff_e2
;
/* Peel constant offsets off both operands first.  */
3600 e1
= strip_offset (e1
, &off1
);
3601 e2
= strip_offset (e2
, &off2
);
3602 *offset
+= off1
- off2
;
/* Address expressions get the pointer-specific treatment.  */
3607 if (TREE_CODE (e1
) == ADDR_EXPR
)
3608 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3609 offset
, depends_on
);
3610 *symbol_present
= false;
/* Identical operands cancel: no variable part at all.  */
3612 if (operand_equal_p (e1
, e2
, 0))
3614 *var_present
= false;
3618 *var_present
= true;
3620 if (integer_zerop (e2
))
3621 return force_var_cost (data
, e1
, depends_on
);
/* E1 == 0: cost of computing -E2.  */
3623 if (integer_zerop (e1
))
3625 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3626 cost
.cost
+= multiply_by_cost (-1, mode
, data
->speed
);
/* General case: price E1 - E2 via an affine combination.  */
3630 type
= signed_type_for (TREE_TYPE (e1
));
3631 tree_to_aff_combination (e1
, type
, &aff_e1
);
3632 tree_to_aff_combination (e2
, type
, &aff_e2
);
3633 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3634 aff_combination_add (&aff_e1
, &aff_e2
);
3636 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3639 /* Determines the cost of the computation by that USE is expressed
3640 from induction variable CAND. If ADDRESS_P is true, we just need
3641 to create an address from it, otherwise we want to get it into
3642 register. A set of invariants we depend on is stored in
3643 DEPENDS_ON. AT is the statement at that the value is computed.
3644 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3645 addressing is likely. */
/* NOTE(review): damaged extraction -- return type, braces, the trailing
   `can_autoinc` parameter line, declarations of cbase/cstep/cost/rat and
   several condition and fall-through bodies are missing; text kept
   byte-for-byte.  Visible structure: validate the candidate, compute the
   step ratio, decompose use = ubase - ratio*cbase + ratio*var into
   sym/var/offset/ratio parts via difference_cost, then either price the
   address form or the register computation.  */
3648 get_computation_cost_at (struct ivopts_data
*data
,
3649 struct iv_use
*use
, struct iv_cand
*cand
,
3650 bool address_p
, bitmap
*depends_on
, gimple at
,
3653 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3655 tree utype
= TREE_TYPE (ubase
), ctype
;
3656 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3657 HOST_WIDE_INT ratio
, aratio
;
3658 bool var_present
, symbol_present
, stmt_is_after_inc
;
3661 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3665 /* Only consider real candidates. */
3667 return infinite_cost
;
3669 cbase
= cand
->iv
->base
;
3670 cstep
= cand
->iv
->step
;
3671 ctype
= TREE_TYPE (cbase
);
3673 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3675 /* We do not have a precision to express the values of use. */
3676 return infinite_cost
;
3681 /* Do not try to express address of an object with computation based
3682 on address of a different object. This may cause problems in rtl
3683 level alias analysis (that does not expect this to be happening,
3684 as this is illegal in C), and would be unlikely to be useful
3686 if (use
->iv
->base_object
3687 && cand
->iv
->base_object
3688 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3689 return infinite_cost
;
3692 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3694 /* TODO -- add direct handling of this case. */
3698 /* CSTEPI is removed from the offset in case statement is after the
3699 increment. If the step is not constant, we use zero instead.
3700 This is a bit imprecise (there is the extra addition), but
3701 redundancy elimination is likely to transform the code so that
3702 it uses value of the variable before increment anyway,
3703 so it is not that much unrealistic. */
3704 if (cst_and_fits_in_hwi (cstep
))
3705 cstepi
= int_cst_value (cstep
);
3709 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3710 return infinite_cost
;
/* The ratio must fit a signed host-wide int to be usable below.  */
3712 if (double_int_fits_in_shwi_p (rat
))
3713 ratio
= double_int_to_shwi (rat
);
3715 return infinite_cost
;
3718 ctype
= TREE_TYPE (cbase
);
3720 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3721 or ratio == 1, it is better to handle this like
3723 ubase - ratio * cbase + ratio * var
3725 (also holds in the case ratio == -1, TODO. */
3727 if (cst_and_fits_in_hwi (cbase
))
3729 offset
= - ratio
* int_cst_value (cbase
);
3730 cost
= difference_cost (data
,
3731 ubase
, build_int_cst (utype
, 0),
3732 &symbol_present
, &var_present
, &offset
,
3735 else if (ratio
== 1)
3737 cost
= difference_cost (data
,
3739 &symbol_present
, &var_present
, &offset
,
3743 && !POINTER_TYPE_P (ctype
)
3744 && multiplier_allowed_in_address_p (ratio
,
3745 TYPE_MODE (TREE_TYPE (utype
))))
3748 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
3749 cost
= difference_cost (data
,
3751 &symbol_present
, &var_present
, &offset
,
/* General fallback: force cbase into a variable and add the costs.  */
3756 cost
= force_var_cost (data
, cbase
, depends_on
);
3757 cost
.cost
+= add_cost (TYPE_MODE (ctype
), data
->speed
);
3758 cost
= add_costs (cost
,
3759 difference_cost (data
,
3760 ubase
, build_int_cst (utype
, 0),
3761 &symbol_present
, &var_present
,
3762 &offset
, depends_on
));
3765 /* If we are after the increment, the value of the candidate is higher by
3767 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
3768 if (stmt_is_after_inc
)
3769 offset
-= ratio
* cstepi
;
3771 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3772 (symbol/var1/const parts may be omitted). If we are looking for an
3773 address, find the cost of addressing this. */
3775 return add_costs (cost
,
3776 get_address_cost (symbol_present
, var_present
,
3777 offset
, ratio
, cstepi
,
3778 TYPE_MODE (TREE_TYPE (utype
)),
3779 speed
, stmt_is_after_inc
,
3782 /* Otherwise estimate the costs for computing the expression. */
3783 if (!symbol_present
&& !var_present
&& !offset
)
3786 cost
.cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
), speed
);
3790 /* Symbol + offset should be compile-time computable so consider that they
3791 are added once to the variable, if present. */
3792 if (var_present
&& (symbol_present
|| offset
))
3793 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
)
3794 / AVG_LOOP_NITER (data
->current_loop
);
3796 /* Having offset does not affect runtime cost in case it is added to
3797 symbol, but it increases complexity. */
3801 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
);
3803 aratio
= ratio
> 0 ? ratio
: -ratio
;
3805 cost
.cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
), speed
);
3809 *can_autoinc
= false;
3812 /* Just get the expression, expand it and measure the cost. */
3813 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3816 return infinite_cost
;
3819 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3821 return new_cost (computation_cost (comp
, speed
), 0);
3825 /* Determines the cost of the computation by that USE is expressed
3826 from induction variable CAND. If ADDRESS_P is true, we just need
3827 to create an address from it, otherwise we want to get it into
3828 register. A set of invariants we depend on is stored in
3829 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3830 autoinc addressing is likely. */
/* NOTE(review): thin wrapper -- delegates to get_computation_cost_at at
   the use's own statement.  The trailing argument line (presumably
   can_autoinc) was lost in extraction.  */
3833 get_computation_cost (struct ivopts_data
*data
,
3834 struct iv_use
*use
, struct iv_cand
*cand
,
3835 bool address_p
, bitmap
*depends_on
, bool *can_autoinc
)
3837 return get_computation_cost_at (data
,
3838 use
, cand
, address_p
, depends_on
, use
->stmt
,
3842 /* Determines cost of basing replacement of USE on CAND in a generic
/* NOTE(review): damaged extraction -- return type, braces, the
   cost/depends_on declarations and the early `return true` after the
   zero-cost shortcut are missing; text kept byte-for-byte.  */
3846 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3847 struct iv_use
*use
, struct iv_cand
*cand
)
3852 /* The simple case first -- if we need to express value of the preserved
3853 original biv, the cost is 0. This also prevents us from counting the
3854 cost of increment twice -- once at this use and once in the cost of
3856 if (cand
->pos
== IP_ORIGINAL
3857 && cand
->incremented_at
== use
->stmt
)
3859 set_use_iv_cost (data
, use
, cand
, zero_cost
, NULL
, NULL_TREE
);
3863 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
, NULL
);
3864 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
/* Report whether the candidate is usable at all for this use.  */
3866 return !infinite_cost_p (cost
);
3869 /* Determines cost of basing replacement of USE on CAND in an address. */
/* NOTE(review): damaged extraction -- return type, braces, the
   depends_on/can_autoinc declarations and the tail of the
   get_computation_cost argument list are missing; text kept
   byte-for-byte.  */
3872 determine_use_iv_cost_address (struct ivopts_data
*data
,
3873 struct iv_use
*use
, struct iv_cand
*cand
)
3877 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
/* A candidate created for this use's autoincrement already pays its
   step elsewhere: refund it here.  */
3880 if (cand
->ainc_use
== use
)
3883 cost
.cost
-= cand
->cost_step
;
3884 /* If we generated the candidate solely for exploiting autoincrement
3885 opportunities, and it turns out it can't be used, set the cost to
3886 infinity to make sure we ignore it. */
3887 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
3888 cost
= infinite_cost
;
3890 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3892 return !infinite_cost_p (cost
);
3895 /* Computes value of candidate CAND at position AT in iteration NITER, and
3896 stores it to VAL. */
/* NOTE(review): damaged extraction -- return type, braces and the VAL
   parameter line are missing; text kept byte-for-byte.  Visible logic:
   val = base + step * niter (plus one extra step when AT follows the
   increment), computed in affine form.  */
3899 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
3902 aff_tree step
, delta
, nit
;
3903 struct iv
*iv
= cand
->iv
;
3904 tree type
= TREE_TYPE (iv
->base
);
/* Pointer IVs step in sizetype, not in the pointer type itself.  */
3905 tree steptype
= type
;
3906 if (POINTER_TYPE_P (type
))
3907 steptype
= sizetype
;
3909 tree_to_aff_combination (iv
->step
, steptype
, &step
);
3910 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
3911 aff_combination_convert (&nit
, steptype
);
/* delta = step * niter.  */
3912 aff_combination_mult (&nit
, &step
, &delta
);
3913 if (stmt_after_increment (loop
, cand
, at
))
3914 aff_combination_add (&delta
, &step
);
3916 tree_to_aff_combination (iv
->base
, type
, val
);
3917 aff_combination_add (val
, &delta
);
3920 /* Returns period of induction variable iv. */
/* NOTE(review): damaged extraction -- return type, braces, the pow2div
   declaration and the final return are missing; text kept
   byte-for-byte.  */
3923 iv_period (struct iv
*iv
)
3925 tree step
= iv
->step
, period
, type
;
3928 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3930 /* Period of the iv is gcd (step, type range). Since type range is power
3931 of two, it suffices to determine the maximum power of two that divides
3933 pow2div
= num_ending_zeros (step
);
3934 type
= unsigned_type_for (TREE_TYPE (step
));
/* period = 2^(precision - pow2div) - 1, as a low-bits mask.  */
3936 period
= build_low_bits_mask (type
,
3937 (TYPE_PRECISION (type
)
3938 - tree_low_cst (pow2div
, 1)));
3943 /* Returns the comparison operator used when eliminating the iv USE. */
/* NOTE(review): damaged extraction -- braces and the ex_bb/exit
   declarations are missing; text kept byte-for-byte.  Picks EQ_EXPR or
   NE_EXPR depending on which successor edge leaves the loop.  */
3945 static enum tree_code
3946 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3948 struct loop
*loop
= data
->current_loop
;
3952 ex_bb
= gimple_bb (use
->stmt
);
/* Choose the successor edge that exits the loop.  */
3953 exit
= EDGE_SUCC (ex_bb
, 0);
3954 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3955 exit
= EDGE_SUCC (ex_bb
, 1);
3957 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3960 /* Check whether it is possible to express the condition in USE by comparison
3961 of candidate CAND. If so, store the value compared with to BOUND. */
/* NOTE(review): damaged extraction -- return type, braces, the
   nit/period/ex_bb/exit/bnd declarations, several `return false` bodies
   and the final `return true` are missing; text kept byte-for-byte.  */
3964 may_eliminate_iv (struct ivopts_data
*data
,
3965 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3970 struct loop
*loop
= data
->current_loop
;
/* Elimination needs a constant candidate step.  */
3973 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3976 /* For now works only for exits that dominate the loop latch.
3977 TODO: extend to other conditions inside loop body. */
3978 ex_bb
= gimple_bb (use
->stmt
);
3979 if (use
->stmt
!= last_stmt (ex_bb
)
3980 || gimple_code (use
->stmt
) != GIMPLE_COND
3981 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
/* Identify the true loop-exit edge; both successors inside means this
   is not an exit test.  */
3984 exit
= EDGE_SUCC (ex_bb
, 0);
3985 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3986 exit
= EDGE_SUCC (ex_bb
, 1);
3987 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3990 nit
= niter_for_exit (data
, exit
);
3994 /* Determine whether we can use the variable to test the exit condition.
3995 This is the case iff the period of the induction variable is greater
3996 than the number of iterations for which the exit condition is true. */
3997 period
= iv_period (cand
->iv
);
3999 /* If the number of iterations is constant, compare against it directly. */
4000 if (TREE_CODE (nit
) == INTEGER_CST
)
4002 if (!tree_int_cst_lt (nit
, period
))
4006 /* If not, and if this is the only possible exit of the loop, see whether
4007 we can get a conservative estimate on the number of iterations of the
4008 entire loop and compare against that instead. */
4009 else if (loop_only_exit_p (loop
, exit
))
4011 double_int period_value
, max_niter
;
4012 if (!estimated_loop_iterations (loop
, true, &max_niter
))
4014 period_value
= tree_to_double_int (period
);
4015 if (double_int_ucmp (max_niter
, period_value
) >= 0)
4019 /* Otherwise, punt. */
/* Compute the candidate's value at the final iteration; that becomes
   the bound to compare against.  */
4023 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
4025 *bound
= aff_combination_to_tree (&bnd
);
4026 /* It is unlikely that computing the number of iterations using division
4027 would be more profitable than keeping the original induction variable. */
4028 if (expression_expensive_p (*bound
))
4033 /* Determines cost of basing replacement of USE on CAND in a condition. */
4036 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4037 struct iv_use
*use
, struct iv_cand
*cand
)
4039 tree bound
= NULL_TREE
;
4041 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4042 comp_cost elim_cost
, express_cost
, cost
;
4045 /* Only consider real candidates. */
4048 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
);
4052 /* Try iv elimination. */
4053 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4055 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4056 /* The bound is a loop invariant, so it will be only computed
4058 elim_cost
.cost
/= AVG_LOOP_NITER (data
->current_loop
);
4061 elim_cost
= infinite_cost
;
4063 /* Try expressing the original giv. If it is compared with an invariant,
4064 note that we cannot get rid of it. */
4065 ok
= extract_cond_operands (data
, use
->stmt
, NULL
, NULL
, NULL
, &cmp_iv
);
4068 express_cost
= get_computation_cost (data
, use
, cand
, false,
4069 &depends_on_express
, NULL
);
4070 fd_ivopts_data
= data
;
4071 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4073 /* Choose the better approach, preferring the eliminated IV. */
4074 if (compare_costs (elim_cost
, express_cost
) <= 0)
4077 depends_on
= depends_on_elim
;
4078 depends_on_elim
= NULL
;
4082 cost
= express_cost
;
4083 depends_on
= depends_on_express
;
4084 depends_on_express
= NULL
;
4088 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4090 if (depends_on_elim
)
4091 BITMAP_FREE (depends_on_elim
);
4092 if (depends_on_express
)
4093 BITMAP_FREE (depends_on_express
);
4095 return !infinite_cost_p (cost
);
4098 /* Determines cost of basing replacement of USE on CAND. Returns false
4099 if USE cannot be based on CAND. */
4102 determine_use_iv_cost (struct ivopts_data
*data
,
4103 struct iv_use
*use
, struct iv_cand
*cand
)
4107 case USE_NONLINEAR_EXPR
:
4108 return determine_use_iv_cost_generic (data
, use
, cand
);
4111 return determine_use_iv_cost_address (data
, use
, cand
);
4114 return determine_use_iv_cost_condition (data
, use
, cand
);
4121 /* Return true if get_computation_cost indicates that autoincrement is
4122 a possibility for the pair of USE and CAND, false otherwise. */
4125 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4126 struct iv_cand
*cand
)
4132 if (use
->type
!= USE_ADDRESS
)
4135 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4138 BITMAP_FREE (depends_on
);
4140 return !infinite_cost_p (cost
) && can_autoinc
;
4143 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4144 use that allows autoincrement, and set their AINC_USE if possible. */
4147 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4151 for (i
= 0; i
< n_iv_cands (data
); i
++)
4153 struct iv_cand
*cand
= iv_cand (data
, i
);
4154 struct iv_use
*closest
= NULL
;
4155 if (cand
->pos
!= IP_ORIGINAL
)
4157 for (j
= 0; j
< n_iv_uses (data
); j
++)
4159 struct iv_use
*use
= iv_use (data
, j
);
4160 unsigned uid
= gimple_uid (use
->stmt
);
4161 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4162 || uid
> gimple_uid (cand
->incremented_at
))
4164 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4167 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4169 cand
->ainc_use
= closest
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4193 /* Determines costs of basing the use of the iv on an iv candidate. */
4196 determine_use_iv_costs (struct ivopts_data
*data
)
4200 struct iv_cand
*cand
;
4201 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4203 alloc_use_cost_map (data
);
4205 for (i
= 0; i
< n_iv_uses (data
); i
++)
4207 use
= iv_use (data
, i
);
4209 if (data
->consider_all_candidates
)
4211 for (j
= 0; j
< n_iv_cands (data
); j
++)
4213 cand
= iv_cand (data
, j
);
4214 determine_use_iv_cost (data
, use
, cand
);
4221 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4223 cand
= iv_cand (data
, j
);
4224 if (!determine_use_iv_cost (data
, use
, cand
))
4225 bitmap_set_bit (to_clear
, j
);
4228 /* Remove the candidates for that the cost is infinite from
4229 the list of related candidates. */
4230 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4231 bitmap_clear (to_clear
);
4235 BITMAP_FREE (to_clear
);
4237 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4239 fprintf (dump_file
, "Use-candidate costs:\n");
4241 for (i
= 0; i
< n_iv_uses (data
); i
++)
4243 use
= iv_use (data
, i
);
4245 fprintf (dump_file
, "Use %d:\n", i
);
4246 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4247 for (j
= 0; j
< use
->n_map_members
; j
++)
4249 if (!use
->cost_map
[j
].cand
4250 || infinite_cost_p (use
->cost_map
[j
].cost
))
4253 fprintf (dump_file
, " %d\t%d\t%d\t",
4254 use
->cost_map
[j
].cand
->id
,
4255 use
->cost_map
[j
].cost
.cost
,
4256 use
->cost_map
[j
].cost
.complexity
);
4257 if (use
->cost_map
[j
].depends_on
)
4258 bitmap_print (dump_file
,
4259 use
->cost_map
[j
].depends_on
, "","");
4260 fprintf (dump_file
, "\n");
4263 fprintf (dump_file
, "\n");
4265 fprintf (dump_file
, "\n");
4269 /* Determines cost of the candidate CAND. */
4272 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4274 comp_cost cost_base
;
4275 unsigned cost
, cost_step
;
4284 /* There are two costs associated with the candidate -- its increment
4285 and its initialization. The second is almost negligible for any loop
4286 that rolls enough, so we take it just very little into account. */
4288 base
= cand
->iv
->base
;
4289 cost_base
= force_var_cost (data
, base
, NULL
);
4290 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)), data
->speed
);
4292 cost
= cost_step
+ cost_base
.cost
/ AVG_LOOP_NITER (current_loop
);
4294 /* Prefer the original ivs unless we may gain something by replacing it.
4295 The reason is to make debugging simpler; so this is not relevant for
4296 artificial ivs created by other optimization passes. */
4297 if (cand
->pos
!= IP_ORIGINAL
4298 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4301 /* Prefer not to insert statements into latch unless there are some
4302 already (so that we do not create unnecessary jumps). */
4303 if (cand
->pos
== IP_END
4304 && empty_block_p (ip_end_pos (data
->current_loop
)))
4308 cand
->cost_step
= cost_step
;
4311 /* Determines costs of computation of the candidates. */
4314 determine_iv_costs (struct ivopts_data
*data
)
4318 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4320 fprintf (dump_file
, "Candidate costs:\n");
4321 fprintf (dump_file
, " cand\tcost\n");
4324 for (i
= 0; i
< n_iv_cands (data
); i
++)
4326 struct iv_cand
*cand
= iv_cand (data
, i
);
4328 determine_iv_cost (data
, cand
);
4330 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4331 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4334 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4335 fprintf (dump_file
, "\n");
4338 /* Calculates cost for having SIZE induction variables. */
4341 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4343 /* We add size to the cost, so that we prefer eliminating ivs
4345 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
);
4348 /* For each size of the induction variable set determine the penalty. */
4351 determine_set_costs (struct ivopts_data
*data
)
4355 gimple_stmt_iterator psi
;
4357 struct loop
*loop
= data
->current_loop
;
4360 /* We use the following model (definitely improvable, especially the
4361 cost function -- TODO):
4363 We estimate the number of registers available (using MD data), name it A.
4365 We estimate the number of registers used by the loop, name it U. This
4366 number is obtained as the number of loop phi nodes (not counting virtual
4367 registers and bivs) + the number of variables from outside of the loop.
4369 We set a reserve R (free regs that are used for temporary computations,
4370 etc.). For now the reserve is a constant 3.
4372 Let I be the number of induction variables.
4374 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4375 make a lot of ivs without a reason).
4376 -- if A - R < U + I <= A, the cost is I * PRES_COST
4377 -- if U + I > A, the cost is I * PRES_COST and
4378 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4380 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4382 fprintf (dump_file
, "Global costs:\n");
4383 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4384 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
4385 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
4389 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
4391 phi
= gsi_stmt (psi
);
4392 op
= PHI_RESULT (phi
);
4394 if (!is_gimple_reg (op
))
4397 if (get_iv (data
, op
))
4403 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4405 struct version_info
*info
= ver_info (data
, j
);
4407 if (info
->inv_id
&& info
->has_nonlin_use
)
4411 data
->regs_used
= n
;
4412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4413 fprintf (dump_file
, " regs_used %d\n", n
);
4415 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4417 fprintf (dump_file
, " cost for size:\n");
4418 fprintf (dump_file
, " ivs\tcost\n");
4419 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4420 fprintf (dump_file
, " %d\t%d\n", j
,
4421 ivopts_global_cost_for_size (data
, j
));
4422 fprintf (dump_file
, "\n");
4426 /* Returns true if A is a cheaper cost pair than B. */
4429 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4439 cmp
= compare_costs (a
->cost
, b
->cost
);
4446 /* In case the costs are the same, prefer the cheaper candidate. */
4447 if (a
->cand
->cost
< b
->cand
->cost
)
4453 /* Computes the cost field of IVS structure. */
4456 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4458 comp_cost cost
= ivs
->cand_use_cost
;
4459 cost
.cost
+= ivs
->cand_cost
;
4460 cost
.cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4465 /* Remove invariants in set INVS to set IVS. */
4468 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4476 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4478 ivs
->n_invariant_uses
[iid
]--;
4479 if (ivs
->n_invariant_uses
[iid
] == 0)
4484 /* Set USE not to be expressed by any candidate in IVS. */
4487 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4490 unsigned uid
= use
->id
, cid
;
4491 struct cost_pair
*cp
;
4493 cp
= ivs
->cand_for_use
[uid
];
4499 ivs
->cand_for_use
[uid
] = NULL
;
4500 ivs
->n_cand_uses
[cid
]--;
4502 if (ivs
->n_cand_uses
[cid
] == 0)
4504 bitmap_clear_bit (ivs
->cands
, cid
);
4505 /* Do not count the pseudocandidates. */
4509 ivs
->cand_cost
-= cp
->cand
->cost
;
4511 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4514 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
4516 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4517 iv_ca_recount_cost (data
, ivs
);
4520 /* Add invariants in set INVS to set IVS. */
4523 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4531 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4533 ivs
->n_invariant_uses
[iid
]++;
4534 if (ivs
->n_invariant_uses
[iid
] == 1)
4539 /* Set cost pair for USE in set IVS to CP. */
4542 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4543 struct iv_use
*use
, struct cost_pair
*cp
)
4545 unsigned uid
= use
->id
, cid
;
4547 if (ivs
->cand_for_use
[uid
] == cp
)
4550 if (ivs
->cand_for_use
[uid
])
4551 iv_ca_set_no_cp (data
, ivs
, use
);
4558 ivs
->cand_for_use
[uid
] = cp
;
4559 ivs
->n_cand_uses
[cid
]++;
4560 if (ivs
->n_cand_uses
[cid
] == 1)
4562 bitmap_set_bit (ivs
->cands
, cid
);
4563 /* Do not count the pseudocandidates. */
4567 ivs
->cand_cost
+= cp
->cand
->cost
;
4569 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4572 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
4573 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4574 iv_ca_recount_cost (data
, ivs
);
4578 /* Extend set IVS by expressing USE by some of the candidates in it
4582 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4585 struct cost_pair
*best_cp
= NULL
, *cp
;
4589 gcc_assert (ivs
->upto
>= use
->id
);
4591 if (ivs
->upto
== use
->id
)
4597 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4599 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4601 if (cheaper_cost_pair (cp
, best_cp
))
4605 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4608 /* Get cost for assignment IVS. */
4611 iv_ca_cost (struct iv_ca
*ivs
)
4613 /* This was a conditional expression but it triggered a bug in
4616 return infinite_cost
;
4621 /* Returns true if all dependences of CP are among invariants in IVS. */
4624 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4629 if (!cp
->depends_on
)
4632 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4634 if (ivs
->n_invariant_uses
[i
] == 0)
4641 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4642 it before NEXT_CHANGE. */
4644 static struct iv_ca_delta
*
4645 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4646 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4648 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4651 change
->old_cp
= old_cp
;
4652 change
->new_cp
= new_cp
;
4653 change
->next_change
= next_change
;
4658 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4661 static struct iv_ca_delta
*
4662 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4664 struct iv_ca_delta
*last
;
4672 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4674 last
->next_change
= l2
;
4679 /* Returns candidate by that USE is expressed in IVS. */
4681 static struct cost_pair
*
4682 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4684 return ivs
->cand_for_use
[use
->id
];
4687 /* Reverse the list of changes DELTA, forming the inverse to it. */
4689 static struct iv_ca_delta
*
4690 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4692 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4693 struct cost_pair
*tmp
;
4695 for (act
= delta
; act
; act
= next
)
4697 next
= act
->next_change
;
4698 act
->next_change
= prev
;
4702 act
->old_cp
= act
->new_cp
;
4709 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4710 reverted instead. */
4713 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4714 struct iv_ca_delta
*delta
, bool forward
)
4716 struct cost_pair
*from
, *to
;
4717 struct iv_ca_delta
*act
;
4720 delta
= iv_ca_delta_reverse (delta
);
4722 for (act
= delta
; act
; act
= act
->next_change
)
4726 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4727 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4731 iv_ca_delta_reverse (delta
);
4734 /* Returns true if CAND is used in IVS. */
4737 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4739 return ivs
->n_cand_uses
[cand
->id
] > 0;
4742 /* Returns number of induction variable candidates in the set IVS. */
4745 iv_ca_n_cands (struct iv_ca
*ivs
)
4747 return ivs
->n_cands
;
4750 /* Free the list of changes DELTA. */
4753 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4755 struct iv_ca_delta
*act
, *next
;
4757 for (act
= *delta
; act
; act
= next
)
4759 next
= act
->next_change
;
4766 /* Allocates new iv candidates assignment. */
4768 static struct iv_ca
*
4769 iv_ca_new (struct ivopts_data
*data
)
4771 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4775 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4776 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4777 nw
->cands
= BITMAP_ALLOC (NULL
);
4780 nw
->cand_use_cost
= zero_cost
;
4782 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4783 nw
->cost
= zero_cost
;
4788 /* Free memory occupied by the set IVS. */
4791 iv_ca_free (struct iv_ca
**ivs
)
4793 free ((*ivs
)->cand_for_use
);
4794 free ((*ivs
)->n_cand_uses
);
4795 BITMAP_FREE ((*ivs
)->cands
);
4796 free ((*ivs
)->n_invariant_uses
);
4801 /* Dumps IVS to FILE. */
4804 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4806 const char *pref
= " invariants ";
4808 comp_cost cost
= iv_ca_cost (ivs
);
4810 fprintf (file
, " cost %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
4811 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4813 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4814 if (ivs
->n_invariant_uses
[i
])
4816 fprintf (file
, "%s%d", pref
, i
);
4819 fprintf (file
, "\n");
4822 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4823 new set, and store differences in DELTA. Number of induction variables
4824 in the new set is stored to N_IVS. */
4827 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4828 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4834 struct cost_pair
*old_cp
, *new_cp
;
4837 for (i
= 0; i
< ivs
->upto
; i
++)
4839 use
= iv_use (data
, i
);
4840 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4843 && old_cp
->cand
== cand
)
4846 new_cp
= get_use_iv_cost (data
, use
, cand
);
4850 if (!iv_ca_has_deps (ivs
, new_cp
))
4853 if (!cheaper_cost_pair (new_cp
, old_cp
))
4856 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4859 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4860 cost
= iv_ca_cost (ivs
);
4862 *n_ivs
= iv_ca_n_cands (ivs
);
4863 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4868 /* Try narrowing set IVS by removing CAND. Return the cost of
4869 the new set and store the differences in DELTA. */
4872 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4873 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4877 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4879 struct iv_cand
*cnd
;
4883 for (i
= 0; i
< n_iv_uses (data
); i
++)
4885 use
= iv_use (data
, i
);
4887 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4888 if (old_cp
->cand
!= cand
)
4893 if (data
->consider_all_candidates
)
4895 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4900 cnd
= iv_cand (data
, ci
);
4902 cp
= get_use_iv_cost (data
, use
, cnd
);
4905 if (!iv_ca_has_deps (ivs
, cp
))
4908 if (!cheaper_cost_pair (cp
, new_cp
))
4916 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4921 cnd
= iv_cand (data
, ci
);
4923 cp
= get_use_iv_cost (data
, use
, cnd
);
4926 if (!iv_ca_has_deps (ivs
, cp
))
4929 if (!cheaper_cost_pair (cp
, new_cp
))
4938 iv_ca_delta_free (delta
);
4939 return infinite_cost
;
4942 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4945 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4946 cost
= iv_ca_cost (ivs
);
4947 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4952 /* Try optimizing the set of candidates IVS by removing candidates different
4953 from to EXCEPT_CAND from it. Return cost of the new set, and store
4954 differences in DELTA. */
4957 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4958 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4961 struct iv_ca_delta
*act_delta
, *best_delta
;
4963 comp_cost best_cost
, acost
;
4964 struct iv_cand
*cand
;
4967 best_cost
= iv_ca_cost (ivs
);
4969 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4971 cand
= iv_cand (data
, i
);
4973 if (cand
== except_cand
)
4976 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4978 if (compare_costs (acost
, best_cost
) < 0)
4981 iv_ca_delta_free (&best_delta
);
4982 best_delta
= act_delta
;
4985 iv_ca_delta_free (&act_delta
);
4994 /* Recurse to possibly remove other unnecessary ivs. */
4995 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4996 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4997 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4998 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5002 /* Tries to extend the sets IVS in the best possible way in order
5003 to express the USE. */
5006 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5009 comp_cost best_cost
, act_cost
;
5012 struct iv_cand
*cand
;
5013 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5014 struct cost_pair
*cp
;
5016 iv_ca_add_use (data
, ivs
, use
);
5017 best_cost
= iv_ca_cost (ivs
);
5019 cp
= iv_ca_cand_for_use (ivs
, use
);
5022 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5023 iv_ca_set_no_cp (data
, ivs
, use
);
5026 /* First try important candidates not based on any memory object. Only if
5027 this fails, try the specific ones. Rationale -- in loops with many
5028 variables the best choice often is to use just one generic biv. If we
5029 added here many ivs specific to the uses, the optimization algorithm later
5030 would be likely to get stuck in a local minimum, thus causing us to create
5031 too many ivs. The approach from few ivs to more seems more likely to be
5032 successful -- starting from few ivs, replacing an expensive use by a
5033 specific iv should always be a win. */
5034 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5036 cand
= iv_cand (data
, i
);
5038 if (cand
->iv
->base_object
!= NULL_TREE
)
5041 if (iv_ca_cand_used_p (ivs
, cand
))
5044 cp
= get_use_iv_cost (data
, use
, cand
);
5048 iv_ca_set_cp (data
, ivs
, use
, cp
);
5049 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5050 iv_ca_set_no_cp (data
, ivs
, use
);
5051 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5053 if (compare_costs (act_cost
, best_cost
) < 0)
5055 best_cost
= act_cost
;
5057 iv_ca_delta_free (&best_delta
);
5058 best_delta
= act_delta
;
5061 iv_ca_delta_free (&act_delta
);
5064 if (infinite_cost_p (best_cost
))
5066 for (i
= 0; i
< use
->n_map_members
; i
++)
5068 cp
= use
->cost_map
+ i
;
5073 /* Already tried this. */
5074 if (cand
->important
&& cand
->iv
->base_object
== NULL_TREE
)
5077 if (iv_ca_cand_used_p (ivs
, cand
))
5081 iv_ca_set_cp (data
, ivs
, use
, cp
);
5082 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5083 iv_ca_set_no_cp (data
, ivs
, use
);
5084 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5087 if (compare_costs (act_cost
, best_cost
) < 0)
5089 best_cost
= act_cost
;
5092 iv_ca_delta_free (&best_delta
);
5093 best_delta
= act_delta
;
5096 iv_ca_delta_free (&act_delta
);
5100 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5101 iv_ca_delta_free (&best_delta
);
5103 return !infinite_cost_p (best_cost
);
5106 /* Finds an initial assignment of candidates to uses. */
5108 static struct iv_ca
*
5109 get_initial_solution (struct ivopts_data
*data
)
5111 struct iv_ca
*ivs
= iv_ca_new (data
);
5114 for (i
= 0; i
< n_iv_uses (data
); i
++)
5115 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
5124 /* Tries to improve set of induction variables IVS. */
5127 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5130 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5131 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5132 struct iv_cand
*cand
;
5134 /* Try extending the set of induction variables by one. */
5135 for (i
= 0; i
< n_iv_cands (data
); i
++)
5137 cand
= iv_cand (data
, i
);
5139 if (iv_ca_cand_used_p (ivs
, cand
))
5142 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5146 /* If we successfully added the candidate and the set is small enough,
5147 try optimizing it by removing other candidates. */
5148 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5150 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5151 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5152 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5153 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5156 if (compare_costs (acost
, best_cost
) < 0)
5159 iv_ca_delta_free (&best_delta
);
5160 best_delta
= act_delta
;
5163 iv_ca_delta_free (&act_delta
);
5168 /* Try removing the candidates from the set instead. */
5169 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5171 /* Nothing more we can do. */
5176 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5177 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5178 iv_ca_delta_free (&best_delta
);
5182 /* Attempts to find the optimal set of induction variables. We do simple
5183 greedy heuristic -- we try to replace at most one candidate in the selected
5184 solution and remove the unused ivs while this improves the cost. */
5186 static struct iv_ca
*
5187 find_optimal_iv_set (struct ivopts_data
*data
)
5193 /* Get the initial solution. */
5194 set
= get_initial_solution (data
);
5197 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5198 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5204 fprintf (dump_file
, "Initial set of candidates:\n");
5205 iv_ca_dump (data
, dump_file
, set
);
5208 while (try_improve_iv_set (data
, set
))
5210 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5212 fprintf (dump_file
, "Improved to:\n");
5213 iv_ca_dump (data
, dump_file
, set
);
5217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5219 comp_cost cost
= iv_ca_cost (set
);
5220 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n", cost
.cost
, cost
.complexity
);
5223 for (i
= 0; i
< n_iv_uses (data
); i
++)
5225 use
= iv_use (data
, i
);
5226 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5232 /* Creates a new induction variable corresponding to CAND. */
5235 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5237 gimple_stmt_iterator incr_pos
;
5247 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
5251 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
5259 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
5263 /* Mark that the iv is preserved. */
5264 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5265 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5267 /* Rewrite the increment so that it uses var_before directly. */
5268 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5273 gimple_add_tmp_var (cand
->var_before
);
5274 add_referenced_var (cand
->var_before
);
5276 base
= unshare_expr (cand
->iv
->base
);
5278 create_iv (base
, unshare_expr (cand
->iv
->step
),
5279 cand
->var_before
, data
->current_loop
,
5280 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5283 /* Creates new induction variables described in SET. */
5286 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5289 struct iv_cand
*cand
;
5292 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5294 cand
= iv_cand (data
, i
);
5295 create_new_iv (data
, cand
);
5299 /* Returns the phi-node in BB with result RESULT. */
5302 get_phi_with_result (basic_block bb
, tree result
)
5304 gimple_stmt_iterator i
= gsi_start_phis (bb
);
5306 for (; !gsi_end_p (i
); gsi_next (&i
))
5307 if (gimple_phi_result (gsi_stmt (i
)) == result
)
5308 return gsi_stmt (i
);
5315 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5316 is true, remove also the ssa name defined by the statement. */
5319 remove_statement (gimple stmt
, bool including_defined_name
)
5321 if (gimple_code (stmt
) == GIMPLE_PHI
)
5323 gimple bb_phi
= get_phi_with_result (gimple_bb (stmt
),
5324 gimple_phi_result (stmt
));
5325 gimple_stmt_iterator bsi
= gsi_for_stmt (bb_phi
);
5326 remove_phi_node (&bsi
, including_defined_name
);
5330 gimple_stmt_iterator bsi
= gsi_for_stmt (stmt
);
5331 gsi_remove (&bsi
, true);
5332 release_defs (stmt
);
5336 /* Rewrites USE (definition of iv used in a nonlinear expression)
5337 using candidate CAND. */
5340 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5341 struct iv_use
*use
, struct iv_cand
*cand
)
5346 gimple_stmt_iterator bsi
;
5348 /* An important special case -- if we are asked to express value of
5349 the original iv by itself, just exit; there is no need to
5350 introduce a new computation (that might also need casting the
5351 variable to unsigned and back). */
5352 if (cand
->pos
== IP_ORIGINAL
5353 && cand
->incremented_at
== use
->stmt
)
5355 tree step
, ctype
, utype
;
5356 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
5358 gcc_assert (is_gimple_assign (use
->stmt
));
5359 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
5361 step
= cand
->iv
->step
;
5362 ctype
= TREE_TYPE (step
);
5363 utype
= TREE_TYPE (cand
->var_after
);
5364 if (TREE_CODE (step
) == NEGATE_EXPR
)
5366 incr_code
= MINUS_EXPR
;
5367 step
= TREE_OPERAND (step
, 0);
5370 /* Check whether we may leave the computation unchanged.
5371 This is the case only if it does not rely on other
5372 computations in the loop -- otherwise, the computation
5373 we rely upon may be removed in remove_unused_ivs,
5374 thus leading to ICE. */
5375 old_code
= gimple_assign_rhs_code (use
->stmt
);
5376 if (old_code
== PLUS_EXPR
5377 || old_code
== MINUS_EXPR
5378 || old_code
== POINTER_PLUS_EXPR
)
5380 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
5381 op
= gimple_assign_rhs2 (use
->stmt
);
5382 else if (old_code
!= MINUS_EXPR
5383 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
5384 op
= gimple_assign_rhs1 (use
->stmt
);
5392 && (TREE_CODE (op
) == INTEGER_CST
5393 || operand_equal_p (op
, step
, 0)))
5396 /* Otherwise, add the necessary computations to express
5398 op
= fold_convert (ctype
, cand
->var_before
);
5399 comp
= fold_convert (utype
,
5400 build2 (incr_code
, ctype
, op
,
5401 unshare_expr (step
)));
5405 comp
= get_computation (data
->current_loop
, use
, cand
);
5406 gcc_assert (comp
!= NULL_TREE
);
5409 switch (gimple_code (use
->stmt
))
5412 tgt
= PHI_RESULT (use
->stmt
);
5414 /* If we should keep the biv, do not replace it. */
5415 if (name_info (data
, tgt
)->preserve_biv
)
5418 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
5422 tgt
= gimple_assign_lhs (use
->stmt
);
5423 bsi
= gsi_for_stmt (use
->stmt
);
5430 op
= force_gimple_operand_gsi (&bsi
, comp
, false, SSA_NAME_VAR (tgt
),
5431 true, GSI_SAME_STMT
);
5433 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
5435 ass
= gimple_build_assign (tgt
, op
);
5436 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
5437 remove_statement (use
->stmt
, false);
5441 gimple_assign_set_rhs_from_tree (&bsi
, op
);
5442 use
->stmt
= gsi_stmt (bsi
);
5446 /* Replaces ssa name in index IDX by its basic variable. Callback for
5450 idx_remove_ssa_names (tree base
, tree
*idx
,
5451 void *data ATTRIBUTE_UNUSED
)
5455 if (TREE_CODE (*idx
) == SSA_NAME
)
5456 *idx
= SSA_NAME_VAR (*idx
);
5458 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
5460 op
= &TREE_OPERAND (base
, 2);
5462 && TREE_CODE (*op
) == SSA_NAME
)
5463 *op
= SSA_NAME_VAR (*op
);
5464 op
= &TREE_OPERAND (base
, 3);
5466 && TREE_CODE (*op
) == SSA_NAME
)
5467 *op
= SSA_NAME_VAR (*op
);
5473 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5476 unshare_and_remove_ssa_names (tree ref
)
5478 ref
= unshare_expr (ref
);
5479 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5484 /* Copies the reference information from OLD_REF to NEW_REF. */
5487 copy_ref_info (tree new_ref
, tree old_ref
)
5489 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5490 copy_mem_ref_info (new_ref
, old_ref
);
5492 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5495 /* Rewrites USE (address that is an iv) using candidate CAND. */
5498 rewrite_use_address (struct ivopts_data
*data
,
5499 struct iv_use
*use
, struct iv_cand
*cand
)
5502 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5506 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5508 unshare_aff_combination (&aff
);
5510 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
, data
->speed
);
5511 copy_ref_info (ref
, *use
->op_p
);
5515 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5519 rewrite_use_compare (struct ivopts_data
*data
,
5520 struct iv_use
*use
, struct iv_cand
*cand
)
5522 tree comp
, *var_p
, op
, bound
;
5523 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
5524 enum tree_code compare
;
5525 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5531 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5532 tree var_type
= TREE_TYPE (var
);
5535 compare
= iv_elimination_compare (data
, use
);
5536 bound
= unshare_expr (fold_convert (var_type
, bound
));
5537 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
5539 gsi_insert_seq_on_edge_immediate (
5540 loop_preheader_edge (data
->current_loop
),
5543 gimple_cond_set_lhs (use
->stmt
, var
);
5544 gimple_cond_set_code (use
->stmt
, compare
);
5545 gimple_cond_set_rhs (use
->stmt
, op
);
5549 /* The induction variable elimination failed; just express the original
5551 comp
= get_computation (data
->current_loop
, use
, cand
);
5552 gcc_assert (comp
!= NULL_TREE
);
5554 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
5557 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
5558 true, GSI_SAME_STMT
);
5561 /* Rewrites USE using candidate CAND. */
5564 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
5568 case USE_NONLINEAR_EXPR
:
5569 rewrite_use_nonlinear_expr (data
, use
, cand
);
5573 rewrite_use_address (data
, use
, cand
);
5577 rewrite_use_compare (data
, use
, cand
);
5584 update_stmt (use
->stmt
);
5587 /* Rewrite the uses using the selected induction variables. */
5590 rewrite_uses (struct ivopts_data
*data
)
5593 struct iv_cand
*cand
;
5596 for (i
= 0; i
< n_iv_uses (data
); i
++)
5598 use
= iv_use (data
, i
);
5599 cand
= use
->selected
;
5602 rewrite_use (data
, use
, cand
);
5606 /* Removes the ivs that are not used after rewriting. */
5609 remove_unused_ivs (struct ivopts_data
*data
)
5614 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5616 struct version_info
*info
;
5618 info
= ver_info (data
, j
);
5620 && !integer_zerop (info
->iv
->step
)
5622 && !info
->iv
->have_use_for
5623 && !info
->preserve_biv
)
5624 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5628 /* Frees data allocated by the optimization of a single loop. */
5631 free_loop_data (struct ivopts_data
*data
)
5639 pointer_map_destroy (data
->niters
);
5640 data
->niters
= NULL
;
5643 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5645 struct version_info
*info
;
5647 info
= ver_info (data
, i
);
5651 info
->has_nonlin_use
= false;
5652 info
->preserve_biv
= false;
5655 bitmap_clear (data
->relevant
);
5656 bitmap_clear (data
->important_candidates
);
5658 for (i
= 0; i
< n_iv_uses (data
); i
++)
5660 struct iv_use
*use
= iv_use (data
, i
);
5663 BITMAP_FREE (use
->related_cands
);
5664 for (j
= 0; j
< use
->n_map_members
; j
++)
5665 if (use
->cost_map
[j
].depends_on
)
5666 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5667 free (use
->cost_map
);
5670 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5672 for (i
= 0; i
< n_iv_cands (data
); i
++)
5674 struct iv_cand
*cand
= iv_cand (data
, i
);
5678 if (cand
->depends_on
)
5679 BITMAP_FREE (cand
->depends_on
);
5682 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5684 if (data
->version_info_size
< num_ssa_names
)
5686 data
->version_info_size
= 2 * num_ssa_names
;
5687 free (data
->version_info
);
5688 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5691 data
->max_inv_id
= 0;
5693 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5694 SET_DECL_RTL (obj
, NULL_RTX
);
5696 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5699 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5703 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5705 free_loop_data (data
);
5706 free (data
->version_info
);
5707 BITMAP_FREE (data
->relevant
);
5708 BITMAP_FREE (data
->important_candidates
);
5710 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5711 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5712 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5715 /* Optimizes the LOOP. Returns true if anything changed. */
5718 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5720 bool changed
= false;
5721 struct iv_ca
*iv_ca
;
5725 gcc_assert (!data
->niters
);
5726 data
->current_loop
= loop
;
5727 data
->speed
= optimize_loop_for_speed_p (loop
);
5729 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5731 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5733 exit
= single_dom_exit (loop
);
5736 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5737 exit
->src
->index
, exit
->dest
->index
);
5738 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
5739 fprintf (dump_file
, "\n");
5742 fprintf (dump_file
, "\n");
5745 body
= get_loop_body (loop
);
5746 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
5749 /* For each ssa name determines whether it behaves as an induction variable
5751 if (!find_induction_variables (data
))
5754 /* Finds interesting uses (item 1). */
5755 find_interesting_uses (data
);
5756 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5759 /* Finds candidates for the induction variables (item 2). */
5760 find_iv_candidates (data
);
5762 /* Calculates the costs (item 3, part 1). */
5763 determine_iv_costs (data
);
5764 determine_use_iv_costs (data
);
5765 determine_set_costs (data
);
5767 /* Find the optimal set of induction variables (item 3, part 2). */
5768 iv_ca
= find_optimal_iv_set (data
);
5773 /* Create the new induction variables (item 4, part 1). */
5774 create_new_ivs (data
, iv_ca
);
5775 iv_ca_free (&iv_ca
);
5777 /* Rewrite the uses (item 4, part 2). */
5778 rewrite_uses (data
);
5780 /* Remove the ivs that are unused after rewriting. */
5781 remove_unused_ivs (data
);
5783 /* We have changed the structure of induction variables; it might happen
5784 that definitions in the scev database refer to some of them that were
5789 free_loop_data (data
);
5794 /* Main entry point. Optimizes induction variables in loops. */
5797 tree_ssa_iv_optimize (void)
5800 struct ivopts_data data
;
5803 tree_ssa_iv_optimize_init (&data
);
5805 /* Optimize the loops starting with the innermost ones. */
5806 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
5808 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5809 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5811 tree_ssa_iv_optimize_loop (&data
, loop
);
5814 tree_ssa_iv_optimize_finalize (&data
);