/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variables value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
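/* As a purely illustrative sketch (this example is not taken from the GCC
   sources or testsuite), strength reduction turns an address computation
   that needs a multiplication on every iteration into one that only needs
   an addition:

     before:                                 after (roughly):
       for (i = 0; i < n; i++)                 for (p = &a[0]; p != &a[n]; p++)
         a[i] = 0;   // address = a + 4*i        *p = 0;   // pointer iv, step 4

   The integer iv "i", if it was used only for addressing, is then dead and
   can be eliminated entirely.  */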
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
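/* An illustrative example (not part of the original comments): in a loop
   like "for (i = 0; i < n; i++) sum += a[i];" the counter "i" is described
   by an iv with base 0 and step 1, while the address &a[i] (with 4-byte
   elements) gives a derived iv with base &a[0], step 4 and base_object "a".  */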
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
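/* For illustration only (not from the original comments): with this
   representation a cost of {3, 0} is ranked cheaper than {3, 2}, because
   compare_costs below compares the runtime cost first and uses complexity
   only to break ties, while infinite_cost dominates every finite cost.  */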
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
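/* Note (not part of the original comments): assuming the usual mapping of
   these PARAM_* identifiers to command-line parameters, the bounds above can
   be tuned without rebuilding the compiler, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=40 \
             --param iv-max-considered-uses=250 foo.c

   which is handy when experimenting with the candidate selection heuristics.  */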
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
473 /* Dumps information about the USE to FILE. */
475 extern void dump_use (FILE *, struct iv_use
*);
477 dump_use (FILE *file
, struct iv_use
*use
)
479 fprintf (file
, "use %d\n", use
->id
);
483 case USE_NONLINEAR_EXPR
:
484 fprintf (file
, " generic\n");
488 fprintf (file
, " address\n");
492 fprintf (file
, " compare\n");
499 fprintf (file
, " in statement ");
500 print_gimple_stmt (file
, use
->stmt
, 0, 0);
501 fprintf (file
, "\n");
503 fprintf (file
, " at position ");
505 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
506 fprintf (file
, "\n");
508 dump_iv (file
, use
->iv
);
510 if (use
->related_cands
)
512 fprintf (file
, " related candidates ");
513 dump_bitmap (file
, use
->related_cands
);
517 /* Dumps information about the uses to FILE. */
519 extern void dump_uses (FILE *, struct ivopts_data
*);
521 dump_uses (FILE *file
, struct ivopts_data
*data
)
526 for (i
= 0; i
< n_iv_uses (data
); i
++)
528 use
= iv_use (data
, i
);
530 dump_use (file
, use
);
531 fprintf (file
, "\n");
535 /* Dumps information about induction variable candidate CAND to FILE. */
537 extern void dump_cand (FILE *, struct iv_cand
*);
539 dump_cand (FILE *file
, struct iv_cand
*cand
)
541 struct iv
*iv
= cand
->iv
;
543 fprintf (file
, "candidate %d%s\n",
544 cand
->id
, cand
->important
? " (important)" : "");
546 if (cand
->depends_on
)
548 fprintf (file
, " depends on ");
549 dump_bitmap (file
, cand
->depends_on
);
554 fprintf (file
, " final value replacement\n");
558 if (cand
->var_before
)
560 fprintf (file
, " var_before ");
561 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
562 fprintf (file
, "\n");
566 fprintf (file
, " var_after ");
567 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
568 fprintf (file
, "\n");
574 fprintf (file
, " incremented before exit test\n");
578 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
582 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
586 fprintf (file
, " incremented at end\n");
590 fprintf (file
, " original biv\n");
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
613 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
617 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
619 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
623 if (sbb
== loop
->latch
)
629 return stmt
== last_stmt (bb
);
632 /* Returns true if STMT if after the place where the original induction
633 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
634 if the positions are identical. */
637 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
639 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
640 basic_block stmt_bb
= gimple_bb (stmt
);
642 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
645 if (stmt_bb
!= cand_bb
)
649 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
651 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
654 /* Returns true if STMT if after the place where the induction variable
655 CAND is incremented in LOOP. */
658 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
666 return stmt_after_ip_normal_pos (loop
, stmt
);
670 return stmt_after_inc_pos (cand
, stmt
, false);
673 return stmt_after_inc_pos (cand
, stmt
, true);
680 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
683 abnormal_ssa_name_p (tree exp
)
688 if (TREE_CODE (exp
) != SSA_NAME
)
691 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
694 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
695 abnormal phi node. Callback for for_each_index. */
698 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
699 void *data ATTRIBUTE_UNUSED
)
701 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
703 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
705 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
709 return !abnormal_ssa_name_p (*index
);
712 /* Returns true if EXPR contains a ssa name that occurs in an
713 abnormal phi node. */
716 contains_abnormal_ssa_name_p (tree expr
)
719 enum tree_code_class codeclass
;
724 code
= TREE_CODE (expr
);
725 codeclass
= TREE_CODE_CLASS (code
);
727 if (code
== SSA_NAME
)
728 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
730 if (code
== INTEGER_CST
731 || is_gimple_min_invariant (expr
))
734 if (code
== ADDR_EXPR
)
735 return !for_each_index (&TREE_OPERAND (expr
, 0),
736 idx_contains_abnormal_ssa_name_p
,
739 if (code
== COND_EXPR
)
740 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
741 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
742 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
748 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
753 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
765 /* Returns tree describing number of iterations determined from
766 EXIT of DATA->current_loop, or NULL if something goes wrong. */
769 niter_for_exit (struct ivopts_data
*data
, edge exit
,
770 struct tree_niter_desc
**desc_p
)
772 struct tree_niter_desc
* desc
= NULL
;
778 data
->niters
= pointer_map_create ();
782 slot
= pointer_map_contains (data
->niters
, exit
);
786 /* Try to determine number of iterations. We must know it
787 unconditionally (i.e., without possibility of # of iterations
788 being zero). Also, we cannot safely work with ssa names that
789 appear in phi nodes on abnormal edges, so that we do not create
790 overlapping life ranges for them (PR 27283). */
791 desc
= XNEW (struct tree_niter_desc
);
792 if (number_of_iterations_exit (data
->current_loop
,
794 && integer_zerop (desc
->may_be_zero
)
795 && !contains_abnormal_ssa_name_p (desc
->niter
))
801 slot
= pointer_map_insert (data
->niters
, exit
);
805 niter
= ((struct tree_niter_desc
*) *slot
)->niter
;
808 *desc_p
= (struct tree_niter_desc
*) *slot
;
812 /* Returns tree describing number of iterations determined from
813 single dominating exit of DATA->current_loop, or NULL if something
817 niter_for_single_dom_exit (struct ivopts_data
*data
)
819 edge exit
= single_dom_exit (data
->current_loop
);
824 return niter_for_exit (data
, exit
, NULL
);
827 /* Hash table equality function for expressions. */
830 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
832 const struct iv_inv_expr_ent
*expr1
=
833 (const struct iv_inv_expr_ent
*)ent1
;
834 const struct iv_inv_expr_ent
*expr2
=
835 (const struct iv_inv_expr_ent
*)ent2
;
837 return operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
840 /* Hash function for loop invariant expressions. */
843 htab_inv_expr_hash (const void *ent
)
845 const struct iv_inv_expr_ent
*expr
=
846 (const struct iv_inv_expr_ent
*)ent
;
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
870 /* Returns a memory object to that EXPR points. In case we are able to
871 determine that it does not point to any such object, NULL is returned. */
874 determine_base_object (tree expr
)
876 enum tree_code code
= TREE_CODE (expr
);
879 /* If this is a pointer casted to any type, we need to determine
880 the base object for the pointer; so handle conversions before
881 throwing away non-pointer expressions. */
882 if (CONVERT_EXPR_P (expr
))
883 return determine_base_object (TREE_OPERAND (expr
, 0));
885 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
894 obj
= TREE_OPERAND (expr
, 0);
895 base
= get_base_address (obj
);
900 if (TREE_CODE (base
) == MEM_REF
)
901 return determine_base_object (TREE_OPERAND (base
, 0));
903 return fold_convert (ptr_type_node
,
904 build_fold_addr_expr (base
));
906 case POINTER_PLUS_EXPR
:
907 return determine_base_object (TREE_OPERAND (expr
, 0));
911 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
915 return fold_convert (ptr_type_node
, expr
);
919 /* Allocates an induction variable with given initial value BASE and step STEP
923 alloc_iv (tree base
, tree step
)
925 struct iv
*iv
= XCNEW (struct iv
);
926 gcc_assert (step
!= NULL_TREE
);
929 iv
->base_object
= determine_base_object (base
);
932 iv
->have_use_for
= false;
934 iv
->ssa_name
= NULL_TREE
;
939 /* Sets STEP and BASE for induction variable IV. */
942 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
944 struct version_info
*info
= name_info (data
, iv
);
946 gcc_assert (!info
->iv
);
948 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
949 info
->iv
= alloc_iv (base
, step
);
950 info
->iv
->ssa_name
= iv
;
953 /* Finds induction variable declaration for VAR. */
956 get_iv (struct ivopts_data
*data
, tree var
)
959 tree type
= TREE_TYPE (var
);
961 if (!POINTER_TYPE_P (type
)
962 && !INTEGRAL_TYPE_P (type
))
965 if (!name_info (data
, var
)->iv
)
967 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
970 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
971 set_iv (data
, var
, var
, build_int_cst (type
, 0));
974 return name_info (data
, var
)->iv
;
977 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
978 not define a simple affine biv with nonzero step. */
981 determine_biv_step (gimple phi
)
983 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
984 tree name
= PHI_RESULT (phi
);
987 if (!is_gimple_reg (name
))
990 if (!simple_iv (loop
, loop
, name
, &iv
, true))
993 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
996 /* Finds basic ivs. */
999 find_bivs (struct ivopts_data
*data
)
1002 tree step
, type
, base
;
1004 struct loop
*loop
= data
->current_loop
;
1005 gimple_stmt_iterator psi
;
1007 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1009 phi
= gsi_stmt (psi
);
1011 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1014 step
= determine_biv_step (phi
);
1018 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1019 base
= expand_simple_operations (base
);
1020 if (contains_abnormal_ssa_name_p (base
)
1021 || contains_abnormal_ssa_name_p (step
))
1024 type
= TREE_TYPE (PHI_RESULT (phi
));
1025 base
= fold_convert (type
, base
);
1028 if (POINTER_TYPE_P (type
))
1029 step
= fold_convert (sizetype
, step
);
1031 step
= fold_convert (type
, step
);
1034 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1041 /* Marks basic ivs. */
1044 mark_bivs (struct ivopts_data
*data
)
1048 struct iv
*iv
, *incr_iv
;
1049 struct loop
*loop
= data
->current_loop
;
1050 basic_block incr_bb
;
1051 gimple_stmt_iterator psi
;
1053 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1055 phi
= gsi_stmt (psi
);
1057 iv
= get_iv (data
, PHI_RESULT (phi
));
1061 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1062 incr_iv
= get_iv (data
, var
);
1066 /* If the increment is in the subloop, ignore it. */
1067 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1068 if (incr_bb
->loop_father
!= data
->current_loop
1069 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1073 incr_iv
->biv_p
= true;
1077 /* Checks whether STMT defines a linear induction variable and stores its
1078 parameters to IV. */
1081 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1084 struct loop
*loop
= data
->current_loop
;
1086 iv
->base
= NULL_TREE
;
1087 iv
->step
= NULL_TREE
;
1089 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1092 lhs
= gimple_assign_lhs (stmt
);
1093 if (TREE_CODE (lhs
) != SSA_NAME
)
1096 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1098 iv
->base
= expand_simple_operations (iv
->base
);
1100 if (contains_abnormal_ssa_name_p (iv
->base
)
1101 || contains_abnormal_ssa_name_p (iv
->step
))
1107 /* Finds general ivs in statement STMT. */
1110 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1114 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1117 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1120 /* Finds general ivs in basic block BB. */
1123 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1125 gimple_stmt_iterator bsi
;
1127 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1128 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1131 /* Finds general ivs. */
1134 find_givs (struct ivopts_data
*data
)
1136 struct loop
*loop
= data
->current_loop
;
1137 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1140 for (i
= 0; i
< loop
->num_nodes
; i
++)
1141 find_givs_in_bb (data
, body
[i
]);
1145 /* For each ssa name defined in LOOP determines whether it is an induction
1146 variable and if so, its initial value and step. */
1149 find_induction_variables (struct ivopts_data
*data
)
1154 if (!find_bivs (data
))
1160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1162 tree niter
= niter_for_single_dom_exit (data
);
1166 fprintf (dump_file
, " number of iterations ");
1167 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1168 fprintf (dump_file
, "\n\n");
1171 fprintf (dump_file
, "Induction variables:\n\n");
1173 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1175 if (ver_info (data
, i
)->iv
)
1176 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1183 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1185 static struct iv_use
*
1186 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1187 gimple stmt
, enum use_type use_type
)
1189 struct iv_use
*use
= XCNEW (struct iv_use
);
1191 use
->id
= n_iv_uses (data
);
1192 use
->type
= use_type
;
1196 use
->related_cands
= BITMAP_ALLOC (NULL
);
1198 /* To avoid showing ssa name in the dumps, if it was not reset by the
1200 iv
->ssa_name
= NULL_TREE
;
1202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1203 dump_use (dump_file
, use
);
1205 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1210 /* Checks whether OP is a loop-level invariant and if so, records it.
1211 NONLINEAR_USE is true if the invariant is used in a way we do not
1212 handle specially. */
1215 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1218 struct version_info
*info
;
1220 if (TREE_CODE (op
) != SSA_NAME
1221 || !is_gimple_reg (op
))
1224 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1226 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1229 info
= name_info (data
, op
);
1231 info
->has_nonlin_use
|= nonlinear_use
;
1233 info
->inv_id
= ++data
->max_inv_id
;
1234 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1237 /* Checks whether the use OP is interesting and if so, records it. */
1239 static struct iv_use
*
1240 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1247 if (TREE_CODE (op
) != SSA_NAME
)
1250 iv
= get_iv (data
, op
);
1254 if (iv
->have_use_for
)
1256 use
= iv_use (data
, iv
->use_id
);
1258 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1262 if (integer_zerop (iv
->step
))
1264 record_invariant (data
, op
, true);
1267 iv
->have_use_for
= true;
1269 civ
= XNEW (struct iv
);
1272 stmt
= SSA_NAME_DEF_STMT (op
);
1273 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1274 || is_gimple_assign (stmt
));
1276 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1277 iv
->use_id
= use
->id
;
1282 /* Given a condition in statement STMT, checks whether it is a compare
1283 of an induction variable and an invariant. If this is the case,
1284 CONTROL_VAR is set to location of the iv, BOUND to the location of
1285 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1286 induction variable descriptions, and true is returned. If this is not
1287 the case, CONTROL_VAR and BOUND are set to the arguments of the
1288 condition and false is returned. */
1291 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1292 tree
**control_var
, tree
**bound
,
1293 struct iv
**iv_var
, struct iv
**iv_bound
)
1295 /* The objects returned when COND has constant operands. */
1296 static struct iv const_iv
;
1298 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1299 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1302 if (gimple_code (stmt
) == GIMPLE_COND
)
1304 op0
= gimple_cond_lhs_ptr (stmt
);
1305 op1
= gimple_cond_rhs_ptr (stmt
);
1309 op0
= gimple_assign_rhs1_ptr (stmt
);
1310 op1
= gimple_assign_rhs2_ptr (stmt
);
1313 zero
= integer_zero_node
;
1314 const_iv
.step
= integer_zero_node
;
1316 if (TREE_CODE (*op0
) == SSA_NAME
)
1317 iv0
= get_iv (data
, *op0
);
1318 if (TREE_CODE (*op1
) == SSA_NAME
)
1319 iv1
= get_iv (data
, *op1
);
1321 /* Exactly one of the compared values must be an iv, and the other one must
1326 if (integer_zerop (iv0
->step
))
1328 /* Control variable may be on the other side. */
1329 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1330 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1332 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1336 *control_var
= op0
;;
1347 /* Checks whether the condition in STMT is interesting and if so,
1351 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1353 tree
*var_p
, *bound_p
;
1354 struct iv
*var_iv
, *civ
;
1356 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1358 find_interesting_uses_op (data
, *var_p
);
1359 find_interesting_uses_op (data
, *bound_p
);
1363 civ
= XNEW (struct iv
);
1365 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1368 /* Returns true if expression EXPR is obviously invariant in LOOP,
1369 i.e. if all its operands are defined outside of the LOOP. LOOP
1370 should not be the function body. */
1373 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1378 gcc_assert (loop_depth (loop
) > 0);
1380 if (is_gimple_min_invariant (expr
))
1383 if (TREE_CODE (expr
) == SSA_NAME
)
1385 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1387 && flow_bb_inside_loop_p (loop
, def_bb
))
1396 len
= TREE_OPERAND_LENGTH (expr
);
1397 for (i
= 0; i
< len
; i
++)
1398 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1404 /* Returns true if statement STMT is obviously invariant in LOOP,
1405 i.e. if all its operands on the RHS are defined outside of the LOOP.
1406 LOOP should not be the function body. */
1409 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1414 gcc_assert (loop_depth (loop
) > 0);
1416 lhs
= gimple_get_lhs (stmt
);
1417 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1419 tree op
= gimple_op (stmt
, i
);
1420 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1427 /* Cumulates the steps of indices into DATA and replaces their values with the
1428 initial ones. Returns false when the value of the index cannot be determined.
1429 Callback for for_each_index. */
1431 struct ifs_ivopts_data
1433 struct ivopts_data
*ivopts_data
;
1439 idx_find_step (tree base
, tree
*idx
, void *data
)
1441 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1443 tree step
, iv_base
, iv_step
, lbound
, off
;
1444 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1446 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
)
1449 /* If base is a component ref, require that the offset of the reference
1451 if (TREE_CODE (base
) == COMPONENT_REF
)
1453 off
= component_ref_field_offset (base
);
1454 return expr_invariant_in_loop_p (loop
, off
);
1457 /* If base is array, first check whether we will be able to move the
1458 reference out of the loop (in order to take its address in strength
1459 reduction). In order for this to work we need both lower bound
1460 and step to be loop invariants. */
1461 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1463 /* Moreover, for a range, the size needs to be invariant as well. */
1464 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1465 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1468 step
= array_ref_element_size (base
);
1469 lbound
= array_ref_low_bound (base
);
1471 if (!expr_invariant_in_loop_p (loop
, step
)
1472 || !expr_invariant_in_loop_p (loop
, lbound
))
1476 if (TREE_CODE (*idx
) != SSA_NAME
)
1479 iv
= get_iv (dta
->ivopts_data
, *idx
);
1483 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1484 *&x[0], which is not folded and does not trigger the
1485 ARRAY_REF path below. */
1488 if (integer_zerop (iv
->step
))
1491 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1493 step
= array_ref_element_size (base
);
1495 /* We only handle addresses whose step is an integer constant. */
1496 if (TREE_CODE (step
) != INTEGER_CST
)
1500 /* The step for pointer arithmetics already is 1 byte. */
1501 step
= size_one_node
;
1505 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1506 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1509 /* The index might wrap. */
1513 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1514 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1519 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1520 object is passed to it in DATA. */
1523 idx_record_use (tree base
, tree
*idx
,
1526 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1527 find_interesting_uses_op (data
, *idx
);
1528 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1530 find_interesting_uses_op (data
, array_ref_element_size (base
));
1531 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1536 /* If we can prove that TOP = cst * BOT for some constant cst,
1537 store cst to MUL and return true. Otherwise return false.
1538 The returned value is always sign-extended, regardless of the
1539 signedness of TOP and BOT. */
1542 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1545 enum tree_code code
;
1546 double_int res
, p0
, p1
;
1547 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1552 if (operand_equal_p (top
, bot
, 0))
1554 *mul
= double_int_one
;
1558 code
= TREE_CODE (top
);
1562 mby
= TREE_OPERAND (top
, 1);
1563 if (TREE_CODE (mby
) != INTEGER_CST
)
1566 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1569 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1575 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1576 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1579 if (code
== MINUS_EXPR
)
1580 p1
= double_int_neg (p1
);
1581 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1585 if (TREE_CODE (bot
) != INTEGER_CST
)
1588 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1589 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1590 if (double_int_zero_p (p1
))
1592 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1594 return double_int_zero_p (res
);
1601 /* Returns true if memory reference REF with step STEP may be unaligned. */
1604 may_be_unaligned_p (tree ref
, tree step
)
1608 HOST_WIDE_INT bitsize
;
1609 HOST_WIDE_INT bitpos
;
1611 enum machine_mode mode
;
1612 int unsignedp
, volatilep
;
1613 unsigned base_align
;
1615 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1616 thus they are not misaligned. */
1617 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1620 /* The test below is basically copy of what expr.c:normal_inner_ref
1621 does to check whether the object must be loaded by parts when
1622 STRICT_ALIGNMENT is true. */
1623 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1624 &unsignedp
, &volatilep
, true);
1625 base_type
= TREE_TYPE (base
);
1626 base_align
= TYPE_ALIGN (base_type
);
1628 if (mode
!= BLKmode
)
1630 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1632 if (base_align
< mode_align
1633 || (bitpos
% mode_align
) != 0
1634 || (bitpos
% BITS_PER_UNIT
) != 0)
1638 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1641 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1648 /* Return true if EXPR may be non-addressable. */
1651 may_be_nonaddressable_p (tree expr
)
1653 switch (TREE_CODE (expr
))
1655 case TARGET_MEM_REF
:
1656 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657 target, thus they are always addressable. */
1661 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1662 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1664 case VIEW_CONVERT_EXPR
:
1665 /* This kind of view-conversions may wrap non-addressable objects
1666 and make them look addressable. After some processing the
1667 non-addressability may be uncovered again, causing ADDR_EXPRs
1668 of inappropriate objects to be built. */
1669 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1670 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1673 /* ... fall through ... */
1676 case ARRAY_RANGE_REF
:
1677 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1689 /* Finds addresses in *OP_P inside STMT. */
1692 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1694 tree base
= *op_p
, step
= size_zero_node
;
1696 struct ifs_ivopts_data ifs_ivopts_data
;
1698 /* Do not play with volatile memory references. A bit too conservative,
1699 perhaps, but safe. */
1700 if (gimple_has_volatile_ops (stmt
))
1703 /* Ignore bitfields for now. Not really something terribly complicated
1705 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1708 base
= unshare_expr (base
);
1710 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1712 tree type
= build_pointer_type (TREE_TYPE (base
));
1716 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1718 civ
= get_iv (data
, TMR_BASE (base
));
1722 TMR_BASE (base
) = civ
->base
;
1725 if (TMR_INDEX2 (base
)
1726 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1728 civ
= get_iv (data
, TMR_INDEX2 (base
));
1732 TMR_INDEX2 (base
) = civ
->base
;
1735 if (TMR_INDEX (base
)
1736 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1738 civ
= get_iv (data
, TMR_INDEX (base
));
1742 TMR_INDEX (base
) = civ
->base
;
1747 if (TMR_STEP (base
))
1748 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1750 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1754 if (integer_zerop (step
))
1756 base
= tree_mem_ref_addr (type
, base
);
1760 ifs_ivopts_data
.ivopts_data
= data
;
1761 ifs_ivopts_data
.stmt
= stmt
;
1762 ifs_ivopts_data
.step
= size_zero_node
;
1763 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1764 || integer_zerop (ifs_ivopts_data
.step
))
1766 step
= ifs_ivopts_data
.step
;
1768 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1770 /* Check that the base expression is addressable. This needs
1771 to be done after substituting bases of IVs into it. */
1772 if (may_be_nonaddressable_p (base
))
1775 /* Moreover, on strict alignment platforms, check that it is
1776 sufficiently aligned. */
1777 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1780 base
= build_fold_addr_expr (base
);
1782 /* Substituting bases of IVs into the base expression might
1783 have caused folding opportunities. */
1784 if (TREE_CODE (base
) == ADDR_EXPR
)
1786 tree
*ref
= &TREE_OPERAND (base
, 0);
1787 while (handled_component_p (*ref
))
1788 ref
= &TREE_OPERAND (*ref
, 0);
1789 if (TREE_CODE (*ref
) == MEM_REF
)
1791 tree tem
= TREE_OPERAND (*ref
, 0);
1793 if (tem
!= TREE_OPERAND (*ref
, 0))
1794 tem
= fold_build2 (MEM_REF
, TREE_TYPE (*ref
),
1795 tem
, TREE_OPERAND (*ref
, 1));
1797 tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1798 tem
, TREE_OPERAND (*ref
, 1));
1805 civ
= alloc_iv (base
, step
);
1806 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1810 for_each_index (op_p
, idx_record_use
, data
);
1813 /* Finds and records invariants used in STMT. */
1816 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1819 use_operand_p use_p
;
1822 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1824 op
= USE_FROM_PTR (use_p
);
1825 record_invariant (data
, op
, false);
1829 /* Finds interesting uses of induction variables in the statement STMT. */
1832 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1835 tree op
, *lhs
, *rhs
;
1837 use_operand_p use_p
;
1838 enum tree_code code
;
1840 find_invariants_stmt (data
, stmt
);
1842 if (gimple_code (stmt
) == GIMPLE_COND
)
1844 find_interesting_uses_cond (data
, stmt
);
1848 if (is_gimple_assign (stmt
))
1850 lhs
= gimple_assign_lhs_ptr (stmt
);
1851 rhs
= gimple_assign_rhs1_ptr (stmt
);
1853 if (TREE_CODE (*lhs
) == SSA_NAME
)
1855 /* If the statement defines an induction variable, the uses are not
1856 interesting by themselves. */
1858 iv
= get_iv (data
, *lhs
);
1860 if (iv
&& !integer_zerop (iv
->step
))
1864 code
= gimple_assign_rhs_code (stmt
);
1865 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1866 && (REFERENCE_CLASS_P (*rhs
)
1867 || is_gimple_val (*rhs
)))
1869 if (REFERENCE_CLASS_P (*rhs
))
1870 find_interesting_uses_address (data
, stmt
, rhs
);
1872 find_interesting_uses_op (data
, *rhs
);
1874 if (REFERENCE_CLASS_P (*lhs
))
1875 find_interesting_uses_address (data
, stmt
, lhs
);
1878 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1880 find_interesting_uses_cond (data
, stmt
);
1884 /* TODO -- we should also handle address uses of type
1886 memory = call (whatever);
1893 if (gimple_code (stmt
) == GIMPLE_PHI
1894 && gimple_bb (stmt
) == data
->current_loop
->header
)
1896 iv
= get_iv (data
, PHI_RESULT (stmt
));
1898 if (iv
&& !integer_zerop (iv
->step
))
1902 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1904 op
= USE_FROM_PTR (use_p
);
1906 if (TREE_CODE (op
) != SSA_NAME
)
1909 iv
= get_iv (data
, op
);
1913 find_interesting_uses_op (data
, op
);
1917 /* Finds interesting uses of induction variables outside of loops
1918 on loop exit edge EXIT. */
1921 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1924 gimple_stmt_iterator psi
;
1927 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1929 phi
= gsi_stmt (psi
);
1930 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1931 if (is_gimple_reg (def
))
1932 find_interesting_uses_op (data
, def
);
1936 /* Finds uses of the induction variables that are interesting. */
1939 find_interesting_uses (struct ivopts_data
*data
)
1942 gimple_stmt_iterator bsi
;
1943 basic_block
*body
= get_loop_body (data
->current_loop
);
1945 struct version_info
*info
;
1948 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1949 fprintf (dump_file
, "Uses:\n\n");
1951 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1956 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1957 if (e
->dest
!= EXIT_BLOCK_PTR
1958 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1959 find_interesting_uses_outside (data
, e
);
1961 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1962 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1963 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1964 if (!is_gimple_debug (gsi_stmt (bsi
)))
1965 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1972 fprintf (dump_file
, "\n");
1974 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1976 info
= ver_info (data
, i
);
1979 fprintf (dump_file
, " ");
1980 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1981 fprintf (dump_file
, " is invariant (%d)%s\n",
1982 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1986 fprintf (dump_file
, "\n");
1992 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1993 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1994 we are at the top-level of the processed address. */
1997 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1998 unsigned HOST_WIDE_INT
*offset
)
2000 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2001 enum tree_code code
;
2002 tree type
, orig_type
= TREE_TYPE (expr
);
2003 unsigned HOST_WIDE_INT off0
, off1
, st
;
2004 tree orig_expr
= expr
;
2008 type
= TREE_TYPE (expr
);
2009 code
= TREE_CODE (expr
);
2015 if (!cst_and_fits_in_hwi (expr
)
2016 || integer_zerop (expr
))
2019 *offset
= int_cst_value (expr
);
2020 return build_int_cst (orig_type
, 0);
2022 case POINTER_PLUS_EXPR
:
2025 op0
= TREE_OPERAND (expr
, 0);
2026 op1
= TREE_OPERAND (expr
, 1);
2028 op0
= strip_offset_1 (op0
, false, false, &off0
);
2029 op1
= strip_offset_1 (op1
, false, false, &off1
);
2031 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2032 if (op0
== TREE_OPERAND (expr
, 0)
2033 && op1
== TREE_OPERAND (expr
, 1))
2036 if (integer_zerop (op1
))
2038 else if (integer_zerop (op0
))
2040 if (code
== MINUS_EXPR
)
2041 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2046 expr
= fold_build2 (code
, type
, op0
, op1
);
2048 return fold_convert (orig_type
, expr
);
2051 op1
= TREE_OPERAND (expr
, 1);
2052 if (!cst_and_fits_in_hwi (op1
))
2055 op0
= TREE_OPERAND (expr
, 0);
2056 op0
= strip_offset_1 (op0
, false, false, &off0
);
2057 if (op0
== TREE_OPERAND (expr
, 0))
2060 *offset
= off0
* int_cst_value (op1
);
2061 if (integer_zerop (op0
))
2064 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2066 return fold_convert (orig_type
, expr
);
2069 case ARRAY_RANGE_REF
:
2073 step
= array_ref_element_size (expr
);
2074 if (!cst_and_fits_in_hwi (step
))
2077 st
= int_cst_value (step
);
2078 op1
= TREE_OPERAND (expr
, 1);
2079 op1
= strip_offset_1 (op1
, false, false, &off1
);
2080 *offset
= off1
* st
;
2083 && integer_zerop (op1
))
2085 /* Strip the component reference completely. */
2086 op0
= TREE_OPERAND (expr
, 0);
2087 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2097 tmp
= component_ref_field_offset (expr
);
2099 && cst_and_fits_in_hwi (tmp
))
2101 /* Strip the component reference completely. */
2102 op0
= TREE_OPERAND (expr
, 0);
2103 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2104 *offset
= off0
+ int_cst_value (tmp
);
2110 op0
= TREE_OPERAND (expr
, 0);
2111 op0
= strip_offset_1 (op0
, true, true, &off0
);
2114 if (op0
== TREE_OPERAND (expr
, 0))
2117 expr
= build_fold_addr_expr (op0
);
2118 return fold_convert (orig_type
, expr
);
2121 /* ??? Offset operand? */
2122 inside_addr
= false;
2129 /* Default handling of expressions for that we want to recurse into
2130 the first operand. */
2131 op0
= TREE_OPERAND (expr
, 0);
2132 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2135 if (op0
== TREE_OPERAND (expr
, 0)
2136 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2139 expr
= copy_node (expr
);
2140 TREE_OPERAND (expr
, 0) = op0
;
2142 TREE_OPERAND (expr
, 1) = op1
;
2144 /* Inside address, we might strip the top level component references,
2145 thus changing type of the expression. Handling of ADDR_EXPR
2147 expr
= fold_convert (orig_type
, expr
);
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
2160 /* Returns variant of TYPE that can be used as base for different uses.
2161 We return unsigned type with the same precision, which avoids problems
2165 generic_type_for (tree type
)
2167 if (POINTER_TYPE_P (type
))
2168 return unsigned_type_for (type
);
2170 if (TYPE_UNSIGNED (type
))
2173 return unsigned_type_for (type
);
2176 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2177 the bitmap to that we should store it. */
2179 static struct ivopts_data
*fd_ivopts_data
;
2181 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2183 bitmap
*depends_on
= (bitmap
*) data
;
2184 struct version_info
*info
;
2186 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2188 info
= name_info (fd_ivopts_data
, *expr_p
);
2190 if (!info
->inv_id
|| info
->has_nonlin_use
)
2194 *depends_on
= BITMAP_ALLOC (NULL
);
2195 bitmap_set_bit (*depends_on
, info
->inv_id
);
2200 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2201 position to POS. If USE is not NULL, the candidate is set as related to
2202 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2203 replacement of the final value of the iv by a direct computation. */
2205 static struct iv_cand
*
2206 add_candidate_1 (struct ivopts_data
*data
,
2207 tree base
, tree step
, bool important
, enum iv_position pos
,
2208 struct iv_use
*use
, gimple incremented_at
)
2211 struct iv_cand
*cand
= NULL
;
2212 tree type
, orig_type
;
2216 orig_type
= TREE_TYPE (base
);
2217 type
= generic_type_for (orig_type
);
2218 if (type
!= orig_type
)
2220 base
= fold_convert (type
, base
);
2221 step
= fold_convert (type
, step
);
2225 for (i
= 0; i
< n_iv_cands (data
); i
++)
2227 cand
= iv_cand (data
, i
);
2229 if (cand
->pos
!= pos
)
2232 if (cand
->incremented_at
!= incremented_at
2233 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2234 && cand
->ainc_use
!= use
))
2248 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2249 && operand_equal_p (step
, cand
->iv
->step
, 0)
2250 && (TYPE_PRECISION (TREE_TYPE (base
))
2251 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2255 if (i
== n_iv_cands (data
))
2257 cand
= XCNEW (struct iv_cand
);
2263 cand
->iv
= alloc_iv (base
, step
);
2266 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2268 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2269 cand
->var_after
= cand
->var_before
;
2271 cand
->important
= important
;
2272 cand
->incremented_at
= incremented_at
;
2273 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2276 && TREE_CODE (step
) != INTEGER_CST
)
2278 fd_ivopts_data
= data
;
2279 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2282 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2283 cand
->ainc_use
= use
;
2285 cand
->ainc_use
= NULL
;
2287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2288 dump_cand (dump_file
, cand
);
2291 if (important
&& !cand
->important
)
2293 cand
->important
= true;
2294 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2295 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2300 bitmap_set_bit (use
->related_cands
, i
);
2301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2302 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2309 /* Returns true if incrementing the induction variable at the end of the LOOP
2312 The purpose is to avoid splitting latch edge with a biv increment, thus
2313 creating a jump, possibly confusing other optimization passes and leaving
2314 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2315 is not available (so we do not have a better alternative), or if the latch
2316 edge is already nonempty. */
2319 allow_ip_end_pos_p (struct loop
*loop
)
2321 if (!ip_normal_pos (loop
))
2324 if (!empty_block_p (ip_end_pos (loop
)))
2330 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2331 Important field is set to IMPORTANT. */
2334 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2335 bool important
, struct iv_use
*use
)
2337 basic_block use_bb
= gimple_bb (use
->stmt
);
2338 enum machine_mode mem_mode
;
2339 unsigned HOST_WIDE_INT cstepi
;
2341 /* If we insert the increment in any position other than the standard
2342 ones, we must ensure that it is incremented once per iteration.
2343 It must not be in an inner nested loop, or one side of an if
2345 if (use_bb
->loop_father
!= data
->current_loop
2346 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2347 || stmt_could_throw_p (use
->stmt
)
2348 || !cst_and_fits_in_hwi (step
))
2351 cstepi
= int_cst_value (step
);
2353 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2354 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2355 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2357 enum tree_code code
= MINUS_EXPR
;
2359 tree new_step
= step
;
2361 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2363 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2364 code
= POINTER_PLUS_EXPR
;
2367 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2368 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2369 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2372 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2373 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2375 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2380 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2381 position to POS. If USE is not NULL, the candidate is set as related to
2382 it. The candidate computation is scheduled on all available positions. */
2385 add_candidate (struct ivopts_data
*data
,
2386 tree base
, tree step
, bool important
, struct iv_use
*use
)
2388 if (ip_normal_pos (data
->current_loop
))
2389 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2390 if (ip_end_pos (data
->current_loop
)
2391 && allow_ip_end_pos_p (data
->current_loop
))
2392 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2394 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2395 add_autoinc_candidates (data
, base
, step
, important
, use
);
2398 /* Add a standard "0 + 1 * iteration" iv candidate for a
2399 type with SIZE bits. */
2402 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2405 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2406 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2410 /* Adds standard iv candidates. */
2413 add_standard_iv_candidates (struct ivopts_data
*data
)
2415 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2417 /* The same for a double-integer type if it is still fast enough. */
2418 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2419 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2423 /* Adds candidates bases on the old induction variable IV. */
2426 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2430 struct iv_cand
*cand
;
2432 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2434 /* The same, but with initial value zero. */
2435 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2436 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2438 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2439 iv
->step
, true, NULL
);
2441 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2442 if (gimple_code (phi
) == GIMPLE_PHI
)
2444 /* Additionally record the possibility of leaving the original iv
2446 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2447 cand
= add_candidate_1 (data
,
2448 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2449 SSA_NAME_DEF_STMT (def
));
2450 cand
->var_before
= iv
->ssa_name
;
2451 cand
->var_after
= def
;
2455 /* Adds candidates based on the old induction variables. */
2458 add_old_ivs_candidates (struct ivopts_data
*data
)
2464 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2466 iv
= ver_info (data
, i
)->iv
;
2467 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2468 add_old_iv_candidates (data
, iv
);
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the nearest power of two, so that computing the
             modulo when probing the map is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
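
/* Worked example (illustrative only): with five related candidates the
   rounding loop in alloc_use_cost_map grows SIZE to 8, so a slot index can
   be computed as "cand->id & 7" instead of "cand->id % 5".  For the cost
   ordering, compare_costs ({4, 1}, {4, 2}) is negative: equal runtime
   costs are ordered by complexity, everything else by runtime cost.  */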
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
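
/* Probe order example (illustrative): with n_map_members == 8 and
   cand->id == 13, the scan starts at slot 13 & 7 == 5, walks slots 5..7,
   then wraps around to 0..4.  set_use_iv_cost and get_use_iv_cost follow
   the same order, so a stored pair is always found again.  */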
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;
      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
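
/* Worked example: for a 32-bit integer constant BITNO is 31 and the bit
   comes from TREE_INT_CST_LOW, so tree_int_cst_sign_bit returns 1 for -1
   and 0 for 5.  TREE_INT_CST_HIGH is consulted only when TYPE_PRECISION
   exceeds HOST_BITS_PER_WIDE_INT.  */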
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
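
/* Illustration: with avg_loop_niter returning 5 and a setup cost of 10,
   the speed path above charges 10 / 5 == 2 per iteration; when optimizing
   for size the full 10 is kept, since setup code is emitted exactly once
   no matter how often the loop rolls.  */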
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
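
/* Cache behaviour sketch (illustrative): the first call for a given
   (mode, constant) pair expands a multiplication into a fresh insn
   sequence and measures it; later calls hit the hash table, keyed by
   57 * mode + cst % 877 and disambiguated by mbc_entry_eq, so a repeated
   multiply_by_cost (4, mode, speed) is answered without re-expanding.  */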
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
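
/* Usage sketch: the first query for a given (address space, mode) pair
   probes every multiplier in [-MAX_RATIO, MAX_RATIO] against the target's
   legitimate-address predicate; subsequent queries are a single bit test
   in the cached sbitmap.  On many targets only a handful of ratios such
   as 1, 2, 4 and 8 end up set, but the exact set is target-dependent.  */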
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
                           data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (HAVE_PRE_DECREMENT)
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_DECREMENT)
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_PRE_INCREMENT)
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_INCREMENT)
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (address_cost_data, address_cost_data_list,
                   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
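
/* Reading the cached table (illustrative): the four indices of
   data->costs[sym][var][off][rat] say whether the address contains a
   symbol, a variable part, a constant offset and a scaled index.  For
   instance costs[0][1][1][0] is the cost of an address of the form
   "var + cst", and the complexity returned by new_cost above counts how
   many of the four parts are present.  */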
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                        addr,
                                        build_int_cst (sizetype, 2000)), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
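
/* Example (illustrative): for e1 == &a[4] and e2 == &a[1] with 4-byte
   elements, strip_offset peels the constant parts, *OFFSET grows by
   16 - 4 == 12, and the stripped bases compare equal, so no runtime cost
   is charged for the difference.  */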
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}

/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
        get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_MODE (TREE_TYPE (utype)),
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (TYPE_MODE (ctype), speed));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
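
/* Worked example: an unsigned 32-bit iv with step 8 has num_ending_zeros
   == 3, so the period is a mask of 32 - 3 == 29 low bits, i.e. 2^29 - 1;
   with step 1 the period is the full 2^32 - 1.  */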
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit, &desc);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (nit, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, nit))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (loop_only_exit_p (loop, exit))
            {
              if (!estimated_loop_iterations (loop, true, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (double_int_ucmp (max_niter, period_value) > 0)
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;
  int inv_expr_id = -1;
  tree *control_var, *bound_cst;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  /* A missing cost pair is treated as infinitely expensive.  */
  if (!a)
    return false;
  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;
  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
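/* Illustrative note (not from the source): the total cost of a candidate
   assignment IVS therefore decomposes as

     cost (IVS) = sum of the selected use costs      (ivs->cand_use_cost)
                + sum of the candidates' own costs   (ivs->cand_cost)
                + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   e.g. with hypothetical figures -- two uses costing 2 and 3, one candidate
   costing 1, and a register-pressure term of 4 -- the set cost would be
   2 + 3 + 1 + 4 = 10.  */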
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    ivs->upto++;

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;
  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
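/* Illustrative, self-contained sketch (not part of GCC): the iv_ca_delta
   machinery above is an undo list -- each node records the old and the new
   value of one slot, so a tentative change can be applied forward and then
   rolled back.  The toy program below shows the same pattern on a plain
   int array; all names in it are hypothetical.

#include <stdio.h>
#include <stdlib.h>

struct delta
{
  int slot;               // index of the changed cell
  int old_val, new_val;   // values before and after the change
  struct delta *next;     // next recorded change
};

// Record a change of cell SLOT from OLD_VAL to NEW_VAL before NEXT.
static struct delta *
delta_add (int slot, int old_val, int new_val, struct delta *next)
{
  struct delta *d = malloc (sizeof (struct delta));
  d->slot = slot;
  d->old_val = old_val;
  d->new_val = new_val;
  d->next = next;
  return d;
}

// Apply the recorded changes to VALS; if FORWARD is zero, revert them.
static void
delta_commit (int *vals, struct delta *list, int forward)
{
  struct delta *d;
  for (d = list; d; d = d->next)
    vals[d->slot] = forward ? d->new_val : d->old_val;
}

int
main (void)
{
  int vals[3] = { 1, 2, 3 };
  struct delta *list = delta_add (0, vals[0], 10, NULL);
  list = delta_add (2, vals[2], 30, list);

  delta_commit (vals, list, 1);  // try the change: prints 10 2 30
  printf ("%d %d %d\n", vals[0], vals[1], vals[2]);
  delta_commit (vals, list, 0);  // roll it back:   prints 1 2 3
  printf ("%d %d %d\n", vals[0], vals[1], vals[2]);

  while (list)
    {
      struct delta *next = list->next;
      free (list);
      list = next;
    }
  return 0;
}
*/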
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We do a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  tree new_ptr_base = NULL_TREE;

  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);

  if (TREE_CODE (new_ref) == TARGET_MEM_REF)
    new_ptr_base = TMR_BASE (new_ref);
  else if (TREE_CODE (new_ref) == MEM_REF)
    new_ptr_base = TREE_OPERAND (new_ref, 0);

  /* We can transfer points-to information from an old pointer
     or decl base to the new one.  */
  if (new_ptr_base
      && TREE_CODE (new_ptr_base) == SSA_NAME
      && POINTER_TYPE_P (TREE_TYPE (new_ptr_base))
      && !SSA_NAME_PTR_INFO (new_ptr_base))
    {
      tree base = get_base_address (old_ref);
      if (!base)
        ;
      else if ((INDIRECT_REF_P (base)
                || TREE_CODE (base) == MEM_REF)
               && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
        duplicate_ssa_name_ptr_info
          (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
      else if (TREE_CODE (base) == VAR_DECL
               || TREE_CODE (base) == PARM_DECL
               || TREE_CODE (base) == RESULT_DECL)
        {
          struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
          pt_solution_set_var (&pi->pt, base);
        }
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   directly propagating t over to (1) will introduce an overlapping live range,
   thus increasing register pressure.  This peephole transforms it into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
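/* Illustrative sketch (not from GCC): the successful branch above turns a
   test on the counter into a test on the chosen candidate, e.g.

     before:  for (i = 0; i < n; i++)  ... a[i] ...      // exit test: i < n
     after:   bound = a + n;                             // computed once in the preheader
              for (p = a; p < bound; p++)  ... *p ...    // exit test: p < bound

   where p is a hypothetical pointer candidate selected for the loop; when
   no suitable bound exists, the fallback path above simply recomputes the
   original comparison value from the candidate.  */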
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}