1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
71 #include "fold-const.h"
72 #include "stor-layout.h"
75 #include "hard-reg-set.h"
77 #include "dominance.h"
79 #include "basic-block.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-ssa-alias.h"
82 #include "internal-fn.h"
84 #include "gimple-expr.h"
87 #include "gimple-iterator.h"
88 #include "gimplify-me.h"
89 #include "gimple-ssa.h"
92 #include "tree-phinodes.h"
93 #include "ssa-iterators.h"
94 #include "stringpool.h"
95 #include "tree-ssanames.h"
96 #include "tree-ssa-loop-ivopts.h"
97 #include "tree-ssa-loop-manip.h"
98 #include "tree-ssa-loop-niter.h"
99 #include "tree-ssa-loop.h"
102 #include "insn-config.h"
107 #include "emit-rtl.h"
111 #include "tree-dfa.h"
112 #include "tree-ssa.h"
114 #include "tree-pass.h"
115 #include "tree-chrec.h"
116 #include "tree-scalar-evolution.h"
118 #include "langhooks.h"
119 #include "tree-affine.h"
121 #include "tree-inline.h"
122 #include "tree-ssa-propagate.h"
123 #include "tree-ssa-address.h"
124 #include "builtins.h"
125 #include "tree-vectorizer.h"
127 /* FIXME: Expressions are expanded to RTL in this pass to determine the
128 cost of different addressing modes. This should be moved to a TBD
129 interface between the GIMPLE and RTL worlds. */
132 /* The infinite cost. */
133 #define INFTY 10000000
135 #define AVG_LOOP_NITER(LOOP) 5
137 /* Returns the expected number of loop iterations for LOOP.
138 The average trip count is computed from profile data if it
141 static inline HOST_WIDE_INT
142 avg_loop_niter (struct loop
*loop
)
144 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
146 return AVG_LOOP_NITER (loop
);
151 /* Representation of the induction variable. */
154 tree base
; /* Initial value of the iv. */
155 tree base_object
; /* A memory object to that the induction variable points. */
156 tree step
; /* Step of the iv (constant only). */
157 tree ssa_name
; /* The ssa name with the value. */
158 unsigned use_id
; /* The identifier in the use if it is the case. */
159 bool biv_p
; /* Is it a biv? */
160 bool have_use_for
; /* Do we already have a use for it? */
161 bool no_overflow
; /* True if the iv doesn't overflow. */
164 /* Per-ssa version information (induction variable descriptions, etc.). */
167 tree name
; /* The ssa name. */
168 struct iv
*iv
; /* Induction variable description. */
169 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
170 an expression that is not an induction variable. */
171 bool preserve_biv
; /* For the original biv, whether to preserve it. */
172 unsigned inv_id
; /* Id of an invariant. */
178 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
179 USE_ADDRESS
, /* Use in an address. */
180 USE_COMPARE
/* Use is a compare. */
183 /* Cost of a computation. */
186 int cost
; /* The runtime cost. */
187 unsigned complexity
; /* The estimate of the complexity of the code for
188 the computation (in no concrete units --
189 complexity field should be larger for more
190 complex expressions and addressing modes). */
193 static const comp_cost no_cost
= {0, 0};
194 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
196 /* The candidate - cost pair. */
199 struct iv_cand
*cand
; /* The candidate. */
200 comp_cost cost
; /* The cost. */
201 bitmap depends_on
; /* The list of invariants that have to be
203 tree value
; /* For final value elimination, the expression for
204 the final value of the iv. For iv elimination,
205 the new bound to compare with. */
206 enum tree_code comp
; /* For iv elimination, the comparison. */
207 int inv_expr_id
; /* Loop invariant expression id. */
213 unsigned id
; /* The id of the use. */
214 unsigned sub_id
; /* The id of the sub use. */
215 enum use_type type
; /* Type of the use. */
216 struct iv
*iv
; /* The induction variable it is based on. */
217 gimple stmt
; /* Statement in that it occurs. */
218 tree
*op_p
; /* The place where it occurs. */
219 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
222 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
223 struct cost_pair
*cost_map
;
224 /* The costs wrto the iv candidates. */
226 struct iv_cand
*selected
;
227 /* The selected candidate. */
229 struct iv_use
*next
; /* The next sub use. */
230 tree addr_base
; /* Base address with const offset stripped. */
231 unsigned HOST_WIDE_INT addr_offset
;
232 /* Const offset stripped from base address. */
235 /* The position where the iv is computed. */
238 IP_NORMAL
, /* At the end, just before the exit condition. */
239 IP_END
, /* At the end of the latch block. */
240 IP_BEFORE_USE
, /* Immediately before a specific use. */
241 IP_AFTER_USE
, /* Immediately after a specific use. */
242 IP_ORIGINAL
/* The original biv. */
245 /* The induction variable candidate. */
248 unsigned id
; /* The number of the candidate. */
249 bool important
; /* Whether this is an "important" candidate, i.e. such
250 that it should be considered by all uses. */
251 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
252 gimple incremented_at
;/* For original biv, the statement where it is
254 tree var_before
; /* The variable used for it before increment. */
255 tree var_after
; /* The variable used for it after increment. */
256 struct iv
*iv
; /* The value of the candidate. NULL for
257 "pseudocandidate" used to indicate the possibility
258 to replace the final value of an iv by direct
259 computation of the value. */
260 unsigned cost
; /* Cost of the candidate. */
261 unsigned cost_step
; /* Cost of the candidate's increment operation. */
262 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
263 where it is incremented. */
264 bitmap depends_on
; /* The list of invariants that are used in step of the
268 /* Loop invariant expression hashtable entry. */
269 struct iv_inv_expr_ent
276 /* The data used by the induction variable optimizations. */
278 typedef struct iv_use
*iv_use_p
;
280 typedef struct iv_cand
*iv_cand_p
;
282 /* Hashtable helpers. */
284 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
286 static inline hashval_t
hash (const iv_inv_expr_ent
*);
287 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
290 /* Hash function for loop invariant expressions. */
293 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
298 /* Hash table equality function for expressions. */
301 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
302 const iv_inv_expr_ent
*expr2
)
304 return expr1
->hash
== expr2
->hash
305 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
310 /* The currently optimized loop. */
311 struct loop
*current_loop
;
312 source_location loop_loc
;
314 /* Numbers of iterations for all exits of the current loop. */
315 hash_map
<edge
, tree_niter_desc
*> *niters
;
317 /* Number of registers used in it. */
320 /* The size of version_info array allocated. */
321 unsigned version_info_size
;
323 /* The array of information for the ssa names. */
324 struct version_info
*version_info
;
326 /* The hashtable of loop invariant expressions created
328 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
330 /* Loop invariant expression id. */
333 /* The bitmap of indices in version_info whose value was changed. */
336 /* The uses of induction variables. */
337 vec
<iv_use_p
> iv_uses
;
339 /* The candidates. */
340 vec
<iv_cand_p
> iv_candidates
;
342 /* A bitmap of important candidates. */
343 bitmap important_candidates
;
345 /* Cache used by tree_to_aff_combination_expand. */
346 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
348 /* The maximum invariant id. */
351 /* Whether to consider just related and important candidates when replacing a
353 bool consider_all_candidates
;
355 /* Are we optimizing for speed? */
358 /* Whether the loop body includes any function calls. */
359 bool body_includes_call
;
361 /* Whether the loop body can only be exited via single exit. */
362 bool loop_single_exit_p
;
365 /* An assignment of iv candidates to uses. */
369 /* The number of uses covered by the assignment. */
372 /* Number of uses that cannot be expressed by the candidates in the set. */
375 /* Candidate assigned to a use, together with the related costs. */
376 struct cost_pair
**cand_for_use
;
378 /* Number of times each candidate is used. */
379 unsigned *n_cand_uses
;
381 /* The candidates used. */
384 /* The number of candidates in the set. */
387 /* Total number of registers needed. */
390 /* Total cost of expressing uses. */
391 comp_cost cand_use_cost
;
393 /* Total cost of candidates. */
396 /* Number of times each invariant is used. */
397 unsigned *n_invariant_uses
;
399 /* The array holding the number of uses of each loop
400 invariant expressions created by ivopt. */
401 unsigned *used_inv_expr
;
403 /* The number of created loop invariants. */
404 unsigned num_used_inv_expr
;
406 /* Total cost of the assignment. */
410 /* Difference of two iv candidate assignments. */
417 /* An old assignment (for rollback purposes). */
418 struct cost_pair
*old_cp
;
420 /* A new assignment. */
421 struct cost_pair
*new_cp
;
423 /* Next change in the list. */
424 struct iv_ca_delta
*next_change
;
427 /* Bound on number of candidates below that all candidates are considered. */
429 #define CONSIDER_ALL_CANDIDATES_BOUND \
430 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
432 /* If there are more iv occurrences, we just give up (it is quite unlikely that
433 optimizing such a loop would help, and it would take ages). */
435 #define MAX_CONSIDERED_USES \
436 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
438 /* If there are at most this number of ivs in the set, try removing unnecessary
439 ivs from the set always. */
441 #define ALWAYS_PRUNE_CAND_SET_BOUND \
442 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
444 /* The list of trees for that the decl_rtl field must be reset is stored
447 static vec
<tree
> decl_rtl_to_reset
;
449 static comp_cost
force_expr_to_var_cost (tree
, bool);
451 /* Number of uses recorded in DATA. */
453 static inline unsigned
454 n_iv_uses (struct ivopts_data
*data
)
456 return data
->iv_uses
.length ();
459 /* Ith use recorded in DATA. */
461 static inline struct iv_use
*
462 iv_use (struct ivopts_data
*data
, unsigned i
)
464 return data
->iv_uses
[i
];
467 /* Number of candidates recorded in DATA. */
469 static inline unsigned
470 n_iv_cands (struct ivopts_data
*data
)
472 return data
->iv_candidates
.length ();
475 /* Ith candidate recorded in DATA. */
477 static inline struct iv_cand
*
478 iv_cand (struct ivopts_data
*data
, unsigned i
)
480 return data
->iv_candidates
[i
];
483 /* The single loop exit if it dominates the latch, NULL otherwise. */
486 single_dom_exit (struct loop
*loop
)
488 edge exit
= single_exit (loop
);
493 if (!just_once_each_iteration_p (loop
, exit
->src
))
499 /* Dumps information about the induction variable IV to FILE. */
502 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
504 if (iv
->ssa_name
&& dump_name
)
506 fprintf (file
, "ssa name ");
507 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
508 fprintf (file
, "\n");
511 fprintf (file
, " type ");
512 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
513 fprintf (file
, "\n");
517 fprintf (file
, " base ");
518 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
519 fprintf (file
, "\n");
521 fprintf (file
, " step ");
522 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
523 fprintf (file
, "\n");
527 fprintf (file
, " invariant ");
528 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
529 fprintf (file
, "\n");
534 fprintf (file
, " base object ");
535 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
536 fprintf (file
, "\n");
540 fprintf (file
, " is a biv\n");
543 /* Dumps information about the USE to FILE. */
546 dump_use (FILE *file
, struct iv_use
*use
)
548 fprintf (file
, "use %d", use
->id
);
550 fprintf (file
, ".%d", use
->sub_id
);
552 fprintf (file
, "\n");
556 case USE_NONLINEAR_EXPR
:
557 fprintf (file
, " generic\n");
561 fprintf (file
, " address\n");
565 fprintf (file
, " compare\n");
572 fprintf (file
, " in statement ");
573 print_gimple_stmt (file
, use
->stmt
, 0, 0);
574 fprintf (file
, "\n");
576 fprintf (file
, " at position ");
578 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
579 fprintf (file
, "\n");
581 dump_iv (file
, use
->iv
, false);
583 if (use
->related_cands
)
585 fprintf (file
, " related candidates ");
586 dump_bitmap (file
, use
->related_cands
);
590 /* Dumps information about the uses to FILE. */
593 dump_uses (FILE *file
, struct ivopts_data
*data
)
598 for (i
= 0; i
< n_iv_uses (data
); i
++)
600 use
= iv_use (data
, i
);
603 dump_use (file
, use
);
607 fprintf (file
, "\n");
611 /* Dumps information about induction variable candidate CAND to FILE. */
614 dump_cand (FILE *file
, struct iv_cand
*cand
)
616 struct iv
*iv
= cand
->iv
;
618 fprintf (file
, "candidate %d%s\n",
619 cand
->id
, cand
->important
? " (important)" : "");
621 if (cand
->depends_on
)
623 fprintf (file
, " depends on ");
624 dump_bitmap (file
, cand
->depends_on
);
629 fprintf (file
, " final value replacement\n");
633 if (cand
->var_before
)
635 fprintf (file
, " var_before ");
636 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
637 fprintf (file
, "\n");
641 fprintf (file
, " var_after ");
642 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
643 fprintf (file
, "\n");
649 fprintf (file
, " incremented before exit test\n");
653 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
657 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
661 fprintf (file
, " incremented at end\n");
665 fprintf (file
, " original biv\n");
669 dump_iv (file
, iv
, false);
672 /* Returns the info for ssa version VER. */
674 static inline struct version_info
*
675 ver_info (struct ivopts_data
*data
, unsigned ver
)
677 return data
->version_info
+ ver
;
680 /* Returns the info for ssa name NAME. */
682 static inline struct version_info
*
683 name_info (struct ivopts_data
*data
, tree name
)
685 return ver_info (data
, SSA_NAME_VERSION (name
));
688 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
692 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
694 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
698 if (sbb
== loop
->latch
)
704 return stmt
== last_stmt (bb
);
707 /* Returns true if STMT if after the place where the original induction
708 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
709 if the positions are identical. */
712 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
714 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
715 basic_block stmt_bb
= gimple_bb (stmt
);
717 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
720 if (stmt_bb
!= cand_bb
)
724 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
726 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
729 /* Returns true if STMT if after the place where the induction variable
730 CAND is incremented in LOOP. */
733 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
741 return stmt_after_ip_normal_pos (loop
, stmt
);
745 return stmt_after_inc_pos (cand
, stmt
, false);
748 return stmt_after_inc_pos (cand
, stmt
, true);
755 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
758 abnormal_ssa_name_p (tree exp
)
763 if (TREE_CODE (exp
) != SSA_NAME
)
766 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
769 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
770 abnormal phi node. Callback for for_each_index. */
773 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
774 void *data ATTRIBUTE_UNUSED
)
776 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
778 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
780 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
784 return !abnormal_ssa_name_p (*index
);
787 /* Returns true if EXPR contains a ssa name that occurs in an
788 abnormal phi node. */
791 contains_abnormal_ssa_name_p (tree expr
)
794 enum tree_code_class codeclass
;
799 code
= TREE_CODE (expr
);
800 codeclass
= TREE_CODE_CLASS (code
);
802 if (code
== SSA_NAME
)
803 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
805 if (code
== INTEGER_CST
806 || is_gimple_min_invariant (expr
))
809 if (code
== ADDR_EXPR
)
810 return !for_each_index (&TREE_OPERAND (expr
, 0),
811 idx_contains_abnormal_ssa_name_p
,
814 if (code
== COND_EXPR
)
815 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
816 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
817 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
823 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
828 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
840 /* Returns the structure describing number of iterations determined from
841 EXIT of DATA->current_loop, or NULL if something goes wrong. */
843 static struct tree_niter_desc
*
844 niter_for_exit (struct ivopts_data
*data
, edge exit
)
846 struct tree_niter_desc
*desc
;
847 tree_niter_desc
**slot
;
851 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
855 slot
= data
->niters
->get (exit
);
859 /* Try to determine number of iterations. We cannot safely work with ssa
860 names that appear in phi nodes on abnormal edges, so that we do not
861 create overlapping life ranges for them (PR 27283). */
862 desc
= XNEW (struct tree_niter_desc
);
863 if (!number_of_iterations_exit (data
->current_loop
,
865 || contains_abnormal_ssa_name_p (desc
->niter
))
870 data
->niters
->put (exit
, desc
);
878 /* Returns the structure describing number of iterations determined from
879 single dominating exit of DATA->current_loop, or NULL if something
882 static struct tree_niter_desc
*
883 niter_for_single_dom_exit (struct ivopts_data
*data
)
885 edge exit
= single_dom_exit (data
->current_loop
);
890 return niter_for_exit (data
, exit
);
893 /* Initializes data structures used by the iv optimization pass, stored
897 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
899 data
->version_info_size
= 2 * num_ssa_names
;
900 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
901 data
->relevant
= BITMAP_ALLOC (NULL
);
902 data
->important_candidates
= BITMAP_ALLOC (NULL
);
903 data
->max_inv_id
= 0;
905 data
->iv_uses
.create (20);
906 data
->iv_candidates
.create (20);
907 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
908 data
->inv_expr_id
= 0;
909 data
->name_expansion_cache
= NULL
;
910 decl_rtl_to_reset
.create (20);
913 /* Returns a memory object to that EXPR points. In case we are able to
914 determine that it does not point to any such object, NULL is returned. */
917 determine_base_object (tree expr
)
919 enum tree_code code
= TREE_CODE (expr
);
922 /* If this is a pointer casted to any type, we need to determine
923 the base object for the pointer; so handle conversions before
924 throwing away non-pointer expressions. */
925 if (CONVERT_EXPR_P (expr
))
926 return determine_base_object (TREE_OPERAND (expr
, 0));
928 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
937 obj
= TREE_OPERAND (expr
, 0);
938 base
= get_base_address (obj
);
943 if (TREE_CODE (base
) == MEM_REF
)
944 return determine_base_object (TREE_OPERAND (base
, 0));
946 return fold_convert (ptr_type_node
,
947 build_fold_addr_expr (base
));
949 case POINTER_PLUS_EXPR
:
950 return determine_base_object (TREE_OPERAND (expr
, 0));
954 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
958 return fold_convert (ptr_type_node
, expr
);
962 /* Return true if address expression with non-DECL_P operand appears
966 contain_complex_addr_expr (tree expr
)
971 switch (TREE_CODE (expr
))
973 case POINTER_PLUS_EXPR
:
976 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
977 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
981 return (!DECL_P (TREE_OPERAND (expr
, 0)));
990 /* Allocates an induction variable with given initial value BASE and step STEP
991 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
994 alloc_iv (tree base
, tree step
, bool no_overflow
= false)
997 struct iv
*iv
= XCNEW (struct iv
);
998 gcc_assert (step
!= NULL_TREE
);
1000 /* Lower address expression in base except ones with DECL_P as operand.
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
1006 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1007 || contain_complex_addr_expr (expr
))
1010 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1011 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1015 iv
->base_object
= determine_base_object (base
);
1018 iv
->have_use_for
= false;
1020 iv
->ssa_name
= NULL_TREE
;
1021 iv
->no_overflow
= no_overflow
;
1026 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1027 doesn't overflow. */
1030 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1033 struct version_info
*info
= name_info (data
, iv
);
1035 gcc_assert (!info
->iv
);
1037 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1038 info
->iv
= alloc_iv (base
, step
, no_overflow
);
1039 info
->iv
->ssa_name
= iv
;
1042 /* Finds induction variable declaration for VAR. */
1045 get_iv (struct ivopts_data
*data
, tree var
)
1048 tree type
= TREE_TYPE (var
);
1050 if (!POINTER_TYPE_P (type
)
1051 && !INTEGRAL_TYPE_P (type
))
1054 if (!name_info (data
, var
)->iv
)
1056 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1059 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1060 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1063 return name_info (data
, var
)->iv
;
1066 /* Return the first non-invariant ssa var found in EXPR. */
1069 extract_single_var_from_expr (tree expr
)
1073 enum tree_code code
;
1075 if (!expr
|| is_gimple_min_invariant (expr
))
1078 code
= TREE_CODE (expr
);
1079 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1081 n
= TREE_OPERAND_LENGTH (expr
);
1082 for (i
= 0; i
< n
; i
++)
1084 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1090 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1093 /* Finds basic ivs. */
1096 find_bivs (struct ivopts_data
*data
)
1100 tree step
, type
, base
, stop
;
1102 struct loop
*loop
= data
->current_loop
;
1105 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1109 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1112 if (virtual_operand_p (PHI_RESULT (phi
)))
1115 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1118 if (integer_zerop (iv
.step
))
1122 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1123 /* Stop expanding iv base at the first ssa var referred by iv step.
1124 Ideally we should stop at any ssa var, because that's expensive
1125 and unusual to happen, we just do it on the first one.
1127 See PR64705 for the rationale. */
1128 stop
= extract_single_var_from_expr (step
);
1129 base
= expand_simple_operations (base
, stop
);
1130 if (contains_abnormal_ssa_name_p (base
)
1131 || contains_abnormal_ssa_name_p (step
))
1134 type
= TREE_TYPE (PHI_RESULT (phi
));
1135 base
= fold_convert (type
, base
);
1138 if (POINTER_TYPE_P (type
))
1139 step
= convert_to_ptrofftype (step
);
1141 step
= fold_convert (type
, step
);
1144 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1151 /* Marks basic ivs. */
1154 mark_bivs (struct ivopts_data
*data
)
1159 struct iv
*iv
, *incr_iv
;
1160 struct loop
*loop
= data
->current_loop
;
1161 basic_block incr_bb
;
1164 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1168 iv
= get_iv (data
, PHI_RESULT (phi
));
1172 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1173 def
= SSA_NAME_DEF_STMT (var
);
1174 /* Don't mark iv peeled from other one as biv. */
1176 && gimple_code (def
) == GIMPLE_PHI
1177 && gimple_bb (def
) == loop
->header
)
1180 incr_iv
= get_iv (data
, var
);
1184 /* If the increment is in the subloop, ignore it. */
1185 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1186 if (incr_bb
->loop_father
!= data
->current_loop
1187 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1191 incr_iv
->biv_p
= true;
1195 /* Checks whether STMT defines a linear induction variable and stores its
1196 parameters to IV. */
1199 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1202 struct loop
*loop
= data
->current_loop
;
1204 iv
->base
= NULL_TREE
;
1205 iv
->step
= NULL_TREE
;
1207 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1210 lhs
= gimple_assign_lhs (stmt
);
1211 if (TREE_CODE (lhs
) != SSA_NAME
)
1214 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1217 /* Stop expanding iv base at the first ssa var referred by iv step.
1218 Ideally we should stop at any ssa var, because that's expensive
1219 and unusual to happen, we just do it on the first one.
1221 See PR64705 for the rationale. */
1222 stop
= extract_single_var_from_expr (iv
->step
);
1223 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1224 if (contains_abnormal_ssa_name_p (iv
->base
)
1225 || contains_abnormal_ssa_name_p (iv
->step
))
1228 /* If STMT could throw, then do not consider STMT as defining a GIV.
1229 While this will suppress optimizations, we can not safely delete this
1230 GIV and associated statements, even if it appears it is not used. */
1231 if (stmt_could_throw_p (stmt
))
1237 /* Finds general ivs in statement STMT. */
1240 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1244 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1247 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1250 /* Finds general ivs in basic block BB. */
1253 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1255 gimple_stmt_iterator bsi
;
1257 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1258 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1261 /* Finds general ivs. */
1264 find_givs (struct ivopts_data
*data
)
1266 struct loop
*loop
= data
->current_loop
;
1267 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1270 for (i
= 0; i
< loop
->num_nodes
; i
++)
1271 find_givs_in_bb (data
, body
[i
]);
1275 /* For each ssa name defined in LOOP determines whether it is an induction
1276 variable and if so, its initial value and step. */
1279 find_induction_variables (struct ivopts_data
*data
)
1284 if (!find_bivs (data
))
1290 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1292 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1296 fprintf (dump_file
, " number of iterations ");
1297 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1298 if (!integer_zerop (niter
->may_be_zero
))
1300 fprintf (dump_file
, "; zero if ");
1301 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1303 fprintf (dump_file
, "\n\n");
1306 fprintf (dump_file
, "Induction variables:\n\n");
1308 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1310 if (ver_info (data
, i
)->iv
)
1311 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1318 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1319 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1320 is the const offset stripped from IV base. For uses of other types,
1321 ADDR_BASE and ADDR_OFFSET are zero by default. */
1323 static struct iv_use
*
1324 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1325 gimple stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1326 unsigned HOST_WIDE_INT addr_offset
= 0)
1328 struct iv_use
*use
= XCNEW (struct iv_use
);
1330 use
->id
= n_iv_uses (data
);
1332 use
->type
= use_type
;
1336 use
->related_cands
= BITMAP_ALLOC (NULL
);
1338 use
->addr_base
= addr_base
;
1339 use
->addr_offset
= addr_offset
;
1341 data
->iv_uses
.safe_push (use
);
1346 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1347 The sub use is recorded under the one whose use id is ID_GROUP. */
1349 static struct iv_use
*
1350 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1351 struct iv
*iv
, gimple stmt
, enum use_type use_type
,
1352 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1353 unsigned int id_group
)
1355 struct iv_use
*use
= XCNEW (struct iv_use
);
1356 struct iv_use
*group
= iv_use (data
, id_group
);
1358 use
->id
= group
->id
;
1360 use
->type
= use_type
;
1364 use
->related_cands
= NULL
;
1365 use
->addr_base
= addr_base
;
1366 use
->addr_offset
= addr_offset
;
1368 /* Sub use list is maintained in offset ascending order. */
1369 if (addr_offset
<= group
->addr_offset
)
1371 use
->related_cands
= group
->related_cands
;
1372 group
->related_cands
= NULL
;
1374 data
->iv_uses
[id_group
] = use
;
1382 group
= group
->next
;
1384 while (group
&& addr_offset
> group
->addr_offset
);
1385 use
->next
= pre
->next
;
1389 /* To avoid showing ssa name in the dumps, if it was not reset by the
1391 iv
->ssa_name
= NULL_TREE
;
1396 /* Checks whether OP is a loop-level invariant and if so, records it.
1397 NONLINEAR_USE is true if the invariant is used in a way we do not
1398 handle specially. */
1401 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1404 struct version_info
*info
;
1406 if (TREE_CODE (op
) != SSA_NAME
1407 || virtual_operand_p (op
))
1410 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1412 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1415 info
= name_info (data
, op
);
1417 info
->has_nonlin_use
|= nonlinear_use
;
1419 info
->inv_id
= ++data
->max_inv_id
;
1420 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1423 /* Checks whether the use OP is interesting and if so, records it. */
1425 static struct iv_use
*
1426 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1433 if (TREE_CODE (op
) != SSA_NAME
)
1436 iv
= get_iv (data
, op
);
1440 if (iv
->have_use_for
)
1442 use
= iv_use (data
, iv
->use_id
);
1444 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1448 if (integer_zerop (iv
->step
))
1450 record_invariant (data
, op
, true);
1453 iv
->have_use_for
= true;
1455 civ
= XNEW (struct iv
);
1458 stmt
= SSA_NAME_DEF_STMT (op
);
1459 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1460 || is_gimple_assign (stmt
));
1462 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1463 iv
->use_id
= use
->id
;
1468 /* Given a condition in statement STMT, checks whether it is a compare
1469 of an induction variable and an invariant. If this is the case,
1470 CONTROL_VAR is set to location of the iv, BOUND to the location of
1471 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1472 induction variable descriptions, and true is returned. If this is not
1473 the case, CONTROL_VAR and BOUND are set to the arguments of the
1474 condition and false is returned. */
1477 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1478 tree
**control_var
, tree
**bound
,
1479 struct iv
**iv_var
, struct iv
**iv_bound
)
1481 /* The objects returned when COND has constant operands. */
1482 static struct iv const_iv
;
1484 tree
*op0
= &zero
, *op1
= &zero
;
1485 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1488 if (gimple_code (stmt
) == GIMPLE_COND
)
1490 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1491 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1492 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1496 op0
= gimple_assign_rhs1_ptr (stmt
);
1497 op1
= gimple_assign_rhs2_ptr (stmt
);
1500 zero
= integer_zero_node
;
1501 const_iv
.step
= integer_zero_node
;
1503 if (TREE_CODE (*op0
) == SSA_NAME
)
1504 iv0
= get_iv (data
, *op0
);
1505 if (TREE_CODE (*op1
) == SSA_NAME
)
1506 iv1
= get_iv (data
, *op1
);
1508 /* Exactly one of the compared values must be an iv, and the other one must
1513 if (integer_zerop (iv0
->step
))
1515 /* Control variable may be on the other side. */
1516 std::swap (op0
, op1
);
1517 std::swap (iv0
, iv1
);
1519 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1534 /* Checks whether the condition in STMT is interesting and if so,
1538 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1540 tree
*var_p
, *bound_p
;
1541 struct iv
*var_iv
, *civ
;
1543 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1545 find_interesting_uses_op (data
, *var_p
);
1546 find_interesting_uses_op (data
, *bound_p
);
1550 civ
= XNEW (struct iv
);
1552 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1555 /* Returns the outermost loop EXPR is obviously invariant in
1556 relative to the loop LOOP, i.e. if all its operands are defined
1557 outside of the returned loop. Returns NULL if EXPR is not
1558 even obviously invariant in LOOP. */
1561 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1566 if (is_gimple_min_invariant (expr
))
1567 return current_loops
->tree_root
;
1569 if (TREE_CODE (expr
) == SSA_NAME
)
1571 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1574 if (flow_bb_inside_loop_p (loop
, def_bb
))
1576 return superloop_at_depth (loop
,
1577 loop_depth (def_bb
->loop_father
) + 1);
1580 return current_loops
->tree_root
;
1586 unsigned maxdepth
= 0;
1587 len
= TREE_OPERAND_LENGTH (expr
);
1588 for (i
= 0; i
< len
; i
++)
1590 struct loop
*ivloop
;
1591 if (!TREE_OPERAND (expr
, i
))
1594 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1597 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1600 return superloop_at_depth (loop
, maxdepth
);
1603 /* Returns true if expression EXPR is obviously invariant in LOOP,
1604 i.e. if all its operands are defined outside of the LOOP. LOOP
1605 should not be the function body. */
1608 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1613 gcc_assert (loop_depth (loop
) > 0);
1615 if (is_gimple_min_invariant (expr
))
1618 if (TREE_CODE (expr
) == SSA_NAME
)
1620 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1622 && flow_bb_inside_loop_p (loop
, def_bb
))
1631 len
= TREE_OPERAND_LENGTH (expr
);
1632 for (i
= 0; i
< len
; i
++)
1633 if (TREE_OPERAND (expr
, i
)
1634 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1640 /* Cumulates the steps of indices into DATA and replaces their values with the
1641 initial ones. Returns false when the value of the index cannot be determined.
1642 Callback for for_each_index. */
1644 struct ifs_ivopts_data
1646 struct ivopts_data
*ivopts_data
;
1652 idx_find_step (tree base
, tree
*idx
, void *data
)
1654 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1656 bool use_overflow_semantics
= false;
1657 tree step
, iv_base
, iv_step
, lbound
, off
;
1658 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1660 /* If base is a component ref, require that the offset of the reference
1662 if (TREE_CODE (base
) == COMPONENT_REF
)
1664 off
= component_ref_field_offset (base
);
1665 return expr_invariant_in_loop_p (loop
, off
);
1668 /* If base is array, first check whether we will be able to move the
1669 reference out of the loop (in order to take its address in strength
1670 reduction). In order for this to work we need both lower bound
1671 and step to be loop invariants. */
1672 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1674 /* Moreover, for a range, the size needs to be invariant as well. */
1675 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1676 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1679 step
= array_ref_element_size (base
);
1680 lbound
= array_ref_low_bound (base
);
1682 if (!expr_invariant_in_loop_p (loop
, step
)
1683 || !expr_invariant_in_loop_p (loop
, lbound
))
1687 if (TREE_CODE (*idx
) != SSA_NAME
)
1690 iv
= get_iv (dta
->ivopts_data
, *idx
);
1694 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1695 *&x[0], which is not folded and does not trigger the
1696 ARRAY_REF path below. */
1699 if (integer_zerop (iv
->step
))
1702 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1704 step
= array_ref_element_size (base
);
1706 /* We only handle addresses whose step is an integer constant. */
1707 if (TREE_CODE (step
) != INTEGER_CST
)
1711 /* The step for pointer arithmetics already is 1 byte. */
1712 step
= size_one_node
;
1716 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1717 use_overflow_semantics
= true;
1719 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1720 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1721 use_overflow_semantics
))
1723 /* The index might wrap. */
1727 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1728 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1733 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1734 object is passed to it in DATA. */
1737 idx_record_use (tree base
, tree
*idx
,
1740 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1741 find_interesting_uses_op (data
, *idx
);
1742 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1744 find_interesting_uses_op (data
, array_ref_element_size (base
));
1745 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1750 /* If we can prove that TOP = cst * BOT for some constant cst,
1751 store cst to MUL and return true. Otherwise return false.
1752 The returned value is always sign-extended, regardless of the
1753 signedness of TOP and BOT. */
1756 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1759 enum tree_code code
;
1760 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1761 widest_int res
, p0
, p1
;
1766 if (operand_equal_p (top
, bot
, 0))
1772 code
= TREE_CODE (top
);
1776 mby
= TREE_OPERAND (top
, 1);
1777 if (TREE_CODE (mby
) != INTEGER_CST
)
1780 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1783 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1788 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1789 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1792 if (code
== MINUS_EXPR
)
1794 *mul
= wi::sext (p0
+ p1
, precision
);
1798 if (TREE_CODE (bot
) != INTEGER_CST
)
1801 p0
= widest_int::from (top
, SIGNED
);
1802 p1
= widest_int::from (bot
, SIGNED
);
1805 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1813 /* Return true if memory reference REF with step STEP may be unaligned. */
1816 may_be_unaligned_p (tree ref
, tree step
)
1818 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1819 thus they are not misaligned. */
1820 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1823 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1824 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1825 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1827 unsigned HOST_WIDE_INT bitpos
;
1828 unsigned int ref_align
;
1829 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1830 if (ref_align
< align
1831 || (bitpos
% align
) != 0
1832 || (bitpos
% BITS_PER_UNIT
) != 0)
1835 unsigned int trailing_zeros
= tree_ctz (step
);
1836 if (trailing_zeros
< HOST_BITS_PER_INT
1837 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1843 /* Return true if EXPR may be non-addressable. */
1846 may_be_nonaddressable_p (tree expr
)
1848 switch (TREE_CODE (expr
))
1850 case TARGET_MEM_REF
:
1851 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1852 target, thus they are always addressable. */
1856 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1857 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1859 case VIEW_CONVERT_EXPR
:
1860 /* This kind of view-conversions may wrap non-addressable objects
1861 and make them look addressable. After some processing the
1862 non-addressability may be uncovered again, causing ADDR_EXPRs
1863 of inappropriate objects to be built. */
1864 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1865 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1868 /* ... fall through ... */
1871 case ARRAY_RANGE_REF
:
1872 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1885 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
1887 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1888 If there is an existing use which has same stripped iv base and step,
1889 this function records this one as a sub use to that; otherwise records
1890 it as a normal one. */
1892 static struct iv_use
*
1893 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1894 struct iv
*iv
, gimple stmt
, enum use_type use_type
)
1899 unsigned HOST_WIDE_INT addr_offset
;
1901 /* Only support sub use for address type uses, that is, with base
1903 if (!iv
->base_object
)
1904 return record_use (data
, use_p
, iv
, stmt
, use_type
);
1906 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1907 for (i
= 0; i
< n_iv_uses (data
); i
++)
1909 use
= iv_use (data
, i
);
1910 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
1913 /* Check if it has the same stripped base and step. */
1914 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1915 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1916 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1920 if (i
== n_iv_uses (data
))
1921 return record_use (data
, use_p
, iv
, stmt
,
1922 use_type
, addr_base
, addr_offset
);
1924 return record_sub_use (data
, use_p
, iv
, stmt
,
1925 use_type
, addr_base
, addr_offset
, i
);
1928 /* Finds addresses in *OP_P inside STMT. */
1931 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1933 tree base
= *op_p
, step
= size_zero_node
;
1935 struct ifs_ivopts_data ifs_ivopts_data
;
1937 /* Do not play with volatile memory references. A bit too conservative,
1938 perhaps, but safe. */
1939 if (gimple_has_volatile_ops (stmt
))
1942 /* Ignore bitfields for now. Not really something terribly complicated
1944 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1947 base
= unshare_expr (base
);
1949 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1951 tree type
= build_pointer_type (TREE_TYPE (base
));
1955 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1957 civ
= get_iv (data
, TMR_BASE (base
));
1961 TMR_BASE (base
) = civ
->base
;
1964 if (TMR_INDEX2 (base
)
1965 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1967 civ
= get_iv (data
, TMR_INDEX2 (base
));
1971 TMR_INDEX2 (base
) = civ
->base
;
1974 if (TMR_INDEX (base
)
1975 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1977 civ
= get_iv (data
, TMR_INDEX (base
));
1981 TMR_INDEX (base
) = civ
->base
;
1986 if (TMR_STEP (base
))
1987 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1989 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1993 if (integer_zerop (step
))
1995 base
= tree_mem_ref_addr (type
, base
);
1999 ifs_ivopts_data
.ivopts_data
= data
;
2000 ifs_ivopts_data
.stmt
= stmt
;
2001 ifs_ivopts_data
.step
= size_zero_node
;
2002 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2003 || integer_zerop (ifs_ivopts_data
.step
))
2005 step
= ifs_ivopts_data
.step
;
2007 /* Check that the base expression is addressable. This needs
2008 to be done after substituting bases of IVs into it. */
2009 if (may_be_nonaddressable_p (base
))
2012 /* Moreover, on strict alignment platforms, check that it is
2013 sufficiently aligned. */
2014 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2017 base
= build_fold_addr_expr (base
);
2019 /* Substituting bases of IVs into the base expression might
2020 have caused folding opportunities. */
2021 if (TREE_CODE (base
) == ADDR_EXPR
)
2023 tree
*ref
= &TREE_OPERAND (base
, 0);
2024 while (handled_component_p (*ref
))
2025 ref
= &TREE_OPERAND (*ref
, 0);
2026 if (TREE_CODE (*ref
) == MEM_REF
)
2028 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2029 TREE_OPERAND (*ref
, 0),
2030 TREE_OPERAND (*ref
, 1));
2037 civ
= alloc_iv (base
, step
);
2038 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2042 for_each_index (op_p
, idx_record_use
, data
);
2045 /* Finds and records invariants used in STMT. */
2048 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
2051 use_operand_p use_p
;
2054 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2056 op
= USE_FROM_PTR (use_p
);
2057 record_invariant (data
, op
, false);
2061 /* Finds interesting uses of induction variables in the statement STMT. */
2064 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
2067 tree op
, *lhs
, *rhs
;
2069 use_operand_p use_p
;
2070 enum tree_code code
;
2072 find_invariants_stmt (data
, stmt
);
2074 if (gimple_code (stmt
) == GIMPLE_COND
)
2076 find_interesting_uses_cond (data
, stmt
);
2080 if (is_gimple_assign (stmt
))
2082 lhs
= gimple_assign_lhs_ptr (stmt
);
2083 rhs
= gimple_assign_rhs1_ptr (stmt
);
2085 if (TREE_CODE (*lhs
) == SSA_NAME
)
2087 /* If the statement defines an induction variable, the uses are not
2088 interesting by themselves. */
2090 iv
= get_iv (data
, *lhs
);
2092 if (iv
&& !integer_zerop (iv
->step
))
2096 code
= gimple_assign_rhs_code (stmt
);
2097 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2098 && (REFERENCE_CLASS_P (*rhs
)
2099 || is_gimple_val (*rhs
)))
2101 if (REFERENCE_CLASS_P (*rhs
))
2102 find_interesting_uses_address (data
, stmt
, rhs
);
2104 find_interesting_uses_op (data
, *rhs
);
2106 if (REFERENCE_CLASS_P (*lhs
))
2107 find_interesting_uses_address (data
, stmt
, lhs
);
2110 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2112 find_interesting_uses_cond (data
, stmt
);
2116 /* TODO -- we should also handle address uses of type
2118 memory = call (whatever);
2125 if (gimple_code (stmt
) == GIMPLE_PHI
2126 && gimple_bb (stmt
) == data
->current_loop
->header
)
2128 iv
= get_iv (data
, PHI_RESULT (stmt
));
2130 if (iv
&& !integer_zerop (iv
->step
))
2134 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2136 op
= USE_FROM_PTR (use_p
);
2138 if (TREE_CODE (op
) != SSA_NAME
)
2141 iv
= get_iv (data
, op
);
2145 find_interesting_uses_op (data
, op
);
2149 /* Finds interesting uses of induction variables outside of loops
2150 on loop exit edge EXIT. */
2153 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2159 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2162 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2163 if (!virtual_operand_p (def
))
2164 find_interesting_uses_op (data
, def
);
2168 /* Finds uses of the induction variables that are interesting. */
2171 find_interesting_uses (struct ivopts_data
*data
)
2174 gimple_stmt_iterator bsi
;
2175 basic_block
*body
= get_loop_body (data
->current_loop
);
2177 struct version_info
*info
;
2180 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2181 fprintf (dump_file
, "Uses:\n\n");
2183 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2188 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2189 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2190 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2191 find_interesting_uses_outside (data
, e
);
2193 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2194 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2195 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2196 if (!is_gimple_debug (gsi_stmt (bsi
)))
2197 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2200 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2204 fprintf (dump_file
, "\n");
2206 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2208 info
= ver_info (data
, i
);
2211 fprintf (dump_file
, " ");
2212 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2213 fprintf (dump_file
, " is invariant (%d)%s\n",
2214 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2218 fprintf (dump_file
, "\n");
2224 /* Compute maximum offset of [base + offset] addressing mode
2225 for memory reference represented by USE. */
2227 static HOST_WIDE_INT
2228 compute_max_addr_offset (struct iv_use
*use
)
2232 HOST_WIDE_INT i
, off
;
2233 unsigned list_index
, num
;
2235 machine_mode mem_mode
, addr_mode
;
2236 static vec
<HOST_WIDE_INT
> max_offset_list
;
2238 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2239 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2241 num
= max_offset_list
.length ();
2242 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2243 if (list_index
>= num
)
2245 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2246 for (; num
< max_offset_list
.length (); num
++)
2247 max_offset_list
[num
] = -1;
2250 off
= max_offset_list
[list_index
];
2254 addr_mode
= targetm
.addr_space
.address_mode (as
);
2255 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2256 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2258 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2259 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2260 width
= HOST_BITS_PER_WIDE_INT
- 1;
2262 for (i
= width
; i
> 0; i
--)
2264 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2265 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2266 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2269 /* For some strict-alignment targets, the offset must be naturally
2270 aligned. Try an aligned offset if mem_mode is not QImode. */
2271 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2272 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2274 off
-= GET_MODE_SIZE (mem_mode
);
2275 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2276 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2283 max_offset_list
[list_index
] = off
;
2287 /* Check if all small groups should be split. Return true if and
2290 1) At least one groups contain two uses with different offsets.
2291 2) No group contains more than two uses with different offsets.
2293 Return false otherwise. We want to split such groups because:
2295 1) Small groups don't have much benefit and may interfer with
2296 general candidate selection.
2297 2) Size for problem with only small groups is usually small and
2298 general algorithm can handle it well.
2300 TODO -- Above claim may not hold when auto increment is supported. */
2303 split_all_small_groups (struct ivopts_data
*data
)
2305 bool split_p
= false;
2306 unsigned int i
, n
, distinct
;
2307 struct iv_use
*pre
, *use
;
2309 n
= n_iv_uses (data
);
2310 for (i
= 0; i
< n
; i
++)
2312 use
= iv_use (data
, i
);
2317 gcc_assert (use
->type
== USE_ADDRESS
);
2318 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2320 if (pre
->addr_offset
!= use
->addr_offset
)
2333 /* For each group of address type uses, this function further groups
2334 these uses according to the maximum offset supported by target's
2335 [base + offset] addressing mode. */
2338 group_address_uses (struct ivopts_data
*data
)
2340 HOST_WIDE_INT max_offset
= -1;
2341 unsigned int i
, n
, sub_id
;
2342 struct iv_use
*pre
, *use
;
2343 unsigned HOST_WIDE_INT addr_offset_first
;
2345 /* Reset max offset to split all small groups. */
2346 if (split_all_small_groups (data
))
2349 n
= n_iv_uses (data
);
2350 for (i
= 0; i
< n
; i
++)
2352 use
= iv_use (data
, i
);
2356 gcc_assert (use
->type
== USE_ADDRESS
);
2357 if (max_offset
!= 0)
2358 max_offset
= compute_max_addr_offset (use
);
2363 addr_offset_first
= use
->addr_offset
;
2364 /* Only uses with offset that can fit in offset part against
2365 the first use can be grouped together. */
2366 for (pre
= use
, use
= use
->next
;
2367 use
&& (use
->addr_offset
- addr_offset_first
2368 <= (unsigned HOST_WIDE_INT
) max_offset
);
2369 pre
= use
, use
= use
->next
)
2372 use
->sub_id
= ++sub_id
;
2375 /* Break the list and create new group. */
2379 use
->id
= n_iv_uses (data
);
2380 use
->related_cands
= BITMAP_ALLOC (NULL
);
2381 data
->iv_uses
.safe_push (use
);
2386 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2387 dump_uses (dump_file
, data
);
2390 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2391 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2392 we are at the top-level of the processed address. */
2395 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2396 HOST_WIDE_INT
*offset
)
2398 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2399 enum tree_code code
;
2400 tree type
, orig_type
= TREE_TYPE (expr
);
2401 HOST_WIDE_INT off0
, off1
, st
;
2402 tree orig_expr
= expr
;
2406 type
= TREE_TYPE (expr
);
2407 code
= TREE_CODE (expr
);
2413 if (!cst_and_fits_in_hwi (expr
)
2414 || integer_zerop (expr
))
2417 *offset
= int_cst_value (expr
);
2418 return build_int_cst (orig_type
, 0);
2420 case POINTER_PLUS_EXPR
:
2423 op0
= TREE_OPERAND (expr
, 0);
2424 op1
= TREE_OPERAND (expr
, 1);
2426 op0
= strip_offset_1 (op0
, false, false, &off0
);
2427 op1
= strip_offset_1 (op1
, false, false, &off1
);
2429 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2430 if (op0
== TREE_OPERAND (expr
, 0)
2431 && op1
== TREE_OPERAND (expr
, 1))
2434 if (integer_zerop (op1
))
2436 else if (integer_zerop (op0
))
2438 if (code
== MINUS_EXPR
)
2439 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2444 expr
= fold_build2 (code
, type
, op0
, op1
);
2446 return fold_convert (orig_type
, expr
);
2449 op1
= TREE_OPERAND (expr
, 1);
2450 if (!cst_and_fits_in_hwi (op1
))
2453 op0
= TREE_OPERAND (expr
, 0);
2454 op0
= strip_offset_1 (op0
, false, false, &off0
);
2455 if (op0
== TREE_OPERAND (expr
, 0))
2458 *offset
= off0
* int_cst_value (op1
);
2459 if (integer_zerop (op0
))
2462 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2464 return fold_convert (orig_type
, expr
);
2467 case ARRAY_RANGE_REF
:
2471 step
= array_ref_element_size (expr
);
2472 if (!cst_and_fits_in_hwi (step
))
2475 st
= int_cst_value (step
);
2476 op1
= TREE_OPERAND (expr
, 1);
2477 op1
= strip_offset_1 (op1
, false, false, &off1
);
2478 *offset
= off1
* st
;
2481 && integer_zerop (op1
))
2483 /* Strip the component reference completely. */
2484 op0
= TREE_OPERAND (expr
, 0);
2485 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2498 tmp
= component_ref_field_offset (expr
);
2499 field
= TREE_OPERAND (expr
, 1);
2501 && cst_and_fits_in_hwi (tmp
)
2502 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2504 HOST_WIDE_INT boffset
, abs_off
;
2506 /* Strip the component reference completely. */
2507 op0
= TREE_OPERAND (expr
, 0);
2508 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2509 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2510 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2514 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2521 op0
= TREE_OPERAND (expr
, 0);
2522 op0
= strip_offset_1 (op0
, true, true, &off0
);
2525 if (op0
== TREE_OPERAND (expr
, 0))
2528 expr
= build_fold_addr_expr (op0
);
2529 return fold_convert (orig_type
, expr
);
2532 /* ??? Offset operand? */
2533 inside_addr
= false;
2540 /* Default handling of expressions for that we want to recurse into
2541 the first operand. */
2542 op0
= TREE_OPERAND (expr
, 0);
2543 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2546 if (op0
== TREE_OPERAND (expr
, 0)
2547 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2550 expr
= copy_node (expr
);
2551 TREE_OPERAND (expr
, 0) = op0
;
2553 TREE_OPERAND (expr
, 1) = op1
;
2555 /* Inside address, we might strip the top level component references,
2556 thus changing type of the expression. Handling of ADDR_EXPR
2558 expr
= fold_convert (orig_type
, expr
);
2563 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2566 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2569 tree core
= strip_offset_1 (expr
, false, false, &off
);
2574 /* Returns variant of TYPE that can be used as base for different uses.
2575 We return unsigned type with the same precision, which avoids problems
2579 generic_type_for (tree type
)
2581 if (POINTER_TYPE_P (type
))
2582 return unsigned_type_for (type
);
2584 if (TYPE_UNSIGNED (type
))
2587 return unsigned_type_for (type
);
2590 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2591 the bitmap to that we should store it. */
2593 static struct ivopts_data
*fd_ivopts_data
;
2595 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2597 bitmap
*depends_on
= (bitmap
*) data
;
2598 struct version_info
*info
;
2600 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2602 info
= name_info (fd_ivopts_data
, *expr_p
);
2604 if (!info
->inv_id
|| info
->has_nonlin_use
)
2608 *depends_on
= BITMAP_ALLOC (NULL
);
2609 bitmap_set_bit (*depends_on
, info
->inv_id
);
2614 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2615 position to POS. If USE is not NULL, the candidate is set as related to
2616 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2617 replacement of the final value of the iv by a direct computation. */
2619 static struct iv_cand
*
2620 add_candidate_1 (struct ivopts_data
*data
,
2621 tree base
, tree step
, bool important
, enum iv_position pos
,
2622 struct iv_use
*use
, gimple incremented_at
)
2625 struct iv_cand
*cand
= NULL
;
2626 tree type
, orig_type
;
2628 /* For non-original variables, make sure their values are computed in a type
2629 that does not invoke undefined behavior on overflows (since in general,
2630 we cannot prove that these induction variables are non-wrapping). */
2631 if (pos
!= IP_ORIGINAL
)
2633 orig_type
= TREE_TYPE (base
);
2634 type
= generic_type_for (orig_type
);
2635 if (type
!= orig_type
)
2637 base
= fold_convert (type
, base
);
2638 step
= fold_convert (type
, step
);
2642 for (i
= 0; i
< n_iv_cands (data
); i
++)
2644 cand
= iv_cand (data
, i
);
2646 if (cand
->pos
!= pos
)
2649 if (cand
->incremented_at
!= incremented_at
2650 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2651 && cand
->ainc_use
!= use
))
2665 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2666 && operand_equal_p (step
, cand
->iv
->step
, 0)
2667 && (TYPE_PRECISION (TREE_TYPE (base
))
2668 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2672 if (i
== n_iv_cands (data
))
2674 cand
= XCNEW (struct iv_cand
);
2680 cand
->iv
= alloc_iv (base
, step
);
2683 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2685 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2686 cand
->var_after
= cand
->var_before
;
2688 cand
->important
= important
;
2689 cand
->incremented_at
= incremented_at
;
2690 data
->iv_candidates
.safe_push (cand
);
2693 && TREE_CODE (step
) != INTEGER_CST
)
2695 fd_ivopts_data
= data
;
2696 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2699 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2700 cand
->ainc_use
= use
;
2702 cand
->ainc_use
= NULL
;
2704 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2705 dump_cand (dump_file
, cand
);
2708 if (important
&& !cand
->important
)
2710 cand
->important
= true;
2711 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2712 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2717 bitmap_set_bit (use
->related_cands
, i
);
2718 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2719 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2726 /* Returns true if incrementing the induction variable at the end of the LOOP
2729 The purpose is to avoid splitting latch edge with a biv increment, thus
2730 creating a jump, possibly confusing other optimization passes and leaving
2731 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2732 is not available (so we do not have a better alternative), or if the latch
2733 edge is already nonempty. */
2736 allow_ip_end_pos_p (struct loop
*loop
)
2738 if (!ip_normal_pos (loop
))
2741 if (!empty_block_p (ip_end_pos (loop
)))
2747 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2748 Important field is set to IMPORTANT. */
2751 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2752 bool important
, struct iv_use
*use
)
2754 basic_block use_bb
= gimple_bb (use
->stmt
);
2755 machine_mode mem_mode
;
2756 unsigned HOST_WIDE_INT cstepi
;
2758 /* If we insert the increment in any position other than the standard
2759 ones, we must ensure that it is incremented once per iteration.
2760 It must not be in an inner nested loop, or one side of an if
2762 if (use_bb
->loop_father
!= data
->current_loop
2763 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2764 || stmt_could_throw_p (use
->stmt
)
2765 || !cst_and_fits_in_hwi (step
))
2768 cstepi
= int_cst_value (step
);
2770 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2771 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2772 || USE_STORE_PRE_INCREMENT (mem_mode
))
2773 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2774 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2775 || USE_STORE_PRE_DECREMENT (mem_mode
))
2776 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2778 enum tree_code code
= MINUS_EXPR
;
2780 tree new_step
= step
;
2782 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2784 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2785 code
= POINTER_PLUS_EXPR
;
2788 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2789 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2790 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2793 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2794 || USE_STORE_POST_INCREMENT (mem_mode
))
2795 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2796 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2797 || USE_STORE_POST_DECREMENT (mem_mode
))
2798 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2800 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2805 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2806 position to POS. If USE is not NULL, the candidate is set as related to
2807 it. The candidate computation is scheduled on all available positions. */
2810 add_candidate (struct ivopts_data
*data
,
2811 tree base
, tree step
, bool important
, struct iv_use
*use
)
2813 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
2815 if (ip_normal_pos (data
->current_loop
))
2816 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2817 if (ip_end_pos (data
->current_loop
)
2818 && allow_ip_end_pos_p (data
->current_loop
))
2819 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2821 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2822 add_autoinc_candidates (data
, base
, step
, important
, use
);
2825 /* Adds standard iv candidates. */
2828 add_standard_iv_candidates (struct ivopts_data
*data
)
2830 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2832 /* The same for a double-integer type if it is still fast enough. */
2834 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2835 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2836 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2837 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2839 /* The same for a double-integer type if it is still fast enough. */
2841 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2842 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2843 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2844 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2848 /* Adds candidates bases on the old induction variable IV. */
2851 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2855 struct iv_cand
*cand
;
2857 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2859 /* The same, but with initial value zero. */
2860 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2861 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2863 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2864 iv
->step
, true, NULL
);
2866 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2867 if (gimple_code (phi
) == GIMPLE_PHI
)
2869 /* Additionally record the possibility of leaving the original iv
2871 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2872 /* Don't add candidate if it's from another PHI node because
2873 it's an affine iv appearing in the form of PEELED_CHREC. */
2874 phi
= SSA_NAME_DEF_STMT (def
);
2875 if (gimple_code (phi
) != GIMPLE_PHI
)
2877 cand
= add_candidate_1 (data
,
2878 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2879 SSA_NAME_DEF_STMT (def
));
2880 cand
->var_before
= iv
->ssa_name
;
2881 cand
->var_after
= def
;
2884 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2888 /* Adds candidates based on the old induction variables. */
2891 add_old_ivs_candidates (struct ivopts_data
*data
)
2897 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2899 iv
= ver_info (data
, i
)->iv
;
2900 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2901 add_old_iv_candidates (data
, iv
);
2905 /* Adds candidates based on the value of the induction variable IV and USE. */
2908 add_iv_value_candidates (struct ivopts_data
*data
,
2909 struct iv
*iv
, struct iv_use
*use
)
2911 unsigned HOST_WIDE_INT offset
;
2915 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2917 /* The same, but with initial value zero. Make such variable important,
2918 since it is generic enough so that possibly many uses may be based
2920 basetype
= TREE_TYPE (iv
->base
);
2921 if (POINTER_TYPE_P (basetype
))
2922 basetype
= sizetype
;
2923 add_candidate (data
, build_int_cst (basetype
, 0),
2924 iv
->step
, true, use
);
2926 /* Third, try removing the constant offset. Make sure to even
2927 add a candidate for &a[0] vs. (T *)&a. */
2928 base
= strip_offset (iv
->base
, &offset
);
2930 || base
!= iv
->base
)
2931 add_candidate (data
, base
, iv
->step
, false, use
);
2934 /* Adds candidates based on the uses. */
2937 add_derived_ivs_candidates (struct ivopts_data
*data
)
2941 for (i
= 0; i
< n_iv_uses (data
); i
++)
2943 struct iv_use
*use
= iv_use (data
, i
);
2950 case USE_NONLINEAR_EXPR
:
2953 /* Just add the ivs based on the value of the iv used here. */
2954 add_iv_value_candidates (data
, use
->iv
, use
);
2963 /* Record important candidates and add them to related_cands bitmaps
2967 record_important_candidates (struct ivopts_data
*data
)
2972 for (i
= 0; i
< n_iv_cands (data
); i
++)
2974 struct iv_cand
*cand
= iv_cand (data
, i
);
2976 if (cand
->important
)
2977 bitmap_set_bit (data
->important_candidates
, i
);
2980 data
->consider_all_candidates
= (n_iv_cands (data
)
2981 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2983 if (data
->consider_all_candidates
)
2985 /* We will not need "related_cands" bitmaps in this case,
2986 so release them to decrease peak memory consumption. */
2987 for (i
= 0; i
< n_iv_uses (data
); i
++)
2989 use
= iv_use (data
, i
);
2990 BITMAP_FREE (use
->related_cands
);
2995 /* Add important candidates to the related_cands bitmaps. */
2996 for (i
= 0; i
< n_iv_uses (data
); i
++)
2997 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2998 data
->important_candidates
);
3002 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3003 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3004 we allocate a simple list to every use. */
3007 alloc_use_cost_map (struct ivopts_data
*data
)
3009 unsigned i
, size
, s
;
3011 for (i
= 0; i
< n_iv_uses (data
); i
++)
3013 struct iv_use
*use
= iv_use (data
, i
);
3015 if (data
->consider_all_candidates
)
3016 size
= n_iv_cands (data
);
3019 s
= bitmap_count_bits (use
->related_cands
);
3021 /* Round up to the power of two, so that moduling by it is fast. */
3022 size
= s
? (1 << ceil_log2 (s
)) : 1;
3025 use
->n_map_members
= size
;
3026 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3030 /* Returns description of computation cost of expression whose runtime
3031 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3034 new_cost (unsigned runtime
, unsigned complexity
)
3038 cost
.cost
= runtime
;
3039 cost
.complexity
= complexity
;
3044 /* Returns true if COST is infinite. */
3047 infinite_cost_p (comp_cost cost
)
3049 return cost
.cost
== INFTY
;
3052 /* Adds costs COST1 and COST2. */
3055 add_costs (comp_cost cost1
, comp_cost cost2
)
3057 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3058 return infinite_cost
;
3060 cost1
.cost
+= cost2
.cost
;
3061 cost1
.complexity
+= cost2
.complexity
;
3065 /* Subtracts costs COST1 and COST2. */
3068 sub_costs (comp_cost cost1
, comp_cost cost2
)
3070 cost1
.cost
-= cost2
.cost
;
3071 cost1
.complexity
-= cost2
.complexity
;
3076 /* Returns a negative number if COST1 < COST2, a positive number if
3077 COST1 > COST2, and 0 if COST1 = COST2. */
3080 compare_costs (comp_cost cost1
, comp_cost cost2
)
3082 if (cost1
.cost
== cost2
.cost
)
3083 return cost1
.complexity
- cost2
.complexity
;
3085 return cost1
.cost
- cost2
.cost
;
3088 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3089 on invariants DEPENDS_ON and that the value used in expressing it
3090 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3093 set_use_iv_cost (struct ivopts_data
*data
,
3094 struct iv_use
*use
, struct iv_cand
*cand
,
3095 comp_cost cost
, bitmap depends_on
, tree value
,
3096 enum tree_code comp
, int inv_expr_id
)
3100 if (infinite_cost_p (cost
))
3102 BITMAP_FREE (depends_on
);
3106 if (data
->consider_all_candidates
)
3108 use
->cost_map
[cand
->id
].cand
= cand
;
3109 use
->cost_map
[cand
->id
].cost
= cost
;
3110 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3111 use
->cost_map
[cand
->id
].value
= value
;
3112 use
->cost_map
[cand
->id
].comp
= comp
;
3113 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3117 /* n_map_members is a power of two, so this computes modulo. */
3118 s
= cand
->id
& (use
->n_map_members
- 1);
3119 for (i
= s
; i
< use
->n_map_members
; i
++)
3120 if (!use
->cost_map
[i
].cand
)
3122 for (i
= 0; i
< s
; i
++)
3123 if (!use
->cost_map
[i
].cand
)
3129 use
->cost_map
[i
].cand
= cand
;
3130 use
->cost_map
[i
].cost
= cost
;
3131 use
->cost_map
[i
].depends_on
= depends_on
;
3132 use
->cost_map
[i
].value
= value
;
3133 use
->cost_map
[i
].comp
= comp
;
3134 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3137 /* Gets cost of (USE, CANDIDATE) pair. */
3139 static struct cost_pair
*
3140 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3141 struct iv_cand
*cand
)
3144 struct cost_pair
*ret
;
3149 if (data
->consider_all_candidates
)
3151 ret
= use
->cost_map
+ cand
->id
;
3158 /* n_map_members is a power of two, so this computes modulo. */
3159 s
= cand
->id
& (use
->n_map_members
- 1);
3160 for (i
= s
; i
< use
->n_map_members
; i
++)
3161 if (use
->cost_map
[i
].cand
== cand
)
3162 return use
->cost_map
+ i
;
3163 else if (use
->cost_map
[i
].cand
== NULL
)
3165 for (i
= 0; i
< s
; i
++)
3166 if (use
->cost_map
[i
].cand
== cand
)
3167 return use
->cost_map
+ i
;
3168 else if (use
->cost_map
[i
].cand
== NULL
)
3174 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3176 produce_memory_decl_rtl (tree obj
, int *regno
)
3178 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3179 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3183 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3185 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3186 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3187 SET_SYMBOL_REF_DECL (x
, obj
);
3188 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3189 set_mem_addr_space (x
, as
);
3190 targetm
.encode_section_info (obj
, x
, true);
3194 x
= gen_raw_REG (address_mode
, (*regno
)++);
3195 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3196 set_mem_addr_space (x
, as
);
3202 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3203 walk_tree. DATA contains the actual fake register number. */
3206 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3208 tree obj
= NULL_TREE
;
3210 int *regno
= (int *) data
;
3212 switch (TREE_CODE (*expr_p
))
3215 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3216 handled_component_p (*expr_p
);
3217 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3220 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3221 x
= produce_memory_decl_rtl (obj
, regno
);
3226 obj
= SSA_NAME_VAR (*expr_p
);
3227 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3230 if (!DECL_RTL_SET_P (obj
))
3231 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3240 if (DECL_RTL_SET_P (obj
))
3243 if (DECL_MODE (obj
) == BLKmode
)
3244 x
= produce_memory_decl_rtl (obj
, regno
);
3246 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3256 decl_rtl_to_reset
.safe_push (obj
);
3257 SET_DECL_RTL (obj
, x
);
3263 /* Determines cost of the computation of EXPR. */
3266 computation_cost (tree expr
, bool speed
)
3270 tree type
= TREE_TYPE (expr
);
3272 /* Avoid using hard regs in ways which may be unsupported. */
3273 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3274 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3275 enum node_frequency real_frequency
= node
->frequency
;
3277 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3278 crtl
->maybe_hot_insn_p
= speed
;
3279 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3281 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3284 default_rtl_profile ();
3285 node
->frequency
= real_frequency
;
3287 cost
= seq_cost (seq
, speed
);
3289 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3290 TYPE_ADDR_SPACE (type
), speed
);
3291 else if (!REG_P (rslt
))
3292 cost
+= set_src_cost (rslt
, speed
);
3297 /* Returns variable containing the value of candidate CAND at statement AT. */
3300 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3302 if (stmt_after_increment (loop
, cand
, stmt
))
3303 return cand
->var_after
;
3305 return cand
->var_before
;
3308 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3309 same precision that is at least as wide as the precision of TYPE, stores
3310 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3314 determine_common_wider_type (tree
*a
, tree
*b
)
3316 tree wider_type
= NULL
;
3318 tree atype
= TREE_TYPE (*a
);
3320 if (CONVERT_EXPR_P (*a
))
3322 suba
= TREE_OPERAND (*a
, 0);
3323 wider_type
= TREE_TYPE (suba
);
3324 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3330 if (CONVERT_EXPR_P (*b
))
3332 subb
= TREE_OPERAND (*b
, 0);
3333 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3344 /* Determines the expression by that USE is expressed from induction variable
3345 CAND at statement AT in LOOP. The expression is stored in a decomposed
3346 form into AFF. Returns false if USE cannot be expressed using CAND. */
3349 get_computation_aff (struct loop
*loop
,
3350 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3351 struct aff_tree
*aff
)
3353 tree ubase
= use
->iv
->base
;
3354 tree ustep
= use
->iv
->step
;
3355 tree cbase
= cand
->iv
->base
;
3356 tree cstep
= cand
->iv
->step
, cstep_common
;
3357 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3358 tree common_type
, var
;
3360 aff_tree cbase_aff
, var_aff
;
3363 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3365 /* We do not have a precision to express the values of use. */
3369 var
= var_at_stmt (loop
, cand
, at
);
3370 uutype
= unsigned_type_for (utype
);
3372 /* If the conversion is not noop, perform it. */
3373 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3375 cstep
= fold_convert (uutype
, cstep
);
3376 cbase
= fold_convert (uutype
, cbase
);
3377 var
= fold_convert (uutype
, var
);
3380 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3383 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3384 type, we achieve better folding by computing their difference in this
3385 wider type, and cast the result to UUTYPE. We do not need to worry about
3386 overflows, as all the arithmetics will in the end be performed in UUTYPE
3388 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3390 /* use = ubase - ratio * cbase + ratio * var. */
3391 tree_to_aff_combination (ubase
, common_type
, aff
);
3392 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3393 tree_to_aff_combination (var
, uutype
, &var_aff
);
3395 /* We need to shift the value if we are after the increment. */
3396 if (stmt_after_increment (loop
, cand
, at
))
3400 if (common_type
!= uutype
)
3401 cstep_common
= fold_convert (common_type
, cstep
);
3403 cstep_common
= cstep
;
3405 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3406 aff_combination_add (&cbase_aff
, &cstep_aff
);
3409 aff_combination_scale (&cbase_aff
, -rat
);
3410 aff_combination_add (aff
, &cbase_aff
);
3411 if (common_type
!= uutype
)
3412 aff_combination_convert (aff
, uutype
);
3414 aff_combination_scale (&var_aff
, rat
);
3415 aff_combination_add (aff
, &var_aff
);
3420 /* Return the type of USE. */
3423 get_use_type (struct iv_use
*use
)
3425 tree base_type
= TREE_TYPE (use
->iv
->base
);
3428 if (use
->type
== USE_ADDRESS
)
3430 /* The base_type may be a void pointer. Create a pointer type based on
3431 the mem_ref instead. */
3432 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3433 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3434 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3442 /* Determines the expression by that USE is expressed from induction variable
3443 CAND at statement AT in LOOP. The computation is unshared. */
3446 get_computation_at (struct loop
*loop
,
3447 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3450 tree type
= get_use_type (use
);
3452 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3454 unshare_aff_combination (&aff
);
3455 return fold_convert (type
, aff_combination_to_tree (&aff
));
3458 /* Determines the expression by that USE is expressed from induction variable
3459 CAND in LOOP. The computation is unshared. */
3462 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3464 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3467 /* Adjust the cost COST for being in loop setup rather than loop body.
3468 If we're optimizing for space, the loop setup overhead is constant;
3469 if we're optimizing for speed, amortize it over the per-iteration cost. */
3471 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3475 else if (optimize_loop_for_speed_p (data
->current_loop
))
3476 return cost
/ avg_loop_niter (data
->current_loop
);
3481 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3482 validity for a memory reference accessing memory of mode MODE in
3483 address space AS. */
3487 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3490 #define MAX_RATIO 128
3491 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3492 static vec
<sbitmap
> valid_mult_list
;
3495 if (data_index
>= valid_mult_list
.length ())
3496 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3498 valid_mult
= valid_mult_list
[data_index
];
3501 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3502 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3503 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3507 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3508 bitmap_clear (valid_mult
);
3509 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3510 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3511 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3513 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3514 if (memory_address_addr_space_p (mode
, addr
, as
)
3515 || memory_address_addr_space_p (mode
, scaled
, as
))
3516 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3519 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3521 fprintf (dump_file
, " allowed multipliers:");
3522 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3523 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3524 fprintf (dump_file
, " %d", (int) i
);
3525 fprintf (dump_file
, "\n");
3526 fprintf (dump_file
, "\n");
3529 valid_mult_list
[data_index
] = valid_mult
;
3532 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3535 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3538 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3539 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3540 variable is omitted. Compute the cost for a memory reference that accesses
3541 a memory location of mode MEM_MODE in address space AS.
3543 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3544 size of MEM_MODE / RATIO) is available. To make this determination, we
3545 look at the size of the increment to be made, which is given in CSTEP.
3546 CSTEP may be zero if the step is unknown.
3547 STMT_AFTER_INC is true iff the statement we're looking at is after the
3548 increment of the original biv.
3550 TODO -- there must be some better way. This all is quite crude. */
3554 AINC_PRE_INC
, /* Pre increment. */
3555 AINC_PRE_DEC
, /* Pre decrement. */
3556 AINC_POST_INC
, /* Post increment. */
3557 AINC_POST_DEC
, /* Post decrement. */
3558 AINC_NONE
/* Also the number of auto increment types. */
3561 typedef struct address_cost_data_s
3563 HOST_WIDE_INT min_offset
, max_offset
;
3564 unsigned costs
[2][2][2][2];
3565 unsigned ainc_costs
[AINC_NONE
];
3566 } *address_cost_data
;
3570 get_address_cost (bool symbol_present
, bool var_present
,
3571 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3572 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3573 addr_space_t as
, bool speed
,
3574 bool stmt_after_inc
, bool *may_autoinc
)
3576 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3577 static vec
<address_cost_data
> address_cost_data_list
;
3578 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3579 address_cost_data data
;
3580 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3581 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3582 unsigned cost
, acost
, complexity
;
3583 enum ainc_type autoinc_type
;
3584 bool offset_p
, ratio_p
, autoinc
;
3585 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3586 unsigned HOST_WIDE_INT mask
;
3589 if (data_index
>= address_cost_data_list
.length ())
3590 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3592 data
= address_cost_data_list
[data_index
];
3596 HOST_WIDE_INT rat
, off
= 0;
3597 int old_cse_not_expected
, width
;
3598 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3603 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3605 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3607 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3608 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3609 width
= HOST_BITS_PER_WIDE_INT
- 1;
3610 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3612 for (i
= width
; i
>= 0; i
--)
3614 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3615 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3616 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3619 data
->min_offset
= (i
== -1? 0 : off
);
3621 for (i
= width
; i
>= 0; i
--)
3623 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3624 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3625 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3627 /* For some strict-alignment targets, the offset must be naturally
3628 aligned. Try an aligned offset if mem_mode is not QImode. */
3629 off
= mem_mode
!= QImode
3630 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3631 - GET_MODE_SIZE (mem_mode
)
3635 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3636 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3642 data
->max_offset
= off
;
3644 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3646 fprintf (dump_file
, "get_address_cost:\n");
3647 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3648 GET_MODE_NAME (mem_mode
),
3650 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3651 GET_MODE_NAME (mem_mode
),
3656 for (i
= 2; i
<= MAX_RATIO
; i
++)
3657 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3663 /* Compute the cost of various addressing modes. */
3665 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3666 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3668 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3669 || USE_STORE_PRE_DECREMENT (mem_mode
))
3671 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3672 has_predec
[mem_mode
]
3673 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3675 if (has_predec
[mem_mode
])
3676 data
->ainc_costs
[AINC_PRE_DEC
]
3677 = address_cost (addr
, mem_mode
, as
, speed
);
3679 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3680 || USE_STORE_POST_DECREMENT (mem_mode
))
3682 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3683 has_postdec
[mem_mode
]
3684 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3686 if (has_postdec
[mem_mode
])
3687 data
->ainc_costs
[AINC_POST_DEC
]
3688 = address_cost (addr
, mem_mode
, as
, speed
);
3690 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3691 || USE_STORE_PRE_DECREMENT (mem_mode
))
3693 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3694 has_preinc
[mem_mode
]
3695 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3697 if (has_preinc
[mem_mode
])
3698 data
->ainc_costs
[AINC_PRE_INC
]
3699 = address_cost (addr
, mem_mode
, as
, speed
);
3701 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3702 || USE_STORE_POST_INCREMENT (mem_mode
))
3704 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3705 has_postinc
[mem_mode
]
3706 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3708 if (has_postinc
[mem_mode
])
3709 data
->ainc_costs
[AINC_POST_INC
]
3710 = address_cost (addr
, mem_mode
, as
, speed
);
3712 for (i
= 0; i
< 16; i
++)
3715 var_p
= (i
>> 1) & 1;
3716 off_p
= (i
>> 2) & 1;
3717 rat_p
= (i
>> 3) & 1;
3721 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3722 gen_int_mode (rat
, address_mode
));
3725 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3729 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3730 /* ??? We can run into trouble with some backends by presenting
3731 it with symbols which haven't been properly passed through
3732 targetm.encode_section_info. By setting the local bit, we
3733 enhance the probability of things working. */
3734 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3737 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3739 (PLUS
, address_mode
, base
,
3740 gen_int_mode (off
, address_mode
)));
3743 base
= gen_int_mode (off
, address_mode
);
3748 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3751 /* To avoid splitting addressing modes, pretend that no cse will
3753 old_cse_not_expected
= cse_not_expected
;
3754 cse_not_expected
= true;
3755 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3756 cse_not_expected
= old_cse_not_expected
;
3760 acost
= seq_cost (seq
, speed
);
3761 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3765 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3768 /* On some targets, it is quite expensive to load symbol to a register,
3769 which makes addresses that contain symbols look much more expensive.
3770 However, the symbol will have to be loaded in any case before the
3771 loop (and quite likely we have it in register already), so it does not
3772 make much sense to penalize them too heavily. So make some final
3773 tweaks for the SYMBOL_PRESENT modes:
3775 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3776 var is cheaper, use this mode with small penalty.
3777 If VAR_PRESENT is true, try whether the mode with
3778 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3779 if this is the case, use it. */
3780 add_c
= add_cost (speed
, address_mode
);
3781 for (i
= 0; i
< 8; i
++)
3784 off_p
= (i
>> 1) & 1;
3785 rat_p
= (i
>> 2) & 1;
3787 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3791 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3792 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3795 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3797 fprintf (dump_file
, "Address costs:\n");
3799 for (i
= 0; i
< 16; i
++)
3802 var_p
= (i
>> 1) & 1;
3803 off_p
= (i
>> 2) & 1;
3804 rat_p
= (i
>> 3) & 1;
3806 fprintf (dump_file
, " ");
3808 fprintf (dump_file
, "sym + ");
3810 fprintf (dump_file
, "var + ");
3812 fprintf (dump_file
, "cst + ");
3814 fprintf (dump_file
, "rat * ");
3816 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3817 fprintf (dump_file
, "index costs %d\n", acost
);
3819 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3820 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3821 fprintf (dump_file
, " May include autoinc/dec\n");
3822 fprintf (dump_file
, "\n");
3825 address_cost_data_list
[data_index
] = data
;
3828 bits
= GET_MODE_BITSIZE (address_mode
);
3829 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3831 if ((offset
>> (bits
- 1) & 1))
3836 autoinc_type
= AINC_NONE
;
3837 msize
= GET_MODE_SIZE (mem_mode
);
3838 autoinc_offset
= offset
;
3840 autoinc_offset
+= ratio
* cstep
;
3841 if (symbol_present
|| var_present
|| ratio
!= 1)
3845 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3847 autoinc_type
= AINC_POST_INC
;
3848 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3850 autoinc_type
= AINC_POST_DEC
;
3851 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3853 autoinc_type
= AINC_PRE_INC
;
3854 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3856 autoinc_type
= AINC_PRE_DEC
;
3858 if (autoinc_type
!= AINC_NONE
)
3863 offset_p
= (s_offset
!= 0
3864 && data
->min_offset
<= s_offset
3865 && s_offset
<= data
->max_offset
);
3866 ratio_p
= (ratio
!= 1
3867 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3869 if (ratio
!= 1 && !ratio_p
)
3870 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3872 if (s_offset
&& !offset_p
&& !symbol_present
)
3873 cost
+= add_cost (speed
, address_mode
);
3876 *may_autoinc
= autoinc
;
3878 acost
= data
->ainc_costs
[autoinc_type
];
3880 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3881 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3882 return new_cost (cost
+ acost
, complexity
);
3885 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3886 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3887 calculating the operands of EXPR. Returns true if successful, and returns
3888 the cost in COST. */
3891 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3892 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3895 tree op1
= TREE_OPERAND (expr
, 1);
3896 tree cst
= TREE_OPERAND (mult
, 1);
3897 tree multop
= TREE_OPERAND (mult
, 0);
3898 int m
= exact_log2 (int_cst_value (cst
));
3899 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3900 int as_cost
, sa_cost
;
3903 if (!(m
>= 0 && m
< maxm
))
3906 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
3908 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3910 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3911 use that in preference to a shift insn followed by an add insn. */
3912 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3913 ? shiftadd_cost (speed
, mode
, m
)
3915 ? shiftsub1_cost (speed
, mode
, m
)
3916 : shiftsub0_cost (speed
, mode
, m
)));
3918 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
3919 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
3921 STRIP_NOPS (multop
);
3922 if (!is_gimple_val (multop
))
3923 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
/* NOTE(review): this block is a garbled extraction -- each logical source
   line is split across several physical lines and some fragments (braces,
   case labels, the PLUS/MINUS/NEGATE cases' bodies) were dropped; the text
   below is preserved verbatim.  Visible behavior: caches per-speed costs of
   an integer constant, a symbol address and an offsetted address on first
   call (static arrays indexed by the SPEED flag), then recursively costs
   EXPR, bounding the total by target_spill_cost.  Recursion on operands is
   skipped for SSA names and constants.  Returns a comp_cost -- exact return
   type/trailing `return cost;` were lost in extraction; TODO confirm
   against the upstream pass.  */
3929 /* Estimates cost of forcing expression EXPR into a variable. */
3932 force_expr_to_var_cost (tree expr
, bool speed
)
3934 static bool costs_initialized
= false;
3935 static unsigned integer_cost
[2];
3936 static unsigned symbol_cost
[2];
3937 static unsigned address_cost
[2];
3939 comp_cost cost0
, cost1
, cost
;
/* One-time measurement of baseline costs, done for both size (0) and
   speed (1) variants.  */
3942 if (!costs_initialized
)
3944 tree type
= build_pointer_type (integer_type_node
)
3949 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3950 TREE_STATIC (var
) = 1;
3951 x
= produce_memory_decl_rtl (var
, NULL
);
3952 SET_DECL_RTL (var
, x
);
3954 addr
= build1 (ADDR_EXPR
, type
, var
);
3957 for (i
= 0; i
< 2; i
++)
3959 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3962 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3965 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3966 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3968 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3969 fprintf (dump_file
, "  integer %d\n", (int) integer_cost
[i
]);
3970 fprintf (dump_file
, "  symbol %d\n", (int) symbol_cost
[i
]);
3971 fprintf (dump_file
, "  address %d\n", (int) address_cost
[i
]);
3972 fprintf (dump_file
, "  other %d\n", (int) target_spill_cost
[i
]);
3973 fprintf (dump_file
, "\n");
3977 costs_initialized
= true;
/* Cheap cases: SSA variables, integer constants, and addresses of
   declarations use the precomputed costs.  */
3982 if (SSA_VAR_P (expr
))
3985 if (is_gimple_min_invariant (expr
))
3987 if (TREE_CODE (expr
) == INTEGER_CST
)
3988 return new_cost (integer_cost
[speed
], 0);
3990 if (TREE_CODE (expr
) == ADDR_EXPR
)
3992 tree obj
= TREE_OPERAND (expr
, 0);
3994 if (TREE_CODE (obj
) == VAR_DECL
3995 || TREE_CODE (obj
) == PARM_DECL
3996 || TREE_CODE (obj
) == RESULT_DECL
)
3997 return new_cost (symbol_cost
[speed
], 0);
4000 return new_cost (address_cost
[speed
], 0);
/* Decompose supported binary/unary expressions; anything else falls
   through to the arbitrary spill-cost default below.  */
4003 switch (TREE_CODE (expr
))
4005 case POINTER_PLUS_EXPR
:
4009 op0
= TREE_OPERAND (expr
, 0);
4010 op1
= TREE_OPERAND (expr
, 1);
4017 op0
= TREE_OPERAND (expr
, 0);
4023 /* Just an arbitrary value, FIXME. */
4024 return new_cost (target_spill_cost
[speed
], 0);
/* Recurse only into operands that are not already "variables"
   (SSA names / constants cost nothing extra here).  */
4027 if (op0
== NULL_TREE
4028 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4031 cost0
= force_expr_to_var_cost (op0
, speed
);
4033 if (op1
== NULL_TREE
4034 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4037 cost1
= force_expr_to_var_cost (op1
, speed
);
4039 mode
= TYPE_MODE (TREE_TYPE (expr
));
4040 switch (TREE_CODE (expr
))
4042 case POINTER_PLUS_EXPR
:
4046 cost
= new_cost (add_cost (speed
, mode
), 0);
/* For plus/minus with a MULT operand, try the cheaper
   shift-and-add form (see get_shiftadd_cost).  */
4047 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4049 tree mult
= NULL_TREE
;
4051 if (TREE_CODE (op1
) == MULT_EXPR
)
4053 else if (TREE_CODE (op0
) == MULT_EXPR
)
4056 if (mult
!= NULL_TREE
4057 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4058 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
/* Conversion case: cost of converting between the operand's and
   the expression's machine modes.  */
4066 tree inner_mode
, outer_mode
;
4067 outer_mode
= TREE_TYPE (expr
);
4068 inner_mode
= TREE_TYPE (op0
);
4069 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4070 TYPE_MODE (inner_mode
), speed
), 0);
/* Multiplication by a constant uses the target's coefficient-multiply
   cost; variable * variable falls back to the spill cost.  */
4075 if (cst_and_fits_in_hwi (op0
))
4076 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4078 else if (cst_and_fits_in_hwi (op1
))
4079 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4082 return new_cost (target_spill_cost
[speed
], 0);
4089 cost
= add_costs (cost
, cost0
);
4090 cost
= add_costs (cost
, cost1
);
4092 /* Bound the cost by target_spill_cost. The parts of complicated
4093 computations often are either loop invariant or at least can
4094 be shared between several iv uses, so letting this grow without
4095 limits would not give reasonable results. */
4096 if (cost
.cost
> (int) target_spill_cost
[speed
])
4097 cost
.cost
= target_spill_cost
[speed
];
/* NOTE(review): garbled extraction, text preserved verbatim.  Thin wrapper:
   records invariants EXPR depends on into *DEPENDS_ON (via walk_tree with
   find_depends, communicating DATA through the file-global fd_ivopts_data),
   then delegates the actual costing to force_expr_to_var_cost using
   DATA->speed.  */
4102 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4103 invariants the computation depends on. */
4106 force_var_cost (struct ivopts_data
*data
,
4107 tree expr
, bitmap
*depends_on
)
4111 fd_ivopts_data
= data
;
4112 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4115 return force_expr_to_var_cost (expr
, data
->speed
);
/* NOTE(review): garbled extraction, text preserved verbatim; the condition
   guarding the early-return (original line 4138, presumably a check on
   TOFFSET) was dropped.  Visible behavior: decompose ADDR with
   get_inner_reference; if the core is not a simple byte-aligned VAR_DECL,
   treat the whole address as a variable (cost = spill cost) and record its
   dependencies.  Otherwise fold the byte offset into *OFFSET and classify
   the core as symbol (static/external decl) or variable.  Zero-cost paths
   return through code dropped by the extraction -- TODO confirm.  */
4118 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4119 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4120 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4121 invariants the computation depends on. */
4124 split_address_cost (struct ivopts_data
*data
,
4125 tree addr
, bool *symbol_present
, bool *var_present
,
4126 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4129 HOST_WIDE_INT bitsize
;
4130 HOST_WIDE_INT bitpos
;
4133 int unsignedp
, volatilep
;
4135 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4136 &unsignedp
, &volatilep
, false);
4139 || bitpos
% BITS_PER_UNIT
!= 0
4140 || TREE_CODE (core
) != VAR_DECL
)
4142 *symbol_present
= false;
4143 *var_present
= true;
4144 fd_ivopts_data
= data
;
4145 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4146 return new_cost (target_spill_cost
[data
->speed
], 0);
4149 *offset
+= bitpos
/ BITS_PER_UNIT
;
4150 if (TREE_STATIC (core
)
4151 || DECL_EXTERNAL (core
))
4153 *symbol_present
= true;
4154 *var_present
= false;
4158 *symbol_present
= false;
4159 *var_present
= true;
/* NOTE(review): garbled extraction, text preserved verbatim.  Costs the
   address difference E1 - E2 (E1 must be an ADDR_EXPR, asserted).  Fast
   paths: a compile-time-constant difference (folds into *OFFSET, dropped
   line presumably does `*offset += diff`), or E2 == 0 which reduces to
   split_address_cost of E1's operand.  Otherwise compute E1 - E2 as an
   affine combination in a signed type and cost forcing it into a
   variable.  */
4163 /* Estimates cost of expressing difference of addresses E1 - E2 as
4164 var + symbol + offset. The value of offset is added to OFFSET,
4165 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4166 part is missing. DEPENDS_ON is a set of the invariants the computation
4170 ptr_difference_cost (struct ivopts_data
*data
,
4171 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4172 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4174 HOST_WIDE_INT diff
= 0;
4175 aff_tree aff_e1
, aff_e2
;
4178 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4180 if (ptr_difference_const (e1
, e2
, &diff
))
4183 *symbol_present
= false;
4184 *var_present
= false;
4188 if (integer_zerop (e2
))
4189 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4190 symbol_present
, var_present
, offset
, depends_on
);
4192 *symbol_present
= false;
4193 *var_present
= true;
4195 type
= signed_type_for (TREE_TYPE (e1
));
4196 tree_to_aff_combination (e1
, type
, &aff_e1
);
4197 tree_to_aff_combination (e2
, type
, &aff_e2
);
4198 aff_combination_scale (&aff_e2
, -1);
4199 aff_combination_add (&aff_e1
, &aff_e2
);
4201 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
/* NOTE(review): garbled extraction, text preserved verbatim.  General
   difference E1 - E2: strip constant offsets into *OFFSET first, then
   dispatch -- address difference to ptr_difference_cost, equal operands
   cost nothing (no_cost return line dropped), E2 == 0 reduces to forcing
   E1, E1 == 0 adds a negate (mult by -1) on top of forcing E2.  Fallback
   builds the affine combination E1 - E2 in a signed type and forces that
   into a variable.  */
4204 /* Estimates cost of expressing difference E1 - E2 as
4205 var + symbol + offset. The value of offset is added to OFFSET,
4206 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4207 part is missing. DEPENDS_ON is a set of the invariants the computation
4211 difference_cost (struct ivopts_data
*data
,
4212 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4213 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4215 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4216 unsigned HOST_WIDE_INT off1
, off2
;
4217 aff_tree aff_e1
, aff_e2
;
4220 e1
= strip_offset (e1
, &off1
);
4221 e2
= strip_offset (e2
, &off2
);
4222 *offset
+= off1
- off2
;
4227 if (TREE_CODE (e1
) == ADDR_EXPR
)
4228 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4229 offset
, depends_on
);
4230 *symbol_present
= false;
4232 if (operand_equal_p (e1
, e2
, 0))
4234 *var_present
= false;
4238 *var_present
= true;
4240 if (integer_zerop (e2
))
4241 return force_var_cost (data
, e1
, depends_on
);
4243 if (integer_zerop (e1
))
4245 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4246 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4250 type
= signed_type_for (TREE_TYPE (e1
));
4251 tree_to_aff_combination (e1
, type
, &aff_e1
);
4252 tree_to_aff_combination (e2
, type
, &aff_e2
);
4253 aff_combination_scale (&aff_e2
, -1);
4254 aff_combination_add (&aff_e1
, &aff_e2
);
4256 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
/* NOTE(review): garbled extraction, text preserved verbatim.  Structural
   equality of two affine combinations: same element count, and each
   element pairwise-equal in coefficient and value.  The `return false` /
   `return true` lines were dropped by the extraction; the offset field is
   not compared in the visible text -- TODO confirm upstream whether that
   is intentional.  */
4259 /* Returns true if AFF1 and AFF2 are identical. */
4262 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4266 if (aff1
->n
!= aff2
->n
)
4269 for (i
= 0; i
< aff1
->n
; i
++)
4271 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4274 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
/* NOTE(review): garbled extraction, text preserved verbatim.  Interns EXPR
   in the pass's invariant-expression hash table keyed by
   iterative_hash_expr; on a miss, allocates a new entry and assigns the
   next sequential id from DATA->inv_expr_id.  The hash-hit early return
   and the final `return (*slot)->id` were dropped by the extraction.  */
4280 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4283 get_expr_id (struct ivopts_data
*data
, tree expr
)
4285 struct iv_inv_expr_ent ent
;
4286 struct iv_inv_expr_ent
**slot
;
4289 ent
.hash
= iterative_hash_expr (expr
, 0);
4290 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4294 *slot
= XNEW (struct iv_inv_expr_ent
);
4295 (*slot
)->expr
= expr
;
4296 (*slot
)->hash
= ent
.hash
;
4297 (*slot
)->id
= data
->inv_expr_id
++;
/* NOTE(review): garbled extraction, text preserved verbatim; several -1
   early-return lines were dropped.  Decides whether UBASE - RATIO * CBASE
   needs a compiler-generated temporary: quickly returns -1 for pairs that
   fold away (both constants, SSA-name-vs-constant after stripping constant
   addends, equal operands, addresses of the same symbol modulo a leading
   zero ARRAY_REF index, or structurally identical affine forms); otherwise
   builds the affine combination UBASE - RATIO * CBASE and interns it via
   get_expr_id.  ADDRESS_P's use is in dropped lines -- TODO confirm.  */
4301 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4302 requires a new compiler generated temporary. Returns -1 otherwise.
4303 ADDRESS_P is a flag indicating if the expression is for address
4307 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4308 tree cbase
, HOST_WIDE_INT ratio
,
4311 aff_tree ubase_aff
, cbase_aff
;
4319 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4320 && (TREE_CODE (cbase
) == INTEGER_CST
))
4323 /* Strips the constant part. */
4324 if (TREE_CODE (ubase
) == PLUS_EXPR
4325 || TREE_CODE (ubase
) == MINUS_EXPR
4326 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4328 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4329 ubase
= TREE_OPERAND (ubase
, 0);
4332 /* Strips the constant part. */
4333 if (TREE_CODE (cbase
) == PLUS_EXPR
4334 || TREE_CODE (cbase
) == MINUS_EXPR
4335 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4337 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4338 cbase
= TREE_OPERAND (cbase
, 0);
4343 if (((TREE_CODE (ubase
) == SSA_NAME
)
4344 || (TREE_CODE (ubase
) == ADDR_EXPR
4345 && is_gimple_min_invariant (ubase
)))
4346 && (TREE_CODE (cbase
) == INTEGER_CST
))
4349 if (((TREE_CODE (cbase
) == SSA_NAME
)
4350 || (TREE_CODE (cbase
) == ADDR_EXPR
4351 && is_gimple_min_invariant (cbase
)))
4352 && (TREE_CODE (ubase
) == INTEGER_CST
))
4358 if (operand_equal_p (ubase
, cbase
, 0))
4361 if (TREE_CODE (ubase
) == ADDR_EXPR
4362 && TREE_CODE (cbase
) == ADDR_EXPR
)
4366 usym
= TREE_OPERAND (ubase
, 0);
4367 csym
= TREE_OPERAND (cbase
, 0);
/* A leading zero-index ARRAY_REF is stripped so &a[0] and &a compare
   as the same symbol.  */
4368 if (TREE_CODE (usym
) == ARRAY_REF
)
4370 tree ind
= TREE_OPERAND (usym
, 1);
4371 if (TREE_CODE (ind
) == INTEGER_CST
4372 && tree_fits_shwi_p (ind
)
4373 && tree_to_shwi (ind
) == 0)
4374 usym
= TREE_OPERAND (usym
, 0);
4376 if (TREE_CODE (csym
) == ARRAY_REF
)
4378 tree ind
= TREE_OPERAND (csym
, 1);
4379 if (TREE_CODE (ind
) == INTEGER_CST
4380 && tree_fits_shwi_p (ind
)
4381 && tree_to_shwi (ind
) == 0)
4382 csym
= TREE_OPERAND (csym
, 0);
4384 if (operand_equal_p (usym
, csym
, 0))
4387 /* Now do more complex comparison */
4388 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4389 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4390 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4394 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4395 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4397 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4398 aff_combination_add (&ubase_aff
, &cbase_aff
);
4399 expr
= aff_combination_to_tree (&ubase_aff
);
4400 return get_expr_id (data
, expr
);
/* NOTE(review): garbled extraction, text preserved verbatim; many control
   lines (braces, `else`, some conditions, fallback labels) were dropped,
   so read the surviving fragments against upstream before trusting any
   detail.  Visible flow: validate the candidate against the use (real
   candidate, sufficient precision, matching base objects), compute the
   ratio ustep/cstep, then cost `ubase - ratio * cbase + ratio * var` via
   difference_cost/force_var_cost in one of four shapes (constant cbase,
   ratio == 1 with optional after-increment adjustment, ratio usable as an
   address multiplier, general).  Setup parts are divided by the average
   loop iteration count.  For address uses the result feeds
   get_address_cost; otherwise multiplier/addition costs are added, with a
   fallback that simply expands the computation and measures it.  */
4405 /* Determines the cost of the computation by that USE is expressed
4406 from induction variable CAND. If ADDRESS_P is true, we just need
4407 to create an address from it, otherwise we want to get it into
4408 register. A set of invariants we depend on is stored in
4409 DEPENDS_ON. AT is the statement at that the value is computed.
4410 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4411 addressing is likely. */
4414 get_computation_cost_at (struct ivopts_data
*data
,
4415 struct iv_use
*use
, struct iv_cand
*cand
,
4416 bool address_p
, bitmap
*depends_on
, gimple at
,
4420 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4422 tree utype
= TREE_TYPE (ubase
), ctype
;
4423 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4424 HOST_WIDE_INT ratio
, aratio
;
4425 bool var_present
, symbol_present
, stmt_is_after_inc
;
4428 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4429 machine_mode mem_mode
= (address_p
4430 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4435 /* Only consider real candidates. */
4437 return infinite_cost
;
4439 cbase
= cand
->iv
->base
;
4440 cstep
= cand
->iv
->step
;
4441 ctype
= TREE_TYPE (cbase
);
4443 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4445 /* We do not have a precision to express the values of use. */
4446 return infinite_cost
;
4450 || (use
->iv
->base_object
4451 && cand
->iv
->base_object
4452 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4453 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4455 /* Do not try to express address of an object with computation based
4456 on address of a different object. This may cause problems in rtl
4457 level alias analysis (that does not expect this to be happening,
4458 as this is illegal in C), and would be unlikely to be useful
4460 if (use
->iv
->base_object
4461 && cand
->iv
->base_object
4462 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4463 return infinite_cost
;
4466 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4468 /* TODO -- add direct handling of this case. */
4472 /* CSTEPI is removed from the offset in case statement is after the
4473 increment. If the step is not constant, we use zero instead.
4474 This is a bit imprecise (there is the extra addition), but
4475 redundancy elimination is likely to transform the code so that
4476 it uses value of the variable before increment anyway,
4477 so it is not that much unrealistic. */
4478 if (cst_and_fits_in_hwi (cstep
))
4479 cstepi
= int_cst_value (cstep
);
4483 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4484 return infinite_cost
;
4486 if (wi::fits_shwi_p (rat
))
4487 ratio
= rat
.to_shwi ();
4489 return infinite_cost
;
4492 ctype
= TREE_TYPE (cbase
);
4494 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4496 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4497 or ratio == 1, it is better to handle this like
4499 ubase - ratio * cbase + ratio * var
4501 (also holds in the case ratio == -1, TODO. */
4503 if (cst_and_fits_in_hwi (cbase
))
4505 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4506 cost
= difference_cost (data
,
4507 ubase
, build_int_cst (utype
, 0),
4508 &symbol_present
, &var_present
, &offset
,
4510 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4512 else if (ratio
== 1)
4514 tree real_cbase
= cbase
;
4516 /* Check to see if any adjustment is needed. */
4517 if (cstepi
== 0 && stmt_is_after_inc
)
4519 aff_tree real_cbase_aff
;
4522 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4524 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4526 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4527 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4530 cost
= difference_cost (data
,
4532 &symbol_present
, &var_present
, &offset
,
4534 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4537 && !POINTER_TYPE_P (ctype
)
4538 && multiplier_allowed_in_address_p
4540 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4543 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4544 cost
= difference_cost (data
,
4546 &symbol_present
, &var_present
, &offset
,
4548 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4552 cost
= force_var_cost (data
, cbase
, depends_on
);
4553 cost
= add_costs (cost
,
4554 difference_cost (data
,
4555 ubase
, build_int_cst (utype
, 0),
4556 &symbol_present
, &var_present
,
4557 &offset
, depends_on
));
4558 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4559 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4562 /* Set of invariants depended on by sub use has already been computed
4563 for the first use in the group. */
4567 if (depends_on
&& *depends_on
)
4568 bitmap_clear (*depends_on
);
4570 else if (inv_expr_id
)
4573 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4574 /* Clear depends on. */
4575 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4576 bitmap_clear (*depends_on
);
4579 /* If we are after the increment, the value of the candidate is higher by
4581 if (stmt_is_after_inc
)
4582 offset
-= ratio
* cstepi
;
4584 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4585 (symbol/var1/const parts may be omitted). If we are looking for an
4586 address, find the cost of addressing this. */
4588 return add_costs (cost
,
4589 get_address_cost (symbol_present
, var_present
,
4590 offset
, ratio
, cstepi
,
4592 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4593 speed
, stmt_is_after_inc
,
4596 /* Otherwise estimate the costs for computing the expression. */
4597 if (!symbol_present
&& !var_present
&& !offset
)
4600 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4604 /* Symbol + offset should be compile-time computable so consider that they
4605 are added once to the variable, if present. */
4606 if (var_present
&& (symbol_present
|| offset
))
4607 cost
.cost
+= adjust_setup_cost (data
,
4608 add_cost (speed
, TYPE_MODE (ctype
)));
4610 /* Having offset does not affect runtime cost in case it is added to
4611 symbol, but it increases complexity. */
4615 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4617 aratio
= ratio
> 0 ? ratio
: -ratio
;
4619 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4624 *can_autoinc
= false;
4627 /* Just get the expression, expand it and measure the cost. */
4628 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4631 return infinite_cost
;
4634 comp
= build_simple_mem_ref (comp
);
4636 return new_cost (computation_cost (comp
, speed
), 0);
/* NOTE(review): garbled extraction, text preserved verbatim.  Convenience
   wrapper: same as get_computation_cost_at, with AT fixed to the use's own
   statement (USE->stmt).  */
4640 /* Determines the cost of the computation by that USE is expressed
4641 from induction variable CAND. If ADDRESS_P is true, we just need
4642 to create an address from it, otherwise we want to get it into
4643 register. A set of invariants we depend on is stored in
4644 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4645 autoinc addressing is likely. */
4648 get_computation_cost (struct ivopts_data
*data
,
4649 struct iv_use
*use
, struct iv_cand
*cand
,
4650 bool address_p
, bitmap
*depends_on
,
4651 bool *can_autoinc
, int *inv_expr_id
)
4653 return get_computation_cost_at (data
,
4654 use
, cand
, address_p
, depends_on
, use
->stmt
,
4655 can_autoinc
, inv_expr_id
);
/* NOTE(review): garbled extraction, text preserved verbatim.  Cost of
   expressing a generic (non-address, non-condition) use by CAND: free when
   CAND is the preserved original biv incremented at this very statement
   (avoids double-counting the increment); otherwise the full computation
   cost.  Records the result via set_use_iv_cost and returns whether the
   cost is finite.  */
4658 /* Determines cost of basing replacement of USE on CAND in a generic
4662 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4663 struct iv_use
*use
, struct iv_cand
*cand
)
4667 int inv_expr_id
= -1;
4669 /* The simple case first -- if we need to express value of the preserved
4670 original biv, the cost is 0. This also prevents us from counting the
4671 cost of increment twice -- once at this use and once in the cost of
4673 if (cand
->pos
== IP_ORIGINAL
4674 && cand
->incremented_at
== use
->stmt
)
4676 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4681 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4682 NULL
, &inv_expr_id
);
4684 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4687 return !infinite_cost_p (cost
);
/* NOTE(review): garbled extraction, text preserved verbatim.  Address-use
   variant: costs the first use, credits the step cost back when CAND was
   created for this use's autoincrement (guard condition around the
   subtraction was partially dropped), forces infinite cost for autoinc-only
   candidates that cannot be used, then accumulates the costs of the
   remaining sub-uses in the address group, stopping once infinite.  */
4690 /* Determines cost of basing replacement of USE on CAND in an address. */
4693 determine_use_iv_cost_address (struct ivopts_data
*data
,
4694 struct iv_use
*use
, struct iv_cand
*cand
)
4698 int inv_expr_id
= -1;
4699 struct iv_use
*sub_use
;
4701 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4702 &can_autoinc
, &inv_expr_id
);
4704 if (cand
->ainc_use
== use
)
4707 cost
.cost
-= cand
->cost_step
;
4708 /* If we generated the candidate solely for exploiting autoincrement
4709 opportunities, and it turns out it can't be used, set the cost to
4710 infinity to make sure we ignore it. */
4711 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4712 cost
= infinite_cost
;
4714 for (sub_use
= use
->next
;
4715 sub_use
&& !infinite_cost_p (cost
);
4716 sub_use
= sub_use
->next
)
4718 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, &depends_on
,
4719 &can_autoinc
, &inv_expr_id
);
4720 cost
= add_costs (cost
, sub_cost
);
4723 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4726 return !infinite_cost_p (cost
);
/* NOTE(review): garbled extraction, text preserved verbatim; the `else`
   between the pointer and non-pointer steptype assignments was dropped (as
   written both assignments appear sequential -- upstream has
   pointer -> sizetype, otherwise unsigned_type_for; TODO confirm).
   Computes, as an affine combination in *VAL, the candidate's value at
   statement AT after NITER iterations: base + (niter [+1 if after the
   increment]) * step, all in an overflow-safe unsigned step type.  */
4729 /* Computes value of candidate CAND at position AT in iteration NITER, and
4730 stores it to VAL. */
4733 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4736 aff_tree step
, delta
, nit
;
4737 struct iv
*iv
= cand
->iv
;
4738 tree type
= TREE_TYPE (iv
->base
);
4739 tree steptype
= type
;
4740 if (POINTER_TYPE_P (type
))
4741 steptype
= sizetype
;
4742 steptype
= unsigned_type_for (type
);
4744 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4745 aff_combination_convert (&step
, steptype
);
4746 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4747 aff_combination_convert (&nit
, steptype
);
4748 aff_combination_mult (&nit
, &step
, &delta
);
4749 if (stmt_after_increment (loop
, cand
, at
))
4750 aff_combination_add (&delta
, &step
);
4752 tree_to_aff_combination (iv
->base
, type
, val
);
4753 if (!POINTER_TYPE_P (type
))
4754 aff_combination_convert (val
, steptype
);
4755 aff_combination_add (val
, &delta
);
/* NOTE(review): garbled extraction, text preserved verbatim; the trailing
   `return period;` was dropped.  Computes the period of IV with constant
   step: a low-bits mask of width TYPE_PRECISION minus the number of
   trailing zero bits of the step, per the derivation in the surviving
   comment.  */
4758 /* Returns period of induction variable iv. */
4761 iv_period (struct iv
*iv
)
4763 tree step
= iv
->step
, period
, type
;
4766 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4768 type
= unsigned_type_for (TREE_TYPE (step
));
4769 /* Period of the iv is lcm (step, type_range)/step -1,
4770 i.e., N*type_range/step - 1. Since type range is power
4771 of two, N == (step >> num_of_ending_zeros_binary (step),
4772 so the final result is
4774 (type_range >> num_of_ending_zeros_binary (step)) - 1
4777 pow2div
= num_ending_zeros (step
);
4779 period
= build_low_bits_mask (type
,
4780 (TYPE_PRECISION (type
)
4781 - tree_to_uhwi (pow2div
)));
/* NOTE(review): garbled extraction, text preserved verbatim.  Picks the
   exit edge of the block containing USE->stmt (the successor leaving the
   loop) and returns EQ_EXPR when the exit edge is the true edge of the
   condition, NE_EXPR otherwise.  */
4786 /* Returns the comparison operator used when eliminating the iv USE. */
4788 static enum tree_code
4789 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4791 struct loop
*loop
= data
->current_loop
;
4795 ex_bb
= gimple_bb (use
->stmt
);
4796 exit
= EDGE_SUCC (ex_bb
, 0);
4797 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4798 exit
= EDGE_SUCC (ex_bb
, 1);
4800 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
/* NOTE(review): garbled extraction, text preserved verbatim; the switch
   head over CODE, the PLUS/MINUS case labels before the e1/e2 checks, and
   the default `return false` were dropped.  Proves BASE - OFFSET cannot
   overflow by showing BASE is computed in a non-wrapping type as
   SOMETHING + OFFSET (or OFFSET + SOMETHING): extracts BASE's defining
   binary operation (from its SSA def statement or directly from the
   expression), then uses affine expansion to check that one operand equals
   OFFSET (for pointer-plus, that the sum with -OFFSET is zero).  */
4803 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4804 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4805 calculation is performed in non-wrapping type.
4807 TODO: More generally, we could test for the situation that
4808 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4809 This would require knowing the sign of OFFSET. */
4812 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4814 enum tree_code code
;
4816 aff_tree aff_e1
, aff_e2
, aff_offset
;
4818 if (!nowrap_type_p (TREE_TYPE (base
)))
4821 base
= expand_simple_operations (base
);
4823 if (TREE_CODE (base
) == SSA_NAME
)
4825 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4827 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4830 code
= gimple_assign_rhs_code (stmt
);
4831 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4834 e1
= gimple_assign_rhs1 (stmt
);
4835 e2
= gimple_assign_rhs2 (stmt
);
4839 code
= TREE_CODE (base
);
4840 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4842 e1
= TREE_OPERAND (base
, 0);
4843 e2
= TREE_OPERAND (base
, 1);
4846 /* Use affine expansion as deeper inspection to prove the equality. */
4847 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4848 &aff_e2
, &data
->name_expansion_cache
);
4849 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4850 &aff_offset
, &data
->name_expansion_cache
);
4851 aff_combination_scale (&aff_offset
, -1);
4855 aff_combination_add (&aff_e2
, &aff_offset
);
4856 if (aff_combination_zero_p (&aff_e2
))
4859 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4860 &aff_e1
, &data
->name_expansion_cache
);
4861 aff_combination_add (&aff_e1
, &aff_offset
);
4862 return aff_combination_zero_p (&aff_e1
);
4864 case POINTER_PLUS_EXPR
:
4865 aff_combination_add (&aff_e2
, &aff_offset
);
4866 return aff_combination_zero_p (&aff_e2
);
/* NOTE(review): garbled extraction, text preserved verbatim; the example
   loop in the header comment, several early `return false` lines, and the
   final `return true` were dropped.  Attempts to rewrite the exit test as
   a LT/GT comparison with CAND: requires an original-position candidate in
   a non-wrapping type, a single-exit loop, a constant step, a
   may_be_zero condition of the shape `a + 1 > b` (or `b < a + 1`), a niter
   expression that affine arithmetic confirms equals b - a - 1, and a proof
   that base - step * a cannot overflow.  On success flips *COMP_P:
   NE -> LT/GT by step sign, EQ -> the inverted comparison.  */
4873 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4874 comparison with CAND. NITER describes the number of iterations of
4875 the loops. If successful, the comparison in COMP_P is altered accordingly.
4877 We aim to handle the following situation:
4893 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4894 We aim to optimize this to
4902 while (p < p_0 - a + b);
4904 This preserves the correctness, since the pointer arithmetics does not
4905 overflow. More precisely:
4907 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4908 overflow in computing it or the values of p.
4909 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4910 overflow. To prove this, we use the fact that p_0 = base + a. */
4913 iv_elimination_compare_lt (struct ivopts_data
*data
,
4914 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4915 struct tree_niter_desc
*niter
)
4917 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4918 struct aff_tree nit
, tmpa
, tmpb
;
4919 enum tree_code comp
;
4922 /* We need to know that the candidate induction variable does not overflow.
4923 While more complex analysis may be used to prove this, for now just
4924 check that the variable appears in the original program and that it
4925 is computed in a type that guarantees no overflows. */
4926 cand_type
= TREE_TYPE (cand
->iv
->base
);
4927 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4930 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4931 the calculation of the BOUND could overflow, making the comparison
4933 if (!data
->loop_single_exit_p
)
4936 /* We need to be able to decide whether candidate is increasing or decreasing
4937 in order to choose the right comparison operator. */
4938 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4940 step
= int_cst_value (cand
->iv
->step
);
4942 /* Check that the number of iterations matches the expected pattern:
4943 a + 1 > b ? 0 : b - a - 1. */
4944 mbz
= niter
->may_be_zero
;
4945 if (TREE_CODE (mbz
) == GT_EXPR
)
4947 /* Handle a + 1 > b. */
4948 tree op0
= TREE_OPERAND (mbz
, 0);
4949 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4951 a
= TREE_OPERAND (op0
, 0);
4952 b
= TREE_OPERAND (mbz
, 1);
4957 else if (TREE_CODE (mbz
) == LT_EXPR
)
4959 tree op1
= TREE_OPERAND (mbz
, 1);
4961 /* Handle b < a + 1. */
4962 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4964 a
= TREE_OPERAND (op1
, 0);
4965 b
= TREE_OPERAND (mbz
, 0);
4973 /* Expected number of iterations is B - A - 1. Check that it matches
4974 the actual number, i.e., that B - A - NITER = 1. */
4975 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4976 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4977 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4978 aff_combination_scale (&nit
, -1);
4979 aff_combination_scale (&tmpa
, -1);
4980 aff_combination_add (&tmpb
, &tmpa
);
4981 aff_combination_add (&tmpb
, &nit
);
4982 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4985 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4987 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4989 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4990 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4993 /* Determine the new comparison operator. */
4994 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4995 if (*comp_p
== NE_EXPR
)
4997 else if (*comp_p
== EQ_EXPR
)
4998 *comp_p
= invert_tree_comparison (comp
, false);
/* NOTE(review): garbled extraction, text preserved verbatim; several
   `return false`/`return true` lines and some braces were dropped.
   Decides whether USE (a loop-exit GIMPLE_COND dominating the latch) can
   be replaced by comparing CAND against a bound: requires a constant
   candidate step, a known niter for the single exit edge, and that the
   candidate's period exceeds the iteration count (checked exactly for
   constant niter, else conservatively via recorded/inferred max bounds).
   On success computes the candidate's exit value into *BOUND, the operator
   into *COMP, rejects expensive bound expressions, and falls back to
   iv_elimination_compare_lt when the niter may be zero.  */
5005 /* Check whether it is possible to express the condition in USE by comparison
5006 of candidate CAND. If so, store the value compared with to BOUND, and the
5007 comparison operator to COMP. */
5010 may_eliminate_iv (struct ivopts_data
*data
,
5011 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5012 enum tree_code
*comp
)
5017 struct loop
*loop
= data
->current_loop
;
5019 struct tree_niter_desc
*desc
= NULL
;
5021 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5024 /* For now works only for exits that dominate the loop latch.
5025 TODO: extend to other conditions inside loop body. */
5026 ex_bb
= gimple_bb (use
->stmt
);
5027 if (use
->stmt
!= last_stmt (ex_bb
)
5028 || gimple_code (use
->stmt
) != GIMPLE_COND
5029 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5032 exit
= EDGE_SUCC (ex_bb
, 0);
5033 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5034 exit
= EDGE_SUCC (ex_bb
, 1);
5035 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5038 desc
= niter_for_exit (data
, exit
);
5042 /* Determine whether we can use the variable to test the exit condition.
5043 This is the case iff the period of the induction variable is greater
5044 than the number of iterations for which the exit condition is true. */
5045 period
= iv_period (cand
->iv
);
5047 /* If the number of iterations is constant, compare against it directly. */
5048 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5050 /* See cand_value_at. */
5051 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5053 if (!tree_int_cst_lt (desc
->niter
, period
))
5058 if (tree_int_cst_lt (period
, desc
->niter
))
5063 /* If not, and if this is the only possible exit of the loop, see whether
5064 we can get a conservative estimate on the number of iterations of the
5065 entire loop and compare against that instead. */
5068 widest_int period_value
, max_niter
;
5070 max_niter
= desc
->max
;
5071 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5073 period_value
= wi::to_widest (period
);
5074 if (wi::gtu_p (max_niter
, period_value
))
5076 /* See if we can take advantage of inferred loop bound information. */
5077 if (data
->loop_single_exit_p
)
5079 if (!max_loop_iterations (loop
, &max_niter
))
5081 /* The loop bound is already adjusted by adding 1. */
5082 if (wi::gtu_p (max_niter
, period_value
))
5090 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5092 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5093 aff_combination_to_tree (&bnd
));
5094 *comp
= iv_elimination_compare (data
, use
);
5096 /* It is unlikely that computing the number of iterations using division
5097 would be more profitable than keeping the original induction variable. */
5098 if (expression_expensive_p (*bound
))
5101 /* Sometimes, it is possible to handle the situation that the number of
5102 iterations may be zero unless additional assumtions by using <
5103 instead of != in the exit condition.
5105 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5106 base the exit condition on it. However, that is often too
5108 if (!integer_zerop (desc
->may_be_zero
))
5109 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
/* NOTE(review): garbled extraction, text preserved verbatim; the final
   `return 0;` for the non-PARM_DECL case was dropped.  Returns one
   instruction of cost when BOUND (after stripping nops) is the default-def
   SSA name of a PARM_DECL and the loop body contains a call (the parameter
   must then be copied); otherwise costs nothing.  */
5114 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5115 be copied, if is is used in the loop body and DATA->body_includes_call. */
5118 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5120 tree sbound
= bound
;
5121 STRIP_NOPS (sbound
);
5123 if (TREE_CODE (sbound
) == SSA_NAME
5124 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5125 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5126 && data
->body_includes_call
)
5127 return COSTS_N_INSNS (1);
5132 /* Determines cost of basing replacement of USE on CAND in a condition. */
5135 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5136 struct iv_use
*use
, struct iv_cand
*cand
)
5138 tree bound
= NULL_TREE
;
5140 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5141 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5143 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5144 tree
*control_var
, *bound_cst
;
5145 enum tree_code comp
= ERROR_MARK
;
5147 /* Only consider real candidates. */
5150 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5155 /* Try iv elimination. */
5156 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5158 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5159 if (elim_cost
.cost
== 0)
5160 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5161 else if (TREE_CODE (bound
) == INTEGER_CST
)
5163 /* If we replace a loop condition 'i < n' with 'p < base + n',
5164 depends_on_elim will have 'base' and 'n' set, which implies
5165 that both 'base' and 'n' will be live during the loop. More likely,
5166 'base + n' will be loop invariant, resulting in only one live value
5167 during the loop. So in that case we clear depends_on_elim and set
5168 elim_inv_expr_id instead. */
5169 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5171 elim_inv_expr_id
= get_expr_id (data
, bound
);
5172 bitmap_clear (depends_on_elim
);
5174 /* The bound is a loop invariant, so it will be only computed
5176 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5179 elim_cost
= infinite_cost
;
5181 /* Try expressing the original giv. If it is compared with an invariant,
5182 note that we cannot get rid of it. */
5183 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5187 /* When the condition is a comparison of the candidate IV against
5188 zero, prefer this IV.
5190 TODO: The constant that we're subtracting from the cost should
5191 be target-dependent. This information should be added to the
5192 target costs for each backend. */
5193 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5194 && integer_zerop (*bound_cst
)
5195 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5196 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5197 elim_cost
.cost
-= 1;
5199 express_cost
= get_computation_cost (data
, use
, cand
, false,
5200 &depends_on_express
, NULL
,
5201 &express_inv_expr_id
);
5202 fd_ivopts_data
= data
;
5203 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5205 /* Count the cost of the original bound as well. */
5206 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5207 if (bound_cost
.cost
== 0)
5208 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5209 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5210 bound_cost
.cost
= 0;
5211 express_cost
.cost
+= bound_cost
.cost
;
5213 /* Choose the better approach, preferring the eliminated IV. */
5214 if (compare_costs (elim_cost
, express_cost
) <= 0)
5217 depends_on
= depends_on_elim
;
5218 depends_on_elim
= NULL
;
5219 inv_expr_id
= elim_inv_expr_id
;
5223 cost
= express_cost
;
5224 depends_on
= depends_on_express
;
5225 depends_on_express
= NULL
;
5228 inv_expr_id
= express_inv_expr_id
;
5231 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5233 if (depends_on_elim
)
5234 BITMAP_FREE (depends_on_elim
);
5235 if (depends_on_express
)
5236 BITMAP_FREE (depends_on_express
);
5238 return !infinite_cost_p (cost
);
5241 /* Determines cost of basing replacement of USE on CAND. Returns false
5242 if USE cannot be based on CAND. */
5245 determine_use_iv_cost (struct ivopts_data
*data
,
5246 struct iv_use
*use
, struct iv_cand
*cand
)
5250 case USE_NONLINEAR_EXPR
:
5251 return determine_use_iv_cost_generic (data
, use
, cand
);
5254 return determine_use_iv_cost_address (data
, use
, cand
);
5257 return determine_use_iv_cost_condition (data
, use
, cand
);
5264 /* Return true if get_computation_cost indicates that autoincrement is
5265 a possibility for the pair of USE and CAND, false otherwise. */
5268 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5269 struct iv_cand
*cand
)
5275 if (use
->type
!= USE_ADDRESS
)
5278 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5279 &can_autoinc
, NULL
);
5281 BITMAP_FREE (depends_on
);
5283 return !infinite_cost_p (cost
) && can_autoinc
;
5286 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5287 use that allows autoincrement, and set their AINC_USE if possible. */
5290 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5294 for (i
= 0; i
< n_iv_cands (data
); i
++)
5296 struct iv_cand
*cand
= iv_cand (data
, i
);
5297 struct iv_use
*closest_before
= NULL
;
5298 struct iv_use
*closest_after
= NULL
;
5299 if (cand
->pos
!= IP_ORIGINAL
)
5302 for (j
= 0; j
< n_iv_uses (data
); j
++)
5304 struct iv_use
*use
= iv_use (data
, j
);
5305 unsigned uid
= gimple_uid (use
->stmt
);
5307 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5310 if (uid
< gimple_uid (cand
->incremented_at
)
5311 && (closest_before
== NULL
5312 || uid
> gimple_uid (closest_before
->stmt
)))
5313 closest_before
= use
;
5315 if (uid
> gimple_uid (cand
->incremented_at
)
5316 && (closest_after
== NULL
5317 || uid
< gimple_uid (closest_after
->stmt
)))
5318 closest_after
= use
;
5321 if (closest_before
!= NULL
5322 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5323 cand
->ainc_use
= closest_before
;
5324 else if (closest_after
!= NULL
5325 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5326 cand
->ainc_use
= closest_after
;
5330 /* Finds the candidates for the induction variables. */
5333 find_iv_candidates (struct ivopts_data
*data
)
5335 /* Add commonly used ivs. */
5336 add_standard_iv_candidates (data
);
5338 /* Add old induction variables. */
5339 add_old_ivs_candidates (data
);
5341 /* Add induction variables derived from uses. */
5342 add_derived_ivs_candidates (data
);
5344 set_autoinc_for_original_candidates (data
);
5346 /* Record the important candidates. */
5347 record_important_candidates (data
);
5350 /* Determines costs of basing the use of the iv on an iv candidate. */
5353 determine_use_iv_costs (struct ivopts_data
*data
)
5357 struct iv_cand
*cand
;
5358 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5360 alloc_use_cost_map (data
);
5362 for (i
= 0; i
< n_iv_uses (data
); i
++)
5364 use
= iv_use (data
, i
);
5366 if (data
->consider_all_candidates
)
5368 for (j
= 0; j
< n_iv_cands (data
); j
++)
5370 cand
= iv_cand (data
, j
);
5371 determine_use_iv_cost (data
, use
, cand
);
5378 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5380 cand
= iv_cand (data
, j
);
5381 if (!determine_use_iv_cost (data
, use
, cand
))
5382 bitmap_set_bit (to_clear
, j
);
5385 /* Remove the candidates for that the cost is infinite from
5386 the list of related candidates. */
5387 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5388 bitmap_clear (to_clear
);
5392 BITMAP_FREE (to_clear
);
5394 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5396 fprintf (dump_file
, "Use-candidate costs:\n");
5398 for (i
= 0; i
< n_iv_uses (data
); i
++)
5400 use
= iv_use (data
, i
);
5402 fprintf (dump_file
, "Use %d:\n", i
);
5403 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5404 for (j
= 0; j
< use
->n_map_members
; j
++)
5406 if (!use
->cost_map
[j
].cand
5407 || infinite_cost_p (use
->cost_map
[j
].cost
))
5410 fprintf (dump_file
, " %d\t%d\t%d\t",
5411 use
->cost_map
[j
].cand
->id
,
5412 use
->cost_map
[j
].cost
.cost
,
5413 use
->cost_map
[j
].cost
.complexity
);
5414 if (use
->cost_map
[j
].depends_on
)
5415 bitmap_print (dump_file
,
5416 use
->cost_map
[j
].depends_on
, "","");
5417 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5418 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5419 fprintf (dump_file
, "\n");
5422 fprintf (dump_file
, "\n");
5424 fprintf (dump_file
, "\n");
5428 /* Determines cost of the candidate CAND. */
5431 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5433 comp_cost cost_base
;
5434 unsigned cost
, cost_step
;
5443 /* There are two costs associated with the candidate -- its increment
5444 and its initialization. The second is almost negligible for any loop
5445 that rolls enough, so we take it just very little into account. */
5447 base
= cand
->iv
->base
;
5448 cost_base
= force_var_cost (data
, base
, NULL
);
5449 /* It will be exceptional that the iv register happens to be initialized with
5450 the proper value at no cost. In general, there will at least be a regcopy
5452 if (cost_base
.cost
== 0)
5453 cost_base
.cost
= COSTS_N_INSNS (1);
5454 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5456 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5458 /* Prefer the original ivs unless we may gain something by replacing it.
5459 The reason is to make debugging simpler; so this is not relevant for
5460 artificial ivs created by other optimization passes. */
5461 if (cand
->pos
!= IP_ORIGINAL
5462 || !SSA_NAME_VAR (cand
->var_before
)
5463 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5466 /* Prefer not to insert statements into latch unless there are some
5467 already (so that we do not create unnecessary jumps). */
5468 if (cand
->pos
== IP_END
5469 && empty_block_p (ip_end_pos (data
->current_loop
)))
5473 cand
->cost_step
= cost_step
;
5476 /* Determines costs of computation of the candidates. */
5479 determine_iv_costs (struct ivopts_data
*data
)
5483 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5485 fprintf (dump_file
, "Candidate costs:\n");
5486 fprintf (dump_file
, " cand\tcost\n");
5489 for (i
= 0; i
< n_iv_cands (data
); i
++)
5491 struct iv_cand
*cand
= iv_cand (data
, i
);
5493 determine_iv_cost (data
, cand
);
5495 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5496 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5499 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5500 fprintf (dump_file
, "\n");
5503 /* Calculates cost for having SIZE induction variables. */
5506 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5508 /* We add size to the cost, so that we prefer eliminating ivs
5510 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5511 data
->body_includes_call
);
5514 /* For each size of the induction variable set determine the penalty. */
5517 determine_set_costs (struct ivopts_data
*data
)
5523 struct loop
*loop
= data
->current_loop
;
5526 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5528 fprintf (dump_file
, "Global costs:\n");
5529 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5530 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5531 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5532 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5536 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5539 op
= PHI_RESULT (phi
);
5541 if (virtual_operand_p (op
))
5544 if (get_iv (data
, op
))
5550 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5552 struct version_info
*info
= ver_info (data
, j
);
5554 if (info
->inv_id
&& info
->has_nonlin_use
)
5558 data
->regs_used
= n
;
5559 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5560 fprintf (dump_file
, " regs_used %d\n", n
);
5562 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5564 fprintf (dump_file
, " cost for size:\n");
5565 fprintf (dump_file
, " ivs\tcost\n");
5566 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5567 fprintf (dump_file
, " %d\t%d\n", j
,
5568 ivopts_global_cost_for_size (data
, j
));
5569 fprintf (dump_file
, "\n");
5573 /* Returns true if A is a cheaper cost pair than B. */
5576 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5586 cmp
= compare_costs (a
->cost
, b
->cost
);
5593 /* In case the costs are the same, prefer the cheaper candidate. */
5594 if (a
->cand
->cost
< b
->cand
->cost
)
5601 /* Returns candidate by that USE is expressed in IVS. */
5603 static struct cost_pair
*
5604 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5606 return ivs
->cand_for_use
[use
->id
];
5609 /* Computes the cost field of IVS structure. */
5612 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5614 comp_cost cost
= ivs
->cand_use_cost
;
5616 cost
.cost
+= ivs
->cand_cost
;
5618 cost
.cost
+= ivopts_global_cost_for_size (data
,
5619 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5624 /* Remove invariants in set INVS to set IVS. */
5627 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5635 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5637 ivs
->n_invariant_uses
[iid
]--;
5638 if (ivs
->n_invariant_uses
[iid
] == 0)
5643 /* Set USE not to be expressed by any candidate in IVS. */
5646 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5649 unsigned uid
= use
->id
, cid
;
5650 struct cost_pair
*cp
;
5652 cp
= ivs
->cand_for_use
[uid
];
5658 ivs
->cand_for_use
[uid
] = NULL
;
5659 ivs
->n_cand_uses
[cid
]--;
5661 if (ivs
->n_cand_uses
[cid
] == 0)
5663 bitmap_clear_bit (ivs
->cands
, cid
);
5664 /* Do not count the pseudocandidates. */
5668 ivs
->cand_cost
-= cp
->cand
->cost
;
5670 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5673 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5675 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5677 if (cp
->inv_expr_id
!= -1)
5679 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5680 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5681 ivs
->num_used_inv_expr
--;
5683 iv_ca_recount_cost (data
, ivs
);
5686 /* Add invariants in set INVS to set IVS. */
5689 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5697 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5699 ivs
->n_invariant_uses
[iid
]++;
5700 if (ivs
->n_invariant_uses
[iid
] == 1)
5705 /* Set cost pair for USE in set IVS to CP. */
5708 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5709 struct iv_use
*use
, struct cost_pair
*cp
)
5711 unsigned uid
= use
->id
, cid
;
5713 if (ivs
->cand_for_use
[uid
] == cp
)
5716 if (ivs
->cand_for_use
[uid
])
5717 iv_ca_set_no_cp (data
, ivs
, use
);
5724 ivs
->cand_for_use
[uid
] = cp
;
5725 ivs
->n_cand_uses
[cid
]++;
5726 if (ivs
->n_cand_uses
[cid
] == 1)
5728 bitmap_set_bit (ivs
->cands
, cid
);
5729 /* Do not count the pseudocandidates. */
5733 ivs
->cand_cost
+= cp
->cand
->cost
;
5735 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5738 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5739 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5741 if (cp
->inv_expr_id
!= -1)
5743 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5744 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5745 ivs
->num_used_inv_expr
++;
5747 iv_ca_recount_cost (data
, ivs
);
5751 /* Extend set IVS by expressing USE by some of the candidates in it
5752 if possible. Consider all important candidates if candidates in
5753 set IVS don't give any result. */
5756 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5759 struct cost_pair
*best_cp
= NULL
, *cp
;
5762 struct iv_cand
*cand
;
5764 gcc_assert (ivs
->upto
>= use
->id
);
5768 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5770 cand
= iv_cand (data
, i
);
5771 cp
= get_use_iv_cost (data
, use
, cand
);
5772 if (cheaper_cost_pair (cp
, best_cp
))
5776 if (best_cp
== NULL
)
5778 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5780 cand
= iv_cand (data
, i
);
5781 cp
= get_use_iv_cost (data
, use
, cand
);
5782 if (cheaper_cost_pair (cp
, best_cp
))
5787 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5790 /* Get cost for assignment IVS. */
5793 iv_ca_cost (struct iv_ca
*ivs
)
5795 /* This was a conditional expression but it triggered a bug in
5798 return infinite_cost
;
5803 /* Returns true if all dependences of CP are among invariants in IVS. */
5806 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5811 if (!cp
->depends_on
)
5814 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5816 if (ivs
->n_invariant_uses
[i
] == 0)
5823 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5824 it before NEXT_CHANGE. */
5826 static struct iv_ca_delta
*
5827 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5828 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5830 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5833 change
->old_cp
= old_cp
;
5834 change
->new_cp
= new_cp
;
5835 change
->next_change
= next_change
;
5840 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5843 static struct iv_ca_delta
*
5844 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5846 struct iv_ca_delta
*last
;
5854 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5856 last
->next_change
= l2
;
5861 /* Reverse the list of changes DELTA, forming the inverse to it. */
5863 static struct iv_ca_delta
*
5864 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5866 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5868 for (act
= delta
; act
; act
= next
)
5870 next
= act
->next_change
;
5871 act
->next_change
= prev
;
5874 std::swap (act
->old_cp
, act
->new_cp
);
5880 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5881 reverted instead. */
5884 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5885 struct iv_ca_delta
*delta
, bool forward
)
5887 struct cost_pair
*from
, *to
;
5888 struct iv_ca_delta
*act
;
5891 delta
= iv_ca_delta_reverse (delta
);
5893 for (act
= delta
; act
; act
= act
->next_change
)
5897 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5898 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5902 iv_ca_delta_reverse (delta
);
5905 /* Returns true if CAND is used in IVS. */
5908 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5910 return ivs
->n_cand_uses
[cand
->id
] > 0;
5913 /* Returns number of induction variable candidates in the set IVS. */
5916 iv_ca_n_cands (struct iv_ca
*ivs
)
5918 return ivs
->n_cands
;
5921 /* Free the list of changes DELTA. */
5924 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5926 struct iv_ca_delta
*act
, *next
;
5928 for (act
= *delta
; act
; act
= next
)
5930 next
= act
->next_change
;
5937 /* Allocates new iv candidates assignment. */
5939 static struct iv_ca
*
5940 iv_ca_new (struct ivopts_data
*data
)
5942 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5946 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5947 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5948 nw
->cands
= BITMAP_ALLOC (NULL
);
5951 nw
->cand_use_cost
= no_cost
;
5953 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5955 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5956 nw
->num_used_inv_expr
= 0;
5961 /* Free memory occupied by the set IVS. */
5964 iv_ca_free (struct iv_ca
**ivs
)
5966 free ((*ivs
)->cand_for_use
);
5967 free ((*ivs
)->n_cand_uses
);
5968 BITMAP_FREE ((*ivs
)->cands
);
5969 free ((*ivs
)->n_invariant_uses
);
5970 free ((*ivs
)->used_inv_expr
);
5975 /* Dumps IVS to FILE. */
5978 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5980 const char *pref
= " invariants ";
5982 comp_cost cost
= iv_ca_cost (ivs
);
5984 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5985 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5986 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5987 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5989 for (i
= 0; i
< ivs
->upto
; i
++)
5991 struct iv_use
*use
= iv_use (data
, i
);
5992 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5994 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5995 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5997 fprintf (file
, " use:%d --> ??\n", use
->id
);
6000 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6001 if (ivs
->n_invariant_uses
[i
])
6003 fprintf (file
, "%s%d", pref
, i
);
6006 fprintf (file
, "\n\n");
6009 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6010 new set, and store differences in DELTA. Number of induction variables
6011 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6012 the function will try to find a solution with mimimal iv candidates. */
6015 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6016 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6017 unsigned *n_ivs
, bool min_ncand
)
6022 struct cost_pair
*old_cp
, *new_cp
;
6025 for (i
= 0; i
< ivs
->upto
; i
++)
6027 use
= iv_use (data
, i
);
6028 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6031 && old_cp
->cand
== cand
)
6034 new_cp
= get_use_iv_cost (data
, use
, cand
);
6038 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6041 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6044 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6047 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6048 cost
= iv_ca_cost (ivs
);
6050 *n_ivs
= iv_ca_n_cands (ivs
);
6051 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6056 /* Try narrowing set IVS by removing CAND. Return the cost of
6057 the new set and store the differences in DELTA. START is
6058 the candidate with which we start narrowing. */
6061 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6062 struct iv_cand
*cand
, struct iv_cand
*start
,
6063 struct iv_ca_delta
**delta
)
6067 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6069 struct iv_cand
*cnd
;
6070 comp_cost cost
, best_cost
, acost
;
6073 for (i
= 0; i
< n_iv_uses (data
); i
++)
6075 use
= iv_use (data
, i
);
6077 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6078 if (old_cp
->cand
!= cand
)
6081 best_cost
= iv_ca_cost (ivs
);
6082 /* Start narrowing with START. */
6083 new_cp
= get_use_iv_cost (data
, use
, start
);
6085 if (data
->consider_all_candidates
)
6087 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6089 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6092 cnd
= iv_cand (data
, ci
);
6094 cp
= get_use_iv_cost (data
, use
, cnd
);
6098 iv_ca_set_cp (data
, ivs
, use
, cp
);
6099 acost
= iv_ca_cost (ivs
);
6101 if (compare_costs (acost
, best_cost
) < 0)
6110 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6112 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6115 cnd
= iv_cand (data
, ci
);
6117 cp
= get_use_iv_cost (data
, use
, cnd
);
6121 iv_ca_set_cp (data
, ivs
, use
, cp
);
6122 acost
= iv_ca_cost (ivs
);
6124 if (compare_costs (acost
, best_cost
) < 0)
6131 /* Restore to old cp for use. */
6132 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6136 iv_ca_delta_free (delta
);
6137 return infinite_cost
;
6140 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6143 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6144 cost
= iv_ca_cost (ivs
);
6145 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6150 /* Try optimizing the set of candidates IVS by removing candidates different
6151 from to EXCEPT_CAND from it. Return cost of the new set, and store
6152 differences in DELTA. */
6155 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6156 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6159 struct iv_ca_delta
*act_delta
, *best_delta
;
6161 comp_cost best_cost
, acost
;
6162 struct iv_cand
*cand
;
6165 best_cost
= iv_ca_cost (ivs
);
6167 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6169 cand
= iv_cand (data
, i
);
6171 if (cand
== except_cand
)
6174 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6176 if (compare_costs (acost
, best_cost
) < 0)
6179 iv_ca_delta_free (&best_delta
);
6180 best_delta
= act_delta
;
6183 iv_ca_delta_free (&act_delta
);
6192 /* Recurse to possibly remove other unnecessary ivs. */
6193 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6194 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6195 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6196 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6200 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6201 cheaper local cost for USE than BEST_CP. Return pointer to
6202 the corresponding cost_pair, otherwise just return BEST_CP. */
6204 static struct cost_pair
*
6205 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6206 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6207 struct cost_pair
*best_cp
)
6209 struct iv_cand
*cand
;
6210 struct cost_pair
*cp
;
6212 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6213 if (cand_idx
== old_cand
->id
)
6216 cand
= iv_cand (data
, cand_idx
);
6217 cp
= get_use_iv_cost (data
, use
, cand
);
6218 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6224 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6225 which are used by more than one iv uses. For each of those candidates,
6226 this function tries to represent iv uses under that candidate using
6227 other ones with lower local cost, then tries to prune the new set.
6228 If the new set has lower cost, It returns the new cost after recording
6229 candidate replacement in list DELTA. */
6232 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6233 struct iv_ca_delta
**delta
)
6235 bitmap_iterator bi
, bj
;
6236 unsigned int i
, j
, k
;
6238 struct iv_cand
*cand
;
6239 comp_cost orig_cost
, acost
;
6240 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6241 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6244 orig_cost
= iv_ca_cost (ivs
);
6246 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6248 if (ivs
->n_cand_uses
[i
] == 1
6249 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6252 cand
= iv_cand (data
, i
);
6255 /* Represent uses under current candidate using other ones with
6256 lower local cost. */
6257 for (j
= 0; j
< ivs
->upto
; j
++)
6259 use
= iv_use (data
, j
);
6260 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6262 if (old_cp
->cand
!= cand
)
6266 if (data
->consider_all_candidates
)
6267 for (k
= 0; k
< n_iv_cands (data
); k
++)
6268 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6269 old_cp
->cand
, best_cp
);
6271 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6272 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6273 old_cp
->cand
, best_cp
);
6275 if (best_cp
== old_cp
)
6278 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6280 /* No need for further prune. */
6284 /* Prune the new candidate set. */
6285 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6286 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6287 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6288 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6290 if (compare_costs (acost
, orig_cost
) < 0)
6296 iv_ca_delta_free (&act_delta
);
6302 /* Tries to extend the sets IVS in the best possible way in order
6303 to express the USE. If ORIGINALP is true, prefer candidates from
6304 the original set of IVs, otherwise favor important candidates not
6305 based on any memory object. */
6308 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6309 struct iv_use
*use
, bool originalp
)
6311 comp_cost best_cost
, act_cost
;
6314 struct iv_cand
*cand
;
6315 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6316 struct cost_pair
*cp
;
6318 iv_ca_add_use (data
, ivs
, use
);
6319 best_cost
= iv_ca_cost (ivs
);
6320 cp
= iv_ca_cand_for_use (ivs
, use
);
6323 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6324 iv_ca_set_no_cp (data
, ivs
, use
);
6327 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6328 first try important candidates not based on any memory object. Only if
6329 this fails, try the specific ones. Rationale -- in loops with many
6330 variables the best choice often is to use just one generic biv. If we
6331 added here many ivs specific to the uses, the optimization algorithm later
6332 would be likely to get stuck in a local minimum, thus causing us to create
6333 too many ivs. The approach from few ivs to more seems more likely to be
6334 successful -- starting from few ivs, replacing an expensive use by a
6335 specific iv should always be a win. */
6336 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6338 cand
= iv_cand (data
, i
);
6340 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6343 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6346 if (iv_ca_cand_used_p (ivs
, cand
))
6349 cp
= get_use_iv_cost (data
, use
, cand
);
6353 iv_ca_set_cp (data
, ivs
, use
, cp
);
6354 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6356 iv_ca_set_no_cp (data
, ivs
, use
);
6357 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6359 if (compare_costs (act_cost
, best_cost
) < 0)
6361 best_cost
= act_cost
;
6363 iv_ca_delta_free (&best_delta
);
6364 best_delta
= act_delta
;
6367 iv_ca_delta_free (&act_delta
);
6370 if (infinite_cost_p (best_cost
))
6372 for (i
= 0; i
< use
->n_map_members
; i
++)
6374 cp
= use
->cost_map
+ i
;
6379 /* Already tried this. */
6380 if (cand
->important
)
6382 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6384 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6388 if (iv_ca_cand_used_p (ivs
, cand
))
6392 iv_ca_set_cp (data
, ivs
, use
, cp
);
6393 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6394 iv_ca_set_no_cp (data
, ivs
, use
);
6395 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6398 if (compare_costs (act_cost
, best_cost
) < 0)
6400 best_cost
= act_cost
;
6403 iv_ca_delta_free (&best_delta
);
6404 best_delta
= act_delta
;
6407 iv_ca_delta_free (&act_delta
);
6411 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6412 iv_ca_delta_free (&best_delta
);
6414 return !infinite_cost_p (best_cost
);
6417 /* Finds an initial assignment of candidates to uses. */
6419 static struct iv_ca
*
6420 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6422 struct iv_ca
*ivs
= iv_ca_new (data
);
6425 for (i
= 0; i
< n_iv_uses (data
); i
++)
6426 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6435 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6436 points to a bool variable, this function tries to break local
6437 optimal fixed-point by replacing candidates in IVS if it's true. */
6440 try_improve_iv_set (struct ivopts_data
*data
,
6441 struct iv_ca
*ivs
, bool *try_replace_p
)
6444 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6445 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6446 struct iv_cand
*cand
;
6448 /* Try extending the set of induction variables by one. */
6449 for (i
= 0; i
< n_iv_cands (data
); i
++)
6451 cand
= iv_cand (data
, i
);
6453 if (iv_ca_cand_used_p (ivs
, cand
))
6456 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6460 /* If we successfully added the candidate and the set is small enough,
6461 try optimizing it by removing other candidates. */
6462 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6464 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6465 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6466 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6467 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6470 if (compare_costs (acost
, best_cost
) < 0)
6473 iv_ca_delta_free (&best_delta
);
6474 best_delta
= act_delta
;
6477 iv_ca_delta_free (&act_delta
);
6482 /* Try removing the candidates from the set instead. */
6483 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6485 if (!best_delta
&& *try_replace_p
)
6487 *try_replace_p
= false;
6488 /* So far candidate selecting algorithm tends to choose fewer IVs
6489 so that it can handle cases in which loops have many variables
6490 but the best choice is often to use only one general biv. One
6491 weakness is it can't handle opposite cases, in which different
6492 candidates should be chosen with respect to each use. To solve
6493 the problem, we replace candidates in a manner described by the
6494 comments of iv_ca_replace, thus give general algorithm a chance
6495 to break local optimal fixed-point in these cases. */
6496 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6503 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6504 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6505 iv_ca_delta_free (&best_delta
);
6509 /* Attempts to find the optimal set of induction variables. We do simple
6510 greedy heuristic -- we try to replace at most one candidate in the selected
6511 solution and remove the unused ivs while this improves the cost. */
6513 static struct iv_ca
*
6514 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6517 bool try_replace_p
= true;
6519 /* Get the initial solution. */
6520 set
= get_initial_solution (data
, originalp
);
6523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6524 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6530 fprintf (dump_file
, "Initial set of candidates:\n");
6531 iv_ca_dump (data
, dump_file
, set
);
6534 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6536 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6538 fprintf (dump_file
, "Improved to:\n");
6539 iv_ca_dump (data
, dump_file
, set
);
6546 static struct iv_ca
*
6547 find_optimal_iv_set (struct ivopts_data
*data
)
6550 struct iv_ca
*set
, *origset
;
6552 comp_cost cost
, origcost
;
6554 /* Determine the cost based on a strategy that starts with original IVs,
6555 and try again using a strategy that prefers candidates not based
6557 origset
= find_optimal_iv_set_1 (data
, true);
6558 set
= find_optimal_iv_set_1 (data
, false);
6560 if (!origset
&& !set
)
6563 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6564 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6566 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6568 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6569 origcost
.cost
, origcost
.complexity
);
6570 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6571 cost
.cost
, cost
.complexity
);
6574 /* Choose the one with the best cost. */
6575 if (compare_costs (origcost
, cost
) <= 0)
6582 iv_ca_free (&origset
);
6584 for (i
= 0; i
< n_iv_uses (data
); i
++)
6586 use
= iv_use (data
, i
);
6587 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6593 /* Creates a new induction variable corresponding to CAND. */
6596 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6598 gimple_stmt_iterator incr_pos
;
6608 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6612 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6620 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6624 /* Mark that the iv is preserved. */
6625 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6626 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6628 /* Rewrite the increment so that it uses var_before directly. */
6629 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6633 gimple_add_tmp_var (cand
->var_before
);
6635 base
= unshare_expr (cand
->iv
->base
);
6637 create_iv (base
, unshare_expr (cand
->iv
->step
),
6638 cand
->var_before
, data
->current_loop
,
6639 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6642 /* Creates new induction variables described in SET. */
6645 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6648 struct iv_cand
*cand
;
6651 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6653 cand
= iv_cand (data
, i
);
6654 create_new_iv (data
, cand
);
6657 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6659 fprintf (dump_file
, "Selected IV set for loop %d",
6660 data
->current_loop
->num
);
6661 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6662 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6663 LOCATION_LINE (data
->loop_loc
));
6664 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6665 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6667 cand
= iv_cand (data
, i
);
6668 dump_cand (dump_file
, cand
);
6670 fprintf (dump_file
, "\n");
6674 /* Rewrites USE (definition of iv used in a nonlinear expression)
6675 using candidate CAND. */
6678 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6679 struct iv_use
*use
, struct iv_cand
*cand
)
6684 gimple_stmt_iterator bsi
;
6686 /* An important special case -- if we are asked to express value of
6687 the original iv by itself, just exit; there is no need to
6688 introduce a new computation (that might also need casting the
6689 variable to unsigned and back). */
6690 if (cand
->pos
== IP_ORIGINAL
6691 && cand
->incremented_at
== use
->stmt
)
6693 enum tree_code stmt_code
;
6695 gcc_assert (is_gimple_assign (use
->stmt
));
6696 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6698 /* Check whether we may leave the computation unchanged.
6699 This is the case only if it does not rely on other
6700 computations in the loop -- otherwise, the computation
6701 we rely upon may be removed in remove_unused_ivs,
6702 thus leading to ICE. */
6703 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6704 if (stmt_code
== PLUS_EXPR
6705 || stmt_code
== MINUS_EXPR
6706 || stmt_code
== POINTER_PLUS_EXPR
)
6708 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6709 op
= gimple_assign_rhs2 (use
->stmt
);
6710 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6711 op
= gimple_assign_rhs1 (use
->stmt
);
6718 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6722 comp
= get_computation (data
->current_loop
, use
, cand
);
6723 gcc_assert (comp
!= NULL_TREE
);
6725 switch (gimple_code (use
->stmt
))
6728 tgt
= PHI_RESULT (use
->stmt
);
6730 /* If we should keep the biv, do not replace it. */
6731 if (name_info (data
, tgt
)->preserve_biv
)
6734 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6738 tgt
= gimple_assign_lhs (use
->stmt
);
6739 bsi
= gsi_for_stmt (use
->stmt
);
6746 if (!valid_gimple_rhs_p (comp
)
6747 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6748 /* We can't allow re-allocating the stmt as it might be pointed
6750 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6751 >= gimple_num_ops (gsi_stmt (bsi
)))))
6753 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6754 true, GSI_SAME_STMT
);
6755 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6757 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6758 /* As this isn't a plain copy we have to reset alignment
6760 if (SSA_NAME_PTR_INFO (comp
))
6761 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6765 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6767 ass
= gimple_build_assign (tgt
, comp
);
6768 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6770 bsi
= gsi_for_stmt (use
->stmt
);
6771 remove_phi_node (&bsi
, false);
6775 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6776 use
->stmt
= gsi_stmt (bsi
);
6780 /* Performs a peephole optimization to reorder the iv update statement with
6781 a mem ref to enable instruction combining in later phases. The mem ref uses
6782 the iv value before the update, so the reordering transformation requires
6783 adjustment of the offset. CAND is the selected IV_CAND.
6787 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6795 directly propagating t over to (1) will introduce overlapping live range
6796 thus increase register pressure. This peephole transform it into:
6800 t = MEM_REF (base, iv2, 8, 8);
6807 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6810 gimple iv_update
, stmt
;
6812 gimple_stmt_iterator gsi
, gsi_iv
;
6814 if (cand
->pos
!= IP_NORMAL
)
6817 var_after
= cand
->var_after
;
6818 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6820 bb
= gimple_bb (iv_update
);
6821 gsi
= gsi_last_nondebug_bb (bb
);
6822 stmt
= gsi_stmt (gsi
);
6824 /* Only handle conditional statement for now. */
6825 if (gimple_code (stmt
) != GIMPLE_COND
)
6828 gsi_prev_nondebug (&gsi
);
6829 stmt
= gsi_stmt (gsi
);
6830 if (stmt
!= iv_update
)
6833 gsi_prev_nondebug (&gsi
);
6834 if (gsi_end_p (gsi
))
6837 stmt
= gsi_stmt (gsi
);
6838 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6841 if (stmt
!= use
->stmt
)
6844 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6847 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6849 fprintf (dump_file
, "Reordering \n");
6850 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6851 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6852 fprintf (dump_file
, "\n");
6855 gsi
= gsi_for_stmt (use
->stmt
);
6856 gsi_iv
= gsi_for_stmt (iv_update
);
6857 gsi_move_before (&gsi_iv
, &gsi
);
6859 cand
->pos
= IP_BEFORE_USE
;
6860 cand
->incremented_at
= use
->stmt
;
6863 /* Rewrites USE (address that is an iv) using candidate CAND. */
6866 rewrite_use_address_1 (struct ivopts_data
*data
,
6867 struct iv_use
*use
, struct iv_cand
*cand
)
6870 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6871 tree base_hint
= NULL_TREE
;
6875 adjust_iv_update_pos (cand
, use
);
6876 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6878 unshare_aff_combination (&aff
);
6880 /* To avoid undefined overflow problems, all IV candidates use unsigned
6881 integer types. The drawback is that this makes it impossible for
6882 create_mem_ref to distinguish an IV that is based on a memory object
6883 from one that represents simply an offset.
6885 To work around this problem, we pass a hint to create_mem_ref that
6886 indicates which variable (if any) in aff is an IV based on a memory
6887 object. Note that we only consider the candidate. If this is not
6888 based on an object, the base of the reference is in some subexpression
6889 of the use -- but these will use pointer types, so they are recognized
6890 by the create_mem_ref heuristics anyway. */
6891 if (cand
->iv
->base_object
)
6892 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6894 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6895 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6896 reference_alias_ptr_type (*use
->op_p
),
6897 iv
, base_hint
, data
->speed
);
6898 copy_ref_info (ref
, *use
->op_p
);
6902 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6903 first use of a group, rewrites sub uses in the group too. */
6906 rewrite_use_address (struct ivopts_data
*data
,
6907 struct iv_use
*use
, struct iv_cand
*cand
)
6909 struct iv_use
*next
;
6911 gcc_assert (use
->sub_id
== 0);
6912 rewrite_use_address_1 (data
, use
, cand
);
6913 update_stmt (use
->stmt
);
6915 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
6917 rewrite_use_address_1 (data
, next
, cand
);
6918 update_stmt (next
->stmt
);
6924 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6928 rewrite_use_compare (struct ivopts_data
*data
,
6929 struct iv_use
*use
, struct iv_cand
*cand
)
6931 tree comp
, *var_p
, op
, bound
;
6932 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6933 enum tree_code compare
;
6934 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6940 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6941 tree var_type
= TREE_TYPE (var
);
6944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6946 fprintf (dump_file
, "Replacing exit test: ");
6947 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6950 bound
= unshare_expr (fold_convert (var_type
, bound
));
6951 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6953 gsi_insert_seq_on_edge_immediate (
6954 loop_preheader_edge (data
->current_loop
),
6957 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6958 gimple_cond_set_lhs (cond_stmt
, var
);
6959 gimple_cond_set_code (cond_stmt
, compare
);
6960 gimple_cond_set_rhs (cond_stmt
, op
);
6964 /* The induction variable elimination failed; just express the original
6966 comp
= get_computation (data
->current_loop
, use
, cand
);
6967 gcc_assert (comp
!= NULL_TREE
);
6969 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6972 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6973 true, GSI_SAME_STMT
);
6976 /* Rewrites USE using candidate CAND. */
6979 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6983 case USE_NONLINEAR_EXPR
:
6984 rewrite_use_nonlinear_expr (data
, use
, cand
);
6988 rewrite_use_address (data
, use
, cand
);
6992 rewrite_use_compare (data
, use
, cand
);
6999 update_stmt (use
->stmt
);
7002 /* Rewrite the uses using the selected induction variables. */
7005 rewrite_uses (struct ivopts_data
*data
)
7008 struct iv_cand
*cand
;
7011 for (i
= 0; i
< n_iv_uses (data
); i
++)
7013 use
= iv_use (data
, i
);
7014 cand
= use
->selected
;
7017 rewrite_use (data
, use
, cand
);
7021 /* Removes the ivs that are not used after rewriting. */
7024 remove_unused_ivs (struct ivopts_data
*data
)
7028 bitmap toremove
= BITMAP_ALLOC (NULL
);
7030 /* Figure out an order in which to release SSA DEFs so that we don't
7031 release something that we'd have to propagate into a debug stmt
7033 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7035 struct version_info
*info
;
7037 info
= ver_info (data
, j
);
7039 && !integer_zerop (info
->iv
->step
)
7041 && !info
->iv
->have_use_for
7042 && !info
->preserve_biv
)
7044 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7046 tree def
= info
->iv
->ssa_name
;
7048 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7050 imm_use_iterator imm_iter
;
7051 use_operand_p use_p
;
7055 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7057 if (!gimple_debug_bind_p (stmt
))
7060 /* We just want to determine whether to do nothing
7061 (count == 0), to substitute the computed
7062 expression into a single use of the SSA DEF by
7063 itself (count == 1), or to use a debug temp
7064 because the SSA DEF is used multiple times or as
7065 part of a larger expression (count > 1). */
7067 if (gimple_debug_bind_get_value (stmt
) != def
)
7071 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7077 struct iv_use dummy_use
;
7078 struct iv_cand
*best_cand
= NULL
, *cand
;
7079 unsigned i
, best_pref
= 0, cand_pref
;
7081 memset (&dummy_use
, 0, sizeof (dummy_use
));
7082 dummy_use
.iv
= info
->iv
;
7083 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7085 cand
= iv_use (data
, i
)->selected
;
7086 if (cand
== best_cand
)
7088 cand_pref
= operand_equal_p (cand
->iv
->step
,
7092 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7093 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7096 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7098 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7101 best_pref
= cand_pref
;
7108 tree comp
= get_computation_at (data
->current_loop
,
7109 &dummy_use
, best_cand
,
7110 SSA_NAME_DEF_STMT (def
));
7116 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7117 DECL_ARTIFICIAL (vexpr
) = 1;
7118 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7119 if (SSA_NAME_VAR (def
))
7120 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7122 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7124 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7125 gimple_stmt_iterator gsi
;
7127 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7128 gsi
= gsi_after_labels (gimple_bb
7129 (SSA_NAME_DEF_STMT (def
)));
7131 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7133 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7137 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7139 if (!gimple_debug_bind_p (stmt
))
7142 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7143 SET_USE (use_p
, comp
);
7151 release_defs_bitset (toremove
);
7153 BITMAP_FREE (toremove
);
7156 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7157 for hash_map::traverse. */
7160 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7166 /* Frees data allocated by the optimization of a single loop. */
7169 free_loop_data (struct ivopts_data
*data
)
7177 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7178 delete data
->niters
;
7179 data
->niters
= NULL
;
7182 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7184 struct version_info
*info
;
7186 info
= ver_info (data
, i
);
7189 info
->has_nonlin_use
= false;
7190 info
->preserve_biv
= false;
7193 bitmap_clear (data
->relevant
);
7194 bitmap_clear (data
->important_candidates
);
7196 for (i
= 0; i
< n_iv_uses (data
); i
++)
7198 struct iv_use
*use
= iv_use (data
, i
);
7199 struct iv_use
*pre
= use
, *sub
= use
->next
;
7203 gcc_assert (sub
->related_cands
== NULL
);
7204 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7213 BITMAP_FREE (use
->related_cands
);
7214 for (j
= 0; j
< use
->n_map_members
; j
++)
7215 if (use
->cost_map
[j
].depends_on
)
7216 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7217 free (use
->cost_map
);
7220 data
->iv_uses
.truncate (0);
7222 for (i
= 0; i
< n_iv_cands (data
); i
++)
7224 struct iv_cand
*cand
= iv_cand (data
, i
);
7227 if (cand
->depends_on
)
7228 BITMAP_FREE (cand
->depends_on
);
7231 data
->iv_candidates
.truncate (0);
7233 if (data
->version_info_size
< num_ssa_names
)
7235 data
->version_info_size
= 2 * num_ssa_names
;
7236 free (data
->version_info
);
7237 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7240 data
->max_inv_id
= 0;
7242 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7243 SET_DECL_RTL (obj
, NULL_RTX
);
7245 decl_rtl_to_reset
.truncate (0);
7247 data
->inv_expr_tab
->empty ();
7248 data
->inv_expr_id
= 0;
7251 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7255 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7257 free_loop_data (data
);
7258 free (data
->version_info
);
7259 BITMAP_FREE (data
->relevant
);
7260 BITMAP_FREE (data
->important_candidates
);
7262 decl_rtl_to_reset
.release ();
7263 data
->iv_uses
.release ();
7264 data
->iv_candidates
.release ();
7265 delete data
->inv_expr_tab
;
7266 data
->inv_expr_tab
= NULL
;
7267 free_affine_expand_cache (&data
->name_expansion_cache
);
7270 /* Returns true if the loop body BODY includes any function calls. */
7273 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7275 gimple_stmt_iterator gsi
;
7278 for (i
= 0; i
< num_nodes
; i
++)
7279 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7281 gimple stmt
= gsi_stmt (gsi
);
7282 if (is_gimple_call (stmt
)
7283 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7289 /* Optimizes the LOOP. Returns true if anything changed. */
7292 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7294 bool changed
= false;
7295 struct iv_ca
*iv_ca
;
7296 edge exit
= single_dom_exit (loop
);
7299 gcc_assert (!data
->niters
);
7300 data
->current_loop
= loop
;
7301 data
->loop_loc
= find_loop_location (loop
);
7302 data
->speed
= optimize_loop_for_speed_p (loop
);
7304 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7306 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7307 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7308 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7309 LOCATION_LINE (data
->loop_loc
));
7310 fprintf (dump_file
, "\n");
7314 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7315 exit
->src
->index
, exit
->dest
->index
);
7316 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7317 fprintf (dump_file
, "\n");
7320 fprintf (dump_file
, "\n");
7323 body
= get_loop_body (loop
);
7324 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7325 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7328 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7330 /* For each ssa name determines whether it behaves as an induction variable
7332 if (!find_induction_variables (data
))
7335 /* Finds interesting uses (item 1). */
7336 find_interesting_uses (data
);
7337 group_address_uses (data
);
7338 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7341 /* Finds candidates for the induction variables (item 2). */
7342 find_iv_candidates (data
);
7344 /* Calculates the costs (item 3, part 1). */
7345 determine_iv_costs (data
);
7346 determine_use_iv_costs (data
);
7347 determine_set_costs (data
);
7349 /* Find the optimal set of induction variables (item 3, part 2). */
7350 iv_ca
= find_optimal_iv_set (data
);
7355 /* Create the new induction variables (item 4, part 1). */
7356 create_new_ivs (data
, iv_ca
);
7357 iv_ca_free (&iv_ca
);
7359 /* Rewrite the uses (item 4, part 2). */
7360 rewrite_uses (data
);
7362 /* Remove the ivs that are unused after rewriting. */
7363 remove_unused_ivs (data
);
7365 /* We have changed the structure of induction variables; it might happen
7366 that definitions in the scev database refer to some of them that were
7371 free_loop_data (data
);
7376 /* Main entry point. Optimizes induction variables in loops. */
7379 tree_ssa_iv_optimize (void)
7382 struct ivopts_data data
;
7384 tree_ssa_iv_optimize_init (&data
);
7386 /* Optimize the loops starting with the innermost ones. */
7387 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7390 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7392 tree_ssa_iv_optimize_loop (&data
, loop
);
7395 tree_ssa_iv_optimize_finalize (&data
);