/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is chosen.
      The cost function assigns a cost to sets of induction variables and
      consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */

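/* Illustrative example (not part of the original sources): for a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address of a[i] is an interesting use, the counter i is an "old"
   candidate, and a pointer with base &a[0] and step sizeof (a[0]) is a
   derived candidate.  If the cost model selects the pointer, the loop is in
   effect rewritten to walk a pointer through the array instead of computing
   a + i * sizeof (a[0]) on every iteration -- the classical strength
   reduction performed by this pass.  */
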
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

static hashval_t mbc_entry_hash (const void *);
static int mbc_entry_eq (const void *, const void *);

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

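/* Example (illustration only): in the loop "for (i = 0; i < n; i++) a[i] = 0;"
   the iv describing i has base 0, step 1 and biv_p set, while the iv
   describing the address &a[i] has base &a[0], step sizeof (a[0]) and a
   base_object derived from a.  */
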
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

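/* Note (not from the original sources): comp_cost values are combined by
   adding both fields, e.g. {2, 0} on top of {4, 1} gives {6, 1}, and
   infinite_cost is meant to dominate every comparison so that combinations
   the target cannot express never look cheap.  */
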
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

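/* Illustration (not from the original sources): for a typical loop whose
   latch block is empty, IP_NORMAL inserts the increment just before the exit
   test at the end of the body, IP_END appends it to the latch block, and
   IP_BEFORE_USE/IP_AFTER_USE pin it next to one particular memory access so
   that an auto-increment addressing mode can be used.  */
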
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

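/* For example (illustration only), this bound corresponds to the
   --param iv-consider-all-candidates-bound=N command-line parameter; for
   loops with more candidates than the bound, each use only tries the
   "important" and related candidates.  */
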
/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Cached costs for multiplies by constants, and a flag to indicate
   when they're valid.  */
static htab_t mult_costs[2];
static bool cost_tables_exist = false;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

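/* Typical usage of the accessors above (illustration only):

     for (i = 0; i < n_iv_uses (data); i++)
       {
         struct iv_use *use = iv_use (data, i);
         ...
       }

   and analogously for n_iv_cands/iv_cand; the rest of the pass goes through
   these helpers rather than touching the vectors directly.  */
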
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

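/* With -fdump-tree-ivopts-details the output produced by dump_iv resembles
   the following (made-up SSA name, shown only to illustrate the format of
   the fprintf calls above):

     ssa name i_13
       type unsigned int
       base 0
       step 1
       is a biv
   */
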
484 /* Dumps information about the USE to FILE. */
486 extern void dump_use (FILE *, struct iv_use
*);
488 dump_use (FILE *file
, struct iv_use
*use
)
490 fprintf (file
, "use %d\n", use
->id
);
494 case USE_NONLINEAR_EXPR
:
495 fprintf (file
, " generic\n");
499 fprintf (file
, " address\n");
503 fprintf (file
, " compare\n");
510 fprintf (file
, " in statement ");
511 print_gimple_stmt (file
, use
->stmt
, 0, 0);
512 fprintf (file
, "\n");
514 fprintf (file
, " at position ");
516 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
517 fprintf (file
, "\n");
519 dump_iv (file
, use
->iv
);
521 if (use
->related_cands
)
523 fprintf (file
, " related candidates ");
524 dump_bitmap (file
, use
->related_cands
);
528 /* Dumps information about the uses to FILE. */
530 extern void dump_uses (FILE *, struct ivopts_data
*);
532 dump_uses (FILE *file
, struct ivopts_data
*data
)
537 for (i
= 0; i
< n_iv_uses (data
); i
++)
539 use
= iv_use (data
, i
);
541 dump_use (file
, use
);
542 fprintf (file
, "\n");
546 /* Dumps information about induction variable candidate CAND to FILE. */
548 extern void dump_cand (FILE *, struct iv_cand
*);
550 dump_cand (FILE *file
, struct iv_cand
*cand
)
552 struct iv
*iv
= cand
->iv
;
554 fprintf (file
, "candidate %d%s\n",
555 cand
->id
, cand
->important
? " (important)" : "");
557 if (cand
->depends_on
)
559 fprintf (file
, " depends on ");
560 dump_bitmap (file
, cand
->depends_on
);
565 fprintf (file
, " final value replacement\n");
569 if (cand
->var_before
)
571 fprintf (file
, " var_before ");
572 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
573 fprintf (file
, "\n");
577 fprintf (file
, " var_after ");
578 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
579 fprintf (file
, "\n");
585 fprintf (file
, " incremented before exit test\n");
589 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
593 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
597 fprintf (file
, " incremented at end\n");
601 fprintf (file
, " original biv\n");
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

624 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
628 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
630 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
634 if (sbb
== loop
->latch
)
640 return stmt
== last_stmt (bb
);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */
648 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
650 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
651 basic_block stmt_bb
= gimple_bb (stmt
);
653 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
656 if (stmt_bb
!= cand_bb
)
660 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
662 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */
669 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
677 return stmt_after_ip_normal_pos (loop
, stmt
);
681 return stmt_after_inc_pos (cand
, stmt
, false);
684 return stmt_after_inc_pos (cand
, stmt
, true);
691 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
694 abnormal_ssa_name_p (tree exp
)
699 if (TREE_CODE (exp
) != SSA_NAME
)
702 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
705 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
706 abnormal phi node. Callback for for_each_index. */
709 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
710 void *data ATTRIBUTE_UNUSED
)
712 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
714 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
716 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
720 return !abnormal_ssa_name_p (*index
);
723 /* Returns true if EXPR contains a ssa name that occurs in an
724 abnormal phi node. */
727 contains_abnormal_ssa_name_p (tree expr
)
730 enum tree_code_class codeclass
;
735 code
= TREE_CODE (expr
);
736 codeclass
= TREE_CODE_CLASS (code
);
738 if (code
== SSA_NAME
)
739 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
741 if (code
== INTEGER_CST
742 || is_gimple_min_invariant (expr
))
745 if (code
== ADDR_EXPR
)
746 return !for_each_index (&TREE_OPERAND (expr
, 0),
747 idx_contains_abnormal_ssa_name_p
,
750 if (code
== COND_EXPR
)
751 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
752 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
753 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
759 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
764 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
776 /* Returns the structure describing number of iterations determined from
777 EXIT of DATA->current_loop, or NULL if something goes wrong. */
779 static struct tree_niter_desc
*
780 niter_for_exit (struct ivopts_data
*data
, edge exit
)
782 struct tree_niter_desc
*desc
;
787 data
->niters
= pointer_map_create ();
791 slot
= pointer_map_contains (data
->niters
, exit
);
795 /* Try to determine number of iterations. We cannot safely work with ssa
796 names that appear in phi nodes on abnormal edges, so that we do not
797 create overlapping life ranges for them (PR 27283). */
798 desc
= XNEW (struct tree_niter_desc
);
799 if (!number_of_iterations_exit (data
->current_loop
,
801 || contains_abnormal_ssa_name_p (desc
->niter
))
806 slot
= pointer_map_insert (data
->niters
, exit
);
810 desc
= (struct tree_niter_desc
*) *slot
;
815 /* Returns the structure describing number of iterations determined from
816 single dominating exit of DATA->current_loop, or NULL if something
819 static struct tree_niter_desc
*
820 niter_for_single_dom_exit (struct ivopts_data
*data
)
822 edge exit
= single_dom_exit (data
->current_loop
);
827 return niter_for_exit (data
, exit
);
830 /* Hash table equality function for expressions. */
833 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
835 const struct iv_inv_expr_ent
*expr1
=
836 (const struct iv_inv_expr_ent
*)ent1
;
837 const struct iv_inv_expr_ent
*expr2
=
838 (const struct iv_inv_expr_ent
*)ent2
;
840 return expr1
->hash
== expr2
->hash
841 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
844 /* Hash function for loop invariant expressions. */
847 htab_inv_expr_hash (const void *ent
)
849 const struct iv_inv_expr_ent
*expr
=
850 (const struct iv_inv_expr_ent
*)ent
;
854 /* Allocate data structures for the cost model. */
857 initialize_costs (void)
859 mult_costs
[0] = htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
860 mult_costs
[1] = htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
861 cost_tables_exist
= true;
864 /* Release data structures for the cost model. */
867 finalize_costs (void)
869 cost_tables_exist
= false;
870 htab_delete (mult_costs
[0]);
871 htab_delete (mult_costs
[1]);
874 /* Initializes data structures used by the iv optimization pass, stored
878 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
880 data
->version_info_size
= 2 * num_ssa_names
;
881 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
882 data
->relevant
= BITMAP_ALLOC (NULL
);
883 data
->important_candidates
= BITMAP_ALLOC (NULL
);
884 data
->max_inv_id
= 0;
886 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
887 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
888 data
->inv_expr_tab
= htab_create (10, htab_inv_expr_hash
,
889 htab_inv_expr_eq
, free
);
890 data
->inv_expr_id
= 0;
891 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */
900 determine_base_object (tree expr
)
902 enum tree_code code
= TREE_CODE (expr
);
905 /* If this is a pointer casted to any type, we need to determine
906 the base object for the pointer; so handle conversions before
907 throwing away non-pointer expressions. */
908 if (CONVERT_EXPR_P (expr
))
909 return determine_base_object (TREE_OPERAND (expr
, 0));
911 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
920 obj
= TREE_OPERAND (expr
, 0);
921 base
= get_base_address (obj
);
926 if (TREE_CODE (base
) == MEM_REF
)
927 return determine_base_object (TREE_OPERAND (base
, 0));
929 return fold_convert (ptr_type_node
,
930 build_fold_addr_expr (base
));
932 case POINTER_PLUS_EXPR
:
933 return determine_base_object (TREE_OPERAND (expr
, 0));
937 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
941 return fold_convert (ptr_type_node
, expr
);
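/* Examples (illustration only): for &a[i] the base object is &a, for a
   pointer ssa name p the function falls through to the default case and
   returns p itself, for p + 4 it recurses on p, and for a non-pointer
   expression it returns NULL_TREE, i.e. the iv does not point into any
   object.  */
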
945 /* Allocates an induction variable with given initial value BASE and step STEP
949 alloc_iv (tree base
, tree step
)
951 struct iv
*iv
= XCNEW (struct iv
);
952 gcc_assert (step
!= NULL_TREE
);
955 iv
->base_object
= determine_base_object (base
);
958 iv
->have_use_for
= false;
960 iv
->ssa_name
= NULL_TREE
;
965 /* Sets STEP and BASE for induction variable IV. */
968 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
970 struct version_info
*info
= name_info (data
, iv
);
972 gcc_assert (!info
->iv
);
974 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
975 info
->iv
= alloc_iv (base
, step
);
976 info
->iv
->ssa_name
= iv
;
979 /* Finds induction variable declaration for VAR. */
982 get_iv (struct ivopts_data
*data
, tree var
)
985 tree type
= TREE_TYPE (var
);
987 if (!POINTER_TYPE_P (type
)
988 && !INTEGRAL_TYPE_P (type
))
991 if (!name_info (data
, var
)->iv
)
993 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
996 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
997 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1000 return name_info (data
, var
)->iv
;
1003 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1004 not define a simple affine biv with nonzero step. */
1007 determine_biv_step (gimple phi
)
1009 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1010 tree name
= PHI_RESULT (phi
);
1013 if (!is_gimple_reg (name
))
1016 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1019 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
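/* Example (illustration only): for the header phi
     i_1 = PHI <0 (preheader), i_2 (latch)>    with    i_2 = i_1 + 1
   simple_iv computes base 0 and step 1, so determine_biv_step returns 1;
   for a phi whose value does not change inside the loop the step is zero
   and NULL_TREE is returned.  */
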
1022 /* Finds basic ivs. */
1025 find_bivs (struct ivopts_data
*data
)
1028 tree step
, type
, base
;
1030 struct loop
*loop
= data
->current_loop
;
1031 gimple_stmt_iterator psi
;
1033 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1035 phi
= gsi_stmt (psi
);
1037 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1040 step
= determine_biv_step (phi
);
1044 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1045 base
= expand_simple_operations (base
);
1046 if (contains_abnormal_ssa_name_p (base
)
1047 || contains_abnormal_ssa_name_p (step
))
1050 type
= TREE_TYPE (PHI_RESULT (phi
));
1051 base
= fold_convert (type
, base
);
1054 if (POINTER_TYPE_P (type
))
1055 step
= convert_to_ptrofftype (step
);
1057 step
= fold_convert (type
, step
);
1060 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1067 /* Marks basic ivs. */
1070 mark_bivs (struct ivopts_data
*data
)
1074 struct iv
*iv
, *incr_iv
;
1075 struct loop
*loop
= data
->current_loop
;
1076 basic_block incr_bb
;
1077 gimple_stmt_iterator psi
;
1079 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1081 phi
= gsi_stmt (psi
);
1083 iv
= get_iv (data
, PHI_RESULT (phi
));
1087 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1088 incr_iv
= get_iv (data
, var
);
1092 /* If the increment is in the subloop, ignore it. */
1093 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1094 if (incr_bb
->loop_father
!= data
->current_loop
1095 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1099 incr_iv
->biv_p
= true;
1103 /* Checks whether STMT defines a linear induction variable and stores its
1104 parameters to IV. */
1107 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1110 struct loop
*loop
= data
->current_loop
;
1112 iv
->base
= NULL_TREE
;
1113 iv
->step
= NULL_TREE
;
1115 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1118 lhs
= gimple_assign_lhs (stmt
);
1119 if (TREE_CODE (lhs
) != SSA_NAME
)
1122 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1124 iv
->base
= expand_simple_operations (iv
->base
);
1126 if (contains_abnormal_ssa_name_p (iv
->base
)
1127 || contains_abnormal_ssa_name_p (iv
->step
))
1130 /* If STMT could throw, then do not consider STMT as defining a GIV.
1131 While this will suppress optimizations, we can not safely delete this
1132 GIV and associated statements, even if it appears it is not used. */
1133 if (stmt_could_throw_p (stmt
))
1139 /* Finds general ivs in statement STMT. */
1142 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1146 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1149 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1152 /* Finds general ivs in basic block BB. */
1155 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1157 gimple_stmt_iterator bsi
;
1159 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1160 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1163 /* Finds general ivs. */
1166 find_givs (struct ivopts_data
*data
)
1168 struct loop
*loop
= data
->current_loop
;
1169 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1172 for (i
= 0; i
< loop
->num_nodes
; i
++)
1173 find_givs_in_bb (data
, body
[i
]);
1177 /* For each ssa name defined in LOOP determines whether it is an induction
1178 variable and if so, its initial value and step. */
1181 find_induction_variables (struct ivopts_data
*data
)
1186 if (!find_bivs (data
))
1192 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1194 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1198 fprintf (dump_file
, " number of iterations ");
1199 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1200 if (!integer_zerop (niter
->may_be_zero
))
1202 fprintf (dump_file
, "; zero if ");
1203 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1205 fprintf (dump_file
, "\n\n");
1208 fprintf (dump_file
, "Induction variables:\n\n");
1210 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1212 if (ver_info (data
, i
)->iv
)
1213 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1220 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1222 static struct iv_use
*
1223 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1224 gimple stmt
, enum use_type use_type
)
1226 struct iv_use
*use
= XCNEW (struct iv_use
);
1228 use
->id
= n_iv_uses (data
);
1229 use
->type
= use_type
;
1233 use
->related_cands
= BITMAP_ALLOC (NULL
);
1235 /* To avoid showing ssa name in the dumps, if it was not reset by the
1237 iv
->ssa_name
= NULL_TREE
;
1239 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1240 dump_use (dump_file
, use
);
1242 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1247 /* Checks whether OP is a loop-level invariant and if so, records it.
1248 NONLINEAR_USE is true if the invariant is used in a way we do not
1249 handle specially. */
1252 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1255 struct version_info
*info
;
1257 if (TREE_CODE (op
) != SSA_NAME
1258 || !is_gimple_reg (op
))
1261 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1263 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1266 info
= name_info (data
, op
);
1268 info
->has_nonlin_use
|= nonlinear_use
;
1270 info
->inv_id
= ++data
->max_inv_id
;
1271 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1274 /* Checks whether the use OP is interesting and if so, records it. */
1276 static struct iv_use
*
1277 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1284 if (TREE_CODE (op
) != SSA_NAME
)
1287 iv
= get_iv (data
, op
);
1291 if (iv
->have_use_for
)
1293 use
= iv_use (data
, iv
->use_id
);
1295 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1299 if (integer_zerop (iv
->step
))
1301 record_invariant (data
, op
, true);
1304 iv
->have_use_for
= true;
1306 civ
= XNEW (struct iv
);
1309 stmt
= SSA_NAME_DEF_STMT (op
);
1310 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1311 || is_gimple_assign (stmt
));
1313 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1314 iv
->use_id
= use
->id
;
1319 /* Given a condition in statement STMT, checks whether it is a compare
1320 of an induction variable and an invariant. If this is the case,
1321 CONTROL_VAR is set to location of the iv, BOUND to the location of
1322 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1323 induction variable descriptions, and true is returned. If this is not
1324 the case, CONTROL_VAR and BOUND are set to the arguments of the
1325 condition and false is returned. */
1328 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1329 tree
**control_var
, tree
**bound
,
1330 struct iv
**iv_var
, struct iv
**iv_bound
)
1332 /* The objects returned when COND has constant operands. */
1333 static struct iv const_iv
;
1335 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1336 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1339 if (gimple_code (stmt
) == GIMPLE_COND
)
1341 op0
= gimple_cond_lhs_ptr (stmt
);
1342 op1
= gimple_cond_rhs_ptr (stmt
);
1346 op0
= gimple_assign_rhs1_ptr (stmt
);
1347 op1
= gimple_assign_rhs2_ptr (stmt
);
1350 zero
= integer_zero_node
;
1351 const_iv
.step
= integer_zero_node
;
1353 if (TREE_CODE (*op0
) == SSA_NAME
)
1354 iv0
= get_iv (data
, *op0
);
1355 if (TREE_CODE (*op1
) == SSA_NAME
)
1356 iv1
= get_iv (data
, *op1
);
1358 /* Exactly one of the compared values must be an iv, and the other one must
1363 if (integer_zerop (iv0
->step
))
1365 /* Control variable may be on the other side. */
1366 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1367 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1369 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1373 *control_var
= op0
;;
1384 /* Checks whether the condition in STMT is interesting and if so,
1388 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1390 tree
*var_p
, *bound_p
;
1391 struct iv
*var_iv
, *civ
;
1393 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1395 find_interesting_uses_op (data
, *var_p
);
1396 find_interesting_uses_op (data
, *bound_p
);
1400 civ
= XNEW (struct iv
);
1402 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1405 /* Returns true if expression EXPR is obviously invariant in LOOP,
1406 i.e. if all its operands are defined outside of the LOOP. LOOP
1407 should not be the function body. */
1410 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1415 gcc_assert (loop_depth (loop
) > 0);
1417 if (is_gimple_min_invariant (expr
))
1420 if (TREE_CODE (expr
) == SSA_NAME
)
1422 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1424 && flow_bb_inside_loop_p (loop
, def_bb
))
1433 len
= TREE_OPERAND_LENGTH (expr
);
1434 for (i
= 0; i
< len
; i
++)
1435 if (TREE_OPERAND (expr
, i
)
1436 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1442 /* Returns true if statement STMT is obviously invariant in LOOP,
1443 i.e. if all its operands on the RHS are defined outside of the LOOP.
1444 LOOP should not be the function body. */
1447 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1452 gcc_assert (loop_depth (loop
) > 0);
1454 lhs
= gimple_get_lhs (stmt
);
1455 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1457 tree op
= gimple_op (stmt
, i
);
1458 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1465 /* Cumulates the steps of indices into DATA and replaces their values with the
1466 initial ones. Returns false when the value of the index cannot be determined.
1467 Callback for for_each_index. */
1469 struct ifs_ivopts_data
1471 struct ivopts_data
*ivopts_data
;
1477 idx_find_step (tree base
, tree
*idx
, void *data
)
1479 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1481 tree step
, iv_base
, iv_step
, lbound
, off
;
1482 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1484 /* If base is a component ref, require that the offset of the reference
1486 if (TREE_CODE (base
) == COMPONENT_REF
)
1488 off
= component_ref_field_offset (base
);
1489 return expr_invariant_in_loop_p (loop
, off
);
1492 /* If base is array, first check whether we will be able to move the
1493 reference out of the loop (in order to take its address in strength
1494 reduction). In order for this to work we need both lower bound
1495 and step to be loop invariants. */
1496 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1498 /* Moreover, for a range, the size needs to be invariant as well. */
1499 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1500 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1503 step
= array_ref_element_size (base
);
1504 lbound
= array_ref_low_bound (base
);
1506 if (!expr_invariant_in_loop_p (loop
, step
)
1507 || !expr_invariant_in_loop_p (loop
, lbound
))
1511 if (TREE_CODE (*idx
) != SSA_NAME
)
1514 iv
= get_iv (dta
->ivopts_data
, *idx
);
1518 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1519 *&x[0], which is not folded and does not trigger the
1520 ARRAY_REF path below. */
1523 if (integer_zerop (iv
->step
))
1526 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1528 step
= array_ref_element_size (base
);
1530 /* We only handle addresses whose step is an integer constant. */
1531 if (TREE_CODE (step
) != INTEGER_CST
)
1535 /* The step for pointer arithmetics already is 1 byte. */
1536 step
= size_one_node
;
1540 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1541 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1544 /* The index might wrap. */
1548 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1549 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1554 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1555 object is passed to it in DATA. */
1558 idx_record_use (tree base
, tree
*idx
,
1561 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1562 find_interesting_uses_op (data
, *idx
);
1563 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1565 find_interesting_uses_op (data
, array_ref_element_size (base
));
1566 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1571 /* If we can prove that TOP = cst * BOT for some constant cst,
1572 store cst to MUL and return true. Otherwise return false.
1573 The returned value is always sign-extended, regardless of the
1574 signedness of TOP and BOT. */
1577 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1580 enum tree_code code
;
1581 double_int res
, p0
, p1
;
1582 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1587 if (operand_equal_p (top
, bot
, 0))
1589 *mul
= double_int_one
;
1593 code
= TREE_CODE (top
);
1597 mby
= TREE_OPERAND (top
, 1);
1598 if (TREE_CODE (mby
) != INTEGER_CST
)
1601 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1604 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1610 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1611 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1614 if (code
== MINUS_EXPR
)
1615 p1
= double_int_neg (p1
);
1616 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1620 if (TREE_CODE (bot
) != INTEGER_CST
)
1623 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1624 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1625 if (double_int_zero_p (p1
))
1627 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1629 return double_int_zero_p (res
);
1636 /* Returns true if memory reference REF with step STEP may be unaligned. */
1639 may_be_unaligned_p (tree ref
, tree step
)
1643 HOST_WIDE_INT bitsize
;
1644 HOST_WIDE_INT bitpos
;
1646 enum machine_mode mode
;
1647 int unsignedp
, volatilep
;
1648 unsigned base_align
;
1650 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1651 thus they are not misaligned. */
1652 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1655 /* The test below is basically copy of what expr.c:normal_inner_ref
1656 does to check whether the object must be loaded by parts when
1657 STRICT_ALIGNMENT is true. */
1658 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1659 &unsignedp
, &volatilep
, true);
1660 base_type
= TREE_TYPE (base
);
1661 base_align
= get_object_alignment (base
);
1662 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1664 if (mode
!= BLKmode
)
1666 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1668 if (base_align
< mode_align
1669 || (bitpos
% mode_align
) != 0
1670 || (bitpos
% BITS_PER_UNIT
) != 0)
1674 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1677 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1684 /* Return true if EXPR may be non-addressable. */
1687 may_be_nonaddressable_p (tree expr
)
1689 switch (TREE_CODE (expr
))
1691 case TARGET_MEM_REF
:
1692 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1693 target, thus they are always addressable. */
1697 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1698 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1700 case VIEW_CONVERT_EXPR
:
1701 /* This kind of view-conversions may wrap non-addressable objects
1702 and make them look addressable. After some processing the
1703 non-addressability may be uncovered again, causing ADDR_EXPRs
1704 of inappropriate objects to be built. */
1705 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1706 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1709 /* ... fall through ... */
1712 case ARRAY_RANGE_REF
:
1713 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1725 /* Finds addresses in *OP_P inside STMT. */
1728 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1730 tree base
= *op_p
, step
= size_zero_node
;
1732 struct ifs_ivopts_data ifs_ivopts_data
;
1734 /* Do not play with volatile memory references. A bit too conservative,
1735 perhaps, but safe. */
1736 if (gimple_has_volatile_ops (stmt
))
1739 /* Ignore bitfields for now. Not really something terribly complicated
1741 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1744 base
= unshare_expr (base
);
1746 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1748 tree type
= build_pointer_type (TREE_TYPE (base
));
1752 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1754 civ
= get_iv (data
, TMR_BASE (base
));
1758 TMR_BASE (base
) = civ
->base
;
1761 if (TMR_INDEX2 (base
)
1762 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1764 civ
= get_iv (data
, TMR_INDEX2 (base
));
1768 TMR_INDEX2 (base
) = civ
->base
;
1771 if (TMR_INDEX (base
)
1772 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1774 civ
= get_iv (data
, TMR_INDEX (base
));
1778 TMR_INDEX (base
) = civ
->base
;
1783 if (TMR_STEP (base
))
1784 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1786 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1790 if (integer_zerop (step
))
1792 base
= tree_mem_ref_addr (type
, base
);
1796 ifs_ivopts_data
.ivopts_data
= data
;
1797 ifs_ivopts_data
.stmt
= stmt
;
1798 ifs_ivopts_data
.step
= size_zero_node
;
1799 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1800 || integer_zerop (ifs_ivopts_data
.step
))
1802 step
= ifs_ivopts_data
.step
;
1804 /* Check that the base expression is addressable. This needs
1805 to be done after substituting bases of IVs into it. */
1806 if (may_be_nonaddressable_p (base
))
1809 /* Moreover, on strict alignment platforms, check that it is
1810 sufficiently aligned. */
1811 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1814 base
= build_fold_addr_expr (base
);
1816 /* Substituting bases of IVs into the base expression might
1817 have caused folding opportunities. */
1818 if (TREE_CODE (base
) == ADDR_EXPR
)
1820 tree
*ref
= &TREE_OPERAND (base
, 0);
1821 while (handled_component_p (*ref
))
1822 ref
= &TREE_OPERAND (*ref
, 0);
1823 if (TREE_CODE (*ref
) == MEM_REF
)
1825 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1826 TREE_OPERAND (*ref
, 0),
1827 TREE_OPERAND (*ref
, 1));
1834 civ
= alloc_iv (base
, step
);
1835 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1839 for_each_index (op_p
, idx_record_use
, data
);
1842 /* Finds and records invariants used in STMT. */
1845 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1848 use_operand_p use_p
;
1851 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1853 op
= USE_FROM_PTR (use_p
);
1854 record_invariant (data
, op
, false);
1858 /* Finds interesting uses of induction variables in the statement STMT. */
1861 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1864 tree op
, *lhs
, *rhs
;
1866 use_operand_p use_p
;
1867 enum tree_code code
;
1869 find_invariants_stmt (data
, stmt
);
1871 if (gimple_code (stmt
) == GIMPLE_COND
)
1873 find_interesting_uses_cond (data
, stmt
);
1877 if (is_gimple_assign (stmt
))
1879 lhs
= gimple_assign_lhs_ptr (stmt
);
1880 rhs
= gimple_assign_rhs1_ptr (stmt
);
1882 if (TREE_CODE (*lhs
) == SSA_NAME
)
1884 /* If the statement defines an induction variable, the uses are not
1885 interesting by themselves. */
1887 iv
= get_iv (data
, *lhs
);
1889 if (iv
&& !integer_zerop (iv
->step
))
1893 code
= gimple_assign_rhs_code (stmt
);
1894 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1895 && (REFERENCE_CLASS_P (*rhs
)
1896 || is_gimple_val (*rhs
)))
1898 if (REFERENCE_CLASS_P (*rhs
))
1899 find_interesting_uses_address (data
, stmt
, rhs
);
1901 find_interesting_uses_op (data
, *rhs
);
1903 if (REFERENCE_CLASS_P (*lhs
))
1904 find_interesting_uses_address (data
, stmt
, lhs
);
1907 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1909 find_interesting_uses_cond (data
, stmt
);
1913 /* TODO -- we should also handle address uses of type
1915 memory = call (whatever);
1922 if (gimple_code (stmt
) == GIMPLE_PHI
1923 && gimple_bb (stmt
) == data
->current_loop
->header
)
1925 iv
= get_iv (data
, PHI_RESULT (stmt
));
1927 if (iv
&& !integer_zerop (iv
->step
))
1931 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1933 op
= USE_FROM_PTR (use_p
);
1935 if (TREE_CODE (op
) != SSA_NAME
)
1938 iv
= get_iv (data
, op
);
1942 find_interesting_uses_op (data
, op
);
1946 /* Finds interesting uses of induction variables outside of loops
1947 on loop exit edge EXIT. */
1950 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1953 gimple_stmt_iterator psi
;
1956 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1958 phi
= gsi_stmt (psi
);
1959 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1960 if (is_gimple_reg (def
))
1961 find_interesting_uses_op (data
, def
);
1965 /* Finds uses of the induction variables that are interesting. */
1968 find_interesting_uses (struct ivopts_data
*data
)
1971 gimple_stmt_iterator bsi
;
1972 basic_block
*body
= get_loop_body (data
->current_loop
);
1974 struct version_info
*info
;
1977 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1978 fprintf (dump_file
, "Uses:\n\n");
1980 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1985 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1986 if (e
->dest
!= EXIT_BLOCK_PTR
1987 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1988 find_interesting_uses_outside (data
, e
);
1990 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1991 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1992 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1993 if (!is_gimple_debug (gsi_stmt (bsi
)))
1994 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1997 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2001 fprintf (dump_file
, "\n");
2003 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2005 info
= ver_info (data
, i
);
2008 fprintf (dump_file
, " ");
2009 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2010 fprintf (dump_file
, " is invariant (%d)%s\n",
2011 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2015 fprintf (dump_file
, "\n");
2021 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2022 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2023 we are at the top-level of the processed address. */
2026 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2027 unsigned HOST_WIDE_INT
*offset
)
2029 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2030 enum tree_code code
;
2031 tree type
, orig_type
= TREE_TYPE (expr
);
2032 unsigned HOST_WIDE_INT off0
, off1
, st
;
2033 tree orig_expr
= expr
;
2037 type
= TREE_TYPE (expr
);
2038 code
= TREE_CODE (expr
);
2044 if (!cst_and_fits_in_hwi (expr
)
2045 || integer_zerop (expr
))
2048 *offset
= int_cst_value (expr
);
2049 return build_int_cst (orig_type
, 0);
2051 case POINTER_PLUS_EXPR
:
2054 op0
= TREE_OPERAND (expr
, 0);
2055 op1
= TREE_OPERAND (expr
, 1);
2057 op0
= strip_offset_1 (op0
, false, false, &off0
);
2058 op1
= strip_offset_1 (op1
, false, false, &off1
);
2060 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2061 if (op0
== TREE_OPERAND (expr
, 0)
2062 && op1
== TREE_OPERAND (expr
, 1))
2065 if (integer_zerop (op1
))
2067 else if (integer_zerop (op0
))
2069 if (code
== MINUS_EXPR
)
2070 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2075 expr
= fold_build2 (code
, type
, op0
, op1
);
2077 return fold_convert (orig_type
, expr
);
2080 op1
= TREE_OPERAND (expr
, 1);
2081 if (!cst_and_fits_in_hwi (op1
))
2084 op0
= TREE_OPERAND (expr
, 0);
2085 op0
= strip_offset_1 (op0
, false, false, &off0
);
2086 if (op0
== TREE_OPERAND (expr
, 0))
2089 *offset
= off0
* int_cst_value (op1
);
2090 if (integer_zerop (op0
))
2093 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2095 return fold_convert (orig_type
, expr
);
2098 case ARRAY_RANGE_REF
:
2102 step
= array_ref_element_size (expr
);
2103 if (!cst_and_fits_in_hwi (step
))
2106 st
= int_cst_value (step
);
2107 op1
= TREE_OPERAND (expr
, 1);
2108 op1
= strip_offset_1 (op1
, false, false, &off1
);
2109 *offset
= off1
* st
;
2112 && integer_zerop (op1
))
2114 /* Strip the component reference completely. */
2115 op0
= TREE_OPERAND (expr
, 0);
2116 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2126 tmp
= component_ref_field_offset (expr
);
2128 && cst_and_fits_in_hwi (tmp
))
2130 /* Strip the component reference completely. */
2131 op0
= TREE_OPERAND (expr
, 0);
2132 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2133 *offset
= off0
+ int_cst_value (tmp
);
2139 op0
= TREE_OPERAND (expr
, 0);
2140 op0
= strip_offset_1 (op0
, true, true, &off0
);
2143 if (op0
== TREE_OPERAND (expr
, 0))
2146 expr
= build_fold_addr_expr (op0
);
2147 return fold_convert (orig_type
, expr
);
2150 /* ??? Offset operand? */
2151 inside_addr
= false;
/* Default handling of expressions for which we want to recurse into
   the first operand.  */
2160 op0
= TREE_OPERAND (expr
, 0);
2161 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2164 if (op0
== TREE_OPERAND (expr
, 0)
2165 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2168 expr
= copy_node (expr
);
2169 TREE_OPERAND (expr
, 0) = op0
;
2171 TREE_OPERAND (expr
, 1) = op1
;
2173 /* Inside address, we might strip the top level component references,
2174 thus changing type of the expression. Handling of ADDR_EXPR
2176 expr
= fold_convert (orig_type
, expr
);
2181 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2184 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2186 return strip_offset_1 (expr
, false, false, offset
);
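/* Examples (illustration only): for EXPR = p + 8 the function returns p and
   stores 8 to *OFFSET; for EXPR = &a[4] with 4-byte elements it returns the
   address of the array itself and stores 16; expressions without a constant
   offset are returned unchanged with *OFFSET set to 0.  */
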
2189 /* Returns variant of TYPE that can be used as base for different uses.
2190 We return unsigned type with the same precision, which avoids problems
2194 generic_type_for (tree type
)
2196 if (POINTER_TYPE_P (type
))
2197 return unsigned_type_for (type
);
2199 if (TYPE_UNSIGNED (type
))
2202 return unsigned_type_for (type
);
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */
2208 static struct ivopts_data
*fd_ivopts_data
;
2210 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2212 bitmap
*depends_on
= (bitmap
*) data
;
2213 struct version_info
*info
;
2215 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2217 info
= name_info (fd_ivopts_data
, *expr_p
);
2219 if (!info
->inv_id
|| info
->has_nonlin_use
)
2223 *depends_on
= BITMAP_ALLOC (NULL
);
2224 bitmap_set_bit (*depends_on
, info
->inv_id
);
2229 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2230 position to POS. If USE is not NULL, the candidate is set as related to
2231 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2232 replacement of the final value of the iv by a direct computation. */
2234 static struct iv_cand
*
2235 add_candidate_1 (struct ivopts_data
*data
,
2236 tree base
, tree step
, bool important
, enum iv_position pos
,
2237 struct iv_use
*use
, gimple incremented_at
)
2240 struct iv_cand
*cand
= NULL
;
2241 tree type
, orig_type
;
2243 /* For non-original variables, make sure their values are computed in a type
2244 that does not invoke undefined behavior on overflows (since in general,
2245 we cannot prove that these induction variables are non-wrapping). */
2246 if (pos
!= IP_ORIGINAL
)
2248 orig_type
= TREE_TYPE (base
);
2249 type
= generic_type_for (orig_type
);
2250 if (type
!= orig_type
)
2252 base
= fold_convert (type
, base
);
2253 step
= fold_convert (type
, step
);
2257 for (i
= 0; i
< n_iv_cands (data
); i
++)
2259 cand
= iv_cand (data
, i
);
2261 if (cand
->pos
!= pos
)
2264 if (cand
->incremented_at
!= incremented_at
2265 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2266 && cand
->ainc_use
!= use
))
2280 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2281 && operand_equal_p (step
, cand
->iv
->step
, 0)
2282 && (TYPE_PRECISION (TREE_TYPE (base
))
2283 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2287 if (i
== n_iv_cands (data
))
2289 cand
= XCNEW (struct iv_cand
);
2295 cand
->iv
= alloc_iv (base
, step
);
2298 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2300 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2301 cand
->var_after
= cand
->var_before
;
2303 cand
->important
= important
;
2304 cand
->incremented_at
= incremented_at
;
2305 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2308 && TREE_CODE (step
) != INTEGER_CST
)
2310 fd_ivopts_data
= data
;
2311 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2314 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2315 cand
->ainc_use
= use
;
2317 cand
->ainc_use
= NULL
;
2319 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2320 dump_cand (dump_file
, cand
);
2323 if (important
&& !cand
->important
)
2325 cand
->important
= true;
2326 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2327 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2332 bitmap_set_bit (use
->related_cands
, i
);
2333 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2334 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2341 /* Returns true if incrementing the induction variable at the end of the LOOP
2344 The purpose is to avoid splitting latch edge with a biv increment, thus
2345 creating a jump, possibly confusing other optimization passes and leaving
2346 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2347 is not available (so we do not have a better alternative), or if the latch
2348 edge is already nonempty. */
2351 allow_ip_end_pos_p (struct loop
*loop
)
2353 if (!ip_normal_pos (loop
))
2356 if (!empty_block_p (ip_end_pos (loop
)))
2362 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2363 Important field is set to IMPORTANT. */
2366 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2367 bool important
, struct iv_use
*use
)
2369 basic_block use_bb
= gimple_bb (use
->stmt
);
2370 enum machine_mode mem_mode
;
2371 unsigned HOST_WIDE_INT cstepi
;
2373 /* If we insert the increment in any position other than the standard
2374 ones, we must ensure that it is incremented once per iteration.
2375 It must not be in an inner nested loop, or one side of an if
2377 if (use_bb
->loop_father
!= data
->current_loop
2378 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2379 || stmt_could_throw_p (use
->stmt
)
2380 || !cst_and_fits_in_hwi (step
))
2383 cstepi
= int_cst_value (step
);
2385 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2386 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2387 || USE_STORE_PRE_INCREMENT (mem_mode
))
2388 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2389 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2390 || USE_STORE_PRE_DECREMENT (mem_mode
))
2391 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2393 enum tree_code code
= MINUS_EXPR
;
2395 tree new_step
= step
;
2397 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2399 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2400 code
= POINTER_PLUS_EXPR
;
2403 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2404 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2405 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2408 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2409 || USE_STORE_POST_INCREMENT (mem_mode
))
2410 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2411 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2412 || USE_STORE_POST_DECREMENT (mem_mode
))
2413 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2415 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2420 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2421 position to POS. If USE is not NULL, the candidate is set as related to
2422 it. The candidate computation is scheduled on all available positions. */
2425 add_candidate (struct ivopts_data
*data
,
2426 tree base
, tree step
, bool important
, struct iv_use
*use
)
2428 if (ip_normal_pos (data
->current_loop
))
2429 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2430 if (ip_end_pos (data
->current_loop
)
2431 && allow_ip_end_pos_p (data
->current_loop
))
2432 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2434 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2435 add_autoinc_candidates (data
, base
, step
, important
, use
);
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that taking the modulo by it
             is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
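
/* Illustrative note, not part of the pass: because n_map_members is rounded
   up to a power of two above, the probe loops in set_use_iv_cost and
   get_use_iv_cost below can reduce a candidate id modulo the table size with
   a mask instead of a division:

     s = cand->id & (use->n_map_members - 1);

   e.g. with 8 members, candidate id 13 starts probing at slot 13 & 7 == 5.  */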
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
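
/* Illustrative note, not part of the pass: compare_costs orders costs by the
   runtime component first and uses complexity only to break ties, so e.g.
   {cost 4, complexity 3} is considered cheaper than {cost 5, complexity 0},
   while {cost 4, complexity 1} beats {cost 4, complexity 2}.  */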
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON, that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += set_src_cost (SET_SRC (set), speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
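
/* Worked example, not part of the pass: for a use with base 4 and step 8
   expressed from a candidate with base 0 and step 2, constant_multiple_of
   gives ratio = 8 / 2 = 4, so

     use = ubase - ratio * cbase + ratio * var = 4 - 4 * 0 + 4 * var,

   i.e. the decomposed affine form 4 * var + 4 built above.  */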
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
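
/* Illustrative example, not part of the pass: when optimizing for speed, a
   setup cost of 20 in a loop whose expected trip count is 5 is charged as
   20 / 5 = 4 per iteration; when optimizing for size the full cost of 20 is
   kept, since the setup code is emitted only once.  */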
/* Returns cost of addition in MODE.  */

static unsigned
add_regs_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Returns cost of multiplication in MODE.  */

static unsigned
multiply_regs_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (MULT, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Returns cost of addition with a constant in MODE.  */

static unsigned
add_const_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  /* Arbitrarily generate insns for x + 2, as the exact constant
     shouldn't matter.  */
  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_int_mode (2, mode)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition to constant in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Returns cost of extend or truncate in MODE.  */

static unsigned
extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;
  enum machine_mode mode_to = TYPE_MODE (type_to);
  enum machine_mode mode_from = TYPE_MODE (type_from);
  tree size_to = TYPE_SIZE (type_to);
  tree size_from = TYPE_SIZE (type_from);
  enum rtx_code code;

  gcc_assert (TREE_CODE (size_to) == INTEGER_CST
              && TREE_CODE (size_from) == INTEGER_CST);

  if (costs[mode_to][mode_from][speed])
    return costs[mode_to][mode_from][speed];

  if (tree_int_cst_lt (size_to, size_from))
    code = TRUNCATE;
  else if (TYPE_UNSIGNED (type_to))
    code = ZERO_EXTEND;
  else
    code = SIGN_EXTEND;

  start_sequence ();
  force_operand (gen_rtx_fmt_e (code, mode_to,
                                gen_raw_REG (mode_from, LAST_VIRTUAL_REGISTER + 1)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Conversion from %s to %s costs %d\n",
             GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost);

  costs[mode_to][mode_from][speed] = cost;

  return cost;
}
/* Returns cost of negation in MODE.  */

static unsigned
negate_reg_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES][2];
  rtx seq;
  unsigned cost;

  if (costs[mode][speed])
    return costs[mode][speed];

  start_sequence ();
  force_operand (gen_rtx_fmt_e (NEG, mode,
                                gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode][speed] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Negation in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}
/* Returns cost of multiplication by constant CST in MODE.  */

static unsigned
multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  gcc_assert (cost_tables_exist);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **)
    htab_find_slot (mult_costs[speed], &act, INSERT);

  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
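
/* Illustrative note, not part of the pass: the bitmap above keeps one bit per
   ratio in [-MAX_RATIO, MAX_RATIO], stored at index ratio + MAX_RATIO.  With
   MAX_RATIO == 128, ratio -4 is tested at bit 124 and ratio 8 at bit 136, so
   after the table is built once per (mode, address space) pair each query is
   a single bit test.  */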
3380 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3381 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3382 variable is omitted. Compute the cost for a memory reference that accesses
3383 a memory location of mode MEM_MODE in address space AS.
3385 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3386 size of MEM_MODE / RATIO) is available. To make this determination, we
3387 look at the size of the increment to be made, which is given in CSTEP.
3388 CSTEP may be zero if the step is unknown.
3389 STMT_AFTER_INC is true iff the statement we're looking at is after the
3390 increment of the original biv.
3392 TODO -- there must be some better way. This all is quite crude. */
3396 HOST_WIDE_INT min_offset
, max_offset
;
3397 unsigned costs
[2][2][2][2];
3398 } *address_cost_data
;
3400 DEF_VEC_P (address_cost_data
);
3401 DEF_VEC_ALLOC_P (address_cost_data
, heap
);
3404 get_address_cost (bool symbol_present
, bool var_present
,
3405 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3406 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3407 addr_space_t as
, bool speed
,
3408 bool stmt_after_inc
, bool *may_autoinc
)
3410 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3411 static VEC(address_cost_data
, heap
) *address_cost_data_list
;
3412 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3413 address_cost_data data
;
3414 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3415 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3416 unsigned cost
, acost
, complexity
;
3417 bool offset_p
, ratio_p
, autoinc
;
3418 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3419 unsigned HOST_WIDE_INT mask
;
3422 if (data_index
>= VEC_length (address_cost_data
, address_cost_data_list
))
3423 VEC_safe_grow_cleared (address_cost_data
, heap
, address_cost_data_list
,
3426 data
= VEC_index (address_cost_data
, address_cost_data_list
, data_index
);
3430 HOST_WIDE_INT rat
, off
= 0;
3431 int old_cse_not_expected
, width
;
3432 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3433 rtx seq
, addr
, base
;
3436 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3438 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3440 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3441 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3442 width
= HOST_BITS_PER_WIDE_INT
- 1;
3443 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3445 for (i
= width
; i
>= 0; i
--)
3447 off
= -((HOST_WIDE_INT
) 1 << i
);
3448 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3449 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3452 data
->min_offset
= (i
== -1? 0 : off
);
3454 for (i
= width
; i
>= 0; i
--)
3456 off
= ((HOST_WIDE_INT
) 1 << i
) - 1;
3457 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3458 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3463 data
->max_offset
= off
;
3465 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3467 fprintf (dump_file
, "get_address_cost:\n");
3468 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3469 GET_MODE_NAME (mem_mode
),
3471 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3472 GET_MODE_NAME (mem_mode
),
3477 for (i
= 2; i
<= MAX_RATIO
; i
++)
3478 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3484 /* Compute the cost of various addressing modes. */
3486 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3487 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3489 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3490 || USE_STORE_PRE_DECREMENT (mem_mode
))
3492 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3493 has_predec
[mem_mode
]
3494 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3496 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3497 || USE_STORE_POST_DECREMENT (mem_mode
))
3499 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3500 has_postdec
[mem_mode
]
3501 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3503 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3504 || USE_STORE_PRE_DECREMENT (mem_mode
))
3506 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3507 has_preinc
[mem_mode
]
3508 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3510 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3511 || USE_STORE_POST_INCREMENT (mem_mode
))
3513 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3514 has_postinc
[mem_mode
]
3515 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3517 for (i
= 0; i
< 16; i
++)
3520 var_p
= (i
>> 1) & 1;
3521 off_p
= (i
>> 2) & 1;
3522 rat_p
= (i
>> 3) & 1;
3526 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3527 gen_int_mode (rat
, address_mode
));
3530 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3534 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3535 /* ??? We can run into trouble with some backends by presenting
3536 it with symbols which haven't been properly passed through
3537 targetm.encode_section_info. By setting the local bit, we
3538 enhance the probability of things working. */
3539 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3542 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3544 (PLUS
, address_mode
, base
,
3545 gen_int_mode (off
, address_mode
)));
3548 base
= gen_int_mode (off
, address_mode
);
3553 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3556 /* To avoid splitting addressing modes, pretend that no cse will
3558 old_cse_not_expected
= cse_not_expected
;
3559 cse_not_expected
= true;
3560 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3561 cse_not_expected
= old_cse_not_expected
;
3565 acost
= seq_cost (seq
, speed
);
3566 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3570 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3573 /* On some targets, it is quite expensive to load symbol to a register,
3574 which makes addresses that contain symbols look much more expensive.
3575 However, the symbol will have to be loaded in any case before the
3576 loop (and quite likely we have it in register already), so it does not
3577 make much sense to penalize them too heavily. So make some final
3578 tweaks for the SYMBOL_PRESENT modes:
3580 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3581 var is cheaper, use this mode with small penalty.
3582 If VAR_PRESENT is true, try whether the mode with
3583 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3584 if this is the case, use it. */
3585 add_c
= add_regs_cost (address_mode
, speed
);
3586 for (i
= 0; i
< 8; i
++)
3589 off_p
= (i
>> 1) & 1;
3590 rat_p
= (i
>> 2) & 1;
3592 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3596 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3597 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3600 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3602 fprintf (dump_file
, "Address costs:\n");
3604 for (i
= 0; i
< 16; i
++)
3607 var_p
= (i
>> 1) & 1;
3608 off_p
= (i
>> 2) & 1;
3609 rat_p
= (i
>> 3) & 1;
3611 fprintf (dump_file
, " ");
3613 fprintf (dump_file
, "sym + ");
3615 fprintf (dump_file
, "var + ");
3617 fprintf (dump_file
, "cst + ");
3619 fprintf (dump_file
, "rat * ");
3621 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3622 fprintf (dump_file
, "index costs %d\n", acost
);
3624 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3625 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3626 fprintf (dump_file
, " May include autoinc/dec\n");
3627 fprintf (dump_file
, "\n");
3630 VEC_replace (address_cost_data
, address_cost_data_list
,
3634 bits
= GET_MODE_BITSIZE (address_mode
);
3635 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3637 if ((offset
>> (bits
- 1) & 1))
3642 msize
= GET_MODE_SIZE (mem_mode
);
3643 autoinc_offset
= offset
;
3645 autoinc_offset
+= ratio
* cstep
;
3646 if (symbol_present
|| var_present
|| ratio
!= 1)
3648 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3650 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3652 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3654 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3655 && msize
== -cstep
))
3659 offset_p
= (s_offset
!= 0
3660 && data
->min_offset
<= s_offset
3661 && s_offset
<= data
->max_offset
);
3662 ratio_p
= (ratio
!= 1
3663 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3665 if (ratio
!= 1 && !ratio_p
)
3666 cost
+= multiply_by_const_cost (ratio
, address_mode
, speed
);
3668 if (s_offset
&& !offset_p
&& !symbol_present
)
3669 cost
+= add_regs_cost (address_mode
, speed
);
3672 *may_autoinc
= autoinc
;
3673 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3674 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3675 return new_cost (cost
+ acost
, complexity
);
3678 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3679 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3680 calculating the operands of EXPR. Returns true if successful, and returns
3681 the cost in COST. */
3684 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3685 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3688 tree op1
= TREE_OPERAND (expr
, 1);
3689 tree cst
= TREE_OPERAND (mult
, 1);
3690 tree multop
= TREE_OPERAND (mult
, 0);
3691 int m
= exact_log2 (int_cst_value (cst
));
3692 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3695 if (!(m
>= 0 && m
< maxm
))
3698 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3699 ? shiftadd_cost
[speed
][mode
][m
]
3701 ? shiftsub1_cost
[speed
][mode
][m
]
3702 : shiftsub0_cost
[speed
][mode
][m
]));
3703 res
= new_cost (sa_cost
, 0);
3704 res
= add_costs (res
, mult
== op1
? cost0
: cost1
);
3706 STRIP_NOPS (multop
);
3707 if (!is_gimple_val (multop
))
3708 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3714 /* Estimates cost of forcing expression EXPR into a variable. */
3717 force_expr_to_var_cost (tree expr
, bool speed
)
3719 static bool costs_initialized
= false;
3720 static unsigned integer_cost
[2];
3721 static unsigned symbol_cost
[2];
3722 static unsigned address_cost
[2];
3724 comp_cost cost0
, cost1
, cost
;
3725 enum machine_mode mode
;
3727 if (!costs_initialized
)
3729 tree type
= build_pointer_type (integer_type_node
);
3734 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3735 TREE_STATIC (var
) = 1;
3736 x
= produce_memory_decl_rtl (var
, NULL
);
3737 SET_DECL_RTL (var
, x
);
3739 addr
= build1 (ADDR_EXPR
, type
, var
);
3742 for (i
= 0; i
< 2; i
++)
3744 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3747 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3750 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3751 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3753 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3754 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3755 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3756 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3757 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3758 fprintf (dump_file
, "\n");
3762 costs_initialized
= true;
3767 if (SSA_VAR_P (expr
))
3770 if (is_gimple_min_invariant (expr
))
3772 if (TREE_CODE (expr
) == INTEGER_CST
)
3773 return new_cost (integer_cost
[speed
], 0);
3775 if (TREE_CODE (expr
) == ADDR_EXPR
)
3777 tree obj
= TREE_OPERAND (expr
, 0);
3779 if (TREE_CODE (obj
) == VAR_DECL
3780 || TREE_CODE (obj
) == PARM_DECL
3781 || TREE_CODE (obj
) == RESULT_DECL
)
3782 return new_cost (symbol_cost
[speed
], 0);
3785 return new_cost (address_cost
[speed
], 0);
3788 switch (TREE_CODE (expr
))
3790 case POINTER_PLUS_EXPR
:
3794 op0
= TREE_OPERAND (expr
, 0);
3795 op1
= TREE_OPERAND (expr
, 1);
3799 if (is_gimple_val (op0
))
3802 cost0
= force_expr_to_var_cost (op0
, speed
);
3804 if (is_gimple_val (op1
))
3807 cost1
= force_expr_to_var_cost (op1
, speed
);
3812 op0
= TREE_OPERAND (expr
, 0);
3816 if (is_gimple_val (op0
))
3819 cost0
= force_expr_to_var_cost (op0
, speed
);
3825 /* Just an arbitrary value, FIXME. */
3826 return new_cost (target_spill_cost
[speed
], 0);
3829 mode
= TYPE_MODE (TREE_TYPE (expr
));
3830 switch (TREE_CODE (expr
))
3832 case POINTER_PLUS_EXPR
:
3836 cost
= new_cost (add_regs_cost (mode
, speed
), 0);
3837 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3839 tree mult
= NULL_TREE
;
3841 if (TREE_CODE (op1
) == MULT_EXPR
)
3843 else if (TREE_CODE (op0
) == MULT_EXPR
)
3846 if (mult
!= NULL_TREE
3847 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3848 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
, speed
,
3855 if (cst_and_fits_in_hwi (op0
))
3856 cost
= new_cost (multiply_by_const_cost (int_cst_value (op0
),
3858 else if (cst_and_fits_in_hwi (op1
))
3859 cost
= new_cost (multiply_by_const_cost (int_cst_value (op1
),
3862 return new_cost (target_spill_cost
[speed
], 0);
3869 cost
= add_costs (cost
, cost0
);
3870 cost
= add_costs (cost
, cost1
);
3872 /* Bound the cost by target_spill_cost. The parts of complicated
3873 computations often are either loop invariant or at least can
3874 be shared between several iv uses, so letting this grow without
3875 limits would not give reasonable results. */
3876 if (cost
.cost
> (int) target_spill_cost
[speed
])
3877 cost
.cost
= target_spill_cost
[speed
];
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
3898 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3899 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3900 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3901 invariants the computation depends on. */
3904 split_address_cost (struct ivopts_data
*data
,
3905 tree addr
, bool *symbol_present
, bool *var_present
,
3906 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3909 HOST_WIDE_INT bitsize
;
3910 HOST_WIDE_INT bitpos
;
3912 enum machine_mode mode
;
3913 int unsignedp
, volatilep
;
3915 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3916 &unsignedp
, &volatilep
, false);
3919 || bitpos
% BITS_PER_UNIT
!= 0
3920 || TREE_CODE (core
) != VAR_DECL
)
3922 *symbol_present
= false;
3923 *var_present
= true;
3924 fd_ivopts_data
= data
;
3925 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3926 return new_cost (target_spill_cost
[data
->speed
], 0);
3929 *offset
+= bitpos
/ BITS_PER_UNIT
;
3930 if (TREE_STATIC (core
)
3931 || DECL_EXTERNAL (core
))
3933 *symbol_present
= true;
3934 *var_present
= false;
3938 *symbol_present
= false;
3939 *var_present
= true;
3943 /* Estimates cost of expressing difference of addresses E1 - E2 as
3944 var + symbol + offset. The value of offset is added to OFFSET,
3945 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3946 part is missing. DEPENDS_ON is a set of the invariants the computation
3950 ptr_difference_cost (struct ivopts_data
*data
,
3951 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3952 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3954 HOST_WIDE_INT diff
= 0;
3955 aff_tree aff_e1
, aff_e2
;
3958 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3960 if (ptr_difference_const (e1
, e2
, &diff
))
3963 *symbol_present
= false;
3964 *var_present
= false;
3968 if (integer_zerop (e2
))
3969 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3970 symbol_present
, var_present
, offset
, depends_on
);
3972 *symbol_present
= false;
3973 *var_present
= true;
3975 type
= signed_type_for (TREE_TYPE (e1
));
3976 tree_to_aff_combination (e1
, type
, &aff_e1
);
3977 tree_to_aff_combination (e2
, type
, &aff_e2
);
3978 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3979 aff_combination_add (&aff_e1
, &aff_e2
);
3981 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3984 /* Estimates cost of expressing difference E1 - E2 as
3985 var + symbol + offset. The value of offset is added to OFFSET,
3986 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3987 part is missing. DEPENDS_ON is a set of the invariants the computation
3991 difference_cost (struct ivopts_data
*data
,
3992 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3993 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3995 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3996 unsigned HOST_WIDE_INT off1
, off2
;
3997 aff_tree aff_e1
, aff_e2
;
4000 e1
= strip_offset (e1
, &off1
);
4001 e2
= strip_offset (e2
, &off2
);
4002 *offset
+= off1
- off2
;
4007 if (TREE_CODE (e1
) == ADDR_EXPR
)
4008 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4009 offset
, depends_on
);
4010 *symbol_present
= false;
4012 if (operand_equal_p (e1
, e2
, 0))
4014 *var_present
= false;
4018 *var_present
= true;
4020 if (integer_zerop (e2
))
4021 return force_var_cost (data
, e1
, depends_on
);
4023 if (integer_zerop (e1
))
4025 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4026 cost
.cost
+= multiply_by_const_cost (-1, mode
, data
->speed
);
4030 type
= signed_type_for (TREE_TYPE (e1
));
4031 tree_to_aff_combination (e1
, type
, &aff_e1
);
4032 tree_to_aff_combination (e2
, type
, &aff_e2
);
4033 aff_combination_scale (&aff_e2
, double_int_minus_one
);
4034 aff_combination_add (&aff_e1
, &aff_e2
);
4036 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
4082 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4083 requires a new compiler generated temporary. Returns -1 otherwise.
4084 ADDRESS_P is a flag indicating if the expression is for address
4088 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4089 tree cbase
, HOST_WIDE_INT ratio
,
4092 aff_tree ubase_aff
, cbase_aff
;
4100 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4101 && (TREE_CODE (cbase
) == INTEGER_CST
))
4104 /* Strips the constant part. */
4105 if (TREE_CODE (ubase
) == PLUS_EXPR
4106 || TREE_CODE (ubase
) == MINUS_EXPR
4107 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4109 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4110 ubase
= TREE_OPERAND (ubase
, 0);
4113 /* Strips the constant part. */
4114 if (TREE_CODE (cbase
) == PLUS_EXPR
4115 || TREE_CODE (cbase
) == MINUS_EXPR
4116 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4118 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4119 cbase
= TREE_OPERAND (cbase
, 0);
4124 if (((TREE_CODE (ubase
) == SSA_NAME
)
4125 || (TREE_CODE (ubase
) == ADDR_EXPR
4126 && is_gimple_min_invariant (ubase
)))
4127 && (TREE_CODE (cbase
) == INTEGER_CST
))
4130 if (((TREE_CODE (cbase
) == SSA_NAME
)
4131 || (TREE_CODE (cbase
) == ADDR_EXPR
4132 && is_gimple_min_invariant (cbase
)))
4133 && (TREE_CODE (ubase
) == INTEGER_CST
))
4139 if(operand_equal_p (ubase
, cbase
, 0))
4142 if (TREE_CODE (ubase
) == ADDR_EXPR
4143 && TREE_CODE (cbase
) == ADDR_EXPR
)
4147 usym
= TREE_OPERAND (ubase
, 0);
4148 csym
= TREE_OPERAND (cbase
, 0);
4149 if (TREE_CODE (usym
) == ARRAY_REF
)
4151 tree ind
= TREE_OPERAND (usym
, 1);
4152 if (TREE_CODE (ind
) == INTEGER_CST
4153 && host_integerp (ind
, 0)
4154 && TREE_INT_CST_LOW (ind
) == 0)
4155 usym
= TREE_OPERAND (usym
, 0);
4157 if (TREE_CODE (csym
) == ARRAY_REF
)
4159 tree ind
= TREE_OPERAND (csym
, 1);
4160 if (TREE_CODE (ind
) == INTEGER_CST
4161 && host_integerp (ind
, 0)
4162 && TREE_INT_CST_LOW (ind
) == 0)
4163 csym
= TREE_OPERAND (csym
, 0);
4165 if (operand_equal_p (usym
, csym
, 0))
4168 /* Now do more complex comparison */
4169 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4170 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4171 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4175 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4176 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4178 aff_combination_scale (&cbase_aff
, shwi_to_double_int (-1 * ratio
));
4179 aff_combination_add (&ubase_aff
, &cbase_aff
);
4180 expr
= aff_combination_to_tree (&ubase_aff
);
4181 return get_expr_id (data
, expr
);
4186 /* Determines the cost of the computation by that USE is expressed
4187 from induction variable CAND. If ADDRESS_P is true, we just need
4188 to create an address from it, otherwise we want to get it into
4189 register. A set of invariants we depend on is stored in
4190 DEPENDS_ON. AT is the statement at that the value is computed.
4191 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4192 addressing is likely. */
4195 get_computation_cost_at (struct ivopts_data
*data
,
4196 struct iv_use
*use
, struct iv_cand
*cand
,
4197 bool address_p
, bitmap
*depends_on
, gimple at
,
4201 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4203 tree utype
= TREE_TYPE (ubase
), ctype
;
4204 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4205 HOST_WIDE_INT ratio
, aratio
;
4206 bool var_present
, symbol_present
, stmt_is_after_inc
;
4209 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4213 /* Only consider real candidates. */
4215 return infinite_cost
;
4217 cbase
= cand
->iv
->base
;
4218 cstep
= cand
->iv
->step
;
4219 ctype
= TREE_TYPE (cbase
);
4221 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4223 /* We do not have a precision to express the values of use. */
4224 return infinite_cost
;
4228 || (use
->iv
->base_object
4229 && cand
->iv
->base_object
4230 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4231 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4233 /* Do not try to express address of an object with computation based
4234 on address of a different object. This may cause problems in rtl
4235 level alias analysis (that does not expect this to be happening,
4236 as this is illegal in C), and would be unlikely to be useful
4238 if (use
->iv
->base_object
4239 && cand
->iv
->base_object
4240 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4241 return infinite_cost
;
4244 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4246 /* TODO -- add direct handling of this case. */
4250 /* CSTEPI is removed from the offset in case statement is after the
4251 increment. If the step is not constant, we use zero instead.
4252 This is a bit imprecise (there is the extra addition), but
4253 redundancy elimination is likely to transform the code so that
4254 it uses value of the variable before increment anyway,
4255 so it is not that much unrealistic. */
4256 if (cst_and_fits_in_hwi (cstep
))
4257 cstepi
= int_cst_value (cstep
);
4261 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4262 return infinite_cost
;
4264 if (double_int_fits_in_shwi_p (rat
))
4265 ratio
= double_int_to_shwi (rat
);
4267 return infinite_cost
;
4270 ctype
= TREE_TYPE (cbase
);
4272 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4274 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4275 or ratio == 1, it is better to handle this like
4277 ubase - ratio * cbase + ratio * var
4279 (also holds in the case ratio == -1, TODO. */
4281 if (cst_and_fits_in_hwi (cbase
))
4283 offset
= - ratio
* int_cst_value (cbase
);
4284 cost
= difference_cost (data
,
4285 ubase
, build_int_cst (utype
, 0),
4286 &symbol_present
, &var_present
, &offset
,
4288 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4290 else if (ratio
== 1)
4292 tree real_cbase
= cbase
;
4294 /* Check to see if any adjustment is needed. */
4295 if (cstepi
== 0 && stmt_is_after_inc
)
4297 aff_tree real_cbase_aff
;
4300 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4302 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4304 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4305 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4308 cost
= difference_cost (data
,
4310 &symbol_present
, &var_present
, &offset
,
4312 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4315 && !POINTER_TYPE_P (ctype
)
4316 && multiplier_allowed_in_address_p
4317 (ratio
, TYPE_MODE (TREE_TYPE (utype
)),
4318 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4321 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4322 cost
= difference_cost (data
,
4324 &symbol_present
, &var_present
, &offset
,
4326 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4330 cost
= force_var_cost (data
, cbase
, depends_on
);
4331 cost
= add_costs (cost
,
4332 difference_cost (data
,
4333 ubase
, build_int_cst (utype
, 0),
4334 &symbol_present
, &var_present
,
4335 &offset
, depends_on
));
4336 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4337 cost
.cost
+= add_regs_cost (TYPE_MODE (ctype
), data
->speed
);
4343 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4344 /* Clear depends on. */
4345 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4346 bitmap_clear (*depends_on
);
4349 /* If we are after the increment, the value of the candidate is higher by
4351 if (stmt_is_after_inc
)
4352 offset
-= ratio
* cstepi
;
4354 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4355 (symbol/var1/const parts may be omitted). If we are looking for an
4356 address, find the cost of addressing this. */
4358 return add_costs (cost
,
4359 get_address_cost (symbol_present
, var_present
,
4360 offset
, ratio
, cstepi
,
4361 TYPE_MODE (TREE_TYPE (utype
)),
4362 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4363 speed
, stmt_is_after_inc
,
4366 /* Otherwise estimate the costs for computing the expression. */
4367 if (!symbol_present
&& !var_present
&& !offset
)
4370 cost
.cost
+= multiply_by_const_cost (ratio
, TYPE_MODE (ctype
), speed
);
4374 /* Symbol + offset should be compile-time computable so consider that they
4375 are added once to the variable, if present. */
4376 if (var_present
&& (symbol_present
|| offset
))
4377 cost
.cost
+= adjust_setup_cost (data
,
4378 add_regs_cost (TYPE_MODE (ctype
), speed
));
4380 /* Having offset does not affect runtime cost in case it is added to
4381 symbol, but it increases complexity. */
4385 cost
.cost
+= add_regs_cost (TYPE_MODE (ctype
), speed
);
4387 aratio
= ratio
> 0 ? ratio
: -ratio
;
4389 cost
.cost
+= multiply_by_const_cost (aratio
, TYPE_MODE (ctype
), speed
);
4394 *can_autoinc
= false;
4397 /* Just get the expression, expand it and measure the cost. */
4398 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4401 return infinite_cost
;
4404 comp
= build_simple_mem_ref (comp
);
4406 return new_cost (computation_cost (comp
, speed
), 0);
4410 /* Determines the cost of the computation by that USE is expressed
4411 from induction variable CAND. If ADDRESS_P is true, we just need
4412 to create an address from it, otherwise we want to get it into
4413 register. A set of invariants we depend on is stored in
4414 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4415 autoinc addressing is likely. */
4418 get_computation_cost (struct ivopts_data
*data
,
4419 struct iv_use
*use
, struct iv_cand
*cand
,
4420 bool address_p
, bitmap
*depends_on
,
4421 bool *can_autoinc
, int *inv_expr_id
)
4423 return get_computation_cost_at (data
,
4424 use
, cand
, address_p
, depends_on
, use
->stmt
,
4425 can_autoinc
, inv_expr_id
);
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
/* Returns period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
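
/* Worked example, not part of the pass: for a 32-bit unsigned type and step 4,
   num_ending_zeros (step) is 2, so the period is a mask of 32 - 2 = 30 low
   bits, i.e. 0x3fffffff; the iv can be stepped that many times before its
   value is able to repeat.  */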
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
         && (nowrap_type_p (TREE_TYPE (exp))
             == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);

  return exp;
}
4568 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4569 check for an exact match. */
4572 expr_equal_p (tree e
, tree what
)
4575 enum tree_code code
;
4577 e
= strip_wrap_conserving_type_conversions (e
);
4578 what
= strip_wrap_conserving_type_conversions (what
);
4580 code
= TREE_CODE (what
);
4581 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4584 if (operand_equal_p (e
, what
, 0))
4587 if (TREE_CODE (e
) != SSA_NAME
)
4590 stmt
= SSA_NAME_DEF_STMT (e
);
4591 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4592 || gimple_assign_rhs_code (stmt
) != code
)
4595 switch (get_gimple_rhs_class (code
))
4597 case GIMPLE_BINARY_RHS
:
4598 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4602 case GIMPLE_UNARY_RHS
:
4603 case GIMPLE_SINGLE_RHS
:
4604 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4610 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4611 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4612 calculation is performed in non-wrapping type.
4614 TODO: More generally, we could test for the situation that
4615 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4616 This would require knowing the sign of OFFSET.
4618 Also, we only look for the first addition in the computation of BASE.
4619 More complex analysis would be better, but introducing it just for
4620 this optimization seems like an overkill. */
4623 difference_cannot_overflow_p (tree base
, tree offset
)
4625 enum tree_code code
;
4628 if (!nowrap_type_p (TREE_TYPE (base
)))
4631 base
= expand_simple_operations (base
);
4633 if (TREE_CODE (base
) == SSA_NAME
)
4635 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4637 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4640 code
= gimple_assign_rhs_code (stmt
);
4641 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4644 e1
= gimple_assign_rhs1 (stmt
);
4645 e2
= gimple_assign_rhs2 (stmt
);
4649 code
= TREE_CODE (base
);
4650 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4652 e1
= TREE_OPERAND (base
, 0);
4653 e2
= TREE_OPERAND (base
, 1);
4656 /* TODO: deeper inspection may be necessary to prove the equality. */
4660 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4661 case POINTER_PLUS_EXPR
:
4662 return expr_equal_p (e2
, offset
);
4669 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4670 comparison with CAND. NITER describes the number of iterations of
4671 the loops. If successful, the comparison in COMP_P is altered accordingly.
4673 We aim to handle the following situation:
4689 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4690 We aim to optimize this to
4698 while (p < p_0 - a + b);
4700 This preserves the correctness, since the pointer arithmetics does not
4701 overflow. More precisely:
4703 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4704 overflow in computing it or the values of p.
4705 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4706 overflow. To prove this, we use the fact that p_0 = base + a. */
static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
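  /* For instance, if A = i_0, B = n and NITER = n - i_0 - 1, the affine
     combinations built below are -(n - i_0 - 1), -i_0 and n; their sum folds
     to the constant 1 with no remaining symbolic part, which is exactly what
     the test below accepts.  */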
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
	{
	  /* See if we can take advantage of inferred loop bound information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (double_int_ucmp (max_niter, period_value) > 0)
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;
      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);
	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
	      || uid > gimple_uid (cand->incremented_at))
	    continue;
	  if (closest == NULL || uid > gimple_uid (closest->stmt))
	    closest = use;
	}
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
	continue;
      cand->ainc_use = closest;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
/* Adds the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve the set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
/* Attempts to find the optimal set of induction variables.  We do a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
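/* Finds the optimal set of induction variables for the current loop.  Runs
   the greedy search twice, once preferring the original IVs and once
   preferring candidates not based on a memory object, and returns the
   cheaper of the two resulting sets.  */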
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any memory object.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
	  || old_code == MINUS_EXPR
	  || old_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (old_code != MINUS_EXPR
		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}