/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
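
/* As a minimal illustration of the effect (a hand-written sketch, not
   output of this pass), strength reduction turns the indexed access in

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the address a + 4 * i would be recomputed on every iteration
   (assuming a 4-byte element type), into a pointer induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   so the per-iteration multiplication is replaced by a simple increment.  */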

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of the use, if HAVE_USE_FOR
                           is true.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
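
/* For instance (a rough sketch, not pass output), an IP_NORMAL candidate
   is incremented just before the exit test of the loop body,

     i' = i + 1;
     if (i' < n) goto body;

   while an IP_END candidate is incremented in the latch block, after the
   exit test has already consumed the old value of the iv.  */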

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in the current loop.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, we always try to remove
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
	 unconditionally (i.e., without possibility of the number of
	 iterations being zero).  Also, we cannot safely work with ssa names
	 that appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns the tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL_TREE is
   returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
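
/* For instance (a sketch of the intent, not an exhaustive list of cases),
   for EXPR == &a[i] the base object is &a, for a pointer expression
   p + 4 * i it is p, and for an integer-valued EXPR no base object
   exists, so NULL_TREE is returned.  */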

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL_TREE if PHI
   does not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
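
/* A minimal illustration (hand-written gimple, not pass output): for

     for (i = 0; i < n; i++)
       ...

   the loop header contains a phi node i_1 = PHI <0 (preheader), i_2 (latch)>,
   which find_bivs records as a biv with base 0 and step 1.  */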

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
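
/* For instance, for an exit test if (i_1 < n_2), where i_1 is an iv with
   nonzero step and n_2 is loop invariant, *CONTROL_VAR is set to the
   location of i_1, *BOUND to that of n_2, and true is returned; the names
   i_1 and n_2 are made up for illustration.  */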

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
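
/* For example, for TOP == i * 8 and BOT == i this stores 8 to *MUL and
   returns true; for TOP == 12 and BOT == 4 it stores 3; for TOP == 12 and
   BOT == 8 it returns false, since the division leaves a remainder.  */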

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      double_int mul;
      tree al = build_int_cst (TREE_TYPE (step),
			       GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);

      if (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0)
	return true;

      if (!constant_multiple_of (step, al, &mul))
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == INDIRECT_REF)
	    *ref = fold_indirect_ref (*ref);
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
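
/* For instance (a sketch, assuming a 4-byte element type for the array a),
   stripping EXPR == &a[i + 3] yields &a[i] and stores 12 to *OFFSET.  */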

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

2087 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2088 position to POS. If USE is not NULL, the candidate is set as related to
2089 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2090 replacement of the final value of the iv by a direct computation. */
2091
2092 static struct iv_cand *
2093 add_candidate_1 (struct ivopts_data *data,
2094 tree base, tree step, bool important, enum iv_position pos,
2095 struct iv_use *use, gimple incremented_at)
2096 {
2097 unsigned i;
2098 struct iv_cand *cand = NULL;
2099 tree type, orig_type;
2100
2101 if (base)
2102 {
2103 orig_type = TREE_TYPE (base);
2104 type = generic_type_for (orig_type);
2105 if (type != orig_type)
2106 {
2107 base = fold_convert (type, base);
2108 step = fold_convert (type, step);
2109 }
2110 }
2111
2112 for (i = 0; i < n_iv_cands (data); i++)
2113 {
2114 cand = iv_cand (data, i);
2115
2116 if (cand->pos != pos)
2117 continue;
2118
2119 if (cand->incremented_at != incremented_at
2120 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2121 && cand->ainc_use != use))
2122 continue;
2123
2124 if (!cand->iv)
2125 {
2126 if (!base && !step)
2127 break;
2128
2129 continue;
2130 }
2131
2132 if (!base && !step)
2133 continue;
2134
2135 if (operand_equal_p (base, cand->iv->base, 0)
2136 && operand_equal_p (step, cand->iv->step, 0))
2137 break;
2138 }
2139
2140 if (i == n_iv_cands (data))
2141 {
2142 cand = XCNEW (struct iv_cand);
2143 cand->id = i;
2144
2145 if (!base && !step)
2146 cand->iv = NULL;
2147 else
2148 cand->iv = alloc_iv (base, step);
2149
2150 cand->pos = pos;
2151 if (pos != IP_ORIGINAL && cand->iv)
2152 {
2153 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2154 cand->var_after = cand->var_before;
2155 }
2156 cand->important = important;
2157 cand->incremented_at = incremented_at;
2158 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2159
2160 if (step
2161 && TREE_CODE (step) != INTEGER_CST)
2162 {
2163 fd_ivopts_data = data;
2164 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2165 }
2166
2167 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2168 cand->ainc_use = use;
2169 else
2170 cand->ainc_use = NULL;
2171
2172 if (dump_file && (dump_flags & TDF_DETAILS))
2173 dump_cand (dump_file, cand);
2174 }
2175
2176 if (important && !cand->important)
2177 {
2178 cand->important = true;
2179 if (dump_file && (dump_flags & TDF_DETAILS))
2180 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2181 }
2182
2183 if (use)
2184 {
2185 bitmap_set_bit (use->related_cands, i);
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2187 fprintf (dump_file, "Candidate %d is related to use %d\n",
2188 cand->id, use->id);
2189 }
2190
2191 return cand;
2192 }
2193
2194 /* Returns true if incrementing the induction variable at the end of LOOP
2195 is allowed.
2196
2197 The purpose is to avoid splitting the latch edge with a biv increment,
2198 thus creating a jump, possibly confusing other optimization passes and
2199 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2200 IP_NORMAL_POS is not available (so we do not have a better alternative),
2201 or if the latch edge is already nonempty. */
2202
2203 static bool
2204 allow_ip_end_pos_p (struct loop *loop)
2205 {
2206 if (!ip_normal_pos (loop))
2207 return true;
2208
2209 if (!empty_block_p (ip_end_pos (loop)))
2210 return true;
2211
2212 return false;
2213 }
2214
2215 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2216 Important field is set to IMPORTANT. */
2217
2218 static void
2219 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2220 bool important, struct iv_use *use)
2221 {
2222 basic_block use_bb = gimple_bb (use->stmt);
2223 enum machine_mode mem_mode;
2224 unsigned HOST_WIDE_INT cstepi;
2225
2226 /* If we insert the increment in any position other than the standard
2227 ones, we must ensure that it is incremented once per iteration.
2228 It must not be in an inner nested loop, or one side of an if
2229 statement. */
2230 if (use_bb->loop_father != data->current_loop
2231 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2232 || stmt_could_throw_p (use->stmt)
2233 || !cst_and_fits_in_hwi (step))
2234 return;
2235
2236 cstepi = int_cst_value (step);
2237
2238 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2239 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2240 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2241 {
2242 enum tree_code code = MINUS_EXPR;
2243 tree new_base;
2244 tree new_step = step;
2245
2246 if (POINTER_TYPE_P (TREE_TYPE (base)))
2247 {
2248 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2249 code = POINTER_PLUS_EXPR;
2250 }
2251 else
2252 new_step = fold_convert (TREE_TYPE (base), new_step);
2253 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2254 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2255 use->stmt);
2256 }
2257 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2258 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2259 {
2260 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2261 use->stmt);
2262 }
2263 }
2264
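/* For example (illustrative, target-dependent): for a use *p where p
   advances by 4 bytes per iteration and the accessed mode is 4 bytes
   wide, a target with HAVE_POST_INCREMENT gets an IP_AFTER_USE
   candidate with base p and step 4, so the increment can be folded
   into the access as "*p++"; with HAVE_PRE_INCREMENT, the
   IP_BEFORE_USE candidate starts at p - 4, so that "*++p" yields the
   same sequence of addresses.  */
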
2265 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2266 position to POS. If USE is not NULL, the candidate is set as related to
2267 it. The candidate computation is scheduled on all available positions. */
2268
2269 static void
2270 add_candidate (struct ivopts_data *data,
2271 tree base, tree step, bool important, struct iv_use *use)
2272 {
2273 if (ip_normal_pos (data->current_loop))
2274 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2275 if (ip_end_pos (data->current_loop)
2276 && allow_ip_end_pos_p (data->current_loop))
2277 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2278
2279 if (use != NULL && use->type == USE_ADDRESS)
2280 add_autoinc_candidates (data, base, step, important, use);
2281 }
2282
2283 /* Add a standard "0 + 1 * iteration" iv candidate for a
2284 type with SIZE bits. */
2285
2286 static void
2287 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2288 unsigned int size)
2289 {
2290 tree type = lang_hooks.types.type_for_size (size, true);
2291 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2292 true, NULL);
2293 }
2294
2295 /* Adds standard iv candidates. */
2296
2297 static void
2298 add_standard_iv_candidates (struct ivopts_data *data)
2299 {
2300 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2301
2302 /* The same for a double-integer type if it is still fast enough. */
2303 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2304 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2305 }
2306
2308 /* Adds candidates based on the old induction variable IV. */
2309
2310 static void
2311 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2312 {
2313 gimple phi;
2314 tree def;
2315 struct iv_cand *cand;
2316
2317 add_candidate (data, iv->base, iv->step, true, NULL);
2318
2319 /* The same, but with initial value zero. */
2320 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2321 add_candidate (data, size_int (0), iv->step, true, NULL);
2322 else
2323 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2324 iv->step, true, NULL);
2325
2326 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2327 if (gimple_code (phi) == GIMPLE_PHI)
2328 {
2329 /* Additionally record the possibility of leaving the original iv
2330 untouched. */
2331 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2332 cand = add_candidate_1 (data,
2333 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2334 SSA_NAME_DEF_STMT (def));
2335 cand->var_before = iv->ssa_name;
2336 cand->var_after = def;
2337 }
2338 }
2339
2340 /* Adds candidates based on the old induction variables. */
2341
2342 static void
2343 add_old_ivs_candidates (struct ivopts_data *data)
2344 {
2345 unsigned i;
2346 struct iv *iv;
2347 bitmap_iterator bi;
2348
2349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2350 {
2351 iv = ver_info (data, i)->iv;
2352 if (iv && iv->biv_p && !integer_zerop (iv->step))
2353 add_old_iv_candidates (data, iv);
2354 }
2355 }
2356
2357 /* Adds candidates based on the value of the induction variable IV and USE. */
2358
2359 static void
2360 add_iv_value_candidates (struct ivopts_data *data,
2361 struct iv *iv, struct iv_use *use)
2362 {
2363 unsigned HOST_WIDE_INT offset;
2364 tree base;
2365 tree basetype;
2366
2367 add_candidate (data, iv->base, iv->step, false, use);
2368
2369 /* The same, but with initial value zero. Make such a variable important,
2370 since it is generic enough that possibly many uses may be based
2371 on it. */
2372 basetype = TREE_TYPE (iv->base);
2373 if (POINTER_TYPE_P (basetype))
2374 basetype = sizetype;
2375 add_candidate (data, build_int_cst (basetype, 0),
2376 iv->step, true, use);
2377
2378 /* Third, try removing the constant offset. Make sure to even
2379 add a candidate for &a[0] vs. (T *)&a. */
2380 base = strip_offset (iv->base, &offset);
2381 if (offset
2382 || base != iv->base)
2383 add_candidate (data, base, iv->step, false, use);
2384 }
2385
2386 /* Adds candidates based on the uses. */
2387
2388 static void
2389 add_derived_ivs_candidates (struct ivopts_data *data)
2390 {
2391 unsigned i;
2392
2393 for (i = 0; i < n_iv_uses (data); i++)
2394 {
2395 struct iv_use *use = iv_use (data, i);
2396
2397 if (!use)
2398 continue;
2399
2400 switch (use->type)
2401 {
2402 case USE_NONLINEAR_EXPR:
2403 case USE_COMPARE:
2404 case USE_ADDRESS:
2405 /* Just add the ivs based on the value of the iv used here. */
2406 add_iv_value_candidates (data, use->iv, use);
2407 break;
2408
2409 default:
2410 gcc_unreachable ();
2411 }
2412 }
2413 }
2414
2415 /* Record important candidates and add them to related_cands bitmaps
2416 if needed. */
2417
2418 static void
2419 record_important_candidates (struct ivopts_data *data)
2420 {
2421 unsigned i;
2422 struct iv_use *use;
2423
2424 for (i = 0; i < n_iv_cands (data); i++)
2425 {
2426 struct iv_cand *cand = iv_cand (data, i);
2427
2428 if (cand->important)
2429 bitmap_set_bit (data->important_candidates, i);
2430 }
2431
2432 data->consider_all_candidates = (n_iv_cands (data)
2433 <= CONSIDER_ALL_CANDIDATES_BOUND);
2434
2435 if (data->consider_all_candidates)
2436 {
2437 /* We will not need "related_cands" bitmaps in this case,
2438 so release them to decrease peak memory consumption. */
2439 for (i = 0; i < n_iv_uses (data); i++)
2440 {
2441 use = iv_use (data, i);
2442 BITMAP_FREE (use->related_cands);
2443 }
2444 }
2445 else
2446 {
2447 /* Add important candidates to the related_cands bitmaps. */
2448 for (i = 0; i < n_iv_uses (data); i++)
2449 bitmap_ior_into (iv_use (data, i)->related_cands,
2450 data->important_candidates);
2451 }
2452 }
2453
2454 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2455 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2456 we allocate a small hash table for every use. */
2457
2458 static void
2459 alloc_use_cost_map (struct ivopts_data *data)
2460 {
2461 unsigned i, size, s, j;
2462
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 {
2465 struct iv_use *use = iv_use (data, i);
2466 bitmap_iterator bi;
2467
2468 if (data->consider_all_candidates)
2469 size = n_iv_cands (data);
2470 else
2471 {
2472 s = 0;
2473 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2474 {
2475 s++;
2476 }
2477
2478 /* Round up to the nearest power of two, so that taking the modulus is fast. */
2479 for (size = 1; size < s; size <<= 1)
2480 continue;
2481 }
2482
2483 use->n_map_members = size;
2484 use->cost_map = XCNEWVEC (struct cost_pair, size);
2485 }
2486 }
2487
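/* A minimal standalone sketch (illustrative values, not pass code) of
   the rounding loop above and the masked modulo it enables:

     unsigned s = 5, size, id = 13, slot;
     for (size = 1; size < s; size <<= 1)
       continue;                 now size == 8
     slot = id & (size - 1);     equals id % size, i.e. 5

   Rounding S up to a power of two lets set_use_iv_cost and
   get_use_iv_cost below replace the modulo by a cheap bitwise AND.  */
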
2488 /* Returns a description of the computation cost of an expression whose
2489 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2490
2491 static comp_cost
2492 new_cost (unsigned runtime, unsigned complexity)
2493 {
2494 comp_cost cost;
2495
2496 cost.cost = runtime;
2497 cost.complexity = complexity;
2498
2499 return cost;
2500 }
2501
2502 /* Adds costs COST1 and COST2. */
2503
2504 static comp_cost
2505 add_costs (comp_cost cost1, comp_cost cost2)
2506 {
2507 cost1.cost += cost2.cost;
2508 cost1.complexity += cost2.complexity;
2509
2510 return cost1;
2511 }

2512 /* Subtracts cost COST2 from COST1. */
2513
2514 static comp_cost
2515 sub_costs (comp_cost cost1, comp_cost cost2)
2516 {
2517 cost1.cost -= cost2.cost;
2518 cost1.complexity -= cost2.complexity;
2519
2520 return cost1;
2521 }
2522
2523 /* Returns a negative number if COST1 < COST2, a positive number if
2524 COST1 > COST2, and 0 if COST1 = COST2. */
2525
2526 static int
2527 compare_costs (comp_cost cost1, comp_cost cost2)
2528 {
2529 if (cost1.cost == cost2.cost)
2530 return cost1.complexity - cost2.complexity;
2531
2532 return cost1.cost - cost2.cost;
2533 }
2534
2535 /* Returns true if COST is infinite. */
2536
2537 static bool
2538 infinite_cost_p (comp_cost cost)
2539 {
2540 return cost.cost == INFTY;
2541 }
2542
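/* Illustrative arithmetic on comp_cost (numbers hypothetical):

     comp_cost a = new_cost (4, 1);    runtime 4, complexity 1
     comp_cost b = new_cost (2, 0);
     comp_cost c = add_costs (a, b);   c == {6, 1}

   compare_costs (a, b) is positive, i.e. A is more expensive: the
   runtime fields are compared first and complexity only breaks ties.
   infinite_cost_p holds for costs whose runtime field is INFTY.  */
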
2543 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2544 on invariants DEPENDS_ON and that the value used in expressing it
2545 is VALUE. */
2546
2547 static void
2548 set_use_iv_cost (struct ivopts_data *data,
2549 struct iv_use *use, struct iv_cand *cand,
2550 comp_cost cost, bitmap depends_on, tree value)
2551 {
2552 unsigned i, s;
2553
2554 if (infinite_cost_p (cost))
2555 {
2556 BITMAP_FREE (depends_on);
2557 return;
2558 }
2559
2560 if (data->consider_all_candidates)
2561 {
2562 use->cost_map[cand->id].cand = cand;
2563 use->cost_map[cand->id].cost = cost;
2564 use->cost_map[cand->id].depends_on = depends_on;
2565 use->cost_map[cand->id].value = value;
2566 return;
2567 }
2568
2569 /* n_map_members is a power of two, so this computes modulo. */
2570 s = cand->id & (use->n_map_members - 1);
2571 for (i = s; i < use->n_map_members; i++)
2572 if (!use->cost_map[i].cand)
2573 goto found;
2574 for (i = 0; i < s; i++)
2575 if (!use->cost_map[i].cand)
2576 goto found;
2577
2578 gcc_unreachable ();
2579
2580 found:
2581 use->cost_map[i].cand = cand;
2582 use->cost_map[i].cost = cost;
2583 use->cost_map[i].depends_on = depends_on;
2584 use->cost_map[i].value = value;
2585 }
2586
2587 /* Gets cost of (USE, CANDIDATE) pair. */
2588
2589 static struct cost_pair *
2590 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2591 struct iv_cand *cand)
2592 {
2593 unsigned i, s;
2594 struct cost_pair *ret;
2595
2596 if (!cand)
2597 return NULL;
2598
2599 if (data->consider_all_candidates)
2600 {
2601 ret = use->cost_map + cand->id;
2602 if (!ret->cand)
2603 return NULL;
2604
2605 return ret;
2606 }
2607
2608 /* n_map_members is a power of two, so this computes modulo. */
2609 s = cand->id & (use->n_map_members - 1);
2610 for (i = s; i < use->n_map_members; i++)
2611 if (use->cost_map[i].cand == cand)
2612 return use->cost_map + i;
2613
2614 for (i = 0; i < s; i++)
2615 if (use->cost_map[i].cand == cand)
2616 return use->cost_map + i;
2617
2618 return NULL;
2619 }
2620
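/* Illustrative probe sequence (hypothetical numbers): with
   n_map_members == 8 and cand->id == 13, both set_use_iv_cost and
   get_use_iv_cost start at slot 13 & 7 == 5 and scan slots 5, 6, 7,
   then wrap around to 0 .. 4 -- classic open addressing with linear
   probing.  A lookup that scans all slots without finding CAND
   returns NULL.  */
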
2621 /* Returns an estimate of the cost of computing SEQ. */
2622
2623 static unsigned
2624 seq_cost (rtx seq, bool speed)
2625 {
2626 unsigned cost = 0;
2627 rtx set;
2628
2629 for (; seq; seq = NEXT_INSN (seq))
2630 {
2631 set = single_set (seq);
2632 if (set)
2633 cost += rtx_cost (set, SET, speed);
2634 else
2635 cost++;
2636 }
2637
2638 return cost;
2639 }
2640
2641 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2642 static rtx
2643 produce_memory_decl_rtl (tree obj, int *regno)
2644 {
2645 rtx x;
2646
2647 gcc_assert (obj);
2648 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2649 {
2650 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2651 x = gen_rtx_SYMBOL_REF (Pmode, name);
2652 SET_SYMBOL_REF_DECL (x, obj);
2653 x = gen_rtx_MEM (DECL_MODE (obj), x);
2654 targetm.encode_section_info (obj, x, true);
2655 }
2656 else
2657 {
2658 x = gen_raw_REG (Pmode, (*regno)++);
2659 x = gen_rtx_MEM (DECL_MODE (obj), x);
2660 }
2661
2662 return x;
2663 }
2664
2665 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2666 walk_tree. DATA contains the actual fake register number. */
2667
2668 static tree
2669 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2670 {
2671 tree obj = NULL_TREE;
2672 rtx x = NULL_RTX;
2673 int *regno = (int *) data;
2674
2675 switch (TREE_CODE (*expr_p))
2676 {
2677 case ADDR_EXPR:
2678 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2679 handled_component_p (*expr_p);
2680 expr_p = &TREE_OPERAND (*expr_p, 0))
2681 continue;
2682 obj = *expr_p;
2683 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2684 x = produce_memory_decl_rtl (obj, regno);
2685 break;
2686
2687 case SSA_NAME:
2688 *ws = 0;
2689 obj = SSA_NAME_VAR (*expr_p);
2690 if (!DECL_RTL_SET_P (obj))
2691 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2692 break;
2693
2694 case VAR_DECL:
2695 case PARM_DECL:
2696 case RESULT_DECL:
2697 *ws = 0;
2698 obj = *expr_p;
2699
2700 if (DECL_RTL_SET_P (obj))
2701 break;
2702
2703 if (DECL_MODE (obj) == BLKmode)
2704 x = produce_memory_decl_rtl (obj, regno);
2705 else
2706 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2707
2708 break;
2709
2710 default:
2711 break;
2712 }
2713
2714 if (x)
2715 {
2716 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2717 SET_DECL_RTL (obj, x);
2718 }
2719
2720 return NULL_TREE;
2721 }
2722
2723 /* Determines cost of the computation of EXPR. */
2724
2725 static unsigned
2726 computation_cost (tree expr, bool speed)
2727 {
2728 rtx seq, rslt;
2729 tree type = TREE_TYPE (expr);
2730 unsigned cost;
2731 /* Avoid using hard regs in ways which may be unsupported. */
2732 int regno = LAST_VIRTUAL_REGISTER + 1;
2733 enum function_frequency real_frequency = cfun->function_frequency;
2734
2735 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2736 crtl->maybe_hot_insn_p = speed;
2737 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2738 start_sequence ();
2739 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2740 seq = get_insns ();
2741 end_sequence ();
2742 default_rtl_profile ();
2743 cfun->function_frequency = real_frequency;
2744
2745 cost = seq_cost (seq, speed);
2746 if (MEM_P (rslt))
2747 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
2748
2749 return cost;
2750 }
2751
2752 /* Returns variable containing the value of candidate CAND at statement AT. */
2753
2754 static tree
2755 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2756 {
2757 if (stmt_after_increment (loop, cand, stmt))
2758 return cand->var_after;
2759 else
2760 return cand->var_before;
2761 }
2762
2763 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2764 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2765
2766 int
2767 tree_int_cst_sign_bit (const_tree t)
2768 {
2769 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2770 unsigned HOST_WIDE_INT w;
2771
2772 if (bitno < HOST_BITS_PER_WIDE_INT)
2773 w = TREE_INT_CST_LOW (t);
2774 else
2775 {
2776 w = TREE_INT_CST_HIGH (t);
2777 bitno -= HOST_BITS_PER_WIDE_INT;
2778 }
2779
2780 return (w >> bitno) & 1;
2781 }
2782
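/* Worked example: for a constant of 32-bit precision, bitno is 31 and
   fits in HOST_BITS_PER_WIDE_INT, so the bit comes from
   TREE_INT_CST_LOW; e.g. the value 0x80000000 yields
   (0x80000000 >> 31) & 1 == 1.  For precisions above
   HOST_BITS_PER_WIDE_INT the bit is taken from TREE_INT_CST_HIGH
   after rebasing bitno.  */
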
2783 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2784 same precision that is at least as wide as the precision of TYPE, stores
2785 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2786 type of A and B. */
2787
2788 static tree
2789 determine_common_wider_type (tree *a, tree *b)
2790 {
2791 tree wider_type = NULL;
2792 tree suba, subb;
2793 tree atype = TREE_TYPE (*a);
2794
2795 if (CONVERT_EXPR_P (*a))
2796 {
2797 suba = TREE_OPERAND (*a, 0);
2798 wider_type = TREE_TYPE (suba);
2799 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2800 return atype;
2801 }
2802 else
2803 return atype;
2804
2805 if (CONVERT_EXPR_P (*b))
2806 {
2807 subb = TREE_OPERAND (*b, 0);
2808 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2809 return atype;
2810 }
2811 else
2812 return atype;
2813
2814 *a = suba;
2815 *b = subb;
2816 return wider_type;
2817 }
2818
2819 /* Determines the expression by which USE is expressed from induction variable
2820 CAND at statement AT in LOOP. The expression is stored in a decomposed
2821 form into AFF. Returns false if USE cannot be expressed using CAND. */
2822
2823 static bool
2824 get_computation_aff (struct loop *loop,
2825 struct iv_use *use, struct iv_cand *cand, gimple at,
2826 struct affine_tree_combination *aff)
2827 {
2828 tree ubase = use->iv->base;
2829 tree ustep = use->iv->step;
2830 tree cbase = cand->iv->base;
2831 tree cstep = cand->iv->step, cstep_common;
2832 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2833 tree common_type, var;
2834 tree uutype;
2835 aff_tree cbase_aff, var_aff;
2836 double_int rat;
2837
2838 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2839 {
2840 /* We do not have enough precision to express the values of USE. */
2841 return false;
2842 }
2843
2844 var = var_at_stmt (loop, cand, at);
2845 uutype = unsigned_type_for (utype);
2846
2847 /* If the conversion is not noop, perform it. */
2848 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2849 {
2850 cstep = fold_convert (uutype, cstep);
2851 cbase = fold_convert (uutype, cbase);
2852 var = fold_convert (uutype, var);
2853 }
2854
2855 if (!constant_multiple_of (ustep, cstep, &rat))
2856 return false;
2857
2858 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2859 type, we achieve better folding by computing their difference in this
2860 wider type, and casting the result to UUTYPE. We do not need to worry
2861 about overflows, as all the arithmetic will in the end be performed in
2862 UUTYPE anyway. */
2863 common_type = determine_common_wider_type (&ubase, &cbase);
2864
2865 /* use = ubase - ratio * cbase + ratio * var. */
2866 tree_to_aff_combination (ubase, common_type, aff);
2867 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2868 tree_to_aff_combination (var, uutype, &var_aff);
2869
2870 /* We need to shift the value if we are after the increment. */
2871 if (stmt_after_increment (loop, cand, at))
2872 {
2873 aff_tree cstep_aff;
2874
2875 if (common_type != uutype)
2876 cstep_common = fold_convert (common_type, cstep);
2877 else
2878 cstep_common = cstep;
2879
2880 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2881 aff_combination_add (&cbase_aff, &cstep_aff);
2882 }
2883
2884 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2885 aff_combination_add (aff, &cbase_aff);
2886 if (common_type != uutype)
2887 aff_combination_convert (aff, uutype);
2888
2889 aff_combination_scale (&var_aff, rat);
2890 aff_combination_add (aff, &var_aff);
2891
2892 return true;
2893 }
2894
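/* Worked example (hypothetical ivs): for a use with base 8 and step 4
   and a candidate with base 0 and step 2, constant_multiple_of yields
   rat == 2, and the affine combination built above is

     use = ubase - rat * cbase + rat * var = 8 - 2*0 + 2*var

   with an extra rat * cstep shift folded into the cbase part when the
   use statement executes after the candidate's increment.  */
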
2895 /* Determines the expression by which USE is expressed from induction variable
2896 CAND at statement AT in LOOP. The computation is unshared. */
2897
2898 static tree
2899 get_computation_at (struct loop *loop,
2900 struct iv_use *use, struct iv_cand *cand, gimple at)
2901 {
2902 aff_tree aff;
2903 tree type = TREE_TYPE (use->iv->base);
2904
2905 if (!get_computation_aff (loop, use, cand, at, &aff))
2906 return NULL_TREE;
2907 unshare_aff_combination (&aff);
2908 return fold_convert (type, aff_combination_to_tree (&aff));
2909 }
2910
2911 /* Determines the expression by which USE is expressed from induction variable
2912 CAND in LOOP. The computation is unshared. */
2913
2914 static tree
2915 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2916 {
2917 return get_computation_at (loop, use, cand, use->stmt);
2918 }
2919
2920 /* Returns cost of addition in MODE. */
2921
2922 static unsigned
2923 add_cost (enum machine_mode mode, bool speed)
2924 {
2925 static unsigned costs[NUM_MACHINE_MODES];
2926 rtx seq;
2927 unsigned cost;
2928
2929 if (costs[mode])
2930 return costs[mode];
2931
2932 start_sequence ();
2933 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2934 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2935 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2936 NULL_RTX);
2937 seq = get_insns ();
2938 end_sequence ();
2939
2940 cost = seq_cost (seq, speed);
2941 if (!cost)
2942 cost = 1;
2943
2944 costs[mode] = cost;
2945
2946 if (dump_file && (dump_flags & TDF_DETAILS))
2947 fprintf (dump_file, "Addition in %s costs %d\n",
2948 GET_MODE_NAME (mode), cost);
2949 return cost;
2950 }
2951
2952 /* Entry in a hashtable of already known costs for multiplication. */
2953 struct mbc_entry
2954 {
2955 HOST_WIDE_INT cst; /* The constant to multiply by. */
2956 enum machine_mode mode; /* In mode. */
2957 unsigned cost; /* The cost. */
2958 };
2959
2960 /* Computes the hash value for ENTRY. */
2961
2962 static hashval_t
2963 mbc_entry_hash (const void *entry)
2964 {
2965 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2966
2967 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2968 }
2969
2970 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2971
2972 static int
2973 mbc_entry_eq (const void *entry1, const void *entry2)
2974 {
2975 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2976 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2977
2978 return (e1->mode == e2->mode
2979 && e1->cst == e2->cst);
2980 }
2981
2982 /* Returns cost of multiplication by constant CST in MODE. */
2983
2984 unsigned
2985 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
2986 {
2987 static htab_t costs;
2988 struct mbc_entry **cached, act;
2989 rtx seq;
2990 unsigned cost;
2991
2992 if (!costs)
2993 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2994
2995 act.mode = mode;
2996 act.cst = cst;
2997 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2998 if (*cached)
2999 return (*cached)->cost;
3000
3001 *cached = XNEW (struct mbc_entry);
3002 (*cached)->mode = mode;
3003 (*cached)->cst = cst;
3004
3005 start_sequence ();
3006 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3007 gen_int_mode (cst, mode), NULL_RTX, 0);
3008 seq = get_insns ();
3009 end_sequence ();
3010
3011 cost = seq_cost (seq, speed);
3012
3013 if (dump_file && (dump_flags & TDF_DETAILS))
3014 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3015 (int) cst, GET_MODE_NAME (mode), cost);
3016
3017 (*cached)->cost = cost;
3018
3019 return cost;
3020 }
3021
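/* Illustrative use (costs are target-dependent, numbers hypothetical):
   the first call of multiply_by_cost (4, SImode, true) expands the
   multiplication to RTL (typically a shift), measures the resulting
   insn sequence with seq_cost and caches the result under the key
   {SImode, 4}; later queries for the same (mode, constant) pair are
   answered from the hash table without expanding anything.  */
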
3022 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3023 validity for a memory reference accessing memory of mode MODE. */
3024
3025 bool
3026 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
3027 {
3028 #define MAX_RATIO 128
3029 static sbitmap valid_mult[MAX_MACHINE_MODE];
3030
3031 if (!valid_mult[mode])
3032 {
3033 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3034 rtx addr;
3035 HOST_WIDE_INT i;
3036
3037 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
3038 sbitmap_zero (valid_mult[mode]);
3039 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3040 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3041 {
3042 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3043 if (memory_address_p (mode, addr))
3044 SET_BIT (valid_mult[mode], i + MAX_RATIO);
3045 }
3046
3047 if (dump_file && (dump_flags & TDF_DETAILS))
3048 {
3049 fprintf (dump_file, " allowed multipliers:");
3050 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3051 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
3052 fprintf (dump_file, " %d", (int) i);
3053 fprintf (dump_file, "\n");
3054 fprintf (dump_file, "\n");
3055 }
3056 }
3057
3058 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3059 return false;
3060
3061 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
3062 }
3063
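/* For instance (illustrative only, the result is target-dependent):
   on a target whose addressing modes accept scale factors 1, 2, 4 and
   8, the probing loop above sets the bits for exactly those ratios,
   so a later query such as multiplier_allowed_in_address_p (4, SImode)
   reduces to a single cached bitmap test.  */
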
3064 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3065 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3066 variable is omitted. Compute the cost for a memory reference that accesses
3067 a memory location of mode MEM_MODE.
3068
3069 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3070 size of MEM_MODE / RATIO) is available. To make this determination, we
3071 look at the size of the increment to be made, which is given in CSTEP.
3072 CSTEP may be zero if the step is unknown.
3073 STMT_AFTER_INC is true iff the statement we're looking at is after the
3074 increment of the original biv.
3075
3076 TODO -- there must be some better way. This all is quite crude. */
3077
3078 static comp_cost
3079 get_address_cost (bool symbol_present, bool var_present,
3080 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3081 HOST_WIDE_INT cstep, enum machine_mode mem_mode, bool speed,
3082 bool stmt_after_inc, bool *may_autoinc)
3083 {
3084 static bool initialized[MAX_MACHINE_MODE];
3085 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3086 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3087 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3088 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3089 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3090 unsigned cost, acost, complexity;
3091 bool offset_p, ratio_p, autoinc;
3092 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3093 unsigned HOST_WIDE_INT mask;
3094 unsigned bits;
3095
3096 if (!initialized[mem_mode])
3097 {
3098 HOST_WIDE_INT i;
3099 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3100 int old_cse_not_expected;
3101 unsigned sym_p, var_p, off_p, rat_p, add_c;
3102 rtx seq, addr, base;
3103 rtx reg0, reg1;
3104
3105 initialized[mem_mode] = true;
3106
3107 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3108
3109 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3110 for (i = start; i <= 1 << 20; i <<= 1)
3111 {
3112 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3113 if (!memory_address_p (mem_mode, addr))
3114 break;
3115 }
3116 max_offset[mem_mode] = i == start ? 0 : i >> 1;
3117 off[mem_mode] = max_offset[mem_mode];
3118
3119 for (i = start; i <= 1 << 20; i <<= 1)
3120 {
3121 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3122 if (!memory_address_p (mem_mode, addr))
3123 break;
3124 }
3125 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3126
3127 if (dump_file && (dump_flags & TDF_DETAILS))
3128 {
3129 fprintf (dump_file, "get_address_cost:\n");
3130 fprintf (dump_file, " min offset %s %d\n",
3131 GET_MODE_NAME (mem_mode),
3132 (int) min_offset[mem_mode]);
3133 fprintf (dump_file, " max offset %s %d\n",
3134 GET_MODE_NAME (mem_mode),
3135 (int) max_offset[mem_mode]);
3136 }
3137
3138 rat[mem_mode] = 1;
3139 for (i = 2; i <= MAX_RATIO; i++)
3140 if (multiplier_allowed_in_address_p (i, mem_mode))
3141 {
3142 rat[mem_mode] = i;
3143 break;
3144 }
3145
3146 /* Compute the cost of various addressing modes. */
3147 acost = 0;
3148 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3149 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3150
3151 if (HAVE_PRE_DECREMENT)
3152 {
3153 addr = gen_rtx_PRE_DEC (Pmode, reg0);
3154 has_predec[mem_mode] = memory_address_p (mem_mode, addr);
3155 }
3156 if (HAVE_POST_DECREMENT)
3157 {
3158 addr = gen_rtx_POST_DEC (Pmode, reg0);
3159 has_postdec[mem_mode] = memory_address_p (mem_mode, addr);
3160 }
3161 if (HAVE_PRE_INCREMENT)
3162 {
3163 addr = gen_rtx_PRE_INC (Pmode, reg0);
3164 has_preinc[mem_mode] = memory_address_p (mem_mode, addr);
3165 }
3166 if (HAVE_POST_INCREMENT)
3167 {
3168 addr = gen_rtx_POST_INC (Pmode, reg0);
3169 has_postinc[mem_mode] = memory_address_p (mem_mode, addr);
3170 }
3171 for (i = 0; i < 16; i++)
3172 {
3173 sym_p = i & 1;
3174 var_p = (i >> 1) & 1;
3175 off_p = (i >> 2) & 1;
3176 rat_p = (i >> 3) & 1;
3177
3178 addr = reg0;
3179 if (rat_p)
3180 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3181 gen_int_mode (rat[mem_mode], Pmode));
3182
3183 if (var_p)
3184 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3185
3186 if (sym_p)
3187 {
3188 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3189 /* ??? We can run into trouble with some backends by presenting
3190 them with symbols which haven't been properly passed through
3191 targetm.encode_section_info. By setting the local bit, we
3192 enhance the probability of things working. */
3193 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3194
3195 if (off_p)
3196 base = gen_rtx_fmt_e (CONST, Pmode,
3197 gen_rtx_fmt_ee (PLUS, Pmode,
3198 base,
3199 gen_int_mode (off[mem_mode],
3200 Pmode)));
3201 }
3202 else if (off_p)
3203 base = gen_int_mode (off[mem_mode], Pmode);
3204 else
3205 base = NULL_RTX;
3206
3207 if (base)
3208 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3209
3210 start_sequence ();
3211 /* To avoid splitting addressing modes, pretend that no cse will
3212 follow. */
3213 old_cse_not_expected = cse_not_expected;
3214 cse_not_expected = true;
3215 addr = memory_address (mem_mode, addr);
3216 cse_not_expected = old_cse_not_expected;
3217 seq = get_insns ();
3218 end_sequence ();
3219
3220 acost = seq_cost (seq, speed);
3221 acost += address_cost (addr, mem_mode, speed);
3222
3223 if (!acost)
3224 acost = 1;
3225 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3226 }
3227
3228 /* On some targets, it is quite expensive to load a symbol into a register,
3229 which makes addresses that contain symbols look much more expensive.
3230 However, the symbol will have to be loaded in any case before the
3231 loop (and quite likely we have it in a register already), so it does not
3232 make much sense to penalize them too heavily. So make some final
3233 tweaks for the SYMBOL_PRESENT modes:
3234
3235 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3236 var is cheaper, use this mode with small penalty.
3237 If VAR_PRESENT is true, try whether the mode with
3238 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3239 if this is the case, use it. */
3240 add_c = add_cost (Pmode, speed);
3241 for (i = 0; i < 8; i++)
3242 {
3243 var_p = i & 1;
3244 off_p = (i >> 1) & 1;
3245 rat_p = (i >> 2) & 1;
3246
3247 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3248 if (var_p)
3249 acost += add_c;
3250
3251 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3252 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3253 }
3254
3255 if (dump_file && (dump_flags & TDF_DETAILS))
3256 {
3257 fprintf (dump_file, "Address costs:\n");
3258
3259 for (i = 0; i < 16; i++)
3260 {
3261 sym_p = i & 1;
3262 var_p = (i >> 1) & 1;
3263 off_p = (i >> 2) & 1;
3264 rat_p = (i >> 3) & 1;
3265
3266 fprintf (dump_file, " ");
3267 if (sym_p)
3268 fprintf (dump_file, "sym + ");
3269 if (var_p)
3270 fprintf (dump_file, "var + ");
3271 if (off_p)
3272 fprintf (dump_file, "cst + ");
3273 if (rat_p)
3274 fprintf (dump_file, "rat * ");
3275
3276 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3277 fprintf (dump_file, "index costs %d\n", acost);
3278 }
3279 if (has_predec[mem_mode] || has_postdec[mem_mode]
3280 || has_preinc[mem_mode] || has_postinc[mem_mode])
3281 fprintf (dump_file, " May include autoinc/dec\n");
3282 fprintf (dump_file, "\n");
3283 }
3284 }
3285
3286 bits = GET_MODE_BITSIZE (Pmode);
3287 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3288 offset &= mask;
3289 if ((offset >> (bits - 1) & 1))
3290 offset |= ~mask;
3291 s_offset = offset;
3292
3293 autoinc = false;
3294 msize = GET_MODE_SIZE (mem_mode);
3295 autoinc_offset = offset;
3296 if (stmt_after_inc)
3297 autoinc_offset += ratio * cstep;
3298 if (symbol_present || var_present || ratio != 1)
3299 autoinc = false;
3300 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3301 && msize == cstep)
3302 || (has_postdec[mem_mode] && autoinc_offset == 0
3303 && msize == -cstep)
3304 || (has_preinc[mem_mode] && autoinc_offset == msize
3305 && msize == cstep)
3306 || (has_predec[mem_mode] && autoinc_offset == -msize
3307 && msize == -cstep))
3308 autoinc = true;
3309
3310 cost = 0;
3311 offset_p = (s_offset != 0
3312 && min_offset[mem_mode] <= s_offset
3313 && s_offset <= max_offset[mem_mode]);
3314 ratio_p = (ratio != 1
3315 && multiplier_allowed_in_address_p (ratio, mem_mode));
3316
3317 if (ratio != 1 && !ratio_p)
3318 cost += multiply_by_cost (ratio, Pmode, speed);
3319
3320 if (s_offset && !offset_p && !symbol_present)
3321 cost += add_cost (Pmode, speed);
3322
3323 if (may_autoinc)
3324 *may_autoinc = autoinc;
3325 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3326 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3327 return new_cost (cost + acost, complexity);
3328 }
3329
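/* The mask computation in get_address_cost above sign-extends OFFSET
   to the width of Pmode.  E.g. (illustrative, assuming 32-bit Pmode
   and 64-bit HOST_WIDE_INT): bits == 32, mask == 0xffffffff, and an
   incoming offset of 0xfffffffc becomes s_offset == -4, so the
   min_offset/max_offset range check treats it as a small negative
   displacement rather than a huge positive one.  */
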
3330 /* Estimates cost of forcing expression EXPR into a variable. */
3331
3332 static comp_cost
3333 force_expr_to_var_cost (tree expr, bool speed)
3334 {
3335 static bool costs_initialized = false;
3336 static unsigned integer_cost [2];
3337 static unsigned symbol_cost [2];
3338 static unsigned address_cost [2];
3339 tree op0, op1;
3340 comp_cost cost0, cost1, cost;
3341 enum machine_mode mode;
3342
3343 if (!costs_initialized)
3344 {
3345 tree type = build_pointer_type (integer_type_node);
3346 tree var, addr;
3347 rtx x;
3348 int i;
3349
3350 var = create_tmp_var_raw (integer_type_node, "test_var");
3351 TREE_STATIC (var) = 1;
3352 x = produce_memory_decl_rtl (var, NULL);
3353 SET_DECL_RTL (var, x);
3354
3355 addr = build1 (ADDR_EXPR, type, var);
3356
3358 for (i = 0; i < 2; i++)
3359 {
3360 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3361 2000), i);
3362
3363 symbol_cost[i] = computation_cost (addr, i) + 1;
3364
3365 address_cost[i]
3366 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3367 addr,
3368 build_int_cst (sizetype, 2000)), i) + 1;
3369 if (dump_file && (dump_flags & TDF_DETAILS))
3370 {
3371 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3372 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3373 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3374 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3375 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3376 fprintf (dump_file, "\n");
3377 }
3378 }
3379
3380 costs_initialized = true;
3381 }
3382
3383 STRIP_NOPS (expr);
3384
3385 if (SSA_VAR_P (expr))
3386 return zero_cost;
3387
3388 if (is_gimple_min_invariant (expr))
3389 {
3390 if (TREE_CODE (expr) == INTEGER_CST)
3391 return new_cost (integer_cost [speed], 0);
3392
3393 if (TREE_CODE (expr) == ADDR_EXPR)
3394 {
3395 tree obj = TREE_OPERAND (expr, 0);
3396
3397 if (TREE_CODE (obj) == VAR_DECL
3398 || TREE_CODE (obj) == PARM_DECL
3399 || TREE_CODE (obj) == RESULT_DECL)
3400 return new_cost (symbol_cost [speed], 0);
3401 }
3402
3403 return new_cost (address_cost [speed], 0);
3404 }
3405
3406 switch (TREE_CODE (expr))
3407 {
3408 case POINTER_PLUS_EXPR:
3409 case PLUS_EXPR:
3410 case MINUS_EXPR:
3411 case MULT_EXPR:
3412 op0 = TREE_OPERAND (expr, 0);
3413 op1 = TREE_OPERAND (expr, 1);
3414 STRIP_NOPS (op0);
3415 STRIP_NOPS (op1);
3416
3417 if (is_gimple_val (op0))
3418 cost0 = zero_cost;
3419 else
3420 cost0 = force_expr_to_var_cost (op0, speed);
3421
3422 if (is_gimple_val (op1))
3423 cost1 = zero_cost;
3424 else
3425 cost1 = force_expr_to_var_cost (op1, speed);
3426
3427 break;
3428
3429 case NEGATE_EXPR:
3430 op0 = TREE_OPERAND (expr, 0);
3431 STRIP_NOPS (op0);
3432 op1 = NULL_TREE;
3433
3434 if (is_gimple_val (op0))
3435 cost0 = zero_cost;
3436 else
3437 cost0 = force_expr_to_var_cost (op0, speed);
3438
3439 cost1 = zero_cost;
3440 break;
3441
3442 default:
3443 /* Just an arbitrary value, FIXME. */
3444 return new_cost (target_spill_cost[speed], 0);
3445 }
3446
3447 mode = TYPE_MODE (TREE_TYPE (expr));
3448 switch (TREE_CODE (expr))
3449 {
3450 case POINTER_PLUS_EXPR:
3451 case PLUS_EXPR:
3452 case MINUS_EXPR:
3453 case NEGATE_EXPR:
3454 cost = new_cost (add_cost (mode, speed), 0);
3455 break;
3456
3457 case MULT_EXPR:
3458 if (cst_and_fits_in_hwi (op0))
3459 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3460 else if (cst_and_fits_in_hwi (op1))
3461 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3462 else
3463 return new_cost (target_spill_cost [speed], 0);
3464 break;
3465
3466 default:
3467 gcc_unreachable ();
3468 }
3469
3470 cost = add_costs (cost, cost0);
3471 cost = add_costs (cost, cost1);
3472
3473 /* Bound the cost by target_spill_cost. The parts of complicated
3474 computations often are either loop invariant or at least can
3475 be shared between several iv uses, so letting this grow without
3476 limits would not give reasonable results. */
3477 if (cost.cost > (int) target_spill_cost [speed])
3478 cost.cost = target_spill_cost [speed];
3479
3480 return cost;
3481 }
3482
3483 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3484 invariants the computation depends on. */
3485
3486 static comp_cost
3487 force_var_cost (struct ivopts_data *data,
3488 tree expr, bitmap *depends_on)
3489 {
3490 if (depends_on)
3491 {
3492 fd_ivopts_data = data;
3493 walk_tree (&expr, find_depends, depends_on, NULL);
3494 }
3495
3496 return force_expr_to_var_cost (expr, data->speed);
3497 }
3498
3499 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3500 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3501 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3502 invariants the computation depends on. */
3503
3504 static comp_cost
3505 split_address_cost (struct ivopts_data *data,
3506 tree addr, bool *symbol_present, bool *var_present,
3507 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3508 {
3509 tree core;
3510 HOST_WIDE_INT bitsize;
3511 HOST_WIDE_INT bitpos;
3512 tree toffset;
3513 enum machine_mode mode;
3514 int unsignedp, volatilep;
3515
3516 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3517 &unsignedp, &volatilep, false);
3518
3519 if (toffset != 0
3520 || bitpos % BITS_PER_UNIT != 0
3521 || TREE_CODE (core) != VAR_DECL)
3522 {
3523 *symbol_present = false;
3524 *var_present = true;
3525 fd_ivopts_data = data;
3526 walk_tree (&addr, find_depends, depends_on, NULL);
3527 return new_cost (target_spill_cost[data->speed], 0);
3528 }
3529
3530 *offset += bitpos / BITS_PER_UNIT;
3531 if (TREE_STATIC (core)
3532 || DECL_EXTERNAL (core))
3533 {
3534 *symbol_present = true;
3535 *var_present = false;
3536 return zero_cost;
3537 }
3538
3539 *symbol_present = false;
3540 *var_present = true;
3541 return zero_cost;
3542 }
3543
3544 /* Estimates cost of expressing difference of addresses E1 - E2 as
3545 var + symbol + offset. The value of offset is added to OFFSET,
3546 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3547 part is missing. DEPENDS_ON is a set of the invariants the computation
3548 depends on. */
3549
3550 static comp_cost
3551 ptr_difference_cost (struct ivopts_data *data,
3552 tree e1, tree e2, bool *symbol_present, bool *var_present,
3553 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3554 {
3555 HOST_WIDE_INT diff = 0;
3556 aff_tree aff_e1, aff_e2;
3557 tree type;
3558
3559 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3560
3561 if (ptr_difference_const (e1, e2, &diff))
3562 {
3563 *offset += diff;
3564 *symbol_present = false;
3565 *var_present = false;
3566 return zero_cost;
3567 }
3568
3569 if (integer_zerop (e2))
3570 return split_address_cost (data, TREE_OPERAND (e1, 0),
3571 symbol_present, var_present, offset, depends_on);
3572
3573 *symbol_present = false;
3574 *var_present = true;
3575
3576 type = signed_type_for (TREE_TYPE (e1));
3577 tree_to_aff_combination (e1, type, &aff_e1);
3578 tree_to_aff_combination (e2, type, &aff_e2);
3579 aff_combination_scale (&aff_e2, double_int_minus_one);
3580 aff_combination_add (&aff_e1, &aff_e2);
3581
3582 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3583 }
3584
3585 /* Estimates cost of expressing difference E1 - E2 as
3586 var + symbol + offset. The value of offset is added to OFFSET,
3587 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3588 part is missing. DEPENDS_ON is a set of the invariants the computation
3589 depends on. */
3590
3591 static comp_cost
3592 difference_cost (struct ivopts_data *data,
3593 tree e1, tree e2, bool *symbol_present, bool *var_present,
3594 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3595 {
3596 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3597 unsigned HOST_WIDE_INT off1, off2;
3598 aff_tree aff_e1, aff_e2;
3599 tree type;
3600
3601 e1 = strip_offset (e1, &off1);
3602 e2 = strip_offset (e2, &off2);
3603 *offset += off1 - off2;
3604
3605 STRIP_NOPS (e1);
3606 STRIP_NOPS (e2);
3607
3608 if (TREE_CODE (e1) == ADDR_EXPR)
3609 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3610 offset, depends_on);
3611 *symbol_present = false;
3612
3613 if (operand_equal_p (e1, e2, 0))
3614 {
3615 *var_present = false;
3616 return zero_cost;
3617 }
3618
3619 *var_present = true;
3620
3621 if (integer_zerop (e2))
3622 return force_var_cost (data, e1, depends_on);
3623
3624 if (integer_zerop (e1))
3625 {
3626 comp_cost cost = force_var_cost (data, e2, depends_on);
3627 cost.cost += multiply_by_cost (-1, mode, data->speed);
3628 return cost;
3629 }
3630
3631 type = signed_type_for (TREE_TYPE (e1));
3632 tree_to_aff_combination (e1, type, &aff_e1);
3633 tree_to_aff_combination (e2, type, &aff_e2);
3634 aff_combination_scale (&aff_e2, double_int_minus_one);
3635 aff_combination_add (&aff_e1, &aff_e2);
3636
3637 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3638 }
3639
3640 /* Determines the cost of the computation by which USE is expressed
3641 from induction variable CAND. If ADDRESS_P is true, we just need
3642 to create an address from it, otherwise we want to get it into a
3643 register. A set of invariants we depend on is stored in
3644 DEPENDS_ON. AT is the statement at which the value is computed.
3645 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3646 addressing is likely. */
3647
3648 static comp_cost
3649 get_computation_cost_at (struct ivopts_data *data,
3650 struct iv_use *use, struct iv_cand *cand,
3651 bool address_p, bitmap *depends_on, gimple at,
3652 bool *can_autoinc)
3653 {
3654 tree ubase = use->iv->base, ustep = use->iv->step;
3655 tree cbase, cstep;
3656 tree utype = TREE_TYPE (ubase), ctype;
3657 unsigned HOST_WIDE_INT cstepi, offset = 0;
3658 HOST_WIDE_INT ratio, aratio;
3659 bool var_present, symbol_present, stmt_is_after_inc;
3660 comp_cost cost;
3661 double_int rat;
3662 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3663
3664 *depends_on = NULL;
3665
3666 /* Only consider real candidates. */
3667 if (!cand->iv)
3668 return infinite_cost;
3669
3670 cbase = cand->iv->base;
3671 cstep = cand->iv->step;
3672 ctype = TREE_TYPE (cbase);
3673
3674 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3675 {
3676 /* We do not have enough precision to express the values of USE. */
3677 return infinite_cost;
3678 }
3679
3680 if (address_p)
3681 {
3682 /* Do not try to express address of an object with computation based
3683 on address of a different object. This may cause problems in rtl
3684 level alias analysis (that does not expect this to be happening,
3685 as this is illegal in C), and would be unlikely to be useful
3686 anyway. */
3687 if (use->iv->base_object
3688 && cand->iv->base_object
3689 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3690 return infinite_cost;
3691 }
3692
3693 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3694 {
3695 /* TODO -- add direct handling of this case. */
3696 goto fallback;
3697 }
3698
3699 /* CSTEPI is removed from the offset in case the statement is after the
3700 increment. If the step is not constant, we use zero instead.
3701 This is a bit imprecise (there is the extra addition), but
3702 redundancy elimination is likely to transform the code so that
3703 it uses the value of the variable before the increment anyway,
3704 so this is not too unrealistic. */
3705 if (cst_and_fits_in_hwi (cstep))
3706 cstepi = int_cst_value (cstep);
3707 else
3708 cstepi = 0;
3709
3710 if (!constant_multiple_of (ustep, cstep, &rat))
3711 return infinite_cost;
3712
3713 if (double_int_fits_in_shwi_p (rat))
3714 ratio = double_int_to_shwi (rat);
3715 else
3716 return infinite_cost;
3717
3718 STRIP_NOPS (cbase);
3719 ctype = TREE_TYPE (cbase);
3720
3721 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3722 or ratio == 1, it is better to handle this like
3723
3724 ubase - ratio * cbase + ratio * var
3725
3726 (this also holds in the case ratio == -1, TODO). */
3727
3728 if (cst_and_fits_in_hwi (cbase))
3729 {
3730 offset = - ratio * int_cst_value (cbase);
3731 cost = difference_cost (data,
3732 ubase, build_int_cst (utype, 0),
3733 &symbol_present, &var_present, &offset,
3734 depends_on);
3735 }
3736 else if (ratio == 1)
3737 {
3738 cost = difference_cost (data,
3739 ubase, cbase,
3740 &symbol_present, &var_present, &offset,
3741 depends_on);
3742 }
3743 else if (address_p
3744 && !POINTER_TYPE_P (ctype)
3745 && multiplier_allowed_in_address_p (ratio,
3746 TYPE_MODE (TREE_TYPE (utype))))
3747 {
3748 cbase
3749 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3750 cost = difference_cost (data,
3751 ubase, cbase,
3752 &symbol_present, &var_present, &offset,
3753 depends_on);
3754 }
3755 else
3756 {
3757 cost = force_var_cost (data, cbase, depends_on);
3758 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3759 cost = add_costs (cost,
3760 difference_cost (data,
3761 ubase, build_int_cst (utype, 0),
3762 &symbol_present, &var_present,
3763 &offset, depends_on));
3764 }
3765
3766 /* If we are after the increment, the value of the candidate is higher by
3767 one iteration. */
3768 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3769 if (stmt_is_after_inc)
3770 offset -= ratio * cstepi;
3771
3772 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3773 (symbol/var1/const parts may be omitted). If we are looking for an
3774 address, find the cost of addressing this. */
3775 if (address_p)
3776 return add_costs (cost,
3777 get_address_cost (symbol_present, var_present,
3778 offset, ratio, cstepi,
3779 TYPE_MODE (TREE_TYPE (utype)),
3780 speed, stmt_is_after_inc,
3781 can_autoinc));
3782
3783 /* Otherwise estimate the costs for computing the expression. */
3784 if (!symbol_present && !var_present && !offset)
3785 {
3786 if (ratio != 1)
3787 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3788 return cost;
3789 }
3790
3791 /* Symbol + offset should be compile-time computable, so consider them
3792 added once to the variable, if present. */
3793 if (var_present && (symbol_present || offset))
3794 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3795 / AVG_LOOP_NITER (data->current_loop);
3796
3797 /* Having offset does not affect runtime cost in case it is added to
3798 symbol, but it increases complexity. */
3799 if (offset)
3800 cost.complexity++;
3801
3802 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3803
3804 aratio = ratio > 0 ? ratio : -ratio;
3805 if (aratio != 1)
3806 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3807
3808 fallback:
3809 if (can_autoinc)
3810 *can_autoinc = false;
3811
3812 {
3813 /* Just get the expression, expand it and measure the cost. */
3814 tree comp = get_computation_at (data->current_loop, use, cand, at);
3815
3816 if (!comp)
3817 return infinite_cost;
3818
3819 if (address_p)
3820 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3821
3822 return new_cost (computation_cost (comp, speed), 0);
3823 }
3824 }
3825
3826 /* Determines the cost of the computation by which USE is expressed
3827 from induction variable CAND. If ADDRESS_P is true, we just need
3828 to create an address from it, otherwise we want to get it into a
3829 register. A set of invariants we depend on is stored in
3830 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3831 autoinc addressing is likely. */
3832
3833 static comp_cost
3834 get_computation_cost (struct ivopts_data *data,
3835 struct iv_use *use, struct iv_cand *cand,
3836 bool address_p, bitmap *depends_on, bool *can_autoinc)
3837 {
3838 return get_computation_cost_at (data,
3839 use, cand, address_p, depends_on, use->stmt,
3840 can_autoinc);
3841 }
3842
3843 /* Determines cost of basing replacement of USE on CAND in a generic
3844 expression. */
3845
3846 static bool
3847 determine_use_iv_cost_generic (struct ivopts_data *data,
3848 struct iv_use *use, struct iv_cand *cand)
3849 {
3850 bitmap depends_on;
3851 comp_cost cost;
3852
3853 /* The simple case first -- if we need to express value of the preserved
3854 original biv, the cost is 0. This also prevents us from counting the
3855 cost of increment twice -- once at this use and once in the cost of
3856 the candidate. */
3857 if (cand->pos == IP_ORIGINAL
3858 && cand->incremented_at == use->stmt)
3859 {
3860 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3861 return true;
3862 }
3863
3864 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3865 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3866
3867 return !infinite_cost_p (cost);
3868 }
3869
3870 /* Determines cost of basing replacement of USE on CAND in an address. */
3871
3872 static bool
3873 determine_use_iv_cost_address (struct ivopts_data *data,
3874 struct iv_use *use, struct iv_cand *cand)
3875 {
3876 bitmap depends_on;
3877 bool can_autoinc;
3878 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3879 &can_autoinc);
3880
3881 if (cand->ainc_use == use)
3882 {
3883 if (can_autoinc)
3884 cost.cost -= cand->cost_step;
3885 /* If we generated the candidate solely for exploiting autoincrement
3886 opportunities, and it turns out it can't be used, set the cost to
3887 infinity to make sure we ignore it. */
3888 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3889 cost = infinite_cost;
3890 }
3891 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3892
3893 return !infinite_cost_p (cost);
3894 }
3895
3896 /* Computes the value of candidate CAND at position AT in iteration NITER, and
3897 stores it in VAL. */
3898
3899 static void
3900 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3901 aff_tree *val)
3902 {
3903 aff_tree step, delta, nit;
3904 struct iv *iv = cand->iv;
3905 tree type = TREE_TYPE (iv->base);
3906 tree steptype = type;
3907 if (POINTER_TYPE_P (type))
3908 steptype = sizetype;
3909
3910 tree_to_aff_combination (iv->step, steptype, &step);
3911 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3912 aff_combination_convert (&nit, steptype);
3913 aff_combination_mult (&nit, &step, &delta);
3914 if (stmt_after_increment (loop, cand, at))
3915 aff_combination_add (&delta, &step);
3916
3917 tree_to_aff_combination (iv->base, type, val);
3918 aff_combination_add (val, &delta);
3919 }
3920
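/* Worked example (hypothetical iv): for a candidate with base 16 and
   step 4, the value after NITER iterations is

     val = 16 + 4 * niter

   with one extra step added when AT executes after the increment,
   i.e. val = 16 + 4 * (niter + 1).  */
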
3921 /* Returns the period of induction variable IV. */
3922
3923 static tree
3924 iv_period (struct iv *iv)
3925 {
3926 tree step = iv->step, period, type;
3927 tree pow2div;
3928
3929 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3930
3931 /* The period of the iv is gcd (step, type range). Since the type range is
3932 a power of two, it suffices to determine the maximum power of two that
3933 divides the step. */
3934 pow2div = num_ending_zeros (step);
3935 type = unsigned_type_for (TREE_TYPE (step));
3936
3937 period = build_low_bits_mask (type,
3938 (TYPE_PRECISION (type)
3939 - tree_low_cst (pow2div, 1)));
3940
3941 return period;
3942 }
3943
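/* Worked example: for step == 4 in a 32-bit unsigned type,
   num_ending_zeros yields 2, so the period is the low-bits mask of
   32 - 2 == 30 bits, i.e. 0x3fffffff: the iv takes 2^30 distinct
   values before wrapping, so an exit test based on it stays reliable
   for at most that many iterations.  */
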
3944 /* Returns the comparison operator used when eliminating the iv USE. */
3945
3946 static enum tree_code
3947 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3948 {
3949 struct loop *loop = data->current_loop;
3950 basic_block ex_bb;
3951 edge exit;
3952
3953 ex_bb = gimple_bb (use->stmt);
3954 exit = EDGE_SUCC (ex_bb, 0);
3955 if (flow_bb_inside_loop_p (loop, exit->dest))
3956 exit = EDGE_SUCC (ex_bb, 1);
3957
3958 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3959 }
3960
3961 /* Check whether it is possible to express the condition in USE by a comparison
3962 of candidate CAND. If so, store the value to compare against to BOUND. */
3963
3964 static bool
3965 may_eliminate_iv (struct ivopts_data *data,
3966 struct iv_use *use, struct iv_cand *cand, tree *bound)
3967 {
3968 basic_block ex_bb;
3969 edge exit;
3970 tree nit, period;
3971 struct loop *loop = data->current_loop;
3972 aff_tree bnd;
3973
3974 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3975 return false;
3976
3977 /* For now this works only for exits that dominate the loop latch.
3978 TODO: extend to other conditions inside the loop body. */
3979 ex_bb = gimple_bb (use->stmt);
3980 if (use->stmt != last_stmt (ex_bb)
3981 || gimple_code (use->stmt) != GIMPLE_COND
3982 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3983 return false;
3984
3985 exit = EDGE_SUCC (ex_bb, 0);
3986 if (flow_bb_inside_loop_p (loop, exit->dest))
3987 exit = EDGE_SUCC (ex_bb, 1);
3988 if (flow_bb_inside_loop_p (loop, exit->dest))
3989 return false;
3990
3991 nit = niter_for_exit (data, exit);
3992 if (!nit)
3993 return false;
3994
3995 /* Determine whether we can use the variable to test the exit condition.
3996 This is the case iff the period of the induction variable is greater
3997 than the number of iterations for which the exit condition is true. */
3998 period = iv_period (cand->iv);
3999
4000 /* If the number of iterations is constant, compare against it directly. */
4001 if (TREE_CODE (nit) == INTEGER_CST)
4002 {
4003 if (!tree_int_cst_lt (nit, period))
4004 return false;
4005 }
4006
4007 /* If not, and if this is the only possible exit of the loop, see whether
4008 we can get a conservative estimate on the number of iterations of the
4009 entire loop and compare against that instead. */
4010 else if (loop_only_exit_p (loop, exit))
4011 {
4012 double_int period_value, max_niter;
4013 if (!estimated_loop_iterations (loop, true, &max_niter))
4014 return false;
4015 period_value = tree_to_double_int (period);
4016 if (double_int_ucmp (max_niter, period_value) >= 0)
4017 return false;
4018 }
4019
4020 /* Otherwise, punt. */
4021 else
4022 return false;
4023
4024 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4025
4026 *bound = aff_combination_to_tree (&bnd);
4027 /* If computing the bound requires an expensive operation such as a division,
4028 eliminating the iv is unlikely to be more profitable than keeping it. */
4029 if (expression_expensive_p (*bound))
4030 return false;
4031 return true;
4032 }
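
/* As an illustration of iv elimination (with made-up names): in

     for (i = 0; i < 100; i++)
       a[i] = 0;

   where the store is also expressed through a pointer candidate P walking
   over A, the value B of P after 100 iterations is computed by
   cand_value_at, and the exit test on I can be replaced by a comparison
   of P against B, allowing I to be removed entirely.  */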
4033
4034 /* Determines cost of basing replacement of USE on CAND in a condition. */
4035
4036 static bool
4037 determine_use_iv_cost_condition (struct ivopts_data *data,
4038 struct iv_use *use, struct iv_cand *cand)
4039 {
4040 tree bound = NULL_TREE;
4041 struct iv *cmp_iv;
4042 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4043 comp_cost elim_cost, express_cost, cost;
4044 bool ok;
4045
4046 /* Only consider real candidates. */
4047 if (!cand->iv)
4048 {
4049 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4050 return false;
4051 }
4052
4053 /* Try iv elimination. */
4054 if (may_eliminate_iv (data, use, cand, &bound))
4055 {
4056 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4057 /* The bound is a loop invariant, so it will be computed
4058 only once. */
4059 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
4060 }
4061 else
4062 elim_cost = infinite_cost;
4063
4064 /* Try expressing the original giv. If it is compared with an invariant,
4065 note that we cannot get rid of it. */
4066 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
4067 gcc_assert (ok);
4068
4069 express_cost = get_computation_cost (data, use, cand, false,
4070 &depends_on_express, NULL);
4071 fd_ivopts_data = data;
4072 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4073
4074 /* Choose the better approach, preferring the eliminated IV. */
4075 if (compare_costs (elim_cost, express_cost) <= 0)
4076 {
4077 cost = elim_cost;
4078 depends_on = depends_on_elim;
4079 depends_on_elim = NULL;
4080 }
4081 else
4082 {
4083 cost = express_cost;
4084 depends_on = depends_on_express;
4085 depends_on_express = NULL;
4086 bound = NULL_TREE;
4087 }
4088
4089 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4090
4091 if (depends_on_elim)
4092 BITMAP_FREE (depends_on_elim);
4093 if (depends_on_express)
4094 BITMAP_FREE (depends_on_express);
4095
4096 return !infinite_cost_p (cost);
4097 }
4098
4099 /* Determines cost of basing replacement of USE on CAND. Returns false
4100 if USE cannot be based on CAND. */
4101
4102 static bool
4103 determine_use_iv_cost (struct ivopts_data *data,
4104 struct iv_use *use, struct iv_cand *cand)
4105 {
4106 switch (use->type)
4107 {
4108 case USE_NONLINEAR_EXPR:
4109 return determine_use_iv_cost_generic (data, use, cand);
4110
4111 case USE_ADDRESS:
4112 return determine_use_iv_cost_address (data, use, cand);
4113
4114 case USE_COMPARE:
4115 return determine_use_iv_cost_condition (data, use, cand);
4116
4117 default:
4118 gcc_unreachable ();
4119 }
4120 }
4121
4122 /* Return true if get_computation_cost indicates that autoincrement is
4123 a possibility for the pair of USE and CAND, false otherwise. */
4124
4125 static bool
4126 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4127 struct iv_cand *cand)
4128 {
4129 bitmap depends_on;
4130 bool can_autoinc;
4131 comp_cost cost;
4132
4133 if (use->type != USE_ADDRESS)
4134 return false;
4135
4136 cost = get_computation_cost (data, use, cand, true, &depends_on,
4137 &can_autoinc);
4138
4139 BITMAP_FREE (depends_on);
4140
4141 return !infinite_cost_p (cost) && can_autoinc;
4142 }
4143
4144 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4145 use that allows autoincrement, and set their AINC_USE if possible. */
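/* For example, on a target with post-increment addressing, a use "*p"
   placed just before the original biv increment "p = p + 1" in the same
   basic block can be combined with it into a single post-increment
   access, so such a use is recorded as the candidate's AINC_USE.  */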
4146
4147 static void
4148 set_autoinc_for_original_candidates (struct ivopts_data *data)
4149 {
4150 unsigned i, j;
4151
4152 for (i = 0; i < n_iv_cands (data); i++)
4153 {
4154 struct iv_cand *cand = iv_cand (data, i);
4155 struct iv_use *closest = NULL;
4156 if (cand->pos != IP_ORIGINAL)
4157 continue;
4158 for (j = 0; j < n_iv_uses (data); j++)
4159 {
4160 struct iv_use *use = iv_use (data, j);
4161 unsigned uid = gimple_uid (use->stmt);
4162 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4163 || uid > gimple_uid (cand->incremented_at))
4164 continue;
4165 if (closest == NULL || uid > gimple_uid (closest->stmt))
4166 closest = use;
4167 }
4168 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4169 continue;
4170 cand->ainc_use = closest;
4171 }
4172 }
4173
4174 /* Finds the candidates for the induction variables. */
4175
4176 static void
4177 find_iv_candidates (struct ivopts_data *data)
4178 {
4179 /* Add commonly used ivs. */
4180 add_standard_iv_candidates (data);
4181
4182 /* Add old induction variables. */
4183 add_old_ivs_candidates (data);
4184
4185 /* Add induction variables derived from uses. */
4186 add_derived_ivs_candidates (data);
4187
4188 set_autoinc_for_original_candidates (data);
4189
4190 /* Record the important candidates. */
4191 record_important_candidates (data);
4192 }
4193
4194 /* Determines the cost of basing each use on each of the iv candidates. */
4195
4196 static void
4197 determine_use_iv_costs (struct ivopts_data *data)
4198 {
4199 unsigned i, j;
4200 struct iv_use *use;
4201 struct iv_cand *cand;
4202 bitmap to_clear = BITMAP_ALLOC (NULL);
4203
4204 alloc_use_cost_map (data);
4205
4206 for (i = 0; i < n_iv_uses (data); i++)
4207 {
4208 use = iv_use (data, i);
4209
4210 if (data->consider_all_candidates)
4211 {
4212 for (j = 0; j < n_iv_cands (data); j++)
4213 {
4214 cand = iv_cand (data, j);
4215 determine_use_iv_cost (data, use, cand);
4216 }
4217 }
4218 else
4219 {
4220 bitmap_iterator bi;
4221
4222 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4223 {
4224 cand = iv_cand (data, j);
4225 if (!determine_use_iv_cost (data, use, cand))
4226 bitmap_set_bit (to_clear, j);
4227 }
4228
4229 /* Remove the candidates for which the cost is infinite from
4230 the list of related candidates. */
4231 bitmap_and_compl_into (use->related_cands, to_clear);
4232 bitmap_clear (to_clear);
4233 }
4234 }
4235
4236 BITMAP_FREE (to_clear);
4237
4238 if (dump_file && (dump_flags & TDF_DETAILS))
4239 {
4240 fprintf (dump_file, "Use-candidate costs:\n");
4241
4242 for (i = 0; i < n_iv_uses (data); i++)
4243 {
4244 use = iv_use (data, i);
4245
4246 fprintf (dump_file, "Use %d:\n", i);
4247 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4248 for (j = 0; j < use->n_map_members; j++)
4249 {
4250 if (!use->cost_map[j].cand
4251 || infinite_cost_p (use->cost_map[j].cost))
4252 continue;
4253
4254 fprintf (dump_file, " %d\t%d\t%d\t",
4255 use->cost_map[j].cand->id,
4256 use->cost_map[j].cost.cost,
4257 use->cost_map[j].cost.complexity);
4258 if (use->cost_map[j].depends_on)
4259 bitmap_print (dump_file,
4260 use->cost_map[j].depends_on, "","");
4261 fprintf (dump_file, "\n");
4262 }
4263
4264 fprintf (dump_file, "\n");
4265 }
4266 fprintf (dump_file, "\n");
4267 }
4268 }
4269
4270 /* Determines cost of the candidate CAND. */
4271
4272 static void
4273 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4274 {
4275 comp_cost cost_base;
4276 unsigned cost, cost_step;
4277 tree base;
4278
4279 if (!cand->iv)
4280 {
4281 cand->cost = 0;
4282 return;
4283 }
4284
4285 /* There are two costs associated with the candidate -- its increment
4286 and its initialization. The latter is almost negligible for any loop
4287 that rolls enough, so we take it into account only marginally. */
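/* Since the initialization is executed once per loop rather than once per
   iteration, only a 1 / AVG_LOOP_NITER fraction of the cost of computing
   the base value is charged below.  */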
4288
4289 base = cand->iv->base;
4290 cost_base = force_var_cost (data, base, NULL);
4291 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4292
4293 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (data->current_loop);
4294
4295 /* Prefer the original ivs unless we may gain something by replacing them;
4296 the reason is to make debugging simpler. This preference is thus not
4297 relevant for artificial ivs created by other optimization passes. */
4298 if (cand->pos != IP_ORIGINAL
4299 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4300 cost++;
4301
4302 /* Prefer not to insert statements into the latch unless there are some
4303 already (so that we do not create unnecessary jumps). */
4304 if (cand->pos == IP_END
4305 && empty_block_p (ip_end_pos (data->current_loop)))
4306 cost++;
4307
4308 cand->cost = cost;
4309 cand->cost_step = cost_step;
4310 }
4311
4312 /* Determines the computation cost of each of the candidates. */
4313
4314 static void
4315 determine_iv_costs (struct ivopts_data *data)
4316 {
4317 unsigned i;
4318
4319 if (dump_file && (dump_flags & TDF_DETAILS))
4320 {
4321 fprintf (dump_file, "Candidate costs:\n");
4322 fprintf (dump_file, " cand\tcost\n");
4323 }
4324
4325 for (i = 0; i < n_iv_cands (data); i++)
4326 {
4327 struct iv_cand *cand = iv_cand (data, i);
4328
4329 determine_iv_cost (data, cand);
4330
4331 if (dump_file && (dump_flags & TDF_DETAILS))
4332 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4333 }
4334
4335 if (dump_file && (dump_flags & TDF_DETAILS))
4336 fprintf (dump_file, "\n");
4337 }
4338
4339 /* Calculates cost for having SIZE induction variables. */
4340
4341 static unsigned
4342 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4343 {
4344 /* We add size to the cost, so that we prefer eliminating ivs
4345 if possible. */
4346 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4347 }
4348
4349 /* For each size of the induction variable set, determine the penalty. */
4350
4351 static void
4352 determine_set_costs (struct ivopts_data *data)
4353 {
4354 unsigned j, n;
4355 gimple phi;
4356 gimple_stmt_iterator psi;
4357 tree op;
4358 struct loop *loop = data->current_loop;
4359 bitmap_iterator bi;
4360
4361 /* We use the following model (definitely improvable, especially the
4362 cost function -- TODO):
4363
4364 We estimate the number of registers available (using MD data), name it A.
4365
4366 We estimate the number of registers used by the loop, name it U. This
4367 number is obtained as the number of loop phi nodes (not counting virtual
4368 registers and bivs) + the number of variables from outside of the loop.
4369
4370 We set a reserve R (free regs that are used for temporary computations,
4371 etc.). For now the reserve is a constant 3.
4372
4373 Let I be the number of induction variables.
4374
4375 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are not
4376 encouraged to create a lot of ivs for no reason).
4377 -- if A - R < U + I <= A, the cost is I * PRES_COST
4378 -- if U + I > A, the cost is I * PRES_COST and
4379 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
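/* A small illustration, with made-up numbers: for A = 16 and R = 3, a loop
   already using U = 10 registers can accommodate I = 3 ivs at the small
   cost (10 + 3 + 3 <= 16); with I = 5 we get A - R < U + I <= A and each
   iv is charged PRES_COST; with I = 8, U + I exceeds A and the spill term
   is added as well.  */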
4380
4381 if (dump_file && (dump_flags & TDF_DETAILS))
4382 {
4383 fprintf (dump_file, "Global costs:\n");
4384 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4385 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4386 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4387 }
4388
4389 n = 0;
4390 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4391 {
4392 phi = gsi_stmt (psi);
4393 op = PHI_RESULT (phi);
4394
4395 if (!is_gimple_reg (op))
4396 continue;
4397
4398 if (get_iv (data, op))
4399 continue;
4400
4401 n++;
4402 }
4403
4404 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4405 {
4406 struct version_info *info = ver_info (data, j);
4407
4408 if (info->inv_id && info->has_nonlin_use)
4409 n++;
4410 }
4411
4412 data->regs_used = n;
4413 if (dump_file && (dump_flags & TDF_DETAILS))
4414 fprintf (dump_file, " regs_used %d\n", n);
4415
4416 if (dump_file && (dump_flags & TDF_DETAILS))
4417 {
4418 fprintf (dump_file, " cost for size:\n");
4419 fprintf (dump_file, " ivs\tcost\n");
4420 for (j = 0; j <= 2 * target_avail_regs; j++)
4421 fprintf (dump_file, " %d\t%d\n", j,
4422 ivopts_global_cost_for_size (data, j));
4423 fprintf (dump_file, "\n");
4424 }
4425 }
4426
4427 /* Returns true if A is a cheaper cost pair than B. */
4428
4429 static bool
4430 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4431 {
4432 int cmp;
4433
4434 if (!a)
4435 return false;
4436
4437 if (!b)
4438 return true;
4439
4440 cmp = compare_costs (a->cost, b->cost);
4441 if (cmp < 0)
4442 return true;
4443
4444 if (cmp > 0)
4445 return false;
4446
4447 /* In case the costs are the same, prefer the cheaper candidate. */
4448 if (a->cand->cost < b->cand->cost)
4449 return true;
4450
4451 return false;
4452 }
4453
4454 /* Computes the cost field of the IVS structure. */
4455
4456 static void
4457 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4458 {
4459 comp_cost cost = ivs->cand_use_cost;
4460 cost.cost += ivs->cand_cost;
4461 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4462
4463 ivs->cost = cost;
4464 }
4465
4466 /* Remove the invariants in set INVS from set IVS. */
4467
4468 static void
4469 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4470 {
4471 bitmap_iterator bi;
4472 unsigned iid;
4473
4474 if (!invs)
4475 return;
4476
4477 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4478 {
4479 ivs->n_invariant_uses[iid]--;
4480 if (ivs->n_invariant_uses[iid] == 0)
4481 ivs->n_regs--;
4482 }
4483 }
4484
4485 /* Set USE not to be expressed by any candidate in IVS. */
4486
4487 static void
4488 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4489 struct iv_use *use)
4490 {
4491 unsigned uid = use->id, cid;
4492 struct cost_pair *cp;
4493
4494 cp = ivs->cand_for_use[uid];
4495 if (!cp)
4496 return;
4497 cid = cp->cand->id;
4498
4499 ivs->bad_uses++;
4500 ivs->cand_for_use[uid] = NULL;
4501 ivs->n_cand_uses[cid]--;
4502
4503 if (ivs->n_cand_uses[cid] == 0)
4504 {
4505 bitmap_clear_bit (ivs->cands, cid);
4506 /* Do not count the pseudocandidates. */
4507 if (cp->cand->iv)
4508 ivs->n_regs--;
4509 ivs->n_cands--;
4510 ivs->cand_cost -= cp->cand->cost;
4511
4512 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4513 }
4514
4515 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4516
4517 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4518 iv_ca_recount_cost (data, ivs);
4519 }
4520
4521 /* Add invariants in set INVS to set IVS. */
4522
4523 static void
4524 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4525 {
4526 bitmap_iterator bi;
4527 unsigned iid;
4528
4529 if (!invs)
4530 return;
4531
4532 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4533 {
4534 ivs->n_invariant_uses[iid]++;
4535 if (ivs->n_invariant_uses[iid] == 1)
4536 ivs->n_regs++;
4537 }
4538 }
4539
4540 /* Set cost pair for USE in set IVS to CP. */
4541
4542 static void
4543 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4544 struct iv_use *use, struct cost_pair *cp)
4545 {
4546 unsigned uid = use->id, cid;
4547
4548 if (ivs->cand_for_use[uid] == cp)
4549 return;
4550
4551 if (ivs->cand_for_use[uid])
4552 iv_ca_set_no_cp (data, ivs, use);
4553
4554 if (cp)
4555 {
4556 cid = cp->cand->id;
4557
4558 ivs->bad_uses--;
4559 ivs->cand_for_use[uid] = cp;
4560 ivs->n_cand_uses[cid]++;
4561 if (ivs->n_cand_uses[cid] == 1)
4562 {
4563 bitmap_set_bit (ivs->cands, cid);
4564 /* Do not count the pseudocandidates. */
4565 if (cp->cand->iv)
4566 ivs->n_regs++;
4567 ivs->n_cands++;
4568 ivs->cand_cost += cp->cand->cost;
4569
4570 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4571 }
4572
4573 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4574 iv_ca_set_add_invariants (ivs, cp->depends_on);
4575 iv_ca_recount_cost (data, ivs);
4576 }
4577 }
4578
4579 /* Extend set IVS by expressing USE by some of the candidates in it
4580 if possible. */
4581
4582 static void
4583 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4584 struct iv_use *use)
4585 {
4586 struct cost_pair *best_cp = NULL, *cp;
4587 bitmap_iterator bi;
4588 unsigned i;
4589
4590 gcc_assert (ivs->upto >= use->id);
4591
4592 if (ivs->upto == use->id)
4593 {
4594 ivs->upto++;
4595 ivs->bad_uses++;
4596 }
4597
4598 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4599 {
4600 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4601
4602 if (cheaper_cost_pair (cp, best_cp))
4603 best_cp = cp;
4604 }
4605
4606 iv_ca_set_cp (data, ivs, use, best_cp);
4607 }
4608
4609 /* Get cost for assignment IVS. */
4610
4611 static comp_cost
4612 iv_ca_cost (struct iv_ca *ivs)
4613 {
4614 /* This was a conditional expression but it triggered a bug in
4615 Sun C 5.5. */
4616 if (ivs->bad_uses)
4617 return infinite_cost;
4618 else
4619 return ivs->cost;
4620 }
4621
4622 /* Returns true if all dependences of CP are among invariants in IVS. */
4623
4624 static bool
4625 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4626 {
4627 unsigned i;
4628 bitmap_iterator bi;
4629
4630 if (!cp->depends_on)
4631 return true;
4632
4633 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4634 {
4635 if (ivs->n_invariant_uses[i] == 0)
4636 return false;
4637 }
4638
4639 return true;
4640 }
4641
4642 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4643 it before NEXT_CHANGE. */
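/* A typical pattern for using these deltas: build a list of changes,
   commit it with iv_ca_delta_commit (..., true), measure iv_ca_cost of
   the result, revert it with iv_ca_delta_commit (..., false), and keep
   the delta only if the cost improved.  */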
4644
4645 static struct iv_ca_delta *
4646 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4647 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4648 {
4649 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4650
4651 change->use = use;
4652 change->old_cp = old_cp;
4653 change->new_cp = new_cp;
4654 change->next_change = next_change;
4655
4656 return change;
4657 }
4658
4659 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4660 are rewritten. */
4661
4662 static struct iv_ca_delta *
4663 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4664 {
4665 struct iv_ca_delta *last;
4666
4667 if (!l2)
4668 return l1;
4669
4670 if (!l1)
4671 return l2;
4672
4673 for (last = l1; last->next_change; last = last->next_change)
4674 continue;
4675 last->next_change = l2;
4676
4677 return l1;
4678 }
4679
4680 /* Returns the cost pair through which USE is expressed in IVS. */
4681
4682 static struct cost_pair *
4683 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4684 {
4685 return ivs->cand_for_use[use->id];
4686 }
4687
4688 /* Reverses the list of changes DELTA in place, forming its inverse (the old and new cost pairs are swapped). */
4689
4690 static struct iv_ca_delta *
4691 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4692 {
4693 struct iv_ca_delta *act, *next, *prev = NULL;
4694 struct cost_pair *tmp;
4695
4696 for (act = delta; act; act = next)
4697 {
4698 next = act->next_change;
4699 act->next_change = prev;
4700 prev = act;
4701
4702 tmp = act->old_cp;
4703 act->old_cp = act->new_cp;
4704 act->new_cp = tmp;
4705 }
4706
4707 return prev;
4708 }
4709
4710 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4711 reverted instead. */
4712
4713 static void
4714 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4715 struct iv_ca_delta *delta, bool forward)
4716 {
4717 struct cost_pair *from, *to;
4718 struct iv_ca_delta *act;
4719
4720 if (!forward)
4721 delta = iv_ca_delta_reverse (delta);
4722
4723 for (act = delta; act; act = act->next_change)
4724 {
4725 from = act->old_cp;
4726 to = act->new_cp;
4727 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4728 iv_ca_set_cp (data, ivs, act->use, to);
4729 }
4730
4731 if (!forward)
4732 iv_ca_delta_reverse (delta);
4733 }
4734
4735 /* Returns true if CAND is used in IVS. */
4736
4737 static bool
4738 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4739 {
4740 return ivs->n_cand_uses[cand->id] > 0;
4741 }
4742
4743 /* Returns the number of induction variable candidates in the set IVS. */
4744
4745 static unsigned
4746 iv_ca_n_cands (struct iv_ca *ivs)
4747 {
4748 return ivs->n_cands;
4749 }
4750
4751 /* Free the list of changes DELTA. */
4752
4753 static void
4754 iv_ca_delta_free (struct iv_ca_delta **delta)
4755 {
4756 struct iv_ca_delta *act, *next;
4757
4758 for (act = *delta; act; act = next)
4759 {
4760 next = act->next_change;
4761 free (act);
4762 }
4763
4764 *delta = NULL;
4765 }
4766
4767 /* Allocates a new iv candidate assignment. */
4768
4769 static struct iv_ca *
4770 iv_ca_new (struct ivopts_data *data)
4771 {
4772 struct iv_ca *nw = XNEW (struct iv_ca);
4773
4774 nw->upto = 0;
4775 nw->bad_uses = 0;
4776 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4777 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4778 nw->cands = BITMAP_ALLOC (NULL);
4779 nw->n_cands = 0;
4780 nw->n_regs = 0;
4781 nw->cand_use_cost = zero_cost;
4782 nw->cand_cost = 0;
4783 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4784 nw->cost = zero_cost;
4785
4786 return nw;
4787 }
4788
4789 /* Free memory occupied by the set IVS. */
4790
4791 static void
4792 iv_ca_free (struct iv_ca **ivs)
4793 {
4794 free ((*ivs)->cand_for_use);
4795 free ((*ivs)->n_cand_uses);
4796 BITMAP_FREE ((*ivs)->cands);
4797 free ((*ivs)->n_invariant_uses);
4798 free (*ivs);
4799 *ivs = NULL;
4800 }
4801
4802 /* Dumps IVS to FILE. */
4803
4804 static void
4805 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4806 {
4807 const char *pref = " invariants ";
4808 unsigned i;
4809 comp_cost cost = iv_ca_cost (ivs);
4810
4811 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4812 bitmap_print (file, ivs->cands, " candidates ","\n");
4813
4814 for (i = 1; i <= data->max_inv_id; i++)
4815 if (ivs->n_invariant_uses[i])
4816 {
4817 fprintf (file, "%s%d", pref, i);
4818 pref = ", ";
4819 }
4820 fprintf (file, "\n");
4821 }
4822
4823 /* Try expressing each use in IVS by CAND where that is cheaper. Return the
4824 cost of the new set, and store the differences in DELTA. The number of
4825 induction variables in the new set is stored to N_IVS. */
4826
4827 static comp_cost
4828 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4829 struct iv_cand *cand, struct iv_ca_delta **delta,
4830 unsigned *n_ivs)
4831 {
4832 unsigned i;
4833 comp_cost cost;
4834 struct iv_use *use;
4835 struct cost_pair *old_cp, *new_cp;
4836
4837 *delta = NULL;
4838 for (i = 0; i < ivs->upto; i++)
4839 {
4840 use = iv_use (data, i);
4841 old_cp = iv_ca_cand_for_use (ivs, use);
4842
4843 if (old_cp
4844 && old_cp->cand == cand)
4845 continue;
4846
4847 new_cp = get_use_iv_cost (data, use, cand);
4848 if (!new_cp)
4849 continue;
4850
4851 if (!iv_ca_has_deps (ivs, new_cp))
4852 continue;
4853
4854 if (!cheaper_cost_pair (new_cp, old_cp))
4855 continue;
4856
4857 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4858 }
4859
4860 iv_ca_delta_commit (data, ivs, *delta, true);
4861 cost = iv_ca_cost (ivs);
4862 if (n_ivs)
4863 *n_ivs = iv_ca_n_cands (ivs);
4864 iv_ca_delta_commit (data, ivs, *delta, false);
4865
4866 return cost;
4867 }
4868
4869 /* Try narrowing set IVS by removing CAND. Return the cost of
4870 the new set and store the differences in DELTA. */
4871
4872 static comp_cost
4873 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4874 struct iv_cand *cand, struct iv_ca_delta **delta)
4875 {
4876 unsigned i, ci;
4877 struct iv_use *use;
4878 struct cost_pair *old_cp, *new_cp, *cp;
4879 bitmap_iterator bi;
4880 struct iv_cand *cnd;
4881 comp_cost cost;
4882
4883 *delta = NULL;
4884 for (i = 0; i < n_iv_uses (data); i++)
4885 {
4886 use = iv_use (data, i);
4887
4888 old_cp = iv_ca_cand_for_use (ivs, use);
4889 if (old_cp->cand != cand)
4890 continue;
4891
4892 new_cp = NULL;
4893
4894 if (data->consider_all_candidates)
4895 {
4896 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4897 {
4898 if (ci == cand->id)
4899 continue;
4900
4901 cnd = iv_cand (data, ci);
4902
4903 cp = get_use_iv_cost (data, use, cnd);
4904 if (!cp)
4905 continue;
4906 if (!iv_ca_has_deps (ivs, cp))
4907 continue;
4908
4909 if (!cheaper_cost_pair (cp, new_cp))
4910 continue;
4911
4912 new_cp = cp;
4913 }
4914 }
4915 else
4916 {
4917 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4918 {
4919 if (ci == cand->id)
4920 continue;
4921
4922 cnd = iv_cand (data, ci);
4923
4924 cp = get_use_iv_cost (data, use, cnd);
4925 if (!cp)
4926 continue;
4927 if (!iv_ca_has_deps (ivs, cp))
4928 continue;
4929
4930 if (!cheaper_cost_pair (cp, new_cp))
4931 continue;
4932
4933 new_cp = cp;
4934 }
4935 }
4936
4937 if (!new_cp)
4938 {
4939 iv_ca_delta_free (delta);
4940 return infinite_cost;
4941 }
4942
4943 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4944 }
4945
4946 iv_ca_delta_commit (data, ivs, *delta, true);
4947 cost = iv_ca_cost (ivs);
4948 iv_ca_delta_commit (data, ivs, *delta, false);
4949
4950 return cost;
4951 }
4952
4953 /* Try optimizing the set of candidates IVS by removing candidates other
4954 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4955 differences in DELTA. */
4956
4957 static comp_cost
4958 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4959 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4960 {
4961 bitmap_iterator bi;
4962 struct iv_ca_delta *act_delta, *best_delta;
4963 unsigned i;
4964 comp_cost best_cost, acost;
4965 struct iv_cand *cand;
4966
4967 best_delta = NULL;
4968 best_cost = iv_ca_cost (ivs);
4969
4970 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4971 {
4972 cand = iv_cand (data, i);
4973
4974 if (cand == except_cand)
4975 continue;
4976
4977 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4978
4979 if (compare_costs (acost, best_cost) < 0)
4980 {
4981 best_cost = acost;
4982 iv_ca_delta_free (&best_delta);
4983 best_delta = act_delta;
4984 }
4985 else
4986 iv_ca_delta_free (&act_delta);
4987 }
4988
4989 if (!best_delta)
4990 {
4991 *delta = NULL;
4992 return best_cost;
4993 }
4994
4995 /* Recurse to possibly remove other unnecessary ivs. */
4996 iv_ca_delta_commit (data, ivs, best_delta, true);
4997 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4998 iv_ca_delta_commit (data, ivs, best_delta, false);
4999 *delta = iv_ca_delta_join (best_delta, *delta);
5000 return best_cost;
5001 }
5002
5003 /* Tries to extend the set IVS in the best possible way in order
5004 to express USE. */
5005
5006 static bool
5007 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5008 struct iv_use *use)
5009 {
5010 comp_cost best_cost, act_cost;
5011 unsigned i;
5012 bitmap_iterator bi;
5013 struct iv_cand *cand;
5014 struct iv_ca_delta *best_delta = NULL, *act_delta;
5015 struct cost_pair *cp;
5016
5017 iv_ca_add_use (data, ivs, use);
5018 best_cost = iv_ca_cost (ivs);
5019
5020 cp = iv_ca_cand_for_use (ivs, use);
5021 if (cp)
5022 {
5023 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5024 iv_ca_set_no_cp (data, ivs, use);
5025 }
5026
5027 /* First try important candidates not based on any memory object. Only if
5028 this fails do we try the specific ones. Rationale: in loops with many
5029 variables, the best choice often is to use just one generic biv. If we
5030 added many ivs specific to the uses here, the optimization algorithm would
5031 later be likely to get stuck in a local minimum, causing us to create
5032 too many ivs. Going from few ivs to more seems more likely to be
5033 successful -- starting from few ivs, replacing an expensive use by a
5034 specific iv should always be a win. */
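/* For instance, in a loop walking several arrays, one generic counter biv
   serving all the address uses is often cheaper overall than a separate
   pointer iv per array; starting the search from the generic candidates
   makes such solutions reachable.  */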
5035 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5036 {
5037 cand = iv_cand (data, i);
5038
5039 if (cand->iv->base_object != NULL_TREE)
5040 continue;
5041
5042 if (iv_ca_cand_used_p (ivs, cand))
5043 continue;
5044
5045 cp = get_use_iv_cost (data, use, cand);
5046 if (!cp)
5047 continue;
5048
5049 iv_ca_set_cp (data, ivs, use, cp);
5050 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5051 iv_ca_set_no_cp (data, ivs, use);
5052 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5053
5054 if (compare_costs (act_cost, best_cost) < 0)
5055 {
5056 best_cost = act_cost;
5057
5058 iv_ca_delta_free (&best_delta);
5059 best_delta = act_delta;
5060 }
5061 else
5062 iv_ca_delta_free (&act_delta);
5063 }
5064
5065 if (infinite_cost_p (best_cost))
5066 {
5067 for (i = 0; i < use->n_map_members; i++)
5068 {
5069 cp = use->cost_map + i;
5070 cand = cp->cand;
5071 if (!cand)
5072 continue;
5073
5074 /* Already tried this. */
5075 if (cand->important && cand->iv->base_object == NULL_TREE)
5076 continue;
5077
5078 if (iv_ca_cand_used_p (ivs, cand))
5079 continue;
5080
5081 act_delta = NULL;
5082 iv_ca_set_cp (data, ivs, use, cp);
5083 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5084 iv_ca_set_no_cp (data, ivs, use);
5085 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5086 cp, act_delta);
5087
5088 if (compare_costs (act_cost, best_cost) < 0)
5089 {
5090 best_cost = act_cost;
5091
5092 if (best_delta)
5093 iv_ca_delta_free (&best_delta);
5094 best_delta = act_delta;
5095 }
5096 else
5097 iv_ca_delta_free (&act_delta);
5098 }
5099 }
5100
5101 iv_ca_delta_commit (data, ivs, best_delta, true);
5102 iv_ca_delta_free (&best_delta);
5103
5104 return !infinite_cost_p (best_cost);
5105 }
5106
5107 /* Finds an initial assignment of candidates to uses. */
5108
5109 static struct iv_ca *
5110 get_initial_solution (struct ivopts_data *data)
5111 {
5112 struct iv_ca *ivs = iv_ca_new (data);
5113 unsigned i;
5114
5115 for (i = 0; i < n_iv_uses (data); i++)
5116 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5117 {
5118 iv_ca_free (&ivs);
5119 return NULL;
5120 }
5121
5122 return ivs;
5123 }
5124
5125 /* Tries to improve the set of induction variables IVS. */
5126
5127 static bool
5128 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5129 {
5130 unsigned i, n_ivs;
5131 comp_cost acost, best_cost = iv_ca_cost (ivs);
5132 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5133 struct iv_cand *cand;
5134
5135 /* Try extending the set of induction variables by one. */
5136 for (i = 0; i < n_iv_cands (data); i++)
5137 {
5138 cand = iv_cand (data, i);
5139
5140 if (iv_ca_cand_used_p (ivs, cand))
5141 continue;
5142
5143 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5144 if (!act_delta)
5145 continue;
5146
5147 /* If we successfully added the candidate and the set is small enough,
5148 try optimizing it by removing other candidates. */
5149 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5150 {
5151 iv_ca_delta_commit (data, ivs, act_delta, true);
5152 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5153 iv_ca_delta_commit (data, ivs, act_delta, false);
5154 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5155 }
5156
5157 if (compare_costs (acost, best_cost) < 0)
5158 {
5159 best_cost = acost;
5160 iv_ca_delta_free (&best_delta);
5161 best_delta = act_delta;
5162 }
5163 else
5164 iv_ca_delta_free (&act_delta);
5165 }
5166
5167 if (!best_delta)
5168 {
5169 /* Try removing the candidates from the set instead. */
5170 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5171
5172 /* Nothing more we can do. */
5173 if (!best_delta)
5174 return false;
5175 }
5176
5177 iv_ca_delta_commit (data, ivs, best_delta, true);
5178 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5179 iv_ca_delta_free (&best_delta);
5180 return true;
5181 }
5182
5183 /* Attempts to find the optimal set of induction variables. We use a simple
5184 greedy heuristic -- we try to replace at most one candidate in the selected
5185 solution and remove the unused ivs while this improves the cost. */
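/* Concretely, the search below proceeds as follows: get_initial_solution
   assigns some candidate to every use; then try_improve_iv_set repeatedly
   attempts to add a single candidate (iv_ca_extend), possibly followed by
   pruning (iv_ca_prune), committing a change only when it decreases the
   total cost; the search stops in a local minimum.  */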
5186
5187 static struct iv_ca *
5188 find_optimal_iv_set (struct ivopts_data *data)
5189 {
5190 unsigned i;
5191 struct iv_ca *set;
5192 struct iv_use *use;
5193
5194 /* Get the initial solution. */
5195 set = get_initial_solution (data);
5196 if (!set)
5197 {
5198 if (dump_file && (dump_flags & TDF_DETAILS))
5199 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5200 return NULL;
5201 }
5202
5203 if (dump_file && (dump_flags & TDF_DETAILS))
5204 {
5205 fprintf (dump_file, "Initial set of candidates:\n");
5206 iv_ca_dump (data, dump_file, set);
5207 }
5208
5209 while (try_improve_iv_set (data, set))
5210 {
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5212 {
5213 fprintf (dump_file, "Improved to:\n");
5214 iv_ca_dump (data, dump_file, set);
5215 }
5216 }
5217
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5219 {
5220 comp_cost cost = iv_ca_cost (set);
5221 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5222 }
5223
5224 for (i = 0; i < n_iv_uses (data); i++)
5225 {
5226 use = iv_use (data, i);
5227 use->selected = iv_ca_cand_for_use (set, use)->cand;
5228 }
5229
5230 return set;
5231 }
5232
5233 /* Creates a new induction variable corresponding to CAND. */
5234
5235 static void
5236 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5237 {
5238 gimple_stmt_iterator incr_pos;
5239 tree base;
5240 bool after = false;
5241
5242 if (!cand->iv)
5243 return;
5244
5245 switch (cand->pos)
5246 {
5247 case IP_NORMAL:
5248 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5249 break;
5250
5251 case IP_END:
5252 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5253 after = true;
5254 break;
5255
5256 case IP_AFTER_USE:
5257 after = true;
5258 /* fall through */
5259 case IP_BEFORE_USE:
5260 incr_pos = gsi_for_stmt (cand->incremented_at);
5261 break;
5262
5263 case IP_ORIGINAL:
5264 /* Mark that the iv is preserved. */
5265 name_info (data, cand->var_before)->preserve_biv = true;
5266 name_info (data, cand->var_after)->preserve_biv = true;
5267
5268 /* Rewrite the increment so that it uses var_before directly. */
5269 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5270
5271 return;
5272 }
5273
5274 gimple_add_tmp_var (cand->var_before);
5275 add_referenced_var (cand->var_before);
5276
5277 base = unshare_expr (cand->iv->base);
5278
5279 create_iv (base, unshare_expr (cand->iv->step),
5280 cand->var_before, data->current_loop,
5281 &incr_pos, after, &cand->var_before, &cand->var_after);
5282 }
5283
5284 /* Creates new induction variables described in SET. */
5285
5286 static void
5287 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5288 {
5289 unsigned i;
5290 struct iv_cand *cand;
5291 bitmap_iterator bi;
5292
5293 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5294 {
5295 cand = iv_cand (data, i);
5296 create_new_iv (data, cand);
5297 }
5298 }
5299
5300
5301 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5302 using candidate CAND. */
5303
5304 static void
5305 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5306 struct iv_use *use, struct iv_cand *cand)
5307 {
5308 tree comp;
5309 tree op, tgt;
5310 gimple ass;
5311 gimple_stmt_iterator bsi;
5312
5313 /* An important special case -- if we are asked to express the value of
5314 the original iv by itself, just exit; there is no need to
5315 introduce a new computation (that might also need casting the
5316 variable to unsigned and back). */
5317 if (cand->pos == IP_ORIGINAL
5318 && cand->incremented_at == use->stmt)
5319 {
5320 tree step, ctype, utype;
5321 enum tree_code incr_code = PLUS_EXPR, old_code;
5322
5323 gcc_assert (is_gimple_assign (use->stmt));
5324 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5325
5326 step = cand->iv->step;
5327 ctype = TREE_TYPE (step);
5328 utype = TREE_TYPE (cand->var_after);
5329 if (TREE_CODE (step) == NEGATE_EXPR)
5330 {
5331 incr_code = MINUS_EXPR;
5332 step = TREE_OPERAND (step, 0);
5333 }
5334
5335 /* Check whether we may leave the computation unchanged.
5336 This is the case only if it does not rely on other
5337 computations in the loop -- otherwise, the computation
5338 we rely upon may be removed in remove_unused_ivs,
5339 thus leading to ICE. */
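/* E.g. an increment "i = i + 4" with a constant operand may stay
   untouched, whereas an increment whose operand is another loop
   computation must be rewritten below in terms of VAR_BEFORE and an
   unshared copy of the candidate's step.  */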
5340 old_code = gimple_assign_rhs_code (use->stmt);
5341 if (old_code == PLUS_EXPR
5342 || old_code == MINUS_EXPR
5343 || old_code == POINTER_PLUS_EXPR)
5344 {
5345 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5346 op = gimple_assign_rhs2 (use->stmt);
5347 else if (old_code != MINUS_EXPR
5348 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5349 op = gimple_assign_rhs1 (use->stmt);
5350 else
5351 op = NULL_TREE;
5352 }
5353 else
5354 op = NULL_TREE;
5355
5356 if (op
5357 && (TREE_CODE (op) == INTEGER_CST
5358 || operand_equal_p (op, step, 0)))
5359 return;
5360
5361 /* Otherwise, add the necessary computations to express
5362 the iv. */
5363 op = fold_convert (ctype, cand->var_before);
5364 comp = fold_convert (utype,
5365 build2 (incr_code, ctype, op,
5366 unshare_expr (step)));
5367 }
5368 else
5369 {
5370 comp = get_computation (data->current_loop, use, cand);
5371 gcc_assert (comp != NULL_TREE);
5372 }
5373
5374 switch (gimple_code (use->stmt))
5375 {
5376 case GIMPLE_PHI:
5377 tgt = PHI_RESULT (use->stmt);
5378
5379 /* If we should keep the biv, do not replace it. */
5380 if (name_info (data, tgt)->preserve_biv)
5381 return;
5382
5383 bsi = gsi_after_labels (gimple_bb (use->stmt));
5384 break;
5385
5386 case GIMPLE_ASSIGN:
5387 tgt = gimple_assign_lhs (use->stmt);
5388 bsi = gsi_for_stmt (use->stmt);
5389 break;
5390
5391 default:
5392 gcc_unreachable ();
5393 }
5394
5395 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5396 true, GSI_SAME_STMT);
5397
5398 if (gimple_code (use->stmt) == GIMPLE_PHI)
5399 {
5400 ass = gimple_build_assign (tgt, op);
5401 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5402
5403 bsi = gsi_for_stmt (use->stmt);
5404 remove_phi_node (&bsi, false);
5405 }
5406 else
5407 {
5408 gimple_assign_set_rhs_from_tree (&bsi, op);
5409 use->stmt = gsi_stmt (bsi);
5410 }
5411 }
5412
5413 /* Replaces ssa name in index IDX by its basic variable. Callback for
5414 for_each_index. */
5415
5416 static bool
5417 idx_remove_ssa_names (tree base, tree *idx,
5418 void *data ATTRIBUTE_UNUSED)
5419 {
5420 tree *op;
5421
5422 if (TREE_CODE (*idx) == SSA_NAME)
5423 *idx = SSA_NAME_VAR (*idx);
5424
5425 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5426 {
5427 op = &TREE_OPERAND (base, 2);
5428 if (*op
5429 && TREE_CODE (*op) == SSA_NAME)
5430 *op = SSA_NAME_VAR (*op);
5431 op = &TREE_OPERAND (base, 3);
5432 if (*op
5433 && TREE_CODE (*op) == SSA_NAME)
5434 *op = SSA_NAME_VAR (*op);
5435 }
5436
5437 return true;
5438 }
5439
5440 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5441
5442 static tree
5443 unshare_and_remove_ssa_names (tree ref)
5444 {
5445 ref = unshare_expr (ref);
5446 for_each_index (&ref, idx_remove_ssa_names, NULL);
5447
5448 return ref;
5449 }
5450
5451 /* Copies the reference information from OLD_REF to NEW_REF. */
5452
5453 static void
5454 copy_ref_info (tree new_ref, tree old_ref)
5455 {
5456 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5457 copy_mem_ref_info (new_ref, old_ref);
5458 else
5459 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5460 }
5461
5462 /* Rewrites USE (an address that is an iv) using candidate CAND. */
5463
5464 static void
5465 rewrite_use_address (struct ivopts_data *data,
5466 struct iv_use *use, struct iv_cand *cand)
5467 {
5468 aff_tree aff;
5469 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5470 tree ref;
5471 bool ok;
5472
5473 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5474 gcc_assert (ok);
5475 unshare_aff_combination (&aff);
5476
5477 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
5478 copy_ref_info (ref, *use->op_p);
5479 *use->op_p = ref;
5480 }
5481
5482 /* Rewrites USE (a condition with an iv as one of its operands)
5483 using candidate CAND. */
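/* Continuing the iv elimination example: when a bound B was computed by
   may_eliminate_iv, an exit test such as "if (i < 100)" is rewritten
   into "if (p != B)" (or "p == B", depending on which edge leaves the
   loop; see iv_elimination_compare).  */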
5484
5485 static void
5486 rewrite_use_compare (struct ivopts_data *data,
5487 struct iv_use *use, struct iv_cand *cand)
5488 {
5489 tree comp, *var_p, op, bound;
5490 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5491 enum tree_code compare;
5492 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5493 bool ok;
5494
5495 bound = cp->value;
5496 if (bound)
5497 {
5498 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5499 tree var_type = TREE_TYPE (var);
5500 gimple_seq stmts;
5501
5502 compare = iv_elimination_compare (data, use);
5503 bound = unshare_expr (fold_convert (var_type, bound));
5504 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5505 if (stmts)
5506 gsi_insert_seq_on_edge_immediate (
5507 loop_preheader_edge (data->current_loop),
5508 stmts);
5509
5510 gimple_cond_set_lhs (use->stmt, var);
5511 gimple_cond_set_code (use->stmt, compare);
5512 gimple_cond_set_rhs (use->stmt, op);
5513 return;
5514 }
5515
5516 /* The induction variable elimination failed; just express the original
5517 giv. */
5518 comp = get_computation (data->current_loop, use, cand);
5519 gcc_assert (comp != NULL_TREE);
5520
5521 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5522 gcc_assert (ok);
5523
5524 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5525 true, GSI_SAME_STMT);
5526 }
5527
5528 /* Rewrites USE using candidate CAND. */
5529
5530 static void
5531 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5532 {
5533 switch (use->type)
5534 {
5535 case USE_NONLINEAR_EXPR:
5536 rewrite_use_nonlinear_expr (data, use, cand);
5537 break;
5538
5539 case USE_ADDRESS:
5540 rewrite_use_address (data, use, cand);
5541 break;
5542
5543 case USE_COMPARE:
5544 rewrite_use_compare (data, use, cand);
5545 break;
5546
5547 default:
5548 gcc_unreachable ();
5549 }
5550
5551 update_stmt (use->stmt);
5552 }
5553
5554 /* Rewrite the uses using the selected induction variables. */
5555
5556 static void
5557 rewrite_uses (struct ivopts_data *data)
5558 {
5559 unsigned i;
5560 struct iv_cand *cand;
5561 struct iv_use *use;
5562
5563 for (i = 0; i < n_iv_uses (data); i++)
5564 {
5565 use = iv_use (data, i);
5566 cand = use->selected;
5567 gcc_assert (cand);
5568
5569 rewrite_use (data, use, cand);
5570 }
5571 }
5572
5573 /* Removes the ivs that are not used after rewriting. */
5574
5575 static void
5576 remove_unused_ivs (struct ivopts_data *data)
5577 {
5578 unsigned j;
5579 bitmap_iterator bi;
5580 bitmap toremove = BITMAP_ALLOC (NULL);
5581
5582 /* Figure out an order in which to release SSA DEFs so that we don't
5583 release something that we'd have to propagate into a debug stmt
5584 afterwards. */
5585 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5586 {
5587 struct version_info *info;
5588
5589 info = ver_info (data, j);
5590 if (info->iv
5591 && !integer_zerop (info->iv->step)
5592 && !info->inv_id
5593 && !info->iv->have_use_for
5594 && !info->preserve_biv)
5595 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5596 }
5597
5598 release_defs_bitset (toremove);
5599
5600 BITMAP_FREE (toremove);
5601 }
5602
5603 /* Frees data allocated by the optimization of a single loop. */
5604
5605 static void
5606 free_loop_data (struct ivopts_data *data)
5607 {
5608 unsigned i, j;
5609 bitmap_iterator bi;
5610 tree obj;
5611
5612 if (data->niters)
5613 {
5614 pointer_map_destroy (data->niters);
5615 data->niters = NULL;
5616 }
5617
5618 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5619 {
5620 struct version_info *info;
5621
5622 info = ver_info (data, i);
5623 if (info->iv)
5624 free (info->iv);
5625 info->iv = NULL;
5626 info->has_nonlin_use = false;
5627 info->preserve_biv = false;
5628 info->inv_id = 0;
5629 }
5630 bitmap_clear (data->relevant);
5631 bitmap_clear (data->important_candidates);
5632
5633 for (i = 0; i < n_iv_uses (data); i++)
5634 {
5635 struct iv_use *use = iv_use (data, i);
5636
5637 free (use->iv);
5638 BITMAP_FREE (use->related_cands);
5639 for (j = 0; j < use->n_map_members; j++)
5640 if (use->cost_map[j].depends_on)
5641 BITMAP_FREE (use->cost_map[j].depends_on);
5642 free (use->cost_map);
5643 free (use);
5644 }
5645 VEC_truncate (iv_use_p, data->iv_uses, 0);
5646
5647 for (i = 0; i < n_iv_cands (data); i++)
5648 {
5649 struct iv_cand *cand = iv_cand (data, i);
5650
5651 if (cand->iv)
5652 free (cand->iv);
5653 if (cand->depends_on)
5654 BITMAP_FREE (cand->depends_on);
5655 free (cand);
5656 }
5657 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5658
5659 if (data->version_info_size < num_ssa_names)
5660 {
5661 data->version_info_size = 2 * num_ssa_names;
5662 free (data->version_info);
5663 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5664 }
5665
5666 data->max_inv_id = 0;
5667
5668 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5669 SET_DECL_RTL (obj, NULL_RTX);
5670
5671 VEC_truncate (tree, decl_rtl_to_reset, 0);
5672 }
5673
5674 /* Finalizes the data structures used by
5675 the iv optimization pass. */
5676
5677 static void
5678 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5679 {
5680 free_loop_data (data);
5681 free (data->version_info);
5682 BITMAP_FREE (data->relevant);
5683 BITMAP_FREE (data->important_candidates);
5684
5685 VEC_free (tree, heap, decl_rtl_to_reset);
5686 VEC_free (iv_use_p, heap, data->iv_uses);
5687 VEC_free (iv_cand_p, heap, data->iv_candidates);
5688 }
5689
5690 /* Optimizes the LOOP. Returns true if anything changed. */
5691
5692 static bool
5693 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5694 {
5695 bool changed = false;
5696 struct iv_ca *iv_ca;
5697 edge exit;
5698 basic_block *body;
5699
5700 gcc_assert (!data->niters);
5701 data->current_loop = loop;
5702 data->speed = optimize_loop_for_speed_p (loop);
5703
5704 if (dump_file && (dump_flags & TDF_DETAILS))
5705 {
5706 fprintf (dump_file, "Processing loop %d\n", loop->num);
5707
5708 exit = single_dom_exit (loop);
5709 if (exit)
5710 {
5711 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5712 exit->src->index, exit->dest->index);
5713 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5714 fprintf (dump_file, "\n");
5715 }
5716
5717 fprintf (dump_file, "\n");
5718 }
5719
5720 body = get_loop_body (loop);
5721 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5722 free (body);
5723
5724 /* Determine, for each ssa name, whether it behaves as an induction variable
5725 in some loop. */
5726 if (!find_induction_variables (data))
5727 goto finish;
5728
5729 /* Finds interesting uses (item 1). */
5730 find_interesting_uses (data);
5731 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5732 goto finish;
5733
5734 /* Finds candidates for the induction variables (item 2). */
5735 find_iv_candidates (data);
5736
5737 /* Calculates the costs (item 3, part 1). */
5738 determine_iv_costs (data);
5739 determine_use_iv_costs (data);
5740 determine_set_costs (data);
5741
5742 /* Find the optimal set of induction variables (item 3, part 2). */
5743 iv_ca = find_optimal_iv_set (data);
5744 if (!iv_ca)
5745 goto finish;
5746 changed = true;
5747
5748 /* Create the new induction variables (item 4, part 1). */
5749 create_new_ivs (data, iv_ca);
5750 iv_ca_free (&iv_ca);
5751
5752 /* Rewrite the uses (item 4, part 2). */
5753 rewrite_uses (data);
5754
5755 /* Remove the ivs that are unused after rewriting. */
5756 remove_unused_ivs (data);
5757
5758 /* We have changed the structure of induction variables; it might happen
5759 that definitions in the scev database refer to some of them that were
5760 eliminated. */
5761 scev_reset ();
5762
5763 finish:
5764 free_loop_data (data);
5765
5766 return changed;
5767 }
5768
5769 /* Main entry point. Optimizes induction variables in loops. */
5770
5771 void
5772 tree_ssa_iv_optimize (void)
5773 {
5774 struct loop *loop;
5775 struct ivopts_data data;
5776 loop_iterator li;
5777
5778 tree_ssa_iv_optimize_init (&data);
5779
5780 /* Optimize the loops starting with the innermost ones. */
5781 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5782 {
5783 if (dump_file && (dump_flags & TDF_DETAILS))
5784 flow_loop_dump (loop, dump_file, NULL, 1);
5785
5786 tree_ssa_iv_optimize_loop (&data, loop);
5787 }
5788
5789 tree_ssa_iv_optimize_finalize (&data);
5790 }