inclhack.def (hpux_imaginary_i): Remove spaces.
[gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
10 later version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "pointer-set.h"
87 #include "hashtab.h"
88 #include "tree-chrec.h"
89 #include "tree-scalar-evolution.h"
90 #include "cfgloop.h"
91 #include "params.h"
92 #include "langhooks.h"
93 #include "tree-affine.h"
94 #include "target.h"
95
/* The infinite cost.  Acts as a sentinel; every real cost computed by this
   pass is strictly smaller.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this compile-time constant.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object that the induction variable
			   points to (NULL if none was determined).  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
115
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description (NULL if the name
			   is not an iv).  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

/* The cost of a computation that is free.  */
static const comp_cost zero_cost = {0, 0};

/* The cost of a computation that cannot (or must not) be done.  */
static const comp_cost infinite_cost = {INFTY, INFTY};
147
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
211
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop,
     keyed by exit edge (created lazily; see niter_for_exit).  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by
     SSA_NAME_VERSION.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};
261
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments, kept as a list of per-use
   changes (old_cp allows a tentative change to be rolled back).  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
339 /* Number of uses recorded in DATA. */
340
341 static inline unsigned
342 n_iv_uses (struct ivopts_data *data)
343 {
344 return VEC_length (iv_use_p, data->iv_uses);
345 }
346
347 /* Ith use recorded in DATA. */
348
349 static inline struct iv_use *
350 iv_use (struct ivopts_data *data, unsigned i)
351 {
352 return VEC_index (iv_use_p, data->iv_uses, i);
353 }
354
355 /* Number of candidates recorded in DATA. */
356
357 static inline unsigned
358 n_iv_cands (struct ivopts_data *data)
359 {
360 return VEC_length (iv_cand_p, data->iv_candidates);
361 }
362
363 /* Ith candidate recorded in DATA. */
364
365 static inline struct iv_cand *
366 iv_cand (struct ivopts_data *data, unsigned i)
367 {
368 return VEC_index (iv_cand_p, data->iv_candidates, i);
369 }
370
371 /* The single loop exit if it dominates the latch, NULL otherwise. */
372
373 edge
374 single_dom_exit (struct loop *loop)
375 {
376 edge exit = single_exit (loop);
377
378 if (!exit)
379 return NULL;
380
381 if (!just_once_each_iteration_p (loop, exit->src))
382 return NULL;
383
384 return exit;
385 }
386
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  /* The name is cleared by record_use to keep dumps terse; print it only
     when still present.  */
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, " type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      /* A genuine iv: show base and step separately.  */
      fprintf (file, " base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, " step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      /* Zero (NULL) step: the value is loop invariant.  */
      fprintf (file, " invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, " base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, " is a biv\n");
}
431
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, " generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, " address\n");
      break;

    case USE_COMPARE:
      fprintf (file, " compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, " in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, " at position ");
  /* OP_P is NULL for uses recorded without a specific operand location
     (see record_use callers); only the newline is printed then.  */
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, " related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
475
/* Dumps information about all the recorded uses in DATA to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
493
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, " depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  /* A NULL iv marks a "pseudocandidate" standing for replacing the final
     value of an iv by direct computation.  */
  if (!iv)
    {
      fprintf (file, " final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, " incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, " incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, " original biv\n");
      break;
    }

  dump_iv (file, iv);
}
542
543 /* Returns the info for ssa version VER. */
544
545 static inline struct version_info *
546 ver_info (struct ivopts_data *data, unsigned ver)
547 {
548 return data->version_info + ver;
549 }
550
551 /* Returns the info for ssa name NAME. */
552
553 static inline struct version_info *
554 name_info (struct ivopts_data *data, tree name)
555 {
556 return ver_info (data, SSA_NAME_VERSION (name));
557 }
558
559 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
560 emitted in LOOP. */
561
562 static bool
563 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
564 {
565 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
566
567 gcc_assert (bb);
568
569 if (sbb == loop->latch)
570 return true;
571
572 if (sbb != bb)
573 return false;
574
575 return stmt == last_stmt (bb);
576 }
577
578 /* Returns true if STMT if after the place where the original induction
579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
581
582 static bool
583 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
584 {
585 basic_block cand_bb = gimple_bb (cand->incremented_at);
586 basic_block stmt_bb = gimple_bb (stmt);
587
588 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
589 return false;
590
591 if (stmt_bb != cand_bb)
592 return true;
593
594 if (true_if_equal
595 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return true;
597 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
598 }
599
600 /* Returns true if STMT if after the place where the induction variable
601 CAND is incremented in LOOP. */
602
603 static bool
604 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
605 {
606 switch (cand->pos)
607 {
608 case IP_END:
609 return false;
610
611 case IP_NORMAL:
612 return stmt_after_ip_normal_pos (loop, stmt);
613
614 case IP_ORIGINAL:
615 case IP_AFTER_USE:
616 return stmt_after_inc_pos (cand, stmt, false);
617
618 case IP_BEFORE_USE:
619 return stmt_after_inc_pos (cand, stmt, true);
620
621 default:
622 gcc_unreachable ();
623 }
624 }
625
626 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
627
628 static bool
629 abnormal_ssa_name_p (tree exp)
630 {
631 if (!exp)
632 return false;
633
634 if (TREE_CODE (exp) != SSA_NAME)
635 return false;
636
637 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
638 }
639
640 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
641 abnormal phi node. Callback for for_each_index. */
642
643 static bool
644 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
645 void *data ATTRIBUTE_UNUSED)
646 {
647 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
648 {
649 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 return false;
651 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
652 return false;
653 }
654
655 return !abnormal_ssa_name_p (*index);
656 }
657
658 /* Returns true if EXPR contains a ssa name that occurs in an
659 abnormal phi node. */
660
661 bool
662 contains_abnormal_ssa_name_p (tree expr)
663 {
664 enum tree_code code;
665 enum tree_code_class codeclass;
666
667 if (!expr)
668 return false;
669
670 code = TREE_CODE (expr);
671 codeclass = TREE_CODE_CLASS (code);
672
673 if (code == SSA_NAME)
674 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675
676 if (code == INTEGER_CST
677 || is_gimple_min_invariant (expr))
678 return false;
679
680 if (code == ADDR_EXPR)
681 return !for_each_index (&TREE_OPERAND (expr, 0),
682 idx_contains_abnormal_ssa_name_p,
683 NULL);
684
685 switch (codeclass)
686 {
687 case tcc_binary:
688 case tcc_comparison:
689 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
690 return true;
691
692 /* Fallthru. */
693 case tcc_unary:
694 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
695 return true;
696
697 break;
698
699 default:
700 gcc_unreachable ();
701 }
702
703 return false;
704 }
705
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.
   Results (including failures, recorded as NULL_TREE) are cached in
   DATA->niters keyed by the exit edge.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      /* First query ever: create the cache lazily; there cannot be a
	 cached entry yet.  */
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Cache miss.  Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
				     exit, &desc, true)
	  && integer_zerop (desc.may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc.niter))
	niter = desc.niter;
      else
	niter = NULL_TREE;

      /* Record the result (even a failure) so the analysis is not redone.  */
      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}
746
747 /* Returns tree describing number of iterations determined from
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
750
751 static tree
752 niter_for_single_dom_exit (struct ivopts_data *data)
753 {
754 edge exit = single_dom_exit (data->current_loop);
755
756 if (!exit)
757 return NULL;
758
759 return niter_for_exit (data, exit);
760 }
761
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Twice the current number of ssa names, leaving headroom for names
     created while the pass runs.  */
  data->version_info_size = 2 * num_ssa_names;
  /* XCNEWVEC zeroes the array, so every per-name record starts empty.  */
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  /* The niter cache is created lazily by niter_for_exit.  */
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
778
779 /* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
781
782 static tree
783 determine_base_object (tree expr)
784 {
785 enum tree_code code = TREE_CODE (expr);
786 tree base, obj;
787
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
791 if (CONVERT_EXPR_P (expr))
792 return determine_base_object (TREE_OPERAND (expr, 0));
793
794 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
795 return NULL_TREE;
796
797 switch (code)
798 {
799 case INTEGER_CST:
800 return NULL_TREE;
801
802 case ADDR_EXPR:
803 obj = TREE_OPERAND (expr, 0);
804 base = get_base_address (obj);
805
806 if (!base)
807 return expr;
808
809 if (TREE_CODE (base) == INDIRECT_REF)
810 return determine_base_object (TREE_OPERAND (base, 0));
811
812 return fold_convert (ptr_type_node,
813 build_fold_addr_expr (base));
814
815 case POINTER_PLUS_EXPR:
816 return determine_base_object (TREE_OPERAND (expr, 0));
817
818 case PLUS_EXPR:
819 case MINUS_EXPR:
820 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
821 gcc_unreachable ();
822
823 default:
824 return fold_convert (ptr_type_node, expr);
825 }
826 }
827
828 /* Allocates an induction variable with given initial value BASE and step STEP
829 for loop LOOP. */
830
831 static struct iv *
832 alloc_iv (tree base, tree step)
833 {
834 struct iv *iv = XCNEW (struct iv);
835 gcc_assert (step != NULL_TREE);
836
837 iv->base = base;
838 iv->base_object = determine_base_object (base);
839 iv->step = step;
840 iv->biv_p = false;
841 iv->have_use_for = false;
842 iv->use_id = 0;
843 iv->ssa_name = NULL_TREE;
844
845 return iv;
846 }
847
848 /* Sets STEP and BASE for induction variable IV. */
849
850 static void
851 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
852 {
853 struct version_info *info = name_info (data, iv);
854
855 gcc_assert (!info->iv);
856
857 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
858 info->iv = alloc_iv (base, step);
859 info->iv->ssa_name = iv;
860 }
861
862 /* Finds induction variable declaration for VAR. */
863
864 static struct iv *
865 get_iv (struct ivopts_data *data, tree var)
866 {
867 basic_block bb;
868 tree type = TREE_TYPE (var);
869
870 if (!POINTER_TYPE_P (type)
871 && !INTEGRAL_TYPE_P (type))
872 return NULL;
873
874 if (!name_info (data, var)->iv)
875 {
876 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
877
878 if (!bb
879 || !flow_bb_inside_loop_p (data->current_loop, bb))
880 set_iv (data, var, var, build_int_cst (type, 0));
881 }
882
883 return name_info (data, var)->iv;
884 }
885
886 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
887 not define a simple affine biv with nonzero step. */
888
889 static tree
890 determine_biv_step (gimple phi)
891 {
892 struct loop *loop = gimple_bb (phi)->loop_father;
893 tree name = PHI_RESULT (phi);
894 affine_iv iv;
895
896 if (!is_gimple_reg (name))
897 return NULL_TREE;
898
899 if (!simple_iv (loop, loop, name, &iv, true))
900 return NULL_TREE;
901
902 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
903 }
904
905 /* Finds basic ivs. */
906
907 static bool
908 find_bivs (struct ivopts_data *data)
909 {
910 gimple phi;
911 tree step, type, base;
912 bool found = false;
913 struct loop *loop = data->current_loop;
914 gimple_stmt_iterator psi;
915
916 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
917 {
918 phi = gsi_stmt (psi);
919
920 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
921 continue;
922
923 step = determine_biv_step (phi);
924 if (!step)
925 continue;
926
927 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
928 base = expand_simple_operations (base);
929 if (contains_abnormal_ssa_name_p (base)
930 || contains_abnormal_ssa_name_p (step))
931 continue;
932
933 type = TREE_TYPE (PHI_RESULT (phi));
934 base = fold_convert (type, base);
935 if (step)
936 {
937 if (POINTER_TYPE_P (type))
938 step = fold_convert (sizetype, step);
939 else
940 step = fold_convert (type, step);
941 }
942
943 set_iv (data, PHI_RESULT (phi), base, step);
944 found = true;
945 }
946
947 return found;
948 }
949
950 /* Marks basic ivs. */
951
952 static void
953 mark_bivs (struct ivopts_data *data)
954 {
955 gimple phi;
956 tree var;
957 struct iv *iv, *incr_iv;
958 struct loop *loop = data->current_loop;
959 basic_block incr_bb;
960 gimple_stmt_iterator psi;
961
962 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
963 {
964 phi = gsi_stmt (psi);
965
966 iv = get_iv (data, PHI_RESULT (phi));
967 if (!iv)
968 continue;
969
970 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
971 incr_iv = get_iv (data, var);
972 if (!incr_iv)
973 continue;
974
975 /* If the increment is in the subloop, ignore it. */
976 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
977 if (incr_bb->loop_father != data->current_loop
978 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
979 continue;
980
981 iv->biv_p = true;
982 incr_iv->biv_p = true;
983 }
984 }
985
986 /* Checks whether STMT defines a linear induction variable and stores its
987 parameters to IV. */
988
989 static bool
990 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
991 {
992 tree lhs;
993 struct loop *loop = data->current_loop;
994
995 iv->base = NULL_TREE;
996 iv->step = NULL_TREE;
997
998 if (gimple_code (stmt) != GIMPLE_ASSIGN)
999 return false;
1000
1001 lhs = gimple_assign_lhs (stmt);
1002 if (TREE_CODE (lhs) != SSA_NAME)
1003 return false;
1004
1005 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1006 return false;
1007 iv->base = expand_simple_operations (iv->base);
1008
1009 if (contains_abnormal_ssa_name_p (iv->base)
1010 || contains_abnormal_ssa_name_p (iv->step))
1011 return false;
1012
1013 return true;
1014 }
1015
1016 /* Finds general ivs in statement STMT. */
1017
1018 static void
1019 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1020 {
1021 affine_iv iv;
1022
1023 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1024 return;
1025
1026 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1027 }
1028
1029 /* Finds general ivs in basic block BB. */
1030
1031 static void
1032 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1033 {
1034 gimple_stmt_iterator bsi;
1035
1036 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1037 find_givs_in_stmt (data, gsi_stmt (bsi));
1038 }
1039
1040 /* Finds general ivs. */
1041
1042 static void
1043 find_givs (struct ivopts_data *data)
1044 {
1045 struct loop *loop = data->current_loop;
1046 basic_block *body = get_loop_body_in_dom_order (loop);
1047 unsigned i;
1048
1049 for (i = 0; i < loop->num_nodes; i++)
1050 find_givs_in_bb (data, body[i]);
1051 free (body);
1052 }
1053
1054 /* For each ssa name defined in LOOP determines whether it is an induction
1055 variable and if so, its initial value and step. */
1056
1057 static bool
1058 find_induction_variables (struct ivopts_data *data)
1059 {
1060 unsigned i;
1061 bitmap_iterator bi;
1062
1063 if (!find_bivs (data))
1064 return false;
1065
1066 find_givs (data);
1067 mark_bivs (data);
1068
1069 if (dump_file && (dump_flags & TDF_DETAILS))
1070 {
1071 tree niter = niter_for_single_dom_exit (data);
1072
1073 if (niter)
1074 {
1075 fprintf (dump_file, " number of iterations ");
1076 print_generic_expr (dump_file, niter, TDF_SLIM);
1077 fprintf (dump_file, "\n\n");
1078 };
1079
1080 fprintf (dump_file, "Induction variables:\n\n");
1081
1082 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1083 {
1084 if (ver_info (data, i)->iv)
1085 dump_iv (dump_file, ver_info (data, i)->iv);
1086 }
1087 }
1088
1089 return true;
1090 }
1091
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   Returns the new use, which is also appended to DATA->iv_uses.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  /* The id doubles as the index into DATA->iv_uses, so it must be taken
     before the push below.  */
  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
1118
1119 /* Checks whether OP is a loop-level invariant and if so, records it.
1120 NONLINEAR_USE is true if the invariant is used in a way we do not
1121 handle specially. */
1122
1123 static void
1124 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1125 {
1126 basic_block bb;
1127 struct version_info *info;
1128
1129 if (TREE_CODE (op) != SSA_NAME
1130 || !is_gimple_reg (op))
1131 return;
1132
1133 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1134 if (bb
1135 && flow_bb_inside_loop_p (data->current_loop, bb))
1136 return;
1137
1138 info = name_info (data, op);
1139 info->name = op;
1140 info->has_nonlin_use |= nonlinear_use;
1141 if (!info->inv_id)
1142 info->inv_id = ++data->max_inv_id;
1143 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1144 }
1145
1146 /* Checks whether the use OP is interesting and if so, records it. */
1147
1148 static struct iv_use *
1149 find_interesting_uses_op (struct ivopts_data *data, tree op)
1150 {
1151 struct iv *iv;
1152 struct iv *civ;
1153 gimple stmt;
1154 struct iv_use *use;
1155
1156 if (TREE_CODE (op) != SSA_NAME)
1157 return NULL;
1158
1159 iv = get_iv (data, op);
1160 if (!iv)
1161 return NULL;
1162
1163 if (iv->have_use_for)
1164 {
1165 use = iv_use (data, iv->use_id);
1166
1167 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1168 return use;
1169 }
1170
1171 if (integer_zerop (iv->step))
1172 {
1173 record_invariant (data, op, true);
1174 return NULL;
1175 }
1176 iv->have_use_for = true;
1177
1178 civ = XNEW (struct iv);
1179 *civ = *iv;
1180
1181 stmt = SSA_NAME_DEF_STMT (op);
1182 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1183 || is_gimple_assign (stmt));
1184
1185 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1186 iv->use_id = use->id;
1187
1188 return use;
1189 }
1190
1191 /* Given a condition in statement STMT, checks whether it is a compare
1192 of an induction variable and an invariant. If this is the case,
1193 CONTROL_VAR is set to location of the iv, BOUND to the location of
1194 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195 induction variable descriptions, and true is returned. If this is not
1196 the case, CONTROL_VAR and BOUND are set to the arguments of the
1197 condition and false is returned. */
1198
1199 static bool
1200 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1201 tree **control_var, tree **bound,
1202 struct iv **iv_var, struct iv **iv_bound)
1203 {
1204 /* The objects returned when COND has constant operands. */
1205 static struct iv const_iv;
1206 static tree zero;
1207 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1208 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1209 bool ret = false;
1210
1211 if (gimple_code (stmt) == GIMPLE_COND)
1212 {
1213 op0 = gimple_cond_lhs_ptr (stmt);
1214 op1 = gimple_cond_rhs_ptr (stmt);
1215 }
1216 else
1217 {
1218 op0 = gimple_assign_rhs1_ptr (stmt);
1219 op1 = gimple_assign_rhs2_ptr (stmt);
1220 }
1221
1222 zero = integer_zero_node;
1223 const_iv.step = integer_zero_node;
1224
1225 if (TREE_CODE (*op0) == SSA_NAME)
1226 iv0 = get_iv (data, *op0);
1227 if (TREE_CODE (*op1) == SSA_NAME)
1228 iv1 = get_iv (data, *op1);
1229
1230 /* Exactly one of the compared values must be an iv, and the other one must
1231 be an invariant. */
1232 if (!iv0 || !iv1)
1233 goto end;
1234
1235 if (integer_zerop (iv0->step))
1236 {
1237 /* Control variable may be on the other side. */
1238 tmp_op = op0; op0 = op1; op1 = tmp_op;
1239 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1240 }
1241 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1242
1243 end:
1244 if (control_var)
1245 *control_var = op0;;
1246 if (iv_var)
1247 *iv_var = iv0;;
1248 if (bound)
1249 *bound = op1;
1250 if (iv_bound)
1251 *iv_bound = iv1;
1252
1253 return ret;
1254 }
1255
1256 /* Checks whether the condition in STMT is interesting and if so,
1257 records it. */
1258
1259 static void
1260 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1261 {
1262 tree *var_p, *bound_p;
1263 struct iv *var_iv, *civ;
1264
1265 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1266 {
1267 find_interesting_uses_op (data, *var_p);
1268 find_interesting_uses_op (data, *bound_p);
1269 return;
1270 }
1271
1272 civ = XNEW (struct iv);
1273 *civ = *var_iv;
1274 record_use (data, NULL, civ, stmt, USE_COMPARE);
1275 }
1276
1277 /* Returns true if expression EXPR is obviously invariant in LOOP,
1278 i.e. if all its operands are defined outside of the LOOP. LOOP
1279 should not be the function body. */
1280
1281 bool
1282 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1283 {
1284 basic_block def_bb;
1285 unsigned i, len;
1286
1287 gcc_assert (loop_depth (loop) > 0);
1288
1289 if (is_gimple_min_invariant (expr))
1290 return true;
1291
1292 if (TREE_CODE (expr) == SSA_NAME)
1293 {
1294 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1295 if (def_bb
1296 && flow_bb_inside_loop_p (loop, def_bb))
1297 return false;
1298
1299 return true;
1300 }
1301
1302 if (!EXPR_P (expr))
1303 return false;
1304
1305 len = TREE_OPERAND_LENGTH (expr);
1306 for (i = 0; i < len; i++)
1307 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1308 return false;
1309
1310 return true;
1311 }
1312
1313 /* Returns true if statement STMT is obviously invariant in LOOP,
1314 i.e. if all its operands on the RHS are defined outside of the LOOP.
1315 LOOP should not be the function body. */
1316
1317 bool
1318 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1319 {
1320 unsigned i;
1321 tree lhs;
1322
1323 gcc_assert (loop_depth (loop) > 0);
1324
1325 lhs = gimple_get_lhs (stmt);
1326 for (i = 0; i < gimple_num_ops (stmt); i++)
1327 {
1328 tree op = gimple_op (stmt, i);
1329 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1330 return false;
1331 }
1332
1333 return true;
1334 }
1335
1336 /* Cumulates the steps of indices into DATA and replaces their values with the
1337 initial ones. Returns false when the value of the index cannot be determined.
1338 Callback for for_each_index. */
1339
1340 struct ifs_ivopts_data
1341 {
1342 struct ivopts_data *ivopts_data;
1343 gimple stmt;
1344 tree step;
1345 };
1346
1347 static bool
1348 idx_find_step (tree base, tree *idx, void *data)
1349 {
1350 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1351 struct iv *iv;
1352 tree step, iv_base, iv_step, lbound, off;
1353 struct loop *loop = dta->ivopts_data->current_loop;
1354
1355 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1356 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1357 return false;
1358
1359 /* If base is a component ref, require that the offset of the reference
1360 be invariant. */
1361 if (TREE_CODE (base) == COMPONENT_REF)
1362 {
1363 off = component_ref_field_offset (base);
1364 return expr_invariant_in_loop_p (loop, off);
1365 }
1366
1367 /* If base is array, first check whether we will be able to move the
1368 reference out of the loop (in order to take its address in strength
1369 reduction). In order for this to work we need both lower bound
1370 and step to be loop invariants. */
1371 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1372 {
1373 /* Moreover, for a range, the size needs to be invariant as well. */
1374 if (TREE_CODE (base) == ARRAY_RANGE_REF
1375 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1376 return false;
1377
1378 step = array_ref_element_size (base);
1379 lbound = array_ref_low_bound (base);
1380
1381 if (!expr_invariant_in_loop_p (loop, step)
1382 || !expr_invariant_in_loop_p (loop, lbound))
1383 return false;
1384 }
1385
1386 if (TREE_CODE (*idx) != SSA_NAME)
1387 return true;
1388
1389 iv = get_iv (dta->ivopts_data, *idx);
1390 if (!iv)
1391 return false;
1392
1393 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1394 *&x[0], which is not folded and does not trigger the
1395 ARRAY_REF path below. */
1396 *idx = iv->base;
1397
1398 if (integer_zerop (iv->step))
1399 return true;
1400
1401 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1402 {
1403 step = array_ref_element_size (base);
1404
1405 /* We only handle addresses whose step is an integer constant. */
1406 if (TREE_CODE (step) != INTEGER_CST)
1407 return false;
1408 }
1409 else
1410 /* The step for pointer arithmetics already is 1 byte. */
1411 step = build_int_cst (sizetype, 1);
1412
1413 iv_base = iv->base;
1414 iv_step = iv->step;
1415 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1416 sizetype, &iv_base, &iv_step, dta->stmt,
1417 false))
1418 {
1419 /* The index might wrap. */
1420 return false;
1421 }
1422
1423 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1424 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1425
1426 return true;
1427 }
1428
1429 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1430 object is passed to it in DATA. */
1431
1432 static bool
1433 idx_record_use (tree base, tree *idx,
1434 void *vdata)
1435 {
1436 struct ivopts_data *data = (struct ivopts_data *) vdata;
1437 find_interesting_uses_op (data, *idx);
1438 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1439 {
1440 find_interesting_uses_op (data, array_ref_element_size (base));
1441 find_interesting_uses_op (data, array_ref_low_bound (base));
1442 }
1443 return true;
1444 }
1445
1446 /* If we can prove that TOP = cst * BOT for some constant cst,
1447 store cst to MUL and return true. Otherwise return false.
1448 The returned value is always sign-extended, regardless of the
1449 signedness of TOP and BOT. */
1450
1451 static bool
1452 constant_multiple_of (tree top, tree bot, double_int *mul)
1453 {
1454 tree mby;
1455 enum tree_code code;
1456 double_int res, p0, p1;
1457 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1458
1459 STRIP_NOPS (top);
1460 STRIP_NOPS (bot);
1461
1462 if (operand_equal_p (top, bot, 0))
1463 {
1464 *mul = double_int_one;
1465 return true;
1466 }
1467
1468 code = TREE_CODE (top);
1469 switch (code)
1470 {
1471 case MULT_EXPR:
1472 mby = TREE_OPERAND (top, 1);
1473 if (TREE_CODE (mby) != INTEGER_CST)
1474 return false;
1475
1476 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1477 return false;
1478
1479 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1480 precision);
1481 return true;
1482
1483 case PLUS_EXPR:
1484 case MINUS_EXPR:
1485 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1486 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1487 return false;
1488
1489 if (code == MINUS_EXPR)
1490 p1 = double_int_neg (p1);
1491 *mul = double_int_sext (double_int_add (p0, p1), precision);
1492 return true;
1493
1494 case INTEGER_CST:
1495 if (TREE_CODE (bot) != INTEGER_CST)
1496 return false;
1497
1498 p0 = double_int_sext (tree_to_double_int (top), precision);
1499 p1 = double_int_sext (tree_to_double_int (bot), precision);
1500 if (double_int_zero_p (p1))
1501 return false;
1502 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503 precision);
1504 return double_int_zero_p (res);
1505
1506 default:
1507 return false;
1508 }
1509 }
1510
1511 /* Returns true if memory reference REF with step STEP may be unaligned. */
1512
1513 static bool
1514 may_be_unaligned_p (tree ref, tree step)
1515 {
1516 tree base;
1517 tree base_type;
1518 HOST_WIDE_INT bitsize;
1519 HOST_WIDE_INT bitpos;
1520 tree toffset;
1521 enum machine_mode mode;
1522 int unsignedp, volatilep;
1523 unsigned base_align;
1524
1525 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1526 thus they are not misaligned. */
1527 if (TREE_CODE (ref) == TARGET_MEM_REF)
1528 return false;
1529
1530 /* The test below is basically copy of what expr.c:normal_inner_ref
1531 does to check whether the object must be loaded by parts when
1532 STRICT_ALIGNMENT is true. */
1533 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1534 &unsignedp, &volatilep, true);
1535 base_type = TREE_TYPE (base);
1536 base_align = TYPE_ALIGN (base_type);
1537
1538 if (mode != BLKmode)
1539 {
1540 double_int mul;
1541 tree al = build_int_cst (TREE_TYPE (step),
1542 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1543
1544 if (base_align < GET_MODE_ALIGNMENT (mode)
1545 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1546 || bitpos % BITS_PER_UNIT != 0)
1547 return true;
1548
1549 if (!constant_multiple_of (step, al, &mul))
1550 return true;
1551 }
1552
1553 return false;
1554 }
1555
1556 /* Return true if EXPR may be non-addressable. */
1557
1558 static bool
1559 may_be_nonaddressable_p (tree expr)
1560 {
1561 switch (TREE_CODE (expr))
1562 {
1563 case TARGET_MEM_REF:
1564 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1565 target, thus they are always addressable. */
1566 return false;
1567
1568 case COMPONENT_REF:
1569 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1570 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1571
1572 case VIEW_CONVERT_EXPR:
1573 /* This kind of view-conversions may wrap non-addressable objects
1574 and make them look addressable. After some processing the
1575 non-addressability may be uncovered again, causing ADDR_EXPRs
1576 of inappropriate objects to be built. */
1577 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1578 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1579 return true;
1580
1581 /* ... fall through ... */
1582
1583 case ARRAY_REF:
1584 case ARRAY_RANGE_REF:
1585 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1586
1587 CASE_CONVERT:
1588 return true;
1589
1590 default:
1591 break;
1592 }
1593
1594 return false;
1595 }
1596
1597 /* Finds addresses in *OP_P inside STMT. */
1598
1599 static void
1600 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1601 {
1602 tree base = *op_p, step = build_int_cst (sizetype, 0);
1603 struct iv *civ;
1604 struct ifs_ivopts_data ifs_ivopts_data;
1605
1606 /* Do not play with volatile memory references. A bit too conservative,
1607 perhaps, but safe. */
1608 if (gimple_has_volatile_ops (stmt))
1609 goto fail;
1610
1611 /* Ignore bitfields for now. Not really something terribly complicated
1612 to handle. TODO. */
1613 if (TREE_CODE (base) == BIT_FIELD_REF)
1614 goto fail;
1615
1616 base = unshare_expr (base);
1617
1618 if (TREE_CODE (base) == TARGET_MEM_REF)
1619 {
1620 tree type = build_pointer_type (TREE_TYPE (base));
1621 tree astep;
1622
1623 if (TMR_BASE (base)
1624 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1625 {
1626 civ = get_iv (data, TMR_BASE (base));
1627 if (!civ)
1628 goto fail;
1629
1630 TMR_BASE (base) = civ->base;
1631 step = civ->step;
1632 }
1633 if (TMR_INDEX (base)
1634 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1635 {
1636 civ = get_iv (data, TMR_INDEX (base));
1637 if (!civ)
1638 goto fail;
1639
1640 TMR_INDEX (base) = civ->base;
1641 astep = civ->step;
1642
1643 if (astep)
1644 {
1645 if (TMR_STEP (base))
1646 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1647
1648 step = fold_build2 (PLUS_EXPR, type, step, astep);
1649 }
1650 }
1651
1652 if (integer_zerop (step))
1653 goto fail;
1654 base = tree_mem_ref_addr (type, base);
1655 }
1656 else
1657 {
1658 ifs_ivopts_data.ivopts_data = data;
1659 ifs_ivopts_data.stmt = stmt;
1660 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1661 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1662 || integer_zerop (ifs_ivopts_data.step))
1663 goto fail;
1664 step = ifs_ivopts_data.step;
1665
1666 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1667 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1668
1669 /* Check that the base expression is addressable. This needs
1670 to be done after substituting bases of IVs into it. */
1671 if (may_be_nonaddressable_p (base))
1672 goto fail;
1673
1674 /* Moreover, on strict alignment platforms, check that it is
1675 sufficiently aligned. */
1676 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1677 goto fail;
1678
1679 base = build_fold_addr_expr (base);
1680
1681 /* Substituting bases of IVs into the base expression might
1682 have caused folding opportunities. */
1683 if (TREE_CODE (base) == ADDR_EXPR)
1684 {
1685 tree *ref = &TREE_OPERAND (base, 0);
1686 while (handled_component_p (*ref))
1687 ref = &TREE_OPERAND (*ref, 0);
1688 if (TREE_CODE (*ref) == INDIRECT_REF)
1689 *ref = fold_indirect_ref (*ref);
1690 }
1691 }
1692
1693 civ = alloc_iv (base, step);
1694 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1695 return;
1696
1697 fail:
1698 for_each_index (op_p, idx_record_use, data);
1699 }
1700
1701 /* Finds and records invariants used in STMT. */
1702
1703 static void
1704 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1705 {
1706 ssa_op_iter iter;
1707 use_operand_p use_p;
1708 tree op;
1709
1710 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1711 {
1712 op = USE_FROM_PTR (use_p);
1713 record_invariant (data, op, false);
1714 }
1715 }
1716
1717 /* Finds interesting uses of induction variables in the statement STMT. */
1718
1719 static void
1720 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1721 {
1722 struct iv *iv;
1723 tree op, *lhs, *rhs;
1724 ssa_op_iter iter;
1725 use_operand_p use_p;
1726 enum tree_code code;
1727
1728 find_invariants_stmt (data, stmt);
1729
1730 if (gimple_code (stmt) == GIMPLE_COND)
1731 {
1732 find_interesting_uses_cond (data, stmt);
1733 return;
1734 }
1735
1736 if (is_gimple_assign (stmt))
1737 {
1738 lhs = gimple_assign_lhs_ptr (stmt);
1739 rhs = gimple_assign_rhs1_ptr (stmt);
1740
1741 if (TREE_CODE (*lhs) == SSA_NAME)
1742 {
1743 /* If the statement defines an induction variable, the uses are not
1744 interesting by themselves. */
1745
1746 iv = get_iv (data, *lhs);
1747
1748 if (iv && !integer_zerop (iv->step))
1749 return;
1750 }
1751
1752 code = gimple_assign_rhs_code (stmt);
1753 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1754 && (REFERENCE_CLASS_P (*rhs)
1755 || is_gimple_val (*rhs)))
1756 {
1757 if (REFERENCE_CLASS_P (*rhs))
1758 find_interesting_uses_address (data, stmt, rhs);
1759 else
1760 find_interesting_uses_op (data, *rhs);
1761
1762 if (REFERENCE_CLASS_P (*lhs))
1763 find_interesting_uses_address (data, stmt, lhs);
1764 return;
1765 }
1766 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1767 {
1768 find_interesting_uses_cond (data, stmt);
1769 return;
1770 }
1771
1772 /* TODO -- we should also handle address uses of type
1773
1774 memory = call (whatever);
1775
1776 and
1777
1778 call (memory). */
1779 }
1780
1781 if (gimple_code (stmt) == GIMPLE_PHI
1782 && gimple_bb (stmt) == data->current_loop->header)
1783 {
1784 iv = get_iv (data, PHI_RESULT (stmt));
1785
1786 if (iv && !integer_zerop (iv->step))
1787 return;
1788 }
1789
1790 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1791 {
1792 op = USE_FROM_PTR (use_p);
1793
1794 if (TREE_CODE (op) != SSA_NAME)
1795 continue;
1796
1797 iv = get_iv (data, op);
1798 if (!iv)
1799 continue;
1800
1801 find_interesting_uses_op (data, op);
1802 }
1803 }
1804
1805 /* Finds interesting uses of induction variables outside of loops
1806 on loop exit edge EXIT. */
1807
1808 static void
1809 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1810 {
1811 gimple phi;
1812 gimple_stmt_iterator psi;
1813 tree def;
1814
1815 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1816 {
1817 phi = gsi_stmt (psi);
1818 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1819 if (is_gimple_reg (def))
1820 find_interesting_uses_op (data, def);
1821 }
1822 }
1823
1824 /* Finds uses of the induction variables that are interesting. */
1825
1826 static void
1827 find_interesting_uses (struct ivopts_data *data)
1828 {
1829 basic_block bb;
1830 gimple_stmt_iterator bsi;
1831 basic_block *body = get_loop_body (data->current_loop);
1832 unsigned i;
1833 struct version_info *info;
1834 edge e;
1835
1836 if (dump_file && (dump_flags & TDF_DETAILS))
1837 fprintf (dump_file, "Uses:\n\n");
1838
1839 for (i = 0; i < data->current_loop->num_nodes; i++)
1840 {
1841 edge_iterator ei;
1842 bb = body[i];
1843
1844 FOR_EACH_EDGE (e, ei, bb->succs)
1845 if (e->dest != EXIT_BLOCK_PTR
1846 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1847 find_interesting_uses_outside (data, e);
1848
1849 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1850 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1851 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1852 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1853 }
1854
1855 if (dump_file && (dump_flags & TDF_DETAILS))
1856 {
1857 bitmap_iterator bi;
1858
1859 fprintf (dump_file, "\n");
1860
1861 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1862 {
1863 info = ver_info (data, i);
1864 if (info->inv_id)
1865 {
1866 fprintf (dump_file, " ");
1867 print_generic_expr (dump_file, info->name, TDF_SLIM);
1868 fprintf (dump_file, " is invariant (%d)%s\n",
1869 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1870 }
1871 }
1872
1873 fprintf (dump_file, "\n");
1874 }
1875
1876 free (body);
1877 }
1878
1879 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1880 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1881 we are at the top-level of the processed address. */
1882
1883 static tree
1884 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1885 unsigned HOST_WIDE_INT *offset)
1886 {
1887 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1888 enum tree_code code;
1889 tree type, orig_type = TREE_TYPE (expr);
1890 unsigned HOST_WIDE_INT off0, off1, st;
1891 tree orig_expr = expr;
1892
1893 STRIP_NOPS (expr);
1894
1895 type = TREE_TYPE (expr);
1896 code = TREE_CODE (expr);
1897 *offset = 0;
1898
1899 switch (code)
1900 {
1901 case INTEGER_CST:
1902 if (!cst_and_fits_in_hwi (expr)
1903 || integer_zerop (expr))
1904 return orig_expr;
1905
1906 *offset = int_cst_value (expr);
1907 return build_int_cst (orig_type, 0);
1908
1909 case POINTER_PLUS_EXPR:
1910 case PLUS_EXPR:
1911 case MINUS_EXPR:
1912 op0 = TREE_OPERAND (expr, 0);
1913 op1 = TREE_OPERAND (expr, 1);
1914
1915 op0 = strip_offset_1 (op0, false, false, &off0);
1916 op1 = strip_offset_1 (op1, false, false, &off1);
1917
1918 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1919 if (op0 == TREE_OPERAND (expr, 0)
1920 && op1 == TREE_OPERAND (expr, 1))
1921 return orig_expr;
1922
1923 if (integer_zerop (op1))
1924 expr = op0;
1925 else if (integer_zerop (op0))
1926 {
1927 if (code == MINUS_EXPR)
1928 expr = fold_build1 (NEGATE_EXPR, type, op1);
1929 else
1930 expr = op1;
1931 }
1932 else
1933 expr = fold_build2 (code, type, op0, op1);
1934
1935 return fold_convert (orig_type, expr);
1936
1937 case MULT_EXPR:
1938 op1 = TREE_OPERAND (expr, 1);
1939 if (!cst_and_fits_in_hwi (op1))
1940 return orig_expr;
1941
1942 op0 = TREE_OPERAND (expr, 0);
1943 op0 = strip_offset_1 (op0, false, false, &off0);
1944 if (op0 == TREE_OPERAND (expr, 0))
1945 return orig_expr;
1946
1947 *offset = off0 * int_cst_value (op1);
1948 if (integer_zerop (op0))
1949 expr = op0;
1950 else
1951 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1952
1953 return fold_convert (orig_type, expr);
1954
1955 case ARRAY_REF:
1956 case ARRAY_RANGE_REF:
1957 if (!inside_addr)
1958 return orig_expr;
1959
1960 step = array_ref_element_size (expr);
1961 if (!cst_and_fits_in_hwi (step))
1962 break;
1963
1964 st = int_cst_value (step);
1965 op1 = TREE_OPERAND (expr, 1);
1966 op1 = strip_offset_1 (op1, false, false, &off1);
1967 *offset = off1 * st;
1968
1969 if (top_compref
1970 && integer_zerop (op1))
1971 {
1972 /* Strip the component reference completely. */
1973 op0 = TREE_OPERAND (expr, 0);
1974 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1975 *offset += off0;
1976 return op0;
1977 }
1978 break;
1979
1980 case COMPONENT_REF:
1981 if (!inside_addr)
1982 return orig_expr;
1983
1984 tmp = component_ref_field_offset (expr);
1985 if (top_compref
1986 && cst_and_fits_in_hwi (tmp))
1987 {
1988 /* Strip the component reference completely. */
1989 op0 = TREE_OPERAND (expr, 0);
1990 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1991 *offset = off0 + int_cst_value (tmp);
1992 return op0;
1993 }
1994 break;
1995
1996 case ADDR_EXPR:
1997 op0 = TREE_OPERAND (expr, 0);
1998 op0 = strip_offset_1 (op0, true, true, &off0);
1999 *offset += off0;
2000
2001 if (op0 == TREE_OPERAND (expr, 0))
2002 return orig_expr;
2003
2004 expr = build_fold_addr_expr (op0);
2005 return fold_convert (orig_type, expr);
2006
2007 case INDIRECT_REF:
2008 inside_addr = false;
2009 break;
2010
2011 default:
2012 return orig_expr;
2013 }
2014
2015 /* Default handling of expressions for that we want to recurse into
2016 the first operand. */
2017 op0 = TREE_OPERAND (expr, 0);
2018 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2019 *offset += off0;
2020
2021 if (op0 == TREE_OPERAND (expr, 0)
2022 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2023 return orig_expr;
2024
2025 expr = copy_node (expr);
2026 TREE_OPERAND (expr, 0) = op0;
2027 if (op1)
2028 TREE_OPERAND (expr, 1) = op1;
2029
2030 /* Inside address, we might strip the top level component references,
2031 thus changing type of the expression. Handling of ADDR_EXPR
2032 will fix that. */
2033 expr = fold_convert (orig_type, expr);
2034
2035 return expr;
2036 }
2037
2038 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2039
2040 static tree
2041 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2042 {
2043 return strip_offset_1 (expr, false, false, offset);
2044 }
2045
2046 /* Returns variant of TYPE that can be used as base for different uses.
2047 We return unsigned type with the same precision, which avoids problems
2048 with overflows. */
2049
2050 static tree
2051 generic_type_for (tree type)
2052 {
2053 if (POINTER_TYPE_P (type))
2054 return unsigned_type_for (type);
2055
2056 if (TYPE_UNSIGNED (type))
2057 return type;
2058
2059 return unsigned_type_for (type);
2060 }
2061
2062 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2063 the bitmap to that we should store it. */
2064
2065 static struct ivopts_data *fd_ivopts_data;
2066 static tree
2067 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2068 {
2069 bitmap *depends_on = (bitmap *) data;
2070 struct version_info *info;
2071
2072 if (TREE_CODE (*expr_p) != SSA_NAME)
2073 return NULL_TREE;
2074 info = name_info (fd_ivopts_data, *expr_p);
2075
2076 if (!info->inv_id || info->has_nonlin_use)
2077 return NULL_TREE;
2078
2079 if (!*depends_on)
2080 *depends_on = BITMAP_ALLOC (NULL);
2081 bitmap_set_bit (*depends_on, info->inv_id);
2082
2083 return NULL_TREE;
2084 }
2085
2086 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2087 position to POS. If USE is not NULL, the candidate is set as related to
2088 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2089 replacement of the final value of the iv by a direct computation. */
2090
2091 static struct iv_cand *
2092 add_candidate_1 (struct ivopts_data *data,
2093 tree base, tree step, bool important, enum iv_position pos,
2094 struct iv_use *use, gimple incremented_at)
2095 {
2096 unsigned i;
2097 struct iv_cand *cand = NULL;
2098 tree type, orig_type;
2099
2100 if (base)
2101 {
2102 orig_type = TREE_TYPE (base);
2103 type = generic_type_for (orig_type);
2104 if (type != orig_type)
2105 {
2106 base = fold_convert (type, base);
2107 step = fold_convert (type, step);
2108 }
2109 }
2110
2111 for (i = 0; i < n_iv_cands (data); i++)
2112 {
2113 cand = iv_cand (data, i);
2114
2115 if (cand->pos != pos)
2116 continue;
2117
2118 if (cand->incremented_at != incremented_at
2119 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2120 && cand->ainc_use != use))
2121 continue;
2122
2123 if (!cand->iv)
2124 {
2125 if (!base && !step)
2126 break;
2127
2128 continue;
2129 }
2130
2131 if (!base && !step)
2132 continue;
2133
2134 if (operand_equal_p (base, cand->iv->base, 0)
2135 && operand_equal_p (step, cand->iv->step, 0))
2136 break;
2137 }
2138
2139 if (i == n_iv_cands (data))
2140 {
2141 cand = XCNEW (struct iv_cand);
2142 cand->id = i;
2143
2144 if (!base && !step)
2145 cand->iv = NULL;
2146 else
2147 cand->iv = alloc_iv (base, step);
2148
2149 cand->pos = pos;
2150 if (pos != IP_ORIGINAL && cand->iv)
2151 {
2152 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2153 cand->var_after = cand->var_before;
2154 }
2155 cand->important = important;
2156 cand->incremented_at = incremented_at;
2157 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2158
2159 if (step
2160 && TREE_CODE (step) != INTEGER_CST)
2161 {
2162 fd_ivopts_data = data;
2163 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2164 }
2165
2166 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2167 cand->ainc_use = use;
2168 else
2169 cand->ainc_use = NULL;
2170
2171 if (dump_file && (dump_flags & TDF_DETAILS))
2172 dump_cand (dump_file, cand);
2173 }
2174
2175 if (important && !cand->important)
2176 {
2177 cand->important = true;
2178 if (dump_file && (dump_flags & TDF_DETAILS))
2179 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2180 }
2181
2182 if (use)
2183 {
2184 bitmap_set_bit (use->related_cands, i);
2185 if (dump_file && (dump_flags & TDF_DETAILS))
2186 fprintf (dump_file, "Candidate %d is related to use %d\n",
2187 cand->id, use->id);
2188 }
2189
2190 return cand;
2191 }
2192
2193 /* Returns true if incrementing the induction variable at the end of the LOOP
2194 is allowed.
2195
2196 The purpose is to avoid splitting latch edge with a biv increment, thus
2197 creating a jump, possibly confusing other optimization passes and leaving
2198 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2199 is not available (so we do not have a better alternative), or if the latch
2200 edge is already nonempty. */
2201
2202 static bool
2203 allow_ip_end_pos_p (struct loop *loop)
2204 {
2205 if (!ip_normal_pos (loop))
2206 return true;
2207
2208 if (!empty_block_p (ip_end_pos (loop)))
2209 return true;
2210
2211 return false;
2212 }
2213
2214 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2215 Important field is set to IMPORTANT. */
2216
2217 static void
2218 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2219 bool important, struct iv_use *use)
2220 {
2221 basic_block use_bb = gimple_bb (use->stmt);
2222 enum machine_mode mem_mode;
2223 unsigned HOST_WIDE_INT cstepi;
2224
2225 /* If we insert the increment in any position other than the standard
2226 ones, we must ensure that it is incremented once per iteration.
2227 It must not be in an inner nested loop, or one side of an if
2228 statement. */
2229 if (use_bb->loop_father != data->current_loop
2230 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2231 || stmt_could_throw_p (use->stmt)
2232 || !cst_and_fits_in_hwi (step))
2233 return;
2234
2235 cstepi = int_cst_value (step);
2236
2237 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2238 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2239 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2240 {
2241 enum tree_code code = MINUS_EXPR;
2242 tree new_base;
2243 tree new_step = step;
2244
2245 if (POINTER_TYPE_P (TREE_TYPE (base)))
2246 {
2247 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2248 code = POINTER_PLUS_EXPR;
2249 }
2250 else
2251 new_step = fold_convert (TREE_TYPE (base), new_step);
2252 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2253 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2254 use->stmt);
2255 }
2256 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2257 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2258 {
2259 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2260 use->stmt);
2261 }
2262 }
2263
2264 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2265 position to POS. If USE is not NULL, the candidate is set as related to
2266 it. The candidate computation is scheduled on all available positions. */
2267
2268 static void
2269 add_candidate (struct ivopts_data *data,
2270 tree base, tree step, bool important, struct iv_use *use)
2271 {
2272 if (ip_normal_pos (data->current_loop))
2273 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2274 if (ip_end_pos (data->current_loop)
2275 && allow_ip_end_pos_p (data->current_loop))
2276 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2277
2278 if (use != NULL && use->type == USE_ADDRESS)
2279 add_autoinc_candidates (data, base, step, important, use);
2280 }
2281
2282 /* Add a standard "0 + 1 * iteration" iv candidate for a
2283 type with SIZE bits. */
2284
2285 static void
2286 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2287 unsigned int size)
2288 {
2289 tree type = lang_hooks.types.type_for_size (size, true);
2290 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2291 true, NULL);
2292 }
2293
2294 /* Adds standard iv candidates. */
2295
2296 static void
2297 add_standard_iv_candidates (struct ivopts_data *data)
2298 {
2299 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2300
2301 /* The same for a double-integer type if it is still fast enough. */
2302 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2303 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2304 }
2305
2306
/* Adds candidates bases on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  /* The candidate with exactly the same base and step as the biv.  */
  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  For pointer bivs the zero is
     built in sizetype, since pointer types have no integer constants.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  The candidate increment is placed at the statement
	 that defines the value flowing back over the latch edge.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2338
2339 /* Adds candidates based on the old induction variables. */
2340
2341 static void
2342 add_old_ivs_candidates (struct ivopts_data *data)
2343 {
2344 unsigned i;
2345 struct iv *iv;
2346 bitmap_iterator bi;
2347
2348 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2349 {
2350 iv = ver_info (data, i)->iv;
2351 if (iv && iv->biv_p && !integer_zerop (iv->step))
2352 add_old_iv_candidates (data, iv);
2353 }
2354 }
2355
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  /* First, the candidate matching the value of the iv exactly.  */
  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
2384
2385 /* Adds candidates based on the uses. */
2386
2387 static void
2388 add_derived_ivs_candidates (struct ivopts_data *data)
2389 {
2390 unsigned i;
2391
2392 for (i = 0; i < n_iv_uses (data); i++)
2393 {
2394 struct iv_use *use = iv_use (data, i);
2395
2396 if (!use)
2397 continue;
2398
2399 switch (use->type)
2400 {
2401 case USE_NONLINEAR_EXPR:
2402 case USE_COMPARE:
2403 case USE_ADDRESS:
2404 /* Just add the ivs based on the value of the iv used here. */
2405 add_iv_value_candidates (data, use->iv, use);
2406 break;
2407
2408 default:
2409 gcc_unreachable ();
2410 }
2411 }
2412 }
2413
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  /* Collect the ids of all candidates marked important into a bitmap.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  /* With few enough candidates we can afford to examine every
     (use, candidate) pair.  */
  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
2452
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  /* Count the related candidates of this use.  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.
	     The cost map is probed with a bitmask modulo in
	     set_use_iv_cost/get_use_iv_cost, which relies on this.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      /* XCNEWVEC zeroes the entries, so an unused slot has cand == NULL.  */
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
2486
2487 /* Returns description of computation cost of expression whose runtime
2488 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2489
2490 static comp_cost
2491 new_cost (unsigned runtime, unsigned complexity)
2492 {
2493 comp_cost cost;
2494
2495 cost.cost = runtime;
2496 cost.complexity = complexity;
2497
2498 return cost;
2499 }
2500
2501 /* Adds costs COST1 and COST2. */
2502
2503 static comp_cost
2504 add_costs (comp_cost cost1, comp_cost cost2)
2505 {
2506 cost1.cost += cost2.cost;
2507 cost1.complexity += cost2.complexity;
2508
2509 return cost1;
2510 }
2511 /* Subtracts costs COST1 and COST2. */
2512
2513 static comp_cost
2514 sub_costs (comp_cost cost1, comp_cost cost2)
2515 {
2516 cost1.cost -= cost2.cost;
2517 cost1.complexity -= cost2.complexity;
2518
2519 return cost1;
2520 }
2521
2522 /* Returns a negative number if COST1 < COST2, a positive number if
2523 COST1 > COST2, and 0 if COST1 = COST2. */
2524
2525 static int
2526 compare_costs (comp_cost cost1, comp_cost cost2)
2527 {
2528 if (cost1.cost == cost2.cost)
2529 return cost1.complexity - cost2.complexity;
2530
2531 return cost1.cost - cost2.cost;
2532 }
2533
/* Returns true if COST is infinite, i.e. the (use, candidate) pair
   is unusable and should be discarded by the caller.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
2541
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value)
{
  unsigned i, s;

  /* Infinite cost means the pair is unusable; do not record it at all,
     and free the bitmap we would otherwise have taken ownership of.  */
  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  /* With the full two-dimensional map, the candidate id indexes directly.  */
  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      return;
    }

  /* Otherwise the map is a small open-addressed hash table: probe
     linearly from the hashed slot, wrapping around once; a NULL cand
     marks a free slot.  n_map_members is a power of two, so this
     computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* alloc_use_cost_map sized the table to hold all related candidates,
     so a free slot must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
}
2585
/* Gets cost of (USE, CANDIDATE) pair.  Returns NULL if the pair was
   never recorded (e.g. because its cost was infinite).  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  /* Direct indexing by candidate id in the full-map case.  */
  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* Otherwise probe the open-addressed table the same way
     set_use_iv_cost filled it.  n_map_members is a power of two,
     so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2619
2620 /* Returns estimate on cost of computing SEQ. */
2621
2622 static unsigned
2623 seq_cost (rtx seq, bool speed)
2624 {
2625 unsigned cost = 0;
2626 rtx set;
2627
2628 for (; seq; seq = NEXT_INSN (seq))
2629 {
2630 set = single_set (seq);
2631 if (set)
2632 cost += rtx_cost (set, SET,speed);
2633 else
2634 cost++;
2635 }
2636
2637 return cost;
2638 }
2639
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.
   REGNO points to the counter of fake registers to hand out; it is only
   used (and incremented) for non-static objects.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      /* Static/external objects get a symbolic address, run through the
	 target hook so section attributes are encoded as usual.  */
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (Pmode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      /* Automatic objects: pretend the address lives in a fresh
	 fake register.  */
      x = gen_raw_REG (Pmode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
    }

  return x;
}
2663
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.

   Every decl that gets a fake DECL_RTL here is also pushed onto
   decl_rtl_to_reset so the caller can undo the assignment later.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip component/array references to reach the base object whose
	 address is actually taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* *ws = 0 stops walk_tree from descending into subtrees.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register; fake a memory
	 location for them instead.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2721
/* Determines cost of the computation of EXPR, by expanding it to RTL in a
   throwaway sequence and summing the insn costs.  SPEED selects whether we
   optimize for speed or size.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  /* Save the function frequency; it is temporarily overridden so the
     expander's cost decisions are not biased by the current function.  */
  enum function_frequency real_frequency = cfun->function_frequency;

  cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give every referenced decl a fake DECL_RTL so expansion can proceed
     without touching real registers.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  cfun->function_frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* If the result ended up in memory, account for the cost of the
     address computation as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);

  return cost;
}
2750
2751 /* Returns variable containing the value of candidate CAND at statement AT. */
2752
2753 static tree
2754 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2755 {
2756 if (stmt_after_increment (loop, cand, stmt))
2757 return cand->var_after;
2758 else
2759 return cand->var_before;
2760 }
2761
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  /* The constant is stored in two HOST_WIDE_INT halves; pick the half
     holding the sign bit and adjust the bit position accordingly.  */
  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
2781
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  /* A must be a conversion whose operand is at least as wide as A.  */
  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  /* B must be a conversion from a type of the same precision as A's
     inner type; otherwise there is no common wider type to use.  */
  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  /* Strip both conversions and report the shared wider type.  */
  *a = suba;
  *b = subb;
  return wider_type;
}
2817
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  /* Do the arithmetic in the unsigned variant of the use type, so that
     overflow has wrapping (defined) semantics.  */
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* USE is expressible only when its step is a constant multiple of the
     candidate's step; RAT receives that ratio.  */
  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  /* Combine: aff = ubase - rat * cbase (in the wider type), converted
     to UUTYPE, plus rat * var.  */
  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
2893
2894 /* Determines the expression by that USE is expressed from induction variable
2895 CAND at statement AT in LOOP. The computation is unshared. */
2896
2897 static tree
2898 get_computation_at (struct loop *loop,
2899 struct iv_use *use, struct iv_cand *cand, gimple at)
2900 {
2901 aff_tree aff;
2902 tree type = TREE_TYPE (use->iv->base);
2903
2904 if (!get_computation_aff (loop, use, cand, at, &aff))
2905 return NULL_TREE;
2906 unshare_aff_combination (&aff);
2907 return fold_convert (type, aff_combination_to_tree (&aff));
2908 }
2909
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper around
   get_computation_at that evaluates at the use's own statement.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
2918
/* Returns cost of addition in MODE.  The result is cached per mode, so the
   insn sequence is synthesized and costed only once for each mode.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  /* Cached value (zero means "not computed yet").  */
  if (costs[mode])
    return costs[mode];

  /* Synthesize reg1 + reg2 in MODE using fake registers and cost the
     resulting insn sequence.  */
  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  /* Never report a zero cost; it would also defeat the cache.  */
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
2950
/* Entry in a hashtable of already known costs for multiplication,
   keyed by (cst, mode); used as the cache in multiply_by_cost.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};
2958
2959 /* Counts hash value for the ENTRY. */
2960
2961 static hashval_t
2962 mbc_entry_hash (const void *entry)
2963 {
2964 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2965
2966 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2967 }
2968
2969 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2970
2971 static int
2972 mbc_entry_eq (const void *entry1, const void *entry2)
2973 {
2974 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2975 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2976
2977 return (e1->mode == e2->mode
2978 && e1->cst == e2->cst);
2979 }
2980
/* Returns cost of multiplication by constant CST in MODE.  Results are
   memoized in a lazily created hash table keyed by (CST, MODE).  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  /* Create the cache on first use; entries are freed by the table.  */
  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  /* Reserve the slot now; the cost is filled in below.  */
  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  /* Synthesize reg * CST and cost the expanded insn sequence.  */
  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
3020
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
{
#define MAX_RATIO 128
  /* Per-mode cache of which ratios in [-MAX_RATIO, MAX_RATIO] are valid;
     computed lazily by probing memory_address_p.  */
  static sbitmap valid_mult[MAX_MACHINE_MODE];

  if (!valid_mult[mode])
    {
      rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult[mode]);
      /* Build a (mult reg ratio) address template and test each ratio.  */
      addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (memory_address_p (mode, addr))
	    SET_BIT (valid_mult[mode], i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Ratios outside the probed range are conservatively rejected.  */
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
}
3062
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, enum machine_mode mem_mode, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  /* Per-mode caches, filled in lazily on first call for each mode:
     offset range, sample ratio/offset, 16 addressing-form costs, and
     which autoinc/dec address forms the target accepts.  */
  static bool initialized[MAX_MACHINE_MODE];
  static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
  static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
  static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (!initialized[mem_mode])
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
      int old_cse_not_expected;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      initialized[mem_mode] = true;

      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);

      /* Probe increasing positive offsets (doubling each time) to find
	 the largest offset the target accepts in an address.  */
      addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
      for (i = start; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (i, Pmode);
	  if (!memory_address_p (mem_mode, addr))
	    break;
	}
      max_offset[mem_mode] = i == start ? 0 : i >> 1;
      off[mem_mode] = max_offset[mem_mode];

      /* Likewise for negative offsets.  */
      for (i = start; i <= 1 << 20; i <<= 1)
	{
	  XEXP (addr, 1) = gen_int_mode (-i, Pmode);
	  if (!memory_address_p (mem_mode, addr))
	    break;
	}
      min_offset[mem_mode] = i == start ? 0 : -(i >> 1);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s %d\n",
		   GET_MODE_NAME (mem_mode),
		   (int) min_offset[mem_mode]);
	  fprintf (dump_file, "  max offset %s %d\n",
		   GET_MODE_NAME (mem_mode),
		   (int) max_offset[mem_mode]);
	}

      /* Pick a sample ratio (the smallest allowed one above 1) to use in
	 the address forms costed below.  */
      rat[mem_mode] = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode))
	  {
	    rat[mem_mode] = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);

      /* Record which autoincrement/decrement address forms the target
	 accepts for this memory mode.  */
      if (HAVE_PRE_DECREMENT)
	{
	  addr = gen_rtx_PRE_DEC (Pmode, reg0);
	  has_predec[mem_mode] = memory_address_p (mem_mode, addr);
	}
      if (HAVE_POST_DECREMENT)
	{
	  addr = gen_rtx_POST_DEC (Pmode, reg0);
	  has_postdec[mem_mode] = memory_address_p (mem_mode, addr);
	}
      if (HAVE_PRE_INCREMENT)
	{
	  addr = gen_rtx_PRE_INC (Pmode, reg0);
	  has_preinc[mem_mode] = memory_address_p (mem_mode, addr);
	}
      if (HAVE_POST_INCREMENT)
	{
	  addr = gen_rtx_POST_INC (Pmode, reg0);
	  has_postinc[mem_mode] = memory_address_p (mem_mode, addr);
	}
      /* Cost all 16 combinations of {symbol, var, offset, ratio}
	 presence; the combination is encoded in the bits of I.  */
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
				   gen_int_mode (rat[mem_mode], Pmode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, Pmode,
				      gen_rtx_fmt_ee (PLUS, Pmode,
						      base,
						      gen_int_mode (off[mem_mode],
								    Pmode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off[mem_mode], Pmode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address (mem_mode, addr);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, speed);

	  if (!acost)
	    acost = 1;
	  costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (Pmode, speed);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
	    costs[mem_mode][1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}
    }

  /* Sign-extend OFFSET from the width of Pmode.  */
  bits = GET_MODE_BITSIZE (Pmode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  /* Decide whether the access matches one of the autoinc/dec forms:
     only possible for plain reg addresses (no symbol/var/ratio) whose
     step matches the access size.  */
  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
	    && msize == cstep)
	   || (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	   || (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	   || (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep))
    autoinc = true;

  /* Compose the final cost: parts that do not fit the addressing mode
     must be computed separately and are charged extra.  */
  cost = 0;
  offset_p = (s_offset != 0
	      && min_offset[mem_mode] <= s_offset
	      && s_offset <= max_offset[mem_mode]);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, Pmode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (Pmode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
3328
/* Estimates cost of forcing expression EXPR into a variable.  SPEED is
   true when optimizing the enclosing code for speed, false for size.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  /* Lazily-initialized costs of representative computations, indexed by
     the SPEED flag (0 = size, 1 = speed).  */
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      /* Build a static dummy variable so that its address, and sums of
	 that address with a constant, can be costed through expansion.  */
      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      /* Measure once for size (i == 0) and once for speed (i == 1).  */
      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (build2 (POINTER_PLUS_EXPR, type,
					addr,
					build_int_cst (sizetype, 2000)), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* An SSA name or a declaration is already a variable -- zero cost.  */
  if (SSA_VAR_P (expr))
    return zero_cost;

  /* Constants: use the measured representative costs.  */
  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  /* Address of a plain declaration is a symbol reference.  */
	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  /* Recursively cost the operands of the supported binary/unary
     operations; gimple values are free.  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
	cost1 = zero_cost;
      else
	cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  /* Add the cost of the operation itself in the expression's mode.  */
  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      /* Multiplication by a constant may be cheaper (shift/add
	 sequences); a general multiply is treated as a spill.  */
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
3481
3482 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3483 invariants the computation depends on. */
3484
3485 static comp_cost
3486 force_var_cost (struct ivopts_data *data,
3487 tree expr, bitmap *depends_on)
3488 {
3489 if (depends_on)
3490 {
3491 fd_ivopts_data = data;
3492 walk_tree (&expr, find_depends, depends_on, NULL);
3493 }
3494
3495 return force_expr_to_var_cost (expr, data->speed);
3496 }
3497
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  /* Decompose ADDR into the base object and a constant bit position.  */
  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  /* A variable offset, a bit-field position, or a base that is not a
     plain VAR_DECL cannot be split; treat the whole address as a
     generic variable part.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;

  /* Static or external variables have a symbolic address.  */
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  /* Otherwise the base is an automatic variable: a variable part but
     no symbol.  */
  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}
3542
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* If the difference folds to a constant, it goes entirely into
     OFFSET and no symbol or variable part remains.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  /* E1 - 0: just split the address E1 itself.  */
  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  /* General case: build the affine combination E1 - E2 and cost forcing
     it into a variable.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
3583
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  /* Peel constant offsets off both operands; they only contribute to
     OFFSET.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Differences of addresses are handled separately.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  /* Equal operands cancel out completely.  */
  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  /* E1 - 0 is just E1.  */
  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  /* 0 - E2 costs E2 plus a negation (modeled as multiply by -1).  */
  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  /* General case: build the affine combination E1 - E2 and cost forcing
     it into a variable.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
3638
3639 /* Determines the cost of the computation by that USE is expressed
3640 from induction variable CAND. If ADDRESS_P is true, we just need
3641 to create an address from it, otherwise we want to get it into
3642 register. A set of invariants we depend on is stored in
3643 DEPENDS_ON. AT is the statement at that the value is computed.
3644 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3645 addressing is likely. */
3646
3647 static comp_cost
3648 get_computation_cost_at (struct ivopts_data *data,
3649 struct iv_use *use, struct iv_cand *cand,
3650 bool address_p, bitmap *depends_on, gimple at,
3651 bool *can_autoinc)
3652 {
3653 tree ubase = use->iv->base, ustep = use->iv->step;
3654 tree cbase, cstep;
3655 tree utype = TREE_TYPE (ubase), ctype;
3656 unsigned HOST_WIDE_INT cstepi, offset = 0;
3657 HOST_WIDE_INT ratio, aratio;
3658 bool var_present, symbol_present, stmt_is_after_inc;
3659 comp_cost cost;
3660 double_int rat;
3661 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3662
3663 *depends_on = NULL;
3664
3665 /* Only consider real candidates. */
3666 if (!cand->iv)
3667 return infinite_cost;
3668
3669 cbase = cand->iv->base;
3670 cstep = cand->iv->step;
3671 ctype = TREE_TYPE (cbase);
3672
3673 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3674 {
3675 /* We do not have a precision to express the values of use. */
3676 return infinite_cost;
3677 }
3678
3679 if (address_p)
3680 {
3681 /* Do not try to express address of an object with computation based
3682 on address of a different object. This may cause problems in rtl
3683 level alias analysis (that does not expect this to be happening,
3684 as this is illegal in C), and would be unlikely to be useful
3685 anyway. */
3686 if (use->iv->base_object
3687 && cand->iv->base_object
3688 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3689 return infinite_cost;
3690 }
3691
3692 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3693 {
3694 /* TODO -- add direct handling of this case. */
3695 goto fallback;
3696 }
3697
3698 /* CSTEPI is removed from the offset in case statement is after the
3699 increment. If the step is not constant, we use zero instead.
3700 This is a bit imprecise (there is the extra addition), but
3701 redundancy elimination is likely to transform the code so that
3702 it uses value of the variable before increment anyway,
3703 so it is not that much unrealistic. */
3704 if (cst_and_fits_in_hwi (cstep))
3705 cstepi = int_cst_value (cstep);
3706 else
3707 cstepi = 0;
3708
3709 if (!constant_multiple_of (ustep, cstep, &rat))
3710 return infinite_cost;
3711
3712 if (double_int_fits_in_shwi_p (rat))
3713 ratio = double_int_to_shwi (rat);
3714 else
3715 return infinite_cost;
3716
3717 STRIP_NOPS (cbase);
3718 ctype = TREE_TYPE (cbase);
3719
3720 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3721 or ratio == 1, it is better to handle this like
3722
3723 ubase - ratio * cbase + ratio * var
3724
3725 (also holds in the case ratio == -1, TODO. */
3726
3727 if (cst_and_fits_in_hwi (cbase))
3728 {
3729 offset = - ratio * int_cst_value (cbase);
3730 cost = difference_cost (data,
3731 ubase, build_int_cst (utype, 0),
3732 &symbol_present, &var_present, &offset,
3733 depends_on);
3734 }
3735 else if (ratio == 1)
3736 {
3737 cost = difference_cost (data,
3738 ubase, cbase,
3739 &symbol_present, &var_present, &offset,
3740 depends_on);
3741 }
3742 else if (address_p
3743 && !POINTER_TYPE_P (ctype)
3744 && multiplier_allowed_in_address_p (ratio,
3745 TYPE_MODE (TREE_TYPE (utype))))
3746 {
3747 cbase
3748 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3749 cost = difference_cost (data,
3750 ubase, cbase,
3751 &symbol_present, &var_present, &offset,
3752 depends_on);
3753 }
3754 else
3755 {
3756 cost = force_var_cost (data, cbase, depends_on);
3757 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3758 cost = add_costs (cost,
3759 difference_cost (data,
3760 ubase, build_int_cst (utype, 0),
3761 &symbol_present, &var_present,
3762 &offset, depends_on));
3763 }
3764
3765 /* If we are after the increment, the value of the candidate is higher by
3766 one iteration. */
3767 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3768 if (stmt_is_after_inc)
3769 offset -= ratio * cstepi;
3770
3771 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3772 (symbol/var1/const parts may be omitted). If we are looking for an
3773 address, find the cost of addressing this. */
3774 if (address_p)
3775 return add_costs (cost,
3776 get_address_cost (symbol_present, var_present,
3777 offset, ratio, cstepi,
3778 TYPE_MODE (TREE_TYPE (utype)),
3779 speed, stmt_is_after_inc,
3780 can_autoinc));
3781
3782 /* Otherwise estimate the costs for computing the expression. */
3783 if (!symbol_present && !var_present && !offset)
3784 {
3785 if (ratio != 1)
3786 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3787 return cost;
3788 }
3789
3790 /* Symbol + offset should be compile-time computable so consider that they
3791 are added once to the variable, if present. */
3792 if (var_present && (symbol_present || offset))
3793 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3794 / AVG_LOOP_NITER (data->current_loop);
3795
3796 /* Having offset does not affect runtime cost in case it is added to
3797 symbol, but it increases complexity. */
3798 if (offset)
3799 cost.complexity++;
3800
3801 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3802
3803 aratio = ratio > 0 ? ratio : -ratio;
3804 if (aratio != 1)
3805 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3806
3807 fallback:
3808 if (can_autoinc)
3809 *can_autoinc = false;
3810
3811 {
3812 /* Just get the expression, expand it and measure the cost. */
3813 tree comp = get_computation_at (data->current_loop, use, cand, at);
3814
3815 if (!comp)
3816 return infinite_cost;
3817
3818 if (address_p)
3819 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3820
3821 return new_cost (computation_cost (comp, speed), 0);
3822 }
3823 }
3824
3825 /* Determines the cost of the computation by that USE is expressed
3826 from induction variable CAND. If ADDRESS_P is true, we just need
3827 to create an address from it, otherwise we want to get it into
3828 register. A set of invariants we depend on is stored in
3829 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3830 autoinc addressing is likely. */
3831
3832 static comp_cost
3833 get_computation_cost (struct ivopts_data *data,
3834 struct iv_use *use, struct iv_cand *cand,
3835 bool address_p, bitmap *depends_on, bool *can_autoinc)
3836 {
3837 return get_computation_cost_at (data,
3838 use, cand, address_p, depends_on, use->stmt,
3839 can_autoinc);
3840 }
3841
3842 /* Determines cost of basing replacement of USE on CAND in a generic
3843 expression. */
3844
3845 static bool
3846 determine_use_iv_cost_generic (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand)
3848 {
3849 bitmap depends_on;
3850 comp_cost cost;
3851
3852 /* The simple case first -- if we need to express value of the preserved
3853 original biv, the cost is 0. This also prevents us from counting the
3854 cost of increment twice -- once at this use and once in the cost of
3855 the candidate. */
3856 if (cand->pos == IP_ORIGINAL
3857 && cand->incremented_at == use->stmt)
3858 {
3859 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3860 return true;
3861 }
3862
3863 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3864 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3865
3866 return !infinite_cost_p (cost);
3867 }
3868
3869 /* Determines cost of basing replacement of USE on CAND in an address. */
3870
3871 static bool
3872 determine_use_iv_cost_address (struct ivopts_data *data,
3873 struct iv_use *use, struct iv_cand *cand)
3874 {
3875 bitmap depends_on;
3876 bool can_autoinc;
3877 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3878 &can_autoinc);
3879
3880 if (cand->ainc_use == use)
3881 {
3882 if (can_autoinc)
3883 cost.cost -= cand->cost_step;
3884 /* If we generated the candidate solely for exploiting autoincrement
3885 opportunities, and it turns out it can't be used, set the cost to
3886 infinity to make sure we ignore it. */
3887 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3888 cost = infinite_cost;
3889 }
3890 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3891
3892 return !infinite_cost_p (cost);
3893 }
3894
3895 /* Computes value of candidate CAND at position AT in iteration NITER, and
3896 stores it to VAL. */
3897
3898 static void
3899 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3900 aff_tree *val)
3901 {
3902 aff_tree step, delta, nit;
3903 struct iv *iv = cand->iv;
3904 tree type = TREE_TYPE (iv->base);
3905 tree steptype = type;
3906 if (POINTER_TYPE_P (type))
3907 steptype = sizetype;
3908
3909 tree_to_aff_combination (iv->step, steptype, &step);
3910 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3911 aff_combination_convert (&nit, steptype);
3912 aff_combination_mult (&nit, &step, &delta);
3913 if (stmt_after_increment (loop, cand, at))
3914 aff_combination_add (&delta, &step);
3915
3916 tree_to_aff_combination (iv->base, type, val);
3917 aff_combination_add (val, &delta);
3918 }
3919
/* Returns period of induction variable iv, i.e. the number of iterations
   before the iv wraps back to its initial value.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  /* Period of the iv is gcd (step, type range).  Since type range is power
     of two, it suffices to determine the maximum power of two that divides
     step.  */
  pow2div = num_ending_zeros (step);
  type = unsigned_type_for (TREE_TYPE (step));

  /* The period is 2^(precision - pow2div) - 1, built as a low-bits mask.  */
  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
3942
3943 /* Returns the comparison operator used when eliminating the iv USE. */
3944
3945 static enum tree_code
3946 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3947 {
3948 struct loop *loop = data->current_loop;
3949 basic_block ex_bb;
3950 edge exit;
3951
3952 ex_bb = gimple_bb (use->stmt);
3953 exit = EDGE_SUCC (ex_bb, 0);
3954 if (flow_bb_inside_loop_p (loop, exit->dest))
3955 exit = EDGE_SUCC (ex_bb, 1);
3956
3957 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3958 }
3959
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;

  /* Elimination needs a constant step to compute the period.  */
  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  /* Find the edge of the condition that leaves the loop; fail if both
     successors stay inside.  */
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_lt (nit, period))
	return false;
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else if (loop_only_exit_p (loop, exit))
    {
      double_int period_value, max_niter;
      if (!estimated_loop_iterations (loop, true, &max_niter))
	return false;
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) >= 0)
	return false;
    }

  /* Otherwise, punt.  */
  else
    return false;

  /* Compute the value CAND has when the loop exits; this is the bound
     the eliminated condition compares against.  */
  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;
  return true;
}
4032
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
  gcc_assert (ok);

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      /* Clear the source pointer so that the bitmap is not freed below;
	 its ownership moves into the cost pair.  */
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      /* No bound is recorded when the giv is kept.  */
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound);

  /* Free the bitmap of the approach that was not chosen.  */
  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
4097
4098 /* Determines cost of basing replacement of USE on CAND. Returns false
4099 if USE cannot be based on CAND. */
4100
4101 static bool
4102 determine_use_iv_cost (struct ivopts_data *data,
4103 struct iv_use *use, struct iv_cand *cand)
4104 {
4105 switch (use->type)
4106 {
4107 case USE_NONLINEAR_EXPR:
4108 return determine_use_iv_cost_generic (data, use, cand);
4109
4110 case USE_ADDRESS:
4111 return determine_use_iv_cost_address (data, use, cand);
4112
4113 case USE_COMPARE:
4114 return determine_use_iv_cost_condition (data, use, cand);
4115
4116 default:
4117 gcc_unreachable ();
4118 }
4119 }
4120
4121 /* Return true if get_computation_cost indicates that autoincrement is
4122 a possibility for the pair of USE and CAND, false otherwise. */
4123
4124 static bool
4125 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4126 struct iv_cand *cand)
4127 {
4128 bitmap depends_on;
4129 bool can_autoinc;
4130 comp_cost cost;
4131
4132 if (use->type != USE_ADDRESS)
4133 return false;
4134
4135 cost = get_computation_cost (data, use, cand, true, &depends_on,
4136 &can_autoinc);
4137
4138 BITMAP_FREE (depends_on);
4139
4140 return !infinite_cost_p (cost) && can_autoinc;
4141 }
4142
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;
      /* Find the use in the increment's block that is nearest before the
	 increment statement, i.e. with the largest uid not exceeding the
	 increment's uid.  */
      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);
	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
	      || uid > gimple_uid (cand->incremented_at))
	    continue;
	  if (closest == NULL || uid > gimple_uid (closest->stmt))
	    closest = use;
	}
      /* Record the pairing only if autoincrement is actually possible
	 for it.  */
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
	continue;
      cand->ainc_use = closest;
    }
}
4172
/* Finds the candidates for the induction variables.  The order of the
   steps matters: the autoinc pairing and importance recording examine
   the candidates created by the preceding add_* calls.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Pair IP_ORIGINAL candidates with uses allowing autoincrement.  */
  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4192
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  /* Scratch bitmap collecting candidates to detach from a use.  */
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  /* Cost every (use, candidate) pair.  */
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  /* Only cost the candidates related to this use.  */
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      /* Skip empty slots and pairs found infeasible.  */
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
4268
/* Determines cost of the candidate CAND, storing it into CAND->cost and
   the cost of its step into CAND->cost_step.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  /* Pseudocandidates (without an iv) cost nothing.  */
  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  /* The initialization cost is amortized over the expected number of
     iterations.  */
  cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
4310
4311 /* Determines costs of computation of the candidates. */
4312
4313 static void
4314 determine_iv_costs (struct ivopts_data *data)
4315 {
4316 unsigned i;
4317
4318 if (dump_file && (dump_flags & TDF_DETAILS))
4319 {
4320 fprintf (dump_file, "Candidate costs:\n");
4321 fprintf (dump_file, " cand\tcost\n");
4322 }
4323
4324 for (i = 0; i < n_iv_cands (data); i++)
4325 {
4326 struct iv_cand *cand = iv_cand (data, i);
4327
4328 determine_iv_cost (data, cand);
4329
4330 if (dump_file && (dump_flags & TDF_DETAILS))
4331 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4332 }
4333
4334 if (dump_file && (dump_flags & TDF_DETAILS))
4335 fprintf (dump_file, "\n");
4336 }
4337
4338 /* Calculates cost for having SIZE induction variables. */
4339
4340 static unsigned
4341 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4342 {
4343 /* We add size to the cost, so that we prefer eliminating ivs
4344 if possible. */
4345 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4346 }
4347
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  /* We use the following model (definitely improvable, especially the
     cost function -- TODO):

     We estimate the number of registers available (using MD data), name it A.

     We estimate the number of registers used by the loop, name it U.  This
     number is obtained as the number of loop phi nodes (not counting virtual
     registers and bivs) + the number of variables from outside of the loop.

     We set a reserve R (free regs that are used for temporary computations,
     etc.).  For now the reserve is a constant 3.

     Let I be the number of induction variables.

     -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
	make a lot of ivs without a reason).
     -- if A - R < U + I <= A, the cost is I * PRES_COST
     -- if U + I > A, the cost is I * PRES_COST and
	number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  /* Count loop phi results that hold registers and are not ivs.  */
  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      /* Skip virtual operands.  */
      if (!is_gimple_reg (op))
	continue;

      /* Bivs are accounted for separately as candidates.  */
      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Count invariants with nonlinear uses; they also occupy registers.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
4425
4426 /* Returns true if A is a cheaper cost pair than B. */
4427
4428 static bool
4429 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4430 {
4431 int cmp;
4432
4433 if (!a)
4434 return false;
4435
4436 if (!b)
4437 return true;
4438
4439 cmp = compare_costs (a->cost, b->cost);
4440 if (cmp < 0)
4441 return true;
4442
4443 if (cmp > 0)
4444 return false;
4445
4446 /* In case the costs are the same, prefer the cheaper candidate. */
4447 if (a->cand->cost < b->cand->cost)
4448 return true;
4449
4450 return false;
4451 }
4452
4453 /* Computes the cost field of IVS structure. */
4454
4455 static void
4456 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4457 {
4458 comp_cost cost = ivs->cand_use_cost;
4459 cost.cost += ivs->cand_cost;
4460 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4461
4462 ivs->cost = cost;
4463 }
4464
4465 /* Remove invariants in set INVS to set IVS. */
4466
4467 static void
4468 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4469 {
4470 bitmap_iterator bi;
4471 unsigned iid;
4472
4473 if (!invs)
4474 return;
4475
4476 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4477 {
4478 ivs->n_invariant_uses[iid]--;
4479 if (ivs->n_invariant_uses[iid] == 0)
4480 ivs->n_regs--;
4481 }
4482 }
4483
/* Set USE not to be expressed by any candidate in IVS.  Undoes all the
   bookkeeping that iv_ca_set_cp performed when the current cost pair was
   installed, and recounts the total cost.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  /* USE already has no candidate -- nothing to undo.  */
  if (!cp)
    return;
  cid = cp->cand->id;

  /* USE becomes unexpressed, which makes the whole assignment invalid
     until a new candidate is chosen (see iv_ca_cost).  */
  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      /* That was the last use of the candidate; drop the candidate
	 itself from the set.  */
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);
  iv_ca_recount_cost (data, ivs);
}
4519
4520 /* Add invariants in set INVS to set IVS. */
4521
4522 static void
4523 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4524 {
4525 bitmap_iterator bi;
4526 unsigned iid;
4527
4528 if (!invs)
4529 return;
4530
4531 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4532 {
4533 ivs->n_invariant_uses[iid]++;
4534 if (ivs->n_invariant_uses[iid] == 1)
4535 ivs->n_regs++;
4536 }
4537 }
4538
/* Set cost pair for USE in set IVS to CP.  Any previously installed pair
   is removed first; if CP is NULL, USE is simply left unexpressed.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  /* Nothing to do if USE is already expressed by CP.  */
  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Undo the bookkeeping for the previously chosen pair, if any.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  /* First use of this candidate in the set; account for the
	     candidate itself and its dependences.  */
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}
4577
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  If no candidate in IVS can express USE, the use is
   left unexpressed (a "bad use"), making the set's cost infinite.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  /* Uses must be added in order of their ids.  */
  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      /* USE is newly covered by the set; count it as unexpressed until
	 a cost pair is installed below.  */
      ivs->upto++;
      ivs->bad_uses++;
    }

  /* Pick the cheapest way to express USE by a candidate already
     present in the set.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
4607
4608 /* Get cost for assignment IVS. */
4609
4610 static comp_cost
4611 iv_ca_cost (struct iv_ca *ivs)
4612 {
4613 /* This was a conditional expression but it triggered a bug in
4614 Sun C 5.5. */
4615 if (ivs->bad_uses)
4616 return infinite_cost;
4617 else
4618 return ivs->cost;
4619 }
4620
4621 /* Returns true if all dependences of CP are among invariants in IVS. */
4622
4623 static bool
4624 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4625 {
4626 unsigned i;
4627 bitmap_iterator bi;
4628
4629 if (!cp->depends_on)
4630 return true;
4631
4632 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4633 {
4634 if (ivs->n_invariant_uses[i] == 0)
4635 return false;
4636 }
4637
4638 return true;
4639 }
4640
4641 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4642 it before NEXT_CHANGE. */
4643
4644 static struct iv_ca_delta *
4645 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4646 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4647 {
4648 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4649
4650 change->use = use;
4651 change->old_cp = old_cp;
4652 change->new_cp = new_cp;
4653 change->next_change = next_change;
4654
4655 return change;
4656 }
4657
4658 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4659 are rewritten. */
4660
4661 static struct iv_ca_delta *
4662 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4663 {
4664 struct iv_ca_delta *last;
4665
4666 if (!l2)
4667 return l1;
4668
4669 if (!l1)
4670 return l2;
4671
4672 for (last = l1; last->next_change; last = last->next_change)
4673 continue;
4674 last->next_change = l2;
4675
4676 return l1;
4677 }
4678
4679 /* Returns candidate by that USE is expressed in IVS. */
4680
4681 static struct cost_pair *
4682 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4683 {
4684 return ivs->cand_for_use[use->id];
4685 }
4686
4687 /* Reverse the list of changes DELTA, forming the inverse to it. */
4688
4689 static struct iv_ca_delta *
4690 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4691 {
4692 struct iv_ca_delta *act, *next, *prev = NULL;
4693 struct cost_pair *tmp;
4694
4695 for (act = delta; act; act = next)
4696 {
4697 next = act->next_change;
4698 act->next_change = prev;
4699 prev = act;
4700
4701 tmp = act->old_cp;
4702 act->old_cp = act->new_cp;
4703 act->new_cp = tmp;
4704 }
4705
4706 return prev;
4707 }
4708
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead: DELTA is reversed (which also swaps old_cp/new_cp in
   each element), applied, and then reversed back so the caller's list is
   left unchanged.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      /* The set must currently express the use by the pair we are
	 replacing, otherwise the delta does not match IVS.  */
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  /* Restore the original list for the caller.  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
4733
4734 /* Returns true if CAND is used in IVS. */
4735
4736 static bool
4737 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4738 {
4739 return ivs->n_cand_uses[cand->id] > 0;
4740 }
4741
4742 /* Returns number of induction variable candidates in the set IVS. */
4743
4744 static unsigned
4745 iv_ca_n_cands (struct iv_ca *ivs)
4746 {
4747 return ivs->n_cands;
4748 }
4749
4750 /* Free the list of changes DELTA. */
4751
4752 static void
4753 iv_ca_delta_free (struct iv_ca_delta **delta)
4754 {
4755 struct iv_ca_delta *act, *next;
4756
4757 for (act = *delta; act; act = next)
4758 {
4759 next = act->next_change;
4760 free (act);
4761 }
4762
4763 *delta = NULL;
4764 }
4765
4766 /* Allocates new iv candidates assignment. */
4767
4768 static struct iv_ca *
4769 iv_ca_new (struct ivopts_data *data)
4770 {
4771 struct iv_ca *nw = XNEW (struct iv_ca);
4772
4773 nw->upto = 0;
4774 nw->bad_uses = 0;
4775 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4776 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4777 nw->cands = BITMAP_ALLOC (NULL);
4778 nw->n_cands = 0;
4779 nw->n_regs = 0;
4780 nw->cand_use_cost = zero_cost;
4781 nw->cand_cost = 0;
4782 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4783 nw->cost = zero_cost;
4784
4785 return nw;
4786 }
4787
4788 /* Free memory occupied by the set IVS. */
4789
4790 static void
4791 iv_ca_free (struct iv_ca **ivs)
4792 {
4793 free ((*ivs)->cand_for_use);
4794 free ((*ivs)->n_cand_uses);
4795 BITMAP_FREE ((*ivs)->cands);
4796 free ((*ivs)->n_invariant_uses);
4797 free (*ivs);
4798 *ivs = NULL;
4799 }
4800
/* Dumps the candidate assignment IVS to FILE: its total cost, the
   bitmap of selected candidates, and the list of used invariants.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  /* PREF doubles as the state of the comma-separated list: it holds the
     header before the first invariant and ", " afterwards.  */
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates ","\n");

  /* Invariant ids start at 1 (see free_loop_data / ver_info usage).  */
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}
4821
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  The set itself is left unchanged:
   the delta is committed only temporarily to measure the cost, then
   reverted.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      /* Already expressed by CAND -- nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      /* CAND cannot express this use at all.  */
      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* Dependences of the new pair must already be in the set.  */
      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      /* Only switch when it actually helps this use.  */
      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the resulting cost, then restore the original set.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4867
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  If some use can only
   be expressed by CAND, returns infinite_cost and no delta.  The set
   itself is left unchanged.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      /* Only uses currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      /* Search the remaining candidates of the set for the cheapest
	 replacement.  The two branches differ only in the iteration
	 space: all set members, or just those related to the use.  */
      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      /* No other candidate can express this use -- CAND cannot be
	 removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the resulting cost, then restore the original set.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
4951
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  Works greedily: repeatedly removes the single
   candidate whose removal helps most, recursing until no removal
   improves the cost.  The set itself is left unchanged.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Find the single removal that improves the cost the most.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* No removal helped -- we are done.  */
  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  The temporary
     commit makes the recursive call see the narrowed set; the combined
     delta is returned relative to the original set.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
5001
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  Returns false if USE cannot be expressed by any
   candidate at all.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* Baseline: express USE by a candidate already in the set, if any.  */
  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      /* Remember that choice as a delta and detach it again, so that
	 the alternatives below start from the same state.  */
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Temporarily express USE by CAND, see how much the whole set
	 would gain from switching other uses to CAND too, then undo.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  /* Fall back to all candidates of USE only if nothing above worked.  */
  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important && cand->iv->base_object == NULL_TREE)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Install the cheapest alternative found.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
5105
5106 /* Finds an initial assignment of candidates to uses. */
5107
5108 static struct iv_ca *
5109 get_initial_solution (struct ivopts_data *data)
5110 {
5111 struct iv_ca *ivs = iv_ca_new (data);
5112 unsigned i;
5113
5114 for (i = 0; i < n_iv_uses (data); i++)
5115 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5116 {
5117 iv_ca_free (&ivs);
5118 return NULL;
5119 }
5120
5121 return ivs;
5122 }
5123
/* Tries to improve set of induction variables IVS by one local change:
   either adding a candidate (possibly followed by pruning), or pruning
   alone.  Returns true if the set was improved, false if no change
   helps.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Prune on the temporarily extended set, then fold the pruning
	     delta into the extension delta.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  /* Apply the winning change for real; its measured cost must match.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
5181
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  On
   success, records the chosen candidate in each use's `selected' field;
   returns NULL if no solution exists.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution.  */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate local improvements until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      comp_cost cost = iv_ca_cost (set);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
    }

  /* Publish the decision to the uses for the rewriting phase.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
5231
/* Creates a new induction variable corresponding to CAND, inserting its
   increment at the position encoded in CAND->pos.  For IP_ORIGINAL
   candidates nothing is created; the existing biv is preserved instead.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  /* Pseudocandidates have no iv to materialize.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment at the end of the latch block.  */
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment after the last statement of the exit block.  */
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      /* Increment next to the statement that uses the iv.  */
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
5282
5283 /* Creates new induction variables described in SET. */
5284
5285 static void
5286 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5287 {
5288 unsigned i;
5289 struct iv_cand *cand;
5290 bitmap_iterator bi;
5291
5292 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5293 {
5294 cand = iv_cand (data, i);
5295 create_new_iv (data, cand);
5296 }
5297 }
5298
5299 /* Returns the phi-node in BB with result RESULT. */
5300
5301 static gimple
5302 get_phi_with_result (basic_block bb, tree result)
5303 {
5304 gimple_stmt_iterator i = gsi_start_phis (bb);
5305
5306 for (; !gsi_end_p (i); gsi_next (&i))
5307 if (gimple_phi_result (gsi_stmt (i)) == result)
5308 return gsi_stmt (i);
5309
5310 gcc_unreachable ();
5311 return NULL;
5312 }
5313
5314
/* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
   is true, remove also the ssa name defined by the statement.  */

static void
remove_statement (gimple stmt, bool including_defined_name)
{
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Re-find the phi by its result to get a valid iterator for it;
	 phi removal goes through remove_phi_node.  */
      gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
					   gimple_phi_result (stmt));
      gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
      remove_phi_node (&bsi, including_defined_name);
    }
  else
    {
      /* Ordinary statement: remove it and release the names it defines.
	 Note this branch releases defs unconditionally, regardless of
	 INCLUDING_DEFINED_NAME.  */
      gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);
    }
}
5335
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step means the iv counts down; rewrite as MINUS of
	 the positive step.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
	  || old_code == MINUS_EXPR
	  || old_code == POINTER_PLUS_EXPR)
	{
	  /* Find the operand that is not var_before; for MINUS the
	     var must be on the left-hand side.  */
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (old_code != MINUS_EXPR
		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* A constant or step-equal operand is loop-independent, so the
	 statement may stay as it is.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      /* General case: express USE in terms of CAND.  */
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* The replacement assignment goes right after the labels of the
	 phi's block.  */
      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
				 true, GSI_SAME_STMT);

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* Replace the phi by an explicit assignment to its result.  */
      ass = gimple_build_assign (tgt, op);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
      remove_statement (use->stmt, false);
    }
  else
    {
      /* Replace the right-hand side in place.  */
      gimple_assign_set_rhs_from_tree (&bsi, op);
      use->stmt = gsi_stmt (bsi);
    }
}
5445
5446 /* Replaces ssa name in index IDX by its basic variable. Callback for
5447 for_each_index. */
5448
5449 static bool
5450 idx_remove_ssa_names (tree base, tree *idx,
5451 void *data ATTRIBUTE_UNUSED)
5452 {
5453 tree *op;
5454
5455 if (TREE_CODE (*idx) == SSA_NAME)
5456 *idx = SSA_NAME_VAR (*idx);
5457
5458 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5459 {
5460 op = &TREE_OPERAND (base, 2);
5461 if (*op
5462 && TREE_CODE (*op) == SSA_NAME)
5463 *op = SSA_NAME_VAR (*op);
5464 op = &TREE_OPERAND (base, 3);
5465 if (*op
5466 && TREE_CODE (*op) == SSA_NAME)
5467 *op = SSA_NAME_VAR (*op);
5468 }
5469
5470 return true;
5471 }
5472
5473 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5474
5475 static tree
5476 unshare_and_remove_ssa_names (tree ref)
5477 {
5478 ref = unshare_expr (ref);
5479 for_each_index (&ref, idx_remove_ssa_names, NULL);
5480
5481 return ref;
5482 }
5483
/* Copies the reference information from OLD_REF to NEW_REF.  If OLD_REF
   was already a TARGET_MEM_REF its metadata is copied directly;
   otherwise a cleaned-up copy of OLD_REF is recorded as the original of
   NEW_REF (NEW_REF is presumably a TARGET_MEM_REF here -- TMR_ORIGINAL
   is only meaningful for one; confirm against callers).  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
}
5494
/* Rewrites USE (address that is an iv) using candidate CAND: builds a
   memory reference for USE expressed in terms of CAND and substitutes
   it in place.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  /* Express the address as an affine combination in terms of CAND.  */
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* Build the new memory reference and splice it into the statement.  */
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
5514
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If the cost pair carries an elimination bound, the
   comparison is replaced by a compare of CAND against that bound;
   otherwise the original value is just re-expressed in terms of CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      /* IV elimination: compare CAND's value at this statement against
	 the precomputed bound.  */
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      /* The bound is loop-invariant, so any statements computing it go
	 on the preheader edge.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
5560
5561 /* Rewrites USE using candidate CAND. */
5562
5563 static void
5564 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5565 {
5566 switch (use->type)
5567 {
5568 case USE_NONLINEAR_EXPR:
5569 rewrite_use_nonlinear_expr (data, use, cand);
5570 break;
5571
5572 case USE_ADDRESS:
5573 rewrite_use_address (data, use, cand);
5574 break;
5575
5576 case USE_COMPARE:
5577 rewrite_use_compare (data, use, cand);
5578 break;
5579
5580 default:
5581 gcc_unreachable ();
5582 }
5583
5584 update_stmt (use->stmt);
5585 }
5586
5587 /* Rewrite the uses using the selected induction variables. */
5588
5589 static void
5590 rewrite_uses (struct ivopts_data *data)
5591 {
5592 unsigned i;
5593 struct iv_cand *cand;
5594 struct iv_use *use;
5595
5596 for (i = 0; i < n_iv_uses (data); i++)
5597 {
5598 use = iv_use (data, i);
5599 cand = use->selected;
5600 gcc_assert (cand);
5601
5602 rewrite_use (data, use, cand);
5603 }
5604 }
5605
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      /* Remove the defining statement only for a real (non-constant-step)
	 iv that is not an invariant, has no remaining uses, and is not a
	 biv we decided to preserve.  */
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}
5627
/* Frees data allocated by the optimization of a single loop, resetting
   DATA so the next loop starts from a clean state.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  /* Cached number-of-iterations information for this loop.  */
  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  /* Reset per-ssa-name version information.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Free the uses together with their cost maps.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  /* Free the candidates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  /* Grow the version info array if new ssa names were created.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Drop DECL_RTLs we set temporarily for cost estimation.  */
  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}
5698
5699 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5700 loop tree. */
5701
5702 static void
5703 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5704 {
5705 free_loop_data (data);
5706 free (data->version_info);
5707 BITMAP_FREE (data->relevant);
5708 BITMAP_FREE (data->important_candidates);
5709
5710 VEC_free (tree, heap, decl_rtl_to_reset);
5711 VEC_free (iv_use_p, heap, data->iv_uses);
5712 VEC_free (iv_cand_p, heap, data->iv_candidates);
5713 }
5714
5715 /* Optimizes the LOOP. Returns true if anything changed. */
5716
5717 static bool
5718 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5719 {
5720 bool changed = false;
5721 struct iv_ca *iv_ca;
5722 edge exit;
5723 basic_block *body;
5724
5725 gcc_assert (!data->niters);
5726 data->current_loop = loop;
5727 data->speed = optimize_loop_for_speed_p (loop);
5728
5729 if (dump_file && (dump_flags & TDF_DETAILS))
5730 {
5731 fprintf (dump_file, "Processing loop %d\n", loop->num);
5732
5733 exit = single_dom_exit (loop);
5734 if (exit)
5735 {
5736 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5737 exit->src->index, exit->dest->index);
5738 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5739 fprintf (dump_file, "\n");
5740 }
5741
5742 fprintf (dump_file, "\n");
5743 }
5744
5745 body = get_loop_body (loop);
5746 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5747 free (body);
5748
5749 /* For each ssa name determines whether it behaves as an induction variable
5750 in some loop. */
5751 if (!find_induction_variables (data))
5752 goto finish;
5753
5754 /* Finds interesting uses (item 1). */
5755 find_interesting_uses (data);
5756 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5757 goto finish;
5758
5759 /* Finds candidates for the induction variables (item 2). */
5760 find_iv_candidates (data);
5761
5762 /* Calculates the costs (item 3, part 1). */
5763 determine_iv_costs (data);
5764 determine_use_iv_costs (data);
5765 determine_set_costs (data);
5766
5767 /* Find the optimal set of induction variables (item 3, part 2). */
5768 iv_ca = find_optimal_iv_set (data);
5769 if (!iv_ca)
5770 goto finish;
5771 changed = true;
5772
5773 /* Create the new induction variables (item 4, part 1). */
5774 create_new_ivs (data, iv_ca);
5775 iv_ca_free (&iv_ca);
5776
5777 /* Rewrite the uses (item 4, part 2). */
5778 rewrite_uses (data);
5779
5780 /* Remove the ivs that are unused after rewriting. */
5781 remove_unused_ivs (data);
5782
5783 /* We have changed the structure of induction variables; it might happen
5784 that definitions in the scev database refer to some of them that were
5785 eliminated. */
5786 scev_reset ();
5787
5788 finish:
5789 free_loop_data (data);
5790
5791 return changed;
5792 }
5793
5794 /* Main entry point. Optimizes induction variables in loops. */
5795
5796 void
5797 tree_ssa_iv_optimize (void)
5798 {
5799 struct loop *loop;
5800 struct ivopts_data data;
5801 loop_iterator li;
5802
5803 tree_ssa_iv_optimize_init (&data);
5804
5805 /* Optimize the loops starting with the innermost ones. */
5806 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5807 {
5808 if (dump_file && (dump_flags & TDF_DETAILS))
5809 flow_loop_dump (loop, dump_file, NULL, 1);
5810
5811 tree_ssa_iv_optimize_loop (&data, loop);
5812 }
5813
5814 tree_ssa_iv_optimize_finalize (&data);
5815 }